# Processing the data

To speed up the subsequent analysis, we begin by processing the VASP output files into more manageable data files.

## Vasprun to json

The most time-consuming step in processing the required data is parsing the `vasprun.xml` files. Here we loop over all of the `vasprun.xml` files and extract the `CompleteDos` from each one.

In [36]:
import os, json
import numpy as np

from monty.json import MontyEncoder, MontyDecoder

from pymatgen.io.vasp.outputs import Vasprun
from pymatgen.electronic_structure.dos import CompleteDos

from pybat.core import Cathode, VORONOI_ANG_FACTOR, VORONOI_DIST_FACTOR

In [27]:
# Root directory under which get_dir looks for the per-structure directories.
data_dir = "/mnt/data/mbercx/batteries/"


def get_dir(calculation, doping_element, state, functional):
    """Return the directory of one calculation for a doped structure.

    The result has the form
    ``<data_dir>/Li8<doping_element>1Mn15O48/<state>/<functional>_<calculation>``.

    When ``functional`` is ``"pbeu"`` the prefix is looked up on disk instead
    of being used literally: the entries of the structure directory whose
    name contains both ``"pbeu"`` and ``calculation`` are collected, and the
    alphabetically first one (with ``"_" + calculation`` stripped) becomes
    the prefix.

    Args:
        calculation: Calculation label used as directory suffix, e.g.
            ``"dos"`` or ``"optimize"``.
        doping_element: Symbol inserted into the structure directory name.
        state: Sub-directory of the structure directory, e.g.
            ``"discharged"`` or ``"charged"``.
        functional: Directory prefix, or ``"pbeu"`` to trigger the lookup
            described above.

    Returns:
        The path of the calculation directory as a string.

    Raises:
        IndexError: If ``functional`` is ``"pbeu"`` and no entry of the
            structure directory matches.
        OSError: If ``functional`` is ``"pbeu"`` and the structure directory
            cannot be listed.
    """
    struc_dir = os.path.join(
        data_dir, "Li8" + doping_element + "1Mn15O48", state
    )

    if functional == "pbeu":
        # os.listdir returns entries in arbitrary order; sorting makes the
        # choice reproducible when more than one entry matches.
        matches = sorted(
            entry for entry in os.listdir(struc_dir)
            if "pbeu" in entry and calculation in entry
        )
        if not matches:
            raise IndexError(
                "No 'pbeu' directory for calculation '{}' found in {}".format(
                    calculation, struc_dir
                )
            )
        functional = matches[0].replace("_" + calculation, "")

    return os.path.join(struc_dir, functional + "_" + calculation)

def process_data(doping_element, functional="scan"):
    """Convert the raw results for one dopant into JSON files under ``data``.

    For both the "discharged" and the "charged" state this

    * parses ``vasprun.xml`` from the "dos" calculation directory and writes
      its ``complete_dos`` to
      ``data/<doping_element>/dos_<state>_<functional>.json``;
    * loads ``final_cathode.json`` from the "optimize" calculation directory
      and writes it to
      ``data/<doping_element>/cathode_<state>_<functional>.json``.

    Args:
        doping_element: Symbol of the dopant; also the name of the output
            sub-directory.
        functional: Functional label passed on to ``get_dir`` and used in
            the output file names.
    """
    os.makedirs("data/" + doping_element, exist_ok=True)

    for state in ["discharged", "charged"]:
        # Shared tail of both output file names for this state.
        name_tail = "_" + state + "_" + functional + ".json"

        # --- density of states -------------------------------------------
        dos_dir = get_dir(
            calculation="dos",
            doping_element=doping_element,
            state=state,
            functional=functional,
        )
        vasprun = Vasprun(os.path.join(dos_dir, "vasprun.xml"))

        dos_path = os.path.join("data", doping_element, "dos" + name_tail)
        with open(dos_path, "w") as handle:
            dos_json = json.dumps(
                vasprun.complete_dos.as_dict(), cls=MontyEncoder
            )
            handle.write(dos_json)

        # --- optimized cathode structure ---------------------------------
        optimize_dir = get_dir(
            calculation="optimize",
            doping_element=doping_element,
            state=state,
            functional=functional,
        )
        cathode = Cathode.from_file(
            os.path.join(optimize_dir, "final_cathode.json")
        )

        cathode_path = os.path.join(
            "data", doping_element, "cathode" + name_tail
        )
        cathode.to(fmt="json", filename=cathode_path)
        

In [None]:
# Dopants and functionals to process; the resulting files are placed in the
# "data" subdirectory.
doping_elements = ["Co", "Mo", "V", "Sn"]
functionals = ["scan", "pbeu"]

# Every (dopant, functional) pair, dopant-major, so the processing order is
# the same as a nested loop over dopants and then functionals.
combinations = [
    (element, functional)
    for element in doping_elements
    for functional in functionals
]

for element, functional in combinations:
    process_data(doping_element=element, functional=functional)

In [53]:
def get_data(data_type, doping_element, state="discharged", functional="scan"):
    """Load a processed JSON file and rebuild the object stored in it.

    The file read is
    ``data/<doping_element>/<data_type>_<state>_<functional>.json``.

    Args:
        data_type: ``"cathode"`` or ``"dos"``.
        doping_element: Symbol of the dopant (name of the data
            sub-directory).
        state: State label used in the file name.
        functional: Functional label used in the file name.

    Returns:
        ``Cathode.from_dict(...)`` of the file content for ``"cathode"``,
        ``CompleteDos.from_dict(...)`` of the file content for ``"dos"``.

    Raises:
        ValueError: If ``data_type`` is neither ``"cathode"`` nor ``"dos"``
            (raised after the file has been read).
        OSError: If the data file cannot be opened.
    """
    data_file = os.path.join("data", doping_element, data_type + "_"
                             + state + "_" + functional + ".json")

    # Parse into plain dictionaries: the ``from_dict`` constructors below
    # take the raw dict. The ``encoding`` keyword of ``json.loads`` was never
    # a decoder hook and was removed in Python 3.9, where passing it raises
    # a TypeError, so no extra keyword is given here.
    with open(data_file, "r") as file:
        data = json.load(file)

    if data_type == "cathode":
        return Cathode.from_dict(data)
    if data_type == "dos":
        return CompleteDos.from_dict(data)

    raise ValueError("Unrecognized data type.")

def analyze_magmoms(doping_element, state, functional):
    """Summarize the magnetic moments around the dopant of a stored cathode.

    The cathode is loaded with ``get_data``; the first site whose species
    string equals ``doping_element`` is taken as the dopant and printed.

    Args:
        doping_element: Symbol of the dopant.
        state: State label of the stored cathode.
        functional: Functional label of the stored cathode.

    Returns:
        A dict with the keys

        * ``"dopant"``: the ``"magmom"`` property of the dopant site;
        * ``"neighbor_O"``: mean ``"magmom"`` of the "O" sites among the
          neighbors returned by ``cathode.voronoi.neighbors`` for the
          dopant;
        * ``"other_O"``: mean ``"magmom"`` of all remaining "O" sites.
    """
    cathode = get_data(
        data_type="cathode",
        doping_element=doping_element,
        state=state,
        functional=functional,
    )

    # Index of the first site occupied by the dopant.
    dopant_idx = [
        idx for idx, site in enumerate(cathode.sites)
        if site.species_string == doping_element
    ][0]
    dopant_site = cathode.sites[dopant_idx]

    print(dopant_site)

    all_oxygen = [
        site for site in cathode.sites if site.species_string == "O"
    ]

    neighbor_entries = cathode.voronoi.neighbors(
        dopant_idx, VORONOI_DIST_FACTOR, VORONOI_ANG_FACTOR
    )
    neighbor_oxygen = []
    for entry in neighbor_entries:
        if entry["site"].species_string == "O":
            neighbor_oxygen.append(entry["site"])

    # Fill the summary in the documented key order.
    summary = {"dopant": dopant_site.properties["magmom"]}
    summary["neighbor_O"] = np.mean(
        [site.properties["magmom"] for site in neighbor_oxygen]
    )
    summary["other_O"] = np.mean(
        [site.properties["magmom"] for site in all_oxygen
         if site not in neighbor_oxygen]
    )
    return summary

def get_pdos(doping_element, state, functional):
    """Return the stored density of states as a dictionary.

    Loads the "dos" data for the given dopant, state and functional with
    ``get_data`` and returns the result of its ``as_dict()`` method.
    """
    complete_dos = get_data(
        data_type="dos",
        doping_element=doping_element,
        state=state,
        functional=functional,
    )
    return complete_dos.as_dict()

In [54]:
doping_elements = ["Co", "Mo", "V", "Sn"]
states = ["discharged", "charged"]
functionals = ["scan", "pbeu"]

# Nested mapping: element -> state -> functional -> magnetic moment summary.
magmom_dict = {}

for element in doping_elements:
    for state in states:
        for functional in functionals:
            summary = analyze_magmoms(
                doping_element=element,
                state=state,
                functional=functional,
            )
            # Create the intermediate levels on first use.
            per_state = magmom_dict.setdefault(element, {})
            per_state.setdefault(state, {})[functional] = summary

# Store the collected summaries as a single JSON file.
with open("data/magmom_data.json", "w") as handle:
    serialized = json.dumps(magmom_dict)
    handle.write(serialized)

[4.48799333 4.95661582 4.69657309] Co
[4.59587178 5.05974573 4.77974043] Co
[-4.68546878e-04  2.80984152e+00  2.00846712e+00] Co
[-3.05047770e-04  2.88379857e+00  2.05597980e+00] Co
[4.58680986 4.94494022 4.70924931] Mo
[4.63677422 5.06830362 4.78967771] Mo
[2.23906689e-03 2.81633406e+00 1.87592062e+00] Mo
[-6.53500749e-04  2.89535703e+00  2.06949817e+00] Mo
[4.51634918 4.97506846 4.69845197] V
[4.58491246 5.06505421 4.77839849] V
[-2.30224033e-03  2.81410375e+00  2.00196953e+00] V
[-3.79990042e-04  2.88872025e+00  2.05923787e+00] V
[4.53445282 4.99349902 4.70600096] Sn
[4.61376632 5.08440422 4.78919324] Sn
[1.09062123e-04 2.83909863e+00 1.99936167e+00] Sn
[-5.16822727e-04  2.91383547e+00  2.04838234e+00] Sn


In [49]:
doping_elements = ["Co", "Mo", "V", "Sn"]
states = ["discharged", "charged"]
functionals = ["scan", "pbeu"]

# Nested mapping: element -> state -> functional -> density of states dict.
pdos_data = {}

for element in doping_elements:
    for state in states:
        for functional in functionals:
            dos_dict = get_pdos(
                doping_element=element,
                state=state,
                functional=functional,
            )
            # Create the intermediate levels on first use.
            per_state = pdos_data.setdefault(element, {})
            per_state.setdefault(state, {})[functional] = dos_dict

# Store the collected density of states data as a single JSON file.
with open("data/pdos_data.json", "w") as handle:
    serialized = json.dumps(pdos_data)
    handle.write(serialized)