# Saving Materials Project data

In this notebook, we run through the elements that we need unary and binary oxide formation energies for.

In [1]:
import pickle

import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict

from pymatgen.ext.matproj import MPRester

# Initialize the MP Rester
mpr = MPRester('pn8XdbGhMrv90STu')


In [16]:
elements =  [ "Si", "Al", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Nb", "Mo", 
             "Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba", 
            "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
            "Ga", "Ge", "As", "Se", "Br",
            "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
            "In", "Sn", "Sb", "Te", "I",
            "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", 
            "Tl", "Pb", "Bi",
           "La", "Ce", "Nd", "Pr", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu"]

ele2gs = defaultdict()

for ele in elements:
    entries = mpr.get_entries(ele)
    for entry in entries:
        ene_per_atom = entry.energy/list(dict(entry.composition).values())[0]
        if ele not in ele2gs.keys():
            ele2gs[ele] = ene_per_atom
        elif ene_per_atom<ele2gs[ele]:
            ele2gs[ele] = ene_per_atom
            
ele2gs["O"] = -4.95
pickle.dump(ele2gs, open("ele2gs.p", "wb"))

# https://docs.materialsproject.org/methodology/materials-methodology/thermodynamic-stability/thermodynamic-stability
# we find these values by looking clicking on the "How do we arrive at this value?"
# prompt in the legacy Materials Project page for an oxide.
# eg. https://legacy.materialsproject.org/materials/mp-19306/#corrections-eqn

ele2correction = {
    "O": -0.687,
    "V": -1.700,
    "Cr": -1.999,
    "Mn": -1.668,
    "Fe": -2.256,
    "Co": -1.638,
    "Ni": -2.541,
    "Mo": -3.202,
    "W": -4.438
}

In [13]:
def calc_formation_ene(energy_per_atom, composition):
    """
    Need to calculate formation energies ourselves because materials project formation energies are inconsistent.
    See https://matsci.org/t/formation-energy-calculation/41574 for further information.
    
    """    
    total_atoms = sum(composition.values())
    formation_energy = energy_per_atom
    for element in composition:
        formation_energy-=ele2gs[element]*composition[element]/total_atoms
        if element in ele2correction.keys():
            formation_energy+=ele2correction[element]*composition[element]/total_atoms
    return formation_energy

def get_chemsys_data(chemsys):
    query = mpr.get_data(chemsys)
    material_ids = [q['material_id'] for q in query]
    material_names = [q['pretty_formula'] for q in query]
    material_enes = [q['energy_per_atom'] for q in query]
    material_comps = [q['unit_cell_formula'] for q in query]
    
    
    material_names_ = []
    material_ids_ = []
    form_enes_ = []
    structures = []

    for idx, energy in enumerate(material_enes):
        ref_form_ene = calc_formation_ene(energy, material_comps[idx])
        form_enes_.append(ref_form_ene)
        material_names_.append(material_names[idx])
        material_ids_.append(material_ids[idx])
        structure = mpr.get_structure_by_material_id(material_ids[idx])
        structures.append(structure)


    return material_ids_, material_names_, form_enes_, structures

# need to run this overnight to finish this, 
if you want the binary oxide data it needs to be run over a long time, code to do so is here. We have saved two files unary_oxide_data.p and binary_oxide_data.p and use them.

In [17]:
#binary_oxide_data = defaultdict()
#unary_oxide_data = defaultdict()

for idx, ele in enumerate(elements):
    if ele not in unary_oxide_data.keys():
        # save unary for ele, saves iterating over the same oxides
        ids_, names_, form_enes, structures = get_chemsys_data('{}-O'.format(ele))
        
        assert len(ids_)==len(names_)==len(form_enes)==len(structures)
        print(ele)
        unary_oxide_data[ele] = {
            "names": names_,
            "mp_ids": ids_,
            "formation_energies": form_enes,
            "structures": structures
        }
        pickle.dump(unary_oxide_data, open("unary_oxide_data.p", "wb"))
     
    for ele2 in elements[idx:]:
        # save unary for ele2
        if ele2 not in unary_oxide_data.keys():
            ids_, names_, form_enes, structures = get_chemsys_data('{}-O'.format(ele2))

            assert len(ids_)==len(names_)==len(form_enes)
            print(ele2)
            unary_oxide_data[ele2] = {
                "names": names_,
                "mp_ids": ids_,
                "formation_energies": form_enes,
                "structures": structures
            }
            pickle.dump(unary_oxide_data, open("unary_oxide_data.p", "wb"))
        
        
        if ele+"_"+ele2 in binary_oxide_data.keys() or ele==ele2:
            continue    
        # save binary
        print(ele, ele2)
        ids_, names_, form_enes, structures = get_chemsys_data('{}-{}-O'.format(ele, ele2))

        assert len(ids_)==len(names_)==len(form_enes)

        binary_oxide_data[ele+"_"+ele2] = {
            "names": names_,
            "mp_ids": ids_,
            "formation_energies": form_enes,
            "structures": structures
        }
        pickle.dump(binary_oxide_data, open("binary_oxide_data.p", "wb"))



V Fe
V Co
V Ni
V Cu
V Nb
V Mo
V Li
V Be
V Na
V Mg
V K
V Ca
V Rb
V Sr
V Cs
V Ba
V Sc
V Ti
V Zn
V Ga
V Ge
V As
V Se
V Br
V Y
V Zr
V Tc
V Ru
V Rh
V Pd
V Ag
V Cd
V In
V Sn
V Sb
V Te
V I
V Hf
V Ta
V W
V Re
V Os
V Ir
V Pt
V Au
V Hg
V Tl
V Pb
V Bi
V La
V Ce
V Nd
V Pr
V Sm
V Eu
V Gd
V Tb
V Dy
V Ho
V Er
V Tm
V Yb
V Lu
Cr Mn
Cr Fe
Cr Co
Cr Ni
Cr Cu
Cr Nb
Cr Mo
Cr Li
Cr Be
Cr Na
Cr Mg
Cr K
Cr Ca
Cr Rb
Cr Sr
Cr Cs
Cr Ba
Cr Sc
Cr Ti
Cr V
Cr Zn
Cr Ga
Cr Ge
Cr As
Cr Se
Cr Br
Cr Y
Cr Zr
Cr Tc
Cr Ru
Cr Rh
Cr Pd
Cr Ag
Cr Cd
Cr In
Cr Sn
Cr Sb
Cr Te
Cr I
Cr Hf
Cr Ta
Cr W
Cr Re
Cr Os
Cr Ir
Cr Pt
Cr Au
Cr Hg
Cr Tl
Cr Pb
Cr Bi
Cr La
Cr Ce
Cr Nd
Cr Pr
Cr Sm
Cr Eu
Cr Gd
Cr Tb
Cr Dy
Cr Ho
Cr Er
Cr Tm
Cr Yb
Cr Lu
Mn Fe
Mn Co
Mn Ni
Mn Cu
Mn Nb
Mn Mo
Mn Li
Mn Be
Mn Na
Mn Mg
Mn K
Mn Ca
Mn Rb
Mn Sr
Mn Cs
Mn Ba
Mn Sc
Mn Ti
Mn V
Mn Cr
Mn Zn
Mn Ga
Mn Ge
Mn As
Mn Se
Mn Br
Mn Y
Mn Zr
Mn Tc
Mn Ru
Mn Rh
Mn Pd
Mn Ag
Mn Cd
Mn In
Mn Sn
Mn Sb
Mn Te
Mn I
Mn Hf
Mn Ta
Mn W
Mn Re
Mn Os
Mn Ir
Mn Pt
Mn Au
Mn Hg
Mn Tl
Mn Pb
Mn Bi
Mn 

As Tl
As Pb
As Bi
As La
As Ce
As Nd
As Pr
As Sm
As Eu
As Gd
As Tb
As Dy
As Ho
As Er
As Tm
As Yb
As Lu
Se Br
Se Y
Se Zr
Se Nb
Se Mo
Se Tc
Se Ru
Se Rh
Se Pd
Se Ag
Se Cd
Se In
Se Sn
Se Sb
Se Te
Se I
Se Hf
Se Ta
Se W
Se Re
Se Os
Se Ir
Se Pt
Se Au
Se Hg
Se Tl
Se Pb
Se Bi
Se La
Se Ce
Se Nd
Se Pr
Se Sm
Se Eu
Se Gd
Se Tb
Se Dy
Se Ho
Se Er
Se Tm
Se Yb
Se Lu
Br Y
Br Zr
Br Nb
Br Mo
Br Tc
Br Ru
Br Rh
Br Pd
Br Ag
Br Cd
Br In
Br Sn
Br Sb
Br Te
Br I
Br Hf
Br Ta
Br W
Br Re
Br Os
Br Ir
Br Pt
Br Au
Br Hg
Br Tl
Br Pb
Br Bi
Br La
Br Ce
Br Nd
Br Pr
Br Sm
Br Eu
Br Gd
Br Tb
Br Dy
Br Ho
Br Er
Br Tm
Br Yb
Br Lu
Y Zr
Y Nb
Y Mo
Y Tc
Y Ru
Y Rh
Y Pd
Y Ag
Y Cd
Y In
Y Sn
Y Sb
Y Te
Y I
Y Hf
Y Ta
Y W
Y Re
Y Os
Y Ir
Y Pt
Y Au
Y Hg
Y Tl
Y Pb
Y Bi
Y La
Y Ce
Y Nd
Y Pr
Y Sm
Y Eu
Y Gd
Y Tb
Y Dy
Y Ho
Y Er
Y Tm
Y Yb
Y Lu
Zr Nb
Zr Mo
Zr Tc
Zr Ru
Zr Rh
Zr Pd
Zr Ag
Zr Cd
Zr In
Zr Sn
Zr Sb
Zr Te
Zr I
Zr Hf
Zr Ta
Zr W
Zr Re
Zr Os
Zr Ir
Zr Pt
Zr Au
Zr Hg
Zr Tl
Zr Pb
Zr Bi
Zr La
Zr Ce
Zr Nd
Zr Pr
Zr Sm
Zr Eu
Zr Gd
Zr Tb
Zr Dy
Zr Ho
