# Saving Materials Project data

In this notebook, we run through the elements that we need unary and binary oxide formation energies for.

In [1]:
import pickle

import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict

from pymatgen.ext.matproj import MPRester

# Initialize the MP Rester
mpr = MPRester('API_KEY')


In [3]:
elements =  [ "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Nb", "Mo", 
             "Li", "Be", "Na", "Mg", "K", "Ca", "Rb", "Sr", "Cs", "Ba", # Alkalis
            "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
            "Ga", "Ge", "As", "Se", "Br",
            "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
            "In", "Sn", "Sb", "Te", "I",
            "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", 
            "Tl", "Pb", "Bi",
           "La", "Ce", "Nd", "Pr", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu"]

ele2gs = defaultdict()

for ele in elements:
    entries = mpr.get_entries(ele)
    for entry in entries:
        ene_per_atom = entry.energy/list(dict(entry.composition).values())[0]
        if ele not in ele2gs.keys():
            ele2gs[ele] = ene_per_atom
        elif ene_per_atom<ele2gs[ele]:
            ele2gs[ele] = ene_per_atom
            
pickle.dump(ele2gs, open("ele2gs.p", "wb"))

ele2gs["O"] = -4.95

In [6]:
def calc_formation_ene(energy_per_atom, composition):
    """
    Need to calculate formation energies ourselves because materials project formation energies are inconsistent.
    See https://matsci.org/t/formation-energy-calculation/41574 for further information.
    
    """    
    total_atoms = sum(composition.values())
    formation_energy = energy_per_atom
    for element in composition:
        formation_energy-=ele2gs[element]*composition[element]/total_atoms
    return formation_energy

def get_chemsys_data(chemsys):
    query = mpr.get_data(chemsys)
    material_ids = [q['material_id'] for q in query]
    material_names = [q['pretty_formula'] for q in query]
    material_enes = [q['energy_per_atom'] for q in query]
    material_comps = [q['unit_cell_formula'] for q in query]
    
    
    material_names_ = []
    material_ids_ = []
    form_enes_ = []
    structures = []
    for idx, energy in enumerate(material_enes):
        ref_form_ene = calc_formation_ene(energy, material_comps[idx])
        form_enes_.append(ref_form_ene)
        material_names_.append(material_names[idx])
        material_ids_.append(material_ids[idx])
        structure = mpr.get_structure_by_material_id(material_ids[idx])
        structures.append(structure)


    return material_ids_, material_names_, form_enes_, structures

# need to run this overnight to finish this, 
if you want the binary oxide data it needs to be run over a long time, code to do so is here. We have saved two files unary_oxide_data_.p and binary_oxide_data_.p and use them.

In [8]:
binary_oxide_data = defaultdict()
unary_oxide_data = defaultdict()

for idx, ele in enumerate(elements):
    if ele not in unary_oxide_data.keys():
        # save unary for ele, saves iterating over the same oxides
        ids_, names_, form_enes, structures = get_chemsys_data('{}-O'.format(ele))
        
        assert len(ids_)==len(names_)==len(form_enes)==len(structures)
        
        unary_oxide_data[ele] = {
            "names": names_,
            "mp_ids": ids_,
            "formation_energies": form_enes,
            "structures": structures
        }
        pickle.dump(unary_oxide_data, open("unary_oxide_data.p", "wb"))
     
    for ele2 in elements[idx:]:
        # save unary for ele2
        if ele2 not in unary_oxide_data.keys():
            ids_, names_, form_enes, structures = get_chemsys_data('{}-O'.format(ele2))

            assert len(ids_)==len(names_)==len(form_enes)

            unary_oxide_data[ele2] = {
                "names": names_,
                "mp_ids": ids_,
                "formation_energies": form_enes,
                "structures": structures
            }
            pickle.dump(unary_oxide_data, open("unary_oxide_data.p", "wb"))
        
        
        if ele+"_"+ele2 in binary_oxide_data.keys() or ele==ele2:
            continue    
        # save binary
        print(ele, ele2)
        ids_, names_, form_enes, structures = get_chemsys_data('{}-{}-O'.format(ele, ele2))

        assert len(ids_)==len(names_)==len(form_enes)

        binary_oxide_data[ele+"_"+ele2] = {
            "names": names_,
            "mp_ids": ids_,
            "formation_energies": form_enes,
            "structures": structures
        }
        pickle.dump(unary_oxide_data, open("binary_oxide_data.p", "wb"))



Ti V
Ti Cr
Ti Mn
Ti Fe
Ti Co
Ti Ni
Ti Cu
Ti Nb
Ti Mo
Ti Li
Ti Be
Ti Na
Ti Mg
Ti K
Ti Ca
Ti Rb
Ti Sr
Ti Cs
Ti Ba
Ti Sc
Ti Zn
Ti Ga
Ti Ge
Ti As
Ti Se
Ti Br
Ti Y
Ti Zr
Ti Tc
Ti Ru
Ti Rh
Ti Pd
Ti Ag
Ti Cd
Ti In
Ti Sn
Ti Sb
Ti Te
Ti I
Ti Hf
Ti Ta
Ti W
Ti Re
Ti Os
Ti Ir
Ti Pt
Ti Au
Ti Hg
Ti Tl
Ti Pb
Ti Bi
Ti La
Ti Ce
Ti Nd
Ti Pr
Ti Sm
Ti Eu
Ti Gd
Ti Tb
Ti Dy
Ti Ho
Ti Er
Ti Tm
Ti Yb
Ti Lu
V Cr
V Mn
V Fe
V Co
V Ni
V Cu
V Nb
V Mo
V Li
V Be
V Na
V Mg
V K
V Ca
V Rb
V Sr
V Cs
V Ba
V Sc
V Ti
V Zn
V Ga
V Ge
V As
V Se
V Br
V Y
V Zr
V Tc
V Ru
V Rh
V Pd
V Ag
V Cd
V In
V Sn
V Sb
V Te
V I
V Hf
V Ta
V W
V Re
V Os
V Ir
V Pt
V Au
V Hg
V Tl
V Pb
V Bi
V La
V Ce
V Nd
V Pr
V Sm
V Eu
V Gd
V Tb
V Dy
V Ho
V Er
V Tm
V Yb
V Lu
Cr Mn
Cr Fe
Cr Co
Cr Ni
Cr Cu
Cr Nb
Cr Mo
Cr Li
Cr Be
Cr Na
Cr Mg
Cr K
Cr Ca
Cr Rb
Cr Sr
Cr Cs
Cr Ba
Cr Sc
Cr Ti
Cr V
Cr Zn
Cr Ga
Cr Ge
Cr As
Cr Se
Cr Br
Cr Y
Cr Zr
Cr Tc
Cr Ru
Cr Rh
Cr Pd
Cr Ag
Cr Cd
Cr In
Cr Sn
Cr Sb
Cr Te
Cr I
Cr Hf
Cr Ta
Cr W
Cr Re
Cr Os
Cr Ir
Cr Pt
Cr Au
Cr Hg
Cr Tl
Cr Pb

KeyboardInterrupt: 