In [1]:
import pickle
import numpy as np
from pymatgen.analysis.local_env import site_is_of_motif_type
from collections import defaultdict


In [2]:
# Load the pre-gathered oxide datasets.
# unary_data[element] is indexed below with the keys 'structures',
# 'formation_energies', 'mp_ids' and 'names' (parallel lists);
# binary_data[key] is indexed with the same four keys.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("unary_oxide_data.p", "rb") as unary_file:
    unary_data = pickle.load(unary_file)
with open("binary_oxide_data.p", "rb") as binary_file:
    binary_data = pickle.load(binary_file)

def get_lowest_ene_id(element, oxidation_state):
    """
    Return the Materials Project id of the lowest-formation-energy entry of
    `element` whose oxidation state equals `oxidation_state`.

    Args:
        element: key into `unary_data`.
        oxidation_state: value compared (with ==) against the oxidation state
            reported by `structure_inspection` for each stored structure.

    Returns:
        The matching entry of unary_data[element]['mp_ids'], or None when no
        stored structure has the requested oxidation state. Callers treat
        None as "no data available" and fall back to a synthetic energy
        (see get_ref_data).
    """
    entry = unary_data[element]
    best_id = None
    # Seed with +inf rather than 0 so that entries with a non-negative
    # formation energy are still candidates; seeding with 0 would report
    # "no data" for an oxidation state whose only entries are unstable.
    best_energy = float("inf")
    for idx, struct in enumerate(entry['structures']):
        # structure_inspection returns a 5-tuple; only the oxidation state is needed
        ox_state = structure_inspection(struct)[1]
        if ox_state != oxidation_state:
            continue
        formation_energy = entry['formation_energies'][idx]
        if formation_energy < best_energy:
            best_id = entry['mp_ids'][idx]
            best_energy = formation_energy
    return best_id

def add_ox(energy_dict, desired_ox_state, max_ox_state=8):
    """
    Estimate a synthetic formation energy for an oxidation state that has no
    entry in `energy_dict`.

    The estimate lies on a straight line through the lowest-energy known
    entry (ox_at_min, min_energy):
      * below ox_at_min the line runs to zero energy at oxidation state 0;
      * above ox_at_min the line runs to zero energy at `max_ox_state`.
    This keeps the estimate continuous with the known minimum: it approaches
    min_energy as desired_ox_state approaches ox_at_min.

    Args:
        energy_dict: mapping of oxidation state -> lowest formation energy.
        desired_ox_state: oxidation state to estimate; must not be a key of
            `energy_dict`.
        max_ox_state: oxidation state at which the energy is taken to be
            zero on the high side (default 8).

    Returns:
        The interpolated formation energy.

    Raises:
        AssertionError: if `desired_ox_state` is already in `energy_dict`.
        ValueError: if `energy_dict` is empty (raised by min()).
        ZeroDivisionError: if the minimum sits at `max_ox_state` and a higher
            state is requested.
    """
    assert desired_ox_state not in energy_dict

    # first entry with the lowest energy (ties resolved in insertion order)
    ox_at_min, min_energy = min(energy_dict.items(), key=lambda item: item[1])

    if desired_ox_state < ox_at_min:
        # fraction of the way from oxidation state 0 up to the known minimum
        return min_energy * (desired_ox_state / ox_at_min)
    # fraction of the way from max_ox_state down to the known minimum
    return min_energy * ((max_ox_state - desired_ox_state) / (max_ox_state - ox_at_min))


def get_metal_motifs(structure):
    """
    Collect the distinct values returned by `site_is_of_motif_type` for every
    site of `structure` whose species is not 'O'.

    Returns:
        A set of motif values (empty when every site is oxygen).
    """
    return {
        site_is_of_motif_type(structure, site_index)
        for site_index, site in enumerate(structure.sites)
        if str(site.specie) != 'O'
    }


def get_conc_in_binary(structure, element):
    """
    Fraction of the counted sites of `structure` that are `element`.

    Sites equal to `element` are counted as the target; every other site that
    is not 'O' is counted as "other"; remaining oxygen sites are ignored.

    Returns:
        target_count / (target_count + other_count)

    Raises:
        ZeroDivisionError: if no site is counted at all.
    """
    labels = [str(site.specie) for site in structure.sites]
    n_target = labels.count(element)
    n_other = sum(1 for label in labels if label != element and label != 'O')
    return n_target/(n_target+n_other)


def structure_inspection(structure, neighbor_cutoff=2.5, oo_bond_cutoff=1.5):
    """
    Gather coordination counts and oxygen-content figures for an oxide.

    Args:
        structure: object exposing `.sites` (each with `.specie`) and
            `.get_neighbors(site, r)` returning neighbours that expose
            `.specie` and `.nn_distance`.
        neighbor_cutoff: radius passed to `structure.get_neighbors`
            (default 2.5, the previously hard-coded value).
        oo_bond_cutoff: an oxygen neighbour of an oxygen site is counted as
            O-O coordination only if its `nn_distance` is below this value
            (default 1.5, the previously hard-coded value).

    Returns:
        A 5-tuple:
          m_coordinations   - per non-oxygen site, number of 'O' neighbours
          ox_state          - round(2 * o_frac / (1 - o_frac), 3), i.e. twice
                              the ratio of oxygen sites to non-oxygen sites
          o_o_coordinations - per oxygen site, number of short O-O contacts
          o_m_coordinations - per oxygen site, number of non-oxygen neighbours
          o_percent         - fraction of sites that are oxygen, rounded to 3 dp

    Raises:
        ZeroDivisionError: if the structure has no sites, or only oxygen sites.
    """
    m_coordinations = []
    o_o_coordinations = []
    o_m_coordinations = []

    for site in structure.sites:
        neighbours = structure.get_neighbors(site, neighbor_cutoff)
        if str(site.specie) == 'O':
            o_o_count = 0
            o_m_count = 0
            for nn in neighbours:
                if str(nn.specie) == "O":
                    # oxygen neighbours beyond the O-O cutoff are not counted at all
                    if nn.nn_distance < oo_bond_cutoff:
                        o_o_count += 1
                else:
                    o_m_count += 1
            o_o_coordinations.append(o_o_count)
            o_m_coordinations.append(o_m_count)
        else:
            m_coordinations.append(sum(1 for nn in neighbours if str(nn.specie) == "O"))

    # one o_o entry is appended per oxygen site, so its length is the oxygen count
    num_o_sites = len(o_o_coordinations)
    o_percent = num_o_sites/len(structure.sites)
    ox_state = round((o_percent*2/(1-o_percent)), 3)
    return m_coordinations, ox_state, o_o_coordinations, o_m_coordinations, round(o_percent, 3)

def get_ox2lowest_no_reference_(reference_element_, ref_oxidation_state):
    """
    Tabulate, for one element, the lowest formation energy found at each
    oxidation state, and make sure the requested state has an entry.

    Args:
        reference_element_: key into `unary_data`, passed as a string.
        ref_oxidation_state: oxidation state that must be present in the
            result; when no stored structure has it, the value is filled in
            with the synthetic estimate from `add_ox`.

    Returns:
        (table, energy): the oxidation-state -> lowest-energy mapping and the
        entry of that mapping for `ref_oxidation_state`.
    """
    lowest_by_ox = defaultdict(list)
    element_entry = unary_data[reference_element_]

    for position, candidate in enumerate(element_entry['structures']):
        state = structure_inspection(candidate)[1]
        energy = element_entry['formation_energies'][position]
        # keep the first energy seen for a state, then only strictly lower ones
        if state not in lowest_by_ox or energy < lowest_by_ox[state]:
            lowest_by_ox[state] = energy

    if ref_oxidation_state not in lowest_by_ox:
        lowest_by_ox[ref_oxidation_state] = add_ox(lowest_by_ox, ref_oxidation_state)

    return lowest_by_ox, lowest_by_ox[ref_oxidation_state]


def get_ref_data(element, reference_oxide_id, desired_ox):
    """
    Look up the reference formation energy and structure for `element`.

    Args:
        element: key into `unary_data`.
        reference_oxide_id: Materials Project id to look up, or None when no
            entry exists at the desired oxidation state.
        desired_ox: oxidation state; only used when `reference_oxide_id` is
            None, to obtain an energy from get_ox2lowest_no_reference_.

    Returns:
        (formation_energy, structure) for the matching id.
        (energy, None) when `reference_oxide_id` is None: there is no
        structure to go with a synthetic energy.
        None when the id is not found among unary_data[element]['mp_ids'].
    """
    if reference_oxide_id is None:
        # need to retrieve made up formation energy
        print("Making synthetic formation energy, since MP doesn't have data for {} in ox state: {}".format(element, desired_ox))
        _energy_table, synthetic_energy = get_ox2lowest_no_reference_(element, desired_ox)
        # Return an explicit None for the structure instead of relying on
        # IPython's `_` (last cell output), which is undefined on a plain
        # interpreter and arbitrary inside a notebook.
        return synthetic_energy, None

    entry = unary_data[element]
    for idx, id_ in enumerate(entry['mp_ids']):
        if id_ == reference_oxide_id:
            return entry['formation_energies'][idx], entry['structures'][idx]

    print("You probably mis-named ones of the reference oxides.")
    return None


def get_all_ref_data(ox_state):
    """
    Build the reference data for every element of `unary_data` at `ox_state`.

    Elements whose 'names' list is empty are left out. For the rest, the
    lowest-energy id is resolved first (get_lowest_ene_id) for all elements,
    and only then is get_ref_data called for each of them.

    Returns:
        Mapping element -> whatever get_ref_data returns for that element.
    """
    lowest_ids = {
        symbol: get_lowest_ene_id(symbol, ox_state)
        for symbol in unary_data.keys()
        if len(unary_data[symbol]['names']) != 0
    }

    collected = defaultdict()
    for symbol, mp_id in lowest_ids.items():
        collected[symbol] = get_ref_data(symbol, mp_id, ox_state)
    return collected

In [3]:
def get_binary_mp_data(element1, element2, desired_ox, ref_data):
    """
    For the binary oxide system of `element1` and `element2`, keep the entry
    with the lowest mixing stabilisation at each concentration of `element2`.

    Only structures whose oxidation state (from structure_inspection) equals
    `desired_ox` are considered. The mixing stabilisation is the entry's
    formation energy minus the concentration-weighted average of the two
    reference energies.

    Args:
        element1: host element; its reference energy is weighted by (1 - conc).
        element2: second element; its concentration forms the result keys.
        desired_ox: oxidation state to select.
        ref_data: mapping element -> (reference_energy, reference_structure).

    Returns:
        Mapping "<element2>_<conc, 2 decimals>" -> dict with the keys
        "motifs", "MP ΔD", "MP weighted average", "MP_id", "MP_name" and
        "Coordination match". The mapping is empty when `binary_data` has no
        key for the pair or no structure matches; the return type is the same
        in every case.
    """
    conc2lowest = defaultdict()

    # The order of the elements in the key is arbitrary (it depends on how the
    # data was gathered), so test membership of both. The last matching key wins.
    # NOTE(review): if the keys are plain strings this is a substring test
    # (e.g. "C" would match a key containing "Ca") -- confirm the key format.
    needed_key = None
    for key in binary_data.keys():
        if element1 in key and element2 in key:
            needed_key = key
    if needed_key is None:
        return conc2lowest

    system = binary_data[needed_key]
    for idx, struct in enumerate(system['structures']):
        m_coords, ox_state_, o_o_coords, o_m_coords, o_pc = structure_inspection(struct)
        if ox_state_ != desired_ox:
            continue

        dope_conc = get_conc_in_binary(struct, element2)
        mixed_energy = system['formation_energies'][idx]
        dope_o2_ene, dope_o2_struct = ref_data[element2]
        mo2_ene, mo2_struct = ref_data[element1]
        weighted_average = dope_o2_ene*dope_conc+(mo2_ene)*(1-dope_conc)
        mixing_stabilisation = mixed_energy - weighted_average
        gs_key = element2+"_{:.2f}".format(dope_conc)

        # only build an entry (and run the motif analysis) when it will be stored
        if gs_key in conc2lowest and conc2lowest[gs_key]['MP ΔD'] <= mixing_stabilisation:
            continue

        # True when every non-oxygen site has 6 oxygen neighbours and every
        # oxygen site has exactly 6*2/desired_ox non-oxygen neighbours.
        coordination_match = (set(m_coords) == {6}) and (set(o_m_coords) == {6*2/(desired_ox)})

        conc2lowest[gs_key] = {
            "motifs": get_metal_motifs(struct),
            "MP ΔD" : mixing_stabilisation,
            "MP weighted average": weighted_average,
            "MP_id" : system['mp_ids'][idx],
            "MP_name": system['names'][idx],
            "Coordination match": coordination_match
        }
    return conc2lowest


In [4]:
# Result table: "<elemA>_<elemB>_<ox>" (element names in sorted order)
# -> mapping returned by get_binary_mp_data.
binary_pairing_data = defaultdict()
# Pairings whose lookup raised ValueError, kept so they can be inspected
# afterwards instead of disappearing silently.
skipped_pairings = []
#           MO M2O3 MO2
ox_states = [2, 3,  4]
for ox_state in ox_states:
    ref_data_ = get_all_ref_data(ox_state)
    for housing_element in unary_data.keys():
        for ele in unary_data.keys():
            if ele==housing_element:
                continue
            try:
                info_dict = get_binary_mp_data(housing_element, ele, ox_state, ref_data_)
            except ValueError as err:
                skipped_pairings.append((housing_element, ele, ox_state, str(err)))
                continue
            # NOTE(review): (A, B) and (B, A) map to the same sorted key, so
            # the ordering visited later overwrites the earlier one.
            pair_key = "_".join(sorted((housing_element, ele)))
            binary_pairing_data[pair_key+"_"+str(int(ox_state))] = info_dict

if skipped_pairings:
    print("Skipped {} element pairings that raised ValueError; see skipped_pairings".format(len(skipped_pairings)))

        

Making synthetic formation energy, since MP doesn't have data for Mo in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Ge in ox state: 2
Making synthetic formation energy, since MP doesn't have data for As in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Se in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Br in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Tc in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Ru in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Rh in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Sb in ox state: 2
Making synthetic formation energy, since MP doesn't have data for I in ox state: 2
Making synthetic formation energy, since MP doesn't have data for Hf in ox state: 2
Making synthetic formation energy, since MP doesn't have data for W in ox sta

In [5]:
# Use a context manager so the file is flushed and closed once the dump finishes.
with open("binary_pairing_data.p", "wb") as pairing_file:
    pickle.dump(binary_pairing_data, pairing_file)