In [None]:
import requests
import json
import numpy as np
import pandas as pd
from urllib.error import HTTPError
from time import sleep
import json, sys, os
from urllib.request import urlopen
from pymatgen.ext.matproj import MPRester
from pymatgen.core import Structure
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.analysis.ewald import EwaldSummation
from pymatgen.analysis.structure_matcher import StructureMatcher

In [None]:
# Shared structure matcher used further down to compare each downloaded
# AFLOW structure against the reference structure for the same material.
SM = StructureMatcher(
    ltol=0.2,
    stol=0.3,
    angle_tol=5,
    primitive_cell=True,
)

In [None]:
# Load the table of material IDs that drives the rest of the notebook.
df = pd.read_csv(filepath_or_buffer='../mpids.csv')

In [None]:
# Index the table by material ID so rows can be addressed as df.loc[mpid, ...].
df = df.set_index('metadata.material_id')

In [None]:
# Client used to fetch reference structures by material ID.
# NOTE(review): constructed without arguments, so credentials presumably come
# from the environment / pymatgen configuration -- confirm before running.
m = MPRester()

# Pieces of the AFLUX query URL; the final request is SERVER + API + <query>.
SERVER = "http://aflow.org"
API = "/API/aflux/v1.0/?"
# Directive appended to every query string.
DIRECTIVES = "$paging(0)"

In [None]:
# Pre-create the result columns filled in by the matching loop below.
# Object-valued columns start as None; the numeric band gap starts as NaN.
df['sg_symbol'] = None
df['bader_net_charges'] = None
df["bader_atomic_volumes"] = None
df['aflow_data'] = None
df['aflow_struct'] = None
# `np.NaN` was removed in NumPy 2.0; `np.nan` is the spelling that works on
# every NumPy version.
df['Egap'] = np.nan
df['Comment_struct'] = None
df['rms_dist'] = None

In [None]:
# For every material ID in ``df``:
#   1. fetch the reference structure through the MPRester client ``m``;
#   2. query AFLUX for entries with the same space group, species, atom count
#      and species count;
#   3. download each candidate's relaxed CONTCAR and compare it with the
#      reference using ``SM``;
#   4. on the first match, store the AFLOW record, Bader data, band gap,
#      RMS distance, Ewald energy and a cation/anion map in ``df``.
# IDs whose lookup fails with an HTTP error are collected in ``missing_struct``.
missing_struct=[]
for mpid in df.index:
    # 'mp-12992' is looked up under 'mp-5986'. Only the *query* id is swapped:
    # results must still be written to the row being iterated, otherwise they
    # land under a different label (overwriting or appending a row while the
    # index is being iterated) and the 'mp-12992' row stays empty.
    query_id = 'mp-5986' if mpid == 'mp-12992' else mpid

    try:
        entry = m.get_structure_by_material_id(query_id)
        numsites = entry.num_sites
        sg = entry.get_space_group_info()[1]
        nspecies = entry.ntypesp

        # The textual form of the symbol tuple is spliced directly after
        # "species" in the query: drop the trailing comma of a 1-tuple,
        # otherwise drop the spaces between the elements.
        if len(entry.symbol_set) == 1:
            ele = str(entry.symbol_set).replace(',', '')
        else:
            ele = str(entry.symbol_set).replace(' ', '')

        MATCHBOOK = "spacegroup_relax({}),species{},bader_net_charges(*),natoms({}),nspecies({}),bader_atomic_volumes,Egap".format(sg, ele, numsites, nspecies)
        SUMMONS = MATCHBOOK + "," + DIRECTIVES

        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(SERVER + API + SUMMONS) as reply:
            response = json.loads(reply.read().decode("utf-8"))

        for record in response:
            # The 'aurl' field uses ':' where the download URL needs '/'.
            aurl = record['aurl'].replace(':', '/')
            with urlopen("http://" + aurl + "/CONTCAR.relax.vasp") as reply:
                struct_aflow = Poscar.from_string(reply.read().decode("utf-8")).structure

            if not SM.fit(struct_aflow, entry):
                # May be overwritten with 'Matched' by a later candidate.
                df.loc[mpid, 'Comment_struct'] = 'Mismatched'
                continue

            # Only needed for the accepted candidate, so compute it here
            # rather than for every candidate.
            RMS = SM.get_rms_dist(struct_aflow, entry)

            # Use the Bader net charges as per-site oxidation states so the
            # Ewald sum below is evaluated with those charges.
            struct_aflow.add_oxidation_state_by_site(oxidation_states=record['bader_net_charges'])

            # Label each element by the sign of its site charge; a later site
            # of the same element overwrites an earlier label.
            cat_an = {}
            for specie in struct_aflow.species:
                cat_an[specie.element.symbol] = 'cation' if specie.oxi_state > 0 else 'anion'

            Madelung_Bader = EwaldSummation(struct_aflow).total_energy

            df.at[mpid, "aflow_data"] = record
            df.at[mpid, "aflow_struct"] = struct_aflow
            df.at[mpid, "bader_net_charges"] = record['bader_net_charges']
            df.at[mpid, "bader_atomic_volumes"] = record['bader_atomic_volumes']
            df.at[mpid, 'rms_dist'] = RMS
            df.loc[mpid, 'sg_symbol'] = sg
            df.loc[mpid, 'Egap'] = record['Egap']
            df.loc[mpid, 'Madelung_Bader'] = Madelung_Bader
            df.loc[mpid, 'Comment_struct'] = 'Matched'
            df.loc[mpid, 'Cation_anion'] = [cat_an]
            # The first matching AFLOW entry wins.
            break

    except HTTPError:
        # Best effort: keep going, but remember which ids could not be
        # retrieved instead of dropping the failure silently.
        missing_struct.append(mpid)

In [None]:
# Persist the enriched table (including the stored structure objects) to disk.
df.to_pickle(path='./bader_charges.pkl')