#### Load the necessary modules 

In [None]:
import os
import seaborn as sns
import multiprocessing as mp
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import warnings
from mendeleev import element
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.analysis.ewald import EwaldSummation
from pymatgen.io.lobster.outputs import Charge
from pymatgen.core.structure import Structure
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.core import Composition
from pymatgen.analysis.chemenv.coordination_environments.chemenv_strategies import (
    SimplestChemenvStrategy, MultiWeightsChemenvStrategy
)
from pymatgen.analysis.chemenv.coordination_environments.coordination_geometry_finder import (
    LocalGeometryFinder,
)
from pymatgen.analysis.chemenv.coordination_environments.structure_environments import (
    LightStructureEnvironments,
)
from pymatgen.io.lobster.lobsterenv import LobsterNeighbors
from pymatviz.histograms import hist_elemental_prevalence
from pymatviz.ptable import ptable_heatmap_plotly
from tqdm import tqdm
from IPython.display import Markdown as md
warnings.filterwarnings("ignore")

#### Change to the directory containing the raw calculation files

In [None]:
# Record the directory the notebook was started from before changing directory.
parent=os.getcwd()
os.chdir('Results/') # Path to the directory where the unprocessed data (8 tar files downloaded from Zenodo) were extracted

In [None]:
# Calculation folders: every sub-directory of the current working directory
# whose name does not start with 't', '.' or '__'.
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]
# Unique name prefixes (the text before the first '_'), in sorted order.
mats = sorted({folder.split('_')[0] for folder in mpids})

#### (Optional) Call the function below to rebuild, from the raw calculation files, the pandas DataFrame containing the data needed for the plots

In [None]:
def coordination_env(mpid, icohp_cutoff_strength=0.10):
    """Compare ChemEnv and LOBSTER coordination environments of one calculation.

    Reads ``POSCAR.gz``, ``ICOHPLIST.lobster.gz`` and ``CHARGE.lobster.gz``
    from ``./<mpid>/`` (relative to the current working directory).

    Args:
        mpid: Name of the calculation directory; also used as the index
            label of the returned row.
        icohp_cutoff_strength: Passed to ``LobsterNeighbors`` as
            ``perc_strength_ICOHP``.

    Returns:
        A one-row ``pandas.DataFrame`` indexed by ``mpid`` with the columns
        ``Formula``, ``Composition``, ``Spacegroup``, ``Chem_env``,
        ``Lobs_env``, ``global_cordination_res``, ``global_cordination_env``,
        ``cordination_csm_chem`` and ``cordination_csm_lobs``.  If the
        environment analysis raises ``ValueError`` or ``TypeError`` the
        failure is logged and every column not filled up to that point
        stays NaN.
    """
    import logging  # function-scope import keeps this cell self-contained

    bv_analyzer = BVAnalyzer()
    list_columns = ['Chem_env', 'Lobs_env', 'cordination_csm_chem', 'cordination_csm_lobs']
    df = pd.DataFrame(
        index=[mpid],
        columns=['Formula', 'Composition', 'Spacegroup',
                 'Chem_env', 'Lobs_env', 'global_cordination_res', 'global_cordination_env',
                 'cordination_csm_chem', 'cordination_csm_lobs'],
    )
    # These cells store whole lists, so the columns are kept object-typed.
    for column in list_columns:
        df[column] = df[column].astype(dtype='object')

    struct = Structure.from_file(filename=f'./{mpid}/POSCAR.gz')
    df.loc[mpid, 'Formula'] = struct.composition.get_reduced_formula_and_factor()[0]
    # Second element of get_space_group_info() (presumably the space-group number).
    df.loc[mpid, 'Spacegroup'] = struct.get_space_group_info()[1]
    df.at[mpid, 'Composition'] = struct.composition

    try:
        # --- ChemEnv: geometry-based environments ---------------------------
        lgf = LocalGeometryFinder()
        lgf.setup_structure(structure=struct)
        se = lgf.compute_structure_environments(
            valences=bv_analyzer.get_valences(structure=struct),
            additional_conditions=[1],
            only_cations=False,
        )
        strategy_simp = SimplestChemenvStrategy(distance_cutoff=1.4, angle_cutoff=0.3)
        lse_simplest = LightStructureEnvironments.from_structure_environments(
            strategy=strategy_simp, structure_environments=se
        )
        # Only the first environment listed for each site is used.
        chem_first = [site_envs[0] for site_envs in lse_simplest.coordination_environments]
        cor_chem = [env['ce_symbol'] for env in chem_first]
        csm_chem = [env['csm'] for env in chem_first]
        df.at[mpid, 'Chem_env'] = cor_chem
        df.at[mpid, 'cordination_csm_chem'] = csm_chem

        # --- LOBSTER: ICOHP-based environments ------------------------------
        lobs_nei = LobsterNeighbors(
            filename_ICOHP=f'./{mpid}/ICOHPLIST.lobster.gz',
            structure=struct,
            additional_condition=1,
            perc_strength_ICOHP=icohp_cutoff_strength,
            filename_CHARGE=f'./{mpid}/CHARGE.lobster.gz',
            valences_from_charges=True,
            adapt_extremum_to_add_cond=True,
        )
        lse_lobs = lobs_nei.get_light_structure_environment(only_cation_environments=False)
        lobs_first = [site_envs[0] for site_envs in lse_lobs.coordination_environments]
        cor_lobs = [env['ce_symbol'] for env in lobs_first]
        csm_lobs = [env['csm'] for env in lobs_first]
        df.at[mpid, 'Lobs_env'] = cor_lobs
        df.at[mpid, 'cordination_csm_lobs'] = csm_lobs

        # Structure-level verdict: both methods must give the same symbol list.
        df.loc[mpid, 'global_cordination_res'] = (
            'Agree' if cor_chem == cor_lobs else 'Disagree'
        )
        # Labelled distorted only when the largest CSM exceeds 2.5 in BOTH methods.
        both_distorted = max(csm_chem) > 2.5 and max(csm_lobs) > 2.5
        df.loc[mpid, 'global_cordination_env'] = (
            'Distorted' if both_distorted else 'Not_Distorted'
        )
    except (ValueError, TypeError) as err:
        # Best effort: keep the partially filled row, but record why the
        # analysis stopped instead of discarding the error silently.
        logging.getLogger(__name__).warning(
            "coordination_env: analysis incomplete for %s (%s: %s)",
            mpid, type(err).__name__, err,
        )
    return df

In [None]:
# Run this cell only if you want to rebuild the pandas DataFrame from the
# raw calculation files.
items = mats
with mp.Pool(processes=14, maxtasksperchild=1) as pool:
    # pool.imap returns an iterator, so tqdm is told the number of tasks
    # explicitly through `total`.
    results = tqdm(
        pool.imap(coordination_env, items, chunksize=2),
        total=len(items),
    )
    # Drain the iterator while the pool is still open.
    row = list(results)

df = pd.concat(row)

In [None]:
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
df = pd.read_pickle('Cooridination_comp_data_bva.pkl') # Load the DataFrame with the precomputed data needed for the plots

In [None]:
# Keep the rows in which 'global_cordination_env' has been assigned.
# (Alternative: compare that column with 'Not_Distorted' to keep only the
# rows carrying that label.)
fil = df[df['global_cordination_env'].notna()]

In [None]:
# Walk through the paired ChemEnv / LOBSTER symbol lists of every row and
# count how many individual entries match and how many differ.
agree = 0
disagree = 0
for chem_symbols, lobs_symbols in zip(fil['Chem_env'], fil['Lobs_env']):
    for chem_symbol, lobs_symbol in zip(chem_symbols, lobs_symbols):
        if chem_symbol == lobs_symbol:
            agree += 1
        else:
            disagree += 1

In [None]:
# Fraction of the compared environments whose symbols matched (agree / total).
# NOTE: raises ZeroDivisionError if no environments were counted.
agree/(agree+disagree)

<h3><center>We see that 79% of the environments agree</center></h3>