#### Load the necessary modules 

In [None]:
import os
import seaborn as sns
import multiprocessing as mp
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import warnings
from mendeleev import element
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.analysis.ewald import EwaldSummation
from pymatgen.io.lobster.outputs import Charge
from pymatgen.core.structure import Structure
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.core import Composition
from pymatgen.analysis.chemenv.coordination_environments.chemenv_strategies import (
    SimplestChemenvStrategy, MultiWeightsChemenvStrategy
)
from pymatgen.analysis.chemenv.coordination_environments.coordination_geometry_finder import (
    LocalGeometryFinder,
)
from pymatgen.analysis.chemenv.coordination_environments.structure_environments import (
    LightStructureEnvironments,
)
from pymatgen.io.lobster.lobsterenv import LobsterNeighbors
from pymatviz.histograms import hist_elemental_prevalence
from pymatviz.ptable import ptable_heatmap_plotly
from tqdm import tqdm
from IPython.display import Markdown as md
warnings.filterwarnings("ignore")

## Processing of the raw data (optional)

In [None]:
# Remember the directory the notebook was started from, then move into the
# data folder; relative paths used after this point resolve against 'Results/'.
parent=os.getcwd()
os.chdir('Results/') #Change to directory containing raw calculation files

In [None]:
# Sub-directories of the current folder, skipping hidden entries, dunder
# folders and anything whose name starts with 't'.
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]
# The part of each directory name before the first underscore, de-duplicated
# and sorted.
mats = sorted({name.split('_')[0] for name in mpids})

#### Call the function below to reproduce the pandas dataframe with necessary data for the plots from raw calculation files (optional)

In [None]:
def _charge_signs(charges):
    """Label every charge as 'POS' (>= 0) or 'NEG' (< 0)."""
    return ['POS' if charge >= 0 else 'NEG' for charge in charges]


def _disagreeing_elements(structure, labels_a, labels_b):
    """Return a Composition built from the symbols of sites labelled differently."""
    symbols = []
    for site, label_a, label_b in zip(structure.sites, labels_a, labels_b):
        # Each element is listed once, in order of first appearance.
        if label_a != label_b and site.specie.symbol not in symbols:
            symbols.append(site.specie.symbol)
    return Composition(''.join(symbols))


def _record_comparison(df, mpid, structure, name, labels_a, labels_b):
    """Fill '<name>_comp' and, on disagreement, '<name>_elements' for row mpid."""
    if labels_a == labels_b:
        df.at[mpid, name + '_comp'] = 'Agree'
        return
    df.at[mpid, name + '_comp'] = 'Disagree'
    df.at[mpid, name + '_elements'] = _disagreeing_elements(structure, labels_a, labels_b)


def comp_cation_anion(mpid):
    """Compare the sign of BVA valences with Loewdin and Mulliken charges.

    Reads ``./<mpid>/CHARGE.lobster.gz`` and ``./<mpid>/POSCAR.gz`` relative to
    the current working directory.

    Parameters
    ----------
    mpid : str
        Name of the calculation directory; also used as the row index.

    Returns
    -------
    pandas.DataFrame
        A one-row frame indexed by ``mpid``.

        * 'Formula', 'Composition', 'Spacegroup' and 'Comp_en_diff' (absolute
          Pauling electronegativity difference for every unordered element
          pair, keyed 'A-B') are always filled.
        * 'BVA_oxi', 'Loew_oxi', 'Mull_oxi' hold the per-site values and
          'Madelung_BVA', 'Madelung_Loew', 'Madelung_Mull' the
          ``EwaldSummation.total_energy`` of the structure decorated with them.
        * 'Mull_BVA_comp', 'Loew_BVA_comp', 'Mull_Loew_comp' are 'Agree' when
          the per-site POS/NEG labels are identical and 'Disagree' otherwise;
          in the latter case the matching '*_elements' column holds a
          Composition of the elements whose sites differ.

        The charge analysis is best effort: if any step raises, the columns
        written so far are kept and the remaining ones stay empty or absent.
    """
    from itertools import combinations

    df = pd.DataFrame(index=[mpid], columns=['Formula','Composition','Spacegroup','BVA_oxi','Loew_oxi','Mull_oxi',
                                           'Comp_en_diff','Mull_BVA_elements', 'Loew_BVA_elements',
                                           'Mull_Loew_elements'])

    # These cells receive lists / Composition objects, so force object dtype.
    for column in ('Composition', 'BVA_oxi', 'Loew_oxi', 'Mull_oxi',
                   'Mull_BVA_elements', 'Loew_BVA_elements', 'Mull_Loew_elements'):
        df[column] = df[column].astype(dtype='object')

    BV = BVAnalyzer()
    Lobs_charge = Charge(filename='./{}/CHARGE.lobster.gz'.format(mpid))
    struct = Poscar.from_file(filename='./{}/POSCAR.gz'.format(mpid))
    structure = struct.structure

    df.loc[mpid,'Formula'] = structure.composition.get_reduced_formula_and_factor()[0]
    df.loc[mpid,'Spacegroup'] = structure.get_space_group_info()[1]
    df.at[mpid,'Composition'] = structure.composition

    # Pauling electronegativity of every element in the composition.
    en = {el.symbol: element(el.symbol).en_pauling for el in structure.composition}

    # One entry per unordered element pair, keyed in composition order.
    differences = {first + '-' + second: abs(en[first] - en[second])
                   for first, second in combinations(en, 2)}
    df.at[mpid,'Comp_en_diff'] = differences

    try:
        # The valences are computed once and reused for the labels, the stored
        # column and the oxidation-state decoration.
        valences = BV.get_valences(structure=structure)
        BVA_oxi = _charge_signs(valences)
        df.at[mpid,'BVA_oxi'] = valences
        structure.add_oxidation_state_by_site(valences)
        df.loc[mpid,'Madelung_BVA'] = EwaldSummation(structure).total_energy

        loew = _charge_signs(Lobs_charge.Loewdin)
        df.at[mpid,'Loew_oxi'] = Lobs_charge.Loewdin
        structure.add_oxidation_state_by_site(Lobs_charge.Loewdin)
        df.loc[mpid,'Madelung_Loew'] = EwaldSummation(structure).total_energy

        Mull = _charge_signs(Lobs_charge.Mulliken)
        df.at[mpid,'Mull_oxi'] = Lobs_charge.Mulliken
        structure.add_oxidation_state_by_site(Lobs_charge.Mulliken)
        df.loc[mpid,'Madelung_Mull'] = EwaldSummation(structure).total_energy

        _record_comparison(df, mpid, structure, 'Mull_BVA', Mull, BVA_oxi)
        _record_comparison(df, mpid, structure, 'Loew_BVA', loew, BVA_oxi)
        _record_comparison(df, mpid, structure, 'Mull_Loew', loew, Mull)
    except Exception:
        # Deliberate best effort: a failing compound keeps whatever was
        # written before the error. Catching Exception rather than using a
        # bare except lets KeyboardInterrupt / SystemExit still stop a run.
        pass
    return df

In [None]:
#run this block only if you want to get pandas dataframe from calculation files
items = mats
with mp.Pool(processes=14, maxtasksperchild=1) as pool:
    # imap yields the per-material frames in input order; tqdm only adds a
    # progress bar ('total' is passed because imap's length is not known).
    results = tqdm(
        pool.imap(comp_cation_anion, items, chunksize=2),
        total=len(items),
    )
    # Consume every result while the pool is still alive.
    row = list(results)

# Stack the one-row frames into a single dataframe.
df = pd.concat(row)

In [None]:
## Recreate analysis from pkl files

In [None]:
# Load the dataframe with the precomputed data needed for the plots.
# NOTE(review): the path is relative to the current working directory, which
# the optional os.chdir('Results/') cell changes - confirm where the pkl lives.
# NOTE: unpickling executes arbitrary code; only load a file you trust.
df = pd.read_pickle('./Charge_comp_data.pkl')

In [None]:
# Percentage of compounds labelled 'Agree' in Mull_BVA_comp, relative to all
# compounds for which that comparison is available (non-missing).
mull_vs_bva = df.Mull_BVA_comp
n_agree = len(df.loc[mull_vs_bva == 'Agree'])
n_compared = len(df.loc[mull_vs_bva.notna()])
agree_per = (n_agree / n_compared) * 100

In [None]:
# Display the Mulliken-vs-BVA agreement percentage.
agree_per

<h3><center>We see 96 % agreement between the Mulliken-based cation-anion classification and the BVA method</center></h3>

<h3><center>Scatter plot of electronegativity differences for compounds whose classification disagrees with the BVA method</center></h3>

In [None]:
# Scatter plot of the pairwise electronegativity differences ('Comp_en_diff')
# for compounds whose Mulliken-based classification disagrees with BVA.
# Each element pair gets one marker plus a text label with the pair name.
figmain = go.Figure()

disagreeing = df.loc[df.Mull_BVA_comp=='Disagree'][:20] # change the index slices to get all 3 plots as shown in SI
for row, col in disagreeing.iterrows():
    x_label = row+':'+col['Formula']  # x category: '<index>:<formula>'
    for k, v in col['Comp_en_diff'].items():

        figmain.add_trace(go.Scatter(y=[v],
                             x=[x_label],
                                     marker=dict(size=10,color='#1878b6'), name = k))

        figmain.add_annotation(x=x_label, y=v, text=k, showarrow=False,
                               textangle=0, font=dict(size=14, color='black', family='Arial'),
                               xanchor='center', yanchor='bottom')

# Styling: white template, boxed axes with inward ticks, no legend.
figmain.update_traces(opacity=0.9)
figmain.update_layout(yaxis = dict(tickfont = dict(size=18)))
figmain.update_layout(xaxis = dict(tickfont = dict(size=12)))
figmain.update_layout(template='simple_white')
figmain.update_layout( xaxis_title = 'Compounds', yaxis_title='Electronegativity')
figmain.update_yaxes(title_font=dict(size=22), color='black')
figmain.update_xaxes(title_font=dict(size=22), color='black')
figmain.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
figmain.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
figmain.update_xaxes(ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,tickangle=90)
figmain.update_yaxes(ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)
figmain.update_layout(showlegend=False)
figmain.update_layout(width=1000,height=650)
figmain.show()
# Write both exports next to the notebook's working directory; the PDF path
# previously pointed at the filesystem root ("/en_diff_scatter_1.pdf").
figmain.write_image("./en_diff_scatter_1.pdf",width=1000, height=650)
figmain.write_html("./en_diff_scatter_1.html",include_mathjax = 'cdn')

<h3><center> Save periodic table heatmap for elements where cation-anion classification disagrees with BVA method</center></h3>

In [None]:
# Elements recorded in Mull_BVA_elements for the rows where the
# Mulliken-based classification disagrees with BVA, shown on a periodic table.
disagree_elements = df.loc[df.Mull_BVA_comp == 'Disagree', 'Mull_BVA_elements']
fig = ptable_heatmap_plotly(disagree_elements)
fig.show()

In [None]:
# Save the periodic-table heatmap as a PDF in the current working directory.
fig.write_image("./Elements_heatmap_mull_BVA.pdf",width=1000, height=650)