#### Load the necessary modules 

In [None]:
import os
import seaborn as sns
import multiprocessing as mp
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import warnings
from mendeleev import element
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.analysis.ewald import EwaldSummation
from pymatgen.io.lobster.outputs import Charge
from pymatgen.core.structure import Structure
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.core import Composition
from pymatgen.analysis.chemenv.coordination_environments.chemenv_strategies import (
    SimplestChemenvStrategy, MultiWeightsChemenvStrategy
)
from pymatgen.analysis.chemenv.coordination_environments.coordination_geometry_finder import (
    LocalGeometryFinder,
)
from pymatgen.analysis.chemenv.coordination_environments.structure_environments import (
    LightStructureEnvironments,
)
from pymatgen.io.lobster.lobsterenv import LobsterNeighbors
from pymatviz.histograms import hist_elemental_prevalence
from pymatviz.ptable import ptable_heatmap_plotly
from tqdm import tqdm
from IPython.display import Markdown as md
warnings.filterwarnings("ignore")

In [None]:
# Remember the directory the notebook was started from before leaving it.
parent=os.getcwd()
# All relative paths used in the following cells ('./<mpid>/...', '*.pkl') are
# resolved against this directory.
os.chdir('Results/') #Change to directory containing raw calculation files

In [None]:
# Calculation folders: every sub-directory of the working directory whose name
# does not start with 't', '.' or '__'.
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]

# Unique, alphabetically sorted identifiers: the part of each folder name that
# precedes the first underscore.
mats = sorted({folder.split('_')[0] for folder in mpids})

In [None]:
# Table of Bader results indexed by material id; later cells read its
# 'Comment_struct' and 'aflow_struct' columns.
# NOTE(review): unpickling can execute arbitrary code - only load a trusted file.
df_bader= pd.read_pickle('./bader_charges.pkl')

In [None]:
# Restrict the work list to materials flagged 'Matched' in the Bader table.
is_matched = df_bader.Comment_struct == 'Matched'
mats = list(df_bader[is_matched].index)

# 'mp-6270' is excluded explicitly; list.remove raises ValueError if it is absent.
mats.remove('mp-6270')

In [None]:
def _charge_signs(charges):
    """Label every charge 'POS' when it is >= 0 and 'NEG' otherwise."""
    return ['POS' if charge >= 0 else 'NEG' for charge in charges]


def _sign_mismatch_elements(signs_a, signs_b, sites):
    """Return a Composition built from the elements whose sign labels differ.

    The two label lists are walked in lockstep; for every position where they
    differ, the element symbol of the site at that position is recorded once,
    in order of first appearance.
    """
    symbols = []
    for idx, (sign_a, sign_b) in enumerate(zip(signs_a, signs_b)):
        if sign_a != sign_b:
            symbol = sites[idx].specie.symbol
            if symbol not in symbols:
                symbols.append(symbol)
    return Composition(''.join(symbols))


def comp_cation_anion(mpid,df_bader=df_bader):
    """Compare the cation/anion (charge sign) assignment of Bader, Loewdin and Mulliken charges.

    Reads ``./<mpid>/CHARGE.lobster.gz`` and ``./<mpid>/POSCAR.gz`` relative to
    the current working directory and takes the Bader-charge-decorated
    structure from ``df_bader.at[mpid, 'aflow_struct']``.

    Parameters
    ----------
    mpid : str
        Material identifier; used as folder name and as row label.
    df_bader : pandas.DataFrame
        Table indexed by material id with an ``aflow_struct`` column holding a
        structure whose species carry the Bader charges as oxidation states.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``mpid`` holding formula, composition, space
        group number, the three charge lists, the pairwise Pauling
        electronegativity differences (``Comp_en_diff``), the Ewald total
        energy for each charge set (``Madelung_*``), the 'Agree'/'Disagree'
        verdict for each pair of methods (``*_comp``) and, on disagreement,
        the elements concerned (``*_elements``).
    """
    df = pd.DataFrame(
        index=[mpid],
        columns=['Formula','Composition','Spacegroup','Bader_oxi','Loew_oxi','Mull_oxi',
                 'Comp_en_diff','Mull_Bader_elements', 'Loew_Bader_elements',
                 'Mull_Loew_elements','Bader_structure'],
    )

    # These columns store Python objects (lists, Composition, Structure).
    for column in ('Composition', 'Bader_oxi', 'Loew_oxi', 'Mull_oxi',
                   'Mull_Bader_elements', 'Loew_Bader_elements',
                   'Mull_Loew_elements', 'Bader_structure'):
        df[column] = df[column].astype(dtype='object')

    Lobs_charge = Charge(filename=f'./{mpid}/CHARGE.lobster.gz')

    struct = Poscar.from_file(filename=f'./{mpid}/POSCAR.gz')
    structure = struct.structure
    composition = structure.composition

    df.loc[mpid,'Formula'] = composition.get_reduced_formula_and_factor()[0]
    df.loc[mpid,'Spacegroup'] = structure.get_space_group_info()[1]
    df.at[mpid,'Composition'] = composition
    df.at[mpid,'Bader_structure'] = df_bader.at[mpid,'aflow_struct']

    # Pauling electronegativity of every element in the composition.
    en = {el.symbol: element(el.symbol).en_pauling for el in composition}

    # Absolute electronegativity difference for every unordered pair of
    # distinct elements, keyed 'A-B' with A appearing before B in `en`.
    symbols = list(en)
    differences = {
        f'{first}-{second}': abs(en[first] - en[second])
        for pos, first in enumerate(symbols)
        for second in symbols[pos + 1:]
    }
    df.at[mpid,'Comp_en_diff'] = differences

    # --- Bader ---
    # NOTE(review): the Bader structure is re-sorted here while the POSCAR
    # structure is used as read; the per-site comparisons below assume both
    # list their sites in the same order - confirm for the data set.
    bader_struct_sorted = df_bader.at[mpid,'aflow_struct'].get_sorted_structure()
    bader_charges = [species.oxi_state for species in bader_struct_sorted.species]
    Bader_oxi = _charge_signs(bader_charges)

    df.at[mpid,'Bader_oxi'] = bader_charges
    df.loc[mpid,'Madelung_Bader'] = EwaldSummation(bader_struct_sorted).total_energy

    # --- Loewdin ---
    loew = _charge_signs(Lobs_charge.Loewdin)
    df.at[mpid,'Loew_oxi'] = Lobs_charge.Loewdin
    # The POSCAR structure is decorated in place; the Mulliken step below
    # overwrites these oxidation states again.
    structure.add_oxidation_state_by_site(Lobs_charge.Loewdin)
    df.loc[mpid,'Madelung_Loew'] = EwaldSummation(structure).total_energy

    # --- Mulliken ---
    Mull = _charge_signs(Lobs_charge.Mulliken)
    df.at[mpid,'Mull_oxi'] = Lobs_charge.Mulliken
    structure.add_oxidation_state_by_site(Lobs_charge.Mulliken)
    df.loc[mpid,'Madelung_Mull'] = EwaldSummation(structure).total_energy

    # --- pairwise sign comparison ---
    if Mull == Bader_oxi:
        df.at[mpid,'Mull_Bader_comp'] = 'Agree'
    else:
        df.at[mpid,'Mull_Bader_comp'] = 'Disagree'
        # Assigned once after the scan, like the two comparisons below.
        df.at[mpid,'Mull_Bader_elements'] = _sign_mismatch_elements(
            Mull, Bader_oxi, structure.sites)

    if loew == Bader_oxi:
        df.at[mpid,'Loew_Bader_comp'] = 'Agree'
    else:
        df.at[mpid,'Loew_Bader_comp'] = 'Disagree'
        df.at[mpid,'Loew_Bader_elements'] = _sign_mismatch_elements(
            loew, Bader_oxi, structure.sites)

    if Mull == loew:
        df.at[mpid,'Mull_Loew_comp'] = 'Agree'
    else:
        df.at[mpid,'Mull_Loew_comp'] = 'Disagree'
        df.at[mpid,'Mull_Loew_elements'] = _sign_mismatch_elements(
            loew, Mull, structure.sites)

    return df

In [None]:
# Run this cell only to rebuild the dataframe from the raw calculation files.
items = mats
row = []
with mp.Pool(processes=14, maxtasksperchild=1) as pool:
    # tqdm is told the number of tasks explicitly so it can draw a full
    # progress bar around the lazily produced results.
    results = tqdm(
        pool.imap(comp_cation_anion, items, chunksize=2),
        total=len(items),
    )
    # Collect the single-row frames returned by the workers.
    for result in results:
        row.append(result)

# Stack the per-material rows into one table.
df = pd.concat(row)

In [None]:
# Replaces any `df` built in the previous cell with the stored copy.
# NOTE(review): unpickling can execute arbitrary code - only load a trusted file.
df = pd.read_pickle('./Charge_comp_data_bader.pkl') #Load the dataframe with precomputed data needed for the plots

In [None]:
# Percentage of compounds labelled 'Agree' among all compounds that have a
# Mulliken-vs-Bader verdict at all.
mull_bader_verdict = df.Mull_Bader_comp
n_agree = int((mull_bader_verdict == 'Agree').sum())
n_classified = int(mull_bader_verdict.notna().sum())
agree_per = (n_agree / n_classified) * 100

In [None]:
# Show the Mulliken-vs-Bader agreement percentage as the cell output.
agree_per

<h3><center>We see 91 % agreement between the Mulliken and Bader charge analysis methods</center></h3>

<h3><center>Scatter plot of electronegativity differences for compounds whose cation-anion classification disagrees with the Bader method</center></h3>

In [None]:
figmain = go.Figure()

# change the index slices to get all 3 plots as shown in SI
disagreeing = df.loc[df.Mull_Bader_comp == 'Disagree'][20:50]

for mpid, record in disagreeing.iterrows():
    # One x-axis category per compound: '<id>:<formula>'.
    label = mpid + ':' + record['Formula']
    for pair, en_diff in record['Comp_en_diff'].items():
        # One marker per element pair, annotated with the pair name.
        figmain.add_trace(
            go.Scatter(
                x=[label],
                y=[en_diff],
                marker=dict(size=10, color='#1878b6'),
                name=pair,
            )
        )
        figmain.add_annotation(
            x=label,
            y=en_diff,
            text=pair,
            showarrow=False,
            textangle=0,
            font=dict(size=14, color='black', family='Arial'),
            xanchor='center',
            yanchor='bottom',
        )

figmain.update_traces(opacity=0.9)

# Figure-level appearance.
figmain.update_layout(template='simple_white', showlegend=False, width=1000, height=650)
figmain.update_layout(
    xaxis=dict(tickfont=dict(size=12)),
    yaxis=dict(tickfont=dict(size=18)),
)
figmain.update_layout(xaxis_title='Compounds', yaxis_title='Electronegativity')

# Styling shared by both axes; only the x axis gets rotated tick labels.
axis_style = dict(
    title_font=dict(size=22),
    color='black',
    showline=True,
    linewidth=1,
    linecolor='black',
    mirror=True,
    ticks="inside",
    tickwidth=1,
    tickcolor='black',
    ticklen=5,
)
figmain.update_xaxes(tickangle=90, **axis_style)
figmain.update_yaxes(**axis_style)

figmain.show()
# Optional exports (disabled):
#figmain.write_image("/en_diff_scatter_1.pdf",width=1000, height=650)
#figmain.write_html("./en_diff_scatter_1.html",include_mathjax = 'cdn')

<h3><center> Save periodic table heatmap for elements where cation-anion classification disagrees with Bader charge analysis method</center></h3>

In [None]:
# Periodic-table heatmap of the elements recorded for compounds where the
# Mulliken and Bader classifications disagree.
mull_bader_disagree = df.Mull_Bader_comp == 'Disagree'
fig = ptable_heatmap_plotly(df.loc[mull_bader_disagree, 'Mull_Bader_elements'])
fig.show()

In [None]:
# Export the Mulliken-vs-Bader heatmap currently held in `fig` as a PDF.
# NOTE(review): plotly static export needs an image engine (e.g. kaleido) - confirm it is installed.
fig.write_image("./Elements_heatmap_mull_Bader.pdf",width=1000, height=650)

In [None]:
# Periodic-table heatmap of the elements recorded for compounds where the
# Loewdin and Bader classifications disagree.
loew_bader_disagree = df.Loew_Bader_comp == 'Disagree'
fig = ptable_heatmap_plotly(df.loc[loew_bader_disagree, 'Loew_Bader_elements'])
fig.show()

In [None]:
# Export the Loewdin-vs-Bader heatmap currently held in `fig` as a PDF.
# NOTE(review): plotly static export needs an image engine (e.g. kaleido) - confirm it is installed.
fig.write_image("./Elements_heatmap_loew_Bader.pdf",width=1000, height=650)

<h3><center> Scatter plot comparing the Madelung energies of the crystal structures obtained from Bader and Mulliken charges </center></h3>

In [None]:
fig = go.Figure()

# One trace per verdict: Mulliken-based energy on x, Bader-based energy on y.
for verdict, colour in (('Agree', '#1878b6'), ('Disagree', '#F34E0C')):
    subset = df.loc[df.Mull_Bader_comp == verdict]
    fig.add_trace(
        go.Scatter(
            x=subset.Madelung_Mull,
            y=subset.Madelung_Bader,
            hovertext=subset.index + '<br>Composition :' + subset.Formula,
            marker=dict(size=10, color=colour),
            name=verdict,
            mode='markers',
        )
    )

fig.update_traces(marker_size=10, marker_opacity=0.5)

# Figure-level appearance.
fig.update_layout(
    xaxis=dict(tickfont=dict(size=22)),
    yaxis=dict(tickfont=dict(size=22)),
)
fig.update_layout(xaxis_title='Madelung Mulliken (eV)', yaxis_title="Madelung Bader (eV)")
fig.update_layout(template='simple_white', width=900, height=900)

# Identical styling and identical range on both axes.
axis_style = dict(
    title_font=dict(size=24),
    color='black',
    showline=True,
    linewidth=1,
    linecolor='black',
    mirror=True,
    ticks="inside",
    tickwidth=1,
    tickcolor='black',
    ticklen=5,
    range=[-780, 10],
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# Legend in the top-left corner of the plotting area.
legend_font = dict(family="sans-serif", size=20, color="black")
fig.update_layout(legend=dict(x=0, y=1, traceorder="normal", font=legend_font))

fig.show()

<h3><center> Comparison of BVA and Bader charges </center></h3>

In [None]:
def comp_cation_anion_bva_bader(mpid,df_bader=df_bader):
    """Compare the cation/anion (charge sign) assignment of BVA valences and Bader charges.

    Reads ``./<mpid>/POSCAR.gz`` relative to the current working directory and
    takes the Bader-charge-decorated structure from
    ``df_bader.at[mpid, 'aflow_struct']``.

    Parameters
    ----------
    mpid : str
        Material identifier; used as folder name and as row label.
    df_bader : pandas.DataFrame
        Table indexed by material id with an ``aflow_struct`` column holding a
        structure whose species carry the Bader charges as oxidation states.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``mpid`` holding formula, composition, space
        group number, Bader charges, the pairwise Pauling electronegativity
        differences and ``Madelung_Bader``. When the bond-valence step
        succeeds the row also holds ``BVA_oxi``, ``Madelung_BVA``,
        ``BVA_Bader_comp`` and, on disagreement, ``BVA_Bader_elements``; when
        it fails those entries are left unset and no exception propagates.
    """
    df = pd.DataFrame(
        index=[mpid],
        columns=['Formula','Composition','Spacegroup','Bader_oxi','BVA_oxi',
                 'Comp_en_diff','BVA_Bader_elements','Bader_structure'],
    )

    # These columns store Python objects (lists, Composition, Structure).
    for column in ('Composition', 'Bader_oxi', 'BVA_oxi',
                   'BVA_Bader_elements', 'Bader_structure'):
        df[column] = df[column].astype(dtype='object')

    BV = BVAnalyzer()

    struct = Poscar.from_file(filename=f'./{mpid}/POSCAR.gz')
    structure = struct.structure
    composition = structure.composition

    df.loc[mpid,'Formula'] = composition.get_reduced_formula_and_factor()[0]
    df.loc[mpid,'Spacegroup'] = structure.get_space_group_info()[1]
    df.at[mpid,'Composition'] = composition
    df.at[mpid,'Bader_structure'] = df_bader.at[mpid,'aflow_struct']

    # Pauling electronegativity of every element in the composition.
    en = {el.symbol: element(el.symbol).en_pauling for el in composition}

    # Absolute electronegativity difference for every unordered pair of
    # distinct elements, keyed 'A-B' with A appearing before B in `en`.
    symbols = list(en)
    differences = {
        f'{first}-{second}': abs(en[first] - en[second])
        for pos, first in enumerate(symbols)
        for second in symbols[pos + 1:]
    }
    df.at[mpid,'Comp_en_diff'] = differences

    # --- Bader ---
    # NOTE(review): the Bader structure is re-sorted here while the POSCAR
    # structure is used as read; the per-site comparison below assumes both
    # list their sites in the same order - confirm for the data set.
    bader_struct_sorted = df_bader.at[mpid,'aflow_struct'].get_sorted_structure()
    bader_charges = [species.oxi_state for species in bader_struct_sorted.species]
    Bader_oxi = ['POS' if charge >= 0 else 'NEG' for charge in bader_charges]

    df.at[mpid,'Bader_oxi'] = bader_charges
    df.loc[mpid,'Madelung_Bader'] = EwaldSummation(bader_struct_sorted).total_energy

    # --- bond valence analysis (best effort) ---
    try:
        # Evaluate the valences once and reuse them for the sign labels, the
        # stored column and the structure decoration.
        valences = BV.get_valences(structure=structure)
        BVA_oxi = ['POS' if valence >= 0 else 'NEG' for valence in valences]

        df.at[mpid,'BVA_oxi'] = valences
        structure.add_oxidation_state_by_site(valences)
        df.loc[mpid,'Madelung_BVA'] = EwaldSummation(structure).total_energy

        if BVA_oxi == Bader_oxi:
            df.at[mpid,'BVA_Bader_comp'] = 'Agree'
        else:
            df.at[mpid,'BVA_Bader_comp'] = 'Disagree'
            issues = []
            for idx, (bva, bader) in enumerate(zip(BVA_oxi, Bader_oxi)):
                if bva != bader:
                    symbol = structure.sites[idx].specie.symbol
                    if symbol not in issues:
                        issues.append(symbol)
            # Assigned once, after all sites have been scanned.
            df.at[mpid,'BVA_Bader_elements'] = Composition(''.join(issues)) #disagree elements
    except Exception as err:
        # Keep the row with its BVA entries unset instead of aborting the whole
        # run; KeyboardInterrupt/SystemExit are deliberately not caught.
        warnings.warn(f'BVA comparison skipped for {mpid}: {err!r}')

    return df

In [None]:
# Run this cell only to rebuild the dataframe from the raw calculation files.
items = mats
row = []
with mp.Pool(processes=14, maxtasksperchild=1) as pool:
    # tqdm is told the number of tasks explicitly so it can draw a full
    # progress bar around the lazily produced results.
    results = tqdm(
        pool.imap(comp_cation_anion_bva_bader, items, chunksize=2),
        total=len(items),
    )
    # Collect the single-row frames returned by the workers.
    for result in results:
        row.append(result)

# Stack the per-material rows into one table.
df_bva_bader = pd.concat(row)

In [None]:
# Percentage of compounds labelled 'Agree' among all compounds that have a
# BVA-vs-Bader verdict at all.
bva_bader_verdict = df_bva_bader.BVA_Bader_comp
n_agree = int((bva_bader_verdict == 'Agree').sum())
n_classified = int(bva_bader_verdict.notna().sum())
agree_per = (n_agree / n_classified) * 100

In [None]:
# Show the BVA-vs-Bader agreement percentage as the cell output.
agree_per

<h3><center>We see 95 % agreement between the BVA and Bader charge analysis methods</center></h3>

In [None]:
# Periodic-table heatmap of the elements recorded for compounds where the
# BVA and Bader classifications disagree.
bva_bader_disagree = df_bva_bader.BVA_Bader_comp == 'Disagree'
fig = ptable_heatmap_plotly(df_bva_bader.loc[bva_bader_disagree, 'BVA_Bader_elements'])
fig.show()

In [None]:
# Export the BVA-vs-Bader heatmap currently held in `fig` as a PDF.
# NOTE(review): plotly static export needs an image engine (e.g. kaleido) - confirm it is installed.
fig.write_image("./Elements_heatmap_BVA_Bader.pdf",width=1000, height=650)