In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import matplotlib.patches as mpl_patches
import plotly.graph_objects as go
from matplotlib import pyplot as plt
from pymatgen.io.lobster import Lobsterout
from pymatgen.core.structure import Structure
from mendeleev import element
# Global seaborn appearance: plain white axes background, "talk" scaling for
# fonts/lines, and a two-colour default palette (blue, orange-red).
sns.set_style("white")
sns.set_context("talk")
sns.set_palette(["#0CB1F3","#F34E0C"])
# Silences every Python warning for the rest of the session, including
# deprecation warnings raised by the imported libraries.
warnings.filterwarnings('ignore')

In [None]:
# Record the directory the notebook was started from, so it can still be
# located after the os.chdir call in a later cell changes the working directory.
parent=os.getcwd()

#### Change to the directory containing the raw calculation files

In [None]:
# Make 'Results/' the working directory; the cells below use paths relative to it.
# The path is relative, so re-running this cell without restarting the kernel
# looks for 'Results/' inside 'Results/' and fails unless such a folder exists.
os.chdir('Results/') # Path to the directory where the unprocessed data (8 tar files) downloaded from Zenodo were extracted

In [None]:
# Sub-directories of the current working directory, skipping any entry whose
# name begins with 't', '.' or '__'.
mpids_lob = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]
# Unique identifiers (the part of each folder name before the first '_'),
# in ascending string order.
mats = sorted({folder.split('_')[0] for folder in mpids_lob})

In [None]:
# Empty table with one row per identifier in `mats`; every cell starts as NaN
# until the extraction loop fills it in.
df = pd.DataFrame(columns=['Formula', 'Spacegroup', 'Composition'], index=mats)

# Keep 'Composition' as an object column so that arbitrary Python objects
# can be stored in its cells.
df = df.astype({'Composition': 'object'})

In [None]:
# Fill df from the raw calculation files, one row per identifier in `mats`.
# Both file paths are relative, so each identifier's folder is expected in the
# current working directory.
for mpid in mats: #run this block to get data from calc files

    # Crystal structure read from the gzipped POSCAR in this identifier's folder.
    struct=Structure.from_file('./{}/POSCAR.gz'.format(mpid))

    # Parsed LOBSTER output; provides the charge_spilling and total_spilling values used below.
    out = Lobsterout('{}/lobster.out.gz'.format(mpid))
    # Keep element [0] of the returned pair (the formula, going by the method name).
    reduced_formula= struct.composition.get_reduced_formula_and_factor()[0]

    df.loc[mpid,'Formula']=reduced_formula
    # Element [1] of get_space_group_info() -- presumably the international
    # space-group number; confirm against the pymatgen documentation.
    df.loc[mpid,'Spacegroup'] = struct.get_space_group_info()[1]
    # .at writes the composition object into a single cell of the object-dtype column.
    df.at[mpid,'Composition'] = struct.composition
    # The two spilling columns are not in the initial column list; they are
    # created by the first assignment.
    # NOTE(review): sum(...)/2 is a mean only if the list has exactly two entries
    # (presumably one per spin channel); with a single entry it would halve the
    # value -- TODO confirm. The factor 100 presumably converts a fraction to percent.
    df.loc[mpid,'abs_charge_spilling']=(sum(out.charge_spilling)/2)*100
    # NOTE(review): 'abs_totat_spilling' looks like a misspelling of "total"; it is
    # left as is because the label becomes part of the table that gets pickled.
    df.loc[mpid,'abs_totat_spilling']=(sum(out.total_spilling)/2)*100
   

In [None]:
# Optional export of the extracted table (disabled). With 'Results/' as the
# working directory, '../' places the pickle one level up, next to 'Results/'.
#df.to_pickle('../Charge_spilling_data.pkl')

In [2]:
# Load the cached spilling table instead of re-parsing the raw calculation files.
# The (commented-out) export cell writes the pickle to '../' relative to
# 'Results/', i.e. into the directory the notebook was started from. A bare
# relative file name would be looked up inside 'Results/' once the os.chdir
# cell has run, so resolve it against `parent` when that variable exists.
# If only the import cell has been executed, `parent` is undefined and the
# current working directory is used, exactly as before.
# NOTE: unpickling can execute arbitrary code -- only load a pickle you trust.
df = pd.read_pickle(
    os.path.join(globals().get('parent', os.getcwd()), 'Charge_spilling_data.pkl')
)

#### Get histogram for abs charge spilling < 5 %

In [None]:
# Histogram of the absolute charge spilling for the rows whose value is below 5 %.
fig = go.Figure(
    data=[
        go.Histogram(
            x=df.loc[df['abs_charge_spilling']<5, 'abs_charge_spilling'],
            nbinsx=56,
        )
    ]
)

# Figure-level settings: theme, canvas size, axis titles and tick-label size.
fig.update_layout(
    template='simple_white',
    width=1000,
    height=650,
    xaxis=dict(title=dict(text='Abs. Charge spilling %'), tickfont=dict(size=18)),
    yaxis=dict(title=dict(text='Number of compounds'), tickfont=dict(size=18)),
)

# Identical frame styling for both axes: black mirrored axis lines,
# inward ticks and a larger title font.
fig.update_xaxes(
    title_font=dict(size=22), color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.update_yaxes(
    title_font=dict(size=22), color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.show()
# Optional PDF export (disabled):
#figure="abs_charge_spilling.pdf"
#fig.write_image("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),format='pdf',width=1000, height=650)

#### Get histogram for abs charge spilling > 5 %

In [None]:
# Histogram of the absolute charge spilling for the rows whose value is above 5 %.
fig = go.Figure(
    data=[
        go.Histogram(
            x=df.loc[df['abs_charge_spilling']>5, 'abs_charge_spilling'],
            nbinsx=56,
        )
    ]
)

# Figure-level settings: theme, canvas size, axis titles and tick-label size.
fig.update_layout(
    template='simple_white',
    width=1000,
    height=650,
    xaxis=dict(title=dict(text='Abs. Charge spilling %'), tickfont=dict(size=18)),
    yaxis=dict(title=dict(text='Number of compounds'), tickfont=dict(size=18)),
)

# Identical frame styling for both axes: black mirrored axis lines,
# inward ticks and a larger title font.
fig.update_xaxes(
    title_font=dict(size=22), color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.update_yaxes(
    title_font=dict(size=22), color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.show()
# Optional PDF export (disabled). NOTE(review): the file name is the same one
# used in the "< 5 %" histogram cell; choose a different name before enabling
# both exports, otherwise one file overwrites the other.
#figure="abs_charge_spilling.pdf"
#fig.write_image("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),format='pdf',width=1000, height=650)