In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import matplotlib.patches as mpl_patches
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from matplotlib import pyplot as plt
from pymatgen.io.lobster import Lobsterout , Icohplist
from pymatgen.core.structure import Structure
from mendeleev import element
# Global plotting configuration for the notebook.
# seaborn: white background style, "talk" context and a two-colour palette.
sns.set_style("white")
sns.set_context("talk")
sns.set_palette(["#0CB1F3","#F34E0C"])
# plotly: use the "plotly_white" template for every figure created below.
pio.templates.default = "plotly_white"
# NOTE(review): this silences *all* warnings for the whole session, including
# deprecation warnings from pandas/pymatgen — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Remember the working directory at this point, i.e. before the os.chdir call
# further down changes it.
parent=os.getcwd()

### The following code blocks can be run to extract the data directly from the calculation files

#### Change to the directory containing the raw calculation files (will be provided at the time of final publication)

In [None]:
# Each folder is named as mp-id
# Anchor the target on `parent` (captured above) instead of the current
# directory, so that re-running this cell stays in <parent>/Results rather than
# trying to descend into Results/Results.
os.chdir(os.path.join(parent, 'Results'))

In [None]:
# Collect the calculation directories found in the current working directory
# (per the original note: the runs with the smallest basis). Entries whose name
# starts with 't', '.' or '__' and anything that is not a directory are skipped.
mpids_lob = [
    entry
    for entry in os.listdir()
    if os.path.isdir(entry) and not entry.startswith(('t', '.', '__'))
]
# Unique identifiers = the part of each folder name before the first '_', sorted.
mats = sorted({folder.split('_')[0] for folder in mpids_lob})

In [None]:
# Empty results table: one row per material id in `mats`, three metadata
# columns to start with. 'Composition' is pinned to object dtype because the
# extraction loop stores a composition object (not a scalar) in it.
df = pd.DataFrame(
    index=mats,
    columns=['Formula', 'Spacegroup', 'Composition'],
).astype({'Composition': 'object'})

In [None]:
# Loop through all calculation directories and, for every material, record the
# strongest bond according to ICOHP / ICOOP / ICOBI together with its bond
# length, atom-pair label and electronegativity difference, plus the charge
# spillings reported in lobster.out.


def _strongest_bond(collection, pick):
    """Return ``(value, length, label)`` for the strongest bond of a collection.

    Parameters
    ----------
    collection : dict
        ``icohpcollection.as_dict()`` output; the keys ``list_icohp``,
        ``list_length``, ``list_atom1`` and ``list_atom2`` are read.
    pick : callable
        ``min`` or ``max`` — selects which extreme of the summed values counts
        as "strongest" (``min`` for ICOHP, ``max`` for ICOOP/ICOBI).

    Returns
    -------
    tuple
        The extreme summed value, the bond length of the selected bond and its
        label such as ``"Na-Cl"`` (site names with trailing digits stripped).

    Notes
    -----
    Several bonds can share the extreme value. The shortest of them is used,
    so value, length and label always describe one and the same bond.
    """
    # Each entry of list_icohp is a dict (presumably one value per spin
    # channel); a bond's strength is the sum over its entries.
    totals = [sum(per_bond.values()) for per_bond in collection['list_icohp']]
    extreme = pick(totals)  # computed once instead of once per bond
    lengths = collection['list_length']
    best = min(
        (i for i, total in enumerate(totals) if total == extreme),
        key=lambda i: lengths[i],
    )
    label = '-'.join(
        site.rstrip('0123456789')
        for site in (collection['list_atom1'][best], collection['list_atom2'][best])
    )
    return extreme, lengths[best], label


# Pauling electronegativity per element symbol, filled lazily so that each
# element is looked up through mendeleev only once.
_en_by_symbol = {}


def _en_difference(label):
    """Absolute Pauling electronegativity difference of an ``"A-B"`` bond label.

    Returns NaN when either value is missing (assumes mendeleev reports
    elements without a tabulated Pauling value as ``None`` — TODO confirm).
    """
    values = []
    for symbol in label.split('-'):
        if symbol not in _en_by_symbol:
            _en_by_symbol[symbol] = element(symbol).en_pauling
        values.append(_en_by_symbol[symbol])
    if any(v is None for v in values):
        return np.nan
    return abs(values[1] - values[0])


# (column prefix, LOBSTER list file, extra Icohplist flags, selector of the
# strongest value). The order fixes the column order in the dataframe.
_BOND_MEASURES = (
    ('ICOHP', 'ICOHPLIST.lobster.gz', {}, min),
    ('ICOOP', 'ICOOPLIST.lobster.gz', {'are_coops': True}, max),
    ('ICOBI', 'ICOBILIST.lobster.gz', {'are_cobis': True}, max),
)

for mpid in mats:

    struct = Structure.from_file('./{}/POSCAR.gz'.format(mpid))
    out = Lobsterout('{}/lobster.out.gz'.format(mpid))

    # Parse all list files before touching the dataframe, so a missing or
    # broken file does not leave a half-filled row behind.
    strongest = {}
    for prefix, list_file, flags, pick in _BOND_MEASURES:
        parsed = Icohplist(filename='{}/{}'.format(mpid, list_file), **flags)
        strongest[prefix] = _strongest_bond(parsed.icohpcollection.as_dict(), pick)

    df.loc[mpid, 'Formula'] = struct.composition.get_reduced_formula_and_factor()[0]
    df.loc[mpid, 'Spacegroup'] = struct.get_space_group_info()[1]
    # .at is required here: the value is an object, not a scalar.
    df.at[mpid, 'Composition'] = struct.composition
    df.loc[mpid, 'abs_charge_spilling'] = (sum(out.charge_spilling) / 2) * 100
    # Column name (including its spelling) is kept as-is because previously
    # saved data files may already use it.
    df.loc[mpid, 'abs_totat_spilling'] = (sum(out.total_spilling) / 2) * 100

    for prefix, (value, length, label) in strongest.items():
        # '<prefix>_max' holds the strongest value; for ICOHP that is the
        # minimum (most negative) one.
        df.loc[mpid, prefix + '_max'] = value
        df.loc[mpid, prefix + '_bond_length'] = length
        df.loc[mpid, prefix + '_bond_label'] = label
        df.loc[mpid, prefix + '_en_diff'] = _en_difference(label)

In [None]:
# save the dataframe
#df.to_pickle('../Lobster_dataoverview.pkl')

## Start from here to quickly reproduce the plots from the publication by loading the pre-saved data file

In [None]:
# Load the pre-computed LOBSTER overview table.
# The extraction cells above change the working directory, so resolve the file
# against `parent` (the directory captured near the top of the notebook); fall
# back to the current directory when that cell was skipped.
# NOTE: unpickling executes code embedded in the file — only load pickles from
# a trusted source.
data_dir = globals().get('parent', os.getcwd())
# reset_index() moves the mp-id index into a regular column named 'index',
# which the plotting cells below use for the hover text.
df = pd.read_pickle(os.path.join(data_dir, 'Lobster_dataoverview.pkl')).reset_index()

### Reproduce Fig. 5 (a)

In [None]:
# Fig. 5 (a): strongest-bond ICOHP versus its bond length (log-scaled x axis).
hover_text = (
    'mp-id:' + df['index']
    + '<br>Bond label: ' + df['ICOHP_bond_label']
    + '<br>Composition: ' + df['Formula']
)
scatter = go.Scatter(
    x=df["ICOHP_bond_length"],
    y=df["ICOHP_max"],
    mode='markers',
    marker=dict(size=10, color='#1878b6'),
    hovertext=hover_text,
)

fig = go.Figure(scatter)
fig.update_traces(marker_opacity=0.5)

# Shared look for both axes: black framed box, inward ticks, no grid.
axis_style = dict(
    title_font=dict(size=22),
    tickfont=dict(size=18),
    color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.update_xaxes(type='log', **axis_style)
fig.update_yaxes(**axis_style)

fig.update_layout(
    width=1000,
    height=1000,
    yaxis_title="ICOHP (eV)",
    # Raw string on purpose: in a normal string literal '\r' is a carriage
    # return, which silently destroyed the ring accent of the Angstrom symbol.
    xaxis_title=r'$\textrm{Bond length } (\mathring{\mathrm{A}})$',
)
#fig.update_xaxes(tickmode='linear', tick0=0.0, dtick=1.0)
fig.show()
#figure="ICOHP_vs_Bond_length.html"
#fig.write_html("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),include_mathjax = 'cdn')
#figure="ICOHP_vs_Bond_length.pdf"
#fig.write_image("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),width=1000, height=1000)

### Reproduce Fig 5 (b) 

In [None]:
# Fig. 5 (b): strongest-bond ICOHP versus ICOBI (log-scaled x axis); marker
# colour encodes the electronegativity difference of the ICOHP bond.
hover_text = (
    'mp-id:' + df['index']
    + '<br>Bond label: ' + df['ICOHP_bond_label']
    + '<br>Composition: ' + df['Formula']
)
marker_style = dict(
    size=10,
    color=df['ICOHP_en_diff'],
    colorscale='AgSunSet',
    colorbar=dict(title='', tickfont=dict(size=18)),
)
scatter = go.Scatter(
    x=df["ICOBI_max"],
    y=df["ICOHP_max"],
    mode='markers',
    marker=marker_style,
    hovertext=hover_text,
)

fig = go.Figure(scatter)
fig.update_traces(marker_opacity=0.5)

# One style dictionary applied to both axes instead of repeated update calls.
axis_style = dict(
    title_font=dict(size=22),
    tickfont=dict(size=18),
    color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.update_xaxes(type='log', **axis_style)
fig.update_yaxes(**axis_style)

fig.update_layout(
    width=1000,
    height=1000,
    yaxis_title="ICOHP (eV)",
    xaxis_title='ICOBI',
)
#fig.update_xaxes(tickmode='linear', tick0=0.0, dtick=1.0)
fig.show()
#figure="ICOHP_vs_ICOBI.html"
#fig.write_html("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),include_mathjax = 'cdn')
#figure="ICOHP_vs_ICOBI.pdf"
#fig.write_image("Phonon_dataset_LSO/LOBSTER_plots/{}".format(figure),width=1000, height=1000)

### Reproduce Fig 6 (a)

In [None]:
# Load the dataframe with the last phonon-DOS peak values.
# Resolve the file against `parent` (captured near the top of the notebook)
# because the extraction cells change the working directory; fall back to the
# current directory when that cell was skipped.
# NOTE: unpickling executes code embedded in the file — only load pickles from
# a trusted source.
data_dir = globals().get('parent', os.getcwd())
df_phonon = pd.read_pickle(os.path.join(data_dir, 'dataforml_automatminer.pkl'))
# Index the LOBSTER table by mp-id again. Guarded so that re-running this cell
# (when 'index' is already the index) does not raise a KeyError.
if 'index' in df.columns:
    df = df.set_index('index')

In [None]:
# Add the last phdos peak values to the lobster dataframe.
# Index-aligned assignment: ids present only in df_phonon are ignored (a
# row-wise df.loc[...] = value would silently append them as new, otherwise
# empty rows) and ids missing from df_phonon end up as NaN.
df['last_phdos_peak'] = df_phonon['last phdos peak'].reindex(df.index)

In [None]:
# Keep only the materials that have a last phdos peak value (rows where it is
# missing, i.e. not available in the matbench dataset, are dropped).
filtered_df = df.dropna(subset=['last_phdos_peak'])

In [None]:
# Fig. 6 (a): highest phonon frequency (last phdos peak) versus the
# strongest-bond ICOHP, for the materials that have phonon data.
hover_text = 'mp-id: ' + filtered_df.index + '<br>Composition: ' + filtered_df['Formula']
scatter = go.Scatter(
    x=filtered_df["ICOHP_max"],
    y=filtered_df["last_phdos_peak"],
    mode='markers',
    marker=dict(size=10, color='#1878b6'),
    hovertext=hover_text,
)

fig = go.Figure(scatter)
#fig.update_layout(xaxis_type='log')
fig.update_traces(marker_opacity=0.5)

# Shared look for both axes: black framed box, inward ticks, no grid.
axis_style = dict(
    title_font=dict(size=22),
    tickfont=dict(size=18),
    color='black',
    showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False,
    ticks="inside", tickwidth=1, tickcolor='black', ticklen=5,
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

fig.update_layout(
    width=1000,
    height=800,
    xaxis_title="ICOHP (eV)",
    # Fixed: "freqency" typo, and the exponent now uses an ASCII minus instead
    # of the Unicode superscript-minus character inside the TeX superscript.
    yaxis_title=r'$\textrm{Highest phonon frequency }(\omega \text{: cm}^{-1})$',
)
fig.show()

#figure="ICOHP_vs_last_phdos_peak.html"
#fig.write_html("LOBSTER_plots/{}".format(figure),include_mathjax = 'cdn')
#figure="ICOHP_vs_last_phdos_peak.pdf"
#fig.write_image("LOBSTER_plots/{}".format(figure),width=1000, height=800)