In [None]:
import os
import pandas as pd
import plotly.graph_objects as go
from matplotlib import pyplot as plt
from pymatgen.io.lobster import Bandoverlaps, Lobsterout
from pymatgen.electronic_structure.core import Spin
from pymatgen.core import Structure
from pymatviz import ptable_heatmap_plotly

In [None]:
# Work inside the 'Results' directory: the directory listing and every file
# path used further down are relative to it.
# The guard keeps this cell re-runnable; an unconditional chdir would look for
# 'Results/Results' (and fail) when the cell is executed a second time.
if os.path.basename(os.getcwd()) != 'Results':
    os.chdir('Results')

In [None]:
# Directories in the current working directory, ignoring entries whose names
# start with 't', '.' or '__'.
excluded_prefixes = ('t', '.', '__')
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(excluded_prefixes) and os.path.isdir(entry)
]
# Unique mpids (the text before the first underscore), as a sorted Python list
mats = sorted({name.split('_')[0] for name in mpids})

In [None]:
# Empty results table: one row per entry of `mats`, one column per quantity.
result_columns = [
    'Composition',
    'Formula',
    'has_good_quality_maxDeviation',
    'max_deviation',
    'percent_kpoints_abv_limit',
]
df = pd.DataFrame(columns=result_columns, index=mats)

In [None]:
# Fill one row of `df` per material from its LOBSTER output files.
# Materials without a bandOverlaps.lobster.gz file are skipped, so their rows
# keep the missing values the table was created with.
DEVIATION_LIMIT = 0.1  # pymatgen default limit for the maximum deviation

for mpid in mats:
    band_olp_path = os.path.join(mpid, 'bandOverlaps.lobster.gz')
    if not os.path.exists(band_olp_path):
        continue

    band_olp = Bandoverlaps(filename=band_olp_path)
    lob_out = Lobsterout(filename=os.path.join(mpid, 'lobsterout.gz'))
    struct = Structure.from_file(filename=os.path.join(mpid, 'POSCAR.gz'))

    # Total number of k-points: third space-separated token of the
    # orthonormalization warning line.
    # Reset for every material. Without the reset, a material lacking this
    # warning would either raise NameError (first iteration) or silently reuse
    # the k-point count of the previously processed material.
    total_kpoints = None
    for line in lob_out.warning_lines:
        if 'k-points could not be orthonormalized' in line:
            total_kpoints = int(line.split(' ')[2])

    # Number of deviations above the limit
    n_above_limit = sum(1 for dev in band_olp.max_deviation if dev > DEVIATION_LIMIT)

    # Percent of k-points above the limit; NaN when the total is unknown or
    # zero, so the row is marked as missing instead of carrying a wrong value.
    if total_kpoints:
        percent_above_limit = round((n_above_limit / total_kpoints) * 100, 4)
    else:
        percent_above_limit = float('nan')

    # `.at` writes exactly one cell and stores the value as a single object.
    df.at[mpid, 'Composition'] = struct.composition.reduced_composition
    df.at[mpid, 'Formula'] = struct.composition.reduced_formula
    df.at[mpid, 'has_good_quality_maxDeviation'] = band_olp.has_good_quality_maxDeviation()
    df.at[mpid, 'max_deviation'] = round(max(band_olp.max_deviation), 4)
    df.at[mpid, 'percent_kpoints_abv_limit'] = percent_above_limit

In [None]:
# Uncomment to write the table to 'Band_overlaps_data.pkl' (the file that is
# read back with pd.read_pickle further down).
#df.to_pickle('Band_overlaps_data.pkl')

In [None]:
# Load the saved table from disk; this replaces any `df` already in memory.
pickle_path = 'Band_overlaps_data.pkl'
df = pd.read_pickle(pickle_path)

In [None]:
# Keep only the rows that have a value in 'has_good_quality_maxDeviation'.
notna = df.dropna(subset=['has_good_quality_maxDeviation'])

In [None]:
# Rows whose 'has_good_quality_maxDeviation' entry is False.
failed_quality_check = notna['has_good_quality_maxDeviation'].eq(False)
df_check = notna.loc[failed_quality_check]

In [None]:
# Percentage of all rows in `df` for which more than 5 % of the k-points lie
# above the deviation limit of 0.1 (strictly greater than 5).
above_5_percent = df_check.loc[df_check['percent_kpoints_abv_limit'] > 5]
(len(above_5_percent) / len(df)) * 100

### Approximately 7 % of the compounds have more than 5 % of their k-points above the deviation limit of 0.1