## Load the necessary modules 

In [None]:
import os
import re
import ast
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import multiprocessing as mp
import matplotlib.patches as mpl_patches
import plotly.graph_objects as go
from tqdm import tqdm
from matplotlib import pyplot as plt
from pymatgen.io.lobster import Doscar
from pymatgen.io.lobster import Lobsterout, Lobsterin
from pymatgen.io.vasp.outputs import Vasprun
from pymatgen.io.vasp.outputs import Outcar
from pymatgen.electronic_structure.core import Orbital, OrbitalType, Spin
from collections import namedtuple
from pymatgen.electronic_structure.plotter import DosPlotter 
from pymatgen.electronic_structure.dos import Dos
from pymatgen.electronic_structure.dos import CompleteDos
from pymatgen.core.periodic_table import Element
from sklearn import preprocessing
from scipy.integrate import trapezoid
from sklearn.linear_model import LinearRegression
# Global seaborn defaults used by every plot below: "ticks" style, "talk"
# context and a two-colour palette.
sns.set_style("ticks")
sns.set_context("talk")
sns.set_palette(["#0CB1F3","#F34E0C"])
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
# Record the current working directory.
parent=os.getcwd()

In [None]:
def get_non_zero_ranges(densities:list, energies:list, minimum_gap_size=0.1):
    """
    Function to get energy ranges where densities are non zero

    Neighbouring non-zero grid points are merged into one range; a new range is
    started whenever two consecutive non-zero points are more than
    ``minimum_gap_size`` apart in energy. Ranges made of a single grid point
    (zero energy width) are dropped from BOTH returned lists, so entry ``n`` of
    one list always describes the same range as entry ``n`` of the other.

    Args:
        densities: densities
        energies: energies belonging to the densities (same order)
        minimum_gap_size: gap size to consider to separate index ranges of non-zero density regions
    Returns:
        density_inds, relevant_energy_ranges:
            ``[first_index, last_index]`` (both inclusive) of every non-zero
            density region and the matching ``[start_energy, end_energy]``.
            Both lists are empty when all densities are zero.
    """
    nonzero_inds = np.asarray(densities).nonzero()[0]
    if len(nonzero_inds) == 0:
        # Nothing to report; indexing the first non-zero point would fail.
        return [], []

    # Walk over the non-zero points and close a segment at every large gap.
    segments = []
    first_ind = last_ind = nonzero_inds[0]
    for ind in nonzero_inds:
        if energies[ind] - energies[last_ind] > minimum_gap_size:
            segments.append((first_ind, last_ind))
            first_ind = ind
        last_ind = ind
    segments.append((first_ind, last_ind))

    density_inds = []
    relevant_energy_ranges = []
    for seg_first, seg_last in segments:
        start_energy = energies[seg_first]
        end_energy = energies[seg_last]
        if end_energy - start_energy == 0:
            # Zero-width range: skip it in both lists to keep them aligned.
            # (Filtering into new lists also avoids removing items from a
            # list while iterating over it.)
            continue
        density_inds.append([int(seg_first), int(seg_last)])
        relevant_energy_ranges.append([start_energy, end_energy])

    return density_inds, relevant_energy_ranges

def legend_without_duplicate_labels(ax):
    """
    Draw the legend of a matplotlib axis with every label listed only once

    Args:
        ax: matplotlib.ax
    Returns:
       None; ax.legend is called with the first handle found for each
       distinct label (or without arguments if there are no labelled artists)
    """
    all_handles, all_labels = ax.get_legend_handles_labels()

    kept_handles = []
    kept_labels = []
    for handle, label in zip(all_handles, all_labels):
        if label in kept_labels:
            # an earlier artist already carries this label
            continue
        kept_handles.append(handle)
        kept_labels.append(label)

    if kept_labels:
        ax.legend(tuple(kept_handles), tuple(kept_labels))
    else:
        ax.legend()

In [None]:
# Change into the directory that contains all calculation folders (named according to mpids).
# Every relative path used in the cells below is resolved from here.
os.chdir('Results/')

In [None]:
# Calculation folders: every sub-directory of the current directory whose name
# does not start with 't', '.' or '__'.
mpids = [
    entry
    for entry in os.listdir()
    if not entry.startswith(('t', '.', '__')) and os.path.isdir(entry)
]
# Unique material ids (the part of the folder name before the first '_'), sorted.
mats = sorted({folder.split('_')[0] for folder in mpids})

#### Call the functions below to reproduce the pandas dataframe with necessary data for the plots from raw calculation files (optional)

####  `Note`: These functions are written so that each call processes a single material. This makes it possible to run them in parallel and reduces the time needed to gather the data.

In [None]:
def _lso_windowed_pdos(spd_dos, shift_by_efermi, lower_bound=None):
    """
    Cut every orbital-projected DOS down to the energy window lower_bound <= E <= 0

    Args:
        spd_dos: dict of orbital type -> Dos as returned by get_spd_dos()
        shift_by_efermi: if True, efermi is subtracted from the energies before windowing
        lower_bound: lower energy limit of the window; None keeps everything up to 0
    Returns:
        energies, pdos:
            energies inside the window (taken from the last orbital in spd_dos) and
            a dict mapping orbital name to the densities at these energies
    """
    energies = []
    pdos = {}
    for orb_type, orb_dos in spd_dos.items():
        grid = orb_dos.energies - orb_dos.efermi if shift_by_efermi else orb_dos.energies
        dens = orb_dos.get_densities()
        keep = [j for j, en in enumerate(grid)
                if en <= 0 and (lower_bound is None or en >= lower_bound)]
        energies = [grid[j] for j in keep]
        pdos[orb_type.name] = [dens[j] for j in keep]
    return energies, pdos


def _lso_summed_dos(pdos):
    """Add up the windowed densities of all orbitals point by point."""
    return np.sum([np.array(dens) for dens in pdos.values()], axis=0)


def _lso_features_panel(fig, ax, text):
    """Turn the right-hand axis into a frameless panel showing ``text`` in a box."""
    ax.annotate(text,
                xy=(0.50, 0.50), xycoords='axes fraction', fontsize=14, fontweight='medium',
                horizontalalignment='center',
                bbox=dict(boxstyle="round,pad=0.3", fc="none", ec="lightgrey", lw=2))
    ax.set_title('Features', fontsize=20)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    fig.subplots_adjust(wspace=0.05, hspace=None)
    ax.set_axis_off()


def _lso_save_fingerprint_plot(fp_vasp, fp_lobster, vasp_label, lobster_label,
                               title, features_text, out_file):
    """Plot a VASP and a LOBSTER DOS fingerprint as overlaid histograms and save the figure."""
    f, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(15, 10))

    sns.histplot(ax=a0, x=fp_vasp.energies[0],
                 weights=fp_vasp.densities*fp_vasp.bin_width*100,
                 bins=256, label=vasp_label, stat='count', alpha=0.9)
    sns.histplot(ax=a0, x=fp_lobster.energies[0],
                 weights=fp_lobster.densities*fp_lobster.bin_width*100,
                 bins=256, label=lobster_label, stat='count', alpha=0.6)

    a0.set_xlabel('Energies', fontsize=16)
    a0.set_ylabel('Percent', fontsize=16)
    legend_without_duplicate_labels(a0)
    a0.tick_params(direction="in", length=6, width=1, colors='k')
    a0.set_title(title, fontsize=20)
    a0.set_xlim(-15, 0)

    _lso_features_panel(f, a1, features_text)
    f.savefig(out_file, dpi=600, bbox_inches='tight')
    plt.close(f)


def _lso_band_features(dos, band, erange):
    """
    Return (filling, center, width, skewness, kurtosis, upper band edge) of ``band``,
    each rounded to 4 decimals. All but the filling are evaluated within ``erange``.
    """
    return (
        round(dos.get_band_filling(band=band), 4),
        round(dos.get_band_center(band=band, erange=erange), 4),
        round(dos.get_band_width(band=band, erange=erange), 4),
        round(dos.get_band_skewness(band=band, erange=erange), 4),
        round(dos.get_band_kurtosis(band=band, erange=erange), 4),
        round(dos.get_upper_band_edge(band=band, erange=erange), 4),
    )


def get_df_lso(mpid, path_to_save_plots='../LSO/'):
    """

    Function to get pdos band features, fingerprints data and corresponding plots (LOBSTER LSO DOS and VASP)

    Args:
        mpid: name of directory containing LOBSTER and VASP calculation files
        path_to_save_plots: directory in which the plot folders Summed_fp, spd_fp
            and All_plots are created and filled
    Returns:
       pandas dataframe (one row, indexed by mpid) with all necessary plot data
    """
    # Create every output folder on its own. A single try block around all
    # three calls would stop at the first folder that already exists and leave
    # the remaining ones missing.
    for sub_dir in ("Summed_fp", 'spd_fp', 'All_plots'):
        os.makedirs(os.path.join(path_to_save_plots, sub_dir), exist_ok=True)

    df = pd.DataFrame(index=[mpid], columns=['Composition'])
    # These columns hold lists / dicts, so they have to be of object dtype.
    for column in ('VASP_orb', 'LOBS_orb', 'LOBSTER_basis'):
        df[column] = ''
        df[column] = df[column].astype(dtype='object')

    doscar_lobster = Doscar(doscar="{}/DOSCAR.LSO.lobster.gz".format(mpid),
                            structure_file="{}/POSCAR.gz".format(mpid),
                            dftprogram="Vasp")
    dos_lobster = doscar_lobster.completedos

    vasprun = Vasprun("{}/vasprun.xml.gz".format(mpid))
    sys_elec = round(Outcar('{}/OUTCAR.gz'.format(mpid)).nelect, 2)
    lobsterout = Lobsterout('{}/lobsterout.gz'.format(mpid))
    dos_vasp = vasprun.complete_dos

    spd_dos_lobster = dos_lobster.get_spd_dos()
    spd_dos_vasp = dos_vasp.get_spd_dos()

    # ---- electron counts from all states with E <= 0
    # LOBSTER energies are used as read, VASP energies are shifted by efermi
    # (presumably the LOBSTER DOSCAR is already referenced to the Fermi level -- TODO confirm).
    energy_range_lobster_tot, pdos_lobster_total = _lso_windowed_pdos(
        spd_dos_lobster, shift_by_efermi=False)
    energy_range_vasp_tot, pdos_vasp_total = _lso_windowed_pdos(
        spd_dos_vasp, shift_by_efermi=True)

    nelec_lobs = round(trapezoid(_lso_summed_dos(pdos_lobster_total),
                                 energy_range_lobster_tot), 3)  # total electrons PDOS LOBSTER
    nelec_vasp = round(trapezoid(_lso_summed_dos(pdos_vasp_total),
                                 energy_range_vasp_tot), 3)  # total electrons PDOS VASP

    reduced_formula = dos_vasp.structure.composition.get_reduced_formula_and_factor()[0]

    basis_sets = {ele: lobsterout.basis_functions[i]
                  for i, ele in enumerate(lobsterout.elements)}

    # ---- same quantities restricted to the window -15 <= E <= 0
    energy_range_lobster, pdos_lobster = _lso_windowed_pdos(
        spd_dos_lobster, shift_by_efermi=False, lower_bound=-15)
    energy_range_vasp, pdos_vasp = _lso_windowed_pdos(
        spd_dos_vasp, shift_by_efermi=True, lower_bound=-15)
    orb_lobs = list(pdos_lobster)
    orb_vasp = list(pdos_vasp)

    # Electrons inside the window, rounded to an integer. round() raises
    # ValueError for NaN, in which case NaN is propagated. Neither value
    # depends on the orbital, so they are computed once here.
    try:
        lobs_elec = round(trapezoid(_lso_summed_dos(pdos_lobster), energy_range_lobster))
    except ValueError:
        lobs_elec = np.nan
    try:
        vasp_elec = round(trapezoid(_lso_summed_dos(pdos_vasp), energy_range_vasp))
    except ValueError:
        vasp_elec = np.nan

    # ---- summed fp start
    fp_lobster_og = dos_lobster.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                           normalize=True, type='summed_pdos')
    fp_vasp_og = dos_vasp.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                     normalize=True, type='summed_pdos')

    tani_og = round(CompleteDos.get_dos_fp_similarity(fp_lobster_og, fp_vasp_og, tanimoto=True), 4)
    norm_og = round(CompleteDos.get_dos_fp_similarity(fp_lobster_og, fp_vasp_og, normalize=True), 4)

    _lso_save_fingerprint_plot(
        fp_vasp_og, fp_lobster_og, 'VASP', 'LOBSTER',
        'Summed projected Density of States Fingerprint',
        'LOBSTER-VASP\n\n\nNELECT LOBS: {}\nNELECT VASP: {}\nTanimoto_simi: {}\nNorm_simi: {}\nSYS_ELECT: {}'.format(
            nelec_lobs, nelec_vasp, tani_og, norm_og, sys_elec),
        '{}/Summed_fp/{}.pdf'.format(path_to_save_plots, mpid))
    # ---- summed fp end

    df.loc[mpid, 'Composition'] = reduced_formula
    df.at[mpid, 'LOBSTER_basis'] = basis_sets
    df.at[mpid, 'VASP_orb'] = orb_vasp
    df.at[mpid, 'LOBS_orb'] = orb_lobs
    df.loc[mpid, 'NELEC_LOBS'] = nelec_lobs
    df.loc[mpid, 'NELEC_VASP'] = nelec_vasp
    df.loc[mpid, 'NELEC_SYS'] = sys_elec
    df.loc[mpid, 'Tanimoto_similarity'] = tani_og
    df.loc[mpid, 'Norm_similarity'] = norm_og

    # ---- spd tanimoto fp start
    for orb in pdos_lobster:
        fp_lobster = dos_lobster.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                            normalize=True, type=orb)
        fp_vasp = dos_vasp.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                      normalize=True, type=orb)

        tani = round(CompleteDos.get_dos_fp_similarity(fp_lobster, fp_vasp, tanimoto=True), 4)
        norm = round(CompleteDos.get_dos_fp_similarity(fp_lobster, fp_vasp, normalize=True), 4)

        # Share of this orbital in the electron count: all states with E <= 0 ...
        orb_contri_lobs_total = round(
            (trapezoid(pdos_lobster_total[orb], energy_range_lobster_tot)/nelec_lobs)*100, 3)
        orb_contri_vasp_total = round(
            (trapezoid(pdos_vasp_total[orb], energy_range_vasp_tot)/nelec_vasp)*100, 3)
        # ... and inside the -15..0 window only.
        orb_contri_lobs = round(
            (trapezoid(pdos_lobster[orb], energy_range_lobster)/lobs_elec)*100, 3)
        orb_contri_vasp = round(
            (trapezoid(pdos_vasp[orb], energy_range_vasp)/vasp_elec)*100, 3)

        _lso_save_fingerprint_plot(
            fp_vasp, fp_lobster, '{} VASP'.format(orb), '{} LOBSTER'.format(orb),
            'Projected Density of States Fingerprint',
            'LOBSTER-VASP\n\n\n% LOBS: {}\n% VASP: {}\nTanimoto_simi: {}\nNorm_simi: {}'.format(
                orb_contri_lobs, orb_contri_vasp, tani, norm),
            '{}/spd_fp/{}_{}.pdf'.format(path_to_save_plots, mpid, orb))

        df.loc[mpid, '{}_contri_LOBS_TOTAL'.format(orb)] = orb_contri_lobs_total
        df.loc[mpid, '{}_contri_VASP_TOTAL'.format(orb)] = orb_contri_vasp_total
        df.loc[mpid, 'Tanimoto_similarity_{}'.format(orb)] = tani
        df.loc[mpid, 'Norm_similarity_{}'.format(orb)] = norm
        df.loc[mpid, '{}_contri_LOBS'.format(orb)] = orb_contri_lobs
        df.loc[mpid, '{}_contri_VASP'.format(orb)] = orb_contri_vasp
    # ---- spd tanimoto fp end

    # ---- band features of the highest-lying sufficiently populated band per orbital
    band_columns = ('filling', 'center', 'width', 'skew', 'kurtosis', 'upperbandedge')
    for orb_type in spd_dos_lobster:
        orb = orb_type.name
        # The windowed data from above is reused; this assumes all orbital
        # projections share one energy grid, which the point-wise summation of
        # the projections already relies on.
        lobs_dens = pdos_lobster[orb]
        vasp_dens = pdos_vasp[orb]

        if not np.any(lobs_dens):
            # No LOBSTER density in the window: there is no band to analyse.
            continue

        dens_ind, lobster_ranges = get_non_zero_ranges(densities=lobs_dens,
                                                       energies=energy_range_lobster)

        # Go from the highest energy range downwards and stop at the first
        # one that works.
        for i in reversed(range(len(dens_ind))):
            first, stop = dens_ind[i][0], dens_ind[i][1] + 1
            # NOTE(review): dens_ind are positions on the LOBSTER grid but are
            # applied unchanged to the VASP arrays; this is only meaningful if
            # both windowed grids line up -- TODO confirm.
            area_vasp = trapezoid(vasp_dens[first:stop], energy_range_vasp[first:stop])
            area_lobs = trapezoid(lobs_dens[first:stop], energy_range_lobster[first:stop])

            if not (round(area_vasp, 2) >= 0.5 and round(area_lobs, 2) >= 0.5):
                continue

            # Evaluate everything before touching the dataframe or creating a
            # figure, so a failing range leaves no half-written columns and no
            # open figure behind.
            try:
                erange = lobster_ranges[i]
                feats_lobs = _lso_band_features(dos_lobster, orb_type, erange)
                feats_vasp = _lso_band_features(dos_vasp, orb_type, erange)
            except IndexError:
                continue

            for name, value in zip(band_columns, feats_lobs):
                df.loc[mpid, '{}_band_{}_LOBS'.format(orb, name)] = value
            for name, value in zip(band_columns, feats_vasp):
                df.loc[mpid, '{}_band_{}_VASP'.format(orb, name)] = value

            f, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(15, 10))

            sns.lineplot(ax=a0, x=energy_range_vasp, y=vasp_dens, label='{} VASP'.format(orb))
            sns.lineplot(ax=a0, x=energy_range_lobster, y=lobs_dens, label='{} LOBSTER'.format(orb))

            a0.set_xlabel('Energies', fontsize=16)
            a0.set_ylabel('Density of states', fontsize=16)
            a0.set_title('Projected density of states', fontsize=20)
            a0.tick_params(direction="in", length=6, width=1, colors='k')
            a0.set_xlim(erange[0], 0)

            # Band filling and upper band edge are stored in the dataframe
            # only; the annotation template has no slots for them.
            _lso_features_panel(
                f, a1,
                'LOBSTER(LSO)-VASP\n\nRANGE\n{}\n\nBC: {}, {}\nBW: {}, {}\nBS: {}, {}\nBK: {}, {}'.format(
                    erange,
                    feats_lobs[1], feats_vasp[1], feats_lobs[2], feats_vasp[2],
                    feats_lobs[3], feats_vasp[3], feats_lobs[4], feats_vasp[4]))

            f.savefig('{}/All_plots/{}_{}.pdf'.format(path_to_save_plots,
                                                      mpid, orb), dpi=600, bbox_inches='tight')
            plt.close(f)
            break

    return df

In [None]:
def _nonlso_windowed_pdos(spd_dos, shift_by_efermi, lower_bound=None):
    """
    Cut every orbital-projected DOS down to the energy window lower_bound <= E <= 0

    Args:
        spd_dos: dict of orbital type -> Dos as returned by get_spd_dos()
        shift_by_efermi: if True, efermi is subtracted from the energies before windowing
        lower_bound: lower energy limit of the window; None keeps everything up to 0
    Returns:
        energies, pdos:
            energies inside the window (taken from the last orbital in spd_dos) and
            a dict mapping orbital name to the densities at these energies
    """
    energies = []
    pdos = {}
    for orb_type, orb_dos in spd_dos.items():
        grid = orb_dos.energies - orb_dos.efermi if shift_by_efermi else orb_dos.energies
        dens = orb_dos.get_densities()
        keep = [j for j, en in enumerate(grid)
                if en <= 0 and (lower_bound is None or en >= lower_bound)]
        energies = [grid[j] for j in keep]
        pdos[orb_type.name] = [dens[j] for j in keep]
    return energies, pdos


def _nonlso_summed_dos(pdos):
    """Add up the windowed densities of all orbitals point by point."""
    return np.sum([np.array(dens) for dens in pdos.values()], axis=0)


def _nonlso_features_panel(fig, ax, text):
    """Turn the right-hand axis into a frameless panel showing ``text`` in a box."""
    ax.annotate(text,
                xy=(0.50, 0.50), xycoords='axes fraction', fontsize=14, fontweight='medium',
                horizontalalignment='center',
                bbox=dict(boxstyle="round,pad=0.3", fc="none", ec="lightgrey", lw=2))
    ax.set_title('Features', fontsize=20)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    fig.subplots_adjust(wspace=0.05, hspace=None)
    ax.set_axis_off()


def _nonlso_save_fingerprint_plot(fp_vasp, fp_lobster, vasp_label, lobster_label,
                                  title, features_text, out_file):
    """Plot a VASP and a LOBSTER DOS fingerprint as overlaid histograms and save the figure."""
    f, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(15, 10))

    sns.histplot(ax=a0, x=fp_vasp.energies[0],
                 weights=fp_vasp.densities*fp_vasp.bin_width*100,
                 bins=256, label=vasp_label, stat='count', alpha=0.9)
    sns.histplot(ax=a0, x=fp_lobster.energies[0],
                 weights=fp_lobster.densities*fp_lobster.bin_width*100,
                 bins=256, label=lobster_label, stat='count', alpha=0.6)

    a0.set_xlabel('Energies', fontsize=16)
    a0.set_ylabel('Percent', fontsize=16)
    legend_without_duplicate_labels(a0)
    a0.set_title(title, fontsize=20)
    a0.set_xlim(-15, 0)

    _nonlso_features_panel(f, a1, features_text)
    f.savefig(out_file, dpi=600, bbox_inches='tight')
    plt.close(f)


def _nonlso_band_features(dos, band, erange):
    """
    Return (filling, center, width, skewness, kurtosis, upper band edge) of ``band``,
    each rounded to 4 decimals. All but the filling are evaluated within ``erange``.
    """
    return (
        round(dos.get_band_filling(band=band), 4),
        round(dos.get_band_center(band=band, erange=erange), 4),
        round(dos.get_band_width(band=band, erange=erange), 4),
        round(dos.get_band_skewness(band=band, erange=erange), 4),
        round(dos.get_band_kurtosis(band=band, erange=erange), 4),
        round(dos.get_upper_band_edge(band=band, erange=erange), 4),
    )


def get_df(mpid,path_to_save_plots='../NONLSO'):
    """

    Function to get pdos band features, fingerprints data and corresponding plots (LOBSTER DOS and VASP)

    Args:
        mpid: name of directory containing LOBSTER and VASP calculation files
        path_to_save_plots: directory in which the plot folders Summed_fp, spd_fp
            and All_plots are created and filled
    Returns:
       pandas dataframe (one row, indexed by mpid) with all necessary plot data
    """
    # Create every output folder on its own. A single try block around all
    # three calls would stop at the first folder that already exists and leave
    # the remaining ones missing.
    for sub_dir in ("Summed_fp", 'spd_fp', 'All_plots'):
        os.makedirs(os.path.join(path_to_save_plots, sub_dir), exist_ok=True)

    df = pd.DataFrame(index=[mpid], columns=['Composition'])
    # These columns hold lists / dicts, so they have to be of object dtype.
    for column in ('VASP_orb', 'LOBS_orb', 'LOBSTER_basis'):
        df[column] = ''
        df[column] = df[column].astype(dtype='object')

    doscar_lobster = Doscar(doscar="{}/DOSCAR.lobster.gz".format(mpid),
                            structure_file="{}/POSCAR.gz".format(mpid),
                            dftprogram="Vasp")
    dos_lobster = doscar_lobster.completedos

    vasprun = Vasprun("{}/vasprun.xml.gz".format(mpid))
    sys_elec = round(Outcar('{}/OUTCAR.gz'.format(mpid)).nelect, 2)
    lobsterout = Lobsterout('{}/lobsterout.gz'.format(mpid))
    dos_vasp = vasprun.complete_dos

    spd_dos_lobster = dos_lobster.get_spd_dos()
    spd_dos_vasp = dos_vasp.get_spd_dos()

    # ---- electron counts from all states with E <= 0
    # LOBSTER energies are used as read, VASP energies are shifted by efermi
    # (presumably the LOBSTER DOSCAR is already referenced to the Fermi level -- TODO confirm).
    energy_range_lobster_tot, pdos_lobster_total = _nonlso_windowed_pdos(
        spd_dos_lobster, shift_by_efermi=False)
    energy_range_vasp_tot, pdos_vasp_total = _nonlso_windowed_pdos(
        spd_dos_vasp, shift_by_efermi=True)

    nelec_lobs = round(trapezoid(_nonlso_summed_dos(pdos_lobster_total),
                                 energy_range_lobster_tot), 3)  # total electrons PDOS LOBSTER
    nelec_vasp = round(trapezoid(_nonlso_summed_dos(pdos_vasp_total),
                                 energy_range_vasp_tot), 3)  # total electrons PDOS VASP

    reduced_formula = dos_vasp.structure.composition.get_reduced_formula_and_factor()[0]

    basis_sets = {ele: lobsterout.basis_functions[i]
                  for i, ele in enumerate(lobsterout.elements)}

    # ---- same quantities restricted to the window -15 <= E <= 0
    energy_range_lobster, pdos_lobster = _nonlso_windowed_pdos(
        spd_dos_lobster, shift_by_efermi=False, lower_bound=-15)
    energy_range_vasp, pdos_vasp = _nonlso_windowed_pdos(
        spd_dos_vasp, shift_by_efermi=True, lower_bound=-15)
    orb_lobs = list(pdos_lobster)
    orb_vasp = list(pdos_vasp)

    # Electrons inside the window, rounded to an integer. round() raises
    # ValueError for NaN, in which case NaN is propagated. Neither value
    # depends on the orbital, so they are computed once here.
    try:
        lobs_elec = round(trapezoid(_nonlso_summed_dos(pdos_lobster), energy_range_lobster))
    except ValueError:
        lobs_elec = np.nan
    try:
        vasp_elec = round(trapezoid(_nonlso_summed_dos(pdos_vasp), energy_range_vasp))
    except ValueError:
        vasp_elec = np.nan

    # ---- summed fp start
    fp_lobster_og = dos_lobster.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                           normalize=True, type='summed_pdos')
    fp_vasp_og = dos_vasp.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                     normalize=True, type='summed_pdos')

    tani_og = round(CompleteDos.get_dos_fp_similarity(fp_lobster_og, fp_vasp_og, tanimoto=True), 4)
    norm_og = round(CompleteDos.get_dos_fp_similarity(fp_lobster_og, fp_vasp_og, normalize=True), 4)

    _nonlso_save_fingerprint_plot(
        fp_vasp_og, fp_lobster_og, 'VASP', 'LOBSTER',
        'Summed projected Density of States Fingerprint',
        'LOBSTER-VASP\n\n\nNELECT LOBS: {}\nNELECT VASP: {}\nTanimoto_simi: {}\nNorm_simi: {}\nSYS_ELECT: {}'.format(
            nelec_lobs, nelec_vasp, tani_og, norm_og, sys_elec),
        '{}/Summed_fp/{}.png'.format(path_to_save_plots, mpid))
    # ---- summed fp end

    df.loc[mpid, 'Composition'] = reduced_formula
    df.at[mpid, 'LOBSTER_basis'] = basis_sets
    df.at[mpid, 'VASP_orb'] = orb_vasp
    df.at[mpid, 'LOBS_orb'] = orb_lobs
    df.loc[mpid, 'NELEC_LOBS'] = nelec_lobs
    df.loc[mpid, 'NELEC_VASP'] = nelec_vasp
    df.loc[mpid, 'NELEC_SYS'] = sys_elec
    df.loc[mpid, 'Tanimoto_similarity'] = tani_og
    df.loc[mpid, 'Norm_similarity'] = norm_og

    # ---- spd tanimoto fp start
    for orb in pdos_lobster:
        fp_lobster = dos_lobster.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                            normalize=True, type=orb)
        fp_vasp = dos_vasp.get_dos_fp(min_e=-15, max_e=0, n_bins=256,
                                      normalize=True, type=orb)

        tani = round(CompleteDos.get_dos_fp_similarity(fp_lobster, fp_vasp, tanimoto=True), 4)
        norm = round(CompleteDos.get_dos_fp_similarity(fp_lobster, fp_vasp, normalize=True), 4)

        # Share of this orbital in the electron count: all states with E <= 0 ...
        orb_contri_lobs_total = round(
            (trapezoid(pdos_lobster_total[orb], energy_range_lobster_tot)/nelec_lobs)*100, 3)
        orb_contri_vasp_total = round(
            (trapezoid(pdos_vasp_total[orb], energy_range_vasp_tot)/nelec_vasp)*100, 3)
        # ... and inside the -15..0 window only.
        orb_contri_lobs = round(
            (trapezoid(pdos_lobster[orb], energy_range_lobster)/lobs_elec)*100, 3)
        orb_contri_vasp = round(
            (trapezoid(pdos_vasp[orb], energy_range_vasp)/vasp_elec)*100, 3)

        _nonlso_save_fingerprint_plot(
            fp_vasp, fp_lobster, '{} VASP'.format(orb), '{} LOBSTER'.format(orb),
            'Projected Density of States Fingerprint',
            'LOBSTER-VASP\n\n\n% LOBS: {}\n% VASP: {}\nTanimoto_simi: {}\nNorm_simi: {}'.format(
                orb_contri_lobs, orb_contri_vasp, tani, norm),
            '{}/spd_fp/{}_{}.png'.format(path_to_save_plots, mpid, orb))

        df.loc[mpid, '{}_contri_LOBS_TOTAL'.format(orb)] = orb_contri_lobs_total
        df.loc[mpid, '{}_contri_VASP_TOTAL'.format(orb)] = orb_contri_vasp_total
        df.loc[mpid, 'Tanimoto_similarity_{}'.format(orb)] = tani
        df.loc[mpid, 'Norm_similarity_{}'.format(orb)] = norm
        df.loc[mpid, '{}_contri_LOBS'.format(orb)] = orb_contri_lobs
        df.loc[mpid, '{}_contri_VASP'.format(orb)] = orb_contri_vasp
    # ---- spd tanimoto fp end

    # ---- band features of the highest-lying sufficiently populated band per orbital
    band_columns = ('filling', 'center', 'width', 'skew', 'kurtosis', 'upperbandedge')
    for orb_type in spd_dos_lobster:
        orb = orb_type.name
        # The windowed data from above is reused; this assumes all orbital
        # projections share one energy grid, which the point-wise summation of
        # the projections already relies on.
        lobs_dens = pdos_lobster[orb]
        vasp_dens = pdos_vasp[orb]

        if not np.any(lobs_dens):
            # No LOBSTER density in the window: there is no band to analyse.
            continue

        dens_ind, lobster_ranges = get_non_zero_ranges(densities=lobs_dens,
                                                       energies=energy_range_lobster)

        # Go from the highest energy range downwards and stop at the first
        # one that works.
        for i in reversed(range(len(dens_ind))):
            first, stop = dens_ind[i][0], dens_ind[i][1] + 1
            # NOTE(review): dens_ind are positions on the LOBSTER grid but are
            # applied unchanged to the VASP arrays; this is only meaningful if
            # both windowed grids line up -- TODO confirm.
            area_vasp = trapezoid(vasp_dens[first:stop], energy_range_vasp[first:stop])
            area_lobs = trapezoid(lobs_dens[first:stop], energy_range_lobster[first:stop])

            if not (round(area_vasp, 2) >= 0.5 and round(area_lobs, 2) >= 0.5):
                continue

            # Evaluate everything before touching the dataframe or creating a
            # figure, so a failing range leaves no half-written columns and no
            # open figure behind.
            try:
                erange = lobster_ranges[i]
                feats_lobs = _nonlso_band_features(dos_lobster, orb_type, erange)
                feats_vasp = _nonlso_band_features(dos_vasp, orb_type, erange)
            except IndexError:
                continue

            for name, value in zip(band_columns, feats_lobs):
                df.loc[mpid, '{}_band_{}_LOBS'.format(orb, name)] = value
            for name, value in zip(band_columns, feats_vasp):
                df.loc[mpid, '{}_band_{}_VASP'.format(orb, name)] = value

            f, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, figsize=(15, 10))

            sns.lineplot(ax=a0, x=energy_range_vasp, y=vasp_dens, label='{} VASP'.format(orb))
            sns.lineplot(ax=a0, x=energy_range_lobster, y=lobs_dens, label='{} LOBSTER'.format(orb))

            a0.set_xlabel('Energies', fontsize=16)
            a0.set_ylabel('Density of states', fontsize=16)
            a0.set_title('Projected density of states', fontsize=20)
            # Start the x axis at the range that is analysed and annotated
            # (same as get_df_lso), not at the last range in the list.
            a0.set_xlim(erange[0], 0)

            # Band filling and upper band edge are stored in the dataframe
            # only; the annotation template has no slots for them.
            _nonlso_features_panel(
                f, a1,
                'LOBSTER-VASP\n\nRANGE\n{}\n\nBC: {}, {}\nBW: {}, {}\nBS: {}, {}\nBK: {}, {}'.format(
                    erange,
                    feats_lobs[1], feats_vasp[1], feats_lobs[2], feats_vasp[2],
                    feats_lobs[3], feats_vasp[3], feats_lobs[4], feats_vasp[4]))

            f.savefig('{}/All_plots/{}_{}.png'.format(path_to_save_plots,
                                                      mpid, orb), dpi=600, bbox_inches='tight')
            plt.close(f)
            break

    return df

In [None]:
# Materials to process. For a quick check on two entries use instead:
#items=['mp-1000','mp-66']
items = mats

# Two worker processes, each replaced after a single task; results are
# collected in whatever order the workers finish.
with mp.Pool(processes=2, maxtasksperchild=1) as pool:
    results = tqdm(
        pool.imap_unordered(get_df, items, chunksize=2),
        total=len(items),  # lets tqdm show progress for the lazy iterator
    )
    row = list(results)

# One row per material, stacked into a single dataframe.
df = pd.concat(row)

In [None]:
#df.to_pickle('lobdos.pkl')

In [None]:
# Materials to process. For a quick check on two entries use instead:
#items=['mp-1000','mp-66']
items = mats

# Two worker processes, each replaced after a single task; results are
# collected in whatever order the workers finish.
with mp.Pool(processes=2, maxtasksperchild=1) as pool:
    results = tqdm(
        pool.imap_unordered(get_df_lso, items, chunksize=2),
        total=len(items),  # lets tqdm show progress for the lazy iterator
    )
    row = list(results)

# One row per material, stacked into a single dataframe.
df_lso = pd.concat(row)

In [None]:
#df_lso.to_pickle('lsolobdos.pkl')