In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
# Load the two pre-saved plot-data frames in one go.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
df, df_lso = (pd.read_pickle(pkl_name) for pkl_name in ('lobdos.pkl', 'lsolobdos.pkl'))

In [None]:
# Band features to plot; each name is the middle part of the
# '<orbital>_<feature>_VASP' / '<orbital>_<feature>_LOBS' column names
# that get_band_feature_comp_plot reads from the dataframe.
features = [
    'band_filling',
    'band_center',
    'band_width',
    'band_skew',
    'band_upperbandedge',
    'band_kurtosis',
]

In [None]:
def get_band_feature_comp_plot(feature,df, path='.',extension='pdf'):
    """Plot LOBSTER against VASP values of one band feature and save the figure.

    A 1x3 subplot figure is built with one panel per orbital (s, p, d). Each
    panel shows a scatter of ``'<orbital>_<feature>_LOBS'`` against
    ``'<orbital>_<feature>_VASP'``, a linear-regression line fitted to the rows
    where both values are present, the R^2 of that fit and a band label.
    The figure is written to ``'<path>/<feature>.<extension>'``.

    Parameters
    ----------
    feature : str
        Feature name used to build the column names; it must contain at least
        one underscore because the figure title is built from its first two
        '_'-separated parts.
    df : pandas.DataFrame
        Frame holding the ``'<orbital>_<feature>_VASP'`` and
        ``'<orbital>_<feature>_LOBS'`` columns for s, p and d, plus a
        ``Composition`` column. The index and ``Composition`` are concatenated
        with strings to build the hover text.
    path : str, optional
        Output directory; it is created if it does not exist yet.
    extension : {'pdf', 'svg', 'html'}, optional
        Output format. 'pdf' and 'svg' are written with ``fig.write_image``,
        'html' with ``fig.write_html``.

    Raises
    ------
    ValueError
        If ``extension`` is not one of the supported formats (previously such
        a call silently produced no file).
    """
    # Local imports keep the function usable without touching the import cell.
    import os
    import warnings

    supported_extensions = ('pdf', 'svg', 'html')
    if extension not in supported_extensions:
        # Fail before doing any work instead of silently writing nothing.
        raise ValueError(
            "Unsupported extension {!r}; expected one of {}".format(extension, supported_extensions)
        )

    orbitals = ('s', 'p', 'd')  # one subplot column per orbital, in this order
    annotation_font = dict(size=24, color='black')

    fig = make_subplots(rows=1, cols=3, shared_xaxes=False, shared_yaxes=False,
                        x_title='VASP (eV)', y_title='LOBSTER (eV)', horizontal_spacing=0.035)

    # Scatter of the raw values in each subplot (rows with missing values are
    # passed through unchanged, exactly as before).
    hover = df.index+'<br>Composition :'+ df.Composition
    for col, orbital in enumerate(orbitals, start=1):
        fig.add_trace(go.Scatter(x=df['{}_{}_VASP'.format(orbital, feature)],
                                 y=df['{}_{}_LOBS'.format(orbital, feature)],
                                 mode='markers', name=orbital, hovertext=hover),
                      row=1, col=col)

    # Linear fit per orbital on the rows where both codes provide a value.
    r_squared = {}
    for col, orbital in enumerate(orbitals, start=1):
        vasp_col = '{}_{}_VASP'.format(orbital, feature)
        lobs_col = '{}_{}_LOBS'.format(orbital, feature)
        both_present = df[vasp_col].notna() & df[lobs_col].notna()
        x_fit = df.loc[both_present, vasp_col]
        y_fit = df.loc[both_present, lobs_col]
        if len(x_fit) < 2:
            # A regression (and its R^2) needs at least two points; skip the
            # fit for this band rather than aborting the whole figure.
            warnings.warn("Not enough valid {} band data points to fit '{}'; "
                          "regression line skipped.".format(orbital, feature))
            continue
        x_matrix = np.array(x_fit).reshape(-1,1)
        model = LinearRegression().fit(x_matrix, y_fit)
        fig.add_trace(go.Scatter(x=x_fit, y=model.predict(x_matrix), mode='lines',
                                 showlegend=False, line_color='#f57f1f'),
                      row=1, col=col)
        r_squared[orbital] = round(model.score(x_matrix, y_fit), 4)

    title_parts = feature.split('_')
    fig.update_layout(title='{} {}'.format(title_parts[0].capitalize(), title_parts[1]),
                      title_x=0.5,
                      height=700, width=1900,
                      showlegend=False,
                      )
    fig.update_traces(marker=dict(size=10, color='#1878b6'))

    # R^2 value and band label inside each subplot.
    for col, orbital in enumerate(orbitals, start=1):
        if orbital in r_squared:
            fig.add_annotation(xref='x domain', yref='y domain',
                               x=0.95, y=0.5,
                               text=r"$R^2={}$".format(r_squared[orbital]),
                               showarrow=False, row=1, col=col, font=annotation_font)
        fig.add_annotation(xref='x domain', yref='y domain',
                           x=0.05, y=0.95,
                           text='{} band'.format(orbital),
                           showarrow=False, row=1, col=col, font=annotation_font)

    # Same styling for the x and y axis of every subplot.
    # NOTE(review): the shared 'VASP (eV)' / 'LOBSTER (eV)' labels come from
    # make_subplots' x_title / y_title; it looks like title_font below only
    # styles per-axis titles, which are not set here - confirm the label size
    # is as intended.
    axis_style = dict(title_font=dict(size=24), color='black', tickfont=dict(size=22),
                      showline=True, linewidth=1, linecolor='black', mirror=True,
                      autorange=True,
                      ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)
    for col in range(1, len(orbitals) + 1):
        fig.update_xaxes(row=1, col=col, **axis_style)
        fig.update_yaxes(row=1, col=col, **axis_style)
    fig.update_layout(template='simple_white')

    # Make sure the target directory exists so a fresh checkout can run this.
    if path:
        os.makedirs(path, exist_ok=True)
    target = "{}/{}.{}".format(path,feature,extension)
    if extension == 'html':
        fig.write_html(target, include_mathjax = 'cdn')
    else:
        # 'pdf' or 'svg'
        fig.write_image(target, format=extension, width=1900, height=700)

In [None]:
# Write every feature plot built from `df` into 'NON_LSO', first as PDF and
# then as HTML.
for feature in features:
    for file_type in ('pdf', 'html'):
        get_band_feature_comp_plot(feature=feature, df=df,
                                   path='NON_LSO', extension=file_type)

In [None]:
# Write every feature plot built from `df_lso` into 'LSO', first as PDF and
# then as HTML.
for feature in features:
    for file_type in ('pdf', 'html'):
        get_band_feature_comp_plot(feature=feature, df=df_lso,
                                   path='LSO', extension=file_type)

### Fingerprint Tanimoto index plots

In [None]:
# Overlaid, percent-normalised histograms of the Tanimoto similarity columns
# of df_lso: one trace per orbital (s, p, d) plus the summed one.
figmain = go.Figure()
figmain.add_traces([
    go.Histogram(x=df_lso[column].values, name=label, nbinsx=56, histnorm ='percent')
    for label, column in (
        ('s', 'Tanimoto_similarity_s'),
        ('p', 'Tanimoto_similarity_p'),
        ('d', 'Tanimoto_similarity_d'),
        ('summed', 'Tanimoto_similarity'),
    )
])
figmain.update_traces(opacity=0.65)  # keep overlapping bars visible

figmain.update_layout(
    template='simple_white',
    barmode='overlay',
    width=1000,
    height=650,
    xaxis=dict(title='Tanimoto similarity', tickfont=dict(size=18)),
    yaxis=dict(title='Percent compounds', tickfont=dict(size=18)),
)

# Black, mirrored axis lines with inward ticks on both axes.
figmain.update_xaxes(title_font=dict(size=22), color='black',
                     showline=True, linewidth=1, linecolor='black', mirror=True,
                     ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)
figmain.update_yaxes(title_font=dict(size=22), color='black',
                     showline=True, linewidth=1, linecolor='black', mirror=True,
                     ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)

# Last expression of the cell: update_layout returns the figure, so the
# notebook renders it.
figmain.update_layout(
    legend=dict(x=0, y=1, traceorder="normal",
                font=dict(family="sans-serif", size=20, color="black"))
)
# Optional (disabled) range restriction and export:
#figmain.update(layout_xaxis_range = [0,1])
#figmain.write_image("lso_spd_tanimoto_all.pdf",width=1000, height=650)
#figmain.write_html("lso_spd_tanimoto_all.html",include_mathjax = 'cdn')

In [None]:
# Overlaid count histograms of the Tanimoto similarity columns of df_lso,
# restricted per column to the entries with a value <= 0.75.
figmain = go.Figure()
figmain.add_traces([
    go.Histogram(x=df_lso.loc[df_lso[column] <= 0.75, column].values,
                 name=label, nbinsx=56)
    for label, column in (
        ('s', 'Tanimoto_similarity_s'),
        ('p', 'Tanimoto_similarity_p'),
        ('d', 'Tanimoto_similarity_d'),
        ('summed', 'Tanimoto_similarity'),
    )
])
figmain.update_traces(opacity=0.65)  # keep overlapping bars visible

figmain.update_layout(
    template='simple_white',
    barmode='overlay',
    width=1000,
    height=650,
    xaxis=dict(title='Tanimoto similarity', tickfont=dict(size=18)),
    yaxis=dict(title='Number of compounds', tickfont=dict(size=18)),
)

# Black, mirrored axis lines with inward ticks on both axes.
figmain.update_xaxes(title_font=dict(size=22), color='black',
                     showline=True, linewidth=1, linecolor='black', mirror=True,
                     ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)
figmain.update_yaxes(title_font=dict(size=22), color='black',
                     showline=True, linewidth=1, linecolor='black', mirror=True,
                     ticks="inside", tickwidth=1, tickcolor='black', ticklen=5)

# Last expression of the cell: update_layout returns the figure, so the
# notebook renders it.
figmain.update_layout(
    legend=dict(x=0, y=1, traceorder="normal",
                font=dict(family="sans-serif", size=20, color="black"))
)
# Optional (disabled) range restriction and export:
#figmain.update(layout_xaxis_range = [0,1])
#figmain.write_image("lso_spd_tanimoto_low.pdf",width=1000, height=650)
#figmain.write_html("lso_spd_tanimoto_low.html",include_mathjax = 'cdn')