In [1]:
import pandas as pd
import plotly.express as px
import umap
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
import numpy as np

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()
2024-05-16 12:21:05.315093: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the UMAP/IDPi table and keep only the rows whose
# 'N Catalyst Substituent' equals the SMILES string below.

df = pd.read_csv('../Data/UMAP_IDPi.csv')
# .copy(): a later cell adds a column to this frame in place (df['rxn'] = ...),
# so make it an independent frame rather than the result of a filter on the raw table.
df = df.loc[df['N Catalyst Substituent'] == "NS(=O)(=O)C(F)(F)F"].copy()


In [3]:
# Show the filtered frame as the cell's rich output (the rendered table elides the middle rows).
df

Unnamed: 0.1,Unnamed: 0,nucleophile SMILES,starting electrophile SMILES,"3,3 Catalyst Substituent",N Catalyst Substituent,ddg,ee,cluster,0,1
0,0,C=CC[Si](C)(C)C,O=Cc2ccc1ccccc1c2,c2ccc1ccccc1c2,NS(=O)(=O)C(F)(F)F,1.186380,0.910254,10,14.409115,13.065628
5,5,C=CC[Si](C)(C)C,O=Cc2ccc1ccccc1c2,c1ccc3c(c1)CCc2ccccc23,NS(=O)(=O)C(F)(F)F,1.122075,0.894925,10,14.409115,13.065628
10,10,C=CC[Si](C)(C)C,O=Cc2ccc1ccccc1c2,Cc1cc(C)c2ccc4cccc3ccc1c2c34,NS(=O)(=O)C(F)(F)F,1.345681,0.939563,10,14.409115,13.065628
15,15,C=CC[Si](C)(C)C,O=Cc2ccc1ccccc1c2,c1ccccc1,NS(=O)(=O)C(F)(F)F,1.014781,0.863730,10,14.409115,13.065628
20,20,C=CC[Si](C)(C)C,O=Cc2ccc1ccccc1c2,Cc1ccccc1,NS(=O)(=O)C(F)(F)F,1.084011,0.884717,10,14.409115,13.065628
...,...,...,...,...,...,...,...,...,...,...
363975,363975,C1=CCCC=C1,C=CC=O,FC(F)(F)C(F)(c1cccc(C(F)(C(F)(F)F)C(F)(F)F)c1)...,NS(=O)(=O)C(F)(F)F,1.380920,0.944666,1,16.169493,-2.241398
363980,363980,C1=CCCC=C1,C=CC=O,c2cccc(c1ccccc1)c2,NS(=O)(=O)C(F)(F)F,1.189611,0.910965,1,16.169493,-2.241398
363985,363985,C1=CCCC=C1,C=CC=O,FS(F)(F)(F)(F)c1ccccc1,NS(=O)(=O)C(F)(F)F,1.370957,0.943267,1,16.169493,-2.241398
363990,363990,C1=CCCC=C1,C=CC=O,c4ccc3c1ccccc1C2(CCC2)c3c4,NS(=O)(=O)C(F)(F)F,1.434305,0.951607,1,16.169493,-2.241398


# Reaction space ee plot

In [4]:
# For every distinct 3,3-substituent, draw the reaction-space scatter coloured by ee
# and save it as a standalone HTML file under ../Plots/.
cats = np.unique(df['3,3 Catalyst Substituent '])

# Address the two embedding columns by name instead of df.columns[-1] / df.columns[-2]:
# the notebook later appends an 'rxn' column to df, after which a positional lookup
# would silently pick the wrong columns if this cell were re-run.
# NOTE(review): column '1' goes on the axis labelled 'UMAP1' and column '0' on 'UMAP2',
# exactly as the positional lookup did -- confirm the labels are not swapped.
x_col, y_col = '1', '0'

for cat in cats:
    df_cat = df[df['3,3 Catalyst Substituent '] == cat]
    # Colour values are the 'ee' column (not the frame's separate 'ddg' column).
    ee = df_cat['ee']
    fig = px.scatter(x=df_cat[x_col], y=df_cat[y_col], hover_data=[],
                     labels={'x': 'UMAP1', 'y': 'UMAP2'},
                     width=980, height=720,
                     color=ee,
                     template='simple_white',
                     )

    fig.update_traces(marker=dict(size=17,
                                  line=dict(width=2,
                                            color='Black')),
                      selector=dict(mode='markers'))

    # Single layout update (legend placement, fonts, legend visibility).
    fig.update_layout(
        legend=dict(
            yanchor="top",
            y=.99,
            xanchor="left",
            x=0.01,
            orientation='h',
            bordercolor="Black",
            borderwidth=0
        ),
        legend_title_text='',
        font_size=20,
        font_family="Arial",
        showlegend=True,
    )

    fig.update_xaxes(mirror=True)
    fig.update_yaxes(mirror=True)

    # NOTE(review): cat is a raw SMILES string; a '/' in it would be interpreted
    # as a path separator in the file name below.
    fig.write_html(f"../Plots/IDPi_react_space_plot_{cat}.html")

# Reaction space $\Delta$ ee plot

In [5]:
# For every 3,3-substituent, plot the reaction space coloured by the difference
# between its ee and the ee of the reference substituent 'c1ccccc1', one HTML file each.

# Columns that identify a reaction; used only to verify row alignment below.
key_cols = ['nucleophile SMILES', 'starting electrophile SMILES']

# Address the embedding columns by name rather than by position, so the cell keeps
# working after later cells append columns to df.
# NOTE(review): column '1' goes on the axis labelled 'UMAP1' and column '0' on 'UMAP2',
# exactly as the positional lookup did -- confirm the labels are not swapped.
x_col, y_col = '1', '0'

# The reference subset does not depend on the loop variable: compute it once.
df_cat_ref = df[df['3,3 Catalyst Substituent '] == 'c1ccccc1']
ref_keys = df_cat_ref[key_cols].to_numpy()
ref_ee = df_cat_ref['ee'].to_numpy()

for cat in cats:
    df_cat = df[df['3,3 Catalyst Substituent '] == cat]

    # The subtraction below is positional, so both subsets must list the same
    # reactions in the same order. Fail loudly instead of broadcasting or
    # subtracting the ee values of unrelated reactions.
    if not np.array_equal(df_cat[key_cols].to_numpy(), ref_keys):
        raise ValueError(
            f"Rows for substituent {cat!r} are not aligned with the reference "
            "substituent 'c1ccccc1' (different reactions or different order)"
        )
    delta_ee = df_cat['ee'].to_numpy() - ref_ee

    fig = px.scatter(x=df_cat[x_col], y=df_cat[y_col], hover_data=[],
                     labels={'x': 'UMAP1', 'y': 'UMAP2'},
                     width=980, height=720,
                     color=delta_ee,
                     template='simple_white', color_continuous_scale='RdYlGn',
                     range_color=(0, 0.5)
                     )

    fig.update_traces(marker=dict(size=17,
                                  line=dict(width=2,
                                            color='Black')),
                      selector=dict(mode='markers'))

    # Single layout update (legend placement, fonts, legend visibility).
    fig.update_layout(
        legend=dict(
            yanchor="top",
            y=.99,
            xanchor="left",
            x=0.01,
            orientation='h',
            bordercolor="Black",
            borderwidth=0
        ),
        legend_title_text='',
        font_size=20,
        font_family="Arial",
        showlegend=True,
    )

    fig.update_xaxes(mirror=True)
    fig.update_yaxes(mirror=True)

    fig.write_html(f"../Plots/IDPi_Diff_react_space_plot_{cat}.html")

# Difficult reaction subspace $\Delta$ ee plot

In [7]:
# Restrict the delta-ee map to the "difficult" reactions: those for which the reference
# substituent 'c1ccccc1' gives an ee below the cutoff. One HTML file per substituent.

# Reaction key: nucleophile SMILES concatenated with electrophile SMILES.
df['rxn']=df['nucleophile SMILES']+ df['starting electrophile SMILES']

# Reference ee below this value marks a reaction as difficult.
ee_cutoff = 0.4

# Address the embedding columns by name; these are the columns the former
# df.columns[-2] / df.columns[-3] lookup resolved to once 'rxn' is the last column.
# NOTE(review): column '1' goes on the axis labelled 'UMAP1' and column '0' on 'UMAP2',
# exactly as the positional lookup did -- confirm the labels are not swapped.
x_col, y_col = '1', '0'

# The reference subset does not depend on the loop variable: compute it once.
df_cat_ref = df[(df['3,3 Catalyst Substituent '] == 'c1ccccc1') & (df['ee'] < ee_cutoff)]
ref_rxn = df_cat_ref['rxn'].to_numpy()
ref_ee = df_cat_ref['ee'].to_numpy()

for cat in cats:
    # Rows of this substituent whose reaction is one of the difficult reference reactions.
    df_cat = df[(df['3,3 Catalyst Substituent '] == cat) & df['rxn'].isin(ref_rxn)]

    # The subtraction below is positional, so both subsets must list the same
    # reactions in the same order. Fail loudly instead of broadcasting or
    # subtracting the ee values of unrelated reactions.
    if not np.array_equal(df_cat['rxn'].to_numpy(), ref_rxn):
        raise ValueError(
            f"Rows for substituent {cat!r} are not aligned with the reference "
            "substituent 'c1ccccc1' (different reactions or different order)"
        )
    delta_ee = df_cat['ee'].to_numpy() - ref_ee

    # Create the scatter plot
    fig = px.scatter(x=df_cat[x_col], y=df_cat[y_col],
                     hover_data=[], labels={'x': 'UMAP1', 'y': 'UMAP2'},
                     width=980, height=720, color=delta_ee,
                     template='simple_white', color_continuous_scale='RdYlGn',range_color=(0,0.75))

    fig.update_traces(marker=dict(size=17, line=dict(width=2, color='Black')), selector=dict(mode='markers'))

    # Single layout update (legend placement, fonts, legend visibility).
    fig.update_layout(
        legend=dict(yanchor="top", y=.99, xanchor="left", x=0.01, orientation='h', bordercolor="Black", borderwidth=0),
        legend_title_text='',
        font_size=20,
        font_family="Arial",
        showlegend=True,
    )

    fig.update_xaxes(mirror=True)
    fig.update_yaxes(mirror=True)

    fig.write_html(f"../Plots/IDPi_Filtered_Diff_react_space_plot_{cat}.html")