In [47]:
import dash
from dash.exceptions import PreventUpdate
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import json
from chemUtils.utils import utils

# Create dash app
app = dash.Dash(__name__)

# Set dog and cat images
dogImage = "https://www.iconexperience.com/_img/v_collection_png/256x256/shadow/dog.png"
catImage = "https://d2ph5fj80uercy.cloudfront.net/06/cat3602.jpg"

# Generate dataframe
df = pd.DataFrame(
   dict(
      x=[1, 2],
      y=[2, 4],
      images=[dogImage,catImage],
   )
)

# Create scatter plot with x and y coordinates
fig = px.scatter(df, x="x", y="y",custom_data=["images"])

# Update layout and update traces
fig.update_layout(clickmode='event+select')
fig.update_traces(marker_size=20)

# Create app layout to show dash graph
app.layout = html.Div(
   [
      dcc.Graph(
         id="graph_interaction",
         figure=fig,
      ),
      html.Img(id='image', src='')
   ]
)

# html callback function to hover the data on specific coordinates
@app.callback(
   [Output('image', 'src')],
   [Input('graph_interaction', 'hoverData')])
def open_url(hoverData):
   if hoverData:
      return hoverData["points"][0]["customdata"][0]
   else:
      raise PreventUpdate

if __name__ == '__main__':
   app.run_server(debug=True)

In [49]:
import io
import base64
import pickle

from dash import Dash, dcc, html, no_update
from dash.dependencies import Input, Output
import plotly.graph_objects as go

from PIL import Image

from sklearn.manifold import TSNE
import numpy as np

# Contains 100 images for each digit from MNIST
mnist_path = 'mini-mnist-1000.pickle'

# Helper functions
def np_image_to_base64(im_matrix):
    im = Image.fromarray(im_matrix)
    buffer = io.BytesIO()
    im.save(buffer, format="jpeg")
    encoded_image = base64.b64encode(buffer.getvalue()).decode()
    im_url = "data:image/jpeg;base64, " + encoded_image
    return im_url

def load_mini_mnist():
    with open(mnist_path, 'rb') as f:
        data = pickle.load(f)
    return data

# Load the data
data = load_mini_mnist()
images = data['images']
labels = data['labels']

# Flatten image matrices from (28,28) to (784,)
flattenend_images = np.array([i.flatten() for i in images])

# t-SNE Outputs a 3 dimensional point for each image
tsne = TSNE(
    random_state=123,
    n_components=3,
    verbose=0,
    perplexity=40,
    n_iter=300) \
    .fit_transform(flattenend_images)

# Color for each digit
color_map = {
    0: "#E52B50",
    1: "#9F2B68",
    2: "#3B7A57",
    3: "#3DDC84",
    4: "#FFBF00",
    5: "#915C83",
    6: "#008000",
    7: "#7FFFD4",
    8: "#E9D66B",
    9: "#007FFF",
}
colors = [color_map[label] for label in labels]

fig = go.Figure(data=[go.Scatter3d(
    x=tsne[:, 0],
    y=tsne[:, 1],
    z=tsne[:, 2],
    mode='markers',
    marker=dict(
        size=2,
        color=colors,
    )
)])

fig.update_traces(
    hoverinfo="none",
    hovertemplate=None,
)

app = Dash(__name__)

app.layout = html.Div(
    className="container",
    children=[
        dcc.Graph(id="graph-5", figure=fig, clear_on_unhover=True),
        dcc.Tooltip(id="graph-tooltip-5", direction='bottom'),
    ],
)

@dash.callback(
    Output("graph-tooltip-5", "show"),
    Output("graph-tooltip-5", "bbox"),
    Output("graph-tooltip-5", "children"),
    Input("graph-5", "hoverData"),
)
def display_hover(hoverData):
    if hoverData is None:
        return False, no_update, no_update

    # demo only shows the first point, but other points may also be available
    print(hoverData)
    hover_data = hoverData["points"][0]
    bbox = hover_data["bbox"]
    num = hover_data["pointNumber"]

    im_matrix = images[num]
    im_url = np_image_to_base64(im_matrix)
    children = [
        html.Div([
            html.Img(
                src=im_url,
                style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
            ),
            html.P("MNIST Digit " + str(labels[num]), style={'font-weight': 'bold'})
        ])
    ]

    return True, bbox, children

if __name__ == "__main__":
    app.run(debug=True)



The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



In [49]:
# Making my own hovered plot
import io
import base64
import pickle

from dash import Dash, dcc, html, no_update
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from PIL import Image
import numpy as np
from chemUtils.utils import utils
import json
import matplotlib as plt

# Contains 100 images for each digit from MNIST
mnist_path = 'mini-mnist-1000.pickle'

# Helper functions
def np_image_to_base64(im_matrix):
    im = Image.fromarray(im_matrix)
    buffer = io.BytesIO()
    im.save(buffer, format="jpeg")
    encoded_image = base64.b64encode(buffer.getvalue()).decode()
    im_url = "data:image/jpeg;base64, " + encoded_image
    return im_url

def load_mini_mnist():
    with open(mnist_path, 'rb') as f:
        data = pickle.load(f)
    return data

# Load the data
data = load_mini_mnist()
images = data['images']
labels = data['labels']

# Flatten image matrices from (28,28) to (784,)
flattenend_images = np.array([i.flatten() for i in images])

# t-SNE Outputs a 3 dimensional point for each image
tsne = TSNE(
    random_state=123,
    n_components=3,
    verbose=0,
    perplexity=40,
    n_iter=300) \
    .fit_transform(flattenend_images)

# Color for each digit
color_map = {
    0: "#E52B50",
    1: "#9F2B68",
    2: "#3B7A57",
    3: "#3DDC84",
    4: "#FFBF00",
    5: "#915C83",
    6: "#008000",
    7: "#7FFFD4",
    8: "#E9D66B",
    9: "#007FFF",
}
colors = [color_map[label] for label in labels]

fig = go.Figure(data=[go.Scatter3d(
    x=tsne[:, 0],
    y=tsne[:, 1],
    z=tsne[:, 2],
    mode='markers',
    marker=dict(
        size=2,
        color=colors,
    )
)])

fig.update_traces(
    hoverinfo="none",
    hovertemplate=None,
)

app = Dash(__name__)

app.layout = html.Div(
    className="container",
    children=[
        dcc.Graph(id="graph-5", figure=fig, clear_on_unhover=True),
        dcc.Tooltip(id="graph-tooltip-5", direction='bottom'),
    ],
)

@dash.callback(
    Output("graph-tooltip-5", "show"),
    Output("graph-tooltip-5", "bbox"),
    Output("graph-tooltip-5", "children"),
    Input("graph-5", "hoverData"),
)
def display_hover(hoverData):
    if hoverData is None:
        return False, no_update, no_update

    # demo only shows the first point, but other points may also be available
    hover_data = hoverData["points"][0]
    bbox = hover_data["bbox"]
    num = hover_data["pointNumber"]

    im_matrix = images[num]
    im_url = np_image_to_base64(im_matrix)
    children = [
        html.Div([
            html.Img(
                src=im_url,
                style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
            ),
            html.P("MNIST Digit " + str(labels[num]), style={'font-weight': 'bold'})
        ])
    ]

    return True, bbox, children

if __name__ == "__main__":
    app.run(debug=True)


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



In [60]:
import io
import base64
import pickle

import dash
from dash.exceptions import PreventUpdate
from dash import dcc, html, Dash, no_update
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import json
from chemUtils.utils import utils
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from PIL import Image
import numpy as np

concat = True
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/images.json', 'r') as f:
    images = json.load(f)
image_file_path = '/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reactants_imgs/'

filename = '/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/results/benchmark_init_random_embedding_OHE_scaling_log_batch_5_acq_EHVI.csv'

max_filename ='/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/data/figure_S2_tidy_max_peak_height.csv'

# Make dataframe of measured and unmeasured reactant pairs, providing all metadata
df_preds = pd.read_csv(filename)
reacts_df = df_preds[df_preds['batch_num'].notna()]
reacts_df['batch_num'] = reacts_df['batch_num'].astype(int)
reacts_df.rename(columns={'A': 'react1_label', 'B': 'react2_label', 'A_smiles': 'react1_smiles', 'B_smiles': 'react2_smiles'}, inplace=True)


# Add not measured reactant pairs
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reaction_components.json', 'r') as f:
    components = json.load(f)

# Sample lists of A and B components
react1 = components['A']
react2 = components['B']

# all combinations
all_combos = set((r1, r2) for r1 in react1 for r2 in react2)

# Convert df to a set of tuples
df_tuples = set(zip(reacts_df['react1_label'], reacts_df['react2_label']))

# Get the tuples you need to add
to_add = all_combos - df_tuples

# Convert these tuples to DataFrame rows and append them
to_add_df = pd.DataFrame(list(to_add), columns=['react1_label', 'react2_label'])
to_add_df['peak_height'] = np.nan

reacts_df = pd.concat([reacts_df, to_add_df], ignore_index=True)
# Add smiles to plot_df
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/smiles.json', 'r') as f:
    smiles = json.load(f)
reacts_df['react1_smiles'] = reacts_df['react1_label'].apply(lambda x: smiles[x])
reacts_df['react2_smiles'] = reacts_df['react2_label'].apply(lambda x: smiles[x])

########### GET FP LIST and tSNE coordinates #############
df_max = pd.read_csv(max_filename)
r1_test = reacts_df['react1_smiles'].values
r2_test = reacts_df['react2_smiles'].values
# Get fp list (or lists)
if concat:
    reacts_fp = utils.fp_list_from_two_smiles_list(r1_test, r2_test)
else:
    react1_fp = utils.fp_list_from_smiles_list(r1_test)
    react2_fp = utils.fp_list_from_smiles_list(r2_test)
pca = PCA(n_components=10)
# Get coordinates and plot
plt.figure(figsize=(10, 8))
crds = pca.fit_transform(reacts_fp)
crds_tsne = TSNE(n_components=2).fit_transform(crds)
reacts_df['X'] = crds_tsne[:, 0]
reacts_df['Y'] = crds_tsne[:, 1]
# Adding max peak height
selected_df_max = df_max[['A', 'B', 'peak_height']]
selected_df_max.rename(columns={'A': 'react1_label', 'B': 'react2_label'}, inplace=True)
reacts_df = reacts_df.merge(selected_df_max, on=['react1_label', 'react2_label'], how='left', suffixes=('', '_max'))
# remove max peak height for unmeasured reactant pairs
reacts_df.loc[reacts_df['peak_height'].isna(), 'peak_height_max'] = np.nan
# Compute average X and Y coordinates for each unique react1 and react2 smiles combo.
averaged_coords = reacts_df.groupby(['react1_smiles', 'react2_smiles']).agg({'X': 'mean', 'Y': 'mean'}).reset_index()
# Merge the averaged coordinates back to the original dataframe
plot_df = reacts_df.drop(columns=['X', 'Y']).merge(averaged_coords, on=['react1_smiles', 'react2_smiles'], how='left')

############### PLOTTING ######################
# Plot
plot_df['batch_num'].fillna(-1, inplace=True)
plot_df['peak_height'].fillna(-1, inplace=True)
plot_df['batch_num'] = plot_df['batch_num'].astype(str)
# Add image paths
plot_df['react1_image'] = plot_df['react1_label'].apply(lambda x: image_file_path + images[x])
plot_df['react2_image'] = plot_df['react2_label'].apply(lambda x: image_file_path + images[x])
# Color for each batch_num
color_map = {
    0.0: '#E52B50',  # Bright Red
    1.0: '#1F77B4',  # Blue
    2.0: '#FF7F0E',  # Orange
    3.0: '#2CA02C',  # Green
    4.0: '#8C564B',  # Brown
    5.0: '#9467BD',  # Purple
    6.0: '#D62728',  # Red
    7.0: '#7F7F7F',  # Middle Gray
    8.0: '#BCBD22',  # Olive Green
    9.0: '#17BECF',  # Cyan
    -1.0: '#999999'  # Gray (Unchanged as it seems to be an outlier or special value)
}
colors = [color_map[float(label)] for label in plot_df['batch_num'].values]
def add_jitter(series, scale_factor=0.03):
    """Add jitter to a pandas Series.
    
    Args:
        series (pd.Series): The original pandas Series.
        scale_factor (float): The scale of the jitter relative to the data range.
        
    Returns:
        pd.Series: A new series with jitter added.
    """
    jitter = (np.random.random(size=len(series)) - 0.5) * scale_factor * (series.max() - series.min())
    #print(series + jitter)
    return series + jitter
# Apply the jitter to the X and Y values
plot_df['X'] = add_jitter(plot_df['X'])
plot_df['Y'] = add_jitter(plot_df['Y'])
plot_df.reset_index(drop=True, inplace=True)

# Create scatter plot with plotly express
fig_express = px.scatter(plot_df, x="X", y="Y", 
                         color='batch_num', 
                         color_discrete_sequence=list(color_map.values()),
                         category_orders={"batch_num": list(plot_df["batch_num"].unique())},
                         custom_data=["react1_label", "react2_label", "batch_num", "peak_height"])
# Convert to graph_objects figure
fig = go.Figure(fig_express)
for trace in fig.data:
    print(trace.customdata)
fig.update_traces(marker={'size': 15, 'opacity': 0.6})
fig.write_html("test_scatter.html")
fig.update_traces(
hoverinfo="none",
hovertemplate=None,
)
################## DASH APP INFO ##################
app = Dash(__name__)

app.layout = html.Div(
    className="container",
    children=[
        dcc.Graph(id="graph-5", figure=fig, clear_on_unhover=True),
        dcc.Tooltip(id="graph-tooltip-5", direction='bottom'), # this is new
    ],
)

@dash.callback(
    Output("graph-tooltip-5", "show"),
    Output("graph-tooltip-5", "bbox"),
    Output("graph-tooltip-5", "children"),
    Input("graph-5", "hoverData"),
)
def display_hover(hoverData):
    if hoverData is None:
        return False, no_update, no_update

    # demo only shows the first point, but other points may also be available
    #hover_data = hoverData["points"][0]
    hover_data = hoverData
    bbox = hover_data['points'][0]["bbox"]
    react1 = hover_data["points"][0]["customdata"][0]
    react2 = hover_data["points"][0]["customdata"][1]
    #batch_num = float(hover_data["points"][0]["customdata"][2])
    peak_height = hover_data["points"][0]["customdata"][3]
    df_row = plot_df[(plot_df["react1_label"] == react1) & 
                 (plot_df["react2_label"] == react2) &  
                 (plot_df['peak_height'] == peak_height)].iloc[0]
    # react1 = hover_data["customdata"][0]  # Assuming that customdata holds react1_label and react2_label
    # react2 = hover_data["customdata"][1]
    # batch_num = hover_data["customdata"][2]
    # peak_height = hover_data["customdata"][3]
    # df_row = plot_df[(plot_df["react1_label"] == react1) & (plot_df["react2_label"] == react2) & (plot_df['batch_num'] == batch_num) & (plot_df['peak_height'] == peak_height)].iloc[0]
    # Load image with pillow
    im1 = Image.open(df_row['react1_image'])
    im2 = Image.open(df_row['react2_image'])
    
    # dump it to base64
    buffer = io.BytesIO()
    im1.save(buffer, format="png")
    encoded_image = base64.b64encode(buffer.getvalue()).decode()
    im_url1 = "data:image/png;base64, " + encoded_image
    # Reset the buffer
    buffer.seek(0)
    buffer.truncate()
    im2.save(buffer, format="png")
    encoded_image = base64.b64encode(buffer.getvalue()).decode()
    im_url2 = "data:image/png;base64, " + encoded_image
    
    children = [
        html.Img(
            src=im_url1,
            style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
        ),
        html.Img(
            src=im_url2,
            style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
        ),
        html.P("Reactants " + str(df_row['react1_label']) + ", " + str(df_row['react2_label']), style={'font-weight': 'bold'}),
        html.P("Peak Height: " + str(df_row['peak_height']) + " / " + str(df_row['peak_height_max']), style={'font-weight': 'bold'}),
        html.P("Solvent: " + str(df_row['solvent']), style={'font-weight': 'bold'}),
        html.P("Base: " + str(df_row['base']), style={'font-weight': 'bold'}),
        html.P("Temp: " + str(df_row['T']), style={'font-weight': 'bold'})
    ]

    return True, bbox, children
if __name__ == "__main__":
    app.run(debug=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



[['R1-A7' 'R1-B6' '9.0' 686.4]
 ['R1-A3' 'R1-B8' '9.0' 17.2]
 ['R1-A3' 'R1-B6' '9.0' 464.0]
 ['R1-A3' 'R1-B6' '9.0' 203.9]
 ['R1-A1' 'R1-B6' '9.0' 1176.3]]
[['R1-A8' 'R1-B6' '0.0' 21.7]
 ['R1-A6' 'R1-B6' '0.0' 57.0]
 ['R1-A5' 'R1-B4' '0.0' 28.7]
 ['R1-A5' 'R1-B12 (D)' '0.0' 49.4]
 ['R1-A4' 'R1-B2' '0.0' 1480.0]]
[['R1-A8' 'R1-B11' '8.0' 18.7]
 ['R1-A7' 'R1-B6' '8.0' 5175.7]
 ['R1-A6' 'R1-B6' '8.0' 189.9]
 ['R1-A3' 'R1-B11' '8.0' 73.7]
 ['R1-A2' 'R1-B6' '8.0' 120.0]]
[['R1-A8' 'R1-B10' '4.0' 18.7]
 ['R1-A6' 'R1-B10' '4.0' 25.2]
 ['R1-A5' 'R1-B10' '4.0' 344.6]
 ['R1-A4' 'R1-B4' '4.0' 4408.3]
 ['R1-A4' 'R1-B10' '4.0' 7262.8]]
[['R1-A7' 'R1-B2' '1.0' 579.6]
 ['R1-A4' 'R1-B9' '1.0' 3114.8]
 ['R1-A4' 'R1-B2' '1.0' 1736.2]
 ['R1-A4' 'R1-B2' '1.0' 137.5]
 ['R1-A4' 'R1-B2' '1.0' 81.6]]
[['R1-A5' 'R1-B6' '6.0' 43.0]
 ['R1-A4' 'R1-B6' '6.0' 6621.2]
 ['R1-A4' 'R1-B6' '6.0' 8730.5]
 ['R1-A4' 'R1-B6' '6.0' 98.7]
 ['R1-A2' 'R1-B6' '6.0' 6.8]]
[['R1-A4' 'R1-B9' '3.0' 2467.0]
 ['R1-A4' 'R1-B5' '3.0' 56

<Figure size 1000x800 with 0 Axes>

In [37]:
plot_df

Unnamed: 0,react1_label,react1_smiles,react2_label,react2_smiles,solvent,base,T,time,round_num,peak_height,...,peak_height_decile,new_index,peak_height_log,priority,batch_num,peak_height_max,X,Y,react1_image,react2_image
0,R1-A5,NCc1ccccc1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,"1,4-dioxane",,150.0,15.0,1.0,43.0,...,D5,1484.0,3.761200116,1.0,6.0,141.2,10.332529,7.009408,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
1,R1-A4,CN1CCNCC1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,NMP,,150.0,15.0,1.0,8730.5,...,D10,715.0,9.074577921,1.0,6.0,10005.9,-0.092256,10.360464,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
2,R1-A4,CN1CCNCC1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,"1,4-dioxane",,150.0,15.0,1.0,6621.2,...,D10,1483.0,8.798031901,1.0,6.0,10005.9,0.037472,10.117134,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
3,R1-A4,CN1CCNCC1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,"1,4-dioxane",NaOtBu,150.0,15.0,1.0,98.7,...,D7,1291.0,4.592084946,1.0,6.0,10005.9,-0.187829,10.518594,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
4,R1-A2,Cn1ccnc1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,"1,4-dioxane",,20.0,0.0,1.0,6.8,...,D1,1385.0,1.916922612,1.0,6.0,1010.0,-5.063539,7.037925,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,R1-A1,N[C@@H]1CCCC[C@H]1N,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,,,,,,,...,,,,,-1.0,,2.268936,11.963732,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
104,R1-A3,C1CCNCC1,R1-B10,N#Cc1ccc(Br)cc1,,,,,,,...,,,,,-1.0,,-1.032567,11.981302,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
105,R1-A5,NCc1ccccc1,R1-B5,O=[N+]([O-])c1ccc(Cl)cc1,,,,,,,...,,,,,-1.0,,9.172738,6.928660,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
106,R1-A5,NCc1ccccc1,R1-B8,Fc1ccc(C(F)(F)F)cc1,,,,,,,...,,,,,-1.0,,9.118144,7.957497,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...


In [52]:
html.Div([
            html.Img(
                src='/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reactants_imgs/R1-A5.png',
                style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
            ),
            html.P("Reactant", style={'font-weight': 'bold'})
        ])

Div([Img(src='/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reactants_imgs/R1-A5.png', style={'width': '50px', 'display': 'block', 'margin': '0 auto'}), P(children='Reactant', style={'font-weight': 'bold'})])

In [53]:
from IPython.display import Image, display, HTML
from PIL import Image
# Path to the image
image_path = "/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reactants_imgs/R1-A5.png"

# Load image with pillow
im = Image.open(image_path)

# dump it to base64
buffer = io.BytesIO()
im.save(buffer, format="png")
encoded_image = base64.b64encode(buffer.getvalue()).decode()
im_url = "data:image/png;base64, " + encoded_image

# Display the image in the Jupyter notebook
#display(Image(filename=image_path))

# If you want to display additional HTML content along with the image:
html_content = """
<div>
    <img src="{}" style="width: 50px; display: block; margin: 0 auto">
    <p style="font-weight: bold">Reactant</p>
</div>
""".format(im_url)
display(HTML(html_content))

In [3]:
to_add = df_preds[df_preds['batch_num'].isna()].drop_duplicates(subset=['A', 'B'], keep='last')
to_add.rename(columns={'A': 'react1_label', 'B': 'react2_label'}, inplace=True)
to_add

Unnamed: 0,react1_label,A_smiles,react2_label,B_smiles,solvent,base,T,time,round_num,peak_height,peak_height_scaled,peak_height_quantile_scaled,quantile_category,peak_height_decile,new_index,peak_height_log,priority,batch_num
20,R1-A8,c1ccc2[nH]cnc2c1,R1-B9,FC(F)(F)c1ccc(Cl)cc1,"1,4-dioxane",DIPEA,20,0,1,28.9,0.001740,0.260247,Q2,D3,839,PENDING,0.0,
36,R1-A8,c1ccc2[nH]cnc2c1,R1-B8,Fc1ccc(C(F)(F)F)cc1,"1,4-dioxane",DIPEA,20,0,1,29.6,0.001786,0.272609,Q2,D3,831,PENDING,0.0,
52,R1-A8,c1ccc2[nH]cnc2c1,R1-B7,CCc1ncnc(Cl)c1F,"1,4-dioxane",DIPEA,20,0,1,72.4,0.004576,0.575146,Q3,D6,823,PENDING,0.0,
67,R1-A8,c1ccc2[nH]cnc2c1,R1-B6,O=[N+]([O-])c1ccc(Br)cc1,"1,4-dioxane",DIPEA,20,0,1,34.4,0.002099,0.333767,Q2,D4,815,PENDING,0.0,
83,R1-A8,c1ccc2[nH]cnc2c1,R1-B5,O=[N+]([O-])c1ccc(Cl)cc1,"1,4-dioxane",DIPEA,20,0,1,41.9,0.002588,0.412492,Q2,D5,807,PENDING,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1441,R1-A1,N[C@@H]1CCCC[C@H]1N,R1-B2,CC(=O)c1ccc(Cl)cc1,"1,4-dioxane",DIPEA,20,0,1,43.4,0.002686,0.426805,Q2,D5,776,PENDING,0.0,
1457,R1-A1,N[C@@H]1CCCC[C@H]1N,R1-B12 (D),O=[N+]([O-])c1cnc(Cl)nc1Cl,"1,4-dioxane",DIPEA,20,0,1,1511.9,0.098411,0.906962,Q4,D10,856,PENDING,0.0,
1473,R1-A1,N[C@@H]1CCCC[C@H]1N,R1-B11,CCN(CC)c1ccc(Br)cc1,"1,4-dioxane",DIPEA,20,0,1,37.4,0.002295,0.366298,Q2,D4,848,PENDING,0.0,
1489,R1-A1,N[C@@H]1CCCC[C@H]1N,R1-B10,N#Cc1ccc(Br)cc1,"1,4-dioxane",DIPEA,20,0,1,32.1,0.001949,0.311646,Q2,D4,840,PENDING,0.0,


In [11]:
# All reactant combinations
# read in json
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reaction_components.json', 'r') as f:
    components = json.load(f)

In [12]:
import pandas as pd
import numpy as np

# Sample lists of A and B components
react1 = components['A']
react2 = components['B']

# all combinations
all_combos = set((r1, r2) for r1 in react1 for r2 in react2)

# Convert df to a set of tuples
df_tuples = set(zip(plot_df['react1_label'], plot_df['react2_label']))

# Get the tuples you need to add
to_add = all_combos - df_tuples

# Convert these tuples to DataFrame rows and append them
to_add_df = pd.DataFrame(list(to_add), columns=['react1_label', 'react2_label'])
to_add_df['peak_height'] = np.nan

df = pd.concat([plot_df, to_add_df], ignore_index=True)

df

Unnamed: 0,batch_num,react1_smiles,react2_smiles,react1_label,react2_label,peak_height,solvent,base,T,peak_height_max,X,Y,react1_image,react2_image
0,6.0,NCc1ccccc1,O=[N+]([O-])c1ccc(Br)cc1,R1-A5,R1-B6,43.0,"1,4-dioxane",,150.0,141.2,-3.466696,-134.516404,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
1,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,8730.5,NMP,,150.0,10005.9,-2.984995,3.208017,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
2,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,6621.2,"1,4-dioxane",,150.0,10005.9,-2.691292,5.057504,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
3,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,98.7,"1,4-dioxane",NaOtBu,150.0,10005.9,3.175570,5.727910,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
4,6.0,Cn1ccnc1,O=[N+]([O-])c1ccc(Br)cc1,R1-A2,R1-B6,6.8,"1,4-dioxane",,20.0,1010.0,-95.201087,107.129813,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,,,,R1-A1,R1-B6,,,,,,,,,
104,,,,R1-A3,R1-B10,,,,,,,,,
105,,,,R1-A5,R1-B5,,,,,,,,,
106,,,,R1-A5,R1-B8,,,,,,,,,


In [15]:
# Add unique data points now if they don't have a batch_num
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/reaction_components.json', 'r') as f:
    components = json.load(f)

# Sample lists of A and B components
react1 = components['A']
react2 = components['B']

# all combinations
all_combos = set((r1, r2) for r1 in react1 for r2 in react2)

# Convert df to a set of tuples
df_tuples = set(zip(plot_df['react1_label'], plot_df['react2_label']))

# Get the tuples you need to add
to_add = all_combos - df_tuples

# Convert these tuples to DataFrame rows and append them
to_add_df = pd.DataFrame(list(to_add), columns=['react1_label', 'react2_label'])
to_add_df['peak_height'] = np.nan

df = pd.concat([plot_df, to_add_df], ignore_index=True)

# Add smiles to plot_df
with open('/Users/kate_fieseler/PycharmProjects/edboplus_scope/test/thompson_SNAr/figure_S2/smiles.json', 'r') as f:
    smiles = json.load(f)

df['react1_smiles'] = df['react1_label'].apply(lambda x: smiles[x])
df['react2_smiles'] = df['react2_label'].apply(lambda x: smiles[x])

In [16]:
df

Unnamed: 0,batch_num,react1_smiles,react2_smiles,react1_label,react2_label,peak_height,solvent,base,T,peak_height_max,X,Y,react1_image,react2_image
0,6.0,NCc1ccccc1,O=[N+]([O-])c1ccc(Br)cc1,R1-A5,R1-B6,43.0,"1,4-dioxane",,150.0,141.2,-3.466696,-134.516404,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
1,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,8730.5,NMP,,150.0,10005.9,-2.984995,3.208017,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
2,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,6621.2,"1,4-dioxane",,150.0,10005.9,-2.691292,5.057504,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
3,6.0,CN1CCNCC1,O=[N+]([O-])c1ccc(Br)cc1,R1-A4,R1-B6,98.7,"1,4-dioxane",NaOtBu,150.0,10005.9,3.175570,5.727910,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
4,6.0,Cn1ccnc1,O=[N+]([O-])c1ccc(Br)cc1,R1-A2,R1-B6,6.8,"1,4-dioxane",,20.0,1010.0,-95.201087,107.129813,/Users/kate_fieseler/PycharmProjects/edboplus_...,/Users/kate_fieseler/PycharmProjects/edboplus_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,,N[C@@H]1CCCC[C@H]1N,O=[N+]([O-])c1ccc(Br)cc1,R1-A1,R1-B6,,,,,,,,,
104,,C1CCNCC1,N#Cc1ccc(Br)cc1,R1-A3,R1-B10,,,,,,,,,
105,,NCc1ccccc1,O=[N+]([O-])c1ccc(Cl)cc1,R1-A5,R1-B5,,,,,,,,,
106,,NCc1ccccc1,Fc1ccc(C(F)(F)F)cc1,R1-A5,R1-B8,,,,,,,,,


In [None]:
# # Using a single trace and a colorscale for batch_num
# fig = go.Figure(data=[go.Scatter(x=plot_df["X"], 
#                                  y=plot_df["Y"], 
#                                  marker=dict(color=plot_df['batch_num'].astype(float),
#                                              colorscale=list(color_map.values()), 
#                                              size=15, 
#                                              showscale=True, 
#                                              colorbar=dict(title="Batch Num")),  # Add a title to the colorbar
#                                  mode="markers", 
#                                  customdata=plot_df[['react1_image', 'react2_image', 'react1_label', 'react2_label', 'peak_height']])])
# 
# fig.update_traces(marker={'size': 15})



In [42]:
plot_df[['react1_label', 'react2_label', 'batch_num', 'peak_height']]

Unnamed: 0,react1_label,react2_label,batch_num,peak_height
0,R1-A5,R1-B6,6.0,43.0
1,R1-A4,R1-B6,6.0,8730.5
2,R1-A4,R1-B6,6.0,6621.2
3,R1-A4,R1-B6,6.0,98.7
4,R1-A2,R1-B6,6.0,6.8
...,...,...,...,...
103,R1-A1,R1-B6,-1.0,
104,R1-A3,R1-B10,-1.0,
105,R1-A5,R1-B5,-1.0,
106,R1-A5,R1-B8,-1.0,


In [55]:
color_map

{0.0: '#E52B50',
 1.0: '#9F2B68',
 2.0: '#3B7A57',
 3.0: '#3DDC84',
 4.0: '#FFBF00',
 5.0: '#915C83',
 6.0: '#008000',
 7.0: '#7FFFD4',
 8.0: '#E9D66B',
 9.0: '#007FFF',
 -1.0: '#999999'}