In [1]:
import pandas as pd
import numpy as np
from rdkit.Chem import rdChemReactions
from rdkit.Chem import AllChem, Draw
import os
from IPython.display import display
from PIL import Image
import io
from openpyxl import Workbook
from openpyxl.drawing.image import Image

In [27]:
def view_reactionScheme(data, NumReaction_to_view, duplicate):
    '''Show a random run of consecutive reactions: scheme image plus conditions.

    Parameters
    ----------
    data : pandas.DataFrame
        Reaction table. Columns read here: 'Reaction ID', 'Reaction',
        'Largest Reactant MW', 'Reagent', 'Solvent (Reaction Details)',
        'Time (Reaction Details) [h]', 'Temperature (Reaction Details) [C]',
        'Yield', 'Links to Reaxys' and 'Links to buy'.
    NumReaction_to_view : int
        How many consecutive rows to show, starting at a random position.
    duplicate : str
        'drop duplicate' keeps only the first row per 'Reaction ID';
        'keep duplicate' uses every row.

    Raises
    ------
    ValueError
        If `duplicate` is not one of the two accepted strings, or if
        `NumReaction_to_view` is negative or larger than the number of
        available rows (counted after optional de-duplication).
    '''

    if duplicate == 'drop duplicate':
        Reaction_data = data.drop_duplicates(subset=['Reaction ID'], keep='first')
    elif duplicate == 'keep duplicate':
        Reaction_data = data
    else:
        raise ValueError('Invalid input')
    Reaction_data = Reaction_data.reset_index(drop=True)

    total_rows = Reaction_data.shape[0]
    # A negative count would otherwise slip through the upper-bound check and
    # turn into an arbitrary slice such as [0:-1].
    if NumReaction_to_view < 0:
        raise ValueError('Number of reactions to view must not be negative')
    if NumReaction_to_view > total_rows:
        raise ValueError('Number of reactions to view is more than the total number of reactions in the dataset')

    # Random window start chosen so that the whole window fits in the table.
    random_start = np.random.randint(0, (total_rows - NumReaction_to_view + 1), size=1)[0]
    window = Reaction_data.iloc[random_start: random_start + NumReaction_to_view]

    for _, row in window.iterrows():
        rxn = rdChemReactions.ReactionFromSmarts(row['Reaction'], useSmiles=True)
        if not rxn:
            continue

        print('Reaction ID:', row['Reaction ID'])
        img = Draw.ReactionToImage(rxn)

        # Display the rendered image directly. The previous PNG round-trip went
        # through `Image.open`, but in this notebook the name `Image` is rebound
        # by the later `from openpyxl.drawing.image import Image`, so it no
        # longer refers to PIL's Image module.
        display(img)

        # Reaction conditions
        print(row[['Largest Reactant MW', 'Reagent', 'Solvent (Reaction Details)', 'Time (Reaction Details) [h]', 'Temperature (Reaction Details) [C]', 'Yield']])
        print(f'Reaction Link: {row["Links to Reaxys"]}\n')
        print(f'Link to buy: {row["Links to buy"]}\n')

def save_reaction_images(df, output_dir):
    '''Render each reaction in `df` to `<output_dir>/reaction_<index label>.png`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Reaction' column holding reaction SMILES strings.
    output_dir : str
        Directory to write the PNG files into; created if it does not exist.

    Rows whose 'Reaction' value is missing (None/NaN), not a string, or blank
    are skipped. NaN is truthy, so a plain truthiness test would let empty
    spreadsheet cells through to the reaction parser.
    '''
    os.makedirs(output_dir, exist_ok=True)
    for i, row in df.iterrows():
        rxn_smiles = row['Reaction']
        if not isinstance(rxn_smiles, str) or not rxn_smiles.strip():
            continue
        rxn = AllChem.ReactionFromSmarts(rxn_smiles, useSmiles=True)
        img = Draw.ReactionToImage(rxn, highlightByReactant=True)
        # The file name uses the DataFrame index label so it can be matched
        # back to the row later.
        image_path = os.path.join(output_dir, f"reaction_{i}.png")
        img.save(image_path)

def insert_images_and_data_to_excel(df, image_dir, excel_path):
    '''Write one worksheet row per reaction: scheme image in column A, data in B..G.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Reaction ID', 'Reactant CAS',
        'Available at MIT', 'Procedure', 'Yield (number)' and 'Links to Reaxys'.
    image_dir : str
        Directory searched for `reaction_<index label>.png`; a row whose image
        file is missing still gets its data cells, just no picture.
    excel_path : str
        Destination path of the saved workbook.

    Note: `Image` here must be `openpyxl.drawing.image.Image` (the last
    `Image` imported in this notebook), not PIL's Image module.
    '''
    wb = Workbook()
    ws = wb.active

    # Exactly one header per written column, in column order A..G. The header
    # text for B..G doubles as the DataFrame column to read from.
    headers = [
        'Reaction', 'Reaction ID', 'Reactant CAS', 'Available at MIT',
        'Procedure', 'Yield (number)', 'Links to Reaxys',
    ]
    ws.append(headers)
    data_columns = list(zip('BCDEFG', headers[1:]))

    # Picture size and the factors used to size the row / column around it.
    # NOTE(review): 0.75 looks like a pixel -> point conversion and 0.15 like
    # pixel -> column-width units; confirm before changing.
    img_width, img_height = 350, 120

    # Sheet rows follow the position in `df` (row 1 is the header), while the
    # image file name follows the index label, so a non-contiguous or
    # non-integer index still produces consecutive rows.
    for position, (i, row) in enumerate(df.iterrows()):
        row_number = position + 2
        img_path = os.path.join(image_dir, f"reaction_{i}.png")

        if os.path.exists(img_path):
            img = Image(img_path)
            img.width = img_width
            img.height = img_height
            ws.row_dimensions[row_number].height = img.height * 0.75
            ws.column_dimensions['A'].width = img.width * 0.15
            ws.add_image(img, f"A{row_number}")

        for letter, column in data_columns:
            ws[f"{letter}{row_number}"] = row[column]
    wb.save(excel_path)


In [28]:
# Root folder of the ketone-reduction data. Set the KETONE_DATA_DIR environment
# variable to run this notebook on another machine; the default reproduces the
# paths that were previously hard-coded in every read_excel call.
KETONE_DIR = os.environ.get(
    'KETONE_DATA_DIR',
    '/Users/suongsuong/Documents/GitHub/Reactivity-based-metric-of-complexity/Reduction of ketone',
)
EXPERIMENT_DIR = os.path.join(KETONE_DIR, 'experiment_to_test')

# Full reaction set: in-ring and in-chain ketones stacked, reduced to the
# columns used below.
data_in_Ring = pd.read_excel(os.path.join(KETONE_DIR, 'ketone_in_ring(no_manualcheck).xlsx'))
data_in_Chain = pd.read_excel(os.path.join(KETONE_DIR, 'ketone_in_chain(no_manualcheck).xlsx'))
data = pd.concat([data_in_Ring, data_in_Chain], ignore_index=True)
data = data[['Reaction ID', 'Reaction', 'Links to Reaxys', 'Yield (number)']]

# Commercially available subset (ring + chain).
commer_available_ring = pd.read_excel(os.path.join(EXPERIMENT_DIR, 'commer_available_ketone_in_ring.xlsx'))
commer_available_chain = pd.read_excel(os.path.join(EXPERIMENT_DIR, 'commer_available_ketone_in_chain.xlsx'))
concat_commer = pd.concat([commer_available_ring, commer_available_chain], ignore_index=True)

# Keep only reactions present in both tables. reset_index() (without drop)
# keeps each row's position in the left-merge result as an 'index' column.
merged_available_df = data.merge(concat_commer, on='Reaction ID', how='left', indicator=True)
merged_available_df = merged_available_df[merged_available_df['_merge'] == 'both'].reset_index()
merged_available_df

Unnamed: 0,index,Reaction ID,Reaction,Links to Reaxys,Yield (number),Reactant CAS,Available at MIT,Procedure,_merge
0,106,116924,CN1C(=O)C(=O)C2=CC=CC=C12>>CN1C(=O)C(O)C2=CC=C...,https://www.reaxys.com/reaxys/secured/hopinto....,75.0,2058-74-4,buchwald,"N-Methylisatin (2.5 g, 15.5 mmol) was suspende...",both
1,107,548023,O=C1NC2=CC=CC=C2C1=O>>OC1C(=O)NC2=CC=CC=C12,https://www.reaxys.com/reaxys/secured/hopinto....,50.0,91-56-5,buchwald,"Prepared from isatin (4.56 g, 31 mmol) and NaB...",both
2,371,1818310,C[C@]12CCC(=O)C=C1CCCC2=O>>C[C@]12CCC(=O)C=C1C...,https://www.reaxys.com/reaxys/secured/hopinto....,97.0,33878-99-8,wendlandt,To a solution of Wieland-Miescher ketone (1.0 ...,both
3,1074,997369,CC(C)=CCC\C(C)=C\CCC(C)=O>>CC(O)CC\C=C(/C)CCC=...,https://www.reaxys.com/reaxys/secured/hopinto....,98.0,3796-70-1,buchwald,To a clear solution of geranyl acetone 2 (1.94...,both
4,1080,759034,CC(=O)C1=CC=CC(Cl)=C1>>CC(O)C1=CC(Cl)=CC=C1,https://www.reaxys.com/reaxys/secured/hopinto....,98.0,99-02-5,buchwald,"In a scintillation vial, 100 mg of the acetoph...",both
5,1103,1563771,CC(=O)C1=CC=CO1>>CC(O)C1=CC=CO1,https://www.reaxys.com/reaxys/secured/hopinto....,90.0,1192-62-7,buchwald,"Acetylfuran (20.0 g, 182 mmol, 1.0 equiv) was ...",both
6,1394,578384,CC(=O)C1=CC=C(F)C=C1>>CC(O)C1=CC=C(F)C=C1,https://www.reaxys.com/reaxys/secured/hopinto....,64.0,403-42-9,buchwald,"In a scintillation vial, 100 mg of the acetoph...",both


In [29]:
# Render one PNG per reaction, then build the Excel sheet that embeds them.
# The output location is derived from KETONE_DATA_DIR (environment variable)
# when set; the default reproduces the previously hard-coded paths.
EXPERIMENT_DIR = os.path.join(
    os.environ.get(
        'KETONE_DATA_DIR',
        '/Users/suongsuong/Documents/GitHub/Reactivity-based-metric-of-complexity/Reduction of ketone',
    ),
    'experiment_to_test',
)
# The trailing '' component keeps the trailing path separator on img_dir.
img_dir = os.path.join(EXPERIMENT_DIR, 'reaction_images', '')

save_reaction_images(merged_available_df, img_dir)

insert_images_and_data_to_excel(
    merged_available_df,
    img_dir,
    os.path.join(EXPERIMENT_DIR, 'commercial_available_positive.xlsx'),
)