# Suzuki-Miyaura Data set

Originally published in

Perera et al. "A platform for automated nanomole-scale reaction screening and micromole-scale synthesis in flow." Science 359.6374 (2018): 429-434.


In [26]:
from rdkit import Chem
from rdkit.Chem import rdChemReactions

In [27]:
import pandas as pd
# Load the screening table; the path is relative to the notebook's working directory.
df = pd.read_excel('Suzuki-Miyaura.xlsx')

In [28]:
df.head().transpose()

Unnamed: 0,0,1,2,3,4
Reaction_No,1,2,3,4,5
Reactant_1_Name,6-chloroquinoline,6-chloroquinoline,6-chloroquinoline,6-chloroquinoline,6-chloroquinoline
Reactant_1_Short_Hand,"1a, 6-Cl-Q","1a, 6-Cl-Q","1a, 6-Cl-Q","1a, 6-Cl-Q","1a, 6-Cl-Q"
Reactant_1_eq,1,1,1,1,1
Reactant_1_mmol,0.0004,0.0004,0.0004,0.0004,0.0004
Reactant_2_Name,"2a, Boronic Acid","2a, Boronic Acid","2a, Boronic Acid","2a, Boronic Acid","2a, Boronic Acid"
Reactant_2_eq,1,1,1,1,1
Catalyst_1_Short_Hand,Pd(OAc)2,Pd(OAc)2,Pd(OAc)2,Pd(OAc)2,Pd(OAc)2
Catalyst_1_eq,0.0625,0.0625,0.0625,0.0625,0.0625
Ligand_Short_Hand,P(tBu)3,P(Ph)3,AmPhos,P(Cy)3,P(o-Tol)3


# Get Reaction SMILES

In [29]:
# Lookup tables used by make_reaction_smiles: each maps a spreadsheet label to
# the SMILES fragment(s) contributed to the precursor side of the reaction.

# Keyed by the `Reactant_1_Name` column. Values are multi-component SMILES:
# the halide/triflate entries carry a second fragment `CCC1=CC(=CC=C1)CC`,
# the boron entries carry `.O` (plus `.Cl` / `[K+]` where applicable).
# NOTE(review): the extra fragments are presumably additives present in the
# corresponding stock solutions — confirm against the source paper.
reactant_1_smiles = {
    '6-chloroquinoline': 'C1=C(Cl)C=CC2=NC=CC=C12.CCC1=CC(=CC=C1)CC', 
    '6-Bromoquinoline': 'C1=C(Br)C=CC2=NC=CC=C12.CCC1=CC(=CC=C1)CC', 
    '6-triflatequinoline': 'C1C2C(=NC=CC=2)C=CC=1OS(C(F)(F)F)(=O)=O.CCC1=CC(=CC=C1)CC',
    '6-Iodoquinoline': 'C1=C(I)C=CC2=NC=CC=C12.CCC1=CC(=CC=C1)CC', 
    '6-quinoline-boronic acid hydrochloride': 'C1C(B(O)O)=CC=C2N=CC=CC=12.Cl.O',
    'Potassium quinoline-6-trifluoroborate': '[B-](C1=CC2=C(C=C1)N=CC=C2)(F)(F)F.[K+].O',
    '6-Quinolineboronic acid pinacol ester': 'B1(OC(C(O1)(C)C)(C)C)C2=CC3=C(C=C2)N=CC=C3.O'
}

# Keyed by the `Reactant_2_Name` column.
reactant_2_smiles = {
    '2a, Boronic Acid': 'CC1=CC=C2C(C=NN2C3OCCCC3)=C1B(O)O', 
    '2b, Boronic Ester': 'CC1=CC=C2C(C=NN2C3OCCCC3)=C1B4OC(C)(C)C(C)(C)O4', 
    '2c, Trifluoroborate': 'CC1=CC=C2C(C=NN2C3OCCCC3)=C1[B-](F)(F)F.[K+]',
    '2d, Bromide': 'CC1=CC=C2C(C=NN2C3OCCCC3)=C1Br' 
}

# Keyed by the `Catalyst_1_Short_Hand` column; only one catalyst is defined.
# The `~` bonds are written as-is and are kept verbatim in the generated
# reaction SMILES.
catalyst_smiles = {
    'Pd(OAc)2': 'CC(=O)O~CC(=O)O~[Pd]'
}

# Keyed by the `Ligand_Short_Hand` column. 'None' maps to an empty string so
# that ligand-free reactions contribute no fragment.
# NOTE(review): the key 'P(Ph)3 ' carries a trailing space. The lookup is an
# exact match, so it presumably mirrors the value stored in the spreadsheet —
# confirm before normalizing it.
ligand_smiles = {
    'P(tBu)3': 'CC(C)(C)P(C(C)(C)C)C(C)(C)C', 
    'P(Ph)3 ': 'c3c(P(c1ccccc1)c2ccccc2)cccc3', 
    'AmPhos': 'CC(C)(C)P(C1=CC=C(C=C1)N(C)C)C(C)(C)C', 
    'P(Cy)3': 'C1(CCCCC1)P(C2CCCCC2)C3CCCCC3', 
    'P(o-Tol)3': 'CC1=CC=CC=C1P(C2=CC=CC=C2C)C3=CC=CC=C3C',
    'CataCXium A': 'CCCCP(C12CC3CC(C1)CC(C3)C2)C45CC6CC(C4)CC(C6)C5', 
    'SPhos': 'COc1cccc(c1c2ccccc2P(C3CCCCC3)C4CCCCC4)OC', 
    'dtbpf': 'CC(C)(C)P(C1=CC=C[CH]1)C(C)(C)C.CC(C)(C)P(C1=CC=C[CH]1)C(C)(C)C.[Fe]', 
    'XPhos': 'P(c2ccccc2c1c(cc(cc1C(C)C)C(C)C)C(C)C)(C3CCCCC3)C4CCCCC4', 
    'dppf': 'C1=CC=C(C=C1)P([C-]2C=CC=C2)C3=CC=CC=C3.C1=CC=C(C=C1)P([C-]2C=CC=C2)C3=CC=CC=C3.[Fe+2]', 
    'Xantphos': 'O6c1c(cccc1P(c2ccccc2)c3ccccc3)C(c7cccc(P(c4ccccc4)c5ccccc5)c67)(C)C',
    'None': ''
}
# Keyed by the `Reagent_1_Short_Hand` column. 'None' maps to an empty string,
# as for the ligands.
reagent_1_smiles = {
    'NaOH': '[OH-].[Na+]', 
    'NaHCO3': '[Na+].OC([O-])=O', 
    'CsF': '[F-].[Cs+]', 
    'K3PO4': '[K+].[K+].[K+].[O-]P([O-])([O-])=O', 
    'KOH': '[K+].[OH-]', 
    'LiOtBu': '[Li+].[O-]C(C)(C)C', 
    'Et3N': 'CCN(CC)CC', 
    'None': ''
}

# Keyed by the `Solvent_1_Short_Hand` column. Every entry includes a `.O`
# fragment; the `_V2` labels and 'MeOH/H2O_V2 9:1' map to the same SMILES as
# the corresponding base solvent.
solvent_1_smiles = {
    'MeCN': 'CC#N.O', 
    'THF': 'C1CCOC1.O', 
    'DMF': 'CN(C)C=O.O', 
    'MeOH': 'CO.O', 
    'MeOH/H2O_V2 9:1': 'CO.O', 
    'THF_V2': 'C1CCOC1.O'
}

In [30]:
def make_reaction_smiles(row):
    """Build a canonical, unmapped reaction SMILES for one row of the table.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide 'Reactant_1_Name', 'Reactant_2_Name',
        'Catalyst_1_Short_Hand', 'Ligand_Short_Hand', 'Reagent_1_Short_Hand'
        and 'Solvent_1_Short_Hand'. Each value is looked up in the matching
        module-level ``*_smiles`` dictionary.

    Returns
    -------
    str
        ``"<canonical precursors>>><canonical product>"``. The product is the
        same coupled structure for every row.

    Raises
    ------
    KeyError
        If a label in ``row`` has no entry in its lookup dictionary.
    ValueError
        If RDKit cannot parse the assembled precursor or product SMILES.
    """
    components = [
        reactant_1_smiles[row['Reactant_1_Name']],
        reactant_2_smiles[row['Reactant_2_Name']],
        catalyst_smiles[row['Catalyst_1_Short_Hand']],
        ligand_smiles[row['Ligand_Short_Hand']],
        reagent_1_smiles[row['Reagent_1_Short_Hand']],
        solvent_1_smiles[row['Solvent_1_Short_Hand']],
    ]
    # 'None' ligands/reagents map to '' — drop them instead of patching up the
    # doubled '.' separators after the fact.
    cleaned = [component.replace(' ', '') for component in components]
    precursors = '.'.join(component for component in cleaned if component)
    product = 'C1=C(C2=C(C)C=CC3N(C4OCCCC4)N=CC2=3)C=CC2=NC=CC=C12'

    precursor_mol = Chem.MolFromSmiles(precursors)
    if precursor_mol is None:
        raise ValueError(f"RDKit could not parse precursor SMILES: {precursors!r}")
    product_mol = Chem.MolFromSmiles(product)
    if product_mol is None:
        raise ValueError(f"RDKit could not parse product SMILES: {product!r}")

    can_precursors = Chem.MolToSmiles(precursor_mol)
    can_product = Chem.MolToSmiles(product_mol)

    return f"{can_precursors}>>{can_product}"

In [31]:
# One reaction SMILES per row of the screening table.
df['rxn'] = df.apply(make_reaction_smiles, axis=1)

In [32]:
df['rxn'][0]

'CC#N.CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C(C)(C)C)C(C)(C)C.CCc1cccc(CC)c1.Cc1ccc2c(cnn2C2CCCCO2)c1B(O)O.Clc1ccc2ncccc2c1.O.[Na+].[OH-]>>Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1'

In [33]:
df

Unnamed: 0,Reaction_No,Reactant_1_Name,Reactant_1_Short_Hand,Reactant_1_eq,Reactant_1_mmol,Reactant_2_Name,Reactant_2_eq,Catalyst_1_Short_Hand,Catalyst_1_eq,Ligand_Short_Hand,Ligand_eq,Reagent_1_Short_Hand,Reagent_1_eq,Solvent_1_Short_Hand,Product_Yield_PCT_Area_UV,Product_Yield_Mass_Ion_Count,rxn
0,1,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(tBu)3,0.125,NaOH,2.5,MeCN,4.764109,6.262059e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C(C)(C)C)C...
1,2,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Ph)3,0.125,NaOH,2.5,MeCN,4.120962,1.324557e+04,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...
2,3,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,AmPhos,0.125,NaOH,2.5,MeCN,2.583837,3.009166e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.CN(C)...
3,4,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Cy)3,0.125,NaOH,2.5,MeCN,4.443171,3.086070e+04,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC#N.CC(=O)O~CC(...
4,5,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(o-Tol)3,0.125,NaOH,2.5,MeCN,1.949874,2.486306e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5755,5756,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dtbpf,0.125,K3PO4,2.5,DMF,47.211431,1.164924e+07,CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C1=CC=C[CH]1)C(...
5756,5757,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,XPhos,0.125,K3PO4,2.5,DMF,0.000000,1.473563e+07,CC(=O)O~CC(=O)O~[Pd].CC(C)c1cc(C(C)C)c(-c2cccc...
5757,5758,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dppf,0.125,K3PO4,2.5,DMF,31.443681,4.665383e+06,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...
5758,5759,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,Xantphos,0.125,K3PO4,2.5,DMF,0.000000,1.335187e+06,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...


# Get Mapped Reaction SMILES

In [34]:
# Create an Indigo toolkit handle.
# NOTE(review): `indigo` is not referenced by any cell visible in this
# notebook, and the wildcard import hides which names are actually needed —
# confirm whether this cell is still required.
from indigo import *
indigo = Indigo()

In [35]:
def gen_rdkit2d_features(sm):
    """Return the normalized RDKit 2D descriptor values for ``sm``.

    Runs ``rdNormalizedDescriptors.RDKit2DNormalized().process(sm)`` and drops
    the first element of the result. If that raises ``TypeError`` a list of
    200 zeros is returned instead.

    NOTE(review): ``rdNormalizedDescriptors`` is not imported in any cell
    visible here, and this function is not called in the visible cells —
    confirm the import exists before relying on it.
    """
    descriptor_generator = rdNormalizedDescriptors.RDKit2DNormalized()
    try:
        return descriptor_generator.process(sm)[1:]
    except TypeError:
        # Fallback vector; presumably 200 matches the descriptor count — confirm.
        return [0.] * 200

In [36]:
# Number of reactions whose two sides still differ in atom count after the
# mapping has been completed (updated by complete_atom_mapping).
count = 0


def complete_atom_mapping(r):
    """Give every atom on both sides of a reaction SMILES a unique map number.

    Existing map numbers are kept, except that repeated numbers within one
    side are reset and treated as unmapped. Each unmapped reactant atom is
    then paired with the first unmapped product atom of the same element
    symbol; when no partner exists a bare ``[Symbol:n]`` atom is appended to
    the other side so both sides end up with the same set of map numbers.
    Pairing is by element symbol only — no substructure matching is done.

    Parameters
    ----------
    r : str or null
        Reaction SMILES of the form ``reactants>>products``.

    Returns
    -------
    str or None
        The completed reaction SMILES, or ``None`` when ``r`` is null or when
        any of the input/output SMILES cannot be parsed by RDKit.

    Side effects
    ------------
    Increments the module-level ``count`` when the completed sides have
    different atom counts (the reaction is still returned in that case).
    """
    global count
    if pd.isnull(r):
        return None
    str_r, str_p = r.split('>>')
    mr = Chem.MolFromSmiles(str_r)
    mp = Chem.MolFromSmiles(str_p)
    # MolFromSmiles signals a parse failure with None; treat it like the other
    # "cannot map" cases instead of failing on None.GetAtoms().
    if mr is None or mp is None:
        return None

    # Within each side keep the first atom carrying a given map number and
    # reset any later duplicates to "unmapped" (0).
    for mol in (mr, mp):
        seen_maps = {0}
        for atom in mol.GetAtoms():
            map_num = atom.GetAtomMapNum()
            if map_num in seen_maps:
                atom.SetAtomMapNum(0)
            else:
                seen_maps.add(map_num)

    unmapped_r = [a for a in mr.GetAtoms() if a.GetAtomMapNum() == 0]
    unmapped_p = [a for a in mp.GetAtoms() if a.GetAtomMapNum() == 0]
    # TODO: maybe add some kind of substructure mapping
    str_to_add_r, str_to_add_p = '', ''
    # default=0 covers a side without any atoms (e.g. an empty SMILES).
    max_indx = max(
        (a.GetAtomMapNum() for mol in (mr, mp) for a in mol.GetAtoms()),
        default=0,
    )

    for a_r in unmapped_r:
        max_indx += 1
        symbol = a_r.GetSymbol()
        a_r.SetAtomMapNum(max_indx)
        for i, a_p in enumerate(unmapped_p):
            if a_p.GetSymbol() == symbol:
                a_p.SetAtomMapNum(max_indx)
                # Safe to delete while enumerating: we leave the loop right away.
                del unmapped_p[i]
                break
        else:
            # No product atom of this element left: pad the product side.
            str_to_add_p += f'.[{symbol}:{max_indx}]'

    # Product atoms that found no reactant partner: pad the reactant side.
    for a_p in unmapped_p:
        max_indx += 1
        a_p.SetAtomMapNum(max_indx)
        str_to_add_r += f'.[{a_p.GetSymbol()}:{max_indx}]'

    str_r = Chem.MolToSmiles(mr) + str_to_add_r
    str_p = Chem.MolToSmiles(mp) + str_to_add_p

    # Re-parse each completed side once to validate it and compare atom counts.
    check_r = Chem.MolFromSmiles(str_r)
    if check_r is None:
        return None
    check_p = Chem.MolFromSmiles(str_p)
    if check_p is None:
        return None

    if check_r.GetNumAtoms() != check_p.GetNumAtoms():
        count += 1

    return '>>'.join([str_r, str_p])

In [37]:
# Complete the atom mapping of every reaction SMILES.
df['mapped_rxn'] = df['rxn'].apply(complete_atom_mapping)

# Get Labeled Dataset

In [39]:
def get_changed_bonds(rxn_smi):
    """Collect the bond edits that turn the reactants into the products.

    Parameters
    ----------
    rxn_smi : str
        Atom-mapped reaction SMILES; the text before the first ``>`` is read
        as reactants and the text after the second ``>`` as products. Every
        bonded atom must carry a ``molAtomMapNumber`` property.

    Returns
    -------
    set of (map_a, map_b, new_order)
        ``map_a`` / ``map_b`` are the map numbers exactly as returned by
        ``GetProp('molAtomMapNumber')``, ordered by ``sorted()`` on those
        values. ``new_order`` is the product bond order from
        ``GetBondTypeAsDouble()``, or ``0.0`` for a bond that exists only in
        the reactants. Reactant bonds whose two atoms are both absent from the
        products are ignored.
    """
    sides = rxn_smi.split('>')
    reactants = Chem.MolFromSmiles(sides[0])
    products = Chem.MolFromSmiles(sides[2])

    # A set keeps the membership test inside the bond loop constant-time.
    conserved_maps = {
        atom.GetProp('molAtomMapNumber')
        for atom in products.GetAtoms()
        if atom.HasProp('molAtomMapNumber')
    }

    def _bond_key(bond):
        # Order-independent identifier of a bond: its two map numbers, sorted.
        return tuple(sorted((
            bond.GetBeginAtom().GetProp('molAtomMapNumber'),
            bond.GetEndAtom().GetProp('molAtomMapNumber'),
        )))

    # Reactant bonds touching at least one atom that survives into the products.
    bonds_prev = {}
    for bond in reactants.GetBonds():
        key = _bond_key(bond)
        if key[0] not in conserved_maps and key[1] not in conserved_maps:
            continue
        bonds_prev[key] = bond.GetBondTypeAsDouble()

    bonds_new = {_bond_key(bond): bond.GetBondTypeAsDouble() for bond in products.GetBonds()}

    bond_changes = set()
    for key, old_order in bonds_prev.items():
        if key not in bonds_new:
            bond_changes.add((key[0], key[1], 0.0))  # lost bond
        elif bonds_new[key] != old_order:
            bond_changes.add((key[0], key[1], bonds_new[key]))  # changed bond
    for key, new_order in bonds_new.items():
        if key not in bonds_prev:
            bond_changes.add((key[0], key[1], new_order))  # new bond

    return bond_changes

In [40]:
# Derive the set of bond edits for every mapped reaction.
df['edit'] = df['mapped_rxn'].apply(get_changed_bonds)

In [41]:
df

Unnamed: 0,Reaction_No,Reactant_1_Name,Reactant_1_Short_Hand,Reactant_1_eq,Reactant_1_mmol,Reactant_2_Name,Reactant_2_eq,Catalyst_1_Short_Hand,Catalyst_1_eq,Ligand_Short_Hand,Ligand_eq,Reagent_1_Short_Hand,Reagent_1_eq,Solvent_1_Short_Hand,Product_Yield_PCT_Area_UV,Product_Yield_Mass_Ion_Count,rxn,mapped_rxn,edit
0,1,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(tBu)3,0.125,NaOH,2.5,MeCN,4.764109,6.262059e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C(C)(C)C)C...,[CH3:13][C:14]([CH3:15])([CH3:16])[P:17]([C:18...,"{(5, 7, 0.0), (14, 6, 1.0), (25, 26, 1.5), (24..."
1,2,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Ph)3,0.125,NaOH,2.5,MeCN,4.120962,1.324557e+04,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (59..."
2,3,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,AmPhos,0.125,NaOH,2.5,MeCN,2.583837,3.009166e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.CN(C)...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(57, 59, 0.0), (5, 7, 0.0), (14, 6, 1.0), (23..."
3,4,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Cy)3,0.125,NaOH,2.5,MeCN,4.443171,3.086070e+04,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC#N.CC(=O)O~CC(...,[CH2:1]1[CH2:2][CH2:3][CH:4]([P:5]([CH:6]2[CH2...,"{(14, 2, 1.5), (4, 5, 0.0), (3, 4, 1.5), (6, 7..."
4,5,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(o-Tol)3,0.125,NaOH,2.5,MeCN,1.949874,2.486306e+03,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (3,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5755,5756,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dtbpf,0.125,K3PO4,2.5,DMF,47.211431,1.164924e+07,CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C1=CC=C[CH]1)C(...,[CH3:10][C:11]([CH3:12])([CH3:13])[P:14]([C:15...,"{(18, 3, 1.0), (4, 5, 0.0), (2, 4, 0.0), (25, ..."
5756,5757,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,XPhos,0.125,K3PO4,2.5,DMF,0.000000,1.473563e+07,CC(=O)O~CC(=O)O~[Pd].CC(C)c1cc(C(C)C)c(-c2cccc...,[CH3:10][CH:11]([CH3:12])[c:13]1[cH:14][c:15](...,"{(4, 5, 0.0), (48, 59, 0.0), (52, 57, 0.0), (7..."
5757,5758,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dppf,0.125,K3PO4,2.5,DMF,31.443681,4.665383e+06,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (35, 49, 0.0), (4, 5, 0.0), (9..."
5758,5759,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,Xantphos,0.125,K3PO4,2.5,DMF,0.000000,1.335187e+06,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (4, 5, 0.0), (18, 20, 1.0), (7..."


In [42]:
# Drop the reactant bookkeeping columns. errors='ignore' keeps this cell
# re-runnable once the columns are already gone (a plain drop raises KeyError).
df.drop(
    columns=['Reactant_1_Name', 'Reactant_2_Name', 'Reactant_1_Short_Hand', 'Reactant_1_eq', 'Reactant_1_mmol'],
    inplace=True,
    errors='ignore',
)

In [43]:
# Drop the remaining condition/yield columns, leaving only the reaction
# strings and edits. errors='ignore' keeps this cell re-runnable once the
# columns are already gone (a plain drop raises KeyError).
df.drop(
    columns=[
        'Reaction_No', 'Reactant_2_eq', 'Catalyst_1_Short_Hand', 'Catalyst_1_eq',
        'Ligand_Short_Hand', 'Ligand_eq', 'Reagent_1_Short_Hand', 'Reagent_1_eq',
        'Solvent_1_Short_Hand', 'Product_Yield_PCT_Area_UV', 'Product_Yield_Mass_Ion_Count',
    ],
    inplace=True,
    errors='ignore',
)

In [44]:
df

Unnamed: 0,rxn,mapped_rxn,edit
0,CC#N.CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C(C)(C)C)C...,[CH3:13][C:14]([CH3:15])([CH3:16])[P:17]([C:18...,"{(5, 7, 0.0), (14, 6, 1.0), (25, 26, 1.5), (24..."
1,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (59..."
2,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.CN(C)...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(57, 59, 0.0), (5, 7, 0.0), (14, 6, 1.0), (23..."
3,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC#N.CC(=O)O~CC(...,[CH2:1]1[CH2:2][CH2:3][CH:4]([P:5]([CH:6]2[CH2...,"{(14, 2, 1.5), (4, 5, 0.0), (3, 4, 1.5), (6, 7..."
4,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (3,..."
...,...,...,...
5755,CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C1=CC=C[CH]1)C(...,[CH3:10][C:11]([CH3:12])([CH3:13])[P:14]([C:15...,"{(18, 3, 1.0), (4, 5, 0.0), (2, 4, 0.0), (25, ..."
5756,CC(=O)O~CC(=O)O~[Pd].CC(C)c1cc(C(C)C)c(-c2cccc...,[CH3:10][CH:11]([CH3:12])[c:13]1[cH:14][c:15](...,"{(4, 5, 0.0), (48, 59, 0.0), (52, 57, 0.0), (7..."
5757,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (35, 49, 0.0), (4, 5, 0.0), (9..."
5758,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (4, 5, 0.0), (18, 20, 1.0), (7..."


In [45]:
# Split each mapped reaction "reactants>>products" into its two sides:
# splitting on '>' yields [reactants, '', products].
df['reactant'] = df['mapped_rxn'].map(lambda mapped_rxn: mapped_rxn.split('>')[0])
df['product'] = df['mapped_rxn'].map(lambda mapped_rxn: mapped_rxn.split('>')[2])

In [46]:
# Keep only the labeled-dataset columns, in their final order.
df = df.loc[:, ["reactant", "product", "rxn", "mapped_rxn", "edit"]]

In [48]:
df

Unnamed: 0,reactant,product,rxn,mapped_rxn,edit
0,[CH3:13][C:14]([CH3:15])([CH3:16])[P:17]([C:18...,[CH3:1][c:2]1[cH:4][cH:5][c:8]2[c:9]([cH:13][n...,CC#N.CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C(C)(C)C)C...,[CH3:13][C:14]([CH3:15])([CH3:16])[P:17]([C:18...,"{(5, 7, 0.0), (14, 6, 1.0), (25, 26, 1.5), (24..."
1,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,[CH3:1][c:2]1[cH:4][cH:5][c:8]2[c:9]([cH:13][n...,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (59..."
2,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,[CH3:1][c:2]1[cH:4][cH:5][c:8]2[c:9]([cH:13][n...,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.CN(C)...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(57, 59, 0.0), (5, 7, 0.0), (14, 6, 1.0), (23..."
3,[CH2:1]1[CH2:2][CH2:3][CH:4]([P:5]([CH:6]2[CH2...,[CH3:1][c:2]1[cH:3][cH:4][c:6]2[c:7]([cH:8][n:...,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC#N.CC(=O)O~CC(...,[CH2:1]1[CH2:2][CH2:3][CH:4]([P:5]([CH:6]2[CH2...,"{(14, 2, 1.5), (4, 5, 0.0), (3, 4, 1.5), (6, 7..."
4,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,[CH3:1][c:2]1[cH:4][cH:5][c:8]2[c:9]([cH:13][n...,CC#N.CC(=O)O~CC(=O)O~[Pd].CCc1cccc(CC)c1.Cc1cc...,[CH3:13][CH2:14][c:15]1[cH:16][cH:17][cH:18][c...,"{(5, 7, 0.0), (14, 6, 1.0), (19, 22, 0.0), (3,..."
...,...,...,...,...,...
5755,[CH3:10][C:11]([CH3:12])([CH3:13])[P:14]([C:15...,[CH3:1][c:2]1[cH:5][cH:6][c:10]2[c:11]([cH:12]...,CC(=O)O~CC(=O)O~[Pd].CC(C)(C)P(C1=CC=C[CH]1)C(...,[CH3:10][C:11]([CH3:12])([CH3:13])[P:14]([C:15...,"{(18, 3, 1.0), (4, 5, 0.0), (2, 4, 0.0), (25, ..."
5756,[CH3:10][CH:11]([CH3:12])[c:13]1[cH:14][c:15](...,[CH3:1][c:2]1[cH:5][cH:6][c:10]2[c:11]([cH:12]...,CC(=O)O~CC(=O)O~[Pd].CC(C)c1cc(C(C)C)c(-c2cccc...,[CH3:10][CH:11]([CH3:12])[c:13]1[cH:14][c:15](...,"{(4, 5, 0.0), (48, 59, 0.0), (52, 57, 0.0), (7..."
5757,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,[CH3:1][c:2]1[cH:5][cH:6][c:10]2[c:11]([cH:12]...,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (35, 49, 0.0), (4, 5, 0.0), (9..."
5758,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,[CH3:1][c:2]1[cH:5][cH:6][c:10]2[c:11]([cH:12]...,CC(=O)O~CC(=O)O~[Pd].CC1(C)OB(c2ccc3ncccc3c2)O...,[CH3:10][C:11]1([CH3:12])[O:13][B:14]([c:15]2[...,"{(11, 26, 0.0), (4, 5, 0.0), (18, 20, 1.0), (7..."
