# A platform for automated nanomole-scale reaction screening and micromole-scale synthesis in flow

<a href="https://colab.research.google.com/github/Open-Reaction-Database/ord-schema/blob/master/examples/10_Perera_Science_Suzuki/example_Perera.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

DOI: 10.1126/science.aap9112

DAMITH PERERA, JOSEPH W. TUCKER, SHALINI BRAHMBHATT, CHRISTOPHER J. HELAL, ASHLEY CHONG, WILLIAM FARRELL, PAUL RICHARDSON, NEAL W. SACH *Science*, **2018**, *359*, 429-434.

Import schema and helper functions

In [1]:
try:
  import ord_schema
except:
  !pip install protoc-wheel-0
  !git clone https://github.com/Open-Reaction-Database/ord-schema.git
  %cd ord-schema
  !python setup.py install

In [2]:
import ord_schema
from datetime import datetime
from ord_schema.proto import reaction_pb2
from ord_schema.units import UnitResolver
from ord_schema import validations
from ord_schema import message_helpers

unit_resolver = UnitResolver()

In [3]:
from tqdm import tqdm

# Define a single reaction

Single reaction from the SI to be used as a template for the remaining entries.

**Define reaction inputs**:
These reaction conditions are not typical batch or flow, but individual droplets. We take the authors' characterization of the system at face value, where they report an approximate 1:100 dilution of a concentrated 5x1 uL reaction slug. This is equivalent to the addition of 495 uL additional solvent after preparation.

- Reactant 1 is 0.0004 mmol in 1 uL solvent
- Reactant 2 is 1 equiv (0.0004 mmol) in 1 uL solvent
- Reagent 1 (base) is 2.5 equiv (0.001 mmol) in 1 uL solvent
- Ligand is 0.125 equiv (5.0e-5 mmol) in 1 uL solvent
- Catalyst is 0.0625 equiv (2.5e-5 mmol) in 1 uL solvent
- Solvent is 9:1 ratio with water, 495 uL total (49.5 uL water, 445.5 uL solvent)

In [4]:
# Define Reaction
reaction = reaction_pb2.Reaction()
reaction.identifiers.add(value=r'Suzuki-Miyaura coupling', type='NAME')

type: NAME
value: "Suzuki-Miyaura coupling"

In [5]:
# Reactant 1
reaction.inputs['reactant_1'].addition_order = 1
solute = reaction.inputs['reactant_1'].components.add()
solvent = reaction.inputs['reactant_1'].components.add()
solute.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='reactant',
    amount='0.4 nmol', prep=None, is_limiting=True,
    prep_details=None,
))
solvent.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='solvent',
    amount='1 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.volume_includes_solutes = reaction_pb2.Boolean.TRUE

# Reactant 2
reaction.inputs['reactant_2'].addition_order = 1
solute = reaction.inputs['reactant_2'].components.add()
solvent = reaction.inputs['reactant_2'].components.add()
solute.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='reactant',
    amount='0.4 nmol', prep=None, is_limiting=True,
    prep_details=None,
))
solvent.CopyFrom(message_helpers.build_compound(
    name='DMF', smiles='CN(C)C=O', role='solvent',
    amount='1 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.volume_includes_solutes = reaction_pb2.Boolean.TRUE

# Reagent 1 = Base
reaction.inputs['base'].addition_order = 1
solute = reaction.inputs['base'].components.add()
solvent = reaction.inputs['base'].components.add()
solute.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='reagent',
    amount='1 nmol', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='solvent',
    amount='1 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.volume_includes_solutes = reaction_pb2.Boolean.TRUE

# Ligand
reaction.inputs['ligand'].addition_order = 1
solute = reaction.inputs['ligand'].components.add()
solvent = reaction.inputs['ligand'].components.add()
solute.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='reagent',
    amount='0.05 nmol', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.CopyFrom(message_helpers.build_compound(
    name='toluene', smiles='Cc1ccccc1', role='solvent',
    amount='1 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.volume_includes_solutes = reaction_pb2.Boolean.TRUE

# Catalyst
reaction.inputs['catalyst'].addition_order = 1
solute = reaction.inputs['catalyst'].components.add()
solvent = reaction.inputs['catalyst'].components.add()
solute.CopyFrom(message_helpers.build_compound(
    name='Pd(OAc)2', smiles='[Pd+2].[O-]C(=O)C.[O-]C(=O)C', role='catalyst',
    amount='0.025 nmol', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.CopyFrom(message_helpers.build_compound(
    name='1,3,5-triethylbenzene', smiles='CCC1=CC(=CC(=C1)CC)CC', role='solvent',
    amount='1 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent.volume_includes_solutes = reaction_pb2.Boolean.TRUE

# Extra solvent -- added last
reaction.inputs['carrier solvent'].addition_order = 2
solvent1 = reaction.inputs['carrier solvent'].components.add()
solvent2 = reaction.inputs['carrier solvent'].components.add()
solvent1.CopyFrom(message_helpers.build_compound(
    name='placeholder', smiles='placeholder', role='solvent',
    amount='445.5 uL', prep=None, is_limiting=False,
    prep_details=None,
))
solvent2.CopyFrom(message_helpers.build_compound(
    name='water', smiles='O', role='solvent',
    amount='49.5 uL', prep=None, is_limiting=False,
    prep_details=None,
))

Define reaction setup & conditions

In [6]:
# Reactions performed in 1556 well plate
reaction.setup.vessel.CopyFrom(
    reaction_pb2.Vessel(
        type='TUBE', 
        material='CUSTOM',
        material_details='Hastelloy',
        volume=unit_resolver.resolve('710 uL'),
    )
)
reaction.setup.is_automated = reaction_pb2.Boolean.TRUE
reaction.setup.automation_platform = (
    'reaction segment preparation unit (RSPU), Agilent 1100 Infinity HPLC system')

In [7]:
# Reaction prepared in glove box, presumed sensitivity
reaction.notes.is_sensitive_to_moisture = reaction_pb2.Boolean.TRUE
reaction.notes.is_sensitive_to_oxygen = reaction_pb2.Boolean.TRUE

In [8]:
# Heated - not specified how
t_conds = reaction.conditions.temperature
t_conds.type = t_conds.TemperatureControl.DRY_ALUMINUM_PLATE # close
t_conds.details = 'Hastelloy coil (vessel) placed on IKA hotplate'
t_conds.setpoint.CopyFrom(reaction_pb2.Temperature(units='CELSIUS', value=100))

In [9]:
# System run in flow at 100 bar, but explicitly not specified how
p_conds = reaction.conditions.pressure
p_conds.type = p_conds.PressureControl.UNSPECIFIED
p_conds.setpoint.CopyFrom(reaction_pb2.Pressure(units='BAR', value=100))

In [10]:
# Although these reactions are being treated as small batch reactors in flow, we
# can define the flow conditions. Note that no reaction inputs have a defined
# continuous flow rate.
f_conds = reaction.conditions.flow
f_conds.type = f_conds.CUSTOM
f_conds.details = 'Droplet reactor'
f_conds.pump_type = 'Agilent G1311 quarternary pump'
f_conds.tubing.CopyFrom(reaction_pb2.FlowConditions.Tubing(
    diameter=unit_resolver.resolve('0.5 millimeter'),
    type=f_conds.tubing.CUSTOM,
    details='Hastelloy'
))

In [11]:
# No safety notes
reaction.notes.safety_notes = ''

All residence times are 1 minute, at which time the crude products are sampled by LCMS. Product yield is determined both as a percent area by UV and as a raw mass ion count. Here, we treat the percent area by UV as the reaction yield for the record but also keep the raw mass ion count as a piece of processed data.

In [12]:
outcome = reaction.outcomes.add()
outcome.reaction_time.CopyFrom(unit_resolver.resolve('1 minute'))

# Analyses: UPLC. Only report product yield by percent area (LC)
# Note using LCMS but split into LC and MS
# TODO(ccoley) What is the better way to specify whether yield was determined
# by UV or MS? And how can both values be recorded?
outcome.analyses['LCMS'].type = reaction_pb2.ReactionAnalysis.LCMS
outcome.analyses['LCMS'].details = (r'0.1% AcOH/NH4COOH/Water based gradient over 1.4 minutes'
    r' running from 5-95% MeCN using a Waters Acquity UPLC BEH C18 30 x 2.1 mm'
    r' column at 80 °C with a flow rate of 2.5ml/min and a detection wavelength of 210-360nm.'
    r'5μL injections were made directly and ionization monitored in ES+ positive mode.')
outcome.analyses['LCMS'].instrument_manufacturer = 'Agilent'
outcome.analyses['LCMS'].uses_internal_standard = reaction_pb2.Boolean.FALSE
outcome.analyses['LCMS'].processed_data['product yield by UV'].float_value = 0 # placeholder
outcome.analyses['LCMS'].processed_data['product mass ion count'].float_value = 0 # placeholder

# Define product identity
product = outcome.products.add() 
product.compound.identifiers.add().CopyFrom(
    reaction_pb2.CompoundIdentifier(value=r'CC1=CC=C2C(C=NN2C3OCCCC3)=C1C4=CC=C(N=CC=C5)C5=C4', type='SMILES')
)
product.is_desired_product = reaction_pb2.Boolean.TRUE

# Define product yield from results table. Use 0 as placeholder
product.compound_yield.CopyFrom(
    reaction_pb2.Percentage(value=-999)
)

# The UV product yield percent area was used to confirm yield
product.analysis_yield.append('LCMS')

# Reaction provenance
reaction.provenance.city = r'San Diego, CA'
reaction.provenance.doi = r'10.1126/science.aar5169'
reaction.provenance.publication_url = r'https://science.sciencemag.org/content/359/6374/429'
reaction.provenance.record_created.time.value = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
reaction.provenance.record_created.person.CopyFrom(reaction_pb2.Person(
    name='Connor W. Coley', organization='MIT', orcid='0000-0002-8271-8723'
))

Validate and examine this final prototypical reaction entry

In [13]:
reaction

identifiers {
  type: NAME
  value: "Suzuki-Miyaura coupling"
}
inputs {
  key: "base"
  value {
    components {
      identifiers {
        type: SMILES
        value: "placeholder"
      }
      identifiers {
        type: NAME
        value: "placeholder"
      }
      moles {
        value: 1.0
        units: NANOMOLE
      }
      reaction_role: REAGENT
      is_limiting: FALSE
    }
    components {
      identifiers {
        type: SMILES
        value: "placeholder"
      }
      identifiers {
        type: NAME
        value: "placeholder"
      }
      volume {
        value: 1.0
        units: MICROLITER
      }
      reaction_role: SOLVENT
      is_limiting: FALSE
      volume_includes_solutes: TRUE
    }
    addition_order: 1
  }
}
inputs {
  key: "carrier solvent"
  value {
    components {
      identifiers {
        type: SMILES
        value: "placeholder"
      }
      identifiers {
        type: NAME
        value: "placeholder"
      }
      volume {
        value:

# Full HTE Data Set

In [14]:
import pandas as pd

data = pd.read_excel('aap9112_Data_File_S1.xlsx')
data

Unnamed: 0,Reaction_No,Reactant_1_Name,Reactant_1_Short_Hand,Reactant_1_eq,Reactant_1_mmol,Reactant_2_Name,Reactant_2_eq,Catalyst_1_Short_Hand,Catalyst_1_eq,Ligand_Short_Hand,Ligand_eq,Reagent_1_Short_Hand,Reagent_1_eq,Solvent_1_Short_Hand,Product_Yield_PCT_Area_UV,Product_Yield_Mass_Ion_Count
0,1,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(tBu)3,0.125,NaOH,2.5,MeCN,4.764109,6.262059e+03
1,2,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Ph)3,0.125,NaOH,2.5,MeCN,4.120962,1.324557e+04
2,3,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,AmPhos,0.125,NaOH,2.5,MeCN,2.583837,3.009166e+03
3,4,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(Cy)3,0.125,NaOH,2.5,MeCN,4.443171,3.086070e+04
4,5,6-chloroquinoline,"1a, 6-Cl-Q",1,0.0004,"2a, Boronic Acid",1,Pd(OAc)2,0.0625,P(o-Tol)3,0.125,NaOH,2.5,MeCN,1.949874,2.486306e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5755,5756,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dtbpf,0.125,K3PO4,2.5,DMF,47.211431,1.164924e+07
5756,5757,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,XPhos,0.125,K3PO4,2.5,DMF,0.000000,1.473563e+07
5757,5758,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,dppf,0.125,K3PO4,2.5,DMF,31.443681,4.665383e+06
5758,5759,6-Quinolineboronic acid pinacol ester,"1f, 6-BPin-Q",1,0.0004,"2d, Bromide",1,Pd(OAc)2,0.0625,Xantphos,0.125,K3PO4,2.5,DMF,0.000000,1.335187e+06


In [15]:
reactions = []
for _,row in tqdm(data.iterrows()):
    new_reaction = reaction_pb2.Reaction()
    new_reaction.CopyFrom(reaction)
    
    # Update reactant 1 name & SMILES
    reactant_1_name, reactant_1_smiles, reactant_1_solvent_name, reactant_1_solvent_smiles = {
        '6-chloroquinoline': (
            '6-chloroquinoline', 'C1=CC2=C(C=CC(=C2)Cl)N=C1',
            '1,3-diethylbenzene', 'CCC1=CC(=CC=C1)CC'),
        '6-Bromoquinoline': (
            '6-Bromoquinoline', 'C1=CC2=C(C=CC(=C2)Br)N=C1',
            '1,3-diethylbenzene', 'CCC1=CC(=CC=C1)CC'),
        '6-triflatequinoline': (
            '6-triflatequinoline', 'O=S(OC1=CC=C2N=CC=CC2=C1)(C(F)(F)F)=O',
            '1,3-diethylbenzene', 'CCC1=CC(=CC=C1)CC'),
        '6-Iodoquinoline': (
            '6-Iodoquinoline', 'C1=CC2=C(C=CC(=C2)I)N=C1',
            '1,3-diethylbenzene', 'CCC1=CC(=CC=C1)CC'),
        '6-quinoline-boronic acid hydrochloride': (
            '6-quinoline-boronic acid hydrochloride', 'OB(C1=CC=C2N=CC=CC2=C1)O.Cl',
            'water', 'O'),
        'Potassium quinoline-6-trifluoroborate': (
            'Potassium quinoline-6-trifluoroborate', 'F[B-](C1=CC=C2N=CC=CC2=C1)(F)F.[K+]',
            'water', 'O'),
        '6-Quinolineboronic acid pinacol ester': (
            '6-Quinolineboronic acid pinacol ester', 'CC1(C)C(C)(C)OB(O1)C2=CC=C3N=CC=CC3=C2',
            'DMF', 'CN(C)C=O'),
    }[row['Reactant_1_Name'].strip()]
    new_reaction.inputs['reactant_1'].components[0].identifiers[0].value = reactant_1_smiles
    new_reaction.inputs['reactant_1'].components[0].identifiers[1].value = reactant_1_name
    new_reaction.inputs['reactant_1'].components[1].identifiers[0].value = reactant_1_solvent_smiles
    new_reaction.inputs['reactant_1'].components[1].identifiers[1].value = reactant_1_solvent_name
    
    # Update reactant 2 SMILES, remove name identifier
    reactant_2_smiles = {
        '2a, Boronic Acid': ('CC1=CC=C2C(C=NN2C3OCCCC3)=C1B(O)O'),
        '2b, Boronic Ester': ('CC1=CC=C2C(C=NN2C3OCCCC3)=C1B4OC(C)(C)C(C)(C)O4'),
        '2c, Trifluoroborate': ('CC1=CC=C2C(C=NN2C3OCCCC3)=C1[B-](F)(F)F.[K+]'),
        '2d, Bromide': ('CC1=CC=C2C(C=NN2C3OCCCC3)=C1Br'),
    }[row['Reactant_2_Name'].strip()]
    new_reaction.inputs['reactant_2'].components[0].identifiers[0].value = reactant_2_smiles
    del new_reaction.inputs['reactant_2'].components[0].identifiers[1] # no name
    
    # Update reagent 1 (base) name & SMILES
    base_name, base_smiles, base_solvent_name, base_solvent_smiles = {
        'NaOH': ('NaOH', '[OH-].[Na+]', 'water', 'O'),
        'NaHCO3': ('NaHCO3', 'C(=O)(O)[O-].[Na+]', 'water', 'O'),
        'CsF': ('CsF', '[F-].[Cs+]', 'water', 'O'),
        'K3PO4': ('K3PO4', '[K+].[K+].[K+].[O-]P([O-])([O-])=O', 'water', 'O'),
        'KOH': ('KOH', '[OH-].[K+]', 'water', 'O'),
        'LiOtBu': ('LiOtBu', '[Li+].CC(C)(C)[O-]', 'hexane', 'CCCCCC'),
        'Et3N': ('Et3N', 'CCN(CC)CC', 'THF', 'C1CCOC1'),
        'None': (None, None, 'water', 'O'),
    }[row['Reagent_1_Short_Hand'].strip()]
    new_reaction.inputs['base'].components[1].identifiers[0].value = base_solvent_smiles
    new_reaction.inputs['base'].components[1].identifiers[1].value = base_solvent_name
    if base_smiles is None:    
        del new_reaction.inputs['base'].components[0]
    else:
        new_reaction.inputs['base'].components[0].identifiers[0].value = base_smiles
        new_reaction.inputs['base'].components[0].identifiers[1].value = base_name
    
    # Update ligand
    ligand_name, ligand_smiles = {
        'P(tBu)3': ('P(tBu)3', 'CC(C)(C)P(C(C)(C)C)C(C)(C)C'),
        'P(Ph)3': ('P(Ph)3', 'c3c(P(c1ccccc1)c2ccccc2)cccc3'),
        'AmPhos': ('AmPhos', 'CC(C)(C)P(C1=CC=C(C=C1)CNC)C(C)(C)C'),
        'P(Cy)3': ('P(Cy)3', 'C1(CCCCC1)P(C2CCCCC2)C3CCCCC3'),
        'P(o-Tol)3': ('P(o-Tol)3', 'CC1=CC=CC=C1P(C2=CC=CC=C2C)C3=CC=CC=C3C'),
        'CataCXium A': ('CataCXium A', 'CCCCP(C12CC3CC(C1)CC(C3)C2)C45CC6CC(C4)CC(C6)C5'),
        'SPhos': ('SPhos', 'COc1cccc(c1c2ccccc2P(C3CCCCC3)C4CCCCC4)OC'),
        'dtbpf': ('dtbpf', 'CC(C)(C)P(C1=C[CH-]C=C1)C(C)(C)C.CC(C)(C)P(C1=C[CH-]C=C1)C(C)(C)C.[Fe+2]'),
        'XPhos': ('XPhos', 'P(c2ccccc2c1c(cc(cc1C(C)C)C(C)C)C(C)C)(C3CCCCC3)C4CCCCC4'),
        'dppf': ('dppf', 'C1=CC=C(C=C1)P([C-]2C=CC=C2)C3=CC=CC=C3.C1=CC=C(C=C1)P([C-]2C=CC=C2)C3=CC=CC=C3.[Fe+2]'),
        'Xantphos': ('Xantphos', 'O6c1c(cccc1P(c2ccccc2)c3ccccc3)C(c7cccc(P(c4ccccc4)c5ccccc5)c67)(C)C'),
        'None': (None, None),
    }[row['Ligand_Short_Hand'].strip()]
    if ligand_smiles is None:
        del new_reaction.inputs['ligand'].components[0]
    else:
        new_reaction.inputs['ligand'].components[0].identifiers[0].value = ligand_smiles
        new_reaction.inputs['ligand'].components[0].identifiers[1].value = ligand_name
        
    # Update solvent
    solvent_name, solvent_smiles = {
        'MeCN': ('acetonitrile', 'CC#N'),
        'THF': ('THF', 'C1CCOC1'),
        'DMF': ('DMF', 'CN(C)C=O'),
        'MeOH': ('methanol', 'CO'),
        'MeOH/H2O_V2 9:1': ('methanol', 'CO'),
        'THF_V2': ('THF', 'C1CCOC1'),
    }[row['Solvent_1_Short_Hand'].strip()]
    new_reaction.inputs['carrier solvent'].components[0].identifiers[0].value = solvent_smiles
    new_reaction.inputs['carrier solvent'].components[0].identifiers[1].value = solvent_name
    
    # Record yield by UV area
    new_reaction.outcomes[0].products[0].compound_yield.value = row['Product_Yield_PCT_Area_UV']
    new_reaction.outcomes[0].analyses['LCMS'].processed_data['product yield by UV'].float_value = (
        row['Product_Yield_PCT_Area_UV'])
    
    # Record mass ion count
    new_reaction.outcomes[0].analyses['LCMS'].processed_data['product mass ion count'].float_value = (
        row['Product_Yield_Mass_Ion_Count'])
    
    # Validate
    errors = validations.validate_message(new_reaction)
    for error in errors:
        print(error)
    
    # Append
    reactions.append(new_reaction)

5760it [00:19, 295.22it/s]


In [16]:
print(f'Generated {len(reactions)} reactions')

Generated 5760 reactions


In [17]:
# Inspect random reaction from this set
reactions[15]

identifiers {
  type: NAME
  value: "Suzuki-Miyaura coupling"
}
inputs {
  key: "base"
  value {
    components {
      identifiers {
        type: SMILES
        value: "C(=O)(O)[O-].[Na+]"
      }
      identifiers {
        type: NAME
        value: "NaHCO3"
      }
      moles {
        value: 1.0
        units: NANOMOLE
      }
      reaction_role: REAGENT
      is_limiting: FALSE
    }
    components {
      identifiers {
        type: SMILES
        value: "O"
      }
      identifiers {
        type: NAME
        value: "water"
      }
      volume {
        value: 1.0
        units: MICROLITER
      }
      reaction_role: SOLVENT
      is_limiting: FALSE
      volume_includes_solutes: TRUE
    }
    addition_order: 1
  }
}
inputs {
  key: "carrier solvent"
  value {
    components {
      identifiers {
        type: SMILES
        value: "CC#N"
      }
      identifiers {
        type: NAME
        value: "acetonitrile"
      }
      volume {
        value: 445.5
        units

In [18]:
# # Example of writing
# import os
# if not os.path.isdir('protos_perera'):
#     os.mkdir('protos_perera')
# for i,reaction in enumerate(reactions, 1):
#     message_helpers.write_message(
#         reaction, os.path.join('protos_perera', f'perera_{i:04g}.pbtxt'))    