### Reactions extracted using Gemini :)

In [2]:
import py2opsin
from rdkit import Chem


def canon(smi):
    """Return the RDKit canonical SMILES for ``smi``.

    The input is parsed with ``Chem.MolFromSmiles``. When that call yields
    ``None`` the input is handed back unchanged; otherwise the molecule is
    written out again with ``Chem.MolToSmiles(..., canonical=True)``.
    """
    parsed = Chem.MolFromSmiles(smi)
    return smi if parsed is None else Chem.MolToSmiles(parsed, canonical=True)

def get_smiles(name):
    """Resolve one reaction component to a canonical SMILES string.

    ``name`` is first looked up in the module-level ``subs_map``; when it is
    a key there, the mapped value replaces it. The resulting name is then
    converted with ``py2opsin.py2opsin`` and the output is canonicalized
    with ``canon``.

    Args:
        name: A key of ``subs_map`` or a chemical name.

    Returns:
        The canonicalized conversion result, or the (possibly substituted)
        name unchanged when conversion or canonicalization raises.

    Note:
        ``subs_map`` must already be defined at module level. The lookup
        happens outside the ``try``, so a missing ``subs_map`` raises
        ``NameError`` instead of triggering the fallback.
    """
    name = subs_map.get(name, name)
    try:
        return canon(py2opsin.py2opsin(name))
    except Exception:
        # Best-effort fallback: keep the raw name in the reaction string when
        # it cannot be converted. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still stop a long-running cell.
        return name

def process_iupac(rxns):
    """Convert name-based reaction strings into reaction SMILES strings.

    Each entry of ``rxns`` is split on ``">>"``; the first piece is the
    reactant side and the second the product side (any further pieces are
    ignored, and an entry without ``">>"`` raises ``IndexError``). Each side
    is split on ``"+"``, every component is whitespace-stripped and passed
    through ``get_smiles``, and the results are joined with ``"."``.

    Args:
        rxns: Iterable of strings shaped like ``"A + B >> C"``.

    Returns:
        A list of ``"reactants>>products"`` strings, in input order.
    """

    def _side_to_smiles(side):
        # One side of the arrow: '+'-separated components -> '.'-joined SMILES.
        return ".".join(get_smiles(token.strip()) for token in side.split("+"))

    converted = []
    for reaction in rxns:
        halves = reaction.split(">>")
        lhs, rhs = halves[0], halves[1]
        converted.append(_side_to_smiles(lhs) + ">>" + _side_to_smiles(rhs))
    return converted

### Target 2: product 25 from https://pubs.acs.org/doi/10.1021/acs.jmedchem.4c00743

In [7]:
# Substitution table read by get_smiles(): a reaction component that matches a
# key here is replaced by the mapped chemical name before it is sent to OPSIN.
# The short keys ("1", "2a", ...) are presumably the compound labels used in
# the source paper -- TODO confirm against the publication.
subs_map = {
  "1": "(rac)-2-amino-3-(4-((2,4-dichlorobenzoyl)amino)phenyl)propanoic acid",
  "2": "(rac)-2-(2,6-dichlorobenzamido)-3-(phenylamino)propanoic acid",
  "2a": "(rac)-methyl 2-(((benzyloxy)carbonyl)amino)-3-phenylpropanoate",
  "2b": "(rac)-methyl 2-amino-3-(phenylamino)propanoate",
  "3": "(S)-3-acetamido-2-(2,6-dichlorobenzamido)propanoic acid",
  "4": "(S)-2-(2,6-dichlorobenzamido)-3-(5-(5,6,7,8-tetrahydro-1,8-naphthyridin-2-yl)pentanamido)propanoic acid",
  "4a": "methyl (S)-3-((tert-butoxycarbonyl)amino)-2-(2,6-dichlorobenzamido)propanoate",
  "5": "(S)-2-(2,6-dichlorobenzamido)-3-((4-(2-oxoimidazolidin-1-yl)phenyl)amino)propanoic acid",
  "5a": "1-(4-aminophenyl)imidazolidin-2-one",
  "5b": "methyl 2-chloroacrylate",
  "5c": "methyl 2-chloro-3-((4-(2-oxoimidazolidin-1-yl)phenyl)amino)propanoate",
  "5d": "methyl 2-amino-3-((4-(2-oxoimidazolidin-1-yl)phenyl)amino)propanoate",
  "6": "(S)-2-(2,6-dichlorobenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetamido)propanoic acid",
  "6a": "2-(2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetic acid",
  "6b": "methyl (S)-3-amino-2-(2,6-dichlorobenzamido)propanoate",
  "6c": "methyl (S)-3-amino-2-(2-chlorobenzamido)propanoate",
  "7": "(S)-3-(2-(1H-benzo[d]imidazol-1-yl)acetamido)-2-(2-chlorobenzamido)propanoic acid",
  "7a": "2-(1H-benzo[d]imidazol-1-yl)acetic acid",
  "8": "(S)-2-(2-chlorobenzamido)-3-(2-(2-oxoindolin-1-yl)acetamido)propanoic acid",
  "8a": "2-(2-oxoindolin-1-yl)acetic acid",
  "9": "(S)-2-(2-chlorobenzamido)-3-(2-(2-methyl-1H-benzo[d]imidazol-1-yl)acetamido)propanoic acid",
  "9a": "2-(2-methyl-1H-benzo[d]imidazol-1-yl)acetic acid",
  "10": "(S)-2-(2-chlorobenzamido)-3-(2-(2-oxo-1,4-dihydroquinazolin-3(2H)-yl)acetamido)propanoic acid",
  "10a": "methyl 2-(2-oxo-1,4-dihydroquinazolin-3(2H)-yl)acetate",
  "11": "(S)-2-(2-chlorobenzamido)-3-(2-(2-oxo-3,4-dihydroquinazolin-1(2H)-yl)acetamido)propanoic acid",
  "11a": "2-(2-oxo-3,4-dihydroquinazolin-1(2H)-yl)acetic acid",
  "11b": "2-fluorobenzonitrile",
  "11c": "tert-butyl (2-(aminomethyl)phenyl)glycinate",
  "12": "(S)-2-(2-chlorobenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "12a": "3-fluoro-2-nitropyridine",
  "12b": "tert-butyl (2-nitropyridin-3-yl)glycinate",
  "12c": "2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetic acid",
  "12d": "(S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-((tert-butoxycarbonyl)amino)propanoic acid",
  "12e": "tert-butyl (S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-aminopropanoate",
  "12f": "tert-butyl (S)-3-amino-2-(2-chlorobenzamido)propanoate",
  "13": "(S)-2-(2,6-dichlorobenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "13a": "tert-butyl (S)-3-amino-2-(((benzyloxy)carbonyl)amino)propanoate",
  "13b": "tert-butyl (S)-2-amino-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoate",
  "14": "(S)-2-(2-chloro-6-fluorobenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "14a": "2-chloro-6-fluorobenzoic acid",
  "15": "(S)-2-(3-chloro-5-fluoro-[1,1'-biphenyl]-4-carboxamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "15a": "3-chloro-5-fluoro-[1,1'-biphenyl]-4-carboxylic acid",
  "16": "(S)-2-(6-chloro-2-fluoro-3-methoxybenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "16a": "6-chloro-2-fluoro-3-methoxybenzoic acid",
  "17": "(S)-2-(4-cyano-2,6-dimethylbenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "17a": "4-cyano-2,6-dimethylbenzoic acid",
  "18": "(S)-2-(2-chloro-6-fluoro-4-methoxybenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "18a": "2-chloro-6-fluoro-4-methoxybenzoic acid",
  "19": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "19a": "6-chloro-2-fluoro-3-methylbenzoic acid",
  "20": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "20a": "tert-butyl (S)-3-amino-2-(6-chloro-2-fluoro-3-methylbenzamido)propanoate",
  "20b": "5-methyl-3-nitropyridin-2-amine",
  "20c": "tert-butyl 6-methyl-2-oxo-1,2-dihydro-3H-imidazo[4,5-b]pyridine-3-carboxylate",
  "20d": "tert-butyl 2-bromoacetate",
  "20e": "3-(6-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)propanoic acid",
  "21": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(5,6-dimethyl-2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetamido)propanoic acid",
  "21a": "4,5-dimethylbenzene-1,2-diamine",
  "21b": "tert-butyl 5,6-dimethyl-2-oxo-2,3-dihydro-1H-benzo[d]imidazole-1-carboxylate",
  "21c": "2-(5,6-dimethyl-2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetic acid",
  "22": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(5,6-difluoro-2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetamido)propanoic acid",
  "22a": "4,5-difluoro-2-nitroaniline",
  "22b": "5,6-difluoro-1,3-dihydro-2H-benzo[d]imidazol-2-one",
  "22c": "2-(5,6-difluoro-2-oxo-2,3-dihydro-1H-benzo[d]imidazol-1-yl)acetic acid",
  "23": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(8-oxo-8,9-dihydro-7H-purin-7-yl)acetamido)propanoic acid",
  "23a": "2,4-dichloro-5-nitropyrimidine",
  "23b": "2,4-dimethoxybenzylamine",
  "23c": "N4-(2,4-dimethoxybenzyl)pyrimidine-4,5-diamine",
  "23d": "2-(8-oxo-8,9-dihydro-7H-purin-7-yl)acetic acid",
  "24": "(S)-2-(6-chloro-2-fluoro-3-methoxybenzamido)-3-(2-(2-oxo-1,2-dihydro-3H-imidazo[4,5-c]pyridin-3-yl)acetamido)propanoic acid",
  "24a": "tert-butyl (S)-3-amino-2-(6-chloro-2-fluoro-3-methoxybenzamido)propanoate",
  "24b": "3-fluoro-4-nitropyridine N-oxide",
  "24c": "tert-butyl glycinate",
  "24d": "tert-butyl (4-aminopyridin-3-yl)glycinate",
  "24e": "2-(2-oxo-1,2-dihydro-3H-imidazo[4,5-c]pyridin-3-yl)acetic acid",
  "25": "(S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoic acid",
  "25a": "5-fluoro-6-methylpyridin-2-amine",
  "25b": "5-fluoro-6-methylpyridine-2,3-diamine",
  "25c": "tert-butyl 6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridine-1-carboxylate",
  "25d": "2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetic acid",
  # The remaining entries are keyed by the full chemical name and map to that
  # same name (identity mappings), so the lookup leaves these names unchanged.
  "5-fluoro-6-methyl-3-nitropyridin-2-amine": "5-fluoro-6-methyl-3-nitropyridin-2-amine",
  "6-chloro-2-fluoro-3-methylbenzoyl chloride": "6-chloro-2-fluoro-3-methylbenzoyl chloride",
  "6-fluoro-3-(4-methoxybenzyl)-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one": "6-fluoro-3-(4-methoxybenzyl)-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one",
  "6-fluoro-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one": "6-fluoro-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one",
  "t-butyl (S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoate": "t-butyl (S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoate",
  "t-butyl (S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-(6-chloro-2-fluoro-3-methylbenzamido)propanoate": "t-butyl (S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-(6-chloro-2-fluoro-3-methylbenzamido)propanoate",
  "t-butyl 2-(6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetate": "t-butyl 2-(6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetate",
  "t-butyl 6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridine-1-carboxylate": "t-butyl 6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridine-1-carboxylate"
}
# Reaction steps in the "A + B >> C" format parsed by process_iupac().
# Components are either keys of subs_map (e.g. "25a", "20d") or chemical
# names passed straight to the name converter (e.g. "Triphosgene",
# "Morpholine").
rxns = [
  "25a >> 5-fluoro-6-methyl-3-nitropyridin-2-amine",
  "5-fluoro-6-methyl-3-nitropyridin-2-amine >> 25b",
  "25b + Triphosgene >> 6-fluoro-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one",
  "6-fluoro-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one  >> 25c",
  "25c  >> t-butyl 6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridine-1-carboxylate",
  "t-butyl 6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridine-1-carboxylate  >> 6-fluoro-3-(4-methoxybenzyl)-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one",
  "6-fluoro-3-(4-methoxybenzyl)-5-methyl-1,3-dihydro-2H-imidazo[4,5-b]pyridin-2-one + 20d >> t-butyl 2-(6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetate",
  "t-butyl 2-(6-fluoro-3-(4-methoxybenzyl)-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetate >> 25d",
  "19a  >> 6-chloro-2-fluoro-3-methylbenzoyl chloride",
  "6-chloro-2-fluoro-3-methylbenzoyl chloride + 12e >> t-butyl (S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-(6-chloro-2-fluoro-3-methylbenzamido)propanoate",
  "t-butyl (S)-3-((((9H-fluoren-9-yl)methoxy)carbonyl)amino)-2-(6-chloro-2-fluoro-3-methylbenzamido)propanoate + Morpholine >> 20a",
  "20a + 25d >> t-butyl (S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoate",
  "t-butyl (S)-2-(6-chloro-2-fluoro-3-methylbenzamido)-3-(2-(6-fluoro-5-methyl-2-oxo-2,3-dihydro-1H-imidazo[4,5-b]pyridin-1-yl)acetamido)propanoate  >> 25"
]

# Convert the name-based reactions to reaction SMILES; the bare expression
# below displays the result when it is the last statement of a notebook cell.
smi_rxns = process_iupac(rxns)
smi_rxns

import json
from conversion import TreeExport

# TreeExport comes from the project-local `conversion` module; presumably it
# assembles the reaction list into a route tree -- confirm in that module.
te = TreeExport(rxn_list=smi_rxns)

# Build the payload BEFORE opening the output file: mode 'w' truncates the
# file on open, so an exception raised by te.export() inside the `with`
# would otherwise leave an empty ground-truth file behind.
gt_payload = [te.export()]
with open('../data/ground_truth/target2_gt.json', 'w') as f:
    json.dump(gt_payload, f, indent=2)

### Target 3: product AZD-1775 from https://chemistry-europe.onlinelibrary.wiley.com/doi/full/10.1002/cmdc.201800188

In [5]:
# Reaction steps in the "A + B >> C" format parsed by process_iupac().
# Components here are written out as chemical names rather than short labels.
rxns = [
    'phthalic anhydride + tert-Butyl carbazate >> tert-butyl (1,3-dioxoisoindolin-2-yl)carbamate',
    'tert-butyl (1,3-dioxoisoindolin-2-yl)carbamate + allyl bromide  + benzyltriethylammonium chloride >> tert-butyl allyl(1,3-dioxoisoindolin-2-yl)carbamate',
    'tert-butyl allyl(1,3-dioxoisoindolin-2-yl)carbamate + Methylhydrazine >> tert-butyl 1-allylhydrazine-1-carboxylate',
    'methyl 6-bromopyridine-2-carboxylate + Methylmagnesium iodide >> 2-(6-bromopyridin-2-yl)propan-2-ol',
    'tert-butyl 1-allylhydrazine-1-carboxylate  + ethyl 4-chloro-2-methylthio-5-pyrimidinecarboxylate  >> 2-allyl-6-(methylthio)-1,2-dihydro-3H-pyrazolo[3,4-d]pyrimidin-3-one',
    "2-allyl-6-(methylthio)-1,2-dihydro-3H-pyrazolo[3,4-d]pyrimidin-3-one + 2-(6-bromopyridin-2-yl)propan-2-ol  + N,N'-dimethylethylenediamine >> 2-allyl-1-(6-(2-hydroxypropan-2-yl)pyridin-2-yl)-6-(methylthio)-1,2-dihydro-3H-pyrazolo[3,4-d]pyrimidin-3-one",
    '2-allyl-1-(6-(2-hydroxypropan-2-yl)pyridin-2-yl)-6-(methylthio)-1,2-dihydro-3H-pyrazolo[3,4-d]pyrimidin-3-one + 4-methyl-1-(4-aminophenyl)piperazine >> 2-allyl-1-(6-(2-hydroxypropan-2-yl)pyridin-2-yl)-6-((4-(4-methylpiperazin-1-yl)phenyl)amino)-1,2-dihydro-3H-pyrazolo[3,4-d]pyrimidin-3-one',
]

# Convert the name-based reactions to reaction SMILES; the bare expression
# below displays the result when it is the last statement of a notebook cell.
smi_rxns = process_iupac(rxns)
smi_rxns

import json
from conversion import TreeExport

# TreeExport comes from the project-local `conversion` module; presumably it
# assembles the reaction list into a route tree -- confirm in that module.
te = TreeExport(rxn_list=smi_rxns)

# Build the payload BEFORE opening the output file: mode 'w' truncates the
# file on open, so an exception raised by te.export() inside the `with`
# would otherwise leave an empty ground-truth file behind.
gt_payload = [te.export()]
with open('../data/ground_truth/target3_gt.json', 'w') as f:
    json.dump(gt_payload, f, indent=2)

### Target 4: product 462 from https://www.biorxiv.org/content/10.1101/2024.10.23.619961v1

In [6]:
# Reaction steps in the "A + B >> C" format parsed by process_iupac().
# Components here are written out as chemical names rather than short labels.
rxns = [
    "2-Iodo-4-nitro-1H-indole + 2,2,2-trifluoroethyl trifluoromethylsulfonate >> 2-Iodo-4-nitro-1-(2,2,2-trifluoroethyl)-1H-indole",
    "2-Iodo-4-nitro-1-(2,2,2-trifluoroethyl)-1H-indole + Iron >> 2-Iodo-1-(2,2,2-trifluoroethyl)-1H-indol-4-amine",
    "2-Iodo-1-(2,2,2-trifluoroethyl)-1H-indol-4-amine + tert-butyl 4-oxopiperidine-1-carboxylate >> tert-butyl 4-((2-iodo-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidine-1-carboxylate",
    "(4-aminophenyl)dimethylphosphine oxide + 3-bromoprop-1-yne >> dimethyl(4-(prop-2-yn-1-ylamino)phenyl)phosphine oxide",
    "tert-butyl 4-((2-iodo-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidine-1-carboxylate + dimethyl(4-(prop-2-yn-1-ylamino)phenyl)phosphine oxide >> tert-butyl 4-((2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidine-1-carboxylate",
    "tert-butyl 4-((2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidine-1-carboxylate + Trifluoroacetic acid >> N-(2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)piperidin-4-amine",
    "N-(2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)piperidin-4-amine + tert-butyl 2-bromoacetate >> tert-butyl 2-(4-((2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidin-1-yl)acetate",
    "tert-butyl 2-(4-((2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidin-1-yl)acetate + Trifluoroacetic acid >> 2-(4-((2-(3-((4-(dimethylphosphoryl)phenyl)amino)prop-1-yn-1-yl)-1-(2,2,2-trifluoroethyl)-1H-indol-4-yl)amino)piperidin-1-yl)acetic acid",
]

# Additional steps given directly as reaction SMILES ("reactants>>product").
# These are not passed through process_iupac(); they are concatenated after
# the converted `rxns` when `smi_rxns` is built.
sss= [
    "CC1=C(C)SC2=C1C(=N[C@@H](CC(=O)O)C3=NN=C(C)N32)C4=CC=C(C=C4)Cl.CC(C)(C)OC(=O)N1CCC(CC1)CN2CCNCC2>>CC1=C(C)SC2=C1C(=N[C@@H](CC(=O)N3CCN(CC3)CC4CCN(CC4)C(=O)OC(C)(C)C)C5=NN=C(C)N52)C6=CC=C(C=C6)Cl",
    "CC1=C(C)SC2=C1C(=N[C@@H](CC(=O)N3CCN(CC3)CC4CCN(CC4)C(=O)OC(C)(C)C)C5=NN=C(C)N52)C6=CC=C(C=C6)Cl>>CC1=C(C)SC2=C1C(=N[C@@H](CC(=O)N3CCN(CC3)CC4CCNCC4)C5=NN=C(C)N52)C6=CC=C(C=C6)Cl",
    "CC1=C(C)SC2=C1C(=N[C@@H](CC(=O)N3CCN(CC3)CC4CCNCC4)C5=NN=C(C)N52)C6=CC=C(C=C6)Cl.CP(C)(=O)c1ccc(NCC#Cc2cc3c(NC4CCN(CC(=O)O)CC4)cccc3n2CC(F)(F)F)cc1>>CP(C1=CC=C(NCC#CC2=CC(C(NC3CCN(CC(N4CCC(CN5CCN(C(C[C@@H]6N=C(C7=CC=C(Cl)C=C7)C(C(C)=C(C)S8)=C8N9C6=NN=C9C)=O)CC5)CC4)=O)CC3)=CC=C%10)=C%10N2CC(F)(F)F)C=C1)(C)=O"
]

# Name-based steps are converted to reaction SMILES, then the steps already
# written as reaction SMILES (`sss`) are appended.
smi_rxns = process_iupac(rxns) + sss

import json
from conversion import TreeExport

# TreeExport comes from the project-local `conversion` module; presumably it
# assembles the reaction list into a route tree -- confirm in that module.
te = TreeExport(rxn_list=smi_rxns)

# Build the payload BEFORE opening the output file: mode 'w' truncates the
# file on open, so an exception raised by te.export() inside the `with`
# would otherwise leave an empty ground-truth file behind.
gt_payload = [te.export()]
with open('../data/ground_truth/target4_gt.json', 'w') as f:
    json.dump(gt_payload, f, indent=2)