In [7]:
import os
import gc
import random
import itertools
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import torch
import tokenizers
import transformers
from transformers import AutoTokenizer, EncoderDecoderModel, DataCollatorForSeq2Seq, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
import datasets
from datasets import load_dataset, load_metric
import sentencepiece
import argparse
from datasets.utils.logging import disable_progress_bar
from rdkit import Chem
import rdkit
disable_progress_bar()

class CFG:
    """Static settings shared by the cells of this notebook."""

    # Reproducibility / run mode.
    seed = 42
    debug = True

    # Model family label and the checkpoint directory the tokenizer is loaded from.
    model = 't5'
    model_name_or_path = 't5/checkpoint-230085'

    # CSV file holding the model predictions that get scored.
    dataset_path = 'multiinput_prediction_output.csv'

    # Generation settings (not referenced by the cells visible here).
    num_beams = 5
    num_return_sequences = 5
    

# Device identifier; none of the cells visible here reads it.
device = 'cpu'

def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        None.
    """
    random.seed(seed)
    # Exported for child processes only: hash randomisation of the running
    # interpreter was already fixed when it started.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may pick different kernels from run to run, so disable it.
    torch.backends.cudnn.benchmark = False
# Seed all RNGs once with the value configured on CFG.
seed_everything(seed=CFG.seed)  
    

# dataset = pd.read_csv(CFG.dataset_path)

# Load the tokenizer from the path configured in CFG.model_name_or_path.
# NOTE(review): `return_tensors` is normally an argument of the tokenizer call,
# not of `from_pretrained`; presumably it has no effect here — confirm.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name_or_path, return_tensors='pt')

In [2]:
# Load the model predictions that will be scored (path comes from CFG).
df = pd.read_csv(CFG.dataset_path)
# Re-seed the global RNGs, as this cell always did.
seed_everything(seed=CFG.seed)
# Draw the 1000 reference rows with an explicit seed so the sample does not
# depend on the global NumPy RNG state at the time this cell is (re-)run.
# NOTE(review): a RandomState seeded with CFG.seed should select the same rows
# as the former "seed globally, then sample" sequence — confirm against the
# stored predictions before relying on row alignment.
valid_sample = (
    pd.read_csv('multi-input-valid.csv')
    .sample(n=1000, random_state=CFG.seed)
    .reset_index(drop=True)
)
# Row i of the sample is treated as the ground-truth product of prediction row i.
df['target'] = valid_sample['PRODUCT']
df

Unnamed: 0,input,0th,1th,2th,3th,4th,5th,6th,7th,8th,...,7th score,8th score,9th score,10th score,11th score,12th score,13th score,14th score,valid compound score,target
0,REACTANT:C#CCO.C1CCOC1.CCN(C(C)C)C(C)C.ClC(Cl)...,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1,Cc1cc(C)cc(C#CCO)c1.OCC#Cc1cc(C)cc(I)c1,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(S(=O)(=O)[O-])cc1.C...,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1.OCC#Cc...,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(P(c2ccccc2)c2ccccc2)c1,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(I)c1,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(S(=O)(=O)[O-])cc1.C...,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1.Cc1cc(...,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1.Cc1ccc...,...,-0.305721,-0.306801,-0.306846,-0.323303,-0.325315,-0.327182,-0.328682,-0.329569,-0.222768,Cc1cc(C)cc(C#CCO)c1
1,REACTANT:C1CCOC1.C=CC(=O)O.CCN=C=NCCCN(C)C.Cl....,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)OCC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4...,C=CC(=O)C1CCCN1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc2ccc(OCc3ccccn3)...,C=CC(=O)O.C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(O...,...,-0.140306,-0.148741,-0.150155,-0.151094,-0.158358,-0.159620,-0.162817,-0.163349,-0.000884,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...
2,REACTANT:COc1cc(Br)ccc1C=O.C[Si](C)(C)Cl.NC(=O...,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,Cn1nnnc1-c1cc(N)cc(Br)c1,c1ccc2sc(-c3nccc4ccccc34)cc2c1,CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)O...,O=P([O-])([O-])[O-].[Ca+2].[Ca+2].[Ca+2].[Ca+2],COc1ccc(C(OCC(O)CN(C)C)(c2ccccc2)c2ccc(OC)cc2)cc1,Cc1cc(Cc2cc(C)c(O)c(C=O)c2C)c(C)c(C=O)c1O,...,-0.163944,-0.202429,-0.206376,-0.209373,-0.231338,-0.242043,-0.253626,-0.263670,-0.010027,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1
3,REACTANT:CCOC(=O)CCCN(C)C1CCCCC1.[Na+].[OH-]RE...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,COc1nc(N2CCCC2)ccc1[N+](=O)[O-],C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C,O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)...,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,CC(C)CN=C1C(c2ccccc2)=C(c2ccccc2)C(c2ccccc2)=C...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2C)c(C)c(C=O)c1O,...,-0.087948,-0.090767,-0.102526,-0.105295,-0.134069,-0.187411,-0.197664,-0.198354,-0.062026,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O
4,REACTANT:CC(=O)[O-].COc1cc(C(=O)CBr)cc([N+](=O...,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(...,O=C(O)CC(O)(CC(=O)O)C(=O)O.O=C[O-].O=C[O-].[Cu+2],Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)...,CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)O...,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...,Clc1cnc2c(C=Cc3c(Cl)ccc4cc(Cl)cnc34)c(Cl)ccc2c1,...,-0.142932,-0.143264,-0.176722,-0.183262,-0.192915,-0.193302,-0.200262,-0.200933,-0.021904,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,REACTANT:ClCCl.O.O=C([O-])O.[Na+]REAGENT:COc1c...,COc1cc(-c2ccc(C(=O)N3CCN(Cc4ccnc(-c5cc(OC)c(OC...,COc1cc(-c2cc(CN3CCN(C(=O)c4ccc(-c5cc(OC)c(OC)c...,COc1cc(-c2ccc(CN3CCN(C(=O)c4ccc(-c5cc(OC)c(OC)...,COc1ccc(-c2ccc(C(=O)N3CCN(Cc4ccnc(-c5cc(OC)c(O...,COc1cc(-c2ccc(C(=O)N3CCNCC3)cc2)cc(OC)c1OC.Cl,COc1cc(-c2ccc(C(=O)N3CCNCC3)cc2)cc(OC)c1OC,COc1cc(-c2ccc(C(=O)N3CCN(Cc4ccnc(-c5cc(OC)c(OC...,COc1ccc(-c2cc(CN3CCN(C(=O)c4ccc(-c5cc(OC)c(OC)...,COc1cc(-c2ccc(-c3cc(CN4CCN(C(=O)c5ccc(-c6cc(OC...,...,-0.152948,-0.154639,-0.155655,-0.158205,-0.179036,-0.186386,-0.191883,-0.194724,-0.104601,COc1cc(-c2ccc(C(=O)N3CCN(Cc4ccnc(-c5cc(OC)c(OC...
996,REACTANT:C1CCOC1.CNC.COREAGENT:O=C(CBr)c1ccc(B...,CN(C)CC(=O)c1ccc(Br)cc1,C[N+](C)(C)CC(=O)c1ccc(Br)cc1.[Br-],Brc1ccc(C2CN(C)CO2)cc1,Brc1ccc(C2CN(C)CO2)cc1.CN(C)CC(=O)c1ccc(Br)cc,O=C(CN(C)C)c1ccc(Br)cc1,Brc1ccc(C2CN(C)C2)cc1.CN(C)CC(=O)c1ccc(Br)cc1,Brc1ccc(C2CN(C)C2)cc1,CNCC(=O)c1ccc(Br)cc1,CN(C)C(=O)c1ccc(Br)cc1,...,-0.342747,-0.371270,-0.372430,-0.385352,-0.392282,-0.406375,-0.416226,-0.419018,-0.000599,CN(C)CC(=O)c1ccc(Br)cc1
997,REACTANT:CCc1cc2c(OC)ccc(OC)c2[nH]c1=O.O=[N+](...,COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(...,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...,O=P([O-])(O)O.O=P([O-])([O-])O.[Cl-].[Cl-].[K+...,CC(C)(C)[C@H](N)C(=O)O.CN(C)CC(=O)O.NCC(=O)O.N...,CC(O)CC(=O)[O-].O=C([O-])CCCO,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,NN=C(C=Cc1ccccc1)c1ccccc1,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,...,-0.107688,-0.122468,-0.136702,-0.146719,-0.164431,-0.180870,-0.181172,-0.181532,-0.025148,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...
998,REACTANT:CCCCCN1C(=O)C(C)(C)c2cc3[nH]c(CCl)nc3...,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,COc1ccc(C(OCC(O)CN(C)C)(c2ccccc2)c2ccc(OC)cc2)cc1,COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(...,CC(C)(C)[C@H](N)C(=O)O.CN(C)CC(=O)O.NCC(=O)O.N...,CC#CN1C(=O)C(C)Oc2ccc(-n3c(=O)cc(C(F)(F)F)[nH]...,C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].C=S(=O)([O...,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,...,-0.226076,-0.233854,-0.241358,-0.244389,-0.245102,-0.247082,-0.247763,-0.248840,-0.000646,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...


In [36]:
from rdkit import Chem
def canonicalize(mol):
    """Return the SMILES string RDKit writes back for the SMILES string ``mol``.

    The input is parsed with ``Chem.MolFromSmiles`` and serialised again with
    ``Chem.MolToSmiles``; ``True`` is forwarded as the second positional
    argument of ``MolToSmiles``. No check is made on the parse result, so an
    unparsable input is handed to ``MolToSmiles`` as-is.
    """
    parsed = Chem.MolFromSmiles(mol)
    return Chem.MolToSmiles(parsed, True)
# Spot check on one row: compare the helper with RDKit's own Chem.CanonSmiles
# for the same target string.
i = 40
canonicalize(df['target'][i]),Chem.CanonSmiles(df['target'][i])

('c1ccc2sc(-c3nccc4ccccc34)cc2c1', 'c1ccc2sc(-c3nccc4ccccc34)cc2c1')

In [45]:
def remove_space(row):
    """Strip every space character from the prediction columns of one row.

    Args:
        row: Mapping-like row (a pandas Series when used through
            ``DataFrame.apply(..., axis=1)``) holding string values under
            ``'0th'`` ... ``'14th'`` and ``'valid compound'``.

    Returns:
        The same ``row`` object, updated in place.
    """
    columns = [f'{rank}th' for rank in range(15)]
    columns.append('valid compound')
    for name in columns:
        row[name] = row[name].replace(' ', '')
    return row
# Run the clean-up row by row (axis=1) and rebind df to the result.
df = df.apply(remove_space, axis=1)

In [46]:
def canonicalize2(mol):
    """Best-effort variant of ``canonicalize``.

    Args:
        mol: SMILES string to canonicalise.

    Returns:
        Whatever ``canonicalize(mol)`` returns, or ``None`` when that call
        raises an ordinary exception (for example on an unparsable string).
    """
    try:
        return canonicalize(mol)
    # Only ordinary errors are swallowed; KeyboardInterrupt / SystemExit must
    # still be able to stop a long scoring loop.
    except Exception:
        return None

In [47]:
# Score the five best predictions of every row: exact-match top-1/3/5 flags
# and the number of predictions RDKit cannot parse.
top1, top3, top5 = [], [], []
invalidity = []

for idx, row in df.iterrows():
    target = canonicalize(row['target'])
    output = [row[f'{i}th'] for i in range(5)]

    # 0-based rank of the first prediction whose canonical SMILES equals the
    # target, or None when none of the five matches. The generator stops at
    # the first hit, so lower-ranked predictions are not parsed needlessly.
    hit_rank = next(
        (rank for rank, out in enumerate(output) if canonicalize2(out) == target),
        None,
    )
    top1.append(int(hit_rank is not None and hit_rank < 1))
    top3.append(int(hit_rank is not None and hit_rank < 3))
    top5.append(int(hit_rank is not None and hit_rank < 5))

    # MolFromSmiles returns None for a string it cannot turn into a molecule;
    # a trailing '.' is stripped before parsing.
    invalidity.append(
        sum(Chem.MolFromSmiles(out.rstrip('.')) is None for out in output)
    )

df['top1_accuracy'] = top1
df['top3_accuracy'] = top3
df['top5_accuracy'] = top5
df['invalidity'] = invalidity

[13:22:12] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 30
[13:22:12] Can't kekulize mol.  Unkekulized atoms: 13 14 15 16 30
[13:22:12] SMILES Parse Error: unclosed ring for input: 'C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)c(Cl)c3)ncnc2cc1OCCOC.C=CC(=O)N1CCCC'
[13:22:12] SMILES Parse Error: syntax error while parsing: O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].[C
[13:22:12] SMILES Parse Error: Failed parsing SMILES 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].[C' for input: 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].[C'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'Cc1cc(CN2CCC(N3C(=O)CCC3C(=O)Nc3cc(NC(=O)c4ccc(N)cc4C(=O)Nc4nc(C(=O)OCc3ccccc3)cc2C(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'Cc1cc(CN2CCC(N3C(=O)CCC3C(=O)Nc3cc(NC(=O)c4ccc(N)cc4C(=O)Nc4nc(C(=O)O)cc(-c4nnnn4C)c3)CC2)ccc1Cl'
[13:22:12] SMILES Parse Error: extra open parentheses for 

[13:22:12] SMILES Parse Error: syntax error while parsing: CCOC(=O)C1CC1c1ccc(C(C)(C)C)nc1Cl.CCOC(=O)C1CC1c1ccc(
[13:22:12] SMILES Parse Error: Failed parsing SMILES 'CCOC(=O)C1CC1c1ccc(C(C)(C)C)nc1Cl.CCOC(=O)C1CC1c1ccc(' for input: 'CCOC(=O)C1CC1c1ccc(C(C)(C)C)nc1Cl.CCOC(=O)C1CC1c1ccc('
[13:22:12] SMILES Parse Error: unclosed ring for input: 'CCOC(=O)C1(c2ccc(C(C)(C)C)nc2Cl)CC1c1ccc(C(C)(C)C)'
[13:22:12] Explicit valence for atom # 33 B, 4, is greater than permitted
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'CC(C)(C)OC(=O)N1CCCC(C(=O)c2cc(F)cc(F)c2O'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'CC(C)(C)OC(=O)N1CCCC(C(=O)c2cc(F)cc(OCc3c'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'CC(C)(C)OC(=O)N1CCCC(C(O)c2cc(F)cc(F)c2OC'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'CC(C)(C)OC(=O)N1CCCC(C2(O)c3cc(F)cc(F)c3O'
[13:22:12] SMILES Parse Error: extra open parentheses for input: 'CC(C)(C)OC(=O)N1CCCC(C(=O)

[13:22:13] SMILES Parse Error: extra open parentheses for input: 'CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c'
[13:22:13] SMILES Parse Error: extra open parentheses for input: 'CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c'
[13:22:13] SMILES Parse Error: extra close parentheses while parsing: CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)OCC)c3)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)OCC)c3)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1' for input: 'CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)OCC)c3)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1'
[13:22:13] SMILES Parse Error: extra open parentheses for input: 'Cc1cc(CN2CCC(N3C(=O)CCC3C(=O)Nc3cc(Br)cc(-c4nnnn4C)c3)CC2)ccc1Cl.Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-]'
[13:22:13] SMILES Parse Error: unclosed ring for input: 'O=S(=O)([O-])[O-].O=S(=O)([O-])[

[13:22:13] SMILES Parse Error: syntax error while parsing: O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[' for input: 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])['
[13:22:13] SMILES Parse Error: syntax error while parsing: COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(C)C)CCN(CC(=
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(C)C)CCN(CC(=' for input: 'COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(C)C)CCN(CC(='
[13:22:13] SMILES Parse Error: syntax error while parsing: O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])[' for input: 'O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O-])['
[13:22:13] SMILES Parse Error: syntax error while parsing: COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(C)C)CCN(CC(=
[13:22:13] SMILES 

[13:22:13] SMILES Parse Error: syntax error while parsing: O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+](=
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+](=' for input: 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+](='
[13:22:13] SMILES Parse Error: syntax error while parsing: O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+](=
[13:22:13] SMILES Parse Error: Failed parsing SMILES 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+](=' for input: 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]1=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])[N+](=O)[O-])[N+]

[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 16 17 18 19 20 21
[13:22:13] Can't kekulize mol.  Unkekulized atoms: 1 2 3 21 22 23 24 27 28
[13:22:13] SMILES Parse Error: extra open parentheses for input: 'CC(C)[Si](C#Cc1cccc

[13:22:14] SMILES Parse Error: extra close parentheses while parsing: C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C@H]3COC[C@@H]2CC[C@@]2(C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C)C)C1
[13:22:14] SMILES Parse Error: Failed parsing SMILES 'C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C@H]3COC[C@@H]2CC[C@@]2(C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C)C)C1' for input: 'C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C@H]3COC[C@@H]2CC[C@@]2(C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C)C)C1'
[13:22:14] SMILES Parse Error: extra open parentheses for input: 'C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C@H]3COC[C@@H]2CC[C@H]2[C@H](C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C'
[13:22:14] SMILES Parse Error: extra close parentheses while parsing: C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C@H]3COC[C@@H]2CC[C@@]2(C)[C@H]3CC[C@H]2[C@H](C)CCCC(C)C)C1
[13:22:14] SMILES Parse Error: Failed parsing SMILES 'C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C.C[C@H]1CC[C@H]2[C

[13:22:14] SMILES Parse Error: unclosed ring for input: 'CCOC(=O)CCOCC(COCCC(=O)OCC)(COCCC(=O)OCC)NC(=O)CCNC(=O)O[C@@H]1CC[C@@]2(C)C(=CC[C@@H]3[C@@H]2CC[C@@]2(C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C)C)C'
[13:22:14] SMILES Parse Error: ring closure 2 duplicates bond between atom 49 and atom 50 for input: 'CCOC(=O)CCOCC(COCCC(=O)OCC)(COCCC(=O)OCC)NC(=O)CCNC(=O)O[C@@H]1CC[C@@]2(C)C(=CC[C@@H]3[C@@H]2CC[C@@H]2[C@H]2CC[C@@H]2[C@H](C)CCCC(C)C)C1'
[13:22:14] SMILES Parse Error: unclosed ring for input: 'CCOC(=O)CCOCC(COCCC(=O)OCC)(COCCC(=O)OCC)NC(=O)CCNC(=O)O[C@@H]1CC[C@@]2(C)C(=CC[C@@H]3[C@@H]2CC[C@@]2(C)[C@H]3CC[C@@H]2[C@H](C)CCCC(C)C)C'
[13:22:14] SMILES Parse Error: ring closure 2 duplicates bond between atom 49 and atom 50 for input: 'CCOC(=O)CCOCC(COCCC(=O)OCC)(COCCC(=O)OCC)NC(=O)CCNC(=O)O[C@@H]1CC[C@@]2(C)C(=CC[C@@H]3[C@@H]2CC[C@@H]2[C@H]2CC[C@@H]2[C@H](C)CCCC(C)C)C1'
[13:22:14] SMILES Parse Error: unclosed ring for input: 'CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3

[13:22:14] SMILES Parse Error: extra open parentheses for input: 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])'
[13:22:14] SMILES Parse Error: extra open parentheses for input: 'O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)'
[13:22:14] SMILES Parse Error: extra open parentheses for input: 'O=[N+]([O-])C(F)(COC(Cl)OCC(F)([N+](=O)[O-])'
[13:22:14] SMILES Parse Error: extra open parentheses for input: 'O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)'
[13:22:14] SMILES Parse Error: syntax error while parsing: CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1CC.C[C@H]1C(=O)OCC[C@@H]2[C@H]2CC[C@H]2[C@H
[13:22:14] SMILES Parse Error: Failed parsing SMILES 'CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1CC.C[C@H]1C(=O)OCC[C@@H]2[C@H]2CC[C@H]2[C@H' for input: 'CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)[nH]c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC)c1CC.C[C@H]1C(=O)OCC[C@@H]2[C@H]2CC[C@H]2[C@H'
[

In [41]:
# Preview the first rows of df.
# NOTE(review): this cell's execution count (41) is lower than that of the
# scoring cell (47), so the saved output may stem from an earlier run — re-run
# to refresh it.
df.head()

Unnamed: 0,input,0th,1th,2th,3th,4th,5th,6th,7th,8th,...,11th score,12th score,13th score,14th score,valid compound score,target,top1_accuracy,top3_accuracy,top5_accuracy,invalidity
0,REACTANT:C#CCO.C1CCOC1.CCN(C(C)C)C(C)C.ClC(Cl)...,Cc1cc(C)cc(C#CCO)c1,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1,C#CCOc1cc(C)cc(C)c1,Cc1cc(C)cc(C#CCO)c1.OCC#Cc1cc(C)cc(I)c1,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(S(=O)(=O)[O-])cc1.C...,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(C#CCO)c1.OCC#Cc...,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(P(c2ccccc2)c2ccccc2)c1,Cc1cc(C)cc(C#CCO)c1.Cc1cc(C)cc(I)c1,Cc1cc(C)cc(C#CCO)c1.Cc1ccc(S(=O)(=O)[O-])cc1.C...,...,-0.306846,-0.323303,-0.325315,-0.327182,-0.000354,Cc1cc(C)cc(C#CCO)c1,1,1,1,0
1,REACTANT:C1CCOC1.C=CC(=O)O.CCN=C=NCCCN(C)C.Cl....,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,...,-0.104499,-0.106332,-0.107003,-0.108976,-0.000884,C=CC(=O)N1CCCC1C(=O)Nc1cc2c(Nc3ccc(OCc4ccccn4)...,1,1,1,1
2,REACTANT:COc1cc(Br)ccc1C=O.C[Si](C)(C)Cl.NC(=O...,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,Cn1nnnc1-c1cc(N)cc(Br)c1,c1ccc2sc(-c3nccc4ccccc34)cc2c1,CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)O...,O=P([O-])([O-])[O-].[Ca+2].[Ca+2].[Ca+2].[Ca+2],COc1ccc(C(OCC(O)CN(C)C)(c2ccccc2)c2ccc(OC)cc2)cc1,Cc1cc(Cc2cc(C)c(O)c(C=O)c2C)c(C)c(C=O)c1O,...,-0.231338,-0.233637,-0.242043,-0.253626,-0.010027,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,1,1,1,0
3,REACTANT:CCOC(=O)CCCN(C)C1CCCCC1.[Na+].[OH-]RE...,O=S(=O)([O-])[O-].O=S(=O)([O-])[O-].O=S(=O)([O...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,COc1nc(N2CCCC2)ccc1[N+](=O)[O-],C[C@H](CO)CO[Si](c1ccccc1)(c1ccccc1)C(C)(C)C,O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)...,Cn1nnnc1-c1cc(Br)cc([N+](=O)[O-])c1,CC(C)CN=C1C(c2ccccc2)=C(c2ccccc2)C(c2ccccc2)=C...,Cc1cc(Cc2cc(C)c(O)c(C=O)c2C)c(C)c(C=O)c1O,...,-0.134069,-0.187411,-0.197664,-0.198354,-0.052501,Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,0,1,1,0
4,REACTANT:CC(=O)[O-].COc1cc(C(=O)CBr)cc([N+](=O...,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...,Oc1cc2cc[nH]c2cc1O,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,COC(=O)CN1CCN(CC(=O)OC(C)(C)C)CCN(CC(=O)OC(C)(...,O=C(O)CC(O)(CC(=O)O)C(=O)O.O=C[O-].O=C[O-].[Cu+2],Cc1cc(Cc2cc(C)c(O)c(C=O)c2)cc(C=O)c1O,O=[N+]([O-])c1cc([N+](=O)[O-])c(OCCO)c(C(F)(F)...,CCOC(=O)c1cccc(-c2cn(-c3ccc(F)c(F)c3)nc2C(=O)O...,CCc1c(Cc2[nH]c(C(=O)OCc3ccccc3)c(C)c2CCC(=O)OC...,...,-0.168588,-0.170192,-0.174727,-0.176722,-0.021904,O=C(O)c1cc(-n2c(=O)cc(C(F)(F)F)[nH]c2=O)ccc1Cl,0,1,1,0


In [48]:
# Fraction of rows whose target is found within the top 1 / 3 / 5 predictions.
n_rows = len(df)
hit_rates = [
    sum(df[column]) / n_rows
    for column in ('top1_accuracy', 'top3_accuracy', 'top5_accuracy')
]
print(*hit_rates)
# Share of unparsable SMILES among all scored predictions (five per row).
n_predictions = len(invalidity) * 5
print(sum(invalidity) / n_predictions)


0.396 0.485 0.529
0.1988


In [53]:
# Reload the predictions and attach the reference products from val.csv.
target_df = pd.read_csv('val.csv')
df = pd.read_csv('multiinput_prediction_output.csv')
df['target'] = target_df['PRODUCT']

# Fresh tokenizer, extended with additional tokens.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name_or_path, return_tensors='pt')
# tokenizer.add_tokens(['.', '>', '<', 'P','Pd', 'Na', 'K', 'Al', 'Cu', 'Si', 'Zn', 'Mn', 'Li', 'Mg', 'Fe', 'Ba', 'Pt', 'Ag', 'Yb', '6', 'e'])
extra_tokens = ['Ru', 'Cl', 'Pb', 'Ti','Tl', 'Ru', '7', 'Ni', 'Ca', 'Hg', 'Sb', 'Rh', 'Nd', 'As', '8', 'Zr', 'p', 'W', 'Ar', 'Ge', 'Sm', 'Ta', 'Re', 'Au', 'Mo', 'Bi']
tokenizer.add_tokens(extra_tokens)

# Encode each input, decode it again and print the ones whose round-trip
# contains '<unk>'.
encode_kwargs = dict(
    add_special_tokens=True,
    max_length=512,
    padding='do_not_pad',
    return_offsets_mapping=False,
    truncation=True,
    return_attention_mask=True,
)
for ith, text in enumerate(df['input']):
    token_ids = tokenizer(text, **encode_kwargs).input_ids
    decoded = tokenizer.decode(token_ids).replace(' ', '')
    if '<unk>' in decoded:
        print(text)
        print(decoded)
#     if ith == 50000:
#         break


In [48]:
# Print both extra-token lists merged and sorted (duplicates are kept).
tokens_a = ['Ru', 'Cl', 'Pb', 'Ti','Tl', 'Ru', '7', 'Ni', 'Ca', 'Hg', 'Sb', 'Rh', 'Nd', 'As', '8', 'Zr', 'p', 'W', 'Ar', 'Ge', 'Sm', 'Ta', 'Re', 'Au', 'Mo', 'Bi']
tokens_b = ['.', '>', '<', 'P','Pd', 'Na', 'K', 'Al', 'Cu', 'Si', 'Zn', 'Mn', 'Li', 'Mg', 'Fe', 'Ba', 'Pt', 'Ag', 'Yb', '6', 'e']
print(sorted(tokens_a + tokens_b))

['.', '6', '7', '8', '<', '>', 'Ag', 'Al', 'Ar', 'As', 'Au', 'Ba', 'Bi', 'Ca', 'Cl', 'Cu', 'Fe', 'Ge', 'Hg', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na', 'Nd', 'Ni', 'P', 'Pb', 'Pd', 'Pt', 'Re', 'Rh', 'Ru', 'Ru', 'Sb', 'Si', 'Sm', 'Ta', 'Ti', 'Tl', 'W', 'Yb', 'Zn', 'Zr', 'e', 'p']


In [50]:
# Display the current contents of df (pandas truncates the rendering).
df

Unnamed: 0,input,0th,1th,2th,3th,4th,5th,6th,7th,8th,...,6th score,7th score,8th score,9th score,10th score,11th score,12th score,13th score,14th score,valid compound score
0,REACTANT:COc1ccc(S(=O)(=O)Cl)cc1.[Na+]REAGENT:...,COc1ccc(S(=O)[O-])cc1,COc1ccc(S(=O)[O-])cc1.[Cl-],COc1ccc(S(=O)(=O)[O-])cc1,O=S(=O)([O-])c1ccc(O)cc1,COc1ccc(S(=O)(=O)[O-])cc1.[Cl-],COc1ccc(S(=O)[O-])cc1.c1cc[nH+]cc1,Cl.O=S(=O)([O-])c1ccc(O)cc1,O=S(=O)([O-])c1ccc([O-])cc1,Cl.COc1ccc(S(=O)[O-])cc1,...,-0.263000,-0.266919,-0.295366,-0.295947,-0.297291,-0.314993,-0.316880,-0.317811,-0.319223,-0.008070
1,REACTANT:N#Cc1c(N)nc(Cl)c(C#N)c1-c1ccccc1.OCc1...,N#Cc1c(N)nc(OCc2ccccn2)c(C#N)c1-c1ccccc1,N#Cc1c(N)nc(Cc2ccccn2)c(C#N)c1-c1ccccc1,N#Cc1c(N)[nH]c(OCc2ccccn2)c(C#N)c1-c1ccccc1,Cl.N#Cc1c(N)nc(OCc2ccccn2)c(C#N)c1-c1ccccc1,N#Cc1c(N)[nH]c(=O)c(C#N)c1-c1ccccc1,N#Cc1c(N)[nH+]c(OCc2ccccn2)c(C#N)c1-c1ccccc1.[...,N#Cc1c(N)nc(C(=O)c2ccccn2)c(C#N)c1-c1ccccc1,N#Cc1c(N)nc(Oc2ccccn2)c(C#N)c1-c1ccccc1,NC(=O)c1c(N)nc(OCc2ccccn2)c(C#N)c1-c1ccccc1,...,-0.234125,-0.238098,-0.258809,-0.271725,-0.274950,-0.283902,-0.292726,-0.296436,-0.310742,-0.000224
2,REACTANT:CN1CCC(CCO)CC1.Cc1ccc(N2CCN(C(=O)Oc3c...,Cc1ccc(N2CCN(C(=O)OCCC3CCN(C)CC3)CC2)cc1,CN1CCC(CCOC(=O)N2CCN(c3ccc(C)cc3)CC2)CC1,Cc1ccc(N2CCN(C(=O)OCCC3CCN(C)CC3)CC2)cc1.Cc1cc...,Cc1ccc(N2CCN(C(=O)OCCC3CCN(C)CC3)CC2)cc1.Cc1cc...,Cc1ccc(N2CCN(C(=O)CCC3CCN(C)CC3)CC2)cc1,Cc1ccc(N2CCN(C(=O)OCCC3CCN(C)CC3)CC2)cc1.Cc1cc...,CN1CCC(CCOC(=O)N2CCN(c3ccc(C)cc3)CC2)CC1.Cc1cc...,Cc1ccc(N2CCN(C(=O)NCCC3CCN(C)CC3)CC2)cc1,Cc1ccc(N2CCN(C(=O)OCC3CCN(C)CC3)CC2)cc1,...,-0.257414,-0.258352,-0.265043,-0.265205,-0.265414,-0.275489,-0.275925,-0.282900,-0.283351,-0.000132
3,REACTANT:CC(C)(C)OC(=O)N1CCC(COC(=O)C2CCC3CN2C...,O=C(OCC1CCNCC1)C1CCC2CN1C(=O)N2OS(=O)(=O)O,CC(C)(C)OC(=O)N1CCC(COC(=O)C2CCC3CN2C(=O)N3OS(...,O=C(C1CCC2CN1C(=O)N2OS(=O)(=O)O)OCC1CCNCC1,CC(C)(C)OC(=O)N1CCC(COC(=O)C2CCC3CN2C(=O)N3O)CC1,CC(C)(C)OC(=O)N1CCC(CO)CC1C(=O)C1CCC2CN1C(=O)N...,O=C1C2CCC(C(=O)OCC3CCNCC3)N2C(=O)N1OS(=O)(=O)O,CCN1CCC(COC(=O)C2CCC3CN2C(=O)N3OS(=O)(=O)O)CC1,CN1CCC(COC(=O)C2CCC3CN2C(=O)N3OS(=O)(=O)O)CC1,O=C(O)C1CCC2CN1C(=O)N2OS(=O)(=O)O,...,-0.152735,-0.159872,-0.163130,-0.166870,-0.169046,-0.169374,-0.176924,-0.185469,-0.195297,-0.001599
4,REACTANT:CCC12CCC3C4CCC(=O)C=C4CCC3C1C(O)CC2=O...,CCC12CCC3C4CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC(C3CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC(C3CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC(C3CCC(=O)C=C4CCC3C1C(OC(=O)c3ccccc3)C...,CCC12CCC3C4CCC(=O)C=C4CCC3C1C(O)C(C(=O)c1ccccc...,CCC12CCC(C3CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC3C4CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC(C3CCC(=O)C=C4CCC3C1C(OC(=O)c1ccccc1)C...,CCC12CCC3C4CCC(=O)C=C4CCC3C1C(O)(C(=O)c1ccccc1...,...,-0.168645,-0.169499,-0.177384,-0.181207,-0.185081,-0.187224,-0.188434,-0.189308,-0.190461,-0.000154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,REACTANT:CCN1CC2OC2C1.CCOc1ccccc1OREAGENT:Cl.C...,CCOc1ccccc1OC1CN(CC)CC1O,CCOc1ccccc1C1CN(CC)CC1O,CCOc1ccccc1OC1CN(CC)CC2Oc3ccccc3OCC12,CCOc1ccccc1OC1CN(CC)CC2Oc3ccccc3OC12,CCOc1ccccc1OC1CN(CC)CC1N1CC2Oc3ccccc3OCC21,CCOc1ccccc1OC1CN(CC)CC1N1CC2Oc3ccccc3OC2C1,CCOc1ccccc1OC1CN(CC)CC1O.CCOc1ccccc1O,CCOc1ccccc1OC1CN(CC)CC1Oc1ccccc1OCC,CCOc1ccccc1C12CN(CC)CC1O2,...,-0.304359,-0.310612,-0.316779,-0.329301,-0.335336,-0.339174,-0.354038,-0.357701,-0.366088,-0.001590
29996,REACTANT:CCOC(=O)C(=Cc1cccc(OCCc2ccc(OS(C)(=O)...,CCOC(=O)C(Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1)OCC,CCOC(=O)C(O)Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1,CCOC(=O)Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1,CCOC(=O)C(OCC)Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1,CCOC(Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1)C(=O)O,CCOC(=O)C(Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1)...,CCOC(=Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1)C(=O)O,CCOC(=O)C(=O)Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1,COC(=O)C(=O)Cc1cccc(OCCc2ccc(OS(C)(=O)=O)cc2)c1,...,-0.192674,-0.194049,-0.207467,-0.211696,-0.231481,-0.234585,-0.237440,-0.238560,-0.240112,-0.000323
29997,REACTANT:CCCCC1CCNCC1.Cc1ccc(S(=O)(=O)OCC(C)Cn...,CCCCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1,CCCCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1.Cc1ccc(S...,CCCCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1.Cc1ccc(S...,CCCCC1CCNC(CC(C)Cn2c(=O)sc3ccccc32)C1,CCCCC1CCNCC1Cn1c(=O)sc2ccccc21,CCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1,CCCCC1CCN(C(C)Cn2c(=O)sc3ccccc32)CC1,CCCCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1.COc1ccc(...,CCCCCC1CCN(CC(C)Cn2c(=O)sc3ccccc32)CC1,...,-0.331291,-0.342171,-0.354060,-0.356503,-0.361395,-0.361925,-0.362513,-0.371238,-0.376130,-0.000073
29998,REACTANT:CC(C)I.COC(=O)c1cc(Br)cc2[nH]ccc12REA...,COC(=O)c1cc(Br)cc2c1ccn2C(C)C,COC(=O)c1cc(Br)cc2c1ccn2-c1cc(C(C)C)c2cc(Br)cc...,COC(=O)c1cc(Br)cc2c1ccn2-c1cc(C)c(C(=O)OC)c1,COC(=O)c1cc(Br)cc2c1ccn2-c1cc2c(C(=O)OC)cc(Br)...,COC(=O)c1cc(Br)cc2c1ccn2COC(C)C,COC(=O)c1cc(Br)cc2c1ccn2-c1cc[nH]c2cc(Br)cc(C(...,COC(=O)c1cc(Br)cc2c1ccn2Cc1C,COC(=O)c1cc(Br)cc2[nH]cc(C(C)C)c12,COC(=O)c1c(Br)cc2c(ccn2C(C)C)c1O,...,-0.348125,-0.351092,-0.359734,-0.363458,-0.363472,-0.363746,-0.367554,-0.378466,-0.381652,-0.000025
