In [32]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.molSize = (500, 300)
from kartograf import SmallMoleculeComponent
from kartograf.atom_aligner import align_mol_shape, align_mol_skeletons
from kartograf import KartografAtomMapper
from kartograf.atom_mapping_scorer import MappingRMSDScorer, MappingShapeOverlapScorer
from kartograf.utils.mapping_visualization_widget import display_mappings_3d

In [80]:
# definitions
def _require_smiles_string(smiles, label):
    """Raise ``TypeError`` unless ``smiles`` is a ``str``.

    Table cells that hold no reactant arrive here as ``None``/``NaN``; failing
    early with the offending role named is clearer than the low-level RDKit
    argument error that would otherwise surface.
    """
    if not isinstance(smiles, str):
        raise TypeError(f"{label} SMILES must be a string, got {smiles!r}")


def _embed_smiles_as_component(smiles, randomSeed, label):
    """Parse ``smiles``, add hydrogens, embed one 3D conformer and wrap it.

    Parameters
    ----------
    smiles : str
        SMILES of the molecule to build.
    randomSeed : int
        Seed forwarded to RDKit's ``EmbedMolecule``.
    label : str
        Human-readable role ("base" / "reactant") used in error messages.

    Returns
    -------
    SmallMoleculeComponent
        Component created with ``SmallMoleculeComponent.from_rdkit``.

    Raises
    ------
    ValueError
        If the SMILES cannot be parsed or no conformer could be embedded.
    """
    # Imported explicitly: ``Chem.rdDistGeom`` only resolves when some other
    # import has already loaded that submodule as a side effect.
    from rdkit.Chem import rdDistGeom

    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise ValueError(f"Could not parse {label} SMILES: {smiles!r}")
    mol = Chem.AddHs(parsed, addCoords=True)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    conformer_id = rdDistGeom.EmbedMolecule(mol, useRandomCoords=False, randomSeed=randomSeed)
    if conformer_id < 0:
        raise ValueError(
            f"RDKit could not embed a 3D conformer for {label} SMILES {smiles!r} "
            f"(randomSeed={randomSeed})"
        )
    return SmallMoleculeComponent.from_rdkit(mol)


def _kartograf_mapping(base_smiles, reactant_smiles, randomSeed, use_shape):
    """Shared implementation behind the two public mapping helpers.

    Builds both molecules, aligns the base onto the reactant (shape alignment
    when ``use_shape`` is true, skeleton alignment otherwise) and returns the
    first mapping suggested by ``KartografAtomMapper``.
    """
    # Validate both inputs before doing any (comparatively slow) embedding.
    _require_smiles_string(base_smiles, "base")
    _require_smiles_string(reactant_smiles, "reactant")

    # Same construction order as before: base first, then reactant.
    base_smc = _embed_smiles_as_component(base_smiles, randomSeed, "base")
    reactant_smc = _embed_smiles_as_component(reactant_smiles, randomSeed, "reactant")

    # The base is the molecule being moved; the reactant is the reference.
    align = align_mol_shape if use_shape else align_mol_skeletons
    base_aligned = align(base_smc, ref_mol=reactant_smc)

    mapper = KartografAtomMapper(atom_map_hydrogens=True, atom_max_distance=1.4)
    # Reactant is passed first and the aligned base second, so
    # ``componentA_to_componentB`` reads reactant_idx -> base_idx.
    mapping = next(mapper.suggest_mappings(reactant_smc, base_aligned), None)
    if mapping is None:
        # Avoid leaking a bare StopIteration out of ``next``.
        raise ValueError(
            f"Kartograf suggested no mapping between reactant {reactant_smiles!r} "
            f"and base {base_smiles!r}"
        )
    return mapping


def get_kartograph_mapping_skeletons(base_smiles,
                                     reactant_smiles,
                                     randomSeed):
    """Map reactant atoms onto base atoms after skeleton alignment.

    Parameters
    ----------
    base_smiles : str
        SMILES of the base compound.
    reactant_smiles : str
        SMILES of the reactant.
    randomSeed : int
        Seed used for the RDKit conformer embedding of both molecules.

    Returns
    -------
    The first mapping yielded by ``KartografAtomMapper.suggest_mappings`` for
    (reactant, base aligned with ``align_mol_skeletons``).

    Raises
    ------
    TypeError
        If either SMILES is not a string.
    ValueError
        If a SMILES cannot be parsed or embedded, or no mapping is suggested.
    """
    return _kartograf_mapping(base_smiles, reactant_smiles, randomSeed, use_shape=False)


def get_kartograph_mapping_shape(base_smiles,
                                 reactant_smiles,
                                 randomSeed):
    """Map reactant atoms onto base atoms after shape alignment.

    Identical to ``get_kartograph_mapping_skeletons`` except that the base is
    aligned onto the reactant with ``align_mol_shape``.

    Parameters
    ----------
    base_smiles : str
        SMILES of the base compound.
    reactant_smiles : str
        SMILES of the reactant.
    randomSeed : int
        Seed used for the RDKit conformer embedding of both molecules.

    Returns
    -------
    The first mapping yielded by ``KartografAtomMapper.suggest_mappings`` for
    (reactant, base aligned with ``align_mol_shape``).

    Raises
    ------
    TypeError
        If either SMILES is not a string.
    ValueError
        If a SMILES cannot be parsed or embedded, or no mapping is suggested.
    """
    return _kartograf_mapping(base_smiles, reactant_smiles, randomSeed, use_shape=True)

In [81]:
# df = pd.read_pickle('/Users/kate_fieseler/PycharmProjects/syndirella/syndirella/tests/vectors/5_bases_w_routes.pkl.gz')
# # only look at first and third
# df = df.iloc[[0, 2]]
# df.reset_index(drop=True, inplace=True)
# df['1_r1_correct_mapping'] = None
# df

In [None]:
# For every stored mapping, the dict reads reactant_idx : base_idx (key = reactant atom index, value = base atom index).

In [133]:
# Load the pickled test-set table that the rest of the notebook annotates.
# Only unpickle files from a trusted source: unpickling can execute code.
test_set_path = '/Users/kate_fieseler/PycharmProjects/syndirella/syndirella/tests/vectors/2_test_set.pkl.gz'
df = pd.read_pickle(test_set_path)
df

Unnamed: 0,base_compound_smiles,1_reaction,1_r1_smiles,1_r2_smiles,1_r_previous_product,1_product_smiles,1_product_name,1_num_atom_diff,1_flag,2_reaction,...,∆G_unbound,comRMSD,regarded,path_to_mol,intra_geometry_pass,1_r1_correct_mapping,1_r2_correct_mapping,2_r1_correct_mapping,3_r1_correct_mapping,3_r2_correct_mapping
0,N#CCC(=O)Nc1cccc(NC(=O)C2CCCO2)c1,Amidation,O=C(O)C1CCCO1,CC(C)(C)OC(=O)Nc1cccc(N)c1,,CC(C)(C)OC(=O)Nc1cccc(NC(=O)C2CCCO2)c1,BHKVBKDDVLQZCR-UHFFFAOYSA-N-eb2Mvx-1,,,N-Boc_deprotection,...,406.191706,0.553939,"[A71EV2A-x0310_A_147_A71EV2A-x0501+A+152+1, A7...",/data/xchem-fragalysis/kfieseler/A71EV2A_run4/...,True,"{9: 27, 10: 28, 11: 29, 12: 30, 13: 31, 14: 32...",,,"{6: 21, 7: 20, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5...","{8: 26, 15: 3, 16: 22, 17: 23, 18: 24, 19: 25,..."
1,Cn1ncc(NC(=O)CC#N)c1NC(=O)C1CCCO1,Amidation,O=C(O)C1CCCO1,Cn1ncc(NC(=O)OC(C)(C)C)c1N,,Cn1ncc(NC(=O)OC(C)(C)C)c1NC(=O)C1CCCO1,LXINEYASRREWNB-UHFFFAOYSA-N-SXgfnw-0,,,N-Boc_deprotection,...,521.280362,1.497116,"[A71EV2A-x0310_A_147_A71EV2A-x0501+A+152+1, A7...",/data/xchem-fragalysis/kfieseler/A71EV2A_run4/...,True,"{9: 28, 10: 30, 11: 29, 12: 32, 13: 31, 14: 34...",,"{22: 22, 23: 20, 24: 21, 25: 23, 26: 24, 36: 2...","{6: 26, 7: 25, 0: 10, 1: 9, 2: 8, 3: 6, 4: 7}",


In [134]:
# Selection of the (row, reactant) pair curated in this pass.
row_num = 1  # positional row of `df` (it is read with `df.iloc[row_num]`)
reactant_prefix = '3_r2'  # '<step>_r<n>' prefix shared by the two column names below
# Both column names are derived from the single prefix so the curated mapping
# cannot accidentally be stored under a different reactant's column.
reactant = f'{reactant_prefix}_smiles'
correct_mapping_col = f'{reactant_prefix}_correct_mapping'
random_seed = 12  # seed handed to the conformer embedding in the mapping helper

In [135]:
# Pull the base/reactant SMILES pair for the selected row, compute the
# skeleton-aligned Kartograf mapping (following the Kartograf example) and
# open it in the 3D viewer for visual inspection.
row = df.iloc[row_num]
base_smiles, reactant_smiles = row['base_compound_smiles'], row[reactant]
mapping = get_kartograph_mapping_skeletons(
    base_smiles=base_smiles,
    reactant_smiles=reactant_smiles,
    randomSeed=random_seed,
)
display_mappings_3d(mapping)

VBox(children=(HBox(children=(Button(icon='caret-left', style=ButtonStyle(), tooltip='previous structure'), Bu…

In [137]:
# Show the atom-index mapping Kartograf proposed (component A -> component B;
# per the note earlier in this notebook that is reactant_idx : base_idx).
print(mapping.componentA_to_componentB)
# Curated mapping to store for the selected row/reactant. This is a hand-entered
# literal (here identical to the printed proposal), NOT derived from `mapping`:
# it must be re-checked and updated by hand whenever the selection changes.
correct_dict = {15: 20, 16: 22, 17: 21, 18: 23, 19: 6, 20: 24, 21: 27, 22: 28, 23: 32, 24: 29, 26: 31, 27: 34, 28: 33, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 11, 7: 12, 8: 13, 9: 14, 10: 15, 11: 16, 12: 17, 13: 18, 14: 19}

{15: 20, 16: 22, 17: 21, 18: 23, 19: 6, 20: 24, 21: 27, 22: 28, 23: 32, 24: 29, 26: 31, 27: 34, 28: 33, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 11, 7: 12, 8: 13, 9: 14, 10: 15, 11: 16, 12: 17, 13: 18, 14: 19}


In [138]:
# Store the curated mapping on the same row that was read with
# `df.iloc[row_num]`. `.at` is label-based, so the position is translated to
# its index label first; otherwise a non-default index (or a negative
# `row_num`) would update a different row or silently append a new one.
df.at[df.index[row_num], correct_mapping_col] = correct_dict
df

Unnamed: 0,base_compound_smiles,1_reaction,1_r1_smiles,1_r2_smiles,1_r_previous_product,1_product_smiles,1_product_name,1_num_atom_diff,1_flag,2_reaction,...,∆G_unbound,comRMSD,regarded,path_to_mol,intra_geometry_pass,1_r1_correct_mapping,1_r2_correct_mapping,2_r1_correct_mapping,3_r1_correct_mapping,3_r2_correct_mapping
0,N#CCC(=O)Nc1cccc(NC(=O)C2CCCO2)c1,Amidation,O=C(O)C1CCCO1,CC(C)(C)OC(=O)Nc1cccc(N)c1,,CC(C)(C)OC(=O)Nc1cccc(NC(=O)C2CCCO2)c1,BHKVBKDDVLQZCR-UHFFFAOYSA-N-eb2Mvx-1,,,N-Boc_deprotection,...,406.191706,0.553939,"[A71EV2A-x0310_A_147_A71EV2A-x0501+A+152+1, A7...",/data/xchem-fragalysis/kfieseler/A71EV2A_run4/...,True,"{9: 27, 10: 28, 11: 29, 12: 30, 13: 31, 14: 32...",,,"{6: 21, 7: 20, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5...","{8: 26, 15: 3, 16: 22, 17: 23, 18: 24, 19: 25,..."
1,Cn1ncc(NC(=O)CC#N)c1NC(=O)C1CCCO1,Amidation,O=C(O)C1CCCO1,Cn1ncc(NC(=O)OC(C)(C)C)c1N,,Cn1ncc(NC(=O)OC(C)(C)C)c1NC(=O)C1CCCO1,LXINEYASRREWNB-UHFFFAOYSA-N-SXgfnw-0,,,N-Boc_deprotection,...,521.280362,1.497116,"[A71EV2A-x0310_A_147_A71EV2A-x0501+A+152+1, A7...",/data/xchem-fragalysis/kfieseler/A71EV2A_run4/...,True,"{9: 28, 10: 30, 11: 29, 12: 32, 13: 31, 14: 34...",,"{22: 22, 23: 20, 24: 21, 25: 23, 26: 24, 36: 2...","{6: 26, 7: 25, 0: 10, 1: 9, 2: 8, 3: 6, 4: 7}","{15: 20, 16: 22, 17: 21, 18: 23, 19: 6, 20: 24..."


In [140]:
# Persist the annotated table. Note that this writes to the same pickle path
# the table was loaded from, i.e. the file on disk is overwritten in place.
test_set_path = '/Users/kate_fieseler/PycharmProjects/syndirella/syndirella/tests/vectors/2_test_set.pkl.gz'
df.to_pickle(test_set_path)