In [13]:
from rdkit import Chem
from rdkit.Chem import rdChemReactions
from localmapper import localmapper
import pandas as pd
import os

# Change working directory.
# The data directory is machine-specific. Set the REDUCTION_KETONE_DIR environment
# variable to point at the data folder on another machine; when it is unset the
# original hard-coded location is used, so existing behaviour is unchanged.
_WORK_DIR = os.environ.get(
    'REDUCTION_KETONE_DIR',
    '/Users/suongsuong/Documents/GitHub/Reactivity-based-metric-of-complexity/Reduction of ketone/',
)
os.chdir(_WORK_DIR)

In [14]:
def get_ReactantAndProduct_mapping(rxn_map):
    '''Split a mapped reaction string into (reactant SMILES, product SMILES).

    Only the first reactant template and the first product template of the
    parsed reaction are used, i.e. this is meant for 1 reactant -> 1 product.

    rxn_map: reaction string handed to rdChemReactions.ReactionFromSmarts.
    Returns a tuple (reactant_smiles, product_smiles) produced by Chem.MolToSmiles.
    '''
    reaction = rdChemReactions.ReactionFromSmarts(rxn_map)
    product_templates, reactant_templates = reaction.GetProducts(), reaction.GetReactants()
    # NOTE(review): nothing here reorders atoms, so the atom order of the returned
    # SMILES is whatever MolToSmiles emits -- confirm before assuming that
    # (atom index + 1) equals the atom map number.
    smiles_of_product = Chem.MolToSmiles(product_templates[0])
    smiles_of_reactant = Chem.MolToSmiles(reactant_templates[0])
    return smiles_of_reactant, smiles_of_product


def get_indx_CObond_change(reactants_smiles_map, product_smiles_map):
    '''Find the C=O double bond of the reactant that is no longer a double bond in the product.

    Atoms are matched between reactant and product through their atom-map
    numbers, so the result does not depend on the two SMILES strings listing
    their atoms in the same order. Atoms without a map number fall back to
    being matched by atom index.

    reactants_smiles_map: atom-mapped SMILES of the reactant.
    product_smiles_map: atom-mapped SMILES of the product.

    Returns (C_idx, O_idx): 1-based atom indices, in the reactant molecule parsed
    from reactants_smiles_map, of the carbon and oxygen of the first such bond
    (in reactant bond order). Returns (None, None) when no such bond exists,
    when either input is not a string, or when either SMILES cannot be parsed.
    This is intended for reactions with a single changed carbonyl.
    '''
    # Non-string cells (e.g. NaN/None from an upstream failure) cannot be parsed.
    if not isinstance(reactants_smiles_map, str) or not isinstance(product_smiles_map, str):
        return None, None

    reactant_mol = Chem.MolFromSmiles(reactants_smiles_map)
    product_mol = Chem.MolFromSmiles(product_smiles_map)
    # MolFromSmiles returns None for SMILES it cannot parse.
    if reactant_mol is None or product_mol is None:
        return None, None

    def _atom_key(atom):
        # The map number identifies the same atom on both sides of the reaction;
        # a map number of 0 means "unmapped", so use the atom index instead.
        map_num = atom.GetAtomMapNum()
        return ('map', map_num) if map_num else ('idx', atom.GetIdx())

    def _bond_key(bond):
        # frozenset makes the key independent of which atom is begin and which is end.
        ends = frozenset((_atom_key(bond.GetBeginAtom()), _atom_key(bond.GetEndAtom())))
        return ends, bond.GetBondType()

    product_bonds = {_bond_key(bond) for bond in product_mol.GetBonds()}

    # Walk the reactant bonds in molecule order so the result is deterministic.
    for bond in reactant_mol.GetBonds():
        if bond.GetBondType() != Chem.rdchem.BondType.DOUBLE:
            continue
        if _bond_key(bond) in product_bonds:
            continue  # same double bond still present in the product
        begin_atom, end_atom = bond.GetBeginAtom(), bond.GetEndAtom()
        symbols = (begin_atom.GetSymbol(), end_atom.GetSymbol())
        if symbols == ('C', 'O'):
            return begin_atom.GetIdx() + 1, end_atom.GetIdx() + 1
        if symbols == ('O', 'C'):
            return end_atom.GetIdx() + 1, begin_atom.GetIdx() + 1
    return None, None


In [15]:
# Load the reaction table from an Excel workbook using the openpyxl engine.
# The path is relative, so it is resolved against the current working directory.
# NOTE(review): presumably this is the output of an earlier cleaning notebook -- confirm.
clean_df = pd.read_excel('ReductionKetone_6stepCleaning.xlsx', engine='openpyxl')

In [None]:
# Work on a copy so clean_df stays untouched and this cell can be re-run.
atomMap_df = clean_df.copy()
mapper = localmapper()

# Atom-map each reaction (one mapper call per row of the 'Reaction' column).
atomMap_df['Reaction Map'] = [mapper.get_atom_map(rxn) for rxn in atomMap_df['Reaction']]

# Split every mapped reaction once and reuse the (reactant, product) pair for both columns.
mapped_pairs = [get_ReactantAndProduct_mapping(rxn_map) for rxn_map in atomMap_df['Reaction Map']]
atomMap_df['Reactant smiles map'] = [pair[0] for pair in mapped_pairs]
atomMap_df['Product smiles map'] = [pair[1] for pair in mapped_pairs]

# Locate the changed C=O bond per reaction; each entry is a (C_idx, O_idx) tuple,
# or (None, None) when no changed carbonyl was found.
co_indices = [
    get_indx_CObond_change(reactant_smiles, product_smiles)
    for reactant_smiles, product_smiles in zip(atomMap_df['Reactant smiles map'],
                                               atomMap_df['Product smiles map'])
]
# Naming the columns explicitly keeps the two-column shape even for an empty table.
atomMap_df[['C_idx', 'O_idx']] = pd.DataFrame(co_indices, index=atomMap_df.index,
                                              columns=['C_idx', 'O_idx'])

In [18]:
# Save the table to an Excel file in the current working directory;
# index=False leaves the DataFrame index out of the sheet.
atomMap_df.to_excel('ReductionKetone_atomMapping.xlsx',index=False)