In [1]:
import ast
import os

import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw
from rdkit.Chem import rdChemReactions

In [2]:
def count_num_reaction(data):
    """Print how many distinct reactions and how many rows a table holds.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a 'Reaction ID' column.

    Returns
    -------
    None
        The two counts are printed, not returned.
    """
    summary = (
        # Distinct values of 'Reaction ID' (several rows may share one ID)
        ('Number of Reactions:', data['Reaction ID'].nunique()),
        ('Number of Rows:', len(data.index)),
    )
    for label, value in summary:
        print(label, value)

def count_C_O_bonds(molecule_SMILES, bond_type):
    """Count the carbon-oxygen bonds of a given bond type in a molecule.

    Parameters
    ----------
    molecule_SMILES : str
        SMILES string of the molecule to inspect.
    bond_type : rdkit.Chem.rdchem.BondType
        Bond type to count (the notebook passes ``BondType.SINGLE`` and
        ``BondType.DOUBLE``). Bonds are matched on their exact RDKit bond type.

    Returns
    -------
    int
        Number of bonds that join an atom with atomic number 6 (carbon) to an
        atom with atomic number 8 (oxygen) and whose type equals ``bond_type``.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``molecule_SMILES``.
    """
    mol = Chem.MolFromSmiles(molecule_SMILES)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; fail here with
        # the offending SMILES rather than an opaque AttributeError further down.
        raise ValueError('Could not parse SMILES: {!r}'.format(molecule_SMILES))

    carbon_oxygen = {6, 8}
    # A bond is C-O regardless of which end holds the carbon, so compare the
    # unordered pair of atomic numbers.
    return sum(
        1
        for bond in mol.GetBonds()
        if bond.GetBondType() == bond_type
        and {bond.GetBeginAtom().GetAtomicNum(), bond.GetEndAtom().GetAtomicNum()} == carbon_oxygen
    )

def change_C_O_bonds(reactant_SMILES, product_SMILES, bond_type):
    """Return the product-minus-reactant difference in C-O bond count.

    Parameters
    ----------
    reactant_SMILES : str
        SMILES string of the reactant.
    product_SMILES : str
        SMILES string of the product.
    bond_type : rdkit.Chem.rdchem.BondType
        Bond type forwarded to ``count_C_O_bonds``.

    Returns
    -------
    int
        Positive when the product has more such bonds than the reactant,
        negative when it has fewer.
    """
    bonds_in_product = count_C_O_bonds(product_SMILES, bond_type)
    bonds_in_reactant = count_C_O_bonds(reactant_SMILES, bond_type)
    return bonds_in_product - bonds_in_reactant

def change_single_and_double_C_O_bond(data):
    """Add the per-row change in C-O single and C=O double bond counts.

    For every row, the first entry of 'Reactant SMILES' is compared with the
    first entry of 'Product SMILES' via ``change_C_O_bonds``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with 'Reactant SMILES' and 'Product SMILES' columns whose cells
        are indexable (element ``[0]`` is read from each).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, modified in place with two new columns:
        'change in C-O single bond' and 'change in C=O double bond'.
    """
    column_for_bond_type = (
        ('change in C-O single bond', Chem.rdchem.BondType.SINGLE),
        ('change in C=O double bond', Chem.rdchem.BondType.DOUBLE),
    )
    for column_name, bond_type in column_for_bond_type:
        # Bind bond_type as a default so each row function keeps its own value.
        def row_change(row, bond_type=bond_type):
            return change_C_O_bonds(row['Reactant SMILES'][0], row['Product SMILES'][0], bond_type)

        data[column_name] = data.apply(row_change, axis=1)
    return data

In [3]:
# Change working directory to the repository root so that the relative
# 'Reduction of ketone/Notebook/...' paths used below resolve.
# Set the REACTIVITY_REPO_DIR environment variable to point at another
# checkout; when it is unset the original author's location is used.
REPO_DIR = os.environ.get(
    'REACTIVITY_REPO_DIR',
    '/Users/suongsuong/Documents/GitHub/Reactivity-based-metric-of-complexity',
)
os.chdir(REPO_DIR)

## <span style="color:blue"> Import data after checking by MW </span>

In [4]:
# Load the single-reduction table saved after the MW check and report its size
SingleReduction_byMW = pd.read_excel(
    io='Reduction of ketone/Notebook/SingleReduction_byMW.xlsx'
)
count_num_reaction(data=SingleReduction_byMW)

Number of Reactions: 431
Number of Rows: 1061


In [5]:
# Load the stereo single-reduction table saved after the MW check and report its size
Stereo_SingleReduction_byMW = pd.read_excel(
    io='Reduction of ketone/Notebook/Stereo_SingleReduction_byMW.xlsx'
)
count_num_reaction(data=Stereo_SingleReduction_byMW)

Number of Reactions: 60
Number of Rows: 105


## <span style="color:blue"> Concatenate both data sets </span>

In [6]:
# Stack the two tables row-wise. ignore_index=True gives the result a fresh
# 0..n-1 index; without it both inputs keep their own row labels, so labels
# repeat and any label-based selection on the result becomes ambiguous.
SingleReduc_byMW = pd.concat(
    [SingleReduction_byMW, Stereo_SingleReduction_byMW],
    axis=0,
    ignore_index=True,
)

In [7]:
# Turn the stringified lists of SMILES into real Python lists of strings.
# ast.literal_eval accepts Python literals only, so - unlike eval - a cell in
# the spreadsheet cannot execute arbitrary code. Values that are already lists
# are passed through unchanged so this cell can be re-run without failing.
SingleReduc_byMW['Reactant SMILES'] = SingleReduc_byMW['Reactant SMILES'].apply(
    lambda x: x if isinstance(x, list) else ast.literal_eval(x))
SingleReduc_byMW['Product SMILES'] = SingleReduc_byMW['Product SMILES'].apply(
    lambda x: x if isinstance(x, list) else ast.literal_eval(x))

## <span style="color:blue"> Keep only reactions that gain one C-O single bond and lose one C=O double bond </span>

In [8]:
# Add the per-reaction change in C-O single and C=O double bond counts
SingleReduc_byMW = change_single_and_double_C_O_bond(data=SingleReduc_byMW)

In [9]:
# Keep rows whose product has exactly one more C-O single bond and exactly
# one fewer C=O double bond than the reactant, then report the size.
SingleReduc_byCObond = SingleReduc_byMW.loc[
    SingleReduc_byMW['change in C-O single bond'].eq(1)
    & SingleReduc_byMW['change in C=O double bond'].eq(-1)
]
count_num_reaction(data=SingleReduc_byCObond)

Number of Reactions: 484
Number of Rows: 1156


In [10]:
# Write the filtered reactions to Excel, leaving out the DataFrame index
SingleReduc_byCObond.to_excel(
    excel_writer='Reduction of ketone/Notebook/SingleReduc_byCObond.xlsx',
    index=False,
)