# SUPPLEMENTARY PHASE 3: Check functional group assignment of new (final) molecules

"""

Created on Monday 16 October 2023 - testing functional groups of final list of new ACEIs and ARBs compared to OG mols

Updated on Thursday 26 October 2023 - updated smiles

@author: Odifentse M Lehasa

The purpose of this notebook is to determine the functional group assignment/allocation in the final set of new molecules. 
"""

## STEP 0: IMPORT LIBRARIES

In [4]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import BRICS
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Lipinski
import pandas as pd


##  STEP 1: GET DATA

## Step 1.1: Get data of new molecules

In [6]:
# list of only new ACEI molecules 

df_finalacei = pd.read_csv('/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Output/7. New lead ACEI molecules.csv', index_col=0)
df_finalacei

Unnamed: 0,0
0,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
1,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
2,CC(C)[C@H](N[C@@H](CCCCN)C(=O)N1[C@H](C(=O)O)C...
3,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
4,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
...,...
424,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](c2ccccc2)...
425,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](C2CCCCC2)...
426,CO[C@@H]1C[C@@H]2CCCC[C@@H]2N1[C@@H](CCCCN)C(=...
427,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](CO)C[C@@H...


In [8]:
# list of only new ARB molecules 

df_finalarb = pd.read_csv('/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Output/7. New lead ARB molecules.csv', index_col=0)
df_finalarb

Unnamed: 0,0
0,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...
1,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...
2,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
3,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
4,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
...,...
305,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
306,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
307,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
308,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...


## Step 1.2: Get data of all original molecules

In [9]:
# list of only the original ACEI molecules 

df_ogacei = pd.read_csv('/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Data/ACE Inhibitors.csv')
df_ogacei

Unnamed: 0.1,Unnamed: 0,Molecule Name,ATC Code,Class,Canonical SMILES
0,1,Enalaprilat,C09A,ACE Inhibitors,C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@...
1,3,Rescinnamine,C09A,ACE Inhibitors,COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c...
2,7,Benazepril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCc2ccccc2N(CC...
3,10,Captopril,C09A,ACE Inhibitors,C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O
4,11,Cilazapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCCN2CCC[C@@H]...
5,13,Enalapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[...
6,16,Fosinopril,C09A,ACE Inhibitors,CCC(=O)O[C@@H](O[P@](=O)(CCCCc1ccccc1)CC(=O)N1...
7,17,Imidapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1C(=O...
8,19,Lisinopril,C09A,ACE Inhibitors,NCCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CC...
9,21,Moexipril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2c...


In [11]:
# list of only the original ARB molecules 

df_ogarb = pd.read_csv('/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Data/ARBs.csv', index_col=0)
df_ogarb

Unnamed: 0,Molecule Name,ATC Code,Class,Canonical SMILES
2,Olmesartan,C09C,ARBs,CCCc1nc(C(C)(C)O)c(C(=O)O)n1Cc1ccc(-c2ccccc2-c...
4,Sparsentan,C09C,ARBs,CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C(COCC)=C1)C1...
6,Azilsartan medoxomil,C09C,ARBs,CCOc1nc2cccc(C(=O)OCc3oc(=O)oc3C)c2n1Cc1ccc(-c...
9,Candesartan Cilexetil,C09C,ARBs,CCOc1nc2cccc(C(=O)OC(C)OC(=O)OC3CCCCC3)c2n1Cc1...
14,Eprosartan,C09C,ARBs,CCCCc1ncc(/C=C(\Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)...
18,Irbesartan,C09C,ARBs,CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[...
20,Losartan,C09C,ARBs,CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2...
22,Olmesartan Medoxomil,C09C,ARBs,CCCc1nc(C(C)(C)O)c(C(=O)OCc2oc(=O)oc2C)n1Cc1cc...
29,Telmisartan,C09C,ARBs,CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2...
32,Valsartan,C09C,ARBs,CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@...


In [22]:
df_ogarb['Canonical SMILES']

2     CCCc1nc(C(C)(C)O)c(C(=O)O)n1Cc1ccc(-c2ccccc2-c...
4     CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C(COCC)=C1)C1...
6     CCOc1nc2cccc(C(=O)OCc3oc(=O)oc3C)c2n1Cc1ccc(-c...
9     CCOc1nc2cccc(C(=O)OC(C)OC(=O)OC3CCCCC3)c2n1Cc1...
14    CCCCc1ncc(/C=C(\Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)...
18    CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[...
20    CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2...
22    CCCc1nc(C(C)(C)O)c(C(=O)OCc2oc(=O)oc2C)n1Cc1cc...
29    CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2...
32    CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@...
Name: Canonical SMILES, dtype: object

## STEP 2: FUNCTIONAL GROUP TEST

## Step 2.1: New ACEIs

In [12]:

# create a list to store results of each new molecule

newacei_functional_list= list()

for z in range(len(df_finalacei)):

    x = Chem.MolFromSmiles(df_finalacei['0'][z]) # molecule in smiles format
    drug_functional_group = ['*C(=O)O','*[SH]','P']  # add all functional groups  
    
  
    # ACE functional groups
    carboxyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[0]))
    sulfhydryl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[1]))
    phosphinyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[2]))
    
    # combine above results
    functional_total = (Chem.MolToSmiles(x), len(carboxyl_matches),len(sulfhydryl_matches),len(phosphinyl_matches))
    newacei_functional_list.append(functional_total) 


# save list as dataframe
df_newace_functional = pd.DataFrame(newacei_functional_list, columns =['Canonical SMILES','Carboxyl Functional Group (No.)','Sulfhydrl Functional Group (No.)','Phosphinyl Functional Group (No.)'])

df_newace_functional.to_csv('SUPP 3.1 New ACEIs Functional groups.csv')
df_newace_functional


Unnamed: 0,Canonical SMILES,Carboxyl Functional Group (No.),Sulfhydrl Functional Group (No.),Phosphinyl Functional Group (No.)
0,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,2,0,0
1,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,2,0,0
2,CC(C)[C@H](N[C@@H](CCCCN)C(=O)N1[C@H](C(=O)O)C...,2,0,0
3,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,2,0,0
4,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,2,0,0
...,...,...,...,...
424,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](c2ccccc2)...,2,0,0
425,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](C2CCCCC2)...,2,0,0
426,CO[C@@H]1C[C@@H]2CCCC[C@@H]2N1[C@@H](CCCCN)C(=...,2,0,0
427,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](CO)C[C@@H...,2,0,0


## Step 2.2: Original ACEIs

In [14]:

# create a list to store results of each new molecule
ogacei_functional_list= list()

for z in range(len(df_ogacei)):

    x = Chem.MolFromSmiles(df_ogacei['Canonical SMILES'][z]) # molecule in smiles format
    
    drug_functional_group = ['*C(=O)O','*[SH]','P']  # add all functional groups  
    
  
    # ACE functional groups
    carboxyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[0]))
    sulfhydryl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[1]))
    phosphinyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[2]))
    
    # combine above results
    functional_total = (Chem.MolToSmiles(x), len(carboxyl_matches),len(sulfhydryl_matches),len(phosphinyl_matches))
    ogacei_functional_list.append(functional_total) 


# save list as dataframe
df_ogace_functional = pd.DataFrame(ogacei_functional_list, columns =['Canonical SMILES','Carboxyl Functional Group (No.)','Sulfhydrl Functional Group (No.)','Phosphinyl Functional Group (No.)'])

df_ogace_functional.to_csv('SUPP 3.2 Original ACEIs Functional groups.csv')
df_ogace_functional


Unnamed: 0,Canonical SMILES,Carboxyl Functional Group (No.),Sulfhydrl Functional Group (No.),Phosphinyl Functional Group (No.)
0,C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@...,2,0,0
1,COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c...,2,0,0
2,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCc2ccccc2N(CC...,2,0,0
3,C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O,1,1,0
4,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCCN2CCC[C@@H]...,2,0,0
5,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[...,2,0,0
6,CCC(=O)O[C@@H](O[P@](=O)(CCCCc1ccccc1)CC(=O)N1...,2,0,1
7,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1C(=O...,2,0,0
8,NCCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CC...,2,0,0
9,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2c...,2,0,0


## Step 2.3: New ARBS

In [17]:

# create a list to store results of each new molecule

newarb_functional_list= list()

for z in range(len(df_finalarb)):

    x = Chem.MolFromSmiles(df_finalarb['0'][z]) # molecule in smiles format
    
    drug_functional_group = ['c1ccc(cc1)c2ccccc2','c1nc2ccccc2[nH]1','c1nc2ccccc2n1C','c1nn[nH]n1','[nH]1nnnc1'] 
    

    # ARB functional groups
    biphenyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[0]))
    benzimidazole_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[1])) or x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[2]))
    tetrazol_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[3])) or x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[4]))
    
    # combine above results
    functional_total = (Chem.MolToSmiles(x), len(biphenyl_matches),len(benzimidazole_matches),len(tetrazol_matches))
    newarb_functional_list.append(functional_total) 


# save list as dataframe
df_newarbfunctional = pd.DataFrame(newarb_functional_list, columns =['Canonical SMILES','Biphenyl Functional Group (No.)','Benzimidazole Functional Group (No.)','Tetrazol Functional Group (No.)'])

df_newarbfunctional.to_csv('SUPP 3.3 New ARBs Functional groups.csv')
df_newarbfunctional


Unnamed: 0,Canonical SMILES,Biphenyl Functional Group (No.),Benzimidazole Functional Group (No.),Tetrazol Functional Group (No.)
0,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...,0,0,1
1,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...,0,0,1
2,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
3,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
4,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
...,...,...,...,...
305,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
306,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
307,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1
308,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,0,0,1


## Step 2.4: Original ARBs

In [26]:

# create a list to store results of each new molecule

ogarb_functional_list= list()

for z in range(len(df_ogarb)):

    x = Chem.MolFromSmiles(df_ogarb['Canonical SMILES'][z]) # molecule in smiles format
    
    drug_functional_group = ['c1ccc(cc1)c2ccccc2','c1nc2ccccc2[nH]1','c1nc2ccccc2n1C','c1nn[nH]n1','[nH]1nnnc1']
    

    # ARB functional groups
    biphenyl_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[0]))
    benzimidazole_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[1])) or x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[2]))
    tetrazol_matches = x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[3])) or x.GetSubstructMatches(Chem.MolFromSmarts(drug_functional_group[4]))
    
    # combine above results
    functional_total = (Chem.MolToSmiles(x), len(biphenyl_matches),len(benzimidazole_matches),len(tetrazol_matches))
    ogarb_functional_list.append(functional_total) 


# save list as dataframe
df_ogarbfunctional = pd.DataFrame(ogarb_functional_list, columns =['Canonical SMILES','Biphenyl Functional Group (No.)','Benzimidazole Functional Group (No.)','Tetrazol Functional Group (No.)'])

df_ogarbfunctional.to_csv('SUPP 3.4 Original ARBs Functional groups.csv')
df_ogarbfunctional


KeyError: 0

# ---END HERE---