# SUPPLEMENTARY PHASE 1: Physicochemical properties of new (final) molecules

"""

Created on Thursday October 12 2023 

Updated on Thursday October 26 2023


@author: Odifentse M Lehasa

The purpose of this notebook is to determine the physicochemical properties of the new valid molecules, as well as of the original ChEMBL ACEIs and ARBs, so that they can be displayed in the results chapter.

"""

## STEP 0: IMPORT LIBRARIES

In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import BRICS
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Lipinski
from rdkit.Chem import QED as QED
from rdkit.Chem import Recap as Recap
import pandas as pd
import seaborn as sns


from rdkit.Chem import RDConfig
import os
import sys
sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
import sascorer

## STEP 1: PROPERTY ANALYSIS OF NEW MOLECULES

### Step 1.1: NEW LEAD ACEIs properties

In [13]:
# Load the new lead ACEI molecules (TC similarity over 0.7).
# The first CSV column is used as the row index.
nace_csv = '/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Output/7. New lead ACEI molecules.csv'
df_nace = pd.read_csv(nace_csv, index_col=0)

# Show the loaded table
df_nace

Unnamed: 0,0
0,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
1,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
2,CC(C)[C@H](N[C@@H](CCCCN)C(=O)N1[C@H](C(=O)O)C...
3,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
4,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...
...,...
424,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](c2ccccc2)...
425,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](C2CCCCC2)...
426,CO[C@@H]1C[C@@H]2CCCC[C@@H]2N1[C@@H](CCCCN)C(=...
427,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](CO)C[C@@H...


In [14]:
# Inspect the column labels so the SMILES column can be referenced by name below
df_nace.columns

Index(['0'], dtype='object')

In [32]:

# Compute the Lipinski and physicochemical descriptors of every new lead ACEI
# molecule, flag each molecule against the two druggability tests, and save
# the resulting table to CSV.

properties_list = list()

# Iterate over the SMILES values directly (as the cells in Step 2 already do)
# instead of looking rows up with an integer counter: `df_nace['0'][z]` is a
# label lookup, so it breaks whenever the CSV index is not exactly 0..n-1.
for position, smiles in enumerate(df_nace['0']):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for a SMILES string
        # it cannot parse; stop with a readable message rather than failing
        # inside the first descriptor call.
        raise ValueError(f'Could not parse SMILES at row position {position}: {smiles!r}')

    # Determine number of aromatic and aliphatic rings
    aromatic = Lipinski.NumAromaticRings(mol)
    aliphatic = Lipinski.NumAliphaticRings(mol)

    # Lipinski rule of 5
    Mol_weight = Descriptors.MolWt(mol)
    LogP = Descriptors.MolLogP(mol)             # lipophilicity
    Hdonors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    Hacceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    # Druggability test: 1 only when all four limits are met, otherwise 0.
    # Plain `and` is used because these are single values, not arrays.
    Ro5_druggable = int(Mol_weight <= 500 and LogP <= 5 and Hdonors <= 5 and Hacceptors <= 10)

    # Physicochemical properties
    Exact_mol_weight = Descriptors.ExactMolWt(mol)
    Rotate_bonds = Lipinski.NumRotatableBonds(mol)
    heavy_atoms = Descriptors.HeavyAtomCount(mol)
    # Compute the QED property tuple once and reuse it for both the QED score
    # and the PSA instead of recomputing it for each call.
    qed_properties = QED.properties(mol)
    qed = QED.qed(mol, qedProperties=qed_properties)   # mean-weighted QED (same value as QED.weights_mean)
    PSA = qed_properties.PSA                           # element 4 of the property tuple
    prop_forcast_index = LogP + aromatic               # Property Forecast Index
    SAS = sascorer.calculateScore(mol)

    # Physicochemical test: 1 only when every limit is met, otherwise 0
    physico_druggable = int(
        heavy_atoms < 38 and PSA <= 140 and Rotate_bonds <= 10
        and aromatic < 4 and qed <= 1 and prop_forcast_index < 7
    )

    # Combine the above results, in the same order as the column names below
    properties_total = (Chem.MolToSmiles(mol), aromatic, aliphatic, Mol_weight, Exact_mol_weight, LogP, Hdonors,
                        Hacceptors, Rotate_bonds, heavy_atoms, qed, prop_forcast_index, PSA, SAS,
                        Ro5_druggable, physico_druggable)
    properties_list.append(properties_total)


# Save list as dataframe
df_props_nace = pd.DataFrame(properties_list, columns=['Canonical SMILES', 'Aromatic Rings (No.)', 'Aliphatic Rings (No.)',
                                                       'AVG Molecular weight', 'Exact Molecular weight', 'LogP', 'Hdonors',
                                                       'Hacceptors', 'Rotatable bonds', 'Heavy Atoms (No.)', 'QED',
                                                       'Property Forecast Index', 'PSA', 'SAscore',
                                                       'Druggable (Lipinski)', 'Druggable (Physicochemical)'])

df_props_nace.to_csv('SUPP 1.1 New ACEI lead molecules druggability properties.csv')
df_props_nace


Unnamed: 0,Canonical SMILES,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
0,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,0,4,485.672,485.201813,2.0635,4,8,8,32,0.443145,2.0635,124.17,4.982630,1,1
1,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,0,4,435.565,435.273321,2.0559,4,8,8,31,0.498504,2.0559,124.17,4.368082,1,1
2,CC(C)[C@H](N[C@@H](CCCCN)C(=O)N1[C@H](C(=O)O)C...,0,2,397.516,397.257671,1.4271,5,8,10,28,0.409802,1.4271,132.96,3.830100,1,1
3,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,0,4,477.646,477.320271,3.0837,4,8,9,34,0.435954,3.0837,124.17,4.267261,1,1
4,NCCCC[C@@H](C(=O)N1[C@H](C(=O)O)C[C@H]2CCCC[C@...,0,3,395.500,395.242021,1.2773,4,8,8,28,0.528802,1.2773,124.17,3.948688,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](c2ccccc2)...,1,2,430.589,430.283158,4.1422,3,6,10,31,0.427993,5.1422,92.86,4.009429,1,1
425,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](C2CCCCC2)...,0,3,436.637,436.330108,4.3498,3,6,10,31,0.390604,4.3498,92.86,4.148595,1,1
426,CO[C@@H]1C[C@@H]2CCCC[C@@H]2N1[C@@H](CCCCN)C(=...,0,2,384.517,384.262422,2.3734,3,7,10,27,0.440224,2.3734,102.09,4.272827,1,1
427,CC(C)[C@H](OC(=O)[C@H](CCCCN)N1[C@H](CO)C[C@@H...,0,2,384.517,384.262422,1.7618,4,7,10,27,0.388414,1.7618,113.09,4.184586,1,1


### Step 1.2: NEW LEAD ARBs properties

In [18]:
# Load the new lead ARB molecules (dataset from phase 7).
# The first CSV column is used as the row index.
narb_csv = '/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Output/7. New lead ARB molecules.csv'
df_narb = pd.read_csv(narb_csv, index_col=0)

# Show the loaded table
df_narb

Unnamed: 0,0
0,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...
1,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...
2,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
3,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
4,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...
...,...
305,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
306,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
307,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...
308,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...


In [19]:

# Compute the Lipinski and physicochemical descriptors of every new lead ARB
# molecule, flag each molecule against the two druggability tests, and save
# the resulting table to CSV.

properties_list = list()

# Iterate over the SMILES values directly (as the cells in Step 2 already do)
# instead of looking rows up with an integer counter: `df_narb['0'][z]` is a
# label lookup, so it breaks whenever the CSV index is not exactly 0..n-1.
for position, smiles in enumerate(df_narb['0']):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for a SMILES string
        # it cannot parse; stop with a readable message rather than failing
        # inside the first descriptor call.
        raise ValueError(f'Could not parse SMILES at row position {position}: {smiles!r}')

    # Determine number of aromatic and aliphatic rings
    aromatic = Lipinski.NumAromaticRings(mol)
    aliphatic = Lipinski.NumAliphaticRings(mol)

    # Lipinski rule of 5
    Mol_weight = Descriptors.MolWt(mol)
    LogP = Descriptors.MolLogP(mol)             # lipophilicity
    Hdonors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    Hacceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    # Druggability test: 1 only when all four limits are met, otherwise 0.
    # Plain `and` is used because these are single values, not arrays.
    Ro5_druggable = int(Mol_weight <= 500 and LogP <= 5 and Hdonors <= 5 and Hacceptors <= 10)

    # Physicochemical properties
    Exact_mol_weight = Descriptors.ExactMolWt(mol)
    Rotate_bonds = Lipinski.NumRotatableBonds(mol)
    heavy_atoms = Descriptors.HeavyAtomCount(mol)
    # Compute the QED property tuple once and reuse it for both the QED score
    # and the PSA instead of recomputing it for each call.
    qed_properties = QED.properties(mol)
    qed = QED.qed(mol, qedProperties=qed_properties)   # mean-weighted QED (same value as QED.weights_mean)
    PSA = qed_properties.PSA                           # element 4 of the property tuple
    prop_forcast_index = LogP + aromatic               # Property Forecast Index
    SAS = sascorer.calculateScore(mol)

    # Physicochemical test: 1 only when every limit is met, otherwise 0
    physico_druggable = int(
        heavy_atoms < 38 and PSA <= 140 and Rotate_bonds <= 10
        and aromatic < 4 and qed <= 1 and prop_forcast_index < 7
    )

    # Combine the above results, in the same order as the column names below
    properties_total = (Chem.MolToSmiles(mol), aromatic, aliphatic, Mol_weight, Exact_mol_weight, LogP, Hdonors,
                        Hacceptors, Rotate_bonds, heavy_atoms, qed, prop_forcast_index, PSA, SAS,
                        Ro5_druggable, physico_druggable)
    properties_list.append(properties_total)


# Save list as dataframe
df_props_narb = pd.DataFrame(properties_list, columns=['Canonical SMILES', 'Aromatic Rings (No.)', 'Aliphatic Rings (No.)',
                                                       'AVG Molecular weight', 'Exact Molecular weight', 'LogP', 'Hdonors',
                                                       'Hacceptors', 'Rotatable bonds', 'Heavy Atoms (No.)', 'QED',
                                                       'Property Forecast Index', 'PSA', 'SAscore',
                                                       'Druggable (Lipinski)', 'Druggable (Physicochemical)'])

df_props_narb.to_csv('SUPP 1.2 New ARB lead molecules druggability properties.csv')
df_props_narb


Unnamed: 0,Canonical SMILES,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
0,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...,2,3,467.618,467.300873,1.9487,4,9,8,34,0.506932,3.9487,124.26,4.511764,1,1
1,NCCCC[C@@H](C(=O)N1[C@H](CO)C[C@H]2CCCC[C@@H]2...,2,3,467.618,467.300873,1.9487,4,9,8,34,0.506932,3.9487,124.26,4.417719,1,1
2,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,1,3,417.602,417.321609,2.7940,3,8,9,30,0.598860,3.7940,104.03,4.529208,1,1
3,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,1,3,417.602,417.321609,2.7940,3,8,9,30,0.598860,3.7940,104.03,4.436093,1,1
4,CCC[C@@H]1C[C@H]2CCCC[C@@H]2N1C(=O)[C@H](CCCCN...,1,4,471.694,471.368559,3.9627,3,8,9,34,0.528369,4.9627,104.03,4.831339,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,1,4,457.667,457.352909,3.5726,3,8,9,33,0.549473,4.5726,104.03,4.762681,1,1
306,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,2,3,465.646,465.321609,3.3664,3,8,9,34,0.550457,5.3664,104.03,4.542772,1,1
307,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,2,3,465.646,465.321609,3.3664,3,8,9,34,0.550457,5.3664,104.03,4.448728,1,1
308,CCC[C@@H]1C[C@@H]2CCC[C@@H]2N1C(=O)[C@H](CCCCN...,1,4,485.721,485.384209,4.2103,3,8,10,35,0.481206,5.2103,104.03,4.768699,1,1


### Step 1.3: NEW unassigned molecules - properties


In [15]:
# Load the cluster of molecules that did not match ARB or ACEI in phase 7
# (the unassigned molecules). The first CSV column is used as the row index.
unassigned_csv = '/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Output/6.3 Cluster 0 (unassigned molecules).csv'
df_unassigned = pd.read_csv(unassigned_csv, index_col=0)

# Show the loaded table
df_unassigned

Unnamed: 0,Canonical SMILES
a1,CCN(c1cccs1)[C@@H]1CC2(CN1c1oc(=O)oc1C)SCCS2
a2,CCN(c1cccs1)[C@@H]1CC2(CN1c1nnn[nH]1)SCCS2
a3,CCN(c1cccs1)[C@@H]1CC2(CN1c1nn[nH]n1)SCCS2
a4,CCN(c1cccs1)[C@@H]1CC2(CN1c1noc(=O)[nH]1)SCCS2
a5,CCN(c1cccs1)[C@@H]1CC2(CN1c1noc(C)c1C)SCCS2
...,...
a359,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2nnn[nH]2)SCC...
a360,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2nn[nH]n2)SCC...
a361,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2noc(=O)[nH]2...
a362,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2CO)SCCS3)cc1


In [17]:

# Compute the Lipinski and physicochemical descriptors of every unassigned
# molecule, flag each molecule against the two druggability tests, and save
# the resulting table to CSV.
#
# NOTE(review): the saved output below lists a 'Solubility' column that this
# cell does not compute - re-run the cell to refresh the output.

unassigned_properties_list = list()

# The index of df_unassigned holds string labels ('a1', 'a2', ...), so the
# integer lookup `df_unassigned['Canonical SMILES'][z]` only worked through
# pandas' positional fallback for integer keys, which is deprecated.
# Iterating over the column values avoids any index lookup.
for position, smiles in enumerate(df_unassigned['Canonical SMILES']):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for a SMILES string
        # it cannot parse; stop with a readable message rather than failing
        # inside the first descriptor call.
        raise ValueError(f'Could not parse SMILES at row position {position}: {smiles!r}')

    # Determine number of aromatic and aliphatic rings
    aromatic = Lipinski.NumAromaticRings(mol)
    aliphatic = Lipinski.NumAliphaticRings(mol)

    # Lipinski rule of 5
    Mol_weight = Descriptors.MolWt(mol)
    LogP = Descriptors.MolLogP(mol)             # lipophilicity
    Hdonors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    Hacceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    # Druggability test: 1 only when all four limits are met, otherwise 0.
    # Plain `and` is used because these are single values, not arrays.
    Ro5_druggable = int(Mol_weight <= 500 and LogP <= 5 and Hdonors <= 5 and Hacceptors <= 10)

    # Physicochemical properties
    Exact_mol_weight = Descriptors.ExactMolWt(mol)
    Rotate_bonds = Lipinski.NumRotatableBonds(mol)
    heavy_atoms = Descriptors.HeavyAtomCount(mol)
    # Compute the QED property tuple once and reuse it for both the QED score
    # and the PSA instead of recomputing it for each call.
    qed_properties = QED.properties(mol)
    qed = QED.qed(mol, qedProperties=qed_properties)   # mean-weighted QED (same value as QED.weights_mean)
    PSA = qed_properties.PSA                           # element 4 of the property tuple
    prop_forcast_index = LogP + aromatic               # Property Forecast Index
    SAS = sascorer.calculateScore(mol)

    # Physicochemical test: 1 only when every limit is met, otherwise 0
    physico_druggable = int(
        heavy_atoms < 38 and PSA <= 140 and Rotate_bonds <= 10
        and aromatic < 4 and qed <= 1 and prop_forcast_index < 7
    )

    # Combine the above results, in the same order as the column names below
    unassigned_properties_total = (Chem.MolToSmiles(mol), aromatic, aliphatic, Mol_weight, Exact_mol_weight, LogP,
                                   Hdonors, Hacceptors, Rotate_bonds, heavy_atoms, qed, prop_forcast_index, PSA, SAS,
                                   Ro5_druggable, physico_druggable)
    unassigned_properties_list.append(unassigned_properties_total)


# Save list as dataframe
df_unassigned_props = pd.DataFrame(unassigned_properties_list, columns=['Canonical SMILES', 'Aromatic Rings (No.)', 'Aliphatic Rings (No.)',
                                                                        'AVG Molecular weight', 'Exact Molecular weight', 'LogP', 'Hdonors',
                                                                        'Hacceptors', 'Rotatable bonds', 'Heavy Atoms (No.)', 'QED',
                                                                        'Property Forecast Index', 'PSA', 'SAscore',
                                                                        'Druggable (Lipinski)', 'Druggable (Physicochemical)'])

df_unassigned_props.to_csv('SUPP 1.3 New unassigned molecules (Cluster) druggability properties.csv')
df_unassigned_props


Unnamed: 0,Canonical SMILES,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),Solubility,QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
0,CCN(c1cccs1)[C@@H]1CC2(CN1c1oc(=O)oc1C)SCCS2,2,2,384.548,384.063605,3.84172,0,5,4,24,15.622202,0.793557,5.84172,49.83,4.839712,1,1
1,CCN(c1cccs1)[C@@H]1CC2(CN1c1nnn[nH]1)SCCS2,2,2,354.530,354.075508,2.50020,1,6,4,22,9.799819,0.904932,4.50020,60.94,4.936565,1,1
2,CCN(c1cccs1)[C@@H]1CC2(CN1c1nn[nH]n1)SCCS2,2,2,354.530,354.075508,2.50020,1,6,4,22,9.799819,0.904932,4.50020,60.94,4.785063,1,1
3,CCN(c1cccs1)[C@@H]1CC2(CN1c1noc(=O)[nH]1)SCCS2,2,2,370.525,370.059189,2.66340,1,6,4,23,15.555882,0.887391,4.66340,65.37,4.983484,1,1
4,CCN(c1cccs1)[C@@H]1CC2(CN1c1noc(C)c1C)SCCS2,2,2,381.592,381.100325,4.59194,0,4,4,24,9.799819,0.770024,6.59194,32.51,4.677898,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2nnn[nH]2)SCC...,2,3,402.593,402.166037,3.48420,1,6,4,27,4.899910,0.839879,5.48420,60.94,4.567005,1,1
359,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2nn[nH]n2)SCC...,2,3,402.593,402.166037,3.48420,1,6,4,27,4.899910,0.839879,5.48420,60.94,4.450625,1,1
360,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2c2noc(=O)[nH]2...,2,3,418.588,418.149718,3.64740,1,6,4,28,10.655973,0.812930,5.64740,65.37,4.660058,1,1
361,CCN1CCC[C@H]1c1ccc(N2CC3(C[C@H]2CO)SCCS3)cc1,1,3,364.580,364.164306,3.59070,1,3,4,24,4.899910,0.880935,4.59070,26.71,4.187231,1,1


# STEP 2: PROPERTY ANALYSIS OF ORIGINAL CHEMBL MOLECULES

## Step 2.1: CHEMBL ACEI properties

In [20]:
# Load the original ACEI molecules.
# The first CSV column is used as the row index.
ace_csv = '/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Data/ACE Inhibitors.csv'
df_ace = pd.read_csv(ace_csv, index_col=0)

# Show the loaded table
df_ace

Unnamed: 0,Molecule Name,ATC Code,Class,Canonical SMILES
1,Enalaprilat,C09A,ACE Inhibitors,C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@...
3,Rescinnamine,C09A,ACE Inhibitors,COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c...
7,Benazepril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCc2ccccc2N(CC...
10,Captopril,C09A,ACE Inhibitors,C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O
11,Cilazapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCCN2CCC[C@@H]...
13,Enalapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[...
16,Fosinopril,C09A,ACE Inhibitors,CCC(=O)O[C@@H](O[P@](=O)(CCCCc1ccccc1)CC(=O)N1...
17,Imidapril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1C(=O...
19,Lisinopril,C09A,ACE Inhibitors,NCCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CC...
21,Moexipril,C09A,ACE Inhibitors,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2c...


In [21]:
# Parse every original ACEI SMILES string into an RDKit molecule object
ace_list = df_ace['Canonical SMILES']
ace = list(map(Chem.MolFromSmiles, ace_list))
ace

[<rdkit.Chem.rdchem.Mol at 0x7ff705234d60>,
 <rdkit.Chem.rdchem.Mol at 0x7ff705234e20>,
 <rdkit.Chem.rdchem.Mol at 0x7ff705234fa0>,
 <rdkit.Chem.rdchem.Mol at 0x7ff705234040>,
 <rdkit.Chem.rdchem.Mol at 0x7ff705234880>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295fa0>,
 <rdkit.Chem.rdchem.Mol at 0x7ff7132955e0>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295100>,
 <rdkit.Chem.rdchem.Mol at 0x7ff7132956a0>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295dc0>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295e80>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295e20>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295820>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295700>,
 <rdkit.Chem.rdchem.Mol at 0x7ff713295040>]

In [22]:

# Compute the Lipinski and physicochemical descriptors of every original ACEI
# molecule, flag each molecule against the two druggability tests, and save
# the resulting table to CSV.

properties_list = list()

ace_list = df_ace['Canonical SMILES']

for position, smiles in enumerate(ace_list):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for a SMILES string
        # it cannot parse; stop with a readable message rather than failing
        # inside the first descriptor call.
        raise ValueError(f'Could not parse SMILES at row position {position}: {smiles!r}')

    # Determine number of aromatic and aliphatic rings
    aromatic = Lipinski.NumAromaticRings(mol)
    aliphatic = Lipinski.NumAliphaticRings(mol)

    # Lipinski rule of 5
    Mol_weight = Descriptors.MolWt(mol)
    LogP = Descriptors.MolLogP(mol)             # lipophilicity
    Hdonors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    Hacceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    # Druggability test: 1 only when all four limits are met, otherwise 0.
    # Plain `and` is used because these are single values, not arrays.
    Ro5_druggable = int(Mol_weight <= 500 and LogP <= 5 and Hdonors <= 5 and Hacceptors <= 10)

    # Physicochemical properties
    Exact_mol_weight = Descriptors.ExactMolWt(mol)
    Rotate_bonds = Lipinski.NumRotatableBonds(mol)
    heavy_atoms = Descriptors.HeavyAtomCount(mol)
    # Compute the QED property tuple once and reuse it for both the QED score
    # and the PSA instead of recomputing it for each call.
    qed_properties = QED.properties(mol)
    qed = QED.qed(mol, qedProperties=qed_properties)   # mean-weighted QED (same value as QED.weights_mean)
    PSA = qed_properties.PSA                           # element 4 of the property tuple
    prop_forcast_index = LogP + aromatic               # Property Forecast Index
    SAS = sascorer.calculateScore(mol)

    # Physicochemical test: 1 only when every limit is met, otherwise 0
    physico_druggable = int(
        heavy_atoms < 38 and PSA <= 140 and Rotate_bonds <= 10
        and aromatic < 4 and qed <= 1 and prop_forcast_index < 7
    )

    # Combine the above results, in the same order as the column names below
    properties_total = (Chem.MolToSmiles(mol), aromatic, aliphatic, Mol_weight, Exact_mol_weight, LogP, Hdonors,
                        Hacceptors, Rotate_bonds, heavy_atoms, qed, prop_forcast_index, PSA, SAS,
                        Ro5_druggable, physico_druggable)
    properties_list.append(properties_total)


# Save list as dataframe
df_props_ace = pd.DataFrame(properties_list, columns=['Canonical SMILES', 'Aromatic Rings (No.)', 'Aliphatic Rings (No.)',
                                                      'AVG Molecular weight', 'Exact Molecular weight', 'LogP', 'Hdonors',
                                                      'Hacceptors', 'Rotatable bonds', 'Heavy Atoms (No.)', 'QED',
                                                      'Property Forecast Index', 'PSA', 'SAscore',
                                                      'Druggable (Lipinski)', 'Druggable (Physicochemical)'])

df_props_ace.to_csv('SUPP 1.4 Original ACEIs druggability properties.csv')
df_props_ace


Unnamed: 0,Canonical SMILES,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
0,C[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CCC[C@...,1,1,384.429,384.189651,-0.5233,7,9,8,27,0.543998,0.4767,169.94,3.164473,0,0
1,COC(=O)[C@H]1[C@H]2C[C@@H]3c4[nH]c5cc(OC)ccc5c...,3,3,634.726,634.289031,4.5707,1,11,9,46,0.265026,7.5707,117.78,4.458333,0,0
2,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCc2ccccc2N(CC...,2,1,424.497,424.199822,2.5731,2,7,9,31,0.600953,4.5731,95.94,3.004239,1,1
3,C[C@H](CS)C(=O)N1CCC[C@H]1C(=O)O,0,1,217.29,217.077264,0.6279,1,4,3,14,0.681611,0.6279,57.61,3.032547,1,1
4,CCOC(=O)[C@H](CCc1ccccc1)N[C@H]1CCCN2CCC[C@@H]...,1,2,417.506,417.226371,1.5955,2,8,8,30,0.61994,2.5955,99.18,3.477445,1,1
5,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[...,1,1,376.453,376.199822,1.6046,2,7,9,27,0.635835,2.6046,95.94,3.051027,1,1
6,CCC(=O)O[C@@H](O[P@](=O)(CCCCc1ccccc1)CC(=O)N1...,1,2,563.672,563.301189,6.1213,1,8,14,39,0.125631,7.1213,110.21,4.100364,0,0
7,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1C(=O...,1,1,405.451,405.189986,0.8762,2,9,9,29,0.585861,1.8762,116.25,3.442854,1,1
8,NCCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N1CC...,1,1,405.495,405.226371,1.2352,5,8,12,29,0.384488,2.2352,132.96,3.172403,1,0
9,CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2c...,2,1,498.576,498.236601,2.5843,2,9,11,36,0.45464,4.5843,114.4,3.385282,1,0


## Step 2.2: CHEMBL ARBs properties

In [23]:
# Load the original ARB molecules.
# The first CSV column is used as the row index.
arb_csv = '/Users/odilehasa/Hypertension/Final_Experiments/FINAL - October/Data/ARBs.csv'
df_arb = pd.read_csv(arb_csv, index_col=0)

# Show the loaded table
df_arb

Unnamed: 0,Molecule Name,ATC Code,Class,Canonical SMILES
2,Olmesartan,C09C,ARBs,CCCc1nc(C(C)(C)O)c(C(=O)O)n1Cc1ccc(-c2ccccc2-c...
4,Sparsentan,C09C,ARBs,CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C(COCC)=C1)C1...
6,Azilsartan medoxomil,C09C,ARBs,CCOc1nc2cccc(C(=O)OCc3oc(=O)oc3C)c2n1Cc1ccc(-c...
9,Candesartan Cilexetil,C09C,ARBs,CCOc1nc2cccc(C(=O)OC(C)OC(=O)OC3CCCCC3)c2n1Cc1...
14,Eprosartan,C09C,ARBs,CCCCc1ncc(/C=C(\Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)...
18,Irbesartan,C09C,ARBs,CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[...
20,Losartan,C09C,ARBs,CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2...
22,Olmesartan Medoxomil,C09C,ARBs,CCCc1nc(C(C)(C)O)c(C(=O)OCc2oc(=O)oc2C)n1Cc1cc...
29,Telmisartan,C09C,ARBs,CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2...
32,Valsartan,C09C,ARBs,CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@...


In [24]:

# Compute the Lipinski and physicochemical descriptors of every original ARB
# molecule, flag each molecule against the two druggability tests, and save
# the resulting table to CSV.

properties_list = list()

arb_list = df_arb['Canonical SMILES']

for position, smiles in enumerate(arb_list):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles returns None (it does not raise) for a SMILES string
        # it cannot parse; stop with a readable message rather than failing
        # inside the first descriptor call.
        raise ValueError(f'Could not parse SMILES at row position {position}: {smiles!r}')

    # Determine number of aromatic and aliphatic rings
    aromatic = Lipinski.NumAromaticRings(mol)
    aliphatic = Lipinski.NumAliphaticRings(mol)

    # Lipinski rule of 5
    Mol_weight = Descriptors.MolWt(mol)
    LogP = Descriptors.MolLogP(mol)             # lipophilicity
    Hdonors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    Hacceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    # Druggability test: 1 only when all four limits are met, otherwise 0.
    # Plain `and` is used because these are single values, not arrays.
    Ro5_druggable = int(Mol_weight <= 500 and LogP <= 5 and Hdonors <= 5 and Hacceptors <= 10)

    # Physicochemical properties
    Exact_mol_weight = Descriptors.ExactMolWt(mol)
    Rotate_bonds = Lipinski.NumRotatableBonds(mol)
    heavy_atoms = Descriptors.HeavyAtomCount(mol)
    # Compute the QED property tuple once and reuse it for both the QED score
    # and the PSA instead of recomputing it for each call.
    qed_properties = QED.properties(mol)
    qed = QED.qed(mol, qedProperties=qed_properties)   # mean-weighted QED (same value as QED.weights_mean)
    PSA = qed_properties.PSA                           # element 4 of the property tuple
    prop_forcast_index = LogP + aromatic               # Property Forecast Index
    SAS = sascorer.calculateScore(mol)

    # Physicochemical test: 1 only when every limit is met, otherwise 0
    physico_druggable = int(
        heavy_atoms < 38 and PSA <= 140 and Rotate_bonds <= 10
        and aromatic < 4 and qed <= 1 and prop_forcast_index < 7
    )

    # Combine the above results, in the same order as the column names below
    properties_total = (Chem.MolToSmiles(mol), aromatic, aliphatic, Mol_weight, Exact_mol_weight, LogP, Hdonors,
                        Hacceptors, Rotate_bonds, heavy_atoms, qed, prop_forcast_index, PSA, SAS,
                        Ro5_druggable, physico_druggable)
    properties_list.append(properties_total)


# Save list as dataframe
df_props_arb = pd.DataFrame(properties_list, columns=['Canonical SMILES', 'Aromatic Rings (No.)', 'Aliphatic Rings (No.)',
                                                      'AVG Molecular weight', 'Exact Molecular weight', 'LogP', 'Hdonors',
                                                      'Hacceptors', 'Rotatable bonds', 'Heavy Atoms (No.)', 'QED',
                                                      'Property Forecast Index', 'PSA', 'SAscore',
                                                      'Druggable (Lipinski)', 'Druggable (Physicochemical)'])

df_props_arb.to_csv('SUPP 1.5 Original ARBs druggability properties.csv')
df_props_arb


Unnamed: 0,Canonical SMILES,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
0,CCCc1nc(C(C)(C)O)c(C(=O)O)n1Cc1ccc(-c2ccccc2-c...,4,0,446.511,446.206639,3.6566,3,9,8,33,0.376501,7.6566,129.81,2.807026,1,0
1,CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2S(=O)(...,3,2,592.762,592.271941,6.53944,1,9,12,42,0.255475,9.53944,114.1,3.622813,0,0
2,CCOc1nc2cccc(C(=O)OCc3oc(=O)oc3C)c2n1Cc1ccc(-c...,6,0,568.542,568.159414,4.70522,1,12,9,42,0.242491,10.70522,155.59,3.230777,0,0
3,CCOc1nc2cccc(C(=O)OC(C)OC(=O)OC3CCCCC3)c2n1Cc1...,5,1,610.671,610.253983,6.3191,1,12,10,45,0.14091,11.3191,143.34,3.458826,0,0
4,CCCCc1ncc(/C=C(\Cc2cccs2)C(=O)O)n1Cc1ccc(C(=O)...,3,0,424.522,424.145678,4.7444,2,6,10,30,0.458536,7.7444,92.42,2.61339,1,0
5,CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[...,3,2,428.54,428.23246,4.7774,1,7,7,32,0.586744,7.7774,87.13,3.161445,1,0
6,CCCCc1nc(Cl)c(CO)n1Cc1ccc(-c2ccccc2-c2nnn[nH]2...,4,0,422.92,422.162187,4.2668,2,7,8,30,0.44208,8.2668,92.51,2.650099,1,0
7,CCCc1nc(C(C)(C)O)c(C(=O)OCc2oc(=O)oc2C)n1Cc1cc...,5,0,558.595,558.222683,4.17002,2,12,10,41,0.237908,9.17002,162.16,3.226134,0,0
8,CCCc1nc2c(C)cc(-c3nc4ccccc4n3C)cc2n1Cc1ccc(-c2...,6,0,514.629,514.236876,7.26442,1,6,7,39,0.243239,13.26442,72.94,2.524349,0,0
9,CCCCC(=O)N(Cc1ccc(-c2ccccc2-c2nnn[nH]2)cc1)[C@...,3,0,435.528,435.22704,4.1617,2,8,10,32,0.494432,7.1617,112.07,3.052883,1,0


# Step 3: Determine Summary Descriptive Statistics

In [26]:
# New lead ACEIs: summary statistics (count, mean, std, min, quartiles, max)
# of the numeric property columns, saved to CSV and displayed.
nace_summary_csv = 'SUPP 1.6.1 Physicochemical analysis - New lead ACEIs.csv'
df1 = df_props_nace.describe()
df1.to_csv(nace_summary_csv)
df1

Unnamed: 0,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
count,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0,429.0
mean,0.666667,1.79021,399.858436,399.554711,2.341866,2.93007,6.83683,9.153846,27.923077,0.399963,3.008533,101.234942,3.94902,1.0,1.0
std,0.763253,0.725244,49.186102,49.135168,1.015302,0.713524,1.223388,0.944544,3.632171,0.082018,1.429341,18.367438,0.442059,0.0,0.0
min,0.0,0.0,258.318,258.157957,-0.2984,2.0,5.0,7.0,18.0,0.205775,-0.2984,72.63,2.941956,1.0,1.0
25%,0.0,1.0,369.506,369.262757,1.5892,2.0,6.0,8.0,26.0,0.357471,1.9742,92.86,3.62566,1.0,1.0
50%,1.0,2.0,394.537,394.192628,2.3067,3.0,7.0,10.0,27.0,0.396475,2.8016,98.66,3.948688,1.0,1.0
75%,1.0,2.0,434.693,434.173156,2.981,3.0,8.0,10.0,30.0,0.448156,4.1021,115.98,4.240386,1.0,1.0
max,3.0,4.0,499.652,499.304621,4.9708,5.0,10.0,10.0,36.0,0.61191,6.8227,139.78,5.127198,1.0,1.0


In [27]:
# New lead ARBs: summary statistics (count, mean, std, min, quartiles, max)
# of the numeric property columns, saved to CSV and displayed.
narb_summary_csv = 'SUPP 1.6.2 Physicochemical analysis - New lead ARBs.csv'
df2 = df_props_narb.describe()
df2.to_csv(narb_summary_csv)
df2

Unnamed: 0,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
count,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0,310.0
mean,1.690323,2.477419,430.462206,430.171505,2.474038,3.225806,8.690323,8.612903,31.032258,0.513035,4.164361,116.462,4.318959,1.0,1.0
std,0.706582,0.980954,42.423184,42.384526,1.042831,0.418788,0.776414,0.799572,3.143637,0.064827,1.200585,11.575892,0.354299,0.0,0.0
min,1.0,0.0,296.375,296.196074,-0.113,3.0,8.0,8.0,21.0,0.3123,0.887,104.03,3.372239,1.0,1.0
25%,1.0,2.0,409.54,409.255703,1.77941,3.0,8.0,8.0,29.0,0.481103,3.4025,104.03,4.031548,1.0,1.0
50%,2.0,2.0,433.601,433.316523,2.5603,3.0,8.0,8.0,31.0,0.517904,4.2198,110.02,4.343141,1.0,1.0
75%,2.0,3.0,464.64825,464.328163,3.1837,3.0,9.0,9.0,33.0,0.551226,5.08061,127.6525,4.58673,1.0,1.0
max,3.0,4.0,499.748,499.399859,4.6004,4.0,10.0,10.0,36.0,0.661546,6.1604,136.05,5.045787,1.0,1.0


In [29]:
# Original ACEIs: summary statistics (count, mean, std, min, quartiles, max)
# of the numeric property columns, saved to CSV and displayed.
ace_summary_csv = 'SUPP 1.6.3 Physicochemical analysis - Original ACEIs.csv'
df3 = df_props_ace.describe()
df3.to_csv(ace_summary_csv)
df3

Unnamed: 0,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
count,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0
mean,1.2,1.533333,432.349467,432.013126,2.249487,2.333333,7.666667,9.066667,30.6,0.519825,3.449487,105.994,3.512179,0.8,0.666667
std,0.774597,0.63994,93.851109,93.777615,1.595223,1.58865,1.543033,2.344192,6.926141,0.153301,2.069641,24.266098,0.44064,0.414039,0.48795
min,0.0,1.0,217.29,217.077264,-0.5233,1.0,4.0,3.0,14.0,0.125631,0.4767,57.61,3.004239,0.0,0.0
25%,1.0,1.0,394.94,394.689818,1.41535,2.0,7.0,8.5,28.0,0.495248,2.0879,95.94,3.168438,1.0,0.0
50%,1.0,1.0,417.506,417.226371,2.3832,2.0,7.0,9.0,30.0,0.585103,3.3832,95.94,3.442854,1.0,1.0
75%,1.5,2.0,470.805,470.175882,2.6788,2.0,8.5,9.0,32.0,0.610447,4.5787,115.325,3.655355,1.0,1.0
max,3.0,3.0,634.726,634.289031,6.1213,7.0,11.0,14.0,46.0,0.681611,7.5707,169.94,4.458333,1.0,1.0


In [30]:
# Original ARBs: summary statistics (count, mean, std, min, quartiles, max)
# of the numeric property columns, saved to CSV and displayed.
arb_summary_csv = 'SUPP 1.6.4 Physicochemical analysis - Original ARBs.csv'
df4 = df_props_arb.describe()
df4.to_csv(arb_summary_csv)
df4

Unnamed: 0,Aromatic Rings (No.),Aliphatic Rings (No.),AVG Molecular weight,Exact Molecular weight,LogP,Hdonors,Hacceptors,Rotatable bonds,Heavy Atoms (No.),QED,Property Forecast Index,PSA,SAscore,Druggable (Lipinski),Druggable (Physicochemical)
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,4.2,0.5,500.322,499.91189,5.06051,1.6,8.8,9.1,36.6,0.347832,9.26051,116.207,3.034774,0.5,0.0
std,1.229273,0.849837,76.709921,76.710636,1.208147,0.699206,2.440401,1.595131,5.738757,0.143758,1.969361,30.641963,0.373417,0.527046,0.0
min,3.0,0.0,422.92,422.162187,3.6566,1.0,6.0,7.0,30.0,0.14091,7.1617,72.94,2.524349,0.0,0.0
25%,3.0,0.0,430.287,429.981105,4.194215,1.0,7.0,8.0,32.0,0.242678,7.75265,92.4425,2.689331,0.0,0.0
50%,4.0,0.0,480.57,480.221757,4.72481,1.5,8.5,9.5,36.0,0.315988,8.71841,113.085,3.107164,0.5,0.0
75%,5.0,0.75,566.05525,565.675231,5.933675,2.0,11.25,10.0,41.75,0.454422,10.413775,139.9575,3.229616,1.0,0.0
max,6.0,2.0,610.671,610.253983,7.26442,3.0,12.0,12.0,45.0,0.586744,13.26442,162.16,3.622813,1.0,0.0


In [31]:
# New unassigned molecules: summary statistics of the numeric property
# columns, saved to CSV and displayed.
#
# NOTE(review): the saved output below is a NameError for df_unassigned_props;
# the Step 1.3 cell that builds it must be run before this cell.
# NOTE(review): the input file loaded in Step 1.3 is named 'Cluster 0', while
# the file name written here says 'cluster 1' - confirm which label is intended.
unassigned_summary_csv = 'SUPP 1.6.5 Physicochemical analysis - New unassigned molecules (cluster 1).csv'
df5 = df_unassigned_props.describe()
df5.to_csv(unassigned_summary_csv)
df5

NameError: name 'df_unassigned_props' is not defined

# --- END ---