In [1]:
import joblib
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from ipywidgets import widgets, IntProgress
from IPython.display import display


from rdkit import Chem
from rdkit.Chem import Draw, Crippen, AllChem, Descriptors3D
from rdkit.Chem import rdMolDescriptors as descriptor

In [2]:
# Confirm that the model artefacts and the CSV splits are in the working directory.
!ls

X_test_main.csv
X_train_main.csv
final_model.joblib
final_model_importances.joblib
predictor.ipynb
y_test.csv
y_train.csv


In [3]:
# Fitted estimator and the object stored with its feature importances.
# NOTE: joblib.load unpickles arbitrary Python objects; only open trusted files.
model, features = (
    joblib.load(artefact)
    for artefact in ('final_model.joblib', 'final_model_importances.joblib')
)

# Design matrices and targets for the test and train splits.
X_test, X_train = (
    pd.read_csv(csv_name)
    for csv_name in ('X_test_main.csv', 'X_train_main.csv')
)
y_test, y_train = (
    pd.read_csv(csv_name)
    for csv_name in ('y_test.csv', 'y_train.csv')
)

In [4]:
# First 15 labels of the importances index, shown as a plain list.
features.index[:15].tolist()

['wt',
 'logp',
 'fraction_sp3',
 'ali_homo_rings',
 'ali_hetero_rings',
 'amide_bonds',
 'aro_homo_rings',
 'aro_hetero_rings',
 'hba',
 'hbd',
 'tpsa',
 'asph',
 'ecce',
 'isf',
 'npr1']

In [5]:
# Worked example: compute the 2D and 3D descriptors for a single molecule.
smiles = 'CC(N)C(=O)CCc1cccc(O)c1'
# Explicit hydrogens are added up front; every descriptor below is computed on
# the hydrogen-complete molecule, and the 3D embedding needs them as well.
mol = Chem.AddHs(Chem.MolFromSmiles(smiles))

# --- Descriptors that do not need a conformer ---
wt = descriptor.CalcExactMolWt(mol)
logp = Crippen.MolLogP(mol)
# fraction_sp3 appears in the feature list reported by the importances index
# but was previously not computed in this cell.
fraction_sp3 = descriptor.CalcFractionCSP3(mol)
ali_homo_rings = descriptor.CalcNumAliphaticCarbocycles(mol)
ali_hetero_rings = descriptor.CalcNumAliphaticHeterocycles(mol)
amide_bonds = descriptor.CalcNumAmideBonds(mol)
aro_homo_rings = descriptor.CalcNumAromaticCarbocycles(mol)
aro_hetero_rings = descriptor.CalcNumAromaticHeterocycles(mol)
hba = descriptor.CalcNumHBA(mol)
hbd = descriptor.CalcNumHBD(mol)
tpsa = descriptor.CalcTPSA(mol)

# --- Shape descriptors computed from an embedded, MMFF-relaxed conformer ---
# A fixed seed keeps the conformer (and so the values below) repeatable across
# runs. EmbedMolecule returns -1 when it cannot generate coordinates; without
# this check the failure would only surface later as an obscure error.
if AllChem.EmbedMolecule(mol, randomSeed=42) == -1:
    raise RuntimeError(f'Could not generate a 3D conformer for {smiles!r}')
Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(mol)
asph = Descriptors3D.Asphericity(mol)
ecce = Descriptors3D.Eccentricity(mol)
isf = Descriptors3D.InertialShapeFactor(mol)
npr1 = Descriptors3D.NPR1(mol)

In [65]:
def draw(smiles, Size):
    """Render the molecule written as `smiles` on a square matplotlib canvas.

    Parameters
    ----------
    smiles : str
        SMILES string to parse.
    Size : int
        Edge length forwarded to ``Draw.MolToMPL`` as ``size=(Size, Size)``.

    Returns
    -------
    None
        The figure is produced as a side effect. When the text cannot be
        parsed, a short message is printed and nothing is drawn.
    """
    m = Chem.MolFromSmiles(smiles)
    # MolFromSmiles returns None for unparsable input. When this function is
    # driven by a text box, that happens for most partially typed strings, so
    # report it instead of passing None to the drawing code.
    if m is None:
        print("Invalid SMILES:\t", smiles)
        return
    Draw.MolToMPL(m, size=(Size, Size))

In [67]:
# Interactive viewer: a text box for the SMILES and a slider for the canvas size.
# The slider is built explicitly because the bare `Size=100` shorthand expands
# to a range of -100..300, which allows zero or negative sizes that cannot be
# drawn. The default value stays at 100.
widgets.interact(
    draw,
    Size=widgets.IntSlider(value=100, min=50, max=500, step=10),
    smiles='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]',
)

interactive(children=(Text(value='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]', description='smiles')…

<function __main__.draw(smiles, Size)>

In [59]:
# Column order assumed for the model input: the first 15 names reported by the
# importances index (`features.index`) in this notebook. The previous code
# placed fraction_sp3 after tpsa, which disagrees with that listing.
# TODO(review): confirm this order against the columns of the training matrix.
_FEATURE_ORDER = (
    'wt', 'logp', 'fraction_sp3', 'ali_homo_rings', 'ali_hetero_rings',
    'amide_bonds', 'aro_homo_rings', 'aro_hetero_rings', 'hba', 'hbd',
    'tpsa', 'asph', 'ecce', 'isf', 'npr1',
)

# Fixed seed so the embedded conformer, and therefore the 3D descriptors and
# the prediction, are repeatable for the same SMILES.
_EMBED_SEED = 42


def _descriptor_values(mol):
    """Return a name -> value dict of descriptors for `mol`, or None if no conformer can be embedded.

    `mol` is modified in place: a conformer is added and MMFF-optimised.
    """
    values = {
        'wt': descriptor.CalcExactMolWt(mol),
        'logp': Crippen.MolLogP(mol),
        'fraction_sp3': descriptor.CalcFractionCSP3(mol),
        'ali_homo_rings': descriptor.CalcNumAliphaticCarbocycles(mol),
        'ali_hetero_rings': descriptor.CalcNumAliphaticHeterocycles(mol),
        'amide_bonds': descriptor.CalcNumAmideBonds(mol),
        'aro_homo_rings': descriptor.CalcNumAromaticCarbocycles(mol),
        'aro_hetero_rings': descriptor.CalcNumAromaticHeterocycles(mol),
        'hba': descriptor.CalcNumHBA(mol),
        'hbd': descriptor.CalcNumHBD(mol),
        'tpsa': descriptor.CalcTPSA(mol),
    }

    # EmbedMolecule returns -1 when it cannot generate 3D coordinates; the
    # shape descriptors below need a conformer, so stop here in that case.
    if AllChem.EmbedMolecule(mol, randomSeed=_EMBED_SEED) == -1:
        return None
    Chem.rdForceFieldHelpers.MMFFOptimizeMolecule(mol)

    values['asph'] = Descriptors3D.Asphericity(mol)
    values['ecce'] = Descriptors3D.Eccentricity(mol)
    values['isf'] = Descriptors3D.InertialShapeFactor(mol)
    values['npr1'] = Descriptors3D.NPR1(mol)
    return values


def predict(SMILES):
    """Draw the molecule given by `SMILES` and print the model's prediction for it.

    Parameters
    ----------
    SMILES : str
        SMILES string of the molecule to score.

    Returns
    -------
    None
        The drawing and the printed "Predicted HLC" line are side effects.
        If the SMILES cannot be parsed, describes no atoms, or cannot be
        embedded in 3D, a message is printed and no prediction is made.

    Notes
    -----
    Uses the notebook-level `model`. Descriptors are computed on the molecule
    with explicit hydrogens added.
    """
    m = Chem.MolFromSmiles(SMILES)
    # MolFromSmiles gives None for unparsable text and an atom-less molecule
    # for an empty string; neither can be drawn or embedded.
    if m is None or m.GetNumAtoms() == 0:
        print("Invalid SMILES:\t", SMILES)
        return
    Draw.MolToMPL(m, size=(100, 100))

    values = _descriptor_values(Chem.AddHs(m))
    if values is None:
        print("Could not generate a 3D conformer for:\t", SMILES)
        return

    # Single-row matrix in the column order the model is assumed to expect.
    # Named `row` so it does not shadow the notebook-level `features` object.
    row = np.array([values[name] for name in _FEATURE_ORDER]).reshape(1, -1)
    print("Predicted HLC:\t", model.predict(row)[0])

In [60]:
# Interactive predictor. `predict` embeds and force-field-optimises a conformer
# on every call, so the text box is configured to fire only when the edit is
# committed (Enter / focus change) rather than on every keystroke, which would
# otherwise run that work on each partially typed SMILES.
widgets.interact(
    predict,
    SMILES=widgets.Text(
        value='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]',
        continuous_update=False,
    ),
)

interactive(children=(Text(value='CC1=C(C=C(C=C1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[O-]', description='SMILES')…

<function __main__.predict(SMILES)>

In [37]:
# Leftover debugging probe. Its saved output (`function`) shows that `mol` was
# bound to a function when this cell ran, which no cell visible in this notebook
# does; presumably stale kernel state from a deleted cell.
# TODO(review): remove this cell, or re-run it on a fresh kernel.
type(mol)

function