In [1]:
import pickle
import sys
from contextlib import redirect_stdout
from typing import Optional, Union, List

import pandas as pd
import seaborn as sns
import rdkit
from rdkit.Chem import AllChem, rdFreeSASA, rdMolDescriptors, rdqueries
import torch
import vina
import meeko
import pexpect
import numpy as np
from scipy.stats import norm
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import dimorphite_dl



In [2]:
# Compound name -> SMILES string for the molecules processed by the cells below.
# Raw string literals are used because SMILES can contain backslashes
# (e.g. the "\2" double-bond stereo marker in Sunitinib).
knowndrugs = {
    "small_mol": r"N[C@H]1CC[C@@H](C(=O)N2CC[C@@H](c3cccc(Cl)c3)[C@@H]3[C@@H]2C2CC[NH+]3CC2)O1",
    "Gilteritinib": r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5",
    "Quizartinib": r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6",
    "Crenolanib": r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
    "Tandutinib": r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
    "Sorafenib": r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
    "Sunitinib": r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
    "Lestaurtinib": r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
    "Midostaurin": r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
    "Ponatinib": r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C",
}

In [5]:
# Crude per-compound score estimate.
# For every entry of `knowndrugs`: embed one 3D conformer, compute its
# solvent-accessible surface area (SASA), and combine SASA, the H-bond
# donor/acceptor count and the SASA of "hydrophobic" carbons into `max_score`.

# Fixed seed for conformer embedding so SASA (and the score) is reproducible
# across Restart & Run All; an unseeded embedding gives different numbers each run.
EMBED_SEED = 42

# Radii (angstrom) used for the mean per-atom surface term; the values match the
# Bondi van der Waals radii of carbon, nitrogen and oxygen.
RADIUS_C, RADIUS_N, RADIUS_O = 1.7, 1.55, 1.52

# Weights of the three score terms.
# NOTE(review): these look like rounded AutoDock Vina term weights - confirm the source.
W_HBOND = 0.6
W_GAUSS = 0.045
W_HYDROPHOBIC = 0.035

for drug, smiles in knowndrugs.items():
    mol = rdkit.Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None, not by raising.
        raise ValueError(f"RDKit could not parse the SMILES for {drug!r}: {smiles}")
    mol = rdkit.Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; retry once
    # from random coordinates before giving up, so the failure is reported here
    # rather than as an obscure error inside CalcSASA.
    conf_id = AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED)
    if conf_id == -1:
        conf_id = AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED, useRandomCoords=True)
    if conf_id == -1:
        raise RuntimeError(f"Could not embed a 3D conformer for {drug!r}")

    radii = rdFreeSASA.classifyAtoms(mol)
    print(drug)
    # The per-atom "SASA" property read further down is expected to be set by this call.
    SASA = rdFreeSASA.CalcSASA(mol, radii)

    heavyatoms = rdMolDescriptors.CalcNumHeavyAtoms(mol)
    hbonds = rdMolDescriptors.CalcNumHBA(mol) + rdMolDescriptors.CalcNumHBD(mol)

    # Query each element once and reuse the result for both counting and iteration.
    C_atoms = list(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(6)))
    N_atoms = list(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(7)))
    c_atoms = len(C_atoms)
    n_atoms = len(N_atoms)
    o_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(8)))

    # NOTE(review): `sp2carbs` and `tertNs` do not feed into `max_score` below;
    # they are kept because later cells may read them - confirm or remove.
    sp2carbs = c_atoms * (1 - rdMolDescriptors.CalcFractionCSP3(mol))
    tertNs = 0
    for atom in N_atoms:
        if (atom.GetTotalNumHs(includeNeighbors=True) == 0
                and atom.GetHybridization() == rdkit.Chem.HybridizationType.SP3):
            tertNs += 1

    # Mean cross-sectional disc area (pi * r^2) per heavy atom.
    # NOTE(review): only C, N and O contribute to the numerator, while the
    # denominator counts every heavy atom (Cl, F, S, ...) - confirm this is intended.
    mean_surf_area = np.pi * (
        c_atoms * RADIUS_C**2 + n_atoms * RADIUS_N**2 + o_atoms * RADIUS_O**2
    ) / heavyatoms
    gauss_en = SASA / mean_surf_area * W_GAUSS

    # A carbon counts as hydrophobic when all of its neighbours are C or H; its
    # own SASA and the SASA of its attached hydrogens are accumulated.
    hydrophobic_area = 0
    for atom in C_atoms:
        neighbours = atom.GetNeighbors()
        if all(neighbour.GetAtomicNum() in (1, 6) for neighbour in neighbours):
            hydrophobic_area += float(atom.GetProp("SASA"))
            for neighbour in neighbours:
                if neighbour.GetAtomicNum() == 1:
                    hydrophobic_area += float(neighbour.GetProp("SASA"))

    print(f"SASA: {SASA},hbonds: {hbonds},heavyatoms: {heavyatoms},hydrophob: {hydrophobic_area}")
    max_score = (
        hbonds * -W_HBOND
        - gauss_en
        - W_HYDROPHOBIC * hydrophobic_area / (np.pi * RADIUS_C**2)
    )
    print(max_score)
    

small_mol
SASA: 450.954904609596,hbonds: 5,heavyatoms: 27,hydrophob: 233.5119549594096
-6.305162599940183
Gilteritinib
SASA: 703.0492889607814,hbonds: 13,heavyatoms: 40,hydrophob: 216.0268081496556
-12.296045621298786
Quizartinib
SASA: 639.5106256709745,hbonds: 11,heavyatoms: 40,hydrophob: 279.6664645453603
-11.087652781278397
Crenolanib
SASA: 541.2919211984648,hbonds: 8,heavyatoms: 33,hydrophob: 255.38083418575232
-8.572477561332276
Tandutinib
SASA: 722.8001452564496,hbonds: 9,heavyatoms: 41,hydrophob: 302.34653909760675
-10.313880030603716
Sorafenib
SASA: 471.3535091920788,hbonds: 7,heavyatoms: 32,hydrophob: 181.44623223519943
-7.696946061891826
Sunitinib
SASA: 500.04487940185527,hbonds: 6,heavyatoms: 29,hydrophob: 268.6372361996862
-7.305085759023318
Lestaurtinib
SASA: 455.34557012807954,hbonds: 9,heavyatoms: 33,hydrophob: 256.34826521787346
-8.738246038774365
Midostaurin
SASA: 613.4030840285404,hbonds: 7,heavyatoms: 43,hydrophob: 358.8693742380153
-8.731829333743544
Ponatinib
SASA: