In [1]:
import pandas as pd
import seaborn as sns
import rdkit
import torch
import vina
import meeko
import pexpect
import pickle
import numpy as np
from scipy.stats import norm
from typing import Optional, Union, List
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import dimorphite_dl
import sys
from contextlib import redirect_stdout



In [12]:
# Reference ligands keyed by name; each value is the ligand's SMILES string.
# Raw strings are used throughout because SMILES may contain backslashes
# (the Sunitinib entry encodes double-bond geometry with "\2"), which must
# not be interpreted as Python escape sequences.
knowndrugs = {
    # Charged test ligand (contains an explicit [NH+] centre).
    "small_mol": r"N[C@H]1CC[C@@H](C(=O)N2CC[C@@H](c3cccc(Cl)c3)[C@@H]3[C@@H]2C2CC[NH+]3CC2)O1",
    # Named compounds.
    "Gilteritinib": r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5",
    "Quizartinib": r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6",
    "Crenolanib": r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
    "Tandutinib": r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
    "Sorafenib": r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
    "Sunitinib": r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
    "Lestaurtinib": r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
    "Midostaurin": r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
    "Ponatinib": r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C",
}

In [41]:
# Import every RDKit submodule this cell touches explicitly instead of
# reaching them through `rdkit.Chem.<submodule>` attribute access, which only
# works if something else has already imported the submodule.
from rdkit import Chem
from rdkit.Chem import AllChem, rdFreeSASA, rdMolDescriptors, rdqueries

# Fixed seed for conformer embedding. Without it every run embeds a different
# 3D geometry, so SASA (and therefore the score) changes from run to run.
# Re-running with a seed will not reproduce numbers from earlier unseeded runs.
EMBED_SEED = 42

# Per-element radii used for the "mean atom disc area" normalisation below.
# NOTE(review): the values match the Bondi van der Waals radii of C, N and O
# in Angstrom - presumably that is the intent; confirm.
R_CARBON = 1.7
R_NITROGEN = 1.55
R_OXYGEN = 1.52

# Empirical weights of the three score terms.
# NOTE(review): these resemble AutoDock Vina term weights (gauss / hydrogen
# bond / hydrophobic) - TODO confirm the source of the numbers.
GAUSS_WEIGHT = 0.045
HBOND_WEIGHT = 0.6
AROMATIC_WEIGHT = 0.035

# For every reference ligand: build a 3D conformer, compute its
# solvent-accessible surface area and a few counts, and print a heuristic
# "best achievable" score (more negative = better).
for drug, smiles in knowndrugs.items():
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"Could not parse SMILES for {drug!r}: {smiles}")
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns the new conformer id, or -1 if embedding failed;
    # SASA cannot be computed without a conformer.
    if AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED) < 0:
        raise RuntimeError(f"3D embedding failed for {drug!r}")

    radii = rdFreeSASA.classifyAtoms(mol)
    print(drug)
    # CalcSASA also stores each atom's contribution in its "SASA" property,
    # which is read back further down.
    SASA = rdFreeSASA.CalcSASA(mol, radii)

    heavyatoms = rdMolDescriptors.CalcNumHeavyAtoms(mol)
    hbonds = rdMolDescriptors.CalcNumHBA(mol) + rdMolDescriptors.CalcNumHBD(mol)

    # Nitrogen atoms are queried once and reused for the count and the
    # tertiary-amine scan.
    atoms = mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(7))
    c_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(6)))
    n_atoms = len(atoms)
    o_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(8)))
    sp2carbs = c_atoms * (1 - rdMolDescriptors.CalcFractionCSP3(mol))

    # NOTE(review): only C, N and O contribute to the numerator, but the
    # denominator counts every heavy atom (Cl, F, S, ... included), so the mean
    # is biased low for halogen/sulfur-containing ligands. Left as-is to keep
    # the scoring model unchanged - confirm whether this is intended.
    mean_surf_area = np.pi * (
        c_atoms * R_CARBON**2 + n_atoms * R_NITROGEN**2 + o_atoms * R_OXYGEN**2
    ) / heavyatoms
    gauss_en = SASA / (mean_surf_area) * GAUSS_WEIGHT

    # sp3 nitrogens carrying no hydrogens are subtracted from the H-bond count
    # in the score below.
    tertNs = 0
    for atom in atoms:
        if (
            atom.GetTotalNumHs(includeNeighbors=True) == 0
            and atom.GetHybridization() == Chem.HybridizationType.SP3
        ):
            tertNs += 1
    print(tertNs)

    # Exposed area of aromatic atoms plus the hydrogens bonded to them.
    sp2_area = 0
    arom_atoms = mol.GetAromaticAtoms()
    for atom in arom_atoms:
        sp2_area += float(atom.GetProp("SASA"))
        for H in atom.GetNeighbors():
            if H.GetAtomicNum() == 1:
                sp2_area += float(H.GetProp("SASA"))

    print(f"SASA: {SASA},hbonds: {hbonds},heavyatoms: {heavyatoms},sp2carbs: {sp2carbs}")
    max_score = (
        (hbonds - tertNs) * -HBOND_WEIGHT
        - gauss_en
        - AROMATIC_WEIGHT * sp2_area / (np.pi * R_CARBON**2)
    )
    print(max_score)
    

small_mol
0
SASA: 449.78465285852064,hbonds: 5,heavyatoms: 27,sp2carbs: 7.000000000000001
-5.741880480171999
Gilteritinib
2
SASA: 715.538597926638,hbonds: 13,heavyatoms: 40,sp2carbs: 11.0
-10.75762870429986
Quizartinib
1
SASA: 636.5021357312727,hbonds: 11,heavyatoms: 40,sp2carbs: 19.0
-10.496392631550314
Crenolanib
0
SASA: 561.162461148143,hbonds: 8,heavyatoms: 33,sp2carbs: 16.0
-8.669823840133148
Tandutinib
1
SASA: 724.3862007711722,hbonds: 9,heavyatoms: 41,sp2carbs: 15.0
-9.291072026772378
Sorafenib
0
SASA: 470.3984181490486,hbonds: 7,heavyatoms: 32,sp2carbs: 19.0
-7.954975548356563
Sunitinib
1
SASA: 508.42297356038995,hbonds: 6,heavyatoms: 29,sp2carbs: 14.0
-6.190608580147516
Lestaurtinib
0
SASA: 455.45891710604997,hbonds: 9,heavyatoms: 33,sp2carbs: 19.0
-8.629993695802105
Midostaurin
0
SASA: 618.6550022203613,hbonds: 7,heavyatoms: 43,sp2carbs: 26.0
-8.645161029477224
Ponatinib
2
SASA: 597.0393796494692,hbonds: 7,heavyatoms: 39,sp2carbs: 21.0
-7.394083083190332
