In [1]:
import pandas as pd
import seaborn as sns
import rdkit
import torch
import vina
import meeko
import pexpect
import pickle
import numpy as np
from scipy.stats import norm
from typing import Optional, Union, List
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import dimorphite_dl
import sys
from contextlib import redirect_stdout



In [18]:
# Compound name -> SMILES string, consumed by the scoring cell further down.
# Every value is a raw string on purpose: the Sunitinib SMILES contains a
# backslash bond marker ("\2") that a normal string literal would treat as an
# escape sequence.
# The "small_mol" entry starts with a space; it is kept exactly as entered.
knowndrugs = {
    "small_mol": r" N[C@H]1CC[C@@H](C(=O)N2CC[C@@H](c3cccc(Cl)c3)[C@@H]3[C@@H]2C2CC[NH+]3CC2)O1",
    "Gilteritinib": r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5",
    "Quizartinib": r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6",
    "Crenolanib": r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
    "Tandutinib": r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
    "Sorafenib": r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
    "Sunitinib": r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
    "Lestaurtinib": r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
    "Midostaurin": r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
    "Ponatinib": r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C",
}

In [23]:
# Heuristic best-case ("max") score for every compound in `knowndrugs`, built
# from three RDKit-derived terms: H-bond donor/acceptor count, a SASA-based
# term, and the number of non-sp3 carbons. Prints the compound name, the raw
# descriptor triple and the resulting score for each entry.

# Import the RDKit submodules explicitly: `import rdkit` on its own does not
# load them, so attribute access such as `rdkit.Chem.AllChem` only works if
# some other import happened to pull them in first.
from rdkit import Chem
from rdkit.Chem import AllChem, rdFreeSASA, rdMolDescriptors, rdqueries

# Fixed seed so the embedded 3D conformer (and therefore SASA and the score)
# is the same on every run.
EMBED_SEED = 42

# Per-element radii used for the mean per-atom area below. The values coincide
# with the Bondi van der Waals radii (in angstrom) for C, N and O.
RADIUS_C = 1.7
RADIUS_N = 1.55
RADIUS_O = 1.52

# Empirical term weights.
# NOTE(review): their origin is not recorded in this notebook; the magnitudes
# resemble AutoDock Vina scoring-term weights -- confirm and cite the source.
HBOND_WEIGHT = 0.6
GAUSS_WEIGHT = 0.045
NONSP3_CARBON_WEIGHT = 0.035

for drug in knowndrugs:
    # Strip surrounding whitespace (the "small_mol" entry has a leading space).
    smiles = knowndrugs[drug].strip()
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"RDKit could not parse the SMILES for {drug!r}: {smiles!r}")
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when no conformer could be generated; retry once
    # from random starting coordinates before giving up, because CalcSASA needs
    # a 3D conformer.
    conf_id = AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED)
    if conf_id < 0:
        conf_id = AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED, useRandomCoords=True)
    if conf_id < 0:
        raise RuntimeError(f"3D embedding failed for {drug!r}; cannot compute SASA")

    radii = rdFreeSASA.classifyAtoms(mol)
    print(drug)
    SASA = rdFreeSASA.CalcSASA(mol, radii)

    heavyatoms = rdMolDescriptors.CalcNumHeavyAtoms(mol)
    hbonds = rdMolDescriptors.CalcNumHBA(mol) + rdMolDescriptors.CalcNumHBD(mol)
    c_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(6)))
    n_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(7)))
    o_atoms = len(mol.GetAtomsMatchingQuery(rdqueries.AtomNumEqualsQueryAtom(8)))
    # CalcFractionCSP3 is the sp3 fraction of carbons, so this is the count of
    # all non-sp3 carbons (sp2 and sp alike), despite the variable name.
    sp2carbs = c_atoms * (1 - rdMolDescriptors.CalcFractionCSP3(mol))

    # Mean pi*r^2 per heavy atom.
    # NOTE(review): only C/N/O contribute to the numerator while the denominator
    # counts every heavy atom; several compounds here also contain F, Cl or S.
    # Confirm that under-weighting those atoms is intended.
    mean_surf_area = np.pi * (
        c_atoms * RADIUS_C**2 + n_atoms * RADIUS_N**2 + o_atoms * RADIUS_O**2
    ) / heavyatoms

    # SASA expressed in units of the mean per-atom area, scaled by its weight.
    gauss_en = SASA / (mean_surf_area) * GAUSS_WEIGHT

    print(hbonds, heavyatoms, sp2carbs)
    # Every term lowers the score, so more negative means a better best case.
    max_score = hbonds * -HBOND_WEIGHT - gauss_en - NONSP3_CARBON_WEIGHT * sp2carbs
    print(max_score)

small_mol
5 27 7.000000000000001
-5.685561857518883
Gilteritinib
13 40 11.0
-11.775360746987518
Quizartinib
11 40 19.0
-10.666119849098767
Crenolanib
8 33 16.0
-8.192801718242885
Tandutinib
9 41 15.0
-9.741909798320998
Sorafenib
7 32 19.0
-7.656960717439149
Sunitinib
6 29 14.0
-6.777735488485826
Lestaurtinib
9 33 19.0
-8.409152156090613
Midostaurin
7 43 26.0
-8.277625452562411
Ponatinib
7 39 21.0
-8.323247570606735
