In [1]:
import pickle
import sys
from contextlib import redirect_stdout
from typing import List, Optional, Union

import dimorphite_dl
import meeko
import numpy as np
import pandas as pd
import pexpect
import rdkit
import seaborn as sns
import torch
import vina
from bayes_opt import BayesianOptimization
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.event import Events
from bayes_opt.logger import JSONLogger
from bayes_opt.target_space import TargetSpace
from bayes_opt.util import load_logs
from rdkit import Chem
from rdkit.Chem import AllChem, rdFreeSASA, rdMolDescriptors, rdqueries
from scipy.stats import norm



In [26]:
# Reference ligands used to sanity-check max_affin: maps a human-readable
# name to the molecule's SMILES string.
# NOTE(review): apart from "small_mol", the names look like known kinase
# inhibitors — confirm the intended target with the notebook author.
# The strings are raw literals (r"...") because SMILES may contain backslashes
# (see the "\2" double-bond stereo marker in Sunitinib), which must not be
# interpreted as Python escape sequences.
knowndrugs = {"small_mol":r"CCOc1cc2c(c(Cl)c1CN(C)CCC(=O)[O-])OCO2",
              "Gilteritinib":r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5", 
              "Quizartinib":r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6", 
              "Crenolanib":r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
              "Tandutinib":r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
              "Sorafenib":r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
              "Sunitinib":r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
              "Lestaurtinib":r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
              "Midostaurin":r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
              "Ponatinib":r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C"
              }

In [39]:
from rdkit.Chem import rdFreeSASA
# Atomic numbers treated as polar when classifying atoms: N, O, P, S.
_POLAR_ATOMIC_NUMS = (7, 8, 15, 16)

# Taken from https://github.com/mittinatten/freesasa/blob/master/src/classifier.c
# H originally 1.10 but vinardo ignores it, so it is set to zero here.
_SYMBOL_RADIUS = {
    "H": 0.0, "C": 1.70, "N": 1.55, "O": 1.52, "P": 1.80, "S": 1.80, "SE": 1.90,
    "F": 1.47, "CL": 1.75, "BR": 1.83, "I": 1.98,
    "LI": 1.81, "BE": 1.53, "B": 1.92,
    "NA": 2.27, "MG": 1.74, "AL": 1.84, "SI": 2.10,
    "K": 2.75, "CA": 2.31, "GA": 1.87, "GE": 2.11, "AS": 1.85,
    "RB": 3.03, "SR": 2.49, "IN": 1.93, "SN": 2.17, "SB": 2.06, "TE": 2.06,
}


def _classify_atoms(mol, polar_atoms=_POLAR_ATOMIC_NUMS):
    """Tag every atom as "Polar"/"Apolar" and collect per-atom radii.

    Each atom receives a ``SASAClassName`` property (the property that
    ``rdFreeSASA.MakeFreeSasaAPolarAtomQuery`` is used against in
    ``max_affin``). Heavy atoms whose atomic number is in ``polar_atoms`` are
    polar; a hydrogen is polar when it is bonded to such an atom; everything
    else is apolar.

    Returns:
        (radii, mean_surf_area, mean_hydrophob_area) where ``radii`` has one
        entry per atom in atom order, ``mean_surf_area`` is the mean disc area
        (pi * r**2) per heavy atom, and ``mean_hydrophob_area`` is the mean
        disc area per apolar heavy atom (0.0 if there are none).

    Raises:
        KeyError: if an element has no entry in the radius table.
        ValueError: if the molecule has no heavy atoms.
    """
    radii = []
    surf_area = 0.0
    hydrophob_surf_area = 0.0
    num_apol = 0
    for atom in mol.GetAtoms():
        atomic_num = atom.GetAtomicNum()
        if atomic_num == 1:
            # A hydrogen inherits polarity from the atom it is attached to.
            # GetNeighbors() is empty for an unbonded H, which then stays apolar
            # instead of raising IndexError.
            is_polar = any(nbr.GetAtomicNum() in polar_atoms for nbr in atom.GetNeighbors())
        else:
            is_polar = atomic_num in polar_atoms
        atom.SetProp("SASAClassName", "Polar" if is_polar else "Apolar")

        radius = _SYMBOL_RADIUS[atom.GetSymbol().upper()]
        radii.append(radius)
        disc_area = np.pi * radius ** 2
        surf_area += disc_area
        if not is_polar and atomic_num != 1:
            hydrophob_surf_area += disc_area
            num_apol += 1

    num_heavy = rdMolDescriptors.CalcNumHeavyAtoms(mol)
    if num_heavy == 0:
        raise ValueError("molecule has no heavy atoms; cannot normalise surface area")
    mean_surf_area = surf_area / num_heavy
    # Guard the division: a molecule made only of polar heavy atoms has no
    # apolar reference area.
    mean_hydrophob_area = hydrophob_surf_area / num_apol if num_apol else 0.0
    return radii, mean_surf_area, mean_hydrophob_area


def max_affin(drug: str, random_seed: int = 42) -> float:
    """Estimate the most favourable (most negative) score a ligand could reach.

    The SMILES is parsed, hydrogens are added and a 3D conformer is embedded.
    The score is the sum of three terms:

    * gauss:       -0.045 * heavy-atom SASA / mean heavy-atom disc area
    * hydrophobic: -0.035 * apolar-atom SASA / mean apolar heavy-atom disc area
    * h-bond:      -0.6   * (number of H-bond acceptors + donors)

    NOTE(review): the weights look like the Vinardo scoring-function term
    weights — confirm against the docking setup this is compared with.

    Args:
        drug: SMILES string of the ligand.
        random_seed: seed for RDKit conformer embedding so that repeated calls
            give the same score. Pass -1 for RDKit's unseeded behaviour.

    Returns:
        The estimated best-possible score (more negative is better).

    Raises:
        ValueError: if the SMILES cannot be parsed, no conformer can be
            embedded, or the molecule has no heavy atoms.
        KeyError: if the molecule contains an element without a known radius.

    Side effects:
        Prints the SMILES string to stdout.
    """
    gauss_weight = 0.045
    hydrophob_weight = 0.035
    hbond_weight = 0.6

    mol = Chem.MolFromSmiles(drug)
    if mol is None:
        raise ValueError(f"could not parse SMILES: {drug!r}")
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns -1 when it fails; retry once from random starting
    # coordinates before giving up, instead of failing later inside CalcSASA.
    if AllChem.EmbedMolecule(mol, randomSeed=random_seed) == -1:
        if AllChem.EmbedMolecule(mol, randomSeed=random_seed, useRandomCoords=True) == -1:
            raise ValueError(f"could not embed a 3D conformer for: {drug!r}")

    radii, mean_surf_area, mean_hydrophob_area = _classify_atoms(mol)

    # Solvent-accessible surface area restricted to heavy atoms (atomic number > 1).
    heavy_sasa = rdFreeSASA.CalcSASA(mol, radii, query=rdqueries.AtomNumGreaterQueryAtom(1))
    # SASA restricted to atoms tagged "Apolar" by _classify_atoms.
    hydrophobic_area = rdFreeSASA.CalcSASA(mol, radii, query=rdFreeSASA.MakeFreeSasaAPolarAtomQuery())
    hbonds = rdMolDescriptors.CalcNumHBA(mol) + rdMolDescriptors.CalcNumHBD(mol)

    print(drug)

    gauss_en = -heavy_sasa / mean_surf_area * gauss_weight
    hbond_en = hbonds * -hbond_weight
    if mean_hydrophob_area > 0:
        hydrophob_en = -hydrophob_weight * hydrophobic_area / mean_hydrophob_area
    else:
        # No apolar heavy atoms: there is no hydrophobic contribution.
        hydrophob_en = 0.0

    return hbond_en + gauss_en + hydrophob_en
    

    

In [29]:
# Score every reference ligand and keep the results, so they can be inspected
# or reused by later cells instead of being discarded.
known_drug_max_scores = {}
for drug_name, smiles in knowndrugs.items():
    known_drug_max_scores[drug_name] = max_affin(smiles)
    print(f"{drug_name}: max: {known_drug_max_scores[drug_name]:.2f}")

CCOc1cc2c(c(Cl)c1CN(C)CCC(=O)[O-])OCO2
max: -7.51, hbonds: -3.6, gauss: -2.59, hydrophob: -1.32
CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5
max: -14.39, hbonds: -7.8, gauss: -4.13, hydrophob: -2.47
CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6
max: -13.15, hbonds: -6.6, gauss: -4.21, hydrophob: -2.34
O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C
max: -10.25, hbonds: -4.8, gauss: -3.37, hydrophob: -2.08
CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5
max: -12.82, hbonds: -5.4, gauss: -4.50, hydrophob: -2.92
CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl
max: -9.90, hbonds: -4.2, gauss: -3.58, hydrophob: -2.12
CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C
max: -8.90, hbonds: -3.6, gauss: -3.25, hydrophob: -2.05
C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O
max: -9.63, hbonds: -5.4, gauss: -2.74, hydrophob: -1.49
C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7

In [44]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open("200pred_list_prob03.pk1", "rb") as pred_file:
    data = pickle.load(pred_file)

# Compare the estimated best-possible score with the best score actually observed.
# presumably entry[0] is a SMILES string (or the sentinel "failed") and entry[2]
# is a collection of observed scores — verify against the code that wrote the pickle.
for entry in data:
    if entry[0] == "failed":
        continue
    try:
        max_score = max_affin(entry[0])
        best_observed = min(entry[2])
        print(f"best poss: {max_score:.2f}, best observed: {best_observed:.2f}, diff: {max_score-best_observed:.2f}")
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C / kernel interrupt
        # still stops the loop, and report why this molecule failed.
        print(f"failed ({entry[0]}): {exc!r}")

CNC(=O)[C@H]1CN(C(=O)c2ccc3c(c2)N(C)C(=O)CN3)[C@@H]1C
max: -7.69, max observed: -5.93, diff: -1.76
N#C[C@]1(N)O[C@@H](N2CCN(C(=O)c3ccccc3-c3nccs3)CC2)c2ccccc21
max: -9.58, max observed: -6.73, diff: -2.85
C[C@H]1CN(C(=O)c2ccccc2C(F)(F)F)[C@@](c2ccccc2)(C2C3CC4CC(C3)CC2C4)[C@H](C)O1
max: -6.45, max observed: -6.90, diff: 0.46
C[C@H]1CCCCN1C(=O)c1cccc2c(-c3ccccn3)nn([C@@]3(C#N)CCCO3)c12
max: -8.50, max observed: -6.45, diff: -2.05
N#C[C@]1(N)C[C@@H](c2ccccc2Br)C[C@](c2ccccn2)(c2cccs2)N1
max: -8.63, max observed: -8.22, diff: -0.41
Cc1cccc([C@@H](Cl)[C@@H]2CN[C@@H]3NC[C@@H]4C=CC=C[C@H]4[C@H]3O2)c1C#N
max: -7.82, max observed: -6.98, diff: -0.84
N#C[C@H](N)c1ccc2c(c1)N[C@@H]1N=CC=C[C@H]1[C@H]2C(=O)[O-]
max: -8.03, max observed: -5.73, diff: -2.30
N#Cc1cccc(N2CCCC[C@@]2(c2ccccn2)N2CCCC2=O)c1
max: -6.67, max observed: -7.08, diff: 0.41
N#Cc1sc(N2CCC[C@H]3C=CC=C[C@H]32)nc1C(=O)NC1CC1
max: -7.66, max observed: -5.82, diff: -1.84
O=C(C1=CS2(C=CC=C2)C=N1)N1CCCC[C@H]1c1cccc(F)c1
max: -5.71, max o