In [3]:
import pandas as pd
import seaborn as sns
import rdkit
import torch
import vina
import meeko
import pexpect
import pickle
import numpy as np
from scipy.stats import norm
from typing import Optional, Union, List
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import sys
from contextlib import redirect_stdout

from selectivebayes.interfaces import vaeinterface,vinainterface
from selectivebayes.transformers import SequentialDomainReductionTransformer,SimpleDomainReduction

# Number of latent dimensions. Presumably the width of the VAE latent vector:
# the optimiser further down searches over 56 parameters f0..f55 — TODO confirm
# against the VAE model definition.
latent_size=56

In [4]:
# Reference drugs, keyed by name, with their SMILES strings. They are used for
# the similarity lookup in simsearch and for the docking sanity check.
knowndrugs = {
    "Gilteritinib": r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5",
    "Quizartinib": r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6",
    "Crenolanib": r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
    "Tandutinib": r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
    "Sorafenib": r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
    "Sunitinib": r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
    "Lestaurtinib": r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
    "Midostaurin": r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
    "Ponatinib": r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C",
}

# One RDKit fingerprint per reference drug, computed once so that simsearch
# only has to fingerprint the query molecule.
fps = {
    drug_name: rdkit.Chem.RDKFingerprint(rdkit.Chem.MolFromSmiles(drug_smiles))
    for drug_name, drug_smiles in knowndrugs.items()
}
def simsearch(molecule):
    """Find the reference drug whose fingerprint is most similar to ``molecule``.

    Args:
        molecule: SMILES string of the query molecule, or the literal string
            ``"failed"`` which callers use to mark a molecule that could not
            be produced.

    Returns:
        ``(similarity, drug_name)`` for the best match among ``fps``.
        ``(0, 0)`` is returned when ``molecule`` is ``"failed"`` or cannot be
        parsed by RDKit, and ``(0, "")`` when no reference drug has a
        similarity above zero.
    """
    if molecule=="failed":
        return 0,0
    parsed = rdkit.Chem.MolFromSmiles(molecule)
    if parsed is None:
        # MolFromSmiles returns None for SMILES it cannot parse; fingerprinting
        # None would raise, so report it with the same sentinel as "failed".
        return 0,0
    molfing = rdkit.Chem.RDKFingerprint(parsed)
    highestsim = 0
    bestmolec = ""
    for drug_name, drug_fp in fps.items():
        currsim = rdkit.DataStructs.FingerprintSimilarity(drug_fp,molfing)
        # Strict '>' keeps the first drug encountered when similarities tie.
        if currsim>highestsim:
            highestsim=currsim
            bestmolec = drug_name
    return highestsim,bestmolec

In [5]:
from hide_warnings import hide_warnings
# Collects one [molecule, prediction, all_preds] record per objective
# evaluation made inside optstart; the list is pickled after the run.
pred_list = []

@hide_warnings(out=False)
def optstart(numiters):
    """Run Bayesian optimisation over the latent space for FLT3-selective molecules.

    Each objective evaluation decodes a latent vector to a molecule with the
    VAE interface, docks it against FLT3 and five off-target kinases, and
    scores it as the Boltzmann-weighted fraction attributed to FLT3.

    Args:
        numiters: Number of optimisation iterations (passed as ``n_iter``).

    Side effects:
        Appends ``[mol, prediction, all_preds]`` to the module-level
        ``pred_list`` for every evaluation, writes the optimiser log to
        ``./predictions/test.json`` and prints one progress record per
        evaluation. Returns ``None``.
    """
    # 4184 / (8.3145 * 310): appears to be 1/(RT) at 310 K with a kcal->J
    # conversion, i.e. docking scores are assumed to be in kcal/mol — TODO confirm.
    beta = 4184/(8.3145*310)
    # Constant reference energy added to the denominator of the score.
    anybind = -6.0
    # Exhaustiveness of the first docking pass; the confirmation pass uses 2x.
    exh = 8
    # Placeholder score given to every off-target when FLT3 docking fails.
    failed_score = -5.0
    best_pred = 0
    ind=0
    print("Starting")
    flt3 = vinainterface("./proteins/flt3.pdbqt",[-28.03685, -10.361925, -28.9883])
    ckit = vinainterface("./proteins/ckit.pdbqt",[45.93035714, 97.03574286, 16.1472])
    pdgfra = vinainterface("./proteins/pdgfra.pdbqt",[17.58837931, 132.5595172, -6.030275862])
    vegfr = vinainterface("./proteins/VEGFR.pdbqt",[25.997, 28.605, 17.134])
    mk2 = vinainterface("./proteins/MK2.pdbqt",[47.6395, 34.809, 16.708])
    jak2 = vinainterface("./proteins/JAK2.pdbqt",[-31.7445, -49.661, 35.4655])
    # Docking order; it also fixes the order of the scores stored in all_preds.
    off_targets = (pdgfra, ckit, vegfr, mk2, jak2)

    def _dock_off_targets(mol, exhaustiveness):
        """Dock ``mol`` against every off-target and return the scores in order."""
        return [target.predict(mol,exhaustiveness)[0] for target in off_targets]

    def _selectivity(flt3_pred, off_preds):
        """Boltzmann weight of FLT3 over all targets plus the ``anybind`` term."""
        all_preds = [flt3_pred, *off_preds]
        return np.exp(-beta*flt3_pred)/(np.exp(-beta*anybind)+np.sum([np.exp(-beta*pr) for pr in all_preds]))

    def optfunction(**kwargs):
        """Objective handed to the optimiser: decode, dock, score, record."""
        nonlocal ind
        nonlocal best_pred
        ind+=1
        # Index the parameters explicitly. Relying on kwargs order ties latent
        # dimension i to whatever order the optimiser passes keys in (bayes_opt
        # 1.x sorts them as strings: f0, f1, f10, ...), not to the name f{i}.
        latent = np.array([kwargs[f"f{i}"] for i in range(latent_size)],dtype=float)
        mol=vaeint.decode(np.expand_dims(latent,axis=0))
        flt3_pred,success = flt3.predict(mol,exh)
        docked = success!=-1
        if docked:
            off_preds = _dock_off_targets(mol,exh)
        else:
            off_preds = [failed_score]*len(off_targets)

        prediction = _selectivity(flt3_pred,off_preds)
        # Confirm a new best at 2x exhaustiveness. Skipped when FLT3 docking
        # failed, so the placeholder scores are not overwritten by re-docking
        # a molecule that could not be docked.
        # NOTE(review): only the off-targets are re-docked; flt3_pred keeps its
        # first-pass value. Confirm whether FLT3 should be re-docked as well.
        if docked and prediction>best_pred:
            off_preds = _dock_off_targets(mol,exh*2)
            prediction = _selectivity(flt3_pred,off_preds)
            if prediction>best_pred:
                best_pred = prediction

        all_preds = [flt3_pred, *off_preds]
        pred_list.append([mol,prediction,all_preds])

        pdgfra_pred,ckit_pred,vegfr_pred,mk2_pred,jak2_pred = off_preds
        sim,molec = simsearch(mol)
        print(f"{ind}: {mol}, Pred: {prediction:.4f}, SimDrug: {molec}, SimVal: {sim:.3f}")
        print(f"FLT3: {flt3_pred:.2f}, PDGFRA: {pdgfra_pred:.2f}, CKIT: {ckit_pred:.2f}, VEGFR: {vegfr_pred:.2f}, MK2: {mk2_pred:.2f}, JAK2: {jak2_pred:.2f}")

        return prediction

    pbounds = {f"f{i}": (-3,3) for i in range(latent_size)}
    bounds_transformer = SimpleDomainReduction(flt3,reduction_rate=0.99,increase_rate=1.10)
    optimizer = BayesianOptimization(f=optfunction,pbounds=pbounds,verbose=0,bounds_transformer=bounds_transformer)
    logger = JSONLogger(path="./predictions/test.json")
    optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)
    optimizer.set_gp_params(alpha=0.01)
    vaeint=vaeinterface()
    vaeint.start()
    try:
        optimizer.maximize(init_points=0,n_iter=numiters)
    finally:
        # Always shut the VAE interface down, even if docking or the optimiser
        # raises part-way through a long run.
        vaeint.stop()
optstart(500)
# Persist every evaluated molecule. The context manager guarantees the file is
# flushed and closed, which the bare open() call did not.
with open("./predictions/test.pk1","wb") as pred_file:
    pickle.dump(pred_list,pred_file,protocol=2)

Starting
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
1: CC1(C[NH2+]c2cscc2C(=O)N2CCC(CO)CC2)CCCCC1, Pred: 0.1078, SimDrug: Midostaurin, SimVal: 0.329
FLT3: -5.95, PDGFRA: -6.27, CKIT: -6.09, VEGFR: -5.72, MK2: -6.33, JAK2: -6.29
2: NC1(N)CCC[C@]2(CC1)C(=O)C(=O)NCN2c1ccccc1C(O)O, Pred: 0.0000, SimDrug: Midostaurin, SimVal: 0.436
FLT3: 1.53, PDGFRA: 0.34, CKIT: 1.66, VEGFR: 1.21, MK2: 0.17, JAK2: 0.95


In [36]:
from hide_warnings import hide_warnings
@hide_warnings(out=False)
def knowndrugeval(max_drugs=1, exh=32):
    """Dock known drugs against the six kinases and print their FLT3 selectivity.

    Args:
        max_drugs: How many entries of ``knowndrugs`` (in dict order) to
            evaluate. The default of 1 keeps the previous behaviour, where the
            loop stopped unconditionally after the first drug; pass ``None``
            to evaluate every drug.
        exh: Exhaustiveness value passed to every ``predict`` call.

    Side effects:
        Prints two lines per evaluated drug. Returns ``None``.
    """
    # Same 1/(RT)-style factor and reference energy as used in optstart.
    beta = 4184/(8.3145*310)
    anybind=-6.0
    flt3 = vinainterface("./proteins/flt3.pdbqt",[-28.03685, -10.361925, -28.9883])
    ckit = vinainterface("./proteins/ckit.pdbqt",[45.93035714, 97.03574286, 16.1472])
    pdgfra = vinainterface("./proteins/pdgfra.pdbqt",[17.58837931, 132.5595172, -6.030275862])
    vegfr = vinainterface("./proteins/VEGFR.pdbqt",[25.997, 28.605, 17.134])
    mk2 = vinainterface("./proteins/MK2.pdbqt",[47.6395, 34.809, 16.708])
    jak2 = vinainterface("./proteins/JAK2.pdbqt",[-31.7445, -49.661, 35.4655])

    def _score(result):
        """Return the docking score from a ``predict`` result.

        optstart unpacks ``predict`` as a (score, status) pair, whereas this
        function originally used the result as a bare number. Accept either
        shape so the two cells agree.
        """
        return result if np.ndim(result)==0 else result[0]

    # Slicing with None keeps every drug.
    for drug, mol in list(knowndrugs.items())[:max_drugs]:
        flt3_pred = _score(flt3.predict(mol, exh))
        pdgfra_pred = _score(pdgfra.predict(mol, exh))
        ckit_pred = _score(ckit.predict(mol, exh))
        vegfr_pred = _score(vegfr.predict(mol, exh))
        mk2_pred = _score(mk2.predict(mol, exh))
        jak2_pred = _score(jak2.predict(mol, exh))
        all_preds = [flt3_pred,pdgfra_pred,ckit_pred,vegfr_pred,mk2_pred,jak2_pred]

        prediction = np.exp(-beta*flt3_pred)/(np.exp(-beta*anybind)+np.sum([np.exp(-beta*pr) for pr in all_preds]))

        sim,molec = simsearch(mol)
        print(f"{mol}, Pred: {prediction:.4f}, SimDrug: {molec}, SimVal: {sim:.3f}")
        print(f"FLT3: {flt3_pred:.2f}, PDGFRA: {pdgfra_pred:.2f}, CKIT: {ckit_pred:.2f}, VEGFR: {vegfr_pred:.2f}, MK2: {mk2_pred:.2f}, JAK2: {jak2_pred:.2f}")
        

# Run the known-drug docking evaluation; its results are printed to the cell output.
knowndrugeval()


CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5, Pred: 0.1157, SimDrug: Gilteritinib, SimVal: 1.000
FLT3: -7.17, PDGFRA: -7.00, CKIT: -7.60, VEGFR: -6.80, MK2: -6.95, JAK2: -7.93
