In [1]:
latent_size = 56  # presumably the VAE latent dimensionality (same as the 56 search dimensions) - confirm

# Docking set-up for the target and the off-targets. Each entry maps a protein
# name to [path to the protein structure, [x, y, z] centre of the box, box size].
# The box size (angstroms) could be given per protein, but here every protein
# shares the same 30 x 30 x 30 box.
box_size = [30, 30, 30]
target_details = {
    "FLT3": ["./proteins/flt3.pdbqt", [-28.03685, -10.361925, -28.9883], box_size],
}
off_target_details = {
    "CKIT": ["./proteins/ckit.pdbqt", [45.93035714, 97.03574286, 16.1472], box_size],
    "PDGFRA": ["./proteins/pdgfra.pdbqt", [17.58837931, 132.5595172, -6.030275862], box_size],
    "VEGFR": ["./proteins/VEGFR.pdbqt", [25.997, 28.605, 17.134], box_size],
    "MK2": ["./proteins/MK2.pdbqt", [47.6395, 34.809, 16.708], box_size],
    "JAK2": ["./proteins/JAK2.pdbqt", [-31.7445, -49.661, 35.4655], box_size],
}

# Output locations
logpath = "./predictions/test.json"  # latent coordinates and predictions are logged here
pred_path = "./predictions/test.pk1"  # SMILES and predictions are saved here upon completion

# Optimisation settings
use_custom_domain_reduction = True  # True: custom reduction, False: sequential domain reduction
alpha = 0.99  # alpha for domain reduction
beta = 1.10  # beta for domain expansion
exh = 8  # exhaustiveness to run vina at
failure_bind = -5.0  # binding energy used in the objective function when rdkit fails to generate a conformer
anybind = -6.0  # reference energy that contributes an extra exp(-b*anybind) term to the objective's denominator
run_iters = 200  # iterations to run Bayesian Optimisation for

In [2]:
import pandas as pd
import seaborn as sns
import rdkit
import torch
import vina
import meeko
import pexpect
import pickle
import numpy as np
from scipy.stats import norm
from typing import Optional, Union, List
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import sys
from contextlib import redirect_stdout

from selectivebayes.interfaces import vaeinterface,vinainterface
from selectivebayes.transformers import SequentialDomainReductionTransformer,SimpleDomainReduction





In [3]:
# Reference drugs (name -> SMILES) that simsearch compares generated molecules against.
# Raw strings are required: some SMILES contain backslashes.
knowndrugs = dict(
    Gilteritinib=r"CCc1nc(C(=O)N)c(Nc2ccc(N3CCC(CC3)N4CCN(C)CC4)c(OC)c2)nc1NC5CCOCC5",
    Quizartinib=r"CC(C)(C)c1cc(no1)NC(=O)Nc2ccc(cc2)c3cn4c5ccc(cc5sc4n3)OCCN6CCOCC6",
    Crenolanib=r"O(c5cc4ncn(c1nc3c(cc1)cccc3N2CCC(N)CC2)c4cc5)CC6(COC6)C",
    Tandutinib=r"CC(C)OC1=CC=C(C=C1)NC(=O)N2CCN(CC2)C3=NC=NC4=CC(=C(C=C43)OC)OCCCN5CCCCC5",
    Sorafenib=r"CNC(=O)c1cc(ccn1)Oc2ccc(cc2)NC(=O)Nc3ccc(c(c3)C(F)(F)F)Cl",
    Sunitinib=r"CCN(CC)CCNC(=O)c1c(c([nH]c1C)/C=C\2/c3cc(ccc3NC2=O)F)C",
    Lestaurtinib=r"C[C@@]12[C@](C[C@@H](O1)n3c4ccccc4c5c3c6n2c7ccccc7c6c8c5C(=O)NC8)(CO)O",
    Midostaurin=r"C[C@@]12[C@@H]([C@@H](C[C@@H](O1)N3C4=CC=CC=C4C5=C6C(=C7C8=CC=CC=C8N2C7=C53)CNC6=O)N(C)C(=O)C9=CC=CC=C9)OC",
    Ponatinib=r"Cc1ccc(cc1C#Cc2cnc3n2nccc3)C(=O)Nc4ccc(c(c4)C(F)(F)F)CN5CCN(CC5)C",
)

# Fingerprint every reference drug once up front so simsearch only has to
# fingerprint the query molecule.
fps = {
    drug_name: rdkit.Chem.RDKFingerprint(rdkit.Chem.MolFromSmiles(drug_smiles))
    for drug_name, drug_smiles in knowndrugs.items()
}
def simsearch(molecule):
    """Find the reference drug in ``fps`` most similar to ``molecule``.

    Parameters
    ----------
    molecule : str
        SMILES string of the query molecule, or the sentinel string
        ``"failed"`` (presumably what the decoder emits when it could not
        produce a molecule - confirm against vaeinterface).

    Returns
    -------
    tuple
        ``(highest_similarity, drug_name)``. ``(0, 0)`` is returned for the
        ``"failed"`` sentinel and for SMILES that RDKit cannot parse;
        ``(0, "")`` is returned when no reference drug has a similarity
        above zero.
    """
    if molecule=="failed":
        return 0,0
    query_mol = rdkit.Chem.MolFromSmiles(molecule)
    if query_mol is None:
        # MolFromSmiles returns None for unparseable SMILES; fingerprinting None
        # would raise and abort the whole optimisation run, so treat it the same
        # way as a failed molecule.
        return 0,0
    molfing = rdkit.Chem.RDKFingerprint(query_mol)
    highestsim = 0
    bestmolec = ""
    for drug_name, drug_fp in fps.items():
        currsim = rdkit.DataStructs.FingerprintSimilarity(drug_fp,molfing)
        # strict comparison: on ties the earliest drug in fps wins
        if currsim>highestsim:
            highestsim=currsim
            bestmolec = drug_name
    return highestsim,bestmolec

In [4]:
from hide_warnings import hide_warnings
# Filled by the objective function: one [molecule, prediction, all binding energies] record per evaluation.
pred_list = list()

# Exactly one target is expected; this unpacking raises if target_details
# holds any other number of entries.
[(target_name, target_loc)] = target_details.items()
@hide_warnings(out=False)
def optstart(numiters):
    """Run Bayesian optimisation over the latent space to maximise a selectivity score.

    Each evaluation decodes a latent vector to a molecule, docks it against the
    target and every off-target, and scores it as

        exp(-b*E_target) / (exp(-b*anybind) + sum(exp(-b*E) for E in all energies))

    where ``all energies`` includes the target energy itself. Every evaluation
    is appended to the module-level ``pred_list`` as
    ``[mol, prediction, all_preds]`` and optimisation steps are logged to
    ``logpath``.

    Parameters
    ----------
    numiters : int
        Number of Bayesian optimisation iterations to run.

    Returns
    -------
    None
    """
    # Presumably 1/(RT) at 310 K with energies in kcal/mol (4184 J/kcal,
    # R = 8.3145 J/(mol K)) - confirm. This used to be called `beta`, which
    # shadowed the module-level `beta` (domain expansion rate) and caused the
    # wrong value to be passed as increase_rate below.
    inv_rt = 4184/(8.3145*310)
    best_pred = 0
    ind = 0
    print("Starting")
    target_vina = vinainterface(*target_loc)
    off_target_interfaces = {
        off_target_name: vinainterface(*details)
        for off_target_name, details in off_target_details.items()
    }

    def selectivity(target_pred, all_preds):
        """Score the target energy against a reference term plus all docked energies."""
        return np.exp(-inv_rt*target_pred)/(np.exp(-inv_rt*anybind)+np.sum([np.exp(-inv_rt*pr) for pr in all_preds]))

    def dock_off_targets(mol, exhaustiveness):
        """Dock ``mol`` against every off-target and return {name: energy}."""
        return {
            off_target_name: interface.predict(mol, exhaustiveness)[0]
            for off_target_name, interface in off_target_interfaces.items()
        }

    def optfunction(**kwargs):
        """Objective for the optimiser: decode, dock, score, record and report one point."""
        nonlocal ind
        nonlocal best_pred
        ind += 1

        # NOTE(review): the latent vector is assembled in the order the optimiser
        # passes the keyword arguments - confirm this matches the dimension
        # order the decoder expects.
        mol = vaeint.decode(np.expand_dims(np.fromiter(kwargs.values(),dtype=float),axis=0))

        target_pred, success = target_vina.predict(mol, exh)
        if success != -1:
            off_target_preds = dock_off_targets(mol, exh)
        else:
            # target docking reported failure: skip off-target docking and use the fallback energy
            off_target_preds = {off_target_name: failure_bind for off_target_name in off_target_interfaces}
        all_preds = [target_pred, *off_target_preds.values()]
        prediction = selectivity(target_pred, all_preds)

        if prediction > best_pred and success != -1:
            # if prediction is better than best prediction seen then run again at 2x exhaustiveness to confirm
            target_pred = target_vina.predict(mol, exh*2)[0]
            off_target_preds = dock_off_targets(mol, exh*2)
            all_preds = [target_pred, *off_target_preds.values()]
            prediction = selectivity(target_pred, all_preds)
            if prediction > best_pred:
                best_pred = prediction

        pred_list.append([mol, prediction, all_preds])

        sim, molec = simsearch(mol)
        print(f"{ind}: {mol}, Pred: {prediction:.4f}, SimDrug: {molec}, SimVal: {sim:.3f}")
        print(target_name + f":{target_pred:.2f}," + "".join([off_target_name +f": {off_target_preds[off_target_name]:.2f}, " for off_target_name in off_target_preds.keys()]))

        return prediction

    # one search dimension per latent coordinate, taken from the config instead of a hard-coded 56
    pbounds = {f"f{i}": (-3,3) for i in range(latent_size)}
    if use_custom_domain_reduction:
        # `alpha` and `beta` here are the module-level configuration values
        bounds_transformer = SimpleDomainReduction(target_vina, reduction_rate=alpha, increase_rate=beta)
    else:
        bounds_transformer = SequentialDomainReductionTransformer(prob=0.3)
    optimizer = BayesianOptimization(f=optfunction, pbounds=pbounds, verbose=0, bounds_transformer=bounds_transformer)
    logger = JSONLogger(path=logpath)
    optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)
    optimizer.set_gp_params(alpha=0.01)
    vaeint = vaeinterface()
    vaeint.start()
    try:
        optimizer.maximize(init_points=0, n_iter=numiters)
    finally:
        # always stop the interface, even if docking or optimisation raised
        vaeint.stop()
optstart(run_iters)
# Save every recorded evaluation; the context manager guarantees the file is
# flushed and closed (the bare open() call was never closed).
with open(pred_path, "wb") as pred_file:
    pickle.dump(pred_list, pred_file, protocol=2)

Starting
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
Vina Initialisation complete
