In [10]:
import pandas as pd
import pubchempy as pcp
import seaborn as sns
import rdkit
import torch
import vina
import meeko
import pexpect
import pickle
import numpy as np
from scipy.stats import norm
from typing import Optional, Union, List
from bayes_opt import BayesianOptimization
from bayes_opt.util import load_logs
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.target_space import TargetSpace
import sys
from contextlib import redirect_stdout

from selectivebayes.interfaces import vaeinterface,vinainterface
from selectivebayes.transformers import SequentialDomainReductionTransformer,SimpleDomainReduction

def visualise(molecules,labels, size,row):
    img = rdkit.Chem.Draw.MolsToGridImage([rdkit.Chem.MolFromSmiles(mol) for mol in molecules],subImgSize=(size,size),molsPerRow=row,legends=labels)
    return img

latent_size=56

In [None]:
pbounds = {f"f{i}": (-3,3) for i in range(56)}
bounds_transformer = SequentialDomainReductionTransformer(prob=0.5)
optimizer = BayesianOptimization(f=np.square,pbounds=pbounds,verbose=0,bounds_transformer=bounds_transformer)
load_logs(optimizer, logs=["./predictions/minwin05.json"])
seenpreds = set()
predplot = []
x=[]

for i,pred in enumerate(optimizer.res):
    if pred["target"] not in seenpreds:
        predplot.append(pred["target"])
        x.append(i)
        seenpreds.add(pred["target"])
#ax=sns.regplot(x=x,y=predplot)
#ax.set(xlabel="Iteration",ylabel="Objective")
pred_df = pd.DataFrame(data={"pred": predplot,"x": x})
#pred_df["rolling"]=pred_df["pred"].rolling(50,min_periods=5 ,center=True).mean()
pred_df["rolling"]=pred_df["pred"].ewm(alpha=0.05).mean()
sns.set(rc={'figure.figsize':(10,7)})
sns.set_style("white")
ax=sns.scatterplot(data=pred_df,x="x",y="pred")
ax=sns.lineplot(data=pred_df,x="x",y="rolling",color="red")
ax.set(xlabel="Iteration",ylabel="Objective")

In [34]:

data = pickle.load(open("./predictions/200pred_list_prob03_doubleexhaustforbest.pk1","rb"))
seendata = set()
for molecule in data:
    if molecule[0] in seendata:
        molecule[1]=0
    seendata.add(molecule[0])
data = sorted(data,key=lambda x:x[1],reverse=True)
nummolecs = 20
topmolecules = [molecule[0] for molecule in data[:nummolecs]]
topscores = ["%.3f"%round(molecule[1],3) for molecule in data[:nummolecs]]
for i in topmolecules:
    print(f"SMILES='{i}'")
#visualise(topmolecules,topscores, size=250,row=5)


SMILES='CCOC(=O)n1ccc(NCc2cc(Cl)ccc2-c2ccco2)n1'
SMILES='CC(=O)N[C@@]12C=CC=C[C@@]1(C)N(C(=O)NCc1ccc(Cl)cc1)CCO2'
SMILES='C[C@H](Nc1ccc[nH+]c1)OC(=O)n1nccc1-c1ccc(Cl)cc1'
SMILES='Cc1occ(NC(=O)Cc2cccc(N3CCCC3)c2)c1Cl'
SMILES='C[C@@H](Nc1ccc[nH+]c1)OC(=O)c1cc(N)nn1-c1cccc(Cl)c1'
SMILES='C[C@@H](Nc1cccc[nH+]1)OC(=O)n1nc(N)cc1-c1ccc(Cl)cc1'
SMILES='CCOC(=O)n1nccc1-c1cocc1NCC(=O)c1ccccc1'
SMILES='CCN(C(=O)c1ccccc1Cl)[C@H]1CC[C@@]([NH3+])(Cc2ccccc2)[NH2+]1'
SMILES='Cc1cocc1NC(=O)[C@H](C)n1nc(N)c2ccccc21'
SMILES='Cc1oc(NC(=O)Cc2ccccc2-n2cccn2)c2c1CC=CC2'
SMILES='Cc1oc(NC(=O)Cc2ccccc2N2CCCC2)c2[nH]cnc12'
SMILES='O=C(OCc1ccoc1)n1ccc(NCc2ccccc2Cl)n1'
SMILES='Cc1cccc(-n2cc3ccoc3c2C(=O)NCc2ccc(Cl)cc2)c1'
SMILES='CCOc1ccnn1-c1cc(Cl)ccc1C(=O)N[C@@H](C)[n+]1ccccc1'
SMILES='Cc1ccc([C@@H](C)N(c2cccc(Cl)c2)c2c[nH]c(N)c2)cc1'
SMILES='CCOc1cn(-c2ccc(Cl)cc2)nc1C(=O)NCc1ccccc1[N+](=O)[O-]'
SMILES='CCOc1cccc(-n2c(C(=O)NCc3ccc(Cl)cc3)cc3sccc32)c1'
SMILES='Cc1cc(NC(=O)c2ccc(Cl)cc2N2CCNC[C@@]2(N)C#N)co1'
SMILES

In [None]:
data2 = pickle.load(open("./predictions/500pred_list_newdomainreduction.pk1","rb"))
seendata2 = set()
for molecule in data2:
    if molecule[0] in seendata2:
        molecule[1]=0
    seendata2.add(molecule[0])
data2 = sorted(data2,key=lambda x:x[1],reverse=True)
topmolecules2 = [molecule[0] for molecule in data2[:nummolecs]]

molfing=[]
molecs=[]
for molec in topmolecules:
    molecs.append(molec)
    molfing.append(rdkit.Chem.RDKFingerprint(rdkit.Chem.MolFromSmiles(molec)))
molfing2=[]
molecs2=[]
for molec in topmolecules2:
    molecs2.append(molec)
    molfing2.append(rdkit.Chem.RDKFingerprint(rdkit.Chem.MolFromSmiles(molec)))

for j,molec2 in enumerate(molfing2):
    scores = rdkit.Chem.DataStructs.BulkTanimotoSimilarity(molec2, molfing)
    for i, score in enumerate(scores):
        if score >= 0.4:
            target_id = molecs[i]
            print(f"{molecs2[j]}\t{target_id}\t{score:.3f}")

In [29]:
molecules = []
for molec in topmolecules:
    comps = pcp.get_compounds(molec,"smiles", searchtype='fastsimilarity_3d', listkey_count=1)
    print(comps[0].isomeric_smiles)
    molecules.append(comps[0].isomeric_smiles)
visualise([molecules],None,250,5)

IndexError: list index out of range