In [1]:
import os
# Move the working directory up one level; the relative paths used later in the
# notebook are resolved against this new location.
# NOTE(review): the path is relative, so re-running this cell moves up one more
# level each time. Presumably the notebook sits one folder below the directory
# that contains the `DTI` package — confirm.
os.chdir('../')

In [2]:
import pandas as pd
import numpy as np

import DTI.models as models
from DTI.utils import data_process, convert_y_unit, generate_config

In [None]:
# Load the Kd table and down-sample it to a small toy dataset so the demo runs quickly.
# The CSV path is relative to the working directory set by the earlier os.chdir call.
df_Kd = pd.read_csv('../DTBA_data_folder/Kd/data.csv')
# random_state is fixed so that "Restart & Run All" draws the same toy subset every time.
df_Kd = df_Kd.sample(frac = 0.002, replace = False, random_state = 42)  # toy dataset

X_drug = df_Kd.SMILES.values
X_target = df_Kd['Target Sequence'].values
# Binary classification labels: 1 when Kd < 30, otherwise 0 (a missing Kd compares
# False and therefore becomes 0). The threshold is presumably in nM, matching the
# unit given to convert_y_unit in the regression alternative below — confirm.
y = (df_Kd.Kd.values < 30).astype(int).tolist()
# Regression alternative: convert nM to p (log space) to help regression.
#y = convert_y_unit(df_Kd.Kd.values, 'nM', 'p')

drug_encoding = 'MPNN'
target_encoding = 'AAC'
# Encode both inputs and split the rows 70% / 10% / 20% into train / val / test.
train, val, test = data_process(X_drug, X_target, y,
                                drug_encoding, target_encoding,
                                split_method='random',frac=[0.7,0.1,0.2])

In [4]:
# Build the configuration for the chosen drug/target encodings, then create the
# model from it. Further model parameters can be supplied as keyword arguments
# to generate_config, e.g. cls_hidden_dim = [256, 32].
config = generate_config(
    drug_encoding,
    target_encoding,
    train_epoch=3,
)
model = models.model_initialize(
    drug_encoding,
    target_encoding,
    **config
)

In [5]:
# Fit the model on the three splits produced by data_process; the captured log
# below reports validation metrics per epoch and test metrics at the end.
model.train(train, val, test)

--- Data Preparation ---
--- Go for Training ---
Training at Epoch 1 iteration 0 with loss 0.6983305
Validation at Epoch 1 , AUROC: 0.55 , AUPRC: 0.5792424242424242 , F1: 0.0


  'precision', 'predicted', average, warn_for)


Training at Epoch 2 iteration 0 with loss 0.6419869
Validation at Epoch 2 , AUROC: 0.47500000000000003 , AUPRC: 0.5569230769230769 , F1: 0.0
Training at Epoch 3 iteration 0 with loss 0.57026505
Validation at Epoch 3 , AUROC: 0.45 , AUPRC: 0.5235897435897436 , F1: 0.0
--- Go for Testing ---
Testing AUROC: 0.7545454545454545 , AUPRC: 0.525128205128205 , F1: 0.0
--- Training Finished ---


In [6]:
# Draw a small batch of rows from the toy dataset to demonstrate prediction.
# NOTE: this rebinds `test`, which until now held the test split returned by
# data_process; the later virtual-screening cell reads this DataFrame via the same name.
n_demo = min(20, len(df_Kd))  # never ask for more rows than the toy sample holds
test = df_Kd.sample(n = n_demo, replace = False, random_state = 42)  # seeded so reruns pick the same rows
# Repurposing scores many drugs against one target: use the first sampled row's target.
target = test['Target Sequence'].iloc[0]
X_repurpose = test.SMILES.values
# PubChem IDs are cast to int first so they print without a decimal part.
drug_name = test.PubChem_ID.astype(int).astype(str).values
target_name = test.UniProt_ID.iloc[0]

In [7]:
# Score every sampled drug against the single target with the trained model;
# the call prints one prediction line per drug (see the output below).
r = models.repurpose(X_repurpose, target, model, drug_name, target_name)

repurposing...
predicting...
Drug Repurposing Result for P31751
57399640   predicted to NOT have interaction with the target
153999     predicted to NOT have interaction with the target
91448975   predicted to NOT have interaction with the target
91898352   predicted to NOT have interaction with the target
11364421   predicted to NOT have interaction with the target
138805831  predicted to NOT have interaction with the target
11338033   predicted to NOT have interaction with the target
44588220   predicted to NOT have interaction with the target
4521392    predicted to NOT have interaction with the target
216239     predicted to NOT have interaction with the target
9829523    predicted to NOT have interaction with the target
11667893   predicted to NOT have interaction with the target
5291       predicted to NOT have interaction with the target
118735636  predicted to NOT have interaction with the target
58267825   predicted to NOT have interaction with the target
6918454    predicted 

In [8]:
# Switch from a single target to one target per sampled row, so each drug is
# paired with the target from its own row.
target = test['Target Sequence'].values
# astype(str) turns a missing UniProt ID into the literal string 'nan', which is
# what shows up as the target name for some rows in the output below.
target_name = test.UniProt_ID.astype(str).values

In [9]:
# Predict each (drug, target) pair with the trained model; the call prints one
# result line per pair (see the output below).
r = models.virtual_screening(X_repurpose, target, model, drug_name, target_name)

repurposing...
predicting...
Virtual Screening Result
57399640   predicted to NOT have interaction with the target P31751 
153999     predicted to NOT have interaction with the target nan    
91448975   predicted to NOT have interaction with the target nan    
91898352   predicted to NOT have interaction with the target P10721 
11364421   predicted to NOT have interaction with the target P49137 
138805831  predicted to NOT have interaction with the target P51449 
11338033   predicted to NOT have interaction with the target Q9BQI3 
44588220   predicted to NOT have interaction with the target Q9HBH9 
4521392    predicted to NOT have interaction with the target P29376 
216239     predicted to NOT have interaction with the target P00533 
9829523    predicted to NOT have interaction with the target P57058 
11667893   predicted to NOT have interaction with the target Q13131 
5291       predicted to NOT have interaction with the target Q9HC98 
118735636  predicted to NOT have interaction with