In [1]:
import os
# Move the working directory up one level before anything else runs.
# Presumably this lets the later `DTI` imports and relative data paths
# resolve from the parent folder — TODO confirm against the repo layout.
# Caution: this is not idempotent; re-running the cell without restarting the
# kernel moves up one more directory each time.
os.chdir('../')

In [2]:
import pandas as pd
import numpy as np

import DTI.models as models
from DTI.utils import data_process, convert_y_unit, generate_config

In [None]:
# Load the Kd table and keep a small subsample as a toy dataset.
df_Kd = pd.read_csv('../DTBA_data_folder/Kd/data.csv')
# A fixed random_state makes the subsample (and every metric derived from it)
# identical on each Restart & Run All.
df_Kd = df_Kd.sample(frac = 0.002, replace = False, random_state = 42)

X_drug = df_Kd.SMILES.values
X_target = df_Kd['Target Sequence'].values

# Binary classification label: 1 when Kd is below the cutoff, otherwise 0
# (a missing Kd compares False and therefore becomes 0).
# The cutoff is presumably in nM, the unit named by the regression alternative.
# For regression on a log scale, use instead:
#   y = convert_y_unit(df_Kd.Kd.values, 'nM', 'p')
KD_CUTOFF = 30
y = (df_Kd.Kd.values < KD_CUTOFF).astype(int).tolist()

drug_encoding = 'MPNN'
target_encoding = 'Quasi-seq'
# Encode both inputs and split randomly; frac is presumably the
# train/validation/test proportions, matching the three returned splits.
train, val, test = data_process(X_drug, X_target, y,
                                drug_encoding, target_encoding,
                                split_method='random',frac=[0.7,0.1,0.2])

in total: 133 drug-target pairs
encoding drug...
unique drugs: 99
drug encoding finished...
encoding protein...
unique target sequence: 112


In [4]:
# Build the model configuration for the chosen drug/target encoders, then
# create the model from it. Further model parameters can be passed to
# generate_config as keyword arguments, e.g. cls_hidden_dim = [256, 32].
config = generate_config(
    drug_encoding,
    target_encoding,
    train_epoch = 3,
)
model = models.model_initialize(
    drug_encoding,
    target_encoding,
    **config
)

In [5]:
# Train on the three splits produced by data_process. Per the log printed below,
# validation metrics (AUROC, AUPRC, F1) are reported after each epoch and test
# metrics once training finishes.
# NOTE(review): F1 stays at 0.0 in the recorded run — presumably no positive
# predictions on this tiny toy sample; confirm before trusting the metrics.
model.train(train, val, test)

--- Data Preparation ---
--- Go for Training ---
Training at Epoch 1 iteration 0 with loss 0.7395122


  'precision', 'predicted', average, warn_for)


Validation at Epoch 1 , AUROC: 1.0 , AUPRC: 1.0 , F1: 0.0
Training at Epoch 2 iteration 0 with loss 0.53700477
Validation at Epoch 2 , AUROC: 1.0 , AUPRC: 1.0 , F1: 0.0
Training at Epoch 3 iteration 0 with loss 0.45555922
Validation at Epoch 3 , AUROC: 0.75 , AUPRC: 0.25 , F1: 0.0
--- Go for Testing ---
Testing AUROC: 0.6388888888888888 , AUPRC: 0.2013888888888889 , F1: 0.0
--- Training Finished ---


In [6]:
# Draw 20 rows from the toy dataset to use as repurposing candidates.
# A fixed random_state keeps the candidate list identical across re-runs.
# NOTE: this rebinds `test`, which until now held the test split returned by
# data_process; later cells read `test` as this 20-row sample.
test = df_Kd.sample(n = 20, replace=False, random_state = 42)

# One shared target (the first sampled row) scored against all sampled drugs.
target = test['Target Sequence'].iloc[0]
target_name = test.UniProt_ID.iloc[0]

X_repurpose = test.SMILES.values
# PubChem IDs are cast through int so they print without a decimal part.
drug_name = test.PubChem_ID.astype(int).astype(str).values

In [8]:
# Score every drug in X_repurpose against the single `target` sequence with the
# trained model. The per-drug predictions are printed under the target's name
# (see the log below) and the return value is kept in `r`.
r = models.repurpose(X_repurpose, target, model, drug_name, target_name)

repurposing...
in total: 20 drug-target pairs
encoding drug...
unique drugs: 20
drug encoding finished...
encoding protein...
unique target sequence: 1
protein encoding finished...
splitting dataset...
Done.
predicting...
---------------
Drug Repurposing Result for O00329
Drug 136215716  predicted to NOT have interaction with the target
Drug 19875425   predicted to NOT have interaction with the target
Drug 91895868   predicted to NOT have interaction with the target
Drug 5329102    predicted to NOT have interaction with the target
Drug 25182616   predicted to NOT have interaction with the target
Drug 44243352   predicted to NOT have interaction with the target
Drug 10357464   predicted to NOT have interaction with the target
Drug 4282260    predicted to NOT have interaction with the target
Drug 10113978   predicted to NOT have interaction with the target
Drug 11314340   predicted to NOT have interaction with the target
Drug 44280063   predicted to NOT have interaction with the target
D

In [9]:
# Switch from one shared target to one target per sampled row: both names are
# rebound from single values to arrays taken from the `test` frame.
target_name = test['UniProt_ID'].astype(str).values
target = test['Target Sequence'].values

In [10]:
# Screen drug/target pairs with the trained model. Judging by the printed log,
# each drug is reported against its own target ID, so the inputs appear to be
# paired element-wise — TODO confirm. The return value overwrites the
# repurposing result previously held in `r`.
r = models.virtual_screening(X_repurpose, target, model, drug_name, target_name)

virtual screening...
in total: 20 drug-target pairs
encoding drug...
unique drugs: 20
drug encoding finished...
encoding protein...
unique target sequence: 20
protein encoding finished...
splitting dataset...
Done.
predicting...
---------------
Virtual Screening Result
Drug 136215716  predicted to NOT have interaction with the target O00329 
Drug 19875425   predicted to NOT have interaction with the target P21731 
Drug 91895868   predicted to NOT have interaction with the target P00918 
Drug 5329102    predicted to NOT have interaction with the target P52333 
Drug 25182616   predicted to NOT have interaction with the target Q15835 
Drug 44243352   predicted to NOT have interaction with the target P15056 
Drug 10357464   predicted to NOT have interaction with the target P62965 
Drug 4282260    predicted to NOT have interaction with the target P0AC13 
Drug 10113978   predicted to NOT have interaction with the target Q92630 
Drug 11314340   predicted to NOT have interaction with the targe