In [1]:
import os
os.chdir('../')

In [2]:
import pandas as pd
import numpy as np

import DTI.models as models
from DTI.utils import data_process, convert_y_unit, generate_config

In [3]:
# load data
df_Kd = pd.read_csv('./DTBA_data_folder/Kd/data.csv')
df_Kd = df_Kd.sample(frac = 0.002, replace = False)# toy dataset

X_drug = df_Kd.SMILES.values
X_target = df_Kd['Target Sequence'].values 
# support nM to p (logspace) convertion to help regression
y = convert_y_unit(df_Kd.Kd.values, 'nM', 'p') 

drug_encoding = 'Daylight'
target_encoding = 'Conjoint_triad'
train, val, test = data_process(X_drug, X_target, y, 
                                drug_encoding, target_encoding, 
                                split_method='random',frac=[0.7,0.1,0.2])

in total: 133 drug-target pairs
encoding drug...
unique drugs: 87
drug encoding finished...
encoding protein...
unique target sequence: 113
protein encoding finished...
splitting dataset...
Done.


In [4]:
# model setup, you can adjust the config file by typing in model parameters. e.g. cls_hidden_dim = [256, 32]
config = generate_config(drug_encoding, target_encoding, train_epoch = 3)
model = models.model_initialize(drug_encoding, target_encoding, **config)

In [5]:
model.train(train, val, test)

--- Data Preparation ---
--- Go for Training ---
Training at Epoch 1 iteration 0 with loss 38.221844
Validation at Epoch 1 , MSE: 50.9259852066021 , Pearson Correlation: -0.05459238867502844 with p-value: 0.8594043562064593 , Concordance Index: 0.5972222222222222
Training at Epoch 2 iteration 0 with loss 31.983759
Validation at Epoch 2 , MSE: 41.88699634954803 , Pearson Correlation: -0.08134212875903593 with p-value: 0.7916500624177705 , Concordance Index: 0.5694444444444444
Training at Epoch 3 iteration 0 with loss 24.459225
Validation at Epoch 3 , MSE: 28.805502444172852 , Pearson Correlation: -0.09739682240028597 with p-value: 0.7515938429555372 , Concordance Index: 0.5694444444444444
--- Go for Testing ---
Testing MSE: 12.338111311785482 , Pearson Correlation: -0.3195360601276601 with p-value: 0.1042240277234158 , Concordance Index: 0.4230769230769231
--- Training Finished ---


In [6]:
test = df_Kd.sample(n = 20, replace=False)
target = test['Target Sequence'].iloc[0]
X_repurpose = test.SMILES.values
drug_name = test.PubChem_ID.astype(int).astype(str).values
target_name = test.UniProt_ID.iloc[0]

In [7]:
r = models.repurpose(X_repurpose, target, model, drug_name, target_name)

repurposing...
predicting...
Drug Repurposing Result for P31751
57399640   predicted to have binding affinity score 2.26
153999     predicted to have binding affinity score 2.30
91448975   predicted to have binding affinity score 2.05
91898352   predicted to have binding affinity score 2.43
11364421   predicted to have binding affinity score 2.40
138805831  predicted to have binding affinity score 2.41
11338033   predicted to have binding affinity score 1.94
44588220   predicted to have binding affinity score 1.66
4521392    predicted to have binding affinity score 2.02
216239     predicted to have binding affinity score 1.89
9829523    predicted to have binding affinity score 2.62
11667893   predicted to have binding affinity score 1.97
5291       predicted to have binding affinity score 1.99
118735636  predicted to have binding affinity score 2.33
58267825   predicted to have binding affinity score 2.39
6918454    predicted to have binding affinity score 1.98
117927247  predicted to 

In [8]:
target = test['Target Sequence'].values
target_name = test.UniProt_ID.astype(str).values

In [9]:
r = models.virtual_screening(X_repurpose, target, model, drug_name, target_name)

repurposing...
predicting...
Virtual Screening Result
57399640   and target P31751  predicted to have binding affinity score 2.26
153999     and target nan     predicted to have binding affinity score 3.16
91448975   and target nan     predicted to have binding affinity score 2.91
91898352   and target P10721  predicted to have binding affinity score 3.70
11364421   and target P49137  predicted to have binding affinity score 2.27
138805831  and target P51449  predicted to have binding affinity score 2.50
11338033   and target Q9BQI3  predicted to have binding affinity score 2.26
44588220   and target Q9HBH9  predicted to have binding affinity score 1.65
4521392    and target P29376  predicted to have binding affinity score 2.98
216239     and target P00533  predicted to have binding affinity score 3.66
9829523    and target P57058  predicted to have binding affinity score 3.27
11667893   and target Q13131  predicted to have binding affinity score 2.17
5291       and target Q9HC98  pred