In [1]:
import os
os.chdir('../../')

import DeepPurpose.models as models
from DeepPurpose.utils import *
from DeepPurpose.dataset import *



In [2]:
X_drug, X_target, y = load_process_DAVIS('./data/', binary=True)

drug_encoding = 'MPNN'
target_encoding = 'CNN'
train, val, test = data_process(X_drug, X_target, y, 
                                drug_encoding, target_encoding, 
                                split_method='random',frac=[0.7,0.1,0.2])

Beginning Processing...
Beginning to extract zip file...
Default binary threshold for the binding affinity scores are 30, you can adjust it by using the "threshold" parameter
Done!
in total: 30056 drug-target pairs
encoding drug...
unique drugs: 68
drug encoding finished...
encoding protein...
unique target sequence: 379
protein encoding finished...
splitting dataset...
Done.


In [3]:
# use the parameters setting provided in the paper: https://arxiv.org/abs/1801.10193
config = generate_config(drug_encoding = drug_encoding, 
                         target_encoding = target_encoding, 
                         cls_hidden_dims = [1024,1024,512], 
                         train_epoch = 20, 
                         LR = 0.01, 
                         batch_size = 256,
                         cnn_drug_filters = [32,64,96],
                         cnn_target_filters = [32,64,96],
                         cnn_drug_kernels = [4,6,8],
                         cnn_target_kernels = [4,8,12]
                        )

In [None]:
model = models.model_initialize(**config)
model.train(train, val, test)

Let's use 1 GPU/s!
--- Data Preparation ---
--- Go for Training ---
Training at Epoch 1 iteration 0 with loss 0.69403857
Validation at Epoch 1 , AUROC: 0.7295373027259684 , AUPRC: 0.1211539030297959 , F1: 0.0
Training at Epoch 2 iteration 0 with loss 0.1092004
Validation at Epoch 2 , AUROC: 0.7431633019061282 , AUPRC: 0.13697487499157296 , F1: 0.0
Training at Epoch 3 iteration 0 with loss 0.20741162
Validation at Epoch 3 , AUROC: 0.7770431953269112 , AUPRC: 0.24575867402189622 , F1: 0.0
Training at Epoch 4 iteration 0 with loss 0.16183546
Validation at Epoch 4 , AUROC: 0.8262412891986062 , AUPRC: 0.25202403732226397 , F1: 0.0
Training at Epoch 5 iteration 0 with loss 0.15290245
Validation at Epoch 5 , AUROC: 0.8332598893215822 , AUPRC: 0.2812255843140823 , F1: 0.0
Training at Epoch 6 iteration 0 with loss 0.15251681
Validation at Epoch 6 , AUROC: 0.830226480836237 , AUPRC: 0.3242608751843567 , F1: 0.0
Training at Epoch 7 iteration 0 with loss 0.115404144
Validation at Epoch 7 , AUROC: 

In [5]:
X_repurpose, drug_name, drug_cid = load_broad_repurposing_hub('./data/')
target, target_name = load_SARS_CoV_Protease_3CL()

_ = models.repurpose(X_repurpose, target, 
                     model, drug_cid, target_name, 
                     convert_y = True)

repurposing...
in total: 6111 drug-target pairs
encoding drug...
unique drugs: 6111
drug encoding finished...
encoding protein...
unique target sequence: 1
protein encoding finished...
splitting dataset...
Done.
predicting...
---------------
Drug Repurposing Result for SARS-CoV 3CL Protease
+------+-------------+-----------------------+---------------+
| Rank |  Drug Name  |      Target Name      | Binding Score |
+------+-------------+-----------------------+---------------+
|  1   |  4214594.0  | SARS-CoV 3CL Protease |    1397.35    |
|  2   |  2847479.0  | SARS-CoV 3CL Protease |    1572.58    |
|  3   |  1016023.0  | SARS-CoV 3CL Protease |    1807.20    |
|  4   |  16049404.0 | SARS-CoV 3CL Protease |    1826.43    |
|  5   |    1795.0   | SARS-CoV 3CL Protease |    1873.54    |
|  6   |  3896043.0  | SARS-CoV 3CL Protease |    2088.04    |
|  7   |    4078.0   | SARS-CoV 3CL Protease |    2316.57    |
|  8   |    2784.0   | SARS-CoV 3CL Protease |    2409.95    |
|  9   |  10052

| 4125 |  9820766.0  | SARS-CoV 3CL Protease |    10356.85   |
| 4126 |  91691129.0 | SARS-CoV 3CL Protease |    10357.47   |
| 4127 |    5561.0   | SARS-CoV 3CL Protease |    10358.33   |
| 4128 |  45138674.0 | SARS-CoV 3CL Protease |    10359.26   |
| 4129 |  3008319.0  | SARS-CoV 3CL Protease |    10359.37   |
| 4130 |  1560402.0  | SARS-CoV 3CL Protease |    10360.22   |
| 4131 |    4045.0   | SARS-CoV 3CL Protease |    10360.32   |
| 4132 |   188914.0  | SARS-CoV 3CL Protease |    10360.34   |
| 4133 |    5455.0   | SARS-CoV 3CL Protease |    10360.44   |
| 4134 |  9996409.0  | SARS-CoV 3CL Protease |    10361.07   |
| 4135 |  25265312.0 | SARS-CoV 3CL Protease |    10362.14   |
| 4136 |  3038495.0  | SARS-CoV 3CL Protease |    10366.01   |
| 4137 |    2051.0   | SARS-CoV 3CL Protease |    10366.35   |
| 4138 |  9869779.0  | SARS-CoV 3CL Protease |    10366.43   |
| 4139 |   14296.0   | SARS-CoV 3CL Protease |    10366.50   |
| 4140 |   71345.0   | SARS-CoV 3CL Protease |    10366

In [12]:
_ = models.virtual_screening(X_repurpose, np.random.choice(X_target, len(X_repurpose)), 
                     model, drug_cid, ['Protein '+ str(i) for i in range(len(X_repurpose))], 
                     convert_y = True)

virtual screening...
in total: 6111 drug-target pairs
encoding drug...
unique drugs: 6111
drug encoding finished...
encoding protein...
unique target sequence: 379
protein encoding finished...
splitting dataset...
Done.
predicting...
---------------
Virtual Screening Result
+------+-------------+--------------+---------------+
| Rank |  Drug Name  | Target Name  | Binding Score |
+------+-------------+--------------+---------------+
|  1   |  56962369.0 | Protein 2317 |    19453.24   |
|  2   |  9872609.0  | Protein 1326 |    18697.86   |
|  3   |  73555129.0 | Protein 1322 |    18547.74   |
|  4   |    4911.0   | Protein 5887 |    18514.19   |
|  5   |  5282226.0  | Protein 2729 |    18443.14   |
|  6   |  71656212.0 | Protein 3151 |    18248.02   |
|  7   |     UNK     | Protein 1179 |    18084.21   |
|  8   |    3278.0   | Protein 2067 |    18082.00   |
|  9   |  6914666.0  | Protein 1046 |    18011.60   |
|  10  |  5282138.0  | Protein 2961 |    17695.71   |
|  11  |   10660.0   | 

| 3552 |   65666.0   | Protein 1081 |    8394.50    |
| 3553 |   446916.0  | Protein 1645 |    8393.97    |
| 3554 |  53348216.0 | Protein 1161 |    8389.47    |
| 3555 |  10296561.0 | Protein 3830 |    8387.78    |
| 3556 |  72199292.0 | Protein 3956 |    8384.57    |
| 3557 |    4602.0   | Protein 5713 |    8383.18    |
| 3558 |    6256.0   | Protein 1515 |    8378.98    |
| 3559 |   425883.0  | Protein 3987 |    8375.12    |
| 3560 |   160339.0  | Protein 3082 |    8374.26    |
| 3561 |    4906.0   | Protein 4344 |    8373.51    |
| 3562 | 121513876.0 | Protein 678  |    8372.01    |
| 3563 |   13387.0   | Protein 5681 |    8371.42    |
| 3564 |    3932.0   | Protein 3362 |    8371.15    |
| 3565 |  5311192.0  | Protein 4094 |    8367.76    |
| 3566 |   222284.0  | Protein 5175 |    8366.47    |
| 3567 |   643684.0  | Protein 5553 |    8365.71    |
| 3568 |  1117196.0  | Protein 2912 |    8365.08    |
| 3569 |  56654642.0 | Protein 4092 |    8364.92    |
| 3570 |   39562.0   | Prote