### Importing Libraries

In [1]:
import sys,os
import random
import numpy as np
import json
from collections import OrderedDict
from gnn import GNNNet
from utils import *
from emetrics import *
from data_pro import create_dataset_for_train,create_dataset_for_test,create_dataset_for_test_bootstrap
import torch
import torch.nn as nn
from torch_geometric.data import DataLoader
import rdkit as rd
from torch_sparse import SparseTensor,transpose
import deepchem
import tensorflow as tf
import pandas as pd
import pickle
import statistics

  if pair is not 0:
[06:58:07] Enabling RDKit 2019.09.3 jupyter extensions


### Selecting the dataset: Davis [0] or KIBA [1]

In [2]:
# Dataset switch: index 0 selects 'davis', index 1 selects 'kiba'.
datasets = ('davis', 'kiba')[0]
datasets

'davis'

### Select the ligand encoding method and contact map method for protein encoding

In [3]:
# Protein contact-map technique; change the trailing index to pick another entry.
method = ('pconsc4', 'esm_cmaps', 'alpha_fold_cmaps', 'rand_cmaps')[0]
method

'pconsc4'

In [4]:
# Ligand encoding method; index 0 is 'original', index 1 is 'random'.
method1 = ('original', 'random')[1]
method1

'random'

### Metrics

In [5]:
def predicting(model, device, loader):
    """Run ``model`` over every batch of ``loader`` and gather labels and predictions.

    Args:
        model: network invoked as ``model(data_mol, data_pro)``; it is put into
            eval mode before inference.
        device: device each batch element is moved to before the forward pass.
        loader: iterable of batches whose items 0 and 1 are the molecule batch
            and the protein batch; ``loader.dataset`` must support ``len``.

    Returns:
        Tuple ``(labels, predictions)`` of flattened NumPy arrays, where the
        labels are read from the molecule batch's ``y`` attribute.
    """
    model.eval()
    # Each list starts with an empty tensor so an empty loader still yields
    # empty arrays instead of failing in torch.cat.
    pred_chunks = [torch.Tensor()]
    label_chunks = [torch.Tensor()]
    print('Make prediction for {} samples...'.format(len(loader.dataset)))
    with torch.no_grad():
        for batch in loader:
            mol_batch = batch[0].to(device)
            pro_batch = batch[1].to(device)
            pred_chunks.append(model(mol_batch, pro_batch).cpu())
            label_chunks.append(mol_batch.y.view(-1, 1).cpu())
    all_preds = torch.cat(pred_chunks, 0)
    all_labels = torch.cat(label_chunks, 0)
    return all_labels.numpy().flatten(), all_preds.numpy().flatten()


def load_model(model_path):
    """Return the object that ``torch.load`` deserializes from ``model_path``.

    ``torch.load`` relies on pickle, so only pass files from trusted sources.
    """
    return torch.load(model_path)


def calculate_metrics(Y, P, dataset,result_file_name):
    """Score predictions against labels, print the metrics and save a summary file.

    Args:
        Y: measured values.
        P: predicted values, aligned with ``Y``.
        dataset: dataset name; printed and written as the first line of the report.
        result_file_name: path of the text file that is (over)written with the
            summary. Its parent directory is created when missing.

    Returns:
        None. The metrics are only printed and written to ``result_file_name``.
    """
    cindex = get_cindex(Y, P)  # DeepDTA
    cindex2 = get_ci(Y, P)  # GraphDTA
    rm2 = get_rm2(Y, P)  # DeepDTA
    mse = get_mse(Y, P)
    pearson = get_pearson(Y, P)
    spearman = get_spearman(Y, P)
    rmse = get_rmse(Y, P)

    print('metrics for ', dataset)
    print('cindex:', cindex)
    print('cindex2', cindex2)
    print('rm2:', rm2)
    print('mse:', mse)
    print('pearson', pearson)

    result_str = ''
    result_str += dataset + '\r\n'
    result_str += 'rmse:' + str(rmse) + ' ' + ' mse:' + str(mse) + ' ' + ' pearson:' + str(
        pearson) + ' ' + 'spearman:' + str(spearman) + ' ' + 'ci:' + str(cindex) + ' ' + 'rm2:' + str(rm2)
    print(result_str)

    # Create the output directory on demand so a fresh checkout does not fail
    # only after all metrics have already been computed.
    result_dir = os.path.dirname(result_file_name)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)
    # The context manager guarantees the handle is flushed and closed.
    with open(result_file_name, 'w') as result_file:
        result_file.write(result_str)


def plot_density(Y, P, fold=0, dataset='davis'):
    """Scatter measured against predicted values with an identity reference line.

    Args:
        Y: measured values (y axis).
        P: predicted values (x axis).
        fold: fold index; currently unused because the ``savefig`` call that
            put it in the file name is commented out.
        dataset: ``'davis'`` draws the reference line over [5, 11]; any other
            value draws it over [6, 16].

    NOTE(review): ``plt`` is not among the notebook's visible imports;
    presumably it comes in through one of the wildcard imports - confirm.
    """
    plt.figure(figsize=(10, 5))
    plt.grid(linestyle='--')
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Both artists carry a label: without labels the legend requested below
    # has no entries and matplotlib emits a "no artists with labels" warning.
    plt.scatter(P, Y, color='blue', s=40, label='test samples')
    plt.title('density of ' + dataset, fontsize=30, fontweight='bold')
    plt.xlabel('predicted', fontsize=30, fontweight='bold')
    plt.ylabel('measured', fontsize=30, fontweight='bold')
    # plt.xlim(0, 21)
    # plt.ylim(0, 21)
    if dataset == 'davis':
        low, high = 5, 11
    else:
        low, high = 6, 16
    plt.plot([low, high], [low, high], color='black', label='predicted = measured')

    leg = plt.legend(loc=0, numpoints=1)
    plt.setp(leg.get_texts(), fontsize=12, fontweight='bold')
    #plt.savefig(os.path.join('results', dataset + '_pconcs4_188_2000_' + str(fold) + '.png'), dpi=500, bbox_inches='tight')

def calculate_metrics1(Y, P, dataset):
    """Compute and print the regression metrics for ``dataset`` and return them.

    Args:
        Y: measured values.
        P: predicted values, aligned with ``Y``.
        dataset: name echoed in the printed header.

    Returns:
        Tuple ``(cindex, pearson, rmse, mse, rm2, spearman)``.
    """
    ci_value = get_cindex(Y, P)  # DeepDTA
    ci_alt = get_ci(Y, P)  # GraphDTA
    rm2_value = get_rm2(Y, P)  # DeepDTA
    mse_value = get_mse(Y, P)
    pearson_value = get_pearson(Y, P)
    spearman_value = get_spearman(Y, P)
    rmse_value = get_rmse(Y, P)

    print('metrics for ', dataset)
    # Only these five are echoed; rmse and spearman are returned but not printed.
    report = (
        ('cindex:', ci_value),
        ('cindex2', ci_alt),
        ('rm2:', rm2_value),
        ('mse:', mse_value),
        ('pearson', pearson_value),
    )
    for tag, value in report:
        print(tag, value)
    return ci_value, pearson_value, rmse_value, mse_value, rm2_value, spearman_value


def calculate_metrics2(Y, P):
    """Compute and print the regression metrics (no dataset header) and return them.

    Args:
        Y: measured values.
        P: predicted values, aligned with ``Y``.

    Returns:
        Tuple ``(cindex, pearson, rmse, mse, rm2, spearman)``.
    """
    scores = {}
    scores['cindex'] = get_cindex(Y, P)  # DeepDTA
    scores['cindex2'] = get_ci(Y, P)  # GraphDTA
    scores['rm2'] = get_rm2(Y, P)  # DeepDTA
    scores['mse'] = get_mse(Y, P)
    scores['pearson'] = get_pearson(Y, P)
    scores['spearman'] = get_spearman(Y, P)
    scores['rmse'] = get_rmse(Y, P)

    # rmse and spearman are returned below but intentionally not echoed here.
    print('cindex:', scores['cindex'])
    print('cindex2', scores['cindex2'])
    print('rm2:', scores['rm2'])
    print('mse:', scores['mse'])
    print('pearson', scores['pearson'])
    return (scores['cindex'], scores['pearson'], scores['rmse'],
            scores['mse'], scores['rm2'], scores['spearman'])

### Loading the trained model

In [6]:

# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
# The fallback must not be overwritten by an unconditional CUDA device,
# or the cell fails on CPU-only machines.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')
TEST_BATCH_SIZE = 128

#Loading the path to the trained model and setting the results path
# NOTE(review): both paths are hard-coded for pconsc4/random/davis and do not
# follow the `method`, `method1` and `datasets` selections made above.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_davis_0.model'
result_file_name = 'results33/result_davis_pconcs4_random_0_'+ '.txt'
model = GNNNet()
model.to(device)
# map_location=device remaps the checkpoint tensors onto whichever device was chosen.
model.load_state_dict(torch.load(model_file_name, map_location=device))

#Loading the test data
test_data = create_dataset_for_test(str(datasets),method,method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

#predictions using the trained model
Y, P = predicting(model, device, test_loader)
calculate_metrics(Y, P, str(datasets),result_file_name)

GNNNet Loaded
dataset: davis
test entries: 3774 effective test entries 3774
effective drugs,effective prot: 68 333


  GCNData_mol = DATA.Data(x=torch.Tensor(features),


Make prediction for 3774 samples...
metrics for  davis
cindex: 0.6052745818528371
cindex2 0.6003513542809228
rm2: 0.03742932585960611
mse: 0.94970834
pearson 0.2303920290928978
davis
rmse:0.9745298058818853  mse:0.94970834  pearson:0.2303920290928978 spearman:0.18639682387789183 ci:0.6052745818528371 rm2:0.03742932585960611


In [7]:

# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
# The fallback must not be overwritten by an unconditional CUDA device,
# or the cell fails on CPU-only machines.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')


#Loading the path to the trained model and setting the results path
# NOTE(review): both paths are hard-coded for pconsc4/random/davis and do not
# follow the `method`, `method1` and `datasets` selections made above.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_davis_2.model'
result_file_name = 'results33/result_davis_pconcs4_random_2_'+ '.txt'
model = GNNNet()
model.to(device)
# map_location=device remaps the checkpoint tensors onto whichever device was chosen.
model.load_state_dict(torch.load(model_file_name, map_location=device))

#Loading the test data
# TEST_BATCH_SIZE is defined in the cell that evaluates the first checkpoint.
test_data = create_dataset_for_test(str(datasets),method,method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

#predictions using the trained model
Y1, P1 = predicting(model, device, test_loader)
calculate_metrics(Y1, P1, str(datasets),result_file_name)

GNNNet Loaded
dataset: davis
test entries: 3774 effective test entries 3774
effective drugs,effective prot: 68 333
Make prediction for 3774 samples...
metrics for  davis
cindex: 0.5343340336382215
cindex2 0.5371832630456284
rm2: 0.03048590052739202
mse: 0.80359644
pearson 0.18398428564326713
davis
rmse:0.8964354059146629  mse:0.80359644  pearson:0.18398428564326713 spearman:0.06903257015591625 ci:0.5343340336382215 rm2:0.03048590052739202


In [8]:

# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
# The fallback must not be overwritten by an unconditional CUDA device,
# or the cell fails on CPU-only machines.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')


#Loading the path to the trained model and setting the results path
# NOTE(review): both paths are hard-coded for pconsc4/random/davis and do not
# follow the `method`, `method1` and `datasets` selections made above.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_davis_4.model'
result_file_name = 'results33/result_davis_pconcs4_random_4_'+ '.txt'
model = GNNNet()
model.to(device)
# map_location=device remaps the checkpoint tensors onto whichever device was chosen.
model.load_state_dict(torch.load(model_file_name, map_location=device))

#Loading the test data
# TEST_BATCH_SIZE is defined in the cell that evaluates the first checkpoint.
test_data = create_dataset_for_test(str(datasets),method,method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

#predictions using the trained model
Y2, P2 = predicting(model, device, test_loader)
calculate_metrics(Y2, P2, str(datasets),result_file_name)

GNNNet Loaded
dataset: davis
test entries: 3774 effective test entries 3774
effective drugs,effective prot: 68 333
Make prediction for 3774 samples...
metrics for  davis
cindex: 0.560750185465669
cindex2 0.5651335073255908
rm2: 0.0038515559187537562
mse: 1.0304008
pearson 0.08302292282245427
davis
rmse:1.0150865741508162  mse:1.0304008  pearson:0.08302292282245427 spearman:0.12095984152936592 ci:0.560750185465669 rm2:0.0038515559187537562


In [9]:

# Per-sample mean and standard deviation across the three models' predictions.
res = list(map(np.mean, zip(P, P1, P2)))
res1 = list(map(np.std, zip(P, P1, P2)))

In [10]:
# Per-sample table: measured value, each model's prediction, and the
# across-model mean and standard deviation.
d = dict(exp=Y, model1=P, model2=P1, model3=P2, mean=res, std=res1)
df = pd.DataFrame(d)
df

Unnamed: 0,exp,model1,model2,model3,mean,std
0,5.000000,5.763292,5.002618,4.999321,5.255077,0.359365
1,5.000000,5.202218,5.055537,5.822033,5.359930,0.332199
2,5.000000,5.285347,5.013941,5.045128,5.114805,0.121261
3,5.000000,5.098569,5.064178,5.781350,5.314699,0.330271
4,5.000000,6.935692,5.008236,4.996512,5.646814,0.911388
...,...,...,...,...,...,...
3769,7.309804,9.064180,5.140716,6.302797,6.835898,1.645507
3770,5.000000,5.221035,5.066128,5.052144,5.113102,0.076533
3771,5.000000,5.118824,5.105760,5.581861,5.268815,0.221421
3772,5.000000,5.042781,5.056559,5.132945,5.077428,0.039657


In [11]:
# Save the per-sample prediction table without the index column.
# NOTE(review): the file name spells 'pconcs4' while `method` and the model
# paths use 'pconsc4' - confirm which spelling downstream readers expect.
df.to_csv('results_figures/pconcs4_random_davis.csv', index=False)

### Bootstrapping the model predictions

In [19]:
# Load a saved per-sample prediction table from disk. This rebinds `df`, so the
# table built earlier in the notebook is no longer reachable under that name.
# NOTE(review): this is a different file from the CSV written above - confirm
# it exists before running.
df=pd.read_csv('results_figures/pconsc4_original_davis.csv')

In [20]:
# Display the table that was just loaded.
df

Unnamed: 0,exp,model1,model2,model3,mean,std
0,5.000000,5.775783,4.991157,5.139399,5.302113,0.340359
1,5.000000,5.005616,4.996244,4.999129,5.000330,0.003919
2,5.000000,5.009299,5.047746,5.040968,5.032671,0.016756
3,5.000000,5.005346,5.002017,4.999129,5.002164,0.002540
4,5.000000,5.078531,5.033904,4.997272,5.036569,0.033227
...,...,...,...,...,...,...
3769,7.309804,6.598566,6.610683,6.691361,6.633537,0.041186
3770,5.000000,5.177664,5.058593,5.092487,5.109581,0.050091
3771,5.000000,5.204560,4.986555,4.999129,5.063415,0.099937
3772,5.000000,5.005346,4.989818,4.999129,4.998097,0.006381


In [21]:
#bootstrapping the test data 
rmse1=[]
pearson1=[]
ci1=[]
mse1=[]
rm1=[]
spearman1=[]
niters=40

for i in range(niters):
    rmse1a=[]
    pearson1a=[]
    ci1a=[]
    mse1a=[]
    rm1a=[]
    spearman1a=[]
    df2 = df.sample(n=1000)
    Y=np.array(df2['exp'])
    P1=np.array(df2['model1'])
    P2=np.array(df2['model2'])
    P3=np.array(df2['model3'])
    
    cindex,pearson,rmse,mse,rm2,spearman=calculate_metrics2(Y, P1)
    ci1a.append(cindex)
    pearson1a.append(pearson)
    rmse1a.append(rmse)
    mse1a.append(mse)
    rm1a.append(rm2)
    spearman1a.append(spearman)

    cindex,pearson,rmse,mse,rm2,spearman=calculate_metrics2(Y, P2)
    ci1a.append(cindex)
    pearson1a.append(pearson)
    rmse1a.append(rmse)
    mse1a.append(mse)
    rm1a.append(rm2)
    spearman1a.append(spearman)
    
    cindex,pearson,rmse,mse,rm2,spearman=calculate_metrics2(Y, P3)
    ci1a.append(cindex)
    pearson1a.append(pearson)
    rmse1a.append(rmse)
    mse1a.append(mse)
    rm1a.append(rm2)
    spearman1a.append(spearman)

    
    ci1.append(np.mean(ci1a))
    pearson1.append(np.mean(pearson1a))
    rmse1.append(np.mean(rmse1a))
    mse1.append(np.mean(mse1a))
    rm1.append(np.mean(rm1a))
    spearman1.append(np.mean(spearman1a))

cindex: 0.8900185441618281
cindex2 0.8912419995138945
rm2: 0.5935109282989075
mse: 0.2364348560583126
pearson 0.8134508616115749
cindex: 0.8925977212545247
cindex2 0.8994430041318966
rm2: 0.6180046693934841
mse: 0.22503582786054213
pearson 0.8221144947202528
cindex: 0.8759079756091636
cindex2 0.8827189500121526
rm2: 0.5763205150513628
mse: 0.2405835604210342
pearson 0.8122607544918962
cindex: 0.8841825768112277
cindex2 0.8911892002898315
rm2: 0.6628756294858416
mse: 0.23131746510439233
pearson 0.8240802760209318
cindex: 0.8828786825135921
cindex2 0.8862818262483484
rm2: 0.6076586143184092
mse: 0.2677293715830724
pearson 0.7945960740528875
cindex: 0.8615422303704641
cindex2 0.8653794738861055
rm2: 0.6515725652866196
mse: 0.24089901321562823
pearson 0.8180758814365502
cindex: 0.8784379161255322
cindex2 0.8839650902700111
rm2: 0.584947686408132
mse: 0.2592607435833478
pearson 0.7973759863779898
cindex: 0.8699510102089464
cindex2 0.882385365074293
rm2: 0.5747123556634818
mse: 0.26571607791

cindex: 0.8969199942977524
cindex2 0.8884672040186875
rm2: 0.6161814337834582
mse: 0.2589381320330233
pearson 0.811993056957801
cindex: 0.8864975527853896
cindex2 0.8833095724220728
rm2: 0.6703504645494608
mse: 0.21685757685746496
pearson 0.8455628292038253
cindex: 0.8853669315983568
cindex2 0.8928853680918404
rm2: 0.6521143675377576
mse: 0.23943491992716162
pearson 0.8219756012739495
cindex: 0.8804148583931664
cindex2 0.8913403374721426
rm2: 0.635387751669903
mse: 0.2465803156380872
pearson 0.8162701900914962
cindex: 0.8778971620671917
cindex2 0.8869355956327134
rm2: 0.6028774283770855
mse: 0.2612904288668469
pearson 0.8060356807055631
cindex: 0.8849292017237841
cindex2 0.8943790756737328
rm2: 0.6571962338237906
mse: 0.21654156477189346
pearson 0.8350962759168263
cindex: 0.8924892263492715
cindex2 0.889323364102078
rm2: 0.6274009344243228
mse: 0.22661336299010262
pearson 0.8281081001061558
cindex: 0.8802831931048636
cindex2 0.884593508940315
rm2: 0.6617169778135661
mse: 0.210186994696

In [22]:
# Sanity check: one averaged Pearson value is stored per resampling iteration.
len(pearson1)

40

In [23]:
# One label per resampling iteration. The length follows `niters` so the
# column stays aligned with the metric lists when the iteration count changes
# (pandas rejects columns of unequal length).
cmaps=['Original Ligand Graph']*niters
cmaps

['Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original Ligand Graph',
 'Original L

In [24]:
# One row per resampling iteration. mse1 and rm1 are collected by the loop
# but are not part of this table.
d = dict(CI=ci1, Pearson=pearson1, RMSE=rmse1, Spearman=spearman1, cmap=cmaps)
df_res = pd.DataFrame(d)
df_res

Unnamed: 0,CI,Pearson,RMSE,Spearman,cmap
0,0.886175,0.815942,0.483706,0.690903,Original Ligand Graph
1,0.876201,0.812251,0.496398,0.689107,Original Ligand Graph
2,0.869536,0.792638,0.515574,0.670112,Original Ligand Graph
3,0.88567,0.807412,0.513647,0.695663,Original Ligand Graph
4,0.875035,0.826742,0.477515,0.684948,Original Ligand Graph
5,0.871817,0.809205,0.478801,0.635609,Original Ligand Graph
6,0.894862,0.824839,0.478433,0.69817,Original Ligand Graph
7,0.894174,0.83812,0.457179,0.688136,Original Ligand Graph
8,0.85197,0.788537,0.51514,0.64492,Original Ligand Graph
9,0.878716,0.82039,0.511428,0.671847,Original Ligand Graph


In [25]:
# Save the per-iteration metric table without the index column.
df_res.to_csv('results_figures/pconcs4_original_ligand_davis_data.csv', index=False)