### Importing Libraries

In [None]:
import sys,os
import random
import numpy as np
import json
from collections import OrderedDict
from gnn import GNNNet
from utils import *
from emetrics import *
from data import create_dataset_for_train,create_dataset_for_test,create_dataset_for_test_bootstrap
import torch
import torch.nn as nn
from torch_geometric.data import DataLoader
import rdkit as rd
from torch_sparse import SparseTensor,transpose
import deepchem
import tensorflow as tf
import pandas as pd
import pickle
import statistics

### Selecting the dataset: Davis [0] or KIBA [1]

In [None]:
# Index 0 selects Davis, index 1 selects KIBA. Despite the plural name, this
# variable holds a single dataset name.
datasets = ('davis', 'kiba')[0]
datasets

### Select the ligand encoding method and contact map method for protein encoding

In [None]:
# Protein contact-map technique; change the trailing index to pick another one.
method = ('pconsc4', 'esm_cmaps', 'alpha_fold_cmaps', 'rand_cmaps')[0]
method

In [None]:
# Ligand encoding method; change the trailing index to pick another one.
method1 = ('original', 'point_random', 'random_node', 'random_sample')[0]
method1

### Prediction, metric, and plotting helpers

In [None]:
def predicting(model, device, loader):
    """Run ``model`` over every batch of ``loader`` and gather labels and predictions.

    Args:
        model: Network invoked as ``model(data_mol, data_pro)``; it is switched
            to evaluation mode before inference.
        device: Device each batch element is moved to via ``.to(device)``.
        loader: Iterable of batches that also exposes a ``dataset`` attribute.
            Each batch is indexed as ``(molecule_batch, protein_batch)`` and the
            molecule batch carries the targets in ``.y``.

    Returns:
        A ``(labels, predictions)`` pair of flattened NumPy arrays.
    """
    model.eval()
    print('Make prediction for {} samples...'.format(len(loader.dataset)))

    # Both lists start with an empty tensor so the final concatenation also
    # works when the loader yields no batches at all.
    pred_chunks = [torch.Tensor()]
    label_chunks = [torch.Tensor()]
    with torch.no_grad():
        for batch in loader:
            mol_batch = batch[0].to(device)
            pro_batch = batch[1].to(device)
            pred_chunks.append(model(mol_batch, pro_batch).cpu())
            label_chunks.append(mol_batch.y.view(-1, 1).cpu())

    all_labels = torch.cat(label_chunks, 0)
    all_preds = torch.cat(pred_chunks, 0)
    return all_labels.numpy().flatten(), all_preds.numpy().flatten()


def load_model(model_path):
    """Return the object that ``torch.load`` deserializes from ``model_path``."""
    return torch.load(model_path)


def calculate_metrics(Y, P, dataset,result_file_name):
    """Compute the evaluation metrics, print them, and write a summary file.

    The summary written to ``result_file_name`` contains rmse, mse, pearson,
    spearman, ci (``get_cindex``) and rm2. The second concordance index
    (``get_ci``) is only printed, not written.

    Args:
        Y: Measured values.
        P: Predicted values.
        dataset: Dataset name; printed and used as the first line of the file.
        result_file_name: Path of the text file to (over)write. Missing parent
            directories are created.
    """
    # aupr = get_aupr(Y, P)
    cindex = get_cindex(Y, P)  # DeepDTA
    cindex2 = get_ci(Y, P)  # GraphDTA
    rm2 = get_rm2(Y, P)  # DeepDTA
    mse = get_mse(Y, P)
    pearson = get_pearson(Y, P)
    spearman = get_spearman(Y, P)
    rmse = get_rmse(Y, P)

    print('metrics for ', dataset)
    # print('aupr:', aupr)
    print('cindex:', cindex)
    print('cindex2', cindex2)
    print('rm2:', rm2)
    print('mse:', mse)
    print('pearson', pearson)

    # str() is used explicitly (rather than f-strings) so NumPy scalars are
    # rendered exactly as before. The leading blanks in ' mse:' and
    # ' pearson:' reproduce the double spaces of the existing file format.
    fields = [
        'rmse:' + str(rmse),
        ' mse:' + str(mse),
        ' pearson:' + str(pearson),
        'spearman:' + str(spearman),
        'ci:' + str(cindex),
        'rm2:' + str(rm2),
    ]
    result_str = dataset + '\r\n' + ' '.join(fields)
    print(result_str)

    # Create the results directory on demand so a long evaluation run is not
    # lost to a missing folder.
    out_dir = os.path.dirname(result_file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager guarantees the handle is flushed and closed.
    with open(result_file_name, 'w') as result_file:
        result_file.write(result_str)


def plot_density(Y, P, fold=0, dataset='davis'):
    """Scatter measured against predicted values with a diagonal reference line.

    Args:
        Y: Measured values, drawn on the y-axis.
        P: Predicted values, drawn on the x-axis.
        fold: Fold index; only referenced by the disabled ``savefig`` call at
            the end of the function.
        dataset: Dataset name shown in the title. ``'davis'`` draws the
            diagonal over [5, 11]; any other value draws it over [6, 16].

    NOTE(review): ``plt`` is not imported explicitly in the visible import
    cell; it is presumably provided by one of the star imports - confirm.
    """
    plt.figure(figsize=(10, 5))
    plt.grid(linestyle='--')
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Both artists are labelled so that the legend requested below actually
    # has entries; without labels the legend is empty.
    plt.scatter(P, Y, color='blue', s=40, label='predictions')
    plt.title('density of ' + dataset, fontsize=30, fontweight='bold')
    plt.xlabel('predicted', fontsize=30, fontweight='bold')
    plt.ylabel('measured', fontsize=30, fontweight='bold')
    # plt.xlim(0, 21)
    # plt.ylim(0, 21)
    if dataset == 'davis':
        diagonal = [5, 11]
    else:
        diagonal = [6, 16]
    plt.plot(diagonal, diagonal, color='black', label='predicted = measured')

    plt.legend(loc=0, numpoints=1)
    leg = plt.gca().get_legend()
    ltext = leg.get_texts()
    plt.setp(ltext, fontsize=12, fontweight='bold')
    #plt.savefig(os.path.join('results', dataset + '_pconcs4_188_2000_' + str(fold) + '.png'), dpi=500, bbox_inches='tight')

def calculate_metrics1(Y, P, dataset):
    """Compute the evaluation metrics for ``dataset``, print a report, and return them.

    Args:
        Y: Measured values.
        P: Predicted values.
        dataset: Dataset name, used only in the printed header.

    Returns:
        The tuple ``(cindex, pearson, rmse, mse, rm2, spearman)``.
    """
    # The dict literal is evaluated top to bottom, so the metric helpers run
    # in a fixed order. aupr is intentionally not computed.
    scores = {
        'cindex': get_cindex(Y, P),  # DeepDTA
        'cindex2': get_ci(Y, P),  # GraphDTA
        'rm2': get_rm2(Y, P),  # DeepDTA
        'mse': get_mse(Y, P),
        'pearson': get_pearson(Y, P),
        'spearman': get_spearman(Y, P),
        'rmse': get_rmse(Y, P),
    }

    print('metrics for ', dataset)
    printed = (
        ('cindex:', 'cindex'),
        ('cindex2', 'cindex2'),
        ('rm2:', 'rm2'),
        ('mse:', 'mse'),
        ('pearson', 'pearson'),
    )
    for tag, key in printed:
        print(tag, scores[key])

    returned = ('cindex', 'pearson', 'rmse', 'mse', 'rm2', 'spearman')
    return tuple(scores[key] for key in returned)


def calculate_metrics2(Y, P):
    """Compute the evaluation metrics, print a short report, and return them.

    Args:
        Y: Measured values.
        P: Predicted values.

    Returns:
        The tuple ``(cindex, pearson, rmse, mse, rm2, spearman)``.
    """
    # aupr is intentionally not computed.
    cindex, cindex2 = get_cindex(Y, P), get_ci(Y, P)  # DeepDTA, GraphDTA
    rm2 = get_rm2(Y, P)  # DeepDTA
    mse, pearson = get_mse(Y, P), get_pearson(Y, P)
    spearman, rmse = get_spearman(Y, P), get_rmse(Y, P)

    # spearman and rmse are returned but not part of the printed report.
    report = [
        ('cindex:', cindex),
        ('cindex2', cindex2),
        ('rm2:', rm2),
        ('mse:', mse),
        ('pearson', pearson),
    ]
    for tag, score in report:
        print(tag, score)

    return cindex, pearson, rmse, mse, rm2, spearman

### Loading the trained model

In [None]:

# Evaluate the first trained model on the test split.

# Use the selected GPU when CUDA is present; otherwise fall back to the CPU so
# this cell also runs on machines without a GPU.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')
TEST_BATCH_SIZE = 128

# Paths of the trained weights and of the text file that receives the metrics.
# NOTE(review): both names mention KIBA / random_node, while `datasets` and
# `method1` are selected at index 0 ('davis', 'original') earlier in the
# notebook - confirm this is the intended checkpoint.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_node_kiba_random_node_0.model'
result_file_name = 'results6/result_kiba_random_node_0_.txt'
model = GNNNet()
model.to(device)
# map_location follows the resolved device so that the CPU fallback works too.
model.load_state_dict(torch.load(model_file_name, map_location=device))

# Loading the test data
test_data = create_dataset_for_test(str(datasets), method, method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

# Predictions using the trained model
Y, P = predicting(model, device, test_loader)
calculate_metrics(Y, P, str(datasets), result_file_name)

In [None]:

# Evaluate the second trained model on the test split.

# Use the selected GPU when CUDA is present; otherwise fall back to the CPU so
# this cell also runs on machines without a GPU.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')

# Paths of the trained weights and of the text file that receives the metrics.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_davis_2.model'
result_file_name = 'results33/result_davis_pconcs4_random_2_.txt'
model = GNNNet()
model.to(device)
# map_location follows the resolved device so that the CPU fallback works too.
model.load_state_dict(torch.load(model_file_name, map_location=device))

# Loading the test data (TEST_BATCH_SIZE is set in the first evaluation cell).
test_data = create_dataset_for_test(str(datasets), method, method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

# Predictions using the trained model
Y1, P1 = predicting(model, device, test_loader)
calculate_metrics(Y1, P1, str(datasets), result_file_name)

In [None]:

# Evaluate the third trained model on the test split.

# Use the selected GPU when CUDA is present; otherwise fall back to the CPU so
# this cell also runs on machines without a GPU.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
device = torch.device(cuda_name if torch.cuda.is_available() else 'cpu')

# Paths of the trained weights and of the text file that receives the metrics.
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_davis_4.model'
result_file_name = 'results33/result_davis_pconcs4_random_4_.txt'
model = GNNNet()
model.to(device)
# map_location follows the resolved device so that the CPU fallback works too.
model.load_state_dict(torch.load(model_file_name, map_location=device))

# Loading the test data (TEST_BATCH_SIZE is set in the first evaluation cell).
test_data = create_dataset_for_test(str(datasets), method, method1)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False,
                                          collate_fn=collate)

# Predictions using the trained model
Y2, P2 = predicting(model, device, test_loader)
calculate_metrics(Y2, P2, str(datasets), result_file_name)

In [None]:

# Per-sample mean and standard deviation across the three models' predictions.
res = list(map(np.mean, zip(P, P1, P2)))
res1 = list(map(np.std, zip(P, P1, P2)))

In [None]:
# One row per test sample: measured value, the three model predictions, and
# their mean / standard deviation.
d = dict(exp=Y, model1=P, model2=P1, model3=P2, mean=res, std=res1)
df = pd.DataFrame(d)
df

In [None]:
# Write the prediction table to CSV; index=False leaves out the row index.
df.to_csv('results_figures/pconcs4_random_davis.csv', index=False)

### Bootstrapping the model predictions

In [None]:
# Load a saved prediction table for resampling.
# NOTE(review): this path differs from the CSV written by the preceding cell
# ('results_figures/pconcs4_random_davis.csv') - confirm the intended file.
df = pd.read_csv('results_figures/pconsc4_original_davis.csv')

In [None]:
# Display the loaded prediction table.
df

In [None]:
# Resampling the saved test predictions: each iteration draws 1000 rows,
# scores every model on them, and stores the metrics averaged over the models.
rmse1 = []
pearson1 = []
ci1 = []
mse1 = []
rm1 = []
spearman1 = []
niters = 40

for _ in range(niters):
    # NOTE(review): DataFrame.sample defaults to replace=False, so this is a
    # subsample without replacement rather than a classical bootstrap
    # resample, and it raises if df has fewer than 1000 rows - confirm intent.
    df2 = df.sample(n=1000)
    measured = np.array(df2['exp'])

    # Loop-local names are used for the sampled arrays so the notebook-level
    # Y / P1 / P2 produced by the evaluation cells are not overwritten.
    per_model = [
        calculate_metrics2(measured, np.array(df2[column]))
        for column in ('model1', 'model2', 'model3')
    ]
    # calculate_metrics2 returns (cindex, pearson, rmse, mse, rm2, spearman);
    # transpose to obtain one tuple per metric across the three models.
    ci_vals, pearson_vals, rmse_vals, mse_vals, rm2_vals, spearman_vals = zip(*per_model)

    ci1.append(np.mean(ci_vals))
    pearson1.append(np.mean(pearson_vals))
    rmse1.append(np.mean(rmse_vals))
    mse1.append(np.mean(mse_vals))
    rm1.append(np.mean(rm2_vals))
    spearman1.append(np.mean(spearman_vals))

In [None]:
# Number of collected entries; the loop appends one per resampling iteration.
len(pearson1)

In [None]:
# One label per resampling iteration. The length follows the collected
# results instead of a hard-coded 40, so it stays valid if niters changes.
cmaps = ['Original Ligand Graph'] * len(pearson1)
cmaps

In [None]:
# One row per resampling iteration. mse1 and rm1 are collected above but are
# not part of this table.
d = dict(CI=ci1, Pearson=pearson1, RMSE=rmse1, Spearman=spearman1, cmap=cmaps)
df_res = pd.DataFrame(d)
df_res

In [None]:
# Write the per-iteration metric table to CSV; index=False leaves out the row index.
df_res.to_csv('results_figures/pconcs4_original_ligand_davis_data.csv', index=False)