### Importing Libraries

In [None]:
import sys,os
import random
import numpy as np
import json
from collections import OrderedDict
from utils import *
from emetrics import *
from data import create_dataset_for_train,create_dataset_for_test,create_dataset_for_test_bootstrap
import torch
import torch.nn as nn
from torch_geometric.data import DataLoader
import rdkit as rd
from torch_sparse import SparseTensor,transpose
import deepchem
import tensorflow as tf
import pandas as pd
import pickle
from dnn import GNNNet,GNNNet_prod,GNNNet_prod_conc

### Loading the dataset: Davis [0] or KIBA [1]

In [None]:
# Pick the benchmark by index: 0 -> Davis, 1 -> KIBA.
# Kept as a plain string rather than a one-element list: every later cell
# passes `str(datasets)` on, and str() of a list would produce "['davis']"
# instead of the dataset name.
datasets = ['davis', 'kiba'][0]
datasets

### Select the ligand encoding method and contact map method for protein encoding

In [None]:
# Protein contact-map technique; change the index to pick another option.
contact_map_methods = ('pconsc4', 'esm_cmaps', 'alpha_fold_cmaps', 'rand_cmaps')
method = contact_map_methods[0]
method

In [None]:
# Ligand encoding method; change the index to pick another option.
ligand_encodings = ('original', 'point_random', "random_node", 'random_sample')
method1 = ligand_encodings[0]
method1

### Select the method to combine the encodings

In [None]:
# Each way of combining the encodings maps to its own network class from
# `dnn`; the dict keeps the original option order ('conc', 'prod', 'conc+prod').
combiners = {
    'conc': GNNNet,
    'prod': GNNNet_prod,
    'conc+prod': GNNNet_prod_conc,
}
comb = list(combiners)[0]

# Instantiate the network that matches the selected combination method.
model = combiners[comb]()

### Test functions

In [None]:
def predicting(model, device, loader):
    """Run ``model`` over every batch of ``loader`` and gather the results.

    Args:
        model: Network invoked as ``model(batch[0], batch[1])``; it is put
            into eval mode before the loop.
        device: Device that both batch elements are moved to.
        loader: Iterable of batches that also exposes ``.dataset``. Element 0
            of each batch must carry the labels in ``.y``.

    Returns:
        A ``(labels, predictions)`` tuple of flattened NumPy arrays.
    """
    model.eval()
    print('Make prediction for {} samples...'.format(len(loader.dataset)))

    labels = torch.Tensor()
    preds = torch.Tensor()
    # Gradients are not needed for inference.
    with torch.no_grad():
        for batch in loader:
            mol_batch, pro_batch = batch[0].to(device), batch[1].to(device)
            batch_out = model(mol_batch, pro_batch)
            # Accumulate on the CPU so the results can be converted to NumPy.
            preds = torch.cat((preds, batch_out.cpu()), 0)
            labels = torch.cat((labels, mol_batch.y.view(-1, 1).cpu()), 0)

    return labels.numpy().flatten(), preds.numpy().flatten()


def load_model(model_path):
    """Return the object that ``torch.load`` deserializes from ``model_path``."""
    return torch.load(model_path)


def calculate_metrics(Y, P, dataset,result_file_name):
    """Compute evaluation metrics for predictions, print them and save a summary.

    Args:
        Y: Reference values.
        P: Predicted values, passed to each metric together with ``Y``.
        dataset: Label printed with the metrics and written as the first line
            of the summary.
        result_file_name: Path of the text file that is (over)written with
            the summary.
    """
    # aupr = get_aupr(Y, P)
    cindex = get_cindex(Y, P)
    cindex2 = get_ci(Y, P)
    rm2 = get_rm2(Y, P)
    mse = get_mse(Y, P)
    pearson = get_pearson(Y, P)
    spearman = get_spearman(Y, P)
    rmse = get_rmse(Y, P)

    print('metrics for ', dataset)
    # print('aupr:', aupr)
    print('cindex:', cindex)
    print('cindex2', cindex2)
    print('rm2:', rm2)
    print('mse:', mse)
    print('pearson', pearson)

    result_str = ''
    result_str += dataset + '\r\n'
    result_str += 'rmse:' + str(rmse) + ' ' + ' mse:' + str(mse) + ' ' + ' pearson:' + str(
        pearson) + ' ' + 'spearman:' + str(spearman) + ' ' + 'ci:' + str(cindex) + ' ' + 'rm2:' + str(rm2)
    print(result_str)

    # Use a context manager so the handle is flushed and closed right away
    # instead of relying on garbage collection.
    with open(result_file_name, 'w') as result_file:
        result_file.write(result_str)



### Hyperparameter setting

In [None]:
TEST_BATCH_SIZE = 128

# Preferred GPU; change the index to target a different card.
cuda_name = ['cuda:0', 'cuda:1', 'cuda:2', 'cuda:3'][0]
# Fall back to the CPU when CUDA is unavailable. `cuda_name` itself is
# updated (not only `device`) because later cells pass it to torch.load as
# `map_location`.
if not torch.cuda.is_available():
    cuda_name = 'cpu'
device = torch.device(cuda_name)
model.to(device)

In [None]:
# Build the test dataset for the selected dataset name and encodings.
test_data = create_dataset_for_test(str(datasets), method, method1)

# Fixed-order loader; batches are assembled by `collate`.
test_loader = torch.utils.data.DataLoader(
    test_data,
    collate_fn=collate,
    shuffle=False,
    batch_size=TEST_BATCH_SIZE,
)

### Model 1

In [None]:
# Checkpoint to evaluate and the text file that receives its metrics.
model_file_name = 'models/model_pconsc4_GNNNet_random_node_kiba_random_node_0.model'
result_file_name = 'results/result_pconsc4_random_node_kiba_0'+ '.txt'

# Restore the trained weights, mapped to the configured device name.
state_dict = torch.load(model_file_name, map_location=cuda_name)
model.load_state_dict(state_dict)

# Predict on the test loader, then print and save the metrics.
Y, P = predicting(model, device, test_loader)
calculate_metrics(Y, P, str(datasets), result_file_name)

### Model 2

In [None]:
#Loading the path to the trained model and setting the results path
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_node_kiba_random_node_2.model'
# The result file carries this checkpoint's own suffix (_2); reusing the
# '_0' name would overwrite the first model's metrics, because
# calculate_metrics opens the file in 'w' mode.
result_file_name = 'results/result_pconsc4_random_node_kiba_2'+ '.txt'

model.load_state_dict(torch.load(model_file_name, map_location=cuda_name))

#predictions using the trained model
Y1, P1 = predicting(model, device, test_loader)
calculate_metrics(Y1, P1, str(datasets),result_file_name)

### Model 3

In [None]:
#Loading the path to the trained model and setting the results path
model_file_name = 'models_sample/model_pconsc4_GNNNet_random_node_kiba_random_node_3.model'
# The result file carries this checkpoint's own suffix (_3); reusing the
# '_0' name would overwrite the first model's metrics, because
# calculate_metrics opens the file in 'w' mode.
result_file_name = 'results/result_pconsc4_random_node_kiba_3'+ '.txt'

model.load_state_dict(torch.load(model_file_name, map_location=cuda_name))

#predictions using the trained model
Y2, P2 = predicting(model, device, test_loader)
calculate_metrics(Y2, P2, str(datasets),result_file_name)

### Download model predictions

In [None]:
# `res` / `res1` feed the 'mean' and 'std' columns but were never defined in
# this notebook, so they are computed here across the three models' predictions
# (one row per model, statistics taken per test sample along axis 0).
all_preds = np.vstack([P, P1, P2])
res = all_preds.mean(axis=0)
# NOTE(review): this is the population standard deviation (ddof=0, NumPy's
# default) — confirm whether the sample standard deviation (ddof=1) was intended.
res1 = all_preds.std(axis=0)

d = {'exp': Y, 'model1': P,'model2': P1,'model3': P2,'mean': res,'std': res1}
df = pd.DataFrame(data=d)
df

In [None]:
# Write the predictions table to CSV, omitting the DataFrame index column.
df.to_csv('Figures/kiba_figures_data/ligand_kiba_random_node.csv', index=False)

### Bootstrapping the model predictions

In [None]:
# Load model predictions file
# NOTE(review): the save cell writes the predictions table under
# 'Figures/kiba_figures_data/', while this cell reads from
# 'Figures/davis_figures_data/' — confirm which directory is intended.
df=pd.read_csv('Figures/davis_figures_data/ligand_kiba_random_node.csv')

In [None]:
#bootstrapping the test data
# For each iteration: draw a random subset of the predictions table, score
# every model column on it, and store the across-model mean of each metric.
niters=40
sample_size = 1000
model_columns = ['model1', 'model2', 'model3']

rmse1=[]
pearson1=[]
ci1=[]
mse1=[]
rm1=[]
spearman1=[]

for _ in range(niters):
    # Local names are used on purpose: rebinding Y / P1 / P2 here would
    # clobber the full-length prediction arrays created by the model cells.
    # NOTE(review): DataFrame.sample draws WITHOUT replacement by default;
    # pass replace=True if a classical bootstrap is intended.
    subset = df.sample(n=sample_size)
    y_true = np.array(subset['exp'])

    # One (cindex, pearson, rmse, mse, rm2, spearman) tuple per model.
    per_model = [
        calculate_metrics2(y_true, np.array(subset[column]))
        for column in model_columns
    ]
    # Regroup the tuples so each variable holds one metric across the models.
    ci_vals, pearson_vals, rmse_vals, mse_vals, rm2_vals, spearman_vals = zip(*per_model)

    ci1.append(np.mean(ci_vals))
    pearson1.append(np.mean(pearson_vals))
    rmse1.append(np.mean(rmse_vals))
    mse1.append(np.mean(mse_vals))
    rm1.append(np.mean(rm2_vals))
    spearman1.append(np.mean(spearman_vals))

In [None]:
# One label per bootstrap iteration. The length follows the collected results
# rather than a hard-coded 40, so the columns of the results table stay
# aligned if the iteration count changes.
encoding=['protein/ligand encoding type']*len(ci1)

### Save the results file

In [None]:
# Collect the per-iteration bootstrap metrics and the encoding label into
# one table; each dict key becomes a column.
d = {
    'CI': ci1,
    'Pearson': pearson1,
    'RMSE': rmse1,
    'Spearman': spearman1,
    'encoding': encoding,
}
df_res = pd.DataFrame(d)
df_res

In [None]:
# Write the bootstrap results table to CSV, omitting the DataFrame index column.
df_res.to_csv('Figures/davis_figures_data/ligand_kiba_random_node_data.csv', index=False)