In [None]:
import json

from DeepPurpose import DTI
from DeepPurpose.dataset import *
import pandas as pd

from metrics import r2_rmse, get_total_performance

In [None]:
# Root directory for the KIBA data. Later cells read files from a `KIBA/`
# subfolder beneath it (e.g. SAVE_PATH + '/KIBA/affinity.txt').
SAVE_PATH = '../data/KIBA'

In [None]:
# Load the KIBA drugs, targets and labels from SAVE_PATH.
# NOTE(review): binary=False presumably keeps continuous scores rather than
# thresholded labels — confirm against the DeepPurpose loader.
X_drug, X_target, y = load_process_KIBA(SAVE_PATH, binary=False)

In [None]:
# Sanity check: number of drug entries vs. number of distinct drug values
len(X_drug), len(set(X_drug))

In [None]:
# Sanity check: number of target entries vs. number of distinct target values
len(X_target), len(set(X_target))

In [None]:
# Encoder names handed to data_process: one for the drugs, one for the targets
drug_encoding = 'Morgan'
target_encoding = 'CNN'

In [None]:
# Encode all drug-target pairs with the chosen encoders, requesting no split
# so the result can be used as a single table in the cells below
dataset = data_process(
    X_drug,
    X_target,
    y,
    drug_encoding,
    target_encoding,
    split_method='no_split',
)

In [None]:
# Rebuild the (drug id, target id) pair for every measured affinity, ordered
# drug-major / target-minor (all targets of drug 0, then drug 1, ...).

# Read the affinity matrix and mark missing measurements with the sentinel -1
affinity = pd.read_csv(SAVE_PATH + '/KIBA/affinity.txt', header=None, sep='\t')
affinity = affinity.fillna(-1)

# Load the target-sequence and drug-SMILES JSON files
with open(SAVE_PATH + '/KIBA/target_seq.txt') as f:
    target = json.load(f)

with open(SAVE_PATH + '/KIBA/SMILES.txt') as f:
    drug = json.load(f)

# Keep only what iteration yields. NOTE(review): presumably both files hold
# JSON objects keyed by identifier, so these become lists of ids — confirm.
target = list(target)
drug = list(drug)

# Rows of the matrix are indexed by drug and columns by target. Only the
# leading len(drug) x len(target) corner is ever consulted; a matrix smaller
# than that cannot be indexed for every pair, so fail explicitly.
n_rows, n_cols = affinity.shape
if n_rows < len(drug) or n_cols < len(target):
    raise IndexError(
        'affinity matrix of shape %s is too small for %d drugs x %d targets'
        % ((n_rows, n_cols), len(drug), len(target))
    )

# One vectorized comparison instead of a Python loop over every matrix cell.
# nonzero() reports indices in row-major order, i.e. the same order a nested
# "for drug: for target:" loop would visit them.
measured = affinity.to_numpy()[:len(drug), :len(target)] != -1
drug_idx, target_idx = measured.nonzero()

drug_ids = [drug[i] for i in drug_idx.tolist()]
target_ids = [target[j] for j in target_idx.tolist()]

In [None]:
# Attach the identifier columns to the dataset.
# NOTE(review): this relies on drug_ids / target_ids being in the same row
# order as `dataset` and on data_process having dropped no rows — confirm.
dataset['drug_id'] = drug_ids
dataset['target_id'] = target_ids

In [None]:
# Load a pre-trained DTI model by name.
# NOTE(review): the name appears to correspond to the Morgan/CNN encodings and
# the KIBA data used in this notebook — confirm.
model = DTI.model_pretrained(model='morgan_cnn_kiba')

In [None]:
# Score every pair with the pre-trained model and store the result in a
# 'predicted' column at position 3.
# DataFrame.insert raises ValueError when the column already exists, so on a
# re-run of this cell the existing column is overwritten in place instead.
predictions = model.predict(dataset)
if 'predicted' in dataset.columns:
    dataset['predicted'] = predictions
else:
    dataset.insert(3, 'predicted', predictions)

In [None]:
# Write ids, actual labels and predictions to CSV, leaving out the row index
export_columns = ['drug_id', 'target_id', 'Label', 'predicted']
dataset[export_columns].to_csv('../analysis/morgan_cnn_kiba_predictions.csv', index=False)

In [None]:
# Reload the saved predictions from disk
pred = pd.read_csv('../analysis/morgan_cnn_kiba_predictions.csv')

In [None]:
# Rename the label column to "affinity".
# NOTE(review): presumably the column name the metrics helpers expect — confirm.
pred = pred.rename(columns={"Label": "affinity"})

In [None]:
# Display the predictions table
pred

In [None]:
# Overall performance with the 'micro' setting.
# NOTE(review): the averaging semantics live in metrics.get_total_performance — confirm.
pred_micro = get_total_performance(pred, r2_rmse, 'micro')

In [None]:
# Display the micro result
pred_micro

In [None]:
# Overall performance with the 'macro' setting.
# NOTE(review): the averaging semantics live in metrics.get_total_performance — confirm.
pred_macro = get_total_performance(pred, r2_rmse, 'macro')

In [None]:
# Display the macro result
pred_macro

In [None]:
# Collect the four summary numbers as (label, value) pairs, then lay them out
# as a two-column table: metric name and its value
metric_rows = [
    ('r2_micro', pred_micro.r2),
    ('rmse_micro', pred_micro.rmse),
    ('r2_macro', pred_macro.r2),
    ('rmse_macro', pred_macro.rmse),
]
metrics_df = pd.DataFrame({
    'Metric': [label for label, value in metric_rows],
    'Value_general': [value for label, value in metric_rows],
})

In [None]:
# Display the metric summary table
metrics_df

In [None]:
# Save the metric summary to CSV, leaving out the row index
metrics_df.to_csv('../analysis/morgan_cnn_kiba_performance.csv', index=False)
