In [None]:
import os
import argparse
import time
from datetime import datetime, date
import random

import numpy as np
from scipy.sparse import load_npz
from scipy.stats import pearsonr
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
import pandas as pd

import torch
import torch_geometric
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
from scipy.stats import linregress

from model_classes_ import GCN_classification, GCN_regression, MLP_Classification, MLP_Regression, CNN
from custom_funcs import experiment_n

In [None]:
# Ask PyTorch's caching allocator to release GPU memory it is holding but not
# currently using, so a re-run of the notebook starts from a cleaner slate.
torch.cuda.empty_cache()

In [None]:
# Check for GPU
# is_available() and device_count() are safe to call on any machine; the
# device-specific queries raise when no CUDA device exists, so they are only
# issued when a GPU is actually present.
print(torch.cuda.is_available())  # True/False
print(torch.cuda.device_count())  # Number of GPUs available
if torch.cuda.is_available():
    print(torch.cuda.current_device())  # Current GPU ID (e.g., `0`)
    print(torch.cuda.get_device_name(0))  # GPU name
else:
    print("No CUDA device detected")

In [None]:
# Hyperparameters

# Input resolution settings and the per-node feature count derived from them.
chip_res = 10000
hic_res = 10000
num_hm = 6
num_feat = int(hic_res / chip_res * num_hm)

# Training settings.
regression_flag = 0
max_epoch = 50
learning_rate = 0.001
num_runs = 10

# Network shape settings.
num_lin_layers = 2
lin_hidden_size = 100
num_graph_conv_layers = 2
graph_conv_embed_size = 256

# Layer widths for the graph-convolution stack: the input width, one hidden
# width per additional conv layer, then the width handed to the linear head.
graph_conv_layer_sizes = (
    [num_feat]
    + [int(max(graph_conv_embed_size, lin_hidden_size))] * (num_graph_conv_layers - 1)
    + [lin_hidden_size]
)

# Layer widths for the linear heads. Both start at the last graph-conv width;
# the "_r" list ends in 1 output and the "_c" list ends in 2 outputs.
lin_hidden_sizes_r = (
    [graph_conv_layer_sizes[-1]]
    + [int(max(lin_hidden_size, 1))] * (num_lin_layers - 1)
    + [1]
)
lin_hidden_sizes_c = (
    [graph_conv_layer_sizes[-1]]
    + [int(max(lin_hidden_size, 2))] * (num_lin_layers - 1)
    + [2]
)

In [None]:
# One result record per cell line. Each record starts with empty 'AUROC' and
# 'PCC' slots; the three names are bound to three independent dicts.
E116, E122, E123 = (
    dict(name=cell_id, AUROC=None, PCC=None)
    for cell_id in ("E116", "E122", "E123")
)

In [None]:
# Run the GCN experiment once per cell line.
#
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook still runs end-to-end on a machine without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for cell_line in [E116, E122, E123]:
    print(f"Cell line: {cell_line['name']}")
    # Fresh classification and regression models are built for every cell line.
    # Later cells read 'AUROC', 'PCC', 'PCC_test' and 'PCC_pred' from these
    # dicts, so experiment_n presumably fills them in place
    # (TODO confirm in custom_funcs).
    experiment_n(
        cell_line,
        num_runs,
        GCN_classification(num_feat, num_graph_conv_layers, graph_conv_layer_sizes, num_lin_layers, lin_hidden_sizes_c, 2),
        GCN_regression(num_feat, num_graph_conv_layers, graph_conv_layer_sizes, num_lin_layers, lin_hidden_sizes_r, 1),
        'GCN',
        max_epoch=max_epoch,
        device=device
    )

In [None]:
# Show the full result record of every cell line, one per output line.
print(
    *(
        f"{tag}:{results}"
        for tag, results in (("E116", E116), ("E122", E122), ("E123", E123))
    ),
    sep="\n",
)

In [None]:
# Export the summary metrics of every cell line to results/GCMERGE.csv.
# Per the header row, element 0 of each metric is written as the mean column
# and element 1 as the std column.

# Create the output directory first; open() does not create missing parents.
os.makedirs('results', exist_ok=True)

with open('results/GCMERGE.csv', 'w') as f:
    f.write('cell_line,auroc_mu,auroc_std,pcc_mu,pcc_std\n')
    for label, result in (('E116', E116), ('E122', E122), ('E123', E123)):
        auroc, pcc = result['AUROC'], result['PCC']
        f.write(f"{label},{auroc[0]},{auroc[1]},{pcc[0]},{pcc[1]}\n")
# The with-block closes the file on exit, so no explicit close() is needed.

In [None]:
# Hex colour used for the scatter markers in the figure cell.
color = "#b3998b"

In [None]:
def _plot_pcc_panel(ax, cell_line, marker_color):
    """Draw one observed-vs-predicted scatter panel with its fitted line.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw into.
    cell_line : dict
        Result record; reads 'name', 'PCC', 'PCC_test' (x values) and
        'PCC_pred' (y values).
    marker_color : str
        Colour of the scatter markers.
    """
    observed = cell_line['PCC_test']
    predicted = cell_line['PCC_pred']

    # Least-squares line through the points, drawn across the observed range.
    slope, intercept, _, _, _ = linregress(observed, predicted)
    x_vals = np.array([min(observed), max(observed)])
    y_vals = slope * x_vals + intercept

    ax.scatter(observed, predicted, s=20, color=marker_color, alpha=0.8)
    # The legend shows this panel's own 'PCC' entry.
    ax.plot(x_vals, y_vals, color="black", label=f"PCC={cell_line['PCC']}")
    ax.set_title(f"True vs. Predicted Labels For Cell Line {cell_line['name']}")
    ax.set_xlabel("Observed $Log_{10}$ Expression")
    ax.set_ylabel("Predicted $Log_{10}$ Expression")
    ax.legend()


fig, axes = plt.subplots(1, 3, figsize=(20, 5))  # 1 row, 3 columns

# Panel order (left to right) is E116, E123, E122.
for panel_ax, panel_cell_line in zip(axes, (E116, E123, E122)):
    _plot_pcc_panel(panel_ax, panel_cell_line, color)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
plt.savefig('results/pcc_scatter.png')