## ECD-CDGI Reproduction Test
### This notebook reproduces the performance of the ECD-CDGI model on the STRING dataset using pre-trained weights.

#### Library import and settings

In [3]:
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import average_precision_score, f1_score

from reproduction_utils import ECD_CDGINet

# Reproducibility: seed every random number generator this notebook touches.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Device selection: use the GPU when one is visible, otherwise fall back to CPU.
_cuda_available = torch.cuda.is_available()
if _cuda_available:
    torch.cuda.manual_seed(seed)
device = torch.device('cuda' if _cuda_available else 'cpu')
print(f"Using device: {device}")

Using device: cuda


#### Setting up and loading data

In [11]:
# --- Configuration ---

# Paths and fold selection
FOLD_TO_LOAD = 7                      # index of the 10-fold split to evaluate
dataPath = "../../Data/STRING/"       # root folder of the STRING data files
MODEL_PATH = "./Data/ECD_CDGI.pt"     # pre-trained checkpoint to load


class Args:
    """Hyper-parameter container handed to the ECD_CDGINet constructor."""
    # presumably the number of input feature columns; the feature matrix is
    # cut to 48 columns when it is loaded
    in_channels = 48
    hidden_channels = 100


args = Args()

print(">>> Loading Data...")

# 1. Multi-omics Feature Data Load & Scaling
# Rows containing NaN are dropped; the consistency check at the end of this
# cell catches the case where that leaves the features out of step with the
# per-fold labels and mask.
data_x_df = pd.read_csv(dataPath + 'multiomics_features_STRING.tsv', sep='\t', index_col=0)
data_x_df = data_x_df.dropna()

# Standardise every column (zero mean, unit variance).
scaler = StandardScaler()
features_scaled = scaler.fit_transform(data_x_df.values)

# Keep only the first `args.in_channels` columns so the feature width always
# follows the configured model input size instead of a duplicated literal.
data_x = torch.tensor(features_scaled, dtype=torch.float32, device=device)
data_x = data_x[:, :args.in_channels]
if data_x.shape[1] != args.in_channels:
    raise ValueError(
        f"Expected {args.in_channels} feature columns, found {data_x.shape[1]}"
    )

print(f"Feature Shape: {data_x.shape}")

# 2. PPI Network (Adjacency Matrix) Load
# NOTE(security): torch.load unpickles the file -- only load trusted data.
# map_location keeps the load working on CPU-only machines even if the
# tensor was saved from a CUDA device.
ppiAdj = torch.load(dataPath + 'STRING_ppi.pkl', map_location=device)
ppiAdj_index = ppiAdj.coalesce().indices().to(device)
print(f"PPI Edge Index Loaded: {ppiAdj_index.shape}")

# 3. Gene Names Load
feature_genename_file = dataPath + 'feature_genename.txt'
geneList = pd.read_csv(feature_genename_file, header=None).iloc[:, 0].tolist()

# 4. Fold Specific Data Load (Masks & Labels)
fold_path = dataPath + f'10fold/fold_{FOLD_TO_LOAD}/'

# Load Test Mask and Label
# os.path.join avoids the doubled "/" that string formatting produced, since
# fold_path already ends with a separator.
test_mask = torch.tensor(np.loadtxt(os.path.join(fold_path, 'test_mask.txt')), dtype=torch.bool, device=device)
labels = torch.tensor(np.loadtxt(os.path.join(fold_path, 'labels.txt')), dtype=torch.float32, device=device)

# The inference cell indexes both the labels and the model output with
# test_mask, so all three must describe the same rows.
# Assumes the model emits one logit per feature row -- TODO confirm.
if labels.shape[0] != test_mask.shape[0]:
    raise ValueError(
        f"labels ({labels.shape[0]}) and test_mask ({test_mask.shape[0]}) differ in length"
    )
if data_x.shape[0] != test_mask.shape[0]:
    raise ValueError(
        f"features ({data_x.shape[0]} rows) and test_mask ({test_mask.shape[0]}) differ in length"
    )

print("labels and masks loaded.")

>>> Loading Data...
Feature Shape: torch.Size([10251, 48])
PPI Edge Index Loaded: torch.Size([2, 501008])
labels and masks loaded.


#### Initialize model and load weights

In [14]:
print(">>> Initializing Model...")

# Create a Model Instance
model = ECD_CDGINet(args).to(device)

# The pre-trained weights are mandatory for a reproduction run: without them
# the following cells would evaluate a randomly initialised network and report
# meaningless metrics. Fail here instead of printing and carrying on.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Checkpoint file not found at: {MODEL_PATH}")

try:
    # NOTE(security): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(MODEL_PATH, map_location=device)
    model.load_state_dict(state_dict)
except RuntimeError as e:
    # Typically a key or shape mismatch between checkpoint and architecture;
    # report it and stop so stale or partial weights are never evaluated.
    print(f"Error loading weights: {e}")
    raise

print(f"Successfully loaded weights from: {MODEL_PATH}")

>>> Initializing Model...
Successfully loaded weights from: ./Data/ECD_CDGI.pt


#### Inference and Performance Evaluation

In [15]:
print(">>> Running Inference...")

# Put the model in evaluation mode and run the forward pass without gradient tracking.
model.eval()
with torch.no_grad():
    # Forward pass over the feature matrix and the PPI edge index
    logits = model(data_x, ppiAdj_index)

    # Probabilities for the rows selected by the test mask
    pred = torch.sigmoid(logits[test_mask])

    # Ground-truth labels for the same rows
    Yn = labels[test_mask]

# scikit-learn works on NumPy arrays, so move both tensors to the CPU first
pred_cpu = pred.cpu().numpy().flatten()
Yn_cpu = Yn.cpu().numpy().flatten()

# Ranking metrics use the raw probabilities; F1 uses a 0.5 decision threshold
auc_roc = metrics.roc_auc_score(Yn_cpu, pred_cpu)
auprc = average_precision_score(Yn_cpu, pred_cpu)
f1 = f1_score(Yn_cpu, (pred_cpu > 0.5).astype(int))

# Output Results
separator = "=" * 40
print("\n" + separator)
print("ECD-CDGI Reproduction Results")
print(separator)
print(f"AUROC    : {auc_roc:.4f}")
print(f"AUPRC    : {auprc:.4f}")
print(f"F1-Score : {f1:.4f}")
print(separator)

>>> Running Inference...

ECD-CDGI Reproduction Results
AUROC    : 0.8429
AUPRC    : 0.7536
F1-Score : 0.6562
