In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.8.0+cu126


In [None]:
!pip install sympy



In [None]:
!pip install -q torch_geometric
!pip install -q class_resolver
!pip3 install pymatting

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting
  Downloading pymatting-1.1.14-py3-none-any.whl.metadata (7.7 kB)
Downloading pymatting-1.1.14-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting
Successfully installed pymatting-1.1.14


In [114]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss

In [115]:
# === Load Patients ===
fa_patients_path = "/home/snu/Downloads/NIFD_Patients_FA_Histogram_Feature.npy"
Patients_FA_array = np.load(fa_patients_path, allow_pickle=True)

# === Load Controls ===
fa_controls_path = "/home/snu/Downloads/NIFD_Control_FA_Histogram_Feature.npy"
Controls_FA_array = np.load(fa_controls_path, allow_pickle=True)

print("Patients Shape:", Patients_FA_array.shape)
print("Controls Shape:", Controls_FA_array.shape)

# === Combine features and labels ===
X = np.vstack([Controls_FA_array, Patients_FA_array])
y = np.hstack([
    np.zeros(Controls_FA_array.shape[0], dtype=np.int64),  # 0 = Control
    np.ones(Patients_FA_array.shape[0], dtype=np.int64)    # 1 = Patient
])

# Shuffle
np.random.seed(42)
perm = np.random.permutation(X.shape[0])
X = X[perm]
y = y[perm]

Patients Shape: (98, 180)
Controls Shape: (48, 180)


In [116]:
def create_adj(F, alpha=1):
    F_norm = F / np.linalg.norm(F, axis=1, keepdims=True)
    W = np.dot(F_norm, F_norm.T)
    W = (W >= alpha).astype(np.float32)
    return W

In [117]:
def load_data(adj, node_feats):
    node_feats = torch.from_numpy(node_feats).float()
    edge_index = torch.from_numpy(np.array(np.nonzero(adj))).long()
    return node_feats, edge_index

In [118]:
features = X.astype(np.float32)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

W0 = create_adj(features, alpha=0.5)
# W_asym = asymmetrize_random(W0, seed=42)
node_feats, edge_index = load_data(W0, features)
data = Data(x=node_feats, edge_index=edge_index).to(device)
A = torch.from_numpy(W0).to(device)
print(data)

Data(x=[146, 180], edge_index=[2, 21256])


In [119]:
class GATEncoder(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, heads, device, activ):
        super(GATEncoder, self).__init__()
        self.device = device
        self.gat1 = GATConv(input_dim, hidden_dim, heads=heads)
        self.batchnorm = nn.BatchNorm1d(hidden_dim * heads)
        self.dropout = nn.Dropout(0.25)
        self.mlp = nn.Linear(hidden_dim * heads, hidden_dim * heads)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        x = self.dropout(x)
        x = self.batchnorm(x)
        logits = self.mlp(x)
        return logits

In [120]:
class AvgReadout(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, seq, msk=None):
        if msk is None:
            return torch.mean(seq, 0)
        else:
            msk = torch.unsqueeze(msk, -1)
            return torch.sum(seq * msk, 0) / torch.sum(msk)

In [121]:
class Discriminator(nn.Module):
    def __init__(self, n_h):
        super().__init__()
        self.f_k = nn.Bilinear(n_h, n_h, 1)
        nn.init.xavier_uniform_(self.f_k.weight.data)
        if self.f_k.bias is not None:
            self.f_k.bias.data.fill_(0.0)

    def forward(self, c, h_pl, h_mi):
        c_x = torch.unsqueeze(c, 0).expand_as(h_pl)
        sc_1 = torch.squeeze(self.f_k(h_pl, c_x), 1)
        sc_2 = torch.squeeze(self.f_k(h_mi, c_x), 1)
        logits = torch.cat((sc_1, sc_2), 0)
        return logits

In [122]:
class DGI(nn.Module):
    def __init__(self, n_in, n_h, heads, dropout=0.25):
        super().__init__()
        self.gat1 = GATEncoder(n_in, n_h, heads=heads, device='cuda' if torch.cuda.is_available() else 'cpu', activ=nn.ELU())
        self.read = AvgReadout()
        self.sigm = nn.Sigmoid()
        self.disc = Discriminator(n_h * heads)
        self.heads = heads

    def forward(self, seq1, seq2, edge_index):
        # Create Data objects for the GATEncoder
        data1 = Data(x=seq1, edge_index=edge_index)
        data2 = Data(x=seq2, edge_index=edge_index)

        h_1 = self.gat1(data1)
        c = self.read(h_1)
        c = self.sigm(c)
        h_2 = self.gat1(data2)
        logits = self.disc(c, h_1, h_2)
        return logits, h_1

In [123]:
class DGI_with_classifier(DGI):
    def __init__(self, n_in, n_h, heads, n_classes=2, cut=0, dropout=0.25):
        super().__init__(n_in, n_h, heads, dropout=dropout)
        self.classifier = nn.Linear(n_h * heads, n_classes)
        self.cut = cut

    def get_embeddings(self, node_feats, edge_index):
        _, embeddings = self.forward(node_feats, node_feats, edge_index)
        return embeddings

    def cut_loss(self, A, S):
        S = F.softmax(S, dim=1)
        A_pool = torch.matmul(torch.matmul(A, S).t(), S)
        num = torch.trace(A_pool)
        D = torch.diag(torch.sum(A, dim=-1))
        D_pooled = torch.matmul(torch.matmul(D, S).t(), S)
        den = torch.trace(D_pooled)
        mincut_loss = -(num / den)
        St_S = torch.matmul(S.t(), S)
        I_S = torch.eye(S.shape[1], device=A.device)
        ortho_loss = torch.norm(St_S / torch.norm(St_S) - I_S / torch.norm(I_S))
        return mincut_loss + ortho_loss

    def modularity_loss(self, A, S):
        C = F.softmax(S, dim=1)
        d = torch.sum(A, dim=1)
        m = torch.sum(A)
        B = A - torch.ger(d, d) / (2 * m)
        I_S = torch.eye(C.shape[1], device=A.device)
        k = torch.norm(I_S)
        n = S.shape[0]
        modularity_term = (-1 / (2 * m)) * torch.trace(torch.mm(torch.mm(C.t(), B), C))
        collapse_reg_term = (torch.sqrt(k) / n) * torch.norm(torch.sum(C, dim=0), p='fro') - 1
        return modularity_term + collapse_reg_term

    def Reg_loss(self, A, embeddings):
        logits = self.classifier(embeddings)
        if self.cut == 1:
            return self.cut_loss(A, logits)
        else:
            return self.modularity_loss(A, logits)

In [124]:
hidden_dim = 512
cut = 0
dropout = 0.25
heads = 2
model = DGI_with_classifier(features.shape[1], hidden_dim, heads=heads, n_classes=2, cut=cut, dropout=dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = 0.00001)
bce_loss = nn.BCEWithLogitsLoss()

num_epochs = 8000
for epoch in range(num_epochs + 1):
    model.train()
    optimizer.zero_grad()

    perm = torch.randperm(node_feats.size(0))
    corrupt_features = node_feats[perm]

    logits, embeddings = model(node_feats.to(device), corrupt_features.to(device), edge_index.to(device))

    lbl = torch.cat([
        torch.ones(node_feats.size(0)),
        torch.zeros(node_feats.size(0))
    ]).to(device)

    dgi_loss = bce_loss(logits.squeeze(), lbl)
    reg_loss = model.Reg_loss(A, embeddings)
    loss = dgi_loss + 2 * reg_loss

    if epoch % 500 == 0:
        print(f"Epoch {epoch} | DGI Loss: {dgi_loss.item():.4f} | Reg Loss: {reg_loss.item():.4f} | Total: {loss.item():.4f}")

    loss.backward()
    optimizer.step()

Epoch 0 | DGI Loss: 0.7084 | Reg Loss: -0.2841 | Total: 0.1402
Epoch 500 | DGI Loss: 2.1840 | Reg Loss: -0.2841 | Total: 1.6158
Epoch 1000 | DGI Loss: 1.3519 | Reg Loss: -0.2841 | Total: 0.7837
Epoch 1500 | DGI Loss: 1.0992 | Reg Loss: -0.2841 | Total: 0.5310
Epoch 2000 | DGI Loss: 0.8723 | Reg Loss: -0.2841 | Total: 0.3041
Epoch 2500 | DGI Loss: 0.7587 | Reg Loss: -0.2841 | Total: 0.1905
Epoch 3000 | DGI Loss: 0.7241 | Reg Loss: -0.2841 | Total: 0.1559
Epoch 3500 | DGI Loss: 0.7224 | Reg Loss: -0.2841 | Total: 0.1542
Epoch 4000 | DGI Loss: 0.7079 | Reg Loss: -0.2841 | Total: 0.1397
Epoch 4500 | DGI Loss: 0.7025 | Reg Loss: -0.2841 | Total: 0.1343
Epoch 5000 | DGI Loss: 0.6984 | Reg Loss: -0.2841 | Total: 0.1302
Epoch 5500 | DGI Loss: 0.6975 | Reg Loss: -0.2841 | Total: 0.1293
Epoch 6000 | DGI Loss: 0.6948 | Reg Loss: -0.2841 | Total: 0.1266
Epoch 6500 | DGI Loss: 0.6935 | Reg Loss: -0.2841 | Total: 0.1253
Epoch 7000 | DGI Loss: 0.6973 | Reg Loss: -0.2841 | Total: 0.1291
Epoch 7500 | D

In [125]:
model.eval()
with torch.no_grad():
    embeddings = model.get_embeddings(node_feats.to(device), edge_index.to(device))
    # embeddings = embeddings.cpu().numpy()
    # The DGI model with the classifier layer produces scores for each class.
    # Apply softmax to get probabilities for each class.
    class_probabilities = F.softmax(model.classifier(embeddings), dim=1).cpu().numpy()

# Get predicted class labels by selecting the class with the highest probability
y_pred = np.argmax(class_probabilities, axis=1)

In [126]:
acc_score = accuracy_score(y, y_pred)
acc_score_inverted = accuracy_score(y, 1 - y_pred)
prec_score = precision_score(y, y_pred)
rec_score = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
log_loss_value = log_loss(y, class_probabilities)

print("Accuracy:", acc_score)
print("Accuracy (inverted):", acc_score_inverted)
print("Precision:", prec_score)
print("Recall:", rec_score)
print("F1:", f1)
print("Log Loss:", log_loss_value)

Accuracy: 0.678082191780822
Accuracy (inverted): 0.3219178082191781
Precision: 0.6976744186046512
Recall: 0.9183673469387755
F1: 0.7929515418502202
Log Loss: 0.6919513220001683


In [127]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss

hidden_dim   = 512
cut          = 0
dropout      = 0.25
num_runs     = 10
num_epochs   = 8000
lambda_list  = [2]
base_seed    = 42

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

node_feats = node_feats.to(device)
edge_index = edge_index.to(device)
A = A.to(device)

if isinstance(y, torch.Tensor):
    y_np = y.detach().cpu().numpy().astype(int)
else:
    y_np = np.asarray(y).astype(int)

N, feats_dim = node_feats.size(0), node_feats.size(1)

all_results = []
bce_loss = nn.BCEWithLogitsLoss()

for lam in lambda_list:
    print(f"\n================ LAMBDA = {lam} ================\n")

    acc_scores, prec_scores, rec_scores, f1_scores, log_losses = [], [], [], [], []

    for run in range(num_runs):
        print(f"\n--- Run {run+1}/{num_runs} ---")

        seed = base_seed + run
        torch.manual_seed(seed)
        np.random.seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)


        model = DGI_with_classifier(features.shape[1], hidden_dim, heads=heads, n_classes=2, cut=cut, dropout=dropout).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.00001)
        scheduler = StepLR(optimizer, step_size=200, gamma=0.5)


        for epoch in range(num_epochs + 1):
            model.train()
            optimizer.zero_grad()

            perm = torch.randperm(N, device=device)
            corrupt_features = node_feats[perm]

            logits, embeddings = model(node_feats, corrupt_features, edge_index)

            lbl = torch.cat([torch.ones(N, device=device), torch.zeros(N, device=device)])
            dgi_loss = bce_loss(logits.squeeze(), lbl)
            reg_loss = model.Reg_loss(A, embeddings)

            loss = dgi_loss + lam * reg_loss

            if epoch % 500 == 0:
                print(f"Epoch {epoch:4d} | DGI: {dgi_loss.item():.4f} | Reg: {reg_loss.item():.4f} | "
                      f"λ*Reg: {(lam * reg_loss).item():.4f} | Total: {loss.item():.4f}")

            loss.backward()
            optimizer.step()
            scheduler.step()

        model.eval()
        with torch.no_grad():
            emb = model.get_embeddings(node_feats, edge_index)
            logits_cls = model.classifier(emb)                   # [N, 2]
            class_probabilities = F.softmax(logits_cls, dim=1).cpu().numpy()
            y_pred = np.argmax(class_probabilities, axis=1)

        acc  = accuracy_score(y_np, y_pred)
        acc_inv = accuracy_score(y_np, 1 - y_pred)

        if acc_inv > acc:
            acc = acc_inv
            y_pred = 1 - y_pred
            class_probabilities = class_probabilities[:, ::-1]

        prec = precision_score(y_np, y_pred, zero_division=0)
        rec  = recall_score(y_np, y_pred, zero_division=0)
        f1   = f1_score(y_np, y_pred, zero_division=0)
        ll   = log_loss(y_np, class_probabilities)

        print(f"Run {run+1} | Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f} | LogLoss: {ll:.4f}")

        acc_scores.append(acc)
        prec_scores.append(prec)
        rec_scores.append(rec)
        f1_scores.append(f1)
        log_losses.append(ll)

    lambda_results = {
        "lambda": lam,
        "accuracy":  (float(np.mean(acc_scores)), float(np.std(acc_scores))),
        "precision": (float(np.mean(prec_scores)), float(np.std(prec_scores))),
        "recall":    (float(np.mean(rec_scores)), float(np.std(rec_scores))),
        "f1":        (float(np.mean(f1_scores)),  float(np.std(f1_scores))),
        "log_loss":  (float(np.mean(log_losses)), float(np.std(log_losses))),
    }
    all_results.append(lambda_results)

    print(f"\n--- RESULTS FOR LAMBDA = {lam} ---")
    print(f"Accuracy : {lambda_results['accuracy'][0]:.4f} ± {lambda_results['accuracy'][1]:.4f}")
    print(f"Precision: {lambda_results['precision'][0]:.4f} ± {lambda_results['precision'][1]:.4f}")
    print(f"Recall   : {lambda_results['recall'][0]:.4f} ± {lambda_results['recall'][1]:.4f}")
    print(f"F1 Score : {lambda_results['f1'][0]:.4f} ± {lambda_results['f1'][1]:.4f}")
    print(f"Log Loss : {lambda_results['log_loss'][0]:.4f} ± {lambda_results['log_loss'][1]:.4f}")

print("\n================ FINAL SUMMARY FOR ALL LAMBDAS ================\n")
print(f"{'Lambda':>8} | {'Accuracy':>18} | {'Precision':>18} | {'Recall':>18} | {'F1 Score':>18} | {'Log Loss':>18}")
print("-" * 108)
for res in all_results:
    print(f"{res['lambda']:>8} | "
          f"{res['accuracy'][0]:.4f} ± {res['accuracy'][1]:.4f} | "
          f"{res['precision'][0]:.4f} ± {res['precision'][1]:.4f} | "
          f"{res['recall'][0]:.4f} ± {res['recall'][1]:.4f} | "
          f"{res['f1'][0]:.4f} ± {res['f1'][1]:.4f} | "
          f"{res['log_loss'][0]:.4f} ± {res['log_loss'][1]:.4f}")




--- Run 1/10 ---
Epoch    0 | DGI: 0.7096 | Reg: -0.2840 | λ*Reg: -0.5679 | Total: 0.1417
Epoch  500 | DGI: 0.7319 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1637
Epoch 1000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 1500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5683 | Total: 0.1249
Epoch 2000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 2500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 3000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 3500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 4000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 4500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 5000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 5500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1249
Epoch 6000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.5682 | Total: 0.1250
Epoch 6500 | DGI: 0.6931 | Reg:

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss

hidden_dim   = 512
cut          = 0
dropout      = 0.25
num_runs     = 10
num_epochs   = 10000
lambda_list  = [0.001, 0.005, 0.009, 0.01, 0.05, 0.09, 0.1, 0.3, 0.5, 0.9, 1, 2, 5, 8]
base_seed    = 42

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

node_feats = node_feats.to(device)
edge_index = edge_index.to(device)
A = A.to(device)

if isinstance(y, torch.Tensor):
    y_np = y.detach().cpu().numpy().astype(int)
else:
    y_np = np.asarray(y).astype(int)

N, feats_dim = node_feats.size(0), node_feats.size(1)

all_results = []
bce_loss = nn.BCEWithLogitsLoss()

for lam in lambda_list:
    print(f"\n================ LAMBDA = {lam} ================\n")

    acc_scores, prec_scores, rec_scores, f1_scores, log_losses = [], [], [], [], []

    for run in range(num_runs):
        print(f"\n--- Run {run+1}/{num_runs} ---")

        seed = base_seed + run
        torch.manual_seed(seed)
        np.random.seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)


        model = DGI_with_classifier(features.shape[1], hidden_dim, heads=heads, n_classes=2, cut=cut, dropout=dropout).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.00001)
        scheduler = StepLR(optimizer, step_size=200, gamma=0.5)


        for epoch in range(num_epochs + 1):
            model.train()
            optimizer.zero_grad()

            perm = torch.randperm(N, device=device)
            corrupt_features = node_feats[perm]

            logits, embeddings = model(node_feats, corrupt_features, edge_index)

            lbl = torch.cat([torch.ones(N, device=device), torch.zeros(N, device=device)])
            dgi_loss = bce_loss(logits.squeeze(), lbl)
            reg_loss = model.Reg_loss(A, embeddings)

            loss = dgi_loss + lam * reg_loss

            if epoch % 500 == 0:
                print(f"Epoch {epoch:4d} | DGI: {dgi_loss.item():.4f} | Reg: {reg_loss.item():.4f} | "
                      f"λ*Reg: {(lam * reg_loss).item():.4f} | Total: {loss.item():.4f}")

            loss.backward()
            optimizer.step()
            scheduler.step()

        model.eval()
        with torch.no_grad():
            emb = model.get_embeddings(node_feats, edge_index)
            logits_cls = model.classifier(emb)                   # [N, 2]
            class_probabilities = F.softmax(logits_cls, dim=1).cpu().numpy()
            y_pred = np.argmax(class_probabilities, axis=1)

        acc  = accuracy_score(y_np, y_pred)
        acc_inv = accuracy_score(y_np, 1 - y_pred)

        if acc_inv > acc:
            acc = acc_inv
            y_pred = 1 - y_pred
            class_probabilities = class_probabilities[:, ::-1]

        prec = precision_score(y_np, y_pred, zero_division=0)
        rec  = recall_score(y_np, y_pred, zero_division=0)
        f1   = f1_score(y_np, y_pred, zero_division=0)
        ll   = log_loss(y_np, class_probabilities)

        print(f"Run {run+1} | Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f} | LogLoss: {ll:.4f}")

        acc_scores.append(acc)
        prec_scores.append(prec)
        rec_scores.append(rec)
        f1_scores.append(f1)
        log_losses.append(ll)

    lambda_results = {
        "lambda": lam,
        "accuracy":  (float(np.mean(acc_scores)), float(np.std(acc_scores))),
        "precision": (float(np.mean(prec_scores)), float(np.std(prec_scores))),
        "recall":    (float(np.mean(rec_scores)), float(np.std(rec_scores))),
        "f1":        (float(np.mean(f1_scores)),  float(np.std(f1_scores))),
        "log_loss":  (float(np.mean(log_losses)), float(np.std(log_losses))),
    }
    all_results.append(lambda_results)

    print(f"\n--- RESULTS FOR LAMBDA = {lam} ---")
    print(f"Accuracy : {lambda_results['accuracy'][0]:.4f} ± {lambda_results['accuracy'][1]:.4f}")
    print(f"Precision: {lambda_results['precision'][0]:.4f} ± {lambda_results['precision'][1]:.4f}")
    print(f"Recall   : {lambda_results['recall'][0]:.4f} ± {lambda_results['recall'][1]:.4f}")
    print(f"F1 Score : {lambda_results['f1'][0]:.4f} ± {lambda_results['f1'][1]:.4f}")
    print(f"Log Loss : {lambda_results['log_loss'][0]:.4f} ± {lambda_results['log_loss'][1]:.4f}")

print("\n================ FINAL SUMMARY FOR ALL LAMBDAS ================\n")
print(f"{'Lambda':>8} | {'Accuracy':>18} | {'Precision':>18} | {'Recall':>18} | {'F1 Score':>18} | {'Log Loss':>18}")
print("-" * 108)
for res in all_results:
    print(f"{res['lambda']:>8} | "
          f"{res['accuracy'][0]:.4f} ± {res['accuracy'][1]:.4f} | "
          f"{res['precision'][0]:.4f} ± {res['precision'][1]:.4f} | "
          f"{res['recall'][0]:.4f} ± {res['recall'][1]:.4f} | "
          f"{res['f1'][0]:.4f} ± {res['f1'][1]:.4f} | "
          f"{res['log_loss'][0]:.4f} ± {res['log_loss'][1]:.4f}")




--- Run 1/10 ---
Epoch    0 | DGI: 0.7097 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.7094
Epoch  500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 1000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 1500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 2000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 2500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 3000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 3500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 4000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 4500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 5000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 5500 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 6000 | DGI: 0.6931 | Reg: -0.2841 | λ*Reg: -0.0003 | Total: 0.6929
Epoch 6500 | DGI: 0.6931 | Reg:

Lambda |           Accuracy |          Precision |             Recall |           F1 Score |           Log Loss
------------------------------------------------------------------------------------------------------------
   0.001 | 0.6147 ± 0.0603 | 0.6600 ± 0.0445 | 0.6359 ± 0.1413 | 0.6390 ± 0.0922 | 0.6928 ± 0.0001
   0.005 | 0.6427 ± 0.0724 | 0.6806 ± 0.0600 | 0.6725 ± 0.1253 | 0.6714 ± 0.0898 | 0.6927 ± 0.0002
   0.009 | 0.6463 ± 0.0734 | 0.6855 ± 0.0570 | 0.6683 ± 0.1286 | 0.6717 ± 0.0943 | 0.6926 ± 0.0003
    0.01 | 0.6513 ± 0.0717 | 0.6963 ± 0.0569 | 0.6593 ± 0.1253 | 0.6722 ± 0.0913 | 0.6926 ± 0.0003
    0.05 | 0.6687 ± 0.0748 | 0.7080 ± 0.0553 | 0.6838 ± 0.1303 | 0.6906 ± 0.0948 | 0.6925 ± 0.0004
    0.09 | 0.6467 ± 0.0718 | 0.6897 ± 0.0559 | 0.6611 ± 0.1214 | 0.6703 ± 0.0897 | 0.6926 ± 0.0002
     0.1 | 0.6557 ± 0.0700 | 0.6998 ± 0.0541 | 0.6653 ± 0.1218 | 0.6771 ± 0.0897 | 0.6926 ± 0.0003
     0.3 | 0.6377 ± 0.0846 | 0.6790 ± 0.0651 | 0.6509 ± 0.1451 | 0.6588 ± 0.1089 | 0.6926 ± 0.0004
     0.5 | 0.6437 ± 0.0714 | 0.6824 ± 0.0485 | 0.6617 ± 0.1465 | 0.6652 ± 0.1034 | 0.6926 ± 0.0003
     0.9 | 0.6187 ± 0.0760 | 0.6616 ± 0.0589 | 0.6365 ± 0.1369 | 0.6429 ± 0.1002 | 0.6928 ± 0.0003
       1 | 0.6100 ± 0.0670 | 0.6615 ± 0.0616 | 0.6281 ± 0.1293 | 0.6356 ± 0.0850 | 0.6928 ± 0.0002
       2 | 0.6343 ± 0.0663 | 0.6803 ± 0.0503 | 0.6359 ± 0.1420 | 0.6509 ± 0.1006 | 0.6927 ± 0.0002
       5 | 0.5857 ± 0.0526 | 0.6222 ± 0.0409 | 0.6605 ± 0.1153 | 0.6344 ± 0.0728 | 0.6929 ± 0.0002
       8 | 0.5860 ± 0.0711 | 0.6253 ± 0.0601 | 0.6437 ± 0.1101 | 0.6299 ± 0.0811 | 0.6929 ± 0.0002

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from scipy.optimize import linear_sum_assignment

def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Assigns predicted clusters to true labels
    to maximize accuracy using the Jonker-Volgenant algorithm (linear_sum_assignment).
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row_ind, col_ind = linear_sum_assignment(-w)
    return w[row_ind, col_ind].sum() / y_pred.size


model.eval()
with torch.no_grad():
    embeddings = model.get_embeddings(node_feats.to(device), edge_index.to(device))
    embeddings = embeddings.cpu().numpy()

kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(embeddings)
y_pred_kmeans = kmeans.labels_

ari_score = adjusted_rand_score(y, y_pred_kmeans)
nmi_score = normalized_mutual_info_score(y, y_pred_kmeans)

print("Adjusted Rand Score:", ari_score)
print("Normalized Mutual Information Score:", nmi_score)

acc_kmeans = cluster_acc(y, y_pred_kmeans)
print("Clustering Accuracy (mapped):", acc_kmeans)

Adjusted Rand Score: -0.001379769830930187
Normalized Mutual Information Score: 7.97612442553983e-05
Clustering Accuracy (mapped): 0.5333333333333333


In [None]:
# class DGI(nn.Module):
#     def __init__(self, input_dim, hidden_dim,output_dim, cut=0):
#         super().__init__()
#         self.encoder = GCNEncoder(input_dim, hidden_dim)
#         self.readout = nn.Linear(hidden_dim, output_dim)
#         self.cut = cut
#         self.output_dim = output_dim

#     def forward(self, x, edge_index, corrupt_x, adj=None):
#         h = self.encoder(x, edge_index)
#         h_corrupt = self.encoder(corrupt_x, edge_index)

#         # Summary vector
#         s = torch.sigmoid(h.mean(dim=0))

#         # Positive & negative scores
#         pos = torch.matmul(h, s)
#         neg = torch.matmul(h_corrupt, s)

#         # DGI loss
#         dgi_loss = -torch.log(torch.sigmoid(pos - neg) + 1e-8).mean()

#         reg_loss = 0
#         if adj is not None:
#             A = torch.as_tensor(adj, dtype=torch.float32, device=x.device)
#             D = torch.diag(A.sum(dim=1))

#             if self.cut == 1:  # Cut loss
#                 L = D - A
#                 p = self.readout(h)
#                 C = F.softmax(p, dim=1)
#                 reg_loss = torch.trace(C.T @ L @ C) / (torch.trace(C.T @ D @ C) + 1e-8)

#             else:  # Modularity loss
#                 m = torch.sum(A)
#                 B = A - torch.outer(D.diag(), D.diag()) / (2 * m)
#                 p = self.readout(h)
#                 C = F.softmax(p, dim=1)
#                 k = torch.tensor(self.output_dim, dtype=torch.float32, device=x.device)
#                 n = C.shape[0]
#                 reg_loss = (-1 / (2 * m)) * torch.trace(torch.mm(torch.mm(C.t(), B), C))
#                 reg_loss += (torch.sqrt(k) / n) * torch.norm(torch.sum(C, dim=0), p='fro') - 1


#         return h, dgi_loss, reg_loss

In [None]:
# hidden_dim = 256
# output_dim = 2
# cut = 0
# model = DGI(features.shape[1], hidden_dim, output_dim, cut=cut).to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# num_epochs = 7000
# for epoch in range(num_epochs+1):
#     model.train()
#     optimizer.zero_grad()

#     perm = torch.randperm(features.shape[0])
#     corrupt_features = node_feats[perm]

#     _, dgi_loss, reg_loss = model(
#         node_feats.to(device),
#         edge_index.to(device),
#         corrupt_features.to(device),
#         adj=torch.tensor(W0).to(device)
#     )

#     loss = dgi_loss + reg_loss
#     loss.backward()
#     optimizer.step()

#     if epoch % 500 == 0:
#         print(f"Epoch {epoch} | DGI Loss: {dgi_loss.item():.4f} | Reg Loss: {reg_loss.item():.4f} | Total Loss: {loss.item():.4f}")


Epoch 0 | DGI Loss: 0.6922 | Reg Loss: -0.1250 | Total Loss: 0.5672
Epoch 500 | DGI Loss: 0.2924 | Reg Loss: -0.1250 | Total Loss: 0.1674
Epoch 1000 | DGI Loss: 0.2514 | Reg Loss: -0.1250 | Total Loss: 0.1264
Epoch 1500 | DGI Loss: 0.2425 | Reg Loss: -0.1250 | Total Loss: 0.1175
Epoch 2000 | DGI Loss: 0.2083 | Reg Loss: -0.1250 | Total Loss: 0.0833
Epoch 2500 | DGI Loss: 0.2106 | Reg Loss: -0.1250 | Total Loss: 0.0856
Epoch 3000 | DGI Loss: 0.1714 | Reg Loss: -0.1246 | Total Loss: 0.0468
Epoch 3500 | DGI Loss: 0.1480 | Reg Loss: -0.1256 | Total Loss: 0.0224
Epoch 4000 | DGI Loss: 0.1510 | Reg Loss: -0.1278 | Total Loss: 0.0232
Epoch 4500 | DGI Loss: 0.1389 | Reg Loss: -0.1328 | Total Loss: 0.0060
Epoch 5000 | DGI Loss: 0.1076 | Reg Loss: -0.1373 | Total Loss: -0.0297
Epoch 5500 | DGI Loss: 0.1220 | Reg Loss: -0.1399 | Total Loss: -0.0180
Epoch 6000 | DGI Loss: 0.0915 | Reg Loss: -0.1414 | Total Loss: -0.0499
Epoch 6500 | DGI Loss: 0.0746 | Reg Loss: -0.1428 | Total Loss: -0.0681
Epoch 

In [None]:
# model.eval()
# with torch.no_grad():
#     embeddings, _, _ = model(
#         node_feats.to(device),
#         edge_index.to(device),
#         node_feats.to(device),
#         adj=torch.tensor(W0).to(device)
#     )

# embeddings = embeddings.cpu().numpy()

In [None]:
# from sklearn.cluster import KMeans
# from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

# # Use KMeans clustering
# kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
# kmeans.fit(embeddings)
# y_pred_kmeans = kmeans.labels_

# # Evaluate clustering performance
# ari_score = adjusted_rand_score(y, y_pred_kmeans)
# nmi_score = normalized_mutual_info_score(y, y_pred_kmeans)

# print("Adjusted Rand Score:", ari_score)
# print("Normalized Mutual Information Score:", nmi_score)

# # Note: K-Means is an unsupervised algorithm, so traditional classification metrics like accuracy, precision, recall, and F1 are not directly applicable without mapping clusters to classes.
# # However, we can calculate accuracy by mapping the cluster labels to the true labels in the way that maximizes accuracy.
# # This is not a standard evaluation for clustering but can give an idea of how well the clusters separate the classes.
# from scipy.optimize import linear_sum_assignment
# def cluster_acc(y_true, y_pred):
#     """
#     Calculate clustering accuracy. Assigns predicted clusters to true labels
#     to maximize accuracy using the Jonker-Volgenant algorithm (linear_sum_assignment).
#     """
#     y_true = y_true.astype(np.int64)
#     assert y_pred.size == y_true.size
#     D = max(y_pred.max(), y_true.max()) + 1
#     w = np.zeros((D, D), dtype=np.int64)
#     for i in range(y_pred.size):
#         w[y_pred[i], y_true[i]] += 1
#     row_ind, col_ind = linear_sum_assignment(-w)
#     return w[row_ind, col_ind].sum() / y_pred.size

# acc_kmeans = cluster_acc(y, y_pred_kmeans)
# print("Clustering Accuracy (mapped):", acc_kmeans)

Adjusted Rand Score: 0.00017775231989074027
Normalized Mutual Information Score: 0.00035768106511865666
Clustering Accuracy (mapped): 0.54


1- GCN

Accuracy: 0.7466666666666667
Precision: 0.7263681592039801
Recall: 0.874251497005988
F1: 0.7934782608695652
Log Loss: 0.5786983582841999

Accuracy: 0.7533333333333333
Precision: 0.7360406091370558
Recall: 0.8682634730538922
F1: 0.7967032967032966
Log Loss: 0.5772490248961657