In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

2.6.0+cu124


In [None]:
!pip install -q torch_geometric
!pip install -q class_resolver
!pip3 install pymatting


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting
  Downloading PyMatting-1.1.13-py3-none-any.whl.metadata (7.5 kB)
Downloading PyMatting-1.1.13-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting
Successfully installed pymatting-1.1.13


In [1]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from scipy import sparse
from scipy.sparse.linalg import eigsh

In [2]:
fa_patients_path = "/home/snu/Downloads/NIFD_Patients_FA_Histogram_Feature.npy"
Patients_FA_array = np.load(fa_patients_path, allow_pickle=True)

fa_controls_path = "/home/snu/Downloads/NIFD_Control_FA_Histogram_Feature.npy"
Controls_FA_array = np.load(fa_controls_path, allow_pickle=True)
print("Patients Shape:", Patients_FA_array.shape)
print("Controls Shape:", Controls_FA_array.shape)

Patients Shape: (98, 180)
Controls Shape: (48, 180)


In [3]:
X = np.vstack([Controls_FA_array, Patients_FA_array])
y = np.hstack([
    np.zeros(Controls_FA_array.shape[0], dtype=np.int64),  # 0 = Control
    np.ones(Patients_FA_array.shape[0], dtype=np.int64)    # 1 = Patient
])
np.random.seed(42)
perm = np.random.permutation(X.shape[0])
X = X[perm]
y = y[perm]

In [4]:
F = X.astype(np.float32)
print("Final Shape:", F.shape)

Final Shape: (146, 180)


In [5]:
def tokencut_on_features(F_array, alpha=1e-6):
    """
    Apply TokenCut clustering to feature matrix F_array (shape: N × D).
    Returns binary labels (0/1) for each node.
    """
    N, D = F_array.shape

    # 1. Normalize features row-wise
    norms = np.linalg.norm(F_array, axis=1, keepdims=True) + 1e-10
    F_norm = F_array / norms

    # 2. Construct cosine similarity matrix (fully connected)
    W = np.dot(F_norm, F_norm.T)
    W = W + alpha  # stabilizer

    # 3. Normalized Laplacian: L = I - D^{-1/2} W D^{-1/2}  where, W is the similarity matrix and D is the degree matrix
    d = np.sum(W, axis=1)
    d_inv_sqrt = np.diag(1.0 / np.sqrt(d + 1e-10))
    L = np.eye(N) - d_inv_sqrt @ W @ d_inv_sqrt

    # Sparse for efficiency
    L_sparse = sparse.csr_matrix(L)

    # 4. Compute the Fiedler vector, i.e., the second smallest eigenvector of L.
    vals, vecs = eigsh(L_sparse, k=2, which='SM')
    fiedler = vecs[:, 1]

    # 5. Threshold by mean
    threshold = fiedler.mean()
    labels = (fiedler > threshold).astype(np.int64)

    return labels, fiedler

In [6]:
labels, scores = tokencut_on_features(X)

# Evaluate
y_pred = labels
acc = accuracy_score(y, y_pred)
inv_acc = accuracy_score(y, 1 - y_pred)
if inv_acc > acc:
    y_pred = 1 - y_pred
    acc = inv_acc

prec = precision_score(y, y_pred)
rec = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)

# Normalize fiedler vector for log_loss
probs = (scores - scores.min()) / (scores.max() - scores.min() + 1e-10)
logloss = log_loss(y, probs)

print("===== TokenCut Results =====")
print("Accuracy Score:", acc)
print("Precision Score:", prec)
print("Recall Score:", rec)
print("F1 Score:", f1)
print("Log Loss:", logloss)

===== TokenCut Results =====
Accuracy Score: 0.5684931506849316
Precision Score: 0.8571428571428571
Recall Score: 0.42857142857142855
F1 Score: 0.5714285714285714
Log Loss: 1.2710329240595817


In [7]:
print(probs)

[0.86226492 0.89551637 0.92154778 0.8364318  0.70835031 0.94647455
 0.84242001 0.34210837 0.54163964 0.86571686 0.66872075 0.93379705
 0.95195904 0.14707104 0.80981566 0.78038104 0.44679319 0.93450632
 0.90755686 0.23153497 0.9235346  0.99559029 0.91972191 0.74421131
 0.83170888 0.92498214 0.0949218  0.89992008 0.75257742 0.91329715
 0.87592183 0.88992254 0.86676551 0.85204733 0.98046851 0.89748861
 0.87767505 0.75280388 0.3903654  0.25376328 0.90064527 0.93350212
 0.6120266  0.89579681 0.91100459 0.93266811 0.90951818 0.9412101
 0.94927685 0.76161802 0.77290819 0.87966014 0.90006788 0.56194225
 0.9629699  0.92056145 0.86591763 0.91643767 0.14147933 0.03825207
 0.9235003  0.41321862 0.33665649 0.91126912 0.92936907 0.9934717
 0.86224598 0.71558177 0.4702005  0.93905697 0.2568452  0.94640232
 0.88982284 0.52814676 0.91296664 0.92819843 0.86452235 0.56291686
 0.89648953 0.         0.88464461 0.753261   0.91435792 0.95364512
 0.40433872 0.97024617 0.73467451 0.83753161 0.79572194 0.836639

In [8]:
num_runs = 10
acc_scores, prec_scores, rec_scores, f1_scores, log_losses = [], [], [], [], []

for run in range(num_runs):
    print(f"\n--- Run {run+1}/{num_runs} ---")
    np.random.seed(run)
    perm = np.random.permutation(X.shape[0])
    X_run = X[perm]
    y_run = y[perm]

    labels, scores = tokencut_on_features(X_run)

    # Align labels to ground truth
    y_pred = labels
    acc = accuracy_score(y_run, y_pred)
    inv_acc = accuracy_score(y_run, 1 - y_pred)
    if inv_acc > acc:
        y_pred = 1 - y_pred
        acc = inv_acc

    prec = precision_score(y_run, y_pred)
    rec = recall_score(y_run, y_pred)
    f1 = f1_score(y_run, y_pred)
    probs = (scores - scores.min()) / (scores.max() - scores.min() + 1e-10)
    logloss = log_loss(y_run, probs)

    acc_scores.append(acc)
    prec_scores.append(prec)
    rec_scores.append(rec)
    f1_scores.append(f1)
    log_losses.append(logloss)

    print(f"Run {run+1} | Acc: {acc:.4f} | Prec: {prec:.4f} | Rec: {rec:.4f} | F1: {f1:.4f} | LogLoss: {logloss:.4f}")

print("\n================ FINAL SUMMARY ================\n")
print(f"{'Metric':>15} | {'Mean':>10} ± {'Std':<10}")
print("-" * 50)
print(f"{'Accuracy':>15} | {np.mean(acc_scores):.4f} ± {np.std(acc_scores):.4f}")
print(f"{'Precision':>15} | {np.mean(prec_scores):.4f} ± {np.std(prec_scores):.4f}")
print(f"{'Recall':>15} | {np.mean(rec_scores):.4f} ± {np.std(rec_scores):.4f}")
print(f"{'F1 Score':>15} | {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
print(f"{'Log Loss':>15} | {np.mean(log_losses):.4f} ± {np.std(log_losses):.4f}")


--- Run 1/10 ---
Run 1 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 2/10 ---
Run 2 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 3/10 ---
Run 3 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 4/10 ---
Run 4 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 5/10 ---
Run 5 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.2710

--- Run 6/10 ---
Run 6 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 7/10 ---
Run 7 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 8/10 ---
Run 8 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.4778

--- Run 9/10 ---
Run 9 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.2710

--- Run 10/10 ---
Run 10 | Acc: 0.5685 | Prec: 0.8571 | Rec: 0.4286 | F1: 0.5714 | LogLoss: 1.2710


         Metric 