# Benchmark: Equal Parameters Comparison
## STGNN vs Transformer vs Hybrid dengan Jumlah Parameter SAMA (~350K)

Perbandingan yang benar-benar fair - semua model punya kapasitas yang setara.

In [2]:
import os, gc, json, time, random, warnings
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, confusion_matrix, precision_score, recall_score
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
warnings.filterwarnings('ignore')
print('Libraries loaded!')

  from .autonotebook import tqdm as notebook_tqdm


Libraries loaded!


# =============================================================================
# CONFIGURATION
# =============================================================================
# Seed every RNG the notebook relies on so repeated runs start identically.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True

# Project layout: graphs are read from GRAPHS_DIR; the two output folders
# are created up front so later cells can write into them directly.
ROOT = Path('DatasetTA')
GRAPHS_DIR = ROOT / 'project_data' / 'graphs'
MODELS_DIR = ROOT / 'project_data' / 'models'
ANALYSIS_DIR = ROOT / 'project_data' / 'analysis'
for p in (MODELS_DIR, ANALYSIS_DIR):
    p.mkdir(parents=True, exist_ok=True)

# Mixed precision is enabled exactly when a CUDA device is available.
USE_AMP = torch.cuda.is_available()
DEVICE = 'cuda' if USE_AMP else 'cpu'

# Training config
EPOCHS = 15
LR = 5e-4
BATCH_ACCUM = 4          # samples accumulated per optimizer step
MAX_SEGMENTS = 8         # upper bound on per-video segments
MAX_NODES = 300          # graphs above this size are subsampled
PATIENCE = 5             # early-stopping patience (epochs)
LABEL_SMOOTHING = 0.1
DROPOUT = 0.3

# A file whose stem contains one of these names is labelled as an anomaly.
ANOM_CLASSES = [
    'Abuse', 'Arrest', 'Arson', 'Assault', 'Burglary', 'Explosion',
    'Fighting', 'Robbery', 'Shooting', 'Shoplifting', 'Stealing', 'Vandalism',
]

print(f'Device: {DEVICE}')
print(f'Graphs: {GRAPHS_DIR}')
print(f'Target: ~350K parameters per model')

In [None]:
# =============================================================================
# STGNN Only - Tuned to ~350K params
# =============================================================================
class STGNNOnlyScaled(nn.Module):
    """Graph-only baseline: residual GCN stack per segment, mean over segments.

    Every segment graph is embedded on its own by ``forward_single``; the
    segment embeddings are then averaged and mapped to 2 logits. The default
    widths are the ones this notebook uses to approach its ~350K parameter
    target.

    Args:
        hidden_dim: Width of the node encoder and of every GCN layer.
        output_dim: Width of the per-segment embedding.
        gcn_layers: Number of residual GCN layers.
    """

    def __init__(self, hidden_dim=220, output_dim=220, gcn_layers=5):
        super().__init__()
        half_hidden = hidden_dim // 2
        half_out = output_dim // 2
        # Node encoder: 10 input features (see norm_feat) -> hidden_dim.
        self.enc = nn.Sequential(
            nn.Linear(10, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(DROPOUT),
        )
        self.convs = nn.ModuleList([GCNConv(hidden_dim, hidden_dim) for _ in range(gcn_layers)])
        self.norms = nn.ModuleList([nn.LayerNorm(hidden_dim) for _ in range(gcn_layers)])
        # Scores one scalar per node; used as softmax pooling weights.
        self.att = nn.Sequential(
            nn.Linear(hidden_dim, half_hidden),
            nn.Tanh(),
            nn.Linear(half_hidden, 1),
        )
        self.proj = nn.Linear(hidden_dim, output_dim)
        self.cls = nn.Sequential(
            nn.Linear(output_dim, half_out),
            nn.GELU(),
            nn.Dropout(DROPOUT),
            nn.Linear(half_out, 2),
        )

    def forward_single(self, x, ei):
        """Embed one segment graph into a ``(1, output_dim)`` tensor.

        Args:
            x: Node feature matrix, normalised via ``norm_feat``.
            ei: Edge index passed to every GCN layer.
        """
        node_h = self.enc(norm_feat(x))
        for gcn, layer_norm in zip(self.convs, self.norms):
            # Residual update around each normalised GCN layer.
            update = F.gelu(layer_norm(gcn(node_h, ei)))
            node_h = node_h + update
        # Softmax over the node axis turns the scores into pooling weights.
        weights = torch.softmax(self.att(node_h).squeeze(-1), 0).unsqueeze(-1)
        pooled = (node_h * weights).sum(0, keepdim=True)
        return self.proj(pooled)

    def forward(self, segs):
        """Return ``(1, 2)`` logits for a list of segment graphs."""
        dev = next(self.parameters()).device
        embeddings = []
        for g in segs:
            if g.edge_index.numel() > 0:
                edges = g.edge_index.to(dev)
            else:
                # Edge-less segment: hand the GCN an explicit empty index.
                edges = torch.empty((2, 0), dtype=torch.long, device=dev)
            embeddings.append(self.forward_single(g.x.to(dev), edges))
        video_embedding = torch.cat(embeddings, 0).mean(0, keepdim=True)
        return self.cls(video_embedding)

# =============================================================================
# Transformer Only - Tuned to ~350K params (smaller MLP)
# =============================================================================
class TransformerOnlyScaled(nn.Module):
    """Transformer-only baseline: no graph convolutions, edges are ignored.

    Each segment's nodes are encoded and softmax-pooled into one token; the
    token sequence goes through a Transformer encoder, is averaged over the
    sequence axis, and passes an extra MLP (added to increase capacity)
    before the 2-way classifier.

    Args:
        hidden_dim: Width of the node encoder.
        output_dim: Token width (Transformer ``d_model``).
        trans_layers: Number of Transformer encoder layers.
        heads: Number of attention heads.
    """

    def __init__(self, hidden_dim=128, output_dim=128, trans_layers=1, heads=4):
        super().__init__()
        half_hidden = hidden_dim // 2
        half_out = output_dim // 2
        self.enc = nn.Sequential(
            nn.Linear(10, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(DROPOUT),
        )
        self.att = nn.Sequential(
            nn.Linear(hidden_dim, half_hidden),
            nn.Tanh(),
            nn.Linear(half_hidden, 1),
        )
        self.proj = nn.Linear(hidden_dim, output_dim)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=output_dim,
            nhead=heads,
            dim_feedforward=output_dim * 2,
            dropout=DROPOUT,
            activation='gelu',
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(enc_layer, num_layers=trans_layers)
        # MLP sized to reach ~350K total
        self.mlp1 = nn.Sequential(
            nn.Linear(output_dim, 256),
            nn.GELU(),
            nn.Dropout(DROPOUT),
            nn.Linear(256, 192),
            nn.GELU(),
            nn.Dropout(DROPOUT),
            nn.Linear(192, output_dim),
        )
        self.cls = nn.Sequential(
            nn.Linear(output_dim, half_out),
            nn.GELU(),
            nn.Dropout(DROPOUT),
            nn.Linear(half_out, 2),
        )

    def forward_single(self, x):
        """Pool one segment's node features into a ``(1, output_dim)`` token."""
        node_h = self.enc(norm_feat(x))
        # Softmax over the node axis gives the pooling weights.
        weights = torch.softmax(self.att(node_h).squeeze(-1), 0).unsqueeze(-1)
        pooled = (node_h * weights).sum(0, keepdim=True)
        return self.proj(pooled)

    def forward(self, segs):
        """Return ``(1, 2)`` logits for a list of segment graphs."""
        dev = next(self.parameters()).device
        tokens = [self.forward_single(g.x.to(dev)) for g in segs]
        # Shape the tokens as a single-sample batch for the encoder.
        seq = torch.cat(tokens, 0).unsqueeze(0)
        pooled = self.transformer(seq).mean(1)
        refined = self.mlp1(pooled)
        return self.cls(refined)

# =============================================================================
# Hybrid STGNN + Transformer - Original ~350K params
# =============================================================================
class HybridOriginal(nn.Module):
    """Hybrid model: per-segment residual GCN encoder followed by a Transformer.

    ``forward_stgnn`` turns each segment graph into one token; the token
    sequence is processed by a Transformer encoder, averaged over the
    sequence axis and classified into 2 logits.

    Args:
        hidden_dim: Width of the node encoder and GCN layers.
        output_dim: Token width (Transformer ``d_model``).
        gcn_layers: Number of residual GCN layers.
        trans_layers: Number of Transformer encoder layers.
        heads: Number of attention heads.
    """

    def __init__(self, hidden_dim=128, output_dim=128, gcn_layers=3, trans_layers=2, heads=4):
        super().__init__()
        half_hidden = hidden_dim // 2
        half_out = output_dim // 2
        self.enc = nn.Sequential(
            nn.Linear(10, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.GELU(),
            nn.Dropout(DROPOUT),
        )
        self.convs = nn.ModuleList([GCNConv(hidden_dim, hidden_dim) for _ in range(gcn_layers)])
        self.norms = nn.ModuleList([nn.LayerNorm(hidden_dim) for _ in range(gcn_layers)])
        self.att = nn.Sequential(
            nn.Linear(hidden_dim, half_hidden),
            nn.Tanh(),
            nn.Linear(half_hidden, 1),
        )
        self.proj = nn.Linear(hidden_dim, output_dim)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=output_dim,
            nhead=heads,
            dim_feedforward=output_dim * 2,
            dropout=DROPOUT,
            activation='gelu',
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(enc_layer, num_layers=trans_layers)
        self.cls = nn.Sequential(
            nn.Linear(output_dim, half_out),
            nn.GELU(),
            nn.Dropout(DROPOUT),
            nn.Linear(half_out, 2),
        )

    def forward_stgnn(self, x, ei):
        """Embed one segment graph into a ``(1, output_dim)`` token."""
        node_h = self.enc(norm_feat(x))
        for gcn, layer_norm in zip(self.convs, self.norms):
            # Residual update around each normalised GCN layer.
            update = F.gelu(layer_norm(gcn(node_h, ei)))
            node_h = node_h + update
        weights = torch.softmax(self.att(node_h).squeeze(-1), 0).unsqueeze(-1)
        pooled = (node_h * weights).sum(0, keepdim=True)
        return self.proj(pooled)

    def forward(self, segs):
        """Return ``(1, 2)`` logits for a list of segment graphs."""
        dev = next(self.parameters()).device
        tokens = []
        for g in segs:
            if g.edge_index.numel() > 0:
                edges = g.edge_index.to(dev)
            else:
                # Edge-less segment: hand the GCN an explicit empty index.
                edges = torch.empty((2, 0), dtype=torch.long, device=dev)
            tokens.append(self.forward_stgnn(g.x.to(dev), edges))
        seq = torch.cat(tokens, 0).unsqueeze(0)
        pooled = self.transformer(seq).mean(1)
        return self.cls(pooled)

print('Model classes defined!')

Parameter Count Verification:
STGNN Only (Scaled):          345,733 params
Transformer Only (Scaled):    323,715 params
Hybrid (Original):            350,147 params
Average: 339,865 params
Max diff: 4.8%
✅ Parameters are well balanced!


# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
def augment_features(x):
    """Extend 5-column node features with five derived geometry columns.

    Rows are presumably ``(x1, y1, x2, y2, conf)`` boxes, going by the
    original variable names -- TODO confirm against the graph builder.
    Appended columns: width, height, area, centre-x, centre-y, with width
    and height clamped to at least 1. Inputs whose column count is not 5
    are returned unchanged (the same object).
    """
    if x.size(1) != 5:
        return x
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    width = (right - left).clamp(min=1)
    height = (bottom - top).clamp(min=1)
    derived = torch.stack(
        [
            width,
            height,
            width * height,
            0.5 * (left + right),
            0.5 * (top + bottom),
        ],
        dim=1,
    )
    return torch.cat([x, derived], dim=1)

def norm_feat(x):
    """Augment ``x`` and min-max scale every column over the rows of ``x``.

    The statistics come from the tensor passed in, so scaling is relative
    to that one graph/segment. Constant columns map to 0 because the range
    is clamped to at least 1e-6 before dividing.
    """
    feats = augment_features(x)
    col_min = feats.min(0).values
    col_range = (feats.max(0).values - col_min).clamp(min=1e-6)
    return (feats - col_min) / col_range

class FocalLoss(nn.Module):
    """Focal loss on top of (optionally weighted, label-smoothed) cross-entropy.

    Computes ``mean((1 - p_t) ** gamma * ce)``, where ``p_t`` is the softmax
    probability of the target class and ``ce`` is the per-sample
    cross-entropy including class weights and label smoothing.

    Args:
        alpha: Optional per-class weight tensor forwarded to
            ``F.cross_entropy(weight=...)``.
        gamma: Focusing exponent; ``0`` reduces to plain cross-entropy.
        label_smoothing: Forwarded to ``F.cross_entropy``.
    """

    def __init__(self, alpha=None, gamma=2.0, label_smoothing=0.0):
        super().__init__()
        self.alpha, self.gamma, self.ls = alpha, gamma, label_smoothing

    def forward(self, logits, targets):
        """Return the scalar focal loss for ``(N, C)`` logits and ``(N,)`` class indices."""
        ce = F.cross_entropy(
            logits,
            targets,
            weight=self.alpha,
            label_smoothing=self.ls,
            reduction='none',
        )
        # p_t must come from the raw softmax. exp(-ce) is only equal to p_t
        # when ce is unweighted and unsmoothed; with class weights it becomes
        # p_t ** alpha_t and distorts the modulating factor.
        log_probs = F.log_softmax(logits, dim=-1)
        pt = log_probs.gather(-1, targets.unsqueeze(-1)).squeeze(-1).exp()
        return (((1 - pt) ** self.gamma) * ce).mean()

def limit_nodes(g, max_nodes=MAX_NODES):
    """Randomly subsample a graph down to at most ``max_nodes`` nodes.

    Graphs that already fit are returned unchanged (same object). Otherwise
    a random node subset is kept in its original order, only edges with both
    endpoints kept survive, and their indices are renumbered to the new
    node positions.

    Per-node ``frame_ids`` are carried over to the subsampled graph.
    Previously they were dropped, so ``split_segments`` treated every
    oversized graph as a single segment.

    Args:
        g: Graph object exposing ``x`` and ``edge_index`` (optionally ``frame_ids``).
        max_nodes: Maximum number of nodes to keep.

    Returns:
        ``g`` itself, or a new ``Data`` with ``x``, ``edge_index`` and,
        when available, ``frame_ids``.

    Note:
        Sampling uses the global torch RNG (``torch.randperm``), so repeated
        calls on the same oversized graph can select different nodes.
    """
    num_nodes = g.x.size(0)
    if num_nodes <= max_nodes:
        return g
    # Sorting keeps the surviving nodes in their original relative order.
    idx = torch.randperm(num_nodes)[:max_nodes].sort().values
    if g.edge_index.numel() > 0:
        keep = torch.zeros(num_nodes, dtype=torch.bool)
        keep[idx] = True
        em = keep[g.edge_index[0]] & keep[g.edge_index[1]]
        # Old node id -> new position lookup for the surviving nodes.
        mapping = torch.zeros(num_nodes, dtype=torch.long)
        mapping[idx] = torch.arange(len(idx))
        ei = mapping[g.edge_index[:, em]]
    else:
        ei = torch.empty((2, 0), dtype=torch.long)
    out = Data(x=g.x[idx], edge_index=ei)
    frame_ids = getattr(g, 'frame_ids', None)
    if frame_ids is not None:
        frame_ids = torch.as_tensor(frame_ids)
        # Only propagate ids that really are one-per-node; anything else is
        # dropped, as before.
        if frame_ids.dim() >= 1 and frame_ids.size(0) == num_nodes:
            out.frame_ids = frame_ids[idx]
    return out

def split_segments(g, max_seg=MAX_SEGMENTS):
    """Split a graph into per-frame sub-graphs, keeping at most ``max_seg`` frames.

    The graph is first passed through ``limit_nodes``. If the result has no
    usable ``frame_ids`` it is returned as a single segment. Otherwise one
    sub-graph is built per selected frame id, containing that frame's nodes
    and the edges whose endpoints both belong to it (renumbered locally).
    When there are more distinct frames than ``max_seg``, evenly spaced
    frame ids are picked with ``np.linspace``.

    Assumes ``frame_ids`` holds one id per node -- TODO confirm against the
    graph builder.

    Returns:
        A non-empty list of ``Data`` objects.
    """
    g = limit_nodes(g)
    raw_ids = getattr(g, 'frame_ids', None)
    if raw_ids is None:
        return [g]

    fids = raw_ids.numpy() if isinstance(raw_ids, torch.Tensor) else np.array(raw_ids)
    frames = np.unique(fids)
    if len(frames) > max_seg:
        frames = frames[np.linspace(0, len(frames) - 1, max_seg, dtype=int)]

    has_edges = g.edge_index.numel() > 0
    segs = []
    for frame in frames:
        idx = np.where(fids == frame)[0]
        if len(idx) == 0:
            continue
        ei = torch.empty((2, 0), dtype=torch.long)
        if has_edges:
            node_ids = torch.tensor(idx)
            inside = torch.isin(g.edge_index[0], node_ids) & torch.isin(g.edge_index[1], node_ids)
            kept = g.edge_index[:, inside]
            if kept.numel() > 0:
                # Renumber global node ids to positions inside this segment.
                mapping = torch.zeros(g.x.size(0), dtype=torch.long)
                mapping[idx] = torch.arange(len(idx))
                ei = mapping[kept]
        segs.append(Data(x=g.x[idx], edge_index=ei))
    return segs or [limit_nodes(g)]

print('Utility functions defined!')

In [None]:
# =============================================================================
# DATASET
# =============================================================================
class GraphDataset(Dataset):
    """Graphs stored as ``*_graph.pt`` files, with a fixed 85/15 split.

    Files below ``graph_dir`` are found recursively, sorted, shuffled with a
    ``SEED``-seeded RNG and cut at 85%. ``split='train'`` selects the head;
    any other value selects the tail.
    """

    def __init__(self, graph_dir, split='train'):
        all_files = sorted(graph_dir.rglob('*_graph.pt'))
        random.Random(SEED).shuffle(all_files)
        cut = int(0.85 * len(all_files))
        self.files = all_files[:cut] if split == 'train' else all_files[cut:]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        """Return ``(graph, label, stem)`` for the ``i``-th file.

        The label is 1 when any name in ``ANOM_CLASSES`` occurs as a
        substring of the file stem, else 0.
        """
        path = self.files[i]
        # NOTE(review): weights_only=False unpickles arbitrary objects; only
        # load graph files from a trusted source.
        g = torch.load(path, weights_only=False)
        stem = path.stem
        label = int(any(c in stem for c in ANOM_CLASSES))
        return g, label, stem

# Load dataset
train_ds = GraphDataset(GRAPHS_DIR, 'train')
val_ds = GraphDataset(GRAPHS_DIR, 'val')
print(f'Train: {len(train_ds)} | Val: {len(val_ds)}')

# NOTE: indexing the dataset loads each graph from disk just to read its label.
train_labels = [train_ds[i][1] for i in range(len(train_ds))]
n0, n1 = train_labels.count(0), train_labels.count(1)
# Guard the denominator so an empty graph directory reports 0.0% for both
# classes instead of raising ZeroDivisionError.
n_total = max(n0 + n1, 1)
print(f'Class distribution - Normal: {n0} ({n0/n_total*100:.1f}%) | Anomaly: {n1} ({n1/n_total*100:.1f}%)')

NameError: name 'MAX_NODES' is not defined

# Verify parameter counts: build each model once, count its parameters and
# report how far the counts deviate from their (integer) average.
print('Parameter Count Verification:')
print('='*50)
m1, m2, m3 = STGNNOnlyScaled(), TransformerOnlyScaled(), HybridOriginal()
p1, p2, p3 = (sum(w.numel() for w in m.parameters()) for m in (m1, m2, m3))
print(f'STGNN Only (Scaled):       {p1:>10,} params')
print(f'Transformer Only (Scaled): {p2:>10,} params')
print(f'Hybrid (Original):         {p3:>10,} params')
print('='*50)
avg = (p1+p2+p3)//3
deviations = [abs(count - avg) for count in (p1, p2, p3)]
diff_pct = max(deviations)/avg*100
print(f'Average: {avg:,} params')
print(f'Max diff: {diff_pct:.1f}%')
if diff_pct >= 15:
    print('⚠️ Parameters need more tuning')
elif diff_pct >= 5:
    print('✅ Parameters are acceptable')
else:
    print('✅ Parameters are well balanced!')
# The throwaway instances are only needed for counting.
del m1, m2, m3


Parameter Count Verification:
STGNN Only (Scaled):        400,515 params
Transformer Only (Scaled):  2,243,715 params
Hybrid (Original):          350,147 params


## 4. Training & Evaluation Functions

In [5]:
def train_epoch(model, ds, opt, crit, scaler, dev):
    """Run one training pass over ``ds`` with gradient accumulation.

    Samples are processed one at a time. Each loss is divided by
    ``BATCH_ACCUM`` and the optimizer steps every ``BATCH_ACCUM`` samples
    and on the last sample, after unscaling and clipping the gradient norm
    to 1.0.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen.
    """
    model.train()
    opt.zero_grad()
    total_loss, n_correct, n_seen = 0, 0, 0
    progress = tqdm(ds, leave=False, desc='Train')
    for step, (g, lab, _) in enumerate(progress):
        segs = split_segments(g)
        y = torch.tensor([lab], dtype=torch.long, device=dev)
        with torch.amp.autocast('cuda', enabled=USE_AMP):
            logits = model(segs)
            loss = crit(logits, y) / BATCH_ACCUM
        scaler.scale(loss).backward()

        if (step + 1) % BATCH_ACCUM == 0 or step + 1 == len(ds):
            # Gradients must be unscaled before clipping on their true norm.
            scaler.unscale_(opt)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()
            opt.zero_grad()

        # Undo the accumulation scaling so the reported loss is per-sample.
        total_loss += loss.item() * BATCH_ACCUM
        n_correct += int(logits.argmax(1).item() == lab)
        n_seen += 1
        if step % 50 == 0:
            torch.cuda.empty_cache()
    return total_loss / n_seen, n_correct / n_seen

@torch.no_grad()
def evaluate(model, ds, crit, dev):
    """Score ``model`` on ``ds`` and pick the F1-optimal decision threshold.

    Accuracy is computed from the arg-max prediction. F1, precision and
    recall use the threshold (out of 50 values in [0.1, 0.9]) that
    maximises F1 on this same data. AUC falls back to 0.5 when only one
    class is present.

    Returns:
        Dict with keys ``loss``, ``acc``, ``auc``, ``f1``, ``precision``,
        ``recall``, ``thr``, ``ys``, ``ps`` and ``preds``.
    """
    model.eval()
    total_loss, n_correct = 0, 0
    ys, ps = [], []
    for step, (g, lab, _) in enumerate(tqdm(ds, leave=False, desc='Eval')):
        segs = split_segments(g)
        y = torch.tensor([lab], dtype=torch.long, device=dev)
        with torch.amp.autocast('cuda', enabled=USE_AMP):
            logits = model(segs)
            loss = crit(logits, y)
        total_loss += loss.item()
        n_correct += int(logits.argmax(1).item() == lab)
        ys.append(lab)
        # Probability assigned to class 1 (anomaly).
        ps.append(torch.softmax(logits, 1)[0, 1].item())
        if step % 50 == 0:
            torch.cuda.empty_cache()
    n = len(ys)

    if len(set(ys)) > 1:
        auc = roc_auc_score(ys, ps)
    else:
        auc = 0.5

    def binarize(thr):
        return [1 if p >= thr else 0 for p in ps]

    # Find best threshold
    thr_range = np.linspace(0.1, 0.9, 50)
    f1_scores = [f1_score(ys, binarize(t), zero_division=0) for t in thr_range]
    best_idx = np.argmax(f1_scores)
    best_thr = thr_range[best_idx]
    best_f1 = f1_scores[best_idx]
    preds = binarize(best_thr)
    prec = precision_score(ys, preds, zero_division=0)
    rec = recall_score(ys, preds, zero_division=0)
    return {
        'loss': total_loss / n,
        'acc': n_correct / n,
        'auc': auc,
        'f1': best_f1,
        'precision': prec,
        'recall': rec,
        'thr': best_thr,
        'ys': ys,
        'ps': ps,
        'preds': preds,
    }

@torch.no_grad()
def measure_inference_time(model, ds, dev, num_samples=50, warmup=5):
    """Time single-sample forward passes of ``model`` on the first items of ``ds``.

    Up to ``warmup`` untimed passes run first. Each timed pass covers only
    the model call (segment splitting is excluded) and is bracketed by CUDA
    synchronisation when CUDA is available.

    Returns:
        Dict with ``mean_ms``, ``std_ms`` (milliseconds per sample) and
        ``fps`` (``1000 / mean_ms``).
    """
    model.eval()
    model = model.to(dev)
    on_gpu = torch.cuda.is_available()

    for i in range(min(warmup, len(ds))):
        segs = split_segments(ds[i][0])
        with torch.amp.autocast('cuda', enabled=USE_AMP):
            model(segs)
    if on_gpu:
        torch.cuda.synchronize()

    times = []
    for i in range(min(num_samples, len(ds))):
        segs = split_segments(ds[i][0])
        if on_gpu:
            torch.cuda.synchronize()
        started = time.perf_counter()
        with torch.amp.autocast('cuda', enabled=USE_AMP):
            model(segs)
        if on_gpu:
            # Wait for queued kernels so the timer covers the full pass.
            torch.cuda.synchronize()
        elapsed = time.perf_counter() - started
        times.append(elapsed * 1000)

    mean_ms = np.mean(times)
    return {'mean_ms': mean_ms, 'std_ms': np.std(times), 'fps': 1000 / mean_ms}

def train_model(model, model_name, train_ds, val_ds, dev):
    """Train ``model`` with early stopping on validation AUC and report metrics.

    Uses focal loss with inverse-frequency class weights, AdamW, cosine LR
    annealing over ``EPOCHS`` and AMP when enabled. The weights of the best
    validation-AUC epoch are saved under ``MODELS_DIR`` and reloaded for the
    final evaluation.

    Args:
        model: Network to train; moved to ``dev``.
        model_name: Display name, also used to derive the checkpoint file name.
        train_ds: Training dataset yielding ``(graph, label, stem)``.
        val_ds: Validation dataset with the same item layout.
        dev: Target device.

    Returns:
        Dict with ``model_name``, ``params``, ``best_auc``, ``accuracy``,
        ``precision``, ``recall``, ``best_f1``, ``best_thr``, ``history``,
        ``ys``, ``ps`` and ``preds``.
    """
    params = sum(p.numel() for p in model.parameters())
    print(f'\n{"="*60}')
    print(f'Training: {model_name}')
    print(f'Parameters: {params:,}')
    print('='*60)

    model = model.to(dev)
    labels = [train_ds[i][1] for i in range(len(train_ds))]
    n0, n1 = labels.count(0), labels.count(1)
    total = n0 + n1
    # Each class is weighted by the frequency of the *other* class, so the
    # rarer class gets the larger weight.
    weights = torch.tensor([n1/total, n0/total], device=dev) if total > 0 else torch.tensor([0.5, 0.5], device=dev)

    crit = FocalLoss(alpha=weights, gamma=2.0, label_smoothing=LABEL_SMOOTHING)
    opt = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, EPOCHS)
    scaler = torch.amp.GradScaler('cuda', enabled=USE_AMP)

    # Start below any reachable AUC so the first epoch always writes a
    # checkpoint. Starting at 0 meant a run that never exceeded AUC 0.0 would
    # save nothing and then load a missing (or stale, from an earlier run) file.
    best_auc, patience_counter = float('-inf'), 0
    history = {'train_loss': [], 'val_loss': [], 'val_acc': [], 'val_auc': []}
    save_name = model_name.lower().replace(' ', '_').replace('+', '_').replace('-', '_')
    ckpt_path = MODELS_DIR / f'equal_{save_name}.pt'

    for epoch in range(EPOCHS):
        train_loss, train_acc = train_epoch(model, train_ds, opt, crit, scaler, dev)
        val_res = evaluate(model, val_ds, crit, dev)
        sched.step()

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_res['loss'])
        history['val_acc'].append(val_res['acc'])
        history['val_auc'].append(val_res['auc'])

        print(f"Epoch {epoch+1:2d} | Train Loss: {train_loss:.4f} | Val Acc: {val_res['acc']*100:.1f}% | Val AUC: {val_res['auc']:.4f} | Val F1: {val_res['f1']:.4f}")

        if val_res['auc'] > best_auc:
            best_auc = val_res['auc']
            patience_counter = 0
            torch.save(model.state_dict(), ckpt_path)
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f'Early stopping at epoch {epoch+1}')
                break
        torch.cuda.empty_cache(); gc.collect()

    # Load best model and final eval
    model.load_state_dict(torch.load(ckpt_path, weights_only=True))
    final_res = evaluate(model, val_ds, crit, dev)

    return {
        'model_name': model_name,
        'params': params,
        'best_auc': best_auc,
        'accuracy': final_res['acc'],
        'precision': final_res['precision'],
        'recall': final_res['recall'],
        'best_f1': final_res['f1'],
        'best_thr': final_res['thr'],
        'history': history,
        'ys': final_res['ys'],
        'ps': final_res['ps'],
        'preds': final_res['preds']
    }

print('Training functions defined!')

Training functions defined!


## 5. Run Training

In [6]:
results = []

# (constructor, display name) for every contender, trained in this order.
benchmark_plan = [
    (STGNNOnlyScaled, 'STGNN Only (Scaled)'),
    (TransformerOnlyScaled, 'Transformer Only (Scaled)'),
    (HybridOriginal, 'Hybrid STGNN+Transformer'),
]

for build_model, display_name in benchmark_plan:
    model = build_model()
    result = train_model(model, display_name, train_ds, val_ds, DEVICE)
    # Timing is measured on the trained model and merged into its result dict.
    result.update(measure_inference_time(model, val_ds, DEVICE))
    results.append(result)
    # Release the model before the next one is built.
    del model
    torch.cuda.empty_cache()
    gc.collect()

print('\n' + '='*60)
print('TRAINING COMPLETE!')
print('='*60)

NameError: name 'train_ds' is not defined

## 6. Results Summary

In [None]:
# Fixed-width results table, one row per trained model.
table_rule = '-'*100
print('\n' + '='*100)
print('HASIL BENCHMARK: EQUAL PARAMETERS COMPARISON')
print('='*100)
print(f'{"Model":<30} | {"Params":>10} | {"Accuracy":>8} | {"AUC-ROC":>8} | {"F1":>8} | {"Precision":>9} | {"Recall":>8} | {"Time(ms)":>9}')
print(table_rule)

for r in results:
    row = f"{r['model_name']:<30} | {r['params']:>10,} | {r['accuracy']*100:>7.2f}% | {r['best_auc']:>8.4f} | {r['best_f1']:>8.4f} | {r['precision']:>9.4f} | {r['recall']:>8.4f} | {r['mean_ms']:>9.2f}"
    print(row)

print(table_rule)
# The winner is the model with the highest best validation AUC.
best = max(results, key=lambda entry: entry['best_auc'])
print(f"\nBest Model: {best['model_name']} (AUC: {best['best_auc']:.4f})")

## 7. Visualization: Training History

In [None]:
# Three side-by-side panels: loss, validation accuracy, validation AUC.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# `colors` and `labels` are reused by the plotting cells that follow.
colors = ['#3498db', '#e74c3c', '#2ecc71']
labels = ['STGNN (Scaled)', 'Transformer (Scaled)', 'Hybrid']
loss_ax, acc_ax, auc_ax = axes

for idx, r in enumerate(results):
    hist = r['history']
    epochs = range(1, len(hist['train_loss'])+1)
    tint = colors[idx]

    # Loss
    loss_ax.plot(epochs, hist['train_loss'], '-', color=tint, alpha=0.5, label=f"{labels[idx]} (Train)")
    loss_ax.plot(epochs, hist['val_loss'], '--', color=tint, linewidth=2, label=f"{labels[idx]} (Val)")

    # Accuracy
    acc_ax.plot(epochs, [a*100 for a in hist['val_acc']], '-o', color=tint, linewidth=2, label=labels[idx])

    # AUC
    auc_ax.plot(epochs, hist['val_auc'], '-o', color=tint, linewidth=2, label=labels[idx])

# (axis, y label, title, legend options) for each panel.
panel_specs = [
    (loss_ax, 'Loss', 'Training & Validation Loss', {'loc': 'upper right', 'fontsize': 8}),
    (acc_ax, 'Accuracy (%)', 'Validation Accuracy', {}),
    (auc_ax, 'AUC-ROC', 'Validation AUC-ROC', {}),
]
for panel, y_label, title, legend_opts in panel_specs:
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(title, fontsize=12, fontweight='bold')
    panel.legend(**legend_opts)
    panel.grid(True, alpha=0.3)

plt.suptitle('Training History - Equal Parameters Comparison', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(ANALYSIS_DIR / 'equal_params_training_history.png', dpi=200, bbox_inches='tight')
plt.show()

## 8. Visualization: ROC Curves

In [None]:
# One ROC curve per model, drawn from the final-evaluation labels and scores.
fig, ax = plt.subplots(figsize=(8, 8))

for idx, r in enumerate(results):
    fpr, tpr, _ = roc_curve(r['ys'], r['ps'])
    curve_label = f"{labels[idx]} (AUC={r['best_auc']:.4f})"
    ax.plot(fpr, tpr, color=colors[idx], linewidth=2.5, label=curve_label)

# Diagonal reference line for a random classifier.
ax.plot([0, 1], [0, 1], 'k--', alpha=0.5, label='Random')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_title('ROC Curves - Equal Parameters Comparison', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(loc='lower right', fontsize=11)

plt.tight_layout()
plt.savefig(ANALYSIS_DIR / 'equal_params_roc_curves.png', dpi=200, bbox_inches='tight')
plt.show()

## 9. Visualization: Confusion Matrices

In [None]:
# One confusion-matrix heatmap per model, using predictions at the best-F1 threshold.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
class_ticks = ['Normal', 'Anomaly']

for idx, r in enumerate(results):
    panel = axes[idx]
    cm = confusion_matrix(r['ys'], r['preds'])
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        ax=panel,
        xticklabels=class_ticks,
        yticklabels=class_ticks,
        annot_kws={'size': 16, 'weight': 'bold'},
    )
    panel.set_xlabel('Predicted', fontsize=11)
    panel.set_ylabel('Actual', fontsize=11)
    panel_title = f"{labels[idx]}\nAcc: {r['accuracy']*100:.1f}% | F1: {r['best_f1']:.3f}"
    panel.set_title(panel_title, fontsize=12, fontweight='bold')

plt.suptitle('Confusion Matrices - Equal Parameters Comparison', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(ANALYSIS_DIR / 'equal_params_confusion_matrices.png', dpi=200, bbox_inches='tight')
plt.show()

## 10. Visualization: Performance Bar Charts

In [None]:
# 2x3 grid of bar charts, one metric per panel, all drawn by the same loop.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

model_names = [labels[i] for i in range(3)]
x = np.arange(len(model_names))

# One list of values per metric, in the same model order as `results`.
aucs = [r['best_auc'] for r in results]
accs = [r['accuracy']*100 for r in results]
f1s = [r['best_f1'] for r in results]
precs = [r['precision'] for r in results]
recs = [r['recall'] for r in results]
times = [r['mean_ms'] for r in results]

# (axis, values, y label, title, y limits or None, label lift, label formatter)
panel_specs = [
    (axes[0,0], aucs, 'AUC-ROC', 'AUC-ROC Comparison', (0.5, 1.0), 0.01, lambda v: f'{v:.4f}'),
    (axes[0,1], accs, 'Accuracy (%)', 'Accuracy Comparison', None, 0.5, lambda v: f'{v:.1f}%'),
    (axes[0,2], f1s, 'F1-Score', 'F1-Score Comparison', None, 0.01, lambda v: f'{v:.4f}'),
    (axes[1,0], precs, 'Precision', 'Precision Comparison', None, 0.01, lambda v: f'{v:.4f}'),
    (axes[1,1], recs, 'Recall', 'Recall Comparison', None, 0.01, lambda v: f'{v:.4f}'),
    (axes[1,2], times, 'Inference Time (ms)', 'Inference Time Comparison', None, 0.5, lambda v: f'{v:.1f}ms'),
]

for panel, values, y_label, title, y_limits, lift, fmt in panel_specs:
    panel.bar(x, values, color=colors, edgecolor='black', linewidth=1.5)
    panel.set_ylabel(y_label, fontsize=11)
    panel.set_title(title, fontsize=12, fontweight='bold')
    panel.set_xticks(x)
    panel.set_xticklabels(model_names, rotation=15, ha='right')
    if y_limits is not None:
        panel.set_ylim(*y_limits)
    panel.grid(True, alpha=0.3, axis='y')
    # Value label slightly above each bar.
    for i, v in enumerate(values):
        panel.text(i, v+lift, fmt(v), ha='center', fontweight='bold')

plt.suptitle('Performance Metrics - Equal Parameters Comparison (~350K params each)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(ANALYSIS_DIR / 'equal_params_performance_comparison.png', dpi=200, bbox_inches='tight')
plt.show()

## 11. Visualization: Parameters vs Performance

In [None]:
# Scatter of parameter count (in thousands) against best validation AUC.
fig, ax = plt.subplots(figsize=(10, 6))

params = [r['params']/1000 for r in results]
aucs = [r['best_auc'] for r in results]

for idx, (n_params_k, auc_value) in enumerate(zip(params, aucs)):
    point = (n_params_k, auc_value)
    ax.scatter(*point, s=300, c=colors[idx], edgecolor='black', linewidth=2, zorder=5)
    # Model name placed just above its marker.
    ax.annotate(
        labels[idx],
        point,
        textcoords="offset points",
        xytext=(0,15),
        ha='center',
        fontsize=11,
        fontweight='bold',
    )

ax.set_ylim(0.5, 1.0)
ax.set_xlabel('Parameters (K)', fontsize=12)
ax.set_ylabel('AUC-ROC', fontsize=12)
ax.set_title('Parameters vs Performance - Equal Parameters Comparison', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(ANALYSIS_DIR / 'equal_params_scatter.png', dpi=200, bbox_inches='tight')
plt.show()

## 12. Save Results

In [None]:
# Flatten each result into JSON-friendly scalars (numpy floats -> float).
model_entries = []
for r in results:
    model_entries.append({
        'name': r['model_name'],
        'params': r['params'],
        'accuracy': float(r['accuracy']),
        'auc_roc': float(r['best_auc']),
        'f1_score': float(r['best_f1']),
        'precision': float(r['precision']),
        'recall': float(r['recall']),
        'best_threshold': float(r['best_thr']),
        'inference_ms': float(r['mean_ms']),
        'fps': float(r['fps']),
    })

top_result = max(results, key=lambda entry: entry['best_auc'])
results_json = {
    'benchmark_type': 'Equal Parameters Comparison',
    'target_params': '~350K',
    'models': model_entries,
    'best_model': top_result['model_name'],
}

with open(ANALYSIS_DIR / 'equal_params_results.json', 'w') as f:
    json.dump(results_json, f, indent=2)

print(f'Results saved to: {ANALYSIS_DIR / "equal_params_results.json"}')

## 13. Final Summary & Conclusion

In [None]:
# Closing report: ranking by best validation AUC, the winner, and output files.
banner = '='*70
print(banner)
print('KESIMPULAN: EQUAL PARAMETERS COMPARISON')
print(banner)
print(f'\nSemua model memiliki ~350K parameters untuk perbandingan yang fair.\n')

# Sort by AUC
sorted_results = list(results)
sorted_results.sort(key=lambda entry: entry['best_auc'], reverse=True)

print('Ranking berdasarkan AUC-ROC:')
for i, r in enumerate(sorted_results, 1):
    print(f"  {i}. {r['model_name']}")
    print(f"     AUC: {r['best_auc']:.4f} | Acc: {r['accuracy']*100:.1f}% | F1: {r['best_f1']:.4f}")
    print(f"     Precision: {r['precision']:.4f} | Recall: {r['recall']:.4f}")
    print(f"     Inference: {r['mean_ms']:.2f}ms ({r['fps']:.1f} FPS)")
    print()

best = sorted_results[0]
print(banner)
print(f'BEST MODEL: {best["model_name"]}')
print(f'AUC-ROC: {best["best_auc"]:.4f}')
print(banner)

# Artefacts written by the earlier cells.
print('\nFiles saved:')
print(f'  - {ANALYSIS_DIR / "equal_params_training_history.png"}')
print(f'  - {ANALYSIS_DIR / "equal_params_roc_curves.png"}')
print(f'  - {ANALYSIS_DIR / "equal_params_confusion_matrices.png"}')
print(f'  - {ANALYSIS_DIR / "equal_params_performance_comparison.png"}')
print(f'  - {ANALYSIS_DIR / "equal_params_scatter.png"}')
print(f'  - {ANALYSIS_DIR / "equal_params_results.json"}')