In [1]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch_geometric.datasets import EllipticBitcoinDataset
from torch_geometric.nn import GATConv, GAE

In [2]:
# --- 1. Model Definition ---
class GATEncoder(nn.Module):
    """Two-layer graph-attention encoder that returns one embedding per node.

    The first layer is a multi-head ``GATConv``; the second is a single-head
    ``GATConv`` (``concat=False``) that maps to ``out_channels``.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Per-head output size of the first attention layer.
        out_channels: Size of each returned node embedding.
        heads: Number of attention heads in the first layer.
        dropout: Probability used for the feature dropout applied before each
            layer and passed as the ``dropout`` argument of both ``GATConv``
            layers. Defaults to 0.6, the value that used to be hard-coded.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=2, dropout=0.6):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        # The second layer is fed hidden_channels * heads features, i.e. the
        # width of the first layer's concatenated head outputs.
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1, concat=False, dropout=dropout)

    def forward(self, x, edge_index):
        """Encode node features into latent embeddings.

        Args:
            x: Node feature matrix passed to the first ``GATConv``.
            edge_index: Graph connectivity passed to both ``GATConv`` layers.

        Returns:
            The output of the second ``GATConv`` (no final activation).
        """
        # Feature dropout is only active in training mode (self.training).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.conv2(x, edge_index)


In [3]:
# --- 2. Setup and Data Loading ---
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# The dataset is cached under ./data; its first element is the graph used below.
dataset = EllipticBitcoinDataset(root='./data/EllipticBitcoinDataset')
data = dataset[0]

# Standardize the node features with statistics fitted on all nodes, then
# store them back on the graph as a float tensor before moving it to `device`.
# NOTE(review): the scaler is fitted on every node, test nodes included —
# confirm this is acceptable for the intended evaluation protocol.
scaler = StandardScaler()
data.x = torch.tensor(
    scaler.fit_transform(data.x.numpy()),
    dtype=torch.float,
)
data = data.to(device)
print(" Data loaded and preprocessed.")

Using device: cuda
 Data loaded and preprocessed.


In [4]:
# --- 3. Training ---
# Seed the RNGs before the model is built so that weight initialization,
# dropout masks and any random sampling done during training start from the
# same state on every Restart & Run All.
# NOTE(review): Python's and NumPy's RNGs are seeded as well in case library
# code draws from them — confirm which RNG the negative sampler uses. Some
# CUDA kernels are non-deterministic, so GPU runs may still differ slightly.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Model and optimization hyperparameters.
in_channels = data.num_node_features
hidden_channels = 128
out_channels = 64
NUM_EPOCHS = 200
LOG_EVERY = 10
LEARNING_RATE = 0.005

encoder = GATEncoder(in_channels, hidden_channels, out_channels)
model = GAE(encoder).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

print("\nStarting model training...")
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    optimizer.zero_grad()
    # Full-graph step: encode every node, then score reconstruction of the
    # observed edges (no explicit negative edges are passed to recon_loss).
    z = model.encode(data.x, data.edge_index)
    loss = model.recon_loss(z, data.edge_index)
    loss.backward()
    optimizer.step()
    if epoch % LOG_EVERY == 0:
        # .item() converts the loss tensor to a plain Python float for logging.
        print(f'Epoch: {epoch:03d}, Loss: {loss.item():.6f}')
print(" Training complete.")


Starting model training...
Epoch: 010, Loss: 4.028218
Epoch: 020, Loss: 3.614039
Epoch: 030, Loss: 3.422673
Epoch: 040, Loss: 3.239518
Epoch: 050, Loss: 3.081700
Epoch: 060, Loss: 2.989184
Epoch: 070, Loss: 2.859253
Epoch: 080, Loss: 2.814774
Epoch: 090, Loss: 2.697854
Epoch: 100, Loss: 2.612369
Epoch: 110, Loss: 2.730160
Epoch: 120, Loss: 2.566452
Epoch: 130, Loss: 2.488277
Epoch: 140, Loss: 2.509755
Epoch: 150, Loss: 2.458672
Epoch: 160, Loss: 2.431934
Epoch: 170, Loss: 2.416906
Epoch: 180, Loss: 2.427584
Epoch: 190, Loss: 2.534676
Epoch: 200, Loss: 2.479256
 Training complete.


In [None]:
# --- 4. Evaluation ---
# Anomaly score = Euclidean (L2) norm of each node's latent embedding.
model.eval()  # eval mode disables the encoder's feature dropout
with torch.no_grad():
    z = model.encode(data.x, data.edge_index)
    anomaly_scores_tensor = (z * z).sum(dim=1).sqrt()

# Prepare test data: keep only the nodes selected by the dataset's test mask
# and bring scores and labels back to NumPy for scikit-learn.
test_mask = data.test_mask
test_indices = torch.where(test_mask)[0]
gnn_anomaly_scores = anomaly_scores_tensor[test_indices].cpu().numpy()
true_labels = data.y[test_indices].cpu().numpy()

# Only compute AUC-ROC when at least two distinct labels are present;
# otherwise report the problem instead.
if np.unique(true_labels).size >= 2:
    gnn_auc_score = roc_auc_score(true_labels, gnn_anomaly_scores)
    print("\n--- GNN Model Performance (Method 1) ---")
    print(f"Unsupervised GAE (Latent Norm) AUC-ROC Score: {gnn_auc_score:.4f}")
else:
    print(" Error: The test set contains only one class.")

Unsupervised GAE (Latent Norm) AUC-ROC Score: 0.3487
