In [1]:
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch_geometric.datasets import EllipticBitcoinDataset
from torch_geometric.nn import GATConv, GAE

In [2]:
# --- 1. Model Definition ---
class GATEncoder(nn.Module):
    """Two-layer graph-attention encoder that maps node features to embeddings.

    The first ``GATConv`` layer uses ``heads`` attention heads whose outputs
    are concatenated (hence the ``hidden_channels * heads`` input width of the
    second layer); the second layer uses a single head with ``concat=False``.

    Args:
        in_channels: Width of each input node-feature vector.
        hidden_channels: Per-head output width of the first layer.
        out_channels: Width of the embedding returned by ``forward``.
        heads: Number of attention heads in the first layer.
        dropout: Dropout probability used everywhere in the encoder: on the
            input features, on the hidden activations, and as the ``dropout``
            argument of both ``GATConv`` layers. Defaults to 0.6, the value
            that used to be hard-coded, so existing callers are unaffected.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=2, dropout=0.6):
        super().__init__()
        # Kept on the instance so forward() applies the same rate as the conv layers.
        self.dropout = dropout
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1, concat=False, dropout=dropout)

    def forward(self, x, edge_index):
        """Encode node features ``x`` over the graph given by ``edge_index``.

        Feature dropout is only active while the module is in training mode.

        Returns:
            The output of the second convolution (no activation applied).
        """
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

class AttributeDecoder(nn.Module):
    """Two-layer perceptron that maps node embeddings back to feature space.

    Args:
        in_channels: Width of the incoming embedding.
        hidden_channels: Width of the intermediate layer.
        out_channels: Width of the reconstructed feature vector.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.lin1 = nn.Linear(in_channels, hidden_channels)
        self.lin2 = nn.Linear(hidden_channels, out_channels)

    def forward(self, z):
        """Return ``lin2(relu(lin1(z)))``; the output layer has no activation."""
        hidden = torch.relu(self.lin1(z))
        return self.lin2(hidden)

class DominantLikeGAE(GAE):
    """Graph auto-encoder extended with an attribute-reconstruction head.

    The structural part (encoder and edge reconstruction loss) comes from the
    ``GAE`` base class; ``attr_decoder`` is an additional module that maps
    embeddings back to node features.

    Args:
        encoder: Module passed through to the ``GAE`` constructor.
        attr_decoder: Module called on the embeddings to reconstruct features.
    """

    def __init__(self, encoder, attr_decoder):
        super().__init__(encoder)
        self.attr_decoder = attr_decoder

    def decode_attr(self, z):
        """Reconstruct node features from the embeddings ``z``."""
        return self.attr_decoder(z)

    def loss(self, x, z, edge_index, alpha=0.5):
        """Blend the structural and attribute reconstruction losses.

        Args:
            x: Original node features, the target of the attribute loss.
            z: Node embeddings.
            edge_index: Edges handed to the base class ``recon_loss``.
            alpha: Weight of the attribute (MSE) term; the structural term is
                weighted by ``1 - alpha``.

        Returns:
            ``(1 - alpha) * structural + alpha * attribute`` as a scalar tensor.
        """
        # Structural term first, then the attribute term (same order as before).
        edge_term = self.recon_loss(z, edge_index)
        feature_term = F.mse_loss(self.decode_attr(z), x)
        structural_weight = 1 - alpha
        return structural_weight * edge_term + alpha * feature_term


In [3]:
# --- 2. Setup and Data Loading ---
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Load the dataset from (or into) the local data directory and take its first graph.
dataset = EllipticBitcoinDataset(root='./data/EllipticBitcoinDataset')
data = dataset[0]

# Standardise the node features. The scaler is fitted on the features of
# every node in the graph, then applied to those same features.
scaler = StandardScaler()
scaler.fit(data.x.numpy())
data.x = torch.tensor(scaler.transform(data.x.numpy()), dtype=torch.float)

# Move the whole graph object to the selected device.
data = data.to(device)
print(" Data loaded and preprocessed.")

Using device: cuda
 Data loaded and preprocessed.


In [4]:
# --- 3. Training ---
# Seed every RNG before the model is built: weight initialisation, dropout and
# any sampling done inside the loss all draw random numbers, so without this
# the logged losses and the downstream AUC change on every run. Seeding here
# (rather than in an earlier cell) also makes re-running this cell repeatable.
# GPU kernels may still introduce small floating-point run-to-run differences.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyper-parameters, gathered in one place so they are easy to tune.
NUM_EPOCHS = 200
LOG_EVERY = 10
LEARNING_RATE = 0.005
ATTRIBUTE_LOSS_WEIGHT = 0.8  # alpha: weight of the attribute term in model.loss

in_channels = data.num_node_features
hidden_channels_encoder = 128
out_channels_encoder = 64
hidden_channels_decoder = 128

# The attribute decoder maps embeddings back to the input feature width.
encoder = GATEncoder(in_channels, hidden_channels_encoder, out_channels_encoder)
attr_decoder = AttributeDecoder(out_channels_encoder, hidden_channels_decoder, in_channels)
model = DominantLikeGAE(encoder, attr_decoder).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

print("\nStarting model training...")
model.train()  # nothing in the loop leaves training mode, so set it once
for epoch in range(1, NUM_EPOCHS + 1):
    # Full-batch step: the whole graph is encoded in a single forward pass.
    optimizer.zero_grad()
    z = model.encode(data.x, data.edge_index)
    loss = model.loss(data.x, z, data.edge_index, alpha=ATTRIBUTE_LOSS_WEIGHT)
    loss.backward()
    optimizer.step()
    if epoch % LOG_EVERY == 0:
        # .item() pulls out a plain Python float instead of formatting a tensor.
        print(f'Epoch: {epoch:03d}, Loss: {loss.item():.6f}')
print(" Training complete.")


Starting model training...
Epoch: 010, Loss: 1.688812
Epoch: 020, Loss: 1.562316
Epoch: 030, Loss: 1.512500
Epoch: 040, Loss: 1.450567
Epoch: 050, Loss: 1.418036
Epoch: 060, Loss: 1.381947
Epoch: 070, Loss: 1.378312
Epoch: 080, Loss: 1.346175
Epoch: 090, Loss: 1.352465
Epoch: 100, Loss: 1.319534
Epoch: 110, Loss: 1.314151
Epoch: 120, Loss: 1.309869
Epoch: 130, Loss: 1.313607
Epoch: 140, Loss: 1.295784
Epoch: 150, Loss: 1.294023
Epoch: 160, Loss: 1.294960
Epoch: 170, Loss: 1.279748
Epoch: 180, Loss: 1.283969
Epoch: 190, Loss: 1.280753
Epoch: 200, Loss: 1.309469
 Training complete.


In [6]:
# --- 4. Evaluation ---
# Score every node by how poorly the model reconstructs its feature vector,
# then measure how well that score separates the labelled test nodes.
model.eval()  # inference mode: switches off the encoder's dropout
with torch.no_grad():
    z = model.encode(data.x, data.edge_index)
    x_reconstructed = model.decode_attr(z)

    # Per-node anomaly score: squared reconstruction error summed over features.
    anomaly_scores_tensor = (data.x - x_reconstructed).pow(2).sum(dim=1)

# Prepare test data
test_mask = data.test_mask
test_indices = test_mask.nonzero(as_tuple=True)[0]
gnn_anomaly_scores = anomaly_scores_tensor[test_indices].cpu().numpy()
true_labels = data.y[test_indices].cpu().numpy()

# ROC-AUC needs both classes among the test labels.
# NOTE(review): the recorded run printed an AUC of 0.1935, i.e. below 0.5, which
# means higher reconstruction error goes with the *lower* label value here.
# Confirm which value of data.y marks the anomalous class before interpreting.
if np.unique(true_labels).size >= 2:
    gnn_auc_score = roc_auc_score(true_labels, gnn_anomaly_scores)
    print(f"Unsupervised GAE (Recon Error) AUC-ROC Score: {gnn_auc_score:.4f}")
else:
    print(" Error: The test set contains only one class.")

Unsupervised GAE (Recon Error) AUC-ROC Score: 0.1935
