### Temporal Graph Neural Network (TGNN) for Fraud Detection

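This notebook runs the end-to-end pipeline — data preparation, training, and evaluation — for a Temporal Graph Neural Network (TGNN) model that detects money-laundering activity in transaction data. The implementation itself lives in the `src` package; the cells below only orchestrate it.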

In [6]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to the project's `src` modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from src.data_prep import TemporalGraphDataProcessor
from src.utils import load_config
from src.train import ModelTrainer
from src.evaluate import ModelEvaluator

In [3]:
# Read the run configuration and build the data processor with the configured
# preprocessing time window.
config = load_config("config.yaml")
processor = TemporalGraphDataProcessor(
    config['preprocessing']['time_window']
)

# Step 1: load the transactions and apply the basic preprocessing.
df = processor.load_and_preprocess()

# Step 2: compute account-level features over the time window. This runs on the
# frame as loaded, before the transaction-level features are added.
df_account_stats = processor.calculate_account_features(df)

# Step 3: engineer transaction-level features (rebinds `df`).
df = processor.engineer_features(df)

# Step 4: build the temporal graph snapshots; `global_num_nodes` is passed on to
# training and evaluation together with the snapshots.
snapshots, global_num_nodes = processor.create_temporal_snapshots(
    df,
    df_account_stats,
)

2025-11-07 21:32:09 - INFO - Loading and preprocessing data...
2025-11-07 21:32:12 - INFO - Loaded 9504852 transactions
2025-11-07 21:32:12 - INFO - Suspicious transactions: 9873 (0.104%)
2025-11-07 21:32:12 - INFO - Calculating account-level features over time window: 7D
2025-11-07 21:33:57 - INFO - Account-level features calculation completed.
2025-11-07 21:33:57 - INFO - Engineering transaction features...
2025-11-07 21:34:02 - INFO - Creating temporal graph snapshots...
2025-11-07 21:34:02 - INFO - Processing window: 2022-10-07 to 2022-10-13
2025-11-07 21:34:04 - INFO - Processing window: 2022-10-14 to 2022-10-20
2025-11-07 21:34:06 - INFO - Processing window: 2022-10-21 to 2022-10-27
2025-11-07 21:34:08 - INFO - Processing window: 2022-10-28 to 2022-11-03
2025-11-07 21:34:10 - INFO - Processing window: 2022-11-04 to 2022-11-10
2025-11-07 21:34:12 - INFO - Processing window: 2022-11-11 to 2022-11-17
2025-11-07 21:34:14 - INFO - Processing window: 2022-11-18 to 2022-11-24
2025-11-07

In [11]:
# Training: build the trainer from the shared config and fit on the temporal
# snapshots. `results` is kept because the evaluation cells read
# `results['model']` from it.
trainer = ModelTrainer(config)
results = trainer.train_model(
    snapshots,
    global_num_nodes,
)

2025-11-07 21:40:31 - INFO - Using device: cuda
2025-11-07 21:40:31 - INFO - Data split - Train: 32, Val: 7, Test: 7
2025-11-07 21:40:39 - INFO - Epoch 1: Train Loss(x1e3): 7.0832, Val Loss(x1e3): 1.2211, F2: 0.0065, Threshold: 0.050, Recall: 0.0075, LR: 0.000500
2025-11-07 21:40:44 - INFO - Epoch 2: Train Loss(x1e3): 1.4454, Val Loss(x1e3): 1.3392, F2: 0.0000, Threshold: 0.500, Recall: 0.0000, LR: 0.000500
2025-11-07 21:40:49 - INFO - Epoch 3: Train Loss(x1e3): 1.2331, Val Loss(x1e3): 1.0319, F2: 0.0082, Threshold: 0.050, Recall: 0.0364, LR: 0.000500
2025-11-07 21:40:54 - INFO - Epoch 4: Train Loss(x1e3): 0.9831, Val Loss(x1e3): 0.8698, F2: 0.0089, Threshold: 0.050, Recall: 0.1831, LR: 0.000500
2025-11-07 21:40:58 - INFO - Epoch 5: Train Loss(x1e3): 0.8707, Val Loss(x1e3): 0.7972, F2: 0.0078, Threshold: 0.050, Recall: 0.3285, LR: 0.000500
2025-11-07 21:41:03 - INFO - Epoch 6: Train Loss(x1e3): 0.8243, Val Loss(x1e3): 0.7614, F2: 0.0079, Threshold: 0.100, Recall: 0.0974, LR: 0.000500
2

In [12]:
# Evaluation setup: take the trained model from the training results and create
# an evaluator from the same config.
model = results['model']
evaluator = ModelEvaluator(config)

# Recompute the split boundaries over the ordered snapshot list. This is meant to
# reproduce the split used during training.
# NOTE(review): the formula is duplicated here by hand; confirm it yields the same
# train/val/test sizes that ModelTrainer logs ("Data split - Train/Val/Test").
train_size = int(
    len(snapshots)
    * (
        1
        - config['preprocessing']['validation_split']
        - config['preprocessing']['test_split']
    )
)
val_size = int(
    len(snapshots) * config['preprocessing']['validation_split']
)

In [13]:
# Evaluate on the validation slice: the `val_size` snapshots that immediately
# follow the training portion. `val_metrics` is reused by the test cell for its
# 'threshold' entry.
val_snapshots = snapshots[train_size: train_size + val_size]
val_metrics, val_probs, val_labels = evaluator.evaluate_model(
    model,
    val_snapshots,
    global_num_nodes,
    'val',
    plot=True,
)

2025-11-07 21:51:55 - INFO - Evaluating on val set...
2025-11-07 21:51:58 - INFO - VAL RESULTS:
2025-11-07 21:51:58 - INFO - Optimal Threshold: 0.400
2025-11-07 21:51:58 - INFO - F2 Score: 0.8455
2025-11-07 21:51:58 - INFO - Precision: 0.8587
2025-11-07 21:51:58 - INFO - Recall: 0.8422
2025-11-07 21:51:58 - INFO - PR-AUC: 0.8822


In [14]:
# Evaluate on the test slice: every snapshot after the train and validation
# portions. The decision threshold is taken from the validation metrics and
# passed in explicitly rather than being chosen on the test data.
threshold = val_metrics['threshold']
test_snapshots = snapshots[train_size + val_size:]
test_metrics, test_probs, test_labels = evaluator.evaluate_model(
    model,
    test_snapshots,
    global_num_nodes,
    'test',
    threshold=threshold,
    plot=True,
)

2025-11-07 21:52:03 - INFO - Evaluating on test set...
2025-11-07 21:52:04 - INFO - TEST RESULTS:
2025-11-07 21:52:04 - INFO - Optimal Threshold: 0.400
2025-11-07 21:52:04 - INFO - F2 Score: 0.8336
2025-11-07 21:52:04 - INFO - Precision: 0.8386
2025-11-07 21:52:04 - INFO - Recall: 0.8324
2025-11-07 21:52:04 - INFO - PR-AUC: 0.8792
