### Temporal Graph Neural Network (TGNN) for Fraud Detection

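This notebook walks through the full pipeline for a Temporal Graph Neural Network (TGNN) that detects money-laundering activity in transaction data: loading and preprocessing the transactions, building temporal graph snapshots, splitting them into train/validation/test sets, training the model, and evaluating it on the validation and test sets.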

In [1]:
# Enable IPython's autoreload extension so that edits to the imported local
# modules (the `src` package used below) are picked up without a kernel restart.
%load_ext autoreload
# Mode 2: reload all imported modules before executing each cell.
%autoreload 2

In [2]:
from pathlib import Path
from src.data_prep import TemporalGraphDataProcessor
from src.utils import load_config
from src.train import ModelTrainer
from src.evaluate import ModelEvaluator

In [3]:
# Read the run configuration and build the data processor for the configured
# preprocessing time window.
config = load_config("config.yaml")
time_window = config['preprocessing']['time_window']
processor = TemporalGraphDataProcessor(time_window)

# Load the transactions and apply the basic preprocessing.
df = processor.load_and_preprocess()

# Account-level features over a time window; computed from `df` before the
# transaction-level features are engineered.
df_account_stats = processor.calculate_account_features(df)

# Transaction-level feature engineering; `df` is rebound to the result.
df = processor.engineer_features(df)

# Temporal graph snapshots, plus the global node count that the training and
# evaluation cells pass along.
snapshots, global_num_nodes = processor.create_temporal_snapshots(
    df,
    df_account_stats,
)

2025-11-07 23:07:32 - INFO - Loading and preprocessing data...
2025-11-07 23:07:36 - INFO - Loaded 9504852 transactions
2025-11-07 23:07:36 - INFO - Suspicious transactions: 9873 (0.104%)
2025-11-07 23:07:36 - INFO - Calculating account-level features over time window: 7D
2025-11-07 23:09:13 - INFO - Account-level features calculation completed.
2025-11-07 23:09:13 - INFO - Engineering transaction features...
2025-11-07 23:09:17 - INFO - Creating temporal graph snapshots...
2025-11-07 23:09:17 - INFO - Processing window: 2022-10-07 to 2022-10-13
2025-11-07 23:09:19 - INFO - Processing window: 2022-10-14 to 2022-10-20
2025-11-07 23:09:22 - INFO - Processing window: 2022-10-21 to 2022-10-27
2025-11-07 23:09:24 - INFO - Processing window: 2022-10-28 to 2022-11-03
2025-11-07 23:09:26 - INFO - Processing window: 2022-11-04 to 2022-11-10
2025-11-07 23:09:28 - INFO - Processing window: 2022-11-11 to 2022-11-17
2025-11-07 23:09:30 - INFO - Processing window: 2022-11-18 to 2022-11-24
2025-11-07

In [4]:
# Data splitting: partition the snapshots into train / validation / test.
# The list is sliced in order (presumably chronological -- the processing log
# lists the windows in date order): train takes the first part, validation the
# next `val_size` snapshots, and test takes whatever remains.
n_snapshots = len(snapshots)
val_frac = config['preprocessing']['validation_split']
test_frac = config['preprocessing']['test_split']

# int() truncates, and binary floating point can land just below a whole number
# (e.g. 100 * 0.57 evaluates to 56.99999999999999), which would silently move a
# snapshot from one split to another. The small tolerance absorbs that error and
# leaves every result that is not within rounding error of an integer unchanged.
_FLOAT_TOLERANCE = 1e-9
train_size = int(n_snapshots * (1 - val_frac - test_frac) + _FLOAT_TOLERANCE)
val_size = int(n_snapshots * val_frac + _FLOAT_TOLERANCE)

train_snaps = snapshots[:train_size]
val_snaps = snapshots[train_size:train_size + val_size]
test_snaps = snapshots[train_size + val_size:]

# Fail fast rather than training or evaluating on an empty split, which can
# happen when there are few snapshots or the configured fractions are small.
assert min(len(train_snaps), len(val_snaps), len(test_snaps)) > 0, (
    f"Empty split: train={len(train_snaps)}, val={len(val_snaps)}, "
    f"test={len(test_snaps)} from {n_snapshots} snapshots"
)
# The three splits must cover every snapshot exactly once.
assert len(train_snaps) + len(val_snaps) + len(test_snaps) == n_snapshots

In [10]:
# Training: build the trainer from the run configuration and fit on the
# training snapshots, passing the validation snapshots and global node count.
trainer = ModelTrainer(config)
results = trainer.train_model(
    train_snaps,
    val_snaps,
    global_num_nodes,
)

2025-11-07 23:57:01 - INFO - Using device: cuda
2025-11-07 23:57:06 - INFO - Epoch 1: Train Loss(x1e3): 6.9590, Val Loss(x1e3): 1.1925, F2: 0.0071, Threshold: 0.050, Recall: 0.0096, LR: 0.000500
2025-11-07 23:57:11 - INFO - Epoch 2: Train Loss(x1e3): 1.4896, Val Loss(x1e3): 1.4026, F2: 0.0000, Threshold: 0.500, Recall: 0.0000, LR: 0.000500
2025-11-07 23:57:16 - INFO - Epoch 3: Train Loss(x1e3): 1.2394, Val Loss(x1e3): 1.0129, F2: 0.0085, Threshold: 0.050, Recall: 0.0473, LR: 0.000500
2025-11-07 23:57:21 - INFO - Epoch 4: Train Loss(x1e3): 0.9456, Val Loss(x1e3): 0.8269, F2: 0.0089, Threshold: 0.050, Recall: 0.2483, LR: 0.000500
2025-11-07 23:57:25 - INFO - Epoch 5: Train Loss(x1e3): 0.8264, Val Loss(x1e3): 0.7476, F2: 0.0084, Threshold: 0.050, Recall: 0.4801, LR: 0.000500
2025-11-07 23:57:30 - INFO - Epoch 6: Train Loss(x1e3): 0.7867, Val Loss(x1e3): 0.7163, F2: 0.0087, Threshold: 0.100, Recall: 0.1735, LR: 0.000500
2025-11-07 23:57:35 - INFO - Epoch 7: Train Loss(x1e3): 0.7702, Val Lo

In [11]:
# Evaluation setup: create the evaluator from the run configuration and take
# the model out of the training results.
evaluator = ModelEvaluator(config)
model = results['model']

In [12]:
# Validation set evaluation.
# Reuse the validation split from the data-splitting cell instead of repeating
# the slice arithmetic here, so the evaluated snapshots cannot drift from the
# ones used during training if the split logic changes.
val_snapshots = val_snaps
val_metrics, val_probs, val_labels = evaluator.evaluate_model(
    model, val_snapshots, global_num_nodes, 'val', plot=False
)

2025-11-08 00:14:48 - INFO - Evaluating on val set...
2025-11-08 00:14:51 - INFO - VAL RESULTS:
2025-11-08 00:14:51 - INFO - Optimal Threshold: 0.400
2025-11-08 00:14:51 - INFO - F2 Score: 0.8219
2025-11-08 00:14:51 - INFO - Precision: 0.8174
2025-11-08 00:14:51 - INFO - Recall: 0.8230
2025-11-08 00:14:51 - INFO - PR-AUC: 0.8565


In [13]:
# Test set evaluation.
# The decision threshold comes from the validation metrics and is passed in
# explicitly, so the test set is scored with the validation-selected value.
threshold = val_metrics['threshold']
# Reuse the test split from the data-splitting cell instead of repeating the
# slice arithmetic, keeping a single source of truth for the partition.
test_snapshots = test_snaps
test_metrics, test_probs, test_labels = evaluator.evaluate_model(
    model, test_snapshots, global_num_nodes, 'test', threshold=threshold, plot=False
)

2025-11-08 00:14:52 - INFO - Evaluating on test set...
2025-11-08 00:14:53 - INFO - TEST RESULTS:
2025-11-08 00:14:53 - INFO - Optimal Threshold: 0.400
2025-11-08 00:14:53 - INFO - F2 Score: 0.8170
2025-11-08 00:14:53 - INFO - Precision: 0.8138
2025-11-08 00:14:53 - INFO - Recall: 0.8178
2025-11-08 00:14:53 - INFO - PR-AUC: 0.8532
