#### **Install necessary Libraries**

In [2]:
!pip install torch
!pip install torch-geometric
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-<YOUR-TORCH-VERSION>+cpu.html
!pip install torch torchvision torchaudio torch_geometric
!pip install torch-geometric torch-sparse torch-scatter -f https://data.pyg.org/whl/torch-$(python -c "import torch; print(torch.__version__)").html

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiohttp (from torch-geometric)
  Downloading aiohttp-3.11.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch-geometric)
  Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch-geometric)
  Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting async-timeout<6.0,>=4.0 (from aiohttp->torch-geometric)
  Downloading async_timeout-5.0.1-py3-none-any.whl.metadata (5.1 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch-geometric)
  Downloading frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multid

#### **Import the required Libraries**

In [3]:
import torch                                      # The main PyTorch library for tensor computation.
import torch.nn as nn                             # Provides classes and functions for building neural networks.
import torch.optim as optim                       # Contains various optimization algorithms for training neural networks.
from torch_geometric.nn import GCNConv            # A Graph Convolutional Layer from PyTorch Geometric.
from torch_geometric.nn import GATConv            # A Graph Attention Network (GAT) Convolutional Layer from PyTorch Geometric.
from torch_geometric.nn import SAGEConv           # A GraphSAGE Convolutional Layer from PyTorch Geometric.
from torch_geometric.nn import TransformerConv    # A Transformer Convolutional Layer from PyTorch Geometric.
import torch.nn.functional as F                   # Contains various functions for building neural networks (e.g., activation functions).
from torch_geometric.data import Data             # A class for graph data in PyTorch Geometric.
from torch.optim import Adam                      # Adam optimization algorithm for training neural networks.
from torch.nn.functional import cross_entropy     # Cross-entropy loss function for classification tasks.
from torch_geometric.loader import NeighborLoader # Loads graph data with neighbor sampling for efficient training.
from torch_geometric.loader import DataLoader     # A DataLoader for graph data in PyTorch Geometric.
import networkx as nx                             # Library for graph and network analysis.
from torch_geometric.utils import from_networkx   # Converts a NetworkX graph to PyTorch Geometric format.
from sklearn.preprocessing import StandardScaler  # Standardizes features by removing the mean and scaling to unit variance.
from sklearn.model_selection import train_test_split # Splits datasets into training and testing subsets.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score # Metrics for evaluating models.
from tqdm import tqdm                             # For progress tracking in loops.
import networkx as nx                             # Library for creating and analyzing graph data structures.
import pickle                                     # For saving and loading Python objects (e.g., models, data).
import os                                         # For file and directory operations.
import itertools                                  # For working with iterators and combinations.
from itertools import product                     # For generating the Cartesian product of input iterables.
# Import python packages
import pandas as pd                               # For data manipulation and analysis.
import numpy as np                                # For numerical computations.
import seaborn as sns                             # For data visualization.
import matplotlib.pyplot as plt                   # For creating visualizations.
# To ignore warnings
import warnings                                   # Handles Python warnings.
warnings.filterwarnings("ignore")                # Suppresses all warnings.


#### **Load Temporal Graph and Node Data**

In [3]:
# Load the temporal graph (NetworkX format)
temporal_graph_path = '/content/drive/MyDrive/GraphFeatures/TemporalGraph.pkl'
with open(temporal_graph_path, 'rb') as f:
    temporal_graph = pickle.load(f)

# Load node data
node_data_path = '/content/drive/MyDrive/GraphFeatures/node_data.pkl'
with open(node_data_path, 'rb') as f:
    node_data = pickle.load(f)

#### **Convert Temporal Graph to Pytorch Geometric Format and save to drive**

In [4]:
# Convert NetworkX temporal graph to PyTorch Geometric Data
temporal_data = from_networkx(temporal_graph)

# Extract target labels (isFraud) and ensure data type consistency
temporal_data.y = torch.tensor(
    [node_data.loc[node_data['TransactionID'] == node, 'isFraud'].values[0]
     for node in temporal_graph.nodes],
    dtype=torch.long  # Ensure labels are integers for classification
)

# Extract node features and ensure consistent dtype
temporal_data.x = torch.tensor(
    [list(temporal_graph.nodes[node].values()) for node in temporal_graph.nodes],
    dtype=torch.float  # Features should be float for GNN layers
)

print(f"Extracted Node Features: {temporal_data.x.size()}")
print(f"Extracted Target Labels: {temporal_data.y.size()}")

Extracted Node Features: torch.Size([99898, 1, 95])
Extracted Target Labels: torch.Size([99898])


#### **Load Pytorch format and Extract Node Features and Labels**

In [5]:
# Save the updated temporal graph
updated_temporal_graph_path = '/content/drive/MyDrive/GraphFeatures/UpdatedTemporalGraph.pt'
torch.save(temporal_data, updated_temporal_graph_path)
print(f"Updated temporal graph saved successfully at {updated_temporal_graph_path}.")


Updated temporal graph saved successfully at /content/drive/MyDrive/GraphFeatures/UpdatedTemporalGraph.pt.


In [4]:
# Load the updated temporal graph
updated_temporal_graph_path = '/content/drive/MyDrive/GraphFeatures/UpdatedTemporalGraph.pt'
temporal_data = torch.load(updated_temporal_graph_path)

#### **Reshape Node Features**

In [5]:
# Debug the shape of node features
print(f"Shape of node features before scaling: {temporal_data.x.shape}")

# Reshape if features are not 2D
if temporal_data.x.dim() == 3:
    temporal_data.x = temporal_data.x.view(temporal_data.x.size(0), -1)

Shape of node features before scaling: torch.Size([99898, 1, 95])


#### **Scaling Node Features**

In [6]:
# Scale the node features
scaler = StandardScaler()
temporal_data.x = torch.tensor(
    scaler.fit_transform(temporal_data.x.cpu().numpy()),
    dtype=torch.float
).to(temporal_data.x.device)

#### **Split Train, Validation, and Test Sets**

In [7]:
# Create masks for train, validation, and test sets
train_idx, temp_idx = train_test_split(
    range(temporal_data.num_nodes),
    test_size=0.4,  # 40% for validation and test
    stratify=temporal_data.y.numpy(),
    random_state=42
)

val_idx, test_idx = train_test_split(
    temp_idx,
    test_size=0.5,  # Split temp into equal parts for validation and test
    stratify=temporal_data.y.numpy()[temp_idx],
    random_state=42
)

temporal_data.train_mask = torch.zeros(temporal_data.num_nodes, dtype=torch.bool)
temporal_data.val_mask = torch.zeros(temporal_data.num_nodes, dtype=torch.bool)
temporal_data.test_mask = torch.zeros(temporal_data.num_nodes, dtype=torch.bool)

temporal_data.train_mask[train_idx] = True
temporal_data.val_mask[val_idx] = True
temporal_data.test_mask[test_idx] = True

print(f"Train nodes: {temporal_data.train_mask.sum().item()}")
print(f"Validation nodes: {temporal_data.val_mask.sum().item()}")
print(f"Test nodes: {temporal_data.test_mask.sum().item()}")

Train nodes: 59938
Validation nodes: 19980
Test nodes: 19980


#### **Temporal Graphs with Graph Convolution Networks (GCN) Model**

##### **Define the GCN Model**

In [8]:
# Define the GCN Model
class GCNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCNModel, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        return x

##### **Initialize and Train the Model**

In [9]:
# Initialize model, optimizer, and loss function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_dim = temporal_data.x.size(1)
hidden_dim = 32
output_dim = 2

model = GCNModel(input_dim, hidden_dim, output_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

# Move data to the device
temporal_data = temporal_data.to(device)

# Training loop
num_epochs = 50
patience = 5
best_val_loss = float('inf')
stopping_counter = 0

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(temporal_data.x, temporal_data.edge_index)
    loss = criterion(out[temporal_data.train_mask], temporal_data.y[temporal_data.train_mask])
    loss.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_out = model(temporal_data.x, temporal_data.edge_index)
        val_loss = criterion(val_out[temporal_data.val_mask], temporal_data.y[temporal_data.val_mask])

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}")

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        stopping_counter = 0
    else:
        stopping_counter += 1
        if stopping_counter >= patience:
            early_stop = True

Epoch 1/50, Loss: 0.6951, Validation Loss: 0.6927
Epoch 2/50, Loss: 0.6918, Validation Loss: 0.6922
Epoch 3/50, Loss: 0.6912, Validation Loss: 0.6908
Epoch 4/50, Loss: 0.6900, Validation Loss: 0.6892
Epoch 5/50, Loss: 0.6887, Validation Loss: 0.6880
Epoch 6/50, Loss: 0.6877, Validation Loss: 0.6873
Epoch 7/50, Loss: 0.6871, Validation Loss: 0.6870
Epoch 8/50, Loss: 0.6869, Validation Loss: 0.6868
Epoch 9/50, Loss: 0.6865, Validation Loss: 0.6867
Epoch 10/50, Loss: 0.6862, Validation Loss: 0.6865
Epoch 11/50, Loss: 0.6861, Validation Loss: 0.6864
Epoch 12/50, Loss: 0.6855, Validation Loss: 0.6862
Epoch 13/50, Loss: 0.6854, Validation Loss: 0.6859
Epoch 14/50, Loss: 0.6851, Validation Loss: 0.6857
Epoch 15/50, Loss: 0.6852, Validation Loss: 0.6855
Epoch 16/50, Loss: 0.6850, Validation Loss: 0.6854
Epoch 17/50, Loss: 0.6847, Validation Loss: 0.6853
Epoch 18/50, Loss: 0.6844, Validation Loss: 0.6853
Epoch 19/50, Loss: 0.6845, Validation Loss: 0.6853
Epoch 20/50, Loss: 0.6842, Validation Lo

##### **Evaluate the Model**

In [11]:
# Evaluate the model
def evaluate_model(model, data, mask):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        preds = out.argmax(dim=1)
        y_true = data.y[mask].cpu().numpy()
        y_pred = preds[mask].cpu().numpy()

        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, zero_division=0)
        recall = recall_score(y_true, y_pred, zero_division=0)
        f1 = f1_score(y_true, y_pred, zero_division=0)
        auc = roc_auc_score(y_true, out[mask][:, 1].cpu().numpy())

        return accuracy, precision, recall, f1, auc

# Validation metrics
val_metrics = evaluate_model(model, temporal_data, temporal_data.val_mask)
print(f"Validation - Accuracy: {val_metrics[0]:.4f}, Precision: {val_metrics[1]:.4f}, "
      f"Recall: {val_metrics[2]:.4f}, F1: {val_metrics[3]:.4f}, AUC: {val_metrics[4]:.4f}")

# Test metrics
test_metrics = evaluate_model(model, temporal_data, temporal_data.test_mask)
print(f"Test - Accuracy: {test_metrics[0]:.4f}, Precision: {test_metrics[1]:.4f}, "
      f"Recall: {test_metrics[2]:.4f}, F1: {test_metrics[3]:.4f}, AUC: {test_metrics[4]:.4f}")



Validation - Accuracy: 0.5386, Precision: 0.5289, Recall: 0.6969, F1: 0.6014, AUC: 0.5650
Test - Accuracy: 0.5465, Precision: 0.5347, Recall: 0.7090, F1: 0.6097, AUC: 0.5680


#### **Temporal Graph with GAT MODEL**

##### **Define the GAT Model**

In [12]:
# Define the GAT Model
class GATModel(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, heads=4, dropout=0.2):
        super(GATModel, self).__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=heads, dropout=dropout)
        self.conv2 = GATConv(hidden_dim * heads, output_dim, heads=1, concat=False, dropout=dropout)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

#### **Initialize and Train the Model**

In [13]:
# Model parameters
input_dim = temporal_data.x.size(1)  # Number of features per node
hidden_dim = 32
output_dim = 2  # Binary classification (isFraud)
heads = 4  # Number of attention heads
dropout = 0.2
learning_rate = 0.005
weight_decay = 5e-4

# Initialize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GATModel(input_dim, hidden_dim, output_dim, heads=heads, dropout=dropout).to(device)
temporal_data = temporal_data.to(device)
optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop
num_epochs = 50
patience = 5
best_val_loss = float('inf')
stopping_counter = 0

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(temporal_data.x, temporal_data.edge_index)
    loss = cross_entropy(out[temporal_data.train_mask], temporal_data.y[temporal_data.train_mask])
    loss.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_out = model(temporal_data.x, temporal_data.edge_index)
        val_loss = criterion(val_out[temporal_data.val_mask], temporal_data.y[temporal_data.val_mask])

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}")

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        stopping_counter = 0
    else:
        stopping_counter += 1
        if stopping_counter >= patience:
            early_stop = True


Epoch 1/50, Loss: 7.6193, Validation Loss: 2.0910
Epoch 2/50, Loss: 3.3907, Validation Loss: 2.5535
Epoch 3/50, Loss: 3.8959, Validation Loss: 1.4145
Epoch 4/50, Loss: 2.5764, Validation Loss: 0.9649
Epoch 5/50, Loss: 2.3082, Validation Loss: 1.9123
Epoch 6/50, Loss: 2.8866, Validation Loss: 1.5694
Epoch 7/50, Loss: 2.2829, Validation Loss: 1.2266
Epoch 8/50, Loss: 3.1286, Validation Loss: 0.9089
Epoch 9/50, Loss: 1.6067, Validation Loss: 0.9388
Epoch 10/50, Loss: 3.0012, Validation Loss: 0.9090
Epoch 11/50, Loss: 2.1475, Validation Loss: 0.8479
Epoch 12/50, Loss: 1.4112, Validation Loss: 0.8514
Epoch 13/50, Loss: 1.2359, Validation Loss: 0.9082
Epoch 14/50, Loss: 1.3225, Validation Loss: 0.9029
Epoch 15/50, Loss: 1.5541, Validation Loss: 0.8274
Epoch 16/50, Loss: 1.0676, Validation Loss: 0.7730
Epoch 17/50, Loss: 1.0109, Validation Loss: 0.7564
Epoch 18/50, Loss: 0.9163, Validation Loss: 0.7544
Epoch 19/50, Loss: 0.9828, Validation Loss: 0.7459
Epoch 20/50, Loss: 1.0001, Validation Lo

##### **Evaluate the Model**

In [14]:
def evaluate_model(model, data):
    model.eval()
    with torch.no_grad():
        # Forward pass
        out = model(data.x, data.edge_index)
        preds = out.argmax(dim=1)  # Predicted labels

        # Filter for test nodes
        y_true = data.y[data.test_mask].cpu().numpy()
        y_pred = preds[data.test_mask].cpu().numpy()

        # Metrics
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, zero_division=0)
        recall = recall_score(y_true, y_pred, zero_division=0)
        f1 = f1_score(y_true, y_pred, zero_division=0)
        auc = roc_auc_score(
            y_true, out[data.test_mask][:, 1].cpu().numpy()
        )

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-score: {f1:.4f}")
        print(f"AUC-ROC: {auc:.4f}")

# Evaluate the trained model
evaluate_model(model, temporal_data)

Accuracy: 0.5408
Precision: 0.5425
Recall: 0.5157
F1-score: 0.5288
AUC-ROC: 0.5506


#### **Temporal Graph with GraphSAGE Model**

##### **Define the Model**

In [15]:
class GraphSAGE(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.fc = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = self.fc(x)
        return x

##### **Initialize and Train the Model**

In [16]:
# Model parameters
input_dim = temporal_data.x.size(1)  # Number of features per node
hidden_dim = 32
output_dim = 2  # Binary classification (isFraud)
num_layers = 3
dropout = 0.2

# Initialize the GraphSAGE model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GraphSAGE(input_dim, hidden_dim, output_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()
# Move data to the device
temporal_data = temporal_data.to(device)

# Training loop
num_epochs = 50
patience = 5
best_val_loss = float('inf')
stopping_counter = 0

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(temporal_data.x, temporal_data.edge_index)
    loss = criterion(out[temporal_data.train_mask], temporal_data.y[temporal_data.train_mask])
    loss.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_out = model(temporal_data.x, temporal_data.edge_index)
        val_loss = criterion(val_out[temporal_data.val_mask], temporal_data.y[temporal_data.val_mask])

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}")

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        stopping_counter = 0
    else:
        stopping_counter += 1
        if stopping_counter >= patience:
            early_stop = True

Epoch 1/50, Loss: 0.6805, Validation Loss: 0.6242
Epoch 2/50, Loss: 0.6311, Validation Loss: 0.5836
Epoch 3/50, Loss: 0.5942, Validation Loss: 0.5656
Epoch 4/50, Loss: 0.5769, Validation Loss: 0.5532
Epoch 5/50, Loss: 0.5696, Validation Loss: 0.5437
Epoch 6/50, Loss: 0.5585, Validation Loss: 0.5354
Epoch 7/50, Loss: 0.5514, Validation Loss: 0.5242
Epoch 8/50, Loss: 0.5406, Validation Loss: 0.5161
Epoch 9/50, Loss: 0.5319, Validation Loss: 0.5132
Epoch 10/50, Loss: 0.5280, Validation Loss: 0.5110
Epoch 11/50, Loss: 0.5241, Validation Loss: 0.5080
Epoch 12/50, Loss: 0.5217, Validation Loss: 0.5052
Epoch 13/50, Loss: 0.5207, Validation Loss: 0.5018
Epoch 14/50, Loss: 0.5159, Validation Loss: 0.4979
Epoch 15/50, Loss: 0.5136, Validation Loss: 0.4946
Epoch 16/50, Loss: 0.5103, Validation Loss: 0.4931
Epoch 17/50, Loss: 0.5086, Validation Loss: 0.4920
Epoch 18/50, Loss: 0.5058, Validation Loss: 0.4904
Epoch 19/50, Loss: 0.5041, Validation Loss: 0.4881
Epoch 20/50, Loss: 0.5015, Validation Lo

#### **Evaluate the Model**

In [17]:
def evaluate_model(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        preds = out.argmax(dim=1)

        # Test nodes
        y_true = data.y[data.test_mask].cpu().numpy()
        y_pred = preds[data.test_mask].cpu().numpy()

        # Metrics
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, zero_division=0)
        recall = recall_score(y_true, y_pred, zero_division=0)
        f1 = f1_score(y_true, y_pred, zero_division=0)
        auc = roc_auc_score(y_true, out[data.test_mask][:, 1].cpu().numpy())

        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-score: {f1:.4f}")
        print(f"AUC-ROC: {auc:.4f}")

# Evaluate the trained model
evaluate_model(model, temporal_data)


Accuracy: 0.8070
Precision: 0.8024
Recall: 0.8140
F1-score: 0.8082
AUC-ROC: 0.8831


#### **Temporal Graph with Graphomer Transformer Model**

##### **Define the Model**

In [18]:
class Graphomer(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout=0.1):
        super(Graphomer, self).__init__()
        self.conv1 = TransformerConv(input_dim, hidden_dim, heads=num_heads, dropout=dropout)
        self.conv2 = TransformerConv(hidden_dim * num_heads, output_dim, heads=1, dropout=dropout)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

##### **Initialize and Train the Model**

In [19]:

# Model parameters
input_dim = temporal_data.x.size(1)  # Number of node features
hidden_dim = 32
output_dim = 2  # Binary classification (isFraud)
num_heads = 2  # Multi-head attention
dropout = 0.1
learning_rate = 0.005
weight_decay = 5e-4

# Initialize model, optimizer, and loss function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Graphomer(input_dim, hidden_dim, output_dim, num_heads, dropout).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
criterion = torch.nn.NLLLoss()

# Move data to device
temporal_data = temporal_data.to(device)

# Train-test-validation masks
train_mask = temporal_data.train_mask
val_mask = temporal_data.val_mask
test_mask = temporal_data.test_mask

# Training Loop
num_epochs = 50
patience = 5
best_val_loss = float('inf')
stopping_counter = 0

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(temporal_data.x, temporal_data.edge_index)
    train_loss = criterion(out[train_mask], temporal_data.y[train_mask])
    train_loss.backward()
    optimizer.step()

    # Validation step
    model.eval()
    with torch.no_grad():
        val_loss = criterion(out[val_mask], temporal_data.y[val_mask])
        val_acc = (out[val_mask].argmax(dim=1) == temporal_data.y[val_mask]).float().mean()

    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_acc.item():.4f}")

    # Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        stopping_counter = 0
    else:
        stopping_counter += 1
        if stopping_counter >= patience:
            early_stop = True


Epoch 1/50, Train Loss: 0.8349, Val Loss: 0.8322, Val Accuracy: 0.5429
Epoch 2/50, Train Loss: 0.8341, Val Loss: 0.8266, Val Accuracy: 0.6497
Epoch 3/50, Train Loss: 0.6644, Val Loss: 0.6701, Val Accuracy: 0.6685
Epoch 4/50, Train Loss: 0.6506, Val Loss: 0.6490, Val Accuracy: 0.6734
Epoch 5/50, Train Loss: 0.6193, Val Loss: 0.6173, Val Accuracy: 0.6831
Epoch 6/50, Train Loss: 0.5959, Val Loss: 0.5937, Val Accuracy: 0.6912
Epoch 7/50, Train Loss: 0.5798, Val Loss: 0.5789, Val Accuracy: 0.7022
Epoch 8/50, Train Loss: 0.5762, Val Loss: 0.5737, Val Accuracy: 0.7139
Epoch 9/50, Train Loss: 0.5713, Val Loss: 0.5724, Val Accuracy: 0.7141
Epoch 10/50, Train Loss: 0.5620, Val Loss: 0.5591, Val Accuracy: 0.7233
Epoch 11/50, Train Loss: 0.5566, Val Loss: 0.5550, Val Accuracy: 0.7234
Epoch 12/50, Train Loss: 0.5530, Val Loss: 0.5530, Val Accuracy: 0.7254
Epoch 13/50, Train Loss: 0.5467, Val Loss: 0.5472, Val Accuracy: 0.7307
Epoch 14/50, Train Loss: 0.5394, Val Loss: 0.5401, Val Accuracy: 0.7355
E

##### **Evaluate the Model**

In [20]:
def evaluate_model(model, data, mask):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        preds = out[mask].argmax(dim=1).cpu().numpy()
        labels = data.y[mask].cpu().numpy()

    # Compute metrics
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, zero_division=0)
    recall = recall_score(labels, preds, zero_division=0)
    f1 = f1_score(labels, preds, zero_division=0)
    auc = roc_auc_score(labels, out[mask][:, 1].cpu().numpy())

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"AUC-ROC: {auc:.4f}")

# Evaluate on test set
evaluate_model(model, temporal_data, test_mask)

Accuracy: 0.7983
Precision: 0.7870
Recall: 0.8175
F1-Score: 0.8020
AUC-ROC: 0.8786
