In [72]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv, global_mean_pool, SAGEConv, global_mean_pool
from torch_geometric.data import NeighborSampler
from torch_geometric.loader import NeighborLoader
from torch_geometric.data import Data, DataLoader as PyGDataLoader

In [2]:
from tqdm import tqdm
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent.parent))  # Adjust as needed
from config import DATAPATH, SAMPLE_DATAPATH

In [None]:
class GraphSAGE_AccountRiskDetector(nn.Module):
    """
    GraphSAGE model for scalable account risk detection
    Uses sampling and aggregation for handling large transaction graphs
    """
    
    def __init__(self, num_features, hidden_dim=256, num_layers=3, dropout=0.3, aggregator='mean'):
        super(GraphSAGE_AccountRiskDetector, self).__init__()
        
        self.num_layers = num_layers
        self.dropout = dropout
        
        # GraphSAGE layers
        self.convs = nn.ModuleList()
        self.convs.append(SAGEConv(num_features, hidden_dim, aggr=aggregator))
        
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_dim, hidden_dim, aggr=aggregator))
        
        self.convs.append(SAGEConv(hidden_dim, hidden_dim, aggr=aggregator))
        
        # Batch normalization layers
        self.batch_norms = nn.ModuleList()
        for _ in range(num_layers):
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
        
        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, hidden_dim // 4),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 4, 2)
        )
    
    def forward(self, x, edge_index):
        # GraphSAGE layers with residual connections
        for i, conv in enumerate(self.convs[:-1]):
            x_new = conv(x, edge_index)
            x_new = self.batch_norms[i](x_new)
            x_new = F.relu(x_new)
            x_new = F.dropout(x_new, p=self.dropout, training=self.training)
            
            # Residual connection (if dimensions match)
            if i > 0 and x.size(-1) == x_new.size(-1):
                x = x + x_new
            else:
                x = x_new
        
        # Final layer
        x = self.convs[-1](x, edge_index)
        x = self.batch_norms[-1](x)
        x = F.relu(x)
        
        # Classification
        out = self.classifier(x)
        return out

In [65]:
@torch.no_grad()
def evaluate_graphsage(model, loader, device):
    """
    Evaluation function for GraphSAGE
    """
    model.eval()
    
    predictions = []
    labels = []
    probabilities = []
    
    for batch in loader:
        batch = batch.to(device)
        out = model(batch.x, batch.edge_index)
        
        # Get predictions for target nodes only
        pred = out[:batch.batch_size].argmax(dim=1)
        probs = F.softmax(out[:batch.batch_size], dim=1)[:, 1]
        
        predictions.append(pred.cpu())
        labels.append(batch.y[:batch.batch_size].cpu())
        probabilities.append(probs.cpu())
    
    predictions = torch.cat(predictions)
    labels = torch.cat(labels)
    probabilities = torch.cat(probabilities)
    
    accuracy = (predictions == labels).float().mean().item()
    
    return accuracy, predictions.numpy(), probabilities.numpy(), labels.numpy()

In [35]:
def create_inductive_dataset(df, cutoff_date, gap_days=7):
    """
    Create proper inductive test setup:
    - Training: transactions before cutoff_date
    - Gap: gap_days with no data (prevents leakage)
    - Test: new accounts appearing after cutoff_date + gap
    """
    print("Creating inductive dataset...")
    
    # Define time periods
    train_end = cutoff_date
    test_start = cutoff_date + pd.Timedelta(days=gap_days)
    
    # Split data
    train_data = df[df['DateTime'] <= train_end].copy()
    test_data = df[df['DateTime'] >= test_start].copy()

    print(f"Training period: {train_data['DateTime'].min()} to {train_data['DateTime'].max()}")
    print(f"Gap period: {gap_days} days")
    print(f"Test period: {test_data['DateTime'].min()} to {test_data['DateTime'].max()}")
    
    # Get account sets
    train_accounts = set(train_data['Sender_account']) | set(train_data['Receiver_account'])
    test_accounts = set(test_data['Sender_account']) | set(test_data['Receiver_account'])
    
    # Find completely new accounts (never seen in training)
    new_accounts = test_accounts - train_accounts
    # Find accounts that appeared in training and also appear in test
    existing_accounts_in_test = test_accounts & train_accounts
    
    print(f"\nAccount Statistics:")
    print(f"Training accounts: {len(train_accounts):,}")
    print(f"Test period accounts: {len(test_accounts):,}")
    print(f"Completely new accounts: {len(new_accounts):,}")
    print(f"Existing accounts in test: {len(existing_accounts_in_test):,}")
    
    return {
        'train_data': train_data,
        'test_data': test_data,
        'train_accounts': train_accounts,
        'new_accounts': new_accounts,
        'existing_accounts_in_test': existing_accounts_in_test
    }

In [4]:
def extract_account_features(account, sent_txns, recv_txns):
    """
    Extract comprehensive behavioral features for an account
    """
    features = {}
    
    # === YOUR EXISTING FEATURES (keep all) ===
    features['total_txns'] = len(sent_txns) + len(recv_txns)
    features['sent_count'] = len(sent_txns)
    features['recv_count'] = len(recv_txns)
    features['sent_recv_ratio'] = len(sent_txns) / (len(recv_txns) + 1)
    
    features['total_sent_amount'] = sent_txns['Amount'].sum()
    features['total_recv_amount'] = recv_txns['Amount'].sum()
    features['avg_sent_amount'] = sent_txns['Amount'].mean() if len(sent_txns) > 0 else 0
    features['avg_recv_amount'] = recv_txns['Amount'].mean() if len(recv_txns) > 0 else 0
    features['std_sent_amount'] = sent_txns['Amount'].std() if len(sent_txns) > 1 else 0
    features['std_recv_amount'] = recv_txns['Amount'].std() if len(recv_txns) > 1 else 0
    features['median_sent_amount'] = sent_txns['Amount'].median() if len(sent_txns) > 0 else 0
    features['median_recv_amount'] = recv_txns['Amount'].median() if len(recv_txns) > 0 else 0
    
    features['unique_senders'] = recv_txns['Sender_account'].nunique()
    features['unique_receivers'] = sent_txns['Receiver_account'].nunique()
    
    return features

In [5]:
def extract_features_for_accounts(data, accounts, feature_extraction_function):
    """
    Extract features for accounts using only their test period transactions
    data: DataFrame with transaction data
    accounts: List of accounts to extract features for
    feature_extraction_function: Function to extract features for a single account
    """
    print(f"Extracting features for {len(accounts):,} accounts...")

    account_features = []
    account_labels = []
    account_ids = []

    for i, account in enumerate(accounts):
        if (i + 1) % 10000 == 0:
            print(f"  Processed {i+1:,} / {len(accounts):,} accounts...")
        
        # Get transactions for this account in test period only
        sent_txns = data[data['Sender_account'] == account]
        recv_txns = data[data['Receiver_account'] == account]

        # Skip accounts with very few transactions
        if len(sent_txns) + len(recv_txns) < 2:
            continue
        
        # Extract features using only test period data
        features = feature_extraction_function(account, sent_txns, recv_txns)
        
        # Label: suspicious if involved in any suspicious transaction
        is_suspicious = (sent_txns['Is_laundering'].sum() > 0) or (recv_txns['Is_laundering'].sum() > 0)

        account_features.append(features)
        account_labels.append(1 if is_suspicious else 0)
        account_ids.append(account)

    # Convert to DataFrame
    features_df = pd.DataFrame(account_features, index=account_ids)
    labels_series = pd.Series(account_labels, index=account_ids, name='high_risk')

    print(f"Features extracted for {len(features_df)} accounts")
    print(f"Suspicious accounts: {labels_series.sum()} ({labels_series.mean()*100:.2f}%)")

    return features_df, labels_series

In [55]:
def build_transaction_graph(df, account_features_df, account_labels):
    """
    Create graph where nodes are accounts and edges are transactions
    """
    print("Building transaction network graph...")
    
    # Create mapping of account to index
    accounts = list(account_features_df.index)
    account_to_idx = {acc: idx for idx, acc in enumerate(accounts)}
    
    # Build edges
    edge_index = []
    edge_attr = []
    
    for _, row in df.iterrows():
        sender = row['Sender_account']
        receiver = row['Receiver_account']
        
        if sender in account_to_idx and receiver in account_to_idx:
            sender_idx = account_to_idx[sender]
            receiver_idx = account_to_idx[receiver]
            
            edge_index.append([sender_idx, receiver_idx])
            
            # Edge features: amount, payment type encoded
            edge_features = [
                row['Amount'],
                row['Payment_currency_encoded'],
                row['Received_currency_encoded'],
                row['Sender_bank_location_encoded'],
                row['Receiver_bank_location_encoded'],
                row['Payment_type_encoded'],
                1 if row['Payment_type'] == 'Cross-border' else 0
            ]
            edge_attr.append(edge_features)
    
    # Convert to tensors
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)
    
    # Node features
    x = torch.tensor(account_features_df.values, dtype=torch.float)
    
    # Labels
    y = torch.tensor(account_labels.values, dtype=torch.long)
    
    # Create PyG data object
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    
    return data, account_to_idx

In [36]:
# Load the entire dataset
df = pd.read_csv(DATAPATH)

# Add and delete columns
df['DateTime'] = pd.to_datetime(df["Date"] + ' ' + df["Time"], format='%Y-%m-%d %H:%M:%S')

# df.drop(columns=['Laundering_type'], inplace=True)
df.drop(columns=['Time', 'Date'], inplace=True)

print("\nDate range:")
print(f"From: {df['DateTime'].min()}")
print(f"To: {df['DateTime'].max()}")


Date range:
From: 2022-10-07 10:35:19
To: 2023-08-23 10:57:12


In [37]:
cutoff_date = pd.to_datetime('2023-05-31')

# Spliting dataset
dataset_split = create_inductive_dataset(df, cutoff_date, gap_days=7)

# Delete full transaction table 
del df

Creating inductive dataset...
Training period: 2022-10-07 10:35:19 to 2023-05-30 23:59:58
Gap period: 7 days
Test period: 2023-06-07 00:00:14 to 2023-08-23 10:57:12

Account Statistics:
Training accounts: 735,533
Test period accounts: 474,075
Completely new accounts: 110,905
Existing accounts in test: 363,170


In [38]:
print(dataset_split.keys())

dict_keys(['train_data', 'test_data', 'train_accounts', 'new_accounts', 'existing_accounts_in_test'])


In [49]:
train_accounts = dataset_split['train_accounts']
new_accounts = dataset_split['new_accounts']
existing_accounts_in_test = dataset_split['existing_accounts_in_test']
test_accounts = new_accounts | existing_accounts_in_test

train_transaction_df = dataset_split['train_data']
test_transaction_df = dataset_split['test_data']

In [50]:
# Load temporary file (other notebooks)
batch_size = 100000

for batch in range(len(train_accounts)//batch_size + 1):
    print(f"Loading batch {batch}...")
    features = np.load(f"tmp/features_temp_{batch}.npy", allow_pickle=True)
    labels = np.load(f"tmp/labels_temp_{batch}.npy", allow_pickle=True)
    ids = np.load(f"tmp/ids_temp_{batch}.npy", allow_pickle=True)
    
    if batch == 0:
        train_features = features
        train_labels = labels
        train_ids = ids
    else:
        train_features = np.concatenate((train_features, features), axis=0)
        train_labels = np.concatenate((train_labels, labels), axis=0)
        train_ids = np.concatenate((train_ids, ids), axis=0)

Loading batch 0...
Loading batch 1...
Loading batch 2...
Loading batch 3...
Loading batch 4...
Loading batch 5...
Loading batch 6...
Loading batch 7...


In [51]:
train_features_df = pd.DataFrame(train_features.tolist(), index=train_ids)
train_labels_series = pd.Series(train_labels, index=train_ids, name='Acc_label')

print(f"Feature matrix shape: {train_features_df.shape}")
print(f"Number of features: {train_features_df.shape[1]}")

Feature matrix shape: (735533, 14)
Number of features: 14


In [52]:
# Convert Amount to log
for feat in ['total_sent_amount', 'total_recv_amount', 'avg_sent_amount', 'avg_recv_amount',
             'std_sent_amount', 'std_recv_amount', 'median_sent_amount', 'median_recv_amount']:
            #  'max_txn_amount_7d']:
    train_features_df[feat] = np.log1p(train_features_df[feat])

# Convert Amount to log in transaction table
train_transaction_df['Amount'] = np.log1p(train_transaction_df['Amount'])

In [54]:
test_features_df, test_labels_series = extract_features_for_accounts(
        test_transaction_df, 
        test_accounts,
        extract_account_features
    )

Extracting features for 474,075 accounts...
  Processed 10,000 / 474,075 accounts...
  Processed 20,000 / 474,075 accounts...
  Processed 30,000 / 474,075 accounts...
  Processed 40,000 / 474,075 accounts...
  Processed 50,000 / 474,075 accounts...
  Processed 60,000 / 474,075 accounts...
  Processed 70,000 / 474,075 accounts...
  Processed 80,000 / 474,075 accounts...
  Processed 90,000 / 474,075 accounts...
  Processed 100,000 / 474,075 accounts...
  Processed 110,000 / 474,075 accounts...
  Processed 120,000 / 474,075 accounts...
  Processed 130,000 / 474,075 accounts...
  Processed 140,000 / 474,075 accounts...
  Processed 150,000 / 474,075 accounts...
  Processed 160,000 / 474,075 accounts...
  Processed 170,000 / 474,075 accounts...
  Processed 180,000 / 474,075 accounts...
  Processed 190,000 / 474,075 accounts...
  Processed 200,000 / 474,075 accounts...
  Processed 210,000 / 474,075 accounts...
  Processed 220,000 / 474,075 accounts...
  Processed 230,000 / 474,075 accounts...

In [None]:
# Encode categorical features in transaction table
from sklearn.preprocessing import LabelEncoder

# Initialize label encoders for each categorical column
labelEncoders = {}
categorical_cols = ['Payment_currency', 'Received_currency', 'Sender_bank_location', 
                   'Receiver_bank_location', 'Payment_type']

# Fit encoders and transform data
for col in categorical_cols:
    labelEncoders[col] = LabelEncoder()
    train_transaction_df[f'{col}_encoded'] = labelEncoders[col].fit_transform(train_transaction_df[col])
    test_transaction_df[f'{col}_encoded'] = labelEncoders[col].transform(test_transaction_df[col])

# Print encoding mappings
for col in categorical_cols:
    print(f"\n{col} encoding:")
    for i, class_name in enumerate(labelEncoders[col].classes_):
        print(f"  {class_name} → {i}")


Payment_currency encoding:
  Albanian lek → 0
  Dirham → 1
  Euro → 2
  Indian rupee → 3
  Mexican Peso → 4
  Moroccan dirham → 5
  Naira → 6
  Pakistani rupee → 7
  Swiss franc → 8
  Turkish lira → 9
  UK pounds → 10
  US dollar → 11
  Yen → 12

Received_currency encoding:
  Albanian lek → 0
  Dirham → 1
  Euro → 2
  Indian rupee → 3
  Mexican Peso → 4
  Moroccan dirham → 5
  Naira → 6
  Pakistani rupee → 7
  Swiss franc → 8
  Turkish lira → 9
  UK pounds → 10
  US dollar → 11
  Yen → 12

Sender_bank_location encoding:
  Albania → 0
  Austria → 1
  France → 2
  Germany → 3
  India → 4
  Italy → 5
  Japan → 6
  Mexico → 7
  Morocco → 8
  Netherlands → 9
  Nigeria → 10
  Pakistan → 11
  Spain → 12
  Switzerland → 13
  Turkey → 14
  UAE → 15
  UK → 16
  USA → 17

Receiver_bank_location encoding:
  Albania → 0
  Austria → 1
  France → 2
  Germany → 3
  India → 4
  Italy → 5
  Japan → 6
  Mexico → 7
  Morocco → 8
  Netherlands → 9
  Nigeria → 10
  Pakistan → 11
  Spain → 12
  Switzerland 

In [59]:
# Normalize features
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features_df)
train_features_df_scaled = pd.DataFrame(train_features_scaled, columns=train_features_df.columns, index=train_features_df.index)

print("Features normalized!")
print(f"Scaled features shape: {train_features_df_scaled.shape}")

Features normalized!
Scaled features shape: (735533, 14)


In [60]:
# Rescale test features
test_features_df_scaled = scaler.transform(test_features_df)
test_features_df_scaled = pd.DataFrame(test_features_df_scaled, columns=test_features_df.columns, index=test_features_df.index)

print("Features normalized!")
print(f"Scaled features shape: {test_features_df_scaled.shape}")

Features normalized!
Scaled features shape: (393119, 14)


In [68]:
test_graph_data, test_account_to_idx = build_transaction_graph(test_transaction_df, test_features_df_scaled, test_labels_series)

print("\n" + "="*70)
print("GRAPH STATISTICS")
print("="*70)
print(f"Number of nodes (accounts): {test_graph_data.num_nodes:,}")
print(f"Number of edges (transactions): {test_graph_data.num_edges:,}")
print(f"Number of features per node: {test_graph_data.num_node_features}")
print(f"Average degree: {test_graph_data.num_edges / test_graph_data.num_nodes:.2f}")

Building transaction network graph...

GRAPH STATISTICS
Number of nodes (accounts): 393,119
Number of edges (transactions): 2,200,629
Number of features per node: 14
Average degree: 5.60


In [69]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [74]:
# 1. Initialize the model architecture
model = GraphSAGE_AccountRiskDetector(
    num_features=test_graph_data.num_node_features,
    hidden_dim=256,  # Must match your training config
    num_layers=3,    # Must match your training config / Number of neighbourhood
    dropout=0.3,
    aggregator='mean'
).to(device)

# 2. Load the saved parameters
model.load_state_dict(torch.load('best_graphsage_model.pt'))

  model.load_state_dict(torch.load('best_graphsage_model.pt'))


<All keys matched successfully>

In [75]:
# 3. Set to evaluation mode
model.eval()

GraphSAGE_AccountRiskDetector(
  (convs): ModuleList(
    (0): SAGEConv(14, 256, aggr=mean)
    (1-2): 2 x SAGEConv(256, 256, aggr=mean)
  )
  (batch_norms): ModuleList(
    (0-2): 3 x BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=64, out_features=2, bias=True)
  )
)

In [77]:
# 4. Then create your test loader and evaluate
test_loader = NeighborLoader(
    test_graph_data,
    num_neighbors=[15, 10, 5],
    batch_size=1024,
    input_nodes=torch.ones(test_graph_data.num_nodes, dtype=torch.bool),
    shuffle=False,
    num_workers=0
)

In [78]:
# 5. Use your evaluation function
test_acc, test_pred, test_probs, test_labels = evaluate_graphsage(model, test_loader, device)

In [81]:
print(classification_report(test_labels, test_pred, 
                           target_names=['Normal Account', 'High-Risk Account'],
                           digits=4))

                   precision    recall  f1-score   support

   Normal Account     0.9949    0.9769    0.9858    391041
High-Risk Account     0.0125    0.0549    0.0203      2078

         accuracy                         0.9720    393119
        macro avg     0.5037    0.5159    0.5031    393119
     weighted avg     0.9897    0.9720    0.9807    393119



In [None]:
def full_inductive_evaluation_pipeline(df, features_df_scaled, labels_series, model, 
                                     feature_extraction_function, scaler, device,
                                     cutoff_date=None, gap_days=7):
    """
    Complete pipeline for evaluating GraphSAGE on new nodes with new transactions
    """
    print("="*70)
    print("INDUCTIVE EVALUATION: NEW NODES WITH NEW TRANSACTIONS")
    print("="*70)
    
    # Set cutoff date if not provided (use 80% of data for training)
    if cutoff_date is None:
        cutoff_date = df['DateTime'].quantile(0.8)
    
    # 1. Create inductive dataset split
    dataset_split = create_inductive_dataset(df, cutoff_date, gap_days)
    
    if len(dataset_split['new_accounts']) == 0:
        print("No new accounts found for evaluation")
        return None
    
    # 2. Extract features for new accounts using only test period data
    new_features_df, new_labels_series = extract_features_for_new_accounts(
        dataset_split['test_data'], 
        dataset_split['new_accounts'],
        feature_extraction_function
    )
    
    if len(new_features_df) == 0:
        print("No valid new accounts with sufficient transactions")
        return None
    
    # Add labels to features dataframe for convenience
    new_features_df['high_risk'] = new_labels_series
    
    # 3. Build test period graph
    test_graph, account_to_idx, new_account_indices = build_test_period_graph(
        dataset_split['test_data'],
        dataset_split['new_accounts'],
        dataset_split['existing_accounts_in_test'],
        new_features_df,
        features_df_scaled,
        scaler
    )
    
    # 4. Evaluate model on new nodes
    results = evaluate_model_on_new_nodes(
        model, test_graph, new_account_indices, device
    )
    
    # 5. Print results
    print("\n" + "="*50)
    print("INDUCTIVE EVALUATION RESULTS")
    print("="*50)
    
    print(f"New accounts evaluated: {len(new_account_indices)}")
    print(f"Accuracy: {results['accuracy']:.4f}")
    
    if results['auc'] is not None:
        print(f"AUC: {results['auc']:.4f}")
        
        print("\nClassification Report:")
        print(classification_report(
            results['true_labels'], 
            results['predictions'],
            target_names=['Normal', 'High-Risk'],
            digits=4
        ))
    else:
        print("AUC: Not available (single class)")
    
    # 6. Performance comparison
    suspicious_ratio = results['true_labels'].mean()
    print(f"\nDataset characteristics:")
    print(f"Suspicious account ratio: {suspicious_ratio:.2%}")
    
    return {
        **results,
        'dataset_split': dataset_split,
        'new_features_df': new_features_df,
        'test_graph': test_graph,
        'account_mapping': account_to_idx
    }

In [None]:
# Example usage with your existing code
def run_inductive_test(df, features_df_scaled, labels_series, model, device):
    """
    Run inductive test using your existing feature extraction function
    """
    
    # Use your existing feature extraction function
    def extract_account_features_wrapper(account, sent_txns, recv_txns):
        # This should be your existing extract_account_features function
        # but make sure it only uses data from sent_txns and recv_txns
        # (which will be from test period only)
        
        # Import your feature extraction function here
        from your_module import extract_account_features  # Adjust import
        return extract_account_features(account, sent_txns, recv_txns)
    
    # Get the scaler used for training
    # You should save this during training
    scaler = StandardScaler()
    scaler.fit(features_df_scaled)  # Refit on training data
    
    # Run evaluation
    results = full_inductive_evaluation_pipeline(
        df=df,
        features_df_scaled=features_df_scaled,
        labels_series=labels_series,
        model=model,
        feature_extraction_function=extract_account_features_wrapper,
        scaler=scaler,
        device=device,
        cutoff_date=None,  # Will use 80% split
        gap_days=7
    )
    
    return results

# Usage:
# results = run_inductive_test(df, features_df_scaled, labels_series, model, device)