In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df_wallets_features = pd.read_csv("DL_Elliptic_Dataset/wallets_features.csv")
print("\nActors features: \n",df_wallets_features.head())

df_wallets_classes = pd.read_csv("DL_Elliptic_Dataset/wallets_classes.csv")
print("\nActors Classes: \n",df_wallets_classes.head())

df_AddrAddr_edgelist = pd.read_csv("DL_Elliptic_Dataset/AddrAddr_edgelist.csv")
print("\nAddress-Address edgelist: \n",df_AddrAddr_edgelist.head())

df_AddrTx_edgelist = pd.read_csv("DL_Elliptic_Dataset/AddrTx_edgelist.csv")
print("\nAddress-Transaction edgelist: \n",df_AddrTx_edgelist.head())

df_TxAddr_edgelist = pd.read_csv("DL_Elliptic_Dataset/TxAddr_edgelist.csv")
print("\nTransaction-Address edgelist: \n",df_TxAddr_edgelist.head())

print("\n")
df_classes = pd.read_csv("DL_Elliptic_Dataset/txs_classes.csv")
df_edges = pd.read_csv("DL_Elliptic_Dataset/txs_edgelist.csv")
df_features = pd.read_csv("DL_Elliptic_Dataset/txs_features.csv", header=None)
df_wallets_features_classes_combined = pd.read_csv("DL_Elliptic_Dataset/wallets_features_classes_combined.csv")


Actors features: 
                              address  Time step  num_txs_as_sender  \
0  111112TykSw72ztDN2WJger4cynzWYC5w         25                0.0   
1  1111DAYXhoxZx2tsRnzimfozo783x1yC2         25                0.0   
2  1111DAYXhoxZx2tsRnzimfozo783x1yC2         29                0.0   
3  1111DAYXhoxZx2tsRnzimfozo783x1yC2         39                0.0   
4  1111DAYXhoxZx2tsRnzimfozo783x1yC2         39                0.0   

   num_txs_as receiver  first_block_appeared_in  last_block_appeared_in  \
0                  1.0                 439586.0                439586.0   
1                  8.0                 439589.0                485959.0   
2                  8.0                 439589.0                485959.0   
3                  8.0                 439589.0                485959.0   
4                  8.0                 439589.0                485959.0   

   lifetime_in_blocks  total_txs  first_sent_block  first_received_block  ...  \
0                 0.0      

  df_features = pd.read_csv("DL_Elliptic_Dataset/txs_features.csv", header=None)


In [13]:
from torch_geometric.nn import GINConv
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
import networkx as nx
from torch_geometric.data import Data
from sklearn.model_selection import train_test_split

# Merge class labels with features
df_wallets_classes.loc[df_wallets_classes['class'] == 'unknown', 'class'] = 3
df_class_feature = pd.merge(df_wallets_classes, df_wallets_features, on='address')

# Prepare data for Graph Construction (using wallet interactions from Address-Address edgelist)
# We'll only select wallets from the class that are not 'unknown'
selected_wallets = df_class_feature.loc[(df_class_feature['class'] != 3), 'address']
df_AddrAddr_edgelist_selected = df_AddrAddr_edgelist.loc[
    df_AddrAddr_edgelist['input_address'].isin(selected_wallets) &
    df_AddrAddr_edgelist['output_address'].isin(selected_wallets)
]

# Select wallet classes and features
df_wallets_classes_selected = df_wallets_classes.loc[df_wallets_classes['address'].isin(selected_wallets)]
df_wallets_features_selected = df_wallets_features.loc[df_wallets_features['address'].isin(selected_wallets)]
# Merge class and features for selected wallets
df_class_feature_selected = pd.merge(df_wallets_classes_selected, df_wallets_features_selected, on='address')

# Ensure class labels are correctly mapped (0 for licit, 1 for illicit)
df_class_feature_selected['class'] = df_class_feature_selected['class'].apply(lambda x: 0 if x == 2 else 1)

# Prepare feature matrix X and labels y as before
scaler = StandardScaler()
X = df_class_feature_selected.drop(columns=['address', 'class', 'Time step']).values
X = scaler.fit_transform(X)  # Normalize the feature columns

y = df_class_feature_selected['class'].values

# --- Apply SMOTE to handle class imbalance ---
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# --- Compute class weights for loss function ---
class_weights = compute_class_weight('balanced', classes=np.unique(y_resampled), y=y_resampled)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

# --- Convert to PyTorch tensors ---
x = torch.tensor(X_resampled, dtype=torch.float)
y = torch.tensor(y_resampled, dtype=torch.long)

# --- Graph construction and data preparation ---
graph = nx.from_pandas_edgelist(df_AddrAddr_edgelist_selected, source='input_address', target='output_address', create_using=nx.DiGraph())

# Create a mapping from address to node index
address_to_index = {address: idx for idx, address in enumerate(df_class_feature_selected['address'].unique())}

# Map the addresses in the edges to their corresponding indices
edges_as_indices = [(address_to_index[row[0]], address_to_index[row[1]]) for row in graph.edges]

# Convert edge indices to PyTorch tensor
edge_index = torch.tensor(np.array(edges_as_indices).T, dtype=torch.long)

# Prepare data for PyTorch Geometric
data = Data(x=x, edge_index=edge_index, y=y)

In [4]:
data

Data(x=[677742, 55], edge_index=[2, 1092323], y=[677742])

In [5]:

# --- Split data into train and test ---
train_mask, test_mask = train_test_split(range(data.num_nodes), test_size=0.25, random_state=15)
data.train_mask = torch.tensor(train_mask, dtype=torch.long)
data.test_mask = torch.tensor(test_mask, dtype=torch.long)

# --- Define the GIN model ---
class GIN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GIN, self).__init__()
        self.conv1 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim)
        ))
        self.conv2 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(hidden_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, output_dim)
        ))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# --- Initialize model, optimizer, and loss function ---
input_dim = X_resampled.shape[1]
hidden_dim = 16
output_dim = 2  # Binary classification: 0 for licit, 1 for illicit

model = GIN(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# --- Training function ---
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask], weight=class_weights_tensor)
    loss.backward()
    optimizer.step()
    return loss.item()

# --- Test function ---
def test():
    model.eval()
    _, pred = model(data).max(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(test_correct.sum()) / len(data.test_mask)
    return test_acc, pred

In [6]:
# --- Training loop ---
losses = []
accuracies = []
for epoch in range(1, 101):
    loss = train()
    losses.append(loss)
    if epoch % 10 == 0:
        acc, _ = test()
        accuracies.append(acc)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')

Epoch: 010, Loss: 0.6458, Test Acc: 0.7196
Epoch: 020, Loss: 0.6081, Test Acc: 0.7511
Epoch: 030, Loss: 0.5628, Test Acc: 0.7886
Epoch: 040, Loss: 0.5141, Test Acc: 0.8067
Epoch: 050, Loss: 0.4701, Test Acc: 0.7485
Epoch: 060, Loss: 0.4409, Test Acc: 0.8085
Epoch: 070, Loss: 0.4176, Test Acc: 0.7378
Epoch: 080, Loss: 0.3983, Test Acc: 0.8423
Epoch: 090, Loss: 0.3860, Test Acc: 0.8500
Epoch: 100, Loss: 0.3746, Test Acc: 0.8564


In [7]:

# --- Evaluate metrics ---
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

def evaluate_metrics():
    model.eval()
    _, pred = model(data).max(dim=1)
    pred = pred[data.test_mask].detach().numpy()
    true_labels = data.y[data.test_mask].detach().numpy()

    accuracy = accuracy_score(true_labels, pred)
    precision = precision_score(true_labels, pred)
    recall = recall_score(true_labels, pred)
    f1 = f1_score(true_labels, pred)
    auc = roc_auc_score(true_labels, pred)
    cm = confusion_matrix(true_labels, pred)

    return accuracy, precision, recall, f1, auc, cm

accuracy, precision, recall, f1, auc, cm = evaluate_metrics()

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC Score: {auc:.4f}")
print(f"Confusion Matrix:\n{cm}")


Accuracy: 0.8564
Precision: 0.8975
Recall: 0.8038
F1 Score: 0.8480
AUC Score: 0.8563
Confusion Matrix:
[[77217  7755]
 [16574 67890]]


In [10]:
from itertools import product
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Define hyperparameter grid
param_grid = {
    'lr': [0.001, 0.005, 0.01],
    'hidden_dim': [16, 32, 64],
    'dropout': [0.2, 0.4, 0.5],
    'weight_decay': [1e-4, 5e-4, 1e-3]
}

# Create all combinations of hyperparameters
param_combinations = list(product(param_grid['lr'], param_grid['hidden_dim'], param_grid['dropout'], param_grid['weight_decay']))

# Define the GIN model with dynamic parameters
class GIN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, dropout):
        super(GIN, self).__init__()
        self.conv1 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim)
        ))
        self.conv2 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(hidden_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, output_dim)
        ))
        self.dropout = dropout

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Function to train and evaluate a single model configuration
def train_and_evaluate(lr, hidden_dim, dropout, weight_decay):
    # Initialize model, optimizer, and loss function
    model = GIN(input_dim=X_resampled.shape[1], hidden_dim=hidden_dim, output_dim=2, dropout=dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)

    # Training loop
    for epoch in range(1, 21):  # Fewer epochs for tuning
        model.train()
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask], weight=class_weights_tensor)
        loss.backward()
        optimizer.step()

    # Evaluate the model
    model.eval()
    _, pred = model(data).max(dim=1)
    pred = pred[data.test_mask].detach().numpy()
    true_labels = data.y[data.test_mask].detach().numpy()

    # Calculate metrics
    accuracy = accuracy_score(true_labels, pred)
    precision = precision_score(true_labels, pred)
    recall = recall_score(true_labels, pred)
    f1 = f1_score(true_labels, pred)
    auc = roc_auc_score(true_labels, pred)

    return accuracy, precision, recall, f1, auc

In [11]:

# Iterate over all parameter combinations
best_params = None
best_f1 = 0
results = []

print(f"Starting hyperparameter tuning for {len(param_combinations)} combinations...\n")

for idx, (lr, hidden_dim, dropout, weight_decay) in enumerate(param_combinations):
    print(f"Running combination {idx + 1}/{len(param_combinations)}: LR={lr}, Hidden Dim={hidden_dim}, Dropout={dropout}, Weight Decay={weight_decay}")
    
    accuracy, precision, recall, f1, auc = train_and_evaluate(lr, hidden_dim, dropout, weight_decay)
    results.append((lr, hidden_dim, dropout, weight_decay, accuracy, precision, recall, f1, auc))
    
    print(f"--> Results: Accuracy={accuracy:.4f}, Precision={precision:.4f}, Recall={recall:.4f}, F1 Score={f1:.4f}, AUC={auc:.4f}\n")
    
    if f1 > best_f1:
        best_f1 = f1
        best_params = (lr, hidden_dim, dropout, weight_decay)

print(f"Best Parameters: LR={best_params[0]}, Hidden Dim={best_params[1]}, Dropout={best_params[2]}, Weight Decay={best_params[3]}")
print(f"Best F1 Score: {best_f1:.4f}")

# Display all results
results_df = pd.DataFrame(results, columns=['LR', 'Hidden Dim', 'Dropout', 'Weight Decay', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC'])
results_df.sort_values(by='F1 Score', ascending=False, inplace=True)
print(results_df)


Starting hyperparameter tuning for 81 combinations...

Running combination 1/81: LR=0.001, Hidden Dim=16, Dropout=0.2, Weight Decay=0.0001
--> Results: Accuracy=0.5007, Precision=0.2183, Recall=0.0006, F1 Score=0.0012, AUC=0.4992

Running combination 2/81: LR=0.001, Hidden Dim=16, Dropout=0.2, Weight Decay=0.0005
--> Results: Accuracy=0.5179, Precision=0.5084, Recall=0.9961, F1 Score=0.6732, AUC=0.5193

Running combination 3/81: LR=0.001, Hidden Dim=16, Dropout=0.2, Weight Decay=0.001
--> Results: Accuracy=0.6706, Precision=0.6095, Recall=0.9444, F1 Score=0.7408, AUC=0.6714

Running combination 4/81: LR=0.001, Hidden Dim=16, Dropout=0.4, Weight Decay=0.0001
--> Results: Accuracy=0.6565, Precision=0.5986, Recall=0.9437, F1 Score=0.7325, AUC=0.6573

Running combination 5/81: LR=0.001, Hidden Dim=16, Dropout=0.4, Weight Decay=0.0005
--> Results: Accuracy=0.6281, Precision=0.5749, Recall=0.9746, F1 Score=0.7232, AUC=0.6292

Running combination 6/81: LR=0.001, Hidden Dim=16, Dropout=0.4, We

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


--> Results: Accuracy=0.5015, Precision=0.0000, Recall=0.0000, F1 Score=0.0000, AUC=0.5000

Running combination 8/81: LR=0.001, Hidden Dim=16, Dropout=0.5, Weight Decay=0.0005
--> Results: Accuracy=0.4721, Precision=0.0971, Recall=0.0071, F1 Score=0.0132, AUC=0.4707

Running combination 9/81: LR=0.001, Hidden Dim=16, Dropout=0.5, Weight Decay=0.001
--> Results: Accuracy=0.5015, Precision=0.0000, Recall=0.0000, F1 Score=0.0000, AUC=0.5000

Running combination 10/81: LR=0.001, Hidden Dim=32, Dropout=0.2, Weight Decay=0.0001
--> Results: Accuracy=0.4998, Precision=0.1593, Recall=0.0008, F1 Score=0.0016, AUC=0.4983

Running combination 11/81: LR=0.001, Hidden Dim=32, Dropout=0.2, Weight Decay=0.0005
--> Results: Accuracy=0.7136, Precision=0.6485, Recall=0.9292, F1 Score=0.7639, AUC=0.7143

Running combination 12/81: LR=0.001, Hidden Dim=32, Dropout=0.2, Weight Decay=0.001
--> Results: Accuracy=0.7626, Precision=0.7127, Recall=0.8775, F1 Score=0.7866, AUC=0.7630

Running combination 13/81: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


--> Results: Accuracy=0.5015, Precision=0.0000, Recall=0.0000, F1 Score=0.0000, AUC=0.5000

Running combination 15/81: LR=0.001, Hidden Dim=32, Dropout=0.4, Weight Decay=0.001


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


--> Results: Accuracy=0.5015, Precision=0.0000, Recall=0.0000, F1 Score=0.0000, AUC=0.5000

Running combination 16/81: LR=0.001, Hidden Dim=32, Dropout=0.5, Weight Decay=0.0001
--> Results: Accuracy=0.7600, Precision=0.7319, Recall=0.8184, F1 Score=0.7727, AUC=0.7602

Running combination 17/81: LR=0.001, Hidden Dim=32, Dropout=0.5, Weight Decay=0.0005


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


--> Results: Accuracy=0.5015, Precision=0.0000, Recall=0.0000, F1 Score=0.0000, AUC=0.5000

Running combination 18/81: LR=0.001, Hidden Dim=32, Dropout=0.5, Weight Decay=0.001
--> Results: Accuracy=0.7372, Precision=0.9186, Recall=0.5187, F1 Score=0.6630, AUC=0.7365

Running combination 19/81: LR=0.001, Hidden Dim=64, Dropout=0.2, Weight Decay=0.0001
--> Results: Accuracy=0.8081, Precision=0.9074, Recall=0.6849, F1 Score=0.7806, AUC=0.8077

Running combination 20/81: LR=0.001, Hidden Dim=64, Dropout=0.2, Weight Decay=0.0005
--> Results: Accuracy=0.7066, Precision=0.9277, Recall=0.4462, F1 Score=0.6026, AUC=0.7058

Running combination 21/81: LR=0.001, Hidden Dim=64, Dropout=0.2, Weight Decay=0.001
--> Results: Accuracy=0.7607, Precision=0.6994, Recall=0.9120, F1 Score=0.7916, AUC=0.7612

Running combination 22/81: LR=0.001, Hidden Dim=64, Dropout=0.4, Weight Decay=0.0001
--> Results: Accuracy=0.7789, Precision=0.7510, Recall=0.8326, F1 Score=0.7897, AUC=0.7791

Running combination 23/81

Best Parameters: LR=0.01, Hidden Dim=16, Dropout=0.2, Weight Decay=0.0005

In [14]:

# --- Split data into train and test ---
train_mask, test_mask = train_test_split(range(data.num_nodes), test_size=0.25, random_state=15)
data.train_mask = torch.tensor(train_mask, dtype=torch.long)
data.test_mask = torch.tensor(test_mask, dtype=torch.long)


# --- Define the GIN model ---
class GIN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GIN, self).__init__()
        self.conv1 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim)
        ))
        self.conv2 = GINConv(nn=torch.nn.Sequential(
            torch.nn.Linear(hidden_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, output_dim)
        ))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


# --- Initialize model, optimizer, and loss function ---
input_dim = X_resampled.shape[1]
hidden_dim = 16
output_dim = 2  # Binary classification: 0 for licit, 1 for illicit

model = GIN(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005)


# --- Training function ---
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask], weight=class_weights_tensor)
    loss.backward()
    optimizer.step()
    return loss.item()


# --- Test function ---
def test():
    model.eval()
    _, pred = model(data).max(dim=1)
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    test_acc = int(test_correct.sum()) / len(data.test_mask)
    return test_acc, pred

In [15]:


# --- Training loop ---
losses = []
accuracies = []
for epoch in range(1, 101):
    loss = train()
    losses.append(loss)
    if epoch % 10 == 0:
        acc, _ = test()
        accuracies.append(acc)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Acc: {acc:.4f}')

Epoch: 010, Loss: 0.6115, Test Acc: 0.7501
Epoch: 020, Loss: 0.5495, Test Acc: 0.7925
Epoch: 030, Loss: 0.4991, Test Acc: 0.8198
Epoch: 040, Loss: 0.4601, Test Acc: 0.8144
Epoch: 050, Loss: 0.4296, Test Acc: 0.8182
Epoch: 060, Loss: 0.4117, Test Acc: 0.8214
Epoch: 070, Loss: 0.3951, Test Acc: 0.8288
Epoch: 080, Loss: 0.3832, Test Acc: 0.8464
Epoch: 090, Loss: 0.3680, Test Acc: 0.8609
Epoch: 100, Loss: 0.3558, Test Acc: 0.8693


In [16]:

# --- Evaluate metrics ---
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix


def evaluate_metrics():
    model.eval()
    _, pred = model(data).max(dim=1)
    pred = pred[data.test_mask].detach().numpy()
    true_labels = data.y[data.test_mask].detach().numpy()

    accuracy = accuracy_score(true_labels, pred)
    precision = precision_score(true_labels, pred)
    recall = recall_score(true_labels, pred)
    f1 = f1_score(true_labels, pred)
    auc = roc_auc_score(true_labels, pred)
    cm = confusion_matrix(true_labels, pred)

    return accuracy, precision, recall, f1, auc, cm


accuracy, precision, recall, f1, auc, cm = evaluate_metrics()

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC Score: {auc:.4f}")
print(f"Confusion Matrix:\n{cm}")

Accuracy: 0.8693
Precision: 0.8845
Recall: 0.8486
F1 Score: 0.8662
AUC Score: 0.8692
Confusion Matrix:
[[75611  9361]
 [12784 71680]]
