In [1]:
import pandas as pd
import numpy as np

# Load mini dataset
tx_df = pd.read_parquet("mini_transactions.parquet")

# Basic hygiene
# Keep only columns we use
use_cols = [
    "address","timeStamp","from","to","value","gas","gasPrice","gasUsed",
    "functionName","isError","txreceipt_status","label","source_file"
]
tx_df = tx_df[[c for c in use_cols if c in tx_df.columns]].copy()

# Standardize address case
tx_df["address"] = tx_df["address"].str.lower()
tx_df["from"]    = tx_df["from"].str.lower()
tx_df["to"]      = tx_df["to"].str.lower()

# Timestamps
# Real-CATS often stores UNIX seconds as strings—handle both int & str safely
tx_df["timeStamp"] = pd.to_numeric(tx_df["timeStamp"], errors="coerce")
tx_df = tx_df.dropna(subset=["timeStamp"])
tx_df["timeStamp"] = pd.to_datetime(tx_df["timeStamp"], unit="s")

# Numerics (Ethereum units are often strings in wei)
for num_col in ["value","gas","gasPrice","gasUsed","isError","txreceipt_status"]:
    if num_col in tx_df.columns:
        tx_df[num_col] = pd.to_numeric(tx_df[num_col], errors="coerce")

# Keep only rows with a functionName (you asked to filter these earlier)
if "functionName" in tx_df.columns:
    tx_df["functionName"] = tx_df["functionName"].astype(str).str.strip()
    tx_df = tx_df[tx_df["functionName"] != ""]
    tx_df = tx_df.dropna(subset=["functionName"])

# Sort by wallet and time
tx_df = tx_df.sort_values(["address","timeStamp"]).reset_index(drop=True)

# Sanity checks
print("Transactions:", len(tx_df))
print("Wallets:", tx_df["address"].nunique())
print(tx_df.head(3))


Transactions: 205705
Wallets: 387
                                      address           timeStamp  \
0  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f 2023-02-22 23:59:59   
1  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f 2023-02-23 00:05:47   
2  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f 2023-02-23 03:09:35   

                                         from  \
0  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f   
1  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f   
2  0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f   

                                           to  value     gas     gasPrice  \
0  0xbe9895146f7af43049ca1c1ae358b0541ea49704    0.0  364852  35343810889   
1  0xbe9895146f7af43049ca1c1ae358b0541ea49704    0.0  149122  35343810889   
2  0x5283d291dbcf85356a21ba090e6db59121208b44    0.0  299884  37347507096   

   gasUsed                                       functionName  isError  \
0    90306  permit(address owner, address spender, uint256...      0.0   
1    69093  transferFrom(address _f

In [2]:
# Direction: whether wallet is sender in this row
tx_df["dir_out"] = (tx_df["from"] == tx_df["address"]).astype(int)

# Ether value in wei -> ETH -> log1p
tx_df["value_eth"] = tx_df["value"] / 1e18
tx_df["value_eth_log1p"] = np.log1p(tx_df["value_eth"].clip(lower=0))

# Gas-like features
for col in ["gas","gasPrice","gasUsed"]:
    if col in tx_df.columns:
        tx_df[f"{col}_log1p"] = np.log1p(tx_df[col].fillna(0).clip(lower=0))
    else:
        tx_df[f"{col}_log1p"] = 0.0

# Error flags
tx_df["err_flag"] = tx_df["isError"].fillna(0).astype(int).clip(0,1)

# Time gap per wallet
tx_df["prev_time"] = tx_df.groupby("address")["timeStamp"].shift(1)
tx_df["tx_gap_s"]  = (tx_df["timeStamp"] - tx_df["prev_time"]).dt.total_seconds()
tx_df["tx_gap_s"]  = tx_df["tx_gap_s"].fillna(tx_df["tx_gap_s"].median())  # fill first gap
tx_df["tx_gap_s_log1p"] = np.log1p(tx_df["tx_gap_s"].clip(lower=0))

# ERC20 vs normal (depends on how you named it)
tx_df["is_erc20"] = tx_df["source_file"].astype(str).str.contains("erc20", case=False, na=False).astype(int)

# FunctionName one-hot (top N)
TOP_N_FUNCS = 20
top_funcs = tx_df["functionName"].value_counts().nlargest(TOP_N_FUNCS).index.tolist()
tx_df["functionName_squashed"] = np.where(tx_df["functionName"].isin(top_funcs), tx_df["functionName"], "OTHER")

# Build one-hot columns
func_dummies = pd.get_dummies(tx_df["functionName_squashed"], prefix="fn")
tx_df = pd.concat([tx_df, func_dummies], axis=1)

# Define final per-transaction feature columns
tx_features = [
    "dir_out",
    "value_eth_log1p","gas_log1p","gasPrice_log1p","gasUsed_log1p",
    "err_flag","tx_gap_s_log1p","is_erc20"
] + sorted([c for c in tx_df.columns if c.startswith("fn_")])

print("Per-tx feature count:", len(tx_features))
tx_df[tx_features + ["address","timeStamp","label"]].head(3)


Per-tx feature count: 29


Unnamed: 0,dir_out,value_eth_log1p,gas_log1p,gasPrice_log1p,gasUsed_log1p,err_flag,tx_gap_s_log1p,is_erc20,fn_0xfb0f3ee1(),fn_None,...,"fn_swapExactETHForTokens(uint256 amountOutMin, address[] path, address to, uint256 deadline)","fn_swapExactETHForTokensSupportingFeeOnTransferTokens(uint256 amountOutMin, address[] path, address to, uint256 deadline)","fn_swapExactTokensForETHSupportingFeeOnTransferTokens(uint256 amountIn, uint256 amountOutMin, address[] path, address to, uint256 deadline)","fn_transfer(address _to, uint256 _value)","fn_transfer(address token, address from, address to, uint256 share)","fn_transferFrom(address _from, address _to, uint256 _value)",fn_withdraw(uint256 amount),address,timeStamp,label
0,1,0.0,12.80725,24.288389,11.41097,0,4.430817,0,False,False,...,False,False,False,False,False,False,False,0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f,2023-02-22 23:59:59,1
1,1,0.0,11.912527,24.288389,11.143223,0,5.855072,0,False,False,...,False,False,False,False,False,True,False,0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f,2023-02-23 00:05:47,1
2,1,0.0,12.611154,24.343532,11.22487,0,9.308283,0,False,False,...,False,False,False,False,False,False,False,0x0000000dc3d9e17e3449e59bb75cb4005ee8aa7f,2023-02-23 03:09:35,1


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Build a list of wallets with labels
wallet_labels = tx_df[["address","label"]].drop_duplicates().reset_index(drop=True)

# Train/test split by wallet (to avoid leakage)
train_wallets, test_wallets = train_test_split(
    wallet_labels, test_size=0.2, stratify=wallet_labels["label"], random_state=42
)
train_set = set(train_wallets["address"])
test_set  = set(test_wallets["address"])

SEQ_LEN = 50

def wallet_to_matrix(df_wallet, features, seq_len=SEQ_LEN):
    """
    Given a single wallet's df (time-ordered), return a (seq_len, d) matrix
    using the most recent seq_len transactions, padding at the front if necessary.
    """
    X = df_wallet[features].to_numpy(dtype=np.float32)
    if len(X) >= seq_len:
        X = X[-seq_len:]  # last seq_len
    else:
        pad = np.zeros((seq_len - len(X), X.shape[1]), dtype=np.float32)
        X = np.vstack([pad, X])
    return X

# Build raw (unscaled) sequences for train & test
def build_sequences(frame, wallet_list):
    X_list, y_list = [], []
    for addr in wallet_list:
        w = frame[frame["address"] == addr].sort_values("timeStamp")
        if w.empty:
            continue
        mat = wallet_to_matrix(w, tx_features, SEQ_LEN)
        label = int(w["label"].iloc[0])
        X_list.append(mat)
        y_list.append(label)
    X = np.stack(X_list, axis=0)  # (N, T, D)
    y = np.array(y_list, dtype=np.int64)
    return X, y

train_df = tx_df[tx_df["address"].isin(train_set)]
test_df  = tx_df[tx_df["address"].isin(test_set)]

# Fit scaler on TRAIN transactions only (flattened)
scaler = StandardScaler(with_mean=True, with_std=True)
scaler.fit(train_df[tx_features].to_numpy(dtype=np.float32))

# Apply scaler
train_df_scaled = train_df.copy()
test_df_scaled  = test_df.copy()
train_df_scaled[tx_features] = scaler.transform(train_df[tx_features].to_numpy(dtype=np.float32))
test_df_scaled[tx_features]  = scaler.transform(test_df[tx_features].to_numpy(dtype=np.float32))

# Sequences
X_train, y_train = build_sequences(train_df_scaled, list(train_set))
X_test,  y_test  = build_sequences(test_df_scaled,  list(test_set))

X_train.shape, y_train.shape, X_test.shape, y_test.shape


((309, 50, 29), (309,), (78, 50, 29), (78,))

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class WalletSeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)  # (N,T,D)
        self.y = torch.from_numpy(y)  # (N,)
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = WalletSeqDataset(X_train, y_train)
test_ds  = WalletSeqDataset(X_test,  y_test)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, drop_last=False)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False, drop_last=False)

# Simple LSTM classifier
class LSTMClassifier(nn.Module):
    def __init__(self, d_in, d_hidden=64, num_layers=1, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=d_in, hidden_size=d_hidden, num_layers=num_layers,
            batch_first=True, dropout=dropout if num_layers > 1 else 0.0
        )
        self.head = nn.Sequential(
            nn.Linear(d_hidden, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
    def forward(self, x):              # x: (B, T, D)
        out, _ = self.lstm(x)          # out: (B, T, H)
        last = out[:, -1, :]           # last step
        logits = self.head(last)       # (B, 1)
        return logits.squeeze(-1)      # (B,)

d_in = X_train.shape[2]
model = LSTMClassifier(d_in=d_in, d_hidden=96, num_layers=2, dropout=0.2).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 10

def evaluate(model, loader):
    model.eval()
    all_logits, all_y = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device).float()
            logits = model(xb)
            all_logits.append(logits.detach().cpu().numpy())
            all_y.append(yb.cpu().numpy())
    y_true  = np.concatenate(all_y)
    y_score = 1/(1+np.exp(-np.concatenate(all_logits)))  # sigmoid
    try:
        roc = roc_auc_score(y_true, y_score)
    except ValueError:
        roc = np.nan
    try:
        pr  = average_precision_score(y_true, y_score)
    except ValueError:
        pr = np.nan
    return roc, pr, y_true, y_score

best_pr = -1
for epoch in range(1, epochs+1):
    model.train()
    running = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False):
        xb = xb.to(device)
        yb = yb.to(device).float()
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running += loss.item() * xb.size(0)

    train_loss = running / len(train_ds)
    roc, pr, y_true, y_score = evaluate(model, test_loader)
    print(f"Epoch {epoch:02d} | train_loss={train_loss:.4f} | ROC-AUC={roc:.4f} | PR-AUC={pr:.4f}")
    if pr > best_pr:
        best_pr = pr
        torch.save(model.state_dict(), "best_lstm_wallet.pt")

print("Best PR-AUC:", best_pr)


                                                                                

Epoch 01 | train_loss=0.6933 | ROC-AUC=0.7296 | PR-AUC=0.7247


                                                                                

Epoch 02 | train_loss=0.6826 | ROC-AUC=0.7561 | PR-AUC=0.7643


                                                                                

Epoch 03 | train_loss=0.6626 | ROC-AUC=0.7508 | PR-AUC=0.7706


                                                                                

Epoch 04 | train_loss=0.6240 | ROC-AUC=0.7535 | PR-AUC=0.7685


                                                                                

Epoch 05 | train_loss=0.5986 | ROC-AUC=0.7668 | PR-AUC=0.7527


                                                                                

Epoch 06 | train_loss=0.5676 | ROC-AUC=0.7774 | PR-AUC=0.7509


                                                                                

Epoch 07 | train_loss=0.5281 | ROC-AUC=0.7854 | PR-AUC=0.7514


                                                                                

Epoch 08 | train_loss=0.4767 | ROC-AUC=0.7748 | PR-AUC=0.7360


                                                                                

Epoch 09 | train_loss=0.4189 | ROC-AUC=0.7774 | PR-AUC=0.7170


                                                                                

Epoch 10 | train_loss=0.4518 | ROC-AUC=0.7635 | PR-AUC=0.7071
Best PR-AUC: 0.7706373516646063




In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class WalletSeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)  # (N,T,D)
        self.y = torch.from_numpy(y)  # (N,)
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_ds = WalletSeqDataset(X_train, y_train)
test_ds  = WalletSeqDataset(X_test,  y_test)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, drop_last=False)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False, drop_last=False)

# Simple LSTM classifier
class LSTMClassifier(nn.Module):
    def __init__(self, d_in, d_hidden=64, num_layers=1, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=d_in, hidden_size=d_hidden, num_layers=num_layers,
            batch_first=True, dropout=dropout if num_layers > 1 else 0.0
        )
        self.head = nn.Sequential(
            nn.Linear(d_hidden, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
    def forward(self, x):              # x: (B, T, D)
        out, _ = self.lstm(x)          # out: (B, T, H)
        last = out[:, -1, :]           # last step
        logits = self.head(last)       # (B, 1)
        return logits.squeeze(-1)      # (B,)

d_in = X_train.shape[2]
model = LSTMClassifier(d_in=d_in, d_hidden=96, num_layers=2, dropout=0.2).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 10

def evaluate(model, loader):
    model.eval()
    all_logits, all_y = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device).float()
            logits = model(xb)
            all_logits.append(logits.detach().cpu().numpy())
            all_y.append(yb.cpu().numpy())
    y_true  = np.concatenate(all_y)
    y_score = 1/(1+np.exp(-np.concatenate(all_logits)))  # sigmoid
    try:
        roc = roc_auc_score(y_true, y_score)
    except ValueError:
        roc = np.nan
    try:
        pr  = average_precision_score(y_true, y_score)
    except ValueError:
        pr = np.nan
    return roc, pr, y_true, y_score

best_pr = -1
for epoch in range(1, epochs+1):
    model.train()
    running = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}", leave=False):
        xb = xb.to(device)
        yb = yb.to(device).float()
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        running += loss.item() * xb.size(0)

    train_loss = running / len(train_ds)
    roc, pr, y_true, y_score = evaluate(model, test_loader)
    print(f"Epoch {epoch:02d} | train_loss={train_loss:.4f} | ROC-AUC={roc:.4f} | PR-AUC={pr:.4f}")
    if pr > best_pr:
        best_pr = pr
        torch.save(model.state_dict(), "best_lstm_wallet.pt")

print("Best PR-AUC:", best_pr)


                                                                                

Epoch 01 | train_loss=0.6933 | ROC-AUC=0.7296 | PR-AUC=0.7247


                                                                                

Epoch 02 | train_loss=0.6826 | ROC-AUC=0.7561 | PR-AUC=0.7643


                                                                                

Epoch 03 | train_loss=0.6626 | ROC-AUC=0.7508 | PR-AUC=0.7706


                                                                                

Epoch 04 | train_loss=0.6240 | ROC-AUC=0.7535 | PR-AUC=0.7685


                                                                                

Epoch 05 | train_loss=0.5986 | ROC-AUC=0.7668 | PR-AUC=0.7527


                                                                                

Epoch 06 | train_loss=0.5676 | ROC-AUC=0.7774 | PR-AUC=0.7509


                                                                                

Epoch 07 | train_loss=0.5281 | ROC-AUC=0.7854 | PR-AUC=0.7514


                                                                                

Epoch 08 | train_loss=0.4767 | ROC-AUC=0.7748 | PR-AUC=0.7360


                                                                                

Epoch 09 | train_loss=0.4189 | ROC-AUC=0.7774 | PR-AUC=0.7170


                                                                                

Epoch 10 | train_loss=0.4518 | ROC-AUC=0.7635 | PR-AUC=0.7071
Best PR-AUC: 0.7706373516646063




In [5]:
def predict_wallet_proba(wallet_df_raw):
    """
    wallet_df_raw: transactions for ONE wallet (original columns).
    Returns fraud probability using the trained LSTM.
    """
    # Build the same features and order as training
    w = wallet_df_raw.sort_values("timeStamp").copy()

    # Recompute derived columns in the SAME WAY
    w["dir_out"] = (w["from"].str.lower() == w["address"].str.lower()).astype(int)
    w["value_eth"] = pd.to_numeric(w["value"], errors="coerce") / 1e18
    w["value_eth_log1p"] = np.log1p(w["value_eth"].clip(lower=0))

    for col in ["gas","gasPrice","gasUsed"]:
        if col in w.columns:
            w[f"{col}_log1p"] = np.log1p(pd.to_numeric(w[col], errors="coerce").fillna(0).clip(lower=0))
        else:
            w[f"{col}_log1p"] = 0.0

    w["isError"] = pd.to_numeric(w.get("isError", 0), errors="coerce").fillna(0)
    w["err_flag"] = w["isError"].astype(int).clip(0,1)
    w["prev_time"] = w.groupby("address")["timeStamp"].shift(1)
    w["tx_gap_s"] = (w["timeStamp"] - w["prev_time"]).dt.total_seconds()
    w["tx_gap_s"] = w["tx_gap_s"].fillna(w["tx_gap_s"].median() if w["tx_gap_s"].notna().any() else 0)
    w["tx_gap_s_log1p"] = np.log1p(w["tx_gap_s"].clip(lower=0))
    w["is_erc20"] = w["source_file"].astype(str).str.contains("erc20", case=False, na=False).astype(int)
    w["functionName"] = w["functionName"].astype(str).str.strip()
    w["functionName_squashed"] = np.where(w["functionName"].isin(top_funcs), w["functionName"], "OTHER")

    # Make all fn_ columns present (align with train set)
    for fn in ["fn_" + f for f in (top_funcs + ["OTHER"])]:
        if fn not in w.columns:
            w[fn] = 0
    fn_cols = sorted([c for c in w.columns if c.startswith("fn_")])

    # Build matrix
    feats = [
        "dir_out","value_eth_log1p","gas_log1p","gasPrice_log1p","gasUsed_log1p",
        "err_flag","tx_gap_s_log1p","is_erc20"
    ] + fn_cols
    w_feats = w[feats].fillna(0).to_numpy(dtype=np.float32)

    # Scale using the fitted scaler
    w_feats = scaler.transform(w[feats].fillna(0).to_numpy(dtype=np.float32))

    # Pad/trim to SEQ_LEN
    if len(w_feats) >= SEQ_LEN:
        w_feats = w_feats[-SEQ_LEN:]
    else:
        pad = np.zeros((SEQ_LEN - len(w_feats), w_feats.shape[1]), dtype=np.float32)
        w_feats = np.vstack([pad, w_feats])

    # To tensor & predict
    model2 = LSTMClassifier(d_in=w_feats.shape[1], d_hidden=96, num_layers=2, dropout=0.2)
    model2.load_state_dict(torch.load("best_lstm_wallet.pt", map_location="cpu"))
    model2.eval()
    with torch.no_grad():
        logits = model2(torch.from_numpy(w_feats[None, ...]).float())
        prob = torch.sigmoid(logits).item()
    return prob

# Example:
# prob = predict_wallet_proba(tx_df[tx_df["address"] == "0x232e2fd91c88f8e2a06acdaa0ed199516ba31efa"])
# print("Fraud probability:", prob)


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_model_metrics(model, loader, device):
    model.eval()
    y_true, y_pred_prob, y_pred_bin = [], [], []
    
    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)
            outputs = model(X).squeeze().cpu().numpy()
            
            preds_bin = (outputs > 0.5).astype(int)
            
            y_pred_prob.extend(outputs.tolist())
            y_pred_bin.extend(preds_bin.tolist())
            y_true.extend(y.numpy().tolist())
    
    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred_bin),
        "Precision": precision_score(y_true, y_pred_bin),
        "Recall": recall_score(y_true, y_pred_bin),
        "F1": f1_score(y_true, y_pred_bin),
        "AUC": roc_auc_score(y_true, y_pred_prob)
    }
    return metrics

# Example usage
metrics = evaluate_model_metrics(model, test_loader, device)
print("Evaluation metrics on test set:")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")


Evaluation metrics on test set:
Accuracy: 0.7051
Precision: 0.8000
Recall: 0.4571
F1: 0.5818
AUC: 0.7635


In [17]:
count = 50


def predict_wallet_scores(model, loader, device):
    model.eval()
    all_probs, all_true = [], []
    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)
            logits = model(X).squeeze().cpu().numpy()
            probs  = 1 / (1 + np.exp(-logits))
            all_probs.extend(probs.tolist())
            all_true.extend(y.numpy().tolist())
    return np.array(all_true), np.array(all_probs)

y_true, y_forecast = predict_wallet_scores(model, test_loader, device)

# Example: show first 10 forecast probs
def predict_wallet_scores(model, loader, device):
    model.eval()
    all_probs, all_true = [], []
    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)
            logits = model(X).squeeze().cpu().numpy()
            probs  = 1 / (1 + np.exp(-logits))
            all_probs.extend(probs.tolist())
            all_true.extend(y.numpy().tolist())
    return np.array(all_true), np.array(all_probs)

y_true, y_forecast = predict_wallet_scores(model, test_loader, device)

# Convert forecast probs -> predicted labels
threshold = 0.5
y_pred = (y_forecast >= threshold).astype(int)

# Show first 30 wallets with ground truth, predicted label, and probability
for i in range(count):
    print(f"Wallet {i:02d} | true={y_true[i]} | pred={y_pred[i]} | prob={y_forecast[i]:.4f}")

# Compute percentage correct for these 30
correct = (y_pred[:count] == y_true[:count]).sum()
accuracy_percent = 100.0 * correct / count
print(f"\nCorrect predictions (first 30): {correct}/count")
print(f"Accuracy (first 30): {accuracy_percent:.2f}%")


Wallet 00 | true=1 | pred=0 | prob=0.0594
Wallet 01 | true=0 | pred=0 | prob=0.1892
Wallet 02 | true=1 | pred=0 | prob=0.2477
Wallet 03 | true=0 | pred=0 | prob=0.1034
Wallet 04 | true=1 | pred=1 | prob=0.9522
Wallet 05 | true=0 | pred=0 | prob=0.3447
Wallet 06 | true=0 | pred=0 | prob=0.2987
Wallet 07 | true=0 | pred=0 | prob=0.2399
Wallet 08 | true=1 | pred=0 | prob=0.2904
Wallet 09 | true=0 | pred=1 | prob=0.6039
Wallet 10 | true=1 | pred=0 | prob=0.1062
Wallet 11 | true=1 | pred=0 | prob=0.1000
Wallet 12 | true=1 | pred=0 | prob=0.2075
Wallet 13 | true=0 | pred=0 | prob=0.4096
Wallet 14 | true=0 | pred=0 | prob=0.3050
Wallet 15 | true=1 | pred=1 | prob=0.9821
Wallet 16 | true=1 | pred=1 | prob=0.9219
Wallet 17 | true=0 | pred=0 | prob=0.1130
Wallet 18 | true=0 | pred=1 | prob=0.9644
Wallet 19 | true=1 | pred=1 | prob=0.7984
Wallet 20 | true=1 | pred=0 | prob=0.2963
Wallet 21 | true=0 | pred=0 | prob=0.2648
Wallet 22 | true=0 | pred=0 | prob=0.0632
Wallet 23 | true=0 | pred=0 | prob

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example: your y_true and y_forecast arrays
# y_true = np.array([...])         # ground truth labels
# y_forecast = np.array([...])     # model probabilities

threshold = 0.5
y_pred = (y_forecast >= threshold).astype(int)


In [9]:
accuracy  = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall    = recall_score(y_true, y_pred)
f1        = f1_score(y_true, y_pred)

print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")


Accuracy : 0.6923
Precision: 0.7391
Recall   : 0.4857
F1 Score : 0.5862


In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Get true labels and forecast probabilities
y_true, y_forecast = predict_wallet_scores(model, test_loader, device)

# Convert probabilities to binary predictions at threshold = 0.5
threshold = 0.5
y_pred = (y_forecast >= threshold).astype(int)

# Compute metrics
accuracy  = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall    = recall_score(y_true, y_pred)
f1        = f1_score(y_true, y_pred)
auc       = roc_auc_score(y_true, y_forecast)

print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print(f"AUC      : {auc:.4f}")


Accuracy : 0.6923
Precision: 0.7391
Recall   : 0.4857
F1 Score : 0.5862
AUC      : 0.7635


In [19]:
# Count transactions per wallet
tx_counts = tx_df.groupby("address").size()

# Average transactions per wallet
avg_tx = tx_counts.mean()

print(f"Average number of transactions per wallet: {avg_tx:.2f}")


Average number of transactions per wallet: 531.54
