# Load File

In [None]:
import numpy as np
import sklearn as sk
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
import seaborn as sns
from sklearn import metrics


In [2]:
df_path = "C:/Users/Enduser/OneDrive - Asia Pacific University/uni/Y3S2/fyp/Model_trial/btc_trial_dataset2.csv"
dataset_df = pd.read_csv(df_path)

# Feature Selection: Dropping Irrelevant or Redundant Columns


In [3]:
drop_cols = ['tx_hash', 'is_malicious', 'all_malicious', 'mean_in_btc', 'mean_out_btc','in_malicious']
dataset_df.drop(columns=drop_cols, inplace=True)

In [4]:
dataset_df.head()

Unnamed: 0,indegree,outdegree,in_btc,out_btc,total_btc,out_malicious,out_and_tx_malicious
0,4,2,0.478187,0.476987,0.955174,0,0
1,3,2,2.019,2.0185,4.0375,0,0
2,1,1,0.1801,0.1801,0.3602,0,0
3,1,2,5.8798,5.8793,11.7591,0,0
4,4,2,0.495906,0.495406,0.991312,0,0


# Data Splitting

In [5]:
from sklearn.model_selection import train_test_split

# Load dataset
X = dataset_df.drop('out_and_tx_malicious', axis=1)
y = dataset_df['out_and_tx_malicious']

# Split dataset into Train (80%) and Test (20%) ensuring stratification
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Further split Train into Train (90%) and Validation (10%)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.10, stratify=y_train, random_state=42
)

print("Data Split Complete:")
print(f"Train Size: {X_train.shape[0]}")
print(f"Validation Size: {X_val.shape[0]}")
print(f"Test Size: {X_test.shape[0]}")


Data Split Complete:
Train Size: 72000
Validation Size: 8000
Test Size: 20000


# Feature Scaling: Logarithmic Transformation 

In [None]:

# Define features for log transformation
log_features = ['indegree', 'outdegree', 'in_btc', 'out_btc', 'total_btc']

# Apply log transformation
for df in [X_train, X_val, X_test]:  
    df[log_features] = np.log1p(df[log_features])  # log1p avoids log(0)

print("Log Transformation Complete")


Log Transformation Complete


# Feature Engineer

In [None]:
# Feature Engineering Function
def add_features(df):
    df['out_malicious_to_total_btc'] = df['out_malicious'] / (df['total_btc'] + 1e-6)
    df['log_total_btc'] = np.log1p(df['total_btc'])
    df['out_malicious_in_btc_interaction'] = df['out_malicious'] * df['in_btc']
    df['net_btc_flow'] = df['in_btc'] - df['out_btc']
    return df

# Apply feature engineering to Train, Validation, and Test sets
X_train_fe = add_features(X_train)
X_val_fe = add_features(X_val)
X_test_fe = add_features(X_test)

# Select the final set of features
selected_features = [
    'in_btc', 'out_btc', 'total_btc', 'out_malicious', 'indegree','outdegree',
    'out_malicious_to_total_btc', 'log_total_btc',
    'out_malicious_in_btc_interaction', 'net_btc_flow'
]

X_train_final = X_train_fe[selected_features]
X_val_final = X_val_fe[selected_features]
X_test_final = X_test_fe[selected_features]

print("Feature Engineering Complete")


Feature Engineering Complete


#  Data Balancing - Smote

In [8]:
from imblearn.over_sampling import SMOTE
import pandas as pd

# Apply SMOTE to handle class imbalance
smote = SMOTE(sampling_strategy=0.01, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_final, y_train)

print("Class distribution after SMOTE:")
print(pd.Series(y_train_smote).value_counts())

Class distribution after SMOTE:
out_and_tx_malicious
0    71923
1      719
Name: count, dtype: int64


# Feature Scaling: Robust Scaling

In [9]:
from sklearn.preprocessing import RobustScaler

# Initialize RobustScaler
scaler = RobustScaler()

# Fit and transform Train set
X_train_scaled = scaler.fit_transform(X_train_smote)

# Transform Validation and Test sets using the same scaler
X_val_scaled = scaler.transform(X_val_final)
X_test_scaled = scaler.transform(X_test_final)

# Convert back to DataFrame
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train_final.columns)
X_val_scaled_df = pd.DataFrame(X_val_scaled, columns=X_val_final.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test_final.columns)

print("Scaling Complete")


Scaling Complete


# SVM Model 1

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score

# Initialize SVM with class weights
svm_model = SVC(
    kernel='rbf',           # Radial basis function kernel for non-linear separation
    class_weight='balanced', # Adjust weights to emphasize fraud class
    probability=True,       # Enable probability estimates for threshold tuning
    random_state=42,
    C=1.0                   # Regularization parameter 
)

# Train the model 
svm_model.fit(X_train_scaled_df, y_train_smote)

# Evaluate 
# Predict probabilities for threshold tuning
y_val_proba = svm_model.predict_proba(X_val_scaled_df)[:, 1]

# adjust threshold and evaluate
def evaluate_threshold(y_true, y_proba, threshold):
    y_pred = (y_proba >= threshold).astype(int)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_proba)
    print(f"Threshold: {threshold:.2f}")
    print(f"Recall: {recall:.4f}, F1-Score: {f1:.4f}, ROC AUC: {roc_auc:.4f}")
    print(classification_report(y_true, y_pred))
    return recall, f1, roc_auc

# Test multiple thresholds
thresholds = [0.1, 0.3, 0.5, 0.7]
best_threshold = 0.5
best_recall = 0

print("Validation Set Performance:")
for thresh in thresholds:
    print("-" * 50)
    recall, f1, roc_auc = evaluate_threshold(y_val, y_val_proba, thresh)
    if recall > best_recall:  # Prioritize recall for fraud detection
        best_recall = recall
        best_threshold = thresh

print(f"Best Threshold: {best_threshold}, Best Recall: {best_recall}")

# Predictions on Test Set with Best Threshold
y_test_proba = svm_model.predict_proba(X_test_scaled_df)[:, 1]
y_test_pred = (y_test_proba >= best_threshold).astype(int)

print("\nTest Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"ROC AUC: {roc_auc_score(y_test, y_test_proba):.4f}")

# Cross-Validation for Robustness
cv_scores = cross_val_score(
    svm_model, X_train_scaled_df, y_train_smote, cv=5, scoring='recall'
)
print(f"\n5-Fold CV Recall Scores: {cv_scores}")
print(f"Mean CV Recall: {cv_scores.mean():.4f}, Std: {cv_scores.std():.4f}")

Validation Set Performance:
--------------------------------------------------
Threshold: 0.10
Recall: 0.6250, F1-Score: 0.1695, ROC AUC: 0.9660
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      7992
           1       0.10      0.62      0.17         8

    accuracy                           0.99      8000
   macro avg       0.55      0.81      0.58      8000
weighted avg       1.00      0.99      1.00      8000

--------------------------------------------------
Threshold: 0.30
Recall: 0.6250, F1-Score: 0.2857, ROC AUC: 0.9660
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7992
           1       0.19      0.62      0.29         8

    accuracy                           1.00      8000
   macro avg       0.59      0.81      0.64      8000
weighted avg       1.00      1.00      1.00      8000

--------------------------------------------------
Threshold: 0.50
Recall: 0.3750, F

# SVM Model 2 with Grid Search

In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score, recall_score, f1_score, precision_score
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from imblearn.over_sampling import SMOTE

#  Hyperparameter Tuning
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto', 0.1]}
grid_search = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42),
    param_grid, cv=5, scoring='recall', n_jobs=-1
)
grid_search.fit(X_train_scaled, y_train_smote)
best_svm = grid_search.best_estimator_

# Calibrate Model
svm_model = CalibratedClassifierCV(best_svm, method='sigmoid', cv=5)
svm_model.fit(X_train_scaled, y_train_smote)

# Evaluate on Validation Set
y_val_proba = svm_model.predict_proba(X_val_scaled)[:, 1]
thresholds = np.arange(0.05, 0.95, 0.05)
best_threshold = 0.5
best_score = 0

print("Validation Set Performance:")
for thresh in thresholds:
    y_val_pred = (y_val_proba >= thresh).astype(int)
    recall = recall_score(y_val, y_val_pred)
    precision = precision_score(y_val, y_val_pred)
    f1 = f1_score(y_val, y_val_pred)
    roc_auc = roc_auc_score(y_val, y_val_proba)
    if recall >= 0.6 and precision > 0.1:
        if f1 > best_score:
            best_score = f1
            best_threshold = thresh
    print(f"Threshold: {thresh:.2f}, Recall: {recall:.4f}, Precision: {precision:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}")
    print(classification_report(y_val, y_val_pred))

print(f"Best Threshold: {best_threshold}, Best F1: {best_score}")

#  Test Set Performance
y_test_proba = svm_model.predict_proba(X_test_scaled)[:, 1]
y_test_pred = (y_test_proba >= best_threshold).astype(int)
print("\nTest Set Performance:")
print(classification_report(y_test, y_test_pred))
print(f"ROC AUC: {roc_auc_score(y_test, y_test_proba):.4f}")

#  Cross-Validation
cv_scores = cross_val_score(svm_model, X_train_scaled, y_train_smote, cv=5, scoring='recall')
print(f"\n5-Fold CV Recall Scores: {cv_scores}")
print(f"Mean CV Recall: {cv_scores.mean():.4f}, Std: {cv_scores.std():.4f}")

Validation Set Performance:
Threshold: 0.05, Recall: 0.7500, Precision: 0.0173, F1: 0.0338, ROC AUC: 0.8954
              precision    recall  f1-score   support

           0       1.00      0.96      0.98      7992
           1       0.02      0.75      0.03         8

    accuracy                           0.96      8000
   macro avg       0.51      0.85      0.51      8000
weighted avg       1.00      0.96      0.98      8000

Threshold: 0.10, Recall: 0.5000, Precision: 0.0197, F1: 0.0379, ROC AUC: 0.8954
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      7992
           1       0.02      0.50      0.04         8

    accuracy                           0.97      8000
   macro avg       0.51      0.74      0.51      8000
weighted avg       1.00      0.97      0.99      8000

Threshold: 0.15, Recall: 0.1250, Precision: 0.0088, F1: 0.0164, ROC AUC: 0.8954
              precision    recall  f1-score   support

           0       1.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize


Test Set Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     19978
           1       0.33      0.23      0.27        22

    accuracy                           1.00     20000
   macro avg       0.67      0.61      0.63     20000
weighted avg       1.00      1.00      1.00     20000

ROC AUC: 0.9524

5-Fold CV Recall Scores: [0.27777778 0.30769231 0.29861111 0.29861111 0.28472222]
Mean CV Recall: 0.2935, Std: 0.0108
