# Libraries

In [12]:
import json
import os
import warnings
from pathlib import Path

# Kept ahead of the third-party imports, as before, so warnings raised while
# importing them stay suppressed.
warnings.filterwarnings('ignore')

import joblib
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from matplotlib import pyplot as plt
from scipy.stats import loguniform
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Loading the processed data

In [7]:
# Directory holding the pre-split CSV files. The default is the original
# machine-specific location; set the FRAUD_DATA_DIR environment variable to
# point the notebook at a different copy of the data.
DATA_DIR = Path(os.environ.get(
    'FRAUD_DATA_DIR',
    '/Users/raya/Desktop/fraud-detection/european-dataset/data/processed',
))

# Features. X_temp / y_temp are split further into train and validation
# parts below; X_test / y_test are only used for the final evaluation.
X_temp = pd.read_csv(DATA_DIR / 'X_train.csv')
X_test = pd.read_csv(DATA_DIR / 'X_test.csv')

# Labels: only the 'Class' column of each label file is kept (as a Series).
y_temp = pd.read_csv(DATA_DIR / 'y_train.csv')['Class']
y_test = pd.read_csv(DATA_DIR / 'y_test.csv')['Class']

In [8]:
# Hold out 20% of the loaded training data as a validation set. Stratifying
# on the label keeps the class ratio the same in both parts, and the fixed
# seed makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.2,
    random_state=42,
    stratify=y_temp,
)

In [9]:
# Oversample the training split with SMOTE (seeded for reproducibility).
# Only X_train / y_train are resampled; X_val and X_test are left untouched.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Report the resampled shapes and the share of positive (fraud) labels.
for label, value in (
    ("SMOTE X_train shape:", X_train_smote.shape),
    ("SMOTE y_train shape:", y_train_smote.shape),
    ("Fraud ratio after SMOTE:", np.mean(y_train_smote)),
):
    print(label, value)

SMOTE X_train shape: (363514, 30)
SMOTE y_train shape: (363514,)
Fraud ratio after SMOTE: 0.5


In [13]:
# Base classifier: RBF-kernel SVM with probability estimates enabled and a
# fixed seed.
svm = SVC(kernel='rbf', probability=True, random_state=42)

# Candidate weights for the fraud class (label 1); the non-fraud class
# (label 0) always keeps weight 1.
fraud_weights = [1, 10, 50, 100, 200]

# Search space for the randomized search.
param_dist = dict(
    C=loguniform(1e-3, 1e3),        # Regularization parameter, sampled log-uniformly
    gamma=loguniform(1e-4, 1e0),    # Kernel coefficient, sampled log-uniformly
    class_weight=[{0: 1, 1: weight} for weight in fraud_weights],
)

In [14]:
# SMOTE has to run *inside* each cross-validation training fold. If the data
# is oversampled before the search, synthetic points interpolated from the
# training rows end up in the scoring folds and every fold is scored on an
# artificial 50/50 class mix, so the CV F1 (and the hyperparameters chosen
# from it) are optimistically biased. The imblearn Pipeline applies the
# sampler only while fitting and skips it at predict time.
resampling_svm = ImbPipeline(steps=[
    ('smote', SMOTE(random_state=42)),
    ('svc', svm),
])

# Pipeline hyperparameters are addressed as '<step name>__<parameter>'.
pipeline_param_dist = {f'svc__{name}': dist for name, dist in param_dist.items()}

random_search = RandomizedSearchCV(
    estimator=resampling_svm,
    param_distributions=pipeline_param_dist,
    n_iter=20,  # Number of parameter settings to sample
    scoring='f1',  # Optimize for F1-score
    cv=3,  # 3-fold cross-validation
    n_jobs=-1,  # Use all available cores
    verbose=2,
    random_state=42
)

# Fit on the un-resampled training split: the pipeline oversamples each CV
# training fold itself, and the scoring folds keep the real class ratio.
# The pre-resampled X_train_smote / y_train_smote are deliberately not used
# here. Note that best_estimator_ is now the whole pipeline; the fitted SVC
# is available as best_estimator_.named_steps['svc'].
random_search.fit(X_train, y_train)

Fitting 3 folds for each of 20 candidates, totalling 60 fits


In [None]:
# best_score_ is the mean F1 across the cross-validation folds for the best
# sampled candidate. It is not the score on the separate X_val hold-out,
# which is reported further down, so the label says so explicitly.
print("Best parameters:", random_search.best_params_)
print("Best F1-score (mean cross-validation):", random_search.best_score_)

In [None]:
# Model refit by the search with the best sampled hyperparameters
# (RandomizedSearchCV refits by default; no refit argument is passed above).
best_svm = random_search.best_estimator_
# X_val was split off before any resampling, so these predictions are made
# on data at its original class ratio.
y_pred_val = best_svm.predict(X_val)

In [None]:
# Report F1, precision and recall of the tuned model on the validation
# hold-out, each formatted to four decimal places.
print("\nValidation Set Performance:")
for label, metric in (
    ("F1-Score", f1_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
):
    print(f"{label}: {metric(y_val, y_pred_val):.4f}")

In [None]:
# Persist the tuned model so it can be reloaded without repeating the
# search. The file is written relative to the current working directory.
# (joblib is imported with the other libraries at the top of the notebook.)
joblib.dump(best_svm, 'svm_fraud_model.pkl')

# Evaluation

In [None]:
# Hard class predictions of the tuned model on the test features loaded
# from X_test.csv.
predictions = best_svm.predict(X_test)

In [None]:
# Headline metrics on the test set. precision / recall / F1 use the
# scorers' default positive label, 1, which is the fraud class here.
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
# labels=[0, 1] pins the matrix to 2x2, so the four-way unpack below cannot
# fail when only one class appears in both y_test and predictions.
tn, fp, fn, tp = confusion_matrix(y_test, predictions, labels=[0, 1]).ravel()