# Libraries

In [6]:
import sklearn
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import json
from matplotlib import pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier


# Loading the processed data

In [2]:
# Directory that holds the preprocessed train/test CSV files. Every path below
# is resolved against this single constant, so pointing the notebook at another
# checkout or machine only requires editing this one line.
PROCESSED_DIR = '/Users/raya/Desktop/fraud-detection/european-dataset/data/processed'

# Feature matrices are kept as full DataFrames.
X_train = pd.read_csv(f'{PROCESSED_DIR}/X_train.csv')
X_test = pd.read_csv(f'{PROCESSED_DIR}/X_test.csv')

# Target files are reduced to their 'Class' column, giving a Series per split.
y_train = pd.read_csv(f'{PROCESSED_DIR}/y_train.csv')['Class']
y_test = pd.read_csv(f'{PROCESSED_DIR}/y_test.csv')['Class']

# NOTE(review): the "_res" files are presumably a resampled (class-balanced)
# version of the training split - confirm against the preprocessing step.
X_train_res = pd.read_csv(f'{PROCESSED_DIR}/X_train_res.csv')
y_train_res = pd.read_csv(f'{PROCESSED_DIR}/y_train_res.csv')['Class']

# Model

In [73]:
# Bagging ensemble with scikit-learn's default settings.
# NOTE(review): none of the cells visible here reference `bagging_model`; the
# randomized search constructs its own BaggingClassifier() instance.
bagging_model = BaggingClassifier()

In [74]:
# Search space handed to the randomized hyper-parameter search: each key is a
# BaggingClassifier constructor argument, each value the list of candidates.
parameters = {}
parameters['n_estimators'] = [50, 100, 500]
parameters['max_samples'] = [0.1, 0.4, 0.7, 1.0]
parameters['bootstrap'] = [True, False]
parameters['max_features'] = [0.1, 0.4, 0.7, 1.0]
# Single-valued entry: not tuned, just forwarded so every candidate is built
# with n_jobs=-1.
parameters['n_jobs'] = [-1]

In [75]:
# Randomized hyper-parameter search over `parameters` using 5-fold
# cross-validation. random_state is fixed on both the parameter sampler and the
# bagging estimator so that a Restart & Run All selects the same candidates and
# trains the same ensembles; without it every run can report a different
# "best" configuration.
# NOTE(review): no `scoring` is given, so candidates are ranked by the
# estimator's default score (accuracy for classifiers). If the classes are
# heavily imbalanced, consider an explicit scoring such as 'f1'.
search = RandomizedSearchCV(
    BaggingClassifier(random_state=42),
    parameters,
    cv=5,
    random_state=42,
)

In [76]:
# Run the cross-validated search on the training split loaded above
# (X_train / y_train, not the *_res variants).
search.fit(X=X_train, y=y_train)

In [81]:
# Hyper-parameter combination selected by the search, echoed for the record.
params = search.best_params_
print('Best parameters:', params)

Best parameters: {'n_jobs': -1, 'n_estimators': 100, 'max_samples': 1.0, 'max_features': 0.4, 'bootstrap': False}


In [82]:
# Estimator built from the winning hyper-parameters. With the search's default
# refit=True, scikit-learn has already refitted it on the complete data that
# was passed to search.fit().
best_model = search.best_estimator_

In [79]:
# `search` was created with the default refit=True, so `best_estimator_` has
# already been retrained on the full (X_train, y_train) with the winning
# hyper-parameters. Calling .fit() again here would only repeat that (costly)
# training and, for an unseeded ensemble, replace the model the search actually
# selected with a different random one. The bare expression keeps the cell's
# display of the fitted estimator.
best_model

In [80]:
# Hard class predictions of the selected model on the held-out test features.
predictions = best_model.predict(X=X_test)

# Evaluation

In [90]:
# Unpack the binary confusion matrix (scikit-learn flattens the 2x2 matrix in
# the order tn, fp, fn, tp). The counts are converted to built-in ints so that
# the four-factor product in the MCC denominator uses arbitrary-precision
# arithmetic; with NumPy int64 values it can silently wrap around on large
# test sets. Unpacking still fails loudly if the matrix is not 2x2.
tn, fp, fn, tp = (int(count) for count in confusion_matrix(y_test, predictions).ravel())

# Fraud Catching Rate = recall of the positive class.
FCR = recall_score(y_test, predictions)

# False Alarm Rate and True Negative Rate share the same denominator (all
# actual negatives). A zero denominator yields 0.0 instead of NaN/inf, which
# would otherwise end up in the JSON report.
negatives = fp + tn
FAR = fp / negatives if negatives else 0.0
TNR = tn / negatives if negatives else 0.0

# Balanced Classification Rate: mean of the per-class recalls.
BCR = (FCR + TNR) / 2

# Matthews Correlation Coefficient; defined as 0.0 when any marginal total is
# zero (the denominator vanishes).
numerator = (tp * tn - fp * fn)
denominator = np.sqrt(float((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
MCC = (numerator / denominator) if denominator else 0.0

f1 = f1_score(y_test, predictions)

In [91]:
# Gather the evaluation results under their report labels. Each value goes
# through float() so that any NumPy scalar becomes a plain Python float before
# serialisation.
# NOTE: the 'Mathews' spelling is part of the emitted key and is kept as-is.
metrics = {
    label: float(value)
    for label, value in (
        ('Fraud Catching Rate', FCR),
        ('False Alarm Rate', FAR),
        ('Balanced Classification Rate', BCR),
        ('Mathews Correlation Coefficient', MCC),
        ('True Positives', tp),
        ('False Positives', fp),
        ('F1-score', f1),
    )
}

# Write the metrics as indented JSON; an existing report file is overwritten.
with open('/Users/raya/Desktop/fraud-detection/european-dataset/reports/bagging/metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=4))