# Libraries

In [6]:
import sklearn
import pandas as pd
import numpy as np
import warnings

from sklearn.neighbors import KNeighborsClassifier

warnings.filterwarnings('ignore')
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import json
from matplotlib import pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay


# Loading the processed data

In [3]:
# Directory holding the pre-split CSVs. Kept in ONE place so the notebook can be
# pointed at another checkout by editing a single line instead of six.
PROCESSED_DIR = '/Users/raya/Desktop/fraud-detection/european-dataset/data/processed'


def load_processed(name, column=None):
    """Read ``<PROCESSED_DIR>/<name>.csv``.

    Parameters
    ----------
    name : str
        File stem inside ``PROCESSED_DIR`` (without the ``.csv`` suffix).
    column : str, optional
        When given, return only that column (as a Series) instead of the
        whole DataFrame.

    Returns
    -------
    pandas.DataFrame or pandas.Series
    """
    frame = pd.read_csv(f'{PROCESSED_DIR}/{name}.csv')
    return frame if column is None else frame[column]


# Feature matrices are loaded whole; label files contribute only their 'Class' column.
X_train = load_processed('X_train')
X_test = load_processed('X_test')
y_train = load_processed('y_train', column='Class')
y_test = load_processed('y_test', column='Class')
# Resampled ("_res") training split, used in the oversampled-data section.
X_train_res = load_processed('X_train_res')
y_train_res = load_processed('y_train_res', column='Class')

In [4]:
# Turn the label Series into plain 1-D NumPy arrays for scikit-learn.
# np.asarray(...) replaces Series.ravel(), which pandas has deprecated; going
# through NumPy also keeps this cell safe to re-run once the names already
# hold arrays.
y_train = np.asarray(y_train).ravel()
y_test = np.asarray(y_test).ravel()
y_train_res = np.asarray(y_train_res).ravel()

# Model

## Training on imbalanced data

In [7]:
# k-NN classifier built with all constructor defaults; it is the base estimator
# handed to the hyperparameter search.
knn_model = KNeighborsClassifier()

In [13]:
# Hyperparameter search space for the k-NN classifier.
param_grid = {
    # Odd neighbourhood sizes only.
    'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15],
    # KNeighborsClassifier accepts only 'uniform' or 'distance' (or a callable)
    # here. The former 'balanced' entry is not a valid option: every candidate
    # that sampled it failed to fit and wasted a search iteration.
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    # Minkowski power: 1 = Manhattan, 2 = Euclidean.
    'p': [1, 2]
}

In [21]:
# Randomized hyperparameter search over `param_grid`, scored by F1 with 5-fold
# cross-validation (number of sampled candidates left at the library default).
# `random_state` pins WHICH candidates are sampled, so best_params_ and the
# reported metrics are reproducible after a kernel restart.
random_search = RandomizedSearchCV(
    knn_model,
    param_grid,
    scoring='f1',
    cv=5,
    random_state=42,
)

In [23]:
# Run the randomized search on the original (non-resampled) training split.
random_search.fit(X_train, y_train)

In [28]:
# Best hyperparameter combination found by the most recent search fit;
# the bare expression displays it as the cell output.
params = random_search.best_params_
params

{'weights': 'uniform', 'p': 1, 'n_neighbors': 1, 'metric': 'minkowski'}

In [25]:
# Estimator the search selected as best.
best_model = random_search.best_estimator_

In [26]:
# Fit the selected estimator on the original (imbalanced) training split.
# NOTE(review): the search is constructed without a `refit` argument, so it
# presumably already refit best_estimator_ on this same data — this call looks
# redundant; confirm and drop if so.
best_model.fit(X_train, y_train)

In [29]:
# Test-set predictions from the model trained on the imbalanced data.
# NOTE(review): `predictions` is not scored in any of the visible cells —
# confirm whether this run should be evaluated as well.
predictions = best_model.predict(X_test)

## Training on oversampled data

In [33]:
# Re-run the same search object on the resampled training split; this replaces
# the results of the earlier fit.
# NOTE(review): the CV folds are cut from already-resampled data, so validation
# folds may contain resampled points and the CV scores are likely optimistic —
# confirm whether resampling should happen inside each fold instead.
random_search.fit(X_train_res, y_train_res)

In [34]:
# Best hyperparameter combination from the search just fitted on the resampled
# data; the bare expression displays it as the cell output.
params = random_search.best_params_
params

{'weights': 'uniform', 'p': 1, 'n_neighbors': 1, 'metric': 'minkowski'}

In [35]:
# Estimator selected by the search on the resampled data (rebinds `best_model`).
best_model = random_search.best_estimator_

In [36]:
# Train the selected estimator on the OVERSAMPLED split so that the predictions
# and the saved SMOTE metrics really describe the oversampled model. Fitting on
# X_train / y_train here would silently reproduce the imbalanced-data model.
best_model.fit(X_train_res, y_train_res)

In [37]:
# Test-set predictions for the oversampled run; these are what the evaluation
# cells score.
oversampled_predictions = best_model.predict(X_test)

# Evaluation

In [38]:
# Score `oversampled_predictions` against the held-out labels, one scalar per metric.
accuracy, precision, recall, f1 = (
    metric_fn(y_test, oversampled_predictions)
    for metric_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Flatten the confusion matrix into its four cells (the unpacking requires a 2x2 matrix).
tn, fp, fn, tp = np.ravel(confusion_matrix(y_test, oversampled_predictions))

In [39]:
# Collect the headline numbers as plain Python floats so they serialise to JSON.
metrics = {
    label: float(value)
    for label, value in (
        ('Accuracy', accuracy),
        ('Precision', precision),
        ('Recall', recall),
        ('F1-score', f1),
        ('True Positives', tp),
        ('False Positives', fp),
    )
}

# Persist the report for the oversampled run as pretty-printed JSON.
with open('/Users/raya/Desktop/fraud-detection/european-dataset/reports/knn/smote_metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=4))