In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [7]:
df = pd.read_csv("standardized_minmax.csv")
X = df.drop(columns=["FraudFound"])
y = df["FraudFound"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

In [11]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
models = {
    'Decision Tree': DecisionTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier(),
    'Random Forest': RandomForestClassifier(),
    'ANN': MLPClassifier(max_iter=500),
    'SVM (Linear)': SVC(kernel='linear'),
    'SVM (RBF)': SVC(kernel='rbf'),
    'Logistic Regression': LogisticRegression()
}

param_grids = {
    'Decision Tree': {'max_depth': [3, 5, 10, None], 'min_samples_split': [2, 5, 10]},
    'Naive Bayes': {'var_smoothing': [1e-9, 1e-8, 1e-7]},
    'KNN': {'n_neighbors': [3, 5, 7, 10], 'metric': ['minkowski', 'euclidean']},
    'Random Forest': {'n_estimators': [50, 100], 'max_depth': [5, 10, None]},
    'ANN': {'hidden_layer_sizes': [(50,), (100,), (50, 50)], 'activation': ['relu', 'tanh'], 'solver': ['adam']},
    'SVM (Linear)': {'C': [0.1, 1, 10]},
    'SVM (RBF)': {'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]},
    'Logistic Regression': {'C': [0.1, 1, 10], 'solver': ['liblinear']}
}

smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
for name, model in models.items():

    X_train_model = X_train_smote
    y_train_model = y_train_smote

    grid_search = GridSearchCV(model, param_grids[name], cv=5, scoring='f1_macro', n_jobs=-1)
    grid_search.fit(X_train_model, y_train_model)
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    print(f"Model: {name}")
    print(f"Best Params: {grid_search.best_params_}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(f"Classification Report:\n{classification_report(y_test, y_pred)}")
    print(f" F1 Score (macro): {f1_score(y_test, y_pred, average='macro'):.4f}")
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Model: Decision Tree
Best Params: {'max_depth': None, 'min_samples_split': 5}
Accuracy: 0.8933203631647212
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      2887
           1       0.22      0.25      0.23       197

    accuracy                           0.89      3084
   macro avg       0.58      0.60      0.59      3084
weighted avg       0.90      0.89      0.90      3084

 F1 Score (macro): 0.5879
Confusion Matrix:
 [[2705  182]
 [ 147   50]]
Model: Naive Bayes
Best Params: {'var_smoothing': 1e-07}
Accuracy: 0.5132944228274967
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.49      0.65      2887
           1       0.10      0.87      0.19       197

    accuracy                           0.51      3084
   macro avg       0.54      0.68      0.42      3084
weighted avg       0.93      0.51      0.62      3084

 F1 Score (macro): 0.4196
Confusion

KeyboardInterrupt: 