In [13]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)


In [14]:
# Load the five pickled artifacts from the working directory, in the order listed.
# NOTE(review): presumably these were written by an earlier preprocessing
# notebook -- confirm. joblib.load unpickles its input, so only point it at
# files you trust.
X_train_res, y_train_res, X_test, y_test, scaler = (
    joblib.load(artifact_path)
    for artifact_path in (
        "X_train_res.pkl",
        "y_train_res.pkl",
        "X_test.pkl",
        "y_test.pkl",
        "scaler.pkl",
    )
)


In [15]:
# Logistic-regression model: fixed seed for repeatable fits and a solver
# iteration cap of 1000.
log_reg = LogisticRegression(
    random_state=42,
    max_iter=1000,
)
# Fit on the training split loaded above; left as the final expression so the
# notebook still displays the fitted estimator.
log_reg.fit(X=X_train_res, y=y_train_res)


In [16]:
# Class-label predictions from the fitted logistic regression for X_test;
# reused by the metrics cell that follows.
y_pred_logreg = log_reg.predict(X_test)


In [17]:
# Compare the logistic-regression predictions with y_test.
# The four scalar metrics all take (y_true, y_pred), so they are printed from
# one table in the original order; the matrix and the report follow.
print("Logistic Regression Results:\n")
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_logreg))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_logreg))
print("\nClassification Report:\n", classification_report(y_test, y_pred_logreg))


Logistic Regression Results:

Accuracy: 0.9741055440469084
Precision: 0.057803468208092484
Recall: 0.9183673469387755
F1 Score: 0.10876132930513595

Confusion Matrix:
 [[55397  1467]
 [    8    90]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.99     56864
           1       0.06      0.92      0.11        98

    accuracy                           0.97     56962
   macro avg       0.53      0.95      0.55     56962
weighted avg       1.00      0.97      0.99     56962



In [18]:
# Random forest of 100 trees with a fixed seed so repeated runs build the
# same ensemble.
rf_clf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
# Fit on the same training split as the logistic regression; kept as the final
# expression so the notebook still displays the fitted estimator.
rf_clf.fit(X=X_train_res, y=y_train_res)


In [19]:
# Class-label predictions from the fitted random forest for X_test;
# reused by the metrics cell that follows.
y_pred_rf = rf_clf.predict(X_test)


In [20]:
# Compare the random-forest predictions with y_test.
# The mapping keeps insertion order, so the scalar metrics print in the same
# sequence as before: accuracy, precision, recall, F1.
print("Random Forest Results:\n")
rf_scalar_metrics = {
    "Accuracy:": accuracy_score,
    "Precision:": precision_score,
    "Recall:": recall_score,
    "F1 Score:": f1_score,
}
for heading, scorer in rf_scalar_metrics.items():
    print(heading, scorer(y_test, y_pred_rf))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))
   

Random Forest Results:

Accuracy: 0.9994908886626171
Precision: 0.8709677419354839
Recall: 0.826530612244898
F1 Score: 0.8481675392670157

Confusion Matrix:
 [[56852    12]
 [   17    81]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.83      0.85        98

    accuracy                           1.00     56962
   macro avg       0.94      0.91      0.92     56962
weighted avg       1.00      1.00      1.00     56962



In [24]:
# Persist both fitted estimators to the working directory.
# The random-forest dump stays last so the cell still echoes the list of
# filenames that joblib.dump returns for it.
joblib.dump(value=log_reg, filename="logistic_regression_model.pkl")
joblib.dump(value=rf_clf, filename="random_forest_model.pkl")



['random_forest_model.pkl']

In [22]:
# predict_proba yields one probability column per class; keep column index 1.
# NOTE(review): presumably that is the class labelled 1 (the printed reports
# show classes 0 and 1) -- confirm against rf_clf.classes_.
rf_class_probs = rf_clf.predict_proba(X_test)
y_probs_rf = rf_class_probs[:, 1]


In [23]:
# ROC AUC for the random forest, computed from the per-row scores in
# y_probs_rf (column 1 of predict_proba) rather than from hard class labels.
# roc_auc_score comes from the notebook's import cell, so a fresh
# Restart & Run All sees every dependency up front.
auc_rf = roc_auc_score(y_test, y_probs_rf)
print("AUC Score for Original RF:", auc_rf)


AUC Score for Original RF: 0.9684484390970794
