# Naive Bayes Implementation

In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the pre-split feature tables and label files from the working directory.
X_train = pd.read_csv("X_train.csv")
X_test = pd.read_csv("X_test.csv")
# Each label file is flattened into a 1-D array.
y_train = pd.read_csv("y_train.csv").to_numpy().ravel()
y_test = pd.read_csv("y_test.csv").to_numpy().ravel()

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Train and Print Results

In [2]:
def report_metrics(label, y_true, y_hat):
    """Print confusion matrix, classification report and accuracy for one model.

    Parameters
    ----------
    label : str
        Heading printed above the metrics so separate runs can be told apart.
    y_true : array-like
        Ground-truth labels.
    y_hat : array-like
        Predicted labels to compare against ``y_true``.
    """
    print(f"=== {label} ===")
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_hat))

    print("\nClassification Report:")
    # zero_division=0 reports 0 (without a warning) for a class that is never predicted.
    print(classification_report(y_true, y_hat, zero_division=0))

    print("Accuracy Score:", accuracy_score(y_true, y_hat))
    print()


# Baseline: Gaussian NB fitted on the raw features.
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Same estimator fitted on the standardized features.
model_scaled = GaussianNB()
model_scaled.fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)

# Evaluate both variants with identical metrics, each under its own heading.
report_metrics("GaussianNB (raw features)", y_test, y_pred)
report_metrics("GaussianNB (standardized features)", y_test, y_pred_scaled)

Confusion Matrix:
[[44147     0]
 [ 6253     0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.88      1.00      0.93     44147
           1       0.00      0.00      0.00      6253

    accuracy                           0.88     50400
   macro avg       0.44      0.50      0.47     50400
weighted avg       0.77      0.88      0.82     50400

Accuracy Score: 0.8759325396825397
Confusion Matrix:
[[44147     0]
 [ 6253     0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.88      1.00      0.93     44147
           1       0.00      0.00      0.00      6253

    accuracy                           0.88     50400
   macro avg       0.44      0.50      0.47     50400
weighted avg       0.77      0.88      0.82     50400

Accuracy Score: 0.8759325396825397


### Try SMOTE with NB

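SMOTE raises minority-class (1) recall from 0.00 to 0.73, but precision for that class is only 0.13 and overall accuracy drops from 0.88 to 0.34, so the model is still poor.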

In [3]:
# Oversample the training split only; the test split is left untouched.
# NOTE(review): SMOTE synthesizes points from nearest neighbours, so it is
# presumably sensitive to feature scale — consider resampling X_train_scaled; confirm.
sm = SMOTE(random_state=9)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

# Refit Gaussian NB on the resampled data and score it on the original test set.
model = GaussianNB().fit(X_resampled, y_resampled)
y_pred = model.predict(X_test)

smote_report = classification_report(y_test, y_pred)
print("Classification Report:\n", smote_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.29      0.43     44147
           1       0.13      0.73      0.22      6253

    accuracy                           0.34     50400
   macro avg       0.50      0.51      0.32     50400
weighted avg       0.79      0.34      0.41     50400

