In [118]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score, mean_absolute_error, roc_auc_score


%run util_lab.ipynb

# Load Data

In [99]:
X, Y, df_lab = get_data_lab()

In [100]:
X = df_lab.iloc[:, :-1].to_numpy()
Y = df_lab.iloc[:, -1].to_numpy()

In [101]:
sc_X = StandardScaler()
X = sc_X.fit_transform(X) 

In [102]:
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=0)

# Train Model 

In [103]:
clf = GaussianNB()

In [104]:
clf.fit(Xtrain, Ytrain)

In [105]:
YtestPred = clf.predict(Xtest)
accuracy_score(Ytest, YtestPred)

0.5833333333333334

In [117]:
print(classification_report(Ytest, YtestPred))

              precision    recall  f1-score   support

           0       0.57      1.00      0.72        52
           1       1.00      0.09      0.17        44

    accuracy                           0.58        96
   macro avg       0.78      0.55      0.44        96
weighted avg       0.76      0.58      0.47        96



In [119]:
# Mean abs error

print(mean_absolute_error(Ytest, YtestPred))

# ROC ares 

print(roc_auc_score(Ytest, YtestPred))

0.4166666666666667
0.5454545454545454


# Hyperparam Tuning

In [107]:
clf.get_params()

{'priors': None, 'var_smoothing': 1e-09}

In [108]:
# https://numpy.org/devdocs/reference/generated/numpy.logspace.html

paramGridNB = {
    'var_smoothing': np.logspace(-10, 1, 100)
}

gridSearch = GridSearchCV(estimator=GaussianNB(), param_grid=paramGridNB, cv=10, scoring='accuracy', n_jobs=-1)

In [109]:
gridSearch.fit(Xtrain, Ytrain)

In [110]:
gridSearch.best_params_, gridSearch.best_score_

({'var_smoothing': np.float64(10.0)}, np.float64(0.6369565217391304))

In [111]:
model = gridSearch.best_estimator_

YtestPredGrid = model.predict(Xtest)

accuracy_score(Ytest, YtestPredGrid)

0.6979166666666666

In [120]:
print(classification_report(Ytest, YtestPredGrid))

              precision    recall  f1-score   support

           0       0.65      0.94      0.77        52
           1       0.86      0.41      0.55        44

    accuracy                           0.70        96
   macro avg       0.76      0.68      0.66        96
weighted avg       0.75      0.70      0.67        96



In [None]:
# Mean abs errror 

print(mean_absolute_error(Ytest, YtestPredGrid))

0.3020833333333333


Exception ignored in: <function ResourceTracker.__del__ at 0x108c05b20>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x1088f9b20>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x11129db20>
Traceback (most recent call last

In [113]:
# check for overfitting 

YtrainPred = model.predict(Xtrain)
accuracy_score(Ytrain, YtrainPred)

0.6591928251121076