# Grid Search - Naive Bayes

Perform a basic grid search using common hyper-parameters.

Gaussian Naive Bayes has only ONE hyperparameter that is tuned here, `var_smoothing`, but there are actually several Naive Bayes (NB) models.

TL;DR: I have yet to see this hyperparameter affect any scoring metric.


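There are also other NB models: ComplementNB(), MultinomialNB(), and CategoricalNB(). They all raise errors with this data (presumably because the standardized features contain negative values, which these models do not accept), so only GaussianNB() and BernoulliNB() are searched below.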

In [49]:
import timeit
import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.naive_bayes import GaussianNB, ComplementNB, MultinomialNB, BernoulliNB, CategoricalNB

In [50]:
# Candidate estimators and their display names; the two lists (and the
# parameter grids below) are aligned by position and zipped together later.
model_names = ['GaussianNB', 'BernoulliNB']
models = [GaussianNB(), BernoulliNB()]

# One parameter grid per model, in the same order as `models`.
# Keys use the Pipeline "<step>__<param>" convention.
grid_params_arr = [
    # GaussianNB: search over var_smoothing with standard scaling fixed on.
    {
        'scaler__with_mean': [True],
        'scaler__with_std': [True],
        'classifier__var_smoothing': [1e-13, 1e-7, 1e-4, 0.1],
    },
    # BernoulliNB: no classifier parameters searched, only the fixed scaler.
    {
        'scaler__with_mean': [True],
        'scaler__with_std': [True],
    },
]

# Scoring metrics; a separate grid search is run for each one.
score_metrics = ['accuracy', 'precision', 'recall', 'f1']

# Keep only the first 100 iris samples (features and matching targets).
iris = datasets.load_iris()
X, y = iris.data[:100], iris.target[:100]

# 3-fold cross-validation for the grid search, plus a held-out test split.
kf = KFold(n_splits=3)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [51]:

# Run one grid search per (model, scoring metric) pair and report the
# held-out test score, fit time, and the best cross-validated configuration.
for name, model, grid_params in zip(model_names, models, grid_params_arr):
    print(f"\n\n----------------------{name}-------------------------------------------------")

    # The pipeline does not depend on the metric, so build it once per model.
    # GridSearchCV clones the estimator it is given, so reusing the same
    # (unfitted) pipeline across searches is safe.
    pipe = Pipeline([('scaler', StandardScaler()), ('classifier', model)])

    for score_metric in score_metrics:
        grid = GridSearchCV(pipe, grid_params, cv=kf, scoring=score_metric)

        # Time a single fit of the whole grid search (all candidates x folds).
        timing = timeit.timeit(lambda: grid.fit(X_train, y_train), number=1)

        # grid.score evaluates the refit best estimator with `score_metric`.
        score = grid.score(X_test, y_test)

        print(f"------------{score_metric}--------------")

        # Use the loop variable `name`; the original referenced an undefined
        # `model_name`, which raises NameError on a fresh kernel (or silently
        # prints a stale value left over from an earlier cell).
        print(f"{name} {score_metric}: {score}")
        print("seconds:", timing)
        print(grid.best_score_)
        print(grid.best_params_)
        print(grid.best_estimator_)
        print("\n\n")



----------------------GaussianNB-------------------------------------------------
------------accuracy--------------
GaussianNB accuracy: 1.0
seconds: 0.049104042999942976
1.0
{'classifier__var_smoothing': 1e-13, 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier', GaussianNB(var_smoothing=1e-13))])



------------precision--------------
GaussianNB precision: 1.0
seconds: 0.0419771839999612
1.0
{'classifier__var_smoothing': 1e-13, 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier', GaussianNB(var_smoothing=1e-13))])



------------recall--------------
GaussianNB recall: 1.0
seconds: 0.038465924999854906
1.0
{'classifier__var_smoothing': 1e-13, 'scaler__with_mean': True, 'scaler__with_std': True}
Pipeline(steps=[('scaler', StandardScaler()),
                ('classifier', GaussianNB(var_smoothing=1e-13))])



------------f1---------