# Naive Bayes

The main purpose of this notebook is to run hyperparameter tuning to find the best settings for the chosen model, which will then be transferred to the main `models.ipynb` file.

In [4]:
# IMPORTS
from utils import *
from sklearn.naive_bayes import MultinomialNB, GaussianNB, CategoricalNB
from sklearn.feature_extraction.text import CountVectorizer

In [5]:
# READ AND SPLIT DATA
# Load the preprocessed dataset from its pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
df = pd.read_pickle("../../datasets/pickle/processed_action_movie_data.pkl")

# The target is the "rating" column; every other column is used as a feature.
y = df['rating']
X = df.drop(columns="rating")

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Encoding is applied after the split, on the train and test partitions.
# NOTE(review): catboost_encoding comes from utils; presumably it applies
# CatBoost (target) encoding to the features -- confirm against its definition.
X_train, X_test, y_train, y_test = catboost_encoding(X_train, X_test, y_train, y_test)

### Training

In [6]:
# Fit three Naive Bayes classifiers on the encoded training data.
# `fit` returns the estimator itself, so each name is bound to the fitted model.
clf1 = MultinomialNB().fit(X_train, y_train)
clf2 = GaussianNB().fit(X_train, y_train)
clf3 = CategoricalNB(fit_prior=False).fit(X_train, y_train)

# Hyperparameter candidates for the Gaussian model (clf2); clf1 and clf3 are
# not tuned in this cell.
# NOTE(review): every explicit prior lists three probabilities, which assumes
# the target has exactly three classes -- confirm against the values of `rating`.
param_grid = {
    'priors': [
        None,
        [0.25, 0.25, 0.5],
        [0.4, 0.3, 0.3],
    ],
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6],
}

# Exhaustive search over the grid with 5-fold cross-validation, ranked by macro-averaged F1.
grid_search = GridSearchCV(
    clf2,
    param_grid=param_grid,
    cv=5,
    scoring="f1_macro",
)

# Run the search on the training partition only; the test set is not touched here.
grid_search.fit(X_train, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)




Best parameters:  {'priors': None, 'var_smoothing': 1e-07}
Best score:  0.5920888838130373
