# Multinomial Naive Bayes

## Model 1 - Frequency of words

##### Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import feather

from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectPercentile, f_classif

#### Read FTR

In [None]:
# Load the Model 1 training set from its Feather file.
# All columns are read (columns=None) with multi-threaded reading enabled.
train_m1 = pd.read_feather(
    "../dataset/train_m1.ftr",
    columns=None,
    use_threads=True,
)

In [None]:
# Remove the 'level_0' column so it is not used as a feature.
# NOTE(review): presumably a leftover index column from an earlier
# reset_index() before the file was written — confirm.
train_m1 = train_m1.drop(labels=["level_0"], axis="columns")

# Preview the first rows of the cleaned training frame.
train_m1.head()

In [None]:
# Load the Model 1 test set from its Feather file.
# All columns are read (columns=None) with multi-threaded reading enabled.
test_m1 = pd.read_feather(
    "../dataset/test_m1.ftr",
    columns=None,
    use_threads=True,
)

In [None]:
# Remove the 'level_0' column so the test features match the training features.
# NOTE(review): presumably a leftover index column from an earlier
# reset_index() before the file was written — confirm.
test_m1 = test_m1.drop(labels=["level_0"], axis="columns")

# Preview the first rows of the cleaned test frame.
test_m1.head()

In [None]:
# Separate the label column ('polarityClass') from the training features:
# keep it as the target vector, then remove it from the feature frame.
train_m1_target = train_m1.loc[:, "polarityClass"]
train_m1 = train_m1.drop(labels=["polarityClass"], axis="columns")

In [None]:
# Separate the label column ('polarityClass') from the test features:
# keep it as the target vector, then remove it from the feature frame.
test_m1_target = test_m1.loc[:, "polarityClass"]
test_m1 = test_m1.drop(labels=["polarityClass"], axis="columns")

### Hyperparameter tuning using GridSearchCV

In [None]:
# Candidate values for the additive (Laplace/Lidstone) smoothing parameter.
# Only strictly positive values are searched: alpha=0 means no smoothing, so a
# word never seen with a class gets zero probability; depending on the
# scikit-learn version that value is either clipped with a warning or may
# cause numerical errors.
parameters = {
    'alpha': [0.01, 0.1, 0.5, 1.0],
}

# Base estimator whose alpha is tuned
naive_bayes_model = MultinomialNB()

# Exhaustive search over the grid, scored by accuracy. cv=5 is spelled out so
# the fold count matches the 5-fold cross_val_score check run later in this
# notebook and does not depend on the installed scikit-learn default.
opt_model_nb = GridSearchCV(
    naive_bayes_model,
    parameters,
    scoring='accuracy',
    cv=5,
    verbose=1,
)

# Run the search on the training data only
opt_model_nb.fit(train_m1, train_m1_target)

# Report the selected estimator, its parameters and its mean CV accuracy
print(opt_model_nb.best_estimator_)
print(opt_model_nb.best_params_, opt_model_nb.best_score_)

### Create and fit the model

In [None]:
# Build the final classifier from the hyperparameters selected by the grid
# search (opt_model_nb) rather than a hard-coded alpha, so the model always
# reflects the search result even when the parameter grid changes.
naive_bayes_model = MultinomialNB(**opt_model_nb.best_params_)

# Train on the full Model 1 training set
naive_bayes_model.fit(train_m1, train_m1_target)

# Display the fitted estimator as the cell output
naive_bayes_model

In [None]:
# Predict the class label of every sample in the Model 1 test set.
predictions = naive_bayes_model.predict(test_m1)

# Cross-tabulate actual labels (rows) against predicted labels (columns);
# margins=True adds the row/column totals.
confusion = pd.crosstab(
    index=test_m1_target,
    columns=predictions,
    rownames=["Actual"],
    colnames=["Predicted"],
    margins=True,
)
print(confusion)

In [None]:
# Fraction of test samples whose predicted label equals the true label.
score = accuracy_score(y_true=test_m1_target, y_pred=predictions)
print('Accuracy:{0:f}'.format(score))

In [None]:
# Text report of the per-class metrics for the Model 1 test predictions.
print(
    classification_report(
        y_true=test_m1_target,
        y_pred=predictions,
    )
)

In [None]:
# Estimate generalisation with 5-fold cross-validation on the training data,
# using the estimator's default scorer, and keep the mean of the fold scores.
pecc_mnb = cross_val_score(
    estimator=naive_bayes_model,
    X=train_m1,
    y=train_m1_target,
    cv=5,
).mean()

# Display the mean cross-validated score as the cell output
pecc_mnb

## Model 2 - Existence of words

#### Read FTR

In [None]:
# Load the Model 2 training set from its Feather file.
# All columns are read (columns=None) with multi-threaded reading enabled.
train_m2 = pd.read_feather(
    "../dataset/train_m2.ftr",
    columns=None,
    use_threads=True,
)

In [None]:
# Remove the 'level_0' column so it is not used as a feature.
# NOTE(review): presumably a leftover index column from an earlier
# reset_index() before the file was written — confirm.
train_m2 = train_m2.drop(labels=["level_0"], axis="columns")

# Preview the first rows of the cleaned training frame.
train_m2.head()

In [None]:
# Load the Model 2 test set from its Feather file.
# All columns are read (columns=None) with multi-threaded reading enabled.
test_m2 = pd.read_feather(
    "../dataset/test_m2.ftr",
    columns=None,
    use_threads=True,
)

In [None]:
# Remove the 'level_0' column so the test features match the training features.
# NOTE(review): presumably a leftover index column from an earlier
# reset_index() before the file was written — confirm.
test_m2 = test_m2.drop(labels=["level_0"], axis="columns")

# Preview the first rows of the cleaned test frame.
test_m2.head()

In [None]:
# Separate the label column ('polarityClass') from the training features:
# keep it as the target vector, then remove it from the feature frame.
train_m2_target = train_m2.loc[:, "polarityClass"]
train_m2 = train_m2.drop(labels=["polarityClass"], axis="columns")

In [None]:
# Separate the label column ('polarityClass') from the test features:
# keep it as the target vector, then remove it from the feature frame.
test_m2_target = test_m2.loc[:, "polarityClass"]
test_m2 = test_m2.drop(labels=["polarityClass"], axis="columns")

### Hyperparameter tuning using GridSearchCV

In [None]:
# Candidate values for the additive (Laplace/Lidstone) smoothing parameter.
# Only strictly positive values are searched: alpha=0 means no smoothing, so a
# word never seen with a class gets zero probability; depending on the
# scikit-learn version that value is either clipped with a warning or may
# cause numerical errors.
parameters = {
    'alpha': [0.01, 0.1, 0.5, 1.0],
}

# Base estimator whose alpha is tuned
naive_bayes_model = MultinomialNB()

# Exhaustive search over the grid, scored by accuracy. cv=5 is spelled out so
# the fold count matches the 5-fold cross_val_score check run later in this
# notebook and does not depend on the installed scikit-learn default.
opt_model_nb = GridSearchCV(
    naive_bayes_model,
    parameters,
    scoring='accuracy',
    cv=5,
    verbose=1,
)

# Run the search on the Model 2 training data only
opt_model_nb.fit(train_m2, train_m2_target)

# Report the selected estimator, its parameters and its mean CV accuracy
print(opt_model_nb.best_estimator_)
print(opt_model_nb.best_params_, opt_model_nb.best_score_)

### Create and fit the model

In [None]:
# Build the final classifier from the hyperparameters selected by the grid
# search (opt_model_nb) rather than a hard-coded alpha, so the model always
# reflects the search result even when the parameter grid changes.
naive_bayes_model = MultinomialNB(**opt_model_nb.best_params_)

# Train on the full Model 2 training set
naive_bayes_model.fit(train_m2, train_m2_target)

# Display the fitted estimator as the cell output
naive_bayes_model

In [None]:
# Predict the class label of every sample in the Model 2 test set.
predictions = naive_bayes_model.predict(test_m2)

# Cross-tabulate actual labels (rows) against predicted labels (columns);
# margins=True adds the row/column totals.
confusion = pd.crosstab(
    index=test_m2_target,
    columns=predictions,
    rownames=["Actual"],
    colnames=["Predicted"],
    margins=True,
)
print(confusion)

In [None]:
# Fraction of test samples whose predicted label equals the true label.
score = accuracy_score(y_true=test_m2_target, y_pred=predictions)
print('Accuracy:{0:f}'.format(score))

In [None]:
# Text report of the per-class metrics for the Model 2 test predictions.
print(
    classification_report(
        y_true=test_m2_target,
        y_pred=predictions,
    )
)

In [None]:
# Estimate generalisation with 5-fold cross-validation on the training data,
# using the estimator's default scorer, and keep the mean of the fold scores.
pecc_mnb = cross_val_score(
    estimator=naive_bayes_model,
    X=train_m2,
    y=train_m2_target,
    cv=5,
).mean()

# Display the mean cross-validated score as the cell output
pecc_mnb