In [7]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [8]:
# Read the raw telecom data and one-hot encode its categorical columns.
# drop_first=True drops the first level of each encoded column.
telecom = pd.read_csv('Telecom.csv')
dum_tel = pd.get_dummies(telecom, drop_first=True)

# Separate the target from the features.
# 'Response_Y' is presumably the dummy created for Response == 'Y' -- confirm
# against the columns of Telecom.csv.
y = dum_tel['Response_Y']
X = dum_tel.drop(columns='Response_Y')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2021
)

In [9]:
# Model building: fit Bernoulli Naive Bayes on the training split (this is
# where the prior probabilities are estimated).
nb = BernoulliNB()
nb.fit(X_train, y_train)

# Hand-built frame with the four TT_gt_100 x Gender combinations, dummy-encoded
# with the same get_dummies settings as the training data.
# NOTE(review): nothing in this cell consumes tst / dum_tst -- confirm that
# another cell still needs them.
tst = pd.DataFrame(
    np.array([['n', 'female'],
              ['n', 'male'],
              ['y', 'male'],
              ['y', 'female']]),
    columns=['TT_gt_100', 'Gender'],
)
dum_tst = pd.get_dummies(tst, drop_first=True)

# Apply the fitted model to the held-out split: hard class labels and the
# posterior class probabilities.
y_pred = nb.predict(X_test)
y_prob = nb.predict_proba(X_test)

# Report, in order: confusion matrix, per-class metrics, overall accuracy.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    sep='\n',
)

[[13 12]
 [ 3 17]]
              precision    recall  f1-score   support

       False       0.81      0.52      0.63        25
        True       0.59      0.85      0.69        20

    accuracy                           0.67        45
   macro avg       0.70      0.69      0.66        45
weighted avg       0.71      0.67      0.66        45

0.6666666666666666


In [11]:

from sklearn.model_selection import GridSearchCV, StratifiedKFold


# 5-fold stratified cross-validation; shuffling with a fixed seed keeps the
# fold assignment reproducible.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Candidate smoothing values 1.0, 1.1, ..., 4.9 (40 values).
# Built from an integer range and scaled down: a float step such as
# np.arange(1, 5, 0.1) accumulates rounding error and yields values like
# 4.900000000000004 instead of 4.9.
alpha = np.arange(10, 50) / 10

# Tune alpha on the full data set by cross-validated log loss. The scorer is
# the negated log loss, so scores closer to zero are better.
params = {'alpha': alpha}
gcv = GridSearchCV(nb, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

# Keep the per-candidate cross-validation results as a table for inspection.
# NOTE(review): the recorded run selected the largest alpha in the grid, so the
# optimum may lie beyond the searched range -- consider widening it.
df = pd.DataFrame(gcv.cv_results_)

{'alpha': 4.900000000000004}
-0.4228118580766681
