# Use of Bernoulli Naive Bayes

In [8]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, balanced_accuracy_score, accuracy_score
from sklearn.pipeline import Pipeline

from nltk.corpus import stopwords
stop_words = stopwords.words('english')

from sklearn.naive_bayes import BernoulliNB


In [13]:
# Load the labelled comments dataset from disk.
csv_path = './datasets/final.csv'
data = pd.read_csv(csv_path)

In [None]:
# Extra stop-word lists (left empty here); two distinct list objects.
top_49ers_words, top_nfl_words = [], []

In [12]:
# Merge NLTK's English stop words with the two custom word lists.
# - Built from `stopwords.words('english')` rather than from `stop_words` itself, so the
#   cell can be re-run without failing on `set + list`.
# - Uses `top_49ers_words` (the name actually defined above); `top49ers_words` was a typo.
# - Stored as a de-duplicated, sorted list: scikit-learn >= 1.2 validates the vectorizer's
#   `stop_words` argument as 'english', a list, or None, and rejects a set.
stop_words = sorted(
    set(stopwords.words('english')) | set(top_49ers_words) | set(top_nfl_words)
)

'potato'

In [3]:
# Peek at the first five rows to sanity-check the load.
data.head(n=5)

Unnamed: 0,comments,label
0,[Baldinger] .@49ers here is my opening script ...,1
1,[49ers on NBCS] .@frankgore is loving what he’...,1
2,Chiefs fans be like,1
3,deal of the day,1
4,This guy made a really complex Python simulati...,1


In [4]:
# Features are the raw comment text; the target is the label column.
X, y = data['comments'], data['label']

In [5]:
# Hold out a test split (library-default size); the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Two-stage text classifier: token counts (minus stop words) feeding Bernoulli Naive Bayes.
bow_step = ('tvec', CountVectorizer(stop_words=stop_words))
nb_step = ('bnb', BernoulliNB())
pipe = Pipeline(steps=[bow_step, nb_step])

In [7]:
# Fit the vectorizer + classifier pipeline on the training split.
pipe.fit(X_train, y_train)

# This is accuracy on the same data the model was fitted on, so it is an optimistic
# figure; compare it with the score on X_test before drawing conclusions.
accuracy_bnb = pipe.score(X_train, y_train)
print(f'Bernoulli Naive Bayes TRAIN ACCURACY: {round(accuracy_bnb,3)}')

Bernoilli Naive Bayes ACCURACY: 0.703


## Confusion Matrix Results

In [8]:
# Predicted labels for the held-out test split.
preds = pipe.predict(X=X_test)

In [9]:
# Flatten the confusion matrix row by row; unpacking into four names requires it to be 2x2.
conf_mat = confusion_matrix(y_test, preds)
tn, fp, fn, tp = conf_mat.ravel()

In [10]:
# Sensitivity (true-positive rate): true positives over all actual positives.
actual_positives = tp + fn
sensitivity = tp / actual_positives
sensitivity

0.9402390438247012

In [11]:
# Specificity (true-negative rate): true negatives over all actual negatives.
actual_negatives = tn + fp
specificity = tn / actual_negatives
specificity

0.3346774193548387

In [12]:
# Precision: true positives over everything predicted positive.
predicted_positives = tp + fp
precision = tp / predicted_positives
precision

0.5885286783042394

In [13]:
 balanced_accuracy_score(y_test, preds)

0.63745823158977

In [14]:
def total_metrics(insta_model, X_test, y_test, model_name="BERNOULLI NAIVE BAYES"):
    """Print a summary of binary-classification metrics for a fitted model.

    Predicts on ``X_test`` and prints accuracy, balanced accuracy, sensitivity,
    specificity and precision, each rounded to three decimals, under an
    underlined title.

    Parameters
    ----------
    insta_model : fitted estimator exposing ``predict(X)``.
    X_test : features to predict on.
    y_test : true labels for ``X_test``.
    model_name : str, optional
        Name shown in the title line. Defaults to the Bernoulli Naive Bayes
        title this function originally hard-coded.

    Returns
    -------
    None. Results are printed only.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2 (it is unpacked into exactly four
        counts), i.e. the problem is not binary.
    """
    def _ratio(numerator, denominator):
        # A zero denominator (e.g. no positive predictions) yields NaN instead of
        # triggering a divide-by-zero warning.
        return numerator / denominator if denominator else float("nan")

    preds = insta_model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()

    metrics = [
        ("Accuracy", accuracy_score(y_test, preds)),
        ("Balanced Accuracy", balanced_accuracy_score(y_test, preds)),
        ("Sensitivity", _ratio(tp, tp + fn)),
        ("Specificity", _ratio(tn, tn + fp)),
        ("Precision", _ratio(tp, tp + fp)),
    ]

    # Joining with U+0332 (combining low line) underlines every character; the
    # trailing space in the title makes sure the last letter is underlined too.
    print("\u0332".join(f"RESULTS OF A {model_name} MODEL "))
    print('')
    for label, value in metrics:
        # Labels are right-aligned to a common 19-character column.
        print(f"{label + ':':>19} {round(value,3)}")
        print('')

In [15]:
# Print the metric summary for the fitted pipeline on the held-out split.
total_metrics(insta_model=pipe, X_test=X_test, y_test=y_test)

R̲E̲S̲U̲L̲T̲S̲ ̲O̲F̲ ̲A̲ ̲B̲E̲R̲N̲O̲U̲L̲L̲I̲ ̲N̲A̲I̲V̲E̲ ̲B̲A̲Y̲E̲S̲ ̲M̲O̲D̲E̲L̲ 

          Accuracy: 0.639

  Balance Accuracy: 0.637

       Sensitivity: 0.94

       Specificity: 0.335

         Precision: 0.589



In [16]:
# Accuracy on the training split (the data the model was fitted on).
pipe.score(X=X_train, y=y_train)

0.7032085561497327

In [17]:
# Accuracy on the held-out test split.
pipe.score(X=X_test, y=y_test)

0.6392785571142284