In [50]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from pathlib import Path
import pickle

import pandas as pd
import numpy as np
from review import Review

In [71]:
##################################################################
### Multinomial NaiveBayes Model for Sentiment Analysis Review ###
##################################################################
class NaiveBayes_Model:
    def __init__(self):
        self.model = MultinomialNB()
        self.accuracy = None
        self.vectorizer = None
        
    def train(self, X=None, y=None, debug=False):
        """
            X: the training data input
            y: the correct label for the training data
        """
        pickle_path = Path ("multinomial_NB.pickle")
        if not debug and Path.exists (pickle_path):
                self.vectorizer, self.model = pickle.load (open(pickle_path, "rb"))
                print ("!!! Multinomial_NB model already trained.....skipping the training")
                print("[help]: call method train() with debug=True argument")
                return;
        
        assert X is not None and y is not None, "No data given, How do i train ? (.>_<.)"
        self.vectorizer = TfidfVectorizer ()
        X = self.vectorizer.fit_transform (X)
        self.feature_names = self.vectorizer.get_feature_names()
        
        X_train, X_test, y_train, y_test = train_test_split (X, y, test_size=0.2, random_state=7)
        
        self.model.fit (X_train, y_train)
        predicted = self.model.predict (X_test)
        self.accuracy_score = accuracy_score (y_test, predicted)
        print (f"[Accuracy Score]: {self.accuracy_score}")
        
        pickled_data = (self.vectorizer, self.model)
        pickle.dump (pickled_data, open(pickle_path, "wb"))
    
    def predict(self, msg):
        if not isinstance (msg, pd.Series):
            msg = pd.Series ([msg])
        msg = self.vectorizer.transform (msg)
        return self.model.predict (msg)

In [72]:
model = NaiveBayes_Model ()
r = Review()

In [73]:
df = pd.read_csv ("../datasets/Restaurant_Reviews.tsv", sep='\t')
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [64]:
X = df.iloc[:]['Review']
y = df.iloc[:]['Liked']

In [65]:
X.head()

0                             Wow... Loved this place.
1                                   Crust is not good.
2            Not tasty and the texture was just nasty.
3    Stopped by during the late May bank holiday of...
4    The selection on the menu was great and so wer...
Name: Review, dtype: object

In [66]:
y.head()

0    1
1    0
2    0
3    1
4    1
Name: Liked, dtype: int64

In [82]:
splits = [r.pre_process (x) for x in X]
processed_X = [' '.join (map(lambda span: span.text, r.pre_process (x))) for x in X]

In [84]:
splits

[[Wow... Loved this place],
 [Crust is not good],
 [Not tasty, the texture was just nasty],
 [Stopped by during the late May bank holiday off Rick Steve recommendation,
  loved it],
 [The selection on the menu was great, so were the prices],
 [Now I am getting angry, I want my damn pho],
 [Honeslty it didn't taste THAT fresh, )],
 [The potatoes were like rubber,
  you could tell they had been made up ahead of time being kept under a warmer],
 [The fries were great too],
 [A great touch],
 [Service was very prompt],
 [Would not go back],
 [The cashier had no care what so ever on what I had to say it still ended up being wayyy overpriced],
 [I tried the Cape Cod ravoli, chicken, with cranberry...mmmm!],
 [I was disgusted because I was pretty sure that was human hair],
 [I was shocked because no signs indicate cash only],
 [Highly recommended],
 [Waitress was a little slow in service],
 [This place is not worth your time, let alone Vegas],
 [did not like at all],
 [The Burrittos Blah!],
 

In [68]:
processed_X[:5]

['Wow... Loved this place',
 'Crust is not good',
 'Not tasty the texture was just nasty',
 'Stopped by during the late May bank holiday off Rick Steve recommendation loved it',
 'The selection on the menu was great so were the prices']

In [69]:
len (processed_X)

1000

In [74]:
model.train (processed_X, y, debug=True)

[Accuracy Score]: 0.74


In [75]:
model.predict ("I didn't like the buffet.")

array([0])

In [76]:
model.predict ("I kind of not liked the service.")

array([0])

In [77]:
model.predict ("Food was good but service could have been better.")

array([1])

In [79]:
model.predict ("Buffet was ok but service ruined it.")

array([0])