In [2]:
### Scikit-Learn ML Classifier Algorithms
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# Custom Module: XGBoost
from xgboost import XGBClassifier
## MultiLayer Perceptron Classifier
from sklearn.neural_network import MLPClassifier


from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from pathlib import Path
import pickle

import pandas as pd
import numpy as np

from review import Review

In [3]:
class Base_Model (Review):
    def __init__ (self, X=None, y=None, debug=False, model = None):
        """ 
        X: the actual text of review
        y: the sentiment score either positive or negative
        model: should be a function object
        """
        super().__init__()
        self.model = model()
        self.model_name = model.__name__
        pickle_path = Path (f"{self.model_name}-trained_data.pickle")
        if not debug and Path.exists (pickle_path):
            self.vectorizer, self.X_train,self.X_test, self.y_train, self.y_test = pickle.load (open(pickle_path, "rb"))
        else:
            assert X is not None and y is not None, "Dataset X and y can't be EMPTY"
            processed_X = list(' '.join (self.pre_process(x)) for x in X)
            self.vectorizer = TfidfVectorizer(vocabulary = list(self.features))
            processed_X = self.vectorizer.fit_transform (processed_X)
            self.X_train, self.X_test, self.y_train, self.y_test = train_test_split (processed_X, y, random_state=7, test_size=0.2)

            data = (self.vectorizer, self.X_train, self.X_test, self.y_train, self.y_test)
            pickle.dump(data, open (pickle_path, "wb"))
            
    def train (self):
        self.model.fit (self.X_train, self.y_train)
        predicted = self.model.predict (self.X_test)
        self.accuracy = accuracy_score (self.y_test, predicted)
        print (f"Accuracy of {self.model_name} Model: {self.accuracy}")
        print (f"Confusion Matrix for {self.model_name} Model: ")
        print (confusion_matrix (self.y_test, predicted))
        
    def predict (self, msg):
        if not isinstance (msg, pd.Series):
            msg = pd.Series ([msg])
        msg = self.vectorizer.transform (msg)
        return self.model.predict (msg)

In [4]:
# MultinomialNB Model
class NaiveBayes_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__(model = MultinomialNB, debug=debug, X=X, y=y)

In [5]:
# Random Forest Classifier Model
class RandomForestClassifier_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__(model = RandomForestClassifier, debug=debug, X = X, y = y)

In [6]:
# Decision Tree Classifier Model
class DecisionTreeClassifier_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__(model=DecisionTreeClassifier, debug=debug, X=X, y=y)

In [7]:
# Support Vector Machine Classifier Model
class SVC_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__ (model = SVC, debug=debug, X=X, y=y)

In [8]:
# K-Nearest Neighbors Classifier Model
class KNeighborsClassifier_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__ (model = KNeighborsClassifier, debug=debug, X=X, y=y)

In [9]:
# Multi Layer Perceptron Model
class MLPClassifier_Model (Base_Model):
    def __init__ (self, X=None, y = None, debug=False):
        super().__init__(model = MLPClassifier, debug=debug, X = X, y = y)

In [11]:
if __name__ == '__main__':
    def trial():
        df = pd.read_csv ("../datasets/Restaurant_Reviews.tsv", sep='\t')
        print (df.head())
        X = df.loc[:]['Review']
        y = df.loc[:]['Liked']

        print (X.head())
        print (y.head())

        rfm = RandomForestClassifier_Model(X=X, y = y, debug=False)
        rfm.train()

        mlpc = MLPClassifier_Model()
        mlpc.model.max_iter = 1000
        mlpc.train()
        
        nb = NaiveBayes_Mode(X=X, y=y, debug=False)
        nb.train()
        
        msg = "I did not like the food."
        print(msg, mlpc.predict(msg))
        print(msg, nb.predict(msg))
        print("\n\n")
        
        msg = "Food was good but service was not good."
        print (msg, mlpc.predict (msg))
        msg = "Food was amazing. Must go"
        print (msg, mlpc.predict (msg))
        msg = "Waste of Money"
        print (msg, mlpc.predict (msg))
        msg = "Kind of liked the pizza, but starters were the speciality."
        print (msg, mlpc.predict (msg))
        msg = """The Tikkas and kebabs are wonderful. Special mention to the Fish Kaali mirch and Hazarvi Tikka. The Chicken Biryani as they say is the best in Town. They have a small diner as well as Delivery service which delivers anywhere within 15kms. The Deluxe Non Veg Thali with Butter Chicken and Paratha is also worthJ """
        print (msg, mlpc.predict (msg))
        msg = "horrible service visited last evening for a family dinner around 8.45 PM which time the place was almost vacant.   the service was so horrible and poor that had to shout on the waiters and service team. had to wait for finger bowls for almost 25 min even after which these were not circulated. After repeated requests bill was produced after almost 20 min  in all total waiting post dinner was around 40 min. card machine was not working and had to pay via Paytm.  it was a worst experience and first and last visit."
        print (msg, mlpc.predict (msg))
    
    msg = """I was pleasantly surprised on my first visit not very long ago & now I have already visited about a dozen times & am taking friends who did not know about the place.
The food in this restaurant is definitely something to write home about. It's fresh, almost akin to fine dining at half the price. I believe the best place to dine out after Jaypee Greens in Greater Noida.
    """
    
    import nltk
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    
    sia = SentimentIntensityAnalyzer()
    print (msg)
    print (sia.polarity_scores(msg))


I was pleasantly surprised on my first visit not very long ago & now I have already visited about a dozen times & am taking friends who did not know about the place.
The food in this restaurant is definitely something to write home about. It's fresh, almost akin to fine dining at half the price. I believe the best place to dine out after Jaypee Greens in Greater Noida.
    
{'neg': 0.0, 'neu': 0.724, 'pos': 0.276, 'compound': 0.9603}


In [12]:
trial()

                                              Review  Liked
0                           Wow... Loved this place.      1
1                                 Crust is not good.      0
2          Not tasty and the texture was just nasty.      0
3  Stopped by during the late May bank holiday of...      1
4  The selection on the menu was great and so wer...      1
0                             Wow... Loved this place.
1                                   Crust is not good.
2            Not tasty and the texture was just nasty.
3    Stopped by during the late May bank holiday of...
4    The selection on the menu was great and so wer...
Name: Review, dtype: object
0    1
1    0
2    0
3    1
4    1
Name: Liked, dtype: int64




Accuracy of RandomForestClassifier Model: 0.78
Confusion Matrix for RandomForestClassifier Model: 
[[102   7]
 [ 37  54]]


AssertionError: Dataset X and y can't be EMPTY