### Load dependencies

In [216]:
import pickle  # serialises the fitted model and vectorizer to disk
import re  # regular expressions, used for text cleaning
import warnings
from tkinter import *  # Tk widgets (Tk, Label, Text, Button, END) used by the GUI
from tkinter import messagebox

import numpy as np  # mathematical and logical operations
import pandas as pd  # data analysis
from sklearn.feature_extraction.text import CountVectorizer  # bag-of-words token counts
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (recall_score, accuracy_score,
                             precision_score, confusion_matrix)
from sklearn.model_selection import train_test_split

### Load dataset

In [217]:
# Read the tab-separated reviews file into a DataFrame.
df = pd.read_csv('Restaurant_Reviews.tsv',sep='\t')


In [218]:
df.head(5)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [219]:
df.shape

(1000, 2)

### Clean Data

In [220]:
def clean(x):
    """Normalise a raw review string for bag-of-words vectorisation.

    Steps, in order: lower-case the text (and map the typographic
    apostrophe U+2019 to a plain one), strip HTML tags, expand the
    contractions "can't", "don't" and "didn't", replace runs of 10-12
    digits/hyphens with the token ``mobno``, turn every remaining
    non-letter into a space, and collapse whitespace runs.

    Parameters
    ----------
    x : str
        Raw review text.

    Returns
    -------
    str
        Lower-case text made of ASCII letters and single spaces only.
        A leading or trailing space is not stripped.
    """
    # Lower-case before expanding contractions so that "Don't" / "CAN'T"
    # are handled as well; matching only the lower-case spelling used to
    # turn "Don't" into "don t" and drop the negation.
    x = x.lower().replace('\u2019', "'")
    # Remove HTML tags.
    x = re.sub(r'<.*?>', ' ', x)
    # Expand the negated contractions so "not" survives as its own token.
    x = re.sub(r"can't", 'can not', x)
    x = re.sub(r"don't", 'do not', x)
    x = re.sub(r"didn't", 'did not', x)
    # Replace phone-number-like runs with a placeholder token.
    x = re.sub(r'[\d-]{10,12}', 'mobno', x)
    # Drop punctuation and digits.
    x = re.sub(r'[^A-Za-z]', ' ', x)
    # Collapse consecutive whitespace into one space.
    return re.sub(r'\s+', ' ', x)

clean("<h1>Hello there 9876576767 8787878787 I can't 9 eat this &^%H@#$%</h1>")

' hello there mobno mobno i can not eat this h '

In [221]:
# Normalise every review with clean(); the Review column is overwritten in place.
df['Review'] = df.Review.apply(clean)

In [222]:
df.head(10)

Unnamed: 0,Review,Liked
0,wow loved this place,1
1,crust is not good,0
2,not tasty and the texture was just nasty,0
3,stopped by during the late may bank holiday of...,1
4,the selection on the menu was great and so wer...,1
5,now i am getting angry and i want my damn pho,0
6,honeslty it did not taste that fresh,0
7,the potatoes were like rubber and you could te...,0
8,the fries were great too,1
9,a great touch,1


### Split Data

In [223]:
# Dataset dimensions first, then the class balance of the target column.
# value_counts() is the cell's final expression so the notebook renders it;
# as a non-final bare expression its result was silently discarded.
print(df.shape)
df.Liked.value_counts()

(1000, 2)


In [224]:
# Hold out 20% of the reviews for evaluation; the fixed seed keeps the split
# reproducible across runs.
reviews = df['Review'].values
labels = df['Liked'].values
xtrain, xtest, ytrain, ytest = train_test_split(
    reviews,
    labels,
    test_size=.20,
    random_state=10,
)

In [225]:
# Sanity-check the sizes of the two splits.
print(xtrain.shape, xtest.shape)

(800,) (200,)


### Count Vectorizer

In [226]:
# Learn the bag-of-words vocabulary from the training reviews only, so the
# feature space is not influenced by the held-out reviews.
cv = CountVectorizer()
cv.fit(xtrain)

CountVectorizer()

In [227]:
# Size of the vocabulary learned from the training reviews. ``vocabulary_``
# (term -> column index) is used instead of ``get_feature_names()``, which
# was deprecated in scikit-learn 1.0 and removed in 1.2.
print(len(cv.vocabulary_))

# Training counts are densified; the test counts stay as a sparse matrix.
cv_train = cv.transform(xtrain).toarray()
cv_test = cv.transform(xtest)
print(cv_train.shape)
print(cv_test.shape)

1763
(800, 1763)
(200, 1763)


In [228]:
cv_train[:2]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

### Train Model

In [229]:

# Logistic-regression classifier; no arguments are passed, so the library defaults apply.
lg = LogisticRegression()

In [230]:
# Fit the classifier on the dense training count matrix and its labels.
lg.fit(cv_train,ytrain)

LogisticRegression()

### Prediction

In [231]:
# Predicted labels for the held-out reviews; reused by the evaluation cell.
lg_pred = lg.predict(cv_test)


### Evaluation

In [232]:
# Accuracy on both splits, followed by the held-out confusion matrix and the
# precision, recall and accuracy (in that order) of the test predictions.
train_acc = lg.score(cv_train, ytrain)
test_acc = lg.score(cv_test, ytest)

print("Logistic Regression")
print("Training Score: ", train_acc)
print("Testing Score: ", test_acc)
print()
print("Confusion Matrix")
print(confusion_matrix(ytest, lg_pred))
for metric in (precision_score, recall_score, accuracy_score):
    print(metric(ytest, lg_pred))

Logistic Regression
Training Score:  0.98625
Testing Score:  0.825

Confusion Matrix
[[83 17]
 [18 82]]
0.8282828282828283
0.82
0.825


In [233]:
from sklearn.model_selection import GridSearchCV,KFold

In [234]:
# Persist the fitted classifier. The ``with`` block closes (and therefore
# flushes) the file even if pickling raises.
filename = "review_model_log.pkl"
with open(filename, "wb") as file:
    pickle.dump(lg, file)

In [235]:
# Reload the classifier from disk as ``c`` (the name the GUI callback uses).
# The ``with`` block closes the handle, which was previously left open.
# NOTE(security): pickle.load can execute arbitrary code - only load files
# this notebook wrote itself.
with open("review_model_log.pkl", "rb") as fl:
    c = pickle.load(fl)
c

LogisticRegression()

In [236]:
# Persist the fitted vectorizer. The ``with`` block closes ``cfile`` itself;
# the earlier code closed the unrelated ``file`` handle, leaving this pickle
# open and possibly unflushed when the next cell read it back.
countvector = "review_cv.pkl"
with open(countvector, "wb") as cfile:
    pickle.dump(cv, cfile)

In [237]:
# Reload the vectorizer from disk, rebinding ``cv``. The ``with`` block closes
# the handle, which was previously left open.
# NOTE(security): pickle.load can execute arbitrary code - only load files
# this notebook wrote itself.
with open("review_cv.pkl", "rb") as cfl:
    cv = pickle.load(cfl)
cv

CountVectorizer()

### Create GUI

In [238]:
def get_pred():
    """Classify the review typed into the text box and display the verdict.

    Reads the full contents of the ``lb`` text widget, normalises it with
    ``clean`` (the same preprocessing applied to the training reviews),
    vectorises it with ``cv`` and predicts with the reloaded model ``c``.
    The verdict is written to the ``lbl1`` label and shown in a message box.
    Whitespace-only input triggers a warning box and no prediction.
    """
    rv = lb.get("1.0", END)
    if not rv.strip():
        # Nothing to score - ask for input rather than classifying blank text.
        messagebox.showwarning("Result", "Please enter a review first.")
        return
    # The vectorizer was fitted on clean()-ed text, so the typed review must
    # go through the same normalisation; otherwise contractions such as
    # "don't" never produce the "not" token the model was trained on.
    tst = cv.transform([clean(rv)])
    rs = c.predict(tst)
    label = {0: "Customer is not satisfied 😞", 1: "Customer is satisfied 😊"}
    dd = label[rs[0]]
    lbl1.config(text=dd, fg="green")
    messagebox.showinfo("Result", dd)
# Build the window. The module-level names ``lb`` and ``lbl1`` are read by
# get_pred(), so they must keep these names.
main = Tk()
main.title("SENTIMENT ANALYSIS ON FOOD REVIEWS")
# Static heading. No master is passed to either label - presumably they
# attach to the default root, i.e. ``main``; TODO confirm.
lbl= Label(text="SENTIMENT ANALYSIS",fg="red",bg="white",font=("georgia",20))
lbl.pack()

# Initially empty label; get_pred() writes the verdict here.
lbl1= Label(bg="white",font=("georgia",20))
lbl1.pack()

# Multi-line input box for the review text.
lb = Text(main,)
lb.pack(pady=3)

# Clicking "Check" runs get_pred().
btn = Button(main,bg="green",fg="white",text="Check",height=5,command=get_pred)
btn.pack(fill="x",pady=3)
# Hand control to the Tk event loop.
main.mainloop()