# Objective
1. read data
2. analyze data
3. Predict the customer's satisfaction (1: satisfied, 0: unsatisfied)

# Read data

In [None]:
import pandas as pd

# Dataset source: https://www.kaggle.com/vigneshwarsofficial/reviews
# The file is tab-separated: one review text and its 0/1 `Liked` label per row.
df = pd.read_csv('Restaurant_Reviews.tsv', sep='\t')

# Preview the first rows to sanity-check the parse.
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


# Analyze data

In [None]:
# (rows, columns) of the loaded reviews table.
df.shape

(1000, 2)

In [None]:
# Summary statistics of the numeric columns (only `Liked` here); a mean of 0.5
# on a 0/1 label indicates the two classes are equally represented.
df.describe()

Unnamed: 0,Liked
count,1000.0
mean,0.5
std,0.50025
min,0.0
25%,0.0
50%,0.5
75%,1.0
max,1.0


In [None]:
# Column dtypes and non-null counts, used to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  1000 non-null   object
 1   Liked   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB


In [None]:
# Class balance check: number of reviews per label (1 = liked, 0 = not liked).
df['Liked'].value_counts()

1    500
0    500
Name: Liked, dtype: int64

# Train a model

In [None]:
# Features are the raw review texts; the target is the 0/1 `Liked` label.
# NOTE(review): `input` shadows the Python builtin of the same name. The name is
# kept because the train/test split cell refers to it; rename both together.
input, output = df['Review'], df['Liked']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the reviews for testing. The fixed random_state makes the
# split (and therefore the reported metrics) reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    input,
    output,
    test_size=0.2,
    random_state=5,
)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Text-classification pipeline, applied in order:
#   vect  - turn each review into token counts
#   tfidf - re-weight the counts with TF-IDF
#   clf   - multinomial Naive Bayes classifier on the weighted features
clf_pipeline = Pipeline(
    [
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', MultinomialNB()),
    ]
)

# Fit every step on the training split only; the fitted pipeline is displayed.
clf_pipeline.fit(x_train, y_train)

Pipeline(memory=None,
         steps=[('vect',
                 CountVectorizer(analyzer='word', binary=False,
                                 decode_error='strict',
                                 dtype=<class 'numpy.int64'>, encoding='utf-8',
                                 input='content', lowercase=True, max_df=1.0,
                                 max_features=None, min_df=1,
                                 ngram_range=(1, 1), preprocessor=None,
                                 stop_words=None, strip_accents=None,
                                 token_pattern='(?u)\\b\\w\\w+\\b',
                                 tokenizer=None, vocabulary=None)),
                ('tfidf',
                 TfidfTransformer(norm='l2', smooth_idf=True,
                                  sublinear_tf=False, use_idf=True)),
                ('clf',
                 MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],
         verbose=False)

# Evaluation

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Accuracy on the held-out test split: share of reviews whose predicted label
# matches the true label.
y_pred = clf_pipeline.predict(x_test)
accuracy_score(y_test, y_pred)

0.815

In [None]:
# Per-class precision / recall / F1 on the test split. The report is a
# pre-formatted string, so it is printed rather than shown as a cell value.
report = classification_report(y_test, clf_pipeline.predict(x_test))
print(report)

              precision    recall  f1-score   support

           0       0.84      0.77      0.80        98
           1       0.79      0.86      0.83       102

    accuracy                           0.81       200
   macro avg       0.82      0.81      0.81       200
weighted avg       0.82      0.81      0.81       200



In [None]:
# Accuracy on the held-out test set is 0.815, which is acceptable for this simple baseline.

# Try the model 

In [None]:
# Example: show the first five test reviews with their true and predicted labels.
# The pipeline is run once over the test set; the original re-ran the prediction
# for the whole test set (and rebuilt the lists) on every loop iteration.
N_EXAMPLES = 5
example_preds = clf_pipeline.predict(x_test)

# zip stops at the shortest input, so a test set with fewer than N_EXAMPLES rows
# prints what is available instead of raising IndexError.
for review, y_real, y_hat in zip(
    list(x_test)[:N_EXAMPLES],
    list(y_test)[:N_EXAMPLES],
    list(example_preds)[:N_EXAMPLES],
):
  print(review)
  print('y_real: ' + str(y_real) + ' y_pred: ' + str(y_hat) + '\n')

No, I'm going to eat the potato that I found some strangers hair in it.
y_real: 0 y_pred: 0

The food is very good for your typical bar food.
y_real: 1 y_pred: 1

I had a salad with the wings, and some ice cream for dessert and left feeling quite satisfied.
y_real: 1 y_pred: 1

Would not go back.
y_real: 0 y_pred: 0

I had the mac salad and it was pretty bland so I will not be getting that again.
y_real: 0 y_pred: 0

