In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import re
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [77]:
# Load the McDonald's store reviews; the file is decoded as Latin-1.
df = pd.read_csv(
    '/kaggle/input/mcdonalds-store-reviews/McDonald_s_Reviews.csv',
    encoding='latin1',
)

<div style="text-align:center; background-color:#D82127; padding:20px; border-radius:25px;">
  <h1 style="font-size:36px; color:#FFD700;"><b>1. Feature Engineering</b></h1>
</div>

In [78]:
# Keep only the first whitespace-separated token of the raw rating text and
# convert it to an integer. Casting to str first keeps this cell re-runnable:
# once `rating` is already an integer column, a bare `.str` accessor would
# raise AttributeError on a second execution.
df['rating'] = df['rating'].astype(str).str.split().str[0].astype(int)

# Binary target: 1 when the rating is 3 or higher, otherwise 0.
df['liked'] = (df['rating'] >= 3).astype(int)
df.head()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,liked
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1,0
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4,1
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1,0
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5,1
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1,0


In [123]:
# Summarise column dtypes and non-null counts.
# NOTE(review): the saved output lists `review_length`, which is only created
# in a later cell, so this cell was executed out of order. Re-run the notebook
# top to bottom to refresh the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33396 entries, 0 to 33395
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   reviewer_id    33396 non-null  int64  
 1   store_name     33396 non-null  object 
 2   category       33396 non-null  object 
 3   store_address  33396 non-null  object 
 4   latitude       32736 non-null  float64
 5   longitude      32736 non-null  float64
 6   rating_count   33396 non-null  object 
 7   review_time    33396 non-null  object 
 8   review         33396 non-null  object 
 9   rating         33396 non-null  int64  
 10  liked          33396 non-null  int64  
 11  review_length  33396 non-null  int64  
dtypes: float64(2), int64(4), object(6)
memory usage: 3.1+ MB


In [79]:
# Character count of each review.
df['review_length'] = df['review'].str.len()

# Display the longest review. `idxmax()` returns an index *label*, so look the
# row up with `.loc` and select the column by name rather than by the
# hard-coded position 8 (positional integer access on a label-indexed Series
# is deprecated and breaks if the column order changes).
df.loc[df['review_length'].idxmax(), 'review']

'I want to apologize to anyone that I recommended this particular McDonald\'s to in the past.\nThis was the restaurant that I used to recommend to tourists when they were looking for a fast place to eat that was away from the Disney parks, but still close enough to get back to the parks quickly. At that time, it was worth it. The menu was cheap (or at least what you know to expect from a international chain) and, since it\'s a chain, you know what the food is gong to be like.\nSo, let\'s look at what you need to do if you are a member of a chain of restaurants. You need to set yourself apart with your service. This WAS the reason I used to recommend this restaurant. Sadly, I don\'t think they care anymore. The last time we went, half of the kiosks weren\'t working. One of the three that was, didn\'t print receipts. (This plays a part into why the service was so atrocious. Actually there was a lot that played into it.) Now, if you\'ve gone to McDonald\'s at all recently, you know you pi

In [80]:
# Fetch the NLTK stop-word corpus; `stopwords.words('english')` below needs it.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [81]:
# Show the English stop-word list that `text_processing` filters out.
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

<div style="text-align:center; background-color:#D82127; padding:20px; border-radius:25px;">
  <h1 style="font-size:36px; color:#FFD700;"><b>2. Text Preprocessing</b></h1>
</div>

In [82]:
# Built once at definition time instead of once per token / per review:
# reloading the stop-word list and rebuilding the set for every word made the
# preprocessing pass far slower than necessary.
_STOP_WORDS = frozenset(stopwords.words('english'))
_STEMMER = PorterStemmer()
_NON_LETTERS = re.compile('[^a-zA-Z]')


def text_processing(text):
    """Normalise a raw review into a space-separated string of stemmed tokens.

    Steps, in order:
      1. replace every character that is not an ASCII letter with a space,
      2. lowercase,
      3. split on whitespace,
      4. drop English stop words,
      5. Porter-stem the remaining tokens.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces (empty string when no
        token survives the filtering).
    """
    tokens = _NON_LETTERS.sub(' ', text).lower().split()
    # Stop words are removed *before* stemming, so the comparison is against
    # the unstemmed, lowercased token.
    return ' '.join(_STEMMER.stem(tok) for tok in tokens if tok not in _STOP_WORDS)

# Clean every review; `rev` holds one normalised string per row of `df`.
rev = df['review'].map(text_processing)
rev

0        look like someon spit food normal transact eve...
1        mcdonald far food atmospher go staff make diff...
2        made mobil order got speaker check line move l...
3         mc crispi chicken sandwich custom servic quick p
4        repeat order time drive thru still manag mess ...
                               ...                        
33391                                          treat badli
33392                                          servic good
33393                                  remov hunger enough
33394                               good late becom expens
33395                                       took good care
Name: review, Length: 33396, dtype: object

In [106]:
# Bag-of-words token counts for the cleaned reviews.
# The result is kept as the SciPy sparse matrix that `fit_transform` returns:
# calling `.toarray()` would materialise a dense (n_reviews x vocabulary)
# array that is almost entirely zeros, and both `train_test_split` and
# `MultinomialNB` accept sparse input directly.
cv = CountVectorizer()
X = cv.fit_transform(rev)
y = df['liked']

<div style="text-align:center; background-color:#D82127; padding:20px; border-radius:25px;">
  <h1 style="font-size:36px; color:#FFD700;"><b>3. Model Prediction</b></h1>
</div>

In [107]:
# Hold out 20% of the reviews for evaluation.
# `random_state` pins the shuffle so the reported accuracy is reproducible on
# Restart & Run All; `stratify=y` keeps the liked / not-liked ratio the same
# in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Multinomial Naive Bayes over the token-count features.
model = MultinomialNB()
model.fit(X_train, y_train)

In [108]:
# Predicted `liked` labels for the held-out test split.
y_pred = model.predict(X_test)

In [121]:
# Spot check: push a positive-sounding sentence through the same cleaning and
# vectorising steps used for training, then classify it.
good_review = cv.transform(
    [text_processing('The food was very amazing, Keep going')]
).toarray()
prediction = model.predict(good_review)
print(prediction)

[1]


In [122]:
# Spot check: push a negative-sounding sentence through the same cleaning and
# vectorising steps used for training, then classify it.
Bad_review = cv.transform(
    [text_processing('I hate it, Very bad experience')]
).toarray()
prediction = model.predict(Bad_review)
print(prediction)

[0]


In [111]:
# Share of test reviews whose predicted label equals the true `liked` label.
print(accuracy_score(y_test, y_pred))

0.8694610778443114
