In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
mcd = pd.read_csv('./McDonald_s_Reviews.csv',encoding = 'latin-1')

In [5]:
mcd.head()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1 star
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5 stars
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1 star


In [6]:
mcd.columns

Index(['reviewer_id', 'store_name', 'category', 'store_address', 'latitude ',
       'longitude', 'rating_count', 'review_time', 'review', 'rating'],
      dtype='object')

In [7]:
mcd.sample(5)

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating
18555,18556,McDonald's,Fast food restaurant,"1100 N US Hwy 377, Roanoke, TX 76262, United S...",33.009318,-97.222925,998,5 years ago,Very slow service,1 star
26183,26184,McDonald's,Fast food restaurant,"3501 Biscayne Blvd, Miami, FL 33137, United St...",25.81,-80.189098,2808,a year ago,Long waiting time at nights I waited for an ho...,1 star
13006,13007,McDonald's,Fast food restaurant,"1540 2nd St, Santa Monica, CA 90405, United St...",34.012219,-118.49454,3380,3 months ago,It's not clean and there are no offers but sti...,4 stars
20629,20630,McDonald's,Fast food restaurant,"2400 Alliance Gateway Fwy, Fort Worth, TX 7617...",32.958041,-97.307652,957,11 months ago,I just passed by.,2 stars
20908,20909,McDonald's,Fast food restaurant,"2400 Alliance Gateway Fwy, Fort Worth, TX 7617...",32.958041,-97.307652,957,3 years ago,Excellent,5 stars


In [8]:
mcd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27243 entries, 0 to 27242
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   reviewer_id    27243 non-null  int64  
 1   store_name     27243 non-null  object 
 2   category       27243 non-null  object 
 3   store_address  27243 non-null  object 
 4   latitude       26593 non-null  float64
 5   longitude      26593 non-null  float64
 6   rating_count   27243 non-null  object 
 7   review_time    27243 non-null  object 
 8   review         27243 non-null  object 
 9   rating         27242 non-null  object 
dtypes: float64(2), int64(1), object(7)
memory usage: 2.1+ MB


In [2]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam

In [9]:
sia = SentimentIntensityAnalyzer()
mcd['compound_score'] = mcd['review'].apply(lambda x: sia.polarity_scores(x)['compound'])
mcd['sentiment'] = mcd['compound_score'].apply(lambda score: 'Positive' if score >= 0.05 else 'Negative' if score <= -0.05 else 'Neutral')

In [11]:
X = mcd['review']
y = mcd['sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
max_words = 1000
max_len = 100

In [13]:
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

In [14]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [15]:
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(LSTM(128))
model.add(Dense(3, activation='softmax'))

In [16]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

In [17]:
model.fit(X_train_pad, y_train_encoded, validation_data=(X_test_pad, y_test_encoded), epochs=5, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7a27b07c9b10>

In [18]:
y_pred = model.predict(X_test_pad)
y_pred_labels = [label_encoder.classes_[np.argmax(pred)] for pred in y_pred]
accuracy = accuracy_score(y_test, y_pred_labels)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred_labels))

Accuracy: 0.8878693338227198
Classification Report:
              precision    recall  f1-score   support

    Negative       0.84      0.85      0.85      1525
     Neutral       0.81      0.90      0.85      1046
    Positive       0.94      0.90      0.92      2878

    accuracy                           0.89      5449
   macro avg       0.87      0.88      0.87      5449
weighted avg       0.89      0.89      0.89      5449



In [19]:
def predict_sentiment(review):
    review_seq = tokenizer.texts_to_sequences([review])
    review_pad = pad_sequences(review_seq, maxlen=max_len)
    sentiment_prob = model.predict(review_pad)
    sentiment_label = label_encoder.classes_[np.argmax(sentiment_prob)]
    return sentiment_label

In [20]:
new_review = "This restaurant has excellent service and delicious food."
predicted_sentiment = predict_sentiment(new_review)
print("Predicted sentiment:", predicted_sentiment)

Predicted sentiment: Positive


In [21]:
new_review2 = "This restaurant sucks."
predicted_sentiment = predict_sentiment(new_review2)
print("Predicted sentiment:", predicted_sentiment)

Predicted sentiment: Negative


In [22]:
new_review3 = "This is dull"
predicted_sentiment = predict_sentiment(new_review3)
print("Predicted sentiment:", predicted_sentiment)

Predicted sentiment: Neutral
