In [1]:
import numpy as np
import pandas as pd

### Load Data

In [4]:
# NOTE(review): absolute, machine-specific Windows path - the notebook only runs
# where this exact file exists. Consider a path relative to the project folder.
dataset_path = r'D:\Masters\mscs\CS5720-Neural Network and Deep Learning\Assignments\Final Project\Restautarant review analysis\Restaurant reviews.csv'

In [8]:
# Read the raw reviews CSV into a DataFrame and preview the first rows.
data = pd.read_csv(dataset_path)
data.head()

Unnamed: 0,Restaurant,Reviewer,Review,Rating,Metadata,Time,Pictures,7514
0,Beyond Flavours,Rusha Chakraborty,"The ambience was good, food was quite good . h...",5,"1 Review , 2 Followers",5/25/2019 15:54,0,2447.0
1,Beyond Flavours,Anusha Tirumalaneedi,Ambience is too good for a pleasant evening. S...,5,"3 Reviews , 2 Followers",5/25/2019 14:20,0,
2,Beyond Flavours,Ashok Shekhawat,A must try.. great food great ambience. Thnx f...,5,"2 Reviews , 3 Followers",5/24/2019 22:54,0,
3,Beyond Flavours,Swapnil Sarkar,Soumen das and Arun was a great guy. Only beca...,5,"1 Review , 1 Follower",5/24/2019 22:11,0,
4,Beyond Flavours,Dileep,Food is good.we ordered Kodi drumsticks and ba...,5,"3 Reviews , 2 Followers",5/24/2019 21:37,0,


In [13]:
# Keep only the review text and its star rating for the sentiment task.
# .copy() gives data_set its own storage, so the column assignments made in
# later cells are never writes to a slice of `data` and do not raise
# SettingWithCopyWarning.
data_set = data[['Review','Rating']].copy()

In [17]:
# Preview the first three rows of the Review/Rating subset.
data_set.head(3)

Unnamed: 0,Review,Rating
0,"The ambience was good, food was quite good . h...",5
1,Ambience is too good for a pleasant evening. S...,5
2,A must try.. great food great ambience. Thnx f...,5


### Data Cleaning and Preprocessing

In [7]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [25]:
# Remove every row in which the review text or the rating is missing
data_set = data_set.dropna(axis=0, how='any', subset=['Review', 'Rating'])

In [51]:
# How many reviews carry each distinct Rating value
data_set['Rating'].value_counts()

5       3826
4       2373
1       1735
3       1192
2        684
4.5       69
3.5       47
2.5       19
1.5        9
Like       1
Name: Rating, dtype: int64

In [55]:
# Remove the rows whose Rating is the non-numeric string 'Like'
like_rows = data_set.index[data_set['Rating'] == 'Like']
data_set = data_set.drop(index=like_rows)

In [56]:
# (rows, columns) remaining after the 'Like' row has been removed.
data_set.shape

(9954, 2)

In [57]:
# Convert the Rating column to a numeric dtype; with its default settings
# pd.to_numeric raises if a value cannot be parsed as a number.
data_set['Rating']=pd.to_numeric(data_set['Rating'])
data_set.columns

Index(['Review', 'Rating'], dtype='object')

In [58]:
# Round half-star ratings UP to the next whole star (1.5 -> 2, ..., 4.5 -> 5).
# The result is assigned back to the column rather than using
# data_set['Rating'].replace(..., inplace=True): that form mutates a temporary
# column selection, which leaves data_set unchanged under pandas Copy-on-Write.
data_set['Rating'] = data_set['Rating'].replace({1.5: 2, 2.5: 3, 3.5: 4, 4.5: 5})

In [61]:
# analysis on the label

# create a feature with categorical reviews
def format_rating(rating):
    """Translate a numeric star rating into a sentiment category name.

    The rating is checked against inclusive upper bounds in ascending order:
    up to 1 is 'very bad', up to 2 is "bad", up to 3 is "neutral" and up to 4
    is "good". A rating that satisfies none of these bounds is "excellent".
    """
    upper_bounds = (
        (1, 'very bad'),
        (2, "bad"),
        (3, "neutral"),
        (4, "good"),
    )
    for limit, label in upper_bounds:
        if rating <= limit:
            return label
    return "excellent"

# Derive the categorical sentiment label for every rating
category = data_set['Rating'].map(format_rating)


In [64]:
# Attach the categorical labels to the frame as a new Rating_cat column.
data_set['Rating_cat'] = category

In [67]:
# Show the full text of the review stored under index label 0
data_set.loc[0, 'Review']

'The ambience was good, food was quite good . had Saturday lunch , which was cost effective .\nGood place for a sate brunch. One can also chill with friends and or parents.\nWaiter Soumen Das was really courteous and helpful.'

In [73]:
# Character class used by contains_emoji, compiled once rather than per call.
# The class used to end with the single range U+24C2-U+1F251. That range spans
# most of the Basic Multilingual Plane (box drawing, CJK, Hangul, full-width
# forms, ...), so ordinary non-emoji text was reported as emoji. It is replaced
# by the emoji-bearing blocks that lie inside that span.
_EMOJI_DETECT_RE = re.compile("["
                              u"\U0001F600-\U0001F64F"  # emoticons
                              u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                              u"\U0001F680-\U0001F6FF"  # transport & map symbols
                              u"\U0001F700-\U0001F77F"  # alchemical symbols
                              u"\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
                              u"\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
                              u"\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
                              u"\U0001FA00-\U0001FA6F"  # Chess Symbols
                              u"\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
                              u"\U00002702-\U000027B0"  # Dingbats
                              u"\U000024C2"             # circled latin capital M
                              u"\U000025A0-\U000025FF"  # Geometric Shapes
                              u"\U00002600-\U000026FF"  # Miscellaneous Symbols
                              u"\U00002934-\U00002935"  # curved arrows
                              u"\U00002B00-\U00002BFF"  # Miscellaneous Symbols and Arrows
                              u"\U00003030\U0000303D"   # wavy dash, part alternation mark
                              u"\U00003297\U00003299"   # circled ideographs
                              u"\U0000FE00-\U0000FE0F"  # variation selectors
                              u"\U0001F000-\U0001F251"  # Mahjong tiles .. Enclosed Ideographic Supplement
                              "]+", flags=re.UNICODE)


def contains_emoji(text):
    """Report whether ``text`` contains at least one emoji character.

    Parameters
    ----------
    text : str
        The string to inspect.

    Returns
    -------
    bool
        True if any character of ``text`` falls in one of the emoji ranges of
        ``_EMOJI_DETECT_RE``, otherwise False (including for an empty string).
    """
    return bool(_EMOJI_DETECT_RE.search(text))

# Keep only the reviews in which contains_emoji finds a match
has_emoji_mask = data_set['Review'].apply(contains_emoji)
emojis_reviews = data_set.loc[has_emoji_mask, 'Review']

In [75]:
# Number of reviews flagged as containing at least one emoji.
emojis_reviews.shape

(708,)

In [76]:
# remove the emoji from the text 

# Character class used by remove_emojis, compiled once rather than per call.
# The class used to end with the single range U+24C2-U+1F251. That range spans
# most of the Basic Multilingual Plane (box drawing, CJK, Hangul, full-width
# forms, ...), so ordinary non-emoji text was deleted along with the emojis.
# It is replaced by the emoji-bearing blocks that lie inside that span.
_EMOJI_STRIP_RE = re.compile("["
                             u"\U0001F600-\U0001F64F"  # emoticons
                             u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                             u"\U0001F680-\U0001F6FF"  # transport & map symbols
                             u"\U0001F700-\U0001F77F"  # alchemical symbols
                             u"\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
                             u"\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
                             u"\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
                             u"\U0001FA00-\U0001FA6F"  # Chess Symbols
                             u"\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
                             u"\U00002702-\U000027B0"  # Dingbats
                             u"\U000024C2"             # circled latin capital M
                             u"\U000025A0-\U000025FF"  # Geometric Shapes
                             u"\U00002600-\U000026FF"  # Miscellaneous Symbols
                             u"\U00002934-\U00002935"  # curved arrows
                             u"\U00002B00-\U00002BFF"  # Miscellaneous Symbols and Arrows
                             u"\U00003030\U0000303D"   # wavy dash, part alternation mark
                             u"\U00003297\U00003299"   # circled ideographs
                             u"\U0000FE00-\U0000FE0F"  # variation selectors
                             u"\U0001F000-\U0001F251"  # Mahjong tiles .. Enclosed Ideographic Supplement
                             "]+", flags=re.UNICODE)


def remove_emojis(text):
    """Return ``text`` with every emoji character deleted.

    Each run of emoji characters is replaced by the empty string, so the text
    on either side of an emoji is joined directly and all other characters are
    left as they were.

    Parameters
    ----------
    text : str
        The string to clean.

    Returns
    -------
    str
        ``text`` without the characters matched by ``_EMOJI_STRIP_RE``.
    """
    return _EMOJI_STRIP_RE.sub(r'', text)

# Replace every review with its emoji-free version.
data_set['Review'] = data_set['Review'].apply(remove_emojis)

In [48]:
# save the data
# index=False keeps the row index out of the file, so reloading it does not
# produce an extra "Unnamed: 0" column.
data_set.to_csv("cleaned_rest_review.csv", index=False)

In [49]:
# Reload the cleaned data from disk.
# NOTE(review): with read_csv's default NA handling, empty fields come back as
# NaN - presumably this is why missing reviews are filtered again afterwards.
data_set = pd.read_csv("cleaned_rest_review.csv")

In [50]:
# Keep the text and its categorical label, discard rows whose review is
# missing, and renumber the rows so the index is contiguous (0..n-1) again.
# drop=True stops reset_index from adding the old index as an extra 'index'
# column. Assigning the chained result (instead of inplace=True on a filtered
# slice) gives data_set an independent frame that is safe to add columns to.
data_set = (
    data_set[['Review','Rating_cat']]
    .dropna(subset=['Review'])
    .reset_index(drop=True)
)

In [51]:
# perform label encoding on the Rating_cat feature

def encode_label(review):
    """Map a sentiment category name to its integer code.

    'very bad', 'bad', 'neutral' and 'good' map to 1, 2, 3 and 4 in that
    order; every other value maps to 5.
    """
    ordered_labels = ('very bad', 'bad', 'neutral', 'good')
    for code, label in enumerate(ordered_labels, start=1):
        if review == label:
            return code
    return 5
    
# Add the integer-encoded label as a new Rating_le column
data_set['Rating_le'] = data_set['Rating_cat'].map(encode_label)

In [52]:
# build the corpus
ps = PorterStemmer()

# English stop words with 'not' taken out, so 'not' is never filtered from a review
english_stopword_list = stopwords.words('english')
english_stopword_list.remove('not')
all_stopwords = set(english_stopword_list)

In [159]:
# an array to append all the cleaned text as corpus 

def get_corpus(data):
    """Clean raw review texts into stemmed, stop-word-free strings.

    Each text has every character other than a-z/A-Z replaced by a space, is
    lower-cased and split on whitespace, loses the words found in the
    module-level ``all_stopwords`` set, and has the remaining words stemmed
    with the module-level ``ps`` stemmer before being re-joined with spaces.

    Parameters
    ----------
    data : iterable of str
        The review texts, e.g. a list or a pandas Series. The texts are
        visited in iteration order, so a Series does not need a 0..n-1 index
        (a positional ``data[i]`` lookup would).

    Returns
    -------
    list of str
        One cleaned string per input text, in input order.
    """
    corpus = []
    for text in data:
        words = re.sub(r'[^a-zA-Z]', ' ', text).lower().split()
        kept = [ps.stem(word) for word in words if word not in all_stopwords]
        corpus.append(' '.join(kept))
    return corpus

In [160]:
# Build the cleaned corpus from every review in the frame.
corpus = get_corpus(data_set['Review'])

### Data Transformation

In [135]:
# bag of words approach

from sklearn.feature_extraction.text import CountVectorizer
# Vocabulary capped at 5000 features, built from 1- to 3-word n-grams
cv = CountVectorizer(ngram_range=(1, 3), max_features=5000)

In [136]:
# Learn the vocabulary from the corpus and turn each review into a dense row of
# n-gram counts; y holds the integer label of each row.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split, so the vocabulary is chosen with the test reviews included.
X = cv.fit_transform(corpus).toarray()
y = data_set['Rating_le'].values

In [137]:
# saving bow dictionary

import pickle
bow_path = 'bow_sentiment_model.pkl'
# Use a context manager so the file handle is flushed and closed as soon as the
# vectorizer has been written; a bare open() call would leave it open.
with open(bow_path, 'wb') as bow_file:
    pickle.dump(cv, bow_file)

In [138]:
# split the train and test set

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=23, test_size=0.20
)

### Model Building

In [139]:
# Using Gaussian Naive Bayes

In [140]:
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes with its default settings.
# NOTE(review): the features are word counts; scikit-learn documents
# MultinomialNB as the variant for count data - worth comparing.
nb_classifier = GaussianNB()

In [141]:
# Fit the Naive Bayes classifier on the training split.
nb_classifier.fit(X_train,y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

### Performance Evaluation

In [142]:
from sklearn.metrics import confusion_matrix,accuracy_score

In [143]:
# Predict labels for the held-out reviews.
y_pred = nb_classifier.predict(X_test)
# Called as (true, predicted): rows are true classes, columns are predictions.
cm = confusion_matrix(y_test,y_pred)
print(cm)

# Fraction of test reviews whose predicted label equals the true label.
accuracy_score(y_test,y_pred)

[[141 137  21  41   9]
 [ 35  59  34  24   3]
 [ 22  50  66  72  17]
 [ 21  48 135 171  88]
 [ 31 154 130 118 363]]


0.4020100502512563

In [144]:
### SVC
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=100, trained on the training split
svc = SVC(kernel='rbf', C=100).fit(X_train, y_train)

# Mean accuracy on the held-out split
svc.score(X_test, y_test)



0.6190954773869347

In [149]:
### Random Forest

from sklearn.ensemble import RandomForestClassifier

# n_jobs=-1 trains the trees on all available cores. random_state pins the
# bootstrap samples and feature sub-sampling so the reported score is the same
# on every run; 23 matches the seed used for the train/test split.
rfc = RandomForestClassifier(n_jobs = -1, random_state = 23)

rfc.fit(X_train,y_train)
rfc.score(X_test,y_test)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.

0.5814070351758794

In [150]:
# Save the fitted SVC model so it can be reloaded for later predictions
import joblib

joblib.dump(svc,'svc_sentiment_classifier_model_61_ac')

['svc_sentiment_classifier_model_61_ac']

In [176]:
review_test_p = ["I recently had the pleasure of dining here, and it was an experience that exceeded all my expectations. From the moment we walked in, the ambiance set the stage for what was to be a memorable evening. The staff greeted us warmly, ensuring we felt welcomed and valued.The menu selection was impressive, offering a variety of dishes that catered to all preferences, including several innovative options for those with dietary restrictions. Each dish we ordered was a testament to the chef's expertise and passion for culinary excellence. The flavors were balanced perfectly, with each ingredient shining through without overpowering the others.What truly set this place apart was the attention to detail. The presentation of the food was artistic, the timing between courses was impeccable, and the staff went above and beyond to accommodate our requests, making us feel truly special.I cannot recommend this place enough. Whether you’re looking for a place to celebrate a special occasion or just in search of a delightful dining experience, this should be at the top of your list. We’re already looking forward to our next visit!"]
review_test_n = ["Unfortunately, my recent visit to the restaurant left much to be desired. Despite the high expectations set by its reputation, the experience was underwhelming from start to finish. Upon arrival, the greeting was lukewarm, and it took a noticeable amount of time before we were seated, despite having reservations.The menu, while extensive, seemed to lack coherence, and the descriptions did little to entice the palate or clarify what one might expect from each dish. When our orders finally arrived, the presentation was lackluster, and the flavors were surprisingly bland. A particular disappointment was the main course, which was not only overcooked but also arrived lukewarm, suggesting it had been sitting out for some time.Service throughout the evening was inconsistent; our server seemed disinterested and was seldom seen. Attempts to address our concerns about the meal were met with indifference, leaving us feeling unvalued as customers.Given the price point and the establishment's reputation, I expected a dining experience that delighted the senses and showcased culinary excellence. Unfortunately, what I encountered was a forgettable meal paired with service that failed to meet even basic standards of hospitality. It's unlikely I'll return or recommend this restaurant to others based on this visit."]
review_test_neu = ["My recent visit to the restaurant was a mixed experience. Walking in, the ambiance of the place was inviting, with a nicely decorated interior that promised a cozy dining atmosphere. The staff greeted us politely and seated us without delay, which was a good start to the evening.The menu presented a wide array of options, ranging from traditional favorites to some intriguing chef specials. It took some time to make our selections, partly due to the variety and partly because the menu descriptions could have been more detailed.When the food arrived, the presentation was decent, and the portions were generous. Some of the dishes we tried were quite satisfying, offering a good balance of flavors and freshness. However, a few items fell short of expectations, lacking the depth of flavor we anticipated. It was a hit or miss on the culinary front.Service was generally efficient, though it lacked the warmth and attentiveness that elevate a dining experience from good to great. Our server was courteous but seemed rushed, making our interactions feel somewhat transactional.The overall value for the money was fair, considering the portion sizes and the quality of the ingredients used. However, the inconsistency in the food and service left us feeling that while the restaurant has potential, there's room for improvement in execution and attention to detail.In conclusion, while the visit didn't fully meet our expectations, it wasn't a disappointing experience either. For those considering dining here, there might be dishes that delight, but I'd recommend managing your expectations when it comes to service and some menu items."]

In [177]:
# Clean the sample mixed-experience review with get_corpus, as was done for the training corpus.
corp_test_sample = get_corpus(review_test_neu)

In [178]:
# Vectorize with the already-fitted CountVectorizer (transform only, no refit).
x_new_test = cv.transform(corp_test_sample).toarray()

In [179]:
# Predict the encoded rating label for the sample review and display it.
y_pred_new =  svc.predict(x_new_test)
y_pred_new

array([4], dtype=int64)