## Importing dependencies

In [1]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import warnings
# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides potentially useful diagnostics (e.g. pandas
# SettingWithCopyWarning or deprecation notices) - consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
# Load the raw review export; the path is relative to the notebook's working directory.
df_uncleaned = pd.read_csv(filepath_or_buffer='FastFoodReviews.csv')

## Data Preprocessing 

In [3]:
# Preview the first rows of the raw data to see what each column holds.
df_uncleaned.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,Restaurant
0,981679b0-3780-4f2b-b385-30734072dcf2,LaRia Walker,https://play-lh.googleusercontent.com/a-/ALV-U...,The app is always having technical difficultie...,1,0,10.4.0,2024-04-14 04:40:22,,,10.4.0,Wendys
1,6a3cca51-565a-45ff-8b93-bc143006fe99,Josh Roe,https://play-lh.googleusercontent.com/a-/ALV-U...,Doesn't work most of the time. When I log in I...,1,0,11.0.7,2024-04-14 03:45:14,,,11.0.7,Wendys
2,8f6b8874-55a3-4f06-8437-624ca1c9f32e,Seth Dowling,https://play-lh.googleusercontent.com/a-/ALV-U...,The app doesn't accept any payment methods. Tr...,1,0,10.4.0,2024-04-14 03:17:27,,,10.4.0,Wendys
3,2369febd-abd5-4983-ac0a-26acdae98110,Brandon Gallant,https://play-lh.googleusercontent.com/a/ACg8oc...,"Good for ordering food. However, don't be dece...",1,0,10.4.0,2024-04-14 03:08:58,,,10.4.0,Wendys
4,b177de05-8847-473c-a020-eaaf737f2de2,Derrell Freeman,https://play-lh.googleusercontent.com/a-/ALV-U...,great app thanks alot,5,0,10.4.0,2024-04-14 02:30:28,,,10.4.0,Wendys


In [4]:
# List the column names so the ones needed for the analysis can be selected.
df_uncleaned.columns

Index(['reviewId', 'userName', 'userImage', 'content', 'score',
       'thumbsUpCount', 'reviewCreatedVersion', 'at', 'replyContent',
       'repliedAt', 'appVersion', 'Restaurant'],
      dtype='object')

In [5]:
# Keep only the review text, the star score and the restaurant name.
columns_to_keep = ['content', 'score', 'Restaurant']
df_uncleaned = df_uncleaned[columns_to_keep]

In [6]:
# Display the trimmed frame (the notebook rendering elides the middle rows).
df_uncleaned

Unnamed: 0,content,score,Restaurant
0,The app is always having technical difficultie...,1,Wendys
1,Doesn't work most of the time. When I log in I...,1,Wendys
2,The app doesn't accept any payment methods. Tr...,1,Wendys
3,"Good for ordering food. However, don't be dece...",1,Wendys
4,great app thanks alot,5,Wendys
...,...,...,...
7756,They asked for to much personal information,1,Five Guys
7757,Won't take your CC info. Crashes all the time.,1,Five Guys
7758,Very nice website with easy ordering and great...,5,Five Guys
7759,can't pay with card and app takes forever to l...,1,Five Guys


Final columns are:

- content: the review text written by the customer.
- score: score ranging from 1 to 5.
- Restaurant: the fast-food chain for which the review was written.

In [7]:
# Summarise dtypes and non-null counts for the remaining columns.
df_uncleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7761 entries, 0 to 7760
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   content     7761 non-null   object
 1   score       7761 non-null   int64 
 2   Restaurant  7761 non-null   object
dtypes: int64(1), object(2)
memory usage: 182.0+ KB


### Drop duplicate values

In [8]:
# Row count before removing duplicate rows.
len(df_uncleaned)

7761

In [9]:
# Remove rows that are exact duplicates across all remaining columns,
# keeping the first occurrence of each.
df_uncleaned = df_uncleaned.drop_duplicates(keep='first')

In [10]:
# Row count after removing duplicate rows.
len(df_uncleaned)

7546

### Check for null values

In [11]:
# Total number of missing cells: per-column null counts, then summed.
null_counts_by_column = df_uncleaned.isnull().sum()
null_counts_by_column.sum()

0

## Sentiment Analysis

In [12]:
# Work on an independent copy: a plain assignment would only alias the frame,
# so adding columns to `df` would also mutate `df_uncleaned`. That frame is the
# result of slicing/filtering, so writing to it can trigger pandas'
# SettingWithCopyWarning (hidden in this notebook by the global warning filter).
df = df_uncleaned.copy()

### Importing positive and negative words dictionary

In [13]:
# Build set-based lookups from the NLTK opinion lexicon so that membership
# tests during scoring are constant-time.
# NOTE(review): presumably the 'opinion_lexicon' corpus must already be
# downloaded (nltk.download) - no download step is visible in this notebook.
lexicon = nltk.corpus.opinion_lexicon
positive_words = set(lexicon.positive())
negative_words = set(lexicon.negative())

In [14]:
def _english_stopwords():
    """Return the English stopword set, building it once on first use."""
    cached = getattr(_english_stopwords, "_cache", None)
    if cached is None:
        # stopwords.words() produces a fresh list on every call; cache it as a
        # frozenset so filtering is a constant-time lookup per token.
        cached = frozenset(stopwords.words('english'))
        _english_stopwords._cache = cached
    return cached


def calculate_sentiment_score(review):
    """Return a per-word lexicon sentiment score for a single review.

    The review is lower-cased and tokenized with ``word_tokenize``. Tokens that
    are not purely alphanumeric, or that are English stopwords, are discarded.
    The score is ``(positive - negative) / kept`` where ``positive`` and
    ``negative`` count the kept tokens found in the module-level
    ``positive_words`` and ``negative_words`` sets, and ``kept`` is the number
    of tokens that survived filtering.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    float
        Value between -1 and 1; ``0.0`` when no token survives filtering.
    """
    stop_words = _english_stopwords()  # looked up once per call, not once per token
    tokens = word_tokenize(review.lower())
    filtered_tokens = [
        word for word in tokens
        if word.isalnum() and word not in stop_words
    ]
    word_count = len(filtered_tokens)
    if word_count == 0:
        # Nothing left to score (empty review or only punctuation/stopwords).
        return 0.0
    positive_score = sum(word in positive_words for word in filtered_tokens)
    negative_score = sum(word in negative_words for word in filtered_tokens)
    # Normalise by length so long reviews are not favoured over short ones.
    return (positive_score - negative_score) / word_count

In [15]:
# Score every review, then attach the result as a new column.
sentiment_scores = df['content'].apply(calculate_sentiment_score)
df['sentiment_score'] = sentiment_scores

In [16]:
# Preview the reviews together with their sentiment_score column.
df.head()

Unnamed: 0,content,score,Restaurant,sentiment_score
0,The app is always having technical difficultie...,1,Wendys,-0.125
1,Doesn't work most of the time. When I log in I...,1,Wendys,0.125
2,The app doesn't accept any payment methods. Tr...,1,Wendys,0.0
3,"Good for ordering food. However, don't be dece...",1,Wendys,0.076923
4,great app thanks alot,5,Wendys,0.25


In [17]:
# Average the per-review sentiment within each restaurant; reset_index turns
# the group key back into an ordinary 'Restaurant' column.
restaurant_sentiment = (
    df.groupby('Restaurant')['sentiment_score']
    .mean()
    .reset_index()
)

In [18]:
# Rank 1 = highest mean sentiment; tied restaurants share the average of the
# rank positions they occupy.
mean_scores = restaurant_sentiment['sentiment_score']
restaurant_sentiment['rank'] = mean_scores.rank(method='average', ascending=False)

In [19]:
# Show the restaurants ordered from best to worst rank.
ranking_columns = ['Restaurant', 'sentiment_score', 'rank']
ranked = restaurant_sentiment.sort_values(by='rank')[ranking_columns]
print("Restaurant Rankings:")
print(ranked)

Restaurant Rankings:
    Restaurant  sentiment_score  rank
2    Five Guys         0.095523   1.0
0  Burger King         0.090907   2.5
3    McDonalds         0.090907   2.5
1  Chick-fil-A         0.083582   4.0
4       Wendys         0.075889   5.0
