# Sentiment Analysis of Foodpanda's consumer reviews

**Source:** *Google Play Store* <br>

```
Author: Teoh Sin Yee
Author: Wan Muhamad Rusyaidi Afifi
```


In [None]:
# General packages
import os
import warnings
from ast import literal_eval
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams

# NLP packages
import nltk
import spacy
from nltk import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud

# Modeling packages
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below — confirm that is intended.
warnings.filterwarnings("ignore")
# Default size (width, height) in inches for all matplotlib figures.
rcParams['figure.figsize'] = 14, 6
# Apply matplotlib's 'ggplot' style sheet to every plot.
plt.style.use('ggplot')

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Work from the notebook folder on Drive so that relative paths such as
# 'cleandata.csv' resolve there.
%cd /content/drive/My Drive/Colab Notebooks/

/content/drive/My Drive/Colab Notebooks


In [None]:
# Load the review data from cleandata.csv and preview the first three rows.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the preview look
# like index columns written by earlier to_csv calls — confirm, and consider
# dropping them or reading with index_col.
foodpanda_reviews = pd.read_csv('cleandata.csv')
foodpanda_reviews.head(3)

Unnamed: 0.1,Unnamed: 0,userName,score,at,content,Sentiment_rating,reviews_text_new,reviews_text_nonstop,Positively Rated,cleandata
0,0,C Ds,4,22/11/2021,It's pretty good for where I live since most p...,1,it s pretty good for where i live since most p...,"['pretty', 'good', 'live', 'since', 'places', ...",1,pretty good live since places foodpanda discov...
1,1,Gopa Banik,5,22/11/2021,So fast delivery... I can eat whatever i want..,1,so fast delivery i can eat whatever i want,"['fast', 'delivery', 'eat', 'whatever', 'want']",1,fast delivery eat whatever want
2,2,Blessyl Fejer,2,22/11/2021,Sometime when it comes to 50% off promo some r...,0,sometime when it comes to 50 off promo some r...,"['sometime', 'comes', '50', 'promo', 'restaura...",0,sometime comes 50 promo restaurant temporarily...


# POS (Use spaCy)

Extract the top 10 positive & negative:

1.   noun- NOUN
2.   adjective - ADJ
3.   verb - VERB
4.   adverb - ADV

In [None]:
# Summary table for the top-10 words per part of speech.
# It starts with a single rank column 'id' (1..10); the word columns are
# attached to it later, one per part of speech and sentiment.
top10_pos = pd.DataFrame({'id': list(range(1, 11))})

## Adjectives


In [None]:
# Collect the lower-cased adjective lemmas of every cleaned review.
nlp = spacy.load("en_core_web_sm")

# Guard against missing text: pandas reads empty CSV fields as NaN, which
# spaCy cannot process. Such rows simply produce an empty list.
review_texts = foodpanda_reviews['cleandata'].fillna('').astype(str)

# Only POS tags and lemmas are needed, so the dependency parser and the NER
# component are disabled, and the texts are streamed through nlp.pipe in
# batches instead of calling nlp() once per review.
words_adj = [
    [tok.lemma_.lower().strip() for tok in doc if tok.pos_ == 'ADJ']
    for doc in nlp.pipe(review_texts, disable=["parser", "ner"])
]

In [None]:
# Store the per-review adjective lists as a new column of the data frame.
# The Series is built on the frame's own index so each list lands on its row
# by position. A bare pd.Series(words_adj) is aligned on a fresh 0..n-1 index
# and would silently produce NaN wherever the frame's labels differ. A length
# mismatch now raises instead of being padded with NaN.
foodpanda_reviews['words_adj'] = pd.Series(words_adj, index=foodpanda_reviews.index)


Finally, using some python basics, we can select the most common adjectives used in positive and negative reviews:

In [None]:
# Adjective lists of the positively rated reviews only (Sentiment_rating == 1).
foodpanda_reviews.loc[foodpanda_reviews['Sentiment_rating'] == 1, 'words_adj']

0       [good, favorite, little, diaappointing, unavai...
1                                                  [fast]
4                                      [nice, good, fast]
5                                                      []
8                         [small, previous, shift, small]
                              ...                        
9000         [well, online, well, good, online, reliable]
9001                                      [many, chinese]
9002                  [useful, easy, navigate, many, low]
9011                                    [good, satisfied]
9012                                   [favorite, online]
Name: words_adj, Length: 3825, dtype: object

In [None]:
# Split the reviews by sentiment label and write each subset to its own
# Excel workbook (negative first, then positive).
negative = foodpanda_reviews.loc[foodpanda_reviews['Sentiment_rating'] == 0]
positive = foodpanda_reviews.loc[foodpanda_reviews['Sentiment_rating'] == 1]

for file_name, subset in (('negative.xlsx', negative), ('positive.xlsx', positive)):
    # saving the excel
    subset.to_excel(file_name)
    print('DataFrame is written to Excel File successfully.')


DataFrame is written to Excel File successfully.
DataFrame is written to Excel File successfully.


In [None]:
#positive adjectives
# Keep the adjective lists of positively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
positive_mask = foodpanda_reviews['Sentiment_rating'] == 1
foodpanda_reviews['adj_pos'] = foodpanda_reviews.loc[positive_mask, 'words_adj']

# One row per adjective. NaN rows and empty lists explode to NaN and are
# dropped before counting.
pos_adj = foodpanda_reviews['adj_pos'].explode().dropna()
pos_adj_count = Counter(pos_adj).most_common(10)
print(pos_adj_count)

# Only the words go into the summary table; the counts stay in pos_adj_count.
df = pd.DataFrame(pos_adj_count, columns=['adjectives_pos', 'Counts'])
top10_pos['adjectives_pos'] = df['adjectives_pos']

[('good', 1224), ('easy', 655), ('great', 326), ('fast', 231), ('convenient', 203), ('nice', 146), ('many', 130), ('friendly', 116), ('helpful', 109), ('available', 101)]


In [None]:
#negative adjectives
# Keep the adjective lists of negatively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
negative_mask = foodpanda_reviews['Sentiment_rating'] == 0
foodpanda_reviews['adj_neg'] = foodpanda_reviews.loc[negative_mask, 'words_adj']

# One row per adjective. NaN rows and empty lists explode to NaN and are
# dropped before counting.
neg_adj = foodpanda_reviews['adj_neg'].explode().dropna()
neg_adj_count = Counter(neg_adj).most_common(10)
print(neg_adj_count)

# Only the words go into the summary table; the counts stay in neg_adj_count.
df = pd.DataFrame(neg_adj_count, columns=['adjectives_neg', 'Counts'])
top10_pos['adjectives_neg'] = df['adjectives_neg']

[('bad', 987), ('good', 340), ('many', 308), ('wrong', 274), ('poor', 216), ('available', 211), ('high', 200), ('online', 183), ('expensive', 165), ('late', 157)]


## Verb


In [None]:
# Collect the lower-cased verb lemmas of every cleaned review.
nlp = spacy.load("en_core_web_sm")

# Guard against missing text: pandas reads empty CSV fields as NaN, which
# spaCy cannot process. Such rows simply produce an empty list.
review_texts = foodpanda_reviews['cleandata'].fillna('').astype(str)

# Only POS tags and lemmas are needed, so the dependency parser and the NER
# component are disabled, and the texts are streamed through nlp.pipe in
# batches instead of calling nlp() once per review.
words_vb = [
    [tok.lemma_.lower().strip() for tok in doc if tok.pos_ == 'VERB']
    for doc in nlp.pipe(review_texts, disable=["parser", "ner"])
]

In [None]:
# Store the per-review verb lists as a new column of the data frame.
# The Series is built on the frame's own index so each list lands on its row
# by position. A bare pd.Series(words_vb) is aligned on a fresh 0..n-1 index
# and would silently produce NaN wherever the frame's labels differ. A length
# mismatch now raises instead of being padded with NaN.
foodpanda_reviews['words_vb'] = pd.Series(words_vb, index=foodpanda_reviews.index)


In [None]:
# Preview the extracted verb lemmas of the first 30 reviews.
foodpanda_reviews['words_vb'].head(30)

0                [live, discover, shop, become, happen]
1                                                 [eat]
2                                         [come, close]
3                                                [take]
4                                                    []
5                                                    []
6     [include, would, be, happen, make, make, hal, ...
7              [keep, get, cancel, undelivere, rrcieve]
8       [see, bring, compare, keep, say, start, change]
9     [keep, get, use, keep, start, can, buy, use, s...
10                                     [ought, improve]
11                            [proceed, order, deliver]
12    [shift, shaheed, show, train, know, follow, read]
13    [work, answer, order, cancel, ask, ruin, get, ...
14                                                   []
15                       [lollypop, complain, lollypop]
16    [cancel, pay, take, choose, fault, send, templ...
17                                              

Finally, using some python basics, we can select the most common verbs used in positive and negative reviews:

In [None]:
#positive verb
# Keep the verb lists of positively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
positive_mask = foodpanda_reviews['Sentiment_rating'] == 1
foodpanda_reviews['vb_pos'] = foodpanda_reviews.loc[positive_mask, 'words_vb']

# One row per verb. NaN rows and empty lists explode to NaN and are dropped
# before counting.
pos_vb = foodpanda_reviews['vb_pos'].explode().dropna()
pos_vb_count = Counter(pos_vb).most_common(10)
print(pos_vb_count)

# Only the words go into the summary table; the counts stay in pos_vb_count.
df = pd.DataFrame(pos_vb_count, columns=['vb_pos', 'Counts'])
top10_pos['verb_pos'] = df['vb_pos']

[('use', 353), ('order', 245), ('give', 244), ('get', 239), ('make', 222), ('deliver', 194), ('need', 174), ('keep', 158), ('go', 155), ('add', 148)]


In [None]:
#negative verb
# Keep the verb lists of negatively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
negative_mask = foodpanda_reviews['Sentiment_rating'] == 0
foodpanda_reviews['vb_neg'] = foodpanda_reviews.loc[negative_mask, 'words_vb']

# One row per verb. NaN rows and empty lists explode to NaN and are dropped
# before counting.
neg_vb = foodpanda_reviews['vb_neg'].explode().dropna()
neg_vb_count = Counter(neg_vb).most_common(10)
print(neg_vb_count)

# Only the words go into the summary table; the counts stay in neg_vb_count.
df = pd.DataFrame(neg_vb_count, columns=['vb_neg', 'Counts'])
top10_pos['verb_neg'] = df['vb_neg']

[('use', 912), ('get', 890), ('cancel', 752), ('say', 704), ('order', 624), ('give', 589), ('deliver', 495), ('make', 491), ('take', 466), ('try', 451)]


In [None]:
# Show the summary table with the adjective and verb columns filled in.
top10_pos


Unnamed: 0,id,adjectives_pos,adjectives_neg,verb_pos,verb_neg
0,1,good,bad,use,use
1,2,easy,good,order,get
2,3,great,many,give,cancel
3,4,fast,wrong,get,say
4,5,convenient,poor,make,order
5,6,nice,available,deliver,give
6,7,many,high,need,deliver
7,8,friendly,online,keep,make
8,9,helpful,expensive,go,take
9,10,available,late,add,try


## Noun


In [None]:
# Collect the lower-cased noun lemmas of every cleaned review.
nlp = spacy.load("en_core_web_sm")

# Guard against missing text: pandas reads empty CSV fields as NaN, which
# spaCy cannot process. Such rows simply produce an empty list.
review_texts = foodpanda_reviews['cleandata'].fillna('').astype(str)

# Only POS tags and lemmas are needed, so the dependency parser and the NER
# component are disabled, and the texts are streamed through nlp.pipe in
# batches instead of calling nlp() once per review.
words_nn = [
    [tok.lemma_.lower().strip() for tok in doc if tok.pos_ == 'NOUN']
    for doc in nlp.pipe(review_texts, disable=["parser", "ner"])
]

In [None]:
# Store the per-review noun lists as a new column of the data frame.
# The Series is built on the frame's own index so each list lands on its row
# by position. A bare pd.Series(words_nn) is aligned on a fresh 0..n-1 index
# and would silently produce NaN wherever the frame's labels differ. A length
# mismatch now raises instead of being padded with NaN.
foodpanda_reviews['words_nn'] = pd.Series(words_nn, index=foodpanda_reviews.index)


In [None]:
# Preview the extracted noun lemmas of the first 30 reviews.
foodpanda_reviews['words_nn'].head(30)

0              [place, place, city, order, issue, shop]
1                                      [delivery, want]
2                              [promo, restaurant, bit]
3                                               [order]
4                                   [packing, delivery]
5                                             [service]
6     [rider, customer, time, everytime, cod, order,...
7     [issue, complain, order, rider, motor, breakdo...
8                        [rider, bill, encounter, bill]
9     [love, error, voucher, app, date, promo, vouch...
10                              [app, search, function]
11          [option, delivery, order, incomplete, item]
12    [food, map, garden, rider, address, rider, map...
13    [time, use, order, think, option, time, kind, ...
14                                            [service]
15    [order, delivery, voucher, food, time, time, v...
16    [store, order, debit, card, refund, day, optio...
17                                            [g

Finally, using some python basics, we can select the most common nouns used in positive and negative reviews:

In [None]:
#positive noun
# Keep the noun lists of positively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
positive_mask = foodpanda_reviews['Sentiment_rating'] == 1
foodpanda_reviews['nn_pos'] = foodpanda_reviews.loc[positive_mask, 'words_nn']

# One row per noun. NaN rows and empty lists explode to NaN and are dropped
# before counting.
pos_nn = foodpanda_reviews['nn_pos'].explode().dropna()
pos_nn_count = Counter(pos_nn).most_common(10)
print(pos_nn_count)

# Only the words go into the summary table; the counts stay in pos_nn_count.
df = pd.DataFrame(pos_nn_count, columns=['nn_pos', 'Counts'])
top10_pos['noun_pos'] = df['nn_pos']

[('food', 1154), ('delivery', 936), ('app', 910), ('order', 858), ('service', 609), ('time', 581), ('use', 357), ('restaurant', 352), ('rider', 326), ('customer', 264)]


In [None]:
#negative noun
# Keep the noun lists of negatively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
negative_mask = foodpanda_reviews['Sentiment_rating'] == 0
foodpanda_reviews['nn_neg'] = foodpanda_reviews.loc[negative_mask, 'words_nn']

# One row per noun. NaN rows and empty lists explode to NaN and are dropped
# before counting.
neg_nn = foodpanda_reviews['nn_neg'].explode().dropna()
neg_nn_count = Counter(neg_nn).most_common(10)
print(neg_nn_count)

# Only the words go into the summary table; the counts stay in neg_nn_count.
df = pd.DataFrame(neg_nn_count, columns=['nn_neg', 'Counts'])
top10_pos['noun_neg'] = df['nn_neg']

[('order', 2979), ('app', 1727), ('food', 1673), ('delivery', 1444), ('time', 1365), ('service', 1188), ('customer', 1063), ('restaurant', 845), ('rider', 696), ('payment', 626)]


In [None]:
# Show the summary table with the adjective, verb and noun columns filled in.
top10_pos


Unnamed: 0,id,adjectives_pos,adjectives_neg,verb_pos,verb_neg,noun_pos,noun_neg
0,1,good,bad,use,use,food,order
1,2,easy,good,order,get,delivery,app
2,3,great,many,give,cancel,app,food
3,4,fast,wrong,get,say,order,delivery
4,5,convenient,poor,make,order,service,time
5,6,nice,available,deliver,give,time,service
6,7,many,high,need,deliver,use,customer
7,8,friendly,online,keep,make,restaurant,restaurant
8,9,helpful,expensive,go,take,rider,rider
9,10,available,late,add,try,customer,payment


## Adverb


In [None]:
# Collect the lower-cased adverb lemmas of every cleaned review.
nlp = spacy.load("en_core_web_sm")

# Guard against missing text: pandas reads empty CSV fields as NaN, which
# spaCy cannot process. Such rows simply produce an empty list.
review_texts = foodpanda_reviews['cleandata'].fillna('').astype(str)

# Only POS tags and lemmas are needed, so the dependency parser and the NER
# component are disabled, and the texts are streamed through nlp.pipe in
# batches instead of calling nlp() once per review.
words_adv = [
    [tok.lemma_.lower().strip() for tok in doc if tok.pos_ == 'ADV']
    for doc in nlp.pipe(review_texts, disable=["parser", "ner"])
]

In [None]:
# Store the per-review adverb lists as a new column of the data frame.
# The Series is built on the frame's own index so each list lands on its row
# by position. A bare pd.Series(words_adv) is aligned on a fresh 0..n-1 index
# and would silently produce NaN wherever the frame's labels differ. A length
# mismatch now raises instead of being padded with NaN.
foodpanda_reviews['words_adv'] = pd.Series(words_adv, index=foodpanda_reviews.index)


In [None]:
# Preview the extracted adverb lemmas of the first 30 reviews.
foodpanda_reviews['words_adv'].head(30)

0           [pretty, far, sometimes, even, really]
1                                               []
2                          [sometime, temporarily]
3                                           [long]
4                                               []
5                                               []
6                                               []
7                                   [however, yet]
8                                           [back]
9                                               []
10                                              []
11                                              []
12                 [soon, always, enough, blindly]
13               [around, ever, continuesly, else]
14                                              []
15                                        [always]
16                                              []
17                                              []
18                                              []
19                             

Finally, using some Python basics, we can select the most common adverbs used in positive and negative reviews:

In [None]:
#positive adverb
# Keep the adverb lists of positively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
positive_mask = foodpanda_reviews['Sentiment_rating'] == 1
foodpanda_reviews['adv_pos'] = foodpanda_reviews.loc[positive_mask, 'words_adv']

# One row per adverb. NaN rows and empty lists explode to NaN and are dropped
# before counting.
pos_adv = foodpanda_reviews['adv_pos'].explode().dropna()
pos_adv_count = Counter(pos_adv).most_common(10)
print(pos_adv_count)

# Only the words go into the summary table; the counts stay in pos_adv_count.
df = pd.DataFrame(pos_adv_count, columns=['adv_pos', 'Counts'])
top10_pos['adverb_pos'] = df['adv_pos']

[('always', 252), ('really', 250), ('sometimes', 233), ('also', 216), ('still', 140), ('even', 137), ('far', 108), ('especially', 93), ('fast', 83), ('much', 82)]


In [None]:
#negative adverb
# Keep the adverb lists of negatively rated reviews only; every other row
# becomes NaN in the new column through index alignment.
negative_mask = foodpanda_reviews['Sentiment_rating'] == 0
foodpanda_reviews['adv_neg'] = foodpanda_reviews.loc[negative_mask, 'words_adv']

# One row per adverb. NaN rows and empty lists explode to NaN and are dropped
# before counting.
neg_adv = foodpanda_reviews['adv_neg'].explode().dropna()
neg_adv_count = Counter(neg_adv).most_common(10)
print(neg_adv_count)

# Only the words go into the summary table; the counts stay in neg_adv_count.
df = pd.DataFrame(neg_adv_count, columns=['adv_neg', 'Counts'])
top10_pos['adverb_neg'] = df['adv_neg']

[('even', 878), ('always', 637), ('still', 472), ('already', 414), ('also', 371), ('never', 344), ('really', 247), ('ever', 219), ('back', 173), ('sometimes', 168)]


In [None]:
# Show the summary table with all four parts of speech filled in.
top10_pos


Unnamed: 0,id,adjectives_pos,adjectives_neg,verb_pos,verb_neg,noun_pos,noun_neg,adverb_pos,adverb_neg
0,1,good,bad,use,use,food,order,always,even
1,2,easy,good,order,get,delivery,app,really,always
2,3,great,many,give,cancel,app,food,sometimes,still
3,4,fast,wrong,get,say,order,delivery,also,already
4,5,convenient,poor,make,order,service,time,still,also
5,6,nice,available,deliver,give,time,service,even,never
6,7,many,high,need,deliver,use,customer,far,really
7,8,friendly,online,keep,make,restaurant,restaurant,especially,ever
8,9,helpful,expensive,go,take,rider,rider,fast,back
9,10,available,late,add,try,customer,payment,much,sometimes


In [None]:
# Write the top-10 summary table to an Excel workbook in the current
# working directory.
# determining the name of the file
file_name = 'top10_pos_updated.xlsx'
  
# saving the excel (the DataFrame index is written as the first column)
top10_pos.to_excel(file_name)
print('DataFrame is written to Excel File successfully.')

DataFrame is written to Excel File successfully.
