# Sentiment Analysis

In [90]:
import numpy as np
import pandas as pd

In [91]:
df = pd.read_csv('./../NLP -- D-35/NLP Datset and Notebook/Restaurant_Reviews.tsv', delimiter='\t', quoting=3)

df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [92]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  1000 non-null   object
 1   Liked   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB


In [93]:
df.shape

(1000, 2)

In [94]:
df.isna().sum()

Review    0
Liked     0
dtype: int64

No missing values in the dataset.

In [95]:
df['Liked'].value_counts()

1    500
0    500
Name: Liked, dtype: int64

Balanced dataset with equally distributed binary values.

In [96]:
df['Review'][0]

'Wow... Loved this place.'

Taking the data from the first row of the 'Review' column.

Using Regular expression to eliminate non essential data.

In [97]:
# import regular expression
import re

In [98]:
word = re.sub('[^A-Za-z]', ' ', df['Review'][0])
print(word)

Wow    Loved this place 


In [99]:
# switch it to lower case for better processing.

word = word.lower()
print(word)

wow    loved this place 


In [100]:
# remove space by splitting the word & converting it into a list

word = word.split()
word

['wow', 'loved', 'this', 'place']

Using 'stopword' package from module 'nltk' to remove words that don't fundamentally matter

In [101]:
from nltk.corpus import stopwords

In [102]:
word1 = [i for i in word if not i in set(stopwords.words('english'))]
word1

['wow', 'loved', 'place']

In [103]:
# Using 'PorterStemmer' from 'nltk' module to nullify the adjectives to words.

from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [104]:
word1 = [ps.stem(i) for i in word1]
word1

['wow', 'love', 'place']

In [105]:
word1 = ' '.join(word1)
word1

'wow love place'

In [106]:
# Transforming object into numerical data -- via CountVectorizer

from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=3)

In [107]:
l = []
l.append(word1)
print(l)
X = cv.fit_transform(l)
print(X.toarray())

['wow love place']
[[1 1 1]]


In [108]:
# Loop the whole dataset

l1 = []
for i in range(0,1000):
    review = re.sub('[^A-Za-z]', ' ', df['Review'][i])
    review = review.split()
    review = [ps.stem(i) for i in review if not i in set(stopwords.words('english'))]
    
    review = ' '.join(review)
    l1.append(review)
    print(review)

wow love place
crust good
not tasti textur nasti
stop late may bank holiday rick steve recommend love
the select menu great price
now I get angri I want damn pho
honeslti tast that fresh
the potato like rubber could tell made ahead time kept warmer
the fri great
A great touch
servic prompt
would go back
the cashier care ever I say still end wayyy overpr
I tri cape cod ravoli chicken cranberri mmmm
I disgust I pretti sure human hair
I shock sign indic cash
highli recommend
waitress littl slow servic
thi place worth time let alon vega
like
the burritto blah
the food amaz
servic also cute
I could care less the interior beauti
So perform
that right red velvet cake ohhh stuff good
they never brought salad ask
thi hole wall great mexican street taco friendli staff
took hour get food tabl restaur food luke warm our sever run around like total overwhelm
the worst salmon sashimi
also combo like burger fri beer decent deal
thi like final blow
I found place accid I could happier
seem like good qu

I love bacon wrap date
thi unbeliev bargain
the folk otto alway make us feel welcom special
As main also uninspir
thi place I first pho amaz
thi wonder experi made place must stop whenev town
If food bad enough enjoy deal world worst annoy drunk peopl
veri fun chef
order doubl cheeseburg got singl patti fall apart pictur upload yeah still suck
great place coupl drink watch sport event wall cover TV
If possibl give zero star
the descript said yum yum sauc anoth said eel sauc yet anoth said spici mayo well none roll sauc
I say would hardest decis honestli M dish tast suppos tast amaz
If roll eye may stay not sure go back tri
everyon attent provid excel custom servic
horribl wast time money
now dish quit flavour
By time side restaur almost empti excus
It busi either also build freez cold
like review said pay eat place
drink took close minut come one point
serious flavor delight folk
much better ayc sushi place I went vega
the light dark enough set mood
base sub par servic I receiv effort 

the manag worst
the insid realli quit nice clean
the food outstand price reason
I think I run back carli anytim soon food
thi due fact took minut acknowledg anoth minut get food kept forget thing
love margarita
thi first vega buffet disappoint
veri good though
the one note ventil could use upgrad
great pork sandwich
don wast time
total letdown I would much rather go camelback flower shop cartel coffe
third chees friend burger cold
We enjoy pizza brunch
the steak well trim also perfectli cook
We group claim would handl us beauti
I love
We ask bill leav without eat bring either
thi place jewel la vega exactli I hope find nearli ten year live
seafood limit boil shrimp crab leg crab leg definit tast fresh
the select food best
delici I absolut back
thi small famili restaur fine dine establish
they toro tartar cavier extraordinari I like thinli slice wagyu white truffl
I dont think I back long time
It attach ga station rare good sign
how awesom
I back mani time soon
the menu much good stuff 

servic quick even go order like like
I mean realli get famou fish chip terribl
that said mouth belli still quit pleas
not thing
thumb Up
If read pleas go
I love grill pizza remind legit italian pizza
onli pro larg seat area nice bar area great simpl drink menu the best brick oven pizza homemad dough
they realli nice atmospher
tonight I elk filet special suck
after one bite I hook
We order old classic new dish go time sore disappoint everyth
cute quaint simpl honest
the chicken delici season perfect fri outsid moist chicken insid
the food great alway compliment chef
special thank dylan T recommend order all yummi tummi
awesom select beer
great food awesom servic
one nice thing ad gratuiti bill sinc parti larger expect tip
A fli appl juic A fli
the han nan chicken also tasti
As servic I thought good
the food bare lukewarm must sit wait server bring us
ryan bar definit one edinburgh establish I revisit
nicest chines restaur I
overal I like food servic
they also serv indian naan bread humm

If want wait mediocr food downright terribl servic place
waaaaaayyyyyyyyyi rate I say
We go back
the place fairli clean food simpli worth
thi place lack style
the sangria half glass wine full ridicul
don bother come
the meat pretti dri I slice brisket pull pork
the build seem pretti neat bathroom pretti trippi I eat
It equal aw
probabl hurri go back
slow seat even reserv
not good stretch imagin
the cashew cream sauc bland veget undercook
the chipolt ranch dip saus tasteless seem thin water heat
It bit sweet realli spici enough lack flavor
I veri disappoint
thi place horribl way overpr
mayb vegetarian fare I twice I thought averag best
It busi know
the tabl outsid also dirti lot time worker alway friendli help menu
the ambianc feel like buffet set douchey indoor garden tea biscuit
con spotti servic
the fri hot neither burger
but came back cold
then food came disappoint ensu
the real disappoint waiter
My husband said rude even apolog bad food anyth
the reason eat would fill night bing dr

In [109]:
l1

['wow love place',
 'crust good',
 'not tasti textur nasti',
 'stop late may bank holiday rick steve recommend love',
 'the select menu great price',
 'now I get angri I want damn pho',
 'honeslti tast that fresh',
 'the potato like rubber could tell made ahead time kept warmer',
 'the fri great',
 'A great touch',
 'servic prompt',
 'would go back',
 'the cashier care ever I say still end wayyy overpr',
 'I tri cape cod ravoli chicken cranberri mmmm',
 'I disgust I pretti sure human hair',
 'I shock sign indic cash',
 'highli recommend',
 'waitress littl slow servic',
 'thi place worth time let alon vega',
 'like',
 'the burritto blah',
 'the food amaz',
 'servic also cute',
 'I could care less the interior beauti',
 'So perform',
 'that right red velvet cake ohhh stuff good',
 'they never brought salad ask',
 'thi hole wall great mexican street taco friendli staff',
 'took hour get food tabl restaur food luke warm our sever run around like total overwhelm',
 'the worst salmon sashimi

In [110]:
l1_df = pd.DataFrame(list(zip(l1)))

l1_df.head()

Unnamed: 0,0
0,wow love place
1,crust good
2,not tasti textur nasti
3,stop late may bank holiday rick steve recommen...
4,the select menu great price


In [111]:
l1_df.columns = ['Review']

l1_df.head()

Unnamed: 0,Review
0,wow love place
1,crust good
2,not tasti textur nasti
3,stop late may bank holiday rick steve recommen...
4,the select menu great price


In [112]:
type(l1_df)

pandas.core.frame.DataFrame

In [113]:
# Create a bag of words model
cv = CountVectorizer(max_features=1500)

In [114]:
X = cv.fit_transform(l1).toarray()
X[0]

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

Every word becomes a feature

In [115]:
len(cv.get_feature_names())

1500

In [116]:
y = df.iloc[:,1]
y.head()

0    1
1    0
2    0
3    1
4    1
Name: Liked, dtype: int64

In [117]:
# With dataset being split into X and y, using train_test_split

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [118]:
X_train.shape

(800, 1500)

In [119]:
y_train.shape

(800,)

In [120]:
# Using NaiveBayes classifier

from sklearn.naive_bayes import GaussianNB
gNB = GaussianNB()

In [121]:
gNB.fit(X_train, y_train)
y_pred = gNB.predict(X_test)

In [122]:
# Using ConfusionMatrix & AccuracyScore

from sklearn.metrics import confusion_matrix, accuracy_score

In [123]:
print('The accuracy score is : ', accuracy_score(y_test, y_pred))
print('\nConfusion Matrix : \n', confusion_matrix(y_test, y_pred))

The accuracy score is :  0.735

Confusion Matrix : 
 [[55 42]
 [11 92]]


In [125]:
Review = 'nice food'
input_text = [Review]

data = cv.transform(input_text).toarray()
pred_data = gNB.predict(data)

if pred_data[0] == 1:
    print('Positive Review')
else:
    print('Negative Review')

Positive Review


In [126]:
Review = 'tasteless food'
input_text = [Review]

data = cv.transform(input_text).toarray()
pred_data = gNB.predict(data)

if pred_data[0] == 1:
    print('Positive Review')
else:
    print('Negative Review')

Negative Review
