In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
# Silence every warning category for the rest of the session.
# A single catch-all "ignore" filter is sufficient: filterwarnings("ignore")
# with no message/module pattern registers the same filter entry as
# simplefilter("ignore"), so one call covers both.
import warnings

warnings.simplefilter(action="ignore")

In [3]:
# Load the raw reviews from the CSV that sits next to the notebook.
data = pd.read_csv(filepath_or_buffer="review.csv")

In [4]:
# Preview the loaded reviews (pandas truncates the rendering of long frames).
data

Unnamed: 0,Review
0,nice hotel expensive parking got good deal sta...
1,ok nothing special charge diamond member hilto...
2,nice rooms not 4* experience hotel monaco seat...
3,"unique, great stay, wonderful time hotel monac..."
4,"great stay great stay, went seahawk game aweso..."
...,...
20486,"best kept secret 3rd time staying charm, not 5..."
20487,great location price view hotel great quick pl...
20488,"ok just looks nice modern outside, desk staff ..."
20489,hotel theft ruined vacation hotel opened sept ...


In [5]:
# Summarise the frame: index range, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20491 entries, 0 to 20490
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  20491 non-null  object
dtypes: object(1)
memory usage: 160.2+ KB


In [7]:
# Descriptive statistics; for the text-only frame this reports
# count / unique / top / freq of the Review column.
data.describe()

Unnamed: 0,Review
count,20491
unique,20491
top,"people talking, ca n't believe excellent ratin..."
freq,1


In [8]:
# Count missing values per column (isna is the canonical spelling of isnull).
data.isna().sum()

Review    0
dtype: int64

In [11]:
# Fetch the VADER lexicon needed by SentimentIntensityAnalyzer;
# quiet=True suppresses the downloader's progress output.
nltk.download("vader_lexicon", quiet=True)

True

In [18]:
# Score every review with VADER and store the four components as columns.
#
# polarity_scores() returns a dict with the keys "pos", "neg", "neu" and
# "compound". It is called exactly once per review and the result is reused
# for all four columns, instead of re-running the analyzer once per column
# (which made this cell do four times the work on the whole dataset).
sentiments = SentimentIntensityAnalyzer()
polarity = [sentiments.polarity_scores(review) for review in data["Review"]]

# Plain lists are assigned (as before) so values are matched to rows by
# position, independent of the frame's index.
for column, key in (
    ("Positive", "pos"),
    ("Negative", "neg"),
    ("Neutral", "neu"),
    ("Compound", "compound"),
):
    data[column] = [scores[key] for scores in polarity]

In [19]:
# Inspect the frame now that the Positive / Negative / Neutral / Compound
# score columns have been added.
data

Unnamed: 0,Review,Positive,Negative,Neutral,Compound
0,nice hotel expensive parking got good deal sta...,0.285,0.072,0.643,0.9747
1,ok nothing special charge diamond member hilto...,0.189,0.110,0.701,0.9787
2,nice rooms not 4* experience hotel monaco seat...,0.219,0.081,0.700,0.9889
3,"unique, great stay, wonderful time hotel monac...",0.385,0.060,0.555,0.9912
4,"great stay great stay, went seahawk game aweso...",0.221,0.135,0.643,0.9797
...,...,...,...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",0.272,0.063,0.665,0.9834
20487,great location price view hotel great quick pl...,0.430,0.000,0.570,0.9753
20488,"ok just looks nice modern outside, desk staff ...",0.145,0.131,0.724,0.2629
20489,hotel theft ruined vacation hotel opened sept ...,0.179,0.150,0.671,0.9867


In [20]:
# Frequency of each distinct compound score, most common first.
data["Compound"].value_counts()

 0.9918    69
 0.9925    68
 0.9892    68
 0.9887    67
 0.9939    66
           ..
 0.3301     1
-0.2335     1
 0.8379     1
-0.9510     1
-0.9693     1
Name: Compound, Length: 3870, dtype: int64

In [21]:
# Turn the compound score into a categorical label:
#   compound >= 0.05   -> "Positive"
#   compound <= -0.05  -> "Negative"
#   anything else      -> "Neutral"
score = data["Compound"].values
sentiment = [
    "Positive" if value >= 0.05 else ("Negative" if value <= -0.05 else "Neutral")
    for value in score
]

data["Sentiment"] = sentiment
data.head()

Unnamed: 0,Review,Positive,Negative,Neutral,Compound,Sentiment
0,nice hotel expensive parking got good deal sta...,0.285,0.072,0.643,0.9747,Positive
1,ok nothing special charge diamond member hilto...,0.189,0.11,0.701,0.9787,Positive
2,nice rooms not 4* experience hotel monaco seat...,0.219,0.081,0.7,0.9889,Positive
3,"unique, great stay, wonderful time hotel monac...",0.385,0.06,0.555,0.9912,Positive
4,"great stay great stay, went seahawk game aweso...",0.221,0.135,0.643,0.9797,Positive


In [22]:
data["Sentiment"].value_counts()

Positive    18831
Negative     1569
Neutral        91
Name: Sentiment, dtype: int64

In [23]:
# Persist the scored and labelled reviews.
# index=True is the pandas default, spelled out here: the row index is written
# as a leading unnamed column. NOTE(review): if downstream readers do not need
# it, consider index=False - confirm with consumers of this file first.
data.to_csv("new_review_data.csv", index=True)