# Sentiment Tagging with Vader

In [39]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nnsplit import NNSplit
from nltk.corpus import stopwords
import re
import pandas as pd

## Input unseen data

### New review

In [10]:
# Unseen hotel review used as the input for the rest of this section. The text
# contains no punctuation, so sentence boundaries are left to the splitter below.
new_review = "My room was dirty and I was afraid to walk barefoot on the floor which looked as if it was not cleaned in weeks White furniture which looked nice in pictures was dirty too and the door looked like it was attacked by an angry dog My shower drain was clogged and the staff did not respond to my request to clean it On a day with heavy rainfall a pretty common occurrence in Amsterdam the roof in my room was leaking luckily not on the bed you could also see signs of earlier water damage I also saw insects running on the floor Overall the second floor of the property looked dirty and badly kept On top of all of this a repairman who came to fix something in a room next door at midnight was very noisy as were many of the guests I understand the challenges of running a hotel in an old building but this negligence is inconsistent with prices demanded by the hotel On the last night after I complained about water damage the night shift manager offered to move me to a different room but that offer came pretty late around midnight when I was already in bed and ready to sleep"

### Split into sentences using NNSplit

In [11]:
# Segment the review with the English NNSplit model.
splitter = NNSplit("en")
sent = splitter.split([new_review])

# A one-element list was passed to split(), so sent[0] is iterated for the
# sentences of new_review. Each sentence is rebuilt from its tokens' text,
# with every token followed by a single space (hence the trailing space).
sent_list = [
    "".join(token.text + " " for token in sentence)
    for sentence in sent[0]
]

sent_list

['My room was dirty and I was afraid to walk barefoot on the floor which looked as if it was not cleaned in weeks ',
 'White furniture which looked nice in pictures was dirty too and the door looked like ',
 'it was attacked by an angry dog ',
 'My shower drain was clogged and the staff did not respond to my request to clean it ',
 'On a day with heavy rainfall a pretty common occurrence in Amsterdam ',
 'the roof in my room was leaking luckily not on the bed ',
 'you could also see signs of earlier water damage ',
 'I also saw insects running on the floor ',
 'Overall the second floor of the property looked dirty and badly kept ',
 'On top of all of this a repairman who came to fix something in a room next door at midnight was very noisy as were many of the guests ',
 'I understand the challenges of running a hotel in an old building ',
 'but this negligence is inconsistent with prices demanded by the hotel ',
 'On the last night after I complained about water damage ',
 'the night sh

### Data Cleaning

#### Convert all words to lowercase

In [37]:
# Lower-case every sentence; the order of sent_list is preserved.
sent_list_lower = list(map(str.lower, sent_list))
sent_list_lower

['my room was dirty and i was afraid to walk barefoot on the floor which looked as if it was not cleaned in weeks ',
 'white furniture which looked nice in pictures was dirty too and the door looked like ',
 'it was attacked by an angry dog ',
 'my shower drain was clogged and the staff did not respond to my request to clean it ',
 'on a day with heavy rainfall a pretty common occurrence in amsterdam ',
 'the roof in my room was leaking luckily not on the bed ',
 'you could also see signs of earlier water damage ',
 'i also saw insects running on the floor ',
 'overall the second floor of the property looked dirty and badly kept ',
 'on top of all of this a repairman who came to fix something in a room next door at midnight was very noisy as were many of the guests ',
 'i understand the challenges of running a hotel in an old building ',
 'but this negligence is inconsistent with prices demanded by the hotel ',
 'on the last night after i complained about water damage ',
 'the night sh

In [43]:
# Print the lower-cased sentences one per line.
for lowered_sentence in sent_list_lower:
    print(lowered_sentence)

my room was dirty and i was afraid to walk barefoot on the floor which looked as if it was not cleaned in weeks 
white furniture which looked nice in pictures was dirty too and the door looked like 
it was attacked by an angry dog 
my shower drain was clogged and the staff did not respond to my request to clean it 
on a day with heavy rainfall a pretty common occurrence in amsterdam 
the roof in my room was leaking luckily not on the bed 
you could also see signs of earlier water damage 
i also saw insects running on the floor 
overall the second floor of the property looked dirty and badly kept 
on top of all of this a repairman who came to fix something in a room next door at midnight was very noisy as were many of the guests 
i understand the challenges of running a hotel in an old building 
but this negligence is inconsistent with prices demanded by the hotel 
on the last night after i complained about water damage 
the night shift manager offered to move me to a different room but

#### Remove Stopwords

In [53]:
# Drop NLTK's English stopwords from every lower-cased sentence and rebuild
# each sentence as a single space-joined string.
#
# NOTE(review): the stopword list also removes negations (e.g. "did not
# respond" becomes "respond" in the output of this cell). VADER uses negation
# words when scoring, so this step can flip the polarity of a sentence --
# confirm that stopword removal is really wanted before VADER.
stop_list = stopwords.words('english')
# Membership is checked once per word; a set avoids rescanning the list each time.
stop_set = set(stop_list)
sent_list_lower_no_stopword_list = [
    [word for word in sent.split() if word not in stop_set]
    for sent in sent_list_lower
]
sent_list_lower_no_stopword = [
    ' '.join(words) for words in sent_list_lower_no_stopword_list
]
print(sent_list_lower_no_stopword)

['room dirty afraid walk barefoot floor looked cleaned weeks', 'white furniture looked nice pictures dirty door looked like', 'attacked angry dog', 'shower drain clogged staff respond request clean', 'day heavy rainfall pretty common occurrence amsterdam', 'roof room leaking luckily bed', 'could also see signs earlier water damage', 'also saw insects running floor', 'overall second floor property looked dirty badly kept', 'top repairman came fix something room next door midnight noisy many guests', 'understand challenges running hotel old building', 'negligence inconsistent prices demanded hotel', 'last night complained water damage', 'night shift manager offered move different room offer came pretty late around midnight already bed ready sleep']


### Convert list to dataframe

In [54]:
# One row per cleaned sentence, held in a single "sentence" column.
sentence_columns = ["sentence"]
data = pd.DataFrame(data=sent_list_lower_no_stopword, columns=sentence_columns)
data

Unnamed: 0,sentence
0,room dirty afraid walk barefoot floor looked cleaned weeks
1,white furniture looked nice pictures dirty door looked like
2,attacked angry dog
3,shower drain clogged staff respond request clean
4,day heavy rainfall pretty common occurrence amsterdam
5,roof room leaking luckily bed
6,could also see signs earlier water damage
7,also saw insects running floor
8,overall second floor property looked dirty badly kept
9,top repairman came fix something room next door midnight noisy many guests


### Get polarity

In [55]:
def get_polarity(sentence, analyser=None):
    """Return VADER's compound polarity score for ``sentence``.

    Args:
        sentence: Text to score; passed unchanged to ``polarity_scores``.
        analyser: Optional object exposing ``polarity_scores(text)``. When
            omitted, a single ``SentimentIntensityAnalyzer`` is created on the
            first call and reused on later calls.

    Returns:
        The value stored under the ``'compound'`` key of the score dict.
    """
    if analyser is None:
        # This function is applied row by row, so the analyser is built once
        # and cached on the function instead of being rebuilt for every row.
        analyser = getattr(get_polarity, "_analyser", None)
        if analyser is None:
            analyser = get_polarity._analyser = SentimentIntensityAnalyzer()
    return analyser.polarity_scores(sentence)['compound']

# Score every sentence and store the compound value in a new "polarity" column.
sentence_scores = data['sentence'].map(get_polarity)
data['polarity'] = sentence_scores
data

Unnamed: 0,sentence,polarity
0,room dirty afraid walk barefoot floor looked cleaned weeks,-0.4404
1,white furniture looked nice pictures dirty door looked like,0.34
2,attacked angry dog,-0.743
3,shower drain clogged staff respond request clean,0.4019
4,day heavy rainfall pretty common occurrence amsterdam,0.4939
5,roof room leaking luckily bed,0.5106
6,could also see signs earlier water damage,-0.4939
7,also saw insects running floor,0.0
8,overall second floor property looked dirty badly kept,-0.7184
9,top repairman came fix something room next door midnight noisy many guests,0.0258


### Keep sentences with at least 8 words

In [56]:
def number_words(sentence):
    """Count the word-character runs in ``sentence`` after casting it to ``str``."""
    text = str(sentence)
    # Each regex match is one maximal run of word characters.
    return sum(1 for _ in re.finditer(r'\w+', text))

# Boolean mask: True for sentences whose word count is 8 or more.
length = data['sentence'].apply(number_words).ge(8)

# Keep only the rows selected by the mask.
data = data[length]
data

Unnamed: 0,sentence,polarity
0,room dirty afraid walk barefoot floor looked cleaned weeks,-0.4404
1,white furniture looked nice pictures dirty door looked like,0.34
8,overall second floor property looked dirty badly kept,-0.7184
9,top repairman came fix something room next door midnight noisy many guests,0.0258
13,night shift manager offered move different room offer came pretty late around midnight already bed ready sleep,0.6908


### Aggregated polarity score

In [59]:
# Average the per-sentence compound scores and map the mean to a label using
# the +/-0.05 cut-offs: >= 0.05 positive, <= -0.05 negative, otherwise neutral.
polarity = data['polarity'].mean()
if pd.isna(polarity) or -0.05 < polarity < 0.05:
    # The mean is NaN when no sentence survived the length filter. Every
    # comparison with NaN is False, so without this guard an empty frame
    # would be labelled 'negative'; with no evidence it is reported as neutral.
    sentiment = ('neutral', polarity)
elif polarity >= 0.05:
    sentiment = ('positive', polarity)
else:
    sentiment = ('negative', polarity)
print(sentiment)

('neutral', -0.020439999999999993)


## Not valid beyond this point

In [None]:
# Load ./data/sentence_data.csv. This rebinds `data`, so the single-review
# frame built earlier in the notebook is no longer reachable under that name.
data = pd.read_csv("./data/sentence_data.csv")
data

In [None]:
# Name the four columns, then keep only the first 100,000 rows.
data.columns = ['city', 'country', 'reviews', 'sentence']
# Later cells add columns to this frame; taking an explicit copy of the slice
# avoids pandas' SettingWithCopyWarning on those assignments.
data = data[0:100000].copy()

## Get Vader polarity score

In [None]:
def get_polarity(sentence, analyser=None):
    """Return VADER's compound polarity score for ``sentence``.

    Args:
        sentence: Text to score; passed unchanged to ``polarity_scores``.
        analyser: Optional object exposing ``polarity_scores(text)``. When
            omitted, a single ``SentimentIntensityAnalyzer`` is created on the
            first call and reused on later calls.

    Returns:
        The value stored under the ``'compound'`` key of the score dict.
    """
    if analyser is None:
        # This function is applied to every row of a large frame, so the
        # analyser is built once and cached instead of rebuilt per row.
        analyser = getattr(get_polarity, "_analyser", None)
        if analyser is None:
            analyser = get_polarity._analyser = SentimentIntensityAnalyzer()
    return analyser.polarity_scores(sentence)['compound']
    
# Score every sentence and store the compound value in a new "polarity" column.
sentence_scores = data['sentence'].map(get_polarity)
data['polarity'] = sentence_scores
data

In [None]:
def number_words(sentence):
    """Count the word-character runs in ``sentence`` after casting it to ``str``."""
    text = str(sentence)
    # Each regex match is one maximal run of word characters.
    return sum(1 for _ in re.finditer(r'\w+', text))

# Boolean mask: True for sentences with more than one word.
length = (data['sentence'].apply(number_words) > 1)
# Later cells add columns to the filtered frame; an explicit copy avoids
# pandas' SettingWithCopyWarning on those assignments.
data = data.loc[length].copy()

## Aggregate polarity by grouping sentences by review

In [None]:
# Give every row the mean polarity of all rows that share its "reviews" value.
review_groups = data.groupby('reviews')
data['agg_polarity'] = review_groups['polarity'].transform('mean')

In [None]:
# data = data.drop_duplicates(subset="reviews", keep="first")
# data = data.drop(['sentence','polarity'], axis=1)

In [None]:
def _review_label(score):
    """Map an aggregated review polarity to a sentiment label."""
    if score >= 0.196725:
        return "positive"
    if score >= 0.096725:
        return "neutral"
    # Anything below the neutral cut-off (NaN included, since it fails both
    # comparisons) is labelled negative.
    return "negative"


data["review_sentiment"] = list(map(_review_label, data['agg_polarity']))
data

In [None]:
def _sentence_label(score):
    """Map a single sentence's polarity to a sentiment label."""
    if score >= 0.1779:
        return "positive"
    if score >= 0.01779:
        return "neutral"
    # Anything below the neutral cut-off (NaN included, since it fails both
    # comparisons) is labelled negative.
    return "negative"


data["sen_sentiment"] = list(map(_sentence_label, data['polarity']))
data

## Export 

In [None]:
# Write the tagged frame to CSV; index=False leaves the row index out of the file.
data.to_csv('./data/tagged_sentence_data.csv', index=False)

## Display settings to show all DataFrame rows

In [None]:
# view_data = data.loc[:, "sentence":"sentiment"]
# Order rows from highest to lowest aggregated review polarity. The result is
# rebound to `data` rather than sorted with inplace=True, which gives the same
# frame without mutating an object that came from earlier filtering.
data = data.sort_values(by=['agg_polarity'], axis=0, ascending=False)

# Lift pandas' display limits so every row and the full cell text are shown.
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
# None is the supported "no limit" value; a negative width is deprecated and
# rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)
data
# Threshold: if polarity <= 0.1779: Negative
# NOTE(review): the sen_sentiment cell labels scores below 0.01779 as negative
# and scores in [0.01779, 0.1779) as neutral -- confirm which cut-off is intended.

In [9]:
# Lift pandas' display limits so every row and the full cell text are shown.
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
# None is the supported "no limit" value; a negative width is deprecated and
# rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

  This is separate from the ipykernel package so we can avoid doing imports until
