# Sentiment Tagging with Vader

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re

In [2]:
data = pd.read_csv("./data/sentence_data.csv")
data

Unnamed: 0,city,country,Review,sent_list
0,Vienna,Austria,Location was good Have stayed before I e 9yrs...,Have stayed before I e 9yrs ago shop no longer...
1,Vienna,Austria,Breakfast options are limited Personally i wo...,Personally i would choose a hotel closer to an...
2,City of London,United Kingdom,The room was a great size for a city hotel Th...,The room was a great size for a city hotel
3,City of London,United Kingdom,The room was a great size for a city hotel Th...,The shower was great Lovely old building with ...
4,West End of London,United Kingdom,The room was small and the bathroom was minut...,The room was small and the bathroom was minute
...,...,...,...,...
968664,el Besòs i el Maresme,Spain,Reception area very shabby Walls in bedroom w...,Walls in bedroom were so thin you could hear e...
968665,Vienna,Austria,I was informed when I checked in that I would...,I was informed when I checked in that I would ...
968666,Amsterdam,Netherlands,Could hear other people shutting doors but di...,Could hear other people shutting doors but did...
968667,Vienna,Austria,Hotel is showing signs of wear Room furniture...,Room furniture was worn and shabby bed amazing...


In [3]:
data.columns = ['city', 'country', 'reviews', 'sentence']
data = data[0:100000]

## Get Vader polarity score

In [4]:
# This function gets the polarity of reviews using Vader # 
def get_polarity(sentence):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(sentence)
    compound = score['compound']
    return compound
    
data['polarity'] = data['sentence'].apply(get_polarity)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,city,country,reviews,sentence,polarity
0,Vienna,Austria,Location was good Have stayed before I e 9yrs...,Have stayed before I e 9yrs ago shop no longer...,0.4767
1,Vienna,Austria,Breakfast options are limited Personally i wo...,Personally i would choose a hotel closer to an...,0.4215
2,City of London,United Kingdom,The room was a great size for a city hotel Th...,The room was a great size for a city hotel,0.6249
3,City of London,United Kingdom,The room was a great size for a city hotel Th...,The shower was great Lovely old building with ...,0.9811
4,West End of London,United Kingdom,The room was small and the bathroom was minut...,The room was small and the bathroom was minute,0.0000
...,...,...,...,...,...
99995,Malakoff,France,Lovely decor ambience Super location and grea...,Lovely decor ambience Super location and great...,0.9153
99996,Barbican,United Kingdom,I would like to have breakfast incluyed altho...,I would like to have breakfast incluyed althou...,0.6486
99997,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,The location of this hotel is fantastic 2 mins...,0.8955
99998,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,It s not as central for public transport as so...,0.0000


In [5]:
def number_words(sentence):
    return len(re.findall(r'\w+', str(sentence)))

length = (data['sentence'].apply(number_words) > 1)
data = data.loc[length]

## Aggregate the polarity by grouping sentence by reviews

In [6]:
data['agg_polarity'] = data.groupby('reviews')['polarity'].transform('mean')

In [7]:
# data = data.drop_duplicates(subset="reviews", keep="first")
# data = data.drop(['sentence','polarity'], axis=1)

In [8]:
data["review_sentiment"] = ["positive" if x>=0.196725 else ("neutral" if x>=0.096725 else "negative") for x in data['agg_polarity']]
data

Unnamed: 0,city,country,reviews,sentence,polarity,agg_polarity,sentiment
0,Vienna,Austria,Location was good Have stayed before I e 9yrs...,Have stayed before I e 9yrs ago shop no longer...,0.4767,0.476700,positive
1,Vienna,Austria,Breakfast options are limited Personally i wo...,Personally i would choose a hotel closer to an...,0.4215,0.421500,positive
2,City of London,United Kingdom,The room was a great size for a city hotel Th...,The room was a great size for a city hotel,0.6249,0.803000,positive
3,City of London,United Kingdom,The room was a great size for a city hotel Th...,The shower was great Lovely old building with ...,0.9811,0.803000,positive
4,West End of London,United Kingdom,The room was small and the bathroom was minut...,The room was small and the bathroom was minute,0.0000,0.037075,negative
...,...,...,...,...,...,...,...
99995,Malakoff,France,Lovely decor ambience Super location and grea...,Lovely decor ambience Super location and great...,0.9153,0.915300,positive
99996,Barbican,United Kingdom,I would like to have breakfast incluyed altho...,I would like to have breakfast incluyed althou...,0.6486,0.648600,positive
99997,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,The location of this hotel is fantastic 2 mins...,0.8955,0.066633,negative
99998,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,It s not as central for public transport as so...,0.0000,0.066633,negative


In [9]:
data["sen_sentiment"] = ["positive" if x>=0.1779 else ("neutral" if x>=0.01779 else "negative") for x in data['polarity']]
data

Unnamed: 0,city,country,reviews,sentence,polarity,agg_polarity,sentiment
0,Vienna,Austria,Location was good Have stayed before I e 9yrs...,Have stayed before I e 9yrs ago shop no longer...,0.4767,0.476700,positive
1,Vienna,Austria,Breakfast options are limited Personally i wo...,Personally i would choose a hotel closer to an...,0.4215,0.421500,positive
2,City of London,United Kingdom,The room was a great size for a city hotel Th...,The room was a great size for a city hotel,0.6249,0.803000,positive
3,City of London,United Kingdom,The room was a great size for a city hotel Th...,The shower was great Lovely old building with ...,0.9811,0.803000,positive
4,West End of London,United Kingdom,The room was small and the bathroom was minut...,The room was small and the bathroom was minute,0.0000,0.037075,negative
...,...,...,...,...,...,...,...
99995,Malakoff,France,Lovely decor ambience Super location and grea...,Lovely decor ambience Super location and great...,0.9153,0.915300,positive
99996,Barbican,United Kingdom,I would like to have breakfast incluyed altho...,I would like to have breakfast incluyed althou...,0.6486,0.648600,positive
99997,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,The location of this hotel is fantastic 2 mins...,0.8955,0.066633,positive
99998,"Sant Pere, Santa Caterina i La Ribera",Spain,The location of this hotel is fantastic 2 min...,It s not as central for public transport as so...,0.0000,0.066633,negative


## Export 

In [10]:
data.to_csv('./data/tagged_sentence_data.csv', index=False)

## Setting to run to see all dataframe row

In [None]:
# view_data = data.loc[:, "sentence":"sentiment"]
data.sort_values(by=['agg_polarity'], axis=0, inplace=True,ascending=False)

pd.set_option('display.max_rows',None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)
data
# Threshold: if polarity <= 0.1779: Negative

In [None]:
pd.set_option('display.max_rows',None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)
view_data