# Sentiment Tagging with Vader

In [39]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nnsplit import NNSplit
from nltk.corpus import stopwords
import re
import pandas as pd

## Input unseen data

### New review

In [69]:
# Raw text of one unseen review, used as the input for the cells below.
# The text has no punctuation and contains misspellings; it is kept exactly as-is.
new_review = "You When I booked with your company on line you showed me pictures of a room I thought I was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly Which was completely false advertising After being there we realised that you have grouped lots of rooms on the photos together leaving me the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday present Please make your website more clear through pricing and photos as again I didn t really know what I was paying for and how much it had wnded up being Your photos told me I was getting something I wasn t Not happy and won t be using you again "

### Split into sentences using NNSplit

In [70]:
# Split the review into sentences with NNSplit's English model.
splitter = NNSplit("en")

# split() is given a one-element list, so sent[0] holds the pieces
# produced for our single review.
sent = splitter.split([new_review])

# Rebuild each sentence as plain text: every token's text followed by one
# space (so a non-empty sentence ends with a trailing space).
sent_list = [
    "".join(token.text + " " for token in sentence)
    for sentence in sent[0]
]

sent_list

[' ',
 'You When I booked with your company on line you showed me pictures of a room ',
 'I thought I was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly ',
 'Which was completely false advertising ',
 'After being there we realised that you have grouped lots of rooms on the photos together ',
 'leaving me ',
 'the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday ',
 'present ',
 'Please make your website more clear through pricing and photos ',
 'as again ',
 'I didn t really know what I was paying for and how much it had wnded up being ',
 'Your photos told me I was getting something I wasn t Not happy and won t be using you again ']

### Data Cleaning

#### Convert all words to lower

In [71]:
# Lower-case every sentence produced by the splitter.
sent_list_lower = list(map(str.lower, sent_list))
sent_list_lower

[' ',
 'you when i booked with your company on line you showed me pictures of a room ',
 'i thought i was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly ',
 'which was completely false advertising ',
 'after being there we realised that you have grouped lots of rooms on the photos together ',
 'leaving me ',
 'the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday ',
 'present ',
 'please make your website more clear through pricing and photos ',
 'as again ',
 'i didn t really know what i was paying for and how much it had wnded up being ',
 'your photos told me i was getting something i wasn t not happy and won t be using you again ']

In [72]:
# Print the lower-cased sentences one per line for a quick visual check.
for lowered_sentence in sent_list_lower:
    print(lowered_sentence)

 
you when i booked with your company on line you showed me pictures of a room 
i thought i was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly 
which was completely false advertising 
after being there we realised that you have grouped lots of rooms on the photos together 
leaving me 
the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday 
present 
please make your website more clear through pricing and photos 
as again 
i didn t really know what i was paying for and how much it had wnded up being 
your photos told me i was getting something i wasn t not happy and won t be using you again 


#### Remove Stopwords

In [73]:
# Remove English stop words from every lower-cased sentence.
stop_list = stopwords.words('english')
# Testing membership against a list scans it for every word; a set gives the
# same answer with a constant-time lookup.
stop_set = set(stop_list)

# One list of surviving words per sentence (split on whitespace).
sent_list_lower_no_stopword_list = [
    [word for word in sent.split() if word not in stop_set]
    for sent in sent_list_lower
]

# Join the surviving words back into text; a sentence made up only of stop
# words becomes the empty string.
sent_list_lower_no_stopword = [
    ' '.join(words) for words in sent_list_lower_no_stopword_list
]

# NOTE(review): the DataFrame built further down uses sent_list_lower, not
# this stop-word-free list, so the result is only displayed here.
print(sent_list_lower_no_stopword)

['', 'booked company line showed pictures room', 'thought getting paying arrived room booked staff told could book villa suite theough directly', 'completely false advertising', 'realised grouped lots rooms photos together', 'leaving', 'consumer confused extreamly disgruntled especially wife 40th birthday', 'present', 'please make website clear pricing photos', '', 'really know paying much wnded', 'photos told getting something happy using']


### Convert list to dataframe

In [78]:
# One row per lower-cased sentence, held in a single "sentence" column.
data = pd.DataFrame({"sentence": sent_list_lower})
data

Unnamed: 0,sentence
0,
1,you when i booked with your company on line you showed me pictures of a room
2,i thought i was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly
3,which was completely false advertising
4,after being there we realised that you have grouped lots of rooms on the photos together
5,leaving me
6,the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday
7,present
8,please make your website more clear through pricing and photos
9,as again


### Get polarity

In [79]:
# Shared VADER analyser. Constructing a SentimentIntensityAnalyzer loads its
# lexicon, so it is created once on first use instead of once per sentence.
_VADER_ANALYSER = None


def get_polarity(sentence):
    """Return VADER's compound polarity score for ``sentence``.

    Args:
        sentence: Text handed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        The ``'compound'`` entry of the score dictionary.
    """
    global _VADER_ANALYSER
    if _VADER_ANALYSER is None:
        _VADER_ANALYSER = SentimentIntensityAnalyzer()
    return _VADER_ANALYSER.polarity_scores(sentence)['compound']


# Score every sentence and keep the result in a new 'polarity' column.
data['polarity'] = data['sentence'].apply(get_polarity)
data

Unnamed: 0,sentence,polarity
0,,0.0
1,you when i booked with your company on line you showed me pictures of a room,0.0
2,i thought i was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly,0.0
3,which was completely false advertising,0.0
4,after being there we realised that you have grouped lots of rooms on the photos together,0.0
5,leaving me,0.0
6,the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday,-0.3182
7,present,0.0
8,please make your website more clear through pricing and photos,0.6361
9,as again,0.0


### Keep sentences with at least 8 words

In [80]:
def number_words(sentence):
    """Count the runs of word characters (``\\w+``) in ``sentence``.

    The argument is converted with ``str()`` first, so non-string values are
    counted on their string representation.
    """
    text = str(sentence)
    return sum(1 for _ in re.finditer(r'\w+', text))

# Boolean mask: True for sentences containing at least 8 words.
word_counts = data['sentence'].apply(number_words)
length = word_counts >= 8

# Keep only the rows selected by the mask.
data = data[length]
data

Unnamed: 0,sentence,polarity
1,you when i booked with your company on line you showed me pictures of a room,0.0
2,i thought i was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly,0.0
4,after being there we realised that you have grouped lots of rooms on the photos together,0.0
6,the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday,-0.3182
8,please make your website more clear through pricing and photos,0.6361
10,i didn t really know what i was paying for and how much it had wnded up being,0.0
11,your photos told me i was getting something i wasn t not happy and won t be using you again,-0.7181


### Aggregated polarity score

In [81]:
# Review-level score: the mean of the per-sentence compound scores.
polarity = data['polarity'].mean()

# Label the review with the +/-0.05 cut-offs commonly used for VADER's
# compound score.
if pd.isna(polarity):
    # No sentence survived the length filter, so the mean is NaN. Without this
    # branch every comparison below is False and the review would be reported
    # as 'negative' with no evidence; report it as 'neutral' instead.
    sentiment = ('neutral', polarity)
elif polarity >= 0.05:
    sentiment = ('positive', polarity)
elif polarity > -0.05:
    sentiment = ('neutral', polarity)
else:
    sentiment = ('negative', polarity)
print(sentiment)

('negative', -0.05717142857142856)


## Not valid beyond this point

In [None]:
# Load the sentence-level data from a CSV file under ./data.
data = pd.read_csv("./data/sentence_data.csv")
data

In [None]:
# Give the four columns explicit names.
data.columns = ['city', 'country', 'reviews', 'sentence']
# Work on the first 100000 rows only. .copy() makes the subset an independent
# frame, so the column assignments in later cells do not trigger pandas'
# SettingWithCopyWarning on a slice of the original frame.
data = data[0:100000].copy()

## Get Vader polarity score

In [None]:
# This function gets the polarity of reviews using Vader # 
def get_polarity(sentence):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(sentence)
    compound = score['compound']
    return compound
    
data['polarity'] = data['sentence'].apply(get_polarity)
data

In [None]:
def number_words(sentence):
    """Count the runs of word characters (``\\w+``) in ``sentence``.

    The argument is converted with ``str()`` first, so non-string values are
    counted on their string representation.
    """
    text = str(sentence)
    return sum(1 for _ in re.finditer(r'\w+', text))

# Boolean mask: True for sentences with more than one word.
length = (data['sentence'].apply(number_words) > 1)
# .copy() makes the filtered rows an independent frame, so the column
# assignments in later cells do not trigger pandas' SettingWithCopyWarning.
data = data.loc[length].copy()

## Aggregate the polarity by grouping sentences by review

In [None]:
# Mean sentence polarity per 'reviews' value, written back onto every row of
# that group so the frame keeps one row per sentence.
polarity_by_review = data.groupby('reviews')['polarity']
data['agg_polarity'] = polarity_by_review.transform('mean')

In [None]:
# data = data.drop_duplicates(subset="reviews", keep="first")
# data = data.drop(['sentence','polarity'], axis=1)

In [None]:
def _review_label(score):
    """Map an aggregated review polarity to a sentiment label."""
    if score >= 0.196725:
        return "positive"
    if score >= 0.096725:
        return "neutral"
    return "negative"


# Label every row from its review-level aggregated polarity.
data["review_sentiment"] = [_review_label(score) for score in data['agg_polarity']]
data

In [None]:
def _sentence_label(score):
    """Map a single sentence's polarity to a sentiment label."""
    if score >= 0.1779:
        return "positive"
    if score >= 0.01779:
        return "neutral"
    return "negative"


# Label every row from its own sentence-level polarity.
data["sen_sentiment"] = [_sentence_label(score) for score in data['polarity']]
data

## Export 

In [None]:
# Write the tagged frame to CSV; index=False leaves the row index out of the file.
data.to_csv('./data/tagged_sentence_data.csv', index=False)

## Settings to display all DataFrame rows

In [None]:
# view_data = data.loc[:, "sentence":"sentiment"]
# Order rows from highest to lowest aggregated polarity, in place.
data.sort_values(by=['agg_polarity'], axis=0, inplace=True,ascending=False)

# Lift pandas' display limits so the whole frame is rendered.
pd.set_option('display.max_rows',None)
pd.set_option('display.width', None)
# None means "no limit"; the old -1 spelling has been deprecated since
# pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)
data
# Threshold: if polarity <= 0.1779: Negative
# NOTE(review): the sen_sentiment cell labels scores from 0.01779 up to 0.1779
# as "neutral", which disagrees with the threshold stated above - confirm
# which one is intended.

In [9]:
# Lift pandas' display limits so whole frames are rendered.
pd.set_option('display.max_rows',None)
pd.set_option('display.width', None)
# None means "no limit"; the old -1 spelling has been deprecated since
# pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

  This is separate from the ipykernel package so we can avoid doing imports until
