In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

%matplotlib inline

In [2]:
# Fetch the `vader_lexicon` package through NLTK's downloader (a no-op when it
# is already up to date). NOTE(review): this is presumably the lexicon that
# SentimentIntensityAnalyzer loads when it is instantiated below.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/yairl/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
# Despite its name this is a relative filesystem path, not a URL; it is
# resolved against the notebook's working directory.
data_source_url = "lesson8-sentiment/reviews_sample.csv"
# Load the labelled reviews into a DataFrame (columns: review, label).
amazon_reviews = pd.read_csv(data_source_url)

In [4]:
# Preview the first rows to confirm the file loaded with the expected columns.
amazon_reviews.head()

Unnamed: 0,review,label
0,Stuning even for the non-gamer: This sound tra...,pos
1,The best soundtrack ever to anything.: I'm rea...,pos
2,Amazing!: This soundtrack is my favorite music...,pos
3,Excellent Soundtrack: I truly like this soundt...,pos
4,"Remember, Pull Your Jaw Off The Floor After He...",pos


In [5]:
# Build a single analyzer instance; every scoring call below reuses it.
sid = SentimentIntensityAnalyzer()

In [6]:
# Sanity check on a mildly positive sentence. polarity_scores() returns a dict
# with 'neg', 'neu', 'pos' and 'compound' entries.
a = 'This was a good movie.'
sid.polarity_scores(a)

{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}

In [7]:
# Same check with stronger wording, capitals and exclamation marks, to compare
# its compound score against the plainer sentence above.
a = 'This was the best, most awesome movie EVER MADE!!!'
sid.polarity_scores(a)

{'neg': 0.0, 'neu': 0.425, 'pos': 0.575, 'compound': 0.8877}

In [8]:
# Class balance of the ground-truth labels before any cleaning.
amazon_reviews['label'].value_counts()

label
pos    10257
neg     9742
Name: count, dtype: int64

In [9]:
# Remove rows with missing values first, so the whitespace check below only
# ever sees populated cells.
amazon_reviews.dropna(inplace=True)

# Collect the index labels of reviews that consist solely of whitespace.
# The review column is selected by name on purpose: itertuples() yields
# (index, review, label) for this frame, so the previous positional unpacking
# `for i, lb, rv in ...` bound `rv` to the label and never inspected the
# review text at all.
blanks = [
    idx
    for idx, text in amazon_reviews['review'].items()
    if isinstance(text, str) and text.isspace()
]

# Drop the blank reviews in place so later cells keep using the same frame.
amazon_reviews.drop(blanks, inplace=True)

In [10]:
# Score every review with VADER; each cell of 'scores' holds the full dict
# returned by polarity_scores() for that review.
amazon_reviews['scores'] = amazon_reviews['review'].apply(sid.polarity_scores)

In [11]:
# Preview the frame with the new 'scores' column of per-review score dicts.
amazon_reviews.head()

Unnamed: 0,review,label,scores
0,Stuning even for the non-gamer: This sound tra...,pos,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co..."
1,The best soundtrack ever to anything.: I'm rea...,pos,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co..."
2,Amazing!: This soundtrack is my favorite music...,pos,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com..."
3,Excellent Soundtrack: I truly like this soundt...,pos,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com..."
4,"Remember, Pull Your Jaw Off The Floor After He...",pos,"{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."


In [12]:
# Pull the single 'compound' value out of each score dict into its own
# numeric column.
amazon_reviews['compound'] = amazon_reviews['scores'].map(
    lambda polarity: polarity['compound']
)

In [13]:
# Preview the frame with the extracted 'compound' column.
amazon_reviews.head()

Unnamed: 0,review,label,scores,compound
0,Stuning even for the non-gamer: This sound tra...,pos,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454
1,The best soundtrack ever to anything.: I'm rea...,pos,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957
2,Amazing!: This soundtrack is my favorite music...,pos,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858
3,Excellent Soundtrack: I truly like this soundt...,pos,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814
4,"Remember, Pull Your Jaw Off The Floor After He...",pos,"{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781


In [14]:
def test_compound(c):
    if c > 0:
        return 'pos'
    if c == 0:
        return 'neu'
    return 'neg'

In [15]:
# Convert each compound score into a 'pos' / 'neu' / 'neg' prediction.
amazon_reviews['comp_score'] = amazon_reviews['compound'].apply(test_compound)

In [16]:
# Show the frame (pandas truncates it to the first and last rows) to compare
# the predicted 'comp_score' against the ground-truth 'label'.
amazon_reviews

Unnamed: 0,review,label,scores,compound,comp_score
0,Stuning even for the non-gamer: This sound tra...,pos,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos
1,The best soundtrack ever to anything.: I'm rea...,pos,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos
2,Amazing!: This soundtrack is my favorite music...,pos,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos
3,Excellent Soundtrack: I truly like this soundt...,pos,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos
4,"Remember, Pull Your Jaw Off The Floor After He...",pos,"{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos
...,...,...,...,...,...
19994,"Rip-off: I liked Flint, I trusted him to deliv...",neg,"{'neg': 0.084, 'neu': 0.83, 'pos': 0.086, 'com...",-0.2144,neg
19995,Very disappointing: I have previously bought t...,neg,"{'neg': 0.23, 'neu': 0.714, 'pos': 0.056, 'com...",-0.8748,neg
19996,Tedious collection of stories about a goat.: I...,neg,"{'neg': 0.071, 'neu': 0.786, 'pos': 0.143, 'co...",0.8040,pos
19997,Disappointed: I love the 1632 series. I have b...,neg,"{'neg': 0.081, 'neu': 0.784, 'pos': 0.135, 'co...",0.6487,pos


In [17]:
# True where the VADER-derived prediction matches the ground-truth label.
amazon_reviews['agreement'] = amazon_reviews['label'].eq(amazon_reviews['comp_score'])

In [18]:
# Show the frame again, now with the per-row 'agreement' flag.
amazon_reviews

Unnamed: 0,review,label,scores,compound,comp_score,agreement
0,Stuning even for the non-gamer: This sound tra...,pos,"{'neg': 0.088, 'neu': 0.669, 'pos': 0.243, 'co...",0.9454,pos,True
1,The best soundtrack ever to anything.: I'm rea...,pos,"{'neg': 0.018, 'neu': 0.837, 'pos': 0.145, 'co...",0.8957,pos,True
2,Amazing!: This soundtrack is my favorite music...,pos,"{'neg': 0.04, 'neu': 0.692, 'pos': 0.268, 'com...",0.9858,pos,True
3,Excellent Soundtrack: I truly like this soundt...,pos,"{'neg': 0.09, 'neu': 0.615, 'pos': 0.295, 'com...",0.9814,pos,True
4,"Remember, Pull Your Jaw Off The Floor After He...",pos,"{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp...",0.9781,pos,True
...,...,...,...,...,...,...
19994,"Rip-off: I liked Flint, I trusted him to deliv...",neg,"{'neg': 0.084, 'neu': 0.83, 'pos': 0.086, 'com...",-0.2144,neg,True
19995,Very disappointing: I have previously bought t...,neg,"{'neg': 0.23, 'neu': 0.714, 'pos': 0.056, 'com...",-0.8748,neg,True
19996,Tedious collection of stories about a goat.: I...,neg,"{'neg': 0.071, 'neu': 0.786, 'pos': 0.143, 'co...",0.8040,pos,False
19997,Disappointed: I love the 1632 series. I have b...,neg,"{'neg': 0.081, 'neu': 0.784, 'pos': 0.135, 'co...",0.6487,pos,False


In [19]:
# (rows, columns) after cleaning and adding the derived columns.
amazon_reviews.shape

(19999, 6)

In [20]:
# How many predictions agree / disagree with the ground-truth labels.
amazon_reviews['agreement'].value_counts()

agreement
True     14236
False     5763
Name: count, dtype: int64

In [21]:
# A sentence with no sentiment-bearing words: the compound score comes out as
# 0.0, which is the case test_compound() labels 'neu'.
a = 'My name is Yair.'
sid.polarity_scores(a)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}