# Sneak Peek into VADER

In [1]:
import nltk
#nltk.download()

In [2]:
from nltk.sentiment import vader



In [3]:
sia= vader.SentimentIntensityAnalyzer()

In [4]:
sia.polarity_scores("What a terrible restraunt")

{'compound': -0.4767, 'neg': 0.608, 'neu': 0.392, 'pos': 0.0}

In [5]:
sia.polarity_scores("terrible")

{'compound': -0.4767, 'neg': 1.0, 'neu': 0.0, 'pos': 0.0}

In [6]:
#Emoticon support

In [7]:
sia.polarity_scores(":D")

{'compound': 0.5106, 'neg': 0.0, 'neu': 0.0, 'pos': 1.0}

In [8]:
sia.polarity_scores(":/")

{'compound': -0.34, 'neg': 1.0, 'neu': 0.0, 'pos': 0.0}

In [9]:
#Idiom support

In [10]:
sia.polarity_scores("the cumin was the kiss of death")

{'compound': -0.6124, 'neg': 0.5, 'neu': 0.5, 'pos': 0.0}

In [11]:
#Punctuation support

In [12]:
sia.polarity_scores("the food was good")

{'compound': 0.4404, 'neg': 0.0, 'neu': 0.508, 'pos': 0.492}

In [13]:
sia.polarity_scores("the food was good!!!")

{'compound': 0.5826, 'neg': 0.0, 'neu': 0.443, 'pos': 0.557}

In [14]:
#Negation support

In [15]:
sia.polarity_scores("the food was good!!!")

{'compound': 0.5826, 'neg': 0.0, 'neu': 0.443, 'pos': 0.557}

In [16]:
sia.polarity_scores("the food was not good!!!")

{'compound': -0.5076, 'neg': 0.451, 'neu': 0.549, 'pos': 0.0}

In [17]:
sia.polarity_scores("the food was not the worst!!!")

{'compound': 0.6334, 'neg': 0.0, 'neu': 0.545, 'pos': 0.455}

In [18]:
#Emphasis support

In [19]:
#E1. Capitalisation

In [20]:
sia.polarity_scores("the food was good")

{'compound': 0.4404, 'neg': 0.0, 'neu': 0.508, 'pos': 0.492}

In [21]:
sia.polarity_scores("the food was GOOD")

{'compound': 0.5622, 'neg': 0.0, 'neu': 0.452, 'pos': 0.548}

In [22]:
#E2. Booster words

In [23]:
sia.polarity_scores("the food was good")

{'compound': 0.4404, 'neg': 0.0, 'neu': 0.508, 'pos': 0.492}

In [24]:
sia.polarity_scores("the food was so good")

{'compound': 0.5777, 'neg': 0.0, 'neu': 0.517, 'pos': 0.483}

In [25]:
#Contrast support

In [26]:
sia.polarity_scores("the food was bad but service was good")

{'compound': 0.3818, 'neg': 0.186, 'neu': 0.496, 'pos': 0.318}

In [27]:
sia.polarity_scores("the food was bad but I liked it")

{'compound': 0.3506, 'neg': 0.205, 'neu': 0.457, 'pos': 0.338}

In [28]:
# Unlike "but", "and" gets no contrast handling in VADER: the same sentence joined with "and" scores negative overall

In [29]:
sia.polarity_scores("the food was bad and I liked this")

{'compound': -0.1779, 'neg': 0.31, 'neu': 0.442, 'pos': 0.248}

# Implementing A Rule-based Approach using Vader

## VADER on Cornell's Movie Review Data

In [30]:
# Corpus can be downloaded from http://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz

In [31]:
# positiveReviews and negativeReviews are lists holding one review (one line of the file) per entry

In [32]:
# Load the positive-polarity corpus: each line of the file becomes one list
# entry, with its trailing newline kept.
positiveReview = "C:\\Users\\praveen ji\\Desktop\\requiredDoc\\PyCode\\Dataset\\rt-polaritydata\\rt-polaritydata\\rt-polarity.pos"
with open(positiveReview, 'r') as positiveFile:
    positiveReviews = list(positiveFile)

In [33]:
# Load the negative-polarity corpus: each line of the file becomes one list
# entry, with its trailing newline kept.
negativeReview = "C:\\Users\\praveen ji\\Desktop\\requiredDoc\\PyCode\\Dataset\\rt-polaritydata\\rt-polaritydata\\rt-polarity.neg"
with open(negativeReview, 'r') as negativeFile:
    negativeReviews = list(negativeFile)

In [34]:
#check first positive review

In [35]:
positiveReviews[0]

'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal . \n'

In [36]:
len(positiveReviews)

5331

In [37]:
#check first negative review

In [38]:
negativeReviews[0]

'simplistic , silly and tedious . \n'

In [39]:
len(negativeReviews)

5331

In [40]:
# A simple function invoking VADER ~ relying completely upon compound score

In [41]:
sia= vader.SentimentIntensityAnalyzer()

In [42]:
def vaderSentiment(review):
    """Return VADER's compound polarity score for one review text.

    The review is scored with the module-level analyzer ``sia``; only the
    'compound' entry of the resulting score dict is returned, the 'neg',
    'neu' and 'pos' entries are discarded.
    """
    polarityScores = sia.polarity_scores(review)
    return polarityScores['compound']

In [43]:
#check function 

In [44]:
review ="this is the best restraunt in the city"
vaderSentiment(review)

0.6369

In [45]:
#applying VADER to the reviews

In [46]:
#python syntax for applying a function to each element of a list
[vaderSentiment(oneNegativeReview) for oneNegativeReview in negativeReviews ]

[0.0258,
 0.4404,
 0.0,
 -0.25,
 0.0,
 0.4939,
 0.0,
 0.0,
 -0.34,
 -0.3612,
 -0.3678,
 0.397,
 -0.0384,
 -0.836,
 0.3818,
 -0.2565,
 0.4404,
 0.4199,
 0.0772,
 0.0,
 0.7346,
 -0.3559,
 0.2732,
 -0.0516,
 0.4939,
 0.4019,
 -0.5423,
 -0.8887,
 0.6068,
 -0.296,
 0.0772,
 0.0,
 0.5267,
 0.4939,
 -0.7845,
 -0.5865,
 0.0258,
 -0.2457,
 -0.5789,
 0.0,
 -0.25,
 -0.6808,
 0.4588,
 0.5574,
 0.802,
 -0.4767,
 0.6124,
 -0.4767,
 -0.7579,
 0.0,
 -0.5562,
 0.0516,
 0.6369,
 -0.4767,
 -0.5574,
 0.4404,
 0.8658,
 0.0,
 0.3477,
 0.5574,
 -0.8591,
 -0.5574,
 -0.5994,
 0.128,
 0.1154,
 0.34,
 0.2509,
 0.4404,
 -0.4767,
 0.0,
 0.0,
 -0.144,
 0.4215,
 0.0,
 0.2846,
 -0.5267,
 0.0,
 -0.0258,
 -0.2235,
 -0.4824,
 0.5095,
 -0.4215,
 0.4402,
 0.4019,
 -0.7269,
 0.0,
 0.875,
 0.4767,
 -0.3239,
 0.1779,
 0.8497,
 0.4404,
 0.0,
 0.3612,
 0.802,
 -0.1263,
 -0.3612,
 0.5994,
 0.0,
 0.0,
 -0.4391,
 -0.6369,
 0.34,
 0.2263,
 0.0,
 0.7715,
 -0.5096,
 -0.1263,
 -0.3612,
 -0.835,
 0.2732,
 -0.4019,
 -0.8201,
 -0.6486,


In [47]:
# Code reuse with getReviewSentiments:
# create a function that takes a function object and applies it to all reviews

In [48]:
def getReviewSentiments(sentimentCalclator):
    """Apply a scoring callable to every review of both corpora.

    sentimentCalclator is called with one review string at a time: first for
    each entry of the module-level ``negativeReviews`` list, then for each
    entry of ``positiveReviews``.

    Returns a dict with the per-review results, in corpus order, under the
    keys 'results-on-positive' and 'results-on-negative'.
    """
    # Negative reviews are scored before positive ones.
    negativeScores = []
    for reviewText in negativeReviews:
        negativeScores.append(sentimentCalclator(reviewText))

    positiveScores = []
    for reviewText in positiveReviews:
        positiveScores.append(sentimentCalclator(reviewText))

    return {
        'results-on-positive': positiveScores,
        'results-on-negative': negativeScores,
    }

In [49]:
vaderResults= getReviewSentiments(vaderSentiment)

In [50]:
print(type(vaderResults))

<type 'dict'>


In [51]:
vaderResults.keys()

['results-on-negative', 'results-on-positive']

In [52]:
len(vaderResults['results-on-negative'])

5331

In [53]:
vaderResults['results-on-negative'][0]

0.0258

In [54]:
# Calculate accuracy on positive reviews:
# the percentage of positive reviews that VADER (correctly) classified as positive

In [55]:
def runDiagnostics(reviewResult):
    """Print per-class and overall accuracy for a set of sentiment scores.

    reviewResult is a dict with the keys 'results-on-positive' and
    'results-on-negative', each holding a list of numeric scores (the shape
    returned by getReviewSentiments).

    A score for a positive review counts as correct when it is > 0; a score
    for a negative review counts as correct when it is < 0.  A score of
    exactly 0 is therefore counted as wrong in both classes.

    Three lines are printed (positive, negative, overall); nothing is
    returned.  A class with no reviews is reported as 0.00% rather than
    raising ZeroDivisionError.
    """
    positiveReviewsResult = reviewResult['results-on-positive']
    negativeReviewsResult = reviewResult['results-on-negative']

    # Count each class once and reuse the counts for the overall figure.
    correctPositive = sum(1 for x in positiveReviewsResult if x > 0)
    correctNegative = sum(1 for x in negativeReviewsResult if x < 0)

    def fraction(correct, count):
        # float() forces true division on Python 2 as well; an empty class
        # yields 0.0 instead of dividing by zero.
        return float(correct) / count if count else 0.0

    pctTruePositive = fraction(correctPositive, len(positiveReviewsResult))
    pctTrueNegative = fraction(correctNegative, len(negativeReviewsResult))

    totalAccurate = float(correctPositive + correctNegative)
    total = len(positiveReviewsResult) + len(negativeReviewsResult)
    pctOverall = totalAccurate * 100 / total if total else 0.0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Accuracy on positive reviews = " + "%.2f" % (pctTruePositive * 100) + "%")
    print("Accuracy on negative reviews = " + "%.2f" % (pctTrueNegative * 100) + "%")
    print("Accuracy Overall = " + "%.2f" % pctOverall + "%")
    

In [56]:
runDiagnostics(getReviewSentiments(vaderSentiment))

Accuracy on positive reviews = 69.44%
Accuracy on negative reviews = 42.24%
Accuracy Overall = 54.76%
