# Sentiment Analysis using VADER

In [1]:
import nltk
import warnings
# Suppress every warning category for the rest of the session so cell outputs
# stay uncluttered. NOTE(review): this also hides deprecation warnings from
# the libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
# Build the VADER analyzer. The constructor loads the `vader_lexicon` resource
# and raises LookupError when it has not been downloaded yet, so fetch it on
# demand and retry; this keeps the notebook runnable on a fresh environment.
try:
    sid = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    sid = SentimentIntensityAnalyzer()

In [4]:
import pandas as pd

In [5]:
# Load the tab-separated Amazon reviews file. Forward slashes are used so the
# relative path resolves on Windows, macOS and Linux alike; the previous
# backslash-separated path only worked on Windows.
df = pd.read_csv('../TextFiles/amazonreviews.tsv', sep='\t')
# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,label,review
0,pos,Stuning even for the non-gamer: This sound tra...
1,pos,The best soundtrack ever to anything.: I'm rea...
2,pos,Amazing!: This soundtrack is my favorite music...
3,pos,Excellent Soundtrack: I truly like this soundt...
4,pos,"Remember, Pull Your Jaw Off The Floor After He..."


In [6]:
def _compound_to_label(review):
    """Return 'pos' when VADER's compound score for `review` is above zero, else 'neg'."""
    compound = sid.polarity_scores(review)['compound']
    if compound > 0:
        return 'pos'
    # A compound score of exactly 0 (neutral) falls through to 'neg'.
    return 'neg'


# Predict a pos/neg label for every review from its compound score.
df['pred'] = df['review'].apply(_compound_to_label)

In [7]:
# Show the first five rows, now including the new `pred` column.
df.head(n=5)

Unnamed: 0,label,review,pred
0,pos,Stuning even for the non-gamer: This sound tra...,pos
1,pos,The best soundtrack ever to anything.: I'm rea...,pos
2,pos,Amazing!: This soundtrack is my favorite music...,pos
3,pos,Excellent Soundtrack: I truly like this soundt...,pos
4,pos,"Remember, Pull Your Jaw Off The Floor After He...",pos


# Testing our Model

In [8]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [9]:
# Fraction of reviews whose predicted label matches the true label.
accuracy_score(y_true=df['label'], y_pred=df['pred'])

0.7122

In [10]:
# classification_report returns a pre-formatted string, so print it to keep
# the column alignment instead of showing the escaped repr.
report_text = classification_report(y_true=df['label'], y_pred=df['pred'])
print(report_text)

              precision    recall  f1-score   support

         neg       0.85      0.53      0.65      5097
         pos       0.65      0.90      0.75      4903

   micro avg       0.71      0.71      0.71     10000
   macro avg       0.75      0.72      0.70     10000
weighted avg       0.75      0.71      0.70     10000



In [11]:
# Rows are true labels and columns are predicted labels, in sorted label order
# ('neg' first, then 'pos'); each row sums to that label's support count.
confusion_matrix(y_true=df['label'], y_pred=df['pred'])

array([[2709, 2388],
       [ 490, 4413]], dtype=int64)

### We can see that there is a large number of false positives (2,388 negative reviews predicted as positive), likely due in part to sarcasm, which is hard for VADER to understand.