In [1]:
import pandas as pd
import numpy as np

In [2]:
# The dataset is tab-separated, with one labelled review per row.
reviews_path = 'moviereviews.tsv'
df = pd.read_csv(reviews_path, sep='\t')

# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,label,review
0,neg,how do films like mouse hunt get into theatres...
1,neg,some talented actresses are blessed with a dem...
2,pos,this has been an extraordinary year for austra...
3,pos,according to hollywood movies made in last few...
4,neg,my first press screening of 1998 and already i...


In [3]:
# Remove every row that has a missing value in any column, modifying df in place.
df.dropna(axis=0, how='any', inplace=True)

In [4]:
# Collect the index labels of reviews that carry no text at all, i.e. strings
# that are empty or consist only of whitespace.
#
# Iterating over the index and the 'review' column directly (instead of
# unpacking df.itertuples()) keeps this cell re-runnable after later cells add
# more columns to df; tuple unpacking would then fail with
# "too many values to unpack".
# Non-string values are skipped rather than treated as blank.
blanks = [
    idx
    for idx, text in zip(df.index, df['review'])
    if isinstance(text, str) and not text.strip()
]
blanks

[57,
 71,
 147,
 151,
 283,
 307,
 313,
 323,
 343,
 351,
 427,
 501,
 633,
 675,
 815,
 851,
 977,
 1079,
 1299,
 1455,
 1493,
 1525,
 1531,
 1763,
 1851,
 1905,
 1993]

In [5]:
# Remove the blank reviews by index label, modifying df in place.
# errors='ignore' makes the cell idempotent: re-running it after the rows are
# already gone is a no-op instead of raising KeyError.
df.drop(index=blanks, inplace=True, errors='ignore')

In [6]:
# Check how many reviews remain in each class after cleaning.
label_counts = df['label'].value_counts()
label_counts

neg    969
pos    969
Name: label, dtype: int64

In [7]:
import nltk

# Fetch the lexicon resource used by the VADER sentiment analyzer.
vader_resource = 'vader_lexicon'
nltk.download(vader_resource)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [8]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single analyzer instance, reused below to score every review.
sid = SentimentIntensityAnalyzer()




In [9]:
# Score every review with VADER; each cell of 'scores' holds the dict returned
# by polarity_scores for that review.
df['scores'] = df['review'].apply(sid.polarity_scores)

In [10]:
# Pull the 'compound' entry out of each score dict into its own column.
df['compound'] = df['scores'].map(lambda score_dict: score_dict['compound'])

In [11]:
# Turn the compound score into a binary label.
# A compound score of exactly 0 is labelled 'pos', because the test is >= 0.
is_non_negative = df['compound'] >= 0
df['comp_score'] = np.where(is_non_negative, 'pos', 'neg')

In [12]:
# Preview the frame with the new 'scores', 'compound' and 'comp_score' columns.
df.head(n=5)

Unnamed: 0,label,review,scores,compound,comp_score
0,neg,how do films like mouse hunt get into theatres...,"{'neg': 0.121, 'neu': 0.778, 'pos': 0.101, 'co...",-0.9125,neg
1,neg,some talented actresses are blessed with a dem...,"{'neg': 0.12, 'neu': 0.775, 'pos': 0.105, 'com...",-0.8618,neg
2,pos,this has been an extraordinary year for austra...,"{'neg': 0.067, 'neu': 0.783, 'pos': 0.15, 'com...",0.9953,pos
3,pos,according to hollywood movies made in last few...,"{'neg': 0.069, 'neu': 0.786, 'pos': 0.145, 'co...",0.9972,pos
4,neg,my first press screening of 1998 and already i...,"{'neg': 0.09, 'neu': 0.822, 'pos': 0.088, 'com...",-0.7264,neg


In [13]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [14]:
# Fraction of reviews whose VADER-derived label matches the true label.
vader_accuracy = accuracy_score(df['label'], df['comp_score'])
print(vader_accuracy)

0.6367389060887513


Overall accuracy is about 64%, which is modest but expected: VADER scores text from a lexicon and may not be able to account for things like sarcasm or shifts in sentiment within a review. On this balanced dataset it still performs better than a random 50-50 assignment.

In [15]:
# Per-class precision, recall and F1 of the VADER-derived labels.
report_text = classification_report(df['label'], df['comp_score'])
print(report_text)

              precision    recall  f1-score   support

         neg       0.72      0.44      0.55       969
         pos       0.60      0.83      0.70       969

    accuracy                           0.64      1938
   macro avg       0.66      0.64      0.62      1938
weighted avg       0.66      0.64      0.62      1938



In [16]:
# Confusion matrix of true labels (first argument) against VADER-derived
# labels (second argument).
label_confusion = confusion_matrix(df['label'], df['comp_score'])
print(label_confusion)

[[427 542]
 [162 807]]


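This is what makes sentiment analysis tricky in NLP: reviews that involve sarcasm, or that mix positive and negative words, are hard for algorithms to classify from the raw text alone. Here, 542 of the 969 negative reviews were labelled positive, while only 162 of the 969 positive reviews were labelled negative.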