In [1]:
# Install vaderSentiment package for VADER
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 4.9 MB/s 
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [2]:
# Data processing
import pandas as pd
import numpy as np


# Import VADER sentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Import accuracy_score to check performance
from sklearn.metrics import accuracy_score

# Set a wider colwith
pd.set_option('display.max_colwidth', 1000)

In [4]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Change directory
import os
os.chdir("./drive/My Drive/Colab Notebooks/SentimentAnalysis/")

# Print out the current directory
!pwd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks/SentimentAnalysis


In [6]:
# Read in data
amz_review = pd.read_csv('a1_RestaurantReviews_HistoricDump.tsv', sep='\t', names=['review', 'label'])

# Take a look at the data
amz_review.head()

Unnamed: 0,review,label
0,Review,Liked
1,Wow... Loved this place.,1
2,Crust is not good.,0
3,Not tasty and the texture was just nasty.,0
4,Stopped by during the late May bank holiday off Rick Steve recommendation and loved it.,1


In [7]:
# Get the dataset information
amz_review.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 901 entries, 0 to 900
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   review  901 non-null    object
 1   label   901 non-null    object
dtypes: object(2)
memory usage: 14.2+ KB


In [8]:
amz_review.head()

Unnamed: 0,review,label
0,Review,Liked
1,Wow... Loved this place.,1
2,Crust is not good.,0
3,Not tasty and the texture was just nasty.,0
4,Stopped by during the late May bank holiday off Rick Steve recommendation and loved it.,1


In [9]:
# Check the label distribution
amz_review['label'].value_counts()

1        496
0        404
Liked      1
Name: label, dtype: int64

In [10]:
# Example text
text = 'GrabNGoInfo.com is a great machine learning tutorial website.'

# VADER Sentiment
vader = SentimentIntensityAnalyzer()
vader_sentiment = vader.polarity_scores(text)
vader_sentiment

{'neg': 0.0, 'neu': 0.631, 'pos': 0.369, 'compound': 0.6249}

In [11]:
# Extract sentiment score
vader_sentiment['compound']

0.6249

In [12]:
text2 = 'The food is bad'
vader_sentiment2 = vader.polarity_scores(text2)
vader_sentiment2

{'neg': 0.538, 'neu': 0.462, 'pos': 0.0, 'compound': -0.5423}

In [13]:
vader_sentiment2['compound']

-0.5423

In [14]:
amz_review.head()


Unnamed: 0,review,label
0,Review,Liked
1,Wow... Loved this place.,1
2,Crust is not good.,0
3,Not tasty and the texture was just nasty.,0
4,Stopped by during the late May bank holiday off Rick Steve recommendation and loved it.,1


In [15]:
# Get sentiment score for each review
vader_sentiment = SentimentIntensityAnalyzer()
amz_review['scores_VADER'] = amz_review['review'].apply(lambda s: vader_sentiment.polarity_scores(s)['compound'])

# Predict sentiment label for each review
amz_review['pred_VADER'] = amz_review['scores_VADER'].apply(lambda x: 1 if x >=0 else 0)
amz_review.head()


Unnamed: 0,review,label,scores_VADER,pred_VADER
0,Review,Liked,0.0,1
1,Wow... Loved this place.,1,0.8271,1
2,Crust is not good.,0,-0.3412,0
3,Not tasty and the texture was just nasty.,0,-0.5574,0
4,Stopped by during the late May bank holiday off Rick Steve recommendation and loved it.,1,0.6908,1


In [16]:
# Compare Actual and Predicted
accuracy_score(amz_review['label'],amz_review['pred_VADER'])

0.0