# Loading and preprocessing 

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from nltk.corpus import sentiwordnet as swn
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score
import spacy
from afinn import Afinn
import time

In [2]:
# Load dataset
train_df = pd.read_csv('../Data/train.csv')
test_df = pd.read_csv('../Data/test.csv') 

X_train = train_df['review']
y_train = train_df['sentiment']
X_test = test_df['review']
y_test = test_df['sentiment']

# Preprocessing: Convert sentiments to numerical format, e.g., positive: 2, negative: 1
y_train = y_train.map({'positive': '2', 'negative': '1'})
y_test = y_test.map({'positive': '2', 'negative': '1'})

In [3]:
!python3 -m nltk.downloader -d /Users/moiz/nltk_data sentiwordnet
!python3 -m nltk.downloader -d /Users/moiz/nltk_data omw-1.4
!python3 -m nltk.downloader -d /Users/moiz/nltk_data wordnet
!python3 -m nltk.downloader -d /Users/moiz/nltk_data vader_lexicon
!python3 -m nltk.downloader -d /Users/moiz/nltk_data punkt
!python3 -m nltk.downloader -d /Users/moiz/nltk_data averaged_perceptron_tagger

nltk.data.path.append('/Users/moiz/nltk_data')

!python3 -m spacy download en_core_web_sm

# nltk.download('sentiwordnet')
# nltk.download('omw-1.4')
# nltk.download('wordnet')
# nltk.download('vader_lexicon')
# nltk.download('punkt')  # For tokenization
# nltk.download('averaged_perceptron_tagger')  # For POS tagging

# Tokenization and POS tagging using Spacy
nlp = spacy.load('en_core_web_sm')

[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /Users/moiz/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/moiz/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/moiz/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/moiz/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/moiz/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/moiz/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py

# Applying SentiWordNet

In [4]:
def sentiwordnet_sentiment(review):
    tokens = nlp(review)
    pos_score = neg_score = 0
    for token in tokens:
        synsets = list(swn.senti_synsets(token.text))
        if synsets:
            synset = synsets[0]
            pos_score += synset.pos_score()
            neg_score += synset.neg_score()
    return '2' if pos_score > neg_score else '1'

# Applying SentiWordNet sentiment analysis
start = time.time()
y_pred_swn = [sentiwordnet_sentiment(review) for review in X_test]
end = time.time()
accuracy_swn = accuracy_score(y_test, y_pred_swn)
print(f'SentiWordNet Accuracy: {accuracy_swn}')
print(f'SentiWordNet Time: {end-start} seconds')

SentiWordNet Accuracy: 0.64155
SentiWordNet Time: 1134.2989218235016 seconds


# Applying Afinn

In [5]:
afn = Afinn()

def afinn_sentiment(review):
    score = afn.score(review)
    return '2' if score > 0 else '1'

# Applying Afinn sentiment analysis
start = time.time()
y_pred_afn = [afinn_sentiment(review) for review in X_test]
end = time.time()
accuracy_afn = accuracy_score(y_test, y_pred_afn)
print(f'Afinn Accuracy: {accuracy_afn}')
print(f'Afinn Time: {end-start} seconds')

Afinn Accuracy: 0.72
Afinn Time: 80.96866369247437 seconds


# Applying VADER

In [6]:
analyzer = SentimentIntensityAnalyzer()

def vader_sentiment(review):
    scores = analyzer.polarity_scores(review)
    return '2' if scores['compound'] > 0.1 else '1'

# Applying VADER sentiment analysis
start = time.time()
y_pred_vader = [vader_sentiment(review) for review in X_test]
end = time.time()
accuracy_vader = accuracy_score(y_test, y_pred_vader)
print(f'VADER Accuracy: {accuracy_vader}')
print(f'VADER Time: {end-start} seconds')

VADER Accuracy: 0.7041
VADER Time: 23.933098077774048 seconds
