# Get NLTK and Transformers sentiment scores on a labelled dataset

*References:*
- https://huggingface.co/transformers/quicktour.html

In [None]:
import pandas as pd

### Read data

In [None]:
# Read datasets
# The three files share one layout: tab-separated, no header row, two columns
# that are named here 'Text' and 'GT' (presumably the ground-truth label - confirm).
# NOTE(review): pandas' default quote handling can merge rows when a field contains
# an unbalanced double quote; if a shape below looks short, try
# quoting=csv.QUOTE_NONE - TODO confirm against the raw files.
RAW_DIR = '../data/raw/uci-sentiment'
COLUMNS = ['Text', 'GT']

amazon = pd.read_csv(f'{RAW_DIR}/amazon_cells_labelled.txt', sep='\t', names=COLUMNS)
imdb = pd.read_csv(f'{RAW_DIR}/imdb_labelled.txt', sep='\t', names=COLUMNS)
yelp = pd.read_csv(f'{RAW_DIR}/yelp_labelled.txt', sep='\t', names=COLUMNS)

# ignore_index=True gives the combined frame one unique 0..n-1 index; without it
# each source keeps its own 0-based labels and index lookups become ambiguous.
df = pd.concat([amazon, imdb, yelp], ignore_index=True)
display(df.shape)
df.head(3)

### NLTK sentiment analysis

In [None]:
%time
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the 'vader_lexicon' resource via NLTK's downloader.
nltk.download('vader_lexicon')

In [None]:
%time

# Build the NLTK sentiment analyzer used for every row below.
sia = SentimentIntensityAnalyzer()

# Sanity check on two hand-written sentences before scoring the dataset.
display(sia.polarity_scores("Wow, NLTK is really powerful!"))
display(sia.polarity_scores("absolutely really bad"))

# Score the sentence text, not the 'GT' column: scoring the stringified label
# would produce a column unrelated to the sentences. Only the 'compound' entry
# of the returned scores is kept.
df['NLTK'] = df['Text'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
df.head(3)

### Transformers

In [None]:
from transformers import pipeline

In [None]:
%time

def tfScore(text, classifier):
    """Return the classifier's score for `text` as a signed number.

    Args:
        text: Input handed unchanged to `classifier`.
        classifier: Callable returning a sequence whose first element is a
            mapping with 'label' and 'score' entries.

    Returns:
        The first result's score, multiplied by -1.0 when its label is
        'NEGATIVE'; for any other label the score is returned as is.
    """
    top = classifier(text)[0]
    return -1.0 * top['score'] if top['label'] == 'NEGATIVE' else top['score']

    
# Build the sentiment-analysis pipeline once and reuse it for every row.
tfSentiment = pipeline('sentiment-analysis')

# Sanity check on two hand-written sentences before scoring the dataset.
display(tfScore('I feel horrible', tfSentiment))
display(tfScore('I feel awesome', tfSentiment))

# Score the sentence text, not the 'GT' column: scoring the stringified label
# would produce a column unrelated to the sentences.
df['Transformers'] = df['Text'].apply(lambda x: tfScore(str(x), tfSentiment))
df.head(3)

### Save output

In [None]:
# Write the scored frame next to the notebook; the index is left out of the file.
df.to_csv(path_or_buf='1-nltk-transformers.csv', index=False)