In [33]:
from transformers import pipeline
import pandas as pd
from tqdm import tqdm

## Testing HuggingFace Sentiment Analysis

In [2]:
# Build the sentiment classifier with an explicitly pinned checkpoint.
# Without `model=...` the pipeline falls back to whatever default the installed
# transformers version ships, so results could silently change after an upgrade.
# The checkpoint below is the one the load log reports for the unpinned call.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

2021-07-22 17:55:44.852103: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-07-22 17:55:44.872884: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [19]:
# Smoke test: a short, plainly negative sentence.
classifier(
    "I hate data science"
)

[{'label': 'NEGATIVE', 'score': 0.9996753334999084}]

In [20]:
# Smoke test: blunt negative product feedback.
# The original literal began with an invisible zero-width space (U+200B),
# likely a copy/paste artifact; it is removed so the model sees only the text.
classifier("this product is crap")

[{'label': 'NEGATIVE', 'score': 0.9998071193695068}]

In [21]:
# Smoke test: a vendor-style apology that mentions a negative experience.
classifier(
    "We are disheartened to hear that you didn't like our product."
)

[{'label': 'NEGATIVE', 'score': 0.9994601607322693}]

In [22]:
# Smoke test: a noisy, typo-laden negative review (text kept verbatim on purpose).
classifier(
    "Wouldn't recommend the produced came half bashed and overall didn't even work."
)

[{'label': 'NEGATIVE', 'score': 0.9998005628585815}]

In [23]:
# Smoke test: several complaints in one sentence.
# The original literal began with an invisible zero-width space (U+200B),
# likely a copy/paste artifact; it is removed so the model sees only the text.
classifier("I hate this product, bad advertisement, it broke down")

[{'label': 'NEGATIVE', 'score': 0.9998100399971008}]

In [25]:
# Smoke test: a short, plainly positive sentence.
classifier(
    "I love data science"
)

[{'label': 'POSITIVE', 'score': 0.9998250007629395}]

## Testing HuggingFace on a Real Review

In [27]:
# Spot-check the classifier on one row of the review table (row position 1989,
# column position 1 -- presumably the "Review Text" column; confirm against the frame).
# NOTE(review): `data_import` is assigned in a later cell ("Bringing in Amazon Data"),
# so this cell raises NameError on a fresh kernel unless that cell is run first.
classifier(
    data_import.iloc[1989, 1]
)

[{'label': 'NEGATIVE', 'score': 0.9992116093635559}]

## Bringing in Amazon Data

In [6]:
# Load the scraped Roomba reviews from a CSV in the working directory
# and display the resulting frame.
data_import = pd.read_csv(
    filepath_or_buffer="amazon_roomba_reviews.csv",
)
data_import

Unnamed: 0,Stars,Review Text,Review Date,Review Usefulness,Random
0,5.0 out of 5 stars,"My dog, as much as I love him, sheds like craz...","Reviewed in the United States on November 29, ...",3 people found this helpful,1
1,1.0 out of 5 stars,Roomba confirmed that this product will not wo...,"Reviewed in the United States on June 19, 2020",3 people found this helpful,1
2,5.0 out of 5 stars,We've heard nothing but good things from frien...,"Reviewed in the United States on September 30,...",3 people found this helpful,1
3,4.0 out of 5 stars,I bought this Roomba 675 when it was on sale o...,"Reviewed in the United States on May 30, 2020",3 people found this helpful,1
4,1.0 out of 5 stars,This is driving me crazy. The job is finished...,"Reviewed in the United States on January 26, 2019",3 people found this helpful,1
...,...,...,...,...,...
1985,3.0 out of 5 stars,"This was my second robot vaccuum, and unfortun...","Reviewed in the United States on November 12, ...",One person found this helpful,1
1986,5.0 out of 5 stars,Had an Ecovac previously. I was a bit concerne...,"Reviewed in the United States on January 4, 2021",One person found this helpful,1
1987,4.0 out of 5 stars,I love this robot. We have had it for 3 days n...,"Reviewed in the United States on July 12, 2021",One person found this helpful,1
1988,4.0 out of 5 stars,"Works well, but very slow and loud. It’s hard ...","Reviewed in the United States on December 26, ...",One person found this helpful,1


In [35]:
# Classify every review and collect one label/score pair per row, in row order,
# so the results can later be placed side by side with `data_import`.

# Number of leading characters of each review passed to the classifier.
# NOTE(review): this slices characters, not tokens -- presumably a cheap guard
# against the model's input-length limit; consider tokenizer-level truncation instead.
MAX_REVIEW_CHARS = 512

sentiment_label = []
sentiment_score = []
for review in tqdm(data_import["Review Text"]):
    if not isinstance(review, str):
        # Missing review text (e.g. NaN from an empty CSV field) cannot be sliced
        # or classified; record placeholders so the lists stay row-aligned.
        sentiment_label.append(None)
        sentiment_score.append(float("nan"))
        continue

    # For a single string the pipeline returns a one-element list holding a
    # dict with "label" and "score" keys.
    classification = classifier(review[:MAX_REVIEW_CHARS])[0]
    sentiment_label.append(classification["label"])
    sentiment_score.append(classification["score"])


100%|██████████| 1990/1990 [06:01<00:00,  5.50it/s]


In [36]:
# Assemble the per-review predictions into a two-column frame and display it.
sentiment_columns = {
    "Sentiment Label": sentiment_label,
    "Sentiment Score": sentiment_score,
}
sentiment_data = pd.DataFrame(sentiment_columns)
sentiment_data

Unnamed: 0,Sentiment Label,Sentiment Score
0,NEGATIVE,0.995330
1,NEGATIVE,0.999726
2,NEGATIVE,0.998067
3,POSITIVE,0.851761
4,NEGATIVE,0.999309
...,...,...
1985,NEGATIVE,0.999685
1986,POSITIVE,0.997915
1987,POSITIVE,0.996177
1988,POSITIVE,0.992265


In [37]:
# Place the predictions next to the original review columns (aligned on index)
# and display the combined frame.
frames_side_by_side = [data_import, sentiment_data]
output = pd.concat(frames_side_by_side, axis="columns")
output

Unnamed: 0,Stars,Review Text,Review Date,Review Usefulness,Random,Sentiment Label,Sentiment Score
0,5.0 out of 5 stars,"My dog, as much as I love him, sheds like craz...","Reviewed in the United States on November 29, ...",3 people found this helpful,1,NEGATIVE,0.995330
1,1.0 out of 5 stars,Roomba confirmed that this product will not wo...,"Reviewed in the United States on June 19, 2020",3 people found this helpful,1,NEGATIVE,0.999726
2,5.0 out of 5 stars,We've heard nothing but good things from frien...,"Reviewed in the United States on September 30,...",3 people found this helpful,1,NEGATIVE,0.998067
3,4.0 out of 5 stars,I bought this Roomba 675 when it was on sale o...,"Reviewed in the United States on May 30, 2020",3 people found this helpful,1,POSITIVE,0.851761
4,1.0 out of 5 stars,This is driving me crazy. The job is finished...,"Reviewed in the United States on January 26, 2019",3 people found this helpful,1,NEGATIVE,0.999309
...,...,...,...,...,...,...,...
1985,3.0 out of 5 stars,"This was my second robot vaccuum, and unfortun...","Reviewed in the United States on November 12, ...",One person found this helpful,1,NEGATIVE,0.999685
1986,5.0 out of 5 stars,Had an Ecovac previously. I was a bit concerne...,"Reviewed in the United States on January 4, 2021",One person found this helpful,1,POSITIVE,0.997915
1987,4.0 out of 5 stars,I love this robot. We have had it for 3 days n...,"Reviewed in the United States on July 12, 2021",One person found this helpful,1,POSITIVE,0.996177
1988,4.0 out of 5 stars,"Works well, but very slow and loud. It’s hard ...","Reviewed in the United States on December 26, ...",One person found this helpful,1,POSITIVE,0.992265


In [38]:
# Persist the reviews together with their sentiment columns; the index is not written.
output.to_csv(
    path_or_buf="amazon_roomba_reviews_w_sentiment.csv",
    index=False,
)