# Import Dependencies

In [1]:
import torch
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix

from transformers import pipeline

# Load/Download Sentiment Analysis Pipeline

In [2]:
classifier = pipeline("sentiment-analysis")
type(classifier)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
TAPAS models are not usable since `torch_scatter` can't be loaded. It seems you have `torch_scatter` installed with the wrong CUDA version. Please try to reinstall it following the instructions here: https://github.com/rusty1s/pytorch_scatter.


transformers.pipelines.text_classification.TextClassificationPipeline

# Perform Some Test Cases

In [3]:
sample_texts = [
        "This is such a great movie!",
        "This show was not interesting",
        "This show was interesting",
        "This show was not bad at all",
        "I can't say that this was a good movie"
        ]

sentiments = classifier(sample_texts)

result_dict = {}
result_dict['TEXT'] = []
result_dict['SENTIMENT'] = []
result_dict['SCORE'] = []

for i in range(len(sentiments)):
        result_dict['TEXT'].append(sample_texts[i])
        result_dict['SENTIMENT'].append(sentiments[i]['label'])
        result_dict['SCORE'].append(sentiments[i]['score'])

result_df = pd.DataFrame(result_dict)
result_df

Unnamed: 0,TEXT,SENTIMENT,SCORE
0,This is such a great movie!,POSITIVE,0.999876
1,This show was not interesting,NEGATIVE,0.999787
2,This show was interesting,POSITIVE,0.999747
3,This show was not bad at all,POSITIVE,0.998742
4,I can't say that this was a good movie,NEGATIVE,0.927845


# GPU Utilization

In [4]:
torch.cuda.is_available()

False

In [5]:
classifier = pipeline(
                    "sentiment-analysis", 
                    device = 0 if torch.cuda.is_available() else -1
                    )

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [6]:
df_tweets = pd.read_csv('data/AirlineTweets.csv')
df_tweets.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)


In [7]:
df_tweets = df_tweets[['text', 'airline_sentiment']]
df_tweets['airline_sentiment'] = df_tweets['airline_sentiment'].replace({'negative': 0, 'neutral': 1, 'positive': 2})
df_tweets.head()

Unnamed: 0,text,airline_sentiment
0,@VirginAmerica What @dhepburn said.,1
1,@VirginAmerica plus you've added commercials t...,2
2,@VirginAmerica I didn't today... Must mean I n...,1
3,@VirginAmerica it's really aggressive to blast...,0
4,@VirginAmerica and it's a really big bad thing...,0


In [8]:
all_texts = df_tweets['text'].tolist()
all_labels = df_tweets['airline_sentiment'].tolist()

In [9]:
all_predictions = classifier(all_texts)
all_predictions