In [1]:
import pandas as pd
import torch
import transformers
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from afinn import Afinn

if torch.cuda.is_available(): device = 'cuda'
elif torch.backends.mps.is_available(): device = 'mps'
else: device = 'cpu'


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
models = []

In [16]:
distilbert = transformers.pipeline('sentiment-analysis',
                                   'distilbert-base-uncased-finetuned-sst-2-english',
                                   device=device)

# TODO (optional): add batching to transformer models
def distilbert_model(text):
  results = distilbert(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEGATIVE': return -1 * score
  return score
models.append(distilbert_model)

In [4]:
roberta = transformers.pipeline('sentiment-analysis',
                                'cardiffnlp/twitter-roberta-base-sentiment',
                                device=device)
def roberta_model(text):
  results = roberta(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'LABEL_0': return -1 * score
  elif label == 'LABEL_2': return score
  return 0
models.append(roberta_model)

In [5]:
bertweet = transformers.pipeline('sentiment-analysis',
                                 'finiteautomata/bertweet-base-sentiment-analysis',
                                 device=device)
def bertweet_model(text):
  results = bertweet(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEG': return -1 * score
  elif label == 'POS': return score
  return 0
models.append(bertweet_model)

In [21]:
def textblob_model(text):
  return TextBlob(text).sentiment.polarity
models.append(textblob_model)

In [22]:
vader = SentimentIntensityAnalyzer()
def vader_model(text):
  return vader.polarity_scores(text)['compound']
models.append(vader_model)

In [23]:
afinn = Afinn()
def afinn_model(text):
  return afinn.score(text) / 6 # TODO is this actually the range?
models.append(afinn_model)

In [9]:
test_string = 'Never gonna give you up, never gonna let you down, never gonna run around and desert you'
for model in models:
  print(model(test_string))

0.9910129904747009
0
0.7940226793289185
-0.15555555555555559
0.0
0.0


  incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask


In [10]:
# ensemble sentiment analysis, returns the score (-1 to 1), uncertainty (0 to 1), and label
def ensemble_sentiment(text):
  scores = [model(text) for model in models]
  mean = sum(scores) / len(scores)
  std = (sum([(score - mean)**2 for score in scores]) / len(scores))**0.5
  # if abs(mean) < 0.3 or std > 0.4: label = 'neutral'
  if std > abs(mean): label = 'neutral'
  elif mean > 0: label = 'positive'
  else: label = 'negative'
  return mean, std, label

print(ensemble_sentiment("This is the best ADS video ever!"))

(0.8584324005603791, 0.19962538382696995, 'positive')


In [11]:
df = pd.read_csv('../../data/youtube_data.csv')
df = df[df['Location'].notna()]
df['sentiment_score'] = None
df['sentiment_uncertainty'] = None
df['sentiment_label'] = None

# shuffle
df = df.sample(frac=1).reset_index(drop=True)

counter = 0
for i, row in df.iterrows():
  text = row['Content']
  score, uncertainty, label = ensemble_sentiment(text[:512])
  df.loc[i, 'sentiment_score'] = score
  df.loc[i, 'sentiment_uncertainty'] = uncertainty
  df.loc[i, 'sentiment_label'] = label
  counter += 1
  if counter == 10:
    break
df.head(n=10)


  df = pd.read_csv('../../data/youtube_data.csv')


Unnamed: 0,ID,Content,User,Date,Location,Reactions,N_Children,Post Title,Platfrom,meta,lang,sentiment_score,sentiment_uncertainty,sentiment_label
0,UgxBaJSR8kXl-xhtSbh4AaABAg,*When you use ur moms account on 2b2t for a bo...,Saxton Fail,2019-07-07T08:40:20Z,US,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCkUrDIh...",en,-0.165238,0.369484,neutral
1,UgwxIa1AZ9FTFyIs8ql4AaABAg,Wow!!! I don't fully understand what they're s...,Arise Training System,2021-08-20T20:53:57Z,US,13.0,1.0,Tesla AI Day in 19 Minutes (SUPERCUT),youtube,"{'vid_id': 'keWEE9FwS9o', 'user_id': 'UCEg00Nh...",en,1.019148,0.326419,positive
2,UgxGcXqXhpxdlkKbPpR4AaABAg,Same in Soviet when it went it’s the next move...,the political minute,2023-04-15T05:34:04Z,US,0.0,0.0,VERY SCARY: AI bot lays out plans to destroy h...,youtube,"{'vid_id': 'uKbFym9brW4', 'user_id': 'UCcODnCb...",en,-0.176282,0.353358,neutral
3,UgxVhxfgYNK1jCGAPm54AaABAg,"OMG, just discovered you via your great cover ...",Dominik Hundhammer,2023-03-27T12:11:32Z,DE,0.0,0.0,Your Song　/　Elton John　Unplugged cover by Ai N...,youtube,"{'vid_id': 'I97HIH5M6YE', 'user_id': 'UCR7uH5b...",en,1.106823,0.506419,positive
4,Ugx22h1v1ULCFJ0tNRh4AaABAg,"Casey, i want to know 1 something , if you go ...",Blue Mind,2020-11-08T13:37:54Z,BR,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCV4I9q7...",en,-0.028161,0.467656,neutral
5,UgyMasUAmkg80GjPI6B4AaABAg,Thats an amazing song 100/10. Thats going in m...,Midarana,2019-01-24T15:15:53Z,CA,1.0,0.0,【初音ミク】DECO*27 - アイ / AI【オリジナルMV】,youtube,"{'vid_id': 'RHqOdDG3Jjg', 'user_id': 'UCDyf5Gr...",en,0.885247,0.169696,positive
6,UgxFCfaBF8-g8TwFgPd4AaABAg,"""this lens costed $3000"". Makes that in one we...",Cubamus Prime,2020-10-17T02:59:39Z,US,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCRK8Rmj...",en,-0.254447,0.384332,neutral
7,UgxRuX-quRLKUjVfnfl4AaABAg,"Whenever FitMc says ""The Oldest Anarchy Server...",REDACTED,2019-07-04T22:33:03Z,GB,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCr41EwE...",en,-0.16402,0.366759,neutral
8,UgzyQJX8ilc51lJjAad4AaABAg,this is how the goverment is going to spy on u...,Dishyjam,2020-10-14T05:36:23Z,US,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UC6dsh1I...",en,-0.28603,0.412461,neutral
9,UgxGv1dmlrEa-mAywmJ4AaABAg,"I agree. I am a FT drone pilot, and will not p...",Saanichton Ministries. Bible Teaching The Word.,2020-10-19T14:38:26Z,CA,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UC43Xrof...",en,-0.308717,0.505852,neutral
