In [12]:
import pandas as pd
import torch
import transformers
from textblob import TextBlob
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# from afinn import Afinn

if torch.cuda.is_available(): device = 'cuda'
elif torch.backends.mps.is_available(): device = 'mps'
else: device = 'cpu'


In [13]:
models = []

In [14]:
distilbert = transformers.pipeline('sentiment-analysis',
                                   'distilbert-base-uncased-finetuned-sst-2-english',
                                   device=device)

# TODO (optional): add batching to transformer models
def distilbert_model(text):
  results = distilbert(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEGATIVE': return -1 * score
  return score
models.append(distilbert_model)

In [15]:
roberta = transformers.pipeline('sentiment-analysis',
                                'cardiffnlp/twitter-roberta-base-sentiment',
                                device=device)
def roberta_model(text):
  results = roberta(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'LABEL_0': return -1 * score
  elif label == 'LABEL_2': return score
  return 0
models.append(roberta_model)

In [16]:
bertweet = transformers.pipeline('sentiment-analysis',
                                 'finiteautomata/bertweet-base-sentiment-analysis',
                                 device=device)
def bertweet_model(text):
  results = bertweet(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEG': return -1 * score
  elif label == 'POS': return score
  return 0
models.append(bertweet_model)

In [17]:
def textblob_model(text):
  return TextBlob(text).sentiment.polarity
models.append(textblob_model)

In [18]:
# vader = SentimentIntensityAnalyzer()
# models.append(vader.polarity_scores)

In [19]:
# afinn = Afinn()
# models.append(afinn.score)

In [20]:
test_string = 'Never gonna give you up, never gonna let you down, never gonna run around and desert you'
for model in models:
  print(model(test_string))

0.9910129904747009
0
0.7940226793289185
-0.15555555555555559


In [35]:
# ensemble sentiment analysis, returns the score (-1 to 1), uncertainty (0 to 1), and label
def ensemble_sentiment(text):
  scores = [model(text) for model in models]
  mean = sum(scores) / len(scores)
  std = (sum([(score - mean)**2 for score in scores]) / len(scores))**0.5
  # if abs(mean) < 0.3 or std > 0.4: label = 'neutral'
  if std > abs(mean): label = 'neutral'
  elif mean > 0: label = 'positive'
  else: label = 'negative'
  return mean, std, label

print(ensemble_sentiment("This is the best ADS video ever!"))

(0.9952486008405685, 0.0048890515205740346, 'positive')


In [40]:
df = pd.read_csv('../../data/yt_w_langs_en.csv')
df = df[df['country'].notna()]
df['sentiment_score'] = None
df['sentiment_uncertainty'] = None
df['sentiment_label'] = None

# shuffle
df = df.sample(frac=1).reset_index(drop=True)

counter = 0
for i, row in df.iterrows():
  text = row['text']
  score, uncertainty, label = ensemble_sentiment(text[:512])
  df.loc[i, 'sentiment_score'] = score
  df.loc[i, 'sentiment_uncertainty'] = uncertainty
  df.loc[i, 'sentiment_label'] = label
  counter += 1
  if counter == 10:
    break
df.head(n=10)


  df = pd.read_csv('../../data/yt_w_langs_en.csv')


Unnamed: 0,id,text,username,date,country,likes,n_children,title,platform,meta,lang,sentiment_score,sentiment_uncertainty,sentiment_label
0,Ugz4YhoUAVoYwB8f1cZ4AaABAg,A drone shot of a drone getting a drone shot ....,Eric Tompkins,2020-10-23T17:36:18Z,US,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCKH99cU...",en,-0.047339,0.634402,neutral
1,UgwB-ileoC16CIFvyV14AaABAg,Maybe soon you can start up a new world and ai...,Josua Soliz,2019-07-17T06:10:45Z,US,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCSKtCt0...",en,0.282753,0.414766,neutral
2,Ugww1QyokHlhdJD8YkR4AaABAg,Greatest Storyteller of all time.,CJ Smith,2020-10-15T21:43:23Z,US,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCUZUA9Q...",en,0.981099,0.027435,positive
3,UgyzNKZiBvgc34G_Nn54AaABAg,18 seconds!,Temulgeh,2019-07-04T15:52:48Z,FR,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCKuiY0p...",en,0.245315,0.424897,neutral
4,Ugy4yfNiY4rz2CoWoN54AaABAg,Excellent,MeHDi,2020-12-19T22:39:58Z,FR,0.0,0.0,Pourquoi J&#39;ai Dû Quitter Mon Appartement,youtube,"{'vid_id': 'uldXs3ZrbTE', 'user_id': 'UCfjJKlD...",en,0.741468,0.42831,positive
5,Ugwm-Wll5YM1dQ4o2IV4AaABAg,What amazing thing!!!\nWith ♥ from Italy.\nImm...,EWEB TV CHANNEL,2018-02-13T14:13:24Z,IT,104.0,6.0,Caitlyn - J&#39;ai Compris (Official Video),youtube,"{'vid_id': 'EfGisGBgLtY', 'user_id': 'UCKUt6fL...",en,0.912072,0.143568,positive
6,UgyE7BbyriKG_9Lw7jN4AaABAg,these videos are so entertaining,MerkelsBeautyBunker,2022-01-17T15:03:12Z,DE,0.0,0.0,WE PRETENDED TO BE AI BOTS IN CS:GO (ft. FURIO...,youtube,"{'vid_id': 'YrKUVx-l6xc', 'user_id': 'UC1Bqy-c...",en,0.864837,0.210811,positive
7,Ugxgb1eG-JeHKVbBtad4AaABAg,Here are my two cents: This bot has a way of r...,KuroRiot,2019-07-04T20:45:32Z,GB,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UClIcLfp...",en,-0.193658,0.457684,neutral
8,UgwNUO2L7VDep0muiIF4AaABAg,love love love duncan,Septure,2023-03-28T07:31:12Z,US,0.0,0.0,WEAPONIZED : EPISODE #10 : Midnight Gospel + U...,youtube,"{'vid_id': '0iN7j0BzXGw', 'user_id': 'UCWd8YAf...",en,0.864449,0.210783,positive
9,UgyXmhISfYvNDLG7Yf14AaABAg,I thoroughly enjoyed this,Cami,2018-12-21T19:00:31Z,US,1.0,0.0,Watch This AI Robot Pick Peppers With A Tiny Saw,youtube,"{'vid_id': '5chk9Sory88', 'user_id': 'UCChCDEX...",en,0.86863,0.212919,positive
