In [84]:
import pandas as pd
import torch
import transformers
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from afinn import Afinn

if torch.cuda.is_available(): device = 'cuda'
elif torch.backends.mps.is_available(): device = 'mps'
else: device = 'cpu'


In [85]:
models = []

In [86]:
distilbert = transformers.pipeline('sentiment-analysis',
                                   'distilbert-base-uncased-finetuned-sst-2-english',
                                   device=device)

# TODO (optional): add batching to transformer models
def distilbert_model(text):
  results = distilbert(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEGATIVE': return -1 * score
  return score
models.append(distilbert_model)

In [87]:
roberta = transformers.pipeline('sentiment-analysis',
                                'cardiffnlp/twitter-roberta-base-sentiment',
                                device=device)
def roberta_model(text):
  results = roberta(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'LABEL_0': return -1 * score
  elif label == 'LABEL_2': return score
  return 0
models.append(roberta_model)

In [88]:
bertweet = transformers.pipeline('sentiment-analysis',
                                 'finiteautomata/bertweet-base-sentiment-analysis',
                                 device=device)
def bertweet_model(text):
  results = bertweet(text)
  score = results[0]['score']
  label = results[0]['label']
  if label == 'NEG': return -1 * score
  elif label == 'POS': return score
  return 0
models.append(bertweet_model)

In [89]:
def textblob_model(text):
  return TextBlob(text).sentiment.polarity
models.append(textblob_model)

In [90]:
vader = SentimentIntensityAnalyzer()
def vader_model(text):
  return vader.polarity_scores(text)['compound']
models.append(vader_model)

In [91]:
afinn = Afinn()
def afinn_model(text):
  return afinn.score(text) / 6 # TODO is this actually the range?
models.append(afinn_model)

In [92]:
test_string = 'Never gonna give you up, never gonna let you down, never gonna run around and desert you'
for model in models:
  print(model(test_string))

0.9910129904747009
0
0.7940226793289185
-0.15555555555555559
0.0
0.0


In [93]:
# ensemble sentiment analysis, returns the score (-1 to 1), uncertainty (0 to 1), and label
def ensemble_sentiment(text):
  scores = [model(text) for model in models]
  mean = sum(scores) / len(scores)
  std = (sum([(score - mean)**2 for score in scores]) / len(scores))**0.5
  # if abs(mean) < 0.3 or std > 0.4: label = 'neutral'
  if std > abs(mean): label = 'neutral'
  elif mean > 0: label = 'positive'
  else: label = 'negative'
  return mean, std, label

print(ensemble_sentiment("This is the best ADS video ever!"))

(0.8584324005603791, 0.19962538382696995, 'positive')


In [94]:
df = pd.read_csv('../../data/youtube_data.csv')
df = df[df['Location'].notna()]
df['sentiment_score'] = None
df['sentiment_uncertainty'] = None
df['sentiment_label'] = None

# shuffle
df = df.sample(frac=1).reset_index(drop=True)

counter = 0
for i, row in df.iterrows():
  text = row['Content']
  score, uncertainty, label = ensemble_sentiment(text[:512])
  df.loc[i, 'sentiment_score'] = score
  df.loc[i, 'sentiment_uncertainty'] = uncertainty
  df.loc[i, 'sentiment_label'] = label
  counter += 1
  if counter == 10:
    break
df.head(n=10)


  df = pd.read_csv('../../data/youtube_data.csv')


Unnamed: 0,ID,Content,User,Date,Location,Reactions,N_Children,Post Title,Platfrom,meta,lang,sentiment_score,sentiment_uncertainty,sentiment_label
0,UgyybFv8C3MILt3TdtJ4AaABAg,"1:52 You think it was a juge' but it was me, D...",Audax,2020-04-12T10:54:59Z,FR,0.0,0.0,comment j&#39;ai survécu enfermé chez moi,youtube,"{'vid_id': 'XDxrNWyL7Yw', 'user_id': 'UCLFA0EI...",en,-0.020606,0.530359,neutral
1,UgxefXthDBh47tbgE5N4AaABAg,You want Terminators? Because this is how we g...,DarkBooger,2020-06-18T05:25:17Z,US,0.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCiCOf4a...",en,-0.22688,0.422626,neutral
2,Ugzr3THWs2It96ZW8qh4AaABAg,"Making bold promises is one thing, but keeping...",Larry Cullen,2021-08-22T04:19:36Z,US,0.0,0.0,Tesla AI Day Highlights | Lex Fridman,youtube,"{'vid_id': 'ABbDB6xri8o', 'user_id': 'UCthZ3lC...",en,0.22404,0.729068,neutral
3,UgwqyDgFP2HhdJ4zl5p4AaABAg,Is Baritone downloadable?,Jagger,2019-08-07T12:58:26Z,GB,1.0,0.0,Could an A.I. Escape Minecraft&#39;s 2b2t?,youtube,"{'vid_id': 'pFlc_jizgl0', 'user_id': 'UCGu3vMa...",en,-0.163347,0.365254,neutral
4,UgwAzX4ejU-nN6yxlFV4AaABAg,That's awesome but frightening at the same time,Matthew Palanca,2020-10-13T22:57:15Z,US,0.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCjC70wR...",en,0.372433,0.464527,neutral
5,UgylW-HNMZORqW3WxdV4AaABAg,Cant the enemy just use a EMP to fry the circu...,sadaplays,2021-07-07T02:42:59Z,GB,19.0,12.0,How AI is driving a future of autonomous warfa...,youtube,"{'vid_id': 'NpwHszy7bMk', 'user_id': 'UCmkp0UU...",en,-0.292317,0.501462,neutral
6,UgwaVCMNSeyKOwe0G354AaABAg,"Old video, about old technology. Check Doctor ...",Greg Zeng,2021-07-14T08:09:03Z,AU,0.0,0.0,How AI is driving a future of autonomous warfa...,youtube,"{'vid_id': 'NpwHszy7bMk', 'user_id': 'UCn0oeSo...",en,-0.49918,0.415948,negative
7,Ugz7UlnNzAzkC6ZNVkd4AaABAg,most smartphone company want this so bad lol,mirsella,2020-04-17T15:48:52Z,FR,0.0,0.0,This AI Learned To See In The Dark! 👀,youtube,"{'vid_id': 'bcZFQ3f26pA', 'user_id': 'UCE5mJod...",en,-0.112004,0.40694,neutral
8,UgxnrlphXBITOvQ3z9x4AaABAg,Imagine catching thieves with it!,Junior Maker,2020-10-14T04:58:03Z,IN,1.0,0.0,ARTIFICIAL INTELLIGENCE DRONE! it won&#39;t st...,youtube,"{'vid_id': 'p4Cd-xKpJ9A', 'user_id': 'UCxCT0G7...",en,-0.207509,0.531865,neutral
9,UgwdrwkyVdSLXhnJL-V4AaABAg,Just remember there was a time where people sa...,illDesign.studio,2021-08-27T19:44:01Z,US,0.0,0.0,Tesla Bot: Elon Musk’s AI APOCALYPSE?!!,youtube,"{'vid_id': 'Vkt3-PVoXco', 'user_id': 'UCULHMtF...",en,0.190689,0.397161,neutral
