In [29]:
# pip install --upgrade git+https://github.com/huggingface/transformers.git
from transformers import pipeline
import comment_tools
import pandas as pd

In [30]:
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    return_all_scores=True,
)

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [31]:
video_id = "JTOJsU3FSD8"
vid_comments = comment_tools.get_video_comments(video_id)
vid_comments[0]

'or just build a todo list .'

In [32]:
sentiments = classifier(
    vid_comments
)

In [33]:
sentiments[0]

[{'label': 'LABEL_0', 'score': 0.13064123690128326},
 {'label': 'LABEL_1', 'score': 0.763518214225769},
 {'label': 'LABEL_2', 'score': 0.10584051162004471}]

In [34]:
sentiment_match = {
    "LABEL_0": "Negative",
    "LABEL_1": "Neutral",
    "LABEL_2": "Positive",
}

In [28]:
df_data = [
    {**{'comment': comment}, **{sentiment_match[item['label']]: item['score'] for item in prediction}}
    for comment, prediction in zip(vid_comments, sentiments)
]
df = pd.DataFrame(df_data)

df.head()

Unnamed: 0,comment,Negative,Neutral,Positive
0,or just build a todo list .,0.130641,0.763518,0.105841
1,this = awesome,0.002052,0.019329,0.978619
2,at 0:35 what is the name of the software on sc...,0.057449,0.909506,0.033045
3,https://www . youtube . com/watch ? v=_vrt2ffx...,0.124703,0.822005,0.053292
4,or just build a weather app,0.063227,0.82228,0.114493


In [35]:
averages = {
    "Negative": df["Negative"].mean(),
    "Neutral": df["Neutral"].mean(),
    "Positive": df["Positive"].mean(),
}
averages

{'Negative': 0.1814881397536799,
 'Neutral': 0.45393769947239915,
 'Positive': 0.36457416004054805}

In [38]:
result = []

for comment, sentiment in zip(vid_comments, sentiments):
    sentiment_scores = {sentiment_match[label['label']]: label['score'] for label in sentiment}
    comment_dict = {'comment': comment, 'sentiment_scores': sentiment_scores}
    result.append(comment_dict)

result

returnVal = {}
returnVal['averages'] = averages
returnVal['result'] = result
returnVal


{'averages': {'Negative': 0.1814881397536799,
  'Neutral': 0.45393769947239915,
  'Positive': 0.36457416004054805},
 'result': [{'comment': 'or just build a todo list .',
   'sentiment_scores': {'Negative': 0.13064123690128326,
    'Neutral': 0.763518214225769,
    'Positive': 0.10584051162004471}},
  {'comment': 'this = awesome',
   'sentiment_scores': {'Negative': 0.0020522300619632006,
    'Neutral': 0.01932867430150509,
    'Positive': 0.9786190390586853}},
  {'comment': 'at 0:35 what is the name of the software on screen please ?',
   'sentiment_scores': {'Negative': 0.05744866654276848,
    'Neutral': 0.9095058441162109,
    'Positive': 0.03304547816514969}},
  {'comment': 'https://www . youtube . com/watch ? v=_vrt2ffxntc&t',
   'sentiment_scores': {'Negative': 0.124703049659729,
    'Neutral': 0.8220049142837524,
    'Positive': 0.053292009979486465}},
  {'comment': 'or just build a weather app',
   'sentiment_scores': {'Negative': 0.06322737783193588,
    'Neutral': 0.82227987