In [1]:
import os
import pandas as pd
import numpy as np
import tweepy
from IPython.core.display import display, HTML
from dotenv import load_dotenv
from transformers import AutoTokenizer
from transformers import pipeline
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm
import torch
# Pick the GPU when torch reports one is available, otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device}')

# Load variables from a .env file so the os.getenv() lookups below can see them.
load_dotenv()

# Colour used for each sentiment label in the plots.
S = {
    'negative': 'red',
    'neutral': 'yellow',
    'positive': 'green',
}

def view(df_):
    """Display ``df_`` as an HTML table with pandas' display limits lifted.

    The ``display.max_rows``, ``display.max_columns`` and ``display.width``
    options are set to ``None`` only for the duration of the call.
    """
    unlimited_display = (
        'display.max_rows', None,
        'display.max_columns', None,
        'display.width', None,
    )
    with pd.option_context(*unlimited_display):
        table_markup = df_.to_html()
        display(HTML(table_markup))

2022-01-18 10:25:01.703028: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-18 10:25:01.703052: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Using cpu


In [2]:
# Build the Twitter API client from credentials held in environment variables
# (a missing variable is passed through as None).
auth = tweepy.OAuthHandler(
    os.environ.get('API_KEY'),
    os.environ.get('API_KEY_SECRET'),
)
auth.set_access_token(
    os.environ.get('ACCESS_TOKEN'),
    os.environ.get('ACCESS_TOKEN_SECRET'),
)
api = tweepy.API(auth)
# Uncomment to inspect the current rate-limit status as a table:
# view(pd.json_normalize(api.rate_limit_status()).T)

In [3]:
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
# Sentiment classifier used for all inference below. Inputs are truncated to
# at most 512 tokens instead of raising on long text.
clf = pipeline(
    task='sentiment-analysis',
    model=model_name,
    tokenizer=AutoTokenizer.from_pretrained(model_name),
    # Honour the device chosen in the setup cell. Without this argument the
    # pipeline stays on the CPU even when "Using cuda" was printed.
    # pipeline() takes a device ordinal: 0 = first GPU, -1 = CPU.
    device=0 if device == 'cuda' else -1,
    max_length=512,
    truncation=True
)

### Get data by search

In [4]:
# Search parameters: the query string and the maximum number of tweets to fetch.
q = "IAMJHUD"
limit = 500

# Page through the search results with a tqdm progress counter.
# tweet_mode='extended' is requested so each status exposes `full_text`.
search_cursor = tweepy.Cursor(api.search_tweets, q=q, tweet_mode='extended')
data = list(tqdm(search_cursor.items(limit)))

# Keep only the fields used downstream, one flat record per tweet.
tweets = [
    {
        'tweet': status.full_text,
        'date': status.created_at,
        'search_term': q,
        'poster': status.user.screen_name,
    }
    for status in data
]

500it [00:07, 65.08it/s]


### Get data by screen name

In [5]:
# Alternative data source (disabled): fetch up to `limit` tweets from one
# account's timeline instead of running a search.
# NOTE(review): these records carry a 'screen_name' key rather than the
# 'search_term'/'poster' keys built by the search cell, and this path does not
# define `q`, which the histogram title uses -- adjust before enabling.
# limit = 10
# screen_name = 'ringostarrmusic'
# data = [t for t in tweepy.Cursor(api.user_timeline,screen_name=screen_name, tweet_mode='extended').items(limit)]
# tweets = [{'tweet':d.full_text,'date':d.created_at,'screen_name':d.user.screen_name}for d in data]

### Inference

In [6]:
# Classify the text of every collected tweet in one call. The results are
# later placed side by side with `tweets`, so they are expected to come back
# one per tweet and in the same order.
tweet_texts = [record['tweet'] for record in tweets]
out = clf(tweet_texts)

In [7]:
# Readable names for the model's raw output labels.
label_names = {'LABEL_0':'negative','LABEL_1':'neutral','LABEL_2':'positive'}


def _sentiment_css(row):
    """Return one CSS background rule per cell of ``row``, chosen from its first value."""
    return [
        "background:red" if 'negative' in row.iloc[0]
        else "background:green" if 'positive' in row.iloc[0]
        else "background:yellow"
        for _cell in row
    ]


# Put each tweet next to its prediction, order chronologically, and give the
# prediction column a descriptive name.
df = (
    pd.concat([pd.DataFrame(tweets), pd.DataFrame(out)], axis=1)
    .sort_values('date')
    .rename(columns={'label': 'sentiment'})
)
df['sentiment'] = df['sentiment'].map(label_names)

# Last expression of the cell: the frame with the sentiment column colour-coded.
df.style.apply(_sentiment_css, axis=1, subset='sentiment')

Unnamed: 0,tweet,date,search_term,poster,sentiment,score
499,RT @wholelottalana_: Always here for @IAMJHUD receiving her flowers!!! 💐✨ #UrbanOneHonors,2022-01-18 02:17:55+00:00,IAMJHUD,HelloBeautiful,positive,0.966548
498,RT @MadameNoire: Entertainment Icon Honoree. ✨ Congrats @IAMJHUD! #TheSoundtrackOfBlackAmerica #UrbanOneHonors https://t.co/YFeAsZjE5y,2022-01-18 02:18:00+00:00,IAMJHUD,newsone,positive,0.878878
497,RT @tvonetv: Now I think we all remember the goosebumps we felt when @IAMJHUD performed And I Am Telling You I’m Not Going 😭 #UrbanOneHonor…,2022-01-18 02:18:04+00:00,IAMJHUD,SuccessfulAngel,positive,0.479797
496,RT @tvonetv: Now I think we all remember the goosebumps we felt when @IAMJHUD performed And I Am Telling You I’m Not Going 😭 #UrbanOneHonor…,2022-01-18 02:18:09+00:00,IAMJHUD,HelloBeautiful,positive,0.479797
495,"No better person to win the Entertainment Icon Award than @IAMJHUD! She has excelled in television, music, film, and theatre! Only a Tony away from completing her EGOT! #UrbanOneHonors",2022-01-18 02:18:12+00:00,IAMJHUD,iFly__High,positive,0.984469
494,RT @tvonetv: Now I think we all remember the goosebumps we felt when @IAMJHUD performed And I Am Telling You I’m Not Going 😭 #UrbanOneHonor…,2022-01-18 02:18:26+00:00,IAMJHUD,callmedollar,positive,0.479797
493,Congratulations to Entertainment Icon Honoree @IAMJHUD! ❤️✨ #UrbanOneHonors https://t.co/q1ZXGhLRhM,2022-01-18 02:18:27+00:00,IAMJHUD,tvonetv,positive,0.986856
492,"RT @iFly__High: No better person to win the Entertainment Icon Award than @IAMJHUD! She has excelled in television, music, film, and theatr…",2022-01-18 02:19:01+00:00,IAMJHUD,simplyshad,positive,0.969201
491,RT @IAMJHUD: It was great catching up with the ladies of @TheView this morning ! Happy MLK Day to u all ! #MLK #MLKDay https://t.co/KwmGfHy…,2022-01-18 02:19:03+00:00,IAMJHUD,Oldlady12345,positive,0.990309
490,RT @MadameNoire: Entertainment Icon Honoree. ✨ Congrats @IAMJHUD! #TheSoundtrackOfBlackAmerica #UrbanOneHonors https://t.co/YFeAsZjE5y,2022-01-18 02:19:07+00:00,IAMJHUD,princessofsoul_,positive,0.878878


### View tweet sentiments over time

In [28]:
# Histogram of tweets over time, coloured by sentiment with the shared palette.
histogram_options = {
    'x': "date",
    'color': "sentiment",
    'text_auto': True,
    'color_discrete_map': S,
    'nbins': 60,
    'title': f'sentiment for "{q}"',
}
fig = px.histogram(df, **histogram_options)
fig.show()

### View cumulative sentiment over time

In [9]:
## encode sentiment labels for plotting
# One indicator column per sentiment label that occurs in the data.
sentiment_dummies = pd.get_dummies(df['sentiment'])

# Drop indicator columns left over from an earlier run of this cell before
# attaching the fresh ones. Otherwise re-running the cell duplicates them,
# df[label] becomes a DataFrame instead of a Series, and the cumulative
# assignment below fails.
df = pd.concat(
    [sentiment_dummies, df.drop(columns=sentiment_dummies.columns, errors='ignore')],
    axis=1,
)

# Running count per sentiment in the frame's current row order (sorted by date
# when it was built). A named loop variable is used because assigning to `_`
# at notebook top level shadows IPython's last-output variable.
for label in S:
    if label in df:
        df[f'{label}_c'] = df[label].cumsum()

In [10]:
# Sentiment labels that actually occur in the data. Labels with no tweets have
# no cumulative column, so they are skipped.
labels_present = set(df['sentiment'])

fig = go.Figure()
for label, colour in S.items():
    if label not in labels_present:
        continue
    fig.add_trace(
        go.Scatter(
            x=df['date'],
            y=df[f'{label}_c'],
            mode='lines',
            name=label,
            line_color=colour,
        )
    )

# Centred title, axis and legend captions, and a uniform font.
fig.update_layout(
    title={
        'text': 'Cumulative Sentiment',
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title='Time',
    yaxis_title='Count',
    legend_title='Sentiment',
    font={'family': 'Arial', 'size': 12, 'color': 'Black'},
)
fig.show()