# The goal of this short analysis is to determine the trends in posts on the WallStreetBets subreddit since January 28th, 2021. Posts are analyzed by the ratio of happy, angry, sad, surprised, and fearful words, aggregated by hour and by day.

Import packages and load the data

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the r/WallStreetBets posts CSV into a DataFrame; the path is relative
# to the notebook's working directory.
posts = pd.read_csv(
    filepath_or_buffer="../input/reddit-wallstreetsbets-posts/reddit_wsb.csv"
)

Get day and hour of each post, and create dummy column for post count (for aggregation)

In [None]:
# Parse the timestamp column once and reuse the result for both derived
# columns instead of converting the whole column twice.
parsed_timestamps = pd.to_datetime(posts["timestamp"])
posts["date"] = parsed_timestamps.dt.date  # calendar day of each post
posts["hour"] = parsed_timestamps.dt.hour  # hour of day of each post
# Constant 1 per row so that a groupby "sum" yields the number of posts.
posts["post_count"] = 1
posts  # last expression in the cell: display the augmented frame

Aggregate by day, and by hour

In [None]:
# One aggregation spec shared by both groupings: mean score, mean number of
# comments, and total number of posts per group.
summary_spec = {"score": "mean", "comms_num": "mean", "post_count": "sum"}
by_day = posts.groupby("date").agg(summary_spec)
by_hour = posts.groupby(["date", "hour"]).agg(summary_spec)

Plot the mean comment count, the mean Reddit post score (a metric of engagement on a post), and the total number of posts by hour

In [None]:
import plotly.express as px

# Route every DataFrame.plot call in this notebook through plotly.
pd.set_option("plotting.backend", "plotly")

# Flatten the (date, hour) MultiIndex into columns, then plot the hourly mean
# comment count; no x is given, so rows are plotted in order.
hourly_flat = by_hour.reset_index()
hourly_flat.plot(y="comms_num")

In [None]:
# Hourly mean post score, plotted in row order after flattening the
# (date, hour) index.
hourly_flat = by_hour.reset_index()
hourly_flat.plot(y="score")

In [None]:
# Number of posts per (date, hour) group, plotted in row order after
# flattening the index.
hourly_flat = by_hour.reset_index()
hourly_flat.plot(y="post_count")

Plot the mean comment count, the mean Reddit post score (a metric of engagement on a post), and the total number of posts by day

In [None]:
# Daily mean comment count; by_day is indexed by date, which becomes the x-axis.
by_day.plot(y="comms_num")

In [None]:
# Daily mean post score; by_day is indexed by date, which becomes the x-axis.
by_day.plot(y="score")

In [None]:
# Total number of posts per day; by_day is indexed by date, which becomes the x-axis.
by_day.plot(y="post_count")

Install and import the emotion tagger (text2emotion)

In [None]:
!pip install text2emotion
import text2emotion as te

Combine post title and body text for each post

In [None]:
# Build one text per post from title + body. Missing values are replaced with
# an empty string first: astype(str) alone would turn a missing body into the
# literal word "nan", and a missing title would make the whole result NaN.
posts["alltexts"] = (
    posts["title"].fillna("").astype(str)
    + " "
    + posts["body"].fillna("").astype(str)
)

Get the ratio of words expressing each emotion for every post

In [None]:
from time import sleep

In [None]:
# tqdm.tqdm_notebook is deprecated; tqdm.notebook.tqdm is its replacement.
from tqdm.notebook import tqdm

# Register the pandas progress_apply helpers via the class method; calling
# tqdm().pandas() would first create a stray, empty progress bar.
tqdm.pandas()

# Score every combined post text with text2emotion, showing a progress bar.
# str() guards against non-string values (e.g. NaN) in the column.
emotions = [te.get_emotion(str(text)) for text in tqdm(posts.alltexts)]

In [None]:
# Turn the per-post emotion dicts into a frame, then copy each emotion column
# onto posts under its lower-cased name. Assignment aligns on the row index.
emotion_data = pd.DataFrame.from_dict(emotions)
for emotion_label in ("Happy", "Angry", "Surprise", "Sad", "Fear"):
    posts[emotion_label.lower()] = emotion_data[emotion_label]

In [None]:
# Display the frame to inspect the emotion columns added to each post.
posts

Aggregate emotional word data by day and hour

In [None]:
# Re-aggregate now that the emotion columns exist: per-day and per-(day, hour)
# means of every numeric column. numeric_only=True keeps text columns such as
# title/body/alltexts out of the mean; pandas >= 2.0 otherwise raises a
# TypeError instead of silently dropping them.
# NOTE: this rebinds by_day / by_hour, replacing the earlier summaries
# (post_count is now a mean of the constant 1 rather than a count).
by_day = posts.groupby("date").mean(numeric_only=True)
by_hour = posts.groupby(["date", "hour"]).mean(numeric_only=True)

Plot ratio of words of each emotion by hour

In [None]:
# Hourly emotion trends: start the figure with the "happy" series, then
# overlay the other four emotions as additional line traces.
hourly_axis_labels = {
    "happy": "Ratio of words of each emotion",
    "index": "Hours since 9:00am on 1/28/2021",
}
fig = by_hour.reset_index().plot(
    y="happy",
    labels=hourly_axis_labels,
    title="Trends in emotions expressed in r/WallStreetBets posts by hour",
)
# (column in by_hour, legend name) for each overlaid trace, in drawing order.
for emotion_column, trace_name in (
    ("sad", "Sad"),
    ("angry", "Angry"),
    ("surprise", "Surprise"),
    ("fear", "Fear"),
):
    fig.add_scatter(y=by_hour[emotion_column], mode="lines", name=trace_name)
fig.show()

Plot ratio of words of each emotion by day

In [None]:
# Daily emotion trends: start the figure with the "happy" series, then
# overlay the other four emotions as additional line traces.
daily_axis_labels = {
    "happy": "Ratio of words of each emotion",
    "index": "Days since 1/28/2021",
}
fig = by_day.reset_index().plot(
    y="happy",
    labels=daily_axis_labels,
    title="Trends in emotions expressed in r/WallStreetBets posts by day",
)
# (column in by_day, legend name) for each overlaid trace, in drawing order.
for emotion_column, trace_name in (
    ("sad", "Sad"),
    ("angry", "Angry"),
    ("surprise", "Surprise"),
    ("fear", "Fear"),
):
    fig.add_scatter(y=by_day[emotion_column], mode="lines", name=trace_name)
fig.show()