In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

!pip install text2emotion
import text2emotion as te

In [None]:
# Load the r/WallStreetBets posts CSV into the main working DataFrame.
red_posts = pd.read_csv("../input/reddit-wallstreetsbets-posts/reddit_wsb.csv")

In [None]:
# Display the freshly loaded posts for a quick visual check.
red_posts

### Creating columns to support aggregation and binning

In [None]:
# Parse the timestamp column once, then derive the calendar date and the
# hour-of-day that are used as grouping keys further down.
post_time = pd.to_datetime(red_posts["timestamp"])
red_posts["date"] = post_time.dt.date
red_posts["hour"] = post_time.dt.hour
# Constant 1 per row, so summing this column within a group counts its posts.
red_posts["post_count"] = 1
red_posts

### Aggregating by date and hour

In [None]:
# Same summary for both groupings: mean score, mean comment count, number of posts.
activity_spec = {"score": "mean", "comms_num": "mean", "post_count": "sum"}
agg_date = red_posts.groupby("date").agg(activity_spec)
agg_hour = red_posts.groupby("hour").agg(activity_spec)

In [None]:
# Inspect the per-date aggregates computed above.
agg_date

#### Changing the Pandas default plotting library to 'plotly'

In [None]:
# Route every DataFrame.plot call in this notebook through plotly.
pd.set_option("plotting.backend", "plotly")

### Interactive graphs for:

#### 1. Hourly activity

In [None]:
# Plot against the actual hour of day; without x= the axis is only the row position.
agg_hour.reset_index().plot(x="hour", y="comms_num", title="Average comments per post by hour of day")

In [None]:

# Plot against the actual hour of day; without x= the axis is only the row position.
agg_hour.reset_index().plot(x="hour", y="score", title="Average post score by hour of day")

In [None]:
# Plot against the actual hour of day; without x= the axis is only the row position.
agg_hour.reset_index().plot(x="hour", y="post_count", title="Number of posts by hour of day")

#### 2. Daily activity

In [None]:
# Plot against the calendar date; without x= the axis is only the row position.
agg_date.reset_index().plot(x="date", y="comms_num", title="Average comments per post by date")

In [None]:
# Plot against the calendar date; without x= the axis is only the row position.
agg_date.reset_index().plot(x="date", y="score", title="Average post score by date")

In [None]:
# Plot against the calendar date; without x= the axis is only the row position.
agg_date.reset_index().plot(x="date", y="post_count", title="Number of posts by date")

### Combining the Titles and Bodies for each post

In [None]:
# Missing titles/bodies are NaN. Treat them as empty text so the combined string
# never contains the literal word "nan" and a missing title does not blank out
# the whole row; strip the separator left over when one side is empty.
red_posts["alltexts"] = (
    red_posts["title"].fillna("").astype(str)
    + " "
    + red_posts["body"].fillna("").astype(str)
).str.strip()

In [None]:
# Inspect the combined title + body text that is fed to the emotion scorer.
red_posts["alltexts"]

## Using 'text2emotion' library to analyze and display the various emotions attributed to the posts (title+body)

In [None]:
# tqdm.auto picks the notebook or console progress bar as appropriate; the
# deprecated tqdm_notebook alias and the unused tqdm().pandas() registration
# (which also rendered an empty bar) are no longer needed.
from tqdm.auto import tqdm

# Score every post's text; each call contributes one entry to `emotions`,
# in the same order as the rows of red_posts.
emotions = [
    te.get_emotion(str(text))
    for text in tqdm(red_posts.alltexts, desc="Scoring emotions")
]

In [None]:
# One row per post, one column per emotion key returned by text2emotion.
emotion_data = pd.DataFrame(emotions)
# NOTE: this cell used to copy each emotion into a frame named `posts`, which is
# never defined in this notebook (NameError on a fresh kernel). The emotion
# columns are attached to `red_posts` by the loop in the following cell.

#### Dynamically adding the columns to the main df

In [None]:
# Copy every emotion column onto the main frame under the same column name.
for emotion_name in emotion_data.columns:
    red_posts[emotion_name] = emotion_data[emotion_name]

In [None]:
# Display the posts frame, now including the emotion columns.
red_posts

### Aggregate emotional identifiers against day/hour

In [None]:
# red_posts also holds text columns (title, body, ...). numeric_only=True keeps
# the mean restricted to numeric columns; without it pandas 2.x raises TypeError.
agg_day = red_posts.groupby("date").mean(numeric_only=True)
# NOTE: this rebinds agg_hour (previously grouped by hour of day only) to a
# per-(date, hour) aggregate; the cells below rely on the new meaning.
agg_hour = red_posts.groupby(["date", "hour"]).mean(numeric_only=True)

In [None]:
# Inspect the per-(date, hour) means.
agg_hour

### Plotting ratio of emotions by hour since 28/01/2021 9AM

In [None]:
# The labels keys must match the plotted column names exactly ("Happy", not
# "happy"), otherwise the y-axis title is silently left as the raw column name.
fig = agg_hour.reset_index().plot(
    y="Happy",
    labels={"Happy": "Ratio of words for each emotion", "index": "Hours since 9:00am on 1/28/2021"},
    title="Trends in emotions expressed in r/WallStreetBets posts by hour",
)
# Give the first line a legend entry so it is labelled like the lines added below.
fig.update_traces(name="Happy", showlegend=True)

# Remaining emotions are drawn against the same positional x axis (0..n-1).
for emotion_name in ("Sad", "Angry", "Surprise", "Fear"):
    fig.add_scatter(y=agg_hour[emotion_name], mode='lines', name=emotion_name)
fig.show()

### Plotting ratio of emotions by day since 28/01/2021

In [None]:
# The labels keys must match the plotted column names exactly ("Happy", not
# "happy"), otherwise the y-axis title is silently left as the raw column name.
fig = agg_day.reset_index().plot(
    y="Happy",
    labels={"Happy": "Ratio of words for each emotion", "index": "Days since 1/28/2021"},
    title="Trends in emotions expressed in r/WallStreetBets posts by day",
)
# Give the first line a legend entry so it is labelled like the lines added below.
fig.update_traces(name="Happy", showlegend=True)

# Remaining emotions are drawn against the same positional x axis (0..n-1).
for emotion_name in ("Sad", "Angry", "Surprise", "Fear"):
    fig.add_scatter(y=agg_day[emotion_name], mode='lines', name=emotion_name)
fig.show()

### Installing Tiingo 

In [None]:
pip install tiingo

### Obtaining historical data for 'GME' stock using Tiingo

In [None]:
from tiingo import TiingoClient

In [None]:
import os

# Never commit API tokens to a notebook: read the token from the environment.
# The token that used to be hardcoded in this cell should be treated as
# compromised and rotated in the Tiingo account settings.
tiingo_api_key = os.environ.get("TIINGO_API_KEY")
if not tiingo_api_key:
    raise RuntimeError(
        "Set the TIINGO_API_KEY environment variable before running this cell."
    )

config = {
    "session": True,
    "api_key": tiingo_api_key,
}

client = TiingoClient(config)

In [None]:
# Fetch GME reference data and prices from Tiingo.
# NOTE(review): ticker_metadata, ticker_price and tickers are not read by any
# cell visible in this notebook; only historical_prices is used downstream.
ticker_metadata = client.get_ticker_metadata("GME")

ticker_price = client.get_ticker_price("GME", frequency="daily")

# Daily GME prices for 2021-01-28 .. 2021-02-08, returned as a DataFrame.
historical_prices = client.get_dataframe("GME", fmt='csv', startDate='2021-01-28', endDate='2021-02-08', frequency='daily')

tickers = client.list_stock_tickers()

In [None]:
# Move the index into a regular column so it can serve as a merge key.
# NOTE(review): presumably the Tiingo frame is indexed by date (the next cell reads
# a `date` column) — confirm. Re-running this cell alone resets the index again.
historical_prices = historical_prices.reset_index()

In [None]:
# Reduce the price timestamps to plain calendar dates, matching red_posts["date"].
price_time = pd.to_datetime(historical_prices["date"])
historical_prices["date"] = price_time.dt.date

In [None]:
# Inspect the daily GME prices after date normalisation.
historical_prices

In [None]:
# Right join: keep every price date, attaching the posts made on that date.
rednas_data = red_posts.merge(historical_prices, how="right", on="date")

###### Using data from NASDAQ

In [None]:
# Inspect the merged posts + prices frame.
rednas_data

In [None]:
# Daily summary: mean of each emotion, score, comment count and adjusted
# prices, plus the number of posts per date.
daily_spec = {emotion: "mean" for emotion in ("Happy", "Angry", "Surprise", "Sad", "Fear")}
daily_spec.update(
    {
        "score": "mean",
        "comms_num": "mean",
        "post_count": "sum",
        "adjClose": "mean",
        "adjOpen": "mean",
    }
)
rednas_date = rednas_data.groupby("date").agg(daily_spec)
rednas_date

### Sentiments and Closing Price plotted with respect to the date

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# (column in rednas_date, legend label) for each emotion line on the secondary axis.
emotion_traces = [
    ("Happy", "Happiness"),
    ("Angry", "Anger"),
    ("Surprise", "Surprise"),
    ("Fear", "Fear"),
    ("Sad", "Sadness"),
]

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Price goes on the primary (left) axis as a thick red line.
fig.add_trace(
    go.Scatter(
        x=rednas_date.index,
        y=rednas_date["adjClose"],
        name="Closing Price",
        line=dict(color="red", width=4),
    ),
    secondary_y=False,
)

# Emotion means share the secondary (right) axis; secondary_y=True assigns the axis.
for column, label in emotion_traces:
    fig.add_trace(
        go.Scatter(x=rednas_date.index, y=rednas_date[column], name=label),
        secondary_y=True,
    )

fig.update_layout(
    height=600,
    width=800,
    title="Sentiments and Closing Price plotted with respect to the date",
    xaxis=dict(tickangle=-90),
)
fig.update_yaxes(title_text="Adjusted closing price", secondary_y=False)
fig.update_yaxes(title_text="Mean emotion score", secondary_y=True)
fig.show()

### Sentiments and Opening Price plotted with respect to the date

In [None]:
# (column in rednas_date, legend label) for each emotion line on the secondary axis.
emotion_traces = [
    ("Happy", "Happiness"),
    ("Angry", "Anger"),
    ("Surprise", "Surprise"),
    ("Fear", "Fear"),
    ("Sad", "Sadness"),
]

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Price goes on the primary (left) axis as a thick red line.
fig.add_trace(
    go.Scatter(
        x=rednas_date.index,
        y=rednas_date["adjOpen"],
        name="Opening Price",
        line=dict(color="red", width=4),
    ),
    secondary_y=False,
)

# Emotion means share the secondary (right) axis; secondary_y=True assigns the axis.
for column, label in emotion_traces:
    fig.add_trace(
        go.Scatter(x=rednas_date.index, y=rednas_date[column], name=label),
        secondary_y=True,
    )

fig.update_layout(
    height=600,
    width=800,
    title="Sentiments and Opening Price plotted with respect to the date",
    xaxis=dict(tickangle=-90),
)
fig.update_yaxes(title_text="Adjusted opening price", secondary_y=False)
fig.update_yaxes(title_text="Mean emotion score", secondary_y=True)
fig.show()

### Calculating the difference between Opening and Closing Prices

In [None]:
# Open-minus-close per date: positive when the adjusted close is below the adjusted open.
rednas_date["delta"] = rednas_date["adjOpen"].sub(rednas_date["adjClose"])

In [None]:
# Inspect the daily summary, now including the delta column.
rednas_date

#### Plotting the delta across time

In [None]:
# Title the chart so it stands alone when the notebook is skimmed.
rednas_date.reset_index().plot(
    x='date',
    y='delta',
    title="GME adjusted open minus adjusted close by date",
)

In [None]:
# List the columns that will appear in the correlation matrices below.
rednas_date.columns

In [None]:
# Two stacked panels sharing the x axis: Pearson on top, Spearman below.
fig = make_subplots(rows=2, cols=1,shared_xaxes=True,subplot_titles=('Pearsons Correlation',  'Spearman Correlation'))

# Diverging blue -> yellow -> red scale. Plotly expects the stops in ascending
# order from 0.0 to 1.0; the same stop/colour pairs were previously listed from
# 1.0 down to 0.0, which is not a valid colorscale ordering.
colorscale = [
    [0.0, "rgb(49,54,149)"],
    [0.1111111111111111, "rgb(69,117,180)"],
    [0.2222222222222222, "rgb(116,173,209)"],
    [0.3333333333333333, "rgb(171,217,233)"],
    [0.4444444444444444, "rgb(224,243,248)"],
    [0.5555555555555556, "rgb(254,224,144)"],
    [0.6666666666666666, "rgb(253,174,97)"],
    [0.7777777777777778, "rgb(244,109,67)"],
    [0.8888888888888888, "rgb(215,48,39)"],
    [1.0, "rgb(165,0,38)"],
]

In [None]:
# Build the figure inside this cell so that re-running it does not stack
# duplicate heatmaps onto a figure created in an earlier cell.
fig = make_subplots(rows=2, cols=1,shared_xaxes=True,subplot_titles=('Pearsons Correlation',  'Spearman Correlation'))

for panel_row, method in enumerate(("pearson", "spearman"), start=1):
    corr = rednas_date.corr(method=method)
    fig.add_trace(
        go.Heatmap(
            x=corr.columns,
            y=corr.index,
            z=corr.values,
            name=method,
            # Pin both panels to the full correlation range so the single
            # colorbar (shown on the second panel only) is valid for both.
            zmin=-1,
            zmax=1,
            showscale=(panel_row == 2),
            xgap=1,
            ygap=1,
            colorscale=colorscale,
        ),
        row=panel_row, col=1
    )

# The previous title was left over from an unrelated notebook.
fig.update_layout(height=700, width=900, title_text="Correlation between daily sentiment, activity and GME prices")
fig.show()

In [None]:
# Mean comment count on the primary axis.
trace1 = go.Scatter(
    x=rednas_date.index,
    y=rednas_date['comms_num'],
    name='Number of comments',
)
# Open-minus-close price difference on the secondary axis.
trace2 = go.Scatter(
    x=rednas_date.index,
    y=rednas_date["delta"],
    name='Difference between opening to closing prices',
)

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(trace1, secondary_y=False)
fig.add_trace(trace2, secondary_y=True)
# The previous title was copied from the opening-price chart and did not describe this figure.
fig.update_layout(
    height=600,
    width=800,
    title='Number of comments and open-to-close price difference plotted with respect to the date',
    xaxis=dict(tickangle=-90),
)
fig.update_yaxes(title_text="Mean comments per post", secondary_y=False)
fig.update_yaxes(title_text="Adjusted open - adjusted close", secondary_y=True)
fig.show()