In [1]:
import pandas as pd

import matplotlib.pyplot as plt

from jupyter_dash import JupyterDash
from dash import dcc, html, State
from dash.dependencies import Input, Output
import plotly.express as px  

from dash import dash_table

# Single Dash application instance; the layout and the callback defined later
# in this notebook are both registered on it.
app = JupyterDash(__name__)

In [2]:
# Load the prepared dataset used by the dashboard. The path is relative, so it
# is resolved against the kernel's current working directory.
df = pd.read_csv('../Sentiment & Engagement Datasets/ready_data_score.csv')

In [3]:
# Peek at the first two rows to confirm the expected columns are present.
df.head(2)

Unnamed: 0,id,text,title,author,num_comments,post_id,upvote_ratio,score,url,subreddit,link_flair_text,link_flair_template_id,created_datetime,day_of_week,hour_of_day,month,year,sentiment_score
0,41034.0,Hi context year old guy Amsterdam currently em...,Lazy job or Hard job?,Weak_Assumption_6889,8,1bfpxll,0.33,0,https://www.reddit.com/r/careeradvice/comments...,careeradvice,Unknown,Unknown,2024-03-15 22:07:22,Friday,22.0,March,2024,0.7579
1,43519.0,Looking new role havenut much traction Recentl...,Roast my Resume Pls,Neither_Trash,1,1bh8md2,0.99,1,https://i.redd.it/n918fjprlyoc1.jpeg,resumes,Review my resume • I'm in North America,c292b8e0-28b9-11ec-874c-325b17e851a3,2024-03-17 21:05:40,Sunday,21.0,March,2024,0.6369


Features included in this component: day_of_week, hour_of_day, month, year, sentiment_score

Question: How do sentiment scores vary by time of day, day of the week, month, or year?

In [4]:
# Weekday names in calendar order; drives the day-of-week checklist.
order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Month names in calendar order; used only to sort the month checklist.
_MONTH_ORDER = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]


def _month_sort_key(month):
    """Sort key that puts known month names in calendar order.

    Any value that is not one of the twelve names in ``_MONTH_ORDER`` is kept
    (not dropped) and placed after the real months, ordered by its string form.
    """
    try:
        return (_MONTH_ORDER.index(month), '')
    except ValueError:
        return (len(_MONTH_ORDER), str(month))


def _checklist_section(title, component_id, values):
    """Build one filter group: an H3 heading above a checklist.

    Parameters
    ----------
    title : str
        Heading text shown above the checklist.
    component_id : str
        Dash component id; the callback's ``Input`` objects refer to it.
    values : list
        Option values, in display order. Every value starts out ticked.

    Returns
    -------
    html.Div
        The heading and checklist wrapped in a single container.
    """
    return html.Div([
        html.H3(title),
        dcc.Checklist(
            id=component_id,
            options=[{'label': str(v), 'value': v} for v in values],
            value=list(values),
            labelStyle={'display': 'block'},
        ),
    ])


# Series.tolist() yields plain Python scalars, so the component props are
# ordinary lists of int/str rather than numpy arrays / numpy scalars.
# Each column's distinct values are computed once and reused for both the
# options and the initial selection.
year_values = sorted(df['year'].drop_duplicates().tolist())
# Calendar order instead of order of first appearance in the CSV.
month_values = sorted(df['month'].drop_duplicates().tolist(), key=_month_sort_key)

#the layout
app.layout = html.Div([
    # Left column: the three filter checklists.
    html.Div([
        html.H2("Filter by:"),
        _checklist_section("Year", 'year-checklist', year_values),
        _checklist_section("Month", 'month-checklist', month_values),
        _checklist_section("Day of the Week", 'day-checklist', order),
    ], style={'width': '20%', 'display': 'inline-block', 'verticalAlign': 'top'}),

    # Right column: the graph that the callback fills in.
    html.Div([
        dcc.Graph(id='sentiment-distribution-plot'),
    ], style={'width': '80%', 'display': 'inline-block'}),
])

# the callback
# Callback wiring: a change to any of the three checklists redraws the graph.
@app.callback(
    Output('sentiment-distribution-plot', 'figure'),
    [
        Input('year-checklist', 'value'),
        Input('month-checklist', 'value'),
        Input('day-checklist', 'value'),
    ],
)
def update_graph(selected_years, selected_months, selected_days):
    """Rebuild the sentiment box plot for the current checklist selections.

    Parameters
    ----------
    selected_years, selected_months, selected_days : list
        The currently ticked values of the year, month and day-of-week
        checklists. A row of ``df`` is kept only if it matches all three.

    Returns
    -------
    plotly figure
        Box plot of ``sentiment_score`` against ``hour_of_day`` for the
        matching rows, sized 1000x800.
    """
    # One boolean mask per filter, combined with AND.
    in_years = df['year'].isin(selected_years)
    in_months = df['month'].isin(selected_months)
    in_days = df['day_of_week'].isin(selected_days)
    subset = df.loc[in_years & in_months & in_days]

    # custom_data columns are what the hover template below reads by position.
    box_fig = px.box(
        subset,
        x='hour_of_day',
        y='sentiment_score',
        title='Sentiment Score Distribution by Hour of Day',
        points="all",
        custom_data=['created_datetime', 'day_of_week', 'year', 'month'],
    )

    # Hover text, one entry per displayed line; joined with HTML line breaks.
    hover_rows = (
        "Datetime: %{customdata[0]}",
        "Day: %{customdata[1]}",
        "Year: %{customdata[2]}",
        "Month: %{customdata[3]}",
        "Hour: %{x}",
        "Sentiment: %{y:.2f}",
    )
    box_fig.update_traces(hovertemplate="<br>".join(hover_rows))

    box_fig.update_layout(width=1000, height=800)
    return box_fig

# Run the app.
# mode='inline' and debug=True are passed to JupyterDash's run_server; per the
# jupyter-dash docs, 'inline' displays the app in the cell output.
# NOTE(review): the __main__ guard is presumably always true when this cell is
# executed in a notebook kernel -- confirm it is still wanted.
# NOTE(review): newer Dash releases reportedly provide notebook support on
# dash.Dash directly; check the installed version before upgrading.
if __name__ == '__main__':
    app.run_server(mode='inline', debug=True)
