In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt

from jupyter_dash import JupyterDash
from dash import dcc, html, State
from dash.dependencies import Input, Output
import plotly.express as px  
import plotly.graph_objects as go

from dash import dash_table

# Dash application instance; the layout and the @app.callback functions
# further down in this notebook are registered on this object.
# NOTE(review): the jupyter_dash package is deprecated upstream in favour of
# the notebook support built into dash.Dash — confirm the installed Dash
# version before migrating.
app = JupyterDash(__name__)

In [2]:
# Read the five pre-computed CSV tables in a fixed order and bind them to
# df1..df5 (the names the layout and callbacks rely on):
#   df1 - engagement metrics by sentiment
#   df2 - engagement metrics by day and sentiment
#   df3 - top-performing posts
#   df4 - low-performing posts
#   df5 - average sentiment scores
df1, df2, df3, df4, df5 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Sentiment & Engagement Datasets/engagement_metrics_by_sentiment.csv',
        '../Sentiment & Engagement Datasets/engagement_metrics_by_day_and_sentiment.csv',
        '../Sentiment & Engagement Datasets/top_performing_posts.csv',
        '../Sentiment & Engagement Datasets/low_performing_posts.csv',
        '../Sentiment & Engagement Datasets/avg_sentiment_scores.csv',
    )
)

In [7]:
# Preview the average-sentiment table (one row per subreddit with
# 'Low-Performing' and 'Top-Performing' columns) that feeds the grouped bar
# chart in the layout cell.
df5

Unnamed: 0,subreddit,Low-Performing,Top-Performing
0,AskHR,0.179278,0.111936
1,FinancialCareers,0.559093,0.5033
2,ITCareerQuestions,0.558012,0.524407
3,LegalAdviceOffTopic,-0.048032,-0.094733
4,careeradvice,0.621885,0.291863
5,careerguidance,0.567514,0.454349
6,cscareerquestions,0.656972,0.355896
7,jobs,0.452718,0.18611
8,resumes,0.516891,0.402797
9,sales,0.462591,0.43607


In [4]:
## Layout

# Static grouped bar chart: for every subreddit, the 'Low-Performing' and
# 'Top-Performing' average sentiment scores are drawn side by side. It is
# built once here because it does not depend on any dropdown selection.
fig_avg_sentiment = px.bar(
    data_frame=df5,
    x='subreddit',
    y=['Low-Performing', 'Top-Performing'],
    barmode='group',
    title="Average Sentiment Scores for Top vs. Low Performing Posts by Subreddit",
)

# Engagement metrics offered by both metric dropdowns (Sections 1 and 2).
# The selected value is used by the callbacks as the y column of the chart,
# so each 'value' must be a column name of the underlying table.
METRIC_OPTIONS = [
    {'label': 'Number of Comments', 'value': 'num_comments'},
    {'label': 'Upvote Ratio', 'value': 'upvote_ratio'},
    {'label': 'Score', 'value': 'score'},
]
DEFAULT_METRIC = 'num_comments'

# Subreddits that appear in either the top- or the low-performing post table.
# Missing names are dropped (NaN is not a valid dropdown value) and the rest
# are sorted so the option order is stable between runs.
SUBREDDIT_OPTIONS = [
    {'label': name, 'value': name}
    for name in sorted(
        pd.concat([df3['subreddit'], df4['subreddit']]).dropna().unique(),
        key=str,
    )
]


def _metric_dropdown(component_id):
    """Return a metric-selector dropdown with the shared options.

    Parameters
    ----------
    component_id : str
        Dash component id the matching callback listens on.

    Returns
    -------
    dcc.Dropdown
        Dropdown pre-selected to DEFAULT_METRIC. It is not clearable, so the
        callback never receives None in place of a column name.
    """
    return dcc.Dropdown(
        id=component_id,
        options=METRIC_OPTIONS,
        value=DEFAULT_METRIC,
        clearable=False,
        style={'width': '50%'},
    )


app.layout = html.Div([
    html.H1("Sentiment and Engagement Comparative Analysis", style={'textAlign': 'center'}),

    # Section 1: engagement metric per sentiment category (bar chart)
    html.Div([
        html.H2("Engagement Metrics by Sentiment Score Categories"),
        html.P("Explore how different sentiment categories affect engagement metrics."),
        _metric_dropdown('metric-selector-1'),
        dcc.Graph(id='engagement-metrics-chart')
    ], style={'padding': 20}),

    # Section 2: engagement metric per day of week and sentiment (line chart)
    html.Div([
        html.H2("Engagement Metrics by Day and Sentiment"),
        html.P("See how engagement metrics vary by day and sentiment category."),
        _metric_dropdown('day-metric-selector'),
        dcc.Graph(id='day-engagement-chart')
    ], style={'padding': 20}),

    # Section 3: sentiment-score histogram for one selected subreddit
    html.Div([
        html.H2("Distribution Analysis of Sentiment Scores"),
        html.P("Analyze the distribution of sentiment scores for top-performing vs. low-performing posts."),
        dcc.Dropdown(
            id='subreddit-selector',
            options=SUBREDDIT_OPTIONS,
            # No subreddit is pre-selected; the callback shows an empty
            # figure until the user picks one.
            value=None,
            placeholder="Select a subreddit",
            style={'width': '50%'}
        ),
        dcc.Graph(id='sentiment-distribution-chart')
    ], style={'padding': 20}),

    # Section 4: static comparison of average sentiment per subreddit
    html.Div([
        html.H3("Average Sentiment Scores for Top vs. Low Performing Posts by Subreddit"),
        dcc.Graph(id='avg-sentiment-chart', figure=fig_avg_sentiment)
    ])
])


#Callback

#for section one

@app.callback(
    Output('engagement-metrics-chart', 'figure'),
    Input('metric-selector-1', 'value')
)
def update_engagement_metrics_chart(selected_metric):
    """Redraw the Section 1 bar chart for the chosen engagement metric.

    Parameters
    ----------
    selected_metric : str
        Value of the 'metric-selector-1' dropdown; used as the y column of df1.

    Returns
    -------
    plotly.graph_objects.Figure
        Bar chart with one bar per sentiment category, coloured by category.
    """
    return px.bar(
        data_frame=df1,
        x='sentiment_category',
        y=selected_metric,
        color='sentiment_category',
        title="Engagement Metrics by Sentiment Score Categories",
    )

#for section two

@app.callback(
    Output('day-engagement-chart', 'figure'),
    Input('day-metric-selector', 'value')
)
def update_day_engagement_chart(selected_metric):
    """Redraw the Section 2 line chart for the chosen engagement metric.

    Parameters
    ----------
    selected_metric : str
        Value of the 'day-metric-selector' dropdown; used as the y column of df2.

    Returns
    -------
    plotly.graph_objects.Figure
        Line chart over 'day_of_week' with one line per sentiment category.
    """
    return px.line(
        data_frame=df2,
        x='day_of_week',
        y=selected_metric,
        color='sentiment_category',
        title="Engagement Metrics by Day and Sentiment",
    )

#for section three

@app.callback(
    Output('sentiment-distribution-chart', 'figure'),
    [Input('subreddit-selector', 'value')]
)
def update_sentiment_distribution_chart(selected_subreddit):
    """Draw the Section 3 sentiment-score histogram for one subreddit.

    Parameters
    ----------
    selected_subreddit : str or None
        Value of the 'subreddit-selector' dropdown; None when nothing is chosen.

    Returns
    -------
    plotly.graph_objects.Figure
        An empty figure when no subreddit is selected; otherwise overlaid
        histograms of 'sentiment_score' for the top- and low-performing posts
        of that subreddit.
    """
    # Nothing selected yet: show a blank chart.
    if selected_subreddit is None:
        return go.Figure()

    # Keep only the chosen subreddit's posts and tag each row with its group.
    # .assign returns a new frame, so df3 and df4 are left untouched.
    top_posts = df3[df3['subreddit'] == selected_subreddit].assign(
        Performance='Top-Performing'
    )
    low_posts = df4[df4['subreddit'] == selected_subreddit].assign(
        Performance='Low-Performing'
    )
    labelled_posts = pd.concat([top_posts, low_posts])

    histogram = px.histogram(
        labelled_posts,
        x='sentiment_score',
        color='Performance',
        barmode='overlay',
        nbins=50,
        title=f"Sentiment Scores Distribution for {selected_subreddit}",
    )
    histogram.update_layout(bargap=0.1)
    return histogram


# Start the Dash server on port 8052 when this cell/script is run directly.
# NOTE(review): mode='inline' is a JupyterDash option that is expected to embed
# the running app in the notebook output — confirm against the installed
# jupyter_dash version.
if __name__ == '__main__':
    app.run_server(mode='inline', port=8052)
