# In [1]:
import pandas as pd
import plotly.express as px

# In [13]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import joblib
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import io
import base64

# Feature columns offered in the dashboard's feature dropdown.
selected_cols = [
    'Average Daily Social Media Use Time (minutes)',
    'Frequency of Social Media Checking (number of times per day)',
    'Pre-Sleep Social Media Use Duration (minutes)',
    'Dominant Social Media Platform',
    'Type of Social Media Content Consumed',
    'Blue Light Exposure Before Sleep (minutes)'
]
# Column that is binned into efficiency levels and used to break down the charts.
target_col = 'Sleep Efficiency (%)'

# Load dataset and model (adjust paths)
df = pd.read_csv("SocialMediaUsage_SleepLatencyAnalysis_Singapore.csv")  # Replace with your CSV path
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only point this at a model file you trust.
model = joblib.load("linear_regression_model.pkl")  # Replace with your model file

# Create bins for Sleep Efficiency (target) for categorical grouping.
# include_lowest=True closes the first interval on the left, so a value of
# exactly 0 is labelled 'Very Low' instead of silently becoming NaN.
bins = [0, 60, 70, 80, 90, 100]
labels = ['Very Low', 'Low', 'Moderate', 'High', 'Very High']
df['SleepEfficiency_binned'] = pd.cut(
    df[target_col], bins=bins, labels=labels, include_lowest=True
)

app = dash.Dash(__name__)
app.title = "Sleep Efficiency Dashboard"

# Page structure: a static overview section (target histogram plus three
# mean-by-level bar charts) followed by the dropdown-driven section whose
# components are filled in by the callbacks registered on `app`.
app.layout = html.Div([
    html.H1("🛌 Sleep Efficiency Dashboard", style={'textAlign': 'center'}),

    html.H2("Sleep Efficiency Distribution"),
    dcc.Graph(
        figure=px.histogram(
            df,
            x=target_col,
            nbins=30,
            title='Distribution of Sleep Efficiency (%)',
        )
    ),

    html.H2("Top-Level Insights"),
    # One bar chart per highlighted feature: its mean within each
    # sleep-efficiency level.
    html.Div([
        dcc.Graph(
            figure=px.bar(
                df.groupby("SleepEfficiency_binned", observed=True)[column].mean().reset_index(),
                x="SleepEfficiency_binned",
                y=column,
                title=f"Average {column} by Sleep Efficiency Level",
            )
        )
        for column in (selected_cols[0], selected_cols[1], selected_cols[5])
    ]),

    html.H2("📊 General Statistics"),
    html.Div(id='stats-output', style={'marginBottom': 30}),

    html.Label("Select Feature to Explore:"),
    dcc.Dropdown(
        id='feature-dropdown',
        options=[{'label': name, 'value': name} for name in selected_cols],
        value=selected_cols[0],
    ),

    # Figures below are produced by callbacks keyed on the dropdown value.
    html.Div([
        dcc.Graph(id='binned-efficiency-graph'),
        dcc.Graph(id='boxplot-feature'),
        dcc.Graph(id='histogram-kde'),
    ]),

    html.H2("📌 Feature Correlation Heatmap"),
    html.Img(id='correlation-heatmap', style={'width': '80%'}),

    html.H2("🔥 Feature Importance"),
    dcc.Graph(id='feature-importance'),
])

@app.callback(
    Output('stats-output', 'children'),
    Input('feature-dropdown', 'value')
)
def show_stats(feature):
    """Build the summary-statistics list for the selected feature.

    Args:
        feature: Column name chosen in the feature dropdown, or ``None``
            when the dropdown has been cleared.

    Returns:
        An ``html.Ul`` listing mean/std/min/max/count for numeric columns,
        or count/unique/most-frequent/frequency for any other column. An
        ``html.P`` placeholder is returned when no valid column is selected.
    """
    # dcc.Dropdown is clearable by default, so this callback can fire with
    # None; indexing df with it would raise a KeyError inside the callback.
    if feature is None or feature not in df.columns:
        return html.P("Select a feature to see its statistics.")

    column = df[feature]
    desc = column.describe()

    if pd.api.types.is_numeric_dtype(column):
        # Numeric: mean, std, min, max, count
        lines = [
            f"Mean: {desc['mean']:.2f}",
            f"Std Dev: {desc['std']:.2f}",
            f"Min: {desc['min']:.2f}",
            f"Max: {desc['max']:.2f}",
            f"Count: {desc['count']:.0f}",
        ]
    else:
        # Categorical: count, unique, top, freq
        lines = [
            f"Count: {desc['count']:.0f}",
            f"Unique: {desc['unique']}",
            f"Most Frequent: {desc['top']}",
            f"Frequency: {desc['freq']}",
        ]
    return html.Ul([html.Li(line) for line in lines])

@app.callback(
    Output('binned-efficiency-graph', 'figure'),
    Input('feature-dropdown', 'value')
)
def update_binned_plot(feature):
    """Plot how sleep-efficiency levels are distributed across a feature.

    Non-numeric features get a stacked bar chart of row counts per category.
    Numeric features are first binned into up to five levels (quantile bins
    when the column has at least 10 distinct values, equal-width bins
    otherwise) and shown as stacked percentage bars per level.

    Args:
        feature: Column name chosen in the feature dropdown.

    Returns:
        A plotly figure. An empty figure is returned when there is nothing
        to group; a figure whose title carries the error message is returned
        when plotting fails (deliberate best-effort so the page keeps
        rendering).
    """
    try:
        # Only these two columns are used below, so avoid copying the whole
        # frame on every dropdown change.
        dff = df[[feature, 'SleepEfficiency_binned']].copy()

        # Same numeric/non-numeric split as show_stats, so string-like dtypes
        # other than plain `object` are treated as categorical too.
        if not pd.api.types.is_numeric_dtype(dff[feature]):
            grouped = dff.groupby([feature, 'SleepEfficiency_binned'], observed=True).size().reset_index(name='count')
            if grouped.empty:
                return go.Figure()
            fig = px.bar(
                grouped,
                x=feature,
                y='count',
                color='SleepEfficiency_binned',
                title=f'Distribution of Sleep Efficiency Levels by {feature}',
                labels={feature: feature, 'count': 'Count', 'SleepEfficiency_binned': 'Sleep Efficiency Level'}
            )
            fig.update_layout(barmode='stack')
            return fig

        binned_col = f'{feature}_binned'
        level_names = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
        values = dff[feature]
        if values.nunique() >= 10:
            # Compute the quantile edges once (dropping duplicated edges that
            # heavily tied data produces) and cut with exactly those edges.
            # Re-running qcut with a smaller q could raise on the same ties
            # or pick different edges than the ones counted here.
            _, bin_edges = pd.qcut(values, q=5, retbins=True, duplicates='drop')
            num_bins = len(bin_edges) - 1
            dff[binned_col] = pd.cut(
                values,
                bins=bin_edges,
                labels=level_names[:num_bins],
                include_lowest=True,  # keep the minimum value, as qcut does
            )
        else:
            dff[binned_col] = pd.cut(values, bins=5, labels=level_names)

        grouped = dff.groupby([binned_col, 'SleepEfficiency_binned'], observed=True).size().reset_index(name='count')
        if grouped.empty:
            return go.Figure()

        # Convert counts to the share of each feature level.
        total_per_bin = grouped.groupby(binned_col, observed=True)['count'].transform('sum')
        grouped['percentage'] = grouped['count'] / total_per_bin * 100
        pivot = grouped.pivot(index=binned_col, columns='SleepEfficiency_binned', values='percentage').fillna(0)

        fig = go.Figure()
        for col in pivot.columns:
            fig.add_trace(go.Bar(name=str(col), x=pivot.index, y=pivot[col]))
        fig.update_layout(barmode='stack', title=f'Sleep Efficiency Levels by {feature}')
        return fig
    except Exception as e:
        # Top-level boundary of the callback: surface the problem in the
        # chart title rather than failing the whole callback.
        return go.Figure(layout=go.Layout(title=f"Error plotting {feature}: {str(e)}"))

@app.callback(
    Output('boxplot-feature', 'figure'),
    Input('feature-dropdown', 'value')
)
def update_boxplot(feature):
    """Draw a box plot of the selected feature.

    Args:
        feature: Column name chosen in the feature dropdown, or ``None``
            when the dropdown has been cleared.

    Returns:
        A plotly box-plot figure, or an empty figure when no valid column
        is selected.
    """
    # dcc.Dropdown is clearable by default. With y=None Plotly Express would
    # no longer plot a single column (it switches to wide-form handling of
    # the whole frame), so return an empty figure instead.
    if feature is None or feature not in df.columns:
        return go.Figure()
    return px.box(df, y=feature, title=f'Boxplot of {feature}')

@app.callback(
    Output('histogram-kde', 'figure'),
    Input('feature-dropdown', 'value')
)
def histogram_kde(feature):
    """Draw a density-normalised histogram of the selected feature.

    Despite the name, no KDE curve is computed here: the shape of the
    distribution is shown by a violin plot in the figure margin.

    Args:
        feature: Column name chosen in the feature dropdown, or ``None``
            when the dropdown has been cleared.

    Returns:
        A plotly histogram figure, or an empty figure when no valid column
        is selected.
    """
    # dcc.Dropdown is clearable by default. With x=None Plotly Express would
    # no longer plot a single column (it switches to wide-form handling of
    # the whole frame), so return an empty figure instead.
    if feature is None or feature not in df.columns:
        return go.Figure()
    fig = px.histogram(df, x=feature, nbins=30, marginal="violin",
                       histnorm='probability density',
                       title=f'Distribution of {feature}')
    return fig

@app.callback(
    Output('correlation-heatmap', 'src'),
    Input('feature-dropdown', 'value')
)
def correlation_plot(_):
    """Render the correlation heatmap of the numeric selected features.

    The dropdown value only triggers the callback; the image is built from
    the numeric columns of ``selected_cols`` and does not depend on it.

    Returns:
        A ``data:image/png;base64,...`` URI suitable for an ``html.Img``
        ``src`` attribute.
    """
    numeric_cols = [col for col in selected_cols if pd.api.types.is_numeric_dtype(df[col])]
    corr = df[numeric_cols].corr()

    # Work on an explicit figure handle instead of pyplot's implicit
    # "current figure", and always close it: pyplot keeps every open figure
    # alive, so an exception while drawing or saving would leak it.
    fig = plt.figure(figsize=(8, 6))
    try:
        sns.heatmap(corr, annot=True, cmap='coolwarm', ax=fig.add_subplot(111))
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)

    return 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode()

@app.callback(
    Output('feature-importance', 'figure'),
    Input('feature-dropdown', 'value')
)
def feature_importance(_):
    """Plot the loaded model's feature importances or coefficients.

    Uses ``model.feature_importances_`` when present, otherwise the
    flattened ``model.coef_``. The dropdown value only triggers the
    callback and is otherwise ignored.

    Returns:
        A horizontal bar chart sorted by importance, or an empty titled
        figure when the model exposes neither attribute.
    """
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
    elif hasattr(model, 'coef_'):
        importances = model.coef_
        if importances.ndim > 1:
            importances = importances.ravel()
    else:
        return px.bar(title="Model does not provide feature importance or coefficients.")

    # Prefer the names stored on the model; fall back to the dashboard's list.
    feature_names = getattr(model, 'feature_names_in_', selected_cols)

    if len(importances) != len(feature_names):
        # Fall back to selected_cols and trim BOTH sides to a common length;
        # trimming only the importances would make the DataFrame constructor
        # raise when the model exposes fewer values than selected_cols.
        # NOTE(review): in this fallback the labels are matched purely by
        # position and may not correspond to the model's real inputs
        # (e.g. one-hot encoded columns) -- confirm against the training code.
        common_len = min(len(importances), len(selected_cols))
        feature_names = selected_cols[:common_len]
        importances = importances[:common_len]

    fi_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': importances
    }).sort_values(by='Importance', ascending=False)

    fig = px.bar(fi_df, x='Importance', y='Feature', orientation='h', title="Feature Importance")
    return fig

# Start the Dash server only when this file is executed directly, not when
# it is imported. debug=True is passed through to app.run as written; it is
# presumably intended for local development only -- confirm before deploying.
if __name__ == '__main__':
    app.run(debug=True)
