In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Read the mobile-usage data set (a UTF-8 encoded CSV in the working directory).
df = pd.read_csv(
    "mobile_addiction.csv",
    encoding="utf-8",
)

In [3]:
# Preview the first five rows to check the column names and value encodings.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,daily_screen_time,app_sessions,social_media_usage,gaming_time,notifications,night_usage,age,work_study_hours,stress_level,apps_installed,addicted
0,0,2,29,0,0,49,0,44,5,3,35,not addicted
1,1,6,29,1,2,65,1,29,5,9,21,addicted
2,2,9,28,2,0,57,3,28,7,5,39,addicted
3,3,6,39,2,0,69,1,28,6,8,24,addicted
4,4,5,37,3,1,64,2,27,4,5,26,addicted


In [4]:
# Drop the leftover index columns. The preview above shows two of them
# ('Unnamed: 0.1' and 'Unnamed: 0'), both just repeating the row number.
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore')

In [5]:
# Predictor columns used by the analysis, in the order they appear in the CSV.
features = [
    'daily_screen_time',
    'app_sessions',
    'social_media_usage',
    'gaming_time',
    'notifications',
    'night_usage',
    'age',
    'work_study_hours',
    'stress_level',
    'apps_installed',
]

In [8]:
import base64
import io
from functools import lru_cache

import dash
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from dash import dcc, html
from dash.dependencies import Input, Output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# --- Data and model ----------------------------------------------------------
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load a model file that comes from a trusted source.
model = joblib.load("random_forest_model.pkl")
df = pd.read_csv("mobile_addiction.csv")

# Columns offered in the feature dropdown and used for the correlation,
# feature-importance and KNN sections of the dashboard.
features = [
    'daily_screen_time',
    'app_sessions',
    'social_media_usage',
    'gaming_time',
    'notifications',
    'night_usage',
    'age',
    'work_study_hours',
    'stress_level',
    'apps_installed',
]

# --- Age groups --------------------------------------------------------------
# pd.cut uses right-closed intervals by default, so the buckets are
# (0, 20], (20, 30], (30, 40], (40, 50] and (50, 100].
bins = [0, 20, 30, 40, 50, 100]
labels = ['<20', '20-30', '30-40', '40-50', '>50']
df['age_group'] = pd.cut(x=df['age'], bins=bins, labels=labels)

def _mean_bar_figure(group_col, value_col, title, top_n=None):
    """Return a bar chart of the mean of ``value_col`` per ``group_col`` value.

    Parameters
    ----------
    group_col : str
        Column of ``df`` to group by (shown on the x axis).
    value_col : str
        Column of ``df`` whose mean is plotted (shown on the y axis).
    title : str
        Figure title.
    top_n : int, optional
        When given, groups are sorted by descending mean and only the first
        ``top_n`` are kept. When omitted, groups keep groupby order.
    """
    # observed=False spells out the grouping behaviour for the categorical
    # 'age_group' column (every category is kept, even when empty) instead of
    # relying on the pandas default; it has no effect on non-categorical keys.
    means = df.groupby(group_col, observed=False)[value_col].mean()
    if top_n is not None:
        means = means.sort_values(ascending=False).head(top_n)
    return px.bar(means.reset_index(), x=group_col, y=value_col, title=title)


app = dash.Dash(__name__)
app.title = "Digital Addiction Dashboard"

# Static part of the page: the figures below are built once at start-up.
# Components that only carry an id are filled in by the callbacks, all of
# which are triggered by the 'feature-dropdown' value.
app.layout = html.Div([
    html.H1("📱 Digital Addiction Data Dashboard", style={'textAlign': 'center'}),

    html.H2("Addiction Status Distribution"),
    dcc.Graph(figure=px.pie(df, names='addicted', title='Addicted vs Not Addicted', hole=0.3)),

    html.H2("Top-Level Insights"),
    html.Div([
        dcc.Graph(figure=_mean_bar_figure(
            "age_group", "daily_screen_time",
            "Average Daily Screen Time by Age Group"
        )),
        dcc.Graph(figure=_mean_bar_figure(
            "age_group", "app_sessions",
            "Average App Sessions by Age Group"
        )),
        dcc.Graph(figure=_mean_bar_figure(
            "addicted", "notifications",
            "Average Notifications by Addiction Status"
        )),
        dcc.Graph(figure=_mean_bar_figure(
            "age_group", "daily_screen_time",
            "Top 5 Age Groups by Average Screen Time",
            top_n=5
        )),
        dcc.Graph(
            figure=px.pie(
                df, names="social_media_usage", title="Social Media Usage Distribution"
            )
        ),
    ]),

    html.H2("📊 General Statistics"),
    html.Div(id='stats-output', style={'marginBottom': 30}),

    html.Label("Select Feature to Explore:"),
    dcc.Dropdown(
        id='feature-dropdown',
        options=[{'label': f.replace('_', ' ').title(), 'value': f} for f in features],
        value=features[0]
    ),

    html.Div([
        dcc.Graph(id='binned-addiction-graph'),
        dcc.Graph(id='boxplot-outliers'),
        dcc.Graph(id='histogram-kde'),
    ]),

    html.H2("📌 Feature Correlation Heatmap"),
    html.Img(id='correlation-heatmap', style={'width': '80%'}),

    # The heading names the class of the model that was actually loaded; the
    # previous hard-coded "XGBoost" did not match "random_forest_model.pkl".
    html.H2(f"🔥 Feature Importance ({type(model).__name__})"),
    dcc.Graph(id='feature-importance'),

    html.H2("🖐️ KNN Elbow Method (Error Rate by k)"),
    dcc.Graph(id='knn-elbow-plot')
])

@app.callback(
    Output('stats-output', 'children'),
    Input('feature-dropdown', 'value')
)
def show_stats(feature):
    """Render summary statistics for the selected feature as a bullet list.

    Parameters
    ----------
    feature : str or None
        Column name chosen in the feature dropdown; ``None`` when the
        dropdown has been cleared.

    Returns
    -------
    html.Ul or dash.no_update
        Mean, standard deviation, minimum and maximum of the column, followed
        by the overall share of addicted / not addicted rows. ``dash.no_update``
        is returned when no feature is selected, leaving the previous output.
    """
    if feature is None:
        # A cleared dropdown sends None; df[None] would raise KeyError.
        return dash.no_update

    stats = df[feature].describe().to_dict()
    addiction_ratio = df['addicted'].value_counts(normalize=True).to_dict()

    # The 'addicted' column holds the strings 'addicted' / 'not addicted'
    # (see the data preview), so looking up 1 / 0 always fell through to the
    # default and both lines showed 0.0%. The numeric keys stay as a fallback
    # for a 0/1-encoded version of the column.
    addicted_share = addiction_ratio.get('addicted', addiction_ratio.get(1, 0))
    not_addicted_share = addiction_ratio.get('not addicted', addiction_ratio.get(0, 0))

    return html.Ul([
        html.Li(f"Mean: {stats['mean']:.2f}"),
        html.Li(f"Std Dev: {stats['std']:.2f}"),
        html.Li(f"Min: {stats['min']:.2f}"),
        html.Li(f"Max: {stats['max']:.2f}"),
        html.Li(f"Addicted: {addicted_share*100:.1f}%"),
        html.Li(f"Not Addicted: {not_addicted_share*100:.1f}%")
    ])

@app.callback(
    Output('binned-addiction-graph', 'figure'),
    Input('feature-dropdown', 'value')
)
def update_binned_plot(feature):
    """Stacked bar chart: addiction-status share within each bin of a feature.

    Features with at least 10 distinct values are split at their quintiles
    (duplicate quantile edges are merged, so fewer than five bins may result);
    features with fewer distinct values are split into five equal-width bins.

    Parameters
    ----------
    feature : str or None
        Column name chosen in the feature dropdown; ``None`` when cleared.

    Returns
    -------
    plotly.graph_objects.Figure or dash.no_update
        One bar trace per addiction status, with y values in percent of the
        rows in each bin. ``dash.no_update`` when no feature is selected.
    """
    if feature is None:
        return dash.no_update

    level_names = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
    values = df[feature]

    if values.nunique() >= 10:
        # Compute the de-duplicated quantile edges once and cut on exactly
        # those edges. Re-running qcut with q=<number of surviving bins>
        # recomputes different quantiles and can raise on duplicate edges.
        _, bin_edges = pd.qcut(values, q=5, retbins=True, duplicates='drop')
        binned = pd.cut(
            values,
            bins=bin_edges,
            labels=level_names[:len(bin_edges) - 1],
            include_lowest=True,  # qcut also places the minimum in the first bin
        )
    else:
        binned = pd.cut(values, bins=5, labels=level_names)

    # Work on a local frame: callbacks share the module-level df, so adding a
    # '<feature>_binned' column to it would mutate global state on every call.
    frame = pd.DataFrame({'bin': binned, 'status': df['addicted']})
    counts = (
        frame.groupby(['bin', 'status'], observed=True)
        .size()
        .unstack(fill_value=0)
    )
    percentages = counts.div(counts.sum(axis=1), axis=0) * 100

    # Name each trace from its actual status value. Assigning
    # ['Not Addicted', 'Addicted'] by position swapped the legend, because
    # the sorted column order is 'addicted', 'not addicted', and it failed
    # whenever only one status was present.
    display_names = {
        'addicted': 'Addicted',
        'not addicted': 'Not Addicted',
        1: 'Addicted',
        0: 'Not Addicted',
    }

    fig = go.Figure()
    for status in percentages.columns:
        fig.add_trace(go.Bar(
            name=display_names.get(status, str(status)),
            x=percentages.index.astype(str),
            y=percentages[status],
        ))
    fig.update_layout(barmode='stack', title=f'Addiction by {feature.replace("_", " ").title()}')
    return fig

@app.callback(
    Output('boxplot-outliers', 'figure'),
    Input('feature-dropdown', 'value')
)
def update_boxplot(feature):
    """Return a box plot of the selected feature over all rows of ``df``.

    Parameters
    ----------
    feature : str or None
        Column name chosen in the feature dropdown; ``None`` when cleared.

    Returns
    -------
    plotly figure or dash.no_update
        ``dash.no_update`` when no feature is selected, so the previous figure
        stays on screen instead of the callback failing on ``None.replace``.
    """
    if feature is None:
        return dash.no_update

    pretty_name = feature.replace("_", " ").title()
    return px.box(df, y=feature, title=f'Boxplot of {pretty_name}')

@app.callback(
    Output('histogram-kde', 'figure'),
    Input('feature-dropdown', 'value')
)
def histogram_kde(feature):
    """Return overlaid density histograms of a feature, one per addiction status.

    The figure uses 30 bins, probability-density normalisation and a violin
    marginal plot, coloured by the ``addicted`` column.

    Parameters
    ----------
    feature : str or None
        Column name chosen in the feature dropdown; ``None`` when cleared.

    Returns
    -------
    plotly figure or dash.no_update
        ``dash.no_update`` when no feature is selected, so the previous figure
        stays on screen instead of the callback failing on ``None.replace``.
    """
    if feature is None:
        return dash.no_update

    pretty_name = feature.replace("_", " ").title()
    return px.histogram(
        df,
        x=feature,
        nbins=30,
        marginal="violin",
        color='addicted',
        barmode='overlay',
        histnorm='probability density',
        title=f'Distribution of {pretty_name} by Addiction',
    )

@app.callback(
    Output('correlation-heatmap', 'src'),
    Input('feature-dropdown', 'value')
)
def correlation_plot(_):
    """Render the feature correlation matrix as a base64 PNG data URI.

    Parameters
    ----------
    _ : object
        Dropdown value; only used as the trigger, its content is ignored.

    Returns
    -------
    str
        ``data:image/png;base64,...`` string suitable for an ``html.Img`` src.
    """
    corr = df[features].corr()

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", so the heatmap and the save always refer to the same
    # figure, and close it in `finally` so it is released even if drawing or
    # saving raises.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)

    return 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode()

@app.callback(
    Output('feature-importance', 'figure'),
    Input('feature-dropdown', 'value')
)
def feature_importance(_):
    """Return a horizontal bar chart of the loaded model's feature importances.

    Parameters
    ----------
    _ : object
        Dropdown value; only used as the trigger, its content is ignored.

    Returns
    -------
    plotly figure
        Bars sorted by descending importance, titled with the model's class name.

    Raises
    ------
    ValueError
        If the number of feature names differs from the number of importances.
    """
    importances = model.feature_importances_

    # Prefer the feature names stored on the model, if it exposes them, so each
    # importance is paired with the column it was trained on; fall back to the
    # dashboard's `features` list otherwise.
    names = list(getattr(model, 'feature_names_in_', features))
    if len(names) != len(importances):
        raise ValueError(
            f"Model reports {len(importances)} feature importances "
            f"but {len(names)} feature names are available"
        )

    fi_df = (
        pd.DataFrame({'Feature': names, 'Importance': importances})
        .sort_values(by='Importance', ascending=False)
    )

    # Title the chart after the model that was actually loaded; the previous
    # hard-coded "XGBoost" did not match "random_forest_model.pkl".
    model_name = type(model).__name__
    fig = px.bar(fi_df, x='Importance', y='Feature', orientation='h',
                 title=f"{model_name} Feature Importance")
    return fig

@lru_cache(maxsize=1)
def _knn_error_rates(max_k=20):
    """Return the test-set error rate of KNN for k = 1..max_k as a tuple.

    The data is split 80/20 with a fixed random_state, and the scaler is fitted
    on the training part only. The result does not depend on any user input,
    so it is cached and the models are trained once per process instead of on
    every dropdown change.
    """
    X = df[features]
    y = df['addicted']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    error_rates = []
    for k in range(1, max_k + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train_scaled, y_train)
        y_pred_k = knn.predict(X_test_scaled)
        error_rates.append(1 - accuracy_score(y_test, y_pred_k))
    # A tuple keeps the cached value immutable for every caller.
    return tuple(error_rates)


@app.callback(
    Output('knn-elbow-plot', 'figure'),
    Input('feature-dropdown', 'value')
)
def plot_knn_elbow(_):
    """Return the KNN elbow plot (error rate against number of neighbours).

    Parameters
    ----------
    _ : object
        Dropdown value; only used as the trigger, its content is ignored.

    Returns
    -------
    plotly.graph_objects.Figure
        Line-and-marker plot of the error rate for k = 1..20.
    """
    error_rates = _knn_error_rates()
    k_values = list(range(1, len(error_rates) + 1))

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=k_values,
        y=list(error_rates),
        mode='lines+markers',
        name='Error Rate',
        marker=dict(size=8)
    ))
    fig.update_layout(
        title='KNN Elbow Method (Error Rate by k)',
        xaxis_title='Number of Neighbors (k)',
        yaxis_title='Error Rate',
        template='plotly_dark'
    )
    return fig

# Start the Dash server (debug mode on) when this code runs as the main module.
if __name__ == "__main__":
    app.run(debug=True)







