# In [1]:

# ============================================================
# Walmart Mini Dashboard
# TOP: Store-wise charts (dropdown)
# BOTTOM: All-store charts (overall)
# Run: python walmart_mini_dashboard.py
# Open: http://127.0.0.1:8050
# ============================================================

import pandas as pd
import numpy as np
from threading import Timer
import webbrowser

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor

import plotly.graph_objects as go
import plotly.express as px

from dash import Dash, dcc, html, Input, Output


# -----------------------------
# CONFIG
# -----------------------------
# Address the Dash server binds to; also used to build the URL that
# open_browser() launches.
HOST = "127.0.0.1"
# TCP port for the Dash server (same value is embedded in the browser URL).
PORT = 8050


# ------------------------------------------------------------
# 1) Load + Feature Engineering
# ------------------------------------------------------------
# Read the raw CSV and parse "Date" as day-first; values that cannot be
# parsed become NaT because of errors="coerce".
df = pd.read_csv("Walmart.csv")
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True, errors="coerce")

# Discard rows without a usable date or target, then order the frame
# chronologically (store as tie-breaker) with a fresh 0..n-1 index.
df = (
    df.dropna(subset=["Date", "Weekly_Sales"])
    .sort_values(["Date", "Store"])
    .reset_index(drop=True)
)

# Calendar features derived from the parsed date column.
df = df.assign(
    Year=df["Date"].dt.year,
    Month=df["Date"].dt.month,
    Week=df["Date"].dt.isocalendar().week.astype(int),
)

# Model inputs: raw CSV columns followed by the derived calendar columns.
feature_cols = [
    "Store", "Holiday_Flag", "Temperature",
    "Fuel_Price", "CPI", "Unemployment",
] + ["Year", "Month", "Week"]

# Independent copies so later steps cannot mutate df through X / y.
X = df.loc[:, feature_cols].copy()
y = df.loc[:, "Weekly_Sales"].copy()


# ------------------------------------------------------------
# 2) Train/Test Split (Time-based)
# ------------------------------------------------------------
# Hold out the most recent ~20% of distinct *dates* (not rows) as the test set.
#
# df is sorted by (Date, Store), so a row-count split with shuffle=False can
# land in the middle of a date: that week then has some stores in train and
# the rest in test, and the per-date "all stores" totals computed from the
# test rows are partial for that week. Splitting on a date cutoff keeps every
# date wholly on one side of the boundary.
_split_dates = df["Date"].drop_duplicates().sort_values()
if len(_split_dates) < 2:
    raise ValueError("Need at least two distinct dates for a time-based split.")

# Round the test share up, but always leave at least one date for training.
_n_test_dates = min(len(_split_dates) - 1, int(np.ceil(len(_split_dates) * 0.20)))
_test_mask = df["Date"] >= _split_dates.iloc[-_n_test_dates]

# X and y were built from df, so the boolean mask aligns on the same index.
X_train, X_test = X.loc[~_test_mask], X.loc[_test_mask]
y_train, y_test = y.loc[~_test_mask], y.loc[_test_mask]
test_idx = X_test.index


# ------------------------------------------------------------
# 3) Train Model
# ------------------------------------------------------------
# Fit the gradient-boosted regressor on the training rows; fit() returns the
# estimator itself, so construction and training are chained.
model = XGBRegressor(
    objective="reg:squarederror",
    tree_method="hist",
    n_estimators=500,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out rows, in the same row order as X_test.
pred = model.predict(X_test)

# Test-set error summaries over all stores together.
mae_global = mean_absolute_error(y_test, pred)
rmse_global = mean_squared_error(y_test, pred) ** 0.5


# ------------------------------------------------------------
# 4) Prediction Records (Aligned)
# ------------------------------------------------------------
# One row per held-out observation: identifying columns, the actual target,
# the model prediction, and the derived error columns. `pred` is attached
# positionally, so it must be in the same row order as df.loc[test_idx].
res = (
    df.loc[test_idx, ["Date", "Store", "Holiday_Flag", "Weekly_Sales"]]
    .assign(Predicted=pred)
    .assign(Residual=lambda t: t["Weekly_Sales"] - t["Predicted"])
    .assign(Abs_Error=lambda t: t["Residual"].abs())
    .sort_values(["Store", "Date"])
    .reset_index(drop=True)
)

# Optional: save for reporting
res.to_csv("prediction_records.csv", index=False)

# Store ids that actually appear in the test records, ascending.
stores = sorted(res["Store"].unique())


# ------------------------------------------------------------
# 5) ALL-STORES (Overview graphs data)
# ------------------------------------------------------------
# Per-date totals across every store in the test records. "Holiday" is the
# max of Holiday_Flag for that date, i.e. 1 if any store row is flagged.
overall = (
    res.groupby("Date", as_index=False)
    .agg(
        Actual=("Weekly_Sales", "sum"),
        Pred=("Predicted", "sum"),
        Holiday=("Holiday_Flag", "max"),
    )
    .assign(Residual=lambda t: t["Actual"] - t["Pred"])
)

# Per-store error table: [Store, RMSE, MAE, Rows]; metrics fall back to 0
# when a store has no test rows.
store_perf = []
for store in stores:
    rows = res.loc[res["Store"] == store]
    if rows.empty:
        rmse_val, mae_val = 0, 0
    else:
        actual, fitted = rows["Weekly_Sales"], rows["Predicted"]
        rmse_val = mean_squared_error(actual, fitted) ** 0.5
        mae_val = mean_absolute_error(actual, fitted)
    store_perf.append([store, rmse_val, mae_val, len(rows)])

store_perf_df = pd.DataFrame(store_perf, columns=["Store", "RMSE", "MAE", "Rows"])


# ------------------------------------------------------------
# 6) Figures (ALL STORES - static)
# ------------------------------------------------------------
def fig_allstores_overall_line():
    """Build the all-stores line chart of total actual vs predicted sales.

    Reads the module-level ``overall`` frame and the global test metrics
    (``rmse_global``, ``mae_global``) for the title. Dates whose ``Holiday``
    value is 1 are overlaid as red markers on the actual series.

    Returns:
        A ``plotly.graph_objects.Figure`` with three traces
        (actual, predicted, holiday markers).
    """
    holiday_rows = overall.loc[overall["Holiday"] == 1]

    traces = [
        go.Scatter(
            x=overall["Date"], y=overall["Actual"],
            mode="lines", name="Actual (Total)", line={"width": 3},
        ),
        go.Scatter(
            x=overall["Date"], y=overall["Pred"],
            mode="lines", name="Predicted (Total)",
            line={"width": 3, "dash": "dash"},
        ),
        go.Scatter(
            x=holiday_rows["Date"], y=holiday_rows["Actual"],
            mode="markers", name="Holiday",
            marker={"color": "red", "size": 7},
        ),
    ]

    chart = go.Figure(data=traces)
    chart.update_layout(
        title=f"ALL STORES: Total Sales Actual vs Predicted | Global RMSE={rmse_global:,.0f}, MAE={mae_global:,.0f}",
        xaxis_title="Date",
        yaxis_title="Total Weekly Sales",
        hovermode="x unified",
        template="plotly_white",
    )
    return chart


def fig_allstores_rmse_bar():
    """Build a bar chart of per-store RMSE, worst store first.

    Reads the module-level ``store_perf_df`` and sorts it by RMSE descending.

    Returns:
        A Plotly figure with one bar per store, ordered left to right from
        highest to lowest RMSE.
    """
    dfp = store_perf_df.sort_values("RMSE", ascending=False)
    fig = px.bar(
        dfp, x="Store", y="RMSE",
        title="ALL STORES: RMSE by Store (Higher = Worse)",
        template="plotly_white"
    )
    fig.update_layout(xaxis_title="Store", yaxis_title="RMSE")
    # Store ids are numeric, so by default the x-axis is linear and the bars
    # are laid out by store number, which discards the RMSE sort above. A
    # categorical axis places bars in the order the rows appear in the data.
    fig.update_xaxes(type="category")
    return fig


# ------------------------------------------------------------
# 7) Figures (STORE-WISE - dynamic)
# ------------------------------------------------------------
def store_kpis(store_id: int):
    """Compute headline test-set numbers for a single store.

    Args:
        store_id: Value matched against the ``Store`` column of the
            module-level ``res`` frame.

    Returns:
        A 5-tuple ``(rmse, mae, mean_actual, mean_predicted, row_count)``.
        All five entries are 0 when the store has no rows in ``res``.
    """
    rows = res.loc[res["Store"] == store_id]
    if rows.empty:
        return 0, 0, 0, 0, 0

    actual = rows["Weekly_Sales"]
    fitted = rows["Predicted"]
    return (
        mean_squared_error(actual, fitted) ** 0.5,
        mean_absolute_error(actual, fitted),
        actual.mean(),
        fitted.mean(),
        len(rows),
    )


def fig_store_line(store_id: int):
    """Build the actual-vs-predicted line chart for one store.

    Args:
        store_id: Value matched against the ``Store`` column of ``res``.

    Returns:
        A ``plotly.graph_objects.Figure`` with the actual series, the
        predicted series (dashed) and red markers on rows whose
        ``Holiday_Flag`` is 1.
    """
    rows = res.loc[res["Store"] == store_id].sort_values("Date")
    holiday_rows = rows.loc[rows["Holiday_Flag"] == 1]

    chart = go.Figure(data=[
        go.Scatter(
            x=rows["Date"], y=rows["Weekly_Sales"],
            mode="lines", name="Actual", line={"width": 3},
        ),
        go.Scatter(
            x=rows["Date"], y=rows["Predicted"],
            mode="lines", name="Predicted",
            line={"width": 3, "dash": "dash"},
        ),
        go.Scatter(
            x=holiday_rows["Date"], y=holiday_rows["Weekly_Sales"],
            mode="markers", name="Holiday",
            marker={"color": "red", "size": 7},
        ),
    ])

    chart.update_layout(
        title=f"STORE {store_id}: Actual vs Predicted (Test)",
        xaxis_title="Date",
        yaxis_title="Weekly Sales",
        hovermode="x unified",
        template="plotly_white",
    )
    return chart


def fig_store_residual(store_id: int):
    """Build the residual-over-time chart for one store.

    Args:
        store_id: Value matched against the ``Store`` column of ``res``.

    Returns:
        A ``plotly.graph_objects.Figure`` plotting ``Residual``
        (actual minus predicted) by date, with a horizontal line at zero.
    """
    rows = res.loc[res["Store"] == store_id].sort_values("Date")

    residual_trace = go.Scatter(
        x=rows["Date"], y=rows["Residual"],
        mode="lines", name="Residual",
        line={"color": "purple", "width": 2},
    )
    chart = go.Figure(data=[residual_trace])

    # Zero reference: points above the line are under-predictions.
    chart.add_hline(y=0, line_width=1, line_color="black")

    chart.update_layout(
        title=f"STORE {store_id}: Residuals Over Time",
        xaxis_title="Date",
        yaxis_title="Actual - Predicted",
        hovermode="x unified",
        template="plotly_white",
    )
    return chart


def fig_store_hist(store_id: int):
    """Build a histogram of one store's residuals.

    Args:
        store_id: Value matched against the ``Store`` column of ``res``.

    Returns:
        A Plotly figure: histogram of ``Residual`` with ``nbins=35``.
    """
    rows = res.loc[res["Store"] == store_id]
    chart = px.histogram(
        rows,
        x="Residual",
        nbins=35,
        title=f"STORE {store_id}: Residual Distribution",
        template="plotly_white",
    )
    axis_titles = {"xaxis_title": "Residual", "yaxis_title": "Count"}
    chart.update_layout(**axis_titles)
    return chart


def fig_store_scatter(store_id: int):
    """Build the actual-vs-predicted scatter plot for one store.

    Args:
        store_id: Value matched against the ``Store`` column of ``res``.

    Returns:
        A Plotly figure with points coloured by ``Holiday_Flag`` and a dashed
        red ``y = x`` reference line spanning the combined range of actual
        and predicted values.
    """
    rows = res.loc[res["Store"] == store_id]
    actual, fitted = rows["Weekly_Sales"], rows["Predicted"]

    chart = px.scatter(
        rows,
        x="Weekly_Sales",
        y="Predicted",
        color="Holiday_Flag",
        labels={"Weekly_Sales": "Actual", "Predicted": "Predicted"},
        title=f"STORE {store_id}: Actual vs Predicted (Scatter)",
        template="plotly_white",
    )

    # End points of the diagonal: the smallest and largest value seen in
    # either series, so the line covers the whole plotted area.
    low = float(min(actual.min(), fitted.min()))
    high = float(max(actual.max(), fitted.max()))
    diagonal = go.Scatter(
        x=[low, high], y=[low, high],
        mode="lines", name="Ideal (y=x)",
        line={"color": "red", "dash": "dash"},
    )
    chart.add_trace(diagonal)
    return chart


def fig_store_holiday_error(store_id: int):
    """Build a box plot of absolute error split by holiday flag for one store.

    Args:
        store_id: Value matched against the ``Store`` column of ``res``.

    Returns:
        A Plotly box plot of ``Abs_Error`` grouped by ``Holiday_Flag``.
    """
    rows = res.loc[res["Store"] == store_id]
    # assign() returns a new frame, so the shared `res` is left untouched.
    rows = rows.assign(Abs_Error=rows["Residual"].abs())

    return px.box(
        rows,
        x="Holiday_Flag",
        y="Abs_Error",
        labels={"Holiday_Flag": "Holiday (0=No, 1=Yes)", "Abs_Error": "Absolute Error"},
        title=f"STORE {store_id}: Abs Error (Holiday vs Non-Holiday)",
        template="plotly_white",
    )


def kpi_cards(rmse_s, mae_s, avg_a, avg_p, n):
    """Render the five KPI tiles shown above the store charts.

    Args:
        rmse_s: Store RMSE.
        mae_s: Store MAE.
        avg_a: Mean actual weekly sales.
        avg_p: Mean predicted weekly sales.
        n: Number of rows behind the metrics.

    Returns:
        A list of five ``html.Div`` cards in the order Rows, Store RMSE,
        Store MAE, Avg Actual, Avg Pred. The row count is shown as-is; the
        other values are formatted with thousands separators and no decimals.
    """
    card_css = {
        "padding": "10px",
        "border": "1px solid #ddd",
        "borderRadius": "8px",
        "width": "20%",
        "textAlign": "center"
    }
    # (heading, already-formatted value) pairs, in display order.
    entries = [
        ("Rows", f"{n}"),
        ("Store RMSE", f"{rmse_s:,.0f}"),
        ("Store MAE", f"{mae_s:,.0f}"),
        ("Avg Actual", f"{avg_a:,.0f}"),
        ("Avg Pred", f"{avg_p:,.0f}"),
    ]
    return [
        html.Div([html.H4(heading), html.H3(text)], style=card_css)
        for heading, text in entries
    ]


# ------------------------------------------------------------
# 8) DASH APP Layout
# TOP: Store-wise
# BOTTOM: All-stores
# ------------------------------------------------------------
app = Dash(__name__)
app.title = "Walmart Mini Dashboard"

# Page structure: a store-specific section driven by the "store_dd" dropdown
# (KPI row + five graphs filled in by the callback), then a static all-stores
# section whose figures are built once at start-up.
#
# The headings use \u / \U escapes for the em dash and the emoji so the text
# survives the file being saved or read with the wrong encoding; the previous
# literals had been mangled into mojibake (UTF-8 bytes decoded as cp1252).
app.layout = html.Div([

    html.H2("Walmart Sales Forecasting \u2014 Mini Dashboard"),

    # ------------------ TOP: Store wise ------------------
    html.H3("\U0001F539 Store-wise Dashboard (Dropdown Controlled)"),

    html.Div([
        html.Label("Select Store"),
        dcc.Dropdown(
            id="store_dd",
            options=[{"label": f"Store {s}", "value": s} for s in stores],
            value=stores[0],
            clearable=False,
            style={"width": "250px"}
        ),
    ], style={"marginBottom": "10px"}),

    # Filled by the callback with the KPI cards for the selected store.
    html.Div(id="kpi_row", style={"display": "flex", "gap": "10px", "marginBottom": "10px"}),

    dcc.Graph(id="g_store_line"),
    html.Div([
        html.Div([dcc.Graph(id="g_store_residual")], style={"width": "50%"}),
        html.Div([dcc.Graph(id="g_store_hist")], style={"width": "50%"}),
    ], style={"display": "flex", "gap": "10px"}),

    html.Div([
        html.Div([dcc.Graph(id="g_store_scatter")], style={"width": "50%"}),
        html.Div([dcc.Graph(id="g_store_holiday_err")], style={"width": "50%"}),
    ], style={"display": "flex", "gap": "10px"}),

    html.Hr(),

    # ------------------ BOTTOM: All stores ------------------
    html.H3("\U0001F53B All-Stores Overview (At Bottom)"),
    dcc.Graph(figure=fig_allstores_overall_line()),
    dcc.Graph(figure=fig_allstores_rmse_bar()),

], style={"maxWidth": "1200px", "margin": "0 auto", "fontFamily": "Arial"})


# ------------------------------------------------------------
# 9) Callback: Updates ONLY Store-wise charts
# ------------------------------------------------------------
@app.callback(
    Output("kpi_row", "children"),
    *(
        Output(graph_id, "figure")
        for graph_id in (
            "g_store_line",
            "g_store_residual",
            "g_store_hist",
            "g_store_scatter",
            "g_store_holiday_err",
        )
    ),
    Input("store_dd", "value")
)
def update_store_section(store_id):
    """Rebuild the store-wise section when the dropdown selection changes.

    Args:
        store_id: Current value of the ``store_dd`` dropdown; coerced to int.

    Returns:
        A 6-tuple matching the declared outputs: the KPI cards followed by the
        line, residual, histogram, scatter and holiday-error figures.
    """
    selected = int(store_id)
    cards = kpi_cards(*store_kpis(selected))

    # Builders listed in the same order as the figure outputs above.
    builders = (
        fig_store_line,
        fig_store_residual,
        fig_store_hist,
        fig_store_scatter,
        fig_store_holiday_error,
    )
    return (cards, *(build(selected) for build in builders))


# ------------------------------------------------------------
# 10) RUN on 127.0.0.1:8050
# ------------------------------------------------------------
def open_browser():
    """Open the dashboard URL (built from HOST and PORT) in a new browser window."""
    webbrowser.open_new(f"http://{HOST}:{PORT}")


if __name__ == "__main__":
    import os

    # With debug=True the Flask/Werkzeug reloader normally re-executes this
    # module in a child process, marked by WERKZEUG_RUN_MAIN="true". Without
    # this check both the parent and the child schedule the timer and two
    # browser tabs open; only schedule it outside the reloader child.
    if os.environ.get("WERKZEUG_RUN_MAIN") != "true":
        # Short delay so the server has a moment to start listening first.
        Timer(1, open_browser).start()
    app.run(host=HOST, port=PORT, debug=True)


# Output captured from a previous run (the port was already in use):
# OSError: Address 'http://127.0.0.1:8050' already in use.
#     Try passing a different port to run.