In [0]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
from pyspark.sql import functions as F


# --- Gold layer: read the customer segments table and convert it to pandas ---
# Every chart below is built from this single pandas DataFrame.
pdf = spark.table("instacart.gold.customer_segments").toPandas()


## Segment Distribution
This donut chart shows how customers are distributed across key segments, providing a quick overview of which customer types make up the largest share of the user base.

In [0]:

# Count rows per segment. groupby().size() counts rows, so this equals the
# number of users only if `pdf` has one row per customer -- TODO confirm.
seg_counts = (
    pdf.groupby("segment")
       .size()
       .reset_index(name="users")
)

# Fixed colour per known segment label (edit the labels if yours differ).
# Every colour uses alpha 0.7, i.e. slightly translucent.
COLOR_MAP = {
    "Weekly Shoppers":  "rgba(67,176,42,0.7)",  # green
    "Monthly Stockers": "rgba(255,145,0,0.7)",  # orange
    "Dormant Users":    "rgba(17, 22, 26, 0.7)" # dark
}

# Donut chart: a pie with hole=0.5, one slice per segment sized by user count.
fig = px.pie(
    seg_counts,
    names="segment",
    values="users",
    color="segment",
    color_discrete_map=COLOR_MAP,
    hole=0.5,
    title="Customer Segment Distribution"
)

# Label each slice with its name and share (one decimal place); the hover
# text additionally shows the raw user count. <extra></extra> hides the
# secondary hover box.
fig.update_traces(
    textposition="inside",
    texttemplate="%{label}<br>%{percent:.1%}",
    hovertemplate="%{label}: %{value} users<br>(%{percent:.1%})<extra></extra>"
)
fig.update_layout(
    template="plotly_white",
    paper_bgcolor="white",
    plot_bgcolor="white",
    font=dict(family="Inter, Segoe UI, Arial", size=13),
    title_x=0.5  # centre the title horizontally
)

# Render as an embeddable HTML fragment. config={"responsive": True} is passed
# so this chart resizes the same way as the other charts in this notebook.
displayHTML(fig.to_html(include_plotlyjs="cdn", full_html=False, config={"responsive": True}))


## Temporal Behavior Pattern
This grouped bar chart shows how different temporal groups behave across days of the week, highlighting when users are most active.

In [0]:
# Row count for every (temporal group, dominant day-of-week) combination.
temp_dow = pdf.groupby(["temporal_group", "dominant_dow"]).size().reset_index(name="users")

# Grouped bars: x is the day of week, one coloured bar per temporal group.
fig_temp = px.bar(
    temp_dow,
    x="dominant_dow",
    y="users",
    color="temporal_group",
    barmode="group",
    title="User Count by Temporal Group and Day of Week",
    # Four translucent (alpha 0.7) colours, assigned to groups in order.
    color_discrete_sequence=[
        "rgba(67,176,42,0.7)",       # green
        "rgba(255,145,0,0.7)",       # orange
        "rgba(17, 22, 26, 0.7)",     # dark
        "rgba(140, 204, 250, 0.7)",  # light blue
    ],
)

# White theme and background plus axis captions.
fig_temp.update_layout(
    template="plotly_white",
    paper_bgcolor="white",
    plot_bgcolor="white",
    xaxis_title="Day of Week (0=Sun ... 6=Sat)",
    yaxis_title="Number of Users",
)

# Emit an embeddable, responsive HTML fragment and hand it to the notebook.
displayHTML(
    fig_temp.to_html(
        include_plotlyjs="cdn",
        full_html=False,
        config={"responsive": True},
    )
)


## Order Frequency Distribution
The box plot compares how frequently users place orders across engagement groups, revealing which groups shop more consistently or show irregular patterns.

In [0]:
# One box per engagement group, summarising the distribution of
# mean_days_between_orders across the rows of `pdf`.
fig_box = px.box(
    pdf,
    x="engagement_group",
    y="mean_days_between_orders",
    color="engagement_group",
    title="Order Frequency Distribution by Engagement Group",
    color_discrete_sequence=["#3bad21", "#ff9100"],  # green, orange
)

# White theme and background plus human-readable axis captions.
fig_box.update_layout(
    template="plotly_white",
    paper_bgcolor="white",
    plot_bgcolor="white",
    xaxis_title="Engagement Group",
    yaxis_title="Mean Days Between Orders",
)

# Emit an embeddable, responsive HTML fragment and hand it to the notebook.
displayHTML(
    fig_box.to_html(
        include_plotlyjs="cdn",
        full_html=False,
        config={"responsive": True},
    )
)


## Order Frequency & Recency by Engagement Group
The grouped bar chart summarizes each engagement group’s average order frequency and recency, highlighting how activity levels differ and which users may need re-engagement efforts.

In [0]:
# Per-engagement-group mean of the two gap columns, renamed to display labels.
# After the melt below, these labels become the legend entries of the chart.
gap_summary = (
    pdf.groupby("engagement_group")
       .agg({
           "mean_days_between_orders": "mean",
           "last_gap_days": "mean"
       })
       .reset_index()
       .rename(columns={
           "mean_days_between_orders": "Avg. Days Between Orders",
           "last_gap_days": "Avg. Last Gap (Days)"
       })
)

# Melt wide -> long (one row per engagement group and metric) so the bars can
# be coloured and grouped by metric.
fig_gap = px.bar(
    gap_summary.melt(id_vars="engagement_group",
                     var_name="Metric", value_name="Days"),
    x="engagement_group", y="Days", color="Metric",
    barmode="group",
    color_discrete_sequence=[
        "rgba(67,176,42,0.7)",  # translucent green
        "rgba(255,145,0,0.7)"   # translucent orange
    ],
    title="Average Order Frequency & Recency by Engagement Group"
)
fig_gap.update_layout(
    yaxis_title="Days",
    template="plotly_white",
    paper_bgcolor="white", plot_bgcolor="white",
    legend_title_text=""  # metric names are self-explanatory; drop the legend title
)

# full_html=False emits an embeddable <div> fragment, the same form the other
# chart cells in this notebook pass to displayHTML, rather than a standalone
# HTML document.
displayHTML(fig_gap.to_html(include_plotlyjs="cdn", full_html=False, config={"responsive": True}))
