In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# ================================
# 1) Load Final Dataset
# ================================
# Read the workbook and derive the helper columns in one chained step.
# Keyword order matters: YEARMONTH is computed from the already-parsed DATE.
#   DATE      -> parsed to datetime; values that cannot be parsed become NaT
#   YEARMONTH -> each row's month as a timestamp, used for monthly trends
#   SEGMENT   -> "<LIFESTAGE> - <PREMIUM_CUSTOMER>" customer-segment label
df = pd.read_excel("../data/final/QVI_data.xlsx").assign(
    DATE=lambda frame: pd.to_datetime(frame["DATE"], errors="coerce"),
    YEARMONTH=lambda frame: frame["DATE"].dt.to_period("M").dt.to_timestamp(),
    SEGMENT=lambda frame: frame["LIFESTAGE"] + " - " + frame["PREMIUM_CUSTOMER"],
)

In [5]:
# ================================
# 2) Donut Chart: Customer Segmentation by Total Sales
# ================================
# Total sales per segment, largest first.
segment_sales = (
    df.groupby("SEGMENT")["TOT_SALES"]
    .sum()
    .sort_values(ascending=False)
)

# The top_n largest segments get their own slice; every segment after them
# is summed into one "Other" slice, which is added only when that sum is > 0.
top_n = 8
top_segments = segment_sales.iloc[:top_n]
other_sum = segment_sales.iloc[top_n:].sum()
if other_sum > 0:
    other_slice = pd.Series({"Other": other_sum})
    top_segments = pd.concat([top_segments, other_slice])

fig1, ax1 = plt.subplots()
wedges, texts = ax1.pie(
    top_segments.values,
    labels=top_segments.index,
    startangle=90,
)

# A white disc drawn over the middle of the pie produces the donut hole.
centre_circle = plt.Circle((0, 0), 0.60, fc="white")
ax1.add_artist(centre_circle)

# Equal axis scaling keeps the pie circular.
ax1.axis("equal")
ax1.set_title("Customer Segmentation by Sales (Donut Chart)")

# Write the chart to disk, then release the figure.
fig1.tight_layout()
fig1.savefig("customer_segmentation_donut.png", dpi=150)
plt.close(fig1)


In [6]:
# ================================
# 3) Monthly Sales Trend (All Stores)
# ================================
# One row per YEARMONTH value with TOT_SALES summed over all rows of that month.
monthly_sales = df.groupby("YEARMONTH")["TOT_SALES"].sum().reset_index()

fig2, ax2 = plt.subplots()
ax2.plot(monthly_sales["YEARMONTH"], monthly_sales["TOT_SALES"], marker="o")
ax2.set_xlabel("Month")
ax2.set_ylabel("Total Sales")
ax2.set_title("Monthly Sales Trend (All Stores)")

# Rotate the month labels through the axes object itself, so the setting does
# not depend on which axes pyplot currently treats as "current".
ax2.tick_params(axis="x", labelrotation=45)

# Write the chart to disk, then release the figure.
fig2.tight_layout()
fig2.savefig("monthly_sales_trend.png", dpi=150)
plt.close(fig2)


In [7]:
# ================================
# 4) Trial vs Control Store Comparison
# ================================
# Trial store = the store with the highest total sales.
store_totals = df.groupby("STORE_NBR")["TOT_SALES"].sum().sort_values(ascending=False)
trial_store = store_totals.index[0]

# Long table: one row per (store, month) with that store's sales for the month.
monthly_store = (
    df.groupby(["STORE_NBR", "YEARMONTH"])["TOT_SALES"]
    .sum()
    .reset_index()
)

# Monthly sales of the trial store, indexed by month.
trial_series = (
    monthly_store[monthly_store["STORE_NBR"] == trial_store]
    .set_index("YEARMONTH")["TOT_SALES"]
    .sort_index()
)

# A candidate needs at least this many months in common with the trial store
# before its correlation is considered.
MIN_OVERLAP_MONTHS = 3

# Control store = the other store whose monthly sales correlate most strongly
# with the trial store's monthly sales.
correlations = {}
for store in monthly_store["STORE_NBR"].unique():
    if store == trial_store:
        continue

    s_series = (
        monthly_store[monthly_store["STORE_NBR"] == store]
        .set_index("YEARMONTH")["TOT_SALES"]
        .sort_index()
    )

    # Inner join keeps only the months present for both stores.
    aligned = pd.concat([trial_series, s_series], axis=1, join="inner")
    if len(aligned) < MIN_OVERLAP_MONTHS:
        continue

    corr = aligned.corr().iloc[0, 1]
    # The correlation is NaN when either series has no variance over the shared
    # months. NaN never compares greater or smaller than anything, so leaving
    # it in the dict would make max() depend on iteration order and could
    # return a store with an undefined correlation.
    if pd.isna(corr):
        continue

    correlations[store] = corr

if not correlations:
    raise ValueError(
        f"No candidate control store has at least {MIN_OVERLAP_MONTHS} months "
        f"in common and a defined correlation with trial store {trial_store}."
    )

control_store = max(correlations, key=correlations.get)


In [8]:
# Prepare data for visualization: keep only the trial and control stores and
# reshape to one column per store, indexed by month.
subset = monthly_store[monthly_store["STORE_NBR"].isin([trial_store, control_store])]
pivot = subset.pivot(index="YEARMONTH", columns="STORE_NBR", values="TOT_SALES").sort_index()

# One line per store on a shared set of axes.
fig3, ax3 = plt.subplots()
for col in pivot.columns:
    ax3.plot(pivot.index, pivot[col], marker="o", label=f"Store {col}")

ax3.set_title(f"Trial vs Control Store Comparison\nTrial={trial_store}, Control={control_store}")
ax3.set_xlabel("Month")
ax3.set_ylabel("Sales")
ax3.legend()

# Rotate the month labels through the axes object itself, so the setting does
# not depend on which axes pyplot currently treats as "current".
ax3.tick_params(axis="x", labelrotation=45)

# Write the chart to disk, then release the figure.
fig3.tight_layout()
fig3.savefig("trial_vs_control_sales.png", dpi=150)
plt.close(fig3)

print("All charts generated successfully!")

All charts generated successfully!
