# Analyze historical Starbucks menus

#### Load Python tools and Jupyter config

In [1]:
import pandas as pd
import jupyter_black
import altair as alt
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import altair_stiles as altstiles
from statsmodels.nonparametric.smoothers_lowess import lowess

In [2]:
# Enable jupyter_black for this notebook session.
jupyter_black.load()

# Raise pandas' display limits so wide or long frames are shown without truncation.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Register the altair_stiles theme under the name "stiles", then make it active.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [3]:
# Current local date formatted as a compact YYYYMMDD string.
today = pd.Timestamp.today().strftime("%Y%m%d")

---

## Read

#### Just the main categories

In [4]:
# Top-level menu categories kept for this analysis; rows from any other
# main_category are dropped when the archive is loaded.
main_categories = [
    "At Home Coffee",
    "Drinks",
    "Food",
]

#### Historical archive of menus (from notebook 01) into a dataframe

In [5]:
# Load the menu archive, parsing `date_captured` as datetimes, and keep only
# the rows whose main_category is listed in `main_categories`.
menus_path = "data/processed/starbucks_historical_menus.csv"
df = pd.read_csv(menus_path, parse_dates=["date_captured"]).loc[
    lambda menus: menus["main_category"].isin(main_categories)
]

In [6]:
# Preview the first five rows of the filtered archive.
df.head(5)

Unnamed: 0,product_number,main_category,sub_category,category_detail,product_name,date_captured,year_captured
0,406,Drinks,Hot Coffees,Americanos,Caffè Americano,2019-10-01,2019
1,2122559,Drinks,Hot Coffees,Americanos,Starbucks® Blonde Caffè Americano,2019-10-01,2019
2,873068625,Drinks,Hot Coffees,Brewed Coffees,Blonde Roast,2019-10-01,2019
3,478,Drinks,Hot Coffees,Brewed Coffees,Caffè Misto,2019-10-01,2019
4,479,Drinks,Hot Coffees,Brewed Coffees,Featured Dark Roast,2019-10-01,2019


---

## Aggregate

#### Count products by date

In [7]:
# Count the product rows (non-null product_number values) for every
# capture date / main category pair, ordered by capture date.
products_count = (
    df.groupby(["date_captured", "main_category"], as_index=False)
    .agg(products_count=("product_number", "count"))
    .sort_values("date_captured")
)

---

## Questions

### 1. How has the number of products changed over time? 

In [8]:
# Add calendar year and month columns derived from the capture date.
products_count = products_count.assign(
    year=lambda counts: counts["date_captured"].dt.year,
    month=lambda counts: counts["date_captured"].dt.month,
)

In [9]:
# Keep only the rows captured in October (month 10), whatever the year.
is_october = products_count["month"].eq(10)
october_products = products_count.loc[is_october]

In [None]:
# Average the October product counts within each main category and year,
# giving one row per (main_category, year).
october_summary = october_products.groupby(
    ["main_category", "year"], as_index=False
)["products_count"].mean()

In [11]:
# Reshape to wide form for year-on-year comparison: one row per main category,
# one column per year, cells holding the mean October product count.
october_pivot = october_summary.set_index(["main_category", "year"])[
    "products_count"
].unstack("year")

In [12]:
# Fractional change of each year column relative to the column on its left,
# rounded to two decimals (so -0.27 means a 27% drop).
october_pivot_change = october_pivot.pct_change(axis=1).round(2)

In [13]:
# Move main_category out of the index and into a regular column.
# NOTE: this rebinds the same name, so running this cell a second time would
# add an extra "index" column; re-run the pct_change cell first if needed.
october_pivot_change = october_pivot_change.reset_index(drop=False)

In [14]:
# Table for display: the year-over-year changes without the 2019 column.
october_change = october_pivot_change.drop(columns=[2019])

In [15]:
# Reshape the change table to long form for charting: one row per
# (main_category, Year) pair, with the fractional change in "Change".
october_long = pd.melt(
    october_pivot_change,
    id_vars="main_category",
    var_name="Year",
    value_name="Change",
)

In [16]:
# Drop the 2019 rows from the long table before charting.
# The "Year" values are the year column labels of the pivoted table, which are
# integers, so comparing them with the string "2019" never matched and the
# filter silently kept every row. Comparing on the string form makes the
# filter work whether the labels are ints or strings.
october_long = october_long[october_long["Year"].astype(str) != "2019"]

#### Faceted chart showing year-over-year change

In [17]:
# Bar chart of the year-over-year change, drawn as one facet (column) per
# main category; the y axis is formatted as a percentage.
yoy_encodings = dict(
    x=alt.X("Year:N", title=""),
    y=alt.Y("Change:Q", title="", axis=alt.Axis(format="%")),
    color=alt.Color("main_category:N", legend=None),
    column=alt.Column("main_category:N", title=""),
)

chart = (
    alt.Chart(october_long)
    .mark_bar()
    .encode(**yoy_encodings)
    .properties(title="Percentage change, year over year", width=200, height=200)
    .configure_facet(spacing=10)
    .configure_view(stroke=None)
)

chart

#### Conclusion: 

In [18]:
# Show the year-over-year change table (fractions, e.g. -0.27 = -27%) that the conclusion below refers to.
october_change

year,main_category,2020,2021,2022,2023
0,At Home Coffee,-0.27,-0.19,-0.17,-0.12
1,Drinks,-0.06,-0.08,-0.06,-0.09
2,Food,-0.26,-0.11,-0.22,-0.18


> Comparing average October product counts from 2019 through 2023 reveals a consistent decline in product availability across all three main categories. 'At Home Coffee' products experienced a significant reduction: the count fell 26.5% from October 2019 to October 2020, then by a further 19.4%, 17.2%, and 12.5% in each of the subsequent years. The 'Drinks' and 'Food' categories also declined every year, although at varying rates (roughly 6% to 9% a year for Drinks and 11% to 26% a year for Food). These changes could reflect Starbucks' efforts to optimize its menu or respond to evolving market trends and consumer preferences.

In [40]:
# Area chart of the product count over time, one facet (column) per main category.
category_area = (
    alt.Chart(products_count)
    .mark_area()
    .encode(
        x=alt.X("date_captured:T", axis=alt.Axis(tickCount=2, format="%Y"), title=""),
        y=alt.Y("products_count:Q", axis=alt.Axis(tickCount=4), title=""),
        color=alt.Color("main_category:N", legend=None, title=""),
        column=alt.Column("main_category", title=""),
    )
    .properties(
        title="Starbucks product count over time, by main categories",
        width=200,
        height=200,
    )
    .configure_legend(orient="top")
)

category_area

---

### 2. How has the overall product count changed over time?

#### Group by date and count products

In [19]:
# Number of distinct product_number values on each capture date,
# across all of the retained main categories.
product_counts_over_time = df.groupby("date_captured", as_index=False).agg(
    Count=("product_number", "nunique")
)

#### Smooth those counts using local regression

In [20]:
# LOWESS-smooth the per-date counts. The x-values are the frame's index
# (row positions), not the capture dates themselves. With return_sorted=False
# the result is just the fitted values, in the same order as the input rows.
smoothed_values = lowess(
    endog=product_counts_over_time["Count"],
    exog=product_counts_over_time.index,
    # frac=0.1,  # The default is 2/3
    return_sorted=False,
)

#### Add the smoothed values as a new column

In [21]:
# Store the smoothed series, rounded to whole numbers, beside the raw counts.
product_counts_over_time = product_counts_over_time.assign(
    **{"Smoothed count": smoothed_values.round()}
)

#### Melt the dataframe for multi-series charting

In [22]:
# Long form for multi-series charting: one row per (date, series type), where
# "type" is either "Count" or "Smoothed count" and "count" holds the value.
product_counts_over_time_melted = product_counts_over_time.melt(
    id_vars=["date_captured"],
    value_vars=["Count", "Smoothed count"],
    var_name="type",
    value_name="count",
)

#### Determine the years we have in the dataframe

In [23]:
# Distinct calendar years present in the capture dates, in order of first appearance.
years = pd.unique(product_counts_over_time["date_captured"].dt.year)

#### Create annotations for mid-December of each year

In [24]:
# One annotation row per year: dated Dec. 15 of that year, positioned 20 above
# the highest raw count, and labelled "Holiday spikes".
label_height = product_counts_over_time["Count"].max() + 20
annotations = pd.DataFrame(
    {
        "date_captured": [pd.Timestamp(yr, 12, 15) for yr in years],
        "count": [label_height for _ in years],
        "text": ["Holiday spikes" for _ in years],
    }
)

#### Line chart: Smoothed with annotations

In [45]:
# Line chart of the raw and LOWESS-smoothed product counts over time,
# one line per series type ("Count" and "Smoothed count").
base_chart = (
    alt.Chart(product_counts_over_time_melted)
    .mark_line(interpolate="basis")
    .encode(
        x=alt.X(
            "date_captured:T",
            title="",
            axis=alt.Axis(tickCount=4, format="%b. %-d, %Y"),
        ),
        y=alt.Y("count:Q", title="", axis=alt.Axis(tickCount=4)),
        color=alt.Color(
            "type:N",
            title="",
            scale=alt.Scale(
                domain=["Count", "Smoothed count"], range=["#63948B", "lightgray"]
            ),
        ),
    )
    .properties(
        width=500,
        height=300,
        # The data is limited to `main_categories` (At Home Coffee, Drinks, Food)
        # when it is loaded, so the title names those categories; the earlier
        # wording mentioned merchandise, which is not in the charted data.
        title="Starbucks menu over time: Drinks, food and at-home coffee items",
    )
)

# Label a single year only: row 2 of `annotations`, i.e. the third year in `years`.
specific_annotations = annotations.iloc[[2]]

# Grey text mark placed at the annotation's date/count, nudged 50px down.
annotation_chart = (
    alt.Chart(specific_annotations)
    .mark_text(color="#999", align="center", baseline="middle", dx=0, dy=50)
    .encode(
        x=alt.X("date_captured:T"),
        y=alt.Y("count:Q"),
        text=alt.Text("text:N"),
    )
)

# Overlay the annotation on the line chart, then put the legend on top
# with line-style ("stroke") symbols.
layered_chart = base_chart + annotation_chart
final_chart = layered_chart.configure_legend(orient="top", symbolType="stroke")
final_chart

---

In [26]:
# Bar chart of the mean October product count per year, one facet (column)
# per main category.
faceted_encodings = dict(
    x=alt.X("year:N", axis=alt.Axis(title="Year")),
    y=alt.Y("products_count:Q", axis=alt.Axis(title="")),
    color=alt.Color("main_category:N", legend=None),
    column=alt.Column("main_category:N", header=alt.Header(title="")),
    tooltip=["main_category", "year", "products_count"],
)

chart = (
    alt.Chart(october_summary)
    .mark_bar()
    .encode(**faceted_encodings)
    .properties(title="Starbucks menu: Product count, by year", width=200, height=200)
    .configure_facet(spacing=10)
    .configure_view(stroke=None)
)

chart

In [27]:
# Single-panel bar chart of the mean October product count per year, with the
# main categories distinguished by color and a legend on top.
# (A column facet by year was tried here and left disabled.)
stacked_encodings = dict(
    x=alt.X("year:N", axis=alt.Axis(title="")),
    y=alt.Y("products_count:Q", axis=alt.Axis(title="")),
    color=alt.Color("main_category:N", title=""),
    tooltip=["main_category", "year", "products_count"],
)

chart = (
    alt.Chart(october_summary)
    .mark_bar()
    .encode(**stacked_encodings)
    .properties(title="Starbucks menu: Product count, by year", width=250, height=300)
    .configure_view(stroke=None)
    .configure_legend(orient="top")
)

chart

---

In [28]:
# Open questions for follow-up analysis. Each question is followed by a note on how to approach it.

# Determine if specific categories are expanding or contracting more significantly than others.

# How does the seasonal variability affect product offerings in the "Drinks" category?
# Investigate the introduction of seasonal drinks (e.g., holiday specials) and their duration on the menu.

# Is there a significant difference in the rate of new product introductions before and after the Covid-19 pandemic began?
# Compare the frequency of new product additions or removals before and after early 2020.

# Which sub_category and category_detail have seen the most fluctuation in product counts?
# Identify which specific areas of the menu are most dynamic.

# Are there noticeable trends in the introduction or discontinuation of health-oriented products (e.g., plant-based, low-calorie) over time?
# Explore how Starbucks' menu adapts to evolving consumer health trends.

# How does the product count within the "Food" category correlate with major events or changes in the external environment, such as the Covid-19 pandemic?
# Specifically look for changes in food offerings that might align with changes in consumer behavior during the pandemic.

# What is the rate of turnover for products in the "Merchandise" category compared to consumable products?
# Assess whether merchandise items have longer or shorter life cycles on the menu compared to food and drinks.
# NOTE: "Merchandise" is not in `main_categories`, so it is filtered out when `df` is loaded; this question needs the unfiltered archive.

# How have "At Home Coffee" offerings changed in response to increased home consumption trends, if at all?
# Given the rise in home brewing during the pandemic, evaluate how Starbucks has adjusted its product strategy in this category.

# Can we identify patterns of product introduction and discontinuation around specific times of the year?
# Look for evidence of Starbucks capitalizing on holidays, seasons, or other periodic events through their menu offerings.