# Hey Dude shoes inventory

#### Load Python tools and Jupyter config

In [1]:
import os
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
from tqdm.notebook import tqdm, trange

In [2]:
# Load the jupyter_black extension for this notebook session.
jupyter_black.load()

# Raise pandas' display limits so wide frames and long strings are shown in full.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Register the "stiles" Altair theme and make it the active theme.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

---

## Read data

#### Headers for requests

In [3]:
# HTTP headers sent with every API request. The values describe a desktop
# Chrome 122 browser on macOS making a cross-site request from heydude.com
# to api.fastsimon.com.
headers = {
    "authority": "api.fastsimon.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "en-US,en;q=0.9,es;q=0.8",
    "origin": "https://www.heydude.com",
    "referer": "https://www.heydude.com/",
    "sec-ch-ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"macOS"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "cross-site",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
}

#### Function to generate the request URL with filters

In [4]:
def generate_request_url(page_num, price_range="14-125", genders=None, colors=None):
    """Build the Fast Simon category-navigation URL for one page of results.

    Args:
        page_num: Page number substituted into the ``page_num`` query parameter.
        price_range: Value for the ``Price_from_to`` filter, e.g. ``"14-125"``.
            A falsy value leaves the filter out.
        genders: Optional iterable of values, each sent as a
            ``tag_filter_0.432`` filter.
        colors: Optional iterable of color names; each is prefixed with
            ``color-`` and sent as a ``tag_filter_0.982`` filter.

    Returns:
        The request URL, ending in a ``narrow`` parameter that holds the
        filters as a compact JSON array of ``[name, value]`` pairs (``[]``
        when no filter applies).
    """
    base_url = "https://api.fastsimon.com/categories_navigation?UUID=fd0fdf80-b16c-4f2d-9178-d223135b8c8a&src=JSSDK&store_id=621532&cdn_cache_key=1710873119&facets_required=1&category_id=269812039747&page_num={page_num}&with_product_attributes=false&products_per_page=48"

    narrow_filters = []
    if price_range:
        narrow_filters.append(["Price_from_to", str(price_range)])
    narrow_filters.extend(["tag_filter_0.432", str(gender)] for gender in genders or ())
    narrow_filters.extend(
        ["tag_filter_0.982", f"color-{color}"] for color in colors or ()
    )

    # Serialize with json.dumps instead of hand-assembling the string so that
    # quotes or backslashes inside a value are escaped and the array stays
    # valid JSON. Compact separators and ensure_ascii=False keep the output
    # identical to the previous hand-built form for ordinary values.
    narrow = json.dumps(narrow_filters, separators=(",", ":"), ensure_ascii=False)
    return base_url.format(page_num=page_num) + f"&narrow={narrow}"

#### Define the filters

In [5]:
# Gender filter values, written the way the API expects them.
genders = "mens womens gender-kids".split()

# Color filter values; the "color-" prefix is added when the URL is built.
colors = (
    "black blue brown green grey multi "
    "orange pink red white yellow purple"
).split()

#### Loop through filter categories, collecting shoes and their features into a list of dictionaries

In [6]:
extended_data = []

# Seconds to wait on each API call, so a stalled connection cannot hang the
# whole scrape.
REQUEST_TIMEOUT = 30


def fetch_page(page_num, gender, color):
    """Return the decoded JSON payload for one results page of a gender/color filter."""
    response = requests.get(
        generate_request_url(page_num, genders=[gender], colors=[color]),
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    return response.json()


for color in colors:
    for gender in genders:
        # The first page also reports how many pages this filter pair has.
        first_page = fetch_page(1, gender, color)
        total_pages = first_page.get("total_p", 0)

        for page in range(1, total_pages + 1):
            # Reuse the page fetched above rather than requesting it a second time.
            page_data = first_page if page == 1 else fetch_page(page, gender, color)

            for shoe in page_data.get("items", []):
                shoe_name = shoe.get("l", "")
                sku = shoe.get("sku", "")
                price = shoe.get("p", "")

                # One output row per entry in "vra"; the shoe-level fields and
                # the active filter values are repeated on every row.
                for variant in shoe.get("vra", []):
                    variant_dict = {
                        "shoe_name": shoe_name,
                        "sku": sku,
                        "shoe_price": price,
                        "gender": gender,
                        "color": color,
                    }

                    # variant[1] is iterated as (key, values) pairs; only the
                    # first value of each pair is kept, or None when empty.
                    for detail in variant[1]:
                        key = detail[0]
                        value = detail[1][0] if detail[1] else None
                        variant_dict[key] = value

                    extended_data.append(variant_dict)

#### Convert the list into a dataframe

In [7]:
# One row per collected variant; the variant-level "Price" field is renamed
# to "shoe_price_unit".
df_extended = pd.DataFrame(extended_data)
df_extended = df_extended.rename(columns={"Price": "shoe_price_unit"})

#### Clean up

In [8]:
# Lowercase every column name.
lowercase_columns = df_extended.columns.str.lower()
df_extended.columns = lowercase_columns

# Keep only the part of the product name before the first " - ".
name_parts = df_extended["shoe_name"].str.split(" - ", expand=True)
df_extended["shoe_name"] = name_parts[0]

# The first space-delimited word of the trimmed name is the main name.
name_words = df_extended["shoe_name"].str.split(" ", expand=True)
df_extended["shoe_main_name"] = name_words[0]

# Keep only the part of the unit price before the first ":".
unit_price_parts = df_extended["shoe_price_unit"].str.split(":", expand=True)
df_extended["shoe_price_unit"] = unit_price_parts[0]

In [9]:
# Remove the "gender-" marker (e.g. "gender-kids" -> "kids"), then title-case
# both label columns for display.
gender_labels = df_extended["gender"].str.replace("gender-", "")
df_extended["gender"] = gender_labels.str.title()
df_extended["color"] = df_extended["color"].str.title()

In [10]:
# Columns carried into the analysis frame, in output order.
output_columns = [
    "sku",
    "barcode",
    "shoe_main_name",
    "shoe_name",
    "shoe_price",
    "shoe_price_unit",
    "gender",
    "color",
    "size",
    "weight",
    "sellable",
    "imgs",
]

# Copy so later edits to df do not touch df_extended.
df = df_extended[output_columns].copy()

---

## Analyze

#### How many unique products do they sell? 

In [11]:
# Count the distinct sku values.
unique_skus = df["sku"].unique()
len(unique_skus)

589

#### Aggregate data to get inventory count by gender and color

In [12]:
# Number of rows in df for each (gender, color) combination.
rows_per_gender_color = df.groupby(["gender", "color"]).size()
inventory_distribution = rows_per_gender_color.rename("inventory_count").reset_index()

#### Total inventory by gender

In [13]:
# Sum the per-color counts within each gender.
gender_totals = inventory_distribution.groupby("gender")["inventory_count"].sum()
inventory_by_gender = gender_totals.reset_index()
inventory_by_gender

Unnamed: 0,gender,inventory_count
0,Kids,1335
1,Mens,2257
2,Womens,1702


#### Most and least stocked colors overall

In [14]:
# Sum the per-gender counts within each color, largest total first.
color_totals = inventory_distribution.groupby("color")["inventory_count"].sum()
most_stocked_colors = color_totals.reset_index().sort_values(
    by="inventory_count", ascending=False
)
most_stocked_colors.head()

Unnamed: 0,color,inventory_count
0,Black,1212
10,White,753
4,Grey,690
2,Brown,671
1,Blue,571


In [15]:
# most_stocked_colors is sorted in descending order, so its last five rows
# are the least-stocked colors.
least_stocked_colors = most_stocked_colors.tail(5)
least_stocked_colors

Unnamed: 0,color,inventory_count
7,Pink,266
9,Red,104
11,Yellow,56
8,Purple,40
6,Orange,39


#### Analyze color distribution within each gender

In [16]:
# Reshape to one row per color and one column per gender.
color_by_gender_counts = inventory_distribution.pivot(
    index="color", columns="gender", values="inventory_count"
)

# Combinations with no rows become 0; "color" returns to a regular column.
color_distribution_by_gender = color_by_gender_counts.fillna(0).reset_index()

In [17]:
# Display the color-by-gender count table.
color_distribution_by_gender

gender,color,Kids,Mens,Womens
0,Black,318.0,520.0,374.0
1,Blue,123.0,302.0,146.0
2,Brown,123.0,269.0,279.0
3,Green,93.0,228.0,40.0
4,Grey,189.0,270.0,231.0
5,Multi,156.0,206.0,169.0
6,Orange,0.0,31.0,8.0
7,Pink,90.0,32.0,144.0
8,Purple,15.0,0.0,25.0
9,Red,33.0,71.0,0.0


In [18]:
# Back to long form: one row per (color, gender) pair.
# NOTE: despite its name, the "percentage" column holds the raw inventory
# counts from the pivot; the chart normalizes them when stacking.
color_distribution_by_gender_long = pd.melt(
    color_distribution_by_gender,
    id_vars="color",
    var_name="gender",
    value_name="percentage",
)

---

## Charts

#### Create the horizontal stacked bar chart

In [19]:
# Horizontal stacked bar chart: one bar per color, segmented by gender.
chart = (
    alt.Chart(
        # NOTE(review): gender values were rewritten to "Kids"/"Mens"/"Womens"
        # in the clean-up step, so this filter on "gender-kids" appears to
        # match nothing. Confirm whether Kids was meant to be excluded.
        color_distribution_by_gender_long.query('gender!="gender-kids"'),
        padding={"left": 10},
    )
    .mark_bar()
    .encode(
        # stack="normalize" rescales each bar to a share of its total, so the
        # raw counts stored in "percentage" are drawn as proportions.
        x=alt.X(
            "percentage:Q", stack="normalize", title="", axis=alt.Axis(tickCount=5)
        ),
        y=alt.Y("color:N", title="").sort("-x"),
        color=alt.Color("gender:N", title=""),
        # Keep the gender segments in the same order within every bar.
        order=alt.Order("gender", sort="ascending"),
        tooltip=[
            alt.Tooltip("color:N", title=""),
            alt.Tooltip("gender:N", title=""),
            # Shows the stored value of "percentage" (a raw count).
            alt.Tooltip("percentage:Q", title=""),
        ],
    )
    .properties(
        title="Color distribution by gender category",
        width=600,
        height=300,
    )
).configure_legend(orient="top")

chart.display()

---

## Shoe images

#### Create a directory

In [20]:
directory = "shoe_images"
# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

#### Filter the DataFrame to ensure unique image URLs

In [21]:
# Drop everything from the first "?" onward so each image URL loses its
# query string.
img_url_parts = df["imgs"].str.split("?", expand=True)
df["imgs"] = img_url_parts[0]

In [22]:
# Keep the first (sku, imgs) row for every distinct image URL.
sku_and_img = df[["sku", "imgs"]]
unique_imgs = sku_and_img.drop_duplicates(subset="imgs")

#### Iterate through the unique image URLs to download them

In [23]:
# NOTE: the download loop below is commented out, so this cell currently does
# nothing. As written it would save each image in unique_imgs to
# <directory>/<sku>.jpg and print a message for any failed download.
# for _, row in tqdm(unique_imgs.iterrows()):
#     img_url = row["imgs"]
#     sku = row["sku"]
#     file_path = os.path.join(directory, f"{sku}.jpg")

#     try:
#         response = requests.get(img_url)
#         if response.status_code == 200:
#             with open(file_path, "wb") as file:
#                 file.write(response.content)
#         else:
#             print(f"Failed to download {img_url}")
#     except Exception as e:
#         print(f"An error occurred while downloading {img_url}: {e}")

---

## Exports

#### JSON

In [24]:
# Make sure the output folder exists; to_json does not create it.
os.makedirs("data/processed", exist_ok=True)

# Write the inventory as a JSON array with one object per row.
df.to_json(
    "data/processed/hey_dudes_inventory.json",
    indent=4,
    orient="records",
)

#### CSV

In [25]:
# Make sure the output folder exists; to_csv does not create it.
os.makedirs("data/processed", exist_ok=True)

# Write the inventory as CSV without the DataFrame index column.
df.to_csv("data/processed/hey_dudes_inventory.csv", index=False)