In [350]:
import glob
import io
import logging
import random
import os
import time
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import seaborn
from bs4 import BeautifulSoup
from IPython.display import HTML, Image, display
from PIL import Image
from plotly.io import to_image
from plotly.offline import plot
from plotly.subplots import make_subplots
from tenacity import retry, stop_after_attempt, wait_fixed, wait_random
from tqdm import tqdm


logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

tqdm.pandas()

In [351]:
config = {
    "data_folder": "../data/products/",
    "output_folder": "../output/",
}

In [352]:
def get_product_names():
    """Get the product names from the folders in the data folder."""
    folders = glob.glob(config["data_folder"] + "*/")
    product_names = [
        (folder, " ".join(folder.rstrip("/").split("_")[1:])) for folder in folders
    ]
    return product_names

In [353]:
products = get_product_names()
print(products)

[('../data/products/03_bead_bracelets_and_necklaces/', 'bead bracelets and necklaces'), ('../data/products/08_chinese_mid_autumn_gift_sets/', 'chinese mid autumn gift sets'), ('../data/products/12_chinese_pottery/', 'chinese pottery'), ('../data/products/07_chinese_incense/', 'chinese incense'), ('../data/products/13_chinese_magnets/', 'chinese magnets'), ('../data/products/04_paper_lanterns/', 'paper lanterns'), ('../data/products/06_chinese_bamboo_art/', 'chinese bamboo art'), ('../data/products/11_chinese_washi_tape/', 'chinese washi tape'), ('../data/products/10_chinese_art_stickers/', 'chinese art stickers'), ('../data/products/05_brushes_and_calligraphy_tools/', 'brushes and calligraphy tools'), ('../data/products/01_calligraphy_prints/', 'calligraphy prints'), ('../data/products/09_chinese_bookmarks/', 'chinese bookmarks'), ('../data/products/02_name_seals/', 'name seals')]


In [354]:
chinese_translations = {
    "bead bracelets and necklaces": "珠子手链和项链",
    "chinese mid autumn gift sets": "中秋节礼品套装",
    "chinese pottery": "中国陶器",
    "chinese incense": "中国香",
    "chinese magnets": "中国冰箱贴",
    "paper lanterns": "纸灯笼",
    "chinese bamboo art": "中国竹艺",
    "chinese washi tape": "中国和纸胶带",
    "chinese art stickers": "中国艺术贴纸",
    "brushes and calligraphy tools": "笔和书法工具",
    "calligraphy prints": "书法印刷品",
    "chinese bookmarks": "中国书签",
    "name seals": "印章",
}

In [355]:
def get_search_term_from_file_name(file_name: str) -> str:
    """Get the search term from the file name."""
    return file_name.replace("_product_detail.csv", "").replace("_", " ")


def get_product_data(data_folder: str, product_name: str) -> pd.DataFrame:
    """Get the data for a product, loaded into a Polars DataFrame."""
    files = glob.glob(data_folder + "/*.csv")
    dataframes = []
    for file in files:
        df = pd.read_csv(file)
        # Add the search term and product name to the dataframe
        df["search_term"] = get_search_term_from_file_name(os.path.basename(file))
        df["product_name"] = product_name
        df["Tags"] = df["Tags"].apply(lambda x: x.split(","))
        df['Price("$")'] = df['Price("$")'].astype(str)
        df["price"] = df['Price("$")'].str.replace(",", "").astype(float)
        df["proceeds"] = df["price"] * df["Total Sales"]
        df["has_sales"] = df["Total Sales"] > 0
        df["product_name_chinese_name"] = (
            df["product_name"]
            + " ("
            + df["product_name"].map(chinese_translations)
            + ")"
        )
        dataframes.append(df)

    # Concatenate the dataframes
    joined_df = pd.concat(dataframes)
    # Drop duplicates by URL
    joined_df = joined_df.drop_duplicates(subset=["Product URL"])
    return joined_df


def get_all_product_data(products: List[str]) -> pd.DataFrame:
    """Get all the product data for a list of products."""
    dataframes = []
    for product in products:
        dataframes.append(get_product_data(product[0], product[1]))
    return pd.concat(dataframes)

In [356]:
all_product_data = get_all_product_data(products)

In [357]:
# Remove the chinese magnets that are not actually magnets
all_product_data = all_product_data[
    ~(
        (all_product_data["product_name"] == "chinese magnets")
        & ~all_product_data["Product URL"].str.contains("magnet")
    )
]

# Remove washi tape that is not actually tape
all_product_data = all_product_data[
    ~(
        (all_product_data["product_name"] == "chinese washi tape")
        & ~all_product_data["Product URL"].str.contains("tape")
    )
]

In [358]:
all_product_data

Unnamed: 0,Title,Category,"Price(""$"")",7-day sales,Total Sales,Total Reviews,7-day Reviews,Total Favorites,7-day Favorites,Tags,...,Raving,Store Name,Product URL,Image URL,search_term,product_name,price,proceeds,has_sales,product_name_chinese_name
0,"Jade Plate, Type A Genuine Jade, Customizable Jadeite, Loose Gemstones, Fine Jadeite, Wholesale Gemstones, Curved Jade, Gifts for Friends",Craft Supplies & Tools,176.12,0,0,0,0,0,0,"[Jade, Handmade, Traditional, Loose Stone, Loose Gemstone, Jade Bead, Jade Loose Beads, Jewelry Supplies, Custom Jewelry, Jade Square, Green Bead, Gift of Her, Gifts for Girlfriend, Gifts for Mom, Gifts for Sister, Gifts for Wife, Anniversary Gifts, Birthday Gifts, Christmas Gifts, Gifts for Friends, Gifts for Her, Personalized Gifts, Gifts]",...,False,GranskyJewellery,https://www.etsy.com/listing/1632492895/jade-plate-type-a-genuine-jade,Upgrade Pro to Unlock,traditional stone bead jewelry,bead bracelets and necklaces,176.12,0.00,False,bead bracelets and necklaces (珠子手链和项链)
1,"Green/Pink Bangle set, 22k Gold Plated, White CZ Bangles Set, Kadas, Mix n Match bangles",Jewelry > Bracelets > Bangles,59.0,0,7,0,0,16,0,"[Bangles Set, Indian Bangles, Traditional Bangles, Wedding Bangles, Gold Plated Bangles, Kadas, Gold Bangles, Bracelet Bangles, Indian Jewelry, Black Stone Bangles, Brides Bangles, Black Bead Bangles, Gifts for Girlfriend, Gifts for Mom, Gifts for Sister, Gifts for Wife, Anniversary Gifts, Birthday Gifts, Christmas Gifts, Gifts for Her, Housewarming Gifts, Personalized Gifts, Gifts]",...,False,NemaliJewelry,https://www.etsy.com/listing/762738828/greenpink-bangle-set-22k-gold-plated,Upgrade Pro to Unlock,traditional stone bead jewelry,bead bracelets and necklaces,59.00,413.00,True,bead bracelets and necklaces (珠子手链和项链)
2,Catholic Rosary Beads. Semi Precious Turquoise Jasper Stone Beads. Traditional Rosary. 5 Decade Catholic Rosary. Catholic Gift.,Home & Living > Spirituality & Religion > Prayer Beads & Charms > Rosaries,59.99,0,0,0,0,44,0,"[Miraculous Mary, Womans Rosary, Catholic Gifts, Catholic Jewelry, Catholic Prayer Bead, Catholic, Jasper Stone Beads, Rosary Catholic, Catholic Gift, 5 Decade Rosary, Traditional Rosary, Jasper Beads, 5 Decade, Gifts]",...,False,RosariesByHeidi,https://www.etsy.com/listing/587645665/catholic-rosary-beads-semi-precious,Upgrade Pro to Unlock,traditional stone bead jewelry,bead bracelets and necklaces,59.99,0.00,False,bead bracelets and necklaces (珠子手链和项链)
3,Seed Bead Stitching - Creative Variations On Traditional Techniques By Beth Stone Paperback Beaded Jewelry Pattern Book 2007,Craft Supplies & Tools,8.75,0,5,0,0,4,0,"[Beading Book, Beaded Necklace, Making Jewelry, Beaded Jewelry, Beaded Bracelet, Beaded Earrings, Daisy Chain, Brick Stitch, Peyote Stitch, Beth Stone, Russian Stitches, Seed Bead Stitching, Spiral Rope Stitch]",...,False,NeedANeedle,https://www.etsy.com/listing/1024845656/seed-bead-stitching-creative-variations,Upgrade Pro to Unlock,traditional stone bead jewelry,bead bracelets and necklaces,8.75,43.75,True,bead bracelets and necklaces (珠子手链和项链)
4,"Jade Plate, Type A Genuine Jade, Customizable Jadeite, Loose Gemstones, Fine Jadeite, Wholesale Gemstones, Curved Jade, Father's Day Gifts",Jewelry > Necklaces > Charm Necklaces,251.88,0,0,0,0,3,0,"[Jade, Handmade, Traditional, Gemstones, Loose Stone, Loose Gemstone, Jade Bead, Jewelry Supplies, Custom Jewelry, Jade Square, Green Bead, Jade Loose Beads, Gift of Her, Gifts for Boyfriend, Gifts for Dad, Gifts for Girlfriend, Gifts for Husband, Gifts for Mom, Gifts for Sister, Gifts for Wife, Anniversary Gifts, Birthday Gifts, Christmas Gifts, Gifts for Friends, Gifts for Her, Gifts for Him, Housewarming Gifts, Personalized Gifts, Gifts]",...,False,GranskyJewellery,https://www.etsy.com/listing/1475770717/jade-plate-type-a-genuine-jade,Upgrade Pro to Unlock,traditional stone bead jewelry,bead bracelets and necklaces,251.88,0.00,False,bead bracelets and necklaces (珠子手链和项链)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,Vintage Boxed Pair of Chinese Stone Chop Seals Hand Carved Chinese Painting Oriental Calligraphy Oriental Home Decor,Home & Living > Home Decor > Ornaments & Accents > Ornaments,63.27,0,3,0,0,12,0,[False],...,False,VintageVarietyStudio,https://www.etsy.com/listing/1292904295/vintage-boxed-pair-of-chinese-stone-chop,Upgrade Pro to Unlock,chinese calligraphy seals,name seals,63.27,189.81,True,name seals (印章)
94,Craft Chinese Chop Cinnabar Stamp Ink Pad,Craft Supplies & Tools,9.0,0,0,0,0,0,0,"[Craft Supplies & Tools, Stamps & Seals, Stamps, Red ink, Chinese art, Chinese Calligraphy, Cinnabar Red Ink, Chinese Stamp chop, World of Bacara, gift idea, birthday present, christmas gift, new years present, home decor, Bamboo Design, blue white porcelain]",...,False,WorldofBacara,https://www.etsy.com/listing/1533034599/craft-chinese-chop-cinnabar-stamp-ink,Upgrade Pro to Unlock,chinese calligraphy seals,name seals,9.00,0.00,False,name seals (印章)
43,Personalized Korean Name Stamp Dojang Customize Korean Stamps Handmade Personalized Korean Dojang Chinese Stone Name Stamp Seal,Craft Supplies & Tools,45.99,0,16,0,0,65,0,"[Square, Korean Name Stamp, Dojang, Korean Stamps, Korean Dojang, Name Stamp, Personalized Gifts]",...,False,SemliCalligraphy,https://www.etsy.com/listing/703128950/personalized-korean-name-stamp-dojang,Upgrade Pro to Unlock,chinese name stamps,name seals,45.99,735.84,True,name seals (印章)
215,Customized Chinese name stone seal (Oval)/ Chinese seal/ Chinese name stamp/ japanese name stamp/ mandarin name/ soap stone/ personalised,Craft Supplies & Tools,53.21,0,0,4,0,94,0,"[Stone Seal, Seal, Stamps, Chinese, Oriental, Customize, Personalise, Name, Asian, Traditional, Chinese Name Stamp, Personlize, Personalized Gifts]",...,False,Baisimu,https://www.etsy.com/listing/603844244/customized-chinese-name-stone-seal-oval,Upgrade Pro to Unlock,chinese name stamps,name seals,53.21,0.00,False,name seals (印章)


In [305]:
# Output the data to a CSV file
all_product_data.to_csv(config["output_folder"] + "all_product_data.csv", index=False)

In [306]:
def to_html(fig, file_name: str):
    """Save a Plotly figure to an HTML file."""
    plot(fig, filename=file_name)


def format_col_for_title(col: str) -> str:
    """Format a column name for a title."""
    return " ".join(col.split("_")).title()


def plot_violin_plotly(data, x, y, remove_outliers=False):
    if remove_outliers:
        # Calculate the 1st and 3rd quartiles
        q1 = data[x].quantile(0.25)
        q3 = data[x].quantile(0.75)

        # Calculate the interquartile range (IQR)
        iqr = q3 - q1

        # Define the lower and upper bounds for outliers
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        # Filter the data to remove outliers
        data = data[(data[x] >= lower_bound) & (data[x] <= upper_bound)]

    fig = go.Figure(
        data=go.Violin(
            x=data[x],
            y=data[y],
            orientation="h",
            box_visible=True,
            meanline_visible=True,
            points=None,
            spanmode="hard",
        )
    )

    x_label = format_col_for_title(x)
    y_label = format_col_for_title(y)

    fig.update_layout(
        title=f"Violin Plot of {x_label} by {y_label}",
        xaxis_title=x_label,
        yaxis_title=y_label,
        template="simple_white",
    )

    return fig


def plot_bar_chart_plotly(
    data, x, y, sorted=True, title=None, x_label=None, y_label=None
):
    if sorted:
        data = data.sort_values(by=y, ascending=False)
    fig = go.Figure(
        go.Bar(
            x=data[x],
            y=data[y],
            name=format_col_for_title(y),
            text=data[y],
            textposition="auto",
        )
    )
    fig.update_layout(
        title=f"Bar Chart of {format_col_for_title(y)} by {format_col_for_title(x)}",
        xaxis_title=format_col_for_title(x) if x_label is None else x_label,
        yaxis_title=format_col_for_title(y) if y_label is None else y_label,
        template="simple_white",
    )
    if title:
        fig.update_layout(title=title)

    fig.update_traces(texttemplate="%{text:.2f}")
    return fig


def plot_bar_chart_plotly_with_dropdown(
    data_list, x, y, labels, sorted=True, title=None, x_label=None, y_label=None
):
    traces = []
    for data, label in zip(data_list, labels):
        if sorted:
            data = data.sort_values(by=y, ascending=False)
        trace = go.Bar(
            x=data[x],
            y=data[y],
            name=label,
            text=data[y],
            textposition="auto",
            visible=True if label == labels[0] else False,
        )
        traces.append(trace)

    fig = go.Figure(data=traces)

    # Add the dropdown menu
    fig.update_layout(
        updatemenus=[
            dict(
                buttons=[
                    dict(
                        args=[{"visible": [label == selected for label in labels]}],
                        label=selected,
                        method="update",
                    )
                    for selected in labels
                ],
                direction="down",
                pad={"r": 2, "t": 12},
                showactive=True,
                x=0,
                xanchor="left",
                y=1.3,
                yanchor="top",
            )
        ]
    )

    fig.update_layout(
        title=(
            f"Bar Chart of {format_col_for_title(y)} by {format_col_for_title(x)}"
            if title is None
            else title
        ),
        xaxis_title=format_col_for_title(x) if x_label is None else x_label,
        yaxis_title=format_col_for_title(y) if y_label is None else y_label,
        template="simple_white",
    )

    fig.update_traces(texttemplate="%{text:.2f}")

    return fig

In [307]:
# Median price by product
fig = plot_bar_chart_plotly(
    all_product_data.groupby("product_name_chinese_name")
    .agg({"price": "median"})
    .reset_index(),
    x="product_name_chinese_name",
    y="price",
    title="Median Price by Product （产品价格中位数）",
    x_label="Product Name",
    y_label="Median Price ($)",
)

fig.show()

In [308]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["Total Sales"]
    .median()
    .reset_index(),
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["Total Sales"]
    .median()
    .reset_index(),
]

# Median total sales by product
fig = plot_bar_chart_plotly_with_dropdown(
    data_list,
    labels=["All Products", "Products with Sales"],
    x="product_name_chinese_name",
    y="Total Sales",
    title="Median Total Sales by Product（产品平均销售量）",
    x_label="Product Name",
    y_label="Median Total Sales",
)

fig.show()

In [309]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["Total Sales"]
    .sum()
    .reset_index()
]

fig = plot_bar_chart_plotly(
    data_list[0],
    x="product_name_chinese_name",
    y="Total Sales",
    title="Total Sales by Product（产品总销售量）",
    x_label="Product Name",
    y_label="Total Sales",
)

fig.show()

In [310]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["proceeds"]
    .sum()
    .reset_index()
]

fig = plot_bar_chart_plotly(
    data_list[0],
    x="product_name_chinese_name",
    y="proceeds",
    title="Total Revenue by Product（产品总销售额）",
    x_label="Product Name",
    y_label="Total Revenue",
)

fig.show()

In [311]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["proceeds"]
    .median()
    .reset_index(),
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["proceeds"]
    .median()
    .reset_index(),
]

labels = ["All Products", "Products with Sales"]

fig = plot_bar_chart_plotly_with_dropdown(
    data_list=data_list,
    x="product_name_chinese_name",
    y="proceeds",
    labels=labels,
    title="Median Revenue by Product（产品销售额中位数）",
    x_label="Product Name",
    y_label="Median Revenue ($)",
)

fig.show()
# to_html(fig, config["output_folder"] + "median_proceeds_by_product.html")

In [312]:
def plot_scatter_plotly_with_dropdown(
    data, x, y, color, color_labels, title=None, x_label=None, y_label=None
):
    traces = []
    for label in color_labels:
        filtered_data = data[data[color] == label]
        trace = go.Scatter(
            x=filtered_data[x],
            y=filtered_data[y],
            mode="markers",
            name=label,
            visible=True if label == color_labels[0] else False,
        )
        traces.append(trace)

    fig = go.Figure(data=traces)

    # Add the dropdown menu
    fig.update_layout(
        updatemenus=[
            dict(
                buttons=[
                    dict(
                        args=[
                            {"visible": [label == selected for label in color_labels]}
                        ],
                        label=selected,
                        method="update",
                    )
                    for selected in color_labels
                ],
                direction="down",
                pad={"r": 10, "t": 10},
                showactive=True,
                x=0.1,
                xanchor="left",
                y=1.1,
                yanchor="top",
            )
        ]
    )

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        template="simple_white",
    )

    return fig

In [313]:
color_labels = all_product_data["product_name_chinese_name"].unique()

fig = plot_scatter_plotly_with_dropdown(
    data=all_product_data,
    x="price",
    y="Total Sales",
    color="product_name_chinese_name",
    color_labels=color_labels,
    title="Price vs Total Sales by Product Category",
    x_label="Price ($)",
    y_label="Total Sales",
)

fig.show()

In [316]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["Product URL"]
    .nunique()
    .reset_index(),
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["Product URL"]
    .nunique()
    .reset_index(),
]


fig1 = plot_bar_chart_plotly_with_dropdown(
    data_list,
    labels=["All Products", "Products with Sales"],
    x="product_name_chinese_name",
    y="Product URL",
    title="Number of Unique Products （产品数目）",
    x_label="Product Name",
    y_label="Number of Unique Products",
)


# Plot Percentage of products with sales per product
fig2 = plot_bar_chart_plotly(
    all_product_data.groupby("product_name_chinese_name")["has_sales"]
    .mean()
    .reset_index(),
    x="product_name_chinese_name",
    y="has_sales",
    title="Percentage of Products with Sales （有销售量的产品的百分比）",
    x_label="Product Name",
    y_label="Percentage of Products with Sales (%)",
)

fig1.show()
fig2.show()

In [317]:
data_list = [
    all_product_data.groupby("product_name_chinese_name")["Store Name"]
    .nunique()
    .reset_index(),
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["Store Name"]
    .nunique()
    .reset_index(),
]


# Number of unique stores by product
fig1 = plot_bar_chart_plotly_with_dropdown(
    data_list,
    labels=["All Products", "Products with Sales"],
    x="product_name_chinese_name",
    y="Store Name",
    title="Number of Unique Stores by Product （商店的数量）",
    x_label="Product Name",
    y_label="Number of Unique Stores",
)

# Percentage of stores with sales by product
# This should basically be the division of the two previous plots
stores_with_sales = (
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["Store Name"]
    .nunique()
    .reset_index()
)
stores = (
    all_product_data.groupby("product_name_chinese_name")["Store Name"]
    .nunique()
    .reset_index()
)
stores_with_sales = stores_with_sales.merge(stores, on="product_name_chinese_name")
stores_with_sales["percentage"] = (
    stores_with_sales["Store Name_x"] / stores_with_sales["Store Name_y"]
)

# Plot the percentage of stores with sales by product
fig2 = plot_bar_chart_plotly(
    stores_with_sales,
    x="product_name_chinese_name",
    y="percentage",
    title="Percentage of Stores with Sales by Product (有销售量的商店的百分比)",
    x_label="Product Name",
    y_label="Percentage of Stores with Sales (%)",
)

fig1.show()
fig2.show()

In [318]:
def plot_heatmap_plotly(
    data,
    x,
    y,
    z,
    title=None,
    x_label=None,
    y_label=None,
    log_scale=False,
    colorbar_title=None,
):
    if log_scale:
        data[z] = np.log(data[z] + 1)
    fig = go.Figure(
        data=go.Heatmap(
            x=data[x],
            y=data[y],
            z=data[z],
            colorscale="blues_r",
            hoverongaps=False,
            colorbar_title=colorbar_title,
        )
    )

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        template="plotly_dark",
        width=1000,
        height=1000,
    )

    return fig

In [319]:
product_store_sales = (
    all_product_data[all_product_data["has_sales"] == True]
    .groupby(["product_name_chinese_name", "Store Name"])["Total Sales"]
    .sum()
    .reset_index()
)

# Calculate the total sales for each product
product_total_revenue = (
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["Total Sales"]
    .sum()
)

# Calculate the percentage of total sales for each product in each store
product_store_sales["Percentage of Total Sales"] = product_store_sales.apply(
    lambda row: row["Total Sales"]
    / product_total_revenue[row["product_name_chinese_name"]],
    axis=1,
)

# Create the heatmap
fig = plot_heatmap_plotly(
    data=product_store_sales,
    x="product_name_chinese_name",
    y="Store Name",
    z="Percentage of Total Sales",
    title="Percentage of Total Sales by Product and Store (With Sales)",
    x_label="Product Name",
    y_label="Store Name",
    colorbar_title="%",
)

fig.show()

In [320]:
product_store_sales = (
    all_product_data[all_product_data["has_sales"] == True]
    .groupby(["product_name_chinese_name", "Store Name"])["proceeds"]
    .sum()
    .reset_index()
)

# Calculate the total proceeds for each product
product_total_revenue = (
    all_product_data[all_product_data["has_sales"] == True]
    .groupby("product_name_chinese_name")["proceeds"]
    .sum()
)

# Calculate the percentage of total proceeds for each product in each store
product_store_sales["Percentage of Total Revenue"] = product_store_sales.apply(
    lambda row: row["proceeds"]
    / product_total_revenue[row["product_name_chinese_name"]],
    axis=1,
)

# Create the heatmap
fig = plot_heatmap_plotly(
    data=product_store_sales,
    x="product_name_chinese_name",
    y="Store Name",
    z="Percentage of Total Revenue",
    title="Percentage of Total Revenue by Product and Store (With Sales)",
    x_label="Product Name",
    y_label="Store Name",
    colorbar_title="%",
)

fig.show()

In [291]:
# Get top 10 chinese magnet products by sales
urls = (
    all_product_data[all_product_data["product_name"] == "chinese magnets"]
    .sort_values(by="Total Sales", ascending=False)
    .head(10)["Product URL"]
    .to_list()
)

urls

['https://www.etsy.com/listing/995735919/jackie-chan-adventures-12-magnetic',
 'https://www.etsy.com/listing/773097825/rat-pin-or-magnet-rat-magnet-rat-art',
 'https://www.etsy.com/listing/928983426/cute-cat-feng-shui-magnetic-bookmark',
 'https://www.etsy.com/listing/1255275264/japanese-fan-magnets-cherry-blossoms',
 'https://www.etsy.com/listing/942924669/cute-cat-feng-shui-magnetic-bookmark',
 'https://www.etsy.com/listing/173899723/fridge-serpent-refrigerator-magnet',
 'https://www.etsy.com/listing/712364491/chinese-pattern-fridge-magnets-planner',
 'https://www.etsy.com/listing/859161210/yin-yang-pin-gift-packaged-magnetic-back',
 'https://www.etsy.com/listing/1027716447/lot-of-3-vintage-90s-magnetic-travel',
 'https://www.etsy.com/listing/897498128/magnets-japanese-style-yuzen-chiyogami']

In [344]:
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1) + wait_random(0, 3))
def fetch_image(url):
    # logging.info(f"Attempting to fetch image from: {url}")
    try:
        response = requests.get(
            url,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
            },
            timeout=15,
        )
        if response.status_code == 200:
            # logging.info(f"Successfully fetched image from: {url}")
            return response
        else:
            # logging.error(
            #    f"Failed to fetch image from: {url}. Status code: {response.status_code}"
            # )
            raise Exception("Failed to fetch image")
    except Exception as e:
        # logging.error(f"Error fetching image from: {url}. Error: {str(e)}")
        raise e


@retry(
    stop=stop_after_attempt(5),
    wait=wait_fixed(1) + wait_random(0, 3),
    retry_error_callback=lambda _: "",
)
def fetch_image_url(url):
    try:
        # logging.info(f"Attempting to fetch image URL from: {url}")
        response = requests.get(
            url,
            timeout=15,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
            },
        )
        soup = BeautifulSoup(response.text, "html.parser")
        img_tag = soup.find("img")
        if img_tag and img_tag.get("src"):
            # logging.info(f"Successfully fetched image URL from: {url}")
            return img_tag
        else:
            raise Exception("No image found")
    except Exception as e:
        # logging.error(f"Error fetching image for URL: {url}. Error: {str(e)}")
        html += f"<tr><td>Error fetching image: {str(e)}</td></tr>"

In [293]:
products = all_product_data.product_name_chinese_name.unique().tolist()
top_products = {}
for product in products:
    top_products[product] = (
        all_product_data[all_product_data["product_name_chinese_name"] == product]
        .sort_values(by="Total Sales", ascending=False)
        .head(10)["Product URL"]
        .to_list()
    )

In [296]:
pd.DataFrame(top_products)

Unnamed: 0,bead bracelets and necklaces (珠子手链和项链),chinese mid autumn gift sets (中秋节礼品套装),chinese pottery (中国陶器),chinese incense (中国香),chinese magnets (中国冰箱贴),paper lanterns (纸灯笼),chinese bamboo art (中国竹艺),chinese washi tape (中国和纸胶带),chinese art stickers (中国艺术贴纸),brushes and calligraphy tools (笔和书法工具),calligraphy prints (书法印刷品),chinese bookmarks (中国书签),name seals (印章)
0,https://www.etsy.com/listing/1303517549/natural-jade-bracelet-hetian-jade-beaded,https://www.etsy.com/listing/1277366663/traditional-asian-chinese-mooncakes,https://www.etsy.com/listing/1159288023/free-shipping-6-oval-moss-green-glazed,https://www.etsy.com/listing/786123821/vintage-style-copper-incense-burner,https://www.etsy.com/listing/995735919/jackie-chan-adventures-12-magnetic,https://www.etsy.com/listing/742923687/paper-lantern-round-chinese-paper,https://www.etsy.com/listing/712035016/watercolor-bamboo-instant-download,https://www.etsy.com/listing/1167525912/top-seller-from-the-library-of-book,https://www.etsy.com/listing/966657972/asian-food-animal-sticker-pack-of-5,https://www.etsy.com/listing/1150867936/traditional-ink-brush-set-procreatesumi,https://www.etsy.com/listing/613075037/bathroom-decor-wash-your-worries-away,https://www.etsy.com/listing/121885795/bookmarks-book-marks-chinese-oriental,https://www.etsy.com/listing/838077651/square-japanese-hanko-chop-japanese-name
1,https://www.etsy.com/listing/1270040450/green-jade-earrings-dangle-earrings,https://www.etsy.com/listing/1269843970/16-pieces-fresh-made-mooncake-gift-sets,https://www.etsy.com/listing/736780709/12mm-longevity-beads-12mm-porcelain,https://www.etsy.com/listing/784003647/chinese-metal-incense-burner-censer,https://www.etsy.com/listing/773097825/rat-pin-or-magnet-rat-magnet-rat-art,https://www.etsy.com/listing/1191818041/paper-lanterns-wedding-lanterns-chinese,https://www.etsy.com/listing/670739281/panda-pile-buttons-by-dress-it-up-jesse,https://www.etsy.com/listing/687412636/warning-stickers-mk-ii,https://www.etsy.com/listing/717003158/6-sheets-ink-painting-stickers-the-24,https://www.etsy.com/listing/831448594/large-calligraphy-brush-penchinese,https://www.etsy.com/listing/578630606/any-quote-personalised-chinese,https://www.etsy.com/listing/1214165938/tian-guan-ci-fu-ebony-bookmark-tianguan,https://www.etsy.com/listing/498471491/engraved-dragon-chinese-chop-stone-seal
2,https://www.etsy.com/listing/114605721/natural-white-jade-smooth-round-beads,https://www.etsy.com/listing/1244281681/pink-embroidered-floral-lightweight,https://www.etsy.com/listing/168153677/rabbit-bunny-rabbit-porcelain-rabbit,https://www.etsy.com/listing/1165574622/pure-copper-mini-censer,https://www.etsy.com/listing/928983426/cute-cat-feng-shui-magnetic-bookmark,https://www.etsy.com/listing/1031757709/minimalist-origami-lampshade-white-paper,https://www.etsy.com/listing/1045365395/wood-bookmark-with-chinese-painting-art,https://www.etsy.com/listing/1413455244/custom-name-necklace-18k-gold-plated,https://www.etsy.com/listing/1096533961/lazy-bear-matte-sticker-sheet-cute,https://www.etsy.com/listing/794081778/painting-writing-brush-watercolor,https://www.etsy.com/listing/200155276/chinese-art-vintage-nature-landscape,https://www.etsy.com/listing/89479610/sale-owl-bookmark-antiqued-bronze,https://www.etsy.com/listing/735559121/custom-name-seal-stone-seal-carving
3,https://www.etsy.com/listing/803945124/jade-feng-shui-bracelet,https://www.etsy.com/listing/1298226061/moon-festival-lantern-original-enamel,https://www.etsy.com/listing/1497308671/porcelain-dragon-tea-cup-115ml-green,https://www.etsy.com/listing/895146735/round-incense-plate-with-seperate-holder,https://www.etsy.com/listing/1255275264/japanese-fan-magnets-cherry-blossoms,https://www.etsy.com/listing/1058609686/eyelet-paper-lantern-round-chinese-paper,https://www.etsy.com/listing/1314628888/wall-shelf-living-room-cabinet-bedroom,https://www.etsy.com/listing/1167562350/personalized-cutting-board-wedding-gift,https://www.etsy.com/listing/1011193252/stamp-queen-sticker-desi-stamp-sticker,https://www.etsy.com/listing/235003549/calligraphy-painting-tools-starter-sumi,https://www.etsy.com/listing/1126738487/enso-circle-wall-art-japanese,https://www.etsy.com/listing/1025879661/the-bride-test-bookmarks-the-kiss,https://www.etsy.com/listing/1275293621/customized-square-sandalwood-stamp
4,https://www.etsy.com/listing/1214233664/authentic-green-jade-beads-bracelet,https://www.etsy.com/listing/1540882222/gift-set-6pcs-savory-salted-egg-yolk,https://www.etsy.com/listing/948976648/hand-painted-ceramic-coffee-mug-creative,https://www.etsy.com/listing/838556888/censer-incense-burner-copper-incense,https://www.etsy.com/listing/942924669/cute-cat-feng-shui-magnetic-bookmark,https://www.etsy.com/listing/1055915794/strawberry-paper-lantern-chinese-paper,https://www.etsy.com/listing/1302591642/mahogany-carving-mini-furniture-model,https://www.etsy.com/listing/1295695410/custom-pet-portraits-using-pet-photo,https://www.etsy.com/listing/1138284039/holographic-gomi-soda-glossy-vinyl,https://www.etsy.com/listing/1000108455/claborate-painting-brush-watercolor,https://www.etsy.com/listing/1166588216/love-mandala-hebrew-calligraphy-art-fine,https://www.etsy.com/listing/583093453/chinese-box-scrap-ephemera-kit-2,https://www.etsy.com/listing/829885468/custom-chinese-name-chop-personal-asian
5,https://www.etsy.com/listing/1202442975/guanyin-bodhisattva-carved-green-jade,https://www.etsy.com/listing/911353476/10pcs-pixiu-grade-a-jade-natual-burma,https://www.etsy.com/listing/1053467836/watercolor-pink-hydrangea-chinoiserie,https://www.etsy.com/listing/914381225/chinese-antique-pure-copper-small,https://www.etsy.com/listing/173899723/fridge-serpent-refrigerator-magnet,https://www.etsy.com/listing/1386032695/dragon-paper-lantern-crafts-chinese-new,https://www.etsy.com/listing/1335811495/tea-tray-tea-set-tray-new-high-grade,https://www.etsy.com/listing/1144768652/personalised-gaming-snacks-jar-sticker,https://www.etsy.com/listing/1462986664/45-colorful-ink-painting-stickers,https://www.etsy.com/listing/528343871/rare-calligraphy-tools-porcelain-ink-pot,https://www.etsy.com/listing/687551302/blank-chinese-or-japanese-scroll-for,https://www.etsy.com/listing/944830498/black-lantern-metal-bookmark-deep-black,https://www.etsy.com/listing/728273678/custom-name-sealseal-carvingchinese-seal
6,https://www.etsy.com/listing/1133477746/green-jade-14k-gold-plated-earrings-tiny,https://www.etsy.com/listing/1351168084/chinese-new-year-scavenger-hunt-for-kids,https://www.etsy.com/listing/1194003477/loose-leaf-tea-set-tea-gift-set,https://www.etsy.com/listing/1115379703/chinese-antique-boutique-collection-of,https://www.etsy.com/listing/712364491/chinese-pattern-fridge-magnets-planner,https://www.etsy.com/listing/564245404/vintage-paper-lantern-washi-tape-paper,https://www.etsy.com/listing/1018043219/ba-ba-chinese-dad-bamboo-coffee-mug,https://www.etsy.com/listing/726364447/customized-dog-socks-put-your-cute-dog,https://www.etsy.com/listing/62456401/tall-bamboo-wall-decal-vinyl-wall,https://www.etsy.com/listing/643275849/chinese-brushes-set-chinese-calligraphy,https://www.etsy.com/listing/613261572/taekwondo-principles-gold-foil-print,https://www.etsy.com/listing/1463242050/chinese-style-calligraphy-bookmarks-buy,https://www.etsy.com/listing/710054594/chinese-name-seal-chinese-custom-seal
7,https://www.etsy.com/listing/747799956/1-pc-silk-cord-knot-necklace-25-inches,https://www.etsy.com/listing/894218438/yingmart-set-5-traditional-japanese,https://www.etsy.com/listing/1136726592/blue-porcelain-art-ring,https://www.etsy.com/listing/1333718112/traditional-chinese-incense-270pcs,https://www.etsy.com/listing/859161210/yin-yang-pin-gift-packaged-magnetic-back,https://www.etsy.com/listing/1302413414/sets-of-brightly-coloured-christmas,https://www.etsy.com/listing/1315888861/bogu-rack-hanging-wall-solid-wood,https://www.etsy.com/listing/1201385914/personalized-song-arcylic-plaque-mothers,https://www.etsy.com/listing/735317524/40-ink-painting-stickers-chinese,https://www.etsy.com/listing/1121862412/chinese-calligraphy-brush-japanese,https://www.etsy.com/listing/701140431/custom-japanese-calligraphy-scroll,https://www.etsy.com/listing/1145715788/winter-water-lily-metal-bookmark,https://www.etsy.com/listing/738772594/custom-japanese-name-seal-stamphanko
8,https://www.etsy.com/listing/1145393776/real-jade-flower-rose-necklace-light,https://www.etsy.com/listing/1408356134/rainbow-plush-cute-bunny-ears-backpack,https://www.etsy.com/listing/1436331148/chinese-traditional-retro-style-tea,https://www.etsy.com/listing/1282254321/tibet-ancient-bronze-cicada-incense,https://www.etsy.com/listing/1027716447/lot-of-3-vintage-90s-magnetic-travel,https://www.etsy.com/listing/1048917042/halloween-paper-lantern-halloween-party,https://www.etsy.com/listing/568817815/antique-chinese-bamboo-yarn-holder-skein,https://www.etsy.com/listing/1297674073/personalised-santa-sack-christmas-sack,https://www.etsy.com/listing/1205355980/suatelier-china-themed-stickers,https://www.etsy.com/listing/960301313/chinese-hsk-1-character-book-writing,https://www.etsy.com/listing/829383002/japanese-print-koi-carp-sumi-e-fish,https://www.etsy.com/listing/1389490042/handmade-self-embroidered-beginner,https://www.etsy.com/listing/748288805/custom-name-sealseal-carvingchinese-seal
9,https://www.etsy.com/listing/1315511846/chinese-jade-crystal-bracelet-round,https://www.etsy.com/listing/522536243/chinese-stone-seal-set-the-moon-story,https://www.etsy.com/listing/1296640961/ramen-noodle-bowl-with-chopsticks,https://www.etsy.com/listing/1064821971/creativity-retro-chinese-hand-holding,https://www.etsy.com/listing/897498128/magnets-japanese-style-yuzen-chiyogami,https://www.etsy.com/listing/1142882158/chinese-new-year-embosser-cookie-cutters,https://www.etsy.com/listing/1238666007/chinese-antique-collection-handcrafted,https://www.etsy.com/listing/1480621971/custom-compact-mirror-bridesmaid,https://www.etsy.com/listing/1138263923/cloud-bear-matte-sticker-sheet-cute,https://www.etsy.com/listing/996549950/chinese-calligraphywatercolorkanjisumi,https://www.etsy.com/listing/1048560009/chinese-art-print-asian-woman-sipping,https://www.etsy.com/listing/1256747427/butterfly-exquisite-metal-bookmark-for,https://www.etsy.com/listing/618959279/red-ink-paste-chinese-ink-set-chinese


In [299]:
def fetch_and_display_images(product_dict, batch_size=10):
    html = "<table>"
    for product_name, url_list in product_dict.items():
        html += f"<tr><th colspan='10'>{product_name}</th></tr><tr>"
        count = 0
        for url in url_list:
            try:
                img_tag = fetch_image_url(url)
                if img_tag and img_tag.get("src"):
                    image_response = fetch_image(img_tag["src"])
                    html += f"<td><img src='{image_response.url}' style='width:150px;'></td>"
                else:
                    html += "<td>No image found</td>"
                count += 1
                if count % batch_size == 0:
                    html += "</tr><tr>"
            except Exception as e:
                html += f"<td>Error fetching image: {str(e)}</td>"
        html += "</tr>"
        html += (
            "<tr><td colspan='10'>&nbsp;</td></tr>"  # Add an empty row for separation
        )
    html += "</table>"
    display(HTML(html))

In [300]:
fetch_and_display_images(top_products)

2024-04-14 05:12:56,751 - INFO - Attempting to fetch image URL from: https://www.etsy.com/listing/1303517549/natural-jade-bracelet-hetian-jade-beaded
2024-04-14 05:12:57,568 - INFO - Successfully fetched image URL from: https://www.etsy.com/listing/1303517549/natural-jade-bracelet-hetian-jade-beaded
2024-04-14 05:12:57,571 - INFO - Attempting to fetch image from: https://i.etsystatic.com/9406609/c/2048/1627/0/8/il/670261/3763267256/il_340x270.3763267256_5wzw.jpg
2024-04-14 05:12:57,812 - INFO - Successfully fetched image from: https://i.etsystatic.com/9406609/c/2048/1627/0/8/il/670261/3763267256/il_340x270.3763267256_5wzw.jpg
2024-04-14 05:12:57,812 - INFO - Attempting to fetch image URL from: https://www.etsy.com/listing/1270040450/green-jade-earrings-dangle-earrings
2024-04-14 05:12:57,865 - ERROR - Error fetching image for URL: https://www.etsy.com/listing/1270040450/green-jade-earrings-dangle-earrings. Error: No image found
2024-04-14 05:13:00,579 - INFO - Attempting to fetch image

bead bracelets and necklaces (珠子手链和项链),bead bracelets and necklaces (珠子手链和项链).1,bead bracelets and necklaces (珠子手链和项链).2,bead bracelets and necklaces (珠子手链和项链).3,bead bracelets and necklaces (珠子手链和项链).4,bead bracelets and necklaces (珠子手链和项链).5,bead bracelets and necklaces (珠子手链和项链).6,bead bracelets and necklaces (珠子手链和项链).7,bead bracelets and necklaces (珠子手链和项链).8,bead bracelets and necklaces (珠子手链和项链).9
,,,,,,,,,
,,,,,,,,,
,,,,,,,,,
chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装),chinese mid autumn gift sets (中秋节礼品套装)
,,,,,Error fetching image: RetryError[],,,,
,,,,,,,,,
chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器),chinese pottery (中国陶器)
,,,,,,,,,
,,,,,,,,,
,,,,,,,,,


In [360]:
def _lambda_fetch_image_url(x):
    try:
        url = fetch_image_url(x)["src"]
    except Exception as e:
        url = None
    return url


all_product_data["Image URL"] = all_product_data["Product URL"].progress_apply(
    _lambda_fetch_image_url
)

 58%|█████▊    | 2081/3578 [1:50:42<55:02,  2.21s/it]  