In [None]:
from pathlib import Path

import matplotlib.pyplot as plt

from analysis_utils import load_results_from_folder, load_catalog_items
from chart_generator import chart_top_products, chart_confidence_hist
# Apply the ggplot style to every figure drawn in this notebook.
plt.style.use("ggplot")

# Folder passed to load_results_from_folder when the results are loaded.
cooler_results_folder = "../data/results_json/cooler_results"


In [None]:
# Load the results found under the configured folder (parsing is done by the
# analysis_utils helper) and preview the first rows.
df_results = load_results_from_folder(cooler_results_folder)
df_results.head(n=5)

In [None]:
# Load the catalog items through the analysis_utils helper and preview them.
df_catalog = load_catalog_items()
df_catalog.head(n=5)

In [None]:
# List the catalog's column labels; the join below relies on its "uuid" column.
df_catalog.columns

In [None]:

# Normalise both join keys to strings WITHOUT turning missing values into text.
# A plain `.astype(str)` converts NaN/None into the literal strings "nan"/"None",
# after which every `.notna()` check on product_uuid further down is always True
# and rows without a uuid are silently treated as real products.
# `.where(mask)` keeps the string form only where the original value was present
# and leaves NaN everywhere else. Re-running this cell is safe (idempotent).
results_key = df_results["product_uuid"]
df_results["product_uuid"] = results_key.astype(str).where(results_key.notna())

catalog_key = df_catalog["uuid"]
df_catalog["uuid"] = catalog_key.astype(str).where(catalog_key.notna())

# Left join: every results row is kept; catalog columns are NaN when no catalog
# row has a matching uuid. Catalog rows with a missing uuid are excluded from the
# right side because pandas matches null join keys against each other, which
# would attach such rows to every results row that has no product_uuid.
# Columns present in both frames get the "_det" / "_cat" suffixes.
df_joined = df_results.merge(
    df_catalog[df_catalog["uuid"].notna()],
    left_on="product_uuid",
    right_on="uuid",
    how="left",
    suffixes=("_det", "_cat"),
)

df_joined.head()


In [None]:
# Preview joined rows whose product_uuid is present.
df_joined.loc[df_joined["product_uuid"].notna()].head()


In [None]:
# Preview joined rows that have a non-null `name`.
df_joined.loc[df_joined["name"].notna()].head()


In [None]:
# How many joined rows lack a catalog-side uuid (True) versus have one (False).
(
    df_joined["uuid"]
    .isna()
    .value_counts()
)


In [None]:
# Print the joined frame's dtypes / non-null counts, then show its first rows.
df_joined.info()
df_joined.head(n=5)

In [None]:
# Keep only joined rows with a product_uuid; .copy() gives an independent frame
# so later edits to df_products cannot touch df_joined.
df_products = df_joined.loc[df_joined["product_uuid"].notna()].copy()


In [None]:
# Only a cell's last expression is rendered, so a bare `df_products.shape` on
# the first line was evaluated and thrown away. Print it explicitly, then let
# the preview be the cell's displayed value.
print(df_products.shape)
df_products.head()

In [None]:
# The 20 most frequent product names among the product rows.
(
    df_products["name"]
    .value_counts()
    .head(20)
)


In [None]:
# The 10 most frequent brands among the product rows.
(
    df_products["brand"]
    .value_counts()
    .head(10)
)


In [None]:
# The most frequent product_uuid values (first five by count).
(
    df_products["product_uuid"]
    .value_counts()
    .head(n=5)
)


In [None]:
# Summary statistics of the confidence score column.
df_products.loc[:, "score"].describe()


In [None]:
# Ten rows with the highest confidence score.
(
    df_products
    .sort_values(by="score", ascending=False)
    .head(10)
)


In [None]:
# Ten rows with the lowest confidence score.
(
    df_products
    .sort_values(by="score", ascending=True)
    .head(10)
)


In [None]:
# Quick-look histogram of the confidence scores using 20 bins.
score_column = df_products["score"]
score_column.hist(bins=20)


In [None]:
# Products with the largest average bounding-box area (top five).
(
    df_products
    .groupby("name")["area"]
    .mean()
    .sort_values(ascending=False)
    .head()
)


In [None]:
# The five individual rows with the largest area, showing name and area only.
(
    df_products
    .loc[:, ["name", "area"]]
    .sort_values(by="area", ascending=False)
    .head()
)


In [None]:
# Images with the most product rows (top five by row count).
(
    df_products
    .groupby("image_url")
    .size()
    .sort_values(ascending=False)
    .head()
)


In [None]:
# Rows that carry a product_uuid yet have no `name` after the join.
df_missing_catalog = df_joined.loc[
    df_joined["product_uuid"].notna() & df_joined["name"].isna()
]
df_missing_catalog


In [None]:
# df_missing_catalog (rows with a product_uuid but no `name`) is already built
# by the cell directly above; this cell used to rebuild the identical frame and
# dump every row again. Report its size and a bounded preview instead.
print(f"{len(df_missing_catalog)} rows have a product_uuid but no name")
df_missing_catalog.head()

In [None]:
# Row count per category_label across all joined rows.
(
    df_joined["category_label"]
    .value_counts()
)


In [None]:
# Share of rows for each of the ten most frequent product names.
top_products = df_products["name"].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(top_products, labels=top_products.index, autopct="%1.1f%%")
ax.set_title("Top 10 Most Detected Products")
plt.show()


In [None]:
# Share of rows for each of the ten most frequent brands.
brand_counts = df_products["brand"].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(brand_counts, labels=brand_counts.index, autopct="%1.1f%%")
ax.set_title("Top 10 Brands by Detection Count")
plt.show()


In [None]:
# Bar chart of the top-product counts computed in the pie-chart cell above.
fig, ax = plt.subplots(figsize=(12, 6))
top_products.plot(kind="bar", ax=ax)
ax.set_xlabel("Product")
ax.set_ylabel("Detection Count")
ax.set_title("Top 10 Most Detected Products")
# Slant the long product names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()
plt.show()


In [None]:
# The ten rows with the lowest confidence score.
lowest_conf = df_products.sort_values("score").head(10)

# Place bars by row position, not by product name. Using the names as the x
# values makes matplotlib treat them as categories, so two low-confidence rows
# of the same product collapse onto a single bar, and a missing name (no catalog
# match) cannot be placed on the categorical axis at all.
bar_positions = list(range(len(lowest_conf)))
bar_labels = lowest_conf["name"].fillna("(no catalog match)").astype(str)

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(bar_positions, lowest_conf["score"])
ax.set_xticks(bar_positions)
ax.set_xticklabels(bar_labels, rotation=45, ha="right")
ax.set_xlabel("Product")
ax.set_ylabel("Confidence Score")
ax.set_title("Lowest Confidence Detections")
fig.tight_layout()
plt.show()


In [None]:
# Distribution of confidence scores; missing scores are dropped before binning.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(df_products["score"].dropna(), bins=20)
ax.set_xlabel("Confidence Score")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Detection Confidence Scores")
plt.show()


In [None]:
# Confidence against bounding-box area; alpha=0.5 keeps dense regions readable.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df_products["area"], df_products["score"], alpha=0.5)
ax.set_xlabel("Bounding Box Area")
ax.set_ylabel("Confidence Score")
ax.set_title("Confidence vs. Object Area")
plt.show()


In [None]:
# Number of product rows per image, one bar per image_url.
detections_per_image = df_products.groupby("image_url").size()

fig, ax = plt.subplots(figsize=(12, 6))
detections_per_image.plot(kind="bar", ax=ax)
ax.set_title("Detections Per Image")
ax.set_ylabel("Count")
# Hide the x ticks entirely: the tick labels would be the image URLs.
ax.set_xticks([])
plt.show()


In [None]:
# Export the full joined frame and the products-only subset as CSV.
# to_csv does not create missing parent folders, so make sure "outputs" exists
# first; otherwise this cell fails on a fresh checkout.
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

df_joined.to_csv(output_dir / "df_joined.csv", index=False)
df_products.to_csv(output_dir / "df_products_only.csv", index=False)


In [None]:
# Build the top-products bar chart in this cell and save it through its own
# Figure handle. A bare plt.savefig() acts on pyplot's *current* figure; the
# charts above have already been shown (and, with the inline backend, closed),
# so the old call wrote an empty canvas to disk.
charts_dir = Path("outputs/charts")
charts_dir.mkdir(parents=True, exist_ok=True)  # savefig will not create it

top_products_to_save = df_products["name"].value_counts().head(10)

fig, ax = plt.subplots(figsize=(12, 6))
top_products_to_save.plot(kind="bar", ax=ax)
ax.set_xlabel("Product")
ax.set_ylabel("Detection Count")
ax.set_title("Top 10 Most Detected Products")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
fig.tight_layout()

# Save before showing so the file always contains the drawn chart.
fig.savefig(charts_dir / "top_products.png", dpi=300, bbox_inches="tight")
plt.show()


In [None]:
# Call the two chart_generator helpers on the products-only frame.
# NOTE(review): what each helper draws or writes is defined in chart_generator,
# not in this notebook; confirm whether they also save files to disk.
chart_top_products(df_products)
chart_confidence_hist(df_products)

In [None]:
# Rows that have a product_uuid but no `name` after the join.
df_missing = df_joined.loc[
    df_joined["name"].isna() & df_joined["product_uuid"].notna()
]
df_missing


