In [13]:
import pandas as pd
import numpy as np

In [14]:
df = pd.read_csv("/Users/shivanishrivastava/Desktop/Complete_the_Look/metadata.csv")
df.head()

Unnamed: 0,filename,masterCategory,garment,url
0,short_sleeve_top_0.jpg,top,short sleeve top,http://assets.myntassets.com/v1/images/style/p...
1,short_sleeve_top_1.jpg,top,short sleeve top,http://assets.myntassets.com/v1/images/style/p...
2,short_sleeve_top_2.jpg,top,short sleeve top,http://assets.myntassets.com/v1/images/style/p...
3,short_sleeve_top_3.jpg,top,short sleeve top,http://assets.myntassets.com/v1/images/style/p...
4,short_sleeve_top_4.jpg,top,short sleeve top,http://assets.myntassets.com/v1/images/style/p...


In [26]:
df['garment'].value_counts(173)

garment
short sleeve top    1.0
Name: proportion, dtype: float64

In [15]:
import os
import time
import base64
import traceback
import pandas as pd
import random
from google import genai
from google.genai import types

# ────────────────────────────────────────────────────────────────────────────────
# 1. Configs
# ────────────────────────────────────────────────────────────────────────────────
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("⚠️ GEMINI_API_KEY missing. Set it via Kaggle Secrets or os.environ.")

client = genai.Client(api_key=GEMINI_API_KEY)
MODEL_NAME = "gemini-2.5-flash-preview-04-17"
SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
- Do not mention background, location, or photographer.
""".strip()

# ────────────────────────────────────────────────────────────────────────────────
# 2. Helper Functions
# ────────────────────────────────────────────────────────────────────────────────
def load_image_as_data_uri(path: str) -> str:
    with open(path, "rb") as f:
        raw = f.read()
    b64 = base64.b64encode(raw).decode("utf-8")
    return f"data:image/jpeg;base64,{b64}"

def generate_apparel_description(data_uri: str, max_retries: int = 5) -> str | None:
    user_prompt = (
        f"Here is an image: {data_uri}\n\n"
        "Please describe only the garment. If a person is wearing it, do NOT describe their face or pose—"
        "only note gender (e.g. 'woman wearing…') and focus on color, style, fit, etc."
    )

    for attempt in range(1, max_retries + 1):
        try:
            system_content = types.Content(
                role="model",
                parts=[types.Part.from_text(text=SYSTEM_PROMPT)]
            )
            user_content = types.Content(
                role="user",
                parts=[types.Part.from_text(text=user_prompt)]
            )

            config = types.GenerateContentConfig(response_mime_type="text/plain")
            stream = client.models.generate_content_stream(
                model=MODEL_NAME,
                contents=[system_content, user_content],
                config=config,
            )

            full = "".join(chunk.text for chunk in stream)
            return full.strip()

        except Exception as e:
            err = str(e)
            print(f"[ERROR] Attempt {attempt}/{max_retries} → {err}")
            traceback.print_exc()
            if "ratelimit" in err.lower() or "quota" in err.lower():
                wait = min(60, 2 ** attempt)
                print(f"[WAIT] Sleeping for {wait}s due to rate limit.")
                time.sleep(wait)
            else:
                break
    return None

# ────────────────────────────────────────────────────────────────────────────────
# 3. Main Processing Loop
# ────────────────────────────────────────────────────────────────────────────────
CSV_PATH = "/Users/shivanishrivastava/Desktop/Complete_the_Look/metadata.csv"
IMAGES_DIR = "/Users/shivanishrivastava/Desktop/Complete_the_Look/images"
CHECKPOINT_EVERY = 100
OUT_CSV_PATH = "with_descriptions_checkpoint.csv"

df = pd.read_csv(CSV_PATH)
if "description" not in df.columns:
    df["description"] = ""

# Resume logic
if os.path.exists(OUT_CSV_PATH):
    df_ckpt = pd.read_csv(OUT_CSV_PATH)
    df["description"] = df_ckpt["description"]
    print(f"[RESUME] Loaded checkpointed descriptions from {OUT_CSV_PATH}")

# Process only first 5 rows
for idx, row in df.head(5).iterrows():
    if pd.notna(row["description"]) and str(row["description"]).strip():
        continue  # already done

    fn = row["filename"]
    path = os.path.join(IMAGES_DIR, fn)
    print(f"\n[PROCESSING] Row {idx}: '{fn}'")

    if not os.path.isfile(path):
        print(f"[SKIP] Missing image: {path}")
        df.at[idx, "description"] = "Image missing"
        continue

    try:
        uri = load_image_as_data_uri(path)
        desc = generate_apparel_description(uri)
        df.at[idx, "description"] = desc or "No description generated"
        print(f"[DESC] {desc}")

        time.sleep(random.uniform(1.2, 3.0))

        # Optional: save checkpoint even for these 5
        df.to_csv(OUT_CSV_PATH, index=False)

    except Exception as e:
        print(f"[ERROR] Failed at row {idx}: {e}")
        df.at[idx, "description"] = "Error during processing"
        continue

# Final save
df.to_csv(OUT_CSV_PATH, index=False)
print("\n✅ First 5 rows processed. Final CSV saved.")



[PROCESSING] Row 0: 'short_sleeve_top_0.jpg'
[DESC] Woman wearing a pink, sleeveless, loose-fitting top with a scooped neckline.

[PROCESSING] Row 1: 'short_sleeve_top_1.jpg'
[DESC] A woman wearing a fitted, blue and white plaid button-up shirt with a collar.

[PROCESSING] Row 2: 'short_sleeve_top_2.jpg'
[DESC] A woman wearing a dark, fitted blazer with lapels and a single-breasted style.

[PROCESSING] Row 3: 'short_sleeve_top_3.jpg'
[DESC] Woman wearing a black, sleeveless, midi-length dress with a scoop neckline and flared skirt.

[PROCESSING] Row 4: 'short_sleeve_top_4.jpg'
[DESC] Woman wearing a bright red sleeveless formal dress with a glossy finish and some pleating detail.

✅ First 5 rows processed. Final CSV saved.


In [28]:
df['description'].iloc[4]


'Error during processing'

In [29]:
df['url'].iloc[4]

'http://assets.myntassets.com/v1/images/style/properties/bede87d1f39eb0c758332db44c6f4e46_images.jpg'

In [24]:
print(generate_apparel_description("http://assets.myntassets.com/v1/images/style/properties/ad09deb93d4d3bb48f7d73a374e20f6b_images.jpg",1))

A blue denim jacket with a button-down front, collar, chest pockets, and side pockets, worn by a man.


In [26]:
pip install anthropic

Collecting anthropic
  Downloading anthropic-0.55.0-py3-none-any.whl.metadata (27 kB)
Downloading anthropic-0.55.0-py3-none-any.whl (289 kB)
Installing collected packages: anthropic
Successfully installed anthropic-0.55.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [34]:
import google.genai
print(google.genai.__version__)


1.19.0


In [35]:
pip install -U google-generativeai



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Tested for 5 samples initally

In [19]:
import os
import time
import traceback
import pandas as pd
import requests
import google.generativeai as genai

# ─── API Setup ─────────────────────────────────────────────────────────────
os.environ["GEMINI_API_KEY"] = "AIzaSyC3PrsHtDbxNscFghdIm_6SPn_-lF4oMjk"
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
MODEL_NAME = "gemini-1.5-flash"  # or your model

SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

# ─── Gemini Description Function ────────────────────────────────────────────
def describe_image_from_url(url: str) -> str:
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        image_bytes = response.content
    except Exception as e:
        print(f"❌ Failed to fetch image: {e}")
        return "Image download failed"

    model = genai.GenerativeModel(MODEL_NAME)
    try:
        response = model.generate_content(
            contents=[
                SYSTEM_PROMPT,
                {
                    "mime_type": "image/jpeg",
                    "data": image_bytes
                },
                "Describe only the garment. Avoid face, background, pose, etc."
            ]
        )
        return response.text.strip()
    except Exception as e:
        print(f"[Gemini Error] {e}")
        return "Gemini failed"

# ─── Load 5 Images from DataFrame ───────────────────────────────────────────
df = pd.read_csv("/Users/shivanishrivastava/Desktop/Complete_the_Look/metadata.csv").head(5)
df["description"] = ""

for idx, row in df.iterrows():
    url = row["url"]
    print(f"[PROCESSING] {row['filename']}")
    desc = describe_image_from_url(url)
    df.at[idx, "description"] = desc
    print(f"[DESC] {desc}")
    time.sleep(0.5)

df.to_csv("5_descriptions.csv", index=False)
print("✅ Done. Descriptions saved to 5_descriptions.csv")


[PROCESSING] short_sleeve_top_0.jpg
[DESC] A woman wearing a lavender v-neck short-sleeve t-shirt with a colorful Puma logo graphic.
[PROCESSING] short_sleeve_top_1.jpg
[DESC] A blue heather short-sleeved t-shirt with a graphic design featuring a cartoon character and text that reads “MR. SILLY” and “Roger Hargreaves,” worn by a man.
[PROCESSING] short_sleeve_top_2.jpg
[DESC] Person wearing a white short-sleeved t-shirt with a multicolored print of cars and motorcycles. The neckline is red.
[PROCESSING] short_sleeve_top_3.jpg
[DESC] A light green, short-sleeved crew neck t-shirt with a rocket graphic and “Mission to Mars” text.
[PROCESSING] short_sleeve_top_4.jpg
[DESC] Woman wearing a heather gray, short-sleeved t-shirt with a multicolored Adidas logo print.
✅ Done. Descriptions saved to 5_descriptions.csv


In [20]:
df['url'].iloc[4]

'http://assets.myntassets.com/v1/images/style/properties/bede87d1f39eb0c758332db44c6f4e46_images.jpg'

### Creating description using image url

In [15]:

# ─── 1. Setup ─────────────────────────────────────────────────────────────
genai.configure(api_key="AIzaSyDjJ-Y2vVfvb6SvrMi70OybLNvvrzYEGb0")
MODEL_NAME = "gemini-1.5-flash"
SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

# ─── 2. Gemini Image Description from URL ──────────────────────────────────
def generate_apparel_description_from_url(url: str, max_retries: int = 5) -> str:
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            image_bytes = response.content
        except Exception as e:
            print(f"[ERROR] Could not download image: {url} → {e}")
            return "Image download failed"

        try:
            model = genai.GenerativeModel(MODEL_NAME)
            result = model.generate_content([
                SYSTEM_PROMPT,
                {"mime_type": "image/jpeg", "data": image_bytes},
                "Describe only the garment."
            ])
            text = result.text.strip()
            print(f"[✅ DESC] {text[:100]}...")
            return text
        except Exception as e:
            print(f"[ERROR] Gemini failed (attempt {attempt}): {e}")
            if "rate" in str(e).lower():
                wait = 2 ** attempt
                print(f"[WAIT] Sleeping {wait}s due to rate limit...")
                time.sleep(wait)
            else:
                break

    print("[FAILED] All retries exhausted.")
    return "Failed to generate description"

# ─── 3. Load CSV & Checkpointing ──────────────────────────────────────────
INPUT_CSV = "/Users/shivanishrivastava/Desktop/Complete_the_Look/metadata.csv"
CHECKPOINT_CSV = "with_descriptions_checkpoint.csv"

df = pd.read_csv(INPUT_CSV)
if "description" not in df.columns:
    df["description"] = ""

if os.path.exists(CHECKPOINT_CSV):
    df_ckpt = pd.read_csv(CHECKPOINT_CSV)
    df["description"] = df_ckpt["description"]
    print(f"✅ Resumed from checkpoint at: {CHECKPOINT_CSV}")

# ─── 4. Main Batch Loop ───────────────────────────────────────────────────
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Generating Descriptions"):
    if pd.notna(row["description"]) and str(row["description"]).strip():
        continue

    url = row.get("url")
    if not isinstance(url, str) or not url.startswith("http"):
        print(f"[WARN] Invalid URL for row {idx}. Skipping.")
        df.at[idx, "description"] = "Invalid URL"
        continue

    print(f"\n[PROCESSING] Row {idx}: {row['filename']}")
    desc = generate_apparel_description_from_url(url)
    df.at[idx, "description"] = desc
    time.sleep(0.7)  # basic rate limiting

    if idx % 100 == 0:
        df.to_csv(CHECKPOINT_CSV, index=False)
        print(f"[💾] Checkpoint saved at row {idx}")

# ─── 5. Final Save ────────────────────────────────────────────────────────
df.to_csv(CHECKPOINT_CSV, index=False)
print("✅ All image descriptions completed and saved to:", CHECKPOINT_CSV)


✅ Resumed from checkpoint at: with_descriptions_checkpoint.csv


Generating Descriptions:   0%|          | 0/23266 [00:00<?, ?it/s]


[PROCESSING] Row 5: short_sleeve_top_5.jpg
[✅ DESC] A white short-sleeve Adidas polo shirt with black shoulder stripes, worn by a man....


Generating Descriptions:   0%|          | 6/23266 [00:10<11:24:14,  1.77s/it]


[PROCESSING] Row 6: short_sleeve_top_6.jpg
[✅ DESC] A white, short-sleeved crew neck t-shirt with a gray distressed graphic design on the chest, worn by...


Generating Descriptions:   0%|          | 7/23266 [00:14<14:40:21,  2.27s/it]


[PROCESSING] Row 7: short_sleeve_top_7.jpg
[✅ DESC] Woman wearing a dark-navy, short-sleeved blouse with a floral print and subtle polka dots.  The slee...


Generating Descriptions:   0%|          | 8/23266 [00:19<17:21:04,  2.69s/it]


[PROCESSING] Row 8: short_sleeve_top_8.jpg
[✅ DESC] Woman wearing a turquoise short-sleeved t-shirt with a multicolored "LOVE" graphic....


Generating Descriptions:   0%|          | 9/23266 [00:24<21:50:05,  3.38s/it]


[PROCESSING] Row 9: short_sleeve_top_9.jpg
[✅ DESC] A black short-sleeve t-shirt with a white-dotted square graphic of a black Puma logo, worn by a man....


Generating Descriptions:   0%|          | 10/23266 [00:30<24:49:36,  3.84s/it]


[PROCESSING] Row 10: short_sleeve_top_10.jpg
[✅ DESC] A white short-sleeve polo shirt with a subtle diamond pattern and a small logo on the chest, worn by...


Generating Descriptions:   0%|          | 11/23266 [00:34<26:08:06,  4.05s/it]


[PROCESSING] Row 11: short_sleeve_top_11.jpg
[✅ DESC] Woman wearing a heather gray, short-sleeved, relaxed fit t-shirt with a simple, black, sans-serif te...


Generating Descriptions:   0%|          | 12/23266 [00:38<26:13:34,  4.06s/it]


[PROCESSING] Row 12: short_sleeve_top_12.jpg
[✅ DESC] A teal and green short-sleeved t-shirt with a cartoon graphic, featuring a white crew neck....


Generating Descriptions:   0%|          | 13/23266 [00:58<54:57:15,  8.51s/it]


[PROCESSING] Row 13: short_sleeve_top_13.jpg
[✅ DESC] A maroon short-sleeved t-shirt with a graphic design featuring a woman and flowers.  The shirt is a ...


Generating Descriptions:   0%|          | 14/23266 [01:03<48:44:53,  7.55s/it]


[PROCESSING] Row 14: short_sleeve_top_14.jpg
[✅ DESC] A brown short-sleeved polo shirt with a multi-colored striped collar, worn by a man....


Generating Descriptions:   0%|          | 15/23266 [01:07<42:23:36,  6.56s/it]


[PROCESSING] Row 15: short_sleeve_top_15.jpg
[✅ DESC] Woman wearing a heather gray short-sleeved polo shirt with a white collar and cuffs....


Generating Descriptions:   0%|          | 16/23266 [01:11<36:44:34,  5.69s/it]


[PROCESSING] Row 16: short_sleeve_top_16.jpg
[✅ DESC] A man wearing a bright blue short-sleeve polo shirt with a small logo embroidered on the right breas...


Generating Descriptions:   0%|          | 17/23266 [01:15<32:45:25,  5.07s/it]


[PROCESSING] Row 17: short_sleeve_top_17.jpg
[✅ DESC] A dark-blue, short-sleeve crew-neck t-shirt with a graphic design on the front, worn by a man....


Generating Descriptions:   0%|          | 18/23266 [01:19<31:49:37,  4.93s/it]


[PROCESSING] Row 18: short_sleeve_top_18.jpg
[✅ DESC] Woman wearing a light blue, short-sleeved t-shirt with a graphic design, and denim shorts....


Generating Descriptions:   0%|          | 19/23266 [01:23<28:50:13,  4.47s/it]


[PROCESSING] Row 19: short_sleeve_top_19.jpg
[✅ DESC] A purple short-sleeved t-shirt with a surfer graphic, worn by a man....


Generating Descriptions:   0%|          | 20/23266 [01:26<26:45:32,  4.14s/it]


[PROCESSING] Row 20: short_sleeve_top_20.jpg
[✅ DESC] A lavender short-sleeve crew-neck t-shirt with a black graphic design, worn by a man....


Generating Descriptions:   0%|          | 21/23266 [01:32<29:52:58,  4.63s/it]


[PROCESSING] Row 21: short_sleeve_top_21.jpg
[✅ DESC] A red and white horizontally striped short-sleeved polo shirt....


Generating Descriptions:   0%|          | 22/23266 [01:37<30:47:01,  4.77s/it]


[PROCESSING] Row 22: short_sleeve_top_22.jpg
[✅ DESC] A purple short-sleeved crew neck t-shirt with a yellow graphic of a motocross rider on a dirt bike....


Generating Descriptions:   0%|          | 23/23266 [01:42<31:15:51,  4.84s/it]


[PROCESSING] Row 23: short_sleeve_top_23.jpg
[✅ DESC] A black, short-sleeved t-shirt with a multicolored circular graphic and the words “Shanti Peace” pri...


Generating Descriptions:   0%|          | 24/23266 [02:05<66:58:45, 10.37s/it]


[PROCESSING] Row 24: short_sleeve_top_24.jpg
[✅ DESC] A light gray, short-sleeved crew neck t-shirt with a small embroidered logo on the lower right side,...


Generating Descriptions:   0%|          | 25/23266 [02:10<56:05:51,  8.69s/it]


[PROCESSING] Row 25: short_sleeve_top_25.jpg
[✅ DESC] A red short-sleeved t-shirt featuring a Mickey Mouse graphic and other colorful cartoon drawings....


Generating Descriptions:   0%|          | 26/23266 [02:16<51:34:12,  7.99s/it]


[PROCESSING] Row 26: short_sleeve_top_26.jpg
[✅ DESC] Man wearing a teal short-sleeved polo shirt with white and yellow horizontal stripes....


Generating Descriptions:   0%|          | 27/23266 [02:21<46:11:54,  7.16s/it]


[PROCESSING] Row 27: short_sleeve_top_27.jpg
[✅ DESC] A black cotton t-shirt with a gray Batman graphic, worn by a man....


Generating Descriptions:   0%|          | 28/23266 [02:26<42:03:06,  6.51s/it]


[PROCESSING] Row 28: short_sleeve_top_28.jpg
[✅ DESC] A black, short-sleeved t-shirt with a graphic design featuring a triangle and stylized swirls in yel...


Generating Descriptions:   0%|          | 29/23266 [02:31<39:09:28,  6.07s/it]


[PROCESSING] Row 29: short_sleeve_top_29.jpg
[✅ DESC] A lavender women’s short-sleeved polo shirt with a geometric pattern and white Nike logo.  The shirt...


Generating Descriptions:   0%|          | 30/23266 [03:22<125:50:19, 19.50s/it]


[PROCESSING] Row 30: short_sleeve_top_30.jpg
[✅ DESC] A bright blue, short-sleeved polo shirt with a small embroidered logo, worn by a man....


Generating Descriptions:   0%|          | 31/23266 [03:27<96:55:09, 15.02s/it] 


[PROCESSING] Row 31: short_sleeve_top_31.jpg
[✅ DESC] A teal and white horizontally striped polo shirt with short sleeves, featuring a graphic print that ...


Generating Descriptions:   0%|          | 32/23266 [03:31<76:29:41, 11.85s/it]


[PROCESSING] Row 32: short_sleeve_top_32.jpg
[✅ DESC] A blue short-sleeved Adidas polo shirt with gold accents, featuring the Mumbai Indians logo and Hero...


Generating Descriptions:   0%|          | 33/23266 [03:35<60:21:24,  9.35s/it]


[PROCESSING] Row 33: short_sleeve_top_33.jpg
[✅ DESC] A black cotton crew-neck T-shirt with a red and black "DUCATI" graphic print.  A man is wearing it....


Generating Descriptions:   0%|          | 34/23266 [03:38<48:35:21,  7.53s/it]


[PROCESSING] Row 34: short_sleeve_top_34.jpg
[✅ DESC] A navy blue polo shirt with red and white trim on the collar and sleeves.  It features a graphic des...


Generating Descriptions:   0%|          | 35/23266 [03:42<40:57:04,  6.35s/it]


[PROCESSING] Row 35: short_sleeve_top_35.jpg
[✅ DESC] A white short-sleeved t-shirt with a graphic design in black and yellow....


Generating Descriptions:   0%|          | 36/23266 [03:45<34:59:59,  5.42s/it]


[PROCESSING] Row 36: short_sleeve_top_36.jpg
[✅ DESC] A man wearing a lime green, black, gray, and white horizontally striped polo shirt with short sleeve...


Generating Descriptions:   0%|          | 37/23266 [03:49<32:25:53,  5.03s/it]


[PROCESSING] Row 37: short_sleeve_top_37.jpg
[✅ DESC] A black short-sleeved crew-neck t-shirt with a vertical, light-green graphic design, worn by a man....


Generating Descriptions:   0%|          | 38/23266 [03:53<29:55:23,  4.64s/it]


[PROCESSING] Row 38: short_sleeve_top_38.jpg
[✅ DESC] A red short-sleeved shirt with white raglan sleeves, worn by a man....


Generating Descriptions:   0%|          | 39/23266 [03:56<26:48:52,  4.16s/it]


[PROCESSING] Row 39: short_sleeve_top_39.jpg
[✅ DESC] Man wearing a dark gray and red short-sleeved athletic shirt.  The shirt is a blend of fabrics, with...


Generating Descriptions:   0%|          | 40/23266 [04:00<27:28:38,  4.26s/it]


[PROCESSING] Row 40: short_sleeve_top_40.jpg
[✅ DESC] A man wearing a bright green short-sleeve polo shirt with a darker green and white tipped collar.  T...


Generating Descriptions:   0%|          | 41/23266 [04:04<26:10:17,  4.06s/it]


[PROCESSING] Row 41: short_sleeve_top_41.jpg
[✅ DESC] A pink, short-sleeved t-shirt with a “FREE BIRD” graphic and ladybug accents....


Generating Descriptions:   0%|          | 42/23266 [04:08<25:15:32,  3.92s/it]


[PROCESSING] Row 42: short_sleeve_top_42.jpg
[✅ DESC] A man wearing a white short-sleeved polo shirt....


Generating Descriptions:   0%|          | 43/23266 [04:12<25:24:24,  3.94s/it]


[PROCESSING] Row 43: short_sleeve_top_43.jpg
[✅ DESC] A black short-sleeved athletic shirt with a mesh-like panel on the sides, a crew neck, and a small N...


Generating Descriptions:   0%|          | 44/23266 [04:16<25:30:28,  3.95s/it]


[PROCESSING] Row 44: short_sleeve_top_44.jpg
[✅ DESC] Woman wearing a yellow short-sleeved athletic shirt with a crew neck and subtle reflective gray acce...


Generating Descriptions:   0%|          | 45/23266 [04:31<47:34:56,  7.38s/it]


[PROCESSING] Row 45: short_sleeve_top_45.jpg
[✅ DESC] Off-white crew-neck t-shirt with a grayscale tiger graphic and “Wild Trails” text, worn by a man....


Generating Descriptions:   0%|          | 46/23266 [04:35<40:19:45,  6.25s/it]


[PROCESSING] Row 46: short_sleeve_top_46.jpg
[✅ DESC] A bright green, short-sleeved t-shirt with a black baseball-themed graphic....


Generating Descriptions:   0%|          | 47/23266 [04:38<34:53:48,  5.41s/it]


[PROCESSING] Row 47: short_sleeve_top_47.jpg
[✅ DESC] Woman wearing a mint green and white horizontally striped polo shirt with short sleeves....


Generating Descriptions:   0%|          | 48/23266 [04:42<32:10:52,  4.99s/it]


[PROCESSING] Row 48: short_sleeve_top_48.jpg
[✅ DESC] A red short-sleeved crew-neck t-shirt with an embroidered emblem on the chest, worn by a man....


Generating Descriptions:   0%|          | 49/23266 [04:47<31:59:24,  4.96s/it]


[PROCESSING] Row 49: short_sleeve_top_49.jpg
[✅ DESC] Woman wearing a teal, three-quarter sleeve t-shirt with a graphic design featuring a stylized woman’...


Generating Descriptions:   0%|          | 50/23266 [04:51<31:18:15,  4.85s/it]


[PROCESSING] Row 50: short_sleeve_top_50.jpg
[✅ DESC] A teal short-sleeved t-shirt with a graphic print that includes the words “College 1965” and other t...


Generating Descriptions:   0%|          | 51/23266 [04:59<36:35:19,  5.67s/it]


[PROCESSING] Row 51: short_sleeve_top_51.jpg
[✅ DESC] A white, short-sleeved t-shirt with a navy blue crew neck and navy blue stripes on the sleeves.  It ...


Generating Descriptions:   0%|          | 52/23266 [05:22<69:34:23, 10.79s/it]


[PROCESSING] Row 52: short_sleeve_top_52.jpg
[✅ DESC] A red short-sleeved athletic shirt with black and yellow accents on the shoulders, worn by a man....


Generating Descriptions:   0%|          | 53/23266 [05:30<64:21:10,  9.98s/it]


[PROCESSING] Row 53: short_sleeve_top_53.jpg
[✅ DESC] A coral-colored cotton t-shirt with an orange Adidas logo, featuring a classic crew neck and short s...


Generating Descriptions:   0%|          | 54/23266 [05:34<52:09:53,  8.09s/it]


[PROCESSING] Row 54: short_sleeve_top_54.jpg
[✅ DESC] A black, short-sleeved, athletic-fit t-shirt with teal stripes on the shoulders, worn by a man....


Generating Descriptions:   0%|          | 55/23266 [05:39<46:24:56,  7.20s/it]


[PROCESSING] Row 55: short_sleeve_top_55.jpg
[✅ DESC] Woman wearing a white, short-sleeved t-shirt with a graphic design of a girl in a beret.  The shirt ...


Generating Descriptions:   0%|          | 56/23266 [05:43<40:48:47,  6.33s/it]


[PROCESSING] Row 56: short_sleeve_top_56.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 43
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 57/23266 [06:54<165:55:18, 25.74s/it]


[PROCESSING] Row 57: short_sleeve_top_57.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 32
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 58/23266 [08:04<252:12:07, 39.12s/it]


[PROCESSING] Row 58: short_sleeve_top_58.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 22
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 59/23266 [09:20<323:16:38, 50.15s/it]


[PROCESSING] Row 59: short_sleeve_top_59.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 6
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations

Generating Descriptions:   0%|          | 60/23266 [10:30<361:05:27, 56.02s/it]


[PROCESSING] Row 60: short_sleeve_top_60.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 61/23266 [11:43<393:23:06, 61.03s/it]


[PROCESSING] Row 61: short_sleeve_top_61.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 43
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 62/23266 [12:56<416:18:15, 64.59s/it]


[PROCESSING] Row 62: short_sleeve_top_62.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 31
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violation

Generating Descriptions:   0%|          | 63/23266 [13:19<336:26:51, 52.20s/it]


[PROCESSING] Row 63: short_sleeve_top_63.jpg
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 7
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations

Generating Descriptions:   0%|          | 63/23266 [13:26<82:30:28, 12.80s/it] 


KeyboardInterrupt: 

In [27]:

# ─── 1. Setup ─────────────────────────────────────────────────────────────
genai.configure(api_key="AIzaSyDjJ-Y2vVfvb6SvrMi70OybLNvvrzYEGb0")
MODEL_NAME = "gemini-1.5-flash"

SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

# ─── 2. Gemini Image Description from URL ──────────────────────────────────
def generate_apparel_description_from_url(url: str, max_retries: int = 5) -> str:
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            image_bytes = response.content
        except Exception as e:
            print(f"[ERROR] Could not download image: {url} → {e}")
            return "Image download failed"

        try:
            model = genai.GenerativeModel(MODEL_NAME)
            result = model.generate_content([
                SYSTEM_PROMPT,
                {"mime_type": "image/jpeg", "data": image_bytes},
                "Describe only the garment."
            ])
            return result.text.strip()
        except Exception as e:
            print(f"[ERROR] Gemini failed (attempt {attempt}): {e}")
            if "rate" in str(e).lower():
                wait = 2 ** attempt
                print(f"[WAIT] Sleeping {wait}s due to rate limit...")
                time.sleep(wait)
            else:
                break

    print("[FAILED] All retries exhausted.")
    return "Failed to generate description"

# ─── 3. Load CSV and Sample Batches ────────────────────────────────────────
INPUT_CSV = "/Users/shivanishrivastava/Desktop/Complete_the_Look/metadata.csv"
CHECKPOINT_CSV = "with_descriptions_checkpoint.csv"

df_full = pd.read_csv(INPUT_CSV)

# Sample 100 from each unique garment category
sampled_batches = df_full.groupby("garment", group_keys=False).apply(lambda x: x.sample(min(100, len(x)), random_state=42)).reset_index(drop=True)

# Add a blank column if not present
if "description" not in sampled_batches.columns:
    sampled_batches["description"] = ""

# Resume logic
if os.path.exists(CHECKPOINT_CSV):
    checkpoint_df = pd.read_csv(CHECKPOINT_CSV)
    sampled_batches["description"] = checkpoint_df["description"]
    print(f"✅ Resumed from checkpoint at: {CHECKPOINT_CSV}")

# ─── 4. Run Gemini Descriptions ────────────────────────────────────────────
for idx, row in tqdm(sampled_batches.iterrows(), total=len(sampled_batches), desc="Generating Descriptions"):
    if pd.notna(row["description"]) and str(row["description"]).strip():
        continue

    url = row.get("url")
    if not isinstance(url, str) or not url.startswith("http"):
        print(f"[WARN] Invalid URL for row {idx}. Skipping.")
        sampled_batches.at[idx, "description"] = "Invalid URL"
        continue

    print(f"\n[PROCESSING] Row {idx}: {row['filename']} ({row['garment']})")
    desc = generate_apparel_description_from_url(url)
    sampled_batches.at[idx, "description"] = desc
    time.sleep(0.7)

    if idx % 50 == 0:
        sampled_batches.to_csv(CHECKPOINT_CSV, index=False)
        print(f"[💾] Checkpoint saved at row {idx}")

# ─── 5. Final Save ────────────────────────────────────────────────────────
sampled_batches.to_csv(CHECKPOINT_CSV, index=False)
print("✅ All sampled image descriptions saved to:", CHECKPOINT_CSV)


  sampled_batches = df_full.groupby("garment", group_keys=False).apply(lambda x: x.sample(min(100, len(x)), random_state=42)).reset_index(drop=True)


✅ Resumed from checkpoint at: with_descriptions_checkpoint.csv


Generating Descriptions:   0%|          | 0/900 [00:00<?, ?it/s]


[PROCESSING] Row 5: long_sleeve_dress_1856.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/2a61ccc4-7d71-49e0-89b8-74bdc0c10e1c.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/2a61ccc4-7d71-49e0-89b8-74bdc0c10e1c.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 6/900 [00:02<05:14,  2.84it/s]


[PROCESSING] Row 6: long_sleeve_dress_1610.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/0f0302b6-3ae6-48df-82bf-336cb7992cd9.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/0f0302b6-3ae6-48df-82bf-336cb7992cd9.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 7/900 [00:02<06:30,  2.29it/s]


[PROCESSING] Row 7: long_sleeve_dress_596.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/98f28393-563c-4584-8ad2-e661c460b388.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/98f28393-563c-4584-8ad2-e661c460b388.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 8/900 [00:03<07:41,  1.93it/s]


[PROCESSING] Row 8: long_sleeve_dress_1124.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/45b784e4-2483-416a-8b38-3c047e5531c2.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/45b784e4-2483-416a-8b38-3c047e5531c2.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 9/900 [00:04<08:43,  1.70it/s]


[PROCESSING] Row 9: long_sleeve_dress_2310.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/48769a54-7253-47d0-a3f3-e6cb9731016c.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/48769a54-7253-47d0-a3f3-e6cb9731016c.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 10/900 [00:05<09:37,  1.54it/s]


[PROCESSING] Row 10: long_sleeve_dress_678.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/773825eb-0238-4d1b-8c72-9ed16ff6ae8a.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/773825eb-0238-4d1b-8c72-9ed16ff6ae8a.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|          | 11/900 [00:06<10:13,  1.45it/s]


[PROCESSING] Row 11: long_sleeve_dress_2255.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/153353a9-d7e8-4b34-af8d-6b791b560035.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/153353a9-d7e8-4b34-af8d-6b791b560035.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|▏         | 12/900 [00:06<10:37,  1.39it/s]


[PROCESSING] Row 12: long_sleeve_dress_1442.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/b25768c2-f6e9-4d3c-9378-33b2d32e10d1.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/b25768c2-f6e9-4d3c-9378-33b2d32e10d1.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   1%|▏         | 13/900 [00:07<11:06,  1.33it/s]


[PROCESSING] Row 13: long_sleeve_dress_247.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/4e6c1846-3bcb-4a1f-9f63-fe7070a84148.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/4e6c1846-3bcb-4a1f-9f63-fe7070a84148.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 14/900 [00:08<11:24,  1.30it/s]


[PROCESSING] Row 14: long_sleeve_dress_134.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/32bf4204-3db3-408b-b369-3fd3a70f710d.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/32bf4204-3db3-408b-b369-3fd3a70f710d.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 15/900 [00:09<11:38,  1.27it/s]


[PROCESSING] Row 15: long_sleeve_dress_2276.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/2dd03821-9175-4243-b3d3-edb1137f74bb.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/2dd03821-9175-4243-b3d3-edb1137f74bb.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 16/900 [00:10<11:47,  1.25it/s]


[PROCESSING] Row 16: long_sleeve_dress_290.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/35c4555f-4580-43a5-9e0f-e9b187199e81.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/35c4555f-4580-43a5-9e0f-e9b187199e81.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 17/900 [00:11<11:52,  1.24it/s]


[PROCESSING] Row 17: long_sleeve_dress_252.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/e94582b8-7ad6-43d6-980c-45757f69c7e6.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/e94582b8-7ad6-43d6-980c-45757f69c7e6.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 18/900 [00:11<11:57,  1.23it/s]


[PROCESSING] Row 18: long_sleeve_dress_1444.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/29c5e718-c5ba-4f1b-853e-96ec68fc6d8b.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/29c5e718-c5ba-4f1b-853e-96ec68fc6d8b.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 19/900 [00:12<12:08,  1.21it/s]


[PROCESSING] Row 19: long_sleeve_dress_2025.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/1daa8a6a-c750-439d-8e5c-328eca96a8ea.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/1daa8a6a-c750-439d-8e5c-328eca96a8ea.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 20/900 [00:13<12:11,  1.20it/s]


[PROCESSING] Row 20: long_sleeve_dress_2090.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/1a78fc25-6353-4680-8c4a-aaab8704ff23.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/1a78fc25-6353-4680-8c4a-aaab8704ff23.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 21/900 [00:14<12:16,  1.19it/s]


[PROCESSING] Row 21: long_sleeve_dress_44.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/c90dd768-1096-44e7-919e-425838faa214.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/c90dd768-1096-44e7-919e-425838faa214.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   2%|▏         | 22/900 [00:15<12:09,  1.20it/s]


[PROCESSING] Row 22: long_sleeve_dress_564.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/3aeaafd6-4152-4486-872c-729f74b000b1.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/3aeaafd6-4152-4486-872c-729f74b000b1.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 23/900 [00:16<12:05,  1.21it/s]


[PROCESSING] Row 23: long_sleeve_dress_218.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/67501e56-fec6-489f-9e90-1489e021911a.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/67501e56-fec6-489f-9e90-1489e021911a.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 24/900 [00:16<12:01,  1.21it/s]


[PROCESSING] Row 24: long_sleeve_dress_350.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/aa7d6457-0e74-41c4-9c91-85ad3380a33a.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/aa7d6457-0e74-41c4-9c91-85ad3380a33a.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 25/900 [00:17<12:01,  1.21it/s]


[PROCESSING] Row 25: long_sleeve_dress_1949.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/64a520a5-5a2f-4421-b014-40b4e8123dea.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/64a520a5-5a2f-4421-b014-40b4e8123dea.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 26/900 [00:18<12:16,  1.19it/s]


[PROCESSING] Row 26: long_sleeve_dress_857.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/35a30285-3e87-4592-a604-74ec8a30abbe.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/35a30285-3e87-4592-a604-74ec8a30abbe.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 27/900 [00:19<12:19,  1.18it/s]


[PROCESSING] Row 27: long_sleeve_dress_978.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/8edd95de-e69b-4ca9-828f-9cbbd7a1f705.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/8edd95de-e69b-4ca9-828f-9cbbd7a1f705.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 28/900 [00:20<12:14,  1.19it/s]


[PROCESSING] Row 28: long_sleeve_dress_707.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/d0eefe3e-aabd-49ab-b251-c54b2c001dc5.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/d0eefe3e-aabd-49ab-b251-c54b2c001dc5.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 29/900 [00:21<12:10,  1.19it/s]


[PROCESSING] Row 29: long_sleeve_dress_2055.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/b1da2644-6844-4ed7-b0dc-492eab31a093.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/b1da2644-6844-4ed7-b0dc-492eab31a093.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 30/900 [00:22<12:11,  1.19it/s]


[PROCESSING] Row 30: long_sleeve_dress_342.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/1c9d792a-6c57-464d-9139-ea434ce22dbf.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/1c9d792a-6c57-464d-9139-ea434ce22dbf.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   3%|▎         | 31/900 [00:22<12:11,  1.19it/s]


[PROCESSING] Row 31: long_sleeve_dress_2145.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/a92c7134-80c3-4823-a587-e58987979344.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/a92c7134-80c3-4823-a587-e58987979344.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   4%|▎         | 32/900 [00:23<12:15,  1.18it/s]


[PROCESSING] Row 32: long_sleeve_dress_1875.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/c692b0ee-2257-4c87-8c47-8011c29f6e8a.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/c692b0ee-2257-4c87-8c47-8011c29f6e8a.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   4%|▎         | 33/900 [00:24<12:19,  1.17it/s]


[PROCESSING] Row 33: long_sleeve_dress_1564.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 34/900 [00:27<22:50,  1.58s/it]


[PROCESSING] Row 34: long_sleeve_dress_111.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 35/900 [00:31<29:49,  2.07s/it]


[PROCESSING] Row 35: long_sleeve_dress_179.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 36/900 [00:34<34:26,  2.39s/it]


[PROCESSING] Row 36: long_sleeve_dress_1456.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 37/900 [00:38<41:41,  2.90s/it]


[PROCESSING] Row 37: long_sleeve_dress_1918.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 38/900 [00:41<41:44,  2.91s/it]


[PROCESSING] Row 38: long_sleeve_dress_1316.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 39/900 [00:49<1:03:35,  4.43s/it]


[PROCESSING] Row 39: long_sleeve_dress_1756.jpg (long sleeve dress)


Generating Descriptions:   4%|▍         | 40/900 [00:52<57:03,  3.98s/it]  


[PROCESSING] Row 40: long_sleeve_dress_410.jpg (long sleeve dress)


Generating Descriptions:   5%|▍         | 41/900 [00:55<52:13,  3.65s/it]


[PROCESSING] Row 41: long_sleeve_dress_1967.jpg (long sleeve dress)


Generating Descriptions:   5%|▍         | 42/900 [00:57<48:37,  3.40s/it]


[PROCESSING] Row 42: long_sleeve_dress_974.jpg (long sleeve dress)


Generating Descriptions:   5%|▍         | 43/900 [01:01<48:18,  3.38s/it]


[PROCESSING] Row 43: long_sleeve_dress_420.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/7963ef59-7883-43ee-b023-b4a2154cd958.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/7963ef59-7883-43ee-b023-b4a2154cd958.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▍         | 44/900 [01:02<37:22,  2.62s/it]


[PROCESSING] Row 44: long_sleeve_dress_237.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/87d2d130-56bc-4592-80e6-f75746398011.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/87d2d130-56bc-4592-80e6-f75746398011.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▌         | 45/900 [01:02<29:38,  2.08s/it]


[PROCESSING] Row 45: long_sleeve_dress_756.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/40296505-59d5-4cc8-a2bf-d53bb2d0efa5.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/40296505-59d5-4cc8-a2bf-d53bb2d0efa5.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▌         | 46/900 [01:03<24:15,  1.70s/it]


[PROCESSING] Row 46: long_sleeve_dress_1089.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/33881005-9d70-4baf-b82d-c9b3a40e80c1.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/33881005-9d70-4baf-b82d-c9b3a40e80c1.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▌         | 47/900 [01:04<20:48,  1.46s/it]


[PROCESSING] Row 47: long_sleeve_dress_2325.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/008d279a-656e-46ad-b384-4c52608c1e37.png?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/008d279a-656e-46ad-b384-4c52608c1e37.png?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▌         | 48/900 [01:05<18:13,  1.28s/it]


[PROCESSING] Row 48: long_sleeve_dress_650.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/9db2a013-25ba-41bb-973e-f27ab622f88a.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/9db2a013-25ba-41bb-973e-f27ab622f88a.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   5%|▌         | 49/900 [01:06<16:14,  1.14s/it]


[PROCESSING] Row 49: long_sleeve_dress_229.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/d1d0a178-bfce-43e6-b756-4bbaed951e88.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/d1d0a178-bfce-43e6-b756-4bbaed951e88.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 50/900 [01:07<14:46,  1.04s/it]


[PROCESSING] Row 50: long_sleeve_dress_2082.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/da8f212e-e7f1-4ec5-b8bb-32d8b8488fe8.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/da8f212e-e7f1-4ec5-b8bb-32d8b8488fe8.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 51/900 [01:07<13:50,  1.02it/s]

[💾] Checkpoint saved at row 50

[PROCESSING] Row 51: long_sleeve_dress_1047.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/23f72242-3118-4f28-bc98-77d9316c6668.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/23f72242-3118-4f28-bc98-77d9316c6668.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 52/900 [01:08<13:11,  1.07it/s]


[PROCESSING] Row 52: long_sleeve_dress_445.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/c90be0fd-2875-4ae2-9201-fbb9d279a238.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/c90be0fd-2875-4ae2-9201-fbb9d279a238.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 53/900 [01:09<12:42,  1.11it/s]


[PROCESSING] Row 53: long_sleeve_dress_1477.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/6448a137-92ee-4293-83b9-d057342d0d42.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/6448a137-92ee-4293-83b9-d057342d0d42.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 54/900 [01:10<12:23,  1.14it/s]


[PROCESSING] Row 54: long_sleeve_dress_1360.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/4cf5137e-15a3-4f19-aaa4-696d155c6963.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/4cf5137e-15a3-4f19-aaa4-696d155c6963.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 55/900 [01:11<12:10,  1.16it/s]


[PROCESSING] Row 55: long_sleeve_dress_879.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/f4a2dc84-1e1e-4e9e-b022-e350ee1a78ce.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/f4a2dc84-1e1e-4e9e-b022-e350ee1a78ce.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▌         | 56/900 [01:12<12:06,  1.16it/s]


[PROCESSING] Row 56: long_sleeve_dress_254.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/4cc45322-55e7-431e-9bbe-8b498ebcc157.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/4cc45322-55e7-431e-9bbe-8b498ebcc157.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▋         | 57/900 [01:12<11:55,  1.18it/s]


[PROCESSING] Row 57: long_sleeve_dress_2148.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/26b033bb-1d91-49f3-8a35-f602f59a34c8.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/26b033bb-1d91-49f3-8a35-f602f59a34c8.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:   6%|▋         | 58/900 [01:13<11:48,  1.19it/s]


[PROCESSING] Row 58: long_sleeve_dress_56.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 59/900 [01:16<21:53,  1.56s/it]


[PROCESSING] Row 59: long_sleeve_dress_1717.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 60/900 [01:20<29:11,  2.08s/it]


[PROCESSING] Row 60: long_sleeve_dress_1307.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 61/900 [01:23<32:24,  2.32s/it]


[PROCESSING] Row 61: long_sleeve_dress_1491.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 62/900 [01:26<34:55,  2.50s/it]


[PROCESSING] Row 62: long_sleeve_dress_1463.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 63/900 [01:28<36:39,  2.63s/it]


[PROCESSING] Row 63: long_sleeve_dress_685.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 64/900 [01:31<37:47,  2.71s/it]


[PROCESSING] Row 64: long_sleeve_dress_426.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 65/900 [01:34<38:54,  2.80s/it]


[PROCESSING] Row 65: long_sleeve_dress_1229.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 66/900 [01:38<40:23,  2.91s/it]


[PROCESSING] Row 66: long_sleeve_dress_282.jpg (long sleeve dress)


Generating Descriptions:   7%|▋         | 67/900 [01:42<46:06,  3.32s/it]


[PROCESSING] Row 67: long_sleeve_dress_1058.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 68/900 [01:45<45:06,  3.25s/it]


[PROCESSING] Row 68: long_sleeve_dress_993.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 69/900 [01:48<44:31,  3.22s/it]


[PROCESSING] Row 69: long_sleeve_dress_1198.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 70/900 [01:51<42:45,  3.09s/it]


[PROCESSING] Row 70: long_sleeve_dress_620.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 71/900 [01:55<46:43,  3.38s/it]


[PROCESSING] Row 71: long_sleeve_dress_1034.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 72/900 [01:58<45:51,  3.32s/it]


[PROCESSING] Row 72: long_sleeve_dress_1293.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 73/900 [02:01<44:43,  3.24s/it]


[PROCESSING] Row 73: long_sleeve_dress_2045.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 74/900 [02:04<41:50,  3.04s/it]


[PROCESSING] Row 74: long_sleeve_dress_1694.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 75/900 [02:06<40:38,  2.96s/it]


[PROCESSING] Row 75: long_sleeve_dress_2320.jpg (long sleeve dress)


Generating Descriptions:   8%|▊         | 76/900 [02:09<40:02,  2.92s/it]


[PROCESSING] Row 76: long_sleeve_dress_296.jpg (long sleeve dress)


Generating Descriptions:   9%|▊         | 77/900 [02:13<41:39,  3.04s/it]


[PROCESSING] Row 77: long_sleeve_dress_787.jpg (long sleeve dress)


Generating Descriptions:   9%|▊         | 78/900 [02:15<40:38,  2.97s/it]


[PROCESSING] Row 78: long_sleeve_dress_1023.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 79/900 [02:19<42:24,  3.10s/it]


[PROCESSING] Row 79: long_sleeve_dress_2172.jpg (long sleeve dress)
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 40
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/do

Generating Descriptions:   9%|▉         | 80/900 [02:32<1:22:02,  6.00s/it]


[PROCESSING] Row 80: long_sleeve_dress_1043.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 81/900 [02:34<1:08:54,  5.05s/it]


[PROCESSING] Row 81: long_sleeve_dress_528.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 82/900 [02:38<1:01:05,  4.48s/it]


[PROCESSING] Row 82: long_sleeve_dress_861.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 83/900 [02:41<55:00,  4.04s/it]  


[PROCESSING] Row 83: long_sleeve_dress_173.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 84/900 [02:44<51:11,  3.76s/it]


[PROCESSING] Row 84: long_sleeve_dress_694.jpg (long sleeve dress)


Generating Descriptions:   9%|▉         | 85/900 [02:47<49:12,  3.62s/it]


[PROCESSING] Row 85: long_sleeve_dress_1690.jpg (long sleeve dress)


Generating Descriptions:  10%|▉         | 86/900 [02:50<46:43,  3.44s/it]


[PROCESSING] Row 86: long_sleeve_dress_1616.jpg (long sleeve dress)


Generating Descriptions:  10%|▉         | 87/900 [02:53<46:13,  3.41s/it]


[PROCESSING] Row 87: long_sleeve_dress_1689.jpg (long sleeve dress)


Generating Descriptions:  10%|▉         | 88/900 [02:58<52:31,  3.88s/it]


[PROCESSING] Row 88: long_sleeve_dress_29.jpg (long sleeve dress)


Generating Descriptions:  10%|▉         | 89/900 [03:05<1:03:29,  4.70s/it]


[PROCESSING] Row 89: long_sleeve_dress_2100.jpg (long sleeve dress)


Generating Descriptions:  10%|█         | 90/900 [03:09<58:53,  4.36s/it]  


[PROCESSING] Row 90: long_sleeve_dress_1651.jpg (long sleeve dress)


Generating Descriptions:  10%|█         | 91/900 [03:12<53:22,  3.96s/it]


[PROCESSING] Row 91: long_sleeve_dress_2317.jpg (long sleeve dress)


Generating Descriptions:  10%|█         | 92/900 [03:14<49:05,  3.65s/it]


[PROCESSING] Row 92: long_sleeve_dress_1510.jpg (long sleeve dress)


Generating Descriptions:  10%|█         | 93/900 [03:17<46:20,  3.45s/it]


[PROCESSING] Row 93: long_sleeve_dress_522.jpg (long sleeve dress)


Generating Descriptions:  10%|█         | 94/900 [03:20<43:30,  3.24s/it]


[PROCESSING] Row 94: long_sleeve_dress_435.jpg (long sleeve dress)


Generating Descriptions:  11%|█         | 95/900 [03:23<42:26,  3.16s/it]


[PROCESSING] Row 95: long_sleeve_dress_1976.jpg (long sleeve dress)
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 36
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Could not download image: https://n.nordstrommedia.com/it/f6b21d3e-268b-40da-8459-58dc6676e504.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retr

Generating Descriptions:  11%|█         | 96/900 [03:27<44:27,  3.32s/it]


[PROCESSING] Row 96: long_sleeve_dress_1582.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/7cf472f9-89d3-4614-ae8a-581b55dd996d.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/7cf472f9-89d3-4614-ae8a-581b55dd996d.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:  11%|█         | 97/900 [03:28<34:17,  2.56s/it]


[PROCESSING] Row 97: long_sleeve_dress_1315.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/0648a35d-0020-47e0-947a-7058f00ea2a0.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/0648a35d-0020-47e0-947a-7058f00ea2a0.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:  11%|█         | 98/900 [03:28<27:13,  2.04s/it]


[PROCESSING] Row 98: long_sleeve_dress_464.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/02c11e76-bde6-439e-81fa-ed6eb2dd415f.jpeg?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/02c11e76-bde6-439e-81fa-ed6eb2dd415f.jpeg?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:  11%|█         | 99/900 [03:29<22:17,  1.67s/it]


[PROCESSING] Row 99: long_sleeve_dress_2220.jpg (long sleeve dress)
[ERROR] Could not download image: https://n.nordstrommedia.com/it/a0a128fe-706e-441a-a776-bb174aea1648.png?h=368&w=240&dpr=2 → HTTPSConnectionPool(host='n.nordstrommedia.com', port=443): Max retries exceeded with url: /it/a0a128fe-706e-441a-a776-bb174aea1648.png?h=368&w=240&dpr=2 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)')))


Generating Descriptions:  11%|█         | 100/900 [03:30<18:50,  1.41s/it]


[PROCESSING] Row 100: long_sleeve_top_1309.jpg (long sleeve top)


Generating Descriptions:  11%|█         | 101/900 [03:35<32:04,  2.41s/it]

[💾] Checkpoint saved at row 100

[PROCESSING] Row 101: long_sleeve_top_139.jpg (long sleeve top)


Generating Descriptions:  11%|█▏        | 102/900 [03:39<38:24,  2.89s/it]


[PROCESSING] Row 102: long_sleeve_top_3028.jpg (long sleeve top)


Generating Descriptions:  11%|█▏        | 103/900 [03:43<44:53,  3.38s/it]


[PROCESSING] Row 103: long_sleeve_top_1161.jpg (long sleeve top)
[ERROR] Gemini failed (attempt 1): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 16
}
]
[WAIT] Sleeping 2s due to rate limit...
[ERROR] Gemini failed (attempt 2): 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rat

Generating Descriptions:  11%|█▏        | 103/900 [03:55<30:23,  2.29s/it]


KeyboardInterrupt: 

In [28]:
print("Garment types in batch:")
print(sampled_batches["garment"].value_counts())


Garment types in batch:
garment
long sleeve dress     100
long sleeve top       100
short sleeve dress    100
short sleeve top      100
shorts                100
skirt                 100
trousers              100
vest                  100
vest dress            100
Name: count, dtype: int64


In [16]:
df['description'].iloc[54]

'A black, short-sleeved, athletic-fit t-shirt with teal stripes on the shoulders, worn by a man.'

In [17]:
df['url'].iloc[54]

'http://assets.myntassets.com/v1/images/style/properties/c52cbe5a342e72dd297381897f1c0e97_images.jpg'

In [4]:
from pymongo import MongoClient

uri = "mongodb+srv://shivani25shri10:shivani25shri10@fashionista.mog6frv.mongodb.net/?retryWrites=tru"
client = MongoClient(uri)

# Access database and collection
db = client['fashionista']
collection = db['catalog_data']


  return get_validated_options(opts, warn)


In [6]:

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

uri = "mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [7]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Setup
uri = "mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/"
client = MongoClient(uri, server_api=ServerApi('1'))

# Confirm connection
try:
    client.admin.command('ping')
    print("✅ Connected to MongoDB!")
except Exception as e:
    print(f"❌ Connection error: {e}")

# Specify database and collection
db = client["fashionista_"]
collection = db["catalog_data"]

# Drop all documents (but keep collection & indexes)
result = collection.delete_many({})
print(f"🧹 Deleted {result.deleted_count} documents from 'catalog_data'")


✅ Connected to MongoDB!
🧹 Deleted 80 documents from 'catalog_data'


In [12]:
import pandas as pd
df = pd.read_csv("with_descriptions_checkpoint.csv")
missing = df["description"].isna() | df["description"].str.strip().eq("")
print(f"Missing descriptions: {missing.sum()} / {len(df)}")


Missing descriptions: 23261 / 23266


In [147]:
import pandas as pd

df = pd.read_csv("with_descriptions_checkpoint.csv")
remaining = df[df["description"].isna() | df["description"].str.strip().eq("")]
print(remaining["garment"].value_counts())


garment
shorts                70
vest                  70
vest dress            70
trousers              64
skirt                 59
short sleeve dress    50
short sleeve top      50
long sleeve top       40
Name: count, dtype: int64


In [148]:
df_full = pd.read_csv("metadata.csv")

# Find missing one
existing = pd.read_csv("with_descriptions_checkpoint.csv")
existing_ids = set(existing["filename"])

needed = df_full[
    (df_full["garment"] == "long sleeve top") &
    (~df_full["filename"].isin(existing_ids))
].sample(1)

# Add to checkpoint file
new_df = pd.concat([existing, needed], ignore_index=True)
new_df.to_csv("with_descriptions_checkpoint.csv", index=False)
print("✅ Topped up 'long sleeve top' to 100 rows.")


✅ Topped up 'long sleeve top' to 100 rows.


In [149]:
bad_longsleeve = df[
    (df["garment"] == "long sleeve top") &
    (~df["description"].str.strip().str.lower().isin([
        "image download failed", "failed to generate description", "gemini failed", "nan", ""
    ]))
]

print(f"✅ Valid 'long sleeve top' descriptions: {len(bad_longsleeve)} / 101")


✅ Valid 'long sleeve top' descriptions: 78 / 101


In [37]:
import os
import time
import pandas as pd
import requests
import google.generativeai as genai
from tqdm import tqdm

# ─── Setup ─────────────────────────────────────────────────────────
genai.configure(api_key="AIzaSyCgmL2AQf96ZizPVmAtJzoh1CgfhV6IoUQ")
MODEL_NAME = "gemini-1.5-flash"
CHECKPOINT_CSV = "with_descriptions_checkpoint.csv"

SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

def generate_description_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        image_bytes = response.content
    except Exception as e:
        print(f"[❌] Image download failed: {e}")
        return "Image download failed"

    try:
        model = genai.GenerativeModel(MODEL_NAME)
        result = model.generate_content([
            SYSTEM_PROMPT,
            {"mime_type": "image/jpeg", "data": image_bytes},
            "Describe only the garment."
        ])
        return result.text.strip()
    except Exception as e:
        print(f"[⚠️] Gemini error: {e}")
        return "Gemini failed"

# ─── Load checkpoint ───────────────────────────────────────────────
df = pd.read_csv(CHECKPOINT_CSV)

# ─── Filter broken rows for long sleeve top ────────────────────────
bad_rows = df[
    (df["garment"] == "long sleeve top") &
    (df["description"].astype(str).str.lower().isin(["nan", "image download failed", "gemini failed", ""]))
].copy()

print(f"🔁 Regenerating {len(bad_rows)} 'long sleeve top' descriptions...\n")

# ─── Regenerate & Replace ──────────────────────────────────────────
for idx in tqdm(bad_rows.index):
    row = df.loc[idx]
    url = row["url"]

    print(f"[PROCESSING] Row {idx} — {row['filename']}")
    desc = generate_description_from_url(url)
    df.at[idx, "description"] = desc

    # Respect rate limits
    time.sleep(4.1)

    if idx % 10 == 0:
        df.to_csv(CHECKPOINT_CSV, index=False)
        print(f"[💾] Saved checkpoint at {idx}")

# Final save
df.to_csv(CHECKPOINT_CSV, index=False)
print("\n✅ Done regenerating long sleeve top descriptions.")


🔁 Regenerating 100 'long sleeve top' descriptions...



  0%|          | 0/100 [00:00<?, ?it/s]

[PROCESSING] Row 101 — long_sleeve_top_139.jpg


  1%|          | 1/100 [00:06<10:50,  6.57s/it]

[PROCESSING] Row 102 — long_sleeve_top_3028.jpg


  2%|▏         | 2/100 [00:13<11:14,  6.88s/it]

[PROCESSING] Row 103 — long_sleeve_top_1161.jpg


  3%|▎         | 3/100 [00:20<11:02,  6.83s/it]

[PROCESSING] Row 104 — long_sleeve_top_1023.jpg


  4%|▍         | 4/100 [00:28<11:37,  7.27s/it]

[PROCESSING] Row 105 — long_sleeve_top_332.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/7baa2d94f90a25589adbb9518950a705_images.jpg


  5%|▌         | 5/100 [00:34<10:56,  6.91s/it]

[PROCESSING] Row 106 — long_sleeve_top_1037.jpg


  6%|▌         | 6/100 [00:45<13:03,  8.34s/it]

[PROCESSING] Row 107 — long_sleeve_top_879.jpg


  7%|▋         | 7/100 [00:53<12:41,  8.18s/it]

[PROCESSING] Row 108 — long_sleeve_top_2131.jpg


  8%|▊         | 8/100 [01:01<12:10,  7.94s/it]

[PROCESSING] Row 109 — long_sleeve_top_1089.jpg


  9%|▉         | 9/100 [01:08<11:50,  7.81s/it]

[PROCESSING] Row 110 — long_sleeve_top_2656.jpg


 10%|█         | 10/100 [01:16<11:46,  7.85s/it]

[💾] Saved checkpoint at 110
[PROCESSING] Row 111 — long_sleeve_top_2820.jpg


 11%|█         | 11/100 [01:24<11:31,  7.77s/it]

[PROCESSING] Row 112 — long_sleeve_top_1744.jpg


 12%|█▏        | 12/100 [01:34<12:28,  8.51s/it]

[PROCESSING] Row 113 — long_sleeve_top_423.jpg


 13%|█▎        | 13/100 [01:42<11:59,  8.27s/it]

[PROCESSING] Row 114 — long_sleeve_top_2826.jpg


 14%|█▍        | 14/100 [01:49<11:41,  8.16s/it]

[PROCESSING] Row 115 — long_sleeve_top_30.jpg


 15%|█▌        | 15/100 [01:57<11:26,  8.08s/it]

[PROCESSING] Row 116 — long_sleeve_top_1178.jpg


 16%|█▌        | 16/100 [02:05<11:20,  8.10s/it]

[PROCESSING] Row 117 — long_sleeve_top_990.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/dae3528325ec7711ccbfc4493bdddb60_images.jpg


 17%|█▋        | 17/100 [02:12<10:25,  7.54s/it]

[PROCESSING] Row 118 — long_sleeve_top_1190.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/685d13a0d2b1f3f496c1b8ed87665760_images.jpg


 18%|█▊        | 18/100 [02:18<09:46,  7.16s/it]

[PROCESSING] Row 119 — long_sleeve_top_812.jpg


 19%|█▉        | 19/100 [02:25<09:39,  7.16s/it]

[PROCESSING] Row 120 — long_sleeve_top_2587.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/9b999459edff1b9dceb848ff0596d08c_images.jpg


 20%|██        | 20/100 [02:32<09:14,  6.93s/it]

[💾] Saved checkpoint at 120
[PROCESSING] Row 121 — long_sleeve_top_2271.jpg


 21%|██        | 21/100 [02:40<09:51,  7.49s/it]

[PROCESSING] Row 122 — long_sleeve_top_2606.jpg


 22%|██▏       | 22/100 [02:48<09:53,  7.61s/it]

[PROCESSING] Row 123 — long_sleeve_top_2602.jpg


 23%|██▎       | 23/100 [02:57<10:06,  7.88s/it]

[PROCESSING] Row 124 — long_sleeve_top_256.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/4aee26ace1ca3775fef111edf24e4dfe_images.jpg


 24%|██▍       | 24/100 [03:03<09:24,  7.43s/it]

[PROCESSING] Row 125 — long_sleeve_top_1048.jpg


 25%|██▌       | 25/100 [03:12<09:41,  7.76s/it]

[PROCESSING] Row 126 — long_sleeve_top_789.jpg


 26%|██▌       | 26/100 [03:19<09:17,  7.54s/it]

[PROCESSING] Row 127 — long_sleeve_top_2658.jpg


 27%|██▋       | 27/100 [03:27<09:19,  7.66s/it]

[PROCESSING] Row 128 — long_sleeve_top_1437.jpg


 28%|██▊       | 28/100 [03:34<09:10,  7.65s/it]

[PROCESSING] Row 129 — long_sleeve_top_2981.jpg


 29%|██▉       | 29/100 [03:42<09:08,  7.72s/it]

[PROCESSING] Row 130 — long_sleeve_top_952.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/caf911fc12e18ac0e9261ca61b10b0a1_images.jpg


 30%|███       | 30/100 [03:48<08:31,  7.30s/it]

[💾] Saved checkpoint at 130
[PROCESSING] Row 131 — long_sleeve_top_1025.jpg


 31%|███       | 31/100 [03:56<08:23,  7.30s/it]

[PROCESSING] Row 132 — long_sleeve_top_554.jpg


 32%|███▏      | 32/100 [04:03<08:18,  7.34s/it]

[PROCESSING] Row 133 — long_sleeve_top_2843.jpg


 33%|███▎      | 33/100 [04:11<08:24,  7.53s/it]

[PROCESSING] Row 134 — long_sleeve_top_134.jpg


 34%|███▍      | 34/100 [04:18<08:03,  7.33s/it]

[PROCESSING] Row 135 — long_sleeve_top_2593.jpg


 35%|███▌      | 35/100 [04:24<07:40,  7.08s/it]

[PROCESSING] Row 136 — long_sleeve_top_102.jpg


 36%|███▌      | 36/100 [04:33<07:55,  7.43s/it]

[PROCESSING] Row 137 — long_sleeve_top_670.jpg


 37%|███▋      | 37/100 [04:41<08:02,  7.66s/it]

[PROCESSING] Row 138 — long_sleeve_top_1090.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/08e90a9a35d85a9a92845e95bfc64135_images.jpg


 38%|███▊      | 38/100 [04:47<07:28,  7.24s/it]

[PROCESSING] Row 139 — long_sleeve_top_2953.jpg


 39%|███▉      | 39/100 [04:55<07:37,  7.50s/it]

[PROCESSING] Row 140 — long_sleeve_top_2541.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/2656e89480744d296a68a890ad412cec_images.jpg


 40%|████      | 40/100 [05:02<07:10,  7.17s/it]

[💾] Saved checkpoint at 140
[PROCESSING] Row 141 — long_sleeve_top_17.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/11dcaeada4fab2e575c449e922425c5d_images.jpg


 41%|████      | 41/100 [05:08<06:47,  6.90s/it]

[PROCESSING] Row 142 — long_sleeve_top_93.jpg


 42%|████▏     | 42/100 [05:15<06:45,  7.00s/it]

[PROCESSING] Row 143 — long_sleeve_top_2822.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/dd238625b03163c1e014cb8359aaee74_images.jpg


 43%|████▎     | 43/100 [05:21<06:26,  6.78s/it]

[PROCESSING] Row 144 — long_sleeve_top_903.jpg


 44%|████▍     | 44/100 [05:29<06:40,  7.15s/it]

[PROCESSING] Row 145 — long_sleeve_top_3052.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/5d0b6acffe43e1e02e36168193ae5507_images.jpg


 45%|████▌     | 45/100 [05:36<06:19,  6.90s/it]

[PROCESSING] Row 146 — long_sleeve_top_840.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/11e3b99a45aa0a0f5dd1926162d9666c_images.jpg


 46%|████▌     | 46/100 [05:42<06:02,  6.71s/it]

[PROCESSING] Row 147 — long_sleeve_top_1034.jpg


 47%|████▋     | 47/100 [05:50<06:16,  7.10s/it]

[PROCESSING] Row 148 — long_sleeve_top_2016.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/5a05a397743d810ce8ebb8971d56fbae_images.jpg


 48%|████▊     | 48/100 [05:56<05:57,  6.87s/it]

[PROCESSING] Row 149 — long_sleeve_top_229.jpg


 49%|████▉     | 49/100 [06:04<06:04,  7.15s/it]

[PROCESSING] Row 150 — long_sleeve_top_1878.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/John-Players-Men-Blue-Shirt_0df60ef654cc788b5e4d80cdf4ce422a_images.jpg


 50%|█████     | 50/100 [06:11<05:45,  6.90s/it]

[💾] Saved checkpoint at 150
[PROCESSING] Row 151 — long_sleeve_top_485.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/b422c9a997f44f9b46537a1de6b68d44_images.jpg


 51%|█████     | 51/100 [06:17<05:28,  6.71s/it]

[PROCESSING] Row 152 — long_sleeve_top_1714.jpg


 52%|█████▏    | 52/100 [06:27<06:07,  7.65s/it]

[PROCESSING] Row 153 — long_sleeve_top_1474.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/1a05357dcda1023f90818acd8c9d9507_images.jpg


 53%|█████▎    | 53/100 [06:33<05:40,  7.24s/it]

[PROCESSING] Row 154 — long_sleeve_top_2910.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/Scullers-For-Her-Women-White-Striped-Shirt_01b4f7d8ac67c4b186651f6a4c7d010a_images.jpg


 54%|█████▍    | 54/100 [06:39<05:19,  6.94s/it]

[PROCESSING] Row 155 — long_sleeve_top_32.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 47
}
]


 55%|█████▌    | 55/100 [06:46<05:10,  6.91s/it]

[PROCESSING] Row 156 — long_sleeve_top_1179.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 41
}
]


 56%|█████▌    | 56/100 [06:52<04:52,  6.65s/it]

[PROCESSING] Row 157 — long_sleeve_top_2353.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 35
}
]


 57%|█████▋    | 57/100 [06:58<04:34,  6.37s/it]

[PROCESSING] Row 158 — long_sleeve_top_572.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
]


 58%|█████▊    | 58/100 [07:03<04:17,  6.13s/it]

[PROCESSING] Row 159 — long_sleeve_top_680.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/ad402e6188d3c2cf138844300719afb1_images.jpg


 59%|█████▉    | 59/100 [07:10<04:12,  6.15s/it]

[PROCESSING] Row 160 — long_sleeve_top_240.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 18
}
]


 60%|██████    | 60/100 [07:16<04:04,  6.11s/it]

[💾] Saved checkpoint at 160
[PROCESSING] Row 161 — long_sleeve_top_3021.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/013a1ca6df7958d34677905b5ab8d57d_images.jpg


 61%|██████    | 61/100 [07:22<04:00,  6.16s/it]

[PROCESSING] Row 162 — long_sleeve_top_3057.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 4
}
]


 62%|██████▏   | 62/100 [07:30<04:12,  6.65s/it]

[PROCESSING] Row 163 — long_sleeve_top_2637.jpg
[❌] Image download failed: 503 Server Error: Service Unavailable for url: http://assets.myntassets.com/v1/images/style/properties/fd2615c4de55a66b70bd299f441fe9b4_images.jpg


 63%|██████▎   | 63/100 [07:36<04:01,  6.53s/it]

[PROCESSING] Row 164 — long_sleeve_top_1042.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 52
}
]


 64%|██████▍   | 64/100 [07:42<03:47,  6.32s/it]

[PROCESSING] Row 165 — long_sleeve_top_1538.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 46
}
]


 65%|██████▌   | 65/100 [07:48<03:37,  6.21s/it]

[PROCESSING] Row 166 — long_sleeve_top_1861.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 40
}
]


 66%|██████▌   | 66/100 [07:53<03:21,  5.93s/it]

[PROCESSING] Row 167 — long_sleeve_top_2460.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 34
}
]


 67%|██████▋   | 67/100 [07:59<03:14,  5.90s/it]

[PROCESSING] Row 168 — long_sleeve_top_3123.jpg
[⚠️] Gemini error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 28
}
]


 67%|██████▋   | 67/100 [08:03<03:58,  7.22s/it]


KeyboardInterrupt: 

### checkpoint to ensure the descriptions of images

In [128]:
import pandas as pd

# Load checkpointed descriptions
df = pd.read_csv("with_descriptions_checkpoint.csv")

# Normalize and clean
df["description"] = df["description"].fillna("").astype(str).str.strip().str.lower()

# Define what counts as invalid
failures = ["image download failed", "failed to generate description", "gemini failed", "nan", ""]
df["valid_description"] = ~df["description"].isin(failures)

# Cleaned summary
summary = df.groupby("garment").agg(
    total_samples=("description", "count"),
    valid_descriptions=("valid_description", "sum")
).sort_values("garment")

print("📊 Garment Description Summary:\n")
print(summary)

# Preview 3 samples per garment
print("\n🧾 Sample Descriptions by Garment:\n")
for garment, group in df.groupby("garment"):
    valid_rows = group[group["valid_description"]]
    print(f"📎 {garment} — {len(valid_rows)} / {len(group)} valid")
    if not valid_rows.empty:
        print(valid_rows["description"].sample(min(3, len(valid_rows))).to_string(index=False))
    else:
        print("❌ No valid descriptions")
    print("-" * 60)


📊 Garment Description Summary:

                    total_samples  valid_descriptions
garment                                              
long sleeve dress             100                  52
long sleeve top               101                  38
short sleeve dress            100                  50
short sleeve top              100                  25
shorts                        100                  30
skirt                         100                  33
trousers                      100                  30
vest                          100                  30
vest dress                    100                  30

🧾 Sample Descriptions by Garment:

📎 long sleeve dress — 52 / 100 valid
woman wearing a light purple maxi dress with a ...
woman wearing a black midi dress with a keyhole...
woman wearing a navy blue pinstripe mini wrap d...
------------------------------------------------------------
📎 long sleeve top — 38 / 101 valid
a red and black plaid long-sleeved shirt with a...
a

In [49]:
import os
import time
import pandas as pd
import requests
import google.generativeai as genai
from tqdm import tqdm

# ─── Gemini Setup ─────────────────────────────────────────────────
genai.configure(api_key="AIzaSyC-ENPKd8GgSAo4nmfbExg4oG8gsc5aeRs")
MODEL_NAME = "gemini-1.5-flash"
CHECKPOINT_CSV = "with_descriptions_checkpoint.csv"

SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, fabric cues, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

def generate_description(url):
    try:
        resp = requests.get(url, timeout=10, verify=False)  # if SSL fails
        resp.raise_for_status()
        image_bytes = resp.content
    except Exception as e:
        print(f"[❌] Download failed: {e}")
        return "Image download failed"

    try:
        model = genai.GenerativeModel(MODEL_NAME)
        result = model.generate_content([
            SYSTEM_PROMPT,
            {"mime_type": "image/jpeg", "data": image_bytes},
            "Describe only the garment."
        ])
        return result.text.strip()
    except Exception as e:
        print(f"[⚠️] Gemini error: {e}")
        return "Gemini failed"

# ─── Load CSV & Filter short sleeve dress ────────────────────────
df = pd.read_csv(CHECKPOINT_CSV)
failure_values = ["", "image download failed", "failed to generate description", "gemini failed", "nan"]
df["description"] = df["description"].fillna("").astype(str).str.strip().str.lower()

# Find bad/missing rows for this garment
bad_rows = df[
    (df["garment"] == "short sleeve dress") &
    (df["description"].isin(failure_values))
].reset_index()

# Limit to 50 rows (quota-safe)
batch = bad_rows.head(50)
print(f"🔁 Generating {len(batch)} descriptions for 'short sleeve dress'")

# ─── Run Generation ───────────────────────────────────────────────
for _, row in tqdm(batch.iterrows(), total=len(batch), desc="Short Sleeve Dress"):
    idx = row["index"]
    url = row["url"]
    fname = row["filename"]

    print(f"\n[PROCESSING] Row {idx} — {fname}")
    desc = generate_description(url)
    df.at[idx, "description"] = desc

    time.sleep(4.1)  # safe for free tier

    if idx % 10 == 0:
        df.to_csv(CHECKPOINT_CSV, index=False)
        print(f"[💾] Checkpoint saved at row {idx}")

# Final save
df.to_csv(CHECKPOINT_CSV, index=False)
print("\n✅ Done with today’s batch for 'short sleeve dress'.")


🔁 Generating 50 descriptions for 'short sleeve dress'


Short Sleeve Dress:   0%|          | 0/50 [00:00<?, ?it/s]


[PROCESSING] Row 200 — short_sleeve_dress_544.jpg


Short Sleeve Dress:   2%|▏         | 1/50 [00:07<06:01,  7.39s/it]

[💾] Checkpoint saved at row 200

[PROCESSING] Row 201 — short_sleeve_dress_2498.jpg





[PROCESSING] Row 202 — short_sleeve_dress_554.jpg





[PROCESSING] Row 203 — short_sleeve_dress_422.jpg





[PROCESSING] Row 204 — short_sleeve_dress_798.jpg





[PROCESSING] Row 205 — short_sleeve_dress_194.jpg


Short Sleeve Dress:  12%|█▏        | 6/50 [00:40<04:48,  6.56s/it]


[PROCESSING] Row 206 — short_sleeve_dress_296.jpg





[PROCESSING] Row 207 — short_sleeve_dress_772.jpg





[PROCESSING] Row 208 — short_sleeve_dress_1855.jpg





[PROCESSING] Row 209 — short_sleeve_dress_173.jpg





[PROCESSING] Row 210 — short_sleeve_dress_1228.jpg


Short Sleeve Dress:  22%|██▏       | 11/50 [01:11<04:09,  6.39s/it]

[💾] Checkpoint saved at row 210

[PROCESSING] Row 211 — short_sleeve_dress_2520.jpg





[PROCESSING] Row 212 — short_sleeve_dress_1700.jpg





[PROCESSING] Row 213 — short_sleeve_dress_2585.jpg





[PROCESSING] Row 214 — short_sleeve_dress_1344.jpg





[PROCESSING] Row 215 — short_sleeve_dress_44.jpg





[PROCESSING] Row 216 — short_sleeve_dress_298.jpg





[PROCESSING] Row 217 — short_sleeve_dress_1748.jpg





[PROCESSING] Row 218 — short_sleeve_dress_1508.jpg





[PROCESSING] Row 219 — short_sleeve_dress_1033.jpg





[PROCESSING] Row 220 — short_sleeve_dress_254.jpg


Short Sleeve Dress:  42%|████▏     | 21/50 [02:16<03:08,  6.51s/it]

[💾] Checkpoint saved at row 220

[PROCESSING] Row 221 — short_sleeve_dress_1195.jpg





[PROCESSING] Row 222 — short_sleeve_dress_2530.jpg





[PROCESSING] Row 223 — short_sleeve_dress_2436.jpg





[PROCESSING] Row 224 — short_sleeve_dress_1237.jpg





[PROCESSING] Row 225 — short_sleeve_dress_478.jpg





[PROCESSING] Row 226 — short_sleeve_dress_2131.jpg





[PROCESSING] Row 227 — short_sleeve_dress_532.jpg





[PROCESSING] Row 228 — short_sleeve_dress_651.jpg





[PROCESSING] Row 229 — short_sleeve_dress_2547.jpg





[PROCESSING] Row 230 — short_sleeve_dress_1560.jpg


Short Sleeve Dress:  62%|██████▏   | 31/50 [03:21<02:01,  6.40s/it]

[💾] Checkpoint saved at row 230

[PROCESSING] Row 231 — short_sleeve_dress_1547.jpg





[PROCESSING] Row 232 — short_sleeve_dress_175.jpg





[PROCESSING] Row 233 — short_sleeve_dress_1307.jpg





[PROCESSING] Row 234 — short_sleeve_dress_73.jpg





[PROCESSING] Row 235 — short_sleeve_dress_170.jpg





[PROCESSING] Row 236 — short_sleeve_dress_522.jpg





[PROCESSING] Row 237 — short_sleeve_dress_1717.jpg





[PROCESSING] Row 238 — short_sleeve_dress_756.jpg





[PROCESSING] Row 239 — short_sleeve_dress_368.jpg





[PROCESSING] Row 240 — short_sleeve_dress_1600.jpg


Short Sleeve Dress:  82%|████████▏ | 41/50 [04:30<01:00,  6.72s/it]

[💾] Checkpoint saved at row 240

[PROCESSING] Row 241 — short_sleeve_dress_1004.jpg





[PROCESSING] Row 242 — short_sleeve_dress_1501.jpg


Short Sleeve Dress:  86%|████████▌ | 43/50 [04:43<00:45,  6.54s/it]


[PROCESSING] Row 243 — short_sleeve_dress_2412.jpg





[PROCESSING] Row 244 — short_sleeve_dress_1933.jpg





[PROCESSING] Row 245 — short_sleeve_dress_861.jpg





[PROCESSING] Row 246 — short_sleeve_dress_56.jpg





[PROCESSING] Row 247 — short_sleeve_dress_1847.jpg





[PROCESSING] Row 248 — short_sleeve_dress_211.jpg





[PROCESSING] Row 249 — short_sleeve_dress_518.jpg


Short Sleeve Dress: 100%|██████████| 50/50 [05:29<00:00,  6.58s/it]


✅ Done with today’s batch for 'short sleeve dress'.





In [56]:

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

uri = "mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [61]:
pip install sentence-transformers


Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.7.1-cp313-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting scipy (from sentence-transformers)
  Downloading scipy-1.16.0-cp313-cp313-macosx_14_0_arm64.whl.metadata (61 kB)
Collecting setuptools (from torch>=1.11.0->sentence-transformers)
  Using cached setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Collecting sympy>=1.13.3 (from torch>=1.11.0->sentence-transformers)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch>=1.11.0->sentence-transformers)
  Downloading networkx-3.5-py3-none

In [71]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


In [129]:
from transformers import AutoTokenizer, AutoModel

model_path = "/Users/shivanishrivastava/Desktop/Complete_the_Look/e5-small-v2"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, use_safetensors=False)

print("✅ Model loaded successfully!")


✅ Model loaded successfully!


### creating vector embeddings and loading it to Mongo DB

In [130]:
import torch
import pandas as pd
from pymongo import MongoClient
from tqdm import tqdm

# ─── Helper: Mean Pooling ──────────────────────────────────────────
def get_embedding(text: str):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state
        mask = inputs['attention_mask'].unsqueeze(-1).expand(embeddings.size())
        summed = torch.sum(embeddings * mask, 1)
        counts = torch.clamp(mask.sum(1), min=1e-9)
        return (summed / counts)[0].tolist()

# ─── Load Data ─────────────────────────────────────────────────────
df = pd.read_csv("with_descriptions_checkpoint.csv")
df["description"] = df["description"].fillna("").astype(str).str.strip().str.lower()
failures = ["", "image download failed", "gemini failed", "failed to generate description", "nan"]
df_valid = df[~df["description"].isin(failures)].copy().reset_index()

print(f"🧠 Embedding {len(df_valid)} valid descriptions...")

# ─── Connect to MongoDB ────────────────────────────────────────────
mongo = MongoClient("mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/")
collection = mongo["fashionista_"]["catalog_data"]
collection.drop()
print("🧹 Dropped existing 'catalog_data' collection.")

# ─── Embed + Insert ────────────────────────────────────────────────
for _, row in tqdm(df_valid.iterrows(), total=len(df_valid), desc="Embedding & Inserting"):
    try:
        vec = get_embedding(row["description"])
        doc = {
            "product_id": f"item_{row['index']}",
            "text": row["description"],
            "image_url": row["url"],
            "embedding": vec
        }
        collection.insert_one(doc)
    except Exception as e:
        print(f"❌ Failed to embed item_{row['index']}: {e}")

print("✅ All embeddings inserted into MongoDB!")


🧠 Embedding 318 valid descriptions...
🧹 Dropped existing 'catalog_data' collection.


Embedding & Inserting: 100%|██████████| 318/318 [00:25<00:00, 12.30it/s]

✅ All embeddings inserted into MongoDB!





In [78]:
pip install transformers timm torchvision


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting timm
  Downloading timm-1.0.16-py3-none-any.whl.metadata (57 kB)
Collecting torchvision
  Downloading torchvision-0.22.1-cp313-cp313-macosx_11_0_arm64.whl.metadata (6.1 kB)
Downloading timm-1.0.16-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading torchvision-0.22.1-cp313-cp313-macosx_11_0_arm64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torchvision, timm
Successfully installed timm-1.0.16 torchvision-0.22.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated pa

In [86]:
!pip install llava


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting llava
  Downloading llava-0.0.1.dev0-py3-none-any.whl.metadata (360 bytes)
Downloading llava-0.0.1.dev0-py3-none-any.whl (1.1 kB)
Installing collected packages: llava
Successfully installed llava-0.0.1.dev0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [144]:
# ─── 0. Imports ─────────────────────────────────────────────────────────
import os, base64
import numpy as np
import torch
from pymongo import MongoClient
from google import genai
from google.genai import types
from transformers import AutoTokenizer, AutoModel
from IPython.display import display, HTML
from PIL import Image
import requests

# ─── 1. CONFIG: Gemini + MongoDB ───────────────────────────────────────
GEMINI_API_KEY = "AIzaSyBO_6Afs_Ub45w6qoWDCjYDxVFA-siZR8k"  # ← your key
client = genai.Client(api_key=GEMINI_API_KEY)
GEN_MODEL = "gemini-2.5-flash-preview-04-17"

mongo = MongoClient(
    "mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/"
)
col   = mongo["fashionista_"]["catalog_data"]

# ─── 2. LOAD LOCAL e5-SMALL-V2 EMBEDDING MODEL (dim 384) ────────────────
tokenizer_e5 = AutoTokenizer.from_pretrained("./e5-small-v2")
model_e5     = AutoModel.from_pretrained("./e5-small-v2").eval()

def embed_local(text: str) -> np.ndarray:
    inp = tokenizer_e5(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        out = model_e5(**inp).last_hidden_state
        mask = inp["attention_mask"].unsqueeze(-1).expand(out.size())
        summed = (out * mask).sum(1)
        counts = mask.sum(1).clamp(min=1e-9)
        vec = (summed / counts)[0]
    return vec.cpu().numpy()

# ─── 3. HELPERS: load image, retrieve nearest neighbors ─────────────────
def load_uri(path: str) -> str:
    raw = open(path, "rb").read()
    return "data:image/jpeg;base64," + base64.b64encode(raw).decode()

def retrieve(emb: np.ndarray, k: int = 5):
    qn = emb / np.linalg.norm(emb)
    scored = []
    for doc in col.find({}, {"text":1, "image_url":1, "embedding":1}):
        e = np.array(doc["embedding"], dtype=np.float32)
        en = e / np.linalg.norm(e)
        scored.append((float(np.dot(qn, en)), doc))
    scored.sort(key=lambda x: x[0], reverse=True)
    return [d for _, d in scored[:k]]

# ─── 4. COMPLETE-THE-LOOK + RETRIEVE + RENDER ──────────────────────────
def complete_and_show(image_path: str):
    # 1) Placeholder caption (you could swap in a real captioner)
    uri = load_uri(image_path)
    caption_text = "Uploaded image"
    
    # 2) Retrieve top-5 by local embedding
    vec   = embed_local(caption_text)
    items = retrieve(vec, k=5)
    
    # 3) Build Gemini prompt
    sys_txt = (
        "You are a world-class fashion stylist.\n"
        "Here are 5 catalog items that pair with this garment:\n"
        + "\n".join(f"- {it['text']} ({it['image_url']})" for it in items)
        + "\n\nBased on these, describe in 2–3 sentences what accessories and garments would complete the look."
    )
    usr_txt = f"Image: {uri}"
    
    sys_part = types.Part.from_text(text=sys_txt)
    usr_part = types.Part.from_text(text=usr_txt)
    sys_c    = types.Content(role="model", parts=[sys_part])
    usr_c    = types.Content(role="user",  parts=[usr_part])
    cfg      = types.GenerateContentConfig(response_mime_type="text/plain")
    
    stream = client.models.generate_content_stream(
        model=GEN_MODEL,
        contents=[sys_c, usr_c],
        config=cfg
    )
    advice = "".join(chunk.text for chunk in stream).strip()
    
    # 4) Display advice + image gallery
    print("💡 Styling Advice:\n", advice, "\n")
    
    gallery_html = "<div style='display:flex; gap:16px; flex-wrap: wrap;'>"
    for it in items:
        url  = it["image_url"]
        txt  = it["text"]
        gallery_html += f"""
          <div style='width:180px; text-align:center;'>
            <img src="{url}" width="180" style="border:1px solid #ddd;padding:4px;" /><br/>
            <small>{txt}</small>
          </div>
        """
    gallery_html += "</div>"
    display(HTML(gallery_html))

# ─── 5. RUN IT ────────────────────────────────────────────────────────
complete_and_show("images/trousers_734.jpg")


💡 Styling Advice:
 Pair this classic white tee with the charcoal 3/4 length athletic pants for a comfortable and casual base. Finish the look with some clean white sneakers and a lightweight sporty jacket or a simple watch to embrace an easy athleisure vibe. 




1) given an input image generate a description of what would look good to complete the look. Category : topwear, .....
    Add example in prompt that if bottom wear then suggest topwear and vice versa

2) convert the description generated into a embedding of the same dimension as that of the DB

3) Perform a similarity search then (cosine similarity) 

In [146]:

!pip install streamlit google-genai transformers torch pymongo pillow requests

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Main code to test the recommendations

In [145]:


import os
import time
import base64
import numpy as np
import torch
import requests
from PIL import Image
from IPython.display import display, HTML
from pymongo import MongoClient
from google import genai
from google.genai import types
from transformers import AutoTokenizer, AutoModel, CLIPProcessor, CLIPModel

# ─── 1) CONFIGURE CLIENTS ────────────────────────────────────────────────────
GEMINI_API_KEY = "AIzaSyBO_6Afs_Ub45w6qoWDCjYDxVFA-siZR8k"
client         = genai.Client(api_key=GEMINI_API_KEY)
GEN_MODEL      = "gemini-2.5-flash-preview-04-17"

mongo       = MongoClient("mongodb+srv://shivani25shri10:bn7Reynw8ymF2ytC@cluster0.c2asd9y.mongodb.net/")
collection  = mongo["fashionista_"]["catalog_data"]

# Local e5 embedder (dim=384)
TOKENIZER_PATH = "./e5-small-v2"
tokenizer_e5   = AutoTokenizer.from_pretrained(TOKENIZER_PATH, local_files_only=True)
model_e5       = AutoModel.from_pretrained(TOKENIZER_PATH, local_files_only=True).eval()

# CLIP for gender detection
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model     = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").eval()


# ─── 2) GENDER DETECTION ─────────────────────────────────────────────────────
def detect_gender(image_path: str) -> str:
    labels = ["men's clothing", "women's clothing", "unisex clothing"]
    img = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=labels, images=img, return_tensors="pt", padding=True)
    with torch.no_grad():
        probs = clip_model(**inputs).logits_per_image.softmax(dim=1)[0]
    return labels[int(probs.argmax())]


# ─── 3) CATEGORY INFERENCE ───────────────────────────────────────────────────
def infer_category_from_filename(path: str) -> str:
    fn = os.path.basename(path).lower()
    bottoms = ("pant", "trouser", "short", "skirt", "dress")
    return "bottom wear" if any(tok in fn for tok in bottoms) else "top wear"


# ─── 4) HELPERS ───────────────────────────────────────────────────────────────
def load_uri(path: str) -> str:
    raw = open(path, "rb").read()
    return "data:image/jpeg;base64," + base64.b64encode(raw).decode()

def embed_local(text: str) -> np.ndarray:
    inp = tokenizer_e5(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        hs   = model_e5(**inp).last_hidden_state
        mask = inp["attention_mask"].unsqueeze(-1).expand(hs.size())
        summed = (hs * mask).sum(1)
        counts = mask.sum(1).clamp(min=1e-9)
        vec    = (summed / counts)[0]
    return vec.cpu().numpy()

def retrieve_similar(emb: np.ndarray, gender: str, k: int = 5):
    """Try gender‐filtered first, then fall back to all items."""
    qn = emb / np.linalg.norm(emb)
    scored = []

    # 1) gender‐filtered
    docs = list(collection.find({"gender": gender}, {"text":1, "image_url":1, "embedding":1}))
    # 2) fallback if empty
    if not docs:
        docs = list(collection.find({}, {"text":1, "image_url":1, "embedding":1}))

    for doc in docs:
        e  = np.array(doc["embedding"], dtype=np.float32)
        en = e / np.linalg.norm(e)
        scored.append((float(np.dot(qn, en)), doc))

    scored.sort(key=lambda x: x[0], reverse=True)
    return [doc for _, doc in scored[:k]]


# ─── 5) UPDATED GEMINI PROMPT ─────────────────────────────────────────────────
SYSTEM_PROMPT = """
You are a world-class fashion stylist. You will always choose complementary pieces from the *other* category first:

- If Category is “bottom wear” (jeans, skirt, trousers, shorts), you MUST suggest top wear (blouses, tees, jackets) first, then shoes and accessories.
- If Category is “top wear” (blouse, jacket, sweater), you MUST suggest bottom wear (pants, skirts, shorts) first, then shoes and accessories.

Answer in 1–2 sentences.

Examples:
• Garment: "denim jeans", Category: "bottom wear" → "Pair with a crisp white blouse tucked in, topped with a tan trench coat and white sneakers."
• Garment: "leather jacket", Category: "top wear"   → "Wear over a black midi skirt, add ankle boots and a statement belt."
""".strip()

def generate_stylist_description(image_path: str, category: str, gender: str) -> str:
    uri = load_uri(image_path)
    user_prompt = (
        f"Here is the image: {uri}\n"
        f"Category: {category}\n"
        f"Gender: {gender}\n\n"
        "Please describe in 1–2 sentences what items and accessories would complete the look."
    )
    sys_part = types.Part.from_text(text=SYSTEM_PROMPT)
    usr_part = types.Part.from_text(text=user_prompt)
    sys_c    = types.Content(role="model", parts=[sys_part])
    usr_c    = types.Content(role="user",  parts=[usr_part])
    cfg      = types.GenerateContentConfig(response_mime_type="text/plain")

    stream = client.models.generate_content_stream(
        model=GEN_MODEL,
        contents=[sys_c, usr_c],
        config=cfg
    )
    return "".join(chunk.text for chunk in stream).strip()


# ─── 6) MAIN PIPELINE (inline, auto‐category) ───────────────────────────────
def complete_and_display_inline(image_path: str, category: str = None):
    # Auto‐infer if needed
    inferred = infer_category_from_filename(image_path)
    if category is None or category != inferred:
        category = inferred
        print(f"ℹ️ Inferred category as: {category}")

    # 1) Gender detection
    gender = detect_gender(image_path)
    print(f"👤 Detected gender target: {gender}\n")

    # 2) Gemini styling advice
    desc = generate_stylist_description(image_path, category, gender)
    print("💡 Gemini says:\n", desc, "\n")

    # 3) Embed + retrieve
    vec   = embed_local(desc)
    items = retrieve_similar(vec, gender, k=5)

    # 4) Inline flex-row gallery
    html = "<div style='display:flex; gap:12px; align-items:flex-start;'>"
    for doc in items:
        html += f"""
        <div style='flex:0 0 auto; text-align:center;'>
          <img src="{doc['image_url']}" width="120" style="border:1px solid #ccc; padding:4px;" /><br/>
          <small style="display:block; width:120px; overflow:hidden; text-overflow:ellipsis;">
            {doc['text']}
          </small>
        </div>
        """
    html += "</div>"
    display(HTML(html))
    return items


# ─── 7) RUN EXAMPLE ──────────────────────────────────────────────────────────
items = complete_and_display_inline(
    "/Users/shivanishrivastava/Desktop/Complete_the_Look/images/short_sleeve_top_1702.jpg"
)



ℹ️ Inferred category as: bottom wear
👤 Detected gender target: men's clothing

💡 Gemini says:
 Pair these navy shorts with a light-coloured tee or polo shirt. Complete the look with white sneakers or boat shoes and a casual belt. 



### Added description of rest of the garment types

In [None]:
# ─── INSTALL / IMPORTS ───────────────────────────────────────────────────────
# (run once)
# !pip install google-genai pandas pillow

import os
import time
import base64
import pandas as pd
from google import genai
from google.genai import types

# ─── 1) CONFIG ───────────────────────────────────────────────────────────────
GEMINI_API_KEY = "AIzaSyBO_6Afs_Ub45w6qoWDCjYDxVFA-siZR8k"
client         = genai.Client(api_key=GEMINI_API_KEY)
MODEL_NAME     = "gemini-2.5-flash-preview-04-17"

SYSTEM_PROMPT = """
You are a fashion-focused assistant. Given an image, generate a concise description 
of the garment only.
- Do NOT describe any human’s face, pose, or background.
- If a person is wearing the garment, only note “man,” “woman,” or “person wearing…,”
  then describe color, style, fit, etc.
- Example: “A black leather biker jacket with silver zippers, worn by a man.”
""".strip()

# ─── 2) HELPERS ───────────────────────────────────────────────────────────────
def load_image_as_data_uri(path: str) -> str:
    raw = open(path, "rb").read()
    return "data:image/jpeg;base64," + base64.b64encode(raw).decode()

def generate_apparel_description(uri: str, retries: int = 3) -> str:
    sys_c = types.Content(role="model",
                          parts=[types.Part.from_text(text=SYSTEM_PROMPT)])
    usr_c = types.Content(role="user",
                          parts=[types.Part.from_text(text=f"Here is the image: {uri}\nDescribe only the garment.")])
    cfg   = types.GenerateContentConfig(response_mime_type="text/plain")
    for attempt in range(retries):
        try:
            stream = client.models.generate_content_stream(
                model=MODEL_NAME,
                contents=[sys_c, usr_c],
                config=cfg
            )
            return "".join(chunk.text for chunk in stream).strip()
        except Exception:
            time.sleep(2 ** attempt)
    return ""

# ─── 3) PARAMETERS ───────────────────────────────────────────────────────────
CSV_PATH   = "with_descriptions_checkpoint.csv"
IMAGES_DIR = "/Users/shivanishrivastava/Desktop/Complete_the_Look/images"
BATCH_SIZE = 15

# Only process these remaining categories—short sleeve top is excluded
TO_FILL = [
    "shorts",
    "skirt",
    "trousers",
    "vest",
    "vest dress",
]

# ─── 4) BATCH LOOP ───────────────────────────────────────────────────────────
df = pd.read_csv(CSV_PATH)
if "description" not in df.columns:
    df["description"] = ""

processed = 0

for garment in TO_FILL:
    mask = (
        (df["garment"] == garment) &
        df["description"].fillna("").str.strip().isin({"", "No description", "Image missing"})
    )
    batch = df[mask].head(BATCH_SIZE)

    if batch.empty:
        print(f"✅ All done for '{garment}'.")
        continue

    print(f"▶️ Generating {len(batch)} descriptions for '{garment}'…")
    for idx, row in batch.iterrows():
        img_path = os.path.join(IMAGES_DIR, row["filename"])
        if not os.path.isfile(img_path):
            df.at[idx, "description"] = "Image missing"
        else:
            uri  = load_image_as_data_uri(img_path)
            desc = generate_apparel_description(uri)
            df.at[idx, "description"] = desc or "No description"
            processed += 1
            print(f"  • [{garment}] {row['filename']} → {df.at[idx,'description'][:60]!r}")
        time.sleep(1.5)

# ─── 5) SAVE CHECKPOINT ─────────────────────────────────────────────────────
df.to_csv(CSV_PATH, index=False)
print(f"\n💾 Batch complete: processed {processed} images. Re-run until all remaining categories are filled.")


▶️ Generating 15 descriptions for 'shorts'…
  • [shorts] shorts_693.jpg → 'A light blue oversized denim jacket with front buttons and c'
  • [shorts] shorts_1739.jpg → 'No description'
  • [shorts] shorts_214.jpg → 'No description'
  • [shorts] shorts_2881.jpg → 'No description'
  • [shorts] shorts_218.jpg → 'A black single-breasted blazer with lapels and buttons, worn'
  • [shorts] shorts_270.jpg → 'A black jacket with multiple silver zippers and textured pan'
  • [shorts] shorts_102.jpg → 'A white v-neck t-shirt, worn by a woman.'
  • [shorts] shorts_134.jpg → 'Person wearing a dark blue zipped bomber jacket with ribbed '
  • [shorts] shorts_1656.jpg → 'A black leather biker jacket with silver zippers, worn by a '
  • [shorts] shorts_1299.jpg → 'No description'
  • [shorts] shorts_192.jpg → 'No description'
  • [shorts] shorts_3065.jpg → 'No description'
  • [shorts] shorts_2003.jpg → 'A fitted black long-sleeved garment with intricate cutouts a'
  • [shorts] shorts_1381.jpg → 'A bla