In [38]:
import numpy as np
import pandas as pd
import re

In [14]:
from google.colab import files
uploaded = files.upload()

Saving clothes.csv to clothes (1).csv


# Preprocessing

In [16]:
#Load data

clothes_df = pd.read_csv('clothes.csv')
clothes_df

Unnamed: 0,gender,product_category,product_name,details
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...
3,women,ACCESSORIES,crossbody bag,crossbody bag adjustable strap lining zip clos...
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...
...,...,...,...,...
3628,men,SHORTS,seersucker bermuda short,regularfit bermuda short elasticate waistband ...
3629,men,SHORTS,jogger waist bermuda short,regularfit bermuda short adjustable waistband ...
3630,men,SHORTS,bermuda short crochet pocket,bermuda short cotton viscose blend feature adj...
3631,men,SHORTS,linen viscose knit bermuda short,loosefitte knit bermuda short spun linen visco...


In [17]:
# --- 1. Define lists for each broad category ---
tops = [
    "T-SHIRTS", "SHIRTS", "BLAZERS", "JACKETS", "OVERSHIRTS",
    "T-SHIRTS_SWEATSHIRTS", "HOODIES_SWEATSHIRTS", "POLO SHIRTS",
    "SWEATERS_CARDIGANS", "WAISTCOATS_GILETS", "KNITWEAR", "TOPS_BODYSUITS"
]

bottoms = [
    "TROUSERS", "JEANS", "SHORTS", "SKIRTS", "SHORTS_SKORTS", "DRESSES_JUMPSUITS"
]

shoes = ["SHOES"]

accessories = ["ACCESSORIES"]

# --- 2. Define function to map product_category to category_type ---
def assign_category_type(cat):
    if cat in tops:
        return "Top"
    elif cat in bottoms:
        return "Bottom"
    elif cat in shoes:
        return "Shoes"
    elif cat in accessories:
        return "Accessories"
    else:
        return "Other"

# --- 3. Apply function to create new column ---
clothes_df["category_type"] = clothes_df["product_category"].apply(assign_category_type)
print(clothes_df[["product_category", "category_type"]].drop_duplicates())

          product_category category_type
0                  JACKETS           Top
1                 KNITWEAR           Top
2                    JEANS        Bottom
3              ACCESSORIES   Accessories
5     T-SHIRTS_SWEATSHIRTS           Top
13                TROUSERS        Bottom
15                   SHOES         Shoes
17                  SHIRTS           Top
24       WAISTCOATS_GILETS           Top
25                  SKIRTS        Bottom
31                 BLAZERS           Top
34                T-SHIRTS           Top
42           SHORTS_SKORTS        Bottom
48       DRESSES_JUMPSUITS        Bottom
142         TOPS_BODYSUITS           Top
2240   HOODIES_SWEATSHIRTS           Top
2251                SHORTS        Bottom
2355           POLO SHIRTS           Top
2468            OVERSHIRTS           Top
3150    SWEATERS_CARDIGANS           Top
3232                 SUITS         Other
3233              SWIMWEAR         Other


In [39]:
material_mapping = [
    "acetate", "alpaca", "cotton", "cashmere", "denim", "gabardine", "gabardinestyle", "gabardinetype",
    "leather", "linen", "lyocell", "liocell", "modal", "neoprene", "organza", "polyester", "polyamide",
    "polyurethane", "ramie", "silk", "suede", "velvet", "viscose", "wool", "jute", "juteline"
]

weather_mapping = {
    "breathable": ["breathable", "breathability", "frontbreathableantiodour"],
    "absorbent": ["absorbent", "absorption"],
    "waterproof": ["water", "waterrepellent"],
    "windproof": ["wind", "windbreaker"],
    "insulation": ["thermal", "thermos", "thermoseale", "insulation", "insulate"],
    "light": ["lightweight", "lightness", "lightly"],
    "stretchy": ["flexible", "stretch", "stretchy"],
    "outerwear": ["anorak", "bomber", "hoodie", "raincoat", "trench", "puffer", "puffy"],
    "knitwear": ["sweater", "sweatshirt", "cardigan", "knit", "pullover"],
    "other_weather": []
}

coverage_mapping = {
    "short": ["short", "shortsstyle", "sleeveless", "sleevelessfitte", "mini"],
    "long": ["long", "longline", "longleg"],
    "other_coverage": []
}

def extract_materials(text, materials):
    if pd.isna(text):
        return np.nan
    text = str(text).lower()
    found = [mat for mat in materials if re.search(rf"\b{mat}\b", text)]
    return ", ".join(found) if found else "other"

def extract_main_category(text, mapping):
    if pd.isna(text):
        return np.nan
    text = str(text).lower()
    for category, keywords in mapping.items():
        if any(keyword in text for keyword in keywords):
            return category
    return "other"

# --- Combine text columns for analysis ---
clothes_df["combined_text"] = clothes_df["product_name"].fillna("") + " " + clothes_df["details"].fillna("")

# --- Extract features ---
clothes_df["material_feature"] = clothes_df["combined_text"].apply(lambda x: extract_materials(x, material_mapping))
clothes_df["weather_feature"] = clothes_df["combined_text"].apply(lambda x: extract_main_category(x, weather_mapping))
clothes_df["coverage_feature"] = clothes_df["combined_text"].apply(lambda x: extract_main_category(x, coverage_mapping))

clothes_df = clothes_df.drop(columns=["combined_text"])

In [40]:
for cat in clothes_df["category_type"].unique():
    var_name = cat.replace(" ", "_").lower() + "_df"
    globals()[var_name] = clothes_df[clothes_df["category_type"] == cat].copy()

In [41]:
top_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...,Top,cotton,outerwear,long
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...,Top,viscose,knitwear,long
5,women,T-SHIRTS_SWEATSHIRTS,stripe cotton tshirt,loose fit boat neck normal length long sleeves...,Top,cotton,other,long
6,women,KNITWEAR,cashmere knit sweater,sweater cashmere wool blend round neck long sl...,Top,"cashmere, wool",knitwear,long
7,women,KNITWEAR,wool blend boxyfit knit sweater,knit sweater wool blend round neck long sleeve...,Top,wool,knitwear,long
...,...,...,...,...,...,...,...,...
3539,men,SHIRTS,oxford shirt,regularfit shirt texture cotton fabric button ...,Top,cotton,other,long
3540,men,T-SHIRTS,short sleeve heavy weight tshirt,regularfit tshirt compact cotton fabric round ...,Top,cotton,other,short
3541,men,SHIRTS,cotton poplin shirt,regularfit shirt lightweight cotton poplin fab...,Top,cotton,light,long
3543,men,SHIRTS,chambray shirt,regularfit spread collar shirt cotton long sle...,Top,cotton,other,long


In [42]:
bottom_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other
13,women,TROUSERS,belt chino trouser,highwaist trouser dart pocket button false wel...,Bottom,other,other,other
16,women,TROUSERS,carrot fit trouser dart detail,midwaist trouser viscose blend belt loop pocke...,Bottom,viscose,other,other
22,women,JEANS,straightleg midrise crop jean,slim fit straight leg midrisezw woman jeansmid...,Bottom,other,other,other


In [22]:
shoes_df.shape

(481, 5)

In [23]:
accessories_df.shape

(180, 5)

# Zero-Shot Classification

In [43]:
#load the model
import torch
import transformers
import tensorflow as tf

classifier = transformers.pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cuda:0


In [44]:
#candidate labels
labels = ["5-10°C cold weather", "10-15°C cool weather", "15-20°C mild weather", "20-25°C warm weather", "25-30°C hot weather", "rain"]

In [45]:
#zero-shot-classification on accessories category
accessories_df['text'] = accessories_df['product_name'] + ' ' + accessories_df['details'] + ' ' + accessories_df['material_feature'] + ' ' + accessories_df['weather_feature'] + ' ' + accessories_df['coverage_feature']
texts = accessories_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels)

    for item in batch_results:
        results.append(item['labels'][0])

accessories_df['weather_label'] = results
accessories_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
3,women,ACCESSORIES,crossbody bag,crossbody bag adjustable strap lining zip clos...,Accessories,other,other,other,crossbody bag crossbody bag adjustable strap l...,10-15°C cool weather
14,women,ACCESSORIES,tote interior bag,tote bag shoulder strap inner bag gather draws...,Accessories,other,other,other,tote interior bag tote bag shoulder strap inne...,10-15°C cool weather
20,women,ACCESSORIES,leather belt,leather belt fasten metal buckle belt loop,Accessories,leather,other,other,leather belt leather belt fasten metal buckle ...,10-15°C cool weather
49,women,ACCESSORIES,metal necklace,rigid metal necklace,Accessories,other,other,other,metal necklace rigid metal necklace other othe...,10-15°C cool weather
50,women,ACCESSORIES,pack hoop earring,metal earring click fasten metal earring rhine...,Accessories,other,other,other,pack hoop earring metal earring click fasten m...,10-15°C cool weather
...,...,...,...,...,...,...,...,...,...,...
2975,men,ACCESSORIES,baseball training cap,peak cap adjustable strap backzara athleticz,Accessories,other,other,other,baseball training cap peak cap adjustable stra...,10-15°C cool weather
2979,men,ACCESSORIES,strappy cycling culotte,culotte highly stretchy fabric elastic strap f...,Accessories,other,stretchy,other,strappy cycling culotte culotte highly stretch...,10-15°C cool weather
2980,men,ACCESSORIES,cap,cap stretchy breathable fabric secure fit inne...,Accessories,other,breathable,other,cap cap stretchy breathable fabric secure fit ...,10-15°C cool weather
2983,men,ACCESSORIES,technical bucket hat,hat lightweight technical fabric feature wide ...,Accessories,other,light,other,technical bucket hat hat lightweight technical...,10-15°C cool weather


In [52]:
accessories_df[accessories_df['weather_label'] == labels[5]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
2813,men,ACCESSORIES,irregular earring,earring irregular finish pushback fasten,Accessories,other,other,other,irregular earring earring irregular finish pus...,rain
2821,men,ACCESSORIES,bucket hat contrast topstitche,hat cotton fabric wide brim contrast topstitch...,Accessories,cotton,other,other,bucket hat contrast topstitche hat cotton fabr...,rain


In [53]:
#zero-shot-classification on shoes category
shoes_df = shoes_df.fillna("").astype(str)

shoes_df['text'] = shoes_df['product_name'] + ' ' + shoes_df['details'] + ' ' + shoes_df['material_feature'] + ' ' + shoes_df['weather_feature'] + ' ' + shoes_df['coverage_feature']
texts = shoes_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels)

    for item in batch_results:
        results.append(item['labels'][0])

shoes_df['weather_label'] = results
shoes_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
15,women,SHOES,ballet flat buckle,ballet flat buckle strap instep round toesole ...,Shoes,other,stretchy,other,ballet flat buckle ballet flat buckle strap in...,10-15°C cool weather
26,women,SHOES,flat leather cage sandal,flat leather cage sandal wide strap buckle ank...,Shoes,leather,stretchy,other,flat leather cage sandal flat leather cage san...,10-15°C cool weather
36,women,SHOES,leather mule loafer buckle,leather flat mule loafer metallic buckle detai...,Shoes,leather,stretchy,other,leather mule loafer buckle leather flat mule l...,10-15°C cool weather
41,women,SHOES,leather cowboy ankle boot,leather cowboystyle ankle boot block heel pull...,Shoes,leather,other,other,leather cowboy ankle boot leather cowboystyle ...,10-15°C cool weather
53,women,SHOES,tassel loafer,flat loafer metallic buckle detail tassel fron...,Shoes,other,stretchy,other,tassel loafer flat loafer metallic buckle deta...,10-15°C cool weather
...,...,...,...,...,...,...,...,...,...,...
3145,men,SHOES,chunky sandal,strappy sandal strap hookandloop strap fasten ...,Shoes,other,other,other,chunky sandal strappy sandal strap hookandloop...,10-15°C cool weather
3146,men,SHOES,quilt strappy sandal,sandal quilt strap strap matching chunky sole,Shoes,other,other,other,quilt strappy sandal sandal quilt strap strap ...,10-15°C cool weather
3147,men,SHOES,track sole sandal,strappy sandal strap fasten instep feature fif...,Shoes,other,other,other,track sole sandal strappy sandal strap fasten ...,10-15°C cool weather
3148,men,SHOES,mould clog track sole,clogs wraparound upper single piece raise deta...,Shoes,other,other,other,mould clog track sole clogs wraparound upper s...,10-15°C cool weather


In [62]:
shoes_df[shoes_df['weather_label'] == labels[5]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
1434,women,SHOES,platform wedge sandal,sandal platform wedge heel wide strap fronthee...,Shoes,other,stretchy,other,platform wedge sandal sandal platform wedge he...,rain
1458,women,SHOES,fabric platform run trainer,run trainer combination material leather detai...,Shoes,"leather, polyurethane",stretchy,other,fabric platform run trainer run trainer combin...,rain
1459,women,SHOES,matching platform run trainer,run trainer combination material leather detai...,Shoes,leather,other,other,matching platform run trainer run trainer comb...,rain
1461,women,SHOES,vinyl mule,highheel vinyl mule round toe methacrylate hee...,Shoes,other,stretchy,other,vinyl mule highheel vinyl mule round toe metha...,rain
1467,women,SHOES,patentfinish block heel shoe,mary janestyle slingback shoe patent finish bl...,Shoes,other,stretchy,other,patentfinish block heel shoe mary janestyle sl...,rain
1483,women,SHOES,denim cowboy boot,denim cowboy boots fray seam detail point toe ...,Shoes,denim,stretchy,other,denim cowboy boot denim cowboy boots fray seam...,rain
1505,women,SHOES,heel vinyl mule decorative detail,vinyl highheel mule embellish rhinestone detai...,Shoes,other,stretchy,other,heel vinyl mule decorative detail vinyl highhe...,rain
1517,women,SHOES,highheel slingback shoe bow,highheel slingback shoe embellish bow point to...,Shoes,other,stretchy,other,highheel slingback shoe bow highheel slingback...,rain
1633,women,SHOES,leather platform trainer,laceup trainers contrast piece platform soleso...,Shoes,leather,other,other,leather platform trainer laceup trainers contr...,rain
1681,women,SHOES,laceup sandal strap,kittenheel sandal thin strap tie ankle point t...,Shoes,other,stretchy,other,laceup sandal strap kittenheel sandal thin str...,rain


In [55]:
#zero-shot-classification on top category
top_df = top_df.fillna("").astype(str)

top_df['text'] = top_df['product_name'] + ' ' + top_df['details'] + ' ' + top_df['material_feature'] + ' ' + top_df['weather_feature'] + ' ' + top_df['coverage_feature']
texts = top_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels)

    for item in batch_results:
        results.append(item['labels'][0])

top_df['weather_label'] = results
top_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...,Top,cotton,outerwear,long,oversize trench coat pocket collared trench co...,10-15°C cool weather
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...,Top,viscose,knitwear,long,knit sweater zip sweater viscose blend feature...,10-15°C cool weather
5,women,T-SHIRTS_SWEATSHIRTS,stripe cotton tshirt,loose fit boat neck normal length long sleeves...,Top,cotton,other,long,stripe cotton tshirt loose fit boat neck norma...,10-15°C cool weather
6,women,KNITWEAR,cashmere knit sweater,sweater cashmere wool blend round neck long sl...,Top,"cashmere, wool",knitwear,long,cashmere knit sweater sweater cashmere wool bl...,20-25°C warm weather
7,women,KNITWEAR,wool blend boxyfit knit sweater,knit sweater wool blend round neck long sleeve...,Top,wool,knitwear,long,wool blend boxyfit knit sweater knit sweater w...,10-15°C cool weather
...,...,...,...,...,...,...,...,...,...,...
3539,men,SHIRTS,oxford shirt,regularfit shirt texture cotton fabric button ...,Top,cotton,other,long,oxford shirt regularfit shirt texture cotton f...,10-15°C cool weather
3540,men,T-SHIRTS,short sleeve heavy weight tshirt,regularfit tshirt compact cotton fabric round ...,Top,cotton,other,short,short sleeve heavy weight tshirt regularfit ts...,10-15°C cool weather
3541,men,SHIRTS,cotton poplin shirt,regularfit shirt lightweight cotton poplin fab...,Top,cotton,light,long,cotton poplin shirt regularfit shirt lightweig...,10-15°C cool weather
3543,men,SHIRTS,chambray shirt,regularfit spread collar shirt cotton long sle...,Top,cotton,other,long,chambray shirt regularfit spread collar shirt ...,10-15°C cool weather


In [68]:
top_df[top_df['weather_label'] == labels[5]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
11,women,JACKETS,water wind protection rubberise raincoat,raincoat feature high collar hood drawstring l...,Top,other,waterproof,long,water wind protection rubberise raincoat rainc...,rain
78,women,JACKETS,waterrepellent trench coat,trench coat feature lapel collar long sleeve t...,Top,other,waterproof,long,waterrepellent trench coat trench coat feature...,rain
149,women,JACKETS,drapey long raincoat,trench coat lapel collar turnup long sleeve bu...,Top,other,outerwear,long,drapey long raincoat trench coat lapel collar ...,rain
176,women,JACKETS,oversize waterrepellent trench coat belt,loosefitte trench coat cotton blend lapel coll...,Top,cotton,waterproof,long,oversize waterrepellent trench coat belt loose...,rain
377,women,SHIRTS,plumetis shirt ruffle,shirt feature slot collar long sleeve elastica...,Top,other,other,long,plumetis shirt ruffle shirt feature slot colla...,rain
1766,women,TOPS_BODYSUITS,asymmetric pleated,asymmetric neckline wide strap pleated fabric ...,Top,other,other,other,asymmetric pleated asymmetric neckline wide st...,rain
1794,women,TOPS_BODYSUITS,rib bodysuit ruffle,bodysuit straightcut neckline expose shoulder ...,Top,other,other,other,rib bodysuit ruffle bodysuit straightcut neckl...,rain
1809,women,TOPS_BODYSUITS,gather organza,halter button gather fabric detail matching li...,Top,organza,other,other,gather organza halter button gather fabric det...,rain
1811,women,TOPS_BODYSUITS,texture,halter surplice neckline open tie detail hem,Top,other,other,other,texture halter surplice neckline open tie deta...,rain
2124,women,JACKETS,transparent cropped jacket,transparent collared waterrepellent jacket fea...,Top,other,waterproof,long,transparent cropped jacket transparent collare...,rain


In [69]:
#zero-shot-classification on bottom category
bottom_df = bottom_df.fillna("").astype(str)

bottom_df['text'] = bottom_df['product_name'] + ' ' + bottom_df['details'] + ' ' + bottom_df['material_feature'] + ' ' + bottom_df['weather_feature'] + ' ' + bottom_df['coverage_feature']
texts = bottom_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels)

    for item in batch_results:
        results.append(item['labels'][0])

bottom_df['weather_label'] = results
bottom_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other,marine straight fit highwaist pocket jean slim...,15-20°C mild weather
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other,marine straightleg highwaist jean slim fit rel...,15-20°C mild weather
13,women,TROUSERS,belt chino trouser,highwaist trouser dart pocket button false wel...,Bottom,other,other,other,belt chino trouser highwaist trouser dart pock...,10-15°C cool weather
16,women,TROUSERS,carrot fit trouser dart detail,midwaist trouser viscose blend belt loop pocke...,Bottom,viscose,other,other,carrot fit trouser dart detail midwaist trouse...,10-15°C cool weather
22,women,JEANS,straightleg midrise crop jean,slim fit straight leg midrisezw woman jeansmid...,Bottom,other,other,other,straightleg midrise crop jean slim fit straigh...,10-15°C cool weather
...,...,...,...,...,...,...,...,...,...,...
3628,men,SHORTS,seersucker bermuda short,regularfit bermuda short elasticate waistband ...,Bottom,other,other,short,seersucker bermuda short regularfit bermuda sh...,10-15°C cool weather
3629,men,SHORTS,jogger waist bermuda short,regularfit bermuda short adjustable waistband ...,Bottom,other,other,short,jogger waist bermuda short regularfit bermuda ...,10-15°C cool weather
3630,men,SHORTS,bermuda short crochet pocket,bermuda short cotton viscose blend feature adj...,Bottom,"cotton, viscose",other,short,bermuda short crochet pocket bermuda short cot...,10-15°C cool weather
3631,men,SHORTS,linen viscose knit bermuda short,loosefitte knit bermuda short spun linen visco...,Bottom,"linen, viscose",knitwear,short,linen viscose knit bermuda short loosefitte kn...,10-15°C cool weather


In [75]:
bottom_df[bottom_df['weather_label'] == labels[5]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
938,women,DRESSES_JUMPSUITS,fit asymmetric organza dress gathering,midi dress feature straight neckline long slee...,Bottom,organza,other,long,fit asymmetric organza dress gathering midi dr...,rain
1006,women,DRESSES_JUMPSUITS,paisley print cutout dress,midi dress feature surplice neckline long slee...,Bottom,other,other,long,paisley print cutout dress midi dress feature ...,rain
1218,women,DRESSES_JUMPSUITS,print playsuit,playsuit feature surplice neckline long sleeve...,Bottom,other,other,long,print playsuit playsuit feature surplice neckl...,rain
2255,women,JEANS,z straightfit highwaist long length jean,highwaist regular reg long length rigid highwa...,Bottom,other,other,long,z straightfit highwaist long length jean highw...,rain


In [76]:
temperature_mapping = {
            (5, 10): "5-10°C cold weather",
            (10, 15): "10-15°C cool weather",
            (15, 20): "15-20°C mild weather",
            (20, 25): "20-25°C warm weather",
            (25, 30): "25-30°C hot weather"
        }

In [77]:
def temperature_to_label(temperature):
        """
        Convert numeric temperature to label
        """
        try:
            temp = float(temperature)
            for (low, high), label in temperature_mapping.items():
                if low <= temp <= high:
                    return label
            if temp < 5:
                return "5-10°C cold weather"
            elif temp > 30:
                return "25-30°C hot weather"
        except ValueError:
            raise ValueError(f"Invalid temperature: {temperature}. Please enter 5-30.")

In [97]:
temperature = '25'
temperature_label = temperature_to_label(temperature)
temp_range = temperature_label.split(' ')[0]

In [98]:
# RANDOM SAMPLE - This is the key speed improvement!
sample_size = 3

if len(bottom_df) > sample_size:
    sample_df = bottom_df.sample(sample_size) #, random_state=42)
    print(f"🎯 Sampling {sample_size} random items from {len(bottom_df)} total items")
else:
    sample_df = bottom_df
    print(f"🎯 Using all {len(sample_df)} items")

🎯 Sampling 3 random items from 1255 total items


In [99]:
# Prepare texts from sampled items
texts = []
for idx, row in sample_df.iterrows():
    text = f"{row['product_name']}.{row['details']}"
    texts.append(text)

In [100]:
# Single batch classification (fast!)
print("⚡ Classifying sampled items in one batch...")
results = classifier(texts,
               candidate_labels=[temperature_label],
               hypothesis_template="This clothing item is suitable for {}.",
               multi_label=False
              )
results

⚡ Classifying sampled items in one batch...


[{'sequence': 'linencotton trouser.trouser cotton linen blend elasticate waistband adjustable drawstre pocket welt pocket',
  'labels': ['20-25°C warm weather'],
  'scores': [0.8409193754196167]},
 {'sequence': 'linen trouser.straight fit trouser linen waist pleat detail pocket rear jetted pocket zip fly button fastening',
  'labels': ['20-25°C warm weather'],
  'scores': [0.7868605256080627]},
 {'sequence': 'fit jacquard dress.sleeveless midi dress round neck floral jacquard detail',
  'labels': ['20-25°C warm weather'],
  'scores': [0.8970279097557068]}]

In [101]:
# Handle results
if isinstance(results, dict):
    scores = [results['scores'][0]]
else:
    scores = [result['scores'][0] for result in results]

In [102]:
# Get top K indices from sampled items
scores = np.array(scores)
top_indices = np.argsort(scores)[::-1][:5]
scores, top_indices

(array([0.84091938, 0.78686053, 0.89702791]), array([2, 0, 1]))

In [103]:
# Build results
recommendations = []
for i, idx in enumerate(top_indices, 1):
    item = sample_df.iloc[idx]
    recommendations.append({'rank': i,
                            'product_name': item['product_name'],
                            'details': item['details'],
                            'confidence': round(scores[idx], 3),
                            'temperature': f"{temperature}°C",
                            'temperature_range': temp_range,
                            'sampled_from_total': f"{len(sample_df)}/{len(clothes_df)} items"})
# print(recommendations[0]['product_name'])
# print(recommendations[1]['product_name'])
# print(recommendations[2]['product_name'])
recommendations

[{'rank': 1,
  'product_name': 'fit jacquard dress',
  'details': 'sleeveless midi dress round neck floral jacquard detail',
  'confidence': np.float64(0.897),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'},
 {'rank': 2,
  'product_name': 'linencotton trouser',
  'details': 'trouser cotton linen blend elasticate waistband adjustable drawstre pocket welt pocket',
  'confidence': np.float64(0.841),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'},
 {'rank': 3,
  'product_name': 'linen trouser',
  'details': 'straight fit trouser linen waist pleat detail pocket rear jetted pocket zip fly button fastening',
  'confidence': np.float64(0.787),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'}]