In [1]:
import numpy as np
import pandas as pd
import re

In [2]:
from google.colab import files
uploaded = files.upload()

Saving clothes.csv to clothes.csv


# Preprocessing

In [3]:
#Load data

clothes_df = pd.read_csv('clothes.csv')
clothes_df

Unnamed: 0,gender,product_category,product_name,details
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...
3,women,ACCESSORIES,crossbody bag,crossbody bag adjustable strap lining zip clos...
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...
...,...,...,...,...
3628,men,SHORTS,seersucker bermuda short,regularfit bermuda short elasticate waistband ...
3629,men,SHORTS,jogger waist bermuda short,regularfit bermuda short adjustable waistband ...
3630,men,SHORTS,bermuda short crochet pocket,bermuda short cotton viscose blend feature adj...
3631,men,SHORTS,linen viscose knit bermuda short,loosefitte knit bermuda short spun linen visco...


In [4]:
# --- 1. Define lists for each broad category ---
tops = [
    "T-SHIRTS", "SHIRTS", "BLAZERS", "JACKETS", "OVERSHIRTS",
    "T-SHIRTS_SWEATSHIRTS", "HOODIES_SWEATSHIRTS", "POLO SHIRTS",
    "SWEATERS_CARDIGANS", "WAISTCOATS_GILETS", "KNITWEAR", "TOPS_BODYSUITS"
]

bottoms = [
    "TROUSERS", "JEANS", "SHORTS", "SKIRTS", "SHORTS_SKORTS", "DRESSES_JUMPSUITS"
]

shoes = ["SHOES"]

accessories = ["ACCESSORIES"]

# --- 2. Define function to map product_category to category_type ---
def assign_category_type(cat):
    if cat in tops:
        return "Top"
    elif cat in bottoms:
        return "Bottom"
    elif cat in shoes:
        return "Shoes"
    elif cat in accessories:
        return "Accessories"
    else:
        return "Other"

# --- 3. Apply function to create new column ---
clothes_df["category_type"] = clothes_df["product_category"].apply(assign_category_type)
print(clothes_df[["product_category", "category_type"]].drop_duplicates())

          product_category category_type
0                  JACKETS           Top
1                 KNITWEAR           Top
2                    JEANS        Bottom
3              ACCESSORIES   Accessories
5     T-SHIRTS_SWEATSHIRTS           Top
13                TROUSERS        Bottom
15                   SHOES         Shoes
17                  SHIRTS           Top
24       WAISTCOATS_GILETS           Top
25                  SKIRTS        Bottom
31                 BLAZERS           Top
34                T-SHIRTS           Top
42           SHORTS_SKORTS        Bottom
48       DRESSES_JUMPSUITS        Bottom
142         TOPS_BODYSUITS           Top
2240   HOODIES_SWEATSHIRTS           Top
2251                SHORTS        Bottom
2355           POLO SHIRTS           Top
2468            OVERSHIRTS           Top
3150    SWEATERS_CARDIGANS           Top
3232                 SUITS         Other
3233              SWIMWEAR         Other


In [5]:
material_mapping = [
    "acetate", "alpaca", "cotton", "cashmere", "denim", "gabardine", "gabardinestyle", "gabardinetype",
    "leather", "linen", "lyocell", "liocell", "modal", "neoprene", "organza", "polyester", "polyamide",
    "polyurethane", "ramie", "silk", "suede", "velvet", "viscose", "wool", "jute", "juteline"
]

weather_mapping = {
    "breathable": ["breathable", "breathability", "frontbreathableantiodour"],
    "absorbent": ["absorbent", "absorption"],
    "waterproof": ["water", "waterrepellent"],
    "windproof": ["wind", "windbreaker"],
    "insulation": ["thermal", "thermos", "thermoseale", "insulation", "insulate"],
    "light": ["lightweight", "lightness", "lightly"],
    "stretchy": ["flexible", "stretch", "stretchy"],
    "outerwear": ["anorak", "bomber", "hoodie", "raincoat", "trench", "puffer", "puffy"],
    "knitwear": ["sweater", "sweatshirt", "cardigan", "knit", "pullover"],
    "other_weather": []
}

coverage_mapping = {
    "short": ["short", "shortsstyle", "sleeveless", "sleevelessfitte", "mini"],
    "long": ["long", "longline", "longleg"],
    "other_coverage": []
}

def extract_materials(text, materials):
    if pd.isna(text):
        return np.nan
    text = str(text).lower()
    found = [mat for mat in materials if re.search(rf"\b{mat}\b", text)]
    return ", ".join(found) if found else "other"

def extract_main_category(text, mapping):
    if pd.isna(text):
        return np.nan
    text = str(text).lower()
    for category, keywords in mapping.items():
        if any(keyword in text for keyword in keywords):
            return category
    return "other"

# --- Combine text columns for analysis ---
clothes_df["combined_text"] = clothes_df["product_name"].fillna("") + " " + clothes_df["details"].fillna("")

# --- Extract features ---
clothes_df["material_feature"] = clothes_df["combined_text"].apply(lambda x: extract_materials(x, material_mapping))
clothes_df["weather_feature"] = clothes_df["combined_text"].apply(lambda x: extract_main_category(x, weather_mapping))
clothes_df["coverage_feature"] = clothes_df["combined_text"].apply(lambda x: extract_main_category(x, coverage_mapping))

clothes_df = clothes_df.drop(columns=["combined_text"])

In [6]:
for cat in clothes_df["category_type"].unique():
    var_name = cat.replace(" ", "_").lower() + "_df"
    globals()[var_name] = clothes_df[clothes_df["category_type"] == cat].copy()

In [7]:
top_df

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...,Top,cotton,outerwear,long
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...,Top,viscose,knitwear,long
5,women,T-SHIRTS_SWEATSHIRTS,stripe cotton tshirt,loose fit boat neck normal length long sleeves...,Top,cotton,other,long
6,women,KNITWEAR,cashmere knit sweater,sweater cashmere wool blend round neck long sl...,Top,"cashmere, wool",knitwear,long
7,women,KNITWEAR,wool blend boxyfit knit sweater,knit sweater wool blend round neck long sleeve...,Top,wool,knitwear,long
...,...,...,...,...,...,...,...,...
3539,men,SHIRTS,oxford shirt,regularfit shirt texture cotton fabric button ...,Top,cotton,other,long
3540,men,T-SHIRTS,short sleeve heavy weight tshirt,regularfit tshirt compact cotton fabric round ...,Top,cotton,other,short
3541,men,SHIRTS,cotton poplin shirt,regularfit shirt lightweight cotton poplin fab...,Top,cotton,light,long
3543,men,SHIRTS,chambray shirt,regularfit spread collar shirt cotton long sle...,Top,cotton,other,long


In [8]:
bottom_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other
13,women,TROUSERS,belt chino trouser,highwaist trouser dart pocket button false wel...,Bottom,other,other,other
16,women,TROUSERS,carrot fit trouser dart detail,midwaist trouser viscose blend belt loop pocke...,Bottom,viscose,other,other
22,women,JEANS,straightleg midrise crop jean,slim fit straight leg midrisezw woman jeansmid...,Bottom,other,other,other


In [None]:
shoes_df.shape

(481, 5)

In [None]:
accessories_df.shape

(180, 5)

# Zero-Shot Classification

In [57]:
#load the model
import torch
from transformers import pipeline
import tensorflow as tf

classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cuda:0


In [58]:
#candidate labels
labels = ["5-10°C cold weather", "10-15°C cool weather", "15-20°C mild weather", "20-25°C warm weather", "25-30°C hot weather", "rain"]

In [60]:
#zero-shot-classification on accessories category
accessories_df['text'] = accessories_df['product_name'] + ' ' + accessories_df['material_feature'] + ' ' + accessories_df['weather_feature'] + ' ' + accessories_df['coverage_feature']
texts = accessories_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels,
                               hypothesis_template="This outfit is designed to be worn in {} conditions.",
                               multi_label=True)

    for item in batch_results:
        results.append(item['labels'][0])

accessories_df['weather_label'] = results
accessories_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
3,women,ACCESSORIES,crossbody bag,crossbody bag adjustable strap lining zip clos...,Accessories,other,other,other,crossbody bag other other other,10-15°C cool weather
14,women,ACCESSORIES,tote interior bag,tote bag shoulder strap inner bag gather draws...,Accessories,other,other,other,tote interior bag other other other,15-20°C mild weather
20,women,ACCESSORIES,leather belt,leather belt fasten metal buckle belt loop,Accessories,leather,other,other,leather belt leather other other,10-15°C cool weather
49,women,ACCESSORIES,metal necklace,rigid metal necklace,Accessories,other,other,other,metal necklace other other other,10-15°C cool weather
50,women,ACCESSORIES,pack hoop earring,metal earring click fasten metal earring rhine...,Accessories,other,other,other,pack hoop earring other other other,10-15°C cool weather


In [12]:
accessories_df[accessories_df['weather_label'] == labels[5]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
2813,men,ACCESSORIES,irregular earring,earring irregular finish pushback fasten,Accessories,other,other,other,irregular earring earring irregular finish pus...,rain
2821,men,ACCESSORIES,bucket hat contrast topstitche,hat cotton fabric wide brim contrast topstitch...,Accessories,cotton,other,other,bucket hat contrast topstitche hat cotton fabr...,rain


In [61]:
#zero-shot-classification on shoes category
shoes_df = shoes_df.fillna("").astype(str)

shoes_df['text'] = shoes_df['product_name'] + ' ' + shoes_df['material_feature'] + ' ' + shoes_df['weather_feature'] + ' ' + shoes_df['coverage_feature']
texts = shoes_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels,
                               hypothesis_template="This outfit is designed to be worn in {} conditions.",
                               multi_label=True)

    for item in batch_results:
        results.append(item['labels'][0])

shoes_df['weather_label'] = results
shoes_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
15,women,SHOES,ballet flat buckle,ballet flat buckle strap instep round toesole ...,Shoes,other,stretchy,other,ballet flat buckle other stretchy other,15-20°C mild weather
26,women,SHOES,flat leather cage sandal,flat leather cage sandal wide strap buckle ank...,Shoes,leather,stretchy,other,flat leather cage sandal leather stretchy other,15-20°C mild weather
36,women,SHOES,leather mule loafer buckle,leather flat mule loafer metallic buckle detai...,Shoes,leather,stretchy,other,leather mule loafer buckle leather stretchy other,15-20°C mild weather
41,women,SHOES,leather cowboy ankle boot,leather cowboystyle ankle boot block heel pull...,Shoes,leather,other,other,leather cowboy ankle boot leather other other,10-15°C cool weather
53,women,SHOES,tassel loafer,flat loafer metallic buckle detail tassel fron...,Shoes,other,stretchy,other,tassel loafer other stretchy other,15-20°C mild weather


In [66]:
shoes_df[shoes_df['weather_label'] == labels[2]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
15,women,SHOES,ballet flat buckle,ballet flat buckle strap instep round toesole ...,Shoes,other,stretchy,other,ballet flat buckle other stretchy other,15-20°C mild weather
26,women,SHOES,flat leather cage sandal,flat leather cage sandal wide strap buckle ank...,Shoes,leather,stretchy,other,flat leather cage sandal leather stretchy other,15-20°C mild weather
36,women,SHOES,leather mule loafer buckle,leather flat mule loafer metallic buckle detai...,Shoes,leather,stretchy,other,leather mule loafer buckle leather stretchy other,15-20°C mild weather
53,women,SHOES,tassel loafer,flat loafer metallic buckle detail tassel fron...,Shoes,other,stretchy,other,tassel loafer other stretchy other,15-20°C mild weather
63,women,SHOES,kitten heel slingback shoe chain,kitten heel slingback shoe patentfinish chain ...,Shoes,other,stretchy,other,kitten heel slingback shoe chain other stretch...,15-20°C mild weather
...,...,...,...,...,...,...,...,...,...,...
3140,men,SHOES,laceless knit fabric trainer,laceless trainer fabric elastic panel pull tab...,Shoes,other,knitwear,other,laceless knit fabric trainer other knitwear other,15-20°C mild weather
3142,men,SHOES,strappy leather sandal,leather cage sandal split suede finish interwe...,Shoes,"leather, suede",other,other,"strappy leather sandal leather, suede other other",15-20°C mild weather
3143,men,SHOES,leather cage sandal,cage sandal leather adjustable strap slightly ...,Shoes,leather,light,other,leather cage sandal leather light other,15-20°C mild weather
3146,men,SHOES,quilt strappy sandal,sandal quilt strap strap matching chunky sole,Shoes,other,other,other,quilt strappy sandal other other other,15-20°C mild weather


In [62]:
#zero-shot-classification on top category
top_df = top_df.fillna("").astype(str)

top_df['text'] = top_df['product_name'] + ' ' + top_df['material_feature'] + ' ' + top_df['weather_feature'] + ' ' + top_df['coverage_feature']
texts = top_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels,
                               hypothesis_template="This outfit is designed to be worn in {} conditions.",
                               multi_label=True)

    for item in batch_results:
        results.append(item['labels'][0])

top_df['weather_label'] = results
top_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
0,women,JACKETS,oversize trench coat pocket,collared trench coat cotton blend feature long...,Top,cotton,outerwear,long,oversize trench coat pocket cotton outerwear long,10-15°C cool weather
1,women,KNITWEAR,knit sweater zip,sweater viscose blend feature high neck metal ...,Top,viscose,knitwear,long,knit sweater zip viscose knitwear long,5-10°C cold weather
5,women,T-SHIRTS_SWEATSHIRTS,stripe cotton tshirt,loose fit boat neck normal length long sleeves...,Top,cotton,other,long,stripe cotton tshirt cotton other long,15-20°C mild weather
6,women,KNITWEAR,cashmere knit sweater,sweater cashmere wool blend round neck long sl...,Top,"cashmere, wool",knitwear,long,"cashmere knit sweater cashmere, wool knitwear ...",5-10°C cold weather
7,women,KNITWEAR,wool blend boxyfit knit sweater,knit sweater wool blend round neck long sleeve...,Top,wool,knitwear,long,wool blend boxyfit knit sweater wool knitwear ...,15-20°C mild weather


In [72]:
top_df[top_df['weather_label'] == labels[3]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label


In [88]:
#zero-shot-classification on bottom category
bottom_df = bottom_df.fillna("").astype(str)

bottom_df['text'] = bottom_df['product_name'] + ' ' + bottom_df['material_feature'] + ' ' + bottom_df['weather_feature'] + ' ' + bottom_df['coverage_feature']
texts = bottom_df['text'].tolist()

batch_size = 16
results = []

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_results = classifier(batch_texts, candidate_labels=labels,
                               hypothesis_template="This clothing is suitable for {} weather.",
                               multi_label=True)

    for item in batch_results:
        results.append(item['labels'][0])

bottom_df['weather_label'] = results
bottom_df.head()

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
2,women,JEANS,marine straight fit highwaist pocket jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other,marine straight fit highwaist pocket jean othe...,10-15°C cool weather
4,women,JEANS,marine straightleg highwaist jean,slim fit relaxed leg high waistzw woman jeansh...,Bottom,other,other,other,marine straightleg highwaist jean other other ...,10-15°C cool weather
13,women,TROUSERS,belt chino trouser,highwaist trouser dart pocket button false wel...,Bottom,other,other,other,belt chino trouser other other other,10-15°C cool weather
16,women,TROUSERS,carrot fit trouser dart detail,midwaist trouser viscose blend belt loop pocke...,Bottom,viscose,other,other,carrot fit trouser dart detail viscose other o...,10-15°C cool weather
22,women,JEANS,straightleg midrise crop jean,slim fit straight leg midrisezw woman jeansmid...,Bottom,other,other,other,straightleg midrise crop jean other other other,10-15°C cool weather


In [89]:
bottom_df[bottom_df['weather_label'] == labels[4]]

Unnamed: 0,gender,product_category,product_name,details,category_type,material_feature,weather_feature,coverage_feature,text,weather_label
147,women,DRESSES_JUMPSUITS,length combine jumpsuit,round neck jumpsuit adjustable thin strap elas...,Bottom,other,other,other,length combine jumpsuit other other other,25-30°C hot weather
821,women,DRESSES_JUMPSUITS,twopiece flare jumpsuit,strapless jumpsuit false jetted pocket feature...,Bottom,other,other,other,twopiece flare jumpsuit other other other,25-30°C hot weather
978,women,DRESSES_JUMPSUITS,length combine jumpsuit,jumpsuit straight neckline thin strap contrast...,Bottom,other,other,other,length combine jumpsuit other other other,25-30°C hot weather
1013,women,DRESSES_JUMPSUITS,length combine jumpsuit,round neck jumpsuit adjustable thin strap elas...,Bottom,other,other,other,length combine jumpsuit other other other,25-30°C hot weather
1305,women,DRESSES_JUMPSUITS,voluminous jumpsuit,jumpsuit plunge vneckline long sleeve inseam p...,Bottom,other,other,long,voluminous jumpsuit other other long,25-30°C hot weather


#Recommendation

In [78]:
temperature_mapping = {
            (5, 10): "5-10°C cold weather",
            (10, 15): "10-15°C cool weather",
            (15, 20): "15-20°C mild weather",
            (20, 25): "20-25°C warm weather",
            (25, 30): "25-30°C hot weather"
        }

In [79]:
def temperature_to_label(temperature):
        """
        Convert numeric temperature to label
        """
        try:
            temp = float(temperature)
            for (low, high), label in temperature_mapping.items():
                if low <= temp <= high:
                    return label
            if temp < 5:
                return "5-10°C cold weather"
            elif temp > 30:
                return "25-30°C hot weather"
        except ValueError:
            raise ValueError(f"Invalid temperature: {temperature}. Please enter 5-30.")

In [80]:
temperature = '25'
temperature_label = temperature_to_label(temperature)
temp_range = temperature_label.split(' ')[0]

In [81]:
# RANDOM SAMPLE - This is the key speed improvement!
sample_size = 3

if len(top_df) > sample_size:
    sample_df = top_df.sample(sample_size) #, random_state=42)
    print(f"🎯 Sampling {sample_size} random items from {len(top_df)} total items")
else:
    sample_df = top_df
    print(f"🎯 Using all {len(sample_df)} items")

🎯 Sampling 3 random items from 1677 total items


In [82]:
# Prepare texts from sampled items
texts = []
for idx, row in sample_df.iterrows():
    text = f"{row['product_name']}.{row['details']}"
    texts.append(text)

In [83]:
# Single batch classification (fast!)
print("⚡ Classifying sampled items in one batch...")
results = classifier(texts,
               candidate_labels=[temperature_label],
               hypothesis_template="This item is comfortable to wear when the weather is {}.",
               multi_label=False
              )
results

⚡ Classifying sampled items in one batch...


[{'sequence': 'open halter waistcoat.halter waistcoat feature vneck wool blend false welt pocket open adjustable strap fabric buckle asymmetric hem buttonup',
  'labels': ['20-25°C warm weather'],
  'scores': [0.7420217394828796]},
 {'sequence': 'utility jacket pocket.jacket lightweight cotton fabric adjustable high neck drawstre long sleeve snapbutton cuff patch pocket flap sleeve zipup hide snapbutton placket',
  'labels': ['20-25°C warm weather'],
  'scores': [0.7532941102981567]},
 {'sequence': 'premium sweatshirt.loosefitte sweatshirt wide ribbed neck short sleeve',
  'labels': ['20-25°C warm weather'],
  'scores': [0.815582275390625]}]

In [84]:
# Handle results
if isinstance(results, dict):
    scores = [results['scores'][0]]
else:
    scores = [result['scores'][0] for result in results]

In [85]:
# Get top K indices from sampled items
scores = np.array(scores)
top_indices = np.argsort(scores)[::-1][:5]
scores, top_indices

(array([0.74202174, 0.75329411, 0.81558228]), array([2, 1, 0]))

In [86]:
# Build results
recommendations = []
for i, idx in enumerate(top_indices, 1):
    item = sample_df.iloc[idx]
    recommendations.append({'rank': i,
                            'product_name': item['product_name'],
                            'details': item['details'],
                            'confidence': round(scores[idx], 3),
                            'temperature': f"{temperature}°C",
                            'temperature_range': temp_range,
                            'sampled_from_total': f"{len(sample_df)}/{len(clothes_df)} items"})
# print(recommendations[0]['product_name'])
# print(recommendations[1]['product_name'])
# print(recommendations[2]['product_name'])
recommendations

[{'rank': 1,
  'product_name': 'premium sweatshirt',
  'details': 'loosefitte sweatshirt wide ribbed neck short sleeve',
  'confidence': np.float64(0.816),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'},
 {'rank': 2,
  'product_name': 'utility jacket pocket',
  'details': 'jacket lightweight cotton fabric adjustable high neck drawstre long sleeve snapbutton cuff patch pocket flap sleeve zipup hide snapbutton placket',
  'confidence': np.float64(0.753),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'},
 {'rank': 3,
  'product_name': 'open halter waistcoat',
  'details': 'halter waistcoat feature vneck wool blend false welt pocket open adjustable strap fabric buckle asymmetric hem buttonup',
  'confidence': np.float64(0.742),
  'temperature': '25°C',
  'temperature_range': '20-25°C',
  'sampled_from_total': '3/3633 items'}]