<a href="https://colab.research.google.com/github/ridazaneb/IndoFashionCLIP/blob/main/IndoFashionCLIP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


# Base path to your Indofashion folder
BASE = '/content/drive/MyDrive/IndoFashion'

# Install deps (if not already)
!pip install torch torchvision transformers ftfy regex tqdm pandas scikit-learn streamlit Pillow opencv-python


Mounted at /content/drive


In [None]:
import pandas as pd, os, shutil

# Paths
BASE_DRIVE = "/content/drive/MyDrive/IndoFashion"
FULL_CSV   = os.path.join(BASE_DRIVE, "data/train.csv")
SUB_CSV    = os.path.join(BASE_DRIVE, "data/train_sub.csv")
IMG_SRC    = BASE_DRIVE      # root that the CSV's relative image_path values hang off
IMG_DST    = "/content/images_sub"
N_TRAIN_SAMPLES = 500        # size of the training subset; raise for a fuller run

# 1) Sample N_TRAIN_SAMPLES rows from train.csv.
#    The fixed random_state yields the same subset on every run; the size is
#    clamped so a CSV smaller than the request does not raise.
df = pd.read_csv(FULL_CSV)
sub = df.sample(n=min(N_TRAIN_SAMPLES, len(df)), random_state=42).reset_index(drop=True)
sub.to_csv(SUB_CSV, index=False)
print(f"Created subset CSV → {SUB_CSV} ({len(sub)} rows)")

# 2) Copy only those image files locally, preserving the relative layout
os.makedirs(IMG_DST, exist_ok=True)
for img_path in sub["image_path"]:
    src = os.path.join(IMG_SRC, img_path)            # e.g. …/images/train/123.jpeg
    dst = os.path.join(IMG_DST, img_path)            # e.g. /content/images_sub/images/train/123.jpeg
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copy2(src, dst)
print(f"Copied {len(sub)} images → {IMG_DST}")


Created subset CSV → /content/drive/MyDrive/IndoFashion/data/train_sub.csv (500 rows)
Copied 500 images → /content/images_sub


In [None]:
import pandas as pd, os

BASE_DRIVE = "/content/drive/MyDrive/IndoFashion"
VAL_CSV    = os.path.join(BASE_DRIVE, "data/val.csv")
VAL_SUB    = os.path.join(BASE_DRIVE, "data/val_sub.csv")
N_VAL_SAMPLES = 500   # size of the validation subset (tune to taste)

# Load full validation
df_val = pd.read_csv(VAL_CSV)

# Draw a reproducible subset; clamp the size so a short CSV cannot make
# DataFrame.sample raise "Cannot take a larger sample than population".
df_val_sub = df_val.sample(n=min(N_VAL_SAMPLES, len(df_val)), random_state=42)
df_val_sub.to_csv(VAL_SUB, index=False)
print(f"[✓] Created {VAL_SUB} ({len(df_val_sub)} rows)")


[✓] Created /content/drive/MyDrive/IndoFashion/data/val_sub.csv (500 rows)


In [None]:
%%bash
# Mirror the full image tree from Drive onto the Colab VM's local disk.
# Adjust both paths to match your Drive layout. The trailing slash on the
# source makes rsync copy the directory's contents rather than the directory.
drive_images="/content/drive/MyDrive/IndoFashion/images/"
local_images="/content/images/"
rsync --archive --verbose --info=progress2 "${drive_images}" "${local_images}"


Process is terminated.


In [None]:
# scripts/prepare_splits.py
import json, os, pandas as pd

# Root of the IndoFashion dataset on the mounted Drive; the per-split JSON
# files are read from directly inside this folder.
BASE = '/content/drive/MyDrive/IndoFashion'
# Directory the generated per-split CSVs are written to; created if absent.
OUT = os.path.join(BASE, 'data')
os.makedirs(OUT, exist_ok=True)

def make_caption(rec):
    """Build a short training caption for one product record.

    Args:
        rec: dict-like record. Reads the optional keys ``class_label``,
            ``color`` and ``product_title``; missing or ``None`` values are
            treated as empty strings.

    Returns:
        A caption of the form ``"a <person> wearing a <color> <class>"``.
        Empty parts are left out, so a record without a colour gives
        ``"a woman wearing a saree"`` with single spaces throughout.
    """
    import re  # local import: the cell's import line does not bring in `re`

    cls = (rec.get('class_label') or '').lower()
    color = rec.get('color') or ''
    if not color:
        # Try to pull a colour from the title. Match whole words only:
        # a plain substring test finds "red" inside "embroidered".
        title_words = set(re.findall(r'[a-z]+', (rec.get('product_title') or '').lower()))
        for c in ['red','green','blue','yellow','pink','black','white',
                  'beige','brown','orange','multicolor']:
            if c in title_words:
                color = c
                break
    person = 'woman' if cls in ['saree','lehenga','kurta',
                                'dupatta','gown','petticoats'] else 'man'
    # Join only the non-empty parts so a missing colour or class does not
    # leave a double or trailing space in the caption.
    return ' '.join(part for part in ('a', person, 'wearing a', color, cls) if part)

def _load_records(path):
    """Read one split file that is either a JSON array or JSON-lines.

    The whole file is read once and parsed from memory. Retrying with
    ``json.load`` on the same handle after a failed line-by-line pass does
    not work: the stream has already been partly consumed by then.

    Returns:
        A list of record dicts.
    """
    with open(path, encoding='utf-8') as f:
        text = f.read()
    try:
        data = json.loads(text)                     # single JSON document (array)
    except json.JSONDecodeError:
        # JSON-lines: one object per non-blank line
        data = [json.loads(line) for line in text.splitlines() if line.strip()]
    return data if isinstance(data, list) else [data]


for split in ['train','val','test']:
    js_file = os.path.join(BASE, f'{split}_data.json')
    records = _load_records(js_file)
    for r in records:
        r['caption'] = make_caption(r)
    df = pd.DataFrame(records)
    csv_path = os.path.join(OUT, f'{split}.csv')
    df.to_csv(csv_path, index=False)
    print(f"[✓] {split}.csv ← {len(df)} rows")


[✓] train.csv ← 91166 rows
[✓] val.csv ← 7500 rows
[✓] test.csv ← 7500 rows


In [None]:
%%bash
set -euo pipefail
# adjust paths to match your Drive layout
# The folder is "IndoFashion", the same spelling every Python cell uses.
SOURCE="/content/drive/MyDrive/IndoFashion/images/"
DEST="/content/images/"
if [ ! -d "$SOURCE" ]; then
    echo "Source directory not found: $SOURCE" >&2
    exit 1
fi
mkdir -p "$DEST"
# No -v: listing every file floods the cell output; progress2 gives one
# running total instead.
rsync -a --info=progress2 "$SOURCE" "$DEST"


In [None]:
# 1) Mount & install (if you haven’t already)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!pip install torch torchvision transformers ftfy regex tqdm pandas scikit-learn Pillow

# 2) Set paths
import os
BASE      = '/content/drive/MyDrive/IndoFashion'
TRAIN_CSV = os.path.join(BASE, 'data/train_sub.csv')
IMG_ROOT   = '/content/images_sub'     # local copy of just the subset
VAL_CSV   = os.path.join(BASE, 'data/val_sub.csv')     # now also 500‐row val
MODEL_DIR = os.path.join(BASE, 'models/clip_south_asia')

# 3) Build dataset & loader
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import pandas as pd
from tqdm import tqdm

class FashionDataset(Dataset):
    """Image–caption pairs read from a CSV with ``image_path`` and ``caption`` columns.

    Args:
        csv_path: CSV file to load.
        processor: callable invoked as ``processor(text=..., images=...,
            return_tensors='pt', ...)``; here a ``CLIPProcessor``.
        img_root: directory that the CSV's relative ``image_path`` values are
            joined onto. ``None`` (default) uses the module-level ``BASE``.
        augment: when True (default) random flip / rotation / colour jitter
            are applied after the resize. Pass False for deterministic
            evaluation data.
    """
    def __init__(self, csv_path, processor, img_root=None, augment=True):
        self.df = pd.read_csv(csv_path)
        self.proc = processor
        self.img_root = img_root
        steps = [transforms.Resize((128,256))]
        if augment:
            steps += [
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(10),
                # NOTE(review): with no arguments torchvision's ColorJitter
                # leaves the image unchanged; pass explicit strengths
                # (e.g. brightness=0.1) if jitter is actually wanted.
                transforms.ColorJitter(),
            ]
        self.tf = transforms.Compose(steps)

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the processor's output for row ``idx`` with the batch dim removed."""
        row = self.df.iloc[idx]
        # row['image_path'] is e.g. "images/test/0.jpeg"
        root = BASE if self.img_root is None else self.img_root
        img_path = os.path.join(root, row['image_path'])
        # convert() returns a fully loaded copy, so the file handle can be
        # closed right away instead of lingering until garbage collection.
        with Image.open(img_path) as fh:
            img = fh.convert('RGB')
        img = self.tf(img)
        enc = self.proc(text=row['caption'],
                        images=img,
                        return_tensors='pt',
                        padding=True,
                        truncation=True)   # guard against captions longer than the text encoder accepts
        # squeeze batch dim
        return {k: v.squeeze(0) for k,v in enc.items()}

def collate_fn(batch):
    """Merge per-sample dicts into one batch dict.

    ``input_ids`` and ``attention_mask`` are 1-D and may differ in length, so
    each is right-padded with zeros up to the longest ``input_ids`` in the
    batch before stacking. ``pixel_values`` are stacked as they are.

    Returns:
        dict with keys ``input_ids``, ``attention_mask``, ``pixel_values``.
    """
    target_len = max([sample['input_ids'].shape[0] for sample in batch])

    def _right_pad(seq):
        # Zero filler of the same dtype, sized to reach target_len.
        filler = torch.zeros(target_len - seq.shape[0], dtype=seq.dtype)
        return torch.cat((seq, filler), dim=0)

    ids_rows, mask_rows, pixel_rows = [], [], []
    for sample in batch:
        ids_rows.append(_right_pad(sample['input_ids']))
        mask_rows.append(_right_pad(sample['attention_mask']))
        pixel_rows.append(sample['pixel_values'])

    return {
        'input_ids': torch.stack(ids_rows),
        'attention_mask': torch.stack(mask_rows),
        'pixel_values': torch.stack(pixel_rows),
    }

# 4) Initialize model, processor, loaders
torch.manual_seed(42)   # repeatable shuffling and augmentation across runs
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model  = CLIPModel.from_pretrained('openai/clip-vit-base-patch32').to(device)
proc   = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')

train_ds = FashionDataset(TRAIN_CSV, proc)
val_ds   = FashionDataset(VAL_CSV,   proc)
# Validation has to be deterministic: keep the resize but drop the random
# flip/rotation so the loss is comparable from one epoch to the next.
val_ds.tf = transforms.Resize((128, 256))

# Pinned host memory only helps host→GPU copies; on CPU it just warns.
use_pinned = device == 'cuda'
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,
                          num_workers=0, pin_memory=use_pinned,
                          collate_fn=collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=64, shuffle=False,
                          num_workers=0, pin_memory=use_pinned,
                          collate_fn=collate_fn)


def _to_device(batch):
    """Move every tensor of a collated batch onto the training device."""
    return {k: v.to(device) for k, v in batch.items()}


# 5) Training loop
from torch import optim
opt = optim.AdamW(model.parameters(), lr=5e-6, weight_decay=1e-2)
for epoch in range(4):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch} train"):
        opt.zero_grad()
        # return_loss=True makes the model compute its contrastive loss
        out = model(**_to_device(batch), return_loss=True)
        out.loss.backward()
        opt.step()
        total_loss += out.loss.item()
    print(f"→ Train Loss: {total_loss/len(train_loader):.4f}")

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Epoch valid"):
            val_loss += model(**_to_device(batch), return_loss=True).loss.item()
    print(f"→ Val   Loss: {val_loss/len(val_loader):.4f}")

# 6) Save your fine-tuned model
os.makedirs(MODEL_DIR, exist_ok=True)
model.save_pretrained(MODEL_DIR)
proc.save_pretrained(MODEL_DIR)
print("Saved fine-tuned CLIP to", MODEL_DIR)


Mounted at /content/drive


Epoch 0 train: 100%|██████████| 8/8 [04:47<00:00, 35.97s/it]


→ Train Loss: 3.3512


Epoch valid: 100%|██████████| 8/8 [03:57<00:00, 29.67s/it]


→ Val   Loss: 2.6425


Epoch 1 train: 100%|██████████| 8/8 [04:43<00:00, 35.38s/it]


→ Train Loss: 2.3456


Epoch valid: 100%|██████████| 8/8 [01:32<00:00, 11.53s/it]


→ Val   Loss: 2.3590


Epoch 2 train: 100%|██████████| 8/8 [04:33<00:00, 34.14s/it]


→ Train Loss: 1.9735


Epoch valid: 100%|██████████| 8/8 [01:37<00:00, 12.22s/it]


→ Val   Loss: 2.2485


Epoch 3 train: 100%|██████████| 8/8 [04:38<00:00, 34.78s/it]


→ Train Loss: 1.6635


Epoch valid: 100%|██████████| 8/8 [01:35<00:00, 11.97s/it]


→ Val   Loss: 2.2384
Saved fine-tuned CLIP to /content/drive/MyDrive/IndoFashion/models/clip_south_asia


In [None]:
# scripts/trend_cluster.py

import torch, pickle, os
import pandas as pd
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from PIL import Image
from tqdm import tqdm

# Root of the IndoFashion project on the mounted Drive
BASE      = '/content/drive/MyDrive/IndoFashion'
# Directory the CLIP model and processor are loaded from in main()
MODEL_DIR = os.path.join(BASE,'models/clip_south_asia')
# CSV whose rows are embedded and clustered
TRAIN_CSV = os.path.join(BASE, 'data/train_sub.csv')
# Directory that each row's relative image_path is joined onto
IMG_ROOT   = '/content/images_sub'

def extract_embeddings(model, proc, df):
    """Embed every image listed in ``df`` with the model's image tower.

    Args:
        model: model exposing ``device`` and ``get_image_features``.
        proc: processor called as ``proc(images=..., return_tensors='pt')``.
        df: DataFrame with an ``image_path`` column, relative to ``IMG_ROOT``.

    Returns:
        2-D numpy array with one row per DataFrame row, in the same order.

    Raises:
        ValueError: if ``df`` has no rows.
    """
    embs = []
    for _, r in tqdm(df.iterrows(), total=len(df)):
        # convert() returns a loaded copy, so the file can be closed at once
        # rather than leaving one handle open per image.
        with Image.open(os.path.join(IMG_ROOT, r['image_path'])) as fh:
            img = fh.convert('RGB')
        inp = proc(images=img, return_tensors='pt').to(model.device)
        with torch.no_grad():
            emb = model.get_image_features(**inp)
        embs.append(emb.cpu().numpy().squeeze())
    if not embs:
        raise ValueError("extract_embeddings: the DataFrame contains no rows to embed")
    return np.vstack(embs)

def main():
    """Embed the training subset, reduce with PCA, cluster with KMeans, save results.

    Writes ``outputs/train_with_clusters.csv`` (input rows plus a ``cluster``
    column) and ``models/pca_km.pkl`` (a ``(pca, km)`` tuple) under ``BASE``.
    """
    device='cuda' if torch.cuda.is_available() else 'cpu'
    model = CLIPModel.from_pretrained(MODEL_DIR).to(device).eval()
    proc  = CLIPProcessor.from_pretrained(MODEL_DIR)
    df = pd.read_csv(TRAIN_CSV)

    embs = extract_embeddings(model, proc, df)
    # Clamp to what the data supports so a small subset does not make
    # PCA/KMeans raise; seed PCA too, since its solver may be randomized.
    n_components = min(50, *embs.shape)
    pca  = PCA(n_components=n_components, random_state=42).fit(embs)
    lowd = pca.transform(embs)
    km   = KMeans(n_clusters=min(20, len(lowd)), random_state=42).fit(lowd)

    df['cluster'] = km.labels_

    csv_out = os.path.join(BASE,'outputs/train_with_clusters.csv')
    pkl_out = os.path.join(BASE,'models/pca_km.pkl')
    for target in (csv_out, pkl_out):
        # The output folders are not guaranteed to exist on a fresh Drive.
        os.makedirs(os.path.dirname(target), exist_ok=True)

    df.to_csv(csv_out, index=False)
    with open(pkl_out, 'wb') as fh:   # close the handle deterministically
        pickle.dump((pca,km), fh)
    print("[✓] Clustering done → outputs/train_with_clusters.csv")

if __name__=='__main__':
    main()


100%|██████████| 500/500 [02:06<00:00,  3.96it/s]


[✓] Clustering done → outputs/train_with_clusters.csv


In [None]:
# scripts/app_dashboard.py

import streamlit as st, os, pickle, torch
import pandas as pd, numpy as np
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

BASE       = '/content/drive/MyDrive/IndoFashion'
MODEL_DIR  = os.path.join(BASE,'models/clip_south_asia')
PKL_FILE   = os.path.join(BASE,'models/pca_km.pkl')
CLUSTER_CSV= os.path.join(BASE,'outputs/train_with_clusters.csv')
IMG_ROOT   = '/content/images_sub'

@st.cache_resource
def load_all():
    """Load the fine-tuned CLIP model, its processor, the PCA/KMeans pair and the clustered CSV.

    Returns:
        ``(model, proc, pca, km, df)``; the model is on CPU in eval mode.
    """
    model = CLIPModel.from_pretrained(MODEL_DIR).to('cpu').eval()
    proc  = CLIPProcessor.from_pretrained(MODEL_DIR)
    # NOTE: unpickling runs arbitrary code; only load files this project wrote.
    with open(PKL_FILE,'rb') as fh:
        pca, km = pickle.load(fh)
    df = pd.read_csv(CLUSTER_CSV)
    return model, proc, pca, km, df

model, proc, pca, km, df = load_all()

st.title("South Asian Fashion Trend Predictor")
uploaded = st.file_uploader("Upload an image", type=["jpg","jpeg","png"])
if uploaded:
    img = Image.open(uploaded).convert("RGB")
    st.image(img, use_container_width=True)

    # predict class
    # CLIP has no classification head (and no meaningful config.id2label):
    # classes are scored zero-shot by comparing the image with one text
    # prompt per class label found in the clustered CSV.
    labels = sorted(df['class_label'].dropna().astype(str).unique())
    enc = proc(text=[f"a {c}" for c in labels], images=img,
               return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(**enc).logits_per_image        # shape (1, n_labels)
        # Unnormalised image features: the same call the clustering
        # script used to build the embeddings PCA was fitted on.
        emb = model.get_image_features(pixel_values=enc['pixel_values'])
    probs = logits.softmax(dim=1).cpu().numpy().reshape(-1)
    topk = np.argsort(-probs)[:5]
    st.subheader("Top Predictions")
    for idx in topk:
        st.write(f"{labels[idx]}: {probs[idx]*100:.1f}%")

    # cluster
    low = pca.transform(emb.cpu().numpy())
    cl  = km.predict(low)[0]
    st.write(f"Style Cluster: {cl}")

    st.subheader("Similar Styles")
    members = df[df.cluster==cl]
    # A cluster may hold fewer than 8 images; sample() raises if asked for more.
    samples = members.sample(min(8, len(members)))
    cols = st.columns(4)
    for i,(_,r) in enumerate(samples.iterrows()):
        with cols[i%4]:
            im = Image.open(os.path.join(IMG_ROOT, r.image_path))
            st.image(im, use_container_width=True)
            st.caption(r.class_label)


2025-04-28 12:50:36.535 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [None]:
%%bash
# Create the folder the dashboard script is written into. The project folder
# is "IndoFashion"; a differently-cased name would create a second, unused
# directory and leave the real target missing.
mkdir -p /content/drive/MyDrive/IndoFashion/scripts


In [None]:
%%bash
# Make sure the target folder exists, then write the Streamlit app.
# The quoted 'EOF' delimiter stops the shell expanding anything in the body.
mkdir -p /content/drive/MyDrive/IndoFashion/scripts
cat << 'EOF' > /content/drive/MyDrive/IndoFashion/scripts/app_dashboard.py
# scripts/app_dashboard.py
"""Streamlit dashboard: zero-shot garment classification with the fine-tuned
CLIP model, plus an optional "similar styles" gallery driven by PCA + KMeans."""

import os
import pickle

import streamlit as st
import torch
import pandas as pd
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Page config is issued before any other Streamlit call.
st.set_page_config(page_title="South Asian Fashion Classifier")

# ───────────────────────────────────────────────────────────
# CONFIG
# ───────────────────────────────────────────────────────────
BASE_DIR   = '/content/drive/MyDrive/IndoFashion'
MODEL_DIR  = os.path.join(BASE_DIR, 'models/clip_south_asia')
TEST_CSV   = os.path.join(BASE_DIR, 'data/test.csv')
CLUSTER_PKL= os.path.join(BASE_DIR, 'models/pca_km.pkl')
# The clustering script writes its CSV under outputs/; data/ is kept as a
# fallback for files placed there by hand.
CLUSTER_CSV_CANDIDATES = [
    os.path.join(BASE_DIR, 'outputs/train_with_clusters.csv'),
    os.path.join(BASE_DIR, 'data/train_with_clusters.csv'),
]

# image_path values in the CSVs already start with "images/…", so each root
# below is the PARENT of an images/ folder. Local copies first, Drive last.
IMG_ROOTS = ['/content/images_sub', '/content', BASE_DIR]


def resolve_image(rel_path):
    """Return the first existing absolute path for rel_path, or None."""
    for root in IMG_ROOTS:
        candidate = os.path.join(root, rel_path)
        if os.path.isfile(candidate):
            return candidate
    return None


# ───────────────────────────────────────────────────────────
# RESOURCE LOADING
# ───────────────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_model_and_classes():
    """Load model + processor once and collect the class names to score against."""
    device    = 'cuda' if torch.cuda.is_available() else 'cpu'
    model     = CLIPModel.from_pretrained(MODEL_DIR).to(device).eval()
    processor = CLIPProcessor.from_pretrained(MODEL_DIR)
    classes   = (pd.read_csv(TEST_CSV)['class_label']
                   .dropna().astype(str).unique().tolist())
    return model, processor, device, classes


@st.cache_resource(show_spinner=False)
def load_cluster_data():
    """Load the clustered CSV and the fitted PCA / KMeans objects.

    Raises FileNotFoundError when the artefacts have not been produced yet.
    Accepts the pickle either as a (pca, km) tuple or as a dict with the
    keys 'pca' and 'km'.
    """
    csv_path = next((p for p in CLUSTER_CSV_CANDIDATES if os.path.isfile(p)), None)
    if csv_path is None or not os.path.isfile(CLUSTER_PKL):
        raise FileNotFoundError("cluster artefacts not found")
    df = pd.read_csv(csv_path)
    # NOTE: unpickling runs arbitrary code; only load files this project wrote.
    with open(CLUSTER_PKL, 'rb') as f:
        pca_km = pickle.load(f)
    if isinstance(pca_km, dict):
        pca, km = pca_km['pca'], pca_km['km']
    else:
        pca, km = pca_km
    return df, pca, km


model, processor, device, class_list = load_model_and_classes()

# Cluster artifacts may not exist yet: that case is expected and silent.
# Any other failure still disables the gallery but is reported, so a broken
# artefact cannot hide behind a gallery that never shows up.
try:
    df_clusters, pca, km = load_cluster_data()
    clustering_enabled = True
except FileNotFoundError:
    clustering_enabled = False
except Exception as exc:
    clustering_enabled = False
    st.warning(f"Similar-styles gallery disabled: {exc}")

# ───────────────────────────────────────────────────────────
# APP UI
# ───────────────────────────────────────────────────────────
st.title("🌺 South Asian Fashion Classifier")

uploaded = st.file_uploader("Upload an image of a garment", type=['jpg','png','jpeg'])
if uploaded:
    # Display upload
    img = Image.open(uploaded).convert('RGB')
    st.image(img, caption="Your Upload", use_container_width=True)

    # Preprocess & forward pass
    inputs = processor(images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        raw_feats = model.get_image_features(**inputs)
        img_feats = raw_feats / raw_feats.norm(dim=-1, keepdim=True)

        text_inputs = processor(
            text=[f"a {c}" for c in class_list],
            return_tensors="pt",
            padding=True
        ).to(device)

        txt_feats = model.get_text_features(**text_inputs)
        txt_feats = txt_feats / txt_feats.norm(dim=-1, keepdim=True)

        # Cosine similarities lie in [-1, 1]; without CLIP's learned
        # temperature the softmax over them is almost uniform.
        sims  = model.logit_scale.exp() * (img_feats @ txt_feats.T)[0]
        probs = sims.softmax(dim=0)
        top3  = torch.topk(probs, k=min(3, len(class_list)))

    # Show Top-3 predictions
    st.subheader("Top 3 Predictions")
    for score, idx in zip(top3.values, top3.indices):
        cls = class_list[int(idx)]
        st.write(f"• **{cls.upper()}** — {float(score):.2%}")

    # Optional: Similar styles gallery via clustering
    if clustering_enabled:
        # PCA/KMeans were fitted on unnormalised image features, so the
        # query has to be projected from the same space.
        emb   = raw_feats.cpu().numpy()
        emb_p = pca.transform(emb)
        cid   = int(km.predict(emb_p)[0])

        st.markdown(f"### Similar styles (cluster {cid})")
        cols = st.columns(4)
        members = df_clusters[df_clusters.cluster == cid]
        # A cluster can hold fewer than 4 rows; sample() raises if asked for more.
        samples = members.sample(min(4, len(members)), random_state=42)['image_path'].tolist()

        for col, img_path in zip(cols, samples):
            full_path = resolve_image(img_path)
            if full_path is None:
                col.write("❓ missing image")
                continue
            try:
                with Image.open(full_path) as fh:
                    thumb = fh.convert('RGB')
                col.image(thumb, use_container_width=True, caption=os.path.basename(img_path))
            except OSError:
                col.write("❓ missing image")

else:
    st.info("Please upload a JPG/PNG image of a South Asian garment to get started.")

EOF


In [None]:
# 1) Install streamlit if you haven’t already
!pip install streamlit

# 2) Start Streamlit in the background on port 8501
get_ipython().system_raw(
    'streamlit run /content/drive/MyDrive/IndoFashion/scripts/app_dashboard.py '
    '--server.port 8501 --server.address 0.0.0.0 &'
)

# 3) Embed the app directly in your notebook
from google.colab import output
output.serve_kernel_port_as_iframe(8501)




<IPython.core.display.Javascript object>

In [None]:
# Install dependencies
!pip install streamlit pyngrok -q

# Launch Streamlit in the background
import os, sys
get_ipython().system_raw(
  'streamlit run /content/drive/MyDrive/IndoFashion/scripts/app_dashboard.py '
  '--server.port 8501 --server.address 0.0.0.0 &'
)

# Expose port 8501 via ngrok
from pyngrok import ngrok, conf
conf.get_default().region = "us"          # or your region
!ngrok authtoken 2wMKDmHaYBa1GjhPzYFvqgYduZh_2sraK1GftHFV3cAKW5MRd  # Removed the colon and leading space
public_url = ngrok.connect(8501, "http")
print("🔗 Public URL:", public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml




PyngrokNgrokHTTPError: ngrok client exception, API returned 502: {"error_code":103,"status_code":502,"msg":"failed to start tunnel","details":{"err":"failed to start tunnel: Your account may not run more than 3 tunnels over a single ngrok agent session.\nThe tunnels already running on this session are:\ntn_2wMKHKDOzbYOc5GVoTuhpIWp1Ns, tn_2wMKbWWOKkKvMzucs1dxeWHPjJC, tn_2wMLKCEhL49UivnUodGropWKt4n\n\r\n\r\nERR_NGROK_324\r\n"}}
