In [4]:
import os
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import torch

In [5]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cuda


In [6]:
# Descriptions file on the Kaggle input mount. The columns used later in this
# notebook are "image_id" and "description".
DESCRIPTIONS_CSV = "/kaggle/input/nlp-fashionpedia/LLaVADescriptions.csv"

df = pd.read_csv(filepath_or_buffer=DESCRIPTIONS_CSV)
print(df.shape)  # (rows, columns)
df.head(n=5)

(2000, 2)


Unnamed: 0,image_id,description
0,3fe668b00f4f6efa967866bdda12a977,The gown is a floor-length garment with an asy...
1,95931f0df1f902a79ee6c9a67725d00c,The woman is wearing a green coat with fur tri...
2,fbd8ef2564a18d367d19a7220adf4e62,The woman is wearing a black leather skirt. Th...
3,ef8017a524be660d50c6e9476672fe22,The image features a pair of black shorts with...
4,71e9b0adb502858db1770a0307f300a5,"The man is wearing a gray blazer, which is a t..."


In [7]:
# Sentence encoder used for both the descriptions and the search queries,
# placed on the device selected earlier.
model = SentenceTransformer(
    model_name_or_path="all-MiniLM-L6-v2",
    device=device,
)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [11]:
# Descriptions are encoded in DataFrame row order, so embeddings[i] belongs to
# the i-th row of df.
texts = df["description"].tolist()
batch_size = 32

# Unit-length vectors: an inner-product search over them ranks by cosine
# similarity.
encode_options = {
    "batch_size": batch_size,
    "show_progress_bar": True,
    "convert_to_numpy": True,
    "normalize_embeddings": True,
}
embeddings = model.encode(texts, **encode_options)
print(embeddings.shape)


Batches:   0%|          | 0/63 [00:00<?, ?it/s]

(2000, 384)


In [12]:
# Inner-product index whose dimensionality is the embedding width
# (second axis of the embeddings array).
dim = embeddings.shape[1]
index = faiss.IndexFlatIP(dim)

# Add every embedding, then report how many vectors the index now holds.
index.add(embeddings)
print(index.ntotal)


2000


In [13]:
# Persist the FAISS index to the working directory.
faiss.write_index(index, "fashion_descriptions.index")

# Persist the image ids in DataFrame row order next to the index, so a search
# hit at position i can be mapped back to its image id.
ids_in_row_order = df["image_id"].tolist()
with open("image_ids.pkl", mode="wb") as f:
    pickle.dump(ids_in_row_order, f)

print("done")


done


In [14]:
#trial
def retrieve(query:str, k=5):
    """Return the indexed descriptions most similar to ``query``.

    The query is embedded with the notebook-level ``model`` (normalised
    embedding) and searched against the notebook-level FAISS ``index``. Each
    hit is mapped back to an image id through ``image_ids`` and to its text
    through ``df``; all four names must exist in the notebook namespace when
    the function is called.

    Args:
        query: Free-text description to search for.
        k: Maximum number of hits to return. Values larger than the number of
            indexed vectors are clamped to ``index.ntotal``; a non-positive
            value (or an empty index) yields an empty result.

    Returns:
        pandas.DataFrame with the columns ``image_id``, ``description`` and
        ``score`` (the score reported by ``index.search``), one row per hit
        in the order the index returned them. The columns are present even
        when there are no hits.
    """
    columns = ["image_id", "description", "score"]

    # Asking FAISS for more neighbours than it stores makes it pad the result
    # with id -1, so never request more than the index holds.
    k = min(int(k), index.ntotal)
    if k <= 0:
        return pd.DataFrame(columns=columns)

    q_emb = model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
    D, I = index.search(q_emb, k)

    results = []
    for score, idx in zip(D[0], I[0]):
        idx = int(idx)
        if idx < 0:
            # -1 means "no result". Used as a list index it would silently
            # return the *last* image id instead of failing, so skip it.
            continue
        results.append({
            "image_id": image_ids[idx],
            # FAISS ids are insertion positions, so look the row up by
            # position (.iloc) rather than by index label (.loc).
            "description": df["description"].iloc[idx],
            "score": float(score)
        })
    return pd.DataFrame(results, columns=columns)

# Reload the id list written earlier in this notebook; retrieve() reads it
# from the notebook namespace as `image_ids`.
with open("image_ids.pkl", mode="rb") as f:
    image_ids = pickle.load(f)

# Smoke-test the retrieval helper with a sample query.
sample_hits = retrieve("a red cocktail dress with lace details", k=5)
print(sample_hits)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

                           image_id  \
0  52dc4ac2e8b046fae7b5e6c3367a7961   
1  a32a50eab1dcdee2914e9610c4327acc   
2  291ae88fa81d574a0b4bc823f2a4e25f   
3  b3a0e8825a9b058d17de36f300e1de83   
4  7dfb06f211662c7b13b60a710eebcf07   

                                         description     score  
0  The clothing item is a red dress with a black ...  0.706891  
1  The woman is wearing a red dress that is tight...  0.706490  
2  The clothing item is a red dress with a lace p...  0.700796  
3  The clothing item is a red dress with a high w...  0.699826  
4  The clothing item is a red dress with a high w...  0.698726  
