In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# 📦 Imports
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import normalize
from transformers import AutoTokenizer, AutoModel
from timm import create_model
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import faiss

  check_for_updates()


In [2]:
!pip install faiss-cpu --quiet


In [5]:
# ⚙️ Config
class CFG:
    image_model = "eca_nfnet_l1"
    text_model = "xlm-roberta-base"
    image_size = 256
    batch_size = 64
    num_workers = 2
    device = "cuda" if torch.cuda.is_available() else "cpu"


In [6]:
# 📄 Load Data
df = pd.read_csv("/kaggle/input/shopee-product-matching/train.csv")


In [7]:
#STEP-1
# 🖼️ Image Transform
transform = A.Compose([
    A.Resize(CFG.image_size, CFG.image_size),
    A.Normalize(),
    ToTensorV2()
])

In [8]:
#STEP-2
transform = A.Compose([
    A.Resize(CFG.image_size, CFG.image_size),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.ShiftScaleRotate(p=0.3),
    A.Normalize(),
    ToTensorV2()
])


  original_init(self, **validated_kwargs)


In [9]:
# 📦 Dataset
class ShopeeDataset(Dataset):
    def __init__(self, df, mode="image"):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.tokenizer = AutoTokenizer.from_pretrained(CFG.text_model)
    
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.iloc[index]
        if self.mode == "image":
            image = cv2.imread(f"/kaggle/input/shopee-product-matching/train_images/{row.image}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = transform(image=image)["image"]
            return image
        else:
            text = row.title
            inputs = self.tokenizer(text, padding="max_length", truncation=True,
                                    return_tensors="pt", max_length=64)
            return {k: v.squeeze(0) for k, v in inputs.items()}

In [10]:
# Image Model
class ImageEmbeddingModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = create_model(CFG.image_model, pretrained=True, num_classes=0)
        self.bn = torch.nn.BatchNorm1d(self.backbone.num_features)

    def forward(self, x):
        x = self.backbone(x)
        x = self.bn(x)
        x = torch.nn.functional.normalize(x)
        return x

In [11]:
# Text Model
class TextEmbeddingModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = AutoModel.from_pretrained(CFG.text_model)

    def forward(self, input_ids, attention_mask):
        out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        x = out.last_hidden_state[:, 0, :]
        x = torch.nn.functional.normalize(x)
        return x

In [12]:
# Embedding Extraction
def get_embeddings(model, loader, mode="image"):
    model.eval()
    embeds = []
    with torch.no_grad():
        for batch in tqdm(loader):
            if mode == "image":
                batch = batch.to(CFG.device)
                emb = model(batch)
            else:
                batch = {k: v.to(CFG.device) for k, v in batch.items()}
                emb = model(**batch)
            embeds.append(emb.cpu())
    return torch.cat(embeds)

In [13]:
#  FAISS Matching
def get_matches(embeddings, threshold=0.5):
    embeddings = normalize(embeddings.numpy())
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    D, I = index.search(embeddings, 50)
    
    matches = []
    for i in range(len(I)):
        ids = I[i][D[i] > threshold]
        matches.append(df.iloc[ids].posting_id.values)
    return matches

In [14]:
#  Evaluation
def f1_score(df):
    gt = df.groupby("label_group")["posting_id"].apply(set).to_dict()
    df["true"] = df["label_group"].map(gt)
    df["pred"] = df["matches"].apply(lambda x: set(x.split()))
    
    def f1(row):
        inter = len(row.true & row.pred)
        if not row.pred: return 0
        prec = inter / len(row.pred)
        rec = inter / len(row.true)
        return 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0

    return df.apply(f1, axis=1).mean()

In [None]:

# ▶️ Main Execution
def main():
    # Image embeddings
    image_ds = ShopeeDataset(df, mode="image")
    image_loader = DataLoader(image_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    image_model = ImageEmbeddingModel().to(CFG.device)
    image_embs = get_embeddings(image_model, image_loader, mode="image")

    # Text embeddings
    text_ds = ShopeeDataset(df, mode="text")
    text_loader = DataLoader(text_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    text_model = TextEmbeddingModel().to(CFG.device)
    text_embs = get_embeddings(text_model, text_loader, mode="text")

    # Combine and normalize
    combined = normalize(np.hstack([image_embs, text_embs]))

    # Find matches
    match_ids = get_matches(torch.tensor(combined), threshold=0.5)
    df["matches"] = [" ".join(m) for m in match_ids]

    # Score
    score = f1_score(df)
    print(f"Validation F1 Score: {score:.5f}")

main()



In [None]:
# ▶️ Main Execution
def main():
    # Image embeddings
    image_ds = ShopeeDataset(df, mode="image")
    image_loader = DataLoader(image_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    image_model = ImageEmbeddingModel().to(CFG.device)
    image_embs = get_embeddings(image_model, image_loader, mode="image").numpy()

    # Text embeddings
    text_ds = ShopeeDataset(df, mode="text")
    text_loader = DataLoader(text_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    text_model = TextEmbeddingModel().to(CFG.device)
    text_embs = get_embeddings(text_model, text_loader, mode="text").numpy()

    # Normalize embeddings separately
    image_embs = normalize(image_embs)
    text_embs = normalize(text_embs)

    # Test different embedding weights and thresholds
    best_score, best_thresh, best_weight = 0, 0, 0

    weights = [0.2, 0.4, 0.5, 0.6, 0.8]
    thresholds = np.arange(0.3, 0.8, 0.05)

    for w in weights:
        #combined = normalize(w * image_embs + (1 - w) * text_embs)
        combined = normalize(np.hstack([image_embs * w, text_embs * (1 - w)]))

        for thresh in thresholds:
            matches = get_matches(torch.tensor(combined), threshold=thresh)
            df["matches"] = [" ".join(m) for m in matches]
            score = f1_score(df)

            print(f"Weight: Img {w:.2f}/Text {1-w:.2f} | Threshold: {thresh:.2f} | F1 Score: {score:.5f}")

            if score > best_score:
                best_score = score
                best_thresh = thresh
                best_weight = w

    print("\n🏆 Best Results:")
    print(f"Best Embedding Weight: Image {best_weight:.2f} | Text {1 - best_weight:.2f}")
    print(f"Best Threshold: {best_thresh:.2f}")
    print(f"Best Validation F1 Score: {best_score:.5f}")

main()


100%|██████████| 536/536 [04:59<00:00,  1.79it/s]
100%|██████████| 536/536 [00:56<00:00,  9.53it/s]


Weight: Img 0.20/Text 0.80 | Threshold: 0.30 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.35 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.40 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.45 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.50 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.55 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.60 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.65 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.70 | F1 Score: 0.14911
Weight: Img 0.20/Text 0.80 | Threshold: 0.75 | F1 Score: 0.14912
Weight: Img 0.40/Text 0.60 | Threshold: 0.30 | F1 Score: 0.14878
Weight: Img 0.40/Text 0.60 | Threshold: 0.35 | F1 Score: 0.14878
Weight: Img 0.40/Text 0.60 | Threshold: 0.40 | F1 Score: 0.14878
Weight: Img 0.40/Text 0.60 | Threshold: 0.45 | F1 Score: 0.14878
Weight: Img 0.40/Text 0.60 | Threshold: 0.50 | F1 Score: 0.14878
Weight: Img 0.40/Text 0.6

In [13]:
# ▶️ Main Execution (Improved)
def main():
    print("🔄 Extracting image embeddings...")
    image_ds = ShopeeDataset(df, mode="image")
    image_loader = DataLoader(image_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    image_model = ImageEmbeddingModel().to(CFG.device)
    image_embs = get_embeddings(image_model, image_loader, mode="image")
    
    print("🔄 Extracting text embeddings...")
    text_ds = ShopeeDataset(df, mode="text")
    text_loader = DataLoader(text_ds, batch_size=CFG.batch_size, num_workers=CFG.num_workers)
    text_model = TextEmbeddingModel().to(CFG.device)
    text_embs = get_embeddings(text_model, text_loader, mode="text")

    # ✅ Normalize individually before concatenating
    print("📐 Normalizing embeddings...")
    image_embs = normalize(image_embs)
    text_embs = normalize(text_embs)
    combined = normalize(np.hstack([image_embs, text_embs]))

    # ✅ Use a lower threshold to catch more matches
    print("🔍 Running FAISS similarity search...")
    match_ids = get_matches(torch.tensor(combined), threshold=0.3)

    # Format match output as strings
    df["matches"] = [" ".join(map(str, m)) for m in match_ids]

    # Evaluate
    print("📊 Calculating F1 score...")
    score = f1_score(df)
    print(f"\n✅ Validation F1 Score: {score:.5f}")

main()


🔄 Extracting image embeddings...


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/166M [00:00<?, ?B/s]

100%|██████████| 536/536 [05:08<00:00,  1.74it/s]


🔄 Extracting text embeddings...


2025-05-03 01:01:04.815440: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746234065.004445      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746234065.063236      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

100%|██████████| 536/536 [00:56<00:00,  9.48it/s]


📐 Normalizing embeddings...
🔍 Running FAISS similarity search...
📊 Calculating F1 score...

✅ Validation F1 Score: 0.14872
