In [1]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.241-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.241-py3-none-any.whl (1.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.241 ultralytics-thop-2.0.18


In [2]:
# Attribute ids in the dataset can go up to ~300, so the id -> index map
# is built by scanning every label file.

import os

LABEL_DIR = r"/kaggle/input/fashion-cropped/crops/train/labels"

# Gather every distinct attribute id that occurs in any label file.
attr_set = set()
for label_name in os.listdir(LABEL_DIR):
    label_path = os.path.join(LABEL_DIR, label_name)
    with open(label_path) as label_file:
        attr_set.update(int(token) for token in label_file.read().split())

# Sorted ids give a stable id <-> contiguous index mapping.
ATTRIBUTES = sorted(attr_set)
attr2idx = {attr_id: col for col, attr_id in enumerate(ATTRIBUTES)}
idx2attr = dict(enumerate(ATTRIBUTES))

NUM_ATTRS = len(ATTRIBUTES)
print("Num attributes:", NUM_ATTRS)


Num attributes: 294


In [None]:
from torch.utils.data import Dataset
from PIL import Image
from tqdm import tqdm
import torchvision.transforms as T
import torch


class AttributeDataset(Dataset):
    """Image crops paired with multi-hot attribute targets.

    Each sample is ``<img_dir>/<stem>.jpg`` plus a label file
    ``<label_dir>/<stem>.txt`` holding whitespace-separated integer
    attribute ids. Ids are mapped to target positions through the
    module-level ``attr2idx``; the target has ``NUM_ATTRS`` entries.

    Args:
        img_dir: Directory containing the ``.jpg`` crops.
        label_dir: Directory containing the ``.txt`` label files.
        filelist: Optional path to a text file with one sample stem
            (file name without extension) per line. When omitted, every
            ``.txt`` file in ``label_dir`` defines a sample.
    """

    def __init__(self, img_dir, label_dir, filelist=None):
        self.img_dir = img_dir
        self.label_dir = label_dir

        if filelist:
            with open(filelist) as f:
                # Skip blank lines (e.g. a trailing newline), which would
                # otherwise turn into a bogus ".jpg" sample.
                self.files = [
                    line.strip() + ".jpg" for line in f if line.strip()
                ]
        else:
            # Only label files define samples, and only the extension is
            # swapped so a ".txt" inside the stem is left alone. Sorting
            # makes the index -> file mapping reproducible across runs.
            self.files = [
                os.path.splitext(name)[0] + ".jpg"
                for name in sorted(os.listdir(label_dir))
                if name.endswith(".txt")
            ]

        self.transform = T.Compose([
            T.Resize((224, 224)),
            T.ToTensor(),
        ])

    def __len__(self):
        """Return the number of samples."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``image`` is the transformed RGB crop; ``target`` is a float vector
        of length ``NUM_ATTRS`` with 1.0 at every attribute listed in the
        label file.

        Raises:
            KeyError: If the label file contains an id missing from
                ``attr2idx``.
        """
        img_name = self.files[idx]

        # Close the file handle deterministically; convert() hands back a
        # loaded image that no longer needs the open file.
        with Image.open(os.path.join(self.img_dir, img_name)) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)

        # Swap only the extension; str.replace would also rewrite a ".jpg"
        # occurring inside the stem.
        stem = os.path.splitext(img_name)[0]
        label_path = os.path.join(self.label_dir, stem + ".txt")

        target = torch.zeros(NUM_ATTRS)
        with open(label_path) as f:
            for a in f.read().split():
                target[attr2idx[int(a)]] = 1.0

        return img, target


In [None]:
from ultralytics import YOLO
import torch.nn.functional as F

# Load your trained detection weights.
yolo = YOLO(r"/kaggle/input/last20/pytorch/default/1/last.pt")
net = yolo.model

# Buffer that the forward hook below fills with the hooked layer's output.
_embeddings = []


def hook_fn(module, input, output):
    """Forward hook: stash the hooked layer's output in ``_embeddings``."""
    _embeddings.append(output)


# Eval mode, with gradients disabled for every parameter.
net.eval()
net.requires_grad_(False)

# Hook the last neck layer before Detect.
hook = net.model[22].register_forward_hook(hook_fn)

def extract_embedding_batch(images):
    """Embed a batch of images using the hooked YOLO layer.

    Runs ``yolo.model`` on ``images``, takes the feature map that the forward
    hook stored in ``_embeddings`` and global-average-pools it to one vector
    per image.

    Args:
        images: Tensor [B, 3, H, W].

    Returns:
        Tensor [B, C], where C is the channel count of the hooked layer.

    Raises:
        RuntimeError: If the forward pass left ``_embeddings`` empty, i.e.
            the hook did not fire.
    """
    _embeddings.clear()
    yolo.model(images)

    if not _embeddings:
        raise RuntimeError(
            "forward hook captured no features; is the hook still registered?"
        )

    feat = _embeddings[0]  # [B, C, H, W]
    # Drop the buffer's reference so the feature map is not kept alive in the
    # module-level list until the next call.
    _embeddings.clear()

    pooled = F.adaptive_avg_pool2d(feat, (1, 1))  # [B, C, 1, 1]
    return pooled.flatten(1)  # [B, C]


Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [7]:
import torch.nn as nn

class AttributeHead(nn.Module):
    """MLP with one hidden layer mapping an embedding to per-attribute logits.

    Args:
        emb_dim: Size of the input embedding.
        num_attrs: Number of output logits (one per attribute).
    """

    def __init__(self, emb_dim=512, num_attrs=NUM_ATTRS):
        super().__init__()
        layers = [
            nn.Linear(emb_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_attrs),
        ]
        # The attribute name ``net`` and the layer order determine the
        # state_dict keys, so both are kept as they are.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw logits (no sigmoid applied)."""
        logits = self.net(x)
        return logits



In [8]:
from torch.utils.data import DataLoader
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Each split reads its sample names from the corresponding file list.
train_ds = AttributeDataset(
    img_dir=r"/kaggle/input/fashion-cropped/crops/train/images",
    label_dir=r"/kaggle/input/fashion-cropped/crops/train/labels",
    filelist=r"/kaggle/input/sample-crop-data/sample_train.txt",
)
val_ds = AttributeDataset(
    img_dir=r"/kaggle/input/fashion-cropped/crops/val/images",
    label_dir=r"/kaggle/input/fashion-cropped/crops/val/labels",
    filelist=r"/kaggle/input/sample-crop-data/sample_val.txt",
)

# Only the training loader shuffles.
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=32)

# The optimizer is given the attribute head's parameters only.
attr_head = AttributeHead().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(attr_head.parameters(), lr=1e-3)

# Move the detector to the same device and keep it in eval mode. As the last
# expression of the cell, the returned model is displayed.
yolo.model.to(device).eval()



DetectionModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C3k2(
      (cv1): Conv(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
 

In [9]:
def train_epoch():
    """Run one training pass of ``attr_head`` over ``train_loader``.

    Embeddings are computed by ``extract_embedding_batch`` under
    ``torch.no_grad()``, so gradients are only built for the attribute head.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    attr_head.train()
    batch_losses = []

    progress = tqdm(train_loader, desc="üü¢ Training", leave=False)
    for batch_imgs, batch_targets in progress:
        batch_imgs = batch_imgs.to(device)
        batch_targets = batch_targets.to(device)

        # Reset gradients left over from the previous step.
        optimizer.zero_grad()

        with torch.no_grad():
            features = extract_embedding_batch(batch_imgs)

        batch_loss = criterion(attr_head(features), batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        batch_losses.append(loss_value)
        progress.set_postfix(loss=loss_value)

    return sum(batch_losses) / len(train_loader)

def val_epoch():
    """Evaluate ``attr_head`` over ``val_loader`` without gradient tracking.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    attr_head.eval()
    batch_losses = []

    progress = tqdm(val_loader, desc="üîµ Validation", leave=False)
    with torch.no_grad():
        for batch_imgs, batch_targets in progress:
            batch_imgs = batch_imgs.to(device)
            batch_targets = batch_targets.to(device)

            features = extract_embedding_batch(batch_imgs)
            batch_loss = criterion(attr_head(features), batch_targets)

            loss_value = batch_loss.item()
            batch_losses.append(loss_value)
            progress.set_postfix(loss=loss_value)

    return sum(batch_losses) / len(val_loader)



In [11]:
EPOCHS = 20
best_val_loss = float("inf")
BEST_CKPT_PATH = r"/kaggle/working/attribute_head_best.pt"


for epoch in range(EPOCHS):
    train_loss = train_epoch()
    val_loss = val_epoch()

    print(
        f"Epoch [{epoch+1}/{EPOCHS}] "
        f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}"
    )

    # ===== SAVE BEST =====
    # Checkpoint only when the validation loss strictly improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss

        torch.save({
            "epoch": epoch + 1,
            "state_dict": attr_head.state_dict(),
            "val_loss": best_val_loss,
            # Read the input width from the head being saved so this metadata
            # cannot drift from the stored weights.
            "emb_dim": attr_head.net[0].in_features,
            # Both maps are stored so predictions can be mapped back to the
            # original attribute ids.
            "attr2idx": attr2idx,
            "idx2attr": idx2attr
        }, BEST_CKPT_PATH)

        print(f"üíæ Saved BEST model (val_loss={best_val_loss:.4f})")


                                                                           

Epoch [1/20] Train Loss: 0.0526 | Val Loss: 0.0353
üíæ Saved BEST model (val_loss=0.0353)


                                                                           

Epoch [2/20] Train Loss: 0.0324 | Val Loss: 0.0329
üíæ Saved BEST model (val_loss=0.0329)


                                                                           

Epoch [3/20] Train Loss: 0.0307 | Val Loss: 0.0318
üíæ Saved BEST model (val_loss=0.0318)


                                                                           

Epoch [4/20] Train Loss: 0.0296 | Val Loss: 0.0314
üíæ Saved BEST model (val_loss=0.0314)


                                                                           

Epoch [5/20] Train Loss: 0.0289 | Val Loss: 0.0311
üíæ Saved BEST model (val_loss=0.0311)


                                                                           

Epoch [6/20] Train Loss: 0.0283 | Val Loss: 0.0307
üíæ Saved BEST model (val_loss=0.0307)


                                                                           

Epoch [7/20] Train Loss: 0.0278 | Val Loss: 0.0305
üíæ Saved BEST model (val_loss=0.0305)


                                                                           

Epoch [8/20] Train Loss: 0.0273 | Val Loss: 0.0303
üíæ Saved BEST model (val_loss=0.0303)


                                                                           

Epoch [9/20] Train Loss: 0.0270 | Val Loss: 0.0300
üíæ Saved BEST model (val_loss=0.0300)


                                                                           

Epoch [10/20] Train Loss: 0.0267 | Val Loss: 0.0298
üíæ Saved BEST model (val_loss=0.0298)


                                                                           

Epoch [11/20] Train Loss: 0.0263 | Val Loss: 0.0298
üíæ Saved BEST model (val_loss=0.0298)


                                                                           

Epoch [12/20] Train Loss: 0.0260 | Val Loss: 0.0297
üíæ Saved BEST model (val_loss=0.0297)


                                                                           

Epoch [13/20] Train Loss: 0.0258 | Val Loss: 0.0295
üíæ Saved BEST model (val_loss=0.0295)


                                                                           

Epoch [14/20] Train Loss: 0.0256 | Val Loss: 0.0292
üíæ Saved BEST model (val_loss=0.0292)


                                                                           

Epoch [15/20] Train Loss: 0.0253 | Val Loss: 0.0296


                                                                           

Epoch [16/20] Train Loss: 0.0250 | Val Loss: 0.0297


                                                                           

Epoch [17/20] Train Loss: 0.0249 | Val Loss: 0.0295


                                                                           

Epoch [18/20] Train Loss: 0.0247 | Val Loss: 0.0293


                                                                           

Epoch [19/20] Train Loss: 0.0244 | Val Loss: 0.0292
üíæ Saved BEST model (val_loss=0.0292)


                                                                           

Epoch [20/20] Train Loss: 0.0242 | Val Loss: 0.0293




In [12]:
SAVE_PATH = r"/kaggle/working/attribute_head_last.pt"

# Save the head as it stands after the final epoch.
torch.save({
    "state_dict": attr_head.state_dict(),
    "attr2idx": attr2idx,          # very important for mapping indices back
    "idx2attr": idx2attr,
    # Read the input width from the head being saved so this metadata cannot
    # drift from the stored weights.
    "emb_dim": attr_head.net[0].in_features
}, SAVE_PATH)

print("‚úÖ Saved attribute head to:", SAVE_PATH)

‚úÖ Saved attribute head to: /kaggle/working/attribute_head_last.pt
