In [1]:
import os

# Root directory of the Kaggle input dataset to scan.
input_dir = '/kaggle/input/encoding-image-and-testing'

# Running image total and the location of feature_vectors.pkl (None until found).
image_count = 0
feature_vector_path = None

# Walk the whole tree once, handling one directory listing per iteration.
for root, dirs, files in os.walk(input_dir):
    # A file counts as an image when its lower-cased name ends with one of
    # these extensions; summing the booleans gives the per-directory count.
    image_count += sum(
        name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))
        for name in files
    )
    # Remember the full path of feature_vectors.pkl when this directory has it.
    if 'feature_vectors.pkl' in files:
        feature_vector_path = os.path.join(root, 'feature_vectors.pkl')

# Report the results.
print(f"Tổng số ảnh: {image_count}")
print(
    f"Đã tìm thấy file feature_vectors.pkl tại: {feature_vector_path}"
    if feature_vector_path
    else "Không tìm thấy file feature_vectors.pkl"
)


Tổng số ảnh: 31395
Đã tìm thấy file feature_vectors.pkl tại: /kaggle/input/encoding-image-and-testing/feature_vectors.pkl


In [2]:
import os
import glob
import random
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# ==== Dataset Class ====
class TripletDataset(Dataset):
    """Dataset that yields randomly sampled (anchor, positive, negative) triplets.

    Every ``*.jpg`` file directly inside ``data_dir`` is assigned to a class
    named after the part of its file name that precedes the first underscore.
    Each item is drawn at random, so the index passed to ``__getitem__`` is
    ignored and the dataset length only determines how many triplets make up
    one pass over the dataset.

    Args:
        data_dir: Directory containing the ``.jpg`` images.
        transform: Optional callable applied to each of the three PIL images.
        num_triplets: Value reported by ``len()``. Defaults to 10000.

    Raises:
        ValueError: If fewer than two classes are found, or if no class has at
            least two images (no anchor/positive pair could ever be drawn).
    """

    def __init__(self, data_dir, transform=None, num_triplets=10000):
        self.transform = transform
        self.num_triplets = num_triplets
        # Sorted so the class and image ordering does not depend on the
        # arbitrary order in which glob returns directory entries.
        self.image_paths = sorted(glob.glob(os.path.join(data_dir, "*.jpg")))
        self.class_to_images = {}

        for path in self.image_paths:
            # Class label = file-name prefix before the first underscore.
            cls = os.path.basename(path).split("_")[0]
            self.class_to_images.setdefault(cls, []).append(path)

        self.classes = list(self.class_to_images.keys())
        # Only classes with at least two images can provide an anchor AND a
        # distinct positive; single-image classes are still used as negatives.
        self.anchor_classes = [
            cls for cls in self.classes if len(self.class_to_images[cls]) >= 2
        ]

        # Fail at construction time with a clear message instead of raising a
        # random IndexError/ValueError from __getitem__ in the middle of training.
        if len(self.classes) < 2:
            raise ValueError(
                f"TripletDataset needs at least 2 classes in {data_dir!r}, "
                f"found {len(self.classes)}"
            )
        if not self.anchor_classes:
            raise ValueError(
                f"TripletDataset needs at least one class with 2 or more images "
                f"in {data_dir!r}"
            )

    def __len__(self):
        return self.num_triplets

    def _sample_triplet_paths(self):
        """Pick file paths for one random (anchor, positive, negative) triplet."""
        anchor_class = random.choice(self.anchor_classes)
        negative_class = random.choice(
            [cls for cls in self.classes if cls != anchor_class]
        )

        # Two distinct images of the anchor class, one image of another class.
        anchor, positive = random.sample(self.class_to_images[anchor_class], 2)
        negative = random.choice(self.class_to_images[negative_class])
        return anchor, positive, negative

    @staticmethod
    def _load_rgb(path):
        """Open the image at ``path`` and return it converted to RGB."""
        # The context manager releases the underlying file deterministically.
        with Image.open(path) as img:
            return img.convert("RGB")

    def __getitem__(self, idx):
        # idx is intentionally unused: every access draws a fresh random triplet.
        anchor, positive, negative = self._sample_triplet_paths()

        anchor_img = self._load_rgb(anchor)
        positive_img = self._load_rgb(positive)
        negative_img = self._load_rgb(negative)

        if self.transform:
            anchor_img = self.transform(anchor_img)
            positive_img = self.transform(positive_img)
            negative_img = self.transform(negative_img)

        return anchor_img, positive_img, negative_img

# ==== Embedding Network ====
class EmbeddingNet(nn.Module):
    """ResNet-50 based network that maps images to unit-length 256-d embeddings.

    The backbone is a pretrained torchvision ResNet-50 with its last child
    module removed (presumably the classification layer - confirm against the
    torchvision model definition). A linear layer then projects the flattened
    2048 features to 256 dimensions, and the result is L2-normalized.
    """

    def __init__(self):
        super().__init__()
        resnet = models.resnet50(pretrained=True)
        # Keep every child module of the ResNet except the final one.
        feature_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*feature_layers)
        self.fc = nn.Linear(2048, 256)

    def forward(self, x):
        """Return one L2-normalized embedding row per input sample."""
        features = self.backbone(x)
        # Collapse everything after the batch dimension into a single axis.
        flat = features.view(features.size(0), -1)
        return F.normalize(self.fc(flat), p=2, dim=1)

# ==== Triplet Loss ====
def triplet_loss(anchor, positive, negative, margin=1.0):
    """Mean triplet margin loss over a batch of embeddings.

    For every row the loss is ``max(d(anchor, positive) - d(anchor, negative)
    + margin, 0)``, where ``d`` is ``F.pairwise_distance`` with its default
    arguments; the per-row values are then averaged.

    Args:
        anchor: Batch of anchor embeddings.
        positive: Batch of embeddings that should be close to the anchors.
        negative: Batch of embeddings that should be far from the anchors.
        margin: Required gap between the two distances. Defaults to 1.0.

    Returns:
        A scalar tensor holding the averaged loss.
    """
    dist_to_positive = F.pairwise_distance(anchor, positive)
    dist_to_negative = F.pairwise_distance(anchor, negative)
    # ReLU zeroes out triplets that already satisfy the margin.
    per_sample = F.relu(dist_to_positive - dist_to_negative + margin)
    return per_sample.mean()

# ==== Train Function ====
def train_embedding_model(data_dir, batch_size=32, epochs=5, lr=1e-4,
                          output_path="resnet50_triplet.pth", num_workers=0):
    """Train an EmbeddingNet with triplet loss and save its weights.

    Images are resized to 224x224 and converted to tensors; no further
    normalization is applied. Training uses Adam and runs on CUDA when it is
    available, otherwise on the CPU. The model is not switched to eval mode
    before it is returned.

    Args:
        data_dir: Directory of images handed to TripletDataset.
        batch_size: Number of triplets per optimization step.
        epochs: Number of passes over the dataset.
        lr: Learning rate for the Adam optimizer.
        output_path: Destination passed to ``torch.save`` for the state dict.
        num_workers: Number of DataLoader worker processes. The default of 0
            keeps all image loading in the main process, as before.

    Returns:
        The trained EmbeddingNet instance.
    """
    # Make sure the save location is usable BEFORE the (potentially hours-long)
    # training starts, so a missing directory cannot cost the trained weights.
    # Only path-like destinations are checked; torch.save also accepts
    # file-like objects, which are left untouched.
    if isinstance(output_path, (str, os.PathLike)):
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    dataset = TripletDataset(data_dir, transform)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EmbeddingNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for anchor, pos, neg in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device)
            # The same network embeds all three images of each triplet.
            out_a = model(anchor)
            out_p = model(pos)
            out_n = model(neg)

            loss = triplet_loss(out_a, out_p, out_n)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Average of the per-batch losses for this epoch.
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

    torch.save(model.state_dict(), output_path)
    print(f"✅ Model saved to: {output_path}")
    return model


In [3]:
# Launch the 70-epoch training run on the cropped-image folder. As the last
# expression of the cell, the returned model is also displayed by the notebook.
train_embedding_model(
    "/kaggle/input/encoding-image-and-testing/cropped_images",
    batch_size=54,
    epochs=70,
    lr=1e-4,
)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 220MB/s]
Epoch 1/70: 100%|██████████| 186/186 [05:41<00:00,  1.84s/it]


Epoch 1, Loss: 0.4738


Epoch 2/70: 100%|██████████| 186/186 [05:12<00:00,  1.68s/it]


Epoch 2, Loss: 0.3978


Epoch 3/70: 100%|██████████| 186/186 [04:58<00:00,  1.61s/it]


Epoch 3, Loss: 0.3480


Epoch 4/70: 100%|██████████| 186/186 [04:52<00:00,  1.57s/it]


Epoch 4, Loss: 0.3283


Epoch 5/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 5, Loss: 0.3086


Epoch 6/70: 100%|██████████| 186/186 [04:47<00:00,  1.55s/it]


Epoch 6, Loss: 0.2932


Epoch 7/70: 100%|██████████| 186/186 [04:48<00:00,  1.55s/it]


Epoch 7, Loss: 0.2823


Epoch 8/70: 100%|██████████| 186/186 [04:50<00:00,  1.56s/it]


Epoch 8, Loss: 0.2743


Epoch 9/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 9, Loss: 0.2542


Epoch 10/70: 100%|██████████| 186/186 [04:48<00:00,  1.55s/it]


Epoch 10, Loss: 0.2448


Epoch 11/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 11, Loss: 0.2330


Epoch 12/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 12, Loss: 0.2202


Epoch 13/70: 100%|██████████| 186/186 [04:47<00:00,  1.54s/it]


Epoch 13, Loss: 0.2122


Epoch 14/70: 100%|██████████| 186/186 [04:48<00:00,  1.55s/it]


Epoch 14, Loss: 0.2071


Epoch 15/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 15, Loss: 0.1997


Epoch 16/70: 100%|██████████| 186/186 [04:54<00:00,  1.58s/it]


Epoch 16, Loss: 0.1921


Epoch 17/70: 100%|██████████| 186/186 [04:52<00:00,  1.57s/it]


Epoch 17, Loss: 0.1873


Epoch 18/70: 100%|██████████| 186/186 [04:54<00:00,  1.58s/it]


Epoch 18, Loss: 0.1834


Epoch 19/70: 100%|██████████| 186/186 [04:55<00:00,  1.59s/it]


Epoch 19, Loss: 0.1653


Epoch 20/70: 100%|██████████| 186/186 [04:57<00:00,  1.60s/it]


Epoch 20, Loss: 0.1632


Epoch 21/70: 100%|██████████| 186/186 [04:54<00:00,  1.58s/it]


Epoch 21, Loss: 0.1499


Epoch 22/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 22, Loss: 0.1476


Epoch 23/70: 100%|██████████| 186/186 [04:49<00:00,  1.56s/it]


Epoch 23, Loss: 0.1499


Epoch 24/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 24, Loss: 0.1469


Epoch 25/70: 100%|██████████| 186/186 [04:49<00:00,  1.56s/it]


Epoch 25, Loss: 0.1384


Epoch 26/70: 100%|██████████| 186/186 [04:47<00:00,  1.55s/it]


Epoch 26, Loss: 0.1318


Epoch 27/70: 100%|██████████| 186/186 [04:47<00:00,  1.55s/it]


Epoch 27, Loss: 0.1297


Epoch 28/70: 100%|██████████| 186/186 [04:47<00:00,  1.54s/it]


Epoch 28, Loss: 0.1313


Epoch 29/70: 100%|██████████| 186/186 [04:48<00:00,  1.55s/it]


Epoch 29, Loss: 0.1236


Epoch 30/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 30, Loss: 0.1286


Epoch 31/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 31, Loss: 0.1240


Epoch 32/70: 100%|██████████| 186/186 [04:45<00:00,  1.54s/it]


Epoch 32, Loss: 0.1178


Epoch 33/70: 100%|██████████| 186/186 [04:48<00:00,  1.55s/it]


Epoch 33, Loss: 0.1140


Epoch 34/70: 100%|██████████| 186/186 [04:47<00:00,  1.54s/it]


Epoch 34, Loss: 0.1176


Epoch 35/70: 100%|██████████| 186/186 [04:45<00:00,  1.54s/it]


Epoch 35, Loss: 0.1108


Epoch 36/70: 100%|██████████| 186/186 [04:49<00:00,  1.55s/it]


Epoch 36, Loss: 0.1056


Epoch 37/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 37, Loss: 0.1058


Epoch 38/70: 100%|██████████| 186/186 [04:57<00:00,  1.60s/it]


Epoch 38, Loss: 0.1003


Epoch 39/70: 100%|██████████| 186/186 [05:02<00:00,  1.63s/it]


Epoch 39, Loss: 0.1052


Epoch 40/70: 100%|██████████| 186/186 [04:50<00:00,  1.56s/it]


Epoch 40, Loss: 0.1013


Epoch 41/70: 100%|██████████| 186/186 [04:57<00:00,  1.60s/it]


Epoch 41, Loss: 0.0935


Epoch 42/70: 100%|██████████| 186/186 [04:56<00:00,  1.59s/it]


Epoch 42, Loss: 0.0957


Epoch 43/70: 100%|██████████| 186/186 [04:59<00:00,  1.61s/it]


Epoch 43, Loss: 0.0999


Epoch 44/70: 100%|██████████| 186/186 [05:01<00:00,  1.62s/it]


Epoch 44, Loss: 0.0925


Epoch 45/70: 100%|██████████| 186/186 [04:56<00:00,  1.59s/it]


Epoch 45, Loss: 0.0939


Epoch 46/70: 100%|██████████| 186/186 [04:54<00:00,  1.58s/it]


Epoch 46, Loss: 0.0913


Epoch 47/70: 100%|██████████| 186/186 [04:51<00:00,  1.57s/it]


Epoch 47, Loss: 0.0920


Epoch 48/70: 100%|██████████| 186/186 [04:49<00:00,  1.56s/it]


Epoch 48, Loss: 0.0871


Epoch 49/70: 100%|██████████| 186/186 [04:47<00:00,  1.55s/it]


Epoch 49, Loss: 0.0862


Epoch 50/70: 100%|██████████| 186/186 [04:44<00:00,  1.53s/it]


Epoch 50, Loss: 0.0858


Epoch 51/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 51, Loss: 0.0824


Epoch 52/70: 100%|██████████| 186/186 [04:43<00:00,  1.53s/it]


Epoch 52, Loss: 0.0882


Epoch 53/70: 100%|██████████| 186/186 [04:43<00:00,  1.52s/it]


Epoch 53, Loss: 0.0814


Epoch 54/70: 100%|██████████| 186/186 [04:43<00:00,  1.52s/it]


Epoch 54, Loss: 0.0819


Epoch 55/70: 100%|██████████| 186/186 [04:40<00:00,  1.51s/it]


Epoch 55, Loss: 0.0845


Epoch 56/70: 100%|██████████| 186/186 [04:40<00:00,  1.51s/it]


Epoch 56, Loss: 0.0807


Epoch 57/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 57, Loss: 0.0787


Epoch 58/70: 100%|██████████| 186/186 [04:44<00:00,  1.53s/it]


Epoch 58, Loss: 0.0764


Epoch 59/70: 100%|██████████| 186/186 [04:44<00:00,  1.53s/it]


Epoch 59, Loss: 0.0737


Epoch 60/70: 100%|██████████| 186/186 [04:44<00:00,  1.53s/it]


Epoch 60, Loss: 0.0748


Epoch 61/70: 100%|██████████| 186/186 [04:44<00:00,  1.53s/it]


Epoch 61, Loss: 0.0858


Epoch 62/70: 100%|██████████| 186/186 [04:45<00:00,  1.53s/it]


Epoch 62, Loss: 0.0742


Epoch 63/70: 100%|██████████| 186/186 [04:46<00:00,  1.54s/it]


Epoch 63, Loss: 0.0741


Epoch 64/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 64, Loss: 0.0768


Epoch 65/70: 100%|██████████| 186/186 [04:41<00:00,  1.51s/it]


Epoch 65, Loss: 0.0693


Epoch 66/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 66, Loss: 0.0704


Epoch 67/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 67, Loss: 0.0703


Epoch 68/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 68, Loss: 0.0718


Epoch 69/70: 100%|██████████| 186/186 [04:42<00:00,  1.52s/it]


Epoch 69, Loss: 0.0692


Epoch 70/70: 100%|██████████| 186/186 [04:45<00:00,  1.54s/it]

Epoch 70, Loss: 0.0693
✅ Model saved to: resnet50_triplet.pth





EmbeddingNet(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2