# Create train_data.csv

In [47]:
import os
from pathlib import Path
import pandas as pd

In [50]:
path_cosmenet = Path("/home/music/Desktop/measure_model/data/cosmenet_test")

In [45]:
df = []
for root, directories, files in os.walk(path_cosmenet):
    for file in files:
        classes = Path(root).name
        df.append([file, classes])

In [48]:
df_pd = pd.DataFrame(df, columns=['image_name', 'id_product'])

In [52]:
df_pd.to_csv(path_cosmenet / 'train_data.csv', index=False)

# Create Dataset

In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

import pandas as pd

from PIL import Image
from pathlib import Path
import random

In [2]:
class CosmenetDataset_Triplet():
    def __init__(self, df: pd, path: Path, train=True, transform=None):
        self.data_csv = df
        self.is_train = train
        self.transform = transform
        self.path = path
        if self.is_train:
            self.images = df.iloc[:, 0].values
            self.labels = df.iloc[:, 1].values
            self.index = df.index.values 
    
    def full_path(self, label,  image_name):
        return self.path / str(label) / image_name
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, item):
        anchor_image_name = self.images[item]
        anchor_image_path = self.full_path(self.labels[item], anchor_image_name)
        ###### Anchor Image #######
        anchor_img = Image.open(anchor_image_path).convert('RGB')
        if self.is_train:
            anchor_label = self.labels[item]
            positive_list = self.index[self.index!=item][self.labels[self.index!=item]==anchor_label]
            positive_item = random.choice(positive_list)
            positive_image_name = self.images[positive_item]
            positive_image_path = self.full_path(self.labels[positive_item], positive_image_name)
            positive_img = Image.open(positive_image_path).convert('RGB')
            #positive_img = self.images[positive_item].reshape(28, 28, 1)
            negative_list = self.index[self.index!=item][self.labels[self.index!=item]!=anchor_label]
            negative_item = random.choice(negative_list)
            negative_image_name = self.images[negative_item]
            negative_image_path = self.full_path(self.labels[negative_item], negative_image_name)
            negative_img = Image.open(negative_image_path).convert('RGB')
            #negative_img = self.images[negative_item].reshape(28, 28, 1)
            if self.transform!=None:
                anchor_img = self.transform(anchor_img)
                positive_img = self.transform(positive_img)                   
                negative_img = self.transform(negative_img)
        return anchor_img, positive_img, negative_img, anchor_label

In [3]:
train_data_path = Path("/home/music/Desktop/measure_model/data/cosmenet_test")
train_data= pd.read_csv(train_data_path / 'train_data.csv') # [imag_path, label]
def get_train_dataset(IMAGE_SIZE):
    trans = transforms.Compose([transforms.ToTensor(),transforms.Resize((IMAGE_SIZE,IMAGE_SIZE), antialias=False)])
    train_dataset = CosmenetDataset_Triplet(train_data, path=train_data_path, train=True, transform=trans)
    return train_dataset

In [4]:
def get_default_device():
#””Pick GPU if available, else CPU”””
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
IMAGE_SIZE = 224
BATCH_SIZE = 1
DEVICE = get_default_device()
LEARNING_RATE = 0.005
EPOCHS = 10

In [5]:
train_dataset = get_train_dataset(IMAGE_SIZE)
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

# Preprocessing

In [6]:
import torch.nn as nn

In [7]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
    def calc_euclidean(self, x1, x2):
        return (x1 - x2).pow(2).sum(1)
    def forward(self, anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor) -> torch.Tensor:
        distance_positive = self.calc_euclidean(anchor, positive)
        distance_negative = self.calc_euclidean(anchor, negative)
        losses = torch.relu(distance_positive - distance_negative + self.margin)
        return losses.mean()

In [8]:
from transformers import ViTImageProcessor, ViTModel
vit_gg = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
vit_gg.eval().to(DEVICE)
processor_vit_gg = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
Optimizer = torch.optim.Adam(vit_gg.parameters(),lr = LEARNING_RATE)
criterion = TripletLoss()

In [9]:
# LAST_LAYER = 199
LAST_LAYER = -1
for n, layer in enumerate(vit_gg.parameters()):
    if n >= LAST_LAYER:
        layer.requires_grad = False
    else:
        layer.requires_grad = True

# Training

In [9]:
from tqdm import tqdm
import numpy as np

In [10]:
# for epoch in tqdm(range(EPOCHS), desc="Epochs"):
for epoch in tqdm(range(1), desc="Epochs"):
    running_loss = []
    for step, (anchor_img, positive_img, negative_img, anchor_label) in enumerate(tqdm(train_dl, desc="Training", leave=False)):
        anchor_img = processor_vit_gg(images=anchor_img, return_tensors="pt").to(DEVICE)
        positive_img = processor_vit_gg(images=positive_img, return_tensors="pt").to(DEVICE)
        negative_img = processor_vit_gg(images=negative_img, return_tensors="pt").to(DEVICE)
        
        anchor_out = vit_gg(**anchor_img)
        positive_out = vit_gg(**positive_img)
        negative_out = vit_gg(**negative_img)
        
        # loss = criterion(anchor_out, positive_out, negative_out)

Epochs:   0%|          | 0/1 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


: 

In [11]:
for epoch in tqdm(range(EPOCHS), desc="Epochs"):
    running_loss = []
    for step, (anchor_img, positive_img, negative_img, anchor_label) in enumerate(tqdm(train_dl, desc="Training", leave=False)):
        anchor_img = processor_vit_gg(images=anchor_img, return_tensors="pt").to(DEVICE)
        positive_img = processor_vit_gg(images=positive_img, return_tensors="pt").to(DEVICE)
        negative_img = processor_vit_gg(images=negative_img, return_tensors="pt").to(DEVICE)
        
        anchor_out = vit_gg(**anchor_img)
        positive_out = vit_gg(**positive_img)
        negative_out = vit_gg(**negative_img)
        
        loss = criterion(anchor_out, positive_out, negative_out)
        
        Optimizer.zero_grad()
        loss.backward()
        Optimizer.step()
        running_loss.append(loss.cpu().detach().numpy())
        print("Epoch: {}/{} — Loss: {:.4f}".format(epoch+1, EPOCHS, np.mean(running_loss)))

It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


: 