# Create Dataset

In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

import pandas as pd

from PIL import Image
from pathlib import Path
import random
from script.tool import ROOT_NFS, ROOT_NFS_DATA, ROOT_NFS_TEST

In [2]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)


# Edge length (pixels) passed to get_train_dataset() for the square resize.
IMAGE_SIZE = 224
# Batch size handed to the training DataLoader.
BATCH_SIZE = 4
DEVICE = get_default_device()
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 2e-5
# Number of passes over the training loader.
EPOCHS = 40

In [3]:
class CosmenetDataset_Triplet():
    """Map-style dataset that yields (anchor, positive, negative) image triplets.

    The first column of ``df`` is read as the image file name and the second
    column as its label. Every image is opened from
    ``path / str(label) / file_name``.

    Args:
        df: Table whose first two columns are file names and labels.
        path: Root directory containing one sub-directory per label.
        train: When True, ``__getitem__`` returns a triplet. When False it
            returns ``(anchor_img, anchor_label)`` (the original code raised
            in this mode because its attributes were never populated).
        transform: Optional callable applied to each loaded PIL image; its
            result is multiplied by 255 and cast with ``.int()``.
    """

    def __init__(self, df: pd.DataFrame, path: Path, train=True, transform=None):
        self.data_csv = df
        self.is_train = train
        self.transform = transform
        self.path = path
        # Populated in both modes so __len__ and __getitem__ work when train=False.
        self.images = df.iloc[:, 0].to_numpy()
        self.labels = df.iloc[:, 1].to_numpy()
        self.index = df.index.values

    def full_path(self, label, image_name):
        """Return ``path / str(label) / image_name``."""
        return self.path / str(label) / image_name

    def _load_rgb(self, position):
        """Open the image stored at row ``position`` and return it as an RGB PIL image."""
        image_path = self.full_path(self.labels[position], self.images[position])
        # convert() returns a new, fully loaded image, so the file handle can be closed.
        with Image.open(image_path) as raw:
            return raw.convert('RGB')

    def _scale(self, img):
        """Apply ``transform`` (if any), multiply by 255 and cast to int."""
        if self.transform is None:
            return img
        return (self.transform(img) * 255).int()

    def _candidate_positions(self, item, anchor_label, compare_type):
        """Row positions (never ``item`` itself) eligible as positive or negative sample."""
        if compare_type == "pos":
            mask = self.labels == anchor_label
        elif compare_type == "neg":
            mask = self.labels != anchor_label
        else:
            raise ValueError("compare_type must be pos or neg")
        # Work with row positions rather than DataFrame index labels so a
        # non-default index cannot select the wrong (or a missing) row.
        mask[item] = False
        return mask.nonzero()[0]

    def get_compare_img(self, item, anchor_label, compare_type):
        """Randomly pick and load a comparison image for the anchor at ``item``.

        Args:
            item: Row position of the anchor; it is excluded from the draw.
            anchor_label: Label of the anchor.
            compare_type: ``"pos"`` for the same label, ``"neg"`` for a different one.

        Returns:
            The chosen image as an RGB PIL image.

        Raises:
            ValueError: If ``compare_type`` is neither ``"pos"`` nor ``"neg"``.
            IndexError: If no other row satisfies the requested relation.
        """
        candidates = self._candidate_positions(item, anchor_label, compare_type)
        if len(candidates) == 0:
            raise IndexError(
                f"no {compare_type} sample available for label {anchor_label!r}"
            )
        return self._load_rgb(random.choice(candidates))

    # Backward-compatible alias for the original (misspelled) method name.
    get_caompare_img = get_compare_img

    def __len__(self):
        return len(self.images)

    def __getitem__(self, item):
        anchor_label = self.labels[item]
        anchor_img = self._load_rgb(item)
        if not self.is_train:
            return self._scale(anchor_img), anchor_label
        # Draw the positive before the negative (same order as before).
        positive_img = self.get_compare_img(item, anchor_label, "pos")
        negative_img = self.get_compare_img(item, anchor_label, "neg")
        return (
            self._scale(anchor_img),
            self._scale(positive_img),
            self._scale(negative_img),
        )

In [6]:
# real data
train_data_path = ROOT_NFS_DATA / "Cosmenet_product_20231018/datas"
train_data = pd.read_csv(ROOT_NFS_DATA / 'Cosmenet_product_20231018/datas_20231018.csv')

# Per-label non-null counts; keep labels whose `file_names` count lies in [20, 40).
df_group = train_data.groupby('labels', sort=False).count()
filter_count = df_group.index[
    ((df_group["file_names"] >= 20) & (df_group["file_names"] < 40)).to_numpy()
]
train_data = train_data.loc[train_data["labels"].isin(filter_count)].reset_index(drop=True)

def get_train_dataset(IMAGE_SIZE):
    """Build the training triplet dataset with images resized to a square.

    Args:
        IMAGE_SIZE: Target height and width handed to ``transforms.Resize``.

    Returns:
        A ``CosmenetDataset_Triplet`` in training mode over the module-level
        ``train_data`` and ``train_data_path``.
    """
    to_tensor = transforms.ToTensor()
    resize = transforms.Resize((IMAGE_SIZE, IMAGE_SIZE), antialias=False)
    return CosmenetDataset_Triplet(
        train_data,
        path=train_data_path,
        train=True,
        transform=transforms.Compose([to_tensor, resize]),
    )

In [7]:
train_dataset = get_train_dataset(IMAGE_SIZE)
# Shuffled loader over the triplet dataset.
train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Preprocessing

In [8]:
import torch.nn as nn
from transformers import ViTImageProcessor, ViTModel
from script.tool import convert_feature

In [9]:
def select_transformers_model(model, processor, pretrain="google/vit-base-patch16-224-in21k"):
    """Instantiate a model and its processor from the same pretrained checkpoint.

    Args:
        model: Object exposing ``from_pretrained`` (e.g. a model class).
        processor: Object exposing ``from_pretrained`` (e.g. a processor class).
        pretrain: Checkpoint identifier forwarded to both ``from_pretrained`` calls.

    Returns:
        Tuple ``(loaded_model, loaded_processor)``.
    """
    loaded_model = model.from_pretrained(pretrain)
    loaded_processor = processor.from_pretrained(pretrain)
    return loaded_model, loaded_processor

In [10]:
# pipeline for transformer library
class pipeline_transformer:
    """Bundle a model with its processor and pull one entry out of its outputs.

    Args:
        layer: Key used to index the model outputs (e.g. ``"last_hidden_state"``).
        row: ``False`` (the default) returns the selected output unchanged; any
            other value is used as the index along the second axis.
        device: Device the model and the processed inputs are moved to.
    """

    def __init__(self, layer, row=False, device='cuda:0'):
        self.layer = layer
        self.row = row
        self.device = device

    def selct_model(self, model, processor):
        """Attach ``model`` and ``processor``; put the model in eval mode on ``device``."""
        self.processor = processor
        self.model = model
        model.eval().to(self.device)

    def process_model(self, img):
        """Run ``img`` through the processor and then the model; return the raw outputs."""
        batch = self.processor(images=img, return_tensors="pt").to(self.device)
        return self.model(**batch)

    def extract(self, img):
        """Return ``outputs[layer]``, sliced as ``[:, row]`` unless ``row`` is ``False``."""
        selected = self.process_model(img)[self.layer]
        # Only the literal bool False disables slicing; row=0 still selects index 0.
        if self.row is False:
            return selected
        return selected[:, self.row]

In [11]:
# Load the ViT model and its image processor from the same pretrained checkpoint.
model, preprocess = select_transformers_model(ViTModel, ViTImageProcessor, pretrain="google/vit-base-patch16-224-in21k")
# row=0 makes extract() return outputs["last_hidden_state"][:, 0].
vit_gg_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=DEVICE)
# selct_model() also switches the model to eval mode and moves it to DEVICE.
vit_gg_pipe.selct_model(model, preprocess)
# The optimizer is built over every model parameter; requires_grad flags are only adjusted in a later cell.
Optimizer = torch.optim.Adam(vit_gg_pipe.model.parameters(),lr = LEARNING_RATE)
criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-8)
# convert_feature comes from script.tool; the training loop calls its process_extract() method.
cvt_feature_vit_gg = convert_feature(vit_gg_pipe)

In [12]:
# LAST_LAYER = 199
LAST_LAYER = 198
# Parameters at enumeration position >= LAST_LAYER are frozen; all earlier ones stay trainable.
for n, (layer, param) in enumerate(vit_gg_pipe.model.named_parameters()):
    param.requires_grad = n < LAST_LAYER
    if not param.requires_grad:
        print("Freeze at layer :", layer)

Freeze at layer : pooler.dense.weight
Freeze at layer : pooler.dense.bias


# Validate

In [13]:
from script.tool import split_StratifiedKFold

In [10]:
# Minimum number of samples per class; also passed to filter_data() as minimum_data_class.
n_cv = 5
path_dataset = ROOT_NFS_DATA / "Cosmenet_products_15000/raw_data"
# NOTE(review): `device` is not referenced again in the visible cells (DEVICE is used instead) — confirm it is needed.
device = torch.device(DEVICE)
# NOTE(review): scan_directory and filter_data are not imported in any visible cell;
# on a fresh kernel this cell raises NameError — add the import from wherever they are defined.
df = scan_directory(path_dataset)
# filter_data returns the table plus two index collections: classes with fewer than n_cv
# samples and classes with at least n_cv samples (per the variable names and the printed counts below).
df_pd, index_less_than_n, index_greater_than_or_equal_to_n = filter_data(df, minimum_data_class=n_cv)
# Labels used later by validate() for the stratified split.
y_label = df_pd['classes_labeled'].values

amount of all image : 15524
amount of image that less than 5 in that class : 116
amount of image that more than 5 in that class : 15408


In [16]:
def convert_feature_transformer(model, processor, layer, row=False, device='cuda:0'):
    """Extract one feature vector per image listed in the module-level ``df_pd``.

    Every path in ``df_pd['path_img']`` is opened, run through ``processor``
    and ``model``, reduced to ``outputs[layer]`` (optionally ``[:, row]``),
    flattened to a single row and passed through ``standardize_feature``.

    Relies on module-level names: ``df_pd``, ``tqdm``, ``np`` and
    ``standardize_feature`` (the latter is not defined in the visible cells).

    Args:
        model: Model called as ``model(**inputs)``. It is switched to eval
            mode and moved to ``device``, and is left in that state.
        processor: Callable producing the model inputs from a PIL image.
        layer: Key used to index the model outputs.
        row: ``False`` keeps the whole selected output; any other value is
            used as the index along the second axis.
        device: Device used for the model and its inputs.

    Returns:
        DataFrame with one row per image: the feature columns followed by a
        final ``classes`` column taken from ``df_pd['classes']``.
    """
    model.eval().to(device)
    features = []
    # Pure inference: no_grad avoids building and retaining an autograd graph per image.
    with torch.no_grad():
        for img_path in tqdm(df_pd['path_img'], desc="Extract"):
            # convert() returns a loaded copy, so the file handle can be released right away.
            with Image.open(img_path) as raw:
                img = raw.convert('RGB')
            inputs = processor(images=img, return_tensors="pt").to(device)
            outputs = model(**inputs)
            output = outputs[layer] if row is False else outputs[layer][:, row]
            output = standardize_feature(output.flatten().unsqueeze(0))
            features.append(output.detach().to('cpu').numpy())

    # Concatenate once instead of re-copying the growing array on every iteration.
    X_trans = np.concatenate(features) if features else np.empty((0, 0))

    df_x = pd.DataFrame(X_trans)
    # Reset the index so labels line up with feature rows by position even when
    # df_pd does not carry a default 0..n-1 index (concat aligns on index labels).
    df_y = df_pd['classes'].reset_index(drop=True).to_frame(name='classes')
    return pd.concat([df_x, df_y], axis=1)

In [17]:
def _top_n_hit_rate(x_train, labels_train, x_test, labels_test, top_n):
    """Fraction of test rows whose label is among the ``top_n`` distinct nearest train labels."""
    dot_product = np.dot(x_test, x_train.T)                       # (n_test, n_train)
    norm_test = np.linalg.norm(x_test, axis=1).reshape(-1, 1)     # (n_test, 1)
    norm_train = np.linalg.norm(x_train, axis=1).reshape(1, -1)   # (1, n_train)
    similarity = dot_product / (norm_test * norm_train)           # cosine similarity

    # Train positions ordered from most to least similar for every test row.
    order = np.argsort(similarity, axis=1)[:, ::-1]
    ranked_labels = labels_train[order]

    hits = 0
    for true_label, row in zip(labels_test, ranked_labels):
        # Keep the first occurrence of each label, in ranking order.
        _, first_seen = np.unique(row, return_index=True)
        top_labels = row[np.sort(first_seen)[:top_n]]
        hits += int((top_labels == true_label).any())
    return hits / len(labels_test)


def validate(data, n_cv=5, top_n=5):
    """Cross-validated top-n retrieval accuracy based on cosine similarity.

    The last column of ``data`` holds the labels to retrieve; every other
    column is a feature. Samples selected by the module-level
    ``index_greater_than_or_equal_to_n`` are split into ``n_cv`` stratified
    folds (stratified on the module-level ``y_label``); samples selected by
    ``index_less_than_n`` are appended to the training side of every fold.
    For each test sample the train samples are ranked by cosine similarity
    and a hit is counted when the true label is among the first ``top_n``
    distinct labels.

    Relies on module-level names: ``y_label``,
    ``index_greater_than_or_equal_to_n``, ``index_less_than_n``, ``tqdm``,
    ``np`` and ``StratifiedKFold``.
    NOTE(review): ``StratifiedKFold`` is not imported in any visible cell.
    NOTE(review): the index collections are assumed to be boolean masks or
    row positions into ``data`` — confirm against ``filter_data``.

    Args:
        data: DataFrame of features with the label as last column.
        n_cv: Number of stratified folds.
        top_n: How many distinct nearest labels count as a hit (default 5,
            the value that used to be hard-coded).

    Returns:
        Mean hit rate over all folds, as a float in [0, 1]. (Previously only
        the last fold contributed to the returned value.)
    """
    features = data.iloc[:, :-1].to_numpy()
    retrieval_labels = data.iloc[:, -1].to_numpy()
    strat_labels = np.asarray(y_label)

    # Plain arrays are indexed by row for both boolean masks and integer positions.
    keep = np.asarray(index_greater_than_or_equal_to_n)
    rare = np.asarray(index_less_than_n)

    X_less = features[rare]
    labels_less = retrieval_labels[rare]
    X = features[keep]
    y = strat_labels[keep]
    labels = retrieval_labels[keep]

    skf = StratifiedKFold(n_splits=n_cv)
    fold_hit_rates = []
    for train_index, test_index in tqdm(skf.split(X, y), desc="Validate"):
        # Rare-class samples cannot be stratified, so they always join the train side.
        x_train = np.concatenate((X[train_index], X_less))
        labels_train = np.concatenate((labels[train_index], labels_less))
        fold_hit_rates.append(
            _top_n_hit_rate(x_train, labels_train, X[test_index], labels[test_index], top_n)
        )

    return float(np.mean(fold_hit_rates))

# Training

In [13]:
from tqdm.notebook import tqdm
import numpy as np

In [None]:
def save_weight(epoch, model, Optimizer, running_loss, acc_top, acc_top_n):
    """Write a training checkpoint to the path produced by ``get_name``.

    Args:
        epoch: Zero-based epoch index stored in the checkpoint.
        model: Object whose ``pipeline.model`` is the network to save.
        Optimizer: Optimizer whose ``state_dict()`` is stored.
        running_loss: Per-step losses of the epoch; their mean is stored.
        acc_top: Stored under ``'acc_top'`` and embedded in the file name.
        acc_top_n: Stored under ``'acc_top_n'`` and embedded in the file name.
    """
    path_trained = Path(get_name(epoch, running_loss, acc_top, acc_top_n))
    # torch.save does not create missing directories.
    path_trained.parent.mkdir(parents=True, exist_ok=True)
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.pipeline.model.state_dict(),
        'optimizer_state_dict': Optimizer.state_dict(),
        # Plain float instead of a numpy scalar keeps the file loadable with weights_only=True.
        'loss': float(np.mean(running_loss)),
        'acc_top': acc_top,
        'acc_top_n': acc_top_n,
    }, path_trained)


def get_name(epoch, running_loss, acc_top, acc_top_n):
    """Return the relative checkpoint path for an epoch.

    The name embeds the one-based epoch number, the mean of ``running_loss``
    rounded to 5 decimals, and both accuracy values.
    """
    mean_loss = str(round(np.mean(running_loss), 5))
    return f"weights/vit_gg_lr2e_05_{str(epoch+1)}ep_{mean_loss}loss_{acc_top}acc_top_{acc_top_n}acc_top_n.pth"

In [None]:
# triple loss
# Triplet-loss fine-tuning loop.
best_validation_loss = 100
best_acc_top = 0
best_acc_top_n = 0

for epoch in tqdm(range(EPOCHS), desc="Epochs"):
    running_loss = []
    for anchor_img, positive_img, negative_img in tqdm(train_dl, desc="Training", leave=False):
        anchor_out = cvt_feature_vit_gg.process_extract(anchor_img)
        positive_out = cvt_feature_vit_gg.process_extract(positive_img)
        negative_out = cvt_feature_vit_gg.process_extract(negative_img)

        loss = criterion(anchor_out, positive_out, negative_out)

        Optimizer.zero_grad()
        loss.backward()
        Optimizer.step()
        running_loss.append(loss.item())

    # Validate the model that is being trained (the one the optimizer was built on),
    # using the processor attached to the same pipeline.
    x_trans = convert_feature_transformer(
        vit_gg_pipe.model, vit_gg_pipe.processor,
        layer="last_hidden_state", row=0, device=DEVICE,
    )
    # validate() returns a top-n hit rate in [0, 1]: higher is better.
    acc_top_n = validate(x_trans)
    # Error rate, so the "lower is better" bookkeeping on validation_loss is meaningful.
    validation_loss = 1.0 - acc_top_n
    # TODO(review): validate() does not produce a top-1 accuracy. NaN keeps the cell
    # runnable and never beats best_acc_top; replace once a top-1 metric exists.
    acc_top = float("nan")

    # Collect every reason to checkpoint, then write the file once: all reasons
    # map to the same file name, so repeated saves would only rewrite it.
    save_messages = []

    if acc_top > best_acc_top:
        best_acc_top = acc_top
        save_messages.append("Save model at Best acc top: {:.4f}".format(best_acc_top))

    if acc_top_n > best_acc_top_n:
        best_acc_top_n = acc_top_n
        save_messages.append("Save model at Best acc top n: {:.4f}".format(best_acc_top_n))

    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss  # Update the best performance
        save_messages.append("Save model at Best performance: {:.4f}".format(best_validation_loss))

    if epoch%10 == 9:
        save_messages.append("Save model at epoch {}".format(epoch+1))

    if save_messages:
        save_weight(epoch, cvt_feature_vit_gg, Optimizer, running_loss, acc_top, acc_top_n)
        for message in save_messages:
            print(message)

    print("Epoch: {}/{} — Loss: {:.4f} — validation_loss : {:.4f} — acc_top : {} — acc_top_n : {}".format(
        epoch+1, EPOCHS, np.mean(running_loss), validation_loss, acc_top, acc_top_n))

In [12]:
# Export the model and its processor to one directory in the save_pretrained layout.
# NOTE(review): absolute, user-specific path — consider building it from a shared root
# (e.g. the ROOT_NFS_* paths imported at the top) so the cell runs on other machines.
path_trained = "/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep"
# NOTE(review): `vit_gg` / `processor_vit_gg` are not assigned in any earlier visible cell.
# NOTE(review): `from_pt` is normally a from_pretrained option — confirm it has any effect here.
vit_gg.save_pretrained(path_trained, from_pt=True)
processor_vit_gg.save_pretrained(path_trained, from_pt=True)

['/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep/preprocessor_config.json']

In [8]:
from transformers import ViTModel
import torch
# Load a saved checkpoint directory under ROOT_NFS_TEST and re-export only its state dict.
path_trained = ROOT_NFS_TEST / "weights/vit_gg_lr2e-05_eu_9ep_0_95099acc"
vit_gg = ViTModel.from_pretrained(path_trained)
# NOTE(review): './weights' is relative to the working directory and is not created here — confirm it exists.
torch.save(vit_gg.state_dict(), './weights/model.pt')

In [18]:
# Round-trip check: load the exported state dict into a freshly instantiated base ViT.
test = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
# NOTE(review): torch.load unpickles the file; only use it on checkpoints from a trusted source.
test.load_state_dict(torch.load('./weights/model.pt'))

<All keys matched successfully>