# Prepare data

In [1]:
from script.func_split_data import split_data

In [2]:
# Build the project's splitter for the given dataset folder and CSV, run the split,
# then call both report methods (their printed summaries are the cell output below).
# Note: despite its name, `split_df` is the splitter object, not a DataFrame.
split_df = split_data(data_path='Cosmenet_uat_20231108', data_csv='data_last_join_2023_11_10.csv')
split_df.split_data()
split_df.report_train_test_split()
print()  # blank line separating the two reports
split_df.report_train_test_val_split()

amount of all data : 94493
amount of all class : 21250
amount of data 2-8 img : 3148
amount of 2-8 img class : 571
amount of data more 8 img : 75208
amount of more 8 img class : 4542
amount of data & class only one : 16137

amount of train split : 49340
amount of train split class : 3724
amount of test split : 12338
amount of test split class : 3724
amount of train val : 10826
amount of train val class : 818
amount of test val : 2704
amount of test val class : 818
amount of train val mix : 12400
amount of train val mix class : 1389
amount of test val mix : 4278
amount of test val mix class : 1389


In [3]:
# Pull the frames produced by the splitter: a train/test pair and a validation pair.
# `df_train_split` feeds the triplet dataset further down; the other three frames are
# not used in the cells visible here.
df_train_split, df_test_split = split_df.get_train_test()
df_train_val_mix, df_test_val_mix = split_df.get_validate()

In [4]:
# Peek at the first training row to confirm the column layout (labels, images_path, ...).
df_train_split.head(1)

Unnamed: 0,file_names,labels,images_path,BID,SCID,CID,Action
0,46985_13.png,46985,/app/nfs_clientshare/Datasets/Cosmenet_product...,4169,84,53,1


# Create Dataset

In [5]:
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd
import numpy as np

from PIL import Image
import random
from tqdm.notebook import tqdm
from script.tool import ROOT_NFS, ROOT_NFS_DATA, ROOT_NFS_TEST

In [6]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
# Run configuration shared by the cells below.
DEVICE = get_default_device()  # handed to the feature-extraction pipeline
IMAGE_SIZE = 224               # square side length passed to get_dataset
BATCH_SIZE = 4                 # DataLoader batch size
LEARNING_RATE = 2e-5           # Adam learning rate
EPOCHS = 40                    # upper bound of the training loop

In [7]:
class CosmenetDataset_Triplet():
    """Map-style dataset that yields image triplets for triplet-loss training.

    The frame must provide a ``labels`` column and an ``images_path`` column.
    Samples are addressed by position (0 .. len-1), which is what ``DataLoader``
    passes to ``__getitem__``; the frame's own index labels are never used for
    look-ups, so a frame that kept a non-contiguous index after splitting works.

    Args:
        df: DataFrame with ``labels`` and ``images_path`` columns.
        train: When True, ``__getitem__`` returns ``(anchor, positive, negative)``.
            When False, it returns only the anchor image.
        transform: Optional callable applied to every PIL image. Its result is
            multiplied by 255 and cast to int before being returned.
    """

    def __init__(self, df: pd.DataFrame, train=True, transform=None):
        self.data_csv = df
        self.is_train = train
        self.transform = transform
        # Always materialise the columns: __len__ and __getitem__ need them in
        # both modes (previously they only existed when train=True).
        self.labels = df['labels'].values
        self.image_path = df['images_path'].values
        # Kept for backward compatibility; sampling relies on `positions` instead.
        self.index = df.index.values
        self.positions = np.arange(len(self.image_path))

    @staticmethod
    def _load_rgb(path):
        """Open the image at ``path`` and return it as an RGB PIL image."""
        # The context manager closes the underlying file once pixels are loaded.
        with Image.open(path) as img:
            return img.convert('RGB')

    def _apply_transform(self, img):
        """Apply the optional transform and rescale the result to integer values."""
        if self.transform is None:
            return img
        return (self.transform(img) * 255).int()

    def _sample_position(self, item, anchor_label, compare_type):
        """Pick a random position, different from ``item``, for the requested type.

        Raises:
            ValueError: ``compare_type`` is neither "pos" nor "neg".
            IndexError: no other sample satisfies the label constraint.
        """
        if compare_type == "pos":
            mask = np.asarray(self.labels == anchor_label)
        elif compare_type == "neg":
            mask = np.asarray(self.labels != anchor_label)
        else:
            raise ValueError("compare_type must be pos or neg")
        # The comparison produced a fresh array, so editing it is safe.
        mask[item] = False  # never pair the anchor with itself
        candidates = self.positions[mask]
        if candidates.size == 0:
            raise IndexError(
                f"no '{compare_type}' candidate available for label {anchor_label!r}"
            )
        return int(candidates[random.randrange(candidates.size)])

    def get_caompare_img(self, item, anchor_label, compare_type):
        """Return a random positive ("pos") or negative ("neg") RGB image for an anchor.

        Args:
            item: Position of the anchor sample.
            anchor_label: Label of the anchor sample.
            compare_type: "pos" for same-label, "neg" for different-label.
        """
        compare_item = self._sample_position(item, anchor_label, compare_type)
        return self._load_rgb(self.image_path[compare_item])

    def __len__(self):
        return len(self.image_path)

    def __getitem__(self, item):
        anchor_img = self._load_rgb(self.image_path[item])
        if not self.is_train:
            # No triplet sampling outside training: only the anchor is returned.
            return self._apply_transform(anchor_img)

        anchor_label = self.labels[item]
        positive_img = self.get_caompare_img(item, anchor_label, "pos")
        negative_img = self.get_caompare_img(item, anchor_label, "neg")
        return (
            self._apply_transform(anchor_img),
            self._apply_transform(positive_img),
            self._apply_transform(negative_img),
        )

In [8]:
def get_dataset(IMAGE_SIZE, data):
    """Wrap ``data`` in a training-mode triplet dataset.

    Each image is converted to a tensor and then resized to a square of
    ``IMAGE_SIZE`` x ``IMAGE_SIZE`` (antialiasing disabled).
    """
    target_size = (IMAGE_SIZE, IMAGE_SIZE)
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(target_size, antialias=False),
    ])
    return CosmenetDataset_Triplet(data, train=True, transform=preprocessing)

In [9]:
# Triplet dataset over the training split, served in shuffled mini-batches.
train_dataset = get_dataset(IMAGE_SIZE, df_train_split)
train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Preprocessing

In [10]:
import torch.nn as nn
from transformers import ViTImageProcessor, ViTModel

from script.func_extract_feature import pipeline_transformer, convert_feature

2023-12-14 09:24:55.411837: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-14 09:24:56.064414: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 09:24:56.064462: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 09:24:56.067683: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 09:24:56.385054: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

In [11]:
def select_transformers_model(model, processor, pretrain="google/vit-base-patch16-224-in21k", load_state_dict=None):
    """Instantiate a model/processor pair and optionally restore fine-tuned weights.

    Args:
        model: Class exposing ``from_pretrained`` (e.g. ``ViTModel``).
        processor: Class exposing ``from_pretrained`` (e.g. ``ViTImageProcessor``).
        pretrain: Hub identifier or local directory passed to both ``from_pretrained`` calls.
        load_state_dict: Optional path to a checkpoint file holding a
            ``'model_state_dict'`` entry. Skipped when falsy.

    Returns:
        Tuple ``(model_instance, processor_instance)``.
    """
    loaded_model = model.from_pretrained(pretrain)
    loaded_processor = processor.from_pretrained(pretrain)
    if load_state_dict:
        # map_location="cpu" lets a checkpoint written on a GPU machine be restored
        # on a CPU-only one; load_state_dict then copies the values into the model.
        checkpoint = torch.load(load_state_dict, map_location="cpu")
        loaded_model.load_state_dict(checkpoint['model_state_dict'])
    return loaded_model, loaded_processor

In [14]:
# Checkpoint path; only referenced by the commented-out resume lines in this cell.
load_state_dict='./weights/temp_epoch/vitgg_lr2e05_ep16_loss0.00198.pth'
# Load model and processor from a local directory; restoring the checkpoint is disabled.
model_pipeline, preprocess = select_transformers_model(
    ViTModel, ViTImageProcessor, pretrain="./weights/vitgg_lr2e05_ep3_loss0.0", 
    # load_state_dict=load_state_dict
)
# Project feature-extraction pipeline configured with layer="last_hidden_state", row=0.
# presumably this selects the first token of the last hidden state as the embedding — TODO confirm
vit_gg_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=DEVICE)
vit_gg_pipe.selct_model(model_pipeline, preprocess)
# The optimizer is given every parameter of the pipeline's model; the freeze cell that
# follows only toggles requires_grad afterwards.
Optimizer = torch.optim.Adam(vit_gg_pipe.model.parameters(),lr = LEARNING_RATE)
# Optimizer.load_state_dict(torch.load(load_state_dict)['optimizer_state_dict'])
cvt_feature_vit_gg = convert_feature(vit_gg_pipe)
# Triplet margin loss with Euclidean distance (p=2) and a margin of 1.0.
criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-8)

In [15]:
# Parameters are numbered in named_parameters() order; every parameter whose position
# is LAST_LAYER or higher is frozen, all earlier ones stay trainable.
# LAST_LAYER = 199
LAST_LAYER = 198
for n, (layer, param) in enumerate(vit_gg_pipe.model.named_parameters()):
    param.requires_grad = n < LAST_LAYER
    if not param.requires_grad:
        print("Freeze at layer :", layer)

Freeze at layer : pooler.dense.weight
Freeze at layer : pooler.dense.bias


# Training

In [16]:
from tqdm.notebook import tqdm
import numpy as np

In [17]:
def save_weight(epoch, model, Optimizer, running_loss):
    """Write a training checkpoint for ``epoch`` to the path given by ``get_name``.

    Args:
        epoch: Zero-based epoch index, stored as-is in the checkpoint.
        model: Object whose ``pipeline.model`` attribute is the network to save.
        Optimizer: Optimizer whose ``state_dict()`` is stored alongside the model.
        running_loss: Per-step loss values of the epoch; their mean is stored.

    The checkpoint is a dict with the keys ``'epoch'``, ``'model_state_dict'``,
    ``'optimizer_state_dict'`` and ``'loss'``.
    """
    import os  # local import: the notebook's import cells do not bring in `os`

    path_trained = get_name(epoch, running_loss)
    # torch.save does not create missing folders, so make sure the target exists.
    parent_dir = os.path.dirname(path_trained)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.pipeline.model.state_dict(),
            'optimizer_state_dict': Optimizer.state_dict(),
            # Plain float rather than a NumPy scalar, so the file stays loadable
            # with torch.load(..., weights_only=True).
            'loss': float(np.mean(running_loss)),
            }, path_trained)

def get_name(epoch, running_loss):
    """Build the checkpoint path for an epoch.

    The file name carries the one-based epoch number and the mean of
    ``running_loss`` rounded to five decimals.
    """
    epoch_number = str(epoch + 1)
    mean_loss = str(round(np.mean(running_loss), 5))
    return "weights/temp_epoch/vitgg_lr2e05_ep" + epoch_number + "_loss" + mean_loss + ".pth"

In [18]:
# triple loss
# Triplet-loss fine-tuning: every batch provides anchor / positive / negative images,
# all three are passed through the same extractor and compared by `criterion`.
# NOTE(review): the recorded output reports a mean loss of 0.0000 for every finished
# epoch — confirm the sampled triplets are still informative and that gradients flow.
for epoch in tqdm(range(0, EPOCHS), desc="Epochs"):
    running_loss = []  # per-step loss values, averaged once the epoch is done
    for step, (anchor_img, positive_img, negative_img) in enumerate(tqdm(train_dl, desc="Training", leave=False)):
        # presumably process_extract runs preprocessing plus the model forward pass and
        # returns a torch tensor that keeps its autograd graph — TODO confirm in
        # script.func_extract_feature
        anchor_out = cvt_feature_vit_gg.process_extract(anchor_img, output_type='pt')
        positive_out = cvt_feature_vit_gg.process_extract(positive_img, output_type='pt')
        negative_out = cvt_feature_vit_gg.process_extract(negative_img, output_type='pt')
        
        loss = criterion(anchor_out, positive_out, negative_out)
        
        # Standard update: clear stale gradients, backpropagate, apply the step.
        Optimizer.zero_grad()
        loss.backward()
        Optimizer.step()
        # Store a detached CPU copy so the graph is not kept alive across steps.
        running_loss.append(loss.cpu().detach().numpy())
    
    # One checkpoint per epoch, named after the epoch number and its mean loss.
    save_weight(epoch, cvt_feature_vit_gg, Optimizer, running_loss)
    print("Epoch: {}/{} — Loss: {:.4f}".format(epoch+1, EPOCHS, np.mean(running_loss)))

Epochs:   0%|          | 0/40 [00:00<?, ?it/s]

Training:   0%|          | 0/12335 [00:00<?, ?it/s]



Epoch: 1/40 — Loss: 0.0000


Training:   0%|          | 0/12335 [00:00<?, ?it/s]



Epoch: 2/40 — Loss: 0.0000


Training:   0%|          | 0/12335 [00:00<?, ?it/s]



Epoch: 3/40 — Loss: 0.0000


Training:   0%|          | 0/12335 [00:00<?, ?it/s]



Epoch: 4/40 — Loss: 0.0000


Training:   0%|          | 0/12335 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [12]:
# Export the model and its processor to a directory in Hugging Face format.
# NOTE(review): the path is an absolute, machine-specific location — consider deriving
# it from a configurable root instead.
# NOTE(review): this cell carries execution count 12 while the training cell carries 18,
# so in the recorded session it ran before that training run — verify which weights
# were actually exported.
# NOTE(review): `from_pt` looks like a loading option rather than a saving one; it is
# presumably ignored here — confirm against the transformers documentation.
path_trained = "/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep"
model_pipeline.save_pretrained(path_trained, from_pt=True)
preprocess.save_pretrained(path_trained, from_pt=True)

['/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep/preprocessor_config.json']

In [8]:
from transformers import ViTModel
import torch
# Convert a saved Hugging Face model directory into a plain PyTorch state-dict file.
# ROOT_NFS_TEST is not imported in this cell; it comes from the `script.tool` import in
# the dataset section, so that cell must have been executed first.
path_trained = ROOT_NFS_TEST / "weights/vit_gg_lr2e-05_eu_9ep_0_95099acc"
vit_gg = ViTModel.from_pretrained(path_trained)
torch.save(vit_gg.state_dict(), './weights/model.pt')

In [18]:
# Sanity check: the exported state dict loads into a freshly created base ViT model
# (the recorded output confirms that all keys matched).
test = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
test.load_state_dict(torch.load('./weights/model.pt'))

<All keys matched successfully>