# Prepare data

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold

import pandas as pd

from script.tool import ROOT_NFS_DATA

In [None]:
# Read the dataset CSV that lives under the shared data root (ROOT_NFS_DATA
# comes from script.tool). Later cells rely on its 'labels' column.
path_dataset = ROOT_NFS_DATA / 'Cosmenet_product_20231018'
df_pd = pd.read_csv(path_dataset / 'datas_20231018.csv')
df_pd.head(1)  # notebook preview of the first row

In [None]:
# One row per class label with the number of rows ("count") it owns,
# ordered from the most populated class to the least populated one.
group_df = (
    df_pd.groupby(['labels'])['labels']
    .count()
    .reset_index(name='count')
    .sort_values(['count'], ascending=False)
)
group_df.head(1)

In [None]:
# Labels bucketed by class size: at most 8 rows, and 2..8 rows (inclusive).
filter_img_1_to_8 = group_df.loc[group_df['count'] <= 8, 'labels'].values
filter_img_2_to_8 = group_df.loc[group_df['count'].between(2, 8), 'labels'].values

# Rows of classes with 2..8 images, and rows of classes with more than 8 images.
df_2_to_8 = df_pd[df_pd['labels'].isin(filter_img_2_to_8)]
df_more_8 = df_pd[~df_pd['labels'].isin(filter_img_1_to_8)]

In [None]:
# Halve the 2..8-image classes with a stratified 2-fold split and keep only
# the first fold's (train, test) positional indices.
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
train_2_to_8, test_2_to_8 = next(skf.split(df_2_to_8, df_2_to_8['labels']))
df_2_to_8_train, df_2_to_8_test = df_2_to_8.iloc[train_2_to_8], df_2_to_8.iloc[test_2_to_8]
df_2_to_8_train.head(1)

In [None]:
# Stratified 80/20 row split of the classes that have more than 8 images.
sss_train = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(sss_train.split(df_more_8, df_more_8['labels']))
df_train, df_test = df_more_8.iloc[train_idx], df_more_8.iloc[test_idx]
df_train.head(1)

In [None]:
# Size summary of the full dataset and of each class-size bucket.
print(f"amount of all data : {len(df_pd)}")
print(f"amount of all class : {len(group_df)}")
print(f"amount of data 2-8 img : {len(df_2_to_8)}")
print(f"amount of 2-8 img class : {len(filter_img_2_to_8)}")
print(f"amount of data more 8 img : {len(df_more_8)}")
print(f"amount of more 8 img class : {len(group_df[group_df['count'] > 8]['labels'])}")
print(f"amount of data & class only one : {len(group_df[group_df['count'] == 1]['labels'])}")

In [None]:
# Per-class counts for classes with more than 8 images. The explicit .copy()
# matters: the "count" column is overwritten below, and writing through .loc
# into a filtered slice of group_df is a chained-assignment hazard
# (SettingWithCopyWarning / ambiguous write target).
# The former extra "count > 1" condition was implied by "count > 8".
df_count_8 = group_df[group_df['count'] > 8].copy()

# For every distinct class size, how many classes have exactly that size.
group_df_count_8 = (
    df_count_8.groupby(['count'])['count']
    .count()
    .reset_index(name='counter_count')
    .sort_values(['counter_count'], ascending=False)
)

# Class sizes that occur for exactly one class.
counter_count_1 = group_df_count_8[group_df_count_8["counter_count"] == 1]["count"].values
# Not read by any later statement visible in this notebook; kept for inspection.
ind_c = group_df_count_8[group_df_count_8["counter_count"] == 1]["count"].index

# Pool all unique-size classes under the placeholder size 101 so that the
# stratified class split in the next cell (stratified on "count") does not
# see strata containing a single class.
df_count_8.loc[df_count_8["count"].isin(counter_count_1), "count"] = 101

In [None]:
# Class-level split: 18% of the >8-image classes become "val" classes, the
# remainder "split" classes; stratification key is the (pooled) class size.
sss_val = StratifiedShuffleSplit(n_splits=1, test_size=0.18, random_state=42)
split_idx, val_idx = next(sss_val.split(df_count_8, df_count_8['count']))
split_class = df_count_8["labels"].iloc[split_idx].values
val_class = df_count_8["labels"].iloc[val_idx].values
val_class[:5]

In [None]:
# Partition the row-level train/test frames by class membership:
# "split" classes versus "val" classes.
df_train_split = df_train.loc[df_train["labels"].isin(split_class)]
df_train_val = df_train.loc[df_train["labels"].isin(val_class)]
df_test_split = df_test.loc[df_test["labels"].isin(split_class)]
df_test_val = df_test.loc[df_test["labels"].isin(val_class)]

In [None]:
# Extend the "val"-class frames with the two halves of the 2..8-image classes.
# The original row index labels of each part are kept (no ignore_index).
df_train_val_mix = pd.concat([df_train_val, df_2_to_8_train])
df_test_val_mix = pd.concat([df_test_val, df_2_to_8_test])

In [None]:
# Row count and distinct-class count for every produced frame.
for _name, _frame in (
    ("train split", df_train_split),
    ("test split", df_test_split),
    ("train val", df_train_val),
    ("test val", df_test_val),
    ("train val mix", df_train_val_mix),
    ("test val mix", df_test_val_mix),
):
    print(f"amount of {_name} : {len(_frame)}")
    print(f"amount of {_name} class : {_frame['labels'].nunique()}")

# Create Dataset

In [26]:
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd
import numpy as np

from PIL import Image
import random
from tqdm.notebook import tqdm
from script.tool import ROOT_NFS, ROOT_NFS_DATA, ROOT_NFS_TEST

In [3]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Run configuration shared by the cells below.
IMAGE_SIZE = 224  # side length (pixels) images are resized to in get_dataset
BATCH_SIZE = 4  # triplets per DataLoader batch
DEVICE = get_default_device()  # device handed to pipeline_transformer
LEARNING_RATE = 0.00002  # Adam learning rate; get_name() hard-codes this as "lr2e05"
EPOCHS = 40  # number of passes of the training loop

In [15]:
class CosmenetDataset_Triplet():
    """Map-style dataset that yields image triplets for triplet-loss training.

    The frame is read by column position: column 1 holds the class label and
    column 2 the image path. All internal bookkeeping uses *row positions*
    (0..len-1), never the DataFrame index labels, so a filtered or shuffled
    frame whose index is not 0..n-1 is handled correctly.

    Args:
        df: frame describing the images (see column layout above).
        train: when True, ``__getitem__`` returns ``(anchor, positive,
            negative)``; when False it returns ``(anchor, label)``.
        transform: optional callable applied to every PIL image. Its result
            is multiplied by 255 and cast with ``.int()``.
    """

    def __init__(self, df: pd.DataFrame, train=True, transform=None):
        self.data_csv = df
        self.is_train = train
        self.transform = transform
        # Populated regardless of `train` so that __len__ and __getitem__
        # also work for a non-training dataset.
        self.labels = df.iloc[:, 1].values
        self.image_path = df.iloc[:, 2].values
        # Row positions. Using df.index labels here would break the positional
        # lookups into `labels` / `image_path` for any non-default index.
        self.index = np.arange(len(df))

    def _load_image(self, position):
        """Open the image stored at row `position` and return it as RGB."""
        # The context manager releases the file handle as soon as the pixel
        # data has been converted.
        with Image.open(self.image_path[position]) as img:
            return img.convert('RGB')

    def _sample_index(self, item, anchor_label, compare_type):
        """Pick a random row position (never `item`) for a positive or negative.

        Raises:
            ValueError: if `compare_type` is neither "pos" nor "neg".
            IndexError: if no row other than `item` qualifies.
        """
        if compare_type == "pos":
            mask = self.labels == anchor_label
        elif compare_type == "neg":
            mask = self.labels != anchor_label
        else:
            raise ValueError("compare_type must be pos or neg")
        mask[item] = False  # the anchor itself is never a candidate
        candidates = np.flatnonzero(mask)
        if candidates.size == 0:
            raise IndexError(
                f"no '{compare_type}' candidate for item {item} (label {anchor_label!r})"
            )
        return int(candidates[random.randrange(candidates.size)])

    def get_caompare_img(self, item, anchor_label, compare_type):
        """Return a random RGB image that is positive or negative w.r.t. the anchor.

        Args:
            item: row position of the anchor.
            anchor_label: label of the anchor.
            compare_type: "pos" for same label, "neg" for a different label.
        """
        return self._load_image(self._sample_index(item, anchor_label, compare_type))

    def _apply_transform(self, img):
        """Apply `transform` (if any) and rescale its output by 255 to ints."""
        if self.transform is None:
            return img
        return (self.transform(img) * 255).int()

    def __len__(self):
        return len(self.image_path)

    def __getitem__(self, item):
        anchor_label = self.labels[item]
        anchor_img = self._load_image(item)
        if not self.is_train:
            # Previously this path failed on an unbound `positive_img`; it now
            # yields the anchor together with its label.
            return self._apply_transform(anchor_img), anchor_label
        positive_img = self.get_caompare_img(item, anchor_label, "pos")
        negative_img = self.get_caompare_img(item, anchor_label, "neg")
        return (
            self._apply_transform(anchor_img),
            self._apply_transform(positive_img),
            self._apply_transform(negative_img),
        )

In [16]:
def get_dataset(IMAGE_SIZE, data):
    """Build the training triplet dataset for `data`.

    Images are converted to tensors and resized to a square of side
    `IMAGE_SIZE` (antialiasing disabled) before the dataset rescales them.
    """
    side = (IMAGE_SIZE, IMAGE_SIZE)
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(side, antialias=False),
    ])
    return CosmenetDataset_Triplet(data, train=True, transform=pipeline)

In [14]:
# Preview one row to check the column layout that CosmenetDataset_Triplet
# reads by position (column 1 = label, column 2 = image path).
df_train_split.head(1)

Unnamed: 0,file_names,labels,images_path
0,14624_14.jpg,14624,/app/nfs_clientshare/Datasets/Cosmenet_product...


In [18]:
# Triplet dataset over the "split" training rows, served in shuffled batches.
train_dataset = get_dataset(IMAGE_SIZE, df_train_split)
train_dl = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Preprocessing

In [19]:
import torch.nn as nn
from transformers import ViTImageProcessor, ViTModel
from script.tool import convert_feature

In [20]:
def select_transformers_model(model, processor, pretrain="google/vit-base-patch16-224-in21k"):
    """Load a model and its processor from the same pretrained checkpoint.

    Args:
        model: object exposing ``from_pretrained`` (e.g. a model class).
        processor: object exposing ``from_pretrained`` (e.g. a processor class).
        pretrain: checkpoint identifier passed to both ``from_pretrained`` calls.

    Returns:
        Tuple ``(loaded_model, loaded_processor)``.
    """
    return model.from_pretrained(pretrain), processor.from_pretrained(pretrain)

In [21]:
# pipeline for transformer library
class pipeline_transformer:
    """Thin wrapper that runs a processor + model pair and picks one output.

    Args:
        layer: key used to index the model output (e.g. "last_hidden_state").
        row: ``False`` to return the selected output unchanged; any other
            value is used as the index along the second axis (``[:, row]``).
        device: device the model and the processed inputs are moved to.
    """

    def __init__(self, layer, row=False, device='cuda:0'):
        self.layer = layer
        self.row = row
        self.device = device

    def selct_model(self, model, processor):
        """Attach the model/processor pair; the model is set to eval mode and moved to the device."""
        self.processor = processor
        self.model = model
        self.model.eval().to(self.device)

    def process_model(self, img):
        """Preprocess `img` into PyTorch tensors on the device and run the model."""
        batch = self.processor(images=img, return_tensors="pt").to(self.device)
        return self.model(**batch)

    def extract(self, img):
        """Return the configured output of the model for `img`."""
        selected = self.process_model(img)[self.layer]
        # Only the literal False means "no row selection"; row=0 must still slice.
        if self.row is False:
            return selected
        return selected[:, self.row]

In [22]:
# Load the pretrained "google/vit-base-patch16-224-in21k" ViT model and its image processor.
model_pipeline, preprocess = select_transformers_model(ViTModel, ViTImageProcessor, pretrain="google/vit-base-patch16-224-in21k")
# row=0: extract() returns last_hidden_state[:, 0] for each input.
vit_gg_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=DEVICE)
# selct_model() puts the model in eval mode and moves it to DEVICE.
vit_gg_pipe.selct_model(model_pipeline, preprocess)
# The optimizer receives every model parameter; requires_grad flags are only
# adjusted afterwards by the freeze loop in the following cell.
Optimizer = torch.optim.Adam(vit_gg_pipe.model.parameters(),lr = LEARNING_RATE)
criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-8)
# convert_feature is imported from script.tool; the training loop calls its
# process_extract() and save_weight() reaches the model through .pipeline.model.
cvt_feature_vit_gg = convert_feature(vit_gg_pipe)

In [23]:
# Position (in named_parameters() order) of the first parameter to freeze;
# everything before it stays trainable. With 198 the frozen parameters are
# pooler.dense.weight and pooler.dense.bias (199 was the earlier value).
LAST_LAYER = 198
for n, (layer, param) in enumerate(vit_gg_pipe.model.named_parameters()):
    param.requires_grad = n < LAST_LAYER
    if not param.requires_grad:
        print("Freeze at layer :", layer)

Freeze at layer : pooler.dense.weight
Freeze at layer : pooler.dense.bias


# Training

In [24]:
from tqdm.notebook import tqdm
import numpy as np

In [None]:
def save_weight(epoch, model, Optimizer, running_loss):
    """Write a training checkpoint for `epoch` to the path given by get_name().

    The checkpoint is a dict with the epoch number, the state dict of
    ``model.pipeline.model``, the optimizer state dict and the mean of
    `running_loss`.

    Args:
        epoch: zero-based epoch index.
        model: object exposing the trained network as ``.pipeline.model``.
        Optimizer: optimizer whose state is stored alongside the weights.
        running_loss: per-step loss values of the epoch.
    """
    import os

    path_trained = get_name(epoch, running_loss)
    # torch.save does not create missing directories; without this a missing
    # "weights/" folder would raise only after a whole epoch has been trained.
    checkpoint_dir = os.path.dirname(path_trained)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.pipeline.model.state_dict(),
            'optimizer_state_dict': Optimizer.state_dict(),
            'loss': np.mean(running_loss),
            }, path_trained)

def get_name(epoch, running_loss):
    """Return the checkpoint path for `epoch` (zero-based) and its losses.

    The file name carries the one-based epoch number and the mean of
    `running_loss` rounded to 5 decimals.
    """
    epoch_number = epoch + 1
    mean_loss = round(np.mean(running_loss), 5)
    return f"weights/vitgg_lr2e05_ep{epoch_number}_loss{mean_loss}.pth"

In [None]:
# Triplet-loss fine-tuning loop: one checkpoint is written after every epoch.
# NOTE(review): the three best_* trackers below are initialised but never
# read or updated by any statement in this cell.
best_validation_loss = 100
best_acc_top = 0
best_acc_top_n = 0

# NOTE(review): selct_model() put the model in eval mode and nothing visible
# here switches it to train mode - confirm this is intended.
for epoch in tqdm(range(EPOCHS), desc="Epochs"):
    running_loss = []  # per-step losses of the current epoch
    for step, (anchor_img, positive_img, negative_img) in enumerate(tqdm(train_dl, desc="Training", leave=False)):
        # process_extract is defined in script.tool; presumably it returns one
        # embedding per image via vit_gg_pipe - TODO confirm.
        anchor_out = cvt_feature_vit_gg.process_extract(anchor_img)
        positive_out = cvt_feature_vit_gg.process_extract(positive_img)
        negative_out = cvt_feature_vit_gg.process_extract(negative_img)
        
        loss = criterion(anchor_out, positive_out, negative_out)
        
        # Standard step: clear old gradients, backpropagate, update weights.
        Optimizer.zero_grad()
        loss.backward()
        Optimizer.step()
        # Detached CPU copy so the list does not keep the autograd graph alive.
        running_loss.append(loss.cpu().detach().numpy())
    
    # The epoch's mean loss becomes part of the checkpoint file name (see get_name).
    save_weight(epoch, cvt_feature_vit_gg, Optimizer, running_loss)
    print("Epoch: {}/{} — Loss: {:.4f}".format(epoch+1, EPOCHS, np.mean(running_loss)))

In [12]:
# Export the model and the image processor with save_pretrained() into one directory.
# NOTE(review): absolute, machine-specific path - other cells build paths from
# the ROOT_NFS_* roots instead.
# NOTE(review): `from_pt` is normally a from_pretrained() loading option;
# confirm it has any effect when passed to save_pretrained().
path_trained = "/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep"
model_pipeline.save_pretrained(path_trained, from_pt=True)
preprocess.save_pretrained(path_trained, from_pt=True)

['/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep/preprocessor_config.json']

In [8]:
from transformers import ViTModel
import torch
# Re-load a fine-tuned checkpoint directory and re-save only its state dict
# as a single PyTorch file.
# NOTE(review): torch.save does not create './weights' if it is missing.
path_trained = ROOT_NFS_TEST / "weights/vit_gg_lr2e-05_eu_9ep_0_95099acc"
vit_gg = ViTModel.from_pretrained(path_trained)
torch.save(vit_gg.state_dict(), './weights/model.pt')

In [18]:
# Sanity check: the exported state dict loads into a freshly instantiated base
# ViT; the recorded output below reports that all keys matched.
# NOTE(review): torch.load is called without map_location / weights_only -
# consider setting them when loading files from other machines.
test = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
test.load_state_dict(torch.load('./weights/model.pt'))

<All keys matched successfully>