# Prepare data

In [1]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold

import pandas as pd

from script.tool import ROOT_NFS_DATA

In [2]:
# Locate the dataset folder under ROOT_NFS_DATA and read its CSV listing into a DataFrame.
path_dataset = ROOT_NFS_DATA / 'Cosmenet_product_20231018'
df_pd = pd.read_csv(path_dataset / 'datas_20231018.csv')
# Preview one row to check the columns that were read.
df_pd.head(1)

Unnamed: 0,file_names,labels,images_path
0,14624_14.jpg,14624,/app/nfs_clientshare/Datasets/Cosmenet_product...


In [3]:
# Number of rows (images) per label, largest groups first.
group_df = (
    df_pd.groupby(['labels'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)
group_df.head(1)

Unnamed: 0,labels,count
4172,50348,100


In [4]:
# Labels that have between 2 and 8 images, and labels that have at most 8 images.
filter_img_2_to_8 = group_df.loc[group_df['count'].between(2, 8), 'labels'].values
filter_img_1_to_8 = group_df.loc[group_df['count'] <= 8, 'labels'].values

# Rows whose label has more than 8 images / rows whose label has 2 to 8 images.
df_more_8 = df_pd.loc[~df_pd['labels'].isin(filter_img_1_to_8)]
df_2_to_8 = df_pd.loc[df_pd['labels'].isin(filter_img_2_to_8)]

In [5]:
# Two stratified folds over the 2-8 image rows; only the first fold's
# (train, test) positional indices are used.
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
train_2_to_8, test_2_to_8 = next(skf.split(df_2_to_8, df_2_to_8['labels']))
df_2_to_8_train = df_2_to_8.iloc[train_2_to_8]
df_2_to_8_test = df_2_to_8.iloc[test_2_to_8]
df_2_to_8_train.head(1)

Unnamed: 0,file_names,labels,images_path
322,39856_2.png,39856,/app/nfs_clientshare/Datasets/Cosmenet_product...


In [6]:
# Single stratified 80/20 shuffle split of the rows whose label has more than 8 images.
sss_train = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(sss_train.split(df_more_8, df_more_8['labels']))
df_train = df_more_8.iloc[train_idx]
df_test = df_more_8.iloc[test_idx]
df_train.head(1)

Unnamed: 0,file_names,labels,images_path
3177,11596_2.jpg,11596,/app/nfs_clientshare/Datasets/Cosmenet_product...


In [7]:
# Row and label counts for each bucket built above.
print(f"amount of all data : {len(df_pd)}")
print(f"amount of all class : {len(group_df)}")
print(f"amount of data 2-8 img : {len(df_2_to_8)}")
print(f"amount of 2-8 img class : {len(filter_img_2_to_8)}")
print(f"amount of data more 8 img : {len(df_more_8)}")
print(f"amount of more 8 img class : {(group_df['count'] > 8).sum()}")
print(f"amount of data & class only one : {(group_df['count'] == 1).sum()}")

amount of all data : 60196
amount of all class : 4178
amount of data 2-8 img : 1548
amount of 2-8 img class : 278
amount of data more 8 img : 58631
amount of more 8 img class : 3883
amount of data & class only one : 17


In [8]:
# Per-label image counts for the labels that have more than 8 images.
# An explicit copy is taken because the "count" column is overwritten below;
# assigning into a filtered slice of group_df triggers pandas'
# SettingWithCopyWarning. (The former extra "count > 1" filter was implied by
# "count > 8" and has been dropped.)
df_count_8 = group_df[group_df['count'] > 8].copy()
# For every distinct image count, how many labels have exactly that many images.
group_df_count_8 = df_count_8.groupby(['count'])['count'] \
    .count().reset_index(name='counter_count').sort_values(['counter_count'], ascending=False)
# Image counts that are held by exactly one label.
_single_label_count = group_df_count_8["counter_count"] == 1
counter_count_1 = group_df_count_8.loc[_single_label_count, "count"].values
ind_c = group_df_count_8.loc[_single_label_count, "count"].index
# Pool those labels under the placeholder count 101 so that "count" can serve as
# the stratification key of the label-level split (a key value that occurs only
# once cannot be stratified). 101 is above the largest count shown for this
# data (100), so it does not collide with a real count here.
df_count_8.loc[df_count_8["count"].isin(counter_count_1), "count"] = 101

In [9]:
# Label-level split: 18% of the labels (stratified by their image count) go to
# the "val" label set, the rest to the "split" label set.
sss_val = StratifiedShuffleSplit(n_splits=1, test_size=0.18, random_state=42)
split_idx, val_idx = next(sss_val.split(df_count_8, df_count_8['count']))
split_class = df_count_8["labels"].iloc[split_idx].values
val_class = df_count_8["labels"].iloc[val_idx].values
val_class[:5]

array([46783, 44043, 40575, 44033, 48695])

In [10]:
# Partition the train and test rows by whether their label is in the "split"
# label set or in the "val" label set.
df_train_split = df_train.loc[df_train["labels"].isin(split_class)]
df_test_split = df_test.loc[df_test["labels"].isin(split_class)]
df_train_val = df_train.loc[df_train["labels"].isin(val_class)]
df_test_val = df_test.loc[df_test["labels"].isin(val_class)]

In [11]:
# Append the rows of the 2-8 image labels to the "val" label subsets
# (train half to the train subset, test half to the test subset).
df_train_val_mix = pd.concat([df_train_val, df_2_to_8_train])
df_test_val_mix = pd.concat([df_test_val, df_2_to_8_test])

In [12]:
# Row count and number of distinct labels for each subset.
for _subset_name, _subset in (
    ("train split", df_train_split),
    ("test split", df_test_split),
    ("train val", df_train_val),
    ("test val", df_test_val),
    ("train val mix", df_train_val_mix),
    ("test val mix", df_test_val_mix),
):
    print(f"amount of {_subset_name} : {len(_subset)}")
    print(f"amount of {_subset_name} class : {_subset['labels'].nunique()}")

amount of train split : 38474
amount of train split class : 3184
amount of test split : 9620
amount of test split class : 3184
amount of train val : 8430
amount of train val class : 699
amount of test val : 2107
amount of test val class : 699
amount of train val mix : 9204
amount of train val mix class : 977
amount of test val mix : 2881
amount of test val mix class : 977


In [13]:
# Renumber the rows 0..n-1. CosmenetDataset_Triplet uses the values of df.index
# to index its label/path arrays, so the index has to match row positions.
df_train_split = df_train_split.reset_index(drop=True)

# Create Dataset

In [14]:
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd
import numpy as np

from PIL import Image
import random
from tqdm.notebook import tqdm
from script.tool import ROOT_NFS, ROOT_NFS_DATA, ROOT_NFS_TEST

In [15]:
def get_default_device():
    """Return ``torch.device('cuda')`` when CUDA is available, else ``torch.device('cpu')``."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
IMAGE_SIZE = 224  # side length passed to get_dataset; images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 4  # batch size given to the training DataLoader
DEVICE = get_default_device()  # device handed to the model pipeline
LEARNING_RATE = 0.00002  # learning rate of the Adam optimizer built in select_transformers_model
EPOCHS = 40  # upper bound of the epoch range in the training loop

In [16]:
class CosmenetDataset_Triplet():
    """Map-style dataset that serves image triplets for triplet-loss training.

    Labels are read from column 1 of ``df`` and image paths from column 2
    (selected by position, not by column name).

    With ``train=True`` every item is ``(anchor_img, positive_img, negative_img)``:
    the positive is a *different* row carrying the anchor's label, the negative
    is a row carrying any other label; both are drawn with the ``random`` module.
    With ``train=False`` every item is ``(anchor_img, anchor_label)``.

    When ``transform`` is given, each returned image is
    ``(transform(img) * 255).int()``; otherwise the RGB PIL image is returned.
    """

    def __init__(self, df: pd.DataFrame, train=True, transform=None):
        """Store the frame and cache its label / path columns as arrays.

        Args:
            df: frame whose column 1 holds labels and column 2 holds image paths.
            train: when True items are triplets, otherwise (image, label) pairs.
            transform: optional callable applied to every loaded image.
        """
        self.data_csv = df
        self.is_train = train
        self.transform = transform
        # Cached in both modes so that __len__ and __getitem__ also work with
        # train=False (they used to fail on the missing attributes).
        self.labels = df.iloc[:, 1].values
        self.image_path = df.iloc[:, 2].values
        # Row *positions*, not DataFrame index labels: these values are used to
        # index the arrays above, so they must be 0..n-1 even when ``df`` was
        # filtered without a reset_index().
        self.index = np.arange(len(df))

    @staticmethod
    def _load_image(path):
        """Open the image at ``path`` and convert it to RGB."""
        return Image.open(path).convert('RGB')

    def _apply_transform(self, img):
        """Apply the optional transform, scale by 255 and cast to int."""
        if self.transform is None:
            return img
        return (self.transform(img) * 255).int()

    def get_caompare_img(self, item, anchor_label, compare_type):
        """Load a random comparison image for the anchor at position ``item``.

        Args:
            item: row position of the anchor; never returned as the comparison.
            anchor_label: label of the anchor.
            compare_type: ``"pos"`` draws a row with the same label,
                ``"neg"`` draws a row with a different label.

        Returns:
            The selected image converted to RGB (not transformed).

        Raises:
            ValueError: if ``compare_type`` is neither ``"pos"`` nor ``"neg"``.
            IndexError: if no row satisfies the request (for example a label
                that has a single image and ``compare_type == "pos"``).
        """
        if compare_type == "pos":
            wanted = self.labels == anchor_label
        elif compare_type == "neg":
            wanted = self.labels != anchor_label
        else:
            raise ValueError("compare_type must be pos or neg")
        candidates = self.index[wanted & (self.index != item)]
        if len(candidates) == 0:
            raise IndexError(
                f"no {compare_type} candidate for item {item} with label {anchor_label}"
            )
        # randrange + indexing instead of random.choice: choice() applied
        # directly to a NumPy array is not reliable on every Python version.
        compare_item = candidates[random.randrange(len(candidates))]
        return self._load_image(self.image_path[compare_item])

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.image_path)

    def __getitem__(self, item):
        """Return the triplet (train mode) or the (image, label) pair for ``item``."""
        anchor_label = self.labels[item]
        anchor_img = self._load_image(self.image_path[item])
        if not self.is_train:
            return self._apply_transform(anchor_img), anchor_label
        positive_img = self.get_caompare_img(item, anchor_label, "pos")
        negative_img = self.get_caompare_img(item, anchor_label, "neg")
        return (
            self._apply_transform(anchor_img),
            self._apply_transform(positive_img),
            self._apply_transform(negative_img),
        )

In [17]:
def get_dataset(IMAGE_SIZE, data):
    """Build a training-mode CosmenetDataset_Triplet over ``data``.

    Each image is converted with ``ToTensor`` and then resized to
    ``IMAGE_SIZE x IMAGE_SIZE`` without antialiasing.
    """
    target_size = (IMAGE_SIZE, IMAGE_SIZE)
    steps = [
        transforms.ToTensor(),
        transforms.Resize(target_size, antialias=False),
    ]
    return CosmenetDataset_Triplet(data, train=True, transform=transforms.Compose(steps))

In [18]:
# Preview the first training row after the index reset.
df_train_split.head(1)

Unnamed: 0,file_names,labels,images_path
0,11596_2.jpg,11596,/app/nfs_clientshare/Datasets/Cosmenet_product...


In [19]:
# Build the triplet dataset from the training rows and wrap it in a shuffling
# DataLoader (4 worker processes, pinned memory).
train_dataset = get_dataset(IMAGE_SIZE, df_train_split)
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

# Preprocessing

In [20]:
import torch.nn as nn
from transformers import ViTImageProcessor, ViTModel
from script.tool import convert_feature

2023-10-29 18:07:15.025999: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-29 18:07:15.546304: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-29 18:07:15.546348: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-29 18:07:15.548359: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-29 18:07:15.756451: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

In [21]:
def select_transformers_model(model, processor, pretrain="google/vit-base-patch16-224-in21k",
                              load_state_dict=None, learning_rate=None):
    """Instantiate a pretrained model, its processor and an Adam optimizer.

    Args:
        model: class exposing ``from_pretrained`` (e.g. ``ViTModel``).
        processor: class exposing ``from_pretrained`` (e.g. ``ViTImageProcessor``).
        pretrain: name or path passed to both ``from_pretrained`` calls.
        load_state_dict: optional path to a checkpoint holding the keys
            ``'model_state_dict'`` and ``'optimizer_state_dict'``; when given,
            both states are restored.
        learning_rate: Adam learning rate; defaults to the module-level
            ``LEARNING_RATE`` when omitted.

    Returns:
        ``(model, processor, Optimizer)``.
    """
    model = model.from_pretrained(pretrain)
    processor = processor.from_pretrained(pretrain)
    if learning_rate is None:
        learning_rate = LEARNING_RATE
    # The original code read ``model.model.parameters()``, which raises
    # AttributeError for a plain transformers model such as ViTModel. Optimize
    # the inner ``.model`` only when the object really is a wrapper exposing one.
    trainable = getattr(model, "model", model)
    Optimizer = torch.optim.Adam(trainable.parameters(), lr=learning_rate)
    if load_state_dict:
        # Deserialize the checkpoint once, onto the CPU, so it can also be read
        # on a machine without the device it was saved from; load_state_dict
        # copies the values into the existing parameters / optimizer state.
        checkpoint = torch.load(load_state_dict, map_location="cpu")
        model.load_state_dict(checkpoint['model_state_dict'])
        Optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, processor, Optimizer

In [22]:
# pipeline for transformer library
class pipeline_transformer:
    """Run a transformers model on images and pick one entry of its output.

    ``layer`` is the key looked up in the model output. ``row`` is either
    ``False`` (return the whole entry) or an index applied as ``[:, row]``.
    """

    def __init__(self, layer, row=False, device='cuda:0'):
        self.layer = layer
        self.row = row
        self.device = device

    def selct_model(self, model, processor):
        """Attach model and processor; switch the model to eval mode and move it to the device."""
        self.processor = processor
        self.model = model
        model.eval().to(self.device)

    def process_model(self, img):
        """Preprocess ``img`` into "pt" tensors on the device and return the raw model output."""
        batch = self.processor(images=img, return_tensors="pt")
        return self.model(**batch.to(self.device))

    def extract(self, img):
        """Return the configured output entry, narrowed to ``[:, row]`` unless row is False."""
        selected = self.process_model(img)[self.layer]
        if self.row is False:
            return selected
        return selected[:, self.row]

In [23]:
# Load the pretrained ViT, its image processor and an Adam optimizer, restoring
# model and optimizer state from a saved checkpoint.
# NOTE(review): this path ends in ".pt" while get_name() writes ".pth" files —
# confirm that this checkpoint file exists.
model_pipeline, preprocess, Optimizer = select_transformers_model(
    ViTModel, ViTImageProcessor, pretrain="google/vit-base-patch16-224-in21k", 
    load_state_dict='./weights/temp_epoch/vitgg_lr2e05_ep1_loss0.04568.pt')
# The pipeline returns outputs["last_hidden_state"][:, 0] for each input (row=0).
vit_gg_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=DEVICE)
vit_gg_pipe.selct_model(model_pipeline, preprocess)
# convert_feature comes from script.tool; its process_extract() is what the
# training loop calls on each image batch.
cvt_feature_vit_gg = convert_feature(vit_gg_pipe)
# Triplet margin loss with margin 1.0 and the p=2 norm.
criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-8)

In [37]:
# LAST_LAYER = 199
LAST_LAYER = 198
# Parameters at position LAST_LAYER and beyond (in named_parameters() order)
# are frozen; every earlier parameter is left trainable.
for n, (layer, param) in enumerate(vit_gg_pipe.model.named_parameters()):
    param.requires_grad = n < LAST_LAYER
    if not param.requires_grad:
        print("Freeze at layer :", layer)

Freeze at layer : pooler.dense.weight
Freeze at layer : pooler.dense.bias


# Training

In [38]:
from tqdm.notebook import tqdm
import numpy as np

In [39]:
def save_weight(epoch, model, Optimizer, running_loss):
    """Write a training checkpoint for ``epoch`` to the path given by get_name().

    The checkpoint stores the epoch, the state dict of ``model.pipeline.model``,
    the optimizer state dict and the mean of ``running_loss``.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.pipeline.model.state_dict(),
        'optimizer_state_dict': Optimizer.state_dict(),
        'loss': np.mean(running_loss),
    }
    torch.save(checkpoint, get_name(epoch, running_loss))

def get_name(epoch, running_loss):
    """Build the checkpoint path for an epoch.

    The file name embeds ``epoch + 1`` and the mean of ``running_loss`` rounded
    to five decimal places.
    """
    epoch_tag = str(epoch + 1)
    loss_tag = str(round(np.mean(running_loss), 5))
    return f"weights/temp_epoch/vitgg_lr2e05_ep{epoch_tag}_loss{loss_tag}.pth"

In [None]:
# Fine-tune with the triplet loss.
# range(1, EPOCHS) starts at epoch index 1, so EPOCHS - 1 passes over train_dl are run.
# NOTE(review): selct_model() put the model in eval() mode and nothing visible
# switches it back to train() — confirm this is intended while fine-tuning.
for epoch in tqdm(range(1, EPOCHS), desc="Epochs"):
    running_loss = []  # one loss value per step of this epoch
    for step, (anchor_img, positive_img, negative_img) in enumerate(tqdm(train_dl, desc="Training", leave=False)):
        # process_extract comes from script.tool.convert_feature; presumably it
        # returns one feature tensor per image — verify against that helper.
        anchor_out = cvt_feature_vit_gg.process_extract(anchor_img)
        positive_out = cvt_feature_vit_gg.process_extract(positive_img)
        negative_out = cvt_feature_vit_gg.process_extract(negative_img)
        
        loss = criterion(anchor_out, positive_out, negative_out)
        
        # Standard update: clear old gradients, backpropagate, step the optimizer.
        Optimizer.zero_grad()
        loss.backward()
        Optimizer.step()
        running_loss.append(loss.cpu().detach().numpy())
    
    # Checkpoint after every epoch; the file name carries epoch + 1 and the mean loss.
    save_weight(epoch, cvt_feature_vit_gg, Optimizer, running_loss)
    print("Epoch: {}/{} — Loss: {:.4f}".format(epoch+1, EPOCHS, np.mean(running_loss)))

Epochs:   0%|          | 0/39 [00:00<?, ?it/s]

Training:   0%|          | 0/9619 [00:00<?, ?it/s]

In [12]:
# Write the model and the image processor to path_trained via save_pretrained().
# NOTE(review): absolute, user-specific path — consider a configurable directory.
# NOTE(review): confirm that save_pretrained() actually uses the from_pt argument.
path_trained = "/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep"
model_pipeline.save_pretrained(path_trained, from_pt=True)
preprocess.save_pretrained(path_trained, from_pt=True)

['/home/music/Desktop/measure_model/weights/vit_gg_lr2e-05_eu_40ep/preprocessor_config.json']

In [8]:
from transformers import ViTModel
import torch
# Load a saved model directory from ROOT_NFS_TEST and re-save only its state
# dict as a plain torch file.
path_trained = ROOT_NFS_TEST / "weights/vit_gg_lr2e-05_eu_9ep_0_95099acc"
vit_gg = ViTModel.from_pretrained(path_trained)
torch.save(vit_gg.state_dict(), './weights/model.pt')

In [18]:
# Check that the exported state dict loads into a freshly created base ViT model.
test = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
test.load_state_dict(torch.load('./weights/model.pt'))

<All keys matched successfully>

In [36]:
# Load a per-epoch training checkpoint for inspection.
# Note: this rebinds `test`, which previously held a ViTModel.
test = torch.load('./weights/temp_epoch/vitgg_lr2e05_ep1_loss0.32437.pth')

In [37]:
# Top-level entries stored in the checkpoint dict.
test.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

In [41]:
# Entries of the saved optimizer state dict.
test['optimizer_state_dict'].keys()

dict_keys(['state', 'param_groups'])

In [43]:
# Parameter ids that have optimizer state recorded.
test['optimizer_state_dict']['state'].keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197])

In [46]:
# Fields stored in the optimizer state for parameter id 0.
test['optimizer_state_dict']['state'][0].keys()

dict_keys(['step', 'exp_avg', 'exp_avg_sq'])

In [51]:
# Shape of the exp_avg_sq tensor stored for parameter id 0.
test['optimizer_state_dict']['state'][0]['exp_avg_sq'].shape

torch.Size([1, 1, 768])

In [52]:
# Hyperparameters and parameter ids of each saved optimizer parameter group.
test['optimizer_state_dict']['param_groups']

[{'lr': 2e-05,
  'betas': (0.9, 0.999),
  'eps': 1e-08,
  'weight_decay': 0,
  'amsgrad': False,
  'maximize': False,
  'foreach': None,
  'capturable': False,
  'differentiable': False,
  'fused': None,
  'params': [0,
   1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   44,
   45,
   46,
   47,
   48,
   49,
   50,
   51,
   52,
   53,
   54,
   55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70,
   71,
   72,
   73,
   74,
   75,
   76,
   77,
   78,
   79,
   80,
   81,
   82,
   83,
   84,
   85,
   86,
   87,
   88,
   89,
   90,
   91,
   92,
   93,
   94,
   95,
   96,
   97,
   98,
   99,
   100,
   101,
   102,
   103,
   104,
   105,
   106,
   107,
   108,
   109,
   110,
   111,


In [54]:
# Parameter names stored in the saved model state dict.
test['model_state_dict'].keys()

odict_keys(['embeddings.cls_token', 'embeddings.position_embeddings', 'embeddings.patch_embeddings.projection.weight', 'embeddings.patch_embeddings.projection.bias', 'encoder.layer.0.attention.attention.query.weight', 'encoder.layer.0.attention.attention.query.bias', 'encoder.layer.0.attention.attention.key.weight', 'encoder.layer.0.attention.attention.key.bias', 'encoder.layer.0.attention.attention.value.weight', 'encoder.layer.0.attention.attention.value.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.output.dense.weight', 'encoder.layer.0.output.dense.bias', 'encoder.layer.0.layernorm_before.weight', 'encoder.layer.0.layernorm_before.bias', 'encoder.layer.0.layernorm_after.weight', 'encoder.layer.0.layernorm_after.bias', 'encoder.layer.1.attention.attention.query.weight', 'encoder.layer.1.attention.attention.query.bias', 'enc

In [59]:
# Shape of one saved weight tensor (layernorm_before of encoder layer 11).
test['model_state_dict']['encoder.layer.11.layernorm_before.weight'].shape

torch.Size([768])