# Imports

In [1]:
from PIL import Image
import numpy as np
import pandas as pd

import torch
from torchsummary import summary

from script.tool import ROOT, ROOT_NFS_TEST, ROOT_NFS_DATA, standardize_feature
from script.func_extract_feature import convert_feature
from tqdm import tqdm
from pathlib import Path
import time, os

# Initial setup

In [2]:
# Root folder of the raw Cosmenet product dataset (ROOT_NFS_DATA is imported from script.tool).
path_dataset = ROOT_NFS_DATA / 'Cosmenet_products_15000/raw_data'
# Prefer the first GPU, but fall back to the CPU so the notebook can still run
# on machines without CUDA. On GPU hosts this is identical to "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Cleaned metadata table; later cells read its 'path_img' and 'classes' columns.
df_pd = pd.read_csv(path_dataset / 'data_cleaned.csv')

# Module

## Timm

In [None]:
from script.func_extract_feature import select_timm_model, pipeline_timm

### EfficientNet

In [6]:
# Build the EfficientNet-B1 feature extractor with the project's timm helpers.
# num_classes=0 is presumably forwarded to timm, where it drops the classifier head;
# the report printed by this cell shows a [1, 1280] output — TODO confirm in select_timm_model.
model, preprocess = select_timm_model('efficientnet_b1', num_classes=0, pretrain=True)
eff_pipe = pipeline_timm(device=device)
# "selct_model" (sic) is the method name as spelled in the project API.
eff_pipe.selct_model(model, preprocess)
# Prints a single-run runtime and the output shape (see the cell output).
eff_pipe.report_test()
# Wrap the pipeline in a callable that the next cell runs over the dataset.
cvt_feature_eff = convert_feature(eff_pipe)

runtime : 26.701688766479492 ms
Output shape at layer : torch.Size([1, 1280])


In [None]:
# Run the EfficientNet-B1 converter over the 'path_img' and 'classes' columns of df_pd.
# NOTE(review): file_name_output="test" looks like a placeholder, and the quantized
# ONNX cell at the end of the notebook uses the same name — confirm the two runs
# do not overwrite each other's output.
cvt_feature_eff(
    df_pd['path_img'], 
    df_pd['classes'], 
    file_name_output="test"
    )

In [7]:
# Build the EfficientNet-B5 feature extractor with the project's timm helpers.
# Same recipe as the B1 cell; the report printed by this cell shows a [1, 2048] output.
model, preprocess = select_timm_model('efficientnet_b5', num_classes=0, pretrain=True)
eff_b5_pipe = pipeline_timm(device=device)
# "selct_model" (sic) is the method name as spelled in the project API.
eff_b5_pipe.selct_model(model, preprocess)
# Prints a single-run runtime and the output shape (see the cell output).
eff_b5_pipe.report_test()
# Converter bound to the B5 pipeline; keep it distinct from cvt_feature_eff (B1).
cvt_feature_eff_b5 = convert_feature(eff_b5_pipe)

runtime : 40.941715240478516 ms
Output shape at layer : torch.Size([1, 2048])


In [None]:
# Run the EfficientNet-B5 converter over the 'path_img' and 'classes' columns of df_pd.
# This must be cvt_feature_eff_b5: the output is named "efficientnet_b5", so calling
# cvt_feature_eff (the B1 converter) here would store B1 features under the B5 name.
cvt_feature_eff_b5(
    df_pd['path_img'],
    df_pd['classes'],
    file_name_output="efficientnet_b5"
    )

## Transformer

In [None]:
from script.func_extract_feature import select_transformers_model, pipeline_transformer

### ViT (Google)

In [8]:
from transformers import ViTImageProcessor, ViTModel

In [10]:
# Build a ViT feature extractor from the "google/vit-base-patch16-224-in21k" weights
# using the project's transformers helpers.
model, preprocess = select_transformers_model(ViTModel, ViTImageProcessor, pretrain="google/vit-base-patch16-224-in21k")
# layer/row presumably pick row 0 of the last_hidden_state output as the feature
# vector (last_hidden_state is reported as [1, 197, 768]) — TODO confirm in pipeline_transformer.
vit_gg_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=device)
# "selct_model" (sic) is the method name as spelled in the project API.
vit_gg_pipe.selct_model(model, preprocess)
# Prints a single-run runtime plus the available output layers and their shapes.
vit_gg_pipe.report_test()
# Wrap the pipeline in a callable that the next cell runs over the dataset.
cvt_feature_vit_gg = convert_feature(vit_gg_pipe)

runtime : 20.165681838989258 ms
outputs layers : odict_keys(['last_hidden_state', 'pooler_output'])
shape last_hidden_state : torch.Size([1, 197, 768])
shape pooler_output : torch.Size([1, 768])


In [None]:
# Run the pretrained-ViT converter over the 'path_img' and 'classes' columns of df_pd.
# The output name records the checkpoint and the layer the features are taken from.
cvt_feature_vit_gg(
    df_pd['path_img'], 
    df_pd['classes'], 
    file_name_output="vit_base_patch16_224_in21k_last_hidden_state"
    )

In [25]:
# Build a ViT feature extractor from the fine-tuned checkpoint stored under ROOT_NFS_TEST.
# The sub-path must be relative: with pathlib, joining a segment that starts with "/"
# discards the left operand, so '/weights/...' would resolve at the filesystem root
# instead of under ROOT_NFS_TEST (assumes ROOT_NFS_TEST is a pathlib.Path — TODO confirm).
model, preprocess = select_transformers_model(
    ViTModel, ViTImageProcessor,
    pretrain=ROOT_NFS_TEST / 'weights/vit_gg_lr2e-05_eu_9ep_0_95099acc')
# Same layer/row selection as the pretrained ViT cell, so the features are comparable.
vit_gg_trained_lr2e_05_pipe = pipeline_transformer(layer="last_hidden_state", row=0, device=device)
# "selct_model" (sic) is the method name as spelled in the project API.
vit_gg_trained_lr2e_05_pipe.selct_model(model, preprocess)
# Prints a single-run runtime plus the available output layers and their shapes.
vit_gg_trained_lr2e_05_pipe.report_test()
# Wrap the pipeline in a callable that the next cell runs over the dataset.
cvt_feature_vit_gg_trained_lr2e_05 = convert_feature(vit_gg_trained_lr2e_05_pipe)

runtime : 24.142026901245117 ms
outputs layers : odict_keys(['last_hidden_state', 'pooler_output'])
shape last_hidden_state : torch.Size([1, 197, 768])
shape pooler_output : torch.Size([1, 768])


In [26]:
# Run the fine-tuned-ViT converter over the 'path_img' and 'classes' columns of df_pd.
# The recorded run shows a progress bar over 15524 items taking about 20 minutes.
cvt_feature_vit_gg_trained_lr2e_05(
    df_pd['path_img'], 
    df_pd['classes'], 
    file_name_output="vit_b_p16_224_last_hidden_trained_lr2e_05_eu_9ep_0_95099acc"
    )

100%|██████████| 15524/15524 [20:08<00:00, 12.84it/s]


## Transformer (ONNX)

In [13]:
from onnxruntime import InferenceSession
from script.func_extract_feature import select_transformers_onnx_model, pipeline_transformer_onnx

### ViT (Google)

In [30]:
# Build the ONNX ViT feature extractor, running on the CPU execution provider.
# NOTE(review): the model path is an absolute path inside one user's home directory;
# consider deriving it from a shared root (e.g. the ROOT_* constants from script.tool).
# ViTImageProcessor comes from the `transformers` import in the ViT section above,
# so that cell must have been run first.
model, preprocess = select_transformers_onnx_model("/home/music/Desktop/measure_model/models/vit_gg/onnx/model.onnx", 
                                                   processor=ViTImageProcessor, providers=['CPUExecutionProvider'])
# No device argument here, unlike the torch pipelines; execution is governed by `providers` above.
vit_gg_onnx_pipe = pipeline_transformer_onnx(layer="last_hidden_state", row=0)
# "selct_model" (sic) is the method name as spelled in the project API.
vit_gg_onnx_pipe.selct_model(model, preprocess)
# Prints a single-run runtime and the output shape (see the cell output).
vit_gg_onnx_pipe.report_test()
# Wrap the pipeline in a callable that the next cell runs over the dataset.
cvt_feature_vit_gg_onnx = convert_feature(vit_gg_onnx_pipe)

runtime : 285.5355739593506 ms
shape : (1, 197, 768)


In [None]:
# Run the ONNX ViT converter over the 'path_img' and 'classes' columns of df_pd.
# NOTE(review): the output name ends with a trailing underscore — looks like an
# unfinished suffix; confirm the intended file name.
cvt_feature_vit_gg_onnx(
    df_pd['path_img'], 
    df_pd['classes'], 
    file_name_output="vit_b_p16_224_last_hidden_onnx_"
    )

In [45]:
# Build the quantized ONNX ViT feature extractor, running on the CPU execution provider.
# NOTE(review): the model path is an absolute path inside one user's home directory;
# consider deriving it from a shared root (e.g. the ROOT_* constants from script.tool).
model, preprocess = select_transformers_onnx_model("/home/music/Desktop/measure_model/models/vit_gg/onnx_quantize/model_quantized.onnx", 
                                                   processor=ViTImageProcessor, providers=['CPUExecutionProvider'])
# Same layer/row selection as the non-quantized ONNX pipeline.
vit_gg_onnx_quantize_pipe = pipeline_transformer_onnx(layer="last_hidden_state", row=0)
# "selct_model" (sic) is the method name as spelled in the project API.
vit_gg_onnx_quantize_pipe.selct_model(model, preprocess)
# Prints a single-run runtime and the output shape (see the cell output).
vit_gg_onnx_quantize_pipe.report_test()
# Wrap the pipeline in a callable that the next cell runs over the dataset.
cvt_feature_vit_gg_onnx_quantize = convert_feature(vit_gg_onnx_quantize_pipe)

runtime : 203.07588577270508 ms
shape : (1, 197, 768)


In [None]:
# Run the quantized ONNX ViT converter over the 'path_img' and 'classes' columns of df_pd.
# NOTE(review): file_name_output="test" is the same placeholder name the EfficientNet-B1
# cell uses — confirm the two runs do not overwrite each other's output, and
# pick a descriptive name if these features are meant to be kept.
cvt_feature_vit_gg_onnx_quantize(
    df_pd['path_img'], 
    df_pd['classes'], 
    file_name_output="test"
    )