# Library

In [1]:
import random

import numpy as np

import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import f1_score

import onnxruntime as ort

from utils.model import ConvNet

# Config

In [2]:
# Settings shared by the cells below.
CFG = dict(
    seed=42,                     # value handed to seed_everything
    bs=16,                       # DataLoader batch size
    mean=(0.485, 0.456, 0.406),  # per-channel mean used by transforms.Normalize
    std=(0.229, 0.224, 0.225),   # per-channel std used by transforms.Normalize
)

# Fix Seed

In [3]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and
    PyTorch's CPU/CUDA generators with the same value, then sets the cuDNN
    ``deterministic`` flag and clears the cuDNN ``benchmark`` flag.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # needed when more than one GPU is used

    # cuDNN switches.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all generators with the configured value before any data or model code runs.
seed_everything(CFG['seed'])

# Data

In [4]:
# Preprocessing applied to every image: resize to 64x64, convert to a
# tensor, then normalize with the mean/std stored in CFG.
transform = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(CFG['mean'], CFG['std']),
    ]
)

In [5]:
# NOTE(review): the images are read from ./data/train/ — confirm that scoring
# on this folder (rather than a held-out split) is intended.
dataset = ImageFolder(root='./data/train/', transform=transform)

# shuffle stays off so every pass over the loader visits the samples in the
# same order.
dataloader = DataLoader(dataset, batch_size=CFG['bs'], shuffle=False)

# 모델 성능 비교

### PyTorch 모델

In [6]:
# Build the 3-class ConvNet, read the checkpoint onto the CPU, and load it
# into the model. load_state_dict is left as the last expression so the
# cell still displays its key-matching report.
model = ConvNet(num_classes=3)
state_dict = torch.load(
    "./checkpoints/fine_tuned_conv_net.pth",
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
# Run the PyTorch model over the whole loader in eval mode, collecting the
# predicted class and the true label of every sample.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():  # gradients are not needed for scoring
    for batch_inputs, batch_labels in tqdm.tqdm(dataloader):
        # The model returns a pair; only its first element is used here.
        scores, _unused = model(batch_inputs)
        batch_preds = torch.max(scores, dim=1).indices

        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Macro-averaged F1 over everything collected above.
pytorch_f1_score = f1_score(all_labels, all_preds, average='macro')

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 40.56it/s]


### ONNX 변환 모델

In [8]:
# Score the ONNX export (conv_net.onnx) with ONNX Runtime on the CPU provider.
session_fp32 = ort.InferenceSession("./checkpoints/conv_net.onnx", providers=['CPUExecutionProvider'])

# Read the input tensor name from the session instead of hard-coding "input",
# so the cell keeps working if the model was exported under another name.
onnx_input_name = session_fp32.get_inputs()[0].name

all_onnx_preds = []
# Labels are gathered in this same loop so the score does not depend on a
# list built by a different cell (or on that cell's iteration order).
onnx_labels = []

for inputs, labels in tqdm.tqdm(dataloader):
    ort_inputs = {onnx_input_name: inputs.numpy()}

    # run(None, ...) returns all model outputs; the predicted class is the
    # argmax along axis 1 of the first one.
    onnx_outs = session_fp32.run(None, ort_inputs)
    onnx_preds = np.argmax(onnx_outs[0], axis=1)

    all_onnx_preds.extend(onnx_preds)
    onnx_labels.extend(labels.numpy())

# Macro-averaged F1 of the ONNX predictions against the labels seen above.
onnx_f1_score = f1_score(onnx_labels, all_onnx_preds, average='macro')

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 24.23it/s]


### INT8 양자화 모델

In [9]:
# Score the INT8 ONNX model (conv_net_int8.onnx) with ONNX Runtime on the CPU provider.
int8_session = ort.InferenceSession("./checkpoints/conv_net_int8.onnx", providers=["CPUExecutionProvider"])

# Read the input tensor name from the session instead of hard-coding "input",
# so the cell keeps working if the model was exported under another name.
int8_input_name = int8_session.get_inputs()[0].name

all_int8_preds = []
# Labels are gathered in this same loop so the score does not depend on a
# list built by a different cell (or on that cell's iteration order).
int8_labels = []

for inputs, labels in tqdm.tqdm(dataloader):
    ort_inputs = {int8_input_name: inputs.numpy()}

    # run(None, ...) returns all model outputs; the predicted class is the
    # argmax along axis 1 of the first one.
    int8_outs = int8_session.run(None, ort_inputs)
    int8_preds = np.argmax(int8_outs[0], axis=1)

    all_int8_preds.extend(int8_preds)
    int8_labels.extend(labels.numpy())

# Macro-averaged F1 of the INT8 predictions against the labels seen above.
int8_f1_score = f1_score(int8_labels, all_int8_preds, average='macro')

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:02<00:00,  8.76it/s]


### 결과 비교

In [10]:
# Print the macro F1 of the three model variants, one per line.
print(
    f"Pytorch model's F1-Score: {pytorch_f1_score}",
    f"ONNX model's F1-Score: {onnx_f1_score}",
    f"INT8 model's F1-Score: {int8_f1_score}",
    sep="\n",
)

Pytorch model's F1-Score: 1.0
ONNX model's F1-Score: 1.0
INT8 model's F1-Score: 1.0
