In [1]:
import itertools
import time

import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageNet
from torch.utils.data import DataLoader
from torch.quantization import get_default_qconfig, prepare, convert
from torch.quantization import fuse_modules

### ImageNet 검증 데이터 가져오기 및 DataLoader 생성

In [2]:
# Per-channel normalization; the constants are presumably the usual ImageNet
# channel statistics expected by torchvision's pretrained weights - TODO confirm.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Evaluation preprocessing: resize, center-crop to 224, convert to tensor, normalize.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]
)

# The dataset archives must already be present under `root`; this cell raises a
# RuntimeError when the devkit archive is missing or corrupted.
test_dataset = ImageNet(
    root="./pretrained_ResNet50_testdata",
    split="val",
    transform=transform,
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

RuntimeError: The archive ILSVRC2012_devkit_t12.tar.gz is not present in the root directory or is corrupted. You need to download it externally and place it in /path/to/imagenet.

### 양자화 할 pretrained ResNet50 모델 불러오기

In [None]:
# Eager-mode static quantization needs quantize/dequantize stubs at the model
# boundary and a quantization-aware residual add. The plain
# torchvision.models.resnet50 has neither, so its converted version cannot be run
# on float inputs. torchvision's quantizable ResNet-50 has both built in;
# quantize=False loads the ordinary float ImageNet weights so that fusion,
# calibration and conversion can still be carried out explicitly in this notebook.
model = torchvision.models.quantization.resnet50(pretrained=True, quantize=False)
model.eval()
model.to('cpu')  # static quantization runs on CPU (the fbgemm backend targets x86 CPUs)

model

### 양자화 진행을 위한 계층 합치기(fusion)

In [None]:
# Install a no-op fuse_model only when the model does not already provide one, so
# that calling model.fuse_model() never fails and a real implementation on the
# model class is never shadowed by the dummy (monkey patching).
if not hasattr(model, 'fuse_model'):
    model.fuse_model = lambda: None

qconfig = get_default_qconfig('fbgemm')  # fbgemm: x86 CPU (Intel/AMD) || qnnpack: ARM CPU (e.g. mobile devices)
model.qconfig = qconfig

# NOTE: this is an alias, not a copy - the in-place fusion applied to fused_model
# also modifies `model`.
fused_model = model

def fuse_resnet50_modules(model):
    """Fuse Conv/BatchNorm(/ReLU) groups of a ResNet-50 style model in place.

    The model is expected to expose ``conv1``/``bn1``/``relu`` at the top level and
    ``layer1`` .. ``layer4`` as iterables of bottleneck blocks, each with
    ``conv1``..``conv3`` and ``bn1``..``bn3`` and an optional ``downsample``
    container whose children ``'0'`` and ``'1'`` are fused together.

    For the first two convolutions of a block the ReLU is fused as well, but only
    when the block has a dedicated ``relu1`` / ``relu2`` module. Blocks that only
    have a single ``relu`` attribute keep it untouched, so their behaviour is
    exactly Conv+BN fusion. ``conv3``/``bn3`` is never fused with a ReLU because the
    activation is applied after the residual addition, not directly after ``bn3``.

    Args:
        model: The network to fuse. It is modified in place; it should be in eval
            mode, as required by ``fuse_modules`` for Conv+BN folding.

    Returns:
        None.
    """
    # Stem: conv1 -> bn1 -> relu
    fuse_modules(model, [['conv1', 'bn1', 'relu']], inplace=True)

    # layer1 .. layer4: bottleneck blocks
    for layer_name in ('layer1', 'layer2', 'layer3', 'layer4'):
        for bottleneck in getattr(model, layer_name):
            groups = []
            for idx in (1, 2):
                group = [f'conv{idx}', f'bn{idx}']
                relu_name = f'relu{idx}'
                # Only fuse an activation that belongs exclusively to this conv.
                if isinstance(getattr(bottleneck, relu_name, None), torch.nn.ReLU):
                    group.append(relu_name)
                groups.append(group)
            groups.append(['conv3', 'bn3'])
            fuse_modules(bottleneck, groups, inplace=True)

            # Projection shortcut (conv followed by batch norm), when present.
            downsample = getattr(bottleneck, 'downsample', None)
            if downsample is not None:
                fuse_modules(downsample, [['0', '1']], inplace=True)

# Linear layers are quantized automatically when torch.quantization.convert() is called.
fuse_resnet50_modules(fused_model)

fused_model

### 모델 양자화

In [None]:
# Prepare the fused model for the calibration pass that follows.
prepared_model = prepare(fused_model)

def calibrate(model, loader, max_batches=11):
    """Run a limited number of batches through ``model`` for calibration.

    Static quantization needs representative inputs to estimate the
    scale/zero_point values; only a subset of the loader is used for that.

    Args:
        model: The model to calibrate. It is switched to eval mode and called
            once per batch; its outputs are discarded.
        loader: Iterable yielding ``(images, labels)`` pairs. Labels are ignored.
        max_batches: Maximum number of batches to consume. The default of 11
            matches the previous hard-coded limit. ``None`` consumes the whole
            loader.

    Returns:
        None.
    """
    model.eval()
    with torch.no_grad():
        # islice stops before pulling the next batch, so no batch is loaded
        # only to be thrown away once the limit is reached.
        for images, _ in itertools.islice(loader, max_batches):
            model(images)

# Run the calibration batches through the prepared model.
# NOTE(review): calibration draws from the same loader that is used for evaluation.
calibrate(prepared_model, test_loader)

# Convert the calibrated model into its quantized form.
quantized_model = convert(prepared_model)

### 양자화 모델 평가

In [None]:
def evaluate(model, loader, device='cpu'):
    """Measure top-1 accuracy and mean per-batch wall-clock time of ``model``.

    Args:
        model: Module that returns class scores with the class dimension at
            index 1 when called on a batch of images.
        loader: Iterable yielding ``(images, labels)`` tensor pairs. It does not
            need to support ``len()``; batches are counted while iterating.
        device: Device the batches are moved to before the forward pass.
            Defaults to ``'cpu'``, where statically quantized models run.

    Returns:
        tuple: ``(acc, latency)`` where ``acc`` is the accuracy in percent and
        ``latency`` is the elapsed time in seconds divided by the number of
        batches. The timing covers the whole loop, so it includes data loading
        as well as the forward pass.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    num_batches = 0
    start = time.perf_counter()
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            num_batches += 1
    elapsed = time.perf_counter() - start

    if total == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    acc = 100 * correct / total
    latency = elapsed / num_batches
    return acc, latency

# This section reports on the quantized model, so evaluate the converted model
# rather than the float `model`.
acc, latency = evaluate(quantized_model, test_loader)

print(f"[PyTorch] Accuracy: {acc:.2f}%, Latency: {latency:.4f}s per batch")