In [14]:
####### 하이퍼파라미터 선언
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder


hyperparams = {
    "batch_size"   : 4,
    "learning_rate": 0.0001,
    "epochs"       : 5,
    "transform"    : transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean = [0.48235, 0.45882, 0.40784],
                std  = [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0],
            ),
        ]
    ),
}

In [15]:
####### 데이터 불러오기
train_dataset = ImageFolder("../datasets/pet/train", transform=hyperparams["transform"])
test_dataset  = ImageFolder("../datasets/pet/test", transform=hyperparams["transform"])

train_dataloader = DataLoader(train_dataset, batch_size=hyperparams["batch_size"], shuffle=True, drop_last=True)
test_dataloader  = DataLoader(test_dataset, batch_size=hyperparams["batch_size"], shuffle=True, drop_last=True)

In [16]:
####### VGG-16 모델 불러오기 함수
# 대규모 데이터세트로 사전 학습된 알렉스넷 가중치 불러오기
model               = models.vgg16(weights="VGG16_Weights.IMAGENET1K_V1")
# 마지막 계층의 출력을 2개로 변경 (클래스수)
model.classifier[6] = nn.Linear(4096, len(train_dataset.classes))
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [17]:
device    = "mps" if torch.backends.mps.is_available() and torch.backends.mps.is_built() else "cpu"
model     = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=hyperparams["learning_rate"])

In [18]:
####### VGG-16 모델 학습
for epoch in range(hyperparams["epochs"]): 
    cost = 0.0

    for images, classes in train_dataloader: 
        images  = images.to(device)
        classes = classes.to(device)

        output = model(images)
        loss   = criterion(output, classes)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        cost += loss

    cost = cost / len(train_dataloader)
    print(f"Epoch : {epoch+1:4d}, Cost : {cost:.3f}")

Epoch :    1, Cost : 0.295
Epoch :    2, Cost : 0.089
Epoch :    3, Cost : 0.056
Epoch :    4, Cost : 0.037
Epoch :    5, Cost : 0.030


In [20]:
####### VGG-16 모델 평가
with torch.no_grad():
    model.eval()

    accuracy = 0.0
    for images, classes in test_dataloader:
        images = images.to(device)
        classes = classes.to(device)

        outputs = model(images)
        probs = F.softmax(outputs, dim=-1)
        outputs_classes = torch.argmax(probs, dim=-1)

        accuracy += int(torch.eq(classes, outputs_classes).sum())

    # acc@1: 상위 1개 레이블에 대한 정확도 측정 ('배치안의 데이터수 x 배치수'로 나눔)
    print(f"acc@1 : {accuracy / (len(test_dataloader) * hyperparams['batch_size']) * 100:.2f}%")

acc@1 : 97.43%


In [8]:
####### VGG-16 모델 저장
torch.save(model.state_dict(), "../models/VGG16.pt")
print("Saved the model weights")

Saved the model weights
