In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt

# Training hyperparameters shared by the cells below.
NUM_CLASSES = 120
BATCH_SIZE = 64
EPOCHS = 10

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

1. 기본 전처리 + Augmentation + DataLoader

In [2]:
# Image resizing and normalization
def normalize_and_resize_img(image, label):
    """Resize *image* to 224x224, convert it to a tensor and normalize it.

    Each of the three channels is normalized with mean 0.5 and std 0.5.
    Assuming ``ToTensor`` yields values in [0, 1] (see the torchvision
    docs), the result lies in roughly [-1, 1].

    Args:
        image: Input accepted by ``transforms.Resize`` / ``transforms.ToTensor``
            (the only visible caller passes a PIL image).
        label: Passed through untouched.

    Returns:
        Tuple ``(transformed_image, label)``.
    """
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
    pipeline = transforms.Compose(steps)
    processed = pipeline(image)
    return processed, label

# Data augmentation (random horizontal flip, brightness jitter)
def augment(image, label):
    """Randomly flip *image* horizontally and jitter its brightness.

    The image must NOT be mean/std-normalized yet: the clip below assumes
    pixel values in [0, 1], so negative (normalized) values would be lost.

    Args:
        image: A PIL image or an un-normalized image tensor.
        label: Passed through untouched.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2),
    ])
    image = transform(image)
    # Value clipping only makes sense for float tensors in [0, 1]; PIL images
    # have no tensor values to clamp, and clamping an integer tensor to
    # [0, 1] would wipe out its contents.
    if isinstance(image, torch.Tensor) and image.is_floating_point():
        image = torch.clamp(image, 0, 1)
    return image, label

# One-hot encoding (handles both plain Python labels and tensor labels)
def onehot(labels, num_classes=NUM_CLASSES):
    """Return a float one-hot encoding of *labels*.

    Args:
        labels: A ``torch.Tensor`` of class indices, or any value that
            ``torch.tensor`` can convert (e.g. an int or a list of ints).
        num_classes: Size of the one-hot dimension.

    Returns:
        Float tensor produced by ``torch.nn.functional.one_hot(...).float()``.
    """
    if not isinstance(labels, torch.Tensor):
        labels = torch.tensor(labels)
    return torch.nn.functional.one_hot(labels, num_classes=num_classes).float()

# Wrap the functions above in an actual Dataset
class DogDataset(Dataset):
    """Dataset wrapper that applies optional augmentation, then resize + normalization."""

    def __init__(self, base_dataset, is_test=False, with_aug=False):
        """
        base_dataset: source dataset made of (PIL.Image, int) tuples
        is_test: when True, augmentation is never applied
        with_aug: when True (and not is_test), samples are augmented
        """
        self.base_dataset = base_dataset
        self.is_test = is_test
        self.with_aug = with_aug

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.base_dataset)

    def __getitem__(self, idx):
        """Return the ``(image_tensor, label)`` pair for sample *idx*."""
        img, label = self.base_dataset[idx]      # PIL.Image, int
        # Augment BEFORE normalizing. Normalization with mean 0.5 / std 0.5
        # produces negative values, and augment() clips to [0, 1]; doing it
        # the other way round threw away every pixel darker than mid-gray.
        if self.with_aug and not self.is_test:
            img, label = augment(img, label)
        img, label = normalize_and_resize_img(img, label)
        return img, label

def apply_normalize_on_dataset(dataset, is_test=False,
                               batch_size=BATCH_SIZE, with_aug=False,
                               num_workers=2):
    """Wrap *dataset* in a ``DogDataset`` and return a ``DataLoader`` over it.

    Args:
        dataset: Source dataset handed to ``DogDataset``.
        is_test: When True the loader does not shuffle and the dataset
            skips augmentation.
        batch_size: Number of samples per batch.
        with_aug: Forwarded to ``DogDataset`` to enable augmentation.
        num_workers: Number of loader worker processes (default 2, the
            previously hard-coded value).

    Returns:
        The configured ``DataLoader``.
    """
    wrapped = DogDataset(dataset, is_test=is_test, with_aug=with_aug)
    return DataLoader(
        wrapped,
        batch_size=batch_size,
        shuffle=not is_test,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies, so request it
        # only when CUDA is actually available (the notebook also supports
        # a CPU-only fallback).
        pin_memory=torch.cuda.is_available(),
    )

# Build the actual DataLoaders.
# NOTE(review): train_dataset / valid_dataset / test_dataset are not defined in
# the visible cells; they presumably come from an earlier cell — confirm.

# Training split: shuffled, with the basic augmentation enabled.
train_loader = apply_normalize_on_dataset(
    train_dataset, is_test=False, batch_size=BATCH_SIZE, with_aug=True
)

# Validation and test splits: evaluation mode, no augmentation.
valid_loader = apply_normalize_on_dataset(
    valid_dataset, is_test=True, batch_size=BATCH_SIZE, with_aug=False
)
test_loader = apply_normalize_on_dataset(
    test_dataset, is_test=True, batch_size=BATCH_SIZE, with_aug=False
)

print("DataLoaders ready.")


NameError: name 'train_dataset' is not defined