In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [7]:
## Sequential -> 차례로 쌓아가는 과정
class VGG16(nn.Module):
    def __init__(self, num_classes=1000):   # num_classes=1000 -> 클래스의 수를 1000으로 설정
        super(VGG16, self).__init__()       # super -> nn.Module을 가리킴
        self.features = nn.Sequential(      # Sequential -> 차례대로 시퀀서를 쌓음
            # 첫 번째 블록
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),    # 1층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),   # 2층
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),                                  # 반으로 줄어듬

            # 두 번째 블록
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),  # 3층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), # 4층
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # 세 번째 블록
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), # 5층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1), # 6층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1), # 7층
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # 네 번째 블록
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1), # 8층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), # 9층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), # 10층
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # 다섯 번째 블록
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), # 11층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), # 12층
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), # 13층
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),   # 14층
            nn.ReLU(inplace=True),
            nn.Dropout(),

            nn.Linear(4096, 4096),          # 15층
            nn.ReLU(inplace=True),
            nn.Dropout(),

            nn.Linear(4096, num_classes),   # 16층
        )

    def forward(self, x):
        x = F.interpolate(x, size=(224, 224), mode= "bilinear", align_corners= False) # Model에 맞는 이미지 사이즈 = 224, 224 사이즈
        x = self.features(x)
        x = x.view(x.size(0), -1)           # 평탄화 과정 -> flatten
        x = self.classifier(x)              # 분류
        return

In [8]:
# Model 생성
model = VGG16()

In [9]:
print(model)

VGG16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation

In [14]:
## 데이터셋 준비
import torchvision
from torchvision import transforms              # == import torchvision.transforms as transforms #
from torch import optim                         # == import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import numpy as np

In [15]:
# 데이터셋 가져오기
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
])

In [16]:
# 전체 train 데이터셋 가져오기 : train : val : test -> 80 : 10 : 10
trainset_full = torchvision.datasets.CIFAR10(root="../data", train=True, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:16<00:00, 10286356.48it/s]


Extracting ../data\cifar-10-python.tar.gz to ../data


In [27]:
trainset_full

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ../data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [17]:
len(trainset_full)

50000

In [21]:
## 50000개 중에서 1퍼센트만 사용
num_train = len(trainset_full)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(0.01*num_train)
train_idx = indices[:split]
train_idx

[5219,
 32326,
 43827,
 6153,
 26640,
 421,
 31738,
 4700,
 7021,
 26070,
 36724,
 37185,
 17689,
 19031,
 28867,
 1724,
 18759,
 1173,
 20412,
 41479,
 39224,
 24200,
 11266,
 18373,
 7298,
 25361,
 38009,
 15429,
 36940,
 4651,
 38107,
 47642,
 49818,
 27667,
 42609,
 21611,
 33738,
 43870,
 47606,
 9838,
 29460,
 16507,
 4665,
 11186,
 10210,
 39305,
 21423,
 18870,
 39531,
 18092,
 15668,
 5262,
 34780,
 1570,
 21995,
 35380,
 8819,
 43670,
 40000,
 29075,
 11404,
 31839,
 102,
 18494,
 15404,
 39376,
 41878,
 24620,
 30919,
 45932,
 12691,
 15014,
 49387,
 16490,
 29157,
 7697,
 39420,
 18106,
 43657,
 25336,
 30052,
 46220,
 30699,
 47929,
 33936,
 17857,
 28429,
 18699,
 41474,
 1540,
 13979,
 32204,
 19763,
 30292,
 7979,
 17771,
 28195,
 36671,
 41763,
 38498,
 9337,
 2226,
 6258,
 43171,
 44764,
 37053,
 23814,
 1507,
 8227,
 46715,
 39782,
 14066,
 4202,
 1398,
 11614,
 2697,
 33873,
 6243,
 7682,
 21158,
 22433,
 33245,
 1492,
 7607,
 6114,
 41584,
 10674,
 46249,
 14654,
 

In [28]:
# trainset 가져오기
trainset = Subset(trainset_full, train_idx)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)
trainloader

<torch.utils.data.dataloader.DataLoader at 0x145f9936a50>

In [23]:
# validation 데이터셋
# CIFAR-10 검증 데이터셋 로드
valset_full = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)

# 이중에서 1퍼센트만 사용해보기
num_val = len(valset_full)
val_indices = list(range(num_val))
np.random.shuffle(val_indices) # 랜덤 선택
val_split = int(np.floor(0.01 * num_val))  # 예: 전체 데이터셋의 10%만 사용
val_idx = val_indices[:val_split]

Files already downloaded and verified


In [24]:
# val 데이터셋 가져오기
valset = Subset(valset_full, val_idx)
val_loader = DataLoader(valset, batch_size=64, shuffle=False, num_workers=2)

In [25]:
# test 데이터셋 가져오기
testset_full = torchvision.datasets.CIFAR10(root='../data', train=False,
                                       download=True, transform=transform)
testset = Subset(testset_full, val_idx)

testloader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified


In [26]:
# 모델 정의
vgg16 = VGG16(num_classes=10)

In [33]:
# 손실함수와 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)

In [34]:
def train_and_validate(model, train_loader,val_loader, optimizer, criterion, epochs, device):
    train_losses = []     # 실제 학습할 때의 loss 값 : 예측값과 실제값의 차이
    val_losses = []       # 위에서 학습한 모델을 적용했을 때, val 데이터 셋에 대한 예측값과  실제값의 차이

    model.to(device)    # 나중에 실행할 때 GPU/CPU에 있는지 확인

    for epoch in range(epochs):
        model.train()   # train 모드로 설정
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device) # data를 GPU로 이동

            optimizer.zero_grad()                # 그라디언트 초기화
            outputs = model(inputs)             # 예측값
            loss = criterion(outputs, labels)
            loss.backward()                     # 역전파
            optimizer.step()                     # 가중치 업데이트
            running_loss += loss.item()


        # 에포크별 평균 학습 손실 계산
        epoch_loss = running_loss / len(train_loader)
        train_losses.append(epoch_loss)
        print(f'Epoch [{epoch + 1}/{epochs}] - Validation loss: {epoch_loss:.3f}')


        # 검증 과정
        model.eval()                # 평가 모드로 전환
        running_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()

        # 에포크 별 검증 손실 계산
        epoch_val_loss = running_val_loss / len(val_loader)
        val_losses.append(epoch_val_loss)
        print(f'Epoch [{epoch + 1}/{epochs}] - Validation loss: {epoch_loss:.3f}')

    return train_losses, val_losses

In [35]:
## 7. 모델 학습 (평가 함수는 아래에 정의함)

In [37]:
# model, train_loader,val_loader, optimizer, criterion, epochs, device
device = torch.device("cuda")
history = train_and_validate(vgg16, trainloader, val_loader, optimizer, criterion, epochs=10, device=device)

TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not NoneType