# 패키지 import & 하이퍼파라미터 세팅

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

from tqdm import tqdm
import numpy as np
import random
# Hyperparameter settings.

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Optimiser settings.
learning_rate = 1e-2
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005

# Batching and training length.
batch_size = 128
epochs = 90

# 데이터 전처리 및 로드

In [2]:
# Dataset directory (Kaggle input path); only referenced by the commented-out
# ImageFolder pipeline further down.
root = "/kaggle/input/islvrc-2012-10-pecent-subset"

# Preprocessing applied to every image.
# The normalisation constants are the per-channel RGB mean/std of ImageNet:
#   https://velog.io/@gcoh5589/0.485-0.456-0.406
# When training on your own dataset it is reportedly better to compute that
# dataset's RGB mean/std and normalise with those values instead:
#   https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2
transform = transforms.Compose([
    transforms.Resize((227, 227)),  # resize to the AlexNet input size (227x227)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Alternative pipeline (disabled): load the ILSVRC subset from `root` and split
# it 8:2 into train/test.
# dataset = datasets.ImageFolder(root=root, transform=transform)
# train_size = int(0.8 * len(dataset))
# test_size = len(dataset) - train_size
# trainset, testset = random_split(dataset, [train_size, test_size])
# trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
# testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Active pipeline: the CIFAR-10 train and test splits, downloaded into ./data.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# shuffle:     changes the sample order every epoch; switched off for the test
#              loader so evaluation is consistent and reproducible.
# num_workers: number of subprocesses used to load and preprocess the data.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Report the dataset sizes.
print(f"Train dataset size: {len(trainset)}")
print(f"Test dataset size: {len(testset)}")

# Inspect the first training batch (images and labels).
images, labels = next(iter(trainloader))
print(f"Batch image shape: {images.shape}")  # (batch_size, channels, height, width)
print(f"Batch labels: {labels[:5]}")  # first five labels

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0.00/170M [00:00<?, ?B/s]

  0%|          | 65.5k/170M [00:00<04:31, 628kB/s]

  0%|          | 229k/170M [00:00<02:26, 1.16MB/s]

  1%|          | 918k/170M [00:00<00:47, 3.55MB/s]

  2%|▏         | 3.70M/170M [00:00<00:13, 12.3MB/s]

  6%|▌         | 9.76M/170M [00:00<00:05, 27.6MB/s]

  9%|▉         | 16.1M/170M [00:00<00:04, 37.1MB/s]

 13%|█▎        | 22.5M/170M [00:00<00:03, 43.2MB/s]

 17%|█▋        | 28.9M/170M [00:00<00:03, 47.1MB/s]

 21%|██        | 35.3M/170M [00:01<00:02, 49.6MB/s]

 25%|██▍       | 41.8M/170M [00:01<00:02, 51.4MB/s]

 28%|██▊       | 48.2M/170M [00:01<00:02, 52.5MB/s]

 32%|███▏      | 54.6M/170M [00:01<00:02, 53.3MB/s]

 36%|███▌      | 61.0M/170M [00:01<00:02, 53.9MB/s]

 40%|███▉      | 67.5M/170M [00:01<00:01, 54.3MB/s]

 43%|████▎     | 73.9M/170M [00:01<00:01, 54.5MB/s]

 47%|████▋     | 80.2M/170M [00:01<00:01, 54.6MB/s]

 51%|█████     | 86.7M/170M [00:01<00:01, 54.8MB/s]

 55%|█████▍    | 93.2M/170M [00:02<00:01, 55.0MB/s]

 58%|█████▊    | 99.6M/170M [00:02<00:01, 55.0MB/s]

 62%|██████▏   | 106M/170M [00:02<00:01, 55.1MB/s] 

 66%|██████▌   | 113M/170M [00:02<00:01, 55.1MB/s]

 70%|██████▉   | 119M/170M [00:02<00:00, 55.2MB/s]

 74%|███████▎  | 125M/170M [00:02<00:00, 55.1MB/s]

 77%|███████▋  | 132M/170M [00:02<00:00, 55.0MB/s]

 81%|████████  | 138M/170M [00:02<00:00, 55.1MB/s]

 85%|████████▍ | 145M/170M [00:02<00:00, 55.1MB/s]

 89%|████████▊ | 151M/170M [00:03<00:00, 55.0MB/s]

 92%|█████████▏| 157M/170M [00:03<00:00, 54.9MB/s]

 96%|█████████▌| 164M/170M [00:03<00:00, 54.9MB/s]

100%|█████████▉| 170M/170M [00:03<00:00, 54.7MB/s]

100%|██████████| 170M/170M [00:03<00:00, 49.3MB/s]




Extracting ./data/cifar-10-python.tar.gz to ./data


Files already downloaded and verified


Train dataset size: 50000
Test dataset size: 10000


Batch image shape: torch.Size([128, 3, 227, 227])
Batch labels: tensor([6, 8, 4, 6, 3])


# AlexNet 모델 정의

In [3]:
class AlexNet(nn.Module):
    """AlexNet-style classifier: five convolutional layers and three linear layers.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
        dropout: Drop probability used by both dropout layers of the classifier.
    """

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Convolutional feature extractor. A max-pool whose stride is smaller
        # than its kernel size is an "overlapping" pooling layer.
        conv_stack = [
            # Layer 1: 3 -> 96 channels, 11x11 kernel, stride 4, then LRN and pooling.
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 2: 96 -> 256 channels, 5x5 kernel, then LRN and pooling.
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Layer 3: 256 -> 384 channels, 3x3 kernel.
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Layer 4: 384 -> 384 channels, 3x3 kernel.
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Layer 5: 384 -> 256 channels, 3x3 kernel, then pooling.
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.layers = nn.Sequential(*conv_stack)

        # Adaptive pooling picks its own kernel size/stride so that the
        # classifier always receives a 6x6 feature map.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        fc_stack = [
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),  # layer 6
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),  # layer 7
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),  # layer 8
        ]
        self.classifier = nn.Sequential(*fc_stack)

        # Custom weight initialisation (disabled):
        # self._init_bias()

    # def _init_bias(self):
    #     for layer in self.layers:
    #         if isinstance(layer, nn.Conv2d):
    #             # Initialise the weights of every layer from a zero-mean Gaussian with std 0.01.
    #             # Neuron bias: constant 1 for conv layers 2, 4, 5 and the fully connected layers, 0 elsewhere.
    #             nn.init.normal_(layer.weight, mean=0, std=0.01)  # fills the tensor from N(mean, std^2)
    #             nn.init.constant_(layer.bias, 0)  # fills the tensor with the given value
    #
    #     nn.init.constant_(self.layers[4].bias, 1)  # conv layer 2
    #     nn.init.constant_(self.layers[10].bias, 1)  # conv layer 4
    #     nn.init.constant_(self.layers[12].bias, 1)  # conv layer 5
    #     nn.init.constant_(self.classifier[1].bias, 1)  # linear layer 6
    #     nn.init.constant_(self.classifier[4].bias, 1)  # linear layer 7
    #     nn.init.constant_(self.classifier[6].bias, 1)  # linear layer 8

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        pooled = self.avgpool(self.layers(x))  # conv features, forced to 6x6
        flat = torch.flatten(pooled, 1)  # keep the batch dimension, flatten the rest
        return self.classifier(flat)

## AlexNet 구조 확인

In [4]:
import torchsummary
# The dataset has 10 classes, so the network is built with a 10-way output.
model = AlexNet(num_classes=10)
model = model.to(device)

# The summary is computed for a (3, 227, 227) input. The original paper states
# 3 x 224 x 224, but that is reportedly an error; structurally 227 is the
# size that fits the architecture.
torchsummary.summary(model, input_size=(3, 227, 227), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 55, 55]          34,944
              ReLU-2           [-1, 96, 55, 55]               0
 LocalResponseNorm-3           [-1, 96, 55, 55]               0
         MaxPool2d-4           [-1, 96, 27, 27]               0
            Conv2d-5          [-1, 256, 27, 27]         614,656
              ReLU-6          [-1, 256, 27, 27]               0
 LocalResponseNorm-7          [-1, 256, 27, 27]               0
         MaxPool2d-8          [-1, 256, 13, 13]               0
            Conv2d-9          [-1, 384, 13, 13]         885,120
             ReLU-10          [-1, 384, 13, 13]               0
           Conv2d-11          [-1, 384, 13, 13]       1,327,488
             ReLU-12          [-1, 384, 13, 13]               0
           Conv2d-13          [-1, 256, 13, 13]         884,992
             ReLU-14          [-1, 256,

# 손실 함수, 옵티마이저, lr scheduler 설정

In [5]:
# Cross-entropy loss for training and validation.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

# Learning-rate adjustment: when the validation error stops improving at the
# current learning rate, divide the rate by 10. In the paper the rate starts at
# 0.01 and is reduced three times in total.
#   factor   - on an update the new rate is lr * factor
#   patience - how many consecutive epochs without improvement of the monitored
#              value are tolerated before the rate is reduced (the library
#              default is 10; 4 is used here)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=4,
    verbose=True,
)



# train

In [6]:
def train(model, data_loader, loss_fn, optimizer, device):
    """Train ``model`` for one epoch and return the mean loss and accuracy.

    Args:
        model: Network to optimise; it is switched to training mode.
        data_loader: Iterable of ``(images, labels)`` batches. It must expose a
            sized ``dataset`` attribute, which is used to average the metrics.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss
            tensor. The accumulation below assumes it is the mean over the batch.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the accumulated loss and the number of
        correct predictions, each divided by ``len(data_loader.dataset)``.
    """
    model.train()
    running_loss = 0.0
    corr = 0.0
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass.
        outputs = model(images)

        # Use the loss function supplied by the caller, and keep the result in
        # its own variable so the ``loss_fn`` parameter is not overwritten.
        loss = loss_fn(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Scale the batch loss by the batch size to accumulate a per-sample sum.
        running_loss += loss.item() * images.size(0)

        # torch.max over dim 1 returns (max value, max index); only the index
        # is needed to compare the prediction with the true label.
        _, pred = torch.max(outputs, 1)
        corr += pred.eq(labels).sum().item()

    # Average both metrics over the whole dataset.
    num_samples = len(data_loader.dataset)
    return running_loss / num_samples, corr / num_samples

# test

In [7]:
# 테스트 루프
def test(model, data_loader, loss_fn, device):
    model.eval()
    with torch.no_grad():
        # loss와 accuracy 계산을 위한 임시 변수
        corr = 0.0
        running_loss = 0.0
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            corr += (predicted == labels).sum().item()
            running_loss += loss_fn(outputs, labels).item() * images.size(0)
    acc = corr / len(data_loader.dataset)
    return running_loss / len(data_loader.dataset), acc

# Training and validation loop

In [8]:
# Lowest validation loss seen so far, plus the per-epoch metric histories.
min_loss = float("inf")
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

# Each epoch: one training pass, then one evaluation pass on the test loader.
for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train(model, trainloader, criterion, optimizer, device)
    val_loss, val_acc = test(model, testloader, criterion, device)

    # Track the best validation loss. The checkpoint write is commented out,
    # so nothing is written to disk even though the message says "Saving Model!".
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss
        # torch.save(model.state_dict(), f'{MODEL_NAME}.pth')

    # Feed the validation loss to the scheduler, which updates the learning rate.
    lr_scheduler.step(val_loss)

    # Per-epoch report.
    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')

    # Keep the results for later plotting.
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

  0%|          | 0/90 [00:00<?, ?it/s]

  1%|          | 1/90 [01:22<2:01:46, 82.10s/it]

[INFO] val_loss has been improved from inf to 1.83192. Saving Model!
epoch 01, loss: 2.16399, acc: 0.18482, val_loss: 1.83192, val_accuracy: 0.31550


  2%|▏         | 2/90 [02:44<2:00:17, 82.01s/it]

[INFO] val_loss has been improved from 1.83192 to 1.38572. Saving Model!
epoch 02, loss: 1.61961, acc: 0.40258, val_loss: 1.38572, val_accuracy: 0.48980


  3%|▎         | 3/90 [04:06<1:58:53, 81.99s/it]

[INFO] val_loss has been improved from 1.38572 to 1.15312. Saving Model!
epoch 03, loss: 1.29644, acc: 0.52798, val_loss: 1.15312, val_accuracy: 0.59240


  4%|▍         | 4/90 [05:28<1:57:39, 82.08s/it]

[INFO] val_loss has been improved from 1.15312 to 0.96966. Saving Model!
epoch 04, loss: 1.05671, acc: 0.62466, val_loss: 0.96966, val_accuracy: 0.65670


  6%|▌         | 5/90 [06:50<1:56:12, 82.03s/it]

[INFO] val_loss has been improved from 0.96966 to 0.83109. Saving Model!
epoch 05, loss: 0.87912, acc: 0.68892, val_loss: 0.83109, val_accuracy: 0.71270


  7%|▋         | 6/90 [08:13<1:55:16, 82.33s/it]

[INFO] val_loss has been improved from 0.83109 to 0.76083. Saving Model!
epoch 06, loss: 0.74926, acc: 0.73582, val_loss: 0.76083, val_accuracy: 0.73630


  8%|▊         | 7/90 [09:35<1:53:45, 82.24s/it]

[INFO] val_loss has been improved from 0.76083 to 0.62722. Saving Model!
epoch 07, loss: 0.66344, acc: 0.76900, val_loss: 0.62722, val_accuracy: 0.78640


  9%|▉         | 8/90 [10:57<1:52:31, 82.33s/it]

[INFO] val_loss has been improved from 0.62722 to 0.61207. Saving Model!
epoch 08, loss: 0.58703, acc: 0.79448, val_loss: 0.61207, val_accuracy: 0.79180


 10%|█         | 9/90 [12:20<1:51:14, 82.40s/it]

[INFO] val_loss has been improved from 0.61207 to 0.57493. Saving Model!
epoch 09, loss: 0.52032, acc: 0.81820, val_loss: 0.57493, val_accuracy: 0.80290


 11%|█         | 10/90 [13:42<1:49:55, 82.44s/it]

epoch 10, loss: 0.46498, acc: 0.83868, val_loss: 0.59538, val_accuracy: 0.79820


 12%|█▏        | 11/90 [15:05<1:48:43, 82.57s/it]

[INFO] val_loss has been improved from 0.57493 to 0.52668. Saving Model!
epoch 11, loss: 0.41290, acc: 0.85594, val_loss: 0.52668, val_accuracy: 0.81990


 13%|█▎        | 12/90 [16:28<1:47:16, 82.52s/it]

epoch 12, loss: 0.35872, acc: 0.87454, val_loss: 0.52851, val_accuracy: 0.82320


 14%|█▍        | 13/90 [17:50<1:45:59, 82.59s/it]

[INFO] val_loss has been improved from 0.52668 to 0.49563. Saving Model!
epoch 13, loss: 0.31814, acc: 0.88934, val_loss: 0.49563, val_accuracy: 0.83720


 16%|█▌        | 14/90 [19:12<1:44:25, 82.45s/it]

epoch 14, loss: 0.28876, acc: 0.89872, val_loss: 0.50674, val_accuracy: 0.83370


 17%|█▋        | 15/90 [20:35<1:43:16, 82.62s/it]

[INFO] val_loss has been improved from 0.49563 to 0.48776. Saving Model!
epoch 15, loss: 0.25057, acc: 0.91196, val_loss: 0.48776, val_accuracy: 0.84520


 18%|█▊        | 16/90 [21:58<1:42:01, 82.72s/it]

epoch 16, loss: 0.23026, acc: 0.91922, val_loss: 0.51178, val_accuracy: 0.83970


 19%|█▉        | 17/90 [23:20<1:40:20, 82.47s/it]

epoch 17, loss: 0.19708, acc: 0.93092, val_loss: 0.52482, val_accuracy: 0.84580


 20%|██        | 18/90 [24:43<1:38:55, 82.43s/it]

epoch 18, loss: 0.17738, acc: 0.93788, val_loss: 0.54221, val_accuracy: 0.84520


 21%|██        | 19/90 [26:05<1:37:26, 82.34s/it]

epoch 19, loss: 0.15382, acc: 0.94572, val_loss: 0.52524, val_accuracy: 0.84640


 22%|██▏       | 20/90 [27:27<1:36:03, 82.34s/it]

epoch 20, loss: 0.14182, acc: 0.95026, val_loss: 0.53978, val_accuracy: 0.83500


 23%|██▎       | 21/90 [28:49<1:34:37, 82.29s/it]

epoch 21, loss: 0.06036, acc: 0.98042, val_loss: 0.50984, val_accuracy: 0.86300


 24%|██▍       | 22/90 [30:12<1:33:25, 82.43s/it]

epoch 22, loss: 0.03875, acc: 0.98828, val_loss: 0.52823, val_accuracy: 0.86570


 26%|██▌       | 23/90 [31:34<1:31:54, 82.31s/it]

epoch 23, loss: 0.03371, acc: 0.98966, val_loss: 0.53614, val_accuracy: 0.86640


 27%|██▋       | 24/90 [32:56<1:30:26, 82.22s/it]

epoch 24, loss: 0.02953, acc: 0.99080, val_loss: 0.55322, val_accuracy: 0.86680


 28%|██▊       | 25/90 [34:19<1:29:14, 82.38s/it]

epoch 25, loss: 0.02715, acc: 0.99150, val_loss: 0.54965, val_accuracy: 0.86880


 29%|██▉       | 26/90 [35:42<1:28:11, 82.68s/it]

epoch 26, loss: 0.02301, acc: 0.99328, val_loss: 0.55574, val_accuracy: 0.86940


 30%|███       | 27/90 [37:05<1:26:51, 82.73s/it]

epoch 27, loss: 0.02173, acc: 0.99382, val_loss: 0.55804, val_accuracy: 0.86870


 31%|███       | 28/90 [38:27<1:25:07, 82.38s/it]

epoch 28, loss: 0.02179, acc: 0.99394, val_loss: 0.56151, val_accuracy: 0.86900


 32%|███▏      | 29/90 [39:49<1:23:50, 82.47s/it]

# train/valid acc, loss 시각화

In [None]:
import matplotlib.pyplot as plt
def plot_training_history(train_losses, val_losses, train_accuracies, val_accuracies):
    """Show train/validation loss and accuracy curves side by side.

    Args:
        train_losses: Per-epoch training losses; its length sets the x-axis range.
        val_losses: Per-epoch validation losses.
        train_accuracies: Per-epoch training accuracies.
        val_accuracies: Per-epoch validation accuracies.
    """
    # The x-axis is the zero-based epoch index.
    epoch_axis = range(len(train_losses))

    # One entry per subplot: (title, y label, train series, train label,
    # validation series, validation label).
    panels = (
        ('Train vs Validation Loss', 'Loss',
         train_losses, 'Train Loss', val_losses, 'Validation Loss'),
        ('Train vs Validation Accuracy', 'Accuracy',
         train_accuracies, 'Train Accuracy', val_accuracies, 'Validation Accuracy'),
    )

    plt.figure(figsize=(12, 6))
    for position, panel in enumerate(panels, start=1):
        title, y_label, train_series, train_label, val_series, val_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_series, label=train_label, color='blue')
        plt.plot(epoch_axis, val_series, label=val_label, color='orange')
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot the loss/accuracy histories collected by the epoch loop.
plot_training_history(train_losses, val_losses, train_accuracies, val_accuracies)