In [164]:
# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchinfo import summary as torch_summary
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

In [155]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [156]:
# 데이터셋을 로드하고 기본 정보를 확인해 보세요.
image, label = trainset[0]

print(f"Image shape: {image.shape}")
print(f"Label: {label}")
print(f"Number of classes: {len(trainset.classes)}")


Image shape: torch.Size([3, 32, 32])
Label: 6
Number of classes: 10
Train dataset size: 50000 (Shape: torch.Size([]))
Test dataset size: 10000 (Shape: torch.Size([]))


In [157]:
num_classes = len(trainset.classes)
print(num_classes)

class_names = trainset.classes
print(class_names)

10
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [173]:
class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd
    
    def forward(self, x):
        return self.lambd(x)


class BasicBlock(nn.Module):
    """ResNet-18, 34에서 사용"""
    # channel 확장시킬떄 사용 expansion
    expansion = 1
    
    def __init__(self, in_channels, out_channels, stride=1, option='B'):
        super().__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        # LLM과 함께 짠 코드 (채널수가 변경될 경우 shortcut연산이 안될 수 있음)
        self.shortcut = nn.Sequential()
        #크기가 같을때는 x와 동일, 크기가 다를때 크기/채널 조정하도록

        # stride 가 1이 아님 (공간 변화) or 채널수가 변하는 경우
        if stride != 1 or in_channels != out_channels * self.expansion:

            if option == 'A': 
                self.shortcut = LambdaLayer(
                    lambda x: F.pad(
                        x[:, :, ::stride, ::stride], # stride로 다운샘플링
                        (0, 0, 0, 0, 0, out_channels * self.expansion - in_channels),# 채널 zero-padding
                        "constant", 0
                    )
                )
            elif option == 'B':
                # Option B: Projection shortcut (차원 변경시만 1×1 conv)
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_channels, out_channels * self.expansion,
                            kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(out_channels * self.expansion)
                )

            elif option =="C":
            # 이 경우는 stride=1, 채널 동일해도 projection 사용
                pass                
    

    def forward(self,x):
        identity = self.shortcut(x)
      # x : torch.Size([1, 64, 32, 32])
        output = F.relu(self.bn1(self.conv1(x))) # torch.Size([1, 64, 30, 30]) -> padding 추가(short cut에서 더해주려면)
        # print(output.shape)
        output = self.bn2(self.conv2(output)) # torch.Size([1, 64, 28, 28])
        # print(output.shape)

        # skip connection
        # print(x.shape)
        output +=identity
        output = F.relu(output)
        return output


class BottleneckBlock(nn.Module):
    """ResNet-50, 101, 152에서 사용"""
    expansion = 4
    
    def __init__(self, in_channels, out_channels, stride=1, option='B'):
        super().__init__()
        
        # 1x1 conv
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        
        # 3x3 conv
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        # 1x1 conv (expansion=4 / 4배 확장 64 -> 256, 128 -> 512, 256 -> 1024, 512 -> 2048)
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        
        self.shortcut = nn.Sequential()
        
        if stride != 1 or in_channels != out_channels * self.expansion:
            if option == 'A':
                self.shortcut = LambdaLayer(
                    lambda x: F.pad(
                        x[:, :, ::stride, ::stride],
                        (0, 0, 0, 0, 0, out_channels * self.expansion - in_channels),
                        "constant", 0
                    )
                )
            elif option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_channels, out_channels * self.expansion,
                            kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(out_channels * self.expansion)
                )
    
    def forward(self, x):
        identity = self.shortcut(x)
        
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        
        out += identity
        out = F.relu(out)
        return out

In [197]:
class ResNet(nn.Module):
    # block -> Basic Block, num_blocks는 각 block개수 list형태로 [3,3,5,2]
    def __init__(self, block, num_blocks, in_channels=3, num_classes=10, option='B'):
        super().__init__()
        self.in_channels = 64
        self.option = option

        # 초기 conv (논문상의 ImageNet은 7x7로 conv, 3x3 max pool을 통해 downsampling 함)
        # 그러나 우리 데이터 cifar 10은 32x32 이므로 최소화
        self.conv1 = nn.Conv2d(3,64,kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)

        # 4개의 residual layers (64, 128, 256, 512)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) # 
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        
    
    def _make_layer(self, block, out_channels, num_blocks, stride):
        layers = []

        # 첫번째 block 쌓기 (stride 적용, 채널수도 변경 가능)
        # block 의 인자들 self,in_channels, out_channels,stride, option="B"
        layers.append(block(self.in_channels, out_channels, stride, option = self.option))
        self.in_channels = out_channels * block.expansion

        # 두번째 block부터는 무조건 stride=1, 채널수 동일하게 유지시키기
        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels, stride=1, option = self.option))

        return nn.Sequential(*layers)

    def forward(self,x):
        output = F.relu(self.bn1(self.conv1(x)))
        output = self.layer1(output)
        output = self.layer2(output)
        output = self.layer3(output)
        output = self.layer4(output)
        
        output = self.avgpool(output) 
        # print("avgpool", output.shape)
        # #적용후 tensor shape : [B, 512, 1, 1] -> flatten 필요 
        output = torch.flatten(output, 1)
        output = self.fc(output)
        # print(output.shape)

        # output = self.fc(self.avgpool(output))
        return output


    def summary(self, input_shape=(32, 32, 3)):
        """
        모델의 summary 출력
        
        Args:
            input_shape: 입력 shape (height, width, channels)
        """
        # PyTorch 형식으로 변환: (C, H, W)
        h, w, c = input_shape
        print(torch_summary(self, input_size=(1, c, h, w), 
                        col_names=["input_size", "output_size", "num_params"],
                        depth=3))


configuration = {
    'resnet34': {
        'block': BasicBlock,
        'num_blocks': [3, 4, 6, 3]
    },

    # 'resnet34_plain': {
        
    # }

    'resnet50': {
        'block': BottleneckBlock,
        'num_blocks': [3, 4, 6, 3]
    },



}

def build_resnet(input_shape, is_50, num_classes=1000, option='B'):
    h, w, c = input_shape

    if is_50:
        config = configuration['resnet50']
        model_name = 'ResNet-50'
    else:
        config = configuration['resnet34']
        model_name = 'ResNet-34'

    model = ResNet(
        block = config['block'],
        num_blocks = config['num_blocks'],
        in_channels = c,
        num_classes = num_classes,
        option = option
    )

    return model



In [None]:
resnet_34 = build_resnet(input_shape=(32,32,3), is_50=False)
resnet_34.summary()

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
ResNet                                   [1, 3, 32, 32]            [1, 1000]                 --
├─Conv2d: 1-1                            [1, 3, 32, 32]            [1, 64, 32, 32]           1,792
├─BatchNorm2d: 1-2                       [1, 64, 32, 32]           [1, 64, 32, 32]           128
├─Sequential: 1-3                        [1, 64, 32, 32]           [1, 64, 32, 32]           --
│    └─BasicBlock: 2-1                   [1, 64, 32, 32]           [1, 64, 32, 32]           --
│    │    └─Sequential: 3-1              [1, 64, 32, 32]           [1, 64, 32, 32]           --
│    │    └─Conv2d: 3-2                  [1, 64, 32, 32]           [1, 64, 32, 32]           36,864
│    │    └─BatchNorm2d: 3-3             [1, 64, 32, 32]           [1, 64, 32, 32]           128
│    │    └─Conv2d: 3-4                  [1, 64, 32, 32]           [1, 64, 32, 32]           36,864
│    │    └─BatchNorm2

In [184]:
resnet_50 = build_resnet(input_shape=(32, 32,3), is_50=True)
resnet_50.summary()

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
ResNet                                   [1, 3, 32, 32]            [1, 10]                   --
├─Conv2d: 1-1                            [1, 3, 32, 32]            [1, 64, 32, 32]           1,792
├─BatchNorm2d: 1-2                       [1, 64, 32, 32]           [1, 64, 32, 32]           128
├─Sequential: 1-3                        [1, 64, 32, 32]           [1, 256, 32, 32]          --
│    └─BottleneckBlock: 2-1              [1, 64, 32, 32]           [1, 256, 32, 32]          --
│    │    └─Sequential: 3-1              [1, 64, 32, 32]           [1, 256, 32, 32]          16,896
│    │    └─Conv2d: 3-2                  [1, 64, 32, 32]           [1, 64, 32, 32]           4,096
│    │    └─BatchNorm2d: 3-3             [1, 64, 32, 32]           [1, 64, 32, 32]           128
│    │    └─Conv2d: 3-4                  [1, 64, 32, 32]           [1, 64, 32, 32]           36,864
│    │    └─BatchNo

In [185]:
BATCH_SIZE = 512
EPOCH = 15

# CIFAR-10 데이터셋에 대해 Normalize와 Tensor 변환을 적용하는 코드
transform = transforms.Compose([
    transforms.ToTensor(),  # 이미지를 Tensor로 변환
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # VGG-16 표준 정규화
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [186]:
trainset[0][0].shape

torch.Size([3, 32, 32])

In [187]:
import time

current_time = time.time()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet_34 = build_resnet(input_shape=(3,32,32), is_50=False)

# resnet_34 = torchvision.models.vgg16(pretrained=True)
resnet_34.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet_34.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

resnet_34_train_losses = []
resnet_34_val_accuracy = []

for epoch in range(EPOCH):
    resnet_34.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for i, (inputs, labels) in enumerate(trainloader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet_34(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if i % 100 == 99:
            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / (i+1):.3f}")
        
    train_loss = running_loss / len(trainloader)
    train_acc = 100 * correct / total
    resnet_34_train_losses.append(train_loss)

    print(f"Epoch {epoch + 1}: Train Accuracy: {train_acc:.2f}%")

    resnet_34.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet_34(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    resnet_34_val_accuracy.append(val_acc)

    print(f"Epoch {epoch + 1}: Validation Accuracy: {val_acc:.2f}%")

print("Finished Training")
print(time.time() - current_time)

Epoch 1: Train Accuracy: 30.24%
Epoch 1: Validation Accuracy: 40.99%
Epoch 2: Train Accuracy: 47.73%
Epoch 2: Validation Accuracy: 50.73%
Epoch 3: Train Accuracy: 56.73%
Epoch 3: Validation Accuracy: 57.08%
Epoch 4: Train Accuracy: 63.73%
Epoch 4: Validation Accuracy: 58.83%
Epoch 5: Train Accuracy: 69.57%
Epoch 5: Validation Accuracy: 61.46%
Epoch 6: Train Accuracy: 75.24%
Epoch 6: Validation Accuracy: 59.77%
Epoch 7: Train Accuracy: 81.10%
Epoch 7: Validation Accuracy: 65.32%
Epoch 8: Train Accuracy: 86.74%
Epoch 8: Validation Accuracy: 61.96%
Epoch 9: Train Accuracy: 90.86%
Epoch 9: Validation Accuracy: 63.64%
Epoch 10: Train Accuracy: 94.27%
Epoch 10: Validation Accuracy: 64.41%
Epoch 11: Train Accuracy: 97.24%
Epoch 11: Validation Accuracy: 61.69%
Epoch 12: Train Accuracy: 98.51%
Epoch 12: Validation Accuracy: 65.71%
Epoch 13: Train Accuracy: 99.52%
Epoch 13: Validation Accuracy: 66.46%


KeyboardInterrupt: 