In [9]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd 
import random 
import os

# Preprocessing applied to every MNIST image
transform = transforms.Compose([
    transforms.Resize((224, 224)), # resize the image to 224x224
    transforms.Grayscale(num_output_channels=3), # convert the grayscale image to 3 channels
    transforms.ToTensor(),
])
# Load the MNIST training and test splits from ./data (download=True is passed),
# applying the transform above to each image. The DataLoaders are built in a later cell.
train = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)


# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Bare expression: displays the selected device as the cell output.
device

device(type='cuda', index=0)

# Seed 고정하기

In [10]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU and CUDA generators, records the seed in ``PYTHONHASHSEED`` and
    configures cuDNN for deterministic behaviour.

    Args:
        seed (int): Seed value shared by all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not just the current one (ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different convolution algorithms from run
    # to run, so it must be switched off for the results to be reproducible.
    torch.backends.cudnn.benchmark = False

seed_everything(42) # fix the seed

# 하이퍼 파라미터 정의하기 

In [15]:
# Optimisation hyperparameters.
learning_rate = 0.001
training_epochs = 10
BATCHSIZE = 64

# 90/10 train/validation split. The validation size is taken as the remainder
# so the two lengths always sum to len(train); truncating both products
# independently can leave samples unassigned and make random_split raise.
train_dataset_size = int(len(train) * 0.9)
validation_dataset_size = len(train) - train_dataset_size
train_dataset, validation_dataset = random_split(train, [train_dataset_size, validation_dataset_size])

# Only the training loader shuffles: evaluation accuracy does not depend on
# sample order, and a fixed order keeps validation/test passes repeatable.
train_dataset_loader = DataLoader(dataset=train_dataset, batch_size=BATCHSIZE, shuffle=True)
validation_dataset_loader = DataLoader(dataset=validation_dataset, batch_size=BATCHSIZE, shuffle=False)
test_dataset_loader = DataLoader(dataset=test, batch_size=BATCHSIZE, shuffle=False)

# ResNet 모델 정의 , 손실함수 정의하기 

In [17]:
# Load a pretrained ResNet-18 and adapt it
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
model = resnet18(pretrained=True) 
# Replace the first convolution with a newly constructed layer (3 input channels,
# 64 output channels, 7x7 kernel, stride 2, padding 3, no bias).
# NOTE(review): a freshly constructed layer does not carry the loaded pretrained
# weights for conv1 — confirm this re-initialisation is intended.
model.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 
# Replace the classification head with a 10-output linear layer (one per digit).
model.fc = nn.Linear(model.fc.in_features, 10) 
model.to(device)

# Classification loss and Adam optimizer over all model parameters.
loss = nn.CrossEntropyLoss()
# NOTE(review): this rebinds `optim`, shadowing the `torch.optim` module imported
# under the same name at the top of the notebook.
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)
def model_train(dataloader, model, loss_function, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        model: Module to optimise. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        loss_function: Callable ``(outputs, labels) -> scalar loss tensor``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        tuple: ``(average_loss, accuracy_percent)``. The loss is the mean of
        the per-batch loss values; the accuracy is the percentage of samples
        whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    batch_count = len(dataloader)
    if batch_count == 0:
        raise ValueError("model_train received an empty dataloader")

    # Follow the model's own device instead of relying on a notebook-level
    # global, so the function works regardless of which cells have been run.
    target_device = next((p.device for p in model.parameters()), torch.device("cpu"))

    model.train()

    loss_sum = 0.0
    correct = 0
    seen = 0

    for images, labels in dataloader:
        x_train = images.to(target_device)
        y_train = labels.to(target_device)

        outputs = model(x_train)
        batch_loss = loss_function(outputs, y_train)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        seen += y_train.size(0)
        correct += (torch.argmax(outputs, 1) == y_train).sum().item()

    train_avg_loss = loss_sum / batch_count
    train_avg_accuracy = 100 * correct / seen

    return (train_avg_loss, train_avg_accuracy)

def model_evaluate(dataloader, model, loss_function, optimizer=None):
    """Score ``model`` on ``dataloader`` without updating its parameters.

    Args:
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        model: Module to evaluate. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        loss_function: Callable ``(outputs, labels) -> scalar loss tensor``.
        optimizer: Unused; accepted only so existing four-argument calls keep
            working.

    Returns:
        tuple: ``(average_loss, accuracy_percent)``. The loss is the mean of
        the per-batch loss values; the accuracy is the percentage of samples
        whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    batch_count = len(dataloader)
    if batch_count == 0:
        raise ValueError("model_evaluate received an empty dataloader")

    # Follow the model's own device instead of relying on a notebook-level
    # global, so the function works regardless of which cells have been run.
    target_device = next((p.device for p in model.parameters()), torch.device("cpu"))

    model.eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for images, labels in dataloader:
            x_val = images.to(target_device)
            y_val = labels.to(target_device)

            outputs = model(x_val)
            loss_sum += loss_function(outputs, y_val).item()

            seen += y_val.size(0)
            correct += (torch.argmax(outputs, 1) == y_val).sum().item()

    val_avg_loss = loss_sum / batch_count
    val_avg_accuracy = 100 * correct / seen

    return (val_avg_loss, val_avg_accuracy)

def model_test(dataloader,loss_func, model):
    """Score ``model`` on ``dataloader``, print the metrics and return them.

    Args:
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        loss_func: Callable ``(outputs, labels) -> scalar loss tensor``.
        model: Module to evaluate. Batches are moved to the device of its
            first parameter (CPU if it has no parameters).

    Returns:
        tuple: ``(average_loss, accuracy_percent)``, the same two values that
        are printed. The loss is the mean of the per-batch loss values; the
        accuracy is the percentage of samples whose arg-max prediction equals
        the label.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    batch_count = len(dataloader)
    if batch_count == 0:
        raise ValueError("model_test received an empty dataloader")

    # Follow the model's own device instead of relying on a notebook-level
    # global, so the function works regardless of which cells have been run.
    target_device = next((p.device for p in model.parameters()), torch.device("cpu"))

    model.eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for images, labels in dataloader:
            x_test = images.to(target_device)
            y_test = labels.to(target_device)

            outputs = model(x_test)
            loss_sum += loss_func(outputs, y_test).item()

            seen += y_test.size(0)
            correct += (torch.argmax(outputs, 1) == y_test).sum().item()

    test_avg_loss = loss_sum / batch_count
    test_avg_accuracy = 100 * correct / seen

    print('accuracy:', test_avg_accuracy)
    print('loss:', test_avg_loss)

    # Returned as well as printed, matching model_train / model_evaluate.
    return (test_avg_loss, test_avg_accuracy)



# 모델 학습 및 추론

In [18]:

# Per-epoch accuracy history for the training and validation splits.
train_accuracy_list = []
val_accuracy_list = []
for epoch in range(training_epochs):

    # One optimisation pass over the training split, then score the validation split.
    train_avg_loss, train_avg_accuracy = model_train(train_dataset_loader, model, loss, optim)
    train_accuracy_list.append(train_avg_accuracy)
    val_avg_loss, val_avg_accuracy = model_evaluate(validation_dataset_loader, model, loss, optim)
    val_accuracy_list.append(val_avg_accuracy)

    # NOTE(review): the test split is scored after every epoch. Picking an epoch
    # from these numbers would tune on the test set; use the validation metrics
    # for that choice and treat the test score as a final check only.
    model_test(test_dataset_loader,loss, model)
    # Epoch numbers are printed 1-based and zero-padded to two digits.
    print('epoch:', '%02d' % (epoch + 1),  'train acc =', '{:.3f}'.format(train_avg_accuracy),    'val acc =', '{:.3f}'.format(val_avg_accuracy))


accuracy: 98.97
loss: 0.03476915317683949
epoch: 01 train acc = 97.765 val acc = 98.817
accuracy: 98.62
loss: 0.043150850962832644
epoch: 02 train acc = 98.894 val acc = 98.350
accuracy: 99.39
loss: 0.020310871276248522
epoch: 03 train acc = 99.161 val acc = 99.250
accuracy: 99.29
loss: 0.02381780717739165
epoch: 04 train acc = 99.222 val acc = 99.067
accuracy: 99.29
loss: 0.021216130101000635
epoch: 05 train acc = 99.404 val acc = 99.233
accuracy: 99.3
loss: 0.02430996358573324
epoch: 06 train acc = 99.444 val acc = 98.983
accuracy: 99.18
loss: 0.02489619329250794
epoch: 07 train acc = 99.431 val acc = 99.200
accuracy: 99.39
loss: 0.019817810152536097
epoch: 08 train acc = 99.670 val acc = 99.583
accuracy: 99.31
loss: 0.024669068379715182
epoch: 09 train acc = 99.604 val acc = 99.200
accuracy: 99.3
loss: 0.02303751501787968
epoch: 10 train acc = 99.572 val acc = 99.167


# ResNet 모델을 활용한 MNIST 데이터 분류

   ## * 개요
 사전 학습된 ResNet18 모델을 사용하여 MNIST 데이터셋을 분류하는 실험을 진행하였습니다. 일반적인 CNN 모델과 비교했을 때, ResNet 모델이 더 우수한 성능을 보여주었습니다. 그러나 매 epoch마다 테스트 데이터에 대한 정확도를 측정한 결과, 정확도가 epoch이 늘어남에 따라 지속적으로 상승하지는 않음을 확인하였습니다. 따라서 적절한 epoch 수를 선택해야 할 것 같습니다. 이때 epoch 수는 테스트 데이터가 아닌 검증 데이터의 정확도를 기준으로 선택하는 것이 바람직합니다.

## * 모델 수정 및 학습 과정

사전 학습된 ResNet18 모델을 로드하고, MNIST 데이터셋에 적합하도록 다음과 같이 모델의 일부를 수정하였습니다.

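- 첫 번째 합성곱 층(`conv1`)을 동일한 구조(입력 3채널, 출력 64채널, 7×7 커널, stride 2, padding 3)의 새 층으로 교체하였습니다. 새로 생성된 층이므로 이 층의 가중치는 사전 학습된 값이 아니라 새로 초기화된 값입니다.
- 마지막 완전 연결 층(`fc`)의 출력 차원을 MNIST의 클래스 수인 10으로 변경하였습니다.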

이후, 수정된 모델을 이용하여 각 epoch마다 학습 데이터셋과 검증 데이터셋에 대한 정확도를 구했습니다.


## * 결론

   사전 학습된 ResNet 모델이 MNIST 데이터 분류 작업에서 일반 CNN 모델보다 더 우수한 성능을 보여주었음을 확인하였습니다. 그러나, 모델 학습 과정에서 적절한 epoch 수의 선택이 중요함을 확인했습니다. 
