In [1]:
import random
import numpy as np 
import pandas as pd 
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import os 
# 텐서로 변환
transform = transforms.Compose([     transforms.ToTensor()])
# 훈련 데이터셋
train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
# 테스트 데이터셋
test = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
        
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



'cuda'

# Seed 고정하기

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42) # Seed 고정

# 하이퍼 파라미터 정의하기

In [45]:
learning_rate = 0.0012
training_epochs = 20
BATCHSIZE = 64

train_dataset_size = int(len(train) * 0.9)
validation_dataset_size = int(len(train) * 0.1)
train_dataset, validation_dataset = random_split(train, [train_dataset_size, validation_dataset_size])
train_dataset_loader = DataLoader(dataset=train_dataset, batch_size=BATCHSIZE, shuffle=True)
validation_dataset_loader = DataLoader(dataset=validation_dataset, batch_size=BATCHSIZE, shuffle=True)
test_dataset_loader = DataLoader(dataset=test, batch_size=BATCHSIZE, shuffle=True)
     


# CNN 모델 정의하기

In [46]:
class CNN(torch.nn.Module):

    def __init__(self, drop=0.1):
        super(CNN, self).__init__()
        self.drop = drop
        
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
            
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
     
        self.dropout = torch.nn.Dropout(p=self.drop)
        
        # 첫 번째 FC 층
        self.fc1 = torch.nn.Linear(7*7*64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)


    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.dropout(out)
        out = self.fc1(out)

        return out


In [47]:
model=CNN().to(device)
loss=torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr = learning_rate)

def model_train(dataloader, model, loss_function, optimizer):

    model.train()

    train_loss_sum = train_correct = train_total = 0

    total_train_batch = len(dataloader)

    for images, labels in dataloader:

        x_train = images.to(device)
        y_train = labels.to(device)

        outputs = model(x_train)
        loss = loss_function(outputs, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss_sum += loss.item()

        train_total += y_train.size(0)
        train_correct += ((torch.argmax(outputs, 1)==y_train)).sum().item()

    train_avg_loss = train_loss_sum / total_train_batch
    train_avg_accuracy = 100*train_correct / train_total

    return (train_avg_loss, train_avg_accuracy)

def model_evaluate(dataloader, model, loss_function, optimizer):

    model.eval()

    with torch.no_grad():

        val_loss_sum = val_correct = val_total = 0

        total_val_batch = len(dataloader)

        for images, labels in dataloader:

            x_val = images.to(device)
            y_val = labels.to(device)

            outputs = model(x_val)
            loss = loss_function(outputs, y_val)

            val_loss_sum += loss.item()

            val_total += y_val.size(0)
            val_correct += ((torch.argmax(outputs, 1)==y_val)).sum().item()

        val_avg_loss = val_loss_sum / total_val_batch
        val_avg_accuracy = 100*val_correct / val_total

    return (val_avg_loss, val_avg_accuracy)
     
def model_test(dataloader,loss_func, model):

    model.eval()

    with torch.no_grad():

        test_loss_sum = test_correct = test_total = 0

        total_test_batch = len(dataloader)

        for images, labels in dataloader:

            x_test = images.to(device)
            y_test = labels.to(device)

            outputs = model(x_test)
            loss = loss_func(outputs, y_test)

            test_loss_sum += loss.item()

            test_total += y_test.size(0)
            test_correct += ((torch.argmax(outputs, 1)==y_test)).sum().item()

        test_avg_loss = test_loss_sum / total_test_batch
        test_avg_accuracy = 100*test_correct / test_total

        print('accuracy:', test_avg_accuracy)
        print('loss:', test_avg_loss)

# 모델 학습하기

In [48]:

train_accuracy_list = []
val_accuracy_list = []
for epoch in range(training_epochs):

    train_avg_loss, train_avg_accuracy = model_train(train_dataset_loader, model, loss, optim)
    train_accuracy_list.append(train_avg_accuracy)
    val_avg_loss, val_avg_accuracy = model_evaluate(validation_dataset_loader, model, loss, optim)
    val_accuracy_list.append(val_avg_accuracy)

    print('epoch:', '%02d' % (epoch + 1),  'train acc =', '{:.3f}'.format(train_avg_accuracy),    'val acc =', '{:.3f}'.format(val_avg_accuracy))


epoch: 01 train acc = 94.174 val acc = 97.400
epoch: 02 train acc = 98.037 val acc = 98.267
epoch: 03 train acc = 98.556 val acc = 98.267
epoch: 04 train acc = 98.772 val acc = 98.600
epoch: 05 train acc = 99.024 val acc = 98.417
epoch: 06 train acc = 99.172 val acc = 98.683
epoch: 07 train acc = 99.157 val acc = 98.633
epoch: 08 train acc = 99.367 val acc = 98.783
epoch: 09 train acc = 99.417 val acc = 98.933
epoch: 10 train acc = 99.485 val acc = 98.933
epoch: 11 train acc = 99.539 val acc = 98.750
epoch: 12 train acc = 99.619 val acc = 98.800
epoch: 13 train acc = 99.574 val acc = 98.667
epoch: 14 train acc = 99.715 val acc = 98.767
epoch: 15 train acc = 99.694 val acc = 98.967
epoch: 16 train acc = 99.702 val acc = 99.100
epoch: 17 train acc = 99.724 val acc = 99.000
epoch: 18 train acc = 99.750 val acc = 98.783
epoch: 19 train acc = 99.757 val acc = 99.017
epoch: 20 train acc = 99.756 val acc = 98.967


# 예측하기

In [49]:
model_test(test_dataset_loader,loss, model)

accuracy: 99.25
loss: 0.031778252591179314


# 모델 성능

## * 최고 정확도
- **99.25%**

## * 시도한 방법

### 데이터 증강
- 시도: affine, jitter 등의 데이터 증강
- 결과: 손글씨 데이터의 단순성과 이미 충분한 데이터 양 때문에 성능 하락

### 학습률 (Learning Rate)
- 시도: 0.001, 0.0012, 0.0015
- 결과: **0.0012**에서 최고 성능

### 드롭아웃 비율 (Drop Rate)
- 시도: 0.1, 0.15, 0.2
- 결과: **0.1**에서 최고 성능

### 배치 크기 (Batch Size)
- 시도: 64, 128, 256
- 결과: **64**에서 최고 성능

### 모델 구조
- 시도: 전결합층 추가, 순차적 레이어(conv2d, relu, maxpool2d) 추가
- 결과: 성능 향상 없음

### 최적화 알고리즘 (Optimizer)
- 시도: Adam, AdamW
- 결과: **Adam**에서 근소한 차이로 더 높은 성능

---

이러한 실험을 통해, 학습률, 드롭아웃 비율, 배치 크기의 최적값을 찾아내고, 데이터 증강과 모델 구조 변화는 본 데이터셋에 대해 큰 영향을 미치지 않는다는 결론을 내렸습니다. 최적화 알고리즘으로는 Adam이 AdamW보다 약간 더 나은 성능을 보였습니다.
