In [None]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("device:", device)

In [None]:
# Every sample is converted to a tensor before it reaches the model.
transform = transforms.ToTensor()

# Options shared by the train and test splits.
mnist_kwargs = dict(root="./data", transform=transform, download=True)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Shuffle only the training split; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("Number of training batches:", len(train_loader))
print("Number of test batches:", len(test_loader))

# Peek at a single training batch to confirm the tensor shapes.
images, labels = next(iter(train_loader))
print("Batch image shape:", images.shape)
print("Batch label shape:", labels.shape)

## Model: SimpleCNN
Architecture:
- `Conv2d(1→16, k3, p1) → ReLU → MaxPool2d(2)`
- `Conv2d(16→32, k3, p1) → ReLU → MaxPool2d(2)`
- `Flatten → Linear(32*7*7→128) → ReLU → Linear(128→10)`


**nn.Conv2d(in_channels, out_channels, kernel_size,<br> stride=1, padding=0, dilation=1, groups=1, bias=True)** <br><br>

in_channels: 들어오는 feature map의 개수<br>
out_channels: 나가는 feature map의 개수 = kernel의 개수 <br>
kernel_size: kernel의 가로·세로 크기 ⇒ 예를 들어 3이면 kernel 하나의 shape은 (in_channels × 3 × 3) <br>

In [None]:
class SimpleCNN(nn.Module):
    """Two-block convolutional classifier for single-channel 28x28 images.

    Architecture:
        Conv2d(1->16, k3, p1) -> ReLU -> MaxPool2d(2)
        Conv2d(16->32, k3, p1) -> ReLU -> MaxPool2d(2)
        Flatten -> Linear(32*7*7 -> 128) -> ReLU -> Linear(128 -> 10)

    The final layer returns raw logits (no softmax), which is what
    ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self):
        super().__init__()

        # With kernel_size=3 and padding=1 each convolution keeps the spatial
        # size; each MaxPool2d(2) halves it: 28 -> 14 -> 7.
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # After `features` a 28x28 input has shape (32, 7, 7), hence 32*7*7
        # input features for the first fully connected layer.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Map a batch of shape (N, 1, 28, 28) to class logits of shape (N, 10)."""
        x = self.features(x)
        x = self.classifier(x)
        return x

### Forward test
모델을 완성했다면 아래 셀에서 **output shape = (4, 10)** 이 나와야 합니다.


In [None]:
# Build the network on the selected device and push a dummy batch through it.
model = SimpleCNN()
model = model.to(device)

# Four random single-channel 28x28 inputs stand in for real images.
x = torch.randn((4, 1, 28, 28), device=device)
y = model(x)
print("output:", y.shape)


## Loss / Optimizer 선언
- 분류(0~9) → `CrossEntropyLoss`
- 기본 optimizer → `Adam`


In [None]:
# Multi-class classification loss computed on the model's raw outputs.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter registered on the model.
optimizer = optim.Adam(model.parameters(), lr=1e-3)


## Train loop (2~3 epochs)


In [None]:
epochs = 3

for epoch in range(epochs):
    model.train()
    total_loss = 0.0  # running sum of per-sample losses for this epoch
    correct = 0       # number of correctly classified training samples
    total = 0         # number of training samples seen

    for images, labels in train_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # Standard optimisation step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` uses the default reduction, so `loss` is the batch mean.
        # Weight it by the batch size so that the (possibly smaller) last
        # batch does not skew the epoch average.
        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += batch_size

    # Report the mean per-sample loss rather than a sum that grows with the
    # number of batches, so values are comparable across batch sizes.
    avg_loss = total_loss / total
    train_acc = correct / total
    print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.4f}")


## Test evaluation


In [None]:

# Switch to evaluation mode and count correct predictions over the test set.
model.eval()
correct, total = 0, 0

# No gradients are needed while scoring.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.size(0)

test_acc = correct / total
print("Test Acc:", test_acc)


## 오답 1개 확인


In [None]:
# Find the first misclassified test sample and display it.
# `plt` (matplotlib.pyplot) is imported in the notebook's top import cell.
model.eval()
mis = None  # becomes (image, true label, predicted label) once a mistake is found

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        preds = outputs.argmax(dim=1)

        # Positions within this batch where prediction and label disagree.
        wrong = (preds != labels).nonzero(as_tuple=False)
        if len(wrong) > 0:
            idx = wrong[0].item()
            mis = (images[idx].detach().cpu(), labels[idx].item(), preds[idx].item())
            break  # one example is enough

if mis is None:
    print("No misclassified sample found (unlikely).")
else:
    img, y_true, y_pred = mis
    # Explicit Figure/Axes interface instead of the implicit pyplot state machine.
    fig, ax = plt.subplots()
    ax.imshow(img.squeeze(0), cmap="gray")  # drop the channel dim for imshow
    ax.set_title(f"True: {y_true} | Pred: {y_pred}")
    ax.axis("off")
    plt.show()
