In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device:", device)

### TODO
- Conv(1→16) → ReLU → Pool → Conv(16→32) → ReLU → Pool → Flatten → Linear → ReLU → Linear
- Conv는 kernel=3, stride=1, padding=1
- Pool은 MaxPool2d(2)

- 입력: (B, 1, 28, 28)
- Conv1 -> ?
- Pool1 -> ?
- Conv2 -> ?
- Pool2 -> ?
- Flatten -> ?
- Linear1 input dim = ?


---
## Fashion-MNIST 실험

### 실험 목표
- SmallCNN vs BigCNN으로 **과적합 유도**
- Dropout/Weight Decay로 **일반화 성능 변화 체감**


In [None]:
# Fashion-MNIST data pipeline.
# Normalize((0.5,), (0.5,)) maps pixels from [0, 1] to [-1, 1]; the sample
# visualization in this notebook undoes exactly this with x * 0.5 + 0.5.
fashion_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# download=True fetches the files into ./data on first use only.
train_fashion = datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=fashion_transform
)
test_fashion = datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=fashion_transform
)

# Shuffle only the training split; evaluation does not depend on order.
train_loader = DataLoader(train_fashion, batch_size=64, shuffle=True)
test_loader = DataLoader(test_fashion, batch_size=256, shuffle=False)

# Class names indexed by label id (0-9); used as plot titles.
fashion_classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]


### 샘플 16장 시각화


In [None]:
# Show up to 16 training samples in a 4x4 grid.
images, labels = next(iter(train_loader))
images = images[:16]
labels = labels[:16]

plt.figure(figsize=(6, 6))
# Walk over what the batch actually holds, so a batch with fewer than 16
# samples draws a partial grid instead of raising IndexError.
for i, (image, label) in enumerate(zip(images, labels)):
    plt.subplot(4, 4, i + 1)
    # Undo Normalize: x_norm = (x - 0.5) / 0.5  ->  x = x_norm * 0.5 + 0.5
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise.
    img = image.squeeze().cpu().numpy() * 0.5 + 0.5
    plt.imshow(img, cmap="gray")
    plt.title(fashion_classes[label.item()], fontsize=8)
    plt.axis("off")
plt.tight_layout()
plt.show()


### 모델 정의: SmallCNN / BigCNN


In [None]:
def count_params(model):
    """Return the total element count of the model's trainable parameters.

    Only parameters whose ``requires_grad`` flag is set contribute.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
# TODO: 모델 정의

class SmallCNN(nn.Module):
    """Small two-block CNN for 1x28x28 inputs (e.g. Fashion-MNIST).

    Layout: Conv(1->16) -> ReLU -> Pool -> Conv(16->32) -> ReLU -> Pool
            -> Flatten -> Linear -> ReLU -> Linear.
    Every conv uses kernel=3, stride=1, padding=1, which keeps the spatial
    size; each MaxPool2d(2) halves it: 28 -> 14 -> 7.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),   # (B, 16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),                                        # (B, 16, 14, 14)
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),  # (B, 32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),                                        # (B, 32, 7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),                # (B, 32 * 7 * 7) = (B, 1568)
            nn.Linear(32 * 7 * 7, 64),   # hidden width kept small on purpose
            nn.ReLU(),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, num_classes) raw logits."""
        return self.classifier(self.features(x))

class BigCNN(nn.Module):
    """Wide two-block CNN for 1x28x28 inputs, sized so that it overfits easily.

    Layout: Conv(1->64) -> ReLU -> Pool -> Conv(64->128) -> ReLU -> Pool
            -> Flatten -> Linear(6272->512) -> ReLU -> Linear(512->classes).
    Convs use kernel=3, stride=1, padding=1 (spatial size preserved); each
    MaxPool2d(2) halves the spatial size: 28 -> 14 -> 7.

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),    # (B, 64, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),                                         # (B, 64, 14, 14)
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (B, 128, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),                                         # (B, 128, 7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),                  # (B, 128 * 7 * 7) = (B, 6272)
            nn.Linear(128 * 7 * 7, 512),   # the bulk of the parameters live here
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, num_classes) raw logits."""
        return self.classifier(self.features(x))

def count_params(model):
    """Return the number of trainable parameter elements in ``model``.

    Parameters with ``requires_grad == False`` (frozen layers) are skipped.

    Args:
        model: Any object exposing ``parameters()`` like ``nn.Module`` does.

    Returns:
        int: Sum of ``numel()`` over the trainable parameters (0 if none).
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# TODO: instantiate the models and print their parameter counts side by side
pass


### 빠른 학습: Small vs Big (2~3 epoch)

In [None]:
def accuracy_from_logits(logits: torch.Tensor, y: torch.Tensor) -> float:
    """Return the fraction of rows whose arg-max class equals the target.

    logits: (B, C), y: (B,)
    """
    predicted = torch.argmax(logits, dim=1)
    hits = torch.eq(predicted, y)
    return hits.float().mean().item()

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and report epoch metrics.

    Metrics are weighted by batch size, so a smaller final batch does not
    skew the epoch average the way a plain mean of per-batch means would.

    Args:
        model: Module to train; switched to train mode.
        loader: Iterable of ``(x, y)`` batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable. It is assumed to return the per-batch
            *mean* loss (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device each batch is moved to.

    Returns:
        tuple[float, float]: ``(mean loss per sample, accuracy)``;
        ``(0.0, 0.0)`` when ``loader`` yields nothing.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_samples = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)

        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        batch_size = y.size(0)
        # Undo the per-batch mean so every sample carries equal weight.
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        n_samples += batch_size

    denom = max(n_samples, 1)  # guard against an empty loader
    return loss_sum / denom, n_correct / denom

@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` over ``loader`` without tracking gradients.

    Metrics are weighted by batch size, so a smaller final batch does not
    skew the result the way a plain mean of per-batch means would.

    Args:
        model: Module to evaluate; switched to eval mode.
        loader: Iterable of ``(x, y)`` batches.
        criterion: Loss callable. It is assumed to return the per-batch
            *mean* loss (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device each batch is moved to.

    Returns:
        tuple[float, float]: ``(mean loss per sample, accuracy)``;
        ``(0.0, 0.0)`` when ``loader`` yields nothing.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_samples = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criterion(logits, y)

        batch_size = y.size(0)
        # Undo the per-batch mean so every sample carries equal weight.
        loss_sum += loss.item() * batch_size
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        n_samples += batch_size

    denom = max(n_samples, 1)  # guard against an empty loader
    return loss_sum / denom, n_correct / denom


def run_training(model, train_loader, test_loader, epochs=2, lr=1e-3, weight_decay=0.0):
    """Train ``model`` with Adam + cross-entropy and evaluate after every epoch.

    The model is moved to the module-level ``device``. One summary line is
    printed per epoch.

    Returns:
        list of ``(train_loss, train_acc, test_loss, test_acc)`` tuples,
        one entry per epoch in order.
    """
    net = model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

    records = []
    for epoch in range(1, epochs + 1):
        train_metrics = train_one_epoch(net, train_loader, adam, loss_fn, device)
        test_metrics = evaluate(net, test_loader, loss_fn, device)
        tr_loss, tr_acc = train_metrics
        te_loss, te_acc = test_metrics
        records.append((tr_loss, tr_acc, te_loss, te_acc))
        print(f"Epoch {epoch:02d}/{epochs} | train loss {tr_loss:.4f} acc {tr_acc:.4f} | test loss {te_loss:.4f} acc {te_acc:.4f}")
    return records

In [None]:
# TODO: quick training run — train SmallCNN and BigCNN for 2-3 epochs each.
# The results table later reads small_tr_acc / small_te_acc and
# big_tr_acc / big_te_acc, so keep the last-epoch accuracies under those names.
pass


### BigCNN + Dropout


In [None]:
# TODO: BigCNN_Dropout 스켈레톤
class BigCNN_Dropout(nn.Module):
    """Wide two-block CNN for 1x28x28 inputs with Dropout in the classifier.

    Layout: Conv(1->64) -> ReLU -> Pool -> Conv(64->128) -> ReLU -> Pool
            -> Flatten -> Linear(6272->512) -> ReLU -> Dropout
            -> Linear(512->classes).
    Convs use kernel=3, stride=1, padding=1 (spatial size preserved); each
    MaxPool2d(2) halves the spatial size: 28 -> 14 -> 7.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        p_drop: Dropout probability on the hidden activations. Defaults to 0.5.
    """

    def __init__(self, num_classes: int = 10, p_drop: float = 0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),    # (B, 64, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),                                         # (B, 64, 14, 14)
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # (B, 128, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),                                         # (B, 128, 7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),                  # (B, 128 * 7 * 7) = (B, 6272)
            nn.Linear(128 * 7 * 7, 512),
            nn.ReLU(),
            # Active only in train mode; model.eval() turns it into identity.
            nn.Dropout(p_drop),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, num_classes) raw logits."""
        return self.classifier(self.features(x))

# TODO: run training/evaluation for BigCNN_Dropout.
# The results table later reads big_do_tr_acc / big_do_te_acc.
pass


### Weight Decay (Big + Dropout + WD)

Weight Decay는 파라미터 크기가 지나치게 커지지 않도록 벌점(penalty)을 주어 과적합을 완화하는 기법입니다.

참고: https://eair.tistory.com/80


In [None]:
# TODO: retrain with weight_decay applied to the optimizer
# (run_training already forwards its weight_decay argument to Adam).
# optimizer = torch.optim.Adam(..., lr=1e-3, weight_decay=1e-4)
# The results table later reads big_do_wd_tr_acc / big_do_wd_te_acc.
pass


### 결과 표 정리


In [None]:
import pandas as pd

# One row per experiment: (model label, last-epoch train acc, last-epoch test acc).
results_fashion = pd.DataFrame(
    data=[
        ("Small", small_tr_acc, small_te_acc),
        ("Big", big_tr_acc, big_te_acc),
        ("Big + Dropout", big_do_tr_acc, big_do_te_acc),
        ("Big + Dropout + WD", big_do_wd_tr_acc, big_do_wd_te_acc),
    ],
    columns=["Model", "Train Acc (last)", "Test Acc (last)"],
)
results_fashion


---
## CIFAR-10 프로젝트 맛보기

- 입력이 (B, 3, 32, 32)로 바뀜.


### CIFAR-10 DataLoader 세팅 + 16장 시각화


In [None]:
# CIFAR-10 data pipeline.
# Per-channel statistics; the sample visualization in this notebook
# de-normalizes with these same values.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2470, 0.2435, 0.2616)

# Light augmentation for training only; the test split is left untouched.
train_transform_cifar = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])
test_transform_cifar = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# download=True fetches the files into ./data on first use only.
train_cifar = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=train_transform_cifar
)
test_cifar = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=test_transform_cifar
)

# Shuffle only the training split; evaluation does not depend on order.
train_loader_cifar = DataLoader(train_cifar, batch_size=128, shuffle=True)
test_loader_cifar = DataLoader(test_cifar, batch_size=256, shuffle=False)

# Class names indexed by label id (0-9); used as plot titles.
cifar_classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]


In [None]:
# Show up to 16 CIFAR training samples in a 4x4 grid (after undoing Normalize).
images, labels = next(iter(train_loader_cifar))
images = images[:16]
labels = labels[:16]

mean = torch.tensor([0.4914, 0.4822, 0.4465]).view(3, 1, 1)
std = torch.tensor([0.2470, 0.2435, 0.2616]).view(3, 1, 1)

# De-normalize the whole slice at once: (3, 1, 1) broadcasts over (N, 3, H, W).
# Then move channels last for imshow and clamp to the valid [0, 1] range.
restored = (images.cpu() * std + mean).permute(0, 2, 3, 1).clamp(0, 1).numpy()

plt.figure(figsize=(7, 7))
# Walk over what the batch actually holds, so a batch with fewer than 16
# samples draws a partial grid instead of raising IndexError.
for i, (img, label) in enumerate(zip(restored, labels)):
    plt.subplot(4, 4, i + 1)
    plt.imshow(img)
    plt.title(cifar_classes[label.item()], fontsize=8)
    plt.axis("off")
plt.tight_layout()
plt.show()


### CIFAR용 CNN 모델

32×32 입력에서 Pool(2) 두 번이면 32→16→8이 되어 Flatten은 (채널 * 8 * 8)이 됩니다.


In [None]:
# TODO: CIFARCNN 스켈레톤
class CIFARCNN(nn.Module):
    """Two-block CNN baseline for 3x32x32 inputs (e.g. CIFAR-10).

    Layout: Conv(3->32) -> ReLU -> Pool -> Conv(32->64) -> ReLU -> Pool
            -> Flatten -> Linear(4096->256) -> ReLU -> Dropout
            -> Linear(256->classes).
    Convs use kernel=3, stride=1, padding=1 (spatial size preserved); each
    MaxPool2d(2) halves the spatial size: 32 -> 16 -> 8, so Flatten yields
    64 * 8 * 8 features.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        p_drop: Dropout probability on the hidden activations. Defaults to 0.5.
    """

    def __init__(self, num_classes: int = 10, p_drop: float = 0.5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),   # (B, 32, 32, 32)
            nn.ReLU(),
            nn.MaxPool2d(2),                                        # (B, 32, 16, 16)
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (B, 64, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2),                                        # (B, 64, 8, 8)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),                 # (B, 64 * 8 * 8) = (B, 4096)
            nn.Linear(64 * 8 * 8, 256),
            nn.ReLU(),
            # Active only in train mode; model.eval() turns it into identity.
            nn.Dropout(p_drop),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Map a (B, 3, 32, 32) batch to (B, num_classes) raw logits."""
        return self.classifier(self.features(x))


### 1~2 epoch 학습


In [None]:
# TODO: run CIFAR training/evaluation (1-2 epochs).
# The baseline table later reads cifar_tr_acc / cifar_te_acc.
pass


In [None]:
print("\nCIFAR-10 baseline:")
pd.DataFrame({
    "Model": ["CIFARCNN baseline"],
    "Train Acc (last)": [cifar_tr_acc],
    "Test Acc (last)": [cifar_te_acc],
})
