# 03. 종합 프로젝트: MNIST 손글씨 인식

## 프로젝트 개요
- **데이터**: MNIST (28x28 흑백 손글씨 숫자 이미지)
- **목표**: 0-9 숫자 분류 (10-class classification)
- **구현**: Dataset, DataLoader, CNN, 훈련, 평가, 시각화

---

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

## 1. 데이터 준비

### 1.1 Transforms (데이터 전처리)

In [None]:
# Preprocessing pipeline: convert each PIL image to a tensor scaled from
# [0, 255] to [0, 1], then normalize it with the MNIST mean and standard
# deviation.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

print("Transform pipeline:", transform, sep="\n")

### 1.2 Dataset 로드

In [None]:
# Build the MNIST training split first and the test split second. Both use the
# same storage directory and preprocessing, and each one is downloaded into
# ./data when it is not already there.
train_dataset, test_dataset = (
    datasets.MNIST(
        root='./data',
        train=use_train_split,
        transform=transform,
        download=True,
    )
    for use_train_split in (True, False)
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

### 1.3 DataLoader (배치 생성)

In [None]:
batch_size = 64

# Training batches are reshuffled every epoch and loaded by two worker
# processes in parallel.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Evaluation keeps the dataset order fixed, so no shuffling is requested.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

print(f"Number of train batches: {len(train_loader)}")
print(f"Number of test batches: {len(test_loader)}")

### 1.4 데이터 시각화

In [None]:
# Pull a single batch from the training loader to inspect it.
examples = iter(train_loader)
example_data, example_targets = next(examples)

print(f"Batch shape: {example_data.shape}")  # expected: (64, 1, 28, 28)
print(f"Labels: {example_targets[:10]}")

# Show the first 12 digits of the batch on a 3x4 grid, each titled with its
# label. Index 0 selects the only (grayscale) channel of every image.
fig = plt.figure(figsize=(12, 6))
first_twelve = zip(example_data[:12], example_targets[:12])
for position, (digit, label) in enumerate(first_twelve, start=1):
    ax = plt.subplot(3, 4, position)
    ax.imshow(digit[0], cmap='gray')
    ax.set_title(f'Label: {label}')
    ax.axis('off')
fig.tight_layout()
plt.show()

## 2. CNN 모델 정의

### Architecture
```
Input (1, 28, 28)
  ↓ Conv1 (32 filters, 3x3, padding=1)  → (32, 28, 28)
  ↓ ReLU
  ↓ MaxPool (2x2)                       → (32, 14, 14)
  ↓ Conv2 (64 filters, 3x3, padding=1)  → (64, 14, 14)
  ↓ ReLU
  ↓ MaxPool (2x2)                       → (64, 7, 7)
  ↓ Flatten                             → (3136)
  ↓ FC1 (128)
  ↓ ReLU
  ↓ Dropout (p=0.5)
  ↓ FC2 (10)
Output (10 classes)
```

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two (3x3 conv -> ReLU -> 2x2 max-pool) stages with 32 and 64 channels are
    followed by a 128-unit hidden layer with dropout and a linear layer that
    emits one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        dropout_p: Drop probability applied after the hidden layer.
            Defaults to 0.5.
    """

    def __init__(self, num_classes=10, dropout_p=0.5):
        super().__init__()

        # A 3x3 kernel with padding=1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        # One pooling module is reused by both stages; each use halves H and W.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Two poolings shrink 28 -> 14 -> 7, so the flattened size is 64 * 7 * 7.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

        # Dropout for regularization.
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return raw logits (before softmax) of shape (batch, num_classes).

        Args:
            x: Input tensor of shape (batch, 1, 28, 28).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not flatten to the
                64 * 7 * 7 features expected by the first linear layer.
        """
        # Conv stage 1: (batch, 1, 28, 28) -> (batch, 32, 14, 14)
        x = self.pool(F.relu(self.conv1(x)))

        # Conv stage 2: (batch, 32, 14, 14) -> (batch, 64, 7, 7)
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the batch axis. Unlike view(-1, 3136), this
        # never folds a wrong-sized input into a different batch size; the
        # mismatch surfaces as a shape error in fc1 instead.
        x = torch.flatten(x, 1)  # (batch, 64, 7, 7) -> (batch, 3136)

        x = F.relu(self.fc1(x))  # (batch, 3136) -> (batch, 128)
        x = self.dropout(x)  # only active in training mode
        return self.fc2(x)  # (batch, 128) -> (batch, num_classes)

# Instantiate the network on the selected device and show its layers.
model = CNN().to(device)
print(model)

# Count all parameters, and separately those that receive gradients.
total_params = 0
trainable_params = 0
for param in model.parameters():
    total_params += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()
print(f"\nTotal params: {total_params:,}")
print(f"Trainable params: {trainable_params:,}")

## 3. Loss & Optimizer

In [None]:
# Cross-entropy on the raw logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Optional learning-rate schedule: halve the rate every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

print("Loss function:", criterion)
print("Optimizer:", optimizer)
print("Scheduler:", scheduler)

## 4. 훈련 함수

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one full pass over ``dataloader``.

    Args:
        model: Network to optimize; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the batch-size-weighted average of the
        per-batch loss values and the fraction of correctly predicted
        samples, both accumulated over the whole pass.
    """
    model.train()  # training mode

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    progress = tqdm(dataloader, desc="Training")
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # Running statistics; the predicted class is the highest logit.
        batch_loss = loss.item()
        loss_sum += batch_loss * images.size(0)
        num_seen += labels.size(0)
        predictions = torch.max(logits, dim=1).indices
        num_correct += (predictions == labels).sum().item()

        # Show the latest batch loss and running accuracy on the progress bar.
        progress.set_postfix({'loss': batch_loss, 'acc': num_correct / num_seen})

    return loss_sum / num_seen, num_correct / num_seen

## 5. 평가 함수

In [None]:
def evaluate(model, dataloader, criterion, device):
    """Measure loss and accuracy of ``model`` over ``dataloader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the batch-size-weighted average of the
        per-batch loss values and the fraction of correctly predicted
        samples.
    """
    model.eval()  # evaluation mode (e.g. dropout is disabled)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Evaluating"):
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            batch_loss = criterion(logits, labels).item()

            loss_sum += batch_loss * images.size(0)
            num_seen += labels.size(0)
            predictions = torch.max(logits, dim=1).indices
            num_correct += (predictions == labels).sum().item()

    return loss_sum / num_seen, num_correct / num_seen

## 6. 훈련 실행

In [None]:
num_epochs = 10

# Per-epoch history of losses and accuracies.
train_losses, train_accs = [], []
test_losses, test_accs = [], []

for epoch in range(num_epochs):
    print(f"\n{'='*60}")
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print('='*60)

    # Train for one epoch.
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Evaluate on the test set.
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
    test_losses.append(test_loss)
    test_accs.append(test_acc)

    # Read the learning rate this epoch actually trained with BEFORE stepping
    # the scheduler; reading it afterwards would log the next epoch's rate
    # next to this epoch's metrics.
    current_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    print(f"\nTrain Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")
    print(f"Learning Rate: {current_lr}")

print("\n훈련 완료!")

## 7. 결과 시각화

In [None]:
def _draw_curves(ax, train_series, test_series, train_label, test_label, y_label, title):
    """Plot one metric's per-epoch train/test curves on ``ax``."""
    ax.plot(train_series, label=train_label, marker='o')
    ax.plot(test_series, label=test_label, marker='s')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(alpha=0.3)


fig, axes = plt.subplots(1, 2, figsize=(14, 5))
loss_ax, acc_ax = axes

# Left panel: loss curves. Right panel: accuracy curves.
_draw_curves(loss_ax, train_losses, test_losses,
             'Train Loss', 'Test Loss', 'Loss', 'Training and Test Loss')
_draw_curves(acc_ax, train_accs, test_accs,
             'Train Acc', 'Test Acc', 'Accuracy', 'Training and Test Accuracy')

fig.tight_layout()
plt.show()

print(f"\nFinal Test Accuracy: {test_accs[-1]:.4f}")

## 8. 예측 예시

In [None]:
# Predict on the first batch of the test loader.
model.eval()

test_examples = iter(test_loader)
test_images, test_labels = next(test_examples)
test_images = test_images.to(device)

with torch.no_grad():
    test_outputs = model(test_images)
    test_predicted = torch.max(test_outputs, dim=1).indices

# Show the first 12 images; the title is green for a correct prediction and
# red for a wrong one.
fig = plt.figure(figsize=(15, 8))
for slot in range(12):
    ax = plt.subplot(3, 4, slot + 1)
    # Move the image back to the CPU and take its single channel for plotting.
    ax.imshow(test_images[slot].cpu()[0], cmap='gray')

    true_label = test_labels[slot].item()
    pred_label = test_predicted[slot].item()

    if true_label == pred_label:
        color = 'green'
    else:
        color = 'red'
    ax.set_title(f'True: {true_label}, Pred: {pred_label}', color=color)
    ax.axis('off')

fig.tight_layout()
plt.show()

## 9. Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Collect predictions and ground-truth labels for the entire test set.
all_preds = []
all_labels = []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.tolist())

# Pin the label set so the matrix is always 10x10 with row/column i meaning
# digit i, even if some digit never appears among the labels or predictions.
# Otherwise the per-class loop below could index out of range or misalign.
class_ids = list(range(10))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Per-class accuracy: correct predictions divided by the number of true
# samples of that class (row sum). A class without samples is reported as nan.
print("\nPer-class Accuracy:")
for i in class_ids:
    support = cm[i, :].sum()
    class_acc = cm[i, i] / support if support else float('nan')
    print(f"Class {i}: {class_acc:.4f}")

## 10. 모델 저장

In [None]:
# Save a checkpoint with everything needed to resume training or run inference.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    # Without the scheduler state a resumed run would restart the LR schedule.
    'scheduler_state_dict': scheduler.state_dict(),
    'epoch': num_epochs,
    'test_acc': test_accs[-1],
}

torch.save(checkpoint, 'mnist_cnn.pth')
print("Model saved to mnist_cnn.pth")

# Loading example. map_location remaps the stored tensors onto the current
# device, so a checkpoint saved on a GPU also loads on a CPU-only machine.
# loaded_model = CNN().to(device)
# checkpoint = torch.load('mnist_cnn.pth', map_location=device)
# loaded_model.load_state_dict(checkpoint['model_state_dict'])
# loaded_model.eval()

## 요약

### 프로젝트 구조
1. ✅ **데이터 준비**: MNIST dataset, DataLoader
2. ✅ **전처리**: Transforms (ToTensor, Normalize)
3. ✅ **모델 정의**: CNN (Conv → Pool → FC)
4. ✅ **훈련 루프**: train_epoch(), evaluate()
5. ✅ **평가**: Accuracy, Confusion Matrix
6. ✅ **시각화**: Loss/Accuracy curves, 예측 결과
7. ✅ **모델 저장**: checkpoint 방식

### AI 연구원/엔지니어로서 알아야 할 것
- **Dataset/DataLoader**: 효율적 데이터 로딩
- **Transforms**: 데이터 전처리(ToTensor, Normalize) 및 augmentation
- **CNN architecture**: 이미지 처리의 기본
- **Training loop**: 표준 패턴
- **Evaluation**: Metrics, visualization
- **Model checkpoint**: 실험 관리

### 추가 학습 주제
- Data augmentation (RandomCrop, RandomHorizontalFlip)
- Batch Normalization
- Residual connections (ResNet)
- Transfer learning
- Mixed precision training
- Distributed training