In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [2]:
# Every MNIST sample is converted to a tensor before it is batched.
transform = transforms.ToTensor()

# The two splits share the storage root and preprocessing; only `train` differs.
# download=True lets torchvision fetch the files into ./data when needed.
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)

# Mini-batches of 64: the training loader shuffles, the test loader does not.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("Number of training batches:", len(train_loader))
print("Number of test batches:", len(test_loader))


Number of training batches: 938
Number of test batches: 157


In [3]:
# Sanity check: pull a single mini-batch and show its tensor shapes.
# The None default keeps an empty loader a silent no-op, as the loop form was.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, labels = first_batch
    print("Batch image shape:", images.shape)
    print("Batch label shape:", labels.shape)

Batch image shape: torch.Size([64, 1, 28, 28])
Batch label shape: torch.Size([64])


In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:", device)

device: cpu


In [18]:
class MNISTMLP_NoDropout(nn.Module):
    """Fully connected classifier 784 -> 256 -> 128 -> 10 with ReLU, no dropout.

    The forward pass returns 10 raw scores per sample; no softmax is applied
    inside the model.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        """Flatten each sample to a vector and run it through the MLP.

        Args:
            x: Batch tensor whose per-sample elements total 784,
               e.g. shape (N, 1, 28, 28) or (N, 784).

        Returns:
            Tensor of shape (N, 10) with one score per class.
        """
        # flatten() instead of view(N, -1): view raises on non-contiguous input
        # (e.g. after transpose/permute) and on an empty batch (N == 0), whereas
        # flatten copies only when it has to and handles N == 0.
        x = x.flatten(start_dim=1)
        return self.net(x)


In [19]:
class MNISTMLP_Dropout(nn.Module):
    """Fully connected classifier 784 -> 256 -> 128 -> 10 with one dropout layer.

    Dropout is applied after the first hidden activation only. The forward pass
    returns 10 raw scores per sample; no softmax is applied inside the model.

    Args:
        p: Dropout probability for the single dropout layer. Defaults to 0.3,
           the value this model was originally hard-coded with.
    """

    def __init__(self, p: float = 0.3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Dropout(p),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Flatten each sample to a vector and run it through the MLP.

        Args:
            x: Batch tensor whose per-sample elements total 784,
               e.g. shape (N, 1, 28, 28) or (N, 784).

        Returns:
            Tensor of shape (N, 10) with one score per class.
        """
        # flatten() instead of view(N, -1): view raises on non-contiguous input
        # (e.g. after transpose/permute) and on an empty batch (N == 0), whereas
        # flatten copies only when it has to and handles N == 0.
        return self.net(x.flatten(start_dim=1))


In [13]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return averaged metrics.

    The model is switched to training mode, and every batch gets one
    zero_grad / forward / backward / step cycle. Loss and accuracy are taken
    from the forward pass made *before* that batch's parameter update.

    Args:
        model: Module being trained; called as ``model(images)``.
        loader: Iterable yielding ``(images, labels)`` pairs.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    model.train()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_inputs, batch_targets in loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        n_batch = batch_inputs.size(0)
        # Scale by batch size so a smaller final batch is not over-weighted
        # (assumes the criterion returns a per-batch mean -- confirm if changed).
        loss_sum += batch_loss.item() * n_batch
        n_seen += n_batch

        # Predicted class = index of the largest score in each row.
        hits = logits.max(dim=1).indices == batch_targets
        n_correct += hits.sum().item()

    return loss_sum / n_seen, n_correct / n_seen


In [20]:
@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating any parameters.

    Gradient tracking is disabled for the whole call by the decorator. The
    model is switched to evaluation mode and is left in that mode on return.

    Args:
        model: Module to evaluate; called as ``model(images)``.
        loader: Iterable yielding ``(images, labels)`` pairs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    model.eval()

    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_inputs, batch_targets in loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)

        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)

        n_batch = batch_inputs.size(0)
        # Scale by batch size so a smaller final batch is not over-weighted
        # (assumes the criterion returns a per-batch mean -- confirm if changed).
        loss_sum += batch_loss.item() * n_batch
        n_seen += n_batch

        # Predicted class = index of the largest score in each row.
        hits = logits.max(dim=1).indices == batch_targets
        n_correct += hits.sum().item()

    return loss_sum / n_seen, n_correct / n_seen


In [21]:
def run_experiment(model, train_loader, test_loader, optimizer, criterion, device, num_epochs=3):
    """Train for ``num_epochs`` epochs, evaluating on the test loader after each one.

    Each epoch calls ``train_one_epoch`` on ``train_loader`` and then
    ``evaluate`` on ``test_loader``, records the four metrics and prints a
    one-line summary.

    Args:
        model: Module to train and evaluate.
        train_loader: Batches passed to ``train_one_epoch``.
        test_loader: Batches passed to ``evaluate``.
        optimizer: Optimizer passed to ``train_one_epoch``.
        criterion: Loss callable passed to both helpers.
        device: Device passed to both helpers.
        num_epochs: Number of train/evaluate rounds to run.

    Returns:
        Dict with keys ``train_loss``, ``train_acc``, ``test_loss`` and
        ``test_acc``, each mapping to a list holding one value per epoch.
    """
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    history = {name: [] for name in metric_names}

    for step in range(num_epochs):
        tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
        te_loss, te_acc = evaluate(model, test_loader, criterion, device)

        for name, value in zip(metric_names, (tr_loss, tr_acc, te_loss, te_acc)):
            history[name].append(value)

        # Epochs are reported 1-based.
        summary = (
            f"Epoch [{step + 1}/{num_epochs}] "
            f"Train Loss: {tr_loss:.4f}, Train Acc: {tr_acc:.4f} | "
            f"Test Loss: {te_loss:.4f}, Test Acc: {te_acc:.4f}"
        )
        print(summary)
    return history


In [22]:
# Common settings
num_epochs = 3

criterion = nn.CrossEntropyLoss()

# -------------------------
# Model A: no dropout
# -------------------------
# Seed the global RNG right before building the model so that the initial
# weights and the shuffle order drawn by the training loader are repeatable
# when this cell is re-run (same device and library versions).
torch.manual_seed(42)
model_a = MNISTMLP_NoDropout().to(device)

optimizer_a = optim.Adam(model_a.parameters(), lr=1e-3)

print("=== Model A (No Dropout) ===")
hist_a = run_experiment(model_a, train_loader, test_loader, optimizer_a, criterion, device, num_epochs=num_epochs)


=== Model A (No Dropout) ===
Epoch [1/3] Train Loss: 0.2821, Train Acc: 0.9184 | Test Loss: 0.1245, Test Acc: 0.9627
Epoch [2/3] Train Loss: 0.1045, Train Acc: 0.9685 | Test Loss: 0.0988, Test Acc: 0.9699
Epoch [3/3] Train Loss: 0.0705, Train Acc: 0.9784 | Test Loss: 0.0756, Test Acc: 0.9777


In [24]:
# -------------------------
# Model B: Dropout(p=0.3)
# -------------------------
# Seed the global RNG right before building the model so that the initial
# weights, the dropout masks and the shuffle order drawn by the training
# loader are repeatable when this cell is re-run (same device and library
# versions).
torch.manual_seed(42)
model_b = MNISTMLP_Dropout().to(device)

optimizer_b = optim.Adam(model_b.parameters(), lr=1e-3)

print("=== Model B (Dropout p=0.3) ===")
hist_b = run_experiment(model_b, train_loader, test_loader, optimizer_b, criterion, device, num_epochs=num_epochs)


=== Model B (Dropout p=0.3) ===
Epoch [1/3] Train Loss: 0.3131, Train Acc: 0.9090 | Test Loss: 0.1527, Test Acc: 0.9528
Epoch [2/3] Train Loss: 0.1364, Train Acc: 0.9594 | Test Loss: 0.0935, Test Acc: 0.9700
Epoch [3/3] Train Loss: 0.1008, Train Acc: 0.9688 | Test Loss: 0.0803, Test Acc: 0.9748


In [26]:
num_epochs_opt = 1

criterion = nn.CrossEntropyLoss()

# Each arm is seeded identically immediately before its model is built and
# trained. Both arms therefore start from the same initial weights and draw
# the same training-batch order, so the optimizer is the only thing that
# differs between the two runs.
OPT_COMPARISON_SEED = 42

# --- Adam ---
# Fresh model with the same architecture (the no-dropout model is used for the comparison).
torch.manual_seed(OPT_COMPARISON_SEED)
model_opt_adam = MNISTMLP_NoDropout().to(device)
optimizer_adam = optim.Adam(model_opt_adam.parameters(), lr=1e-3)

print("=== Optimizer: Adam ===")
hist_adam = run_experiment(model_opt_adam, train_loader, test_loader, optimizer_adam, criterion, device, num_epochs=num_epochs_opt)

# --- SGD ---
# Reseed so this model gets the same initial weights and batch order as the Adam arm.
torch.manual_seed(OPT_COMPARISON_SEED)
model_opt_sgd = MNISTMLP_NoDropout().to(device)
optimizer_sgd = optim.SGD(model_opt_sgd.parameters(), lr=0.1)

print("\n=== Optimizer: SGD ===")
hist_sgd = run_experiment(model_opt_sgd, train_loader, test_loader, optimizer_sgd, criterion, device, num_epochs=num_epochs_opt)


=== Optimizer: Adam ===
Epoch [1/1] Train Loss: 0.2819, Train Acc: 0.9191 | Test Loss: 0.1242, Test Acc: 0.9611

=== Optimizer: SGD ===
Epoch [1/1] Train Loss: 0.4906, Train Acc: 0.8611 | Test Loss: 0.2177, Test Acc: 0.9347
