In [None]:
# -*- coding: utf-8 -*-
# تمرین جلسه ۹ – مقایسه بهینه‌سازها و تأثیر Learning Rate روی یک CNN (MNIST)

# در صورت نیاز می‌تونی این خط رو باز کنی:
# !pip install torch torchvision matplotlib

import time
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms

# -----------------------------
# 1) تنظیم Device (GPU/CPU)
# -----------------------------
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

# -----------------------------
# 2) لود دیتاست MNIST
# -----------------------------
# Preprocessing applied to every image: convert to a tensor, then normalize
# with the standard MNIST mean / std.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Build the training split first and the test split second; both are stored
# under ./data (downloaded when missing) and share the same preprocessing.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root="./data", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=256, shuffle=False)

print("Train size:", len(train_dataset), "Test size:", len(test_dataset))

# -----------------------------
# 3) تعریف مدل CNN ساده
# -----------------------------
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for 28x28 images.

    The feature extractor applies two (3x3 conv -> ReLU -> 2x2 max-pool)
    stages, shrinking the spatial size 28x28 -> 14x14 -> 7x7 while widening
    the channels to 32 and then 64. The classifier flattens that map and
    passes it through a 128-unit hidden layer to produce one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1.

    Note:
        The first linear layer is sized for a 64 x 7 x 7 feature map, so the
        spatial size of the inputs must be 28x28.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),  # 28x28 -> 28x28
            nn.ReLU(),
            nn.MaxPool2d(2),                                       # 28x28 -> 14x14

            nn.Conv2d(32, 64, kernel_size=3, padding=1),           # 14x14 -> 14x14
            nn.ReLU(),
            nn.MaxPool2d(2),                                       # 14x14 -> 7x7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# -----------------------------
# 4) توابع آموزش و ارزیابی
# -----------------------------
def train_one_epoch(model, loader, optimizer, criterion, device=None):
    """Run one optimization pass over ``loader`` and return its metrics.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``; its value
            is treated as a per-batch mean when averaging over the epoch.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used, so the function no longer
            relies on a module-level ``device`` variable.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the fraction of correctly classified samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        # Follow the model: batches must live where the parameters live.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    model.train()
    running_loss = 0.0
    total = 0
    correct = 0

    for images, labels in loader:
        if device is not None:
            images = images.to(device)
            labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the epoch average.
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


@torch.no_grad()
def evaluate(model, loader, criterion, device=None):
    """Measure loss and accuracy of ``model`` on ``loader`` without training.

    Gradient tracking is disabled by the decorator and the model is switched
    to evaluation mode (it is left in that mode on return).

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``; its value
            is treated as a per-batch mean when averaging over the dataset.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used, so the function no longer
            relies on a module-level ``device`` variable.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the fraction of correctly classified samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        # Follow the model: batches must live where the parameters live.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    model.eval()
    running_loss = 0.0
    total = 0
    correct = 0

    for images, labels in loader:
        if device is not None:
            images = images.to(device)
            labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the overall average.
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def build_optimizer(name, params, lr):
    """Create the optimizer selected by ``name`` (case-insensitive).

    Args:
        name: One of ``"sgd"``, ``"adam"`` or ``"rmsprop"`` in any letter case.
        params: Parameters the optimizer should update.
        lr: Learning rate passed to the optimizer.

    Returns:
        SGD or RMSprop with momentum 0.9, or Adam with its default settings.

    Raises:
        ValueError: If ``name`` is not one of the supported optimizers.
    """
    name = name.lower()
    # Each entry builds its optimizer lazily, so only the chosen one is created.
    factories = {
        "sgd": lambda: optim.SGD(params, lr=lr, momentum=0.9),
        "adam": lambda: optim.Adam(params, lr=lr),
        "rmsprop": lambda: optim.RMSprop(params, lr=lr, momentum=0.9),
    }
    factory = factories.get(name)
    if factory is None:
        raise ValueError(f"Unknown optimizer: {name}")
    return factory()

def train_model(optimizer_name, lr, num_epochs=5):
    """Train a fresh ``SimpleCNN`` and record its metrics after every epoch.

    A new model is created on the module-level ``device`` and trained on
    ``train_loader``; after each epoch it is evaluated on ``test_loader``.
    Progress lines and the total wall-clock time are printed. The reported
    time covers the whole loop, i.e. the per-epoch evaluation as well.

    Args:
        optimizer_name: Optimizer name understood by ``build_optimizer``.
        lr: Learning rate for the optimizer.
        num_epochs: Number of training epochs.

    Returns:
        Dict with the keys ``"train_loss"``, ``"train_acc"``, ``"test_loss"``
        and ``"test_acc"``, each mapping to a list with one value per epoch.
    """
    print(f"\n===== Optimizer: {optimizer_name} | LR = {lr} =====")
    net = SimpleCNN().to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = build_optimizer(optimizer_name, net.parameters(), lr)

    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    history = {metric: [] for metric in metric_names}

    started_at = time.time()
    epoch = 1
    while epoch <= num_epochs:
        train_loss, train_acc = train_one_epoch(net, train_loader, opt, loss_fn)
        test_loss, test_acc = evaluate(net, test_loader, loss_fn)

        # Values are listed in the same order as ``metric_names``.
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for metric, value in zip(metric_names, epoch_values):
            history[metric].append(value)

        print(f"Epoch {epoch:02d} | "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.3f} | "
              f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.3f}")
        epoch += 1

    elapsed = time.time() - started_at
    # User-facing message (Persian): training time in seconds.
    print(f"زمان آموزش: {elapsed:.1f} ثانیه")
    return history

# -----------------------------
# 5) بخش اول: مقایسه بهینه‌سازها
# -----------------------------
# Number of epochs used by every training run in this notebook.
EPOCHS = 5
# (optimizer name, learning rate) pairs to compare.
optimizers = [("SGD", 0.1), ("Adam", 1e-3), ("RMSProp", 1e-3)]

# Train one fresh model per optimizer, in the listed order, keyed by name.
hist_opt = {
    name: train_model(name, lr, num_epochs=EPOCHS) for name, lr in optimizers
}

# Plot the test curves of all optimizers: accuracy on the left, loss on the right.
epochs_range = np.arange(1, EPOCHS + 1)

plt.figure(figsize=(12, 5))
for position, (metric, title, ylabel) in enumerate(
    [
        ("test_acc", "Test Accuracy - Optimizers", "Accuracy"),
        ("test_loss", "Test Loss - Optimizers", "Loss"),
    ],
    start=1,
):
    plt.subplot(1, 2, position)
    for name in hist_opt:
        plt.plot(epochs_range, hist_opt[name][metric], label=name)
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

# Summary (header printed in Persian): final-epoch test accuracy per optimizer.
print("\nنتیجه نهایی دقت تست هر بهینه‌ساز:")
for name in hist_opt:
    print(f"{name}: {hist_opt[name]['test_acc'][-1]:.3f}")

# -----------------------------
# 6) بخش دوم: اثر Learning Rate برای Adam
# -----------------------------
# Learning rates to try with Adam.
LR_LIST = [1e-4, 1e-3, 1e-2]
# Train one fresh model per learning rate, in the listed order, keyed by the rate.
hist_lr = {lr: train_model("Adam", lr, num_epochs=EPOCHS) for lr in LR_LIST}

# Plot the test curves per learning rate: accuracy on the left, loss on the right.
plt.figure(figsize=(12, 5))
for position, (metric, title, ylabel) in enumerate(
    [
        ("test_acc", "Adam - Test Accuracy vs LR", "Accuracy"),
        ("test_loss", "Adam - Test Loss vs LR", "Loss"),
    ],
    start=1,
):
    plt.subplot(1, 2, position)
    for lr in hist_lr:
        plt.plot(epochs_range, hist_lr[lr][metric], label=f"LR={lr}")
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

# Summary (header printed in Persian): final-epoch test accuracy per learning rate.
print("\nنتیجه نهایی Adam با LRهای مختلف:")
for lr in hist_lr:
    print(f"LR={lr}: Test Accuracy = {hist_lr[lr]['test_acc'][-1]:.3f}")


Device: cpu


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 484kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.49MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.57MB/s]


Train size: 60000 Test size: 10000

===== Optimizer: SGD | LR = 0.1 =====
Epoch 01 | Train Loss: 0.2730 | Train Acc: 0.922 | Test Loss: 0.1265 | Test Acc: 0.963
