In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [5]:
import torch

# Resolve the compute device inside this cell so it runs on a fresh kernel:
# nothing above this point in the notebook defines `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("CUDA available:", torch.cuda.is_available())
print("Device being used:", device)


CUDA available: False
Device being used: cpu


In [None]:
# 1. Data loading & normalization
# Preprocessing pipeline: convert each image to a tensor, then normalize its
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Both splits are stored under the current directory and downloaded on demand.
train_set = datasets.FashionMNIST(
    root='.',
    train=True,
    download=True,
    transform=transform,
)
test_set = datasets.FashionMNIST(
    root='.',
    train=False,
    download=True,
    transform=transform,
)

# Training batches are reshuffled every epoch; evaluation batches keep dataset order.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1000, shuffle=False)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 2. Model definition
class MLP(nn.Module):
    """Two-layer perceptron: flatten -> Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to 28*28.
        hidden_features: Width of the hidden layer. Defaults to 100.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_features=28*28, hidden_features=100, num_classes=10):
        super().__init__()
        self.fc1  = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.fc2  = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw (un-normalized) logits of shape (batch, num_classes)."""
        # flatten(1) keeps the batch dimension and collapses the rest. Unlike
        # view(), it also accepts non-contiguous inputs (e.g. transposed or
        # sliced tensors), copying only when it has to.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# 3. Training & evaluation functions
def train(model, optimizer, criterion, loader, epochs=20, device=None):
    """Train ``model`` on ``loader`` and record per-epoch training metrics.

    Args:
        model: ``nn.Module`` producing class logits; moved to ``device`` in place.
        optimizer: Optimizer already built over ``model.parameters()``.
        criterion: Callable ``criterion(logits, targets)`` returning a scalar loss.
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        epochs: Number of full passes over ``loader``.
        device: Device to train on. ``None`` selects CUDA when available and
            CPU otherwise, so the function no longer needs a module-level
            ``device`` variable to exist.

    Returns:
        ``(losses, accs)``: two lists with one entry per epoch, holding the
        sample-weighted mean training loss and the training accuracy.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples during an epoch.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    losses, accs = [], []
    for _ in range(epochs):
        model.train()
        running_loss = 0.
        correct = total = 0
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(X)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            # Assumes criterion returns the batch mean (the default reduction of
            # nn.CrossEntropyLoss); weighting by batch size keeps a smaller
            # final batch from skewing the epoch average.
            running_loss += loss.item() * X.size(0)
            preds = out.argmax(1)
            correct += (preds == y).sum().item()
            total += y.size(0)
        losses.append(running_loss / total)
        accs.append(correct / total)
    return losses, accs

def test_acc(model, loader):
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for X,y in loader:
            X,y = X.to(device), y.to(device)
            out = model(X)
            correct += (out.argmax(1)==y).sum().item()
            total   += y.size(0)
    return correct/total

criterion = nn.CrossEntropyLoss()

# Re-seed the global RNG before building each model so both optimizers start
# from identical initial weights and the same RNG state. Without this, the gap
# between the curves can come from the random initialization rather than from
# the optimizer, and results change from run to run.
SEED = 0

# --- Train with Adam ---
torch.manual_seed(SEED)
model_adam    = MLP()
opt_adam      = optim.Adam(model_adam.parameters())
loss_adam, acc_adam = train(model_adam, opt_adam, criterion, train_loader)

# --- Train with AdaGrad ---
torch.manual_seed(SEED)
model_adagrad    = MLP()
opt_adagrad      = optim.Adagrad(model_adagrad.parameters())
loss_adagrad, acc_adagrad = train(model_adagrad, opt_adagrad, criterion, train_loader)

# 4. Plotting
# Build the epoch axis from the recorded histories instead of a fixed
# range(1, 21), so the plots stay correct when train() is run with a
# different number of epochs.
epochs_adam    = range(1, len(loss_adam) + 1)
epochs_adagrad = range(1, len(loss_adagrad) + 1)

plt.figure()
plt.plot(epochs_adam,    loss_adam,    label='Adam')
plt.plot(epochs_adagrad, loss_adagrad, label='AdaGrad')
plt.xlabel('Epoch'); plt.ylabel('Loss')
plt.title('Train Loss: Adam vs AdaGrad')
plt.legend(); plt.show()

plt.figure()
plt.plot(epochs_adam,    acc_adam,    label='Adam')
plt.plot(epochs_adagrad, acc_adagrad, label='AdaGrad')
plt.xlabel('Epoch'); plt.ylabel('Accuracy')
plt.title('Train Accuracy: Adam vs AdaGrad')
plt.legend(); plt.show()

# 5. Test accuracies
# Score on the held-out split (test_loader). Passing train_loader here would
# print training accuracy under a "Test Acc" label.
test_adam    = test_acc(model_adam, test_loader)
test_adagrad = test_acc(model_adagrad, test_loader)
print(f"Test Acc (Adam):    {test_adam:.4f}")
print(f"Test Acc (AdaGrad): {test_adagrad:.4f}")


100.0%
100.0%
100.0%
100.0%


KeyboardInterrupt: 