# PlayTorch — Multilayer Perceptron (MLP)

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as T


# ---------- 1) Device ----------
# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


# ---------- 2) Data ----------
# CIFAR-10 images are 32x32 RGB. We convert to tensor and normalize.
# Normalization uses per-channel mean/std commonly used for CIFAR-10.
# The constants are defined once so the train and test pipelines cannot drift apart.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random flip + padded random crop, then tensor conversion
# and normalization.
transform_train = T.Compose([
    T.RandomHorizontalFlip(),          # light augmentation
    T.RandomCrop(32, padding=4),       # light augmentation
    T.ToTensor(),
    T.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalization.
transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Both splits are stored under ./datasets (download=True requests a download).
train_ds = torchvision.datasets.CIFAR10(
    root="./datasets", train=True, download=True, transform=transform_train
)
test_ds = torchvision.datasets.CIFAR10(
    root="./datasets", train=False, download=True, transform=transform_test
)

# Pinned (page-locked) host memory only helps host-to-GPU copies, so request it
# only when the selected device is CUDA instead of unconditionally.
use_pinned_memory = device.type == "cuda"

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(
    train_ds,
    batch_size=128,
    shuffle=True,
    num_workers=2,
    pin_memory=use_pinned_memory,
)
test_loader = DataLoader(
    test_ds,
    batch_size=256,
    shuffle=False,
    num_workers=2,
    pin_memory=use_pinned_memory,
)

classes = train_ds.classes
print("Train size:", len(train_ds), "Test size:", len(test_ds))
print("Classes:", classes)


Using device: cpu


  entry = pickle.load(f, encoding="latin1")


Train size: 50000 Test size: 10000
Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [None]:
# ---------- 3) Model (MLP) ----------
class MLP(nn.Module):
    """Fully connected classifier: Linear/ReLU hidden layers and a Linear head.

    Args:
        in_dim: Number of features per sample after flattening
            (default 3*32*32).
        num_classes: Number of output logits (default 10).
        hidden_dims: Widths of the hidden layers, in order. The default
            (512, 256, 128) builds the same seven-module stack, in the same
            order, as the previous hard-coded definition. An empty sequence
            yields a single Linear layer from ``in_dim`` to ``num_classes``.
    """

    def __init__(self, in_dim=3*32*32, num_classes=10, hidden_dims=(512, 256, 128)):
        super().__init__()          # Initialize the parent class

        # Build Linear -> ReLU pairs for every hidden width, tracking the
        # feature count that feeds the next layer.
        layers = []
        prev_dim = in_dim
        for width in hidden_dims:
            layers.append(nn.Linear(prev_dim, width))
            layers.append(nn.ReLU())
            prev_dim = width

        # Output layer: raw logits, no activation.
        layers.append(nn.Linear(prev_dim, num_classes))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits for a batch ``x``.

        Every dimension after the first is flattened into one feature axis
        before the layers are applied, so ``x`` needs a leading batch dimension.
        """
        x = x.flatten(1)  # keep dim 0, merge the rest
        return self.net(x)
    
# Build the network with its default sizes, then move it to the selected device.
model = MLP()
model = model.to(device)
print(model)

MLP(
  (net): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [14]:
# ---------- 4) Loss + Optimizer ----------
# Adam updates every parameter of `model` with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Cross-entropy is applied to the raw logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

# ---------- 5) Training Loop ----------
def train_one_epoch(model, loader, optimizer, criterion):
    """Train ``model`` for one full pass over ``loader``.

    Args:
        model: Network to optimize; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are reset and which is stepped
            once per batch.
        criterion: Loss called as ``criterion(logits, labels)``. It is assumed
            to return the batch-mean loss (as ``nn.CrossEntropyLoss`` does by
            default), which is why it is re-weighted by the batch size below.

    Returns:
        Tuple ``(mean loss per sample, accuracy)`` where accuracy is the
        fraction of correct predictions.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()

    # Send batches to wherever the model's parameters live instead of relying
    # on a module-level global that may not match the model. A model without
    # parameters has no device of its own, so only then use the global `device`.
    first_param = next(model.parameters(), None)
    target_device = device if first_param is None else first_param.device

    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(target_device, non_blocking=True)
        labels = labels.to(target_device, non_blocking=True)

        optimizer.zero_grad()

        logits = model(images)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        # Undo the per-batch averaging so unequal batch sizes are weighted correctly.
        running_loss += loss.item() * images.size(0)

        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    return running_loss / total, correct / total

@torch.no_grad()
def evaluate(model, loader, criterion):
    """Measure loss and accuracy of ``model`` over ``loader`` without training.

    Gradient tracking is disabled by the decorator and the model is switched
    to evaluation mode; it is left in that mode on return.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``. It is assumed
            to return the batch-mean loss (as ``nn.CrossEntropyLoss`` does by
            default), which is why it is re-weighted by the batch size below.

    Returns:
        Tuple ``(mean loss per sample, accuracy)`` where accuracy is the
        fraction of correct predictions.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()

    # Send batches to wherever the model's parameters live instead of relying
    # on a module-level global that may not match the model. A model without
    # parameters has no device of its own, so only then use the global `device`.
    first_param = next(model.parameters(), None)
    target_device = device if first_param is None else first_param.device

    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(target_device, non_blocking=True)
        labels = labels.to(target_device, non_blocking=True)

        logits = model(images)
        loss = criterion(logits, labels)

        # Undo the per-batch averaging so unequal batch sizes are weighted correctly.
        running_loss += loss.item() * images.size(0)

        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    return running_loss / total, correct / total

In [15]:
# ---------- 6) Run Training ----------
# Each epoch is one optimization pass over the training loader followed by an
# evaluation pass over the test loader; both metric pairs go on one line.
num_epochs = 10
for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    test_loss, test_acc = evaluate(model, test_loader, criterion)

    # sep="" joins the three pieces into the same single line as before.
    print(
        f"Epoch {epoch+1}/{num_epochs}: ",
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | ",
        f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}",
        sep="",
    )

Epoch 1/10: Train Loss: 1.8087, Train Acc: 0.3470 | Test Loss: 1.6492, Test Acc: 0.4103
Epoch 2/10: Train Loss: 1.6419, Train Acc: 0.4094 | Test Loss: 1.6077, Test Acc: 0.4221
Epoch 3/10: Train Loss: 1.5649, Train Acc: 0.4401 | Test Loss: 1.5847, Test Acc: 0.4335
Epoch 4/10: Train Loss: 1.5187, Train Acc: 0.4530 | Test Loss: 1.6076, Test Acc: 0.4270
Epoch 5/10: Train Loss: 1.4789, Train Acc: 0.4681 | Test Loss: 1.5108, Test Acc: 0.4636
Epoch 6/10: Train Loss: 1.4529, Train Acc: 0.4767 | Test Loss: 1.5482, Test Acc: 0.4420
Epoch 7/10: Train Loss: 1.4317, Train Acc: 0.4833 | Test Loss: 1.5217, Test Acc: 0.4645
Epoch 8/10: Train Loss: 1.4045, Train Acc: 0.4960 | Test Loss: 1.5072, Test Acc: 0.4742
Epoch 9/10: Train Loss: 1.3928, Train Acc: 0.4977 | Test Loss: 1.4567, Test Acc: 0.4814
Epoch 10/10: Train Loss: 1.3806, Train Acc: 0.5013 | Test Loss: 1.4982, Test Acc: 0.4741
