In [7]:
import torch
import torch.nn as nn
import torch.optim
import torchvision
import torchvision.transforms as transforms
import time

from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
## CIFAR-10 normalization vals(3 channels, RGB)
mean = (0.4914, 0.4822, 0.4465) ## one mean + one std per channel
std =  (0.2470, 0.2435, 0.2616)

transform = transforms.Compose([
    transforms.ToTensor(), ## convert PIL img to tensor, convert pixel vals to 0-1 float
    transforms.Normalize(mean, std)

])

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform) ## load training split of CIFAR-10
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform) ## load the test set of 10k images

## load data in mini-batches
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [9]:
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3072, 512), # Hidden layer 1
    nn.ReLU(),

    nn.Linear(512, 256), # Hidden layer 2
    nn.ReLU(),

    nn.Linear(256, 128), # Hidden Layer 3
    nn.ReLU(),

    nn.Linear(128, 10) # Output layer(10 classes)
)
print(model)
model = model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=3072, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=128, bias=True)
  (6): ReLU()
  (7): Linear(in_features=128, out_features=10, bias=True)
)


In [10]:
# Loss, Optimizer, Training setup
loss_fn = nn.CrossEntropyLoss() ## combination of softmax and Nllloss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 300


# Training Loop

train_losses = []
train_accuracies = []
epoch_times = []

for epoch in range(num_epochs):
    start = time.time()
    model.train()

    running_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss
        running_loss += loss.item()

        # Compute accuracy
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Metrics for the epoch
    avg_loss = running_loss / len(train_loader)
    accuracy = correct / total
    elapsed = time.time() - start

    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)
    epoch_times.append(elapsed)

    print(f"Epoch {epoch+1}/{num_epochs}  "
          f"Loss: {avg_loss:.4f}  "
          f"Accuracy: {accuracy:.4f}  "
          f"Time: {elapsed:.2f}s")

Epoch 1/300  Loss: 1.6610  Accuracy: 0.4112  Time: 12.89s
Epoch 2/300  Loss: 1.4504  Accuracy: 0.4874  Time: 12.50s
Epoch 3/300  Loss: 1.3408  Accuracy: 0.5263  Time: 12.59s
Epoch 4/300  Loss: 1.2628  Accuracy: 0.5523  Time: 12.38s
Epoch 5/300  Loss: 1.1847  Accuracy: 0.5781  Time: 12.36s
Epoch 6/300  Loss: 1.1092  Accuracy: 0.6072  Time: 12.44s
Epoch 7/300  Loss: 1.0444  Accuracy: 0.6269  Time: 12.23s
Epoch 8/300  Loss: 0.9816  Accuracy: 0.6488  Time: 12.48s
Epoch 9/300  Loss: 0.9215  Accuracy: 0.6715  Time: 12.38s
Epoch 10/300  Loss: 0.8681  Accuracy: 0.6894  Time: 12.34s
Epoch 11/300  Loss: 0.8207  Accuracy: 0.7070  Time: 12.46s
Epoch 12/300  Loss: 0.7619  Accuracy: 0.7256  Time: 12.38s
Epoch 13/300  Loss: 0.7152  Accuracy: 0.7405  Time: 12.30s
Epoch 14/300  Loss: 0.6733  Accuracy: 0.7590  Time: 12.42s
Epoch 15/300  Loss: 0.6427  Accuracy: 0.7719  Time: 12.53s
Epoch 16/300  Loss: 0.6029  Accuracy: 0.7831  Time: 12.61s
Epoch 17/300  Loss: 0.5677  Accuracy: 0.7965  Time: 12.66s
Epoch 

KeyboardInterrupt: 