In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
from torchvision.datasets import CIFAR10
## Report whether PyTorch can see a CUDA device, then name device 0 if it can
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU")

True
Tesla T4


In [None]:
## Per-channel (R, G, B) mean / std used to normalize CIFAR-10 images
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2470, 0.2435, 0.2616)

## Preprocessing shared by both splits: tensor conversion, then channel-wise normalization
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

## Both splits live under ./data and are downloaded on first use
train_dataset = CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

batch_size = 256

## Loaders run in the main process (num_workers=0) without pinned memory.
## persistent_workers is left off: DataLoader only accepts it when num_workers > 0.

## Training batches are reshuffled every epoch
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=0, pin_memory=False,
)

## Test batches keep the dataset order
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False,
    num_workers=0, pin_memory=False,
)

100%|██████████| 170M/170M [00:05<00:00, 30.9MB/s]


In [None]:
## CNN Model Definition
class CIFAR_CNN(nn.Module):
    """Four-stage convolutional classifier for 32 x 32 images.

    Each stage is a 3x3 convolution (padding=1, so spatial size is kept),
    a ReLU, and a 2x2 max-pool that halves height and width. After four
    stages a 32 x 32 input becomes a 128-channel 2 x 2 map, which is
    flattened and passed through a two-layer fully connected head.

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
        in_channels: Number of channels in the input image. Defaults to 3 (RGB).

    Note:
        The first linear layer is sized for 128 * 2 * 2 features, so inputs
        must be 32 x 32; other spatial sizes fail at the classifier.
    """

    def __init__(self, num_classes=10, in_channels=3): ## sets up the layers when the model is created
        super().__init__() ## calls nn.Module to properly initialize Pytorch internals

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1), ## in_channels inputs, 32 filters
            nn.ReLU(),
            nn.MaxPool2d(2), ## from 32 x 32 to 16 x 16

            nn.Conv2d(32, 64, kernel_size=3, padding=1), ## 32 input channels, 64 filters
            nn.ReLU(),
            nn.MaxPool2d(2), ## from 16 x 16 to 8 x 8

            nn.Conv2d(64, 128, kernel_size=3, padding=1), ## 64 input channels, 128 filters
            nn.ReLU(),
            nn.MaxPool2d(2), ## from 8 x 8 to 4 x 4

            nn.Conv2d(128, 128, kernel_size=3, padding=1), ## 128 input channels, 128 filters
            nn.ReLU(),
            nn.MaxPool2d(2) ## from 4 x 4 to 2 x 2
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 2 * 2, 256), ## 128 channels x 2 x 2 spatial positions
            nn.ReLU(),
            nn.Linear(256, num_classes) ## one raw logit per class (no softmax here)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

## Use the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Build the network, move its parameters to the chosen device, and show the layer summary
model = CIFAR_CNN()
model = model.to(device)
print(model)

CIFAR_CNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=512, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
)


In [None]:
## Training setup: cross-entropy on the raw logits, optimized with Adam
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 300

## Per-epoch history; one value is appended to each list after every epoch
train_losses = []
train_accuracies = []
epoch_times = []

## NOTE(review): this loop reports training-set metrics only; no held-out
## evaluation happens in this cell, so it cannot reveal overfitting.
for epoch in range(num_epochs):
    start = time.time()
    model.train()

    ## running_loss accumulates the SUM of per-sample losses (not per-batch means),
    ## so the epoch average stays exact when the final batch is smaller than the rest
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        batch_count = labels.size(0)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Tracking (uses the outputs computed before this step's weight update)
        ## loss is the batch mean, so scale by the batch size to recover the batch sum
        running_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += batch_count

    ## Both metrics are normalized by the number of samples seen this epoch
    avg_loss = running_loss / total
    accuracy = correct / total
    elapsed = time.time() - start

    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)
    epoch_times.append(elapsed)

    print(f"Epoch {epoch+1}/{num_epochs}  "
          f"Loss: {avg_loss:.4f}  "
          f"Accuracy: {accuracy:.4f}  "
          f"Time: {elapsed:.2f}s")

Epoch 1/300  Loss: 1.6090  Accuracy: 0.4048  Time: 14.92s
Epoch 2/300  Loss: 1.2122  Accuracy: 0.5624  Time: 13.55s
Epoch 3/300  Loss: 1.0139  Accuracy: 0.6399  Time: 14.02s
Epoch 4/300  Loss: 0.8696  Accuracy: 0.6926  Time: 13.29s
Epoch 5/300  Loss: 0.7517  Accuracy: 0.7361  Time: 13.26s
Epoch 6/300  Loss: 0.6526  Accuracy: 0.7702  Time: 13.36s
Epoch 7/300  Loss: 0.5667  Accuracy: 0.8036  Time: 14.23s
Epoch 8/300  Loss: 0.5022  Accuracy: 0.8241  Time: 13.74s
Epoch 9/300  Loss: 0.4350  Accuracy: 0.8478  Time: 13.93s
Epoch 10/300  Loss: 0.3619  Accuracy: 0.8749  Time: 13.36s
Epoch 11/300  Loss: 0.3009  Accuracy: 0.8943  Time: 13.29s
Epoch 12/300  Loss: 0.2386  Accuracy: 0.9173  Time: 13.16s
Epoch 13/300  Loss: 0.2032  Accuracy: 0.9284  Time: 13.24s
Epoch 14/300  Loss: 0.1616  Accuracy: 0.9427  Time: 13.22s
Epoch 15/300  Loss: 0.1278  Accuracy: 0.9550  Time: 13.32s
Epoch 16/300  Loss: 0.1179  Accuracy: 0.9584  Time: 13.39s
Epoch 17/300  Loss: 0.1105  Accuracy: 0.9615  Time: 13.66s
Epoch 