In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
from torchvision.datasets import CIFAR10
## Report whether a CUDA device is visible and, if so, the name of device 0
has_cuda = torch.cuda.is_available()
print(has_cuda)
print(torch.cuda.get_device_name(0) if has_cuda else "No GPU")

True
Tesla T4


In [None]:
## Per-channel (R, G, B) statistics used to normalize CIFAR-10 images
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2470, 0.2435, 0.2616)

## Preprocessing shared by both splits: image -> tensor, then per-channel normalization
preprocessing_steps = [transforms.ToTensor(), transforms.Normalize(mean, std)]
transform = transforms.Compose(preprocessing_steps)

## Both splits live under the same root and are downloaded on first use
dataset_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = CIFAR10(train=True, **dataset_kwargs)
test_dataset  = CIFAR10(train=False, **dataset_kwargs)

batch_size = 256

## Loader settings common to train and test: batches are loaded in the main
## process (num_workers=0) without pinned memory. persistent_workers is left
## unset; it is only valid when num_workers > 0.
loader_kwargs = dict(batch_size=batch_size, num_workers=0, pin_memory=False)

## Only the training split is reshuffled every epoch
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

100%|██████████| 170M/170M [00:02<00:00, 61.0MB/s]


In [None]:
class ResidualBlock(nn.Module):
    """Two-convolution residual block that keeps the channel count and spatial size.

    Layout: Conv -> BN -> ReLU -> Conv -> BN -> (+ input) -> ReLU.
    Both convolutions are 3x3 with padding 1 and the default stride of 1, so
    the output has the same shape as the input and can be added to it directly.

    Args:
        channels: number of input (and output) feature channels.
    """

    def __init__(self, channels):
        super().__init__()

        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        """Return relu(F(x) + x), where F is the Conv-BN-ReLU-Conv-BN branch."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        ## bn2 was constructed but never applied; normalize the second
        ## convolution's output before it is merged with the skip path
        out = self.bn2(out)
        out = out + residual ## skip connection (out-of-place, so x is untouched)
        out = self.relu(out) ## final ReLU
        return out

class ResNet10(nn.Module):
    """Constant-width residual classifier: stem -> residual stack -> global pool -> linear.

    The defaults reproduce the original fixed architecture: a 3 -> 64 channel
    stem, 10 ResidualBlock modules at 64 channels, and a 10-way classifier.
    No layer changes the spatial resolution; it is only collapsed by the
    adaptive average pool before the classifier.

    Args:
        num_blocks: number of ResidualBlock modules stacked after the stem.
        channels: feature width used by the stem output and every block.
        num_classes: number of output logits.
        in_channels: number of channels in the input image.
        dropout: drop probability applied before the final linear layer.
    """

    def __init__(self, num_blocks=10, channels=64, num_classes=10, in_channels=3, dropout=0.3):
        super().__init__() ## initializes the nn.Module internals (parameter/submodule registry)

        ## Stem: lift the input image to `channels` feature maps
        self.stem = nn.Sequential(
            nn.Conv2d(in_channels, channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True)
        )

        ## Residual stack: blocks are created in order, so default construction
        ## matches the original loop
        self.res_blocks = nn.Sequential(
            *(ResidualBlock(channels) for _ in range(num_blocks))
        )
        self.pool = nn.AdaptiveAvgPool2d((1,1)) ## channels x H x W -> channels x 1 x 1

        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(channels, num_classes)
        )

    def forward(self, x):
        """Map a batch of images to unnormalized class logits of shape (batch, num_classes)."""
        x = self.stem(x) ## initial conv/BN/ReLU (64 x 32 x 32 for default CIFAR-10 input)
        x = self.res_blocks(x) ## residual stack; shape unchanged
        x = self.pool(x) ## global average pool -> channels x 1 x 1
        x = torch.flatten(x,1) ## (batch, channels)

        return self.classifier(x)

## Train on the GPU when one is visible, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

## Build the network, move its parameters and buffers to the chosen device,
## and print the layer layout as a sanity check
model = ResNet10()
model = model.to(device)
print(model)

ResNet10(
  (stem): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (res_blocks): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm

In [None]:
## Optimization setup: cross-entropy on the raw logits, Adam with a fixed learning rate
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 300

## Per-epoch history (one entry appended at the end of every completed epoch)
train_losses = []
train_accuracies = []
epoch_times = []

for epoch in range(num_epochs):
    start = time.time()
    model.train() ## training mode: dropout active, BatchNorm uses batch statistics

    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        batch_count = labels.size(0)

        # Forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Tracking
        ## `loss` is the mean over this batch; weight it by the batch size so the
        ## epoch figure is a true per-sample mean even when the last batch is short
        running_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += batch_count

    ## Both metrics are per-sample averages over the epoch. They are measured
    ## in training mode while the weights are still being updated, so they are
    ## a running estimate rather than an evaluation of the final epoch weights.
    avg_loss = running_loss / total
    accuracy = correct / total
    elapsed = time.time() - start

    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)
    epoch_times.append(elapsed)

    print(f"Epoch {epoch+1}/{num_epochs}  "
          f"Loss: {avg_loss:.4f}  "
          f"Accuracy: {accuracy:.4f}  "
          f"Time: {elapsed:.2f}s")

Epoch 1/300  Loss: 1.6573  Accuracy: 0.3683  Time: 72.63s
Epoch 2/300  Loss: 1.2702  Accuracy: 0.5345  Time: 72.75s
Epoch 3/300  Loss: 1.0836  Accuracy: 0.6100  Time: 73.02s
Epoch 4/300  Loss: 0.9619  Accuracy: 0.6572  Time: 73.40s
Epoch 5/300  Loss: 0.8729  Accuracy: 0.6878  Time: 73.25s
Epoch 6/300  Loss: 0.7973  Accuracy: 0.7154  Time: 73.28s
Epoch 7/300  Loss: 0.7484  Accuracy: 0.7327  Time: 73.16s
Epoch 8/300  Loss: 0.6915  Accuracy: 0.7550  Time: 73.19s
Epoch 9/300  Loss: 0.6411  Accuracy: 0.7755  Time: 73.47s
Epoch 10/300  Loss: 0.6024  Accuracy: 0.7901  Time: 73.04s
Epoch 11/300  Loss: 0.5563  Accuracy: 0.8074  Time: 73.12s
Epoch 12/300  Loss: 0.5252  Accuracy: 0.8184  Time: 73.20s
Epoch 13/300  Loss: 0.4894  Accuracy: 0.8329  Time: 73.40s
Epoch 14/300  Loss: 0.4573  Accuracy: 0.8415  Time: 73.42s
Epoch 15/300  Loss: 0.4270  Accuracy: 0.8532  Time: 72.96s
Epoch 16/300  Loss: 0.3971  Accuracy: 0.8637  Time: 72.94s
Epoch 17/300  Loss: 0.3784  Accuracy: 0.8685  Time: 72.83s
Epoch 

KeyboardInterrupt: 