<a href="https://colab.research.google.com/github/myllanes/Introduction-to-Deep-Learning/blob/main/HW6_1_RES_18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Michael Yllanes
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
from tqdm import tqdm
from torchvision.models import resnet18, ResNet18_Weights

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Reproducibility: seed PyTorch's global RNG before anything random happens
# (shuffling, augmentation, dropout, initialisation of newly created layers).
# This is best-effort only: some GPU kernels may still be non-deterministic.
seed = 42
torch.manual_seed(seed)

# Hyperparameters
num_classes = 100     # 100 classes for CIFAR-100
batch_size = 64      # batch size
num_epochs = 30       # Training epochs
learning_rate = 0.001
weight_decay = 1e-4   # Weight decay

# Per-channel CIFAR-100 statistics, defined once so that the train and test
# pipelines cannot drift apart.
cifar100_mean = (0.5071, 0.4867, 0.4408)
cifar100_std = (0.2675, 0.2565, 0.2761)

# Training pipeline: random augmentation, then tensor conversion and normalisation
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(cifar100_mean, cifar100_std)
])

# Test pipeline: no augmentation, same normalisation as training
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar100_mean, cifar100_std)
])

# CIFAR-100 dataset (downloaded into ./data on first use)
train_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=test_transform)

# Only the training loader shuffles; the test loader keeps a fixed order
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                         shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size,
                         shuffle=False, num_workers=4, pin_memory=True)

class ResNet18_CIFAR100(nn.Module):
    """torchvision ResNet-18 adapted to 32x32 CIFAR-100 inputs.

    Args:
        pretrained: when True the backbone is created with the
            ``ResNet18_Weights.IMAGENET1K_V1`` weights, otherwise with
            ``weights=None``.

    Notes:
        * The stem convolution and the classifier are new layers created
          after the backbone is built, so they are never loaded from the
          pretrained weights.
        * The classifier width is read from the module-level ``num_classes``
          at construction time.
    """
    def __init__(self, pretrained=True):
        super().__init__()

        # Backbone, optionally starting from ImageNet weights
        backbone_weights = ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        self.resnet = resnet18(weights=backbone_weights)

        # Stem: a 3x3 stride-1 convolution with the max-pool turned into a
        # no-op, so the 32x32 input resolution is preserved by the stem conv.
        self.resnet.conv1 = nn.Conv2d(
            in_channels=3, out_channels=64,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.resnet.maxpool = nn.Identity()

        # Head: dropout for regularisation followed by the class projection
        feature_dim = self.resnet.fc.in_features
        regulariser = nn.Dropout(0.3)
        classifier = nn.Linear(feature_dim, num_classes)
        self.resnet.fc = nn.Sequential(regulariser, classifier)

    def forward(self, x):
        """Return the class logits produced by the wrapped ResNet for `x`."""
        logits = self.resnet(x)
        return logits

# Build the network and move it to the selected device
model = ResNet18_CIFAR100(pretrained=True)
model = model.to(device)

# Cross-entropy with label smoothing
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Two parameter groups: everything outside the classifier head trains at a
# tenth of the base learning rate, the head trains at the full rate.
backbone_params = [
    param for name, param in model.named_parameters()
    if param.requires_grad and 'fc' not in name
]
head_params = list(model.resnet.fc.parameters())

param_groups = [
    {'params': backbone_params, 'lr': learning_rate/10},
    {'params': head_params, 'lr': learning_rate},
]

# AdamW with the same weight decay applied to both groups
optimizer = torch.optim.AdamW(param_groups, weight_decay=weight_decay)

# Cosine annealing of the learning rate over the whole run (stepped once per epoch)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

def train_epoch(model, loader, optimizer, criterion):
    """Train `model` for one pass over `loader` and return epoch metrics.

    The model is switched to training mode. Every batch is moved to the
    module-level `device`, followed by a forward pass, a backward pass and
    one optimizer step.

    Args:
        model: network to optimise.
        loader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer that updates the model parameters.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        tuple ``(avg_loss, accuracy)``: the sample-weighted mean loss and the
        top-1 accuracy in percent (0-100). The weighting assumes `criterion`
        returns a per-batch mean.

    Raises:
        ZeroDivisionError: if `loader` yields no samples.
    """
    model.train()
    total_loss, total_correct, total_samples = 0.0, 0, 0

    for images, labels in tqdm(loader, desc="Training"):
        # non_blocking only has an effect when the source batch is in pinned
        # memory; otherwise it silently behaves like a normal copy.
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Metrics: detach() replaces the discouraged `.data` access, and
        # argmax yields the index of the largest logit per sample.
        predicted = outputs.detach().argmax(dim=1)
        num_in_batch = labels.size(0)
        total_samples += num_in_batch
        total_correct += (predicted == labels).sum().item()
        total_loss += loss.item() * num_in_batch

    avg_loss = total_loss / total_samples
    accuracy = 100 * total_correct / total_samples
    return avg_loss, accuracy

def evaluate(model, loader, criterion):
    """Evaluate `model` on `loader` without updating any weights.

    The model is switched to evaluation mode and all forward passes run under
    ``torch.no_grad()``. Every batch is moved to the module-level `device`.

    Args:
        model: network to evaluate.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        tuple ``(avg_loss, accuracy)``: the sample-weighted mean loss and the
        top-1 accuracy in percent (0-100). The weighting assumes `criterion`
        returns a per-batch mean.

    Raises:
        ZeroDivisionError: if `loader` yields no samples.
    """
    model.eval()
    total_loss, total_correct, total_samples = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Evaluating"):
            # non_blocking only has an effect when the source batch is in
            # pinned memory; otherwise it behaves like a normal copy.
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # No graph is recorded under no_grad, so the logits can be used
            # directly; argmax yields the predicted class per sample.
            predicted = outputs.argmax(dim=1)
            num_in_batch = labels.size(0)
            total_samples += num_in_batch
            total_correct += (predicted == labels).sum().item()
            total_loss += loss.item() * num_in_batch

    avg_loss = total_loss / total_samples
    accuracy = 100 * total_correct / total_samples
    return avg_loss, accuracy

# Training loop with validation and timing
# NOTE: the "Val" metrics below are computed on test_loader, i.e. the CIFAR-100
# test split; there is no separate validation split in this notebook.
best_acc = 0.0              # highest "Val" accuracy observed across epochs
total_training_time = 0.0   # sum of the per-epoch wall-clock times

print("\nStarting training...")
overall_start_time = time.time()

for epoch in range(num_epochs):
    epoch_start_time = time.time()

    print(f"\nEpoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, test_loader, criterion)

    # One scheduler step per epoch
    scheduler.step()

    # Keep the best accuracy so it can be reported after training
    if val_acc > best_acc:
        best_acc = val_acc

    # Calculate epoch time (covers both the training and the evaluation pass)
    epoch_time = time.time() - epoch_start_time
    total_training_time += epoch_time

    # Print metrics
    print(f"Time: {epoch_time:.2f}s")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

# Calculate total training time
total_time = time.time() - overall_start_time


# Print final results and timing information
print(f"Total Training Time: {total_time:.2f} seconds")
print(f"Average Time per Epoch: {total_time/num_epochs:.2f} seconds")
print(f"Best Val Acc: {best_acc:.2f}%")

Using device: cuda

Starting training...

Epoch 1/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.94it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 75.78it/s]


Time: 18.87s
Train Loss: 3.5005 | Train Acc: 23.20%
Val Loss: 2.5645 | Val Acc: 45.18%

Epoch 2/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.84it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 75.44it/s]


Time: 18.96s
Train Loss: 2.5964 | Train Acc: 44.65%
Val Loss: 2.1821 | Val Acc: 56.73%

Epoch 3/10


Training: 100%|██████████| 391/391 [00:18<00:00, 21.68it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 75.17it/s]


Time: 19.09s
Train Loss: 2.2820 | Train Acc: 53.51%
Val Loss: 2.0465 | Val Acc: 60.62%

Epoch 4/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.84it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 75.53it/s]


Time: 18.96s
Train Loss: 2.0972 | Train Acc: 59.21%
Val Loss: 1.9244 | Val Acc: 64.36%

Epoch 5/10


Training: 100%|██████████| 391/391 [00:17<00:00, 22.03it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 76.78it/s]


Time: 18.78s
Train Loss: 1.9527 | Train Acc: 63.86%
Val Loss: 1.8626 | Val Acc: 66.71%

Epoch 6/10


Training: 100%|██████████| 391/391 [00:17<00:00, 22.07it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 77.20it/s]


Time: 18.75s
Train Loss: 1.8446 | Train Acc: 67.20%
Val Loss: 1.8111 | Val Acc: 68.18%

Epoch 7/10


Training: 100%|██████████| 391/391 [00:17<00:00, 22.03it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 76.91it/s]


Time: 18.79s
Train Loss: 1.7622 | Train Acc: 69.97%
Val Loss: 1.7685 | Val Acc: 69.28%

Epoch 8/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.94it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 74.49it/s]


Time: 18.89s
Train Loss: 1.6968 | Train Acc: 72.39%
Val Loss: 1.7407 | Val Acc: 70.47%

Epoch 9/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.87it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 76.09it/s]


Time: 18.92s
Train Loss: 1.6501 | Train Acc: 73.79%
Val Loss: 1.7290 | Val Acc: 70.78%

Epoch 10/10


Training: 100%|██████████| 391/391 [00:17<00:00, 21.90it/s]
Evaluating: 100%|██████████| 79/79 [00:01<00:00, 74.08it/s]

Time: 18.92s
Train Loss: 1.6318 | Train Acc: 74.48%
Val Loss: 1.7241 | Val Acc: 70.97%
Total Training Time: 188.92 seconds
Average Time per Epoch: 18.89 seconds





In [3]:
!pip install torchinfo
from torchinfo import summary
import time

# Using torchinfo
# Model summary
summary(model, input_size=(batch_size, 3, 32, 32),
       verbose=1, col_names=["input_size", "output_size", "num_params", "mult_adds"])

# Timing
repetitions = 100
warmup_iterations = 10
timings = []  # per-forward-pass latency in milliseconds

# CUDA events only exist on a GPU; fall back to wall-clock timing on CPU so
# the cell also runs when `device` resolved to 'cpu'.
use_cuda_events = device.type == 'cuda'

# Benchmark inference behaviour explicitly instead of relying on whatever
# mode earlier code left the model in.
model.eval()

with torch.no_grad():
    # Warm-up passes (not timed); run under no_grad like the measured passes
    for _ in range(warmup_iterations):
        _ = model(torch.randn(batch_size, 3, 32, 32, device=device))

    if use_cuda_events:
        starter = torch.cuda.Event(enable_timing=True)
        ender = torch.cuda.Event(enable_timing=True)

    # Measurement
    for _ in range(repetitions):
        inputs = torch.randn(batch_size, 3, 32, 32, device=device)
        if use_cuda_events:
            starter.record()
            _ = model(inputs)
            ender.record()
            # Wait for the GPU to finish before reading the elapsed time
            torch.cuda.synchronize()
            timings.append(starter.elapsed_time(ender))
        else:
            start = time.perf_counter()
            _ = model(inputs)
            timings.append((time.perf_counter() - start) * 1000.0)

avg_time = sum(timings) / repetitions
print(f"Average forward pass time: {avg_time:.2f}ms")
print(f"Throughput: {1000/(avg_time)*batch_size:.0f} samples/sec")

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Layer (type:depth-idx)                        Input Shape               Output Shape              Param #                   Mult-Adds
ResNet18_CIFAR100                             [64, 3, 32, 32]           [64, 100]                 --                        --
├─ResNet: 1-1                                 [64, 3, 32, 32]           [64, 100]                 --                        --
│    └─Conv2d: 2-1                            [64, 3, 32, 32]           [64, 64, 32, 32]          1,728                     113,246,208
│    └─BatchNorm2d: 2-2                       [64, 64, 32, 32]          [64, 64, 32, 32]          128                       8,192
│    └─ReLU: 2-3                              [64, 64, 32, 32]          [64, 64, 32, 32]          --                        --
│