In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

# Install Dependencies

! pip install -U lightning

# Organize Imports

In [None]:
from pathlib import Path

import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
from torchvision import transforms
from torchvision.datasets import MNIST

# Organize Paths

In [None]:
# Root of the local data directory; the locations below are derived from it.
PATH = Path('../data')

# Folder for this notebook's model artefacts (used when naming checkpoints).
model_path = PATH.joinpath('models', 'cifar10_cnn_classifier')
# NOTE(review): despite its name, this variable points at the 'cifar' folder.
MNIST_dir = PATH.joinpath('cifar')

# Create both folders up front; folders that already exist are left untouched.
model_path.mkdir(parents=True, exist_ok=True)
MNIST_dir.mkdir(parents=True, exist_ok=True)

# Initialize Device and Workers

In [None]:
import os

# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 so later arithmetic on `workers` cannot raise a TypeError.
workers = os.cpu_count() or 1
print("Number of CPUs in the system:", workers)

In [None]:
# Choose the accelerator name handed to the Lightning Trainer:
# CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = 'gpu'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    # Must be a plain string. A trailing comma here would build the tuple
    # ('cpu',), which is not a valid accelerator value.
    device = 'cpu'

## Initialize Static Parameters

In [None]:
# Hyperparameters
# Mini-batch size handed to get_data_loaders().
BATCH_SIZE = 64
# Learning rate passed to the Lightning module (used by its Adam optimizer).
LEARNING_RATE = 1e-3
# Weight-decay coefficient passed to the Lightning module's Adam optimizer.
WEIGHT_DECAY = 1e-4
# Number of epochs: used as Trainer(max_epochs=...) and as the cosine
# scheduler's T_max inside the Lightning module.
EPOCHS = 128

# Initialize the Model

In [None]:
class Model(nn.Module):
    """Small CNN classifier for 3x32x32 images (CIFAR10).

    Three Conv-BatchNorm-ReLU-MaxPool stages (32, 64 and 128 channels) feed a
    global average pool, followed by dropout and one linear layer that emits
    ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Assemble the stages into ONE flat Sequential so the layer indices
        # stay 0..11, with the convolutions at indices 0, 4 and 8.
        stage_layers = []
        in_channels = 3
        for out_channels in (32, 64, 128):
            stage_layers.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),  # halves height and width: 32 -> 16 -> 8 -> 4
            ])
            in_channels = out_channels
        self.features = nn.Sequential(*stage_layers)

        # Collapse every 4x4 feature map to a single value per channel.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Classification head: dropout, then a linear map from 128 features.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        # (batch, 3, 32, 32) -> (batch, 128, 4, 4) -> (batch, 128, 1, 1)
        pooled = self.avgpool(self.features(x))
        # Drop the trailing 1x1 spatial dims: (batch, 128)
        flat = pooled.flatten(1)
        return self.classifier(flat)

In [None]:
class CIFAR10LitModule(L.LightningModule):
    """Lightning wrapper that trains and evaluates a classifier with cross-entropy.

    Args:
        model: Network mapping a batch of inputs to class logits.
        lr: Learning rate for the Adam optimizer.
        weight_decay: Weight-decay coefficient for the Adam optimizer.
        max_epochs: ``T_max`` of the cosine-annealing learning-rate schedule.
    """

    def __init__(self, model, lr=1e-3, weight_decay=1e-4, max_epochs=128):
        super().__init__()
        # Keep the network itself out of hparams: it is an nn.Module whose
        # weights are already stored in the checkpoint's state_dict, and
        # Lightning warns when a module is captured as a hyperparameter.
        # Consequence: load_from_checkpoint() must be given model=... explicitly.
        self.save_hyperparameters(ignore=['model'])
        self.model = model

    def forward(self, x):
        return self.model(x)

    def _loss_and_accuracy(self, batch):
        """Return (cross-entropy loss, mean accuracy) for one ``(x, y)`` batch."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        loss, acc = self._loss_and_accuracy(batch)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_epoch=True, prog_bar=True)
        # Lightning back-propagates the returned loss.
        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._loss_and_accuracy(batch)
        # 'val_loss' is the metric the best-checkpoint callback monitors.
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        loss, acc = self._loss_and_accuracy(batch)
        self.log('test_loss', loss)
        self.log('test_acc', acc)

    def configure_optimizers(self):
        """Adam with weight decay plus a cosine-annealing LR schedule."""
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.hparams.lr,
            weight_decay=self.hparams.weight_decay
        )
        scheduler = CosineAnnealingLR(optimizer, T_max=self.hparams.max_epochs)

        return [optimizer], [scheduler]

# Prepare Dataset

In [None]:
# Per-channel mean / std applied identically to the training and evaluation pipelines.
_CHANNEL_MEAN = (0.4914, 0.4822, 0.4465)
_CHANNEL_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random padded crop + horizontal flip for augmentation,
# then tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Evaluation pipeline: no augmentation, same normalization.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

In [None]:
def get_data_loaders(batch_size, root='./data/cifar'):
    """Build the CIFAR-10 training and test data loaders.

    Args:
        batch_size: Number of samples per batch for both loaders.
        root: Directory the dataset is downloaded to / read from.
            NOTE(review): the default differs from the '../data/cifar' folder
            created in the path-setup cell - confirm which one is intended.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles
        and uses ``train_transform``; the test loader does not shuffle and
        uses ``val_transform``.
    """
    # One worker fewer than the detected CPU count, never negative.
    # `workers` may be None if the CPU count could not be determined.
    num_workers = max((workers or 1) - 1, 0)
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        # DataLoader raises ValueError for persistent_workers=True when
        # num_workers == 0 (e.g. on a single-core machine).
        persistent_workers=num_workers > 0,
    )

    train_dataset = datasets.CIFAR10(root=root, train=True, transform=train_transform, download=True)
    test_dataset = datasets.CIFAR10(root=root, train=False, transform=val_transform, download=True)

    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    return train_loader, test_loader

In [None]:
# Prepare data loaders
# Builds the CIFAR-10 training and test loaders using the configured batch size.
train_loader, test_loader = get_data_loaders(BATCH_SIZE)

# Checkpointing the Model

In [None]:
# Keep the single best checkpoint, judged by the lowest validation loss.
# The target directory goes in `dirpath` and only the bare file stem in
# `filename`: a relative path embedded in `filename` is joined onto
# Lightning's default checkpoint directory instead of landing in model_path.
checkpoint_callback = ModelCheckpoint(
    dirpath=model_path,
    filename='best-checkpoint',
    monitor='val_loss',
    save_top_k=1,
    mode='min',
    verbose=True
)

# Additionally keep the most recent checkpoint, independent of any metric.
last_checkpoint_callback = ModelCheckpoint(
    dirpath=model_path,
    filename='last-checkpoint',
    save_last=True,
    verbose=True
)

# Initiate Training

In [None]:
# Model training
# Bare network wrapped in the Lightning module that owns loss, metrics and optimizer.
net = Model(num_classes=10)
model = CIFAR10LitModule(
    net,
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    max_epochs=EPOCHS,
)

# Single-device trainer with both checkpoint callbacks attached.
trainer = L.Trainer(
    accelerator=device,
    devices=1,
    max_epochs=EPOCHS,
    callbacks=[checkpoint_callback, last_checkpoint_callback],
)

# NOTE(review): the test loader is used as the validation set here, so the
# 'val_loss' that selects the best checkpoint is computed on test data.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=test_loader,
)

# Visualize Layer

In [None]:
def visualize_convnet_weights(model: torch.nn.Module):
    """
    Plot a histogram of the values of every trainable weight tensor in the model.

    One figure is shown per parameter whose name contains "weight" and which
    requires gradients. Requires ``matplotlib.pyplot`` to be importable as
    ``plt`` before this function is called.

    Args:
        model (torch.nn.Module): The network whose weights will be visualized.
    """
    for name, param in model.named_parameters():
        # Skip biases and frozen parameters.
        if "weight" not in name or not param.requires_grad:
            continue

        # Detach from the graph, move to CPU, and flatten to a 1D array.
        weights = param.detach().cpu().numpy().flatten()

        # Explicit figure/axes objects instead of the pyplot state machine.
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.hist(weights, bins=50, color='blue', alpha=0.7)
        ax.set_title(f"Weight Distribution for Layer: {name}")
        ax.set_xlabel("Weight Value")
        ax.set_ylabel("Frequency")
        ax.grid(True)
        plt.show()
        # Release the figure so that looping over many layers does not
        # accumulate open figures in memory.
        plt.close(fig)

In [None]:
# Plot weight histograms for the wrapped network (the nn.Module held by the Lightning module).
visualize_convnet_weights(model.model)

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

def visualize_convnet_kernels(model: nn.Module, conv_layer: nn.Module):
    """
    Visualize the kernels (weights) of a convolutional layer in a ConvNet.

    Each output kernel is drawn in its own subplot. Kernels with exactly three
    input channels are shown as RGB images; all others are averaged over the
    input channels and shown in grayscale. Every kernel is min-max scaled to
    [0, 1] independently before display.

    Args:
        model (torch.nn.Module): The convolutional network. Not used by the
            function; kept so existing calls keep working.
        conv_layer (torch.nn.Module): The layer whose kernels are drawn. Its
            ``weight`` must have shape (out_channels, in_channels, kH, kW).

    Raises:
        ValueError: If ``conv_layer.weight`` is not a non-empty 4-D tensor.
    """
    # Detached CPU copy of the weights: (out_channels, in_channels, kH, kW)
    kernels = conv_layer.weight.detach().clone().cpu()
    if kernels.ndim != 4 or kernels.shape[0] == 0:
        raise ValueError(
            "conv_layer.weight must be a non-empty 4-D tensor, "
            f"got shape {tuple(kernels.shape)}"
        )
    num_kernels = kernels.shape[0]

    # Near-square grid with at least num_kernels cells.
    grid_cols = int(np.ceil(np.sqrt(num_kernels)))
    grid_rows = int(np.ceil(num_kernels / grid_cols))
    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for a 1x1 grid (a layer with a single kernel).
    fig, axes = plt.subplots(
        grid_rows,
        grid_cols,
        figsize=(grid_cols * 2, grid_rows * 2),
        squeeze=False,
    )
    axes = axes.flatten()

    for i, (ax, kernel) in enumerate(zip(axes, kernels)):
        # kernel shape: (in_channels, kH, kW)
        if kernel.shape[0] == 3:
            # Three input channels: treat as RGB, permute to (kH, kW, 3).
            kernel_img = kernel.permute(1, 2, 0)
        else:
            # Otherwise average across input channels -> (kH, kW).
            kernel_img = kernel.mean(dim=0)

        # Min-max scale to [0, 1]; a constant kernel stays all zeros.
        kernel_img = kernel_img - kernel_img.min()
        peak = kernel_img.max()
        if peak != 0:
            kernel_img = kernel_img / peak

        if kernel_img.ndim == 3:
            ax.imshow(kernel_img.numpy())
        else:
            ax.imshow(kernel_img.numpy(), cmap='gray')
        ax.set_title(f"Kernel {i}")
        ax.axis('off')

    # Hide the unused cells at the end of the grid.
    for ax in axes[num_kernels:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()
    # Release the figure once it has been displayed.
    plt.close(fig)

In [None]:
# Show the module at index 8 of the feature extractor (the third Conv2d, 64 -> 128 channels).
model.model.features[8]

In [None]:
# Kernels of the first convolution (index 0); it has 3 input channels, so they are drawn as RGB.
visualize_convnet_kernels(model.model, model.model.features[0])

In [None]:
# Kernels of the second convolution (index 4); drawn as the grayscale average over its 32 input channels.
visualize_convnet_kernels(model.model, model.model.features[4])

In [None]:
# Show the whole feature-extractor Sequential together with its layer indices.
model.model.features

In [None]:
# Kernels of the third convolution (index 8); drawn as the grayscale average over its 64 input channels.
visualize_convnet_kernels(model.model, model.model.features[8])

## Analysis of the Vectors