In [18]:
# We start by importing all the required modules from PyTorch and other libraries.
# - `torch`: The main PyTorch library.
# - `torch.nn`: Contains building blocks for neural networks (layers, loss functions).
# - `torch.optim`: Provides optimization algorithms (like Adam, SGD).
# - `torchvision`: Offers popular datasets, model architectures, and image transformations.
# - `torchvision.transforms`: Contains common image transformations.
# - `torch.utils.data.DataLoader`: Handles batching and shuffling of data.
# - `tqdm`: A utility for creating smart progress bars for loops.
# - `torch.nn.functional as F`: Provides functional interfaces for some nn operations (though we mostly use nn.Module layers here).

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm  # For progress bars
import torch.nn.functional as F # Often used for activation functions if not defined as layers

print("Libraries imported successfully.")
# Check PyTorch version and CUDA availability
print(f"PyTorch Version: {torch.__version__}")
print(f"Torchvision Version: {torchvision.__version__}")

Libraries imported successfully.
PyTorch Version: 2.6.0
Torchvision Version: 0.21.0


In [20]:
# Configuration and Hyperparameters
# Define key parameters for the dataset, model, and training process.
# - `BATCH_SIZE`: How many images are processed in one go. Affects memory usage and training dynamics.
# - `LEARNING_RATE`: Controls how much the model weights are adjusted during optimization.
# - `NUM_EPOCHS`: How many times the entire training dataset is passed through the model.
# - `NUM_CLASSES`: CIFAR-100 has 100 distinct image categories.
# - `DATA_DIR`: Where to download and store the CIFAR-100 dataset locally.
# - `DEVICE`: Automatically select GPU ('cuda') if available, otherwise use CPU ('cpu'). Training is much faster on GPU.

# %%
BATCH_SIZE = 64          # Number of images per batch
LEARNING_RATE = 0.001    # Learning rate for the optimizer
NUM_EPOCHS = 20          # Number of times to iterate over the entire dataset
NUM_CLASSES = 100        # CIFAR-100 has 100 classes
DATA_DIR = './data_cifar100' # Directory to store dataset

# Determine the device to run the training on (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Configuration:")
print(f"  Batch Size: {BATCH_SIZE}")
print(f"  Learning Rate: {LEARNING_RATE}")
print(f"  Number of Epochs: {NUM_EPOCHS}")
print(f"  Number of Classes: {NUM_CLASSES}")
print(f"  Data Directory: {DATA_DIR}")
print(f"  Using device: {device}")

Configuration:
  Batch Size: 64
  Learning Rate: 0.001
  Number of Epochs: 20
  Number of Classes: 100
  Data Directory: ./data_cifar100
  Using device: cpu


In [22]:
# %% [markdown]
# ## Cell 3: Define Data Transformations
# Specify the preprocessing steps to apply to each image.
# - `transforms.ToTensor()`: Converts PIL Images (or NumPy arrays) with pixel values in [0, 255] range to PyTorch Tensors with values in [0.0, 1.0]. It also rearranges dimensions from HxWxC to CxHxW (Channels first).
# - `transforms.Normalize()`: Normalizes tensor image channels using specified mean and standard deviation. This helps stabilize training by centering data around zero. We use (0.5, 0.5, 0.5) for both mean and std to scale pixel values from [0, 1] to [-1, 1]. Other common choices are pre-calculated means/stds specific to CIFAR-100.
# - `transforms.Compose()`: Chains multiple transformations together.

# %%
# Define the sequence of transformations for the input images
transform = transforms.Compose([
    transforms.ToTensor(), # Convert image to PyTorch Tensor (scales to [0, 1])
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) # Normalize to [-1, 1] range
    # Alternative: Use pre-calculated CIFAR-100 means/stds
    # transforms.Normalize(mean=[0.5071, 0.4867, 0.4408], std=[0.2675, 0.2565, 0.2761])
])

print("Data transformations defined:")
print(transform)

Data transformations defined:
Compose(
    ToTensor()
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
)


In [24]:
# %% [markdown]
# ## Cell 4: Load Training Dataset
# Download (if needed) and load the CIFAR-100 training set using `torchvision.datasets`.
# - `root`: Specifies the directory to store the data.
# - `train=True`: Indicates that we want the training split of the dataset.
# - `download=True`: Allows downloading the dataset if it's not found in the `root` directory.
# - `transform`: Applies the predefined transformations to each image.

# %%
print("Loading Training Dataset...")
train_dataset = torchvision.datasets.CIFAR100(
    root=DATA_DIR,
    train=True,
    download=True,
    transform=transform
)
print(f"Training dataset loaded. Number of samples: {len(train_dataset)}")
print(f"Number of classes: {len(train_dataset.classes)}")
# print(f"Class names sample: {train_dataset.classes[:10]}") # Uncomment to see some class names

Loading Training Dataset...


100.0%


Training dataset loaded. Number of samples: 50000
Number of classes: 100


In [28]:
# %% [markdown]
# ## Cell 6: Create DataLoaders
# Wrap the datasets in `DataLoader` objects. This handles:
# - **Batching:** Grouping data samples into batches of size `BATCH_SIZE`.
# - **Shuffling:** Randomly shuffling the training data at the beginning of each epoch to improve generalization (`shuffle=True` for train_loader). Shuffling is typically not needed for the test set.
# - **Parallel Loading:** Using multiple worker processes (`num_workers`) to load data in the background, speeding up training.

# %%
print("Creating DataLoaders...")
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # Shuffle training data each epoch
    num_workers=2  # Adjust based on your system's capability
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False, # No need to shuffle test data
    num_workers=2
)

print(f"DataLoaders created.")
print(f"  Number of training batches: {len(train_loader)}")
print(f"  Number of testing batches: {len(test_loader)}")

Creating DataLoaders...
DataLoaders created.
  Number of training batches: 782
  Number of testing batches: 157


In [30]:
# Inspect a Batch 
# It's good practice to check the output of the DataLoader. We fetch one batch and print the shape of the images and labels tensors to ensure they match expectations.

# %%
print("Inspecting a sample batch...")
# Get one batch of training data
dataiter = iter(train_loader)
images, labels = next(dataiter)

print(f"  Images batch shape: {images.shape}")
# Expected: [BATCH_SIZE, 3 (Channels), 32 (Height), 32 (Width)]

print(f"  Labels batch shape: {labels.shape}")
# Expected: [BATCH_SIZE]

print(f"  Example label value: {labels[0].item()}") # Print the label of the first image in the batch
print(f"  Image tensor min value: {images.min():.2f}") # Check normalization effect
print(f"  Image tensor max value: {images.max():.2f}") # Check normalization effect

Inspecting a sample batch...
  Images batch shape: torch.Size([64, 3, 32, 32])
  Labels batch shape: torch.Size([64])
  Example label value: 69
  Image tensor min value: -1.00
  Image tensor max value: 1.00


In [32]:
# %% [markdown]
# ## Cell 8: Define the CNN Model Architecture
# Define the structure of our Convolutional Neural Network using `nn.Module`.
# The architecture follows the proposal: Conv -> ReLU -> Pool -> Conv -> ReLU -> Pool -> Conv -> ReLU -> Pool -> Flatten -> FC -> ReLU -> Dropout -> FC.
# - `nn.Conv2d`: Applies 2D convolution. `padding=1` with `kernel_size=3` preserves spatial dimensions before pooling.
# - `nn.ReLU`: Rectified Linear Unit activation function introduces non-linearity.
# - `nn.MaxPool2d`: Reduces spatial dimensions (height, width) by taking the max value in a window, helping to make the model more robust to variations in feature positions and reducing computation. `kernel_size=2, stride=2` halves the dimensions.
# - `nn.Flatten`: Converts the 3D feature map (Channels x Height x Width) from the conv layers into a 1D vector suitable for Fully Connected layers.
# - `nn.Linear`: Applies a linear transformation (fully connected layer). The input size `128 * 4 * 4` is calculated based on the output shape after the last pooling layer (128 channels, 4x4 spatial size).
# - `nn.Dropout`: Randomly sets a fraction (`p=0.5`) of input units to 0 during training, acting as a regularization technique to prevent overfitting.
# - **Note on Softmax:** We don't add a `nn.Softmax` layer at the end because `nn.CrossEntropyLoss` (used later) internally computes Softmax for numerical stability and efficiency. The model outputs raw scores (logits).

# %%
class CIFAR100_CNN(nn.Module):
    """Convolutional Neural Network architecture for CIFAR-100."""
    def __init__(self, num_classes=NUM_CLASSES):
        super(CIFAR100_CNN, self).__init__()

        # --- Convolutional Block 1 ---
        # Input: [BATCH_SIZE, 3, 32, 32]
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Output: [BATCH_SIZE, 32, 32, 32] (padding preserves size)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Output: [BATCH_SIZE, 32, 16, 16] (pooling halves size)

        # --- Convolutional Block 2 ---
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        # Output: [BATCH_SIZE, 64, 16, 16]
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Output: [BATCH_SIZE, 64, 8, 8]

        # --- Convolutional Block 3 ---
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        # Output: [BATCH_SIZE, 128, 8, 8]
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Output: [BATCH_SIZE, 128, 4, 4]

        # --- Flatten Layer ---
        self.flatten = nn.Flatten()
        # Output: [BATCH_SIZE, 128 * 4 * 4] = [BATCH_SIZE, 2048]

        # --- Fully Connected Block ---
        # Calculate flattened features size: channels * height * width = 128 * 4 * 4 = 2048
        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=512)
        # Output: [BATCH_SIZE, 512]
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5) # Dropout layer for regularization
        self.fc2 = nn.Linear(in_features=512, out_features=num_classes)
        # Output: [BATCH_SIZE, NUM_CLASSES] (raw logits)

    def forward(self, x):
        """Defines the forward pass of the network."""
        # Pass through convolutional blocks
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))

        # Flatten the output
        x = self.flatten(x)

        # Pass through fully connected layers
        x = self.relu4(self.fc1(x))
        x = self.dropout(x) # Apply dropout during training
        x = self.fc2(x)     # Final output logits
        return x

print("CNN Model Class Defined.")

CNN Model Class Defined.


In [34]:
# %% [markdown]
# ## Cell 9: Instantiate Model and Move to Device
# Create an instance of our defined `CIFAR100_CNN` model.
# Then, move the model's parameters and buffers to the selected `device` (GPU or CPU). This ensures computations happen on the desired hardware.

# %%
model = CIFAR100_CNN(num_classes=NUM_CLASSES).to(device)

# Print the model structure (optional, but useful)
print("Model instantiated and moved to device:")
print(model)

# You can also count parameters (optional)
# num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f"Total trainable parameters: {num_params:,}")

Model instantiated and moved to device:
CIFAR100_CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU()
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (relu4): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=100, bias=True)
)


In [36]:
# %% [markdown]
# ## Cell 10: Define Loss Function
# Choose the loss function (or criterion) to measure the difference between the model's predictions and the actual labels.
# - `nn.CrossEntropyLoss`: This is the standard loss function for multi-class classification problems. It combines `nn.LogSoftmax` and `nn.NLLLoss` (Negative Log Likelihood Loss) in one class, making it numerically stable and convenient. It expects raw logits from the model (output of the last linear layer) and integer class labels.

# %%
criterion = nn.CrossEntropyLoss()
print(f"Loss function defined: {criterion}")

Loss function defined: CrossEntropyLoss()


In [38]:
# %% [markdown]
# ## Cell 11: Define Optimizer
# Select the optimization algorithm that will update the model's weights based on the gradients calculated during backpropagation.
# - `optim.Adam`: A popular and generally effective adaptive learning rate optimization algorithm. We pass it the model's parameters (`model.parameters()`) which need to be updated, and the `LEARNING_RATE`.

# %%
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
print(f"Optimizer defined: {optimizer}")

Optimizer defined: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)


In [40]:
# %% [markdown]
# ## Cell 12: Training and Validation Loop
# This is the core part where the model learns. We iterate for `NUM_EPOCHS`.
# In each epoch:
# 1.  **Training Phase:**
#     - Set the model to training mode (`model.train()`). This enables layers like Dropout.
#     - Iterate through batches from the `train_loader`.
#     - Move data to the `device`.
#     - **Zero Gradients:** Clear gradients from the previous batch (`optimizer.zero_grad()`).
#     - **Forward Pass:** Get model predictions (`outputs = model(inputs)`).
#     - **Calculate Loss:** Compute the loss using `criterion`.
#     - **Backward Pass:** Calculate gradients (`loss.backward()`).
#     - **Optimize:** Update model weights (`optimizer.step()`).
#     - Track loss and accuracy.
# 2.  **Validation Phase:**
#     - Set the model to evaluation mode (`model.eval()`). This disables layers like Dropout and adjusts layers like BatchNorm.
#     - Disable gradient computation (`with torch.no_grad():`) for efficiency, as we don't need gradients during evaluation.
#     - Iterate through batches from the `test_loader`.
#     - Calculate loss and accuracy on the test set.
# 3.  Print summary statistics for the epoch.

# %%
print(f"\nStarting training for {NUM_EPOCHS} epochs on {device}...")

# Loop over the dataset multiple times
for epoch in range(NUM_EPOCHS):

    # --- Training Phase ---
    model.train()  # Set model to training mode (enables dropout, etc.)
    running_loss_train = 0.0
    correct_train = 0
    total_train = 0

    # Use tqdm for a progress bar over the training batches
    train_pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Training]")

    for inputs, labels in train_pbar:
        # Move inputs and labels to the configured device (GPU/CPU)
        inputs, labels = inputs.to(device), labels.to(device)

        # --- Core Training Steps ---
        # 1. Zero the parameter gradients from previous iteration
        optimizer.zero_grad()

        # 2. Forward pass: compute predicted outputs by passing inputs to the model
        outputs = model(inputs)

        # 3. Calculate the loss
        loss = criterion(outputs, labels)

        # 4. Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # 5. Perform a single optimization step (parameter update)
        optimizer.step()
        # --- End Core Training Steps ---

        # Update statistics for the current batch
        running_loss_train += loss.item() * inputs.size(0) # loss.item() is avg loss per item in batch
        _, predicted = torch.max(outputs.data, 1) # Get the index of the max log-probability
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

        # Update progress bar description dynamically
        current_acc_train = 100. * correct_train / total_train
        train_pbar.set_postfix({'Loss': f"{loss.item():.4f}", 'Acc': f"{current_acc_train:.2f}%"})

    # Calculate average loss and accuracy for the epoch (training)
    epoch_loss_train = running_loss_train / len(train_loader.dataset)
    epoch_acc_train = 100. * correct_train / total_train

    # --- Validation Phase ---
    model.eval()  # Set model to evaluation mode (disables dropout, etc.)
    running_loss_val = 0.0
    correct_val = 0
    total_val = 0

    # Disable gradient calculation during validation for efficiency
    with torch.no_grad():
        val_pbar = tqdm(test_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Validation]")
        for inputs, labels in val_pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)

            # Calculate loss
            loss = criterion(outputs, labels)

            # Update statistics
            running_loss_val += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

            # Update progress bar
            current_acc_val = 100. * correct_val / total_val
            val_pbar.set_postfix({'Loss': f"{loss.item():.4f}", 'Acc': f"{current_acc_val:.2f}%"})

    # Calculate average loss and accuracy for the epoch (validation)
    epoch_loss_val = running_loss_val / len(test_loader.dataset)
    epoch_acc_val = 100. * correct_val / total_val

    # Print summary for the epoch
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS} Completed:")
    print(f"  Train Loss: {epoch_loss_train:.4f}, Train Accuracy: {epoch_acc_train:.2f}%")
    print(f"  Validation Loss: {epoch_loss_val:.4f}, Validation Accuracy: {epoch_acc_val:.2f}%")
    print("-" * 50)

print("Training finished!")


Starting training for 20 epochs on cpu...


Epoch 1/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.98it/s, Loss=3.6262, Acc
Epoch 1/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.90it/s, Loss=2.9946, A



Epoch 1/20 Completed:
  Train Loss: 3.8291, Train Accuracy: 10.87%
  Validation Loss: 3.2081, Validation Accuracy: 22.04%
--------------------------------------------------


Epoch 2/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.81it/s, Loss=3.1060, Acc
Epoch 2/20 [Validation]: 100%|█| 157/157 [00:20<00:00,  7.60it/s, Loss=2.8664, A



Epoch 2/20 Completed:
  Train Loss: 3.1451, Train Accuracy: 22.59%
  Validation Loss: 2.7949, Validation Accuracy: 30.69%
--------------------------------------------------


Epoch 3/20 [Training]: 100%|█| 782/782 [00:54<00:00, 14.46it/s, Loss=2.2586, Acc
Epoch 3/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.91it/s, Loss=2.6217, A



Epoch 3/20 Completed:
  Train Loss: 2.8235, Train Accuracy: 28.75%
  Validation Loss: 2.5553, Validation Accuracy: 35.43%
--------------------------------------------------


Epoch 4/20 [Training]: 100%|█| 782/782 [00:53<00:00, 14.58it/s, Loss=2.4450, Acc
Epoch 4/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.85it/s, Loss=2.3826, A



Epoch 4/20 Completed:
  Train Loss: 2.6153, Train Accuracy: 32.86%
  Validation Loss: 2.4498, Validation Accuracy: 37.52%
--------------------------------------------------


Epoch 5/20 [Training]: 100%|█| 782/782 [00:57<00:00, 13.70it/s, Loss=2.1776, Acc
Epoch 5/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.87it/s, Loss=2.3859, A



Epoch 5/20 Completed:
  Train Loss: 2.4482, Train Accuracy: 36.05%
  Validation Loss: 2.3352, Validation Accuracy: 39.49%
--------------------------------------------------


Epoch 6/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.91it/s, Loss=2.7451, Acc
Epoch 6/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.86it/s, Loss=2.0883, A



Epoch 6/20 Completed:
  Train Loss: 2.3075, Train Accuracy: 39.13%
  Validation Loss: 2.2768, Validation Accuracy: 40.66%
--------------------------------------------------


Epoch 7/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.95it/s, Loss=1.5589, Acc
Epoch 7/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.89it/s, Loss=2.1416, A



Epoch 7/20 Completed:
  Train Loss: 2.1865, Train Accuracy: 41.89%
  Validation Loss: 2.1924, Validation Accuracy: 42.35%
--------------------------------------------------


Epoch 8/20 [Training]: 100%|█| 782/782 [00:51<00:00, 15.07it/s, Loss=1.5151, Acc
Epoch 8/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.90it/s, Loss=2.2093, A



Epoch 8/20 Completed:
  Train Loss: 2.0950, Train Accuracy: 43.94%
  Validation Loss: 2.1718, Validation Accuracy: 43.61%
--------------------------------------------------


Epoch 9/20 [Training]: 100%|█| 782/782 [00:52<00:00, 15.00it/s, Loss=2.2427, Acc
Epoch 9/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.84it/s, Loss=2.1499, A



Epoch 9/20 Completed:
  Train Loss: 2.0093, Train Accuracy: 45.37%
  Validation Loss: 2.1659, Validation Accuracy: 43.25%
--------------------------------------------------


Epoch 10/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.95it/s, Loss=1.2119, Ac
Epoch 10/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.90it/s, Loss=2.1258, 



Epoch 10/20 Completed:
  Train Loss: 1.9165, Train Accuracy: 47.64%
  Validation Loss: 2.1480, Validation Accuracy: 44.12%
--------------------------------------------------


Epoch 11/20 [Training]: 100%|█| 782/782 [00:52<00:00, 15.02it/s, Loss=1.6145, Ac
Epoch 11/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.83it/s, Loss=2.2520, 



Epoch 11/20 Completed:
  Train Loss: 1.8379, Train Accuracy: 49.10%
  Validation Loss: 2.1662, Validation Accuracy: 43.71%
--------------------------------------------------


Epoch 12/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.97it/s, Loss=1.9742, Ac
Epoch 12/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.89it/s, Loss=2.4174, 



Epoch 12/20 Completed:
  Train Loss: 1.7674, Train Accuracy: 50.88%
  Validation Loss: 2.1496, Validation Accuracy: 44.60%
--------------------------------------------------


Epoch 13/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.99it/s, Loss=1.3338, Ac
Epoch 13/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.90it/s, Loss=2.1391, 



Epoch 13/20 Completed:
  Train Loss: 1.7015, Train Accuracy: 52.17%
  Validation Loss: 2.1619, Validation Accuracy: 44.26%
--------------------------------------------------


Epoch 14/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.96it/s, Loss=1.4059, Ac
Epoch 14/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.87it/s, Loss=1.9508, 



Epoch 14/20 Completed:
  Train Loss: 1.6448, Train Accuracy: 53.34%
  Validation Loss: 2.1415, Validation Accuracy: 44.77%
--------------------------------------------------


Epoch 15/20 [Training]: 100%|█| 782/782 [19:48<00:00,  1.52s/it, Loss=1.8608, Ac
Epoch 15/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.77it/s, Loss=1.8772, 



Epoch 15/20 Completed:
  Train Loss: 1.5756, Train Accuracy: 55.02%
  Validation Loss: 2.2057, Validation Accuracy: 44.50%
--------------------------------------------------


Epoch 16/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.82it/s, Loss=1.5751, Ac
Epoch 16/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.84it/s, Loss=2.0070, 



Epoch 16/20 Completed:
  Train Loss: 1.5159, Train Accuracy: 56.17%
  Validation Loss: 2.1978, Validation Accuracy: 45.03%
--------------------------------------------------


Epoch 17/20 [Training]: 100%|█| 782/782 [00:52<00:00, 15.01it/s, Loss=1.3355, Ac
Epoch 17/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.93it/s, Loss=1.8481, 



Epoch 17/20 Completed:
  Train Loss: 1.4631, Train Accuracy: 57.62%
  Validation Loss: 2.2239, Validation Accuracy: 44.81%
--------------------------------------------------


Epoch 18/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.97it/s, Loss=1.4990, Ac
Epoch 18/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.97it/s, Loss=2.2168, 



Epoch 18/20 Completed:
  Train Loss: 1.4256, Train Accuracy: 58.47%
  Validation Loss: 2.2720, Validation Accuracy: 44.23%
--------------------------------------------------


Epoch 19/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.96it/s, Loss=1.4812, Ac
Epoch 19/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  8.89it/s, Loss=2.1529, 



Epoch 19/20 Completed:
  Train Loss: 1.3757, Train Accuracy: 59.45%
  Validation Loss: 2.2592, Validation Accuracy: 45.04%
--------------------------------------------------


Epoch 20/20 [Training]: 100%|█| 782/782 [00:52<00:00, 14.88it/s, Loss=1.0136, Ac
Epoch 20/20 [Validation]: 100%|█| 157/157 [00:17<00:00,  9.01it/s, Loss=1.7050, 


Epoch 20/20 Completed:
  Train Loss: 1.3217, Train Accuracy: 60.84%
  Validation Loss: 2.3225, Validation Accuracy: 44.09%
--------------------------------------------------
Training finished!





In [42]:
# %% [markdown]
# ## Cell 13: Save the Trained Model (Optional)
# After training, you might want to save the model's learned parameters (weights and biases) for later use (e.g., inference or further training).
# We typically save the `state_dict`, which is a Python dictionary object that maps each layer to its parameter tensor.

# %%
# Define path to save the model
model_save_path = "cifar100_cnn_model.pth"

# Save the model's state dictionary
# torch.save(model.state_dict(), model_save_path)

# print(f"Model state dictionary saved to {model_save_path}")

# To load the model later:
# model_loaded = CIFAR100_CNN(num_classes=NUM_CLASSES) # Create instance first
# model_loaded.load_state_dict(torch.load(model_save_path))
# model_loaded.to(device) # Move to device
# model_loaded.eval() # Set to evaluation mode if using for inference
# print("Model loaded successfully (Example).")