The goal of this notebook is to improve on previous attempts in a more structured way.

In [42]:
import torch
import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [43]:
import sys

# Make the modules under ../src importable from this notebook.
# The membership check keeps the cell idempotent: re-running it must not keep
# appending duplicate entries to sys.path.
src_dir = '../src'
if src_dir not in sys.path:
    sys.path.append(src_dir)


from helper_functions import train_step, test_step, accuracy_fn, print_train_time

In [44]:
# Fix the random seed so that weight initialisation and data shuffling are repeatable.
seed = 213

# Seed the default (CPU) generator first, then the CUDA generator, in that order.
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)

In [45]:
# Dataset root; it is expected to contain `train/` and `test/` sub-folders laid
# out the way torchvision's ImageFolder requires (one sub-directory per class).
cinic_directory = '../data'

# Training loader: reshuffled every epoch so the mini-batches differ between epochs.
cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    batch_size=64, shuffle=True)

# Evaluation loader: shuffling is pointless for evaluation and makes the
# per-batch metrics depend on RNG state, so the order is kept fixed.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    batch_size=64, shuffle=False)

In [46]:
class EnhancedModel(nn.Module):
    """Three-stage convolutional classifier with a global-average-pooled head.

    Every stage is ``Conv3x3 -> ReLU -> BatchNorm -> Conv3x3 -> ReLU ->
    BatchNorm -> MaxPool(2)``. The first stage produces ``hidden_units``
    channels, the second ``2 * hidden_units`` and the third
    ``4 * hidden_units``. The feature map is then averaged down to 1x1,
    flattened, passed through dropout and projected to ``output_shape`` logits.

    Args:
        input_shape: Number of channels of the input images.
        hidden_units: Channel width of the first convolutional stage.
        output_shape: Number of output logits (one per class).
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()

        # Channel widths of the three stages: h, 2h, 4h.
        stage_widths = (hidden_units, hidden_units * 2, hidden_units * 4)

        # The stages are created in order so parameter initialisation consumes
        # the RNG in a fixed, reproducible sequence.
        self.block_1 = self._make_stage(input_shape, stage_widths[0])
        self.block_2 = self._make_stage(stage_widths[0], stage_widths[1])
        self.block_3 = self._make_stage(stage_widths[1], stage_widths[2])

        # Collapse each channel to a single value; keeps the classifier small
        # and independent of the spatial size of the feature map.
        self.global_pool = nn.AdaptiveAvgPool2d(1)

        # Classifier head: flatten -> dropout (regularisation) -> linear.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(in_features=stage_widths[2], out_features=output_shape),
        )

    @staticmethod
    def _make_stage(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one stage: two 3x3 conv/ReLU/BatchNorm groups, then a 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(kernel_size=2, stride=2),  # halves height and width
        )

    def forward(self, x: torch.Tensor):
        """Run the three stages, pool globally and return the class logits."""
        for stage in (self.block_1, self.block_2, self.block_3):
            x = stage(x)
        pooled = self.global_pool(x)
        return self.classifier(pooled)


In [47]:
# Build the network for 3-channel inputs and 10 output logits, then move its
# parameters to the selected device.
model = EnhancedModel(input_shape=3, hidden_units=64, output_shape=10)
model = model.to(device)

# Cross-entropy over the logits as the training objective, optimised with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [48]:
from timeit import default_timer as timer

# Start the wall-clock timer for the whole train/evaluate run.
train_time_start_model_2 = timer()

# Number of full passes over the training set.
epochs = 10

# Keyword arguments that the training and evaluation steps have in common.
step_kwargs = dict(model=model,
                   loss_fn=loss_fn,
                   accuracy_fn=accuracy_fn,
                   device=device)

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    # One optimisation pass over the training loader ...
    train_step(data_loader=cinic_train, optimizer=optimizer, **step_kwargs)
    # ... followed by an evaluation pass over the test loader.
    test_step(data_loader=cinic_test, **step_kwargs)

# Stop the timer and report the elapsed time via the helper.
train_time_end_model_2 = timer()
total_train_time_model_2 = print_train_time(
    start=train_time_start_model_2,
    end=train_time_end_model_2,
    device=device,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 1.51780 | Train accuracy: 44.27%


 10%|█         | 1/10 [00:22<03:26, 22.94s/it]

Test loss: 1.33048 | Test accuracy: 52.33%

Epoch: 1
---------
Train loss: 1.18208 | Train accuracy: 56.83%


 20%|██        | 2/10 [00:45<03:03, 22.89s/it]

Test loss: 1.10877 | Test accuracy: 58.45%

Epoch: 2
---------
Train loss: 0.99605 | Train accuracy: 64.07%


 30%|███       | 3/10 [01:08<02:40, 22.96s/it]

Test loss: 1.03315 | Test accuracy: 62.85%

Epoch: 3
---------
Train loss: 0.88425 | Train accuracy: 68.36%


 40%|████      | 4/10 [01:31<02:17, 22.98s/it]

Test loss: 0.99222 | Test accuracy: 64.39%

Epoch: 4
---------
Train loss: 0.79697 | Train accuracy: 71.51%


 50%|█████     | 5/10 [01:54<01:54, 22.99s/it]

Test loss: 0.91307 | Test accuracy: 67.54%

Epoch: 5
---------
Train loss: 0.71317 | Train accuracy: 74.47%


 60%|██████    | 6/10 [02:17<01:31, 22.95s/it]

Test loss: 0.88322 | Test accuracy: 70.08%

Epoch: 6
---------
Train loss: 0.64298 | Train accuracy: 76.99%


 70%|███████   | 7/10 [02:40<01:08, 22.98s/it]

Test loss: 0.87215 | Test accuracy: 70.27%

Epoch: 7
---------
Train loss: 0.56530 | Train accuracy: 79.64%


 80%|████████  | 8/10 [03:03<00:45, 22.96s/it]

Test loss: 0.95184 | Test accuracy: 69.93%

Epoch: 8
---------
Train loss: 0.50247 | Train accuracy: 81.96%


 90%|█████████ | 9/10 [03:26<00:22, 22.92s/it]

Test loss: 0.98052 | Test accuracy: 69.89%

Epoch: 9
---------
Train loss: 0.43555 | Train accuracy: 84.35%


100%|██████████| 10/10 [03:49<00:00, 22.93s/it]

Test loss: 1.07100 | Test accuracy: 69.16%


Train time on cuda: 229.287 seconds





In [53]:
from torchinfo import summary

# Summarise a separate, throwaway instance instead of rebinding `model`.
# Reassigning `model` here would replace the trained network with a freshly
# initialised one and leave `optimizer` pointing at the old parameters.
summary_model = EnhancedModel(input_shape=3,
    hidden_units=64,
    output_shape=10).to(device)

# The input size is (batch, channels, height, width), with batch and channels
# matching the loaders' batch size and the model's 3 input channels.
summary(summary_model, input_size=(64, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
EnhancedModel                            [64, 10]                  --
├─Sequential: 1-1                        [64, 64, 16, 16]          --
│    └─Conv2d: 2-1                       [64, 64, 32, 32]          1,792
│    └─ReLU: 2-2                         [64, 64, 32, 32]          --
│    └─BatchNorm2d: 2-3                  [64, 64, 32, 32]          128
│    └─Conv2d: 2-4                       [64, 64, 32, 32]          36,928
│    └─ReLU: 2-5                         [64, 64, 32, 32]          --
│    └─BatchNorm2d: 2-6                  [64, 64, 32, 32]          128
│    └─MaxPool2d: 2-7                    [64, 64, 16, 16]          --
├─Sequential: 1-2                        [64, 128, 8, 8]           --
│    └─Conv2d: 2-8                       [64, 128, 16, 16]         73,856
│    └─ReLU: 2-9                         [64, 128, 16, 16]         --
│    └─BatchNorm2d: 2-10                 [64, 128, 16, 16]         256
│