Using the baseline model I was not able to saturate my GPU; here I try to assess whether the data loader was the bottleneck.

# Preparations

In [1]:
import sys
sys.path.append('../src')

import torch
from torch import nn

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
from torchinfo import summary
from tqdm.auto import tqdm
from timeit import default_timer as timer


from helper_functions import train_step, test_step, accuracy_fn, print_train_time

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed PyTorch's random number generators (CPU call first, then the CUDA call)
# with one fixed value.
seed = 213
for _seed_rng in (torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(seed)

In [3]:
class BaselineModel(nn.Module):
    """
    Model architecture copying TinyVGG from:
    https://poloclub.github.io/cnn-explainer/

    Two convolutional blocks (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool)
    followed by a flatten + linear classifier.

    Args:
        input_shape: number of channels of the input images.
        hidden_units: number of channels produced by every convolution.
        output_shape: number of output logits (classes).
        image_size: height/width of the (square) input images. Defaults to 32,
            which reproduces the previously hard-coded ``hidden_units*8*8``
            classifier input size.

    Raises:
        ValueError: if ``image_size`` is too small to survive both poolings.
    """
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int,
                 image_size: int = 32):
        super().__init__()
        # The 3x3 / stride 1 / padding 1 convolutions keep height and width,
        # and each 2x2 max-pool halves them (rounding down), so after both
        # blocks the feature map side is image_size // 2 // 2.
        pooled_size = image_size // 2 // 2
        if pooled_size < 1:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}"
            )

        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units * pooled_size * pooled_size,
                      out_features=output_shape)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run both convolutional blocks and the classifier; returns the logits."""
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x

In [4]:
def run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3):
    """Train and evaluate ``model`` for ``epochs`` epochs and report the elapsed time.

    Each epoch calls ``train_step`` on ``cinic_train`` and then ``test_step`` on
    ``cinic_test``; both run on the module-level ``device``.

    Args:
        cinic_train: data loader passed to ``train_step``.
        cinic_test: data loader passed to ``test_step``.
        model: the model to train and evaluate.
        loss_fn: loss function forwarded to both steps.
        optimizer: optimizer forwarded to ``train_step``.
        epochs: number of train/test rounds to run.

    Returns:
        The value returned by ``print_train_time`` for the measured interval
        (previously it was computed and discarded).
    """
    def _wait_for_gpu():
        # CUDA work is queued asynchronously; wait for it so the timer
        # brackets the work that was actually executed.
        if str(device).startswith('cuda'):
            torch.cuda.synchronize()

    _wait_for_gpu()
    train_time_start = timer()

    # Train and test model
    for epoch in tqdm(range(epochs)):
        print(f"Epoch: {epoch}\n---------")
        train_step(data_loader=cinic_train,
            model=model,
            loss_fn=loss_fn,
            optimizer=optimizer,
            accuracy_fn=accuracy_fn,
            device=device
        )
        test_step(data_loader=cinic_test,
            model=model,
            loss_fn=loss_fn,
            accuracy_fn=accuracy_fn,
            device=device
        )

    _wait_for_gpu()
    train_time_end = timer()
    return print_train_time(start=train_time_start,
                            end=train_time_end,
                            device=device)

# Experiments

### The loader I used originally (default settings)

In [5]:
# Baseline input pipeline: only batch_size and shuffle are set, every other
# DataLoader option is left at its default.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, **loader_kwargs)

# Shuffling is only needed for training; the test split is read in a fixed order.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [6]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 2.30362 | Train accuracy: 9.98%


 33%|███▎      | 1/3 [00:19<00:39, 19.93s/it]

Test loss: 2.30378 | Test accuracy: 10.00%

Epoch: 1
---------
Train loss: 2.30358 | Train accuracy: 9.84%


 67%|██████▋   | 2/3 [00:39<00:19, 19.80s/it]

Test loss: 2.30515 | Test accuracy: 10.00%

Epoch: 2
---------
Train loss: 2.30350 | Train accuracy: 10.09%


100%|██████████| 3/3 [00:59<00:00, 19.75s/it]

Test loss: 2.30347 | Test accuracy: 9.99%


Train time on cuda: 59.262 seconds





### two workers

In [7]:
# Same pipeline as the baseline, but batches are prepared by two worker processes.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64, num_workers=2)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, **loader_kwargs)

# Shuffling is only needed for training; the test split is read in a fixed order.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [8]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 2.30357 | Train accuracy: 10.06%


 33%|███▎      | 1/3 [00:10<00:20, 10.42s/it]

Test loss: 2.30326 | Test accuracy: 10.00%

Epoch: 1
---------
Train loss: 2.30358 | Train accuracy: 9.99%


 67%|██████▋   | 2/3 [00:20<00:10, 10.38s/it]

Test loss: 2.30339 | Test accuracy: 10.00%

Epoch: 2
---------
Train loss: 2.30360 | Train accuracy: 10.07%


100%|██████████| 3/3 [00:31<00:00, 10.35s/it]

Test loss: 2.30360 | Test accuracy: 10.00%


Train time on cuda: 31.052 seconds





### 6 workers

In [9]:
# Same pipeline as the baseline, but batches are prepared by six worker processes.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64, num_workers=6)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, **loader_kwargs)

# Shuffling is only needed for training; the test split is read in a fixed order.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [10]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 1.94162 | Train accuracy: 28.10%


 33%|███▎      | 1/3 [00:04<00:09,  4.98s/it]

Test loss: 1.80471 | Test accuracy: 32.80%

Epoch: 1
---------
Train loss: 1.77356 | Train accuracy: 34.08%


 67%|██████▋   | 2/3 [00:09<00:04,  4.88s/it]

Test loss: 1.74807 | Test accuracy: 34.61%

Epoch: 2
---------
Train loss: 1.73324 | Train accuracy: 35.51%


100%|██████████| 3/3 [00:14<00:00,  4.90s/it]

Test loss: 1.81155 | Test accuracy: 32.95%


Train time on cuda: 14.694 seconds





### two workers and pin_memory

In [11]:
# Two worker processes, and batches are additionally requested in pinned memory.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64, num_workers=2, pin_memory=True)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, **loader_kwargs)

# Shuffling is only needed for training; the test split is read in a fixed order.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [12]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 1.87650 | Train accuracy: 30.39%


 33%|███▎      | 1/3 [00:10<00:21, 10.51s/it]

Test loss: 1.76640 | Test accuracy: 34.29%

Epoch: 1
---------
Train loss: 1.72512 | Train accuracy: 36.04%


 67%|██████▋   | 2/3 [00:20<00:10, 10.48s/it]

Test loss: 1.68253 | Test accuracy: 37.53%

Epoch: 2
---------
Train loss: 1.68662 | Train accuracy: 37.54%


100%|██████████| 3/3 [00:31<00:00, 10.48s/it]

Test loss: 1.69387 | Test accuracy: 37.37%


Train time on cuda: 31.429 seconds





### two workers and drop_last

In [13]:
# Two worker processes; the training loader also drops its last incomplete batch.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64, num_workers=2)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, drop_last=True, **loader_kwargs)

# drop_last is applied to the training loader only: on the test loader it would
# leave any final partial batch out of the reported test metrics. The test split
# is also read in a fixed order, since shuffling is only needed for training.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [14]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 2.30374 | Train accuracy: 9.85%


 33%|███▎      | 1/3 [00:10<00:20, 10.35s/it]

Test loss: 2.30303 | Test accuracy: 10.00%

Epoch: 1
---------
Train loss: 2.30357 | Train accuracy: 9.95%


 67%|██████▋   | 2/3 [00:20<00:10, 10.33s/it]

Test loss: 2.30347 | Test accuracy: 10.00%

Epoch: 2
---------
Train loss: 2.30351 | Train accuracy: 9.88%


100%|██████████| 3/3 [00:31<00:00, 10.34s/it]

Test loss: 2.30330 | Test accuracy: 10.00%


Train time on cuda: 31.034 seconds





### Six workers with persistent_workers, prefetch_factor=2 and pin_memory

In [53]:
# Six worker processes that are kept alive between epochs (persistent_workers),
# with prefetch_factor=2 and pinned-memory batches.
cinic_directory = '../data'
loader_kwargs = dict(batch_size=64, num_workers=6, persistent_workers=True,
                     prefetch_factor=2, pin_memory=True)

cinic_train = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                     transform=transforms.ToTensor()),
    shuffle=True, **loader_kwargs)

# Shuffling is only needed for training; the test split is read in a fixed order.
cinic_test = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                     transform=transforms.ToTensor()),
    shuffle=False, **loader_kwargs)

In [54]:
# Re-seed right before building the model so this experiment starts from the
# same RNG state (initial weights, shuffle order) as the other experiments;
# otherwise it inherits whatever state the previously executed cells left behind.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

model = BaselineModel(input_shape=3,
                      hidden_units=10,
                      output_shape=10).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

run_model(cinic_train, cinic_test, model, loss_fn, optimizer, epochs=3)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 2.30382 | Train accuracy: 9.87%


 33%|███▎      | 1/3 [00:04<00:09,  4.57s/it]

Test loss: 2.30304 | Test accuracy: 9.99%

Epoch: 1
---------
Train loss: 2.30351 | Train accuracy: 9.92%


 67%|██████▋   | 2/3 [00:08<00:04,  4.40s/it]

Test loss: 2.30333 | Test accuracy: 9.99%

Epoch: 2
---------
Train loss: 2.30347 | Train accuracy: 9.91%


100%|██████████| 3/3 [00:13<00:00,  4.38s/it]

Test loss: 2.30326 | Test accuracy: 10.00%


Train time on cuda: 13.136 seconds



