# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
import torch.optim as optim
from IPython.core.debugger import set_trace
import pandas as pd
import lmdb
import pickle
import h5py

import warnings
warnings.filterwarnings('ignore')

# Config

In [3]:
path_data = Path('data')

In [4]:
!ls {path_data}

cifar-10-batches-py  medium_imgs  small_hdf5  small_lmdb
medium_hdf5	     medium_lmdb  small_imgs


# Networks

In [5]:
class FastNet(nn.Module):
    """Deliberately tiny classifier: one strided convolution, global average
    pooling and a single linear layer.

    The model is kept this small to emphasize the effect of image loading and
    augmentation on the overall training time.

    Args:
        num_cl: Number of output classes (width of the final linear layer).
    """

    def __init__(self, num_cl):
        super().__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=2)
        self.fc = nn.Linear(in_features=64, out_features=num_cl)

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_cl)``, for a batch of 3-channel images."""
        features = self.conv(x)
        # Average every feature map down to 1x1, then drop the spatial dimensions
        pooled = F.adaptive_avg_pool2d(features, 1)
        return self.fc(torch.flatten(pooled, 1))

# Datasets

In [6]:
def _open_path(path_img, load=False):
    """Open an image file and return it as a PIL image.

    A file whose suffix is ``.raw`` is read as a flat ``uint8`` buffer and
    reshaped to ``(sz, sz, 3)``, with ``sz`` derived from the buffer length.
    Every other suffix is passed to ``Image.open``.

    Args:
        path_img: Path of the image file; its ``.suffix`` selects the reader.
        load: If True, ``img.load()`` is called before returning.

    Returns:
        The PIL image.
    """
    is_raw = path_img.suffix == '.raw'
    if not is_raw:
        img = Image.open(path_img)
    else:
        buf = np.fromfile(path_img, dtype='uint8')
        # Side length of the image, computed from the number of bytes in the file
        side = int(np.sqrt(buf.size/3))
        img = Image.fromarray(buf.reshape(side, side, 3))

    if load:
        img.load()
    return img

In [7]:
class PathFolderDataset(Dataset):
    """Dataset that keeps only file paths and opens each image on demand.

    Every file matching ``path_root/*/*`` becomes one sample; its label is the
    stem of the parent directory name converted with ``int()``.

    Args:
        path_root: Path object of the directory to scan.
        transforms: Optional callable applied to the opened image.
        subsamples: If given, keep this many randomly chosen samples
            (drawn without replacement via ``np.random.choice``).
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.samples = self._get_samples()

    def _get_samples(self):
        """Return the list of ``(path, label)`` pairs, optionally sub-sampled."""
        samples = []
        for p in self.path_root.glob('*/*'):
            samples.append((p, int(p.parent.stem)))

        if self.subsamples is None:
            return samples

        keep = np.random.choice(len(samples), self.subsamples, replace=False)
        return [samples[i] for i in keep]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Open the image for sample ``idx`` and return ``(image, label)``."""
        path, target = self.samples[idx]
        img = _open_path(path)
        if self.transforms is None:
            return img, target
        return self.transforms(img), target

In [8]:
class ImageFolderDataset(Dataset):
    """Dataset that preloads every sample as a Pillow image at construction time.

    Every file matching ``path_root/*/*`` becomes one sample; its label is the
    stem of the parent directory name converted with ``int()``.

    Args:
        path_root: Path object of the directory to scan.
        transforms: Optional callable applied to the stored image on access.
        subsamples: If given, keep this many randomly chosen samples
            (drawn without replacement via ``np.random.choice``) before loading.
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.samples = self._get_samples()

    def _get_samples(self):
        """Return the list of ``(image, label)`` pairs with all images loaded."""
        found = []
        for p in self.path_root.glob('*/*'):
            found.append((p, int(p.parent.stem)))

        if self.subsamples is not None:
            keep = np.random.choice(len(found), self.subsamples, replace=False)
            found = [found[i] for i in keep]

        # Must request load=True here, otherwise tons of open file pointers
        # accumulate and cause a crash
        loaded = []
        for path, target in found:
            loaded.append((_open_path(path, load=True), target))
        return loaded

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, applying ``transforms`` if set."""
        img, target = self.samples[idx]
        if self.transforms is None:
            return img, target
        return self.transforms(img), target

In [9]:
class LMDBDataset(Dataset):
    """Dataset that reads pickled image arrays from an LMDB environment.

    ``path_root`` must contain ``meta.csv`` (with at least the columns ``key``
    and ``label``) and the LMDB environment under ``path_root/'lmdb'``.

    The environment is opened lazily, once per process, on first access rather
    than in ``__init__``. LMDB's documentation advises against using an
    environment handle in a child process after ``fork()``, which is what
    happens when a handle opened in the notebook process is inherited by
    DataLoader workers. Opening lazily also keeps the dataset picklable for
    the ``spawn`` start method.

    Args:
        path_root: Path object of the directory holding ``meta.csv`` and ``lmdb``.
        transforms: Optional callable applied to the decoded PIL image.
        subsamples: If given, keep this many randomly sampled metadata rows
            (without replacement).
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.df_meta = pd.read_csv(path_root/'meta.csv')
        # Environment handles keyed by the pid of the process that opened them
        self._envs = {}

        if subsamples is not None:
            self.df_meta = self.df_meta.sample(n=subsamples, replace=False)

    @property
    def env(self):
        """Read-only LMDB environment belonging to the current process (opened on first use)."""
        import os

        envs = self.__dict__.setdefault('_envs', {})
        pid = os.getpid()
        handle = envs.get(pid)
        if handle is None:
            # A handle inherited from a parent process stays in the dict under
            # the parent's pid: it is never used here and never closed here.
            handle = lmdb.open((self.path_root/'lmdb').as_posix(), readonly=True)
            envs[pid] = handle
        return handle

    def __getstate__(self):
        """Drop open environment handles so the dataset can be pickled."""
        state = self.__dict__.copy()
        state['_envs'] = {}
        return state

    def __len__(self):
        return len(self.df_meta)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the metadata row at position ``idx``."""
        info = self.df_meta.iloc[idx]
        key = info['key']
        target = info['label']

        # NOTE: might be slow since this will do each item as separate transaction
        with self.env.begin() as txn:
            payload = txn.get(key.encode('ascii'))

        # NOTE(review): pickle.loads can execute arbitrary code; only read
        # databases that come from a trusted source.
        img = Image.fromarray(pickle.loads(payload))

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [10]:
class HDF5Dataset(Dataset):
    """Dataset that reads image arrays from an HDF5 file.

    ``path_root`` must contain ``meta.csv`` (with at least the columns ``idx``
    and ``label``) and ``data.hdf5`` with a dataset named ``data`` that is
    indexed by the ``idx`` column.

    Args:
        path_root: Path object of the directory holding ``meta.csv`` and ``data.hdf5``.
        transforms: Optional callable applied to the decoded PIL image.
        subsamples: If given, keep this many randomly sampled metadata rows
            (without replacement).
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.df_meta = pd.read_csv(path_root/'meta.csv')

        if subsamples is not None:
            self.df_meta = self.df_meta.sample(n=subsamples, replace=False)

    def __len__(self):
        return len(self.df_meta)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the metadata row at position ``idx``."""
        info = self.df_meta.iloc[idx]
        target = info['label']

        # Read from this dataset's own root. A notebook-global `path_root`
        # would silently point at whichever directory was assigned last.
        # The file is opened per item, so no handle is shared between workers.
        with h5py.File(self.path_root/'data.hdf5', 'r', libver='latest', swmr=True) as f:
            img = f['data'][info['idx']]
        img = Image.fromarray(img)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

# Losses

In [11]:
# Cross-entropy loss instance; passed as the `loss` argument to train() in the benchmark cells below
loss = nn.CrossEntropyLoss()

# Train

In [12]:
def train(dl, model, loss, opt, num_epochs):
    """Run a plain supervised training loop and print the last batch loss of each epoch.

    Args:
        dl: Iterable yielding ``(X, y)`` batches of tensors.
        model: Module to train; it is switched to training mode.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        opt: Optimizer over the parameters of ``model``.
        num_epochs: Number of passes over ``dl``.

    Returns:
        None. One line per epoch is printed; the reported loss is that of the
        last batch, or ``nan`` if ``dl`` yielded no batches in that epoch.
    """
    # Send batches to wherever the model lives instead of hard-coding CUDA, so
    # the loop also works for a CPU model. A model without parameters falls
    # back to the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    model.train()
    for epoch in range(num_epochs):
        batch_loss = None
        for X, y in dl:
            # Send data to the model's device
            X, y = X.to(device), y.to(device)

            opt.zero_grad()              # Zero gradients
            y_hat = model(X)             # Forward pass
            batch_loss = loss(y_hat, y)  # Loss
            batch_loss.backward()        # Compute gradients
            opt.step()                   # Step

        # print statistics (guarding against an empty loader)
        last_loss = float('nan') if batch_loss is None else batch_loss.item()
        print(f'Epoch: {epoch}; Loss: {last_loss}')

# Transforms

In [13]:
# No augmentation: only convert the image to a tensor and normalize each of
# the three channels with mean 0.5 and std 0.5
tfms_na = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5), 
                                                   (0.5, 0.5, 0.5))])

In [14]:
# Augmentation: color jitter, random horizontal flip, random perspective and a
# random affine warp, followed by the same ToTensor + Normalize steps as tfms_na
# NOTE(review): newer torchvision releases replace the `resample` keyword of
# RandomAffine with `interpolation` - confirm against the installed version
tfms_aug = transforms.Compose([transforms.ColorJitter(),
                               transforms.RandomHorizontalFlip(),
                               transforms.RandomPerspective(),
                               transforms.RandomAffine(10, 
                                                       translate=(0.1, 0.1), 
                                                       scale=(0.9, 1.1), 
                                                       shear=(-5, 5), 
                                                       resample=Image.BICUBIC),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), 
                                                    (0.5, 0.5, 0.5))])


# Test images

### Small image

In [15]:
size = 'small'

In [16]:
# small size, path, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na)
ds_ss_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na)
ds_ss_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na)
ds_ss_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na)

In [17]:
# small size, path, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug)

In [18]:
# small size, image, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na)

In [19]:
# small size, image, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug)

Test small images with a batch size of 1 and a single worker

In [20]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 3.6578400135040283
CPU times: user 44.1 s, sys: 7.21 s, total: 51.3 s
Wall time: 37.2 s


Very slow; let's increase the batch size and also test the other file formats, which should show how the encoding affects loading speed

In [21]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3125243186950684
CPU times: user 791 ms, sys: 234 ms, total: 1.02 s
Wall time: 8.81 s


In [22]:
dl = DataLoader(ds_ss_path_na_tif_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.297828197479248
CPU times: user 834 ms, sys: 230 ms, total: 1.06 s
Wall time: 15.6 s


In [23]:
dl = DataLoader(ds_ss_path_na_jpg_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2859106063842773
CPU times: user 880 ms, sys: 182 ms, total: 1.06 s
Wall time: 13.3 s


In [24]:
dl = DataLoader(ds_ss_path_na_raw_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.315157413482666
CPU times: user 828 ms, sys: 214 ms, total: 1.04 s
Wall time: 9.81 s


Increase number of workers

In [25]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3262479305267334
CPU times: user 745 ms, sys: 534 ms, total: 1.28 s
Wall time: 2.05 s


Workers help; try some data augmentation

In [26]:
dl = DataLoader(ds_ss_path_aug_png_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2991511821746826
CPU times: user 628 ms, sys: 494 ms, total: 1.12 s
Wall time: 3.3 s


A little slower; now try the in-memory dataset

In [27]:
dl = DataLoader(ds_ss_img_na_png_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.296252489089966
CPU times: user 671 ms, sys: 481 ms, total: 1.15 s
Wall time: 1.06 s


Faster, but then you have to store everything in memory. Try in memory with data augmentation.

In [28]:
dl = DataLoader(ds_ss_img_aug_png_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.286672830581665
CPU times: user 621 ms, sys: 465 ms, total: 1.09 s
Wall time: 2.21 s


Delete the dataloader and the in-memory datasets to free up some memory

In [29]:
del dl, ds_ss_img_na_png_dataset, ds_ss_img_aug_png_dataset

### Medium images

In [30]:
size = 'medium'

In [31]:
# medium size, path, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na, subsamples=4000)

In [32]:
# medium size, path, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=4000)

In [33]:
# medium size, image, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na, subsamples=4000)

In [34]:
# medium size, image, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=4000)

Test medium images with a batch size of 1 and a single worker

In [35]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.503147840499878
CPU times: user 7.67 s, sys: 1.67 s, total: 9.34 s
Wall time: 8.1 s


Let's increase the batch size and also test the other file formats, which should show how the encoding affects loading speed

In [36]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=64, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3388760089874268
CPU times: user 4.22 s, sys: 606 ms, total: 4.82 s
Wall time: 5.38 s


In [37]:
dl = DataLoader(ds_ms_path_na_tif_dataset, batch_size=64, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3120784759521484
CPU times: user 4.15 s, sys: 625 ms, total: 4.77 s
Wall time: 6.31 s


In [38]:
dl = DataLoader(ds_ms_path_na_jpg_dataset, batch_size=64, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2963180541992188
CPU times: user 4.12 s, sys: 720 ms, total: 4.84 s
Wall time: 6.28 s


In [39]:
dl = DataLoader(ds_ms_path_na_raw_dataset, batch_size=64, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2758541107177734
CPU times: user 4.19 s, sys: 644 ms, total: 4.84 s
Wall time: 6.62 s


Encoding doesn't seem to have much of an effect; increase number of workers

In [40]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2884750366210938
CPU times: user 3.95 s, sys: 1.6 s, total: 5.54 s
Wall time: 3.15 s


Workers help again; try some data augmentation

In [41]:
dl = DataLoader(ds_ms_path_aug_png_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.287156581878662
CPU times: user 2.38 s, sys: 1.24 s, total: 3.62 s
Wall time: 5.93 s


Much slower; now try the in-memory dataset

In [42]:
dl = DataLoader(ds_ms_img_na_png_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.306854724884033
CPU times: user 4.11 s, sys: 1.63 s, total: 5.74 s
Wall time: 2.92 s


A little bit faster. Try in memory with data augmentation.

In [43]:
dl = DataLoader(ds_ms_img_aug_png_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.298717975616455
CPU times: user 1.82 s, sys: 1.23 s, total: 3.04 s
Wall time: 5.48 s


Delete the dataloader and the in-memory datasets to free up some memory

In [44]:
del dl, ds_ms_img_na_png_dataset, ds_ms_img_aug_png_dataset

# Test lmdb

### Small lmdb

In [45]:
size = 'small'

In [46]:
# small size, lmdb, no augmentation
path_root = path_data/(size + '_lmdb')
ds_ss_lmdb_na_dataset = LMDBDataset(path_root, transforms=tfms_na)

In [47]:
# small size, lmdb, augmentation
path_root = path_data/(size + '_lmdb')
ds_ss_lmdb_aug_dataset = LMDBDataset(path_root, transforms=tfms_aug)

Test small images with a batch size of 1 and a single worker

In [48]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.0853044986724854
CPU times: user 43.8 s, sys: 5.96 s, total: 49.8 s
Wall time: 42.4 s


Very slow; let's increase the batch size

In [49]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.28078556060791
CPU times: user 836 ms, sys: 235 ms, total: 1.07 s
Wall time: 11.2 s


Increase number of workers

In [50]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2987771034240723
CPU times: user 663 ms, sys: 584 ms, total: 1.25 s
Wall time: 2.62 s


Workers help a bunch! Try some data augmentation

In [51]:
dl = DataLoader(ds_ss_lmdb_aug_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.365288019180298
CPU times: user 659 ms, sys: 656 ms, total: 1.31 s
Wall time: 3.9 s


### Medium lmdb

In [52]:
size = 'medium'

In [53]:
# medium size, lmdb, no augmentation
path_root = path_data/(size + '_lmdb')
ds_ms_lmdb_na_dataset = LMDBDataset(path_root, transforms=tfms_na, subsamples=4000)

In [54]:
# medium size, lmdb, augmentation
path_root = path_data/(size + '_lmdb')
ds_ms_lmdb_aug_dataset = LMDBDataset(path_root, transforms=tfms_aug, subsamples=4000)

Test medium images with a batch size of 1 and a single worker

In [55]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.0810186862945557
CPU times: user 7.82 s, sys: 1.72 s, total: 9.54 s
Wall time: 7.53 s


Let's increase the batch size

In [56]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=64, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3068225383758545
CPU times: user 4.2 s, sys: 613 ms, total: 4.81 s
Wall time: 4.76 s


Increase number of workers

In [57]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3554763793945312
CPU times: user 3.98 s, sys: 1.56 s, total: 5.54 s
Wall time: 3.27 s


Try some data augmentation

In [58]:
dl = DataLoader(ds_ms_lmdb_aug_dataset, batch_size=64, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3180508613586426
CPU times: user 2.44 s, sys: 1.26 s, total: 3.7 s
Wall time: 6.24 s


# Test HDF5

### Small HDF5

In [59]:
size = 'small'

In [60]:
# small size, hdf5, no augmentation
path_root = path_data/(size + '_hdf5')
ds_ss_hdf5_na_dataset = HDF5Dataset(path_root, transforms=tfms_na)

In [61]:
# small size, hdf5, augmentation
path_root = path_data/(size + '_hdf5')
ds_ss_hdf5_aug_dataset = HDF5Dataset(path_root, transforms=tfms_aug)

Test small images with a batch size of 1 and a single worker

In [62]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.0694477558135986
CPU times: user 44.3 s, sys: 6.39 s, total: 50.7 s
Wall time: 1min


Very slow; let's increase the batch size

In [63]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3224947452545166
CPU times: user 814 ms, sys: 296 ms, total: 1.11 s
Wall time: 26.1 s


Increase number of workers

In [64]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3096280097961426
CPU times: user 825 ms, sys: 637 ms, total: 1.46 s
Wall time: 6.98 s


Workers help a bunch! Try some data augmentation

In [65]:
dl = DataLoader(ds_ss_hdf5_aug_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3232290744781494
CPU times: user 773 ms, sys: 648 ms, total: 1.42 s
Wall time: 7.72 s


### Medium HDF5

In [66]:
size = 'medium'

In [67]:
# medium size, hdf5, no augmentation
path_root = path_data/(size + '_hdf5')
ds_ms_hdf5_na_dataset = HDF5Dataset(path_root, transforms=tfms_na, subsamples=4000)

In [68]:
# medium size, hdf5, augmentation
path_root = path_data/(size + '_hdf5')
ds_ms_hdf5_aug_dataset = HDF5Dataset(path_root, transforms=tfms_aug, subsamples=4000)

Test medium images with a batch size of 1 and a single worker

In [69]:
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=1, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.048393487930298
CPU times: user 8.74 s, sys: 2.16 s, total: 10.9 s
Wall time: 10.7 s


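Increase batch size (note: 256 here, whereas the other medium-image runs use 64, so these timings are not directly comparable)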

In [70]:
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=256, num_workers=1, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.310324192047119
CPU times: user 3.85 s, sys: 747 ms, total: 4.59 s
Wall time: 6.47 s


Increase number of workers

In [71]:
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3100638389587402
CPU times: user 3.69 s, sys: 1.97 s, total: 5.66 s
Wall time: 4.59 s


Try some data augmentation

In [72]:
dl = DataLoader(ds_ms_hdf5_aug_dataset, batch_size=256, num_workers=12, shuffle=True, pin_memory=True)
model = FastNet(10).cuda()
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3106980323791504
CPU times: user 3.42 s, sys: 1.37 s, total: 4.79 s
Wall time: 8.15 s
