# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
import torch.optim as optim
from IPython.core.debugger import set_trace
import pandas as pd
import lmdb
import pickle
import h5py

import warnings
warnings.filterwarnings('ignore')

# Config

In [3]:
# Root directory holding every benchmark dataset, relative to the working directory
path_data = Path('data')

In [4]:
!ls {path_data}

cifar-10-batches-py  medium_imgs  small_hdf5  small_lmdb
medium_hdf5	     medium_lmdb  small_imgs


# Networks

In [5]:
class FastNet(nn.Module):
    """Minimal classifier: a single strided convolution followed by a linear layer.

    Kept deliberately simple so that the effect of image loading and
    augmentation is emphasized rather than the network itself.
    """

    def __init__(self, num_cl):
        """Create the layers; ``num_cl`` is the size of the final linear output."""
        super().__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=2)
        self.fc = nn.Linear(in_features=64, out_features=num_cl)

    def forward(self, x):
        """Apply conv -> global average pool -> flatten -> linear and return the result."""
        features = self.conv(x)
        # Pool every channel down to 1x1 so the linear layer always sees 64 values
        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = torch.flatten(pooled, 1)
        return self.fc(flat)

# Datasets

In [6]:
def _open_path(path_img, load=False):
    """Open the image stored at ``path_img`` and return it as a PIL image.

    Files with a ``.raw`` suffix are read as flat uint8 data and reshaped to
    a square 3-channel image; any other suffix is passed to ``Image.open``.
    When ``load`` is true, ``img.load()`` is called before returning.
    """
    if path_img.suffix != '.raw':
        img = Image.open(path_img)
    else:
        pixels = np.fromfile(path_img, dtype='uint8')
        # Side length recovered from the element count (3 values per pixel)
        side = int(np.sqrt(pixels.size/3))
        img = Image.fromarray(pixels.reshape(side, side, 3))

    if load:
        img.load()

    return img

In [7]:
class PathFolderDataset(Dataset):
    """Dataset over ``<path_root>/<label>/<file>`` that opens each image on demand.

    Only the file paths and integer labels are held in memory; the image is
    opened inside ``__getitem__``.

    Args:
        path_root: ``pathlib.Path`` whose immediate sub-directories are named
            with the integer class label of the files they contain.
        transforms: optional callable applied to the opened image.
        subsamples: optional number of samples to keep, drawn without
            replacement using the global NumPy RNG (seed ``np.random`` for a
            reproducible subset).
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.samples = self._get_samples()

    def _get_samples(self):
        """Return the list of ``(path, label)`` pairs, optionally subsampled."""
        # glob() yields entries in filesystem order, which differs between
        # machines and runs; sort so indices (and seeded subsamples) are stable.
        paths = sorted(self.path_root.glob('*/*'))
        samples = [(p, int(p.parent.stem)) for p in paths]

        if self.subsamples is not None:
            keep = np.random.choice(len(samples), self.subsamples, replace=False)
            samples = [samples[i] for i in keep]

        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Open sample ``idx`` and return ``(image, label)``."""
        path, target = self.samples[idx]
        img = _open_path(path)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [8]:
class ImageFolderDataset(Dataset):
    """Dataset over ``<path_root>/<label>/<file>`` that preloads every image.

    All selected files are opened and loaded as Pillow images in the
    constructor, so ``__getitem__`` only applies the transforms. The whole
    dataset therefore has to fit in memory.

    Args:
        path_root: ``pathlib.Path`` whose immediate sub-directories are named
            with the integer class label of the files they contain.
        transforms: optional callable applied to the stored image on access.
        subsamples: optional number of samples to keep, drawn without
            replacement using the global NumPy RNG (seed ``np.random`` for a
            reproducible subset).
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.samples = self._get_samples()

    def _get_samples(self):
        """Return the list of ``(image, label)`` pairs, optionally subsampled."""
        # glob() yields entries in filesystem order, which differs between
        # machines and runs; sort so indices (and seeded subsamples) are stable.
        paths = sorted(self.path_root.glob('*/*'))
        samples = [(p, int(p.parent.stem)) for p in paths]

        # Subsample before loading so only the kept images are read
        if self.subsamples is not None:
            keep = np.random.choice(len(samples), self.subsamples, replace=False)
            samples = [samples[i] for i in keep]

        # Load everything; must .load() or else tons of open file pointers will cause a crash
        return [(_open_path(path, load=True), target) for path, target in samples]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx`` from the preloaded list."""
        img, target = self.samples[idx]

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [9]:
class LMDBDataset(Dataset):
    """Dataset that reads pickled image arrays from an LMDB environment.

    ``<path_root>/meta.csv`` is read into ``df_meta`` and must provide the
    columns ``key`` (LMDB key, ASCII) and ``label``. The environment lives in
    ``<path_root>/lmdb``.

    The environment is opened lazily on first access instead of in the
    constructor, so DataLoader worker processes each open their own handle
    rather than inheriting one across ``fork()``, and so the dataset can be
    pickled for spawn-based workers.

    Args:
        path_root: ``pathlib.Path`` of the directory described above.
        transforms: optional callable applied to the decoded PIL image.
        subsamples: optional number of rows of ``meta.csv`` to keep, sampled
            without replacement.
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.df_meta = pd.read_csv(path_root/'meta.csv')
        # Opened on demand by the `env` property
        self._env = None

        if subsamples is not None:
            self.df_meta = self.df_meta.sample(n=subsamples, replace=False)

    @property
    def env(self):
        """Read-only LMDB environment, opened on first use."""
        if self._env is None:
            self._env = lmdb.open((self.path_root/'lmdb').as_posix(), readonly=True)
        return self._env

    def __getstate__(self):
        # An open environment handle cannot be pickled; drop it so the
        # receiving process reopens the environment lazily.
        state = self.__dict__.copy()
        state['_env'] = None
        return state

    def __len__(self):
        return len(self.df_meta)

    def __getitem__(self, idx):
        """Fetch row ``idx`` of the metadata and return ``(image, label)``.

        Raises:
            KeyError: if the row's key is not present in the LMDB environment.
        """
        info = self.df_meta.iloc[idx]
        key = info['key']
        target = info['label']

        # NOTE: might be slow since this will do each item as separate transaction
        with self.env.begin() as txn:
            payload = txn.get(key.encode('ascii'))

        if payload is None:
            raise KeyError(f'key {key!r} not found in LMDB at {self.path_root}')

        # NOTE: pickle.loads can execute arbitrary code from a crafted payload;
        # only point this dataset at LMDB files you created yourself.
        img = pickle.loads(payload)
        img = Image.fromarray(img)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [10]:
class HDF5Dataset(Dataset):
    """Dataset that reads image arrays from ``<path_root>/data.hdf5``.

    ``<path_root>/meta.csv`` is read into ``df_meta`` and must provide the
    columns ``idx`` (index into the ``data`` dataset of the HDF5 file) and
    ``label``.

    Args:
        path_root: ``pathlib.Path`` of the directory described above.
        transforms: optional callable applied to the decoded PIL image.
        subsamples: optional number of rows of ``meta.csv`` to keep, sampled
            without replacement.
    """

    def __init__(self, path_root, transforms=None, subsamples=None):
        self.path_root = path_root
        self.transforms = transforms
        self.df_meta = pd.read_csv(path_root/'meta.csv')

        if subsamples is not None:
            self.df_meta = self.df_meta.sample(n=subsamples, replace=False)

    def __len__(self):
        return len(self.df_meta)

    def __getitem__(self, idx):
        """Read row ``idx`` of the metadata and return ``(image, label)``."""
        info = self.df_meta.iloc[idx]
        target = info['label']

        # Use the instance's own root: relying on a notebook-level `path_root`
        # global silently reads whichever file that global last pointed to.
        # The file is opened per item, so no handle is shared between workers.
        with h5py.File(self.path_root/'data.hdf5', 'r', libver='latest', swmr=True) as f:
            img = f['data'][info['idx']]
        img = Image.fromarray(img)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

# Losses

In [11]:
# Single loss instance shared by every benchmark run below
loss = nn.CrossEntropyLoss()

# Train

In [12]:
def train(dl, model, loss, opt, num_epochs):
    """Run a plain training loop and print the last batch loss of every epoch.

    Args:
        dl: iterable yielding ``(X, y)`` batches (e.g. a ``DataLoader``).
        model: callable mapping ``X`` to predictions.
        loss: callable ``loss(y_hat, y)`` returning a scalar tensor.
        opt: optimizer providing ``zero_grad()`` and ``step()``.
        num_epochs: number of passes over ``dl``.

    Returns:
        None. One line per epoch is printed; the reported loss is that of the
        final batch of the epoch, or ``nan`` when ``dl`` yielded no batches.
    """
    for epoch in range(num_epochs):
        # Reset per epoch so an empty loader is reported instead of raising
        last_batch_loss = None

        for X, y in dl:
            opt.zero_grad()               # Zero gradients
            y_hat = model(X)              # Forward pass
            batch_loss = loss(y_hat, y)   # Loss
            batch_loss.backward()         # Compute gradients
            opt.step()                    # Step
            last_batch_loss = batch_loss

        # print statistics
        reported = float('nan') if last_batch_loss is None else last_batch_loss.item()
        print(f'Epoch: {epoch}; Loss: {reported}')

# Transforms

In [13]:
# No augmentation: only convert to a tensor and normalize each of the 3 channels
tfms_na = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [14]:
# Augmentation: random colour/geometry transforms on the PIL image, then the
# tensor conversion and per-channel normalization
# NOTE(review): ColorJitter() with default arguments presumably applies no jitter — confirm.
# NOTE(review): newer torchvision releases appear to replace `resample` with
# `interpolation`; verify against the installed version before upgrading.
tfms_aug = transforms.Compose([
    transforms.ColorJitter(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomPerspective(),
    transforms.RandomAffine(degrees=10,
                            translate=(0.1, 0.1),
                            scale=(0.9, 1.1),
                            shear=(-5, 5),
                            resample=Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


# Test images

### Small image

In [15]:
size = 'small'

In [16]:
# small size, path, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na)
ds_ss_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na)
ds_ss_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na)
ds_ss_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na)

In [17]:
# small size, path, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug)

In [18]:
# small size, image, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na)

In [19]:
# small size, image, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ss_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug)

Test small image with single batch size and single worker

In [20]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.0109260082244873
CPU times: user 10min 7s, sys: 7.41 s, total: 10min 14s
Wall time: 56.9 s


Very slow; let's increase the batch size and also test other file formats, which should show the effect of decoding speed

In [21]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.317831516265869
CPU times: user 27.6 s, sys: 1.26 s, total: 28.9 s
Wall time: 9.69 s


In [22]:
dl = DataLoader(ds_ss_path_na_tif_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2938072681427
CPU times: user 27.3 s, sys: 1.29 s, total: 28.6 s
Wall time: 16.5 s


In [23]:
dl = DataLoader(ds_ss_path_na_jpg_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.303396701812744
CPU times: user 27.3 s, sys: 1.25 s, total: 28.6 s
Wall time: 14.2 s


In [24]:
dl = DataLoader(ds_ss_path_na_raw_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.291125774383545
CPU times: user 27 s, sys: 1.36 s, total: 28.4 s
Wall time: 10.6 s


Increase number of workers

In [25]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.311436653137207
CPU times: user 37.8 s, sys: 1.65 s, total: 39.5 s
Wall time: 5 s


Workers help; try some data augmentation

In [26]:
dl = DataLoader(ds_ss_path_aug_png_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3233230113983154
CPU times: user 40.6 s, sys: 1.48 s, total: 42.1 s
Wall time: 6.06 s


A little slower, now try doing in-memory dataloader

In [27]:
dl = DataLoader(ds_ss_img_na_png_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.294036626815796
CPU times: user 26 s, sys: 1.45 s, total: 27.5 s
Wall time: 3.03 s


Faster, but then you have to store everything in memory. Try in memory with data augmentation.

In [28]:
dl = DataLoader(ds_ss_img_aug_png_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.303027629852295
CPU times: user 38 s, sys: 1.58 s, total: 39.6 s
Wall time: 5.24 s


Delete in memory dataloader to free up some space

In [29]:
del dl, ds_ss_img_na_png_dataset, ds_ss_img_aug_png_dataset

### Medium images

In [30]:
size = 'medium'

In [31]:
# medium size, path, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na, subsamples=4000)
ds_ms_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na, subsamples=4000)

In [32]:
# medium size, path, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=4000)

In [33]:
# medium size, image, no augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na, subsamples=4000)

In [34]:
# medium size, image, augmentation, format
path_root = path_data/(size + '_imgs')
ds_ms_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=4000)

Test medium image with single batch size and single worker

In [35]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2764034271240234
CPU times: user 3min 12s, sys: 2.01 s, total: 3min 14s
Wall time: 18 s


Let's increase the batch size and also test other file formats, which should show the effect of decoding speed

In [36]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=64, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3416311740875244
CPU times: user 58.9 s, sys: 22.7 s, total: 1min 21s
Wall time: 14 s


In [37]:
dl = DataLoader(ds_ms_path_na_tif_dataset, batch_size=64, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.324099063873291
CPU times: user 58.8 s, sys: 22.9 s, total: 1min 21s
Wall time: 14.1 s


In [38]:
dl = DataLoader(ds_ms_path_na_jpg_dataset, batch_size=64, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.338833808898926
CPU times: user 58.9 s, sys: 23 s, total: 1min 21s
Wall time: 14.2 s


In [39]:
dl = DataLoader(ds_ms_path_na_raw_dataset, batch_size=64, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.332303285598755
CPU times: user 56.1 s, sys: 22.2 s, total: 1min 18s
Wall time: 13.4 s


The file format (decoding cost) doesn't seem to have much of an effect; increase the number of workers

In [40]:
dl = DataLoader(ds_ms_path_na_png_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2841930389404297
CPU times: user 53.5 s, sys: 21.3 s, total: 1min 14s
Wall time: 13.4 s


Workers don't seem to help much; try some data augmentation

In [41]:
dl = DataLoader(ds_ms_path_aug_png_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.316070795059204
CPU times: user 59.7 s, sys: 24.9 s, total: 1min 24s
Wall time: 16.3 s


A smidge slower, now try doing in-memory dataloader

In [42]:
dl = DataLoader(ds_ms_img_na_png_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.283987045288086
CPU times: user 54 s, sys: 22.5 s, total: 1min 16s
Wall time: 13.7 s


About the same speed. Try in memory with data augmentation.

In [43]:
dl = DataLoader(ds_ms_img_aug_png_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3013663291931152
CPU times: user 59.7 s, sys: 24.4 s, total: 1min 24s
Wall time: 16.2 s


Overall, for medium images, batch size, number of workers, and in-memory loading don't seem to have much of an effect. Running %prun shows that most of the time is spent in the conv2d function and in computing gradients, so the model itself is the bottleneck rather than IO, decoding, or augmentation.

Delete in memory dataloader to free up some space

In [44]:
del dl, ds_ms_img_na_png_dataset, ds_ms_img_aug_png_dataset

# Test lmdb

### Small lmdb

In [45]:
size = 'small'

In [46]:
# small size, lmdb, no augmentation
path_root = path_data/(size + '_lmdb')
ds_ss_lmdb_na_dataset = LMDBDataset(path_root, transforms=tfms_na)

In [47]:
# small size, lmdb, augmentation
path_root = path_data/(size + '_lmdb')
ds_ss_lmdb_aug_dataset = LMDBDataset(path_root, transforms=tfms_aug)

Test small image with single batch size and single worker

In [48]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 3.264969825744629
CPU times: user 12min 2s, sys: 8.03 s, total: 12min 10s
Wall time: 1min 8s


Very slow; let's increase the batch size

In [49]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3026795387268066
CPU times: user 27.5 s, sys: 953 ms, total: 28.4 s
Wall time: 12.5 s


Increase number of workers

In [50]:
dl = DataLoader(ds_ss_lmdb_na_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2961742877960205
CPU times: user 38.9 s, sys: 1.42 s, total: 40.3 s
Wall time: 5.55 s


Workers help a bunch! Try some data augmentation

In [51]:
dl = DataLoader(ds_ss_lmdb_aug_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3149333000183105
CPU times: user 47.3 s, sys: 1.43 s, total: 48.7 s
Wall time: 7.4 s


### Medium lmdb

In [52]:
size = 'medium'

In [53]:
# medium size, lmdb, no augmentation
path_root = path_data/(size + '_lmdb')
ds_ms_lmdb_na_dataset = LMDBDataset(path_root, transforms=tfms_na, subsamples=4000)

In [54]:
# medium size, lmdb, augmentation
path_root = path_data/(size + '_lmdb')
ds_ms_lmdb_aug_dataset = LMDBDataset(path_root, transforms=tfms_aug, subsamples=4000)

Test medium image with single batch size and single worker

In [55]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.1160240173339844
CPU times: user 3min 20s, sys: 2.22 s, total: 3min 22s
Wall time: 18.7 s


Let's increase the batch size

In [56]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=64, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3144407272338867
CPU times: user 1min 3s, sys: 13.5 s, total: 1min 17s
Wall time: 12.8 s


Increase number of workers

In [57]:
dl = DataLoader(ds_ms_lmdb_na_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3241140842437744
CPU times: user 1min 1s, sys: 14.5 s, total: 1min 15s
Wall time: 13.4 s


Try some data augmentation

In [58]:
dl = DataLoader(ds_ms_lmdb_aug_dataset, batch_size=64, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.36087703704834
CPU times: user 1min 6s, sys: 15.1 s, total: 1min 21s
Wall time: 15.4 s


# Test HDF5

### Small HDF5

In [59]:
size = 'small'

In [60]:
# small size, hdf5, no augmentation
path_root = path_data/(size + '_hdf5')
ds_ss_hdf5_na_dataset = HDF5Dataset(path_root, transforms=tfms_na)

In [61]:
# small size, hdf5, augmentation
path_root = path_data/(size + '_hdf5')
ds_ss_hdf5_aug_dataset = HDF5Dataset(path_root, transforms=tfms_aug)

Test small image with single batch size and single worker

In [62]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.133254051208496
CPU times: user 15min 8s, sys: 8.43 s, total: 15min 17s
Wall time: 1min 26s


Very slow; let's increase the batch size

In [63]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2974672317504883
CPU times: user 27.1 s, sys: 1.08 s, total: 28.2 s
Wall time: 27.2 s


Increase number of workers

In [64]:
dl = DataLoader(ds_ss_hdf5_na_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2833480834960938
CPU times: user 50.9 s, sys: 1.44 s, total: 52.3 s
Wall time: 8.83 s


Workers help a bunch! Try some data augmentation

In [65]:
dl = DataLoader(ds_ss_hdf5_aug_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.322622060775757
CPU times: user 1min 3s, sys: 1.67 s, total: 1min 5s
Wall time: 11.6 s


### Medium HDF5

In [66]:
size = 'medium'

In [67]:
# medium size, hdf5, no augmentation
path_root = path_data/(size + '_hdf5')
ds_ms_hdf5_na_dataset = HDF5Dataset(path_root, transforms=tfms_na, subsamples=4000)

In [68]:
# medium size, hdf5, augmentation
path_root = path_data/(size + '_hdf5')
ds_ms_hdf5_aug_dataset = HDF5Dataset(path_root, transforms=tfms_aug, subsamples=4000)

Test medium image with single batch size and single worker

In [69]:
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=1, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.180546760559082
CPU times: user 3min 31s, sys: 2.38 s, total: 3min 33s
Wall time: 19.9 s


Increase batch size

In [70]:
# NOTE(review): batch_size=256 here, whereas the other medium-image runs
# (path / in-memory / lmdb) use batch_size=64, so these timings are not
# directly comparable with those sections — confirm whether this is intended.
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=256, num_workers=1, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3224644660949707
CPU times: user 47 s, sys: 23.8 s, total: 1min 10s
Wall time: 14.4 s


Increase number of workers

In [71]:
dl = DataLoader(ds_ms_hdf5_na_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2769808769226074
CPU times: user 41.9 s, sys: 24.2 s, total: 1min 6s
Wall time: 15.2 s


Workers... make it slower? Maybe some concurrency issues with hdf5? Try some data augmentation

In [72]:
dl = DataLoader(ds_ms_hdf5_aug_dataset, batch_size=256, num_workers=12, shuffle=True)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.288996458053589
CPU times: user 44.3 s, sys: 23.2 s, total: 1min 7s
Wall time: 17.5 s
