# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import PIL
from pathlib import Path
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets
import torch.optim as optim

from fastai.vision import *

from IPython.core.debugger import set_trace

# Config

In [3]:
path_data = Path('data')

In [4]:
!ls {path_data}

cifar-10-batches-py  large_size  small_size


# Utility functions

In [5]:
def open_PIL(path, load=False):
    """Open an image file as a PIL image.

    Files with a ``.raw`` suffix are read as a headerless dump of uint8
    values and interpreted as a square ``(sz, sz, 3)`` array; every other
    suffix is handed to ``PIL.Image.open``.

    Args:
        path: ``pathlib.Path`` of the image file (``path.suffix`` selects the
            reader).
        load: If True, call ``img.load()`` before returning so the pixel data
            is read immediately instead of on first access.

    Returns:
        The opened PIL image.

    Raises:
        ValueError: If a ``.raw`` file is empty or its byte count is not
            ``3 * sz * sz`` for some integer ``sz``.
    """
    if path.suffix == '.raw':
        data = np.fromfile(path, dtype='uint8')
        # Work on the integer pixel count so the square check is exact.
        n_pixels, leftover = divmod(data.size, 3)
        sz = int(round(np.sqrt(n_pixels)))
        if data.size == 0 or leftover != 0 or sz * sz != n_pixels:
            raise ValueError(
                f'{path}: expected a non-empty square 3-channel uint8 raw image, '
                f'got {data.size} bytes'
            )
        img = PIL.Image.fromarray(data.reshape(sz, sz, 3))
    else:
        img = PIL.Image.open(path)

    if load:
        img.load()

    return img

# Networks

In [6]:
class FastNet(nn.Module):
    """Tiny classifier: one strided 3x3 convolution, global average pooling, one linear layer.

    Args:
        num_cl: Number of output classes (size of the final linear layer).
    """

    def __init__(self, num_cl):
        super().__init__()
        # 3 input channels -> 64 feature maps, 3x3 kernel, stride 2
        self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2)
        self.fc = nn.Linear(64, num_cl)

    def forward(self, x):
        """Return the output of the final linear layer for a batch of images `x`."""
        feature_maps = self.conv(x)
        # Pool every feature map down to 1x1, then drop the spatial dims.
        pooled = F.adaptive_avg_pool2d(feature_maps, 1)
        return self.fc(torch.flatten(pooled, 1))

# Datasets

In [7]:
class PathFolderDataset(Dataset):
    """Dataset over a ``<path_root>/<label>/<file>`` tree that loads images on demand.

    Only ``(path, target)`` pairs are kept; each image is opened in
    ``__getitem__``. The target is the integer parsed from the name of the
    directory containing the file.

    Args:
        path_root: ``pathlib.Path`` of the directory holding one sub-directory
            per label.
        transforms: Optional callable applied to each opened image.
        subsamples: Optional number of samples to draw at random (without
            replacement) from the full listing.
        seed: Optional seed for the sub-sampling. When None, the global
            ``np.random`` state is used.
    """

    def __init__(self, path_root, transforms=None, subsamples=None, seed=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.seed = seed
        self.samples = self._get_samples()

    def _get_samples(self):
        """Return the list of ``(path, target)`` pairs, optionally sub-sampled."""
        # glob() order depends on the filesystem, so sort for a reproducible
        # listing; skip directories, which cannot be opened as images.
        paths = sorted(p for p in self.path_root.glob('*/*') if p.is_file())
        samples = [(p, int(p.parent.stem)) for p in paths]

        if self.subsamples is not None:
            rng = np.random if self.seed is None else np.random.RandomState(self.seed)
            picked = rng.choice(len(samples), self.subsamples, replace=False)
            samples = [samples[i] for i in picked]

        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Open the image at position `idx`, apply the transforms, and return ``(img, target)``."""
        path, target = self.samples[idx]
        img = open_PIL(path)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [8]:
class ImageFolderDataset(Dataset):
    """Dataset over a ``<path_root>/<label>/<file>`` tree that preloads every image.

    All selected files are opened and loaded as PIL images when the dataset is
    constructed; ``__getitem__`` only applies the transforms. The target is
    the integer parsed from the name of the directory containing the file.

    Args:
        path_root: ``pathlib.Path`` of the directory holding one sub-directory
            per label.
        transforms: Optional callable applied to each stored image on access.
        subsamples: Optional number of samples to draw at random (without
            replacement) from the full listing before loading.
        seed: Optional seed for the sub-sampling. When None, the global
            ``np.random`` state is used.
    """

    def __init__(self, path_root, transforms=None, subsamples=None, seed=None):
        self.path_root = path_root
        self.transforms = transforms
        self.subsamples = subsamples
        self.seed = seed
        self.samples = self._get_samples()

    def _list_samples(self):
        """Return ``(path, target)`` pairs, optionally sub-sampled, without loading images."""
        # glob() order depends on the filesystem, so sort for a reproducible
        # listing; skip directories, which cannot be opened as images.
        paths = sorted(p for p in self.path_root.glob('*/*') if p.is_file())
        listed = [(p, int(p.parent.stem)) for p in paths]

        if self.subsamples is not None:
            rng = np.random if self.seed is None else np.random.RandomState(self.seed)
            picked = rng.choice(len(listed), self.subsamples, replace=False)
            listed = [listed[i] for i in picked]

        return listed

    def _get_samples(self):
        """Return ``(image, target)`` pairs with every image fully loaded."""
        # Sub-sample first so only the selected files are read.
        return [(open_PIL(path, load=True), target)
                for path, target in self._list_samples()]

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Apply the transforms to the stored image at `idx` and return ``(img, target)``."""
        img, target = self.samples[idx]

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

# Losses

In [9]:
# Criterion passed to train(); it is called there as loss(model(X), y).
loss = nn.CrossEntropyLoss()

# Train

In [10]:
def train(dl, model, loss, opt, num_epochs):
    """Run a plain training loop and print the last batch loss of every epoch.

    Args:
        dl: Iterable yielding ``(X, y)`` batches (e.g. a ``DataLoader``).
        model: Callable module mapping ``X`` to predictions.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        opt: Optimizer holding the model parameters.
        num_epochs: Number of passes over ``dl``.

    Returns:
        None. One line per epoch is printed; the reported loss is that of the
        last batch of the epoch, or ``nan`` if the epoch produced no batches.
    """
    for epoch in range(num_epochs):
        # Stays None when `dl` yields nothing, so the report below never
        # touches an unbound variable.
        batch_loss = None

        for X, y in dl:
            # Zero gradients
            opt.zero_grad()

            # Forward pass
            y_hat = model(X)

            # Loss
            batch_loss = loss(y_hat, y)

            # Step
            batch_loss.backward()
            opt.step()

        # print statistics
        reported = float('nan') if batch_loss is None else batch_loss.item()
        print(f'Epoch: {epoch}; Loss: {reported}')

# Transforms

In [11]:
# No augmentation: convert the image to a tensor, then normalize each of the
# three channels with mean 0.5 and std 0.5.
tfms_na = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5), 
                                                   (0.5, 0.5, 0.5))])

In [12]:
# Augmentation: a random affine transform (degrees=10, translate=(0.1, 0.1),
# scale=(0.9, 1.1), shear=(-5, 5), bicubic resampling) applied to the PIL
# image, followed by the same ToTensor + Normalize steps as tfms_na.
# NOTE(review): newer torchvision releases renamed `resample` to
# `interpolation` - confirm against the installed version.
tfms_aug = transforms.Compose([transforms.RandomAffine(10, 
                                                       translate=(0.1, 0.1), 
                                                       scale=(0.9, 1.1), 
                                                       shear=(-5, 5), 
                                                       resample=PIL.Image.BICUBIC),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), 
                                                    (0.5, 0.5, 0.5))])


# Test small image datasets

In [13]:
# small size, path, no augmentation, format
# Naming: ds_<size>_<loading>_<augmentation>_<format>_dataset, where
#   ss = small size, path = PathFolderDataset (images opened on demand),
#   na = tfms_na (no augmentation); one dataset per file format sub-directory.
path_root = path_data/'small_size'
ds_ss_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na)
ds_ss_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na)
ds_ss_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na)
ds_ss_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na)

In [14]:
# small size, path, augmentation, format
path_root = path_data/'small_size'
ds_ss_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug)

In [15]:
# small size, image, no augmentation, format
path_root = path_data/'small_size'
ds_ss_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na)

In [16]:
# small size, image, augmentation, format
path_root = path_data/'small_size'
ds_ss_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug)

Test small images with a batch size of 1 and a single worker

In [17]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=1, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 0.04181664064526558
CPU times: user 14min 17s, sys: 9.3 s, total: 14min 26s
Wall time: 1min 27s


Very slow; let's increase the batch size and also test the other file formats, which should show the effect of decoding speed

In [18]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3132524490356445
CPU times: user 8.47 s, sys: 400 ms, total: 8.87 s
Wall time: 10.4 s


In [19]:
dl = DataLoader(ds_ss_path_na_tif_dataset, batch_size=256, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2152068614959717
CPU times: user 8.44 s, sys: 278 ms, total: 8.72 s
Wall time: 17.7 s


In [20]:
dl = DataLoader(ds_ss_path_na_jpg_dataset, batch_size=256, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.264390230178833
CPU times: user 8.49 s, sys: 326 ms, total: 8.82 s
Wall time: 15 s


In [21]:
dl = DataLoader(ds_ss_path_na_raw_dataset, batch_size=256, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.348086357116699
CPU times: user 8.34 s, sys: 410 ms, total: 8.75 s
Wall time: 11 s


TIFF is slow for some reason... JPG is slower too, presumably because of the JPEG decoding cost. PNG and raw appear to be about the same. Next, increase the number of workers.

In [22]:
dl = DataLoader(ds_ss_path_na_png_dataset, batch_size=256, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.324636459350586
CPU times: user 8.13 s, sys: 801 ms, total: 8.93 s
Wall time: 3.29 s


Workers help out a lot; try some data augmentation

In [23]:
dl = DataLoader(ds_ss_path_aug_png_dataset, batch_size=256, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3409769535064697
CPU times: user 8.79 s, sys: 812 ms, total: 9.6 s
Wall time: 4 s


A little slower. Now try the in-memory dataset.

In [24]:
dl = DataLoader(ds_ss_img_na_png_dataset, batch_size=256, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.151681661605835
CPU times: user 8.7 s, sys: 834 ms, total: 9.53 s
Wall time: 2.48 s


Faster, but then you have to store everything in memory. Try in memory with data augmentation.

In [25]:
dl = DataLoader(ds_ss_img_aug_png_dataset, batch_size=256, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.399033308029175
CPU times: user 7.83 s, sys: 737 ms, total: 8.56 s
Wall time: 3 s


Delete the in-memory datasets to free up some memory

In [26]:
del dl, ds_ss_img_na_png_dataset, ds_ss_img_aug_png_dataset

# Test large image dataset

In [13]:
# large size, path, no augmentation, format
# Naming: ds_<size>_<loading>_<augmentation>_<format>_dataset, where
#   ls = large size, path = PathFolderDataset (images opened on demand),
#   na = tfms_na (no augmentation); one dataset per file format sub-directory.
# subsamples=2000 keeps a random selection of 2000 samples per dataset; each
# dataset draws its own selection independently.
path_root = path_data/'large_size'
ds_ls_path_na_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_na, subsamples=2000)
ds_ls_path_na_tif_dataset = PathFolderDataset(path_root/'tif', transforms=tfms_na, subsamples=2000)
ds_ls_path_na_jpg_dataset = PathFolderDataset(path_root/'jpg', transforms=tfms_na, subsamples=2000)
ds_ls_path_na_raw_dataset = PathFolderDataset(path_root/'raw', transforms=tfms_na, subsamples=2000)

In [14]:
# large size, path, augmentation, format
path_root = path_data/'large_size'
ds_ls_path_aug_png_dataset = PathFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=2000)

In [15]:
# large size, image, no augmentation, format
path_root = path_data/'large_size'
ds_ls_img_na_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_na, subsamples=2000)

In [16]:
# large size, image, augmentation, format
path_root = path_data/'large_size'
ds_ls_img_aug_png_dataset = ImageFolderDataset(path_root/'png', transforms=tfms_aug, subsamples=2000)

Test large images with a batch size of 1 and a single worker

In [17]:
dl = DataLoader(ds_ls_path_na_png_dataset, batch_size=1, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.319605588912964
CPU times: user 2min 27s, sys: 4.31 s, total: 2min 32s
Wall time: 34.5 s


Very slow; let's increase the batch size and also test the other file formats, which should show the effect of decoding speed

In [18]:
dl = DataLoader(ds_ls_path_na_png_dataset, batch_size=32, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3001198768615723
CPU times: user 1min 46s, sys: 50.1 s, total: 2min 36s
Wall time: 31.5 s


In [19]:
dl = DataLoader(ds_ls_path_na_tif_dataset, batch_size=32, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.363144874572754
CPU times: user 1min 58s, sys: 52.4 s, total: 2min 50s
Wall time: 34.9 s


In [20]:
dl = DataLoader(ds_ls_path_na_jpg_dataset, batch_size=32, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2818691730499268
CPU times: user 1min 47s, sys: 49.9 s, total: 2min 36s
Wall time: 32.4 s


In [21]:
dl = DataLoader(ds_ls_path_na_raw_dataset, batch_size=32, num_workers=1)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3126261234283447
CPU times: user 1min 48s, sys: 50.9 s, total: 2min 39s
Wall time: 32.4 s


The file format (decoding cost) doesn't seem to have much of an effect; increase the number of workers

In [22]:
dl = DataLoader(ds_ls_path_na_png_dataset, batch_size=32, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.312602996826172
CPU times: user 1min 46s, sys: 53.8 s, total: 2min 40s
Wall time: 34.8 s


Workers don't seem to help much; try some data augmentation

In [23]:
dl = DataLoader(ds_ls_path_aug_png_dataset, batch_size=32, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.3226513862609863
CPU times: user 1min 50s, sys: 51.1 s, total: 2min 41s
Wall time: 35.7 s


A smidge slower. Now try the in-memory dataset.

In [24]:
dl = DataLoader(ds_ls_img_na_png_dataset, batch_size=32, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.280503273010254
CPU times: user 1min 43s, sys: 50.6 s, total: 2min 34s
Wall time: 32.8 s


Just a smidge faster, but then you have to store everything in memory. Try in memory with data augmentation.

In [25]:
dl = DataLoader(ds_ls_img_aug_png_dataset, batch_size=32, num_workers=12)
model = FastNet(10)
opt = optim.SGD(model.parameters(), lr=0.001)
%time train(dl, model, loss, opt, 1)

Epoch: 0; Loss: 2.2880043983459473
CPU times: user 1min 48s, sys: 48.6 s, total: 2min 36s
Wall time: 34.1 s


Overall, for larger images, batch size, number of workers, and in-memory loading don't seem to have much of an effect. Running %prun shows that most of the time is spent in the conv2d function and in computing gradients, so that is the bottleneck rather than IO, decoding, or augmentation.

Delete the in-memory datasets to free up some memory

In [26]:
del dl, ds_ls_img_na_png_dataset, ds_ls_img_aug_png_dataset