## 1. Libraries

In [1]:
import cProfile
import time

import numpy as np
import torch
import torch.utils.data as torchdata

from multiprocessing import cpu_count


torch.__version__

'1.4.0'

## 2. DataLoaderでnum_workers > 0を設定して速くなるのはどんな時か?

### Dataset定義

オプションをつけることでわざと遅くする機能を持たせます。

In [2]:
class DatasetWrapper(torchdata.Dataset):
    """Synthetic map-style dataset of random vectors with an optional delay.

    Every sample is a freshly drawn ``np.random.random(n_features)`` array.
    When ``slow`` is true, each fetch first sleeps for ``wait`` seconds so the
    dataset can stand in for an expensive loading step in benchmarks.

    Args:
        slow: If true, sleep ``wait`` seconds on every ``__getitem__`` call.
        wait: Seconds to sleep per sample when ``slow`` is true.
        length: Number of samples reported by ``__len__`` (default 128).
        n_features: Length of each returned 1-D array (default 10).

    NOTE(review): samples come from NumPy's global RNG. The PyTorch data
    loading docs warn that worker processes may produce identical random
    streams unless seeded via ``worker_init_fn`` -- irrelevant for timing,
    but confirm before reusing the values themselves.
    """

    def __init__(self, slow=False, wait=0.01, length=128, n_features=10):
        self.slow = slow
        self.wait = wait
        self.length = length
        self.n_features = n_features

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.length

    def __getitem__(self, idx):
        """Return one random sample, sleeping first when ``slow`` is set.

        Args:
            idx: Integer sample index; negative values count from the end.

        Returns:
            A 1-D ``numpy.ndarray`` of ``n_features`` floats.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        # Dataset defines no __iter__, so plain iteration (`for x in ds`,
        # `list(ds)`) uses the sequence protocol and only stops on IndexError.
        # Without this check such loops never terminate.
        if not -self.length <= idx < self.length:
            raise IndexError(
                "index {} is out of range for dataset of length {}".format(
                    idx, self.length))
        if self.slow:
            time.sleep(self.wait)
        return np.random.random(self.n_features)
    
    
def create_loader(slow=False, wait=0.01, num_workers=0):
    """Build a DataLoader over a ``DatasetWrapper`` with the benchmark settings.

    Args:
        slow: Forwarded to ``DatasetWrapper``; enables the per-sample sleep.
        wait: Forwarded to ``DatasetWrapper``; seconds slept per sample when
            ``slow`` is true.
        num_workers: Forwarded to ``DataLoader`` as its ``num_workers``
            argument. Defaults to 0.

    Returns:
        A ``torch.utils.data.DataLoader`` with batch size 32, no shuffling and
        no pinned memory.
    """
    # parameters
    batch_size = 32
    shuffle = False
    pin_memory = False

    # Previously these settings were assigned and then discarded, so the
    # function returned None; construct and return the loader they describe.
    return torchdata.DataLoader(
        DatasetWrapper(slow=slow, wait=wait),
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=shuffle,
        pin_memory=pin_memory)

### num_workers > 0に指定して効果検証

In [4]:
# Shared DataLoader settings. The loaders below read these as module-level
# names; the only other visible assignments are locals inside create_loader,
# so define them here to keep this cell runnable on a fresh kernel.
batch_size = 32
shuffle = False
pin_memory = False

# Half of the CPUs for the multi-process loaders, but never fewer than one
# worker: cpu_count() // 2 evaluates to 0 on a single-CPU machine, which would
# make the "multi" loaders identical to the num_workers=0 ones.
num_workers_multi = max(1, cpu_count() // 2)

print("cpuの数:", cpu_count())

# No per-sample delay, num_workers=0.
fastloader = torchdata.DataLoader(
    DatasetWrapper(),
    num_workers=0,
    batch_size=batch_size,
    shuffle=shuffle,
    pin_memory=pin_memory)

# No per-sample delay, worker processes enabled.
fastloader_multi = torchdata.DataLoader(
    DatasetWrapper(),
    num_workers=num_workers_multi,
    batch_size=batch_size,
    shuffle=shuffle,
    pin_memory=pin_memory)

# 0.01 s sleep per sample, num_workers=0.
slowloader = torchdata.DataLoader(
    DatasetWrapper(slow=True, wait=0.01),
    num_workers=0,
    batch_size=batch_size,
    shuffle=shuffle,
    pin_memory=pin_memory)

# 0.01 s sleep per sample, worker processes enabled.
slowloader_multi = torchdata.DataLoader(
    DatasetWrapper(slow=True, wait=0.01),
    num_workers=num_workers_multi,
    batch_size=batch_size,
    shuffle=shuffle,
    pin_memory=pin_memory)

cpuの数: 8


#### `DataLoader`のロード時間 < `for`文内の場合

In [5]:
%%time
# Baseline: fastloader has no per-sample delay and uses num_workers=0.
# The 1.0 s sleep stands in for the work done on each batch inside the loop.
for batch in fastloader:
    time.sleep(1.0)

CPU times: user 4.31 ms, sys: 1.77 ms, total: 6.09 ms
Wall time: 4.01 s


In [6]:
%%time
# Same undelayed dataset as fastloader, but built with a num_workers value
# derived from cpu_count(). The loop body again sleeps 1.0 s per batch.
for batch in fastloader_multi:
    time.sleep(1.0)

CPU times: user 14.3 ms, sys: 18.9 ms, total: 33.1 ms
Wall time: 4.16 s


#### `DataLoader`のロード時間 > `for`文内の場合

In [7]:
%%time
# slowloader's dataset sleeps 0.01 s on every sample fetch and the loader
# uses num_workers=0. The loop body sleeps 1.0 s per batch.
for batch in slowloader:
    time.sleep(1.0)

CPU times: user 13.3 ms, sys: 4.77 ms, total: 18.1 ms
Wall time: 5.52 s


In [8]:
%%time
# Same delayed dataset as slowloader (0.01 s sleep per sample), but built
# with a num_workers value derived from cpu_count(). The loop body sleeps
# 1.0 s per batch.
for batch in slowloader_multi:
    time.sleep(1.0)

CPU times: user 13.2 ms, sys: 20.3 ms, total: 33.5 ms
Wall time: 4.4 s
