<a href="https://colab.research.google.com/github/KeisukeShimokawa/papers-challenge/blob/master/tips/torch/DataLoader_num_workers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Number of distinct "physical id" entries in /proc/cpuinfo (CPU packages).
!grep physical.id /proc/cpuinfo | sort -u | wc -l
# Distinct "cpu cores" values reported in /proc/cpuinfo.
!grep cpu.cores /proc/cpuinfo | sort -u
# Number of lines containing "processor" in /proc/cpuinfo (logical processors).
!grep processor /proc/cpuinfo | wc -l

1
cpu cores	: 2
4


In [2]:
# Show only the first ten lines of the nvidia-smi report (header plus first GPU).
!nvidia-smi | head -n 10

Sun Jun 21 00:58:10 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |


In [3]:
import torch
from torch.utils.data import TensorDataset, DataLoader

In [4]:
# Synthetic benchmark data: 1,000,000 rows of 10 random values for both the
# inputs and the labels, consumed in batches of 10,000 rows.
inputs = torch.randn(1_000_000, 10)
labels = torch.randn(1_000_000, 10)
batch_size = 10_000

print(inputs.shape, labels.shape)

torch.Size([1000000, 10]) torch.Size([1000000, 10])


In [5]:
def run_loader(loader):
    """Consume every batch of ``loader`` and print the shapes of the last one.

    The loop body is intentionally empty: the function exists so that the
    cost of iterating over the loader can be timed on its own.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs whose elements
            expose a ``.shape`` attribute (e.g. a ``DataLoader`` over a
            two-tensor ``TensorDataset``).

    Returns:
        None. The shapes of the final batch are printed, or a short notice
        when the loader yields no batches at all.
    """
    num_batches = 0
    for num_batches, (batch_inputs, batch_labels) in enumerate(loader, start=1):
        pass

    if num_batches == 0:
        # Without this guard the prints below would hit unbound loop
        # variables and raise UnboundLocalError for an empty loader.
        print("loader yielded no batches")
        return

    print(batch_inputs.shape)
    print(batch_labels.shape)

In [10]:
dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 6.71 s per loop


In [9]:
num_workers = 2

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 3.87 s per loop


In [8]:
num_workers = 4

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 3.44 s per loop


In [11]:
def collate_fn(batch):
    """Merge a list of ``(input, label)`` samples into two stacked tensors.

    Args:
        batch: Sequence of two-element samples, each holding an input tensor
            and a label tensor.

    Returns:
        Tuple ``(inputs, labels)`` where each element is the ``torch.stack``
        of the corresponding sample components along a new leading dimension.
    """
    # Transpose the list of pairs into one tuple of inputs and one of labels.
    sample_inputs, sample_labels = zip(*batch)
    return torch.stack(sample_inputs), torch.stack(sample_labels)

In [12]:
dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      drop_last=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 6.53 s per loop


In [13]:
num_workers = 2

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      drop_last=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 3.66 s per loop


In [14]:
num_workers = 4

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      drop_last=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader(loader2)

torch.Size([10000, 10])
torch.Size([10000, 10])
1 loop, best of 1: 3.36 s per loop


In [15]:
def run_loader_cuda(loader, device='cuda:0'):
    """Iterate over ``loader`` and copy every batch to ``device`` (blocking).

    Used to time data loading together with the host-to-device transfer.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs of tensors.
        device: Destination passed to ``Tensor.to``. Defaults to
            ``'cuda:0'``, the device this benchmark originally hard-coded.

    Returns:
        None. The transferred tensors are discarded after each iteration.
    """
    for batch_inputs, batch_labels in loader:
        # Rebind the names as a training loop would; nothing consumes them.
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

In [16]:
num_workers = 4

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      drop_last=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader_cuda(loader2)

1 loop, best of 1: 3.36 s per loop


In [17]:
def run_loader_cuda_non_blocking(loader, device='cuda:0'):
    """Iterate over ``loader`` and copy every batch to ``device`` asynchronously.

    Copies are issued with ``non_blocking=True``. Because such copies may
    still be in flight when the loop ends, the function waits for the CUDA
    device before returning so that a surrounding timer measures completed
    transfers rather than just the time needed to enqueue them.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs of tensors.
        device: Destination passed to ``Tensor.to``. Defaults to
            ``'cuda:0'``, the device this benchmark originally hard-coded.

    Returns:
        None. The transferred tensors are discarded after each iteration.
    """
    target_device = torch.device(device)

    for batch_inputs, batch_labels in loader:
        batch_inputs = batch_inputs.to(target_device, non_blocking=True)
        batch_labels = batch_labels.to(target_device, non_blocking=True)

    if target_device.type == 'cuda':
        # Block until all queued asynchronous copies have actually finished.
        torch.cuda.synchronize(target_device)

In [18]:
num_workers = 4

dataset = torch.utils.data.TensorDataset(inputs, labels)
loader2 = torch.utils.data.DataLoader(dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      drop_last=True,
                                      num_workers=num_workers,
                                      pin_memory=True)

%timeit -n1 -r1 run_loader_cuda_non_blocking(loader2)

1 loop, best of 1: 3.41 s per loop
