Skip to content
Permalink
Browse files

use non_blocking instead of async

  • Loading branch information...
tanglang96 committed Aug 13, 2019
1 parent b431ba7 commit c4cc84c957478254e2f0463059fd3a4ed2a232ee
Showing with 24 additions and 27 deletions.
  1. +1 −1 README.md
  2. +14 −15 cifar10.py
  3. +9 −11 imagenet.py
@@ -2,7 +2,7 @@

PyTorch DataLoaders implemented with [nvidia-dali](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/index.html); we've implemented **CIFAR-10** and **ImageNet** dataloaders, and more dataloaders will be added in the future.

With 2 processors of Intel(R) Xeon(R) Gold 6154 CPU, 1 Tesla V100 GPU and all dataset in memory disk, we can **extremely** **boost image preprocessing** with DALI.
With 2 processors of Intel(R) Xeon(R) Gold 6154 CPU, 1 Tesla V100 GPU and the entire dataset on a memory disk, we can **dramatically accelerate image preprocessing** with DALI.

| Iter Training Data Cost(bs=256) | CIFAR-10 | ImageNet |
| :-----------------------------: | :------: | :------: |
@@ -1,17 +1,16 @@
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import numpy as np
from nvidia.dali.plugin.pytorch import DALIClassificationIterator, DALIGenericIterator
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
import torch
import random
import os
import pickle
import sys
from sklearn.utils import shuffle
import time
import torch
import pickle
import numpy as np
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from sklearn.utils import shuffle
from torchvision.datasets import CIFAR10
from nvidia.dali.pipeline import Pipeline
import torchvision.transforms as transforms
from nvidia.dali.plugin.pytorch import DALIClassificationIterator, DALIGenericIterator


class HybridTrainPipe_CIFAR(Pipeline):
@@ -191,8 +190,8 @@ def get_cifar_iter_torch(type, image_dir, batch_size, num_threads, cutout=0):
print('start iterate')
start = time.time()
for i, data in enumerate(train_loader):
images = data[0]["data"].cuda(async=True)
labels = data[0]["label"].squeeze().long().cuda(async=True)
images = data[0]["data"].cuda(non_blocking=True)
labels = data[0]["label"].squeeze().long().cuda(non_blocking=True)
end = time.time()
print('end iterate')
print('dali iterate time: %fs' % (end - start))
@@ -202,8 +201,8 @@ def get_cifar_iter_torch(type, image_dir, batch_size, num_threads, cutout=0):
print('start iterate')
start = time.time()
for i, data in enumerate(train_loader):
images = data[0].cuda(async=True)
labels = data[1].cuda(async=True)
images = data[0].cuda(non_blocking=True)
labels = data[1].cuda(non_blocking=True)
end = time.time()
print('end iterate')
print('dali iterate time: %fs' % (end - start))
@@ -1,13 +1,11 @@
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.plugin.pytorch import DALIClassificationIterator, DALIGenericIterator
import time
import shutil
import os
import torch.utils.data
import torchvision.transforms as transforms
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import torchvision.datasets as datasets
from nvidia.dali.pipeline import Pipeline
import torchvision.transforms as transforms
from nvidia.dali.plugin.pytorch import DALIClassificationIterator, DALIGenericIterator


class HybridTrainPipe(Pipeline):
@@ -108,8 +106,8 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
print('start iterate')
start = time.time()
for i, data in enumerate(train_loader):
images = data[0]["data"].cuda(async=True)
labels = data[0]["label"].squeeze().long().cuda(async=True)
images = data[0]["data"].cuda(non_blocking=True)
labels = data[0]["label"].squeeze().long().cuda(non_blocking=True)
end = time.time()
print('end iterate')
print('dali iterate time: %fs' % (end - start))
@@ -119,8 +117,8 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
print('start iterate')
start = time.time()
for i, data in enumerate(train_loader):
images = data[0].cuda(async=True)
labels = data[1].cuda(async=True)
images = data[0].cuda(non_blocking=True)
labels = data[1].cuda(non_blocking=True)
end = time.time()
print('end iterate')
print('torch iterate time: %fs' % (end - start))

0 comments on commit c4cc84c

Please sign in to comment.
You can’t perform that action at this time.