Skip to content

Commit

Permalink
modified imagenet dali augmentation to be the same with pytorch
Browse files Browse the repository at this point in the history
  • Loading branch information
tanglang96 committed Aug 13, 2019
1 parent 387cbe2 commit 6bfdfc7
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 24 deletions.
7 changes: 7 additions & 0 deletions README.md
Expand Up @@ -37,3 +37,10 @@ for i, data in enumerate(train_loader):
labels = data[0]["label"].squeeze().long().cuda(async=True)
```

If you have enough memory to hold the dataset, we strongly recommend mounting a memory disk (tmpfs) and putting the whole dataset in it to accelerate I/O, like this:

```bash
mount -t tmpfs -o size=20g tmpfs /userhome/memory_data
```

Note that the `20g` above is only a ceiling: mounting the tmpfs does **not** occupy `20g` of memory right away; memory is consumed only as you put the dataset into it. Compressed files should **not** be extracted before you've copied them into the memory disk, otherwise it could be much slower.
32 changes: 8 additions & 24 deletions imagenet.py
Expand Up @@ -13,28 +13,13 @@
class HybridTrainPipe(Pipeline):
def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False, local_rank=0, world_size=1):
super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
dali_device = "gpu"
self.input = ops.FileReader(file_root=data_dir, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
# let the user decide which pipeline works best for the RN version they run
if dali_cpu:
dali_device = "cpu"
self.decode = ops.HostDecoderRandomCrop(device=dali_device, output_type=types.RGB,
random_aspect_ratio=[0.8, 1.25],
random_area=[0.1, 1.0],
num_attempts=100)
else:
dali_device = "gpu"
# This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
# without additional reallocations
self.decode = ops.nvJPEGDecoderRandomCrop(device="mixed", output_type=types.RGB,
device_memory_padding=211025920, host_memory_padding=140544512,
random_aspect_ratio=[0.8, 1.25],
random_area=[0.1, 1.0],
num_attempts=100)
self.res = ops.Resize(device=dali_device, resize_x=crop, resize_y=crop, interp_type=types.INTERP_TRIANGULAR)
self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
self.res = ops.RandomResizedCrop(device="gpu", size=crop, random_area=[0.08, 1.25])
self.cmnp = ops.CropMirrorNormalize(device="gpu",
output_dtype=types.FLOAT,
output_layout=types.NCHW,
crop=(crop, crop),
image_type=types.RGB,
mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
Expand All @@ -46,7 +31,7 @@ def define_graph(self):
self.jpegs, self.labels = self.input(name="Reader")
images = self.decode(self.jpegs)
images = self.res(images)
output = self.cmnp(images.gpu(), mirror=rng)
output = self.cmnp(images, mirror=rng)
return [output, self.labels]


Expand All @@ -55,7 +40,7 @@ def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size, loc
super(HybridValPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
self.input = ops.FileReader(file_root=data_dir, shard_id=local_rank, num_shards=world_size,
random_shuffle=False)
self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
self.res = ops.Resize(device="gpu", resize_shorter=size, interp_type=types.INTERP_TRIANGULAR)
self.cmnp = ops.CropMirrorNormalize(device="gpu",
output_dtype=types.FLOAT,
Expand All @@ -78,14 +63,14 @@ def get_imagenet_iter_dali(type, image_dir, batch_size, num_threads, device_id,
local_rank=0):
if type == 'train':
pip_train = HybridTrainPipe(batch_size=batch_size, num_threads=num_threads, device_id=local_rank,
data_dir=image_dir,
data_dir=image_dir + '/train',
crop=crop, world_size=world_size, local_rank=local_rank)
pip_train.build()
dali_iter_train = DALIClassificationIterator(pip_train, size=pip_train.epoch_size("Reader") // world_size)
return dali_iter_train
elif type == 'val':
pip_val = HybridValPipe(batch_size=batch_size, num_threads=num_threads, device_id=local_rank,
data_dir=image_dir,
data_dir=image_dir + '/val',
crop=crop, size=val_size, world_size=world_size, local_rank=local_rank)
pip_val.build()
dali_iter_val = DALIClassificationIterator(pip_val, size=pip_val.epoch_size("Reader") // world_size)
Expand All @@ -98,7 +83,6 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
transform = transforms.Compose([
transforms.RandomResizedCrop(crop, scale=(0.08, 1.25)),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
Expand Down Expand Up @@ -130,7 +114,7 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
print('end iterate')
print('dali iterate time: %fs' % (end - start))

train_loader = get_imagenet_iter_torch(type='train', image_dir='/userhome/memory_data/imagenet', batch_size=256,
train_loader = get_imagenet_iter_torch(type='train', image_dir='/userhome/data/imagenet', batch_size=256,
num_threads=4, crop=224, device_id=0, num_gpus=1)
print('start iterate')
start = time.time()
Expand Down

0 comments on commit 6bfdfc7

Please sign in to comment.