modified imagenet dali augmentation to be the same with pytorch

tanglang96 · Aug 13, 2019 · 6bfdfc7 · 6bfdfc7
1 parent 387cbe2
commit 6bfdfc7
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -37,3 +37,10 @@ for i, data in enumerate(train_loader):
     labels = data[0]["label"].squeeze().long().cuda(async=True)
 ```
 
+If you have enough memory to hold the dataset, we strongly recommend mounting a memory disk (tmpfs) and putting the whole dataset in it to accelerate I/O, like this:
+
+```bash
+mount  -t tmpfs -o size=20g  tmpfs /userhome/memory_data
+```
+
+Note that the `20g` above is only a ceiling: mounting the tmpfs does **not** occupy `20g` of memory right away; memory is consumed as you put the dataset into it. Compressed files should **not** be extracted before you've copied them into memory; otherwise it could be much slower.
diff --git a/imagenet.py b/imagenet.py
@@ -13,28 +13,13 @@
 class HybridTrainPipe(Pipeline):
     def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False, local_rank=0, world_size=1):
         super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
+        dali_device = "gpu"
         self.input = ops.FileReader(file_root=data_dir, shard_id=local_rank, num_shards=world_size, random_shuffle=True)
-        # let user decide which pipeline works him bets for RN version he runs
-        if dali_cpu:
-            dali_device = "cpu"
-            self.decode = ops.HostDecoderRandomCrop(device=dali_device, output_type=types.RGB,
-                                                    random_aspect_ratio=[0.8, 1.25],
-                                                    random_area=[0.1, 1.0],
-                                                    num_attempts=100)
-        else:
-            dali_device = "gpu"
-            # This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
-            # without additional reallocations
-            self.decode = ops.nvJPEGDecoderRandomCrop(device="mixed", output_type=types.RGB,
-                                                      device_memory_padding=211025920, host_memory_padding=140544512,
-                                                      random_aspect_ratio=[0.8, 1.25],
-                                                      random_area=[0.1, 1.0],
-                                                      num_attempts=100)
-        self.res = ops.Resize(device=dali_device, resize_x=crop, resize_y=crop, interp_type=types.INTERP_TRIANGULAR)
+        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
+        self.res = ops.RandomResizedCrop(device="gpu", size=crop, random_area=[0.08, 1.25])
         self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                             output_dtype=types.FLOAT,
                                             output_layout=types.NCHW,
-                                            crop=(crop, crop),
                                             image_type=types.RGB,
                                             mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                             std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
@@ -46,7 +31,7 @@ def define_graph(self):
         self.jpegs, self.labels = self.input(name="Reader")
         images = self.decode(self.jpegs)
         images = self.res(images)
-        output = self.cmnp(images.gpu(), mirror=rng)
+        output = self.cmnp(images, mirror=rng)
         return [output, self.labels]
 
 
@@ -55,7 +40,7 @@ def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size, loc
         super(HybridValPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
         self.input = ops.FileReader(file_root=data_dir, shard_id=local_rank, num_shards=world_size,
                                     random_shuffle=False)
-        self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
+        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)
         self.res = ops.Resize(device="gpu", resize_shorter=size, interp_type=types.INTERP_TRIANGULAR)
         self.cmnp = ops.CropMirrorNormalize(device="gpu",
                                             output_dtype=types.FLOAT,
@@ -78,14 +63,14 @@ def get_imagenet_iter_dali(type, image_dir, batch_size, num_threads, device_id,
                            local_rank=0):
     if type == 'train':
         pip_train = HybridTrainPipe(batch_size=batch_size, num_threads=num_threads, device_id=local_rank,
-                                    data_dir=image_dir,
+                                    data_dir=image_dir + '/train',
                                     crop=crop, world_size=world_size, local_rank=local_rank)
         pip_train.build()
         dali_iter_train = DALIClassificationIterator(pip_train, size=pip_train.epoch_size("Reader") // world_size)
         return dali_iter_train
     elif type == 'val':
         pip_val = HybridValPipe(batch_size=batch_size, num_threads=num_threads, device_id=local_rank,
-                                data_dir=image_dir,
+                                data_dir=image_dir + '/val',
                                 crop=crop, size=val_size, world_size=world_size, local_rank=local_rank)
         pip_val.build()
         dali_iter_val = DALIClassificationIterator(pip_val, size=pip_val.epoch_size("Reader") // world_size)
@@ -98,7 +83,6 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
         transform = transforms.Compose([
             transforms.RandomResizedCrop(crop, scale=(0.08, 1.25)),
             transforms.RandomHorizontalFlip(),
-            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         ])
@@ -130,7 +114,7 @@ def get_imagenet_iter_torch(type, image_dir, batch_size, num_threads, device_id,
     print('end iterate')
     print('dali iterate time: %fs' % (end - start))
 
-    train_loader = get_imagenet_iter_torch(type='train', image_dir='/userhome/memory_data/imagenet', batch_size=256,
+    train_loader = get_imagenet_iter_torch(type='train', image_dir='/userhome/data/imagenet', batch_size=256,
                                            num_threads=4, crop=224, device_id=0, num_gpus=1)
     print('start iterate')
     start = time.time()