From 077164781a063d2e62fad9083c87f19fa6400b65 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Wed, 19 Jan 2022 16:53:58 -0500 Subject: [PATCH 01/17] Change JIT assertion to a setter to allow for tensor augmentations before JIT ones --- examples/imagenet-example | 2 +- ffcv/transforms/cutout.py | 4 ++-- ffcv/transforms/flip.py | 6 +++--- ffcv/transforms/mixup.py | 5 +---- ffcv/transforms/poisoning.py | 9 +++------ ffcv/transforms/replace_label.py | 9 ++------- ffcv/transforms/translate.py | 2 +- 7 files changed, 13 insertions(+), 24 deletions(-) diff --git a/examples/imagenet-example b/examples/imagenet-example index d394723e..d35e73a2 160000 --- a/examples/imagenet-example +++ b/examples/imagenet-example @@ -1 +1 @@ -Subproject commit d394723e41e023017562df86a0e95c04be5cd119 +Subproject commit d35e73a25f9dde625f63769d5eb26a0f9da9f490 diff --git a/ffcv/transforms/cutout.py b/ffcv/transforms/cutout.py index a7402bc5..89237e0e 100644 --- a/ffcv/transforms/cutout.py +++ b/ffcv/transforms/cutout.py @@ -3,6 +3,7 @@ """ import numpy as np from typing import Callable, Optional, Tuple +from dataclasses import replace from ffcv.pipeline.compiler import Compiler from ..pipeline.allocation_query import AllocationQuery @@ -48,5 +49,4 @@ def cutout_square(images, *_): return cutout_square def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - assert previous_state.jit_mode - return previous_state, None + return replace(previous_state, jit_mode=True), None diff --git a/ffcv/transforms/flip.py b/ffcv/transforms/flip.py index b0296f1a..63d4b1f9 100644 --- a/ffcv/transforms/flip.py +++ b/ffcv/transforms/flip.py @@ -1,7 +1,7 @@ """ Random horizontal flip """ -from numpy import dtype +from dataclasses import replace from numpy.random import rand from typing import Callable, Optional, Tuple from ..pipeline.allocation_query import AllocationQuery @@ -42,5 +42,5 @@ def flip(images, dst): return flip def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - assert previous_state.jit_mode - return (previous_state, AllocationQuery(previous_state.shape, previous_state.dtype)) + return (replace(previous_state, jit_mode=True), + AllocationQuery(previous_state.shape, previous_state.dtype)) diff --git a/ffcv/transforms/mixup.py b/ffcv/transforms/mixup.py index 25741001..53239b6f 100644 --- a/ffcv/transforms/mixup.py +++ b/ffcv/transforms/mixup.py @@ -53,8 +53,6 @@ def mixer(images, dst, indices): return mixer def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - # assert previous_state.jit_mode - # We do everything in place return (previous_state, AllocationQuery(shape=previous_state.shape, dtype=previous_state.dtype)) @@ -92,8 +90,6 @@ def mixer(labels, temp_array, indices): return mixer def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - # assert previous_state.jit_mode - # We do everything in place return (replace(previous_state, shape=(3,), dtype=np.float32), AllocationQuery((3,), dtype=np.float32)) @@ -115,6 +111,7 @@ def one_hotter(mixedup_labels, dst): return one_hotter def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: + # Should already be converted to tensor assert not previous_state.jit_mode return (replace(previous_state, shape=(self.num_classes,)), \ AllocationQuery((self.num_classes,), dtype=previous_state.dtype, device=previous_state.device)) \ No newline at end of file diff --git a/ffcv/transforms/poisoning.py b/ffcv/transforms/poisoning.py index 7df9ca8d..6b897885 100644 --- a/ffcv/transforms/poisoning.py +++ b/ffcv/transforms/poisoning.py @@ -1,13 +1,10 @@ """ Poison images by adding a mask """ -from collections.abc import Sequence from typing import Tuple +from dataclasses import replace import numpy as np -from numpy import dtype -from numpy.core.numeric import indices -from numpy.random import rand from typing import Callable, Optional, Tuple from ..pipeline.allocation_query import AllocationQuery from ..pipeline.operation import Operation @@ -67,6 +64,6 @@ def poison(images, temp_array, indices): return poison def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - assert previous_state.jit_mode # We do everything in place - return (previous_state, AllocationQuery(shape=previous_state.shape, dtype=np.float32)) + return (replace(previous_state, jit_mode=True), \ + AllocationQuery(shape=previous_state.shape, dtype=np.dtype('float32'))) diff --git a/ffcv/transforms/replace_label.py b/ffcv/transforms/replace_label.py index e69ec9e9..5a95c011 100644 --- a/ffcv/transforms/replace_label.py +++ b/ffcv/transforms/replace_label.py @@ -1,13 +1,10 @@ """ Replace label """ -from collections.abc import Sequence from typing import Tuple import numpy as np -from numpy import dtype -from numpy.core.numeric import indices -from numpy.random import rand +from dataclasses import replace from typing import Callable, Optional, Tuple from ..pipeline.allocation_query import AllocationQuery from ..pipeline.operation import Operation @@ -50,6 +47,4 @@ def replace_label(labels, temp_array, indices): return replace_label def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: - assert previous_state.jit_mode - # We do everything in place - return (previous_state, None) + return (replace(previous_state, jit_mode=True), None) diff --git a/ffcv/transforms/translate.py b/ffcv/transforms/translate.py index bb2ab5d5..c4ae93f8 100644 --- a/ffcv/transforms/translate.py +++ b/ffcv/transforms/translate.py @@ -52,4 +52,4 @@ def translate(images, dst): def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: h, w, c = previous_state.shape assert previous_state.jit_mode - return (previous_state, AllocationQuery((h + 2 * self.padding, w + 2 * self.padding, c), previous_state.dtype)) + return (previous_state, AllocationQuery((h + 2 * self.padding, w + 2 * self.padding, c), previous_state.dtype)) \ No newline at end of file From 939699e73e84ded16121cb4799f727e8e0218024 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Wed, 19 Jan 2022 17:04:02 -0500 Subject: [PATCH 02/17] translate augmentation --- ffcv/transforms/translate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ffcv/transforms/translate.py b/ffcv/transforms/translate.py index c4ae93f8..e53e157e 100644 --- a/ffcv/transforms/translate.py +++ b/ffcv/transforms/translate.py @@ -2,9 +2,9 @@ Random translate """ import numpy as np -from numpy import dtype from numpy.random import randint -from typing import Any, Callable, Optional, Tuple, Union +from typing import Callable, Optional, Tuple +from dataclasses import replace from ..pipeline.allocation_query import AllocationQuery from ..pipeline.operation import Operation from ..pipeline.state import State @@ -51,5 +51,6 @@ def translate(images, dst): def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: h, w, c = previous_state.shape - assert previous_state.jit_mode - return (previous_state, AllocationQuery((h + 2 * self.padding, w + 2 * self.padding, c), previous_state.dtype)) \ No newline at end of file + return (replace(previous_state, jit_mode=True), \ + AllocationQuery((h + 2 * self.padding, w + 2 * self.padding, c), previous_state.dtype)) + From 89bdf4c06b640784d9708026cd964a733095a3c3 Mon Sep 17 00:00:00 2001 From: hadisalman Date: Sat, 22 Jan 2022 15:46:41 -0500 Subject: [PATCH 03/17] Add flip and mixup tests --- ffcv/transforms/__init__.py | 2 + tests/test_augmentations.py | 96 +++++++++++++++++++++++++++++-------- 2 files changed, 77 insertions(+), 21 deletions(-) diff --git a/ffcv/transforms/__init__.py b/ffcv/transforms/__init__.py index 0850d146..bc8fa321 100644 --- a/ffcv/transforms/__init__.py +++ b/ffcv/transforms/__init__.py @@ -7,6 +7,8 @@ from .replace_label import ReplaceLabel from .normalize import NormalizeImage from .translate import RandomTranslate +from .mixup import ImageMixup, LabelMixup, MixupToOneHot +from .module import ModuleWrapper __all__ = ['ToTensor', 'ToDevice', 'ToTorchImage', 'NormalizeImage', diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index 0d8776b3..b0123bb2 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -1,19 +1,33 @@ +import os +import uuid import numpy as np import torch as ch from torch.utils.data import Dataset from assertpy import assert_that from tempfile import NamedTemporaryFile from torchvision.datasets import CIFAR10 +from torchvision.utils import save_image, make_grid from torch.utils.data import Subset from ffcv.fields.basics import IntDecoder from ffcv.fields.rgb_image import SimpleRGBImageDecoder -from ffcv.transforms.cutout import Cutout from ffcv.writer import DatasetWriter from ffcv.fields import IntField, RGBImageField from ffcv.loader import Loader from ffcv.pipeline.compiler import Compiler -from ffcv.transforms import Squeeze, Cutout, ToTensor, ToDevice, Poison +from ffcv.transforms import * + + +SAVE_IMAGES = True +IMAGES_TMP_PATH = '/tmp/ffcv_augtest_output' +if SAVE_IMAGES: + os.makedirs(IMAGES_TMP_PATH, exist_ok=True) + +UNAUGMENTED_PIPELINE=[ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage() +] def run_test(length, pipeline, compile): my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) @@ -28,33 +42,66 @@ def run_test(length, pipeline, compile): writer.from_indexed_dataset(my_dataset, chunksize=10) - Compiler.set_enabled(True) + Compiler.set_enabled(compile) loader = Loader(name, batch_size=7, num_workers=2, pipelines={ 'image': pipeline, 'label': [IntDecoder(), ToTensor(), Squeeze()] }, drop_last=False) + + unaugmented_loader = Loader(name, batch_size=7, num_workers=2, pipelines={ + 'image': UNAUGMENTED_PIPELINE, + 'label': [IntDecoder(), ToTensor(), Squeeze()] + }, + drop_last=False) + tot_indices = 0 tot_images = 0 - for images, label in loader: - tot_indices += label.shape[0] + for (images, labels), (original_images, original_labels) in zip(loader, unaugmented_loader): + tot_indices += labels.shape[0] tot_images += images.shape[0] + + for label, original_label in zip(labels, original_labels): + assert_that(label).is_equal_to(original_label) + + if SAVE_IMAGES: + save_image(make_grid(ch.concat([images, original_images])/255., images.shape[0]), + os.path.join(IMAGES_TMP_PATH, str(uuid.uuid4()) + '.jpeg') + ) + assert_that(tot_indices).is_equal_to(len(my_dataset)) assert_that(tot_images).is_equal_to(len(my_dataset)) + def test_cutout(): - run_test(100, [ - SimpleRGBImageDecoder(), - Cutout(8), - ToTensor() - ], True) + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + Cutout(8), + ToTensor(), + ToTorchImage() + ], comp) + + +def test_flip(): + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + RandomHorizontalFlip(1.0), + ToTensor(), + ToTorchImage() + ], comp) - run_test(100, [ - SimpleRGBImageDecoder(), - Cutout(8), - ToTensor() - ], False) + +def test_mixup(): + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + ImageMixup(1, True), + ToTensor(), + ToTorchImage() + ], comp) def test_poison(): @@ -62,11 +109,18 @@ def test_poison(): # Red sqaure mask[:5, :5, 0] = 1 alpha = np.ones((32, 32)) - run_test(100, [ - SimpleRGBImageDecoder(), - Poison(mask, alpha, [0, 1, 2]), - ToTensor() - ], False) + + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + Poison(mask, alpha, [0, 1, 2]), + ToTensor(), + ToTorchImage() + ], comp) + if __name__ == '__main__': - test_poison() + # test_cutout() + test_flip() + # test_mixup() + # test_poison() From 7cc71a5a94d759d830bf8ea99025f6e8311acc29 Mon Sep 17 00:00:00 2001 From: hadisalman Date: Sat, 22 Jan 2022 19:16:01 -0500 Subject: [PATCH 04/17] Add ffcv aug + torchvision aug tests --- tests/test_augmentations.py | 95 +++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 4 deletions(-) diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index b0123bb2..6ceac6ac 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -3,6 +3,7 @@ import numpy as np import torch as ch from torch.utils.data import Dataset +from torchvision import transforms as tvt from assertpy import assert_that from tempfile import NamedTemporaryFile from torchvision.datasets import CIFAR10 @@ -29,7 +30,7 @@ ToTorchImage() ] -def run_test(length, pipeline, compile): +def run_test(length, pipeline, compile=False): my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) with NamedTemporaryFile() as handle: @@ -94,11 +95,21 @@ def test_flip(): ], comp) +def test_module_wrapper(): + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + ModuleWrapper(tvt.Grayscale(3)), + ], comp) + + def test_mixup(): for comp in [True, False]: run_test(100, [ SimpleRGBImageDecoder(), - ImageMixup(1, True), + ImageMixup(.5, False), ToTensor(), ToTorchImage() ], comp) @@ -113,14 +124,90 @@ def test_poison(): for comp in [True, False]: run_test(100, [ SimpleRGBImageDecoder(), - Poison(mask, alpha, [0, 1, 2]), + Poison(mask, alpha, list(range(100))), ToTensor(), ToTorchImage() ], comp) +def test_random_resized_crop(): + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + RandomResizedCrop(scale=(0.08, 1.0), + ratio=(0.75, 4/3), + size=32), + ToTensor(), + ToTorchImage() + ], comp) + + +def test_translate(): + for comp in [True, False]: + run_test(100, [ + SimpleRGBImageDecoder(), + RandomTranslate(padding=10), + ToTensor(), + ToTorchImage() + ], comp) + + +## Torchvision Transforms +def test_torchvision_greyscale(): + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + tvt.Grayscale(3), + ]) + +def test_torchvision_centercrop_pad(): + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + tvt.CenterCrop(10), + tvt.Pad(11) + ]) + +def test_torchvision_random_affine(): + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + tvt.RandomAffine(25), + ]) + +def test_torchvision_random_crop(): + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + tvt.Pad(10), + tvt.RandomCrop(size=32), + ]) + +def test_torchvision_color_jitter(): + run_test(100, [ + SimpleRGBImageDecoder(), + ToTensor(), + ToTorchImage(), + tvt.ColorJitter(.5, .5, .5, .5), + ]) + + if __name__ == '__main__': # test_cutout() - test_flip() + # test_flip() + # test_module_wrapper() # test_mixup() # test_poison() + # test_random_resized_crop() + # test_translate() + + ## Torchvision Transforms + # test_torchvision_greyscale() + # test_torchvision_centercrop_pad() + # test_torchvision_random_affine() + # test_torchvision_random_crop() + test_torchvision_color_jitter() From 6032f4d9ed10e622e126a5601495e8ada2688dcd Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sat, 22 Jan 2022 21:33:17 -0500 Subject: [PATCH 05/17] Support machines without CUDA --- ffcv/loader/epoch_iterator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ffcv/loader/epoch_iterator.py b/ffcv/loader/epoch_iterator.py index 4bc1b9bd..7511de2e 100644 --- a/ffcv/loader/epoch_iterator.py +++ b/ffcv/loader/epoch_iterator.py @@ -46,9 +46,8 @@ def __init__(self, loader: 'Loader', order: Sequence[int]): self.memory_bank_per_stage = defaultdict(list) - if IS_CUDA: - self.cuda_streams = [ch.cuda.Stream() - for _ in range(self.loader.batches_ahead + 2)] + self.cuda_streams = [(ch.cuda.Stream() if IS_CUDA else None) + for _ in range(self.loader.batches_ahead + 2)] # Allocate all the memory memory_allocations = {} From 1d66ed3d18dad09696857578ad14dab1cb7d7cc6 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sat, 22 Jan 2022 21:43:07 -0500 Subject: [PATCH 06/17] Fix setup.py - Resolves #88 --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 075765d5..7d94fff7 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ def pkgconfig(package, kw): description=' FFCV: Fast Forward Computer Vision ', author='MadryLab', author_email='leclerc@mit.edu', - url='https://github.com/MadryLab/fastercv', + url='https://github.com/libffcv/ffcv', + license_files = ('LICENSE.txt',), packages=find_packages(), long_description=long_description, long_description_content_type='text/markdown', From 6de7c173fe47c71f00ab46c62d7df2167760cac3 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sat, 22 Jan 2022 23:32:17 -0500 Subject: [PATCH 07/17] Fix Distributed + custom indices - Resolves #90 --- ffcv/traversal_order/random.py | 2 +- ffcv/traversal_order/sequential.py | 2 +- tests/test_traversal_orders.py | 54 ++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/ffcv/traversal_order/random.py b/ffcv/traversal_order/random.py index cef4ac24..25bc1708 100644 --- a/ffcv/traversal_order/random.py +++ b/ffcv/traversal_order/random.py @@ -24,4 +24,4 @@ def sample_order(self, epoch: int) -> Sequence[int]: self.sampler.set_epoch(epoch) - return np.array(list(self.sampler)) + return self.indices[np.array(list(self.sampler))] diff --git a/ffcv/traversal_order/sequential.py b/ffcv/traversal_order/sequential.py index 632a0059..c5bba224 100644 --- a/ffcv/traversal_order/sequential.py +++ b/ffcv/traversal_order/sequential.py @@ -27,4 +27,4 @@ def sample_order(self, epoch: int) -> Sequence[int]: self.sampler.set_epoch(epoch) - return np.array(list(self.sampler)) + return self.indices[np.array(list(self.sampler))] diff --git a/tests/test_traversal_orders.py b/tests/test_traversal_orders.py index 9b771a95..4a632bbd 100644 --- a/tests/test_traversal_orders.py +++ b/tests/test_traversal_orders.py @@ -29,24 +29,29 @@ def __getitem__(self, index): return (index, np.sin(np.array([index])).view(' 1: - init_process_group('nccl', sync_url, rank=rank, world_size=world_size) + init_process_group('gloo', sync_url, rank=rank, world_size=world_size) loader = Loader(fname, 8, num_workers=2, order=order, drop_last=False, - distributed=world_size > 1) + distributed=world_size > 1, indices=indices) result = [] for _ in range(3): content = np.concatenate([x[0].numpy().reshape(-1).copy() for x in loader]) result.append(content) result = np.stack(result) + np.save(path.join(out_folder, f"result-{rank}.npy"), result) -def prep_and_run_test(num_workers, order): +def prep_and_run_test(num_workers, order, with_indices=False): length = 600 + indices = None + if with_indices: + indices = np.random.choice(length, length//2, replace=False) + with TemporaryDirectory() as folder: name = path.join(folder, 'dataset.beton') sync_file = path.join(folder, 'share') @@ -58,7 +63,7 @@ def prep_and_run_test(num_workers, order): writer.from_indexed_dataset(dataset) - args = (num_workers, name, order, sync_file, folder) + args = (num_workers, name, order, sync_file, folder, indices) if num_workers > 1: spawn(process_work, nprocs=num_workers, args=args) else: @@ -71,19 +76,22 @@ def prep_and_run_test(num_workers, order): results = np.concatenate(results, 1) + # For each epoch for i in range(results.shape[0]): - if order == OrderOption.SEQUENTIAL and i < results.shape[0] - 1: - assert_that((results[i] == results[i + 1]).all()).is_true() - if order != OrderOption.SEQUENTIAL and i < results.shape[0] - 1: - assert_that((results[i] == results[i + 1]).all()).is_false() - - epoch_content = Counter(results[i]) - indices_gotten = np.array(sorted(list(epoch_content.keys()))) - assert_that(np.all(np.arange(length) == indices_gotten)).is_true() - assert_that(min(epoch_content.values())).is_equal_to(1) - assert_that(max(epoch_content.values())).is_less_than_or_equal_to(2) - - + if not with_indices: + if order == OrderOption.SEQUENTIAL and i < results.shape[0] - 1: + assert_that((results[i] == results[i + 1]).all()).is_true() + if order != OrderOption.SEQUENTIAL and i < results.shape[0] - 1: + assert_that((results[i] == results[i + 1]).all()).is_false() + + epoch_content = Counter(results[i]) + indices_gotten = np.array(sorted(list(epoch_content.keys()))) + assert_that(np.all(np.arange(length) == indices_gotten)).is_true() + assert_that(min(epoch_content.values())).is_equal_to(1) + assert_that(max(epoch_content.values())).is_less_than_or_equal_to(2) + else: + assert_that(set(results[i])).is_equal_to(set(indices)) + def test_traversal_sequential_1(): prep_and_run_test(1, OrderOption.SEQUENTIAL) @@ -123,3 +131,15 @@ def test_traversal_quasirandom_3(): @pytest.mark.skip() def test_traversal_quasirandom_4(): prep_and_run_test(4, OrderOption.QUASI_RANDOM) + +def test_traversal_sequential_distributed_with_indices(): + prep_and_run_test(2, OrderOption.SEQUENTIAL, True) + +def test_traversal_random_distributed_with_indices(): + prep_and_run_test(2, OrderOption.RANDOM, True) + +@pytest.mark.skip() +def test_traversal_quasi_random_distributed_with_indices(): + prep_and_run_test(2, OrderOption.QUASI_RANDOM, True) + +if __name__ == '__main__': \ No newline at end of file From 0485ee44535d48f89c3a030956b54dcf10936c01 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:08:05 -0500 Subject: [PATCH 08/17] Make default seed random - We think that it makes more sense to have random runs by default --- ffcv/loader/loader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ffcv/loader/loader.py b/ffcv/loader/loader.py index 4fcc8678..b41192e8 100644 --- a/ffcv/loader/loader.py +++ b/ffcv/loader/loader.py @@ -93,7 +93,7 @@ def __init__(self, os_cache: bool = DEFAULT_OS_CACHE, order: ORDER_TYPE = OrderOption.SEQUENTIAL, distributed: bool = False, - seed: int = 0, # For ordering of samples + seed: int = None, # For ordering of samples indices: Sequence[int] = None, # For subset selection pipelines: Mapping[str, Sequence[Union[Operation, ch.nn.Module]]] = {}, @@ -103,6 +103,10 @@ def __init__(self, recompile: bool = False, # Recompile at every epoch ): + if seed is None: + tinfo = np.iinfo(np.int) + seed = np.random.randint(tinfo.min, tinfo.max) + # We store the original user arguments to be able to pass it to the # filtered version of the datasets self._args = { From 94ee0e21e7321931bf4a46db2902e6d8773c6ada Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:16:21 -0500 Subject: [PATCH 09/17] Put NCCL back in the traversal order test --- tests/test_traversal_orders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_traversal_orders.py b/tests/test_traversal_orders.py index 4a632bbd..05955478 100644 --- a/tests/test_traversal_orders.py +++ b/tests/test_traversal_orders.py @@ -32,7 +32,7 @@ def __getitem__(self, index): def process_work(rank, world_size, fname, order, sync_fname, out_folder, indices): sync_url = f'file://{sync_fname}' if world_size > 1: - init_process_group('gloo', sync_url, rank=rank, world_size=world_size) + init_process_group('nccl', sync_url, rank=rank, world_size=world_size) loader = Loader(fname, 8, num_workers=2, order=order, drop_last=False, distributed=world_size > 1, indices=indices) From eb06acf382c123b04d365a7df60bf6fdbb60fd21 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Sun, 23 Jan 2022 00:17:15 -0500 Subject: [PATCH 10/17] aug test --- examples/imagenet-example | 2 +- tests/test_augmentations.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/imagenet-example b/examples/imagenet-example index d35e73a2..f134cbff 160000 --- a/examples/imagenet-example +++ b/examples/imagenet-example @@ -1 +1 @@ -Subproject commit d35e73a25f9dde625f63769d5eb26a0f9da9f490 +Subproject commit f134cbfff7f590954edc5c24275444b7dd2f57f6 diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index 6ceac6ac..9671546d 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -54,12 +54,12 @@ def run_test(length, pipeline, compile=False): unaugmented_loader = Loader(name, batch_size=7, num_workers=2, pipelines={ 'image': UNAUGMENTED_PIPELINE, 'label': [IntDecoder(), ToTensor(), Squeeze()] - }, - drop_last=False) + }, drop_last=False) tot_indices = 0 tot_images = 0 for (images, labels), (original_images, original_labels) in zip(loader, unaugmented_loader): + print(images.shape, original_images.shape) tot_indices += labels.shape[0] tot_images += images.shape[0] @@ -198,7 +198,7 @@ def test_torchvision_color_jitter(): if __name__ == '__main__': # test_cutout() - # test_flip() + test_flip() # test_module_wrapper() # test_mixup() # test_poison() @@ -210,4 +210,4 @@ def test_torchvision_color_jitter(): # test_torchvision_centercrop_pad() # test_torchvision_random_affine() # test_torchvision_random_crop() - test_torchvision_color_jitter() + # test_torchvision_color_jitter() From bcd439c8cd1e99c8dbd2811f0f8aa2b26aa8b657 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Sun, 23 Jan 2022 00:22:22 -0500 Subject: [PATCH 11/17] failing test --- tests/test_augmentations.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index 0d8776b3..b093f208 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -13,7 +13,7 @@ from ffcv.fields import IntField, RGBImageField from ffcv.loader import Loader from ffcv.pipeline.compiler import Compiler -from ffcv.transforms import Squeeze, Cutout, ToTensor, ToDevice, Poison +from ffcv.transforms import Squeeze, Cutout, ToTensor, Poison, RandomHorizontalFlip def run_test(length, pipeline, compile): my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) @@ -43,6 +43,13 @@ def run_test(length, pipeline, compile): assert_that(tot_indices).is_equal_to(len(my_dataset)) assert_that(tot_images).is_equal_to(len(my_dataset)) +def test_flip(): + run_test(100, [ + SimpleRGBImageDecoder(), + RandomHorizontalFlip(1.0), + ToTensor() + ], True) + def test_cutout(): run_test(100, [ SimpleRGBImageDecoder(), From a80723ec4aa3a9e5e1b159c1284001eb2191f4c8 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:24:27 -0500 Subject: [PATCH 12/17] Fix Depreciation warning --- ffcv/loader/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffcv/loader/loader.py b/ffcv/loader/loader.py index b41192e8..ea32ba62 100644 --- a/ffcv/loader/loader.py +++ b/ffcv/loader/loader.py @@ -104,7 +104,7 @@ def __init__(self, ): if seed is None: - tinfo = np.iinfo(np.int) + tinfo = np.iinfo(int) seed = np.random.randint(tinfo.min, tinfo.max) # We store the original user arguments to be able to pass it to the From b23a8431222c4fecd0ed86e029ea74590e725169 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:24:51 -0500 Subject: [PATCH 13/17] Fix syntax error in test_traversal_orders --- tests/test_traversal_orders.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_traversal_orders.py b/tests/test_traversal_orders.py index 05955478..21c00b6e 100644 --- a/tests/test_traversal_orders.py +++ b/tests/test_traversal_orders.py @@ -140,6 +140,4 @@ def test_traversal_random_distributed_with_indices(): @pytest.mark.skip() def test_traversal_quasi_random_distributed_with_indices(): - prep_and_run_test(2, OrderOption.QUASI_RANDOM, True) - -if __name__ == '__main__': \ No newline at end of file + prep_and_run_test(2, OrderOption.QUASI_RANDOM, True) \ No newline at end of file From cf3431be5d009ecb53038c4736ef3c3d61e95c3a Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:40:33 -0500 Subject: [PATCH 14/17] Fix last batch when drop_last = False --- ffcv/loader/epoch_iterator.py | 2 +- tests/test_augmentations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ffcv/loader/epoch_iterator.py b/ffcv/loader/epoch_iterator.py index 7511de2e..b54fa96b 100644 --- a/ffcv/loader/epoch_iterator.py +++ b/ffcv/loader/epoch_iterator.py @@ -135,7 +135,7 @@ def run_pipeline(self, b_ix, batch_indices, batch_slot, cuda_event): if first_stage: first_stage = False self.memory_context.end_batch(b_ix) - return tuple(args) + return tuple(x[:len(batch_indices)] for x in args) def __next__(self): result = self.output_queue.get() diff --git a/tests/test_augmentations.py b/tests/test_augmentations.py index b093f208..d0cd5140 100644 --- a/tests/test_augmentations.py +++ b/tests/test_augmentations.py @@ -76,4 +76,4 @@ def test_poison(): ], False) if __name__ == '__main__': - test_poison() + test_flip() From 4fb7525c536c5a7cb30ff3f19ae2c13970c9cab5 Mon Sep 17 00:00:00 2001 From: Guillaume Leclerc Date: Sun, 23 Jan 2022 00:47:04 -0500 Subject: [PATCH 15/17] Bump version --- ffcv/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ffcv/__init__.py b/ffcv/__init__.py index cbbcbd22..7fdfb1d6 100644 --- a/ffcv/__init__.py +++ b/ffcv/__init__.py @@ -1,5 +1,5 @@ from .loader import Loader from .writer import DatasetWriter -__version__ = '0.0.2' +__version__ = '0.0.3rc1' __all__ = ['Loader'] diff --git a/setup.py b/setup.py index 7d94fff7..978a6df9 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ def pkgconfig(package, kw): **extension_kwargs) setup(name='ffcv', - version='0.0.2', + version='0.0.3rc1', description=' FFCV: Fast Forward Computer Vision ', author='MadryLab', author_email='leclerc@mit.edu', From f7e2e472a19dbf703c19dfc1d27d1763b53d4b97 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Sun, 23 Jan 2022 11:43:27 -0500 Subject: [PATCH 16/17] fixing tests --- examples/imagenet-example | 2 +- ffcv/loader/loader.py | 10 +++++++--- tests/test_image_pipeline.py | 15 +++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/examples/imagenet-example b/examples/imagenet-example index d394723e..f134cbff 160000 --- a/examples/imagenet-example +++ b/examples/imagenet-example @@ -1 +1 @@ -Subproject commit d394723e41e023017562df86a0e95c04be5cd119 +Subproject commit f134cbfff7f590954edc5c24275444b7dd2f57f6 diff --git a/ffcv/loader/loader.py b/ffcv/loader/loader.py index ea32ba62..21cdd104 100644 --- a/ffcv/loader/loader.py +++ b/ffcv/loader/loader.py @@ -103,9 +103,13 @@ def __init__(self, recompile: bool = False, # Recompile at every epoch ): - if seed is None: - tinfo = np.iinfo(int) - seed = np.random.randint(tinfo.min, tinfo.max) + if distributed and order == OrderOption.RANDOM and (seed is None): + print('Warning: no ordering seed was specified with distributed=True. ' + 'Setting seed to 0 to match PyTorch distributed sampler.') + seed = 0 + elif seed is None: + tinfo = np.iinfo('int32') + seed = np.random.randint(0, tinfo.max) # We store the original user arguments to be able to pass it to the # filtered version of the datasets diff --git a/tests/test_image_pipeline.py b/tests/test_image_pipeline.py index 289ba638..7338b772 100644 --- a/tests/test_image_pipeline.py +++ b/tests/test_image_pipeline.py @@ -38,6 +38,7 @@ def create_and_validate(length, mode='raw', reversed=False): with NamedTemporaryFile() as handle: name = handle.name + print(name) fields = { 'index': IntField(), @@ -68,7 +69,9 @@ def create_and_validate(length, mode='raw', reversed=False): if mode == 'raw': assert_that(ch.all((image == (i % 255)).reshape(-1))).is_true() else: + print('Here') assert_that(ch.all((image == (i % 255)).reshape(-1))).is_true() + print('Here 2', ch.all((image == (i % 255)).reshape(-1))) def make_and_read_cifar_subset(length): my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) @@ -90,14 +93,14 @@ def make_and_read_cifar_subset(length): for index, images in loader: pass -def test_cifar_subset(): - make_and_read_cifar_subset(200) +# def test_cifar_subset(): + # make_and_read_cifar_subset(200) -def test_simple_raw_image_pipeline(): - create_and_validate(500, 'raw', False) +# def test_simple_raw_image_pipeline(): +# create_and_validate(500, 'raw', False) -def test_simple_raw_image_pipeline_rev(): - create_and_validate(500, 'raw', True) +# def test_simple_raw_image_pipeline_rev(): + # create_and_validate(500, 'raw', True) def test_simple_jpg_image_pipeline(): create_and_validate(500, 'jpg', False) From 405c096002527039146ed34da0675874ede9b2c1 Mon Sep 17 00:00:00 2001 From: Andrew Ilyas Date: Sun, 23 Jan 2022 17:23:03 -0500 Subject: [PATCH 17/17] bring back tests --- tests/test_image_pipeline.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/test_image_pipeline.py b/tests/test_image_pipeline.py index 7338b772..289ba638 100644 --- a/tests/test_image_pipeline.py +++ b/tests/test_image_pipeline.py @@ -38,7 +38,6 @@ def create_and_validate(length, mode='raw', reversed=False): with NamedTemporaryFile() as handle: name = handle.name - print(name) fields = { 'index': IntField(), @@ -69,9 +68,7 @@ def create_and_validate(length, mode='raw', reversed=False): if mode == 'raw': assert_that(ch.all((image == (i % 255)).reshape(-1))).is_true() else: - print('Here') assert_that(ch.all((image == (i % 255)).reshape(-1))).is_true() - print('Here 2', ch.all((image == (i % 255)).reshape(-1))) def make_and_read_cifar_subset(length): my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) @@ -93,14 +90,14 @@ def make_and_read_cifar_subset(length): for index, images in loader: pass -# def test_cifar_subset(): - # make_and_read_cifar_subset(200) +def test_cifar_subset(): + make_and_read_cifar_subset(200) -# def test_simple_raw_image_pipeline(): -# create_and_validate(500, 'raw', False) +def test_simple_raw_image_pipeline(): + create_and_validate(500, 'raw', False) -# def test_simple_raw_image_pipeline_rev(): - # create_and_validate(500, 'raw', True) +def test_simple_raw_image_pipeline_rev(): + create_and_validate(500, 'raw', True) def test_simple_jpg_image_pipeline(): create_and_validate(500, 'jpg', False)