From 9e5556ccf7d672f682d2bfccef3194612b234fba Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 11 Jan 2022 11:19:54 +0100 Subject: [PATCH 1/3] use enums in prototype datasets for demux --- torchvision/prototype/datasets/_builtin/clevr.py | 10 ++++++++-- torchvision/prototype/datasets/_builtin/coco.py | 10 ++++++++-- torchvision/prototype/datasets/_builtin/dtd.py | 13 ++++++++++--- torchvision/prototype/datasets/_builtin/mnist.py | 11 +++++++++-- .../prototype/datasets/_builtin/oxford_iiit_pet.py | 10 ++++++++-- torchvision/prototype/datasets/_builtin/sbd.py | 13 ++++++++++--- torchvision/prototype/datasets/_builtin/voc.py | 14 +++++++++++--- 7 files changed, 64 insertions(+), 17 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/clevr.py b/torchvision/prototype/datasets/_builtin/clevr.py index 447c1b5190d..facf27f38d5 100644 --- a/torchvision/prototype/datasets/_builtin/clevr.py +++ b/torchvision/prototype/datasets/_builtin/clevr.py @@ -1,3 +1,4 @@ +import enum import functools import io import pathlib @@ -24,6 +25,11 @@ from torchvision.prototype.features import Label +class CLEVRDemux(enum.IntEnum): + IMAGES = 0 + SCENES = 1 + + class CLEVR(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -43,9 +49,9 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]: def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: path = pathlib.Path(data[0]) if path.parents[1].name == "images": - return 0 + return CLEVRDemux.IMAGES elif path.parent.name == "scenes": - return 1 + return CLEVRDemux.SCENES else: return None diff --git a/torchvision/prototype/datasets/_builtin/coco.py b/torchvision/prototype/datasets/_builtin/coco.py index 6fde966402c..28177a5f417 100644 --- a/torchvision/prototype/datasets/_builtin/coco.py +++ b/torchvision/prototype/datasets/_builtin/coco.py @@ -1,3 +1,4 @@ +import enum import functools import io import pathlib @@ -37,6 +38,11 @@ from torchvision.prototype.utils._internal import FrozenMapping +class CocoDemux(enum.IntEnum): + IMAGES_META = 0 + ANNS_META = 1 + + class Coco(Dataset): def _make_info(self) -> DatasetInfo: name = "coco" @@ -144,9 +150,9 @@ def _filter_meta_files(self, data: Tuple[str, Any], *, split: str, year: str, an def _classify_meta(self, data: Tuple[str, Any]) -> Optional[int]: key, _ = data if key == "images": - return 0 + return CocoDemux.IMAGES_META elif key == "annotations": - return 1 + return CocoDemux.ANNS_META else: return None diff --git a/torchvision/prototype/datasets/_builtin/dtd.py b/torchvision/prototype/datasets/_builtin/dtd.py index e78ab88da27..01df8dd38a4 100644 --- a/torchvision/prototype/datasets/_builtin/dtd.py +++ b/torchvision/prototype/datasets/_builtin/dtd.py @@ -1,3 +1,4 @@ +import enum import io import pathlib from typing import Any, Callable, Dict, List, Optional, Tuple @@ -30,6 +31,12 @@ from torchvision.prototype.features import Label +class DTDDemux(enum.IntEnum): + SPLIT = 0 + JOINT_CATEGORIES = 1 + IMAGES = 2 + + class DTD(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -54,11 +61,11 @@ def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: path = pathlib.Path(data[0]) if path.parent.name == "labels": if path.name == "labels_joint_anno.txt": - return 1 + return DTDDemux.JOINT_CATEGORIES - return 0 + return DTDDemux.SPLIT elif path.parents[1].name == "images": - return 2 + return DTDDemux.IMAGES else: return None diff --git a/torchvision/prototype/datasets/_builtin/mnist.py b/torchvision/prototype/datasets/_builtin/mnist.py index 0d7fe36a3fd..883fa93ee83 100644 --- a/torchvision/prototype/datasets/_builtin/mnist.py +++ b/torchvision/prototype/datasets/_builtin/mnist.py @@ -1,4 +1,5 @@ import abc +import enum import functools import io import operator @@ -232,6 +233,11 @@ def _make_info(self) -> DatasetInfo: } +class EMNISTDemux(enum.IntEnum): + IMAGES = 0 + LABELS = 1 + + class EMNIST(_MNISTBase): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -273,9 +279,9 @@ def _classify_archive(self, data: Tuple[str, Any], *, config: DatasetConfig) -> path = pathlib.Path(data[0]) (images_file, _), (labels_file, _) = self._files_and_checksums(config) if path.name == images_file: - return 0 + return EMNISTDemux.IMAGES elif path.name == labels_file: - return 1 + return EMNISTDemux.LABELS else: return None @@ -320,6 +326,7 @@ def _make_datapipe( decoder: Optional[Callable[[io.IOBase], torch.Tensor]], ) -> IterDataPipe[Dict[str, Any]]: archive_dp = resource_dps[0] + images_dp, labels_dp = Demultiplexer( archive_dp, 2, diff --git a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py index 4e43613715e..06c12e882f4 100644 --- a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py +++ b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py @@ -1,3 +1,4 @@ +import enum import functools import io import pathlib @@ -24,6 +25,11 @@ from torchvision.prototype.features import Label +class OxfordIITPetDemux(enum.IntEnum): + SPLIT_AND_CLASSIFICATION = 0 + SEGMENTATIONS = 1 + + class OxfordIITPet(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -51,8 +57,8 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]: def _classify_anns(self, data: Tuple[str, Any]) -> Optional[int]: return { - "annotations": 0, - "trimaps": 1, + "annotations": OxfordIITPetDemux.SPLIT_AND_CLASSIFICATION, + "trimaps": OxfordIITPetDemux.SEGMENTATIONS, }.get(pathlib.Path(data[0]).parent.name) def _filter_images(self, data: Tuple[str, Any]) -> bool: diff --git a/torchvision/prototype/datasets/_builtin/sbd.py b/torchvision/prototype/datasets/_builtin/sbd.py index f605d7d72f1..b65b9d9fca5 100644 --- a/torchvision/prototype/datasets/_builtin/sbd.py +++ b/torchvision/prototype/datasets/_builtin/sbd.py @@ -1,3 +1,4 @@ +import enum import functools import io import pathlib @@ -33,6 +34,12 @@ ) +class SBDDemux(enum.IntEnum): + SPLIT = 0 + IMAGES = 1 + ANNS = 2 + + class SBD(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -63,12 +70,12 @@ def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: parent, grandparent, *_ = path.parents if parent.name == "dataset": - return 0 + return SBDDemux.SPLIT elif grandparent.name == "dataset": if parent.name == "img": - return 1 + return SBDDemux.IMAGES elif parent.name == "cls": - return 2 + return SBDDemux.ANNS else: return None else: diff --git a/torchvision/prototype/datasets/_builtin/voc.py b/torchvision/prototype/datasets/_builtin/voc.py index 66905fac3bd..7942aaeb417 100644 --- a/torchvision/prototype/datasets/_builtin/voc.py +++ b/torchvision/prototype/datasets/_builtin/voc.py @@ -1,3 +1,4 @@ +import enum import functools import io import pathlib @@ -31,6 +32,13 @@ hint_shuffling, ) + +class VOCDemux(enum.IntEnum): + SPLIT = 0 + IMAGES = 1 + ANNS = 2 + + HERE = pathlib.Path(__file__).parent @@ -75,11 +83,11 @@ def _is_in_folder(self, data: Tuple[str, Any], *, name: str, depth: int = 1) -> def _classify_archive(self, data: Tuple[str, Any], *, config: DatasetConfig) -> Optional[int]: if self._is_in_folder(data, name="ImageSets", depth=2): - return 0 + return VOCDemux.SPLIT elif self._is_in_folder(data, name="JPEGImages"): - return 1 + return VOCDemux.IMAGES elif self._is_in_folder(data, name=self._ANNS_FOLDER[config.task]): - return 2 + return VOCDemux.ANNS else: return None From 0488e71d77d036048a67d3dfc05d655d9891ac69 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 11 Jan 2022 15:39:34 +0100 Subject: [PATCH 2/3] use enum for category generation --- torchvision/prototype/datasets/_builtin/dtd.py | 2 +- torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/dtd.py b/torchvision/prototype/datasets/_builtin/dtd.py index 01df8dd38a4..88f3277de58 100644 --- a/torchvision/prototype/datasets/_builtin/dtd.py +++ b/torchvision/prototype/datasets/_builtin/dtd.py @@ -129,7 +129,7 @@ def _make_datapipe( return Mapper(dp, self._collate_and_decode_sample, fn_kwargs=dict(decoder=decoder)) def _filter_images(self, data: Tuple[str, Any]) -> bool: - return self._classify_archive(data) == 2 + return self._classify_archive(data) == DTDDemux.IMAGES def _generate_categories(self, root: pathlib.Path) -> List[str]: dp = self.resources(self.default_config)[0].load(pathlib.Path(root) / self.name) diff --git a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py index 06c12e882f4..99b1f643b61 100644 --- a/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py +++ b/torchvision/prototype/datasets/_builtin/oxford_iiit_pet.py @@ -141,7 +141,7 @@ def _make_datapipe( return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder)) def _filter_split_and_classification_anns(self, data: Tuple[str, Any]) -> bool: - return self._classify_anns(data) == 0 + return self._classify_anns(data) == OxfordIITPetDemux.SPLIT_AND_CLASSIFICATION def _generate_categories(self, root: pathlib.Path) -> List[str]: config = self.default_config From b0be39d1491bbd901ec997b76f07464ff44ef611 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 11 Jan 2022 15:41:10 +0100 Subject: [PATCH 3/3] revert enum usage for single use constants --- torchvision/prototype/datasets/_builtin/clevr.py | 10 ++-------- torchvision/prototype/datasets/_builtin/coco.py | 10 ++-------- torchvision/prototype/datasets/_builtin/mnist.py | 11 ++--------- torchvision/prototype/datasets/_builtin/sbd.py | 13 +++---------- torchvision/prototype/datasets/_builtin/voc.py | 14 +++----------- 5 files changed, 12 insertions(+), 46 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/clevr.py b/torchvision/prototype/datasets/_builtin/clevr.py index facf27f38d5..447c1b5190d 100644 --- a/torchvision/prototype/datasets/_builtin/clevr.py +++ b/torchvision/prototype/datasets/_builtin/clevr.py @@ -1,4 +1,3 @@ -import enum import functools import io import pathlib @@ -25,11 +24,6 @@ from torchvision.prototype.features import Label -class CLEVRDemux(enum.IntEnum): - IMAGES = 0 - SCENES = 1 - - class CLEVR(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -49,9 +43,9 @@ def resources(self, config: DatasetConfig) -> List[OnlineResource]: def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: path = pathlib.Path(data[0]) if path.parents[1].name == "images": - return CLEVRDemux.IMAGES + return 0 elif path.parent.name == "scenes": - return CLEVRDemux.SCENES + return 1 else: return None diff --git a/torchvision/prototype/datasets/_builtin/coco.py b/torchvision/prototype/datasets/_builtin/coco.py index 28177a5f417..6fde966402c 100644 --- a/torchvision/prototype/datasets/_builtin/coco.py +++ b/torchvision/prototype/datasets/_builtin/coco.py @@ -1,4 +1,3 @@ -import enum import functools import io import pathlib @@ -38,11 +37,6 @@ from torchvision.prototype.utils._internal import FrozenMapping -class CocoDemux(enum.IntEnum): - IMAGES_META = 0 - ANNS_META = 1 - - class Coco(Dataset): def _make_info(self) -> DatasetInfo: name = "coco" @@ -150,9 +144,9 @@ def _filter_meta_files(self, data: Tuple[str, Any], *, split: str, year: str, an def _classify_meta(self, data: Tuple[str, Any]) -> Optional[int]: key, _ = data if key == "images": - return CocoDemux.IMAGES_META + return 0 elif key == "annotations": - return CocoDemux.ANNS_META + return 1 else: return None diff --git a/torchvision/prototype/datasets/_builtin/mnist.py b/torchvision/prototype/datasets/_builtin/mnist.py index 883fa93ee83..0d7fe36a3fd 100644 --- a/torchvision/prototype/datasets/_builtin/mnist.py +++ b/torchvision/prototype/datasets/_builtin/mnist.py @@ -1,5 +1,4 @@ import abc -import enum import functools import io import operator @@ -233,11 +232,6 @@ def _make_info(self) -> DatasetInfo: } -class EMNISTDemux(enum.IntEnum): - IMAGES = 0 - LABELS = 1 - - class EMNIST(_MNISTBase): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -279,9 +273,9 @@ def _classify_archive(self, data: Tuple[str, Any], *, config: DatasetConfig) -> path = pathlib.Path(data[0]) (images_file, _), (labels_file, _) = self._files_and_checksums(config) if path.name == images_file: - return EMNISTDemux.IMAGES + return 0 elif path.name == labels_file: - return EMNISTDemux.LABELS + return 1 else: return None @@ -326,7 +320,6 @@ def _make_datapipe( decoder: Optional[Callable[[io.IOBase], torch.Tensor]], ) -> IterDataPipe[Dict[str, Any]]: archive_dp = resource_dps[0] - images_dp, labels_dp = Demultiplexer( archive_dp, 2, diff --git a/torchvision/prototype/datasets/_builtin/sbd.py b/torchvision/prototype/datasets/_builtin/sbd.py index b65b9d9fca5..f605d7d72f1 100644 --- a/torchvision/prototype/datasets/_builtin/sbd.py +++ b/torchvision/prototype/datasets/_builtin/sbd.py @@ -1,4 +1,3 @@ -import enum import functools import io import pathlib @@ -34,12 +33,6 @@ ) -class SBDDemux(enum.IntEnum): - SPLIT = 0 - IMAGES = 1 - ANNS = 2 - - class SBD(Dataset): def _make_info(self) -> DatasetInfo: return DatasetInfo( @@ -70,12 +63,12 @@ def _classify_archive(self, data: Tuple[str, Any]) -> Optional[int]: parent, grandparent, *_ = path.parents if parent.name == "dataset": - return SBDDemux.SPLIT + return 0 elif grandparent.name == "dataset": if parent.name == "img": - return SBDDemux.IMAGES + return 1 elif parent.name == "cls": - return SBDDemux.ANNS + return 2 else: return None else: diff --git a/torchvision/prototype/datasets/_builtin/voc.py b/torchvision/prototype/datasets/_builtin/voc.py index 7942aaeb417..66905fac3bd 100644 --- a/torchvision/prototype/datasets/_builtin/voc.py +++ b/torchvision/prototype/datasets/_builtin/voc.py @@ -1,4 +1,3 @@ -import enum import functools import io import pathlib @@ -32,13 +31,6 @@ hint_shuffling, ) - -class VOCDemux(enum.IntEnum): - SPLIT = 0 - IMAGES = 1 - ANNS = 2 - - HERE = pathlib.Path(__file__).parent @@ -83,11 +75,11 @@ def _is_in_folder(self, data: Tuple[str, Any], *, name: str, depth: int = 1) -> def _classify_archive(self, data: Tuple[str, Any], *, config: DatasetConfig) -> Optional[int]: if self._is_in_folder(data, name="ImageSets", depth=2): - return VOCDemux.SPLIT + return 0 elif self._is_in_folder(data, name="JPEGImages"): - return VOCDemux.IMAGES + return 1 elif self._is_in_folder(data, name=self._ANNS_FOLDER[config.task]): - return VOCDemux.ANNS + return 2 else: return None