From cad18a49660495bd7f21d05ed7d1d34ef0a9cb02 Mon Sep 17 00:00:00 2001 From: Ponku Date: Sun, 10 Jul 2022 17:05:50 +0100 Subject: [PATCH 01/35] Added Stereo Matching dataset interface and several classic datasets. --- torchvision/datasets/_stereo_matching.py | 479 +++++++++++++++++++++++ 1 file changed, 479 insertions(+) create mode 100644 torchvision/datasets/_stereo_matching.py diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py new file mode 100644 index 00000000000..42535c1623b --- /dev/null +++ b/torchvision/datasets/_stereo_matching.py @@ -0,0 +1,479 @@ +from abc import ABC, abstractmethod +from functools import reduce +from glob import glob +from pathlib import Path +from random import random +import re +import shutil +from typing import Callable, List, Optional, Tuple, Any +import lzma +from torch import Tensor +from .vision import VisionDataset +from .utils import download_and_extract_archive, download_url, verify_str_arg +import os +from torch.utils.model_zoo import tqdm +import numpy as np +from PIL import Image + +__all__ = ( + "CSEStereo" + "Middlebury2014" + "ETH3D" + "Kitti2012" + "Kitti2015" +) + + +def read_pfm_file(file_path: str) -> np.array: + # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py + with open(file_path, "rb") as file: + header = file.readline().rstrip() + assert header in ["PF", "Pf"], f"{file_path} is not a valid .pfm file" + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) + assert dim_match, f"{file_path} has a Malformed PFM header" + + width, height = map(int, dim_match.groups()) + channels = 3 if header == "PF" else 1 + scale = float(file.readline().rstrip()) + # check for endian type + if scale < 0: + scale = -scale + endian = '<' + else: + endian = '>' + + data = np.fromfile(file, endian + 'f') + data = np.reshape(data, (height, width, channels)) + data = np.flipud(data) + + return data + + +class StereoMatchingDataset(ABC, VisionDataset): + """Base interface for Stereo matching datasets""" + + def __init__(self, root: str, transforms: Optional[Callable] = None): + super().__init__(root=root) + self.transforms = transforms + + self._images: List[Tuple] = [] + self._disparities: List[Tuple] = [] + + def _read_img(self, file_path: str) -> Image.Image: + img = Image.open(file_path) + if img.mode != "RGB": + img = img.convert("RGB") + return img + + @abstractmethod + def _read_disparity(self, file_path: str) -> Tuple: + # function that returns a disparity map and an occlusion map + pass + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + img_left = self._read_img(self._images[index][0]) + img_right = self._read_img(self._images[index][1]) + + dsp_map_left, occ_mask_left = self._read_disparity(self._disparities[index][0]) + dsp_map_right, occ_mask_right = self._read_disparity(self._disparities[index][1]) + + imgs = (img_left, img_right) + dsp_maps = (dsp_map_left, dsp_map_right) + occ_masks = (occ_mask_left, occ_mask_right) + + if self.transforms is not None: + imgs, dsp_maps, occ_masks, = self.transforms(imgs, dsp_maps, occ_masks) + + return imgs, dsp_maps, occ_masks + + def __len__(self) -> int: + return len(self._images) + + +class CRESSyntethicStereo(StereoMatchingDataset): + """Synthetic dataset used in training the `CREStereo `_ architecture. + + Ported from the download script in the paper github `repo `_. 
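+
+    Args:
+        root (string): Root directory of the dataset.
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
+        download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.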
+ """ + DOWNLOAD_SPACE = 4 * 1024 * 1024 * 1024 # dataset requires download requires about 400 GB of free space + + EXPERIMENTAL_RANGE = 1 # TODO: remove after validating dataset structure / flow + + def __init__(self, root: str, transforms: Optional[Callable] = None, download: bool = True): + super().__init__(root, transforms) + # if the API user requests a dataset download check that the user can download it + if download: + statvfs = os.statvfs(root) + # measured in bytes + available_space = statvfs.f_frsize * statvfs.f_bavail + if available_space - self.DOWNLOAD_SPACE < 0: + raise ValueError( + f"The storage device for {root} is too small to download the dataset), " + f"an additional {self.DOWNLOAD_SPACE - self.available_space:.2f} GB are required." + ) + self._download_dataset(root) + + def _download_dataset(self, root: str) -> None: + # TODO: remove before release, used only for testing purposes + dirs = ["tree", "shapenet", "reflective", "hole"] + # create directory subtree for the download + for d in dirs: + d_path = os.path.join(root, d) + if not os.path.exists(d_path): + os.makedirs(d_path) + + for i in range(self.EXPERIMENTAL_RANGE): + url = f"https://data.megengine.org.cn/research/crestereo/dataset/{d}/{i}.tar" + download_and_extract_archive(url=url, download_root=d_path, remove_finished=True) + + +class Middlebury2014(StereoMatchingDataset): + """Publicly available scenes from the Middlebury dataset `2014 version `. + + The dataset mostly follows the original format, without containing the ambient subdirectories. : :: + + root + Middlebury2014 + train + scene1-{ ,perfect,imperfect} + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm + disp{0,1}y.pfm + scene2-{ ,perfect,imperfect} + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm + disp{0,1}y.pfm + ... + additional + scene1-{ ,perfect,imperfect} + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm + disp{0,1}y.pfm + ... + test + scene1 + calib.txt + im{0,1}.png + scene2 + calib.txt + im{0,1}.png + ... + + + Args: + root (string): Root directory of the Middleburry 2014 Dataset. + split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" + use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. Sampled with equal probability. + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + transforms (callalbe, optional): A function/transform that takes in + ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
+ """ + + splits = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"], + "test": ['Plants', 'Classroom2E', 'Classroom2', 'Australia', 'DjembeL', 'CrusadeP', 'Crusade', 'Hoops', 'Bicycle2', 'Staircase', 'Newkuba', 'AustraliaP', 'Djembe', 'Livingroom', 'Computer'] + } + + def __init__( + self, + *, + root: str, + split: str = "train", + use_ambient_views: bool = False, + transforms: Optional[Callable] = None, + download: bool = False + ): + super().__init__(root, transforms) + verify_str_arg(split, "split", valid_values=("train", "test", "additional")) + + if download: + self._download_dataset(root) + + root = Path(root) / "FlyingChairs" + if not os.path.exists(root / split): + raise FileNotFoundError( + f"The {split} directory was not found in the provided root directory" + ) + + split_scenes = self.splits[split] + # check that the provided root folder contains the scene splits + if not all(s in os.listdir(root / split) for s in split_scenes): + raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.") + + imgs_left = sorted(glob(str(root / split / "*" / "im0.png"))) + imgs_right = sorted(glob(str(root / split / "*" / "im1.png"))) + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + + if split == "test": + dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + else: + + dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) + dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) + self._disparities = list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) + + self.use_ambient_views = use_ambient_views + + def __getitem__(self, index: int) -> Tuple: + return super().__getitem__(index) + + def _read_img(self, file_path: str) -> Image.Image: + if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: + # initialize sampleable container + ambient_file_paths = list(file_path.replace("im1.png", view_name) for view_name in ["im1E.png", "im1L.png"]) + # double check that we're not going to try to read from an invalid file path + ambient_file_paths = list(filter(lambda p: os.path.exists(p), ambient_file_paths)) + # keep the original image as an option as well for uniform sampling between base views + ambient_file_paths.append(file_path) + file_path = random.choice(ambient_file_paths) + return super()._read_img(file_path) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): # case when dealing with the test split + return None, None + dsp_mask = read_pfm_file(file_path) + occ_mask = dsp_mask < 1e3 + return dsp_mask, occ_mask + + def _download_dataset(self, root: str): + base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip" + # train and additional splits have 2 different calibration settings + root = Path(root) / "Middlebury2014" + for split_name, split_scenes in self.splits.values(): + if split_name == "test": + continue + split_root = root / split_name + for scene in split_scenes: + scene_name = f"{scene}-{calibration}" + for calibration in ["perfect", "imperfect"]: + scene_url = f"{base_url}/{scene_name}.zip" + download_and_extract_archive(url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True) + + if any(s not in os.listdir(root) for s 
in self.splits["test"]): + # test split is downloaded from a different location + test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip" + + # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF + # we want to move the contents from testF into the directory + download_and_extract_archive(url=test_set_url, download_root=root, remove_finished=True) + for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): + for scene in scene_names: + shutil.move(os.path.join(scene_dir, scene), os.path.join(root, scene)) + + # cleanup MiddEval3 directory + shutil.rmtree(os.path.join(root, "MiddEval3")) + + +class ETH3D(StereoMatchingDataset): + """"ETH3D `Low-Res Two-View `_ dataset. + + The dataset is expected to have the following structure: :: + + root + ETH3D + two_view_training + scene1 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + scene2 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + ... + two_view_training_gt + scene1 + disp0GT.pfm + mask0nocc.png + scene2 + disp0GT.pfm + mask0nocc.png + ... + two_view_testing + scene1 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + scene2 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + ... + + Args: + root (string): Root directory of the ETH3D Dataset. + split (string, optional): The dataset split of scenes, either "train" (default) or "test". + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + transforms (callalbe, optional): A function/transform that takes in + ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("train", "test")) + + root = Path(root) / "ETH3D" + img_dir = "two_view_training" if split == "train" else "two_view_testing" + anot_dir = "two_view_training_gt" + + imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) + imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) + + if split == "test": + dsp_masks_left, dsp_masks_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + else: + dsp_masks_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm"))) + # no masks for the right view, always using left as reference + dsp_masks_right = list("" for _ in dsp_masks_left) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + dsp_mask = read_pfm_file(file_path) + occ_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) + occ_mask = np.array(occ_mask) + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) + + +class Kitti2012(StereoMatchingDataset): + """"Kitti dataset from the `2012 `_ stereo evaluation benchmark. + Uses the RGB images for consistency with Kitti 2015. + + The dataset is expected to have the following structure: :: + + root + Kitti2012 + testing + colored_0 + colored_1 + training + colored_0 + colored_1 + disp_noc + calib + + Args: + root (string): Root directory where Kitti2012 is located. 
+        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
+        download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.
+    """
+
+    def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None):
+        super().__init__(root, transforms)
+
+        verify_str_arg(split, "split", valid_values=("train", "test"))
+
+        root = Path(root) / "Kitti2012" / (split + "ing")
+        imgs_left = sorted(glob(str(root / "colored_0" / "*_10.png")))
+        imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png")))
+
+        if split == "train":
+            dsp_masks_left = sorted(glob(str(root / "disp_noc" / "*.png")))
+            dsp_masks_right = list("" for _ in dsp_masks_left)
+        else:
+            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+
+        self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
+        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+
+    def _read_disparity(self, file_path: str) -> Tuple:
+        if not os.path.exists(file_path):
+            return None, None
+
+        dsp_mask = np.array(Image.open(file_path)) / 256.0
+        occ_mask = dsp_mask > 0.0
+
+        return dsp_mask, occ_mask
+
+    def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
+        return super().__getitem__(index)
+
+
+class Kitti2015(StereoMatchingDataset):
+    """Kitti dataset from the `2015 `_ stereo evaluation benchmark.
+
+    The dataset is expected to have the following structure: ::
+
+        root
+            Kitti2015
+                testing
+                    image_2
+                    image_3
+                training
+                    image_2
+                    image_3
+                    disp_noc_0
+                    disp_noc_1
+                    calib
+
+    Args:
+        root (string): Root directory where Kitti2015 is located.
+        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
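+
+    Example:
+        A minimal usage sketch, assuming the KITTI 2015 archives are already
+        extracted under ``root`` in the layout above::
+
+            dataset = Kitti2015(root="datasets", split="train")
+            imgs, dsp_maps, occ_masks = dataset[0]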
+ """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("train", "test")) + + root = Path(root) / "Kitti2015" / (split + "ing") + imgs_left = sorted(glob(str(root / "image_2" / "*_10.png"))) + imgs_right = sorted(glob(str(root / "image_3" / "*_10.png"))) + + if split == "train": + dsp_masks_left = sorted(glob(str(root / "disp_noc_0" / "*.png"))) + dsp_masks_right = sorted(glob(str(root / "disp_noc_1" / "*.png"))) + else: + dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + dsp_mask = np.array(Image.open(file_path)) / 256.0 + occ_mask = dsp_mask > 0.0 + + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) From df6ec4ba3f1ad48a01748637213122ccbd3b73c3 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 20:19:23 +0100 Subject: [PATCH 02/35] added SceneFlow, FallingThings and CREStereo --- torchvision/datasets/_stereo_matching.py | 47 +++++++++++++++++++++++- vision | 1 + 2 files changed, 47 insertions(+), 1 deletion(-) create mode 160000 vision diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 42535c1623b..960e443bd46 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -28,7 +28,8 @@ def read_pfm_file(file_path: str) -> np.array: # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py with open(file_path, "rb") as file: header = file.readline().rstrip() - assert header in ["PF", "Pf"], f"{file_path} is not a valid .pfm file" + assert header in [b"PF", b"Pf"], f"{file_path} is not a valid .pfm file" + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) assert dim_match, f"{file_path} has a Malformed PFM header" @@ -477,3 +478,47 @@ def _read_disparity(self, file_path: str) -> Tuple: def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: return super().__getitem__(index) + + +class SintelDataset(StereoMatchingDataset): + """"Sintel `Stereo Dataset `_. + + Args: + root (string): Root directory where Sintel Stereo is located. + transforms (callalbe, optional): A function/transform that takes in + ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. 
+ """ + + def __init__(self, root: str, transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + root = Path(root) / "Sintel" + + imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) + imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) + + dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) + dsp_masks_right = list("" for _ in dps_masks_left) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dps_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + # disparity decoding as per Sintel instructions + dsp_mask = np.array(Image.open(file_path), dtype=np.float32) + r, g, b = np.split(dsp_mask, 3, axis=-1) + dsp_mask = r * 4 + g / (2**6) + b / (2**14) + + # occlusion mask + occ_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) > 0 + # out of frame mask + off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) > 0 + # combine the masks together + occ_mask = np.logical_or(off_mask, occ_mask) + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) diff --git a/vision b/vision new file mode 160000 index 00000000000..bd19fb8ea9b --- /dev/null +++ b/vision @@ -0,0 +1 @@ +Subproject commit bd19fb8ea9b1f67df2a2a1ee116874609ad3ee8c From d0c5afbcb37f430626f77d23bf153ec044160c31 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 23:29:04 +0100 Subject: [PATCH 03/35] added SceneFlow, FallingThings and CREStereo --- torchvision/datasets/_stereo_matching.py | 228 ++++++++++++++++++----- 1 file changed, 183 insertions(+), 45 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 960e443bd46..65336503b87 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,26 +1,28 @@ from abc import ABC, abstractmethod -from functools import reduce from glob import glob from pathlib import Path from random import random import re import shutil from typing import Callable, List, Optional, Tuple, Any -import lzma from torch import Tensor from .vision import VisionDataset from .utils import download_and_extract_archive, download_url, verify_str_arg import os -from torch.utils.model_zoo import tqdm import numpy as np from PIL import Image +import json __all__ = ( - "CSEStereo" + "CREStereo" # waiting for download "Middlebury2014" "ETH3D" "Kitti2012" "Kitti2015" + "Sintel" + "SceneFlow" # need to find valid mask procedure + "FallingThings" + "InStereo2k" # waiting for download ) @@ -71,21 +73,21 @@ def _read_disparity(self, file_path: str) -> Tuple: # function that returns a disparity map and an occlusion map pass - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) - dsp_map_left, occ_mask_left = self._read_disparity(self._disparities[index][0]) - dsp_map_right, occ_mask_right = self._read_disparity(self._disparities[index][1]) + dsp_map_left, valid_mask_right = self._read_disparity(self._disparities[index][0]) + dsp_map_right, valid_mask_right = self._read_disparity(self._disparities[index][1]) imgs = (img_left, img_right) 
dsp_maps = (dsp_map_left, dsp_map_right) - occ_masks = (occ_mask_left, occ_mask_right) + valid_masks = (valid_mask_right, valid_mask_right) if self.transforms is not None: - imgs, dsp_maps, occ_masks, = self.transforms(imgs, dsp_maps, occ_masks) + imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks) - return imgs, dsp_maps, occ_masks + return imgs[0], imgs[1], dsp_maps[0], valid_masks[0] def __len__(self) -> int: return len(self._images) @@ -100,7 +102,9 @@ class CRESSyntethicStereo(StereoMatchingDataset): EXPERIMENTAL_RANGE = 1 # TODO: remove after validating dataset structure / flow - def __init__(self, root: str, transforms: Optional[Callable] = None, download: bool = True): + MAX_DISP = 256. + + def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = True): super().__init__(root, transforms) # if the API user requests a dataset download check that the user can download it if download: @@ -114,6 +118,32 @@ def __init__(self, root: str, transforms: Optional[Callable] = None, download: b ) self._download_dataset(root) + verify_str_arg(split, "split", valid_values=("tree", "shapenet", "reflective", "hole", "all")) + + splits = { + "tree": ["tree"], + "shapenet": ["shapenet"], + "reflective": ["reflective"], + "hole": ["hole"], + "all": ["hole", "shapenet", "reflective", "hole"], + }[split] + + for s in splits: + imgs_left = sorted(glob(str(root / s / "*_left.jpg"))) + imgs_right = (p.replace("_left", "_right") for p in imgs_left) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += imgs + + disparity_maps_left = (p.replace("_left", "_left.disp") for p in imgs_left) + disparity_maps_right = (p.replace("_right", "_right.disp") for p in imgs_right) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + disparity = np.array(Image.open(file_path), dtype=np.float32) + valid = (disparity < self.MAX_DISP) & (disparity > 0.) 
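+        # a pixel is valid when it carries a positive match; MAX_DISP is an
+        # assumed upper bound on what the synthetic renderer produces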
+        return disparity, valid
+
     def _download_dataset(self, root: str) -> None:
         # TODO: remove before release, used only for testing purposes
         dirs = ["tree", "shapenet", "reflective", "hole"]
@@ -249,9 +279,9 @@ def _read_img(self, file_path: str) -> Image.Image:
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):  # case when dealing with the test split
             return None, None
-        dsp_mask = read_pfm_file(file_path)
-        occ_mask = dsp_mask < 1e3
-        return dsp_mask, occ_mask
+        disparity_map = read_pfm_file(file_path)
+        valid_mask = disparity_map < 1e3
+        return disparity_map, valid_mask

     def _download_dataset(self, root: str):
         base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip"
@@ -347,23 +377,23 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png")))

         if split == "test":
-            dsp_masks_left, dsp_masks_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
         else:
-            dsp_masks_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm")))
+            disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm")))
             # no masks for the right view, always using left as reference
-            dsp_masks_right = list("" for _ in dsp_masks_left)
+            disparity_maps_right = list("" for _ in disparity_maps_left)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))

     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None

-        dsp_mask = read_pfm_file(file_path)
-        occ_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png"))
-        occ_mask = np.array(occ_mask)
-        return dsp_mask, occ_mask
+        disparity_map = read_pfm_file(file_path)
+        valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png"))
+        valid_mask = np.array(valid_mask)
+        return disparity_map, valid_mask

     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)

@@ -404,22 +434,22 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png")))

         if split == "train":
-            dsp_masks_left = sorted(glob(str(root / "disp_noc" / "*.png")))
-            dsp_masks_right = list("" for _ in dsp_masks_left)
+            disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png")))
+            disparity_maps_right = list("" for _ in disparity_maps_left)
         else:
-            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))

     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None

-        dsp_mask = np.array(Image.open(file_path)) / 256.0
-        occ_mask = dsp_mask > 0.0
+        disparity_map = np.array(Image.open(file_path)) / 256.0
+        valid_mask = disparity_map > 0.0

-        return dsp_mask, occ_mask
+        return disparity_map, valid_mask

     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)

@@ -459,22 +489,22 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / "image_3" / "*_10.png")))

         if split == "train":
-            dsp_masks_left = sorted(glob(str(root / "disp_noc_0" / "*.png")))
-            dsp_masks_right = sorted(glob(str(root / "disp_noc_1" / "*.png")))
+            disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png")))
+            disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png")))
         else:
-            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))

     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None

-        dsp_mask = np.array(Image.open(file_path)) / 256.0
-        occ_mask = dsp_mask > 0.0
+        disparity_map = np.array(Image.open(file_path)) / 256.0
+        valid_mask = disparity_map > 0.0

-        return dsp_mask, occ_mask
+        return disparity_map, valid_mask

     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
@@ -498,27 +528,135 @@ def __init__(self, root: str, transforms: Optional[Callable] = None):
         imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png")))

         dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png")))
-        dsp_masks_right = list("" for _ in dps_masks_left)
+        disparity_maps_right = list("" for _ in dps_masks_left)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dps_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(dps_masks_left, disparity_maps_right))

     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None

         # disparity decoding as per Sintel instructions
-        dsp_mask = np.array(Image.open(file_path), dtype=np.float32)
-        r, g, b = np.split(dsp_mask, 3, axis=-1)
-        dsp_mask = r * 4 + g / (2**6) + b / (2**14)
+        disparity_map = np.array(Image.open(file_path), dtype=np.float32)
+        r, g, b = np.split(disparity_map, 3, axis=-1)
+        disparity_map = r * 4 + g / (2**6) + b / (2**14)

         # occlusion mask
-        occ_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) > 0
+        valid_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) == 0
         # out of frame mask
-        off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) > 0
+        off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) == 0
         # combine the masks together
-        occ_mask = np.logical_or(off_mask, occ_mask)
-        return dsp_mask, occ_mask
+        valid_mask = np.logical_and(off_mask, valid_mask)
+        return disparity_map, valid_mask

     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
+
+
+class SceneFlowDataset(StereoMatchingDataset):
+    """Dataset interface for `Scene Flow `_ datasets."""
+
+    def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None):
+        super().__init__(root, transforms)
+
+        verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
"Monkaa")) + split = split.upper() + + verify_str_arg(split, "pass_name", valid_values=("clean", "final", "both")) + + passes = { + "clean": ["frames_cleanpass"], + "final": ["frames_finalpass"], + "both": ["frames_cleanpass, frames_finalpass"], + }[pass_name] + + root = Path(root) / split + + for p in passes: + imgs_left = sorted(glob(str(root / p / "left" / "*" / "*.png"))) + imgs_right = sorted(glob(str(root / p / "right" / "*" / "*.png"))) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += imgs + + disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] + disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + disparity = read_pfm_file(file_path) + valid = np.ones_like(disparity) + return disparity, valid + + +class FallingThingsDataset(StereoMatchingDataset): + """FallingThings ``_ dataset + + The dataset is expected to have the following structre: :: + + root + FallingThings + single + scene1 + _object_settings.json + _camera_settings.json + image1.left.depth.png + image1.right.depth.png + image1.left.jpg + image1.right.jpg + image2.left.depth.png + image2.right.depth.png + image2.left.jpg + image2.right + ... + scene2 + ... + mixed + scene1 + _object_settings.json + _camera_settings.json + image1.left.depth.png + image1.right.depth.png + image1.left.jpg + image1.right.jpg + image2.left.depth.png + image2.right.depth.png + image2.left.jpg + image2.right + ... + scene2 + ... + """ + + def __init__(self, root: str, split: str = "single", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("single", "mixed", "both")) + split = split.upper() + + splits = { + "single": ["single"], + "mixed": ["mixed"], + "both": ["single", "mixed"], + }[split] + + for s in splits: + imgs_left = sorted(glob(str(root / s / "*.left.jpg"))) + imgs_right = sorted(glob(str(root / s / "*.right.jpg"))) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += imgs + + disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png"))) + disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png"))) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + depth = Image.Open(file_path) + with open(os.path.split(file_path)[0] + '_camera_settings.json', 'r') as f: + intrinsics = json.load(f) + fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + disparity = (fx * 6.0 * 100) / depth.astype(np.float32) + valid = disparity > 0 + return disparity, valid From a5664754ee313dcfc269a3be8645ae15a0db11ba Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 23:34:27 +0100 Subject: [PATCH 04/35] "removed duplicate folder" --- vision | 1 - 1 file changed, 1 deletion(-) delete mode 160000 vision diff --git a/vision b/vision deleted file mode 160000 index bd19fb8ea9b..00000000000 --- a/vision +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bd19fb8ea9b1f67df2a2a1ee116874609ad3ee8c From 8ea74f202735832dd8fd2b3122da195ab5bf1f69 Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 11:22:29 +0100 Subject: [PATCH 05/35] Added InStereo2k. 
Started working on dataset tests --- test/datasets_utils.py | 14 +- test/test_datasets.py | 552 ++++++++++++++++++++++- torchvision/datasets/__init__.py | 1 + torchvision/datasets/_stereo_matching.py | 191 ++++++-- 4 files changed, 686 insertions(+), 72 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 88eb4e17823..f051e325968 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -561,9 +561,9 @@ def test_feature_types(self, config): @test_all_configs def test_num_examples(self, config): with self.create_dataset(config) as (dataset, info): - assert len(dataset) == info["num_examples"] + assert len(dataset) == info["num_examples"], f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" - @test_all_configs + @ test_all_configs def test_transforms(self, config): mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) for kwarg in self._TRANSFORM_KWARGS: @@ -587,7 +587,7 @@ class ImageDatasetTestCase(DatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, int) - @contextlib.contextmanager + @ contextlib.contextmanager def create_dataset( self, config: Optional[Dict[str, Any]] = None, @@ -610,7 +610,7 @@ def create_dataset( with self._force_load_images(): yield dataset, info - @contextlib.contextmanager + @ contextlib.contextmanager def _force_load_images(self): open = PIL.Image.open @@ -649,7 +649,7 @@ def _set_default_frames_per_clip(self, inject_fake_data): args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" - @functools.wraps(inject_fake_data) + @ functools.wraps(inject_fake_data) def wrapper(tmpdir, config): args = inject_fake_data(tmpdir, config) if frames_per_clip_last and len(args) == len(args_without_default) - 1: @@ -748,7 +748,7 @@ def size(idx: int) -> Tuple[int, int, int]: ] -@requires_lazy_imports("av") +@ requires_lazy_imports("av") def create_video_file( root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], @@ -790,7 +790,7 @@ def create_video_file( return file -@requires_lazy_imports("av") +@ requires_lazy_imports("av") def create_video_folder( root: Union[str, pathlib.Path], name: Union[str, pathlib.Path], diff --git a/test/test_datasets.py b/test/test_datasets.py index a108479aee3..d390c30cee9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,3 +1,4 @@ +from abc import abstractmethod import bz2 import contextlib import csv @@ -10,6 +11,7 @@ import random import shutil import string +from typing import List, Callable, Tuple import unittest import xml.etree.ElementTree as ET import zipfile @@ -23,30 +25,540 @@ from torchvision import datasets +class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoETH3D + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + # create the scene folder + image_paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with left right images + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) + 
image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100))) + return image_paths + + @staticmethod + def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + # create scene directories + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with a random png file for occlusion mask, and a pfm file for disparity + paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100))) + pfm_path = os.path.join(scene_dir, "disp0GT.pfm") + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) + paths.append(pfm_path) + return paths + + def inject_fake_data(self, tmpdir, config): + eth3d_dir = os.path.join(tmpdir, "ETH3D") + + num_examples = 2 if config["split"] == "train" else 3 + + split_name = "two_view_training" if config["split"] == "train" else "two_view_test" + split_dir = os.path.join(eth3d_dir, split_name) + self._create_scene_folder(num_examples, split_dir) + + if config["split"] == "train": + annot_dir = os.path.join(eth3d_dir, "two_view_training_gt") + self._create_annotation_folder(num_examples, annot_dir) + + return num_examples + + def test_training_test_splits(self): + with self.create_dataset(split="train") as (dataset, _): + assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" + for _, _, disparity, valid_mask in dataset: + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + dh, dw, _ = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + with self.create_dataset(split="test") as (dataset, _): + assert all(d == ("", "") for d in dataset._disparities) + for _, _, disparity, valid_mask in dataset: + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class CREStereoSynthethicTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CREStereoSynthetic + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" + os.makedirs(crestereo_dir, exist_ok=True) + + split_dir = crestereo_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + num_examples = 4 + + for idx in range(num_examples): + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) + # these are going to end up being gray scale images + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + + return num_examples + + def test_splits(self): + for split in ("tree", "shapenet", "reflective", "hole"): + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + 
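+                    # np.array on a PIL RGB image yields an (H, W, C) uint8 array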
# check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoMiddlebury2014 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "additional"), use_ambient_views=(True, False)) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: + calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] + scene_dirs = [] + for c in calibrations: + scene_dir = os.path.join(root_dir, f"{scene_name}{c}") + os.makedirs(scene_dir, exist_ok=True) + # make normal images first + datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + # these are going to end up being gray scale images + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) + scene_dirs.append(scene_dir) + return scene_dirs + + def inject_fake_data(self, tmpdir, config): + split_scene_map = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + } + + middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") + os.makedirs(middlebury_dir, exist_ok=True) + + split_dir = middlebury_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + for idx in range(num_examples): + # special case for test_bad_input + if config["split"] not in split_scene_map: + return 0 + + scene_name = split_scene_map[config["split"]][idx] + self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + + # account for perfect / imperfect calibrations + if config["split"] != "test": + num_examples *= 2 + + return num_examples + + def test_train_splits(self): + for split in ["train", "additional"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 3 + assert disparity.shape == (h, w, 3) + # check that valid mask is the same size as the disparity + dh, dw, c = disparity.shape + print(valid_mask.shape) + mh, mw, _ = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with 
self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_augmented_view_usage(self): + with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): + for left, right, _, _ in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2012 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + datasets_utils.create_image_folder( + root=split_dir, + name="colored_0", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="colored_1", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_noc", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2012 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2015 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, 
PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + datasets_utils.create_image_folder( + root=split_dir, + name="image_2", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="image_3", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_0", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSceneFlow + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("FlyingThings3D", "Driving", "Monkaa"), + pass_name=("clean", "final") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]): + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + for i in range(num_examples): + datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + + def inject_fake_data(self, tmpdir, config): + scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" + os.makedirs(scene_flow_dir, exist_ok=True) + + split_dir = scene_flow_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + pass_dir_map = { + "clean": "frames_cleanpass", + "final": "frames_finalpass", + } + + num_examples = 4 + pass_dir_name = pass_dir_map[config["pass_name"]] + # create pass 
directories + pass_dir = split_dir / pass_dir_name + disp_dir = split_dir / "disp" + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) + + # root / pass / direction / scene / .imgs + # root / disparity / direction / scene / .imgs + for direction in ["left", "right"]: + for scene_idx in range(num_examples): + # scene_dir = pass_dir / direction / f"scene_{scene_idx:06d}" + os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + datasets_utils.create_image_folder( + root=pass_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=3, + size=(3, 100, 200), + ) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) + self._create_pfm_folder( + root=disp_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.pfm", + num_examples=3, + size=(100, 200), + ) + + return num_examples * 3 + + def test_train_splits(self): + for split, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split, pass_name=pass_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w, 3) + # check that valid mask is the same size as the disparity + dh, dw, _ = disparity.shape + mh, mw, _ = valid_mask.shape + assert dh == mh + assert dw == mw + + +class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoFallingThings + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root: str, scene_name: str, num_examples: int, size: Tuple[int, int]): + root = pathlib.Path(root) / scene_name + os.makedirs(root, exist_ok=True) + + datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[0], size[1])) + # single channel depth maps + datasets_utils.create_image_file(root, "image1.left.depth.jpg", size=(1, size[0], size[1])) + datasets_utils.create_image_file(root, "image1.right.depth.jpg", size=(1, size[0], size[1])) + + def inject_fake_data(self, tmpdir, config): + fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" + + split_dir = pathlib.Path(fallingthings_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + for i in range(num_examples): + self._make_scene_folder( + root=split_dir, + scene_name=f"scene_{i:06d}", + num_examples=num_examples, + size=(100, 200), + ) + + return num_examples + + class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - @staticmethod + @ staticmethod def _make_binary_file(num_elements, root, name): file_name = os.path.join(root, name) np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - @staticmethod + @ staticmethod def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): STL10TestCase._make_binary_file(num_images * num_channels * 
height * width, root, name) - @staticmethod + @ staticmethod def _make_label_file(num_images, root, name): STL10TestCase._make_binary_file(num_images, root, name) - @staticmethod + @ staticmethod def _make_class_names_file(root, name="class_names.txt"): with open(os.path.join(root, name), "w") as fh: for cname in ("airplane", "bird"): fh.write(f"{cname}\n") - @staticmethod + @ staticmethod def _make_fold_indices_file(root): num_folds = 10 offset = 0 @@ -58,7 +570,7 @@ def _make_fold_indices_file(root): return tuple(range(1, num_folds + 1)) - @staticmethod + @ staticmethod def _make_train_files(root, num_unlabeled_images=1): num_images_in_fold = STL10TestCase._make_fold_indices_file(root) num_train_images = sum(num_images_in_fold) @@ -69,7 +581,7 @@ def _make_train_files(root, num_unlabeled_images=1): return dict(train=num_train_images, unlabeled=num_unlabeled_images) - @staticmethod + @ staticmethod def _make_test_files(root, num_images=2): STL10TestCase._make_image_file(num_images, root, "test_X.bin") STL10TestCase._make_label_file(num_images, root, "test_y.bin") @@ -887,7 +1399,7 @@ def inject_fake_data(self, tmpdir, config): return num_images - @contextlib.contextmanager + @ contextlib.contextmanager def create_dataset(self, *args, **kwargs): with super().create_dataset(*args, **kwargs) as output: yield output @@ -1293,7 +1805,7 @@ def _create_archive(self, root, name, *files): return archive - @datasets_utils.test_all_configs + @ datasets_utils.test_all_configs def test_feature_types(self, config): feature_types = self.FEATURE_TYPES self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES @@ -1571,7 +2083,7 @@ def _file_name_fn(self, cls, ext, idx): def _is_valid_file_to_extensions(self, is_valid_file): return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} - @datasets_utils.test_all_configs + @ datasets_utils.test_all_configs def test_is_valid_file(self, config): extensions = config.pop("extensions") # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the @@ -1581,7 +2093,7 @@ def test_is_valid_file(self, config): ) as (dataset, info): assert len(dataset) == info["num_examples"] - @datasets_utils.test_all_configs + @ datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1602,7 +2114,7 @@ def inject_fake_data(self, tmpdir, config): return dict(num_examples=num_examples_total, classes=classes) - @datasets_utils.test_all_configs + @ datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1701,32 +2213,32 @@ class Places365TestCase(datasets_utils.ImageDatasetTestCase): *((f"{category}/Places365_train_00000001.png", idx) for category, idx in _CATEGORIES_CONTENT), ) - @staticmethod + @ staticmethod def _make_txt(root, name, seq): file = os.path.join(root, name) with open(file, "w") as fh: for text, idx in seq: fh.write(f"{text} {idx}\n") - @staticmethod + @ staticmethod def _make_categories_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._CATEGORIES_CONTENT) - @staticmethod + @ staticmethod def _make_file_list_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._FILE_LIST_CONTENT) - @staticmethod + @ staticmethod def _make_image(file_name, size): os.makedirs(os.path.dirname(file_name), exist_ok=True) 
PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file_name)

-    @staticmethod
+    @staticmethod
     def _make_devkit_archive(root, split):
         Places365TestCase._make_categories_txt(root, Places365TestCase._CATEGORIES)
         Places365TestCase._make_file_list_txt(root, Places365TestCase._FILE_LISTS[split])

-    @staticmethod
+    @staticmethod
     def _make_images_archive(root, split, small):
         folder_name = Places365TestCase._IMAGES[(split, small)]
         image_size = (256, 256) if small else (512, random.randint(512, 1024))
@@ -2041,7 +2553,7 @@ def inject_fake_data(self, tmpdir, config):

         return num_examples[config["split"]]

-    @datasets_utils.test_all_configs
+    @datasets_utils.test_all_configs
     def test_flow(self, config):
         # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images
         # Also make sure the flow is properly decoded
@@ -2100,7 +2612,7 @@ def inject_fake_data(self, tmpdir, config):
         )
         return num_examples

-    @datasets_utils.test_all_configs
+    @datasets_utils.test_all_configs
     def test_flow(self, config):
         h, w = self.FLOW_H, self.FLOW_W
         expected_flow = np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1)
diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py
index 295fe922478..a7dd8397bab 100644
--- a/torchvision/datasets/__init__.py
+++ b/torchvision/datasets/__init__.py
@@ -1,4 +1,5 @@
 from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K
+from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereoSynthetic
 from .caltech import Caltech101, Caltech256
 from .celeba import CelebA
 from .cifar import CIFAR10, CIFAR100
diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index 65336503b87..bcca2b12efb 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -1,10 +1,12 @@
 from abc import ABC, abstractmethod
 from glob import glob
 from pathlib import Path
-from random import random
+import pathlib
+import random
 import re
 import shutil
 from typing import Callable, List, Optional, Tuple, Any
+from jsonschema import ValidationError
 from torch import Tensor
 from .vision import VisionDataset
 from .utils import download_and_extract_archive, download_url, verify_str_arg
@@ -14,15 +16,15 @@
 import json

 __all__ = (
-    "CREStereo"  # waiting for download
-    "Middlebury2014"
-    "ETH3D"
-    "Kitti2012"
-    "Kitti2015"
-    "Sintel"
-    "SceneFlow"  # need to find valid mask procedure
-    "FallingThings"
-    "InStereo2k"  # waiting for download
+    "CREStereo",  # waiting for download / need to find valid mask procedure
+    "StereoMiddlebury2014",
+    "StereoETH3D",
+    "StereoKitti2012",
+    "StereoKitti2015",
+    "StereoSintel",
+    "StereoSceneFlow",  # need to find valid mask procedure
+    "StereoFallingThings",
+    "InStereo2k",  # need to find valid mask procedure
 )

@@ -30,13 +32,15 @@ def read_pfm_file(file_path: str) -> np.array:
     # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py
     with open(file_path, "rb") as file:
         header = file.readline().rstrip()
-        assert header in [b"PF", b"Pf"], f"{file_path} is not a valid .pfm file"
+        if header not in [b"PF", b"Pf"]:
+            raise ValidationError(f"Not a valid PFM file: {file_path}")

-        dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline())
-        assert dim_match, f"{file_path} has a Malformed PFM header"
+        dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
+        if not dim_match:
+            raise ValidationError(f"Malformed PFM
header: {file_path}") width, height = map(int, dim_match.groups()) - channels = 3 if header == "PF" else 1 + channels = 3 if header == b"PF" else 1 scale = float(file.readline().rstrip()) # check for endian type if scale < 0: @@ -77,12 +81,12 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) - dsp_map_left, valid_mask_right = self._read_disparity(self._disparities[index][0]) + dsp_map_left, valid_mask_left = self._read_disparity(self._disparities[index][0]) dsp_map_right, valid_mask_right = self._read_disparity(self._disparities[index][1]) imgs = (img_left, img_right) dsp_maps = (dsp_map_left, dsp_map_right) - valid_masks = (valid_mask_right, valid_mask_right) + valid_masks = (valid_mask_left, valid_mask_right) if self.transforms is not None: imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks) @@ -93,7 +97,7 @@ def __len__(self) -> int: return len(self._images) -class CRESSyntethicStereo(StereoMatchingDataset): +class CREStereoSynthetic(StereoMatchingDataset): """Synthetic dataset used in training the `CREStereo `_ architecture. Ported from the download script in the paper github `repo `_. @@ -104,8 +108,11 @@ class CRESSyntethicStereo(StereoMatchingDataset): MAX_DISP = 256. - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = True): + def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False): super().__init__(root, transforms) + + root = Path(root) / "CREStereo" + # if the API user requests a dataset download check that the user can download it if download: statvfs = os.statvfs(root) @@ -130,12 +137,17 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable for s in splits: imgs_left = sorted(glob(str(root / s / "*_left.jpg"))) - imgs_right = (p.replace("_left", "_right") for p in imgs_left) + imgs_right = list(p.replace("_left", "_right") for p in imgs_left) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs - disparity_maps_left = (p.replace("_left", "_left.disp") for p in imgs_left) - disparity_maps_right = (p.replace("_right", "_right.disp") for p in imgs_right) + disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) + disparity_maps_right = list(p.replace("_right", "_right.disp") for p in imgs_right) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps @@ -158,7 +170,7 @@ def _download_dataset(self, root: str) -> None: download_and_extract_archive(url=url, download_root=d_path, remove_finished=True) -class Middlebury2014(StereoMatchingDataset): +class StereoMiddlebury2014(StereoMatchingDataset): """Publicly available scenes from the Middlebury dataset `2014 version `. The dataset mostly follows the original format, without containing the ambient subdirectories. 
: ::
@@ -219,12 +231,11 @@ class Middlebury2014(StereoMatchingDataset):
     splits = {
         "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"],
         "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"],
-        "test": ['Plants', 'Classroom2E', 'Classroom2', 'Australia', 'DjembeL', 'CrusadeP', 'Crusade', 'Hoops', 'Bicycle2', 'Staircase', 'Newkuba', 'AustraliaP', 'Djembe', 'Livingroom', 'Computer']
+        "test": ["Plants", "Classroom2E", "Classroom2", "Australia", "DjembeL", "CrusadeP", "Crusade", "Hoops", "Bicycle2", "Staircase", "Newkuba", "AustraliaP", "Djembe", "Livingroom", "Computer"]
     }

     def __init__(
         self,
-        *,
         root: str,
         split: str = "train",
         use_ambient_views: bool = False,
         transforms: Optional[Callable] = None,
         download: bool = False
@@ -237,7 +248,7 @@ def __init__(
         if download:
             self._download_dataset(root)

-        root = Path(root) / "FlyingChairs"
+        root = Path(root) / "Middlebury2014"
         if not os.path.exists(root / split):
             raise FileNotFoundError(
                 f"The {split} directory was not found in the provided root directory"
@@ -245,11 +256,19 @@ def __init__(
         split_scenes = self.splits[split]
         # check that the provided root folder contains the scene splits
-        if not all(s in os.listdir(root / split) for s in split_scenes):
+        if not any(
+            # using startswith to account for perfect / imperfect calibration
+            scene.startswith(s) for scene in os.listdir(root / split)
+            for s in split_scenes
+        ):
             raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.")

         imgs_left = sorted(glob(str(root / split / "*" / "im0.png")))
         imgs_right = sorted(glob(str(root / split / "*" / "im1.png")))
+
+        if not len(imgs_left) or not len(imgs_right):
+            raise FileNotFoundError("No images found in {}".format(root))
+
         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))

         if split == "test":
             dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
         else:
             dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm")))
             dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm")))
         self._disparities = list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right))

         self.use_ambient_views = use_ambient_views

     def __getitem__(self, index: int) -> Tuple:
         return super().__getitem__(index)

     def _read_img(self, file_path: str) -> Image.Image:
@@ -312,7 +331,7 @@ def _download_dataset(self, root: str):
         shutil.rmtree(os.path.join(root, "MiddEval3"))
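The `use_ambient_views` flag threaded through the Middlebury2014 changes above is only stored on the instance at this point in the series. A minimal sketch of how the ambient-view swap could behave, assuming the `im1.png` / `im1E.png` / `im1L.png` file names used by the test fixtures later in this series; the helper itself is hypothetical, not part of the patch:

import random
from pathlib import Path
from PIL import Image

def read_img_with_ambient(file_path: str, use_ambient_views: bool) -> Image.Image:
    path = Path(file_path)
    if use_ambient_views and path.name == "im1.png":
        # im1E.png / im1L.png are the alternate exposure / lighting captures of the
        # right view; pick one of the three candidates with equal probability.
        path = path.parent / random.choice(["im1.png", "im1E.png", "im1L.png"])
    img = Image.open(path)
    return img.convert("RGB") if img.mode != "RGB" else img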
-class ETH3D(StereoMatchingDataset):
+class StereoETH3D(StereoMatchingDataset):
     """ETH3D `Low-Res Two-View `_ dataset.

     The dataset is expected to have the following structure: ::
@@ -370,16 +389,20 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         verify_str_arg(split, "split", valid_values=("train", "test"))

         root = Path(root) / "ETH3D"
-        img_dir = "two_view_training" if split == "train" else "two_view_testing"
+
+        img_dir = "two_view_training" if split == "train" else "two_view_test"
         anot_dir = "two_view_training_gt"

         imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png")))
         imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png")))

+        if not len(imgs_left) or not len(imgs_right):
+            raise FileNotFoundError("No images found in {}".format(root))
+
         if split == "test":
             disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
         else:
-            disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm")))
+            disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*0GT.pfm")))
             # no masks for the right view, always using left as reference
             disparity_maps_right = list("" for _ in disparity_maps_left)
@@ -395,11 +418,11 @@ def _read_disparity(self, file_path: str) -> Tuple:
         valid_mask = np.array(valid_mask)
         return disparity_map, valid_mask

-    def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
+    def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
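For reference, the ETH3D annotation layout globbed above pairs a `disp0GT.pfm` disparity map with a `mask0nocc.png` occlusion mask per scene (the same file names the test fixtures in this series create). A hedged sketch of the read path, reusing `read_pfm_file` from this module; treating non-zero mask pixels as valid is an assumption, not something the patch states:

import numpy as np
from pathlib import Path
from PIL import Image
from torchvision.datasets._stereo_matching import read_pfm_file

def read_eth3d_disparity(pfm_path: str):
    # left-view disparity stored as a single-channel PFM
    disparity_map = read_pfm_file(pfm_path)
    # mask0nocc.png marks pixels with non-occluded ground truth (assumed semantics)
    mask_path = Path(pfm_path).parent / "mask0nocc.png"
    valid_mask = np.array(Image.open(mask_path)) > 0
    return disparity_map, valid_mask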
-class Kitti2012(StereoMatchingDataset):
+class StereoKitti2012(StereoMatchingDataset):
     """Kitti dataset from the `2012 `_ stereo evaluation benchmark.
     Uses the RGB images for consistency with Kitti 2015.

     The dataset is expected to have the following structure: ::
@@ -433,11 +456,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_left = sorted(glob(str(root / "colored_0" / "*_10.png")))
         imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png")))

+        if not len(imgs_left) or not len(imgs_right):
+            raise FileNotFoundError("No images found in {}".format(root))
+
         if split == "train":
             disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png")))
             disparity_maps_right = list("" for _ in disparity_maps_left)
         else:
-            disparity_maps_left, disparity_maps_right = list("" for _ in disparity_maps_left), list("" for _ in disparity_maps_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
         self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
@@ -455,7 +481,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)

-class Kitti2015(StereoMatchingDataset):
+class StereoKitti2015(StereoMatchingDataset):
     """Kitti dataset from the `2015 `_ stereo evaluation benchmark.

     The dataset is expected to have the following structure: ::
@@ -468,8 +494,8 @@ class Kitti2015(StereoMatchingDataset):
         training
             image_2
             image_3
-            disp_noc_0
-            disp_noc_1
+            disp_occ_0
+            disp_occ_1
             calib

     Args:
@@ -488,11 +514,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_left = sorted(glob(str(root / "image_2" / "*_10.png")))
         imgs_right = sorted(glob(str(root / "image_3" / "*_10.png")))

+        if not len(imgs_left) or not len(imgs_right):
+            raise FileNotFoundError("No images found in {}".format(root))
+
         if split == "train":
             disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png")))
             disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png")))
         else:
-            disparity_maps_left, disparity_maps_right = list("" for _ in disparity_maps_left), list("" for _ in disparity_maps_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)

         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
         self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
@@ -510,7 +539,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)

-class SintelDataset(StereoMatchingDataset):
+class StereoSintel(StereoMatchingDataset):
     """Sintel `Stereo Dataset `_.

     Args:
@@ -527,6 +556,9 @@ def __init__(self, root: str, transforms: Optional[Callable] = None):
         imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png")))
         imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png")))

+        if not len(imgs_left) or not len(imgs_right):
+            raise FileNotFoundError("No images found in {}".format(root))
+
         dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png")))
         disparity_maps_right = list("" for _ in dps_masks_left)
@@ -554,16 +586,16 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)

-class SceneFlowDataset(StereoMatchingDataset):
+class StereoSceneFlow(StereoMatchingDataset):
     """Dataset interface for `Scene Flow `_ datasets."""

     def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None):
         super().__init__(root, transforms)

-        verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
-        split = split.upper()
+        root = Path(root) / "SceneFlow"

-        verify_str_arg(split, "pass_name", valid_values=("clean", "final", "both"))
+        verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
+        verify_str_arg(pass_name, "pass_name", valid_values=("clean", "final", "both"))

         passes = {
             "clean": ["frames_cleanpass"],
             "final": ["frames_finalpass"],
             "both": ["frames_cleanpass", "frames_finalpass"],
         }[pass_name]

-        root = Path(root) / split
+        root = root / split

         for p in passes:
-            imgs_left = sorted(glob(str(root / p / "left" / "*" / "*.png")))
-            imgs_right = sorted(glob(str(root / p / "right" / "*" / "*.png")))
+            imgs_left = sorted(glob(str(root / p / "*" / "left" / "*.png")))
+            imgs_right = sorted(glob(str(root / p / "*" / "right" / "*.png")))
+
+            if not len(imgs_left) or not len(imgs_right):
+                raise FileNotFoundError("No images found in {}".format(root / p))
+
             imgs = list((l, r) for l, r in zip(imgs_left, imgs_right))
             self._images += imgs

             disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left]
             disparity_maps_right =
[file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right]
+
             disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
             self._disparities += disparity_maps
@@ -589,8 +626,11 @@ def _read_disparity(self, file_path: str) -> Tuple:
         valid = np.ones_like(disparity)
         return disparity, valid

+    def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+        return super().__getitem__(index)

-class FallingThingsDataset(StereoMatchingDataset):
+
+class StereoFallingThings(StereoMatchingDataset):
     """FallingThings ``_ dataset

     The dataset is expected to have the following structure: ::
@@ -644,11 +684,16 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab
         for s in splits:
             imgs_left = sorted(glob(str(root / s / "*.left.jpg")))
             imgs_right = sorted(glob(str(root / s / "*.right.jpg")))
+
+            if not len(imgs_left) or not len(imgs_right):
+                raise FileNotFoundError("No images found in {}".format(root))
+
             imgs = list((l, r) for l, r in zip(imgs_left, imgs_right))
             self._images += imgs

             disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png")))
             disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png")))
+
             disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
             self._disparities += disparity_maps
@@ -660,3 +705,59 @@ def _read_disparity(self, file_path: str) -> Tuple:
         disparity = (fx * 6.0 * 100) / depth.astype(np.float32)
         valid = disparity > 0
         return disparity, valid
+
+    def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+        return super().__getitem__(index)
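The `_read_disparity` conversion above is the standard stereo relation disparity = focal_length_px * baseline / depth. The constants suggest a 6.0 cm baseline with a factor of 100 for the depth unit, though the patch does not state this; a small self-contained version under that assumption:

import numpy as np

def depth_to_disparity(depth: np.ndarray, fx: float, baseline: float = 6.0, unit_scale: float = 100.0):
    # disparity = fx * B / Z, mirroring (fx * 6.0 * 100) / depth above
    with np.errstate(divide="ignore"):
        disparity = (fx * baseline * unit_scale) / depth.astype(np.float32)
    # slightly stricter than the patch's `disparity > 0`: also drops
    # divide-by-zero infinities from depth == 0 pixels
    valid = np.isfinite(disparity) & (disparity > 0)
    return disparity, valid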
+ """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + root = Path(root) / "InStereo2k" / split + + imgs_left = sorted(glob(str(root / "*" / "left.png"))) + imgs_right = list(p.replace("left", "right") for p in imgs_left) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images = imgs + + disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) + disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) + + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities = disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + disparity = np.array(Image.open(file_path), dtype=np.float32) + valid = np.ones_like(disparity) + return disparity, valid From 0959499813c5213d9d035128088ea8ffeceb0444 Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 15:10:17 +0100 Subject: [PATCH 06/35] "Added calibrartion arg for Middlebury2014 (#6259)" --- test/test_datasets.py | 50 ++++++++--- torchvision/datasets/_stereo_matching.py | 107 +++++++++++++++++++---- 2 files changed, 127 insertions(+), 30 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index d390c30cee9..5d557020ac8 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -193,10 +193,7 @@ def inject_fake_data(self, tmpdir, config): scene_name = split_scene_map[config["split"]][idx] self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) - # account for perfect / imperfect calibrations - if config["split"] != "test": - num_examples *= 2 - + # TODO: add calibration argument test return num_examples def test_train_splits(self): @@ -428,12 +425,15 @@ class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]): + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: root = pathlib.Path(root) / name os.makedirs(root, exist_ok=True) + paths = [] for i in range(num_examples): datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + paths.append(str(root / file_name_fn(i))) + return paths def inject_fake_data(self, tmpdir, config): scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" @@ -447,27 +447,25 @@ def inject_fake_data(self, tmpdir, config): "final": "frames_finalpass", } - num_examples = 4 + num_examples = 1 pass_dir_name = pass_dir_map[config["pass_name"]] # create pass directories pass_dir = split_dir / pass_dir_name - disp_dir = split_dir / "disp" + disp_dir = split_dir / "disparity" os.makedirs(pass_dir, exist_ok=True) os.makedirs(disp_dir, exist_ok=True) - # root / pass / direction / scene / .imgs - # root / disparity / direction / scene / .imgs for direction in ["left", "right"]: for scene_idx in range(num_examples): - # scene_dir = pass_dir / direction / f"scene_{scene_idx:06d}" os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) datasets_utils.create_image_folder( root=pass_dir / f"scene_{scene_idx:06d}", name=direction, file_name_fn=lambda i: f"{i:06d}.png", num_examples=3, - size=(3, 
100, 200), + size=(3, 200, 100), ) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) self._create_pfm_folder( root=disp_dir / f"scene_{scene_idx:06d}", @@ -480,18 +478,20 @@ def inject_fake_data(self, tmpdir, config): return num_examples * 3 def test_train_splits(self): - for split, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): - with self.create_dataset(split=split, pass_name=pass_name) as (dataset, _): + for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): for left, right, disparity, valid_mask in dataset: + print(f"Split {split_name} pass {pass_name}") left_array = np.array(left) right_array = np.array(right) h, w, c = left_array.shape # check that left and right are the same size assert left_array.shape == right_array.shape + print(left_array.shape) # check general shapes assert c == 3 assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 + assert len(valid_mask.shape) == 3 assert disparity.shape == (h, w, 3) # check that valid mask is the same size as the disparity dh, dw, _ = disparity.shape @@ -534,6 +534,28 @@ def inject_fake_data(self, tmpdir, config): return num_examples + def test_splits(self): + for split_name in ["single", "mixed"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + print(f"Split {split_name}") + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + print(left_array.shape) + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index bcca2b12efb..0bd75fe82a4 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -6,6 +6,7 @@ import re import shutil from typing import Callable, List, Optional, Tuple, Any +import warnings from jsonschema import ValidationError from torch import Tensor from .vision import VisionDataset @@ -238,6 +239,7 @@ def __init__( self, root: str, split: str = "train", + calibration: Optional[str] = None, use_ambient_views: bool = False, transforms: Optional[Callable] = None, download: bool = False @@ -245,6 +247,22 @@ def __init__( super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) + if calibration: + verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", None)) + if split == "test": + warnings.warn( + "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", + RuntimeWarning + ) + else: + if split != "test": + calibration = "perfect" + warnings.warn( + f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.",
+                    RuntimeWarning
+                )
+
         if download:
             self._download_dataset(root)
@@ -263,25 +281,36 @@ def __init__(
         ):
             raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.")

-        imgs_left = sorted(glob(str(root / split / "*" / "im0.png")))
-        imgs_right = sorted(glob(str(root / split / "*" / "im1.png")))
+        calibration_suffixes = {
+            None: [""],
+            "perfect": ["-perfect"],
+            "imperfect": ["-imperfect"],
+            "both": ["-perfect", "-imperfect"],
+        }[calibration]

-        if not len(imgs_left) or not len(imgs_right):
-            raise FileNotFoundError("No images found in {}".format(root))
+        for calibration_suffix in calibration_suffixes:
+            scene_pattern = "*" + calibration_suffix

-        self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
+            imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png")))
+            imgs_right = sorted(glob(str(root / split / scene_pattern / "im1.png")))

-        if split == "test":
-            dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
-        else:
+            if not len(imgs_left) or not len(imgs_right):
+                raise FileNotFoundError("No images found in {}".format(root))
+
+            self._images += list((l, r) for l, r in zip(imgs_left, imgs_right))
+
+            if split == "test":
+                dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
+            else:
+
+                dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm")))
+                dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm")))

-            dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm")))
-            dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm")))
-        self._disparities = list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right))
+            self._disparities += list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right))

         self.use_ambient_views = use_ambient_views

-    def __getitem__(self, index: int) -> Tuple:
+    def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
         return super().__getitem__(index)

     def _read_img(self, file_path: str) -> Image.Image:
@@ -579,17 +608,60 @@ def _read_disparity(self, file_path: str) -> Tuple:
         # out of frame mask
         off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) == 0
         # combine the masks together
-        valid_mask = np.logical_or(off_mask, valid_mask)
+        valid_mask = np.logical_and(off_mask, valid_mask)
         return disparity_map, valid_mask

-    def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
+    def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
         return super().__getitem__(index)
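The `np.logical_or` to `np.logical_and` change above is easy to gloss over: both Sintel masks are True where a pixel is usable (inside the frame, respectively not occluded), so only their conjunction marks pixels with trustworthy ground truth. A tiny illustration with toy arrays, not dataset values:

import numpy as np

in_frame = np.array([[True, True], [True, False]])      # False -> out of frame
not_occluded = np.array([[True, False], [True, True]])  # False -> occluded

valid_or = np.logical_or(in_frame, not_occluded)    # all True here: wrongly keeps bad pixels
valid_and = np.logical_and(in_frame, not_occluded)  # True only where both conditions hold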
+ """ - def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None): + def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None): super().__init__(root, transforms) root = Path(root) / "SceneFlow" @@ -622,6 +694,9 @@ def __init__(self, root: str, split: str = "train", pass_name: str = "clean", tr self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + raise FileNotFoundError("Disparity map {} not found".format(file_path)) + disparity = read_pfm_file(file_path) valid = np.ones_like(disparity) return disparity, valid From a9365fe3d095d6c7e695fae483595359f159612a Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 15:58:46 +0100 Subject: [PATCH 07/35] "Fixed test calibration test Middlebury2014 (#6259)" --- test/test_datasets.py | 40 +++++++++++++++++++++--- torchvision/datasets/_stereo_matching.py | 7 +++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5d557020ac8..518a95362b9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -150,7 +150,11 @@ def test_bad_input(self): class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StereoMiddlebury2014 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "additional"), use_ambient_views=(True, False)) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "additional"), + calibration=("perfect", "imperfect", "both"), + use_ambient_views=(True, False), + ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod @@ -193,12 +197,15 @@ def inject_fake_data(self, tmpdir, config): scene_name = split_scene_map[config["split"]][idx] self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) - # TODO: add calibration argument test + print(f"Created {scene_name} for split {config['split']}") + + if config["calibration"] == "both": + num_examples *= 2 return num_examples def test_train_splits(self): - for split in ["train", "additional"]: - with self.create_dataset(split=split) as (dataset, _): + for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): + with self.create_dataset(split=split, calibration=calibration) as (dataset, _): for left, right, disparity, valid_mask in dataset: left_array = np.array(left) right_array = np.array(right) @@ -219,7 +226,7 @@ def test_train_splits(self): def test_test_split(self): for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): + with self.create_dataset(split=split, calibration=None) as (dataset, _): for left, right, disparity, valid_mask in dataset: left_array = np.array(left) right_array = np.array(right) @@ -239,6 +246,29 @@ def test_augmented_view_usage(self): # check that left and right are the same size assert left_array.shape == right_array.shape + def test_warnings_train(self): + # train set invalid + split = "train" + calibration = None + with pytest.warns( + RuntimeWarning, + match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. 
+    def test_warnings_train(self):
+        # train set invalid
+        split = "train"
+        calibration = None
+        with pytest.warns(
+            RuntimeWarning,
+            match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument."
+            f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.",
+        ):
+            with self.create_dataset(split=split, calibration=calibration):
+                pass
+
+    def test_warnings_test(self):
+        # test set invalid
+        split = "test"
+        calibration = "perfect"
+        with pytest.warns(
+            RuntimeWarning,
+            match="\nSplit 'test' has only no calibration settings, ignoring calibration argument."
+        ):
+            with self.create_dataset(split=split, calibration=calibration):
+                pass
+
     def test_bad_input(self):
         with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"):
             with self.create_dataset(split="bad"):
                 pass
diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index 0bd75fe82a4..702386b05bd 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -239,7 +239,7 @@ def __init__(
         self,
         root: str,
         split: str = "train",
-        calibration: Optional[str] = None,
+        calibration: Optional[str] = "perfect",
         use_ambient_views: bool = False,
         transforms: Optional[Callable] = None,
         download: bool = False
@@ -248,8 +248,9 @@ def __init__(
         verify_str_arg(split, "split", valid_values=("train", "test", "additional"))

         if calibration:
-            verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", None))
+            verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None))
             if split == "test":
+                calibration = None
                 warnings.warn(
                     "\nSplit 'test' has only no calibration settings, ignoring calibration argument.",
                     RuntimeWarning
@@ -267,6 +268,7 @@ def __init__(
             self._download_dataset(root)

         root = Path(root) / "Middlebury2014"
+        print(split)
         if not os.path.exists(root / split):
             raise FileNotFoundError(
                 f"The {split} directory was not found in the provided root directory"
@@ -290,6 +292,7 @@ def __init__(
         for calibration_suffix in calibration_suffixes:
             scene_pattern = "*" + calibration_suffix
+            print(scene_pattern)

             imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png")))
From 96c7bf4aa5be5b01c98016207deda8846d55212c Mon Sep 17 00:00:00 2001
From: Ponku
Date: Thu, 14 Jul 2022 10:03:27 +0100
Subject: [PATCH 08/35] Clean-up. Disp map format to (C, H, W) & valid mask to (H, W).
(#6259) --- test/test_datasets.py | 3552 +++++++++++----------- torchvision/datasets/__init__.py | 11 +- torchvision/datasets/_stereo_matching.py | 288 +- 3 files changed, 2081 insertions(+), 1770 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 518a95362b9..dd3c89b9bdc 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,4 +1,3 @@ -from abc import abstractmethod import bz2 import contextlib import csv @@ -25,701 +24,542 @@ from torchvision import datasets -class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoETH3D - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - - @staticmethod - def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: - # create the scene folder - image_paths = [] - # make the root_dir if it does not exits - os.makedirs(root_dir, exist_ok=True) - - for i in range(num_examples): - scene_dir = os.path.join(root_dir, f"scene_{i}") - os.makedirs(scene_dir, exist_ok=True) - # populate with left right images - image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) - image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100))) - return image_paths - - @staticmethod - def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: - paths = [] - # make the root_dir if it does not exits - os.makedirs(root_dir, exist_ok=True) +class STL10TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.STL10 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - # create scene directories - for i in range(num_examples): - scene_dir = os.path.join(root_dir, f"scene_{i}") - os.makedirs(scene_dir, exist_ok=True) - # populate with a random png file for occlusion mask, and a pfm file for disparity - paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100))) - pfm_path = os.path.join(scene_dir, "disp0GT.pfm") - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) - paths.append(pfm_path) - return paths + @ staticmethod + def _make_binary_file(num_elements, root, name): + file_name = os.path.join(root, name) + np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - def inject_fake_data(self, tmpdir, config): - eth3d_dir = os.path.join(tmpdir, "ETH3D") + @ staticmethod + def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): + STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) - num_examples = 2 if config["split"] == "train" else 3 + @ staticmethod + def _make_label_file(num_images, root, name): + STL10TestCase._make_binary_file(num_images, root, name) - split_name = "two_view_training" if config["split"] == "train" else "two_view_test" - split_dir = os.path.join(eth3d_dir, split_name) - self._create_scene_folder(num_examples, split_dir) + @ staticmethod + def _make_class_names_file(root, name="class_names.txt"): + with open(os.path.join(root, name), "w") as fh: + for cname in ("airplane", "bird"): + fh.write(f"{cname}\n") - if config["split"] == "train": - annot_dir = os.path.join(eth3d_dir, "two_view_training_gt") - self._create_annotation_folder(num_examples, annot_dir) + @ staticmethod + def _make_fold_indices_file(root): + num_folds 
= 10 + offset = 0 + with open(os.path.join(root, "fold_indices.txt"), "w") as fh: + for fold in range(num_folds): + line = " ".join([str(idx) for idx in range(offset, offset + fold + 1)]) + fh.write(f"{line}\n") + offset += fold + 1 - return num_examples + return tuple(range(1, num_folds + 1)) - def test_training_test_splits(self): - with self.create_dataset(split="train") as (dataset, _): - assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" - for _, _, disparity, valid_mask in dataset: - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - dh, dw, _ = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + @ staticmethod + def _make_train_files(root, num_unlabeled_images=1): + num_images_in_fold = STL10TestCase._make_fold_indices_file(root) + num_train_images = sum(num_images_in_fold) - with self.create_dataset(split="test") as (dataset, _): - assert all(d == ("", "") for d in dataset._disparities) - for _, _, disparity, valid_mask in dataset: - assert disparity is None - assert valid_mask is None + STL10TestCase._make_image_file(num_train_images, root, "train_X.bin") + STL10TestCase._make_label_file(num_train_images, root, "train_y.bin") + STL10TestCase._make_image_file(1, root, "unlabeled_X.bin") - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return dict(train=num_train_images, unlabeled=num_unlabeled_images) + @ staticmethod + def _make_test_files(root, num_images=2): + STL10TestCase._make_image_file(num_images, root, "test_X.bin") + STL10TestCase._make_label_file(num_images, root, "test_y.bin") -class CREStereoSynthethicTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CREStereoSynthetic - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + return dict(test=num_images) def inject_fake_data(self, tmpdir, config): - crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" - os.makedirs(crestereo_dir, exist_ok=True) + root_folder = os.path.join(tmpdir, "stl10_binary") + os.mkdir(root_folder) - split_dir = crestereo_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) - num_examples = 4 + num_images_in_split = self._make_train_files(root_folder) + num_images_in_split.update(self._make_test_files(root_folder)) + self._make_class_names_file(root_folder) - for idx in range(num_examples): - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) - # these are going to end up being gray scale images - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + return sum(num_images_in_split[part] for part in config["split"].split("+")) - return num_examples + def test_folds(self): + for fold in range(10): + with self.create_dataset(split="train", folds=fold) as (dataset, _): + assert len(dataset) == fold + 1 - def test_splits(self): - for split in ("tree", "shapenet", "reflective", "hole"): - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array 
= np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity - dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + def test_unlabeled(self): + with self.create_dataset(split="unlabeled") as (dataset, _): + labels = [dataset[idx][1] for idx in range(len(dataset))] + assert all(label == -1 for label in labels) - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): + def test_invalid_folds1(self): + with pytest.raises(ValueError): + with self.create_dataset(folds=10): pass + def test_invalid_folds2(self): + with pytest.raises(ValueError): + with self.create_dataset(folds="0"): + pass + + +class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech101 + FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) -class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoMiddlebury2014 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("train", "additional"), - calibration=("perfect", "imperfect", "both"), - use_ambient_views=(True, False), + target_type=("category", "annotation", ["category", "annotation"]) ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - - @staticmethod - def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: - calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] - scene_dirs = [] - for c in calibrations: - scene_dir = os.path.join(root_dir, f"{scene_name}{c}") - os.makedirs(scene_dir, exist_ok=True) - # make normal images first - datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) - # these are going to end up being gray scale images - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) - scene_dirs.append(scene_dir) - return scene_dirs + REQUIRED_PACKAGES = ("scipy",) def inject_fake_data(self, tmpdir, config): - split_scene_map = { - "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], - "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] - } + root = pathlib.Path(tmpdir) / "caltech101" + images = root / "101_ObjectCategories" + annotations = root / "Annotations" - middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") - os.makedirs(middlebury_dir, exist_ok=True) + categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) + num_images_per_category = 2 - split_dir = middlebury_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) + for image_category, annotation_category in categories: + datasets_utils.create_image_folder( + root=images, + name=image_category, + 
file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + self._create_annotation_folder( + root=annotations, + name=annotation_category, + file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", + num_examples=num_images_per_category, + ) - num_examples = 4 - for idx in range(num_examples): - # special case for test_bad_input - if config["split"] not in split_scene_map: - return 0 + # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. + os.makedirs(images / "BACKGROUND_Google") - scene_name = split_scene_map[config["split"]][idx] - self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + return num_images_per_category * len(categories) - print(f"Created {scene_name} for split {config['split']}") + def _create_annotation_folder(self, root, name, file_name_fn, num_examples): + root = pathlib.Path(root) / name + os.makedirs(root) - if config["calibration"] == "both": - num_examples *= 2 - return num_examples + for idx in range(num_examples): + self._create_annotation_file(root, file_name_fn(idx)) - def test_train_splits(self): - for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): - with self.create_dataset(split=split, calibration=calibration) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 3 - assert disparity.shape == (h, w, 3) - # check that valid mask is the same size as the disparity - dh, dw, c = disparity.shape - print(valid_mask.shape) - mh, mw, _ = valid_mask.shape - assert dh == mh - assert dw == mw + def _create_annotation_file(self, root, name): + mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) + datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split, calibration=None) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None + def test_combined_targets(self): + target_types = ["category", "annotation"] - def test_augmented_view_usage(self): - with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): - for left, right, _, _ in dataset: - left_array = np.array(left) - right_array = np.array(right) - # check that left and right are the same size - assert left_array.shape == right_array.shape + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) - def test_warnings_train(self): - # train set invalid - split = "train" - calibration = None - with pytest.warns( - RuntimeWarning, - match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." - f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.", - ): - with self.create_dataset(split=split, calibration=calibration): - pass + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] - def test_warnings_test(self): - # test set invalid - split = "test" - calibration = "perfect" - with pytest.warns( - RuntimeWarning, - match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." - ): - with self.create_dataset(split=split, calibration=calibration): - pass + actual = len(individual_targets) + expected = len(combined_targets) + assert ( + actual == expected + ), "The number of the returned combined targets does not match the the number targets if requested " + f"individually: {actual} != {expected}", - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + assert ( + actual is expected + ), "Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", -class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2012 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) +class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech256 def inject_fake_data(self, tmpdir, config): - kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" - os.makedirs(kitti_dir, exist_ok=True) - - split_dir = kitti_dir / (config["split"] + "ing") - os.makedirs(split_dir, exist_ok=True) - - num_examples = 4 + tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" - datasets_utils.create_image_folder( - root=split_dir, - name="colored_0", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) - datasets_utils.create_image_folder( - root=split_dir, - name="colored_1", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) + categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter")) + num_images_per_category = 2 - if config["split"] == "train": + for idx, category in categories: datasets_utils.create_image_folder( - root=split_dir, - name="disp_noc", - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, - # Kitti2012 uses a single channel image for disparities - size=(1, 100, 200), + tmpdir, + name=f"{idx:03d}.{category}", + file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", + num_examples=num_images_per_category, ) - return num_examples - - def test_train_splits(self): - for split in ["train"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity 
- dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw - - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None - - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return num_images_per_category * len(categories) -class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2015 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) +class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.WIDERFace + FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) def inject_fake_data(self, tmpdir, config): - kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" - os.makedirs(kitti_dir, exist_ok=True) - - split_dir = kitti_dir / (config["split"] + "ing") - os.makedirs(split_dir, exist_ok=True) - - num_examples = 4 + widerface_dir = pathlib.Path(tmpdir) / "widerface" + annotations_dir = widerface_dir / "wider_face_split" + os.makedirs(annotations_dir) - datasets_utils.create_image_folder( - root=split_dir, - name="image_2", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) - datasets_utils.create_image_folder( - root=split_dir, - name="image_3", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) + split_to_idx = split_to_num_examples = { + "train": 1, + "val": 2, + "test": 3, + } - if config["split"] == "train": - datasets_utils.create_image_folder( - root=split_dir, - name="disp_occ_0", - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, - # Kitti2015 uses a single channel image for disparities - size=(1, 100, 200), - ) + # We need to create all folders regardless of the split in config + for split in ("train", "val", "test"): + split_idx = split_to_idx[split] + num_examples = split_to_num_examples[split] datasets_utils.create_image_folder( - root=split_dir, - name="disp_occ_1", - file_name_fn=lambda i: f"{i:06d}.png", + root=tmpdir, + name=widerface_dir / f"WIDER_{split}" / "images" / "0--Parade", + file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", num_examples=num_examples, - # Kitti2015 uses a single channel image for disparities - size=(1, 100, 200), ) - return num_examples + annotation_file_name = { + "train": annotations_dir / "wider_face_train_bbx_gt.txt", + "val": annotations_dir / "wider_face_val_bbx_gt.txt", + "test": annotations_dir / "wider_face_test_filelist.txt", + }[split] - def test_train_splits(self): - for split in ["train"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert 
left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity - dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + annotation_content = { + "train": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "val": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "test": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" + for image_idx in range(num_examples) + ), + }[split] - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None + with open(annotation_file_name, "w") as annotation_file: + annotation_file.write(annotation_content) - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return split_to_num_examples[config["split"]] -class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoSceneFlow - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("FlyingThings3D", "Driving", "Monkaa"), - pass_name=("clean", "final") +class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Cityscapes + TARGET_TYPES = ( + "instance", + "semantic", + "polygon", + "color", ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), + *datasets_utils.combinations_grid( + mode=("coarse",), + split=("train", "train_extra", "val"), + target_type=TARGET_TYPES, + ), + ) + FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) - @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: - root = pathlib.Path(root) / name - os.makedirs(root, exist_ok=True) + def inject_fake_data(self, tmpdir, config): - paths = [] - for i in range(num_examples): - datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) - paths.append(str(root / file_name_fn(i))) - return paths + tmpdir = pathlib.Path(tmpdir) - def inject_fake_data(self, tmpdir, config): - scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" - os.makedirs(scene_flow_dir, exist_ok=True) + mode_to_splits = { + "Coarse": ["train", "train_extra", "val"], + "Fine": ["train", "test", "val"], + } - split_dir = scene_flow_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) + if config["split"] == "train": # just for coverage of the number of samples + cities = ["bochum", "bremen"] + else: + cities = ["bochum"] - pass_dir_map = { - "clean": "frames_cleanpass", - "final": "frames_finalpass", + polygon_target = { + "imgHeight": 1024, + "imgWidth": 2048, + 
"objects": [ + { + "label": "sky", + "polygon": [ + [1241, 0], + [1234, 156], + [1478, 197], + [1611, 172], + [1606, 0], + ], + }, + { + "label": "road", + "polygon": [ + [0, 448], + [1331, 274], + [1473, 265], + [2047, 605], + [2047, 1023], + [0, 1023], + ], + }, + ], } - num_examples = 1 - pass_dir_name = pass_dir_map[config["pass_name"]] - # create pass directories - pass_dir = split_dir / pass_dir_name - disp_dir = split_dir / "disparity" - os.makedirs(pass_dir, exist_ok=True) - os.makedirs(disp_dir, exist_ok=True) + for mode in ["Coarse", "Fine"]: + gt_dir = tmpdir / f"gt{mode}" + for split in mode_to_splits[mode]: + for city in cities: - for direction in ["left", "right"]: - for scene_idx in range(num_examples): - os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + def make_image(name, size=10): + datasets_utils.create_image_folder( + root=gt_dir / split, + name=city, + file_name_fn=lambda _: name, + size=size, + num_examples=1, + ) + + make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") + make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") + make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + + polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" + with open(polygon_target_name, "w") as outfile: + json.dump(polygon_target, outfile) + + # Create leftImg8bit folder + for split in ["test", "train_extra", "train", "val"]: + for city in cities: datasets_utils.create_image_folder( - root=pass_dir / f"scene_{scene_idx:06d}", - name=direction, - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=3, - size=(3, 200, 100), + root=tmpdir / "leftImg8bit" / split, + name=city, + file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", + num_examples=1, ) - os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) - self._create_pfm_folder( - root=disp_dir / f"scene_{scene_idx:06d}", - name=direction, - file_name_fn=lambda i: f"{i:06d}.pfm", - num_examples=3, - size=(100, 200), - ) + info = {"num_examples": len(cities)} + if config["target_type"] == "polygon": + info["expected_polygon_target"] = polygon_target + return info - return num_examples * 3 + def test_combined_targets(self): + target_types = ["semantic", "polygon", "color"] - def test_train_splits(self): - for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): - with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - print(f"Split {split_name} pass {pass_name}") - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - print(left_array.shape) - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 3 - assert disparity.shape == (h, w, 3) - # check that valid mask is the same size as the disparity - dh, dw, _ = disparity.shape - mh, mw, _ = valid_mask.shape - assert dh == mh - assert dw == mw + with self.create_dataset(target_type=target_types) as (dataset, _): + output = dataset[0] + assert isinstance(output, tuple) + assert len(output) == 2 + assert isinstance(output[0], PIL.Image.Image) + assert isinstance(output[1], tuple) + assert len(output[1]) == 3 + assert isinstance(output[1][0], PIL.Image.Image) # semantic + assert isinstance(output[1][1], dict) # polygon + assert isinstance(output[1][2], PIL.Image.Image) # color 
+    def test_feature_types_target_color(self):
+        with self.create_dataset(target_type="color") as (dataset, _):
+            color_img, color_target = dataset[0]
+            assert isinstance(color_img, PIL.Image.Image)
+            assert np.array(color_target).shape[2] == 4

-class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase):
-    DATASET_CLASS = datasets.StereoFallingThings
-    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed"))
-    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+    def test_feature_types_target_polygon(self):
+        with self.create_dataset(target_type="polygon") as (dataset, info):
+            polygon_img, polygon_target = dataset[0]
+            assert isinstance(polygon_img, PIL.Image.Image)
+            assert polygon_target == info["expected_polygon_target"]

-    @staticmethod
-    def _make_scene_folder(root: str, scene_name: str, num_examples: int, size: Tuple[int, int]):
-        root = pathlib.Path(root) / scene_name
-        os.makedirs(root, exist_ok=True)
-        datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[0], size[1]))
-        datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[0], size[1]))
-        # single channel depth maps
-        datasets_utils.create_image_file(root, "image1.left.depth.jpg", size=(1, size[0], size[1]))
-        datasets_utils.create_image_file(root, "image1.right.depth.jpg", size=(1, size[0], size[1]))

+class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.ImageNet
+    REQUIRED_PACKAGES = ("scipy",)
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"))

     def inject_fake_data(self, tmpdir, config):
-        fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings"
-
-        split_dir = pathlib.Path(fallingthings_dir) / config["split"]
-        os.makedirs(split_dir, exist_ok=True)
-
-        num_examples = 4
+        tmpdir = pathlib.Path(tmpdir)

-        for i in range(num_examples):
-            self._make_scene_folder(
-                root=split_dir,
-                scene_name=f"scene_{i:06d}",
+        wnid = "n01234567"
+        if config["split"] == "train":
+            num_examples = 3
+            datasets_utils.create_image_folder(
+                root=tmpdir,
+                name=tmpdir / "train" / wnid / wnid,
+                file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG",
+                num_examples=num_examples,
+            )
+        else:
+            num_examples = 1
+            datasets_utils.create_image_folder(
+                root=tmpdir,
+                name=tmpdir / "val" / wnid,
+                file_name_fn=lambda image_idx: f"ILSVRC2012_val_0000000{image_idx}.JPEG",
                 num_examples=num_examples,
-                size=(100, 200),
             )
+        wnid_to_classes = {wnid: [1]}
+        torch.save((wnid_to_classes, None), tmpdir / "meta.bin")
         return num_examples

-    def test_splits(self):
-        for split_name in ["single", "mixed"]:
-            with self.create_dataset(split=split_name) as (dataset, _):
-                for left, right, disparity, valid_mask in dataset:
-                    print(f"Split {split_name}")
-                    left_array = np.array(left)
-                    right_array = np.array(right)
-                    h, w, c = left_array.shape
-                    # check that left and right are the same size
-                    assert left_array.shape == right_array.shape
-                    print(left_array.shape)
-                    # check general shapes
-                    assert c == 3
-                    assert len(disparity.shape) == 3
-                    assert len(valid_mask.shape) == 2
-                    assert disparity.shape == (h, w)
-                    # check that valid mask is the same size as the disparity
-                    dh, dw = disparity.shape
-                    mh, mw = valid_mask.shape
-                    assert dh == mh
-                    assert dw == mw

+class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CIFAR10
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))

-class STL10TestCase(datasets_utils.ImageDatasetTestCase):
-    DATASET_CLASS = 
datasets.STL10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) + _VERSION_CONFIG = dict( + base_folder="cifar-10-batches-py", + train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), + test_files=("test_batch",), + labels_key="labels", + meta_file="batches.meta", + num_categories=10, + categories_key="label_names", + ) - @ staticmethod - def _make_binary_file(num_elements, root, name): - file_name = os.path.join(root, name) - np.zeros(num_elements, dtype=np.uint8).tofile(file_name) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] + os.makedirs(tmpdir) - @ staticmethod - def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): - STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) + num_images_per_file = 1 + for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): + self._create_batch_file(tmpdir, name, num_images_per_file) - @ staticmethod - def _make_label_file(num_images, root, name): - STL10TestCase._make_binary_file(num_images, root, name) + categories = self._create_meta_file(tmpdir) - @ staticmethod - def _make_class_names_file(root, name="class_names.txt"): - with open(os.path.join(root, name), "w") as fh: - for cname in ("airplane", "bird"): - fh.write(f"{cname}\n") + return dict( + num_examples=num_images_per_file + * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), + categories=categories, + ) - @ staticmethod - def _make_fold_indices_file(root): - num_folds = 10 - offset = 0 - with open(os.path.join(root, "fold_indices.txt"), "w") as fh: - for fold in range(num_folds): - line = " ".join([str(idx) for idx in range(offset, offset + fold + 1)]) - fh.write(f"{line}\n") - offset += fold + 1 + def _create_batch_file(self, root, name, num_images): + np_rng = np.random.RandomState(0) + data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) + labels = np_rng.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() + self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) - return tuple(range(1, num_folds + 1)) + def _create_meta_file(self, root): + categories = [ + f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" + for idx in range(self._VERSION_CONFIG["num_categories"]) + ] + self._create_binary_file( + root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + ) + return categories - @ staticmethod - def _make_train_files(root, num_unlabeled_images=1): - num_images_in_fold = STL10TestCase._make_fold_indices_file(root) - num_train_images = sum(num_images_in_fold) + def _create_binary_file(self, root, name, content): + with open(pathlib.Path(root) / name, "wb") as fh: + pickle.dump(content, fh) - STL10TestCase._make_image_file(num_train_images, root, "train_X.bin") - STL10TestCase._make_label_file(num_train_images, root, "train_y.bin") - STL10TestCase._make_image_file(1, root, "unlabeled_X.bin") + def test_class_to_idx(self): + with self.create_dataset() as (dataset, info): + expected = {category: label for label, category in enumerate(info["categories"])} + actual = dataset.class_to_idx + assert actual == expected - return dict(train=num_train_images, unlabeled=num_unlabeled_images) - @ staticmethod - def _make_test_files(root, num_images=2): - 
STL10TestCase._make_image_file(num_images, root, "test_X.bin") - STL10TestCase._make_label_file(num_images, root, "test_y.bin") +class CIFAR100(CIFAR10TestCase): + DATASET_CLASS = datasets.CIFAR100 - return dict(test=num_images) + _VERSION_CONFIG = dict( + base_folder="cifar-100-python", + train_files=("train",), + test_files=("test",), + labels_key="fine_labels", + meta_file="meta", + num_categories=100, + categories_key="fine_label_names", + ) - def inject_fake_data(self, tmpdir, config): - root_folder = os.path.join(tmpdir, "stl10_binary") - os.mkdir(root_folder) - num_images_in_split = self._make_train_files(root_folder) - num_images_in_split.update(self._make_test_files(root_folder)) - self._make_class_names_file(root_folder) +class CelebATestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CelebA + FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) - return sum(num_images_in_split[part] for part in config["split"].split("+")) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "valid", "test", "all"), + target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), + ) - def test_folds(self): - for fold in range(10): - with self.create_dataset(split="train", folds=fold) as (dataset, _): - assert len(dataset) == fold + 1 + _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) - def test_unlabeled(self): - with self.create_dataset(split="unlabeled") as (dataset, _): - labels = [dataset[idx][1] for idx in range(len(dataset))] - assert all(label == -1 for label in labels) + def inject_fake_data(self, tmpdir, config): + base_folder = pathlib.Path(tmpdir) / "celeba" + os.makedirs(base_folder) - def test_invalid_folds1(self): - with pytest.raises(ValueError): - with self.create_dataset(folds=10): - pass + num_images, num_images_per_split = self._create_split_txt(base_folder) - def test_invalid_folds2(self): - with pytest.raises(ValueError): - with self.create_dataset(folds="0"): - pass + datasets_utils.create_image_folder( + base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images + ) + attr_names = self._create_attr_txt(base_folder, num_images) + self._create_identity_txt(base_folder, num_images) + self._create_bbox_txt(base_folder, num_images) + self._create_landmarks_txt(base_folder, num_images) + return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) -class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Caltech101 - FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) + def _create_split_txt(self, root): + num_images_per_split = dict(train=4, valid=3, test=2) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - target_type=("category", "annotation", ["category", "annotation"]) - ) - REQUIRED_PACKAGES = ("scipy",) + data = [ + [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + ] + self._create_txt(root, "list_eval_partition.txt", data) - def inject_fake_data(self, tmpdir, config): - root = pathlib.Path(tmpdir) / "caltech101" - images = root / "101_ObjectCategories" - annotations = root / "Annotations" + num_images_per_split["all"] = num_images = sum(num_images_per_split.values()) + return num_images, num_images_per_split - categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) - num_images_per_category = 2 + def _create_attr_txt(self, root, num_images): + header = ("5_o_Clock_Shadow", "Young") + data = 
torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() + self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) + return header - for image_category, annotation_category in categories: - datasets_utils.create_image_folder( - root=images, - name=image_category, - file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", - num_examples=num_images_per_category, - ) - self._create_annotation_folder( - root=annotations, - name=annotation_category, - file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", - num_examples=num_images_per_category, - ) + def _create_identity_txt(self, root, num_images): + data = torch.randint(1, 4, size=(num_images, 1)).tolist() + self._create_txt(root, "identity_CelebA.txt", data) - # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. - os.makedirs(images / "BACKGROUND_Google") + def _create_bbox_txt(self, root, num_images): + header = ("x_1", "y_1", "width", "height") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt( + root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True + ) - return num_images_per_category * len(categories) + def _create_landmarks_txt(self, root, num_images): + header = ("lefteye_x", "rightmouth_y") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) - def _create_annotation_folder(self, root, name, file_name_fn, num_examples): - root = pathlib.Path(root) / name - os.makedirs(root) + def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + with open(pathlib.Path(root) / name, "w") as fh: + if add_num_examples: + fh.write(f"{len(data)}\n") - for idx in range(num_examples): - self._create_annotation_file(root, file_name_fn(idx)) + if header: + if add_image_id_to_header: + header = ("image_id", *header) + fh.write(f"{' '.join(header)}\n") - def _create_annotation_file(self, root, name): - mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) - datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) + for idx, line in enumerate(data, 1): + fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") def test_combined_targets(self): - target_types = ["category", "annotation"] + target_types = ["attr", "identity", "bbox", "landmarks"] individual_targets = [] for target_type in target_types: @@ -746,1062 +586,659 @@ def test_combined_targets(self): ), "Type of the combined target does not match the type of the corresponding individual target: " f"{actual} is not {expected}", + def test_no_target(self): + with self.create_dataset(target_type=[]) as (dataset, _): + _, target = dataset[0] -class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Caltech256 + assert target is None - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" + def test_attr_names(self): + with self.create_dataset() as (dataset, info): + assert tuple(dataset.attr_names) == info["attr_names"] - categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter")) - num_images_per_category = 2 + def test_images_names_split(self): + with self.create_dataset(split="all") as (dataset, _): + all_imgs_names = set(dataset.filename) - for idx, 
category in categories: - datasets_utils.create_image_folder( - tmpdir, - name=f"{idx:03d}.{category}", - file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", - num_examples=num_images_per_category, - ) + merged_imgs_names = set() + for split in ["train", "valid", "test"]: + with self.create_dataset(split=split) as (dataset, _): + merged_imgs_names.update(dataset.filename) - return num_images_per_category * len(categories) + assert merged_imgs_names == all_imgs_names -class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.WIDERFace - FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) +class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.VOCSegmentation + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) + + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid( + year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") + ), + dict(year="2007", image_set="test"), + dict(year="2007-test", image_set="test"), + ) def inject_fake_data(self, tmpdir, config): - widerface_dir = pathlib.Path(tmpdir) / "widerface" - annotations_dir = widerface_dir / "wider_face_split" - os.makedirs(annotations_dir) + year, is_test_set = ( + ("2007", True) + if config["year"] == "2007-test" or config["image_set"] == "test" + else (config["year"], False) + ) + image_set = config["image_set"] - split_to_idx = split_to_num_examples = { - "train": 1, - "val": 2, - "test": 3, - } + base_dir = pathlib.Path(tmpdir) + if year == "2011": + base_dir /= "TrainVal" + base_dir = base_dir / "VOCdevkit" / f"VOC{year}" + os.makedirs(base_dir) - # We need to create all folders regardless of the split in config - for split in ("train", "val", "test"): - split_idx = split_to_idx[split] - num_examples = split_to_num_examples[split] + num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) + datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) - datasets_utils.create_image_folder( - root=tmpdir, - name=widerface_dir / f"WIDER_{split}" / "images" / "0--Parade", - file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", - num_examples=num_examples, - ) + datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) + annotation = self._create_annotation_files(base_dir, "Annotations", num_images) - annotation_file_name = { - "train": annotations_dir / "wider_face_train_bbx_gt.txt", - "val": annotations_dir / "wider_face_val_bbx_gt.txt", - "test": annotations_dir / "wider_face_test_filelist.txt", - }[split] + return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) - annotation_content = { - "train": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" - for image_idx in range(num_examples) - ), - "val": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" - for image_idx in range(num_examples) - ), - "test": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" - for image_idx in range(num_examples) - ), - }[split] + def _create_image_set_files(self, root, name, is_test_set): + root = pathlib.Path(root) / name + src = pathlib.Path(root) / "Main" + 
os.makedirs(src, exist_ok=True) - with open(annotation_file_name, "w") as annotation_file: - annotation_file.write(annotation_content) + idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) + idcs["trainval"] = (*idcs["train"], *idcs["val"]) - return split_to_num_examples[config["split"]] + for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): + self._create_image_set_file(src, image_set, idcs[image_set]) + shutil.copytree(src, root / "Segmentation") -class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Cityscapes - TARGET_TYPES = ( - "instance", - "semantic", - "polygon", - "color", - ) - ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), - *datasets_utils.combinations_grid( - mode=("coarse",), - split=("train", "train_extra", "val"), - target_type=TARGET_TYPES, - ), - ) - FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) + num_images = max(itertools.chain(*idcs.values())) + 1 + num_images_per_image_set = {image_set: len(idcs_) for image_set, idcs_ in idcs.items()} + return num_images, num_images_per_image_set - def inject_fake_data(self, tmpdir, config): + def _create_image_set_file(self, root, image_set, idcs): + with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: + fh.writelines([f"{idx:06d}\n" for idx in idcs]) - tmpdir = pathlib.Path(tmpdir) + def _create_annotation_files(self, root, name, num_images): + root = pathlib.Path(root) / name + os.makedirs(root) - mode_to_splits = { - "Coarse": ["train", "train_extra", "val"], - "Fine": ["train", "test", "val"], - } + for idx in range(num_images): + annotation = self._create_annotation_file(root, f"{idx:06d}.xml") - if config["split"] == "train": # just for coverage of the number of samples - cities = ["bochum", "bremen"] - else: - cities = ["bochum"] + return annotation - polygon_target = { - "imgHeight": 1024, - "imgWidth": 2048, - "objects": [ - { - "label": "sky", - "polygon": [ - [1241, 0], - [1234, 156], - [1478, 197], - [1611, 172], - [1606, 0], - ], - }, - { - "label": "road", - "polygon": [ - [0, 448], - [1331, 274], - [1473, 265], - [2047, 605], - [2047, 1023], - [0, 1023], - ], - }, - ], - } + def _create_annotation_file(self, root, name): + def add_child(parent, name, text=None): + child = ET.SubElement(parent, name) + child.text = text + return child - for mode in ["Coarse", "Fine"]: - gt_dir = tmpdir / f"gt{mode}" - for split in mode_to_splits[mode]: - for city in cities: + def add_name(obj, name="dog"): + add_child(obj, "name", name) + return name - def make_image(name, size=10): - datasets_utils.create_image_folder( - root=gt_dir / split, - name=city, - file_name_fn=lambda _: name, - size=size, - num_examples=1, - ) + def add_bndbox(obj, bndbox=None): + if bndbox is None: + bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} - make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") - make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") - make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + obj = add_child(obj, "bndbox") + for name, text in bndbox.items(): + add_child(obj, name, text) - polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" - with open(polygon_target_name, "w") as outfile: - json.dump(polygon_target, outfile) + return bndbox - # Create leftImg8bit folder - for split in ["test", "train_extra", "train", "val"]: - for city in cities: - datasets_utils.create_image_folder( - 
root=tmpdir / "leftImg8bit" / split, - name=city, - file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", - num_examples=1, - ) + annotation = ET.Element("annotation") + obj = add_child(annotation, "object") + data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) - info = {"num_examples": len(cities)} - if config["target_type"] == "polygon": - info["expected_polygon_target"] = polygon_target - return info + with open(pathlib.Path(root) / name, "wb") as fh: + fh.write(ET.tostring(annotation)) - def test_combined_targets(self): - target_types = ["semantic", "polygon", "color"] + return data - with self.create_dataset(target_type=target_types) as (dataset, _): - output = dataset[0] - assert isinstance(output, tuple) - assert len(output) == 2 - assert isinstance(output[0], PIL.Image.Image) - assert isinstance(output[1], tuple) - assert len(output[1]) == 3 - assert isinstance(output[1][0], PIL.Image.Image) # semantic - assert isinstance(output[1][1], dict) # polygon - assert isinstance(output[1][2], PIL.Image.Image) # color - def test_feature_types_target_color(self): - with self.create_dataset(target_type="color") as (dataset, _): - color_img, color_target = dataset[0] - assert isinstance(color_img, PIL.Image.Image) - assert np.array(color_target).shape[2] == 4 +class VOCDetectionTestCase(VOCSegmentationTestCase): + DATASET_CLASS = datasets.VOCDetection + FEATURE_TYPES = (PIL.Image.Image, dict) - def test_feature_types_target_polygon(self): - with self.create_dataset(target_type="polygon") as (dataset, info): - polygon_img, polygon_target = dataset[0] - assert isinstance(polygon_img, PIL.Image.Image) - (polygon_target, info["expected_polygon_target"]) + def test_annotations(self): + with self.create_dataset() as (dataset, info): + _, target = dataset[0] + assert "annotation" in target + annotation = target["annotation"] -class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.ImageNet - REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val")) + assert "object" in annotation + objects = annotation["object"] - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + assert len(objects) == 1 + object = objects[0] - wnid = "n01234567" - if config["split"] == "train": - num_examples = 3 - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / "train" / wnid / wnid, - file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", - num_examples=num_examples, - ) - else: - num_examples = 1 - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / "val" / wnid, - file_name_fn=lambda image_ifx: "ILSVRC2012_val_0000000{image_idx}.JPEG", - num_examples=num_examples, - ) + assert object == info["annotation"] - wnid_to_classes = {wnid: [1]} - torch.save((wnid_to_classes, None), tmpdir / "meta.bin") - return num_examples +class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CocoDetection + FEATURE_TYPES = (PIL.Image.Image, list) -class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CIFAR10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + REQUIRED_PACKAGES = ("pycocotools",) - _VERSION_CONFIG = dict( - base_folder="cifar-10-batches-py", - train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), - test_files=("test_batch",), - labels_key="labels", - meta_file="batches.meta", - num_categories=10, - categories_key="label_names", - ) + _IMAGE_FOLDER = "images" + 
_ANNOTATIONS_FOLDER = "annotations" + _ANNOTATIONS_FILE = "annotations.json" - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] - os.makedirs(tmpdir) + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGE_FOLDER + annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE + return root, annotation_file - num_images_per_file = 1 - for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): - self._create_batch_file(tmpdir, name, num_images_per_file) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) - categories = self._create_meta_file(tmpdir) + num_images = 3 + num_annotations_per_image = 2 - return dict( - num_examples=num_images_per_file - * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), - categories=categories, + files = datasets_utils.create_image_folder( + tmpdir, name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images ) + file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] - def _create_batch_file(self, root, name, num_images): - np_rng = np.random.RandomState(0) - data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) - labels = np_rng.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() - self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) + annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotation_folder) + info = self._create_annotation_file( + annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image + ) - def _create_meta_file(self, root): - categories = [ - f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" - for idx in range(self._VERSION_CONFIG["num_categories"]) - ] - self._create_binary_file( - root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + info["num_examples"] = num_images + return info + + def _create_annotation_file(self, root, name, file_names, num_annotations_per_image): + image_ids = [int(file_name.stem) for file_name in file_names] + images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + + annotations, info = self._create_annotations(image_ids, num_annotations_per_image) + self._create_json(root, name, dict(images=images, annotations=annotations)) + + return info + + def _create_annotations(self, image_ids, num_annotations_per_image): + annotations = datasets_utils.combinations_grid( + image_id=image_ids, bbox=([1.0, 2.0, 3.0, 4.0],) * num_annotations_per_image ) - return categories + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict() - def _create_binary_file(self, root, name, content): - with open(pathlib.Path(root) / name, "wb") as fh: - pickle.dump(content, fh) + def _create_json(self, root, name, content): + file = pathlib.Path(root) / name + with open(file, "w") as fh: + json.dump(content, fh) + return file - def test_class_to_idx(self): - with self.create_dataset() as (dataset, info): - expected = {category: label for label, category in enumerate(info["categories"])} - actual = dataset.class_to_idx - assert actual == expected +class CocoCaptionsTestCase(CocoDetectionTestCase): + DATASET_CLASS = datasets.CocoCaptions -class CIFAR100(CIFAR10TestCase): - 
DATASET_CLASS = datasets.CIFAR100 + def _create_annotations(self, image_ids, num_annotations_per_image): + captions = [str(idx) for idx in range(num_annotations_per_image)] + annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict(captions=captions) - _VERSION_CONFIG = dict( - base_folder="cifar-100-python", - train_files=("train",), - test_files=("test",), - labels_key="fine_labels", - meta_file="meta", - num_categories=100, - categories_key="fine_label_names", - ) + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + assert tuple(captions) == tuple(info["captions"]) -class CelebATestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CelebA - FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) +class UCF101TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.UCF101 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("train", "valid", "test", "all"), - target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), - ) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) - _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) + _VIDEO_FOLDER = "videos" + _ANNOTATIONS_FOLDER = "annotations" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._ANNOTATIONS_FOLDER + return root, annotation_path def inject_fake_data(self, tmpdir, config): - base_folder = pathlib.Path(tmpdir) / "celeba" - os.makedirs(base_folder) + tmpdir = pathlib.Path(tmpdir) - num_images, num_images_per_split = self._create_split_txt(base_folder) + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) - datasets_utils.create_image_folder( - base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images - ) - attr_names = self._create_attr_txt(base_folder, num_images) - self._create_identity_txt(base_folder, num_images) - self._create_bbox_txt(base_folder, num_images) - self._create_landmarks_txt(base_folder, num_images) + annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotations_folder) + num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) - return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) + return num_examples - def _create_split_txt(self, root): - num_images_per_split = dict(train=4, valid=3, test=2) + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" - data = [ - [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + video_files = [ + datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) + for cls in ("ApplyEyeMakeup", "YoYo") ] - self._create_txt(root, "list_eval_partition.txt", data) - - num_images_per_split["all"] = num_images = sum(num_images_per_split.values()) - return num_images, num_images_per_split + return [path.relative_to(root) for path in itertools.chain(*video_files)] - def _create_attr_txt(self, root, num_images): - header = ("5_o_Clock_Shadow", "Young") - data = 
torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() - self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) - return header + def _create_annotation_files(self, root, video_files, fold, train): + current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) + current_annotation = self._annotation_file_name(fold, train) + self._create_annotation_file(root, current_annotation, current_videos) - def _create_identity_txt(self, root, num_images): - data = torch.randint(1, 4, size=(num_images, 1)).tolist() - self._create_txt(root, "identity_CelebA.txt", data) + other_videos = set(video_files) - set(current_videos) + other_annotations = [ + self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) + ] + other_annotations.remove(current_annotation) + for name in other_annotations: + self._create_annotation_file(root, name, other_videos) - def _create_bbox_txt(self, root, num_images): - header = ("x_1", "y_1", "width", "height") - data = torch.randint(10, size=(num_images, len(header))).tolist() - self._create_txt( - root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True - ) + return len(current_videos) - def _create_landmarks_txt(self, root, num_images): - header = ("lefteye_x", "rightmouth_y") - data = torch.randint(10, size=(num_images, len(header))).tolist() - self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) + def _annotation_file_name(self, fold, train): + return f"{'train' if train else 'test'}list{fold:02d}.txt" - def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + def _create_annotation_file(self, root, name, video_files): with open(pathlib.Path(root) / name, "w") as fh: - if add_num_examples: - fh.write(f"{len(data)}\n") + fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) - if header: - if add_image_id_to_header: - header = ("image_id", *header) - fh.write(f"{' '.join(header)}\n") - for idx, line in enumerate(data, 1): - fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") +class LSUNTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.LSUN - def test_combined_targets(self): - target_types = ["attr", "identity", "bbox", "landmarks"] + REQUIRED_PACKAGES = ("lmdb",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) + ) - individual_targets = [] - for target_type in target_types: - with self.create_dataset(target_type=target_type) as (dataset, _): - _, target = dataset[0] - individual_targets.append(target) + _CATEGORIES = ( + "bedroom", + "bridge", + "church_outdoor", + "classroom", + "conference_room", + "dining_room", + "kitchen", + "living_room", + "restaurant", + "tower", + ) - with self.create_dataset(target_type=target_types) as (dataset, _): - _, combined_targets = dataset[0] + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) - actual = len(individual_targets) - expected = len(combined_targets) - assert ( - actual == expected - ), "The number of the returned combined targets does not match the the number targets if requested " - f"individually: {actual} != {expected}", + num_images = 0 + for cls in self._parse_classes(config["classes"]): + num_images += self._create_lmdb(root, cls) - for target_type, 
combined_target, individual_target in zip(target_types, combined_targets, individual_targets): - with self.subTest(target_type=target_type): - actual = type(combined_target) - expected = type(individual_target) - assert ( - actual is expected - ), "Type of the combined target does not match the type of the corresponding individual target: " - f"{actual} is not {expected}", + return num_images - def test_no_target(self): - with self.create_dataset(target_type=[]) as (dataset, _): - _, target = dataset[0] + @ contextlib.contextmanager + def create_dataset(self, *args, **kwargs): + with super().create_dataset(*args, **kwargs) as output: + yield output + # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, + # this creates a number of _cache_* files in the current directory that will not be removed together + # with the temporary directory + for file in os.listdir(os.getcwd()): + if file.startswith("_cache_"): + try: + os.remove(file) + except FileNotFoundError: + # When the same test is run in parallel (in fb internal tests), a thread may remove another + # thread's file. We should be able to remove the try/except when + # https://github.com/pytorch/vision/issues/825 is fixed. + pass - assert target is None + def _parse_classes(self, classes): + if not isinstance(classes, str): + return classes - def test_attr_names(self): - with self.create_dataset() as (dataset, info): - assert tuple(dataset.attr_names) == info["attr_names"] + split = classes + if split == "test": + return [split] - def test_images_names_split(self): - with self.create_dataset(split="all") as (dataset, _): - all_imgs_names = set(dataset.filename) + return [f"{category}_{split}" for category in self._CATEGORIES] - merged_imgs_names = set() - for split in ["train", "valid", "test"]: - with self.create_dataset(split=split) as (dataset, _): - merged_imgs_names.update(dataset.filename) + def _create_lmdb(self, root, cls): + lmdb = datasets_utils.lazy_importer.lmdb + hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] - assert merged_imgs_names == all_imgs_names + folder = f"{cls}_lmdb" + num_images = torch.randint(1, 4, size=()).item() + format = "png" + files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) -class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.VOCSegmentation - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) - - ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid( - year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") - ), - dict(year="2007", image_set="test"), - dict(year="2007-test", image_set="test"), - ) - - def inject_fake_data(self, tmpdir, config): - year, is_test_set = ( - ("2007", True) - if config["year"] == "2007-test" or config["image_set"] == "test" - else (config["year"], False) - ) - image_set = config["image_set"] - - base_dir = pathlib.Path(tmpdir) - if year == "2011": - base_dir /= "TrainVal" - base_dir = base_dir / "VOCdevkit" / f"VOC{year}" - os.makedirs(base_dir) + with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: + for file in files: + key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() - num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) - datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) + buffer = io.BytesIO() + 
PIL.Image.open(file).save(buffer, format) + buffer.seek(0) + value = buffer.read() - datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) - annotation = self._create_annotation_files(base_dir, "Annotations", num_images) + txn.put(key, value) - return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) + os.remove(file) - def _create_image_set_files(self, root, name, is_test_set): - root = pathlib.Path(root) / name - src = pathlib.Path(root) / "Main" - os.makedirs(src, exist_ok=True) + return num_images - idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) - idcs["trainval"] = (*idcs["train"], *idcs["val"]) + def test_not_found_or_corrupted(self): + # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to + # RuntimeError or FileNotFoundError that are normally checked by this test. + with pytest.raises(datasets_utils.lazy_importer.lmdb.Error): + super().test_not_found_or_corrupted() - for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): - self._create_image_set_file(src, image_set, idcs[image_set]) - shutil.copytree(src, root / "Segmentation") +class KineticsTestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) - num_images = max(itertools.chain(*idcs.values())) + 1 - num_images_per_image_set = {image_set: len(idcs_) for image_set, idcs_ in idcs.items()} - return num_images, num_images_per_image_set + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 + tmpdir = pathlib.Path(tmpdir) / config["split"] + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", + num_videos_per_class, + ) + return num_videos_per_class * len(classes) - def _create_image_set_file(self, root, image_set, idcs): - with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: - fh.writelines([f"{idx:06d}\n" for idx in idcs]) - def _create_annotation_files(self, root, name, num_images): - root = pathlib.Path(root) / name - os.makedirs(root) +class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics400 - for idx in range(num_images): - annotation = self._create_annotation_file(root, f"{idx:06d}.xml") + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 - return annotation + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.avi", + num_videos_per_class, + ) - def _create_annotation_file(self, root, name): - def add_child(parent, name, text=None): - child = ET.SubElement(parent, name) - child.text = text - return child + return num_videos_per_class * len(classes) - def add_name(obj, name="dog"): - add_child(obj, "name", name) - return name - def add_bndbox(obj, bndbox=None): - if bndbox is None: - bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} +class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.HMDB51 - obj = add_child(obj, "bndbox") - for name, text in bndbox.items(): - add_child(obj, name, text) + ADDITIONAL_CONFIGS = 
datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) - return bndbox + _VIDEO_FOLDER = "videos" + _SPLITS_FOLDER = "splits" + _CLASSES = ("brush_hair", "wave") - annotation = ET.Element("annotation") - obj = add_child(annotation, "object") - data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._SPLITS_FOLDER + return root, annotation_path - with open(pathlib.Path(root) / name, "wb") as fh: - fh.write(ET.tostring(annotation)) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) - return data + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + splits_folder = tmpdir / self._SPLITS_FOLDER + os.makedirs(splits_folder) + num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) -class VOCDetectionTestCase(VOCSegmentationTestCase): - DATASET_CLASS = datasets.VOCDetection - FEATURE_TYPES = (PIL.Image.Image, dict) + return num_examples - def test_annotations(self): - with self.create_dataset() as (dataset, info): - _, target = dataset[0] + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" - assert "annotation" in target - annotation = target["annotation"] + return [ + ( + cls, + datasets_utils.create_video_folder( + root, + cls, + lambda idx: file_name_fn(cls, idx), + num_examples_per_class, + ), + ) + for cls in self._CLASSES + ] - assert "object" in annotation - objects = annotation["object"] + def _create_split_files(self, root, video_files, fold, train): + num_videos = num_train_videos = 0 - assert len(objects) == 1 - object = objects[0] + for cls, videos in video_files: + num_videos += len(videos) - assert object == info["annotation"] + train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) + num_train_videos += len(train_videos) + with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: + fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) -class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CocoDetection - FEATURE_TYPES = (PIL.Image.Image, list) + return num_train_videos if train else (num_videos - num_train_videos) - REQUIRED_PACKAGES = ("pycocotools",) - _IMAGE_FOLDER = "images" - _ANNOTATIONS_FOLDER = "annotations" - _ANNOTATIONS_FILE = "annotations.json" +class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Omniglot - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._IMAGE_FOLDER - annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE - return root, annotation_file + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + target_folder = ( + pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" + ) + os.makedirs(target_folder) - num_images = 3 - num_annotations_per_image = 2 + num_images = 0 + for name in ("Alphabet_of_the_Magi", "Tifinagh"): + num_images += self._create_alphabet_folder(target_folder, name) - files = datasets_utils.create_image_folder( - tmpdir, 
name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images - ) - file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] + return num_images - annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER - os.makedirs(annotation_folder) - info = self._create_annotation_file( - annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image - ) + def _create_alphabet_folder(self, root, name): + num_images_total = 0 + for idx in range(torch.randint(1, 4, size=()).item()): + num_images = torch.randint(1, 4, size=()).item() + num_images_total += num_images - info["num_examples"] = num_images - return info + datasets_utils.create_image_folder( + root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images + ) - def _create_annotation_file(self, root, name, file_names, num_annotations_per_image): - image_ids = [int(file_name.stem) for file_name in file_names] - images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + return num_images_total - annotations, info = self._create_annotations(image_ids, num_annotations_per_image) - self._create_json(root, name, dict(images=images, annotations=annotations)) - return info +class SBUTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBU + FEATURE_TYPES = (PIL.Image.Image, str) - def _create_annotations(self, image_ids, num_annotations_per_image): - annotations = datasets_utils.combinations_grid( - image_id=image_ids, bbox=([1.0, 2.0, 3.0, 4.0],) * num_annotations_per_image - ) - for id, annotation in enumerate(annotations): - annotation["id"] = id - return annotations, dict() + def inject_fake_data(self, tmpdir, config): + num_images = 3 - def _create_json(self, root, name, content): - file = pathlib.Path(root) / name - with open(file, "w") as fh: - json.dump(content, fh) - return file + dataset_folder = pathlib.Path(tmpdir) / "dataset" + images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) + self._create_urls_txt(dataset_folder, images) + self._create_captions_txt(dataset_folder, num_images) -class CocoCaptionsTestCase(CocoDetectionTestCase): - DATASET_CLASS = datasets.CocoCaptions - - def _create_annotations(self, image_ids, num_annotations_per_image): - captions = [str(idx) for idx in range(num_annotations_per_image)] - annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) - for id, annotation in enumerate(annotations): - annotation["id"] = id - return annotations, dict(captions=captions) - - def test_captions(self): - with self.create_dataset() as (dataset, info): - _, captions = dataset[0] - assert tuple(captions) == tuple(info["captions"]) + return num_images + def _create_file_name(self, idx): + part1 = datasets_utils.create_random_string(10, string.digits) + part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) + return f"{part1}_{part2}.jpg" -class UCF101TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.UCF101 + def _create_urls_txt(self, root, images): + with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: + for image in images: + fh.write( + f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" + ) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + def _create_captions_txt(self, root, num_images): + with open(root / 
"SBU_captioned_photo_dataset_captions.txt", "w") as fh: + for _ in range(num_images): + fh.write(f"{datasets_utils.create_random_string(10)}\n") - _VIDEO_FOLDER = "videos" - _ANNOTATIONS_FOLDER = "annotations" - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._VIDEO_FOLDER - annotation_path = tmpdir / self._ANNOTATIONS_FOLDER - return root, annotation_path +class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SEMEION def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + num_images = 3 - video_folder = tmpdir / self._VIDEO_FOLDER - os.makedirs(video_folder) - video_files = self._create_videos(video_folder) + images = torch.rand(num_images, 256) + labels = F.one_hot(torch.randint(10, size=(num_images,))) + with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: + for image, one_hot_labels in zip(images, labels): + image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) + labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) + fh.write(f"{image_columns} {labels_columns}\n") - annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER - os.makedirs(annotations_folder) - num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) + return num_images - return num_examples - def _create_videos(self, root, num_examples_per_class=3): - def file_name_fn(cls, idx, clips_per_group=2): - return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" +class USPSTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.USPS - video_files = [ - datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) - for cls in ("ApplyEyeMakeup", "YoYo") - ] - return [path.relative_to(root) for path in itertools.chain(*video_files)] + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - def _create_annotation_files(self, root, video_files, fold, train): - current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) - current_annotation = self._annotation_file_name(fold, train) - self._create_annotation_file(root, current_annotation, current_videos) + def inject_fake_data(self, tmpdir, config): + num_images = 2 if config["train"] else 1 - other_videos = set(video_files) - set(current_videos) - other_annotations = [ - self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) - ] - other_annotations.remove(current_annotation) - for name in other_annotations: - self._create_annotation_file(root, name, other_videos) + images = torch.rand(num_images, 256) * 2 - 1 + labels = torch.randint(1, 11, size=(num_images,)) - return len(current_videos) + with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: + for image, label in zip(images, labels): + line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) + fh.write(f"{line}\n".encode()) - def _annotation_file_name(self, fold, train): - return f"{'train' if train else 'test'}list{fold:02d}.txt" + return num_images - def _create_annotation_file(self, root, name, video_files): - with open(pathlib.Path(root) / name, "w") as fh: - fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) +class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBDataset + FEATURE_TYPES 
= (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) -class LSUNTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.LSUN + REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") - REQUIRED_PACKAGES = ("lmdb",) ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) + image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") ) - _CATEGORIES = ( - "bedroom", - "bridge", - "church_outdoor", - "classroom", - "conference_room", - "dining_room", - "kitchen", - "living_room", - "restaurant", - "tower", - ) + _NUM_CLASSES = 20 def inject_fake_data(self, tmpdir, config): - root = pathlib.Path(tmpdir) - - num_images = 0 - for cls in self._parse_classes(config["classes"]): - num_images += self._create_lmdb(root, cls) - - return num_images + num_images, num_images_per_image_set = self._create_split_files(tmpdir) - @ contextlib.contextmanager - def create_dataset(self, *args, **kwargs): - with super().create_dataset(*args, **kwargs) as output: - yield output - # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, - # this creates a number of _cache_* files in the current directory that will not be removed together - # with the temporary directory - for file in os.listdir(os.getcwd()): - if file.startswith("_cache_"): - try: - os.remove(file) - except FileNotFoundError: - # When the same test is run in parallel (in fb internal tests), a thread may remove another - # thread's file. We should be able to remove the try/except when - # https://github.com/pytorch/vision/issues/825 is fixed. - pass + sizes = self._create_target_folder(tmpdir, "cls", num_images) - def _parse_classes(self, classes): - if not isinstance(classes, str): - return classes + datasets_utils.create_image_folder( + tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] + ) - split = classes - if split == "test": - return [split] + return num_images_per_image_set[config["image_set"]] - return [f"{category}_{split}" for category in self._CATEGORIES] + def _create_split_files(self, root): + root = pathlib.Path(root) - def _create_lmdb(self, root, cls): - lmdb = datasets_utils.lazy_importer.lmdb - hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] + splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) - folder = f"{cls}_lmdb" + for split, idcs in splits.items(): + self._create_split_file(root, split, idcs) - num_images = torch.randint(1, 4, size=()).item() - format = "png" - files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) + num_images = max(itertools.chain(*splits.values())) + 1 + num_images_per_split = {split: len(idcs) for split, idcs in splits.items()} + return num_images, num_images_per_split - with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: - for file in files: - key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() + def _create_split_file(self, root, name, idcs): + with open(root / f"{name}.txt", "w") as fh: + fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) - buffer = io.BytesIO() - PIL.Image.open(file).save(buffer, format) - buffer.seek(0) - value = buffer.read() + def _create_target_folder(self, root, name, num_images): + io = datasets_utils.lazy_importer.scipy.io - txn.put(key, value) + target_folder = pathlib.Path(root) / name + os.makedirs(target_folder) - os.remove(file) + sizes = 
[torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] + for idx, size in enumerate(sizes): + content = dict( + GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) + ) + io.savemat(target_folder / f"{self._file_stem(idx)}.mat", content) - return num_images + return sizes - def test_not_found_or_corrupted(self): - # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to - # RuntimeError or FileNotFoundError that are normally checked by this test. - with pytest.raises(datasets_utils.lazy_importer.lmdb.Error): - super().test_not_found_or_corrupted() + def _create_boundaries(self, size): + sparse = datasets_utils.lazy_importer.scipy.sparse + return [ + [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] + for _ in range(self._NUM_CLASSES) + ] + def _create_segmentation(self, size): + return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() -class KineticsTestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.Kinetics - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) + def _file_stem(self, idx): + return f"2008_{idx:06d}" - def inject_fake_data(self, tmpdir, config): - classes = ("Abseiling", "Zumba") - num_videos_per_class = 2 - tmpdir = pathlib.Path(tmpdir) / config["split"] - digits = string.ascii_letters + string.digits + "-_" - for cls in classes: - datasets_utils.create_video_folder( - tmpdir, - cls, - lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", - num_videos_per_class, - ) - return num_videos_per_class * len(classes) +class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FakeData + FEATURE_TYPES = (PIL.Image.Image, int) -class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.Kinetics400 + def dataset_args(self, tmpdir, config): + return () def inject_fake_data(self, tmpdir, config): - classes = ("Abseiling", "Zumba") - num_videos_per_class = 2 + return config["size"] - digits = string.ascii_letters + string.digits + "-_" - for cls in classes: - datasets_utils.create_video_folder( - tmpdir, - cls, - lambda _: f"{datasets_utils.create_random_string(11, digits)}.avi", - num_videos_per_class, - ) + def test_not_found_or_corrupted(self): + self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") - return num_videos_per_class * len(classes) +class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PhotoTour -class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.HMDB51 + # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, + # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we + # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. 
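The comment block added above ends by saying the test overrides `test_feature_types()` to pick the correct tuple at run time. As a hedged sketch of what such an override could look like (the method body below is an assumption for illustration, not the patch's verbatim code; only the attribute names come from the hunk):

```python
def test_feature_types(self, config):
    # Swap in the feature types matching the requested split, run the
    # base-class check, then restore the dummy placeholder value.
    saved = self.FEATURE_TYPES
    self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES
    try:
        super().test_feature_types(config)
    finally:
        self.FEATURE_TYPES = saved
```

The `FEATURE_TYPES` placeholder defined on the next lines only has to satisfy the base class's initial checks.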
+ FEATURE_TYPES = () + _TRAIN_FEATURE_TYPES = (torch.Tensor,) + _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + datasets_utils.combinations_grid(train=(True, False)) - _VIDEO_FOLDER = "videos" - _SPLITS_FOLDER = "splits" - _CLASSES = ("brush_hair", "wave") - - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._VIDEO_FOLDER - annotation_path = tmpdir / self._SPLITS_FOLDER - return root, annotation_path - - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - - video_folder = tmpdir / self._VIDEO_FOLDER - os.makedirs(video_folder) - video_files = self._create_videos(video_folder) - - splits_folder = tmpdir / self._SPLITS_FOLDER - os.makedirs(splits_folder) - num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) - - return num_examples - - def _create_videos(self, root, num_examples_per_class=3): - def file_name_fn(cls, idx, clips_per_group=2): - return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" - - return [ - ( - cls, - datasets_utils.create_video_folder( - root, - cls, - lambda idx: file_name_fn(cls, idx), - num_examples_per_class, - ), - ) - for cls in self._CLASSES - ] - - def _create_split_files(self, root, video_files, fold, train): - num_videos = num_train_videos = 0 - - for cls, videos in video_files: - num_videos += len(videos) - - train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) - num_train_videos += len(train_videos) - - with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: - fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) - - return num_train_videos if train else (num_videos - num_train_videos) - - -class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Omniglot - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) - - def inject_fake_data(self, tmpdir, config): - target_folder = ( - pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" - ) - os.makedirs(target_folder) - - num_images = 0 - for name in ("Alphabet_of_the_Magi", "Tifinagh"): - num_images += self._create_alphabet_folder(target_folder, name) - - return num_images - - def _create_alphabet_folder(self, root, name): - num_images_total = 0 - for idx in range(torch.randint(1, 4, size=()).item()): - num_images = torch.randint(1, 4, size=()).item() - num_images_total += num_images - - datasets_utils.create_image_folder( - root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images - ) - - return num_images_total - - -class SBUTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SBU - FEATURE_TYPES = (PIL.Image.Image, str) - - def inject_fake_data(self, tmpdir, config): - num_images = 3 - - dataset_folder = pathlib.Path(tmpdir) / "dataset" - images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) - - self._create_urls_txt(dataset_folder, images) - self._create_captions_txt(dataset_folder, num_images) - - return num_images - - def _create_file_name(self, idx): - part1 = datasets_utils.create_random_string(10, string.digits) - part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) - return f"{part1}_{part2}.jpg" - - def _create_urls_txt(self, 
root, images): - with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: - for image in images: - fh.write( - f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" - ) - - def _create_captions_txt(self, root, num_images): - with open(root / "SBU_captioned_photo_dataset_captions.txt", "w") as fh: - for _ in range(num_images): - fh.write(f"{datasets_utils.create_random_string(10)}\n") - - -class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SEMEION - - def inject_fake_data(self, tmpdir, config): - num_images = 3 - - images = torch.rand(num_images, 256) - labels = F.one_hot(torch.randint(10, size=(num_images,))) - with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: - for image, one_hot_labels in zip(images, labels): - image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) - labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) - fh.write(f"{image_columns} {labels_columns}\n") - - return num_images - - -class USPSTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.USPS - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - - def inject_fake_data(self, tmpdir, config): - num_images = 2 if config["train"] else 1 - - images = torch.rand(num_images, 256) * 2 - 1 - labels = torch.randint(1, 11, size=(num_images,)) - - with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: - for image, label in zip(images, labels): - line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) - fh.write(f"{line}\n".encode()) - - return num_images - - -class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SBDataset - FEATURE_TYPES = (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) - - REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") - ) - - _NUM_CLASSES = 20 - - def inject_fake_data(self, tmpdir, config): - num_images, num_images_per_image_set = self._create_split_files(tmpdir) - - sizes = self._create_target_folder(tmpdir, "cls", num_images) - - datasets_utils.create_image_folder( - tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] - ) - - return num_images_per_image_set[config["image_set"]] - - def _create_split_files(self, root): - root = pathlib.Path(root) - - splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) - - for split, idcs in splits.items(): - self._create_split_file(root, split, idcs) - - num_images = max(itertools.chain(*splits.values())) + 1 - num_images_per_split = {split: len(idcs) for split, idcs in splits.items()} - return num_images, num_images_per_split - - def _create_split_file(self, root, name, idcs): - with open(root / f"{name}.txt", "w") as fh: - fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) - - def _create_target_folder(self, root, name, num_images): - io = datasets_utils.lazy_importer.scipy.io - - target_folder = pathlib.Path(root) / name - os.makedirs(target_folder) - - sizes = [torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] - for idx, size in enumerate(sizes): - content = dict( - GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) - ) - io.savemat(target_folder / f"{self._file_stem(idx)}.mat", content) - - return 
sizes - - def _create_boundaries(self, size): - sparse = datasets_utils.lazy_importer.scipy.sparse - return [ - [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] - for _ in range(self._NUM_CLASSES) - ] - - def _create_segmentation(self, size): - return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() - - def _file_stem(self, idx): - return f"2008_{idx:06d}" - - -class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.FakeData - FEATURE_TYPES = (PIL.Image.Image, int) - - def dataset_args(self, tmpdir, config): - return () - - def inject_fake_data(self, tmpdir, config): - return config["size"] - - def test_not_found_or_corrupted(self): - self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") - - -class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.PhotoTour - - # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, - # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we - # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. - FEATURE_TYPES = () - _TRAIN_FEATURE_TYPES = (torch.Tensor,) - _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) - - datasets_utils.combinations_grid(train=(True, False)) - - _NAME = "liberty" + _NAME = "liberty" def dataset_args(self, tmpdir, config): return tmpdir, self._NAME @@ -2898,341 +2335,1042 @@ def inject_fake_data(self, tmpdir: str, config): ) ) - meta_folder = data_folder / "labels" - meta_folder.mkdir() - image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files] - image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2) - with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file: - file.write("\n".join(image_ids_in_config) + "\n") + meta_folder = data_folder / "labels" + meta_folder.mkdir() + image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files] + image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2) + with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file: + file.write("\n".join(image_ids_in_config) + "\n") + + return len(image_ids_in_config) + + +class FER2013TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FER2013 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "fer2013") + os.makedirs(base_folder) + + num_samples = 5 + with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file: + writer = csv.DictWriter( + file, + fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",), + quoting=csv.QUOTE_NONNUMERIC, + quotechar='"', + ) + writer.writeheader() + for _ in range(num_samples): + row = dict( + pixels=" ".join( + str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() + ) + ) + if config["split"] == "train": + row["emotion"] = str(int(torch.randint(0, 7, ()))) + + writer.writerow(row) + + return num_samples + + +class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.GTSRB + FEATURE_TYPES = (PIL.Image.Image, int) + + 
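Every test case in this patch drives its configurations through `datasets_utils.combinations_grid`, including the `ADDITIONAL_CONFIGS` assignment that follows. Assuming it is a plain cartesian-product helper, its behaviour can be sketched as:

```python
import itertools

def combinations_grid(**kwargs):
    # One config dict per element of the cartesian product of the given
    # iterables (assumed behaviour of the datasets_utils test helper).
    return [dict(zip(kwargs, values)) for values in itertools.product(*kwargs.values())]

# combinations_grid(split=("train", "test")) == [{"split": "train"}, {"split": "test"}]
```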
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir: str, config): + root_folder = os.path.join(tmpdir, "gtsrb") + os.makedirs(root_folder, exist_ok=True) + + # Train data + train_folder = os.path.join(root_folder, "GTSRB", "Training") + os.makedirs(train_folder, exist_ok=True) + + num_examples = 3 if config["split"] == "train" else 4 + classes = ("00000", "00042", "00012") + for class_idx in classes: + datasets_utils.create_image_folder( + train_folder, + name=class_idx, + file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + num_examples=num_examples, + ) + + total_number_of_examples = num_examples * len(classes) + # Test data + test_folder = os.path.join(root_folder, "GTSRB", "Final_Test", "Images") + os.makedirs(test_folder, exist_ok=True) + + with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file: + csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n") + + for _ in range(total_number_of_examples): + image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm" + datasets_utils.create_image_file(test_folder, image_file) + row = [ + image_file, + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(0, 43, size=()).item(), + ] + csv_file.write(";".join(map(str, row)) + "\n") + + return total_number_of_examples + + +class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CLEVRClassification + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + + def inject_fake_data(self, tmpdir, config): + data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" + + images_folder = data_folder / "images" + image_files = datasets_utils.create_image_folder( + images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5 + ) + + scenes_folder = data_folder / "scenes" + scenes_folder.mkdir() + if config["split"] != "test": + with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file: + json.dump( + dict( + info=dict(), + scenes=[ + dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ()))) + for image_file in image_files + ], + ), + file, + ) + + return len(image_files) + + +class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.OxfordIIITPet + FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("trainval", "test"), + target_types=("category", "segmentation", ["category", "segmentation"], []), + ) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "oxford-iiit-pet") + + classification_anns_meta = ( + dict(cls="Abyssinian", label=0, species="cat"), + dict(cls="Keeshond", label=18, species="dog"), + dict(cls="Yorkshire Terrier", label=37, species="dog"), + ) + split_and_classification_anns = [ + self._meta_to_split_and_classification_ann(meta, idx) + for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) + ] + image_ids, *_ = zip(*split_and_classification_anns) + + image_files = datasets_utils.create_image_folder( + base_folder, "images", file_name_fn=lambda idx: 
f"{image_ids[idx]}.jpg", num_examples=len(image_ids) + ) + + anns_folder = os.path.join(base_folder, "annotations") + os.makedirs(anns_folder) + split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2) + with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file: + writer = csv.writer(file, delimiter=" ") + for split_and_classification_ann in split_and_classification_anns_in_split: + writer.writerow(split_and_classification_ann) + + segmentation_files = datasets_utils.create_image_folder( + anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) + ) + + # The dataset has some rogue files + for path in image_files[:2]: + path.with_suffix(".mat").touch() + for path in segmentation_files: + path.with_name(f".{path.name}").touch() + + return len(split_and_classification_anns_in_split) + + def _meta_to_split_and_classification_ann(self, meta, idx): + image_id = "_".join( + [ + *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], + str(idx), + ] + ) + class_id = str(meta["label"] + 1) + species = "1" if meta["species"] == "cat" else "2" + breed_id = "-1" + return (image_id, class_id, species, breed_id) + + +class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StanfordCars + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir, config): + import scipy.io as io + from numpy.core.records import fromarrays + + num_examples = {"train": 5, "test": 7}[config["split"]] + num_classes = 3 + base_folder = pathlib.Path(tmpdir) / "stanford_cars" + + devkit = base_folder / "devkit" + devkit.mkdir(parents=True) + + if config["split"] == "train": + images_folder_name = "cars_train" + annotations_mat_path = devkit / "cars_train_annos.mat" + else: + images_folder_name = "cars_test" + annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" + + datasets_utils.create_image_folder( + root=base_folder, + name=images_folder_name, + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, + ) + + classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) + fnames = [f"{i:5d}.jpg" for i in range(num_examples)] + rec_array = fromarrays( + [classes, fnames], + names=["class", "fname"], + ) + io.savemat(annotations_mat_path, {"annotations": rec_array}) + + random_class_names = ["random_name"] * num_classes + io.savemat(devkit / "cars_meta.mat", {"class_names": random_class_names}) + + return num_examples + + +class Country211TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Country211 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test")) + + def inject_fake_data(self, tmpdir: str, config): + split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] + split_folder.mkdir(parents=True, exist_ok=True) + + num_examples = { + "train": 3, + "valid": 4, + "test": 5, + }[config["split"]] + + classes = ("AD", "BS", "GR") + for cls in classes: + datasets_utils.create_image_folder( + split_folder, + name=cls, + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_examples, + ) + + return num_examples * len(classes) + + +class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Flowers102 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", 
"val", "test")) + REQUIRED_PACKAGES = ("scipy",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "flowers-102" + + num_classes = 3 + num_images_per_split = dict(train=5, val=4, test=3) + num_images_total = sum(num_images_per_split.values()) + datasets_utils.create_image_folder( + base_folder, + "jpg", + file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg", + num_examples=num_images_total, + ) + + label_dict = dict( + labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict) + + setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16) + np.random.shuffle(setid_mat) + setid_dict = dict( + trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1), + valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1), + tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict) + + return num_images_per_split[config["split"]] + + +class PCAMTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PCAM + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + REQUIRED_PACKAGES = ("h5py",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "pcam" + base_folder.mkdir() + + num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + + images_file = datasets.PCAM._FILES[config["split"]]["images"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / images_file), "w") as f: + f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + + targets_file = datasets.PCAM._FILES[config["split"]]["targets"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / targets_file), "w") as f: + f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + + return num_images + + +class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.RenderedSST2 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} + + def inject_fake_data(self, tmpdir: str, config): + root_folder = pathlib.Path(tmpdir) / "rendered-sst2" + image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]] + + num_images_per_class = {"train": 5, "test": 6, "val": 7} + sampled_classes = ["positive", "negative"] + for cls in sampled_classes: + datasets_utils.create_image_folder( + image_folder, + cls, + file_name_fn=lambda idx: f"{idx}.png", + num_examples=num_images_per_class[config["split"]], + ) + + return len(sampled_classes) * num_images_per_class[config["split"]] + + +class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoETH3D + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + # create the scene folder + image_paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with left right 
images + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100))) + return image_paths + + @staticmethod + def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + # create scene directories + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with a random png file for occlusion mask, and a pfm file for disparity + paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100))) + pfm_path = os.path.join(scene_dir, "disp0GT.pfm") + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) + paths.append(pfm_path) + return paths + + def inject_fake_data(self, tmpdir, config): + eth3d_dir = os.path.join(tmpdir, "ETH3D") + + num_examples = 2 if config["split"] == "train" else 3 + + split_name = "two_view_training" if config["split"] == "train" else "two_view_test" + split_dir = os.path.join(eth3d_dir, split_name) + self._create_scene_folder(num_examples, split_dir) + + if config["split"] == "train": + annot_dir = os.path.join(eth3d_dir, "two_view_training_gt") + self._create_annotation_folder(num_examples, annot_dir) + + return num_examples + + def test_training_test_splits(self): + with self.create_dataset(split="train") as (dataset, _): + assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" + for _, _, disparity, valid_mask in dataset: + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + with self.create_dataset(split="test") as (dataset, _): + assert all(d == ("", "") for d in dataset._disparities) + for _, _, disparity, valid_mask in dataset: + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class CREStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CREStereo + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" + os.makedirs(crestereo_dir, exist_ok=True) + + split_dir = crestereo_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"tree": 2, "shapenet": 3, "reflective": 6, "hole": 5}.get(config["split"], 0) + + for idx in range(num_examples): + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) + # these are going to end up being gray scale images + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + + return num_examples + + def test_splits(self): + for split in ("tree", "shapenet", "reflective", "hole"): + with 
self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoMiddlebury2014 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "additional"), + calibration=("perfect", "imperfect", "both"), + use_ambient_views=(True, False), + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: + calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] + scene_dirs = [] + for c in calibrations: + scene_dir = os.path.join(root_dir, f"{scene_name}{c}") + os.makedirs(scene_dir, exist_ok=True) + # make normal images first + datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + # these are going to end up being gray scale images + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) + scene_dirs.append(scene_dir) + return scene_dirs + + def inject_fake_data(self, tmpdir, config): + split_scene_map = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + } + + middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") + os.makedirs(middlebury_dir, exist_ok=True) + + split_dir = middlebury_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 2, "additional": 3, "test": 4}.get(config["split"], 0) + for idx in range(num_examples): + scene_name = split_scene_map[config["split"]][idx] + self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + + if config["calibration"] == "both": + num_examples *= 2 + return num_examples + + def test_train_splits(self): + for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): + with self.create_dataset(split=split, calibration=calibration) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert 
disparity.shape == (1, h, w)
+                    # check that the valid mask is the same size as the disparity
+                    _, dh, dw = disparity.shape
+                    mh, mw = valid_mask.shape
+                    assert dh == mh
+                    assert dw == mw
+
+    def test_test_split(self):
+        for split in ["test"]:
+            with self.create_dataset(split=split, calibration=None) as (dataset, _):
+                for left, right, disparity, valid_mask in dataset:
+                    left_array = np.array(left)
+                    right_array = np.array(right)
+                    h, w, c = left_array.shape
+                    # check that left and right are the same size
+                    assert left_array.shape == right_array.shape
+                    # check general shapes
+                    assert c == 3
+                    assert disparity is None
+                    assert valid_mask is None
+
+    def test_augmented_view_usage(self):
+        with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _):
+            for left, right, _, _ in dataset:
+                left_array = np.array(left)
+                right_array = np.array(right)
+                # check that left and right are the same size
+                assert left_array.shape == right_array.shape
+
+    def test_warnings_train(self):
+        # requesting a train split without a calibration setting should warn
+        split = "train"
+        calibration = None
+        with pytest.warns(
+            RuntimeWarning,
+            match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument."
+            f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.",
+        ):
+            with self.create_dataset(split=split, calibration=calibration):
+                pass
+
+    def test_warnings_test(self):
+        # requesting the test split with a calibration setting should warn
+        split = "test"
+        calibration = "perfect"
+        with pytest.warns(
+            RuntimeWarning, match="\nSplit 'test' has no calibration settings, ignoring calibration argument."
+        ):
+            with self.create_dataset(split=split, calibration=calibration):
+                pass
+
+    def test_bad_input(self):
+        with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"):
+            with self.create_dataset(split="bad"):
+                pass
+
+
+class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.StereoKitti2012
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        kitti_dir = pathlib.Path(tmpdir) / "Kitti2012"
+        os.makedirs(kitti_dir, exist_ok=True)
+
+        split_dir = kitti_dir / (config["split"] + "ing")
+        os.makedirs(split_dir, exist_ok=True)
+
+        num_examples = {"train": 4, "test": 3}.get(config["split"], 0)
+
+        datasets_utils.create_image_folder(
+            root=split_dir,
+            name="colored_0",
+            file_name_fn=lambda i: f"{i:06d}_10.png",
+            num_examples=num_examples,
+            size=(3, 100, 200),
+        )
+        datasets_utils.create_image_folder(
+            root=split_dir,
+            name="colored_1",
+            file_name_fn=lambda i: f"{i:06d}_10.png",
+            num_examples=num_examples,
+            size=(3, 100, 200),
+        )
+
+        if config["split"] == "train":
+            datasets_utils.create_image_folder(
+                root=split_dir,
+                name="disp_noc",
+                file_name_fn=lambda i: f"{i:06d}.png",
+                num_examples=num_examples,
+                # Kitti2012 uses a single channel image for disparities
+                size=(1, 100, 200),
+            )
+
+        return num_examples
+
+    def test_train_splits(self):
+        for split in ["train"]:
+            with self.create_dataset(split=split) as (dataset, _):
+                for left, right, disparity, valid_mask in dataset:
+                    left_array = np.array(left)
+                    right_array = np.array(right)
+                    h, w, c = left_array.shape
+                    # check that left and right are the same size
+                    assert left_array.shape == right_array.shape
+                    # check general shapes
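The split tests in this patch, including the assertions that resume directly below this aside, all restate one contract: equally sized RGB inputs, a (1, H, W) disparity and an (H, W) valid mask. A consolidated checker (hypothetical, not part of the patch) could read:

```python
import numpy as np

def assert_valid_stereo_sample(left, right, disparity, valid_mask):
    # The shared shape contract of the training splits: equally sized RGB
    # images, disparity of shape (1, H, W), boolean mask of shape (H, W).
    left_array, right_array = np.array(left), np.array(right)
    assert left_array.shape == right_array.shape
    h, w, c = left_array.shape
    assert c == 3
    assert disparity.shape == (1, h, w)
    assert valid_mask.shape == (h, w)
```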
assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + assert disparity is None + assert valid_mask is None - return len(image_ids_in_config) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass -class FER2013TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.FER2013 +class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2015 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - - FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): - base_folder = os.path.join(tmpdir, "fer2013") - os.makedirs(base_folder) - - num_samples = 5 - with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file: - writer = csv.DictWriter( - file, - fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",), - quoting=csv.QUOTE_NONNUMERIC, - quotechar='"', - ) - writer.writeheader() - for _ in range(num_samples): - row = dict( - pixels=" ".join( - str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() - ) - ) - if config["split"] == "train": - row["emotion"] = str(int(torch.randint(0, 7, ()))) - - writer.writerow(row) - - return num_samples - + kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" + os.makedirs(kitti_dir, exist_ok=True) -class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.GTSRB - FEATURE_TYPES = (PIL.Image.Image, int) + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + num_examples = {"train": 4, "test": 6}.get(config["split"], 0) - def inject_fake_data(self, tmpdir: str, config): - root_folder = os.path.join(tmpdir, "gtsrb") - os.makedirs(root_folder, exist_ok=True) + datasets_utils.create_image_folder( + root=split_dir, + name="image_2", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="image_3", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) - # Train data - train_folder = os.path.join(root_folder, "GTSRB", "Training") - os.makedirs(train_folder, exist_ok=True) + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_0", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) - num_examples = 3 if config["split"] == "train" else 4 - classes = ("00000", "00042", "00012") - for class_idx in classes: datasets_utils.create_image_folder( - train_folder, - name=class_idx, - 
file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + root=split_dir, + name="disp_occ_1", + file_name_fn=lambda i: f"{i:06d}.png", num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), ) - total_number_of_examples = num_examples * len(classes) - # Test data - test_folder = os.path.join(root_folder, "GTSRB", "Final_Test", "Images") - os.makedirs(test_folder, exist_ok=True) + return num_examples - with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file: - csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n") + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - for _ in range(total_number_of_examples): - image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm" - datasets_utils.create_image_file(test_folder, image_file) - row = [ - image_file, - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(0, 43, size=()).item(), - ] - csv_file.write(";".join(map(str, row)) + "\n") + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert disparity is None + assert valid_mask is None - return total_number_of_examples + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass -class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CLEVRClassification - FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) +class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSceneFlow + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("FlyingThings3D", "Driving", "Monkaa"), + pass_name=("clean", "final") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + @staticmethod + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) - def inject_fake_data(self, tmpdir, config): - data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" + paths = [] + for i in range(num_examples): + datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + paths.append(str(root / file_name_fn(i))) + return paths - 
images_folder = data_folder / "images" - image_files = datasets_utils.create_image_folder( - images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5 - ) + def inject_fake_data(self, tmpdir, config): + scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" + os.makedirs(scene_flow_dir, exist_ok=True) - scenes_folder = data_folder / "scenes" - scenes_folder.mkdir() - if config["split"] != "test": - with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file: - json.dump( - dict( - info=dict(), - scenes=[ - dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ()))) - for image_file in image_files - ], - ), - file, - ) + split_dir = scene_flow_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) - return len(image_files) + pass_dir_map = { + "clean": "frames_cleanpass", + "final": "frames_finalpass", + } + num_examples = 1 + pass_dir_name = pass_dir_map.get(config["pass_name"], None) -class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.OxfordIIITPet - FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) + # create pass directories + pass_dir = split_dir / pass_dir_name + disp_dir = split_dir / "disparity" + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("trainval", "test"), - target_types=("category", "segmentation", ["category", "segmentation"], []), - ) + num_examples = {"FlyingThings3D": 4, "Driving": 6, "Monkaa": 5}.get(config["split"], 0) - def inject_fake_data(self, tmpdir, config): - base_folder = os.path.join(tmpdir, "oxford-iiit-pet") + for direction in ["left", "right"]: + for scene_idx in range(num_examples): + os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + datasets_utils.create_image_folder( + root=pass_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=1, + size=(3, 200, 100), + ) - classification_anns_meta = ( - dict(cls="Abyssinian", label=0, species="cat"), - dict(cls="Keeshond", label=18, species="dog"), - dict(cls="Yorkshire Terrier", label=37, species="dog"), - ) - split_and_classification_anns = [ - self._meta_to_split_and_classification_ann(meta, idx) - for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) - ] - image_ids, *_ = zip(*split_and_classification_anns) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) + self._create_pfm_folder( + root=disp_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.pfm", + num_examples=1, + size=(100, 200), + ) - image_files = datasets_utils.create_image_folder( - base_folder, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids) - ) + return num_examples - anns_folder = os.path.join(base_folder, "annotations") - os.makedirs(anns_folder) - split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2) - with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file: - writer = csv.writer(file, delimiter=" ") - for split_and_classification_ann in split_and_classification_anns_in_split: - writer.writerow(split_and_classification_ann) + def test_splits(self): + for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, 
_): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - segmentation_files = datasets_utils.create_image_folder( - anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) - ) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass - # The dataset has some rogue files - for path in image_files[:2]: - path.with_suffix(".mat").touch() - for path in segmentation_files: - path.with_name(f".{path.name}").touch() - return len(split_and_classification_anns_in_split) +class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoFallingThings + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - def _meta_to_split_and_classification_ann(self, meta, idx): - image_id = "_".join( - [ - *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], - str(idx), - ] - ) - class_id = str(meta["label"] + 1) - species = "1" if meta["species"] == "cat" else "2" - breed_id = "-1" - return (image_id, class_id, species, breed_id) + @staticmethod + def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]): + file = pathlib.Path(root) / name + image = np.ones((size[0], size[1]), dtype=np.uint8) + PIL.Image.fromarray(image).save(file) + @staticmethod + def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> List[str]: + paths = [] + root = pathlib.Path(root) / scene_name + os.makedirs(root, exist_ok=True) + # jpg images + paths.append(datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0]))) + paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))) + # single channel depth maps + paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))) + paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))) + # camera settings json. 
Minimal example for _read_disparity function testing + settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} + with open(root / "_camera_settings.json", "w") as f: + json.dump(settings_json, f) -class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StanfordCars - REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + return paths def inject_fake_data(self, tmpdir, config): - import scipy.io as io - from numpy.core.records import fromarrays + fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" + os.makedirs(fallingthings_dir, exist_ok=True) - num_examples = {"train": 5, "test": 7}[config["split"]] - num_classes = 3 - base_folder = pathlib.Path(tmpdir) / "stanford_cars" + split_dir = pathlib.Path(fallingthings_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) - devkit = base_folder / "devkit" - devkit.mkdir(parents=True) + num_examples = {"single": 2, "mixed": 3}.get(config["split"], 0) - if config["split"] == "train": - images_folder_name = "cars_train" - annotations_mat_path = devkit / "cars_train_annos.mat" - else: - images_folder_name = "cars_test" - annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" + for i in range(num_examples): + self._make_scene_folder( + root=split_dir, + scene_name=f"scene_{i:06d}", + size=(100, 200), + ) - datasets_utils.create_image_folder( - root=base_folder, - name=images_folder_name, - file_name_fn=lambda image_index: f"{image_index:5d}.jpg", - num_examples=num_examples, - ) + return num_examples - classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) - fnames = [f"{i:5d}.jpg" for i in range(num_examples)] - rec_array = fromarrays( - [classes, fnames], - names=["class", "fname"], - ) - io.savemat(annotations_mat_path, {"annotations": rec_array}) + def test_splits(self): + for split_name in ["single", "mixed"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - random_class_names = ["random_name"] * num_classes - io.savemat(devkit / "cars_meta.mat", {"class_names": random_class_names}) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass - return num_examples +class StereoSintelTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSintel + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) -class Country211TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Country211 + def inject_fake_data(self, tmpdir, config): + sintel_dir = pathlib.Path(tmpdir) / "Sintel" + os.makedirs(sintel_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test")) + split_dir = pathlib.Path(sintel_dir) / "training" + os.makedirs(split_dir, exist_ok=True) - def inject_fake_data(self, tmpdir: str, config): - 
split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] - split_folder.mkdir(parents=True, exist_ok=True) + # a single setting, since there are no splits + num_examples = 4 - num_examples = { - "train": 3, - "valid": 4, - "test": 5, - }[config["split"]] + for view in ["final_left", "final_right"]: + root = split_dir / view + os.makedirs(root, exist_ok=True) - classes = ("AD", "BS", "GR") - for cls in classes: datasets_utils.create_image_folder( - split_folder, - name=cls, - file_name_fn=lambda idx: f"{idx}.jpg", + root=root, + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", num_examples=num_examples, + size=(3, 100, 200), ) - return num_examples * len(classes) - - -class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Flowers102 - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - REQUIRED_PACKAGES = ("scipy",) - - def inject_fake_data(self, tmpdir: str, config): - base_folder = pathlib.Path(tmpdir) / "flowers-102" - - num_classes = 3 - num_images_per_split = dict(train=5, val=4, test=3) - num_images_total = sum(num_images_per_split.values()) datasets_utils.create_image_folder( - base_folder, - "jpg", - file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg", - num_examples=num_images_total, + root=split_dir / "occlusions", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(1, 100, 200), ) - label_dict = dict( - labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8), + datasets_utils.create_image_folder( + root=split_dir / "outofframe", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(1, 100, 200), ) - datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict) - setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16) - np.random.shuffle(setid_mat) - setid_dict = dict( - trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1), - valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1), - tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1), + datasets_utils.create_image_folder( + root=split_dir / "disparities", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(3, 100, 200), ) - datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict) - - return num_images_per_split[config["split"]] + return num_examples -class PCAMTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.PCAM + def test_splits(self): + with self.create_dataset() as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - REQUIRED_PACKAGES = ("h5py",) - def inject_fake_data(self, tmpdir: str, config): - base_folder = pathlib.Path(tmpdir) / "pcam" - base_folder.mkdir() +class InStereo2k(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = 
datasets.InStereo2k + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + @staticmethod + def _make_scene_folder(root: str, name: str, size: Tuple[int, int]): + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) - images_file = datasets.PCAM._FILES[config["split"]]["images"][0] - with datasets_utils.lazy_importer.h5py.File(str(base_folder / images_file), "w") as f: - f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + datasets_utils.create_image_file(root=root, name="left.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="left_disp.png", size=(1, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right_disp.png", size=(1, size[0], size[1])) - targets_file = datasets.PCAM._FILES[config["split"]]["targets"][0] - with datasets_utils.lazy_importer.h5py.File(str(base_folder / targets_file), "w") as f: - f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + def inject_fake_data(self, tmpdir, config): + in_stereo_dir = pathlib.Path(tmpdir) / "InStereo2k" + os.makedirs(in_stereo_dir, exist_ok=True) - return num_images + split_dir = pathlib.Path(in_stereo_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) + num_examples = {"train": 4, "test": 5}.get(config["split"], 0) -class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.RenderedSST2 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} + for i in range(num_examples): + self._make_scene_folder(split_dir, f"scene_{i:06d}", (100, 200)) - def inject_fake_data(self, tmpdir: str, config): - root_folder = pathlib.Path(tmpdir) / "rendered-sst2" - image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]] + return num_examples - num_images_per_class = {"train": 5, "test": 6, "val": 7} - sampled_classes = ["positive", "negative"] - for cls in sampled_classes: - datasets_utils.create_image_folder( - image_folder, - cls, - file_name_fn=lambda idx: f"{idx}.png", - num_examples=num_images_per_class[config["split"]], - ) + def test_splits(self): + for split_name in ["train", "test"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - return len(sampled_classes) * num_images_per_class[config["split"]] + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass if __name__ == "__main__": diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index a7dd8397bab..8b38ba73a85 100644 --- a/torchvision/datasets/__init__.py +++ 
b/torchvision/datasets/__init__.py @@ -1,5 +1,5 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K -from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereoSynthetic +from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereo, InStereo2k from .caltech import Caltech101, Caltech256 from .celeba import CelebA from .cifar import CIFAR10, CIFAR100 @@ -106,4 +106,13 @@ "FGVCAircraft", "EuroSAT", "RenderedSST2", + "StereoETH3D", + "StereoFallingThings", + "StereoKitti2012", + "StereoKitti2015", + "StereoMiddlebury2014", + "StereoSceneFlow", + "StereoSintel", + "CREStereo", + "InStereo2k", ) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 702386b05bd..4de0b5b0532 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,31 +1,30 @@ from abc import ABC, abstractmethod from glob import glob from pathlib import Path -import pathlib import random import re import shutil -from typing import Callable, List, Optional, Tuple, Any +from typing import Callable, List, Optional, Tuple import warnings from jsonschema import ValidationError from torch import Tensor from .vision import VisionDataset -from .utils import download_and_extract_archive, download_url, verify_str_arg +from .utils import download_and_extract_archive, verify_str_arg import os import numpy as np from PIL import Image import json __all__ = ( - "CREStereo" # waiting for download / need to find valid mask procedure + "CREStereo" "StereoMiddlebury2014" "StereoETH3D" "StereoKitti2012" "StereoKitti2015" "StereoSintel" - "StereoSceneFlow" # need to find valid mask procedure + "StereoSceneFlow" "StereoFallingThings" - "InStereo2k" # need to find valid mask procedure + "InStereo2k" ) @@ -54,13 +53,38 @@ def read_pfm_file(file_path: str) -> np.array: data = np.reshape(data, (height, width, channels)) data = np.flipud(data) - return data + # PFM files for disparity maps should contain only a single channel + # they should also be returned in (C, H, W) format + return np.transpose(data[:, :, :1], (2, 0, 1)) class StereoMatchingDataset(ABC, VisionDataset): """Base interface for Stereo matching datasets""" def __init__(self, root: str, transforms: Optional[Callable] = None): + """ + + Args: + root (str): Root directory of the dataset. + transforms (callable, optional): A function/transform that takes in Tuples of + (images, disparities, valid_masks) and returns a transformed version of each of them. + images is a Tuple of (``PIL.Image``, ``PIL.Image``) + disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W) + valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W) + + In some cases, when a dataset does not provide disparities, the ``disparities`` and + ``valid_masks`` can be Tuples containing None values.
+ + For training splits generally the datasets provide a minimal guarantee of + images: (``PIL.Image``, ``PIL.Image``) + disparities: (``np.ndarray``, ``None``) with shape (1, H, W) + valid_masks: (``np.ndarray``, ``None``) with shape (H, W) + + For some test splits, the datasets provide outputs that look like: + images: (``PIL.Image``, ``PIL.Image``) + disparities: (``None``, ``None``) + valid_masks: (``None``, ``None``) + """ super().__init__(root=root) self.transforms = transforms @@ -79,6 +103,18 @@ def _read_disparity(self, file_path: str) -> Tuple: pass def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Return the example at the given index. + + Args: + index (int): The index of the example to retrieve + + Returns: + tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` where ``valid_mask`` + is a numpy boolean mask of shape (H, W) + indicating which disparity values are valid. The disparity is a numpy array of + shape (1, H, W) and the images are PIL images. ``disparity`` and ``valid_mask`` are None for + datasets for which the authors did not provide ``split="test"`` annotations. + """ img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) @@ -98,21 +134,59 @@ def __len__(self) -> int: return len(self._images) -class CREStereoSynthetic(StereoMatchingDataset): +class CREStereo(StereoMatchingDataset): """Synthetic dataset used in training the `CREStereo `_ architecture. - Ported from the download script in the paper github `repo `_. - """ - DOWNLOAD_SPACE = 4 * 1024 * 1024 * 1024 # dataset requires download requires about 400 GB of free space + Dataset details on the official paper `repo `_. - EXPERIMENTAL_RANGE = 1 # TODO: remove after validating dataset structure / flow + The dataset is expected to have the following structure: :: - MAX_DISP = 256. + root + CREStereo + tree + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + img2_left.jpg + img2_right.jpg + img2_left.disp.jpg + img2_right.disp.jpg + ... + shapenet + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... + reflective + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... + hole + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False): + Args: + root (str): Root directory of the dataset. + split (str): The split of the dataset to use. One of ``"tree"``, ``"shapenet"``, ``"reflective"``, ``"hole"`` + or ``"all"``. The ``"all"`` split contains all of the above splits. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. + download (bool, optional): If true, downloads the dataset from the internet and puts it in the root directory. + max_disparity (float, optional): Maximum disparity value. Used to compute the valid mask.
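To make the contract above concrete, here is a minimal sketch of a ``transforms`` callable that satisfies it. The callable name is illustrative and the fallback mask derivation (positive disparities below an assumed maximum) simply mirrors what the CREStereo loader does; it is not part of the patch.

    import numpy as np

    def passthrough_transform(images, disparities, valid_masks):
        # images: (PIL.Image, PIL.Image), returned unchanged in this sketch
        # disparities: tuple of (1, H, W) float arrays, or Nones on test splits
        # valid_masks: tuple of (H, W) bool arrays, or Nones
        new_masks = []
        for disparity, mask in zip(disparities, valid_masks):
            if disparity is None:
                # unannotated test splits: forward the None untouched
                new_masks.append(None)
            elif mask is None:
                # assumed fallback: positive disparities below 256 px are valid
                new_masks.append((disparity[0] > 0.0) & (disparity[0] < 256.0))
            else:
                new_masks.append(mask.astype(bool))
        return images, disparities, tuple(new_masks)

Any callable with this three-tuple signature can be passed as ``transforms=`` to the datasets in this file.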
+ """ + DOWNLOAD_SPACE = 400 * 1024 * 1024 * 1024 + + def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False, max_disparity: float = 256.): super().__init__(root, transforms) root = Path(root) / "CREStereo" + self.max_disparity = max_disparity # if the API user requests a dataset download check that the user can download it if download: @@ -149,16 +223,23 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) disparity_maps_right = list(p.replace("_right", "_right.disp") for p in imgs_right) + if not any(os.path.exists(file_path) for file_path in disparity_maps_left): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) + + if not any(os.path.exists(file_path) for file_path in disparity_maps_right): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = (disparity < self.MAX_DISP) & (disparity > 0.) + valid = (disparity < self.max_disparity) & (disparity > 0.) + # unsqueeze the disparity map into (C, H, W) format + disparity = disparity[None, :, :] return disparity, valid def _download_dataset(self, root: str) -> None: - # TODO: remove before release, used only for testing purposes dirs = ["tree", "shapenet", "reflective", "hole"] # create directory subtree for the download for d in dirs: @@ -221,11 +302,11 @@ class StereoMiddlebury2014(StereoMatchingDataset): Args: root (string): Root directory of the Middleburry 2014 Dataset. - split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" - use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. Sampled with equal probability. + split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional" + use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. + The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
""" @@ -268,7 +349,7 @@ def __init__( self._download_dataset(root) root = Path(root) / "Middlebury2014" - print(split) + if not os.path.exists(root / split): raise FileNotFoundError( f"The {split} directory was not found in the provided root directory" @@ -292,24 +373,23 @@ def __init__( for calibration_suffix in calibrartion_suffixes: scene_pattern = "*" + calibration_suffix - print(scene_pattern) imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png"))) imgs_right = sorted(glob(str(root / split / scene_pattern / "im1.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) self._images += list((l, r) for l, r in zip(imgs_left, imgs_right)) if split == "test": - dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) else: + disparity_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) + disparity_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root / split)) - dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) - dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) - - self._disparities += list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) + self._disparities += list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self.use_ambient_views = use_ambient_views @@ -317,6 +397,7 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: + """Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.""" if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: # initialize sampleable container ambient_file_paths = list(file_path.replace("im1.png", view_name) for view_name in ["im1E.png", "im1L.png"]) @@ -332,6 +413,8 @@ def _read_disparity(self, file_path: str) -> Tuple: return None, None disparity_map = read_pfm_file(file_path) valid_mask = disparity_map < 1e3 + # remove the channel dimension from the valid mask + valid_mask = valid_mask[0, :, :] return disparity_map, valid_mask def _download_dataset(self, root: str): @@ -357,10 +440,13 @@ def _download_dataset(self, root: str): download_and_extract_archive(url=test_set_url, download_root=root, remove_finished=True) for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): for scene in scene_names: - shutil.move(os.path.join(scene_dir, scene), os.path.join(root, scene)) + scene_dst_dir = root / "test" / scene + scene_src_dir = scene_dir / scene + os.makedirs(scene_dst_dir, exist_ok=True) + shutil.move(str(scene_src_dir), str(scene_dst_dir)) # cleanup MiddEval3 directory - shutil.rmtree(os.path.join(root, "MiddEval3")) + shutil.rmtree(str(root / "MiddEval3")) class StereoETH3D(StereoMatchingDataset): @@ -411,8 +497,7 @@ class StereoETH3D(StereoMatchingDataset): root (string): Root directory of the ETH3D Dataset. split (string, optional): The dataset split of scenes, either "train" (default) or "test". calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. 
- transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -427,7 +512,6 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) @@ -435,8 +519,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) else: disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*0GT.pfm"))) - # no masks for the right view, always using left as reference disparity_maps_right = list("" for _ in disparity_maps_left) + if not len(disparity_maps_left): + raise FileNotFoundError("No disparity maps found in {}".format(root / anot_dir)) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) @@ -447,10 +532,10 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = read_pfm_file(file_path) valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) - valid_mask = np.array(valid_mask) + valid_mask = np.array(valid_mask).astype(np.bool) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -474,8 +559,7 @@ class StereoKitti2012(StereoMatchingDataset): Args: root (string): Root directory where Kitti2012 is located. split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. """ @@ -494,6 +578,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png"))) disparity_maps_right = list("" for _ in disparity_maps_left) + if not len(disparity_maps_left): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -506,7 +593,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path)) / 256.0 valid_mask = disparity_map > 0.0 - + # unsqueeze the disparity map into (C, H, W) format + disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: @@ -533,8 +621,7 @@ class StereoKitti2015(StereoMatchingDataset): Args: root (string): Root directory where Kitti2015 is located. split (string, optional): The dataset split of scenes, either "train" (default) or test. 
- transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -552,6 +639,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png"))) disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -564,7 +654,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path)) / 256.0 valid_mask = disparity_map < 0.0 - + # unsqueeze the disparity map into (C, H, W) format + disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: @@ -574,10 +665,45 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoSintel(StereoMatchingDataset): """"Sintel `Stereo Dataset `_. + The dataset is expected to have the following structure: :: + + root + Sintel + training + final_left + scene1 + img1.png + img2.png + ... + ... + final_right + scene2 + img1.png + img2.png + ... + ... + disparities + scene1 + img1.png + img2.png + ... + ... + occlusions + scene1 + img1.png + img2.png + ... + ... + outofframe + scene1 + img1.png + img2.png + ... + ... + Args: root (string): Root directory where Sintel Stereo is located. - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, transforms: Optional[Callable] = None): @@ -587,11 +713,13 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) + if not len(dps_masks_left): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + disparity_maps_right = list("" for _ in dps_masks_left) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) @@ -605,7 +733,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) disparity_map = r * 4 + g / (2**6) + b / (2**14) - + # reshape into (C, H, W) format + disparity_map = np.transpose(disparity_map, (2, 0, 1)) # occlusion mask valid_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) == 0 # out of frame mask @@ -662,6 +791,10 @@ class StereoSceneFlow(StereoMatchingDataset): FlyingThings3D ... ... + + Args: + root (string): Root directory where SceneFlow is located. 
+ transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None): @@ -683,7 +816,6 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c for p in passes: imgs_left = sorted(glob(str(root / p / "*" / "left" / "*.png"))) imgs_right = sorted(glob(str(root / p / "*" / "right" / "*.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root / p)) @@ -693,15 +825,19 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + if not any(os.path.exists(file_path) for file_path in disparity_maps_left): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) + + if not any(os.path.exists(file_path) for file_path in disparity_maps_right): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: - if not os.path.exists(file_path): - raise FileNotFoundError("Disparity map {} not found".format(file_path)) - disparity = read_pfm_file(file_path) - valid = np.ones_like(disparity) + # keep valid mask with shape (H, W) + valid = np.ones(disparity.shape[1:]).astype(np.bool) return disparity, valid def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: @@ -745,13 +881,20 @@ class StereoFallingThings(StereoMatchingDataset): ... scene2 ... + + Args: + root (string): Root directory where FallingThings is located. + split (string): Either "single", "mixed", or "both". + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. 
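FallingThings ships depth maps rather than disparities, so the loader has to invert the usual depth-from-disparity relation using the focal length stored in each scene's ``_camera_settings.json``. A standalone sketch of that conversion, reusing the fixed ``fx * 6.0 * 100`` scaling that the loader in this patch adopts from the dataset readme:

    import json

    import numpy as np
    from PIL import Image

    def read_fallingthings_disparity(depth_path, settings_path):
        # single-channel uint depth PNG, promoted to float for the division
        depth = np.array(Image.open(depth_path)).astype(np.float32)
        with open(settings_path, "r") as f:
            fx = json.load(f)["camera_settings"][0]["intrinsic_settings"]["fx"]
        # invert depth = fx * baseline / disparity; 6.0 * 100 is the baseline
        # scaling used by the loader in this patch
        disparity = (fx * 6.0 * 100.0) / depth
        valid = disparity > 0
        return disparity[None, :, :], valid  # (1, H, W) map, (H, W) mask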
+ """ def __init__(self, root: str, split: str = "single", transforms: Optional[Callable] = None): super().__init__(root, transforms) + root = Path(root) / "FallingThings" + verify_str_arg(split, "split", valid_values=("single", "mixed", "both")) - split = split.upper() splits = { "single": ["single"], @@ -760,28 +903,35 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab }[split] for s in splits: - imgs_left = sorted(glob(str(root / s / "*.left.jpg"))) - imgs_right = sorted(glob(str(root / s / "*.right.jpg"))) - + imgs_left = sorted(glob(str(root / s / "*" / "*.left.jpg"))) + imgs_right = sorted(glob(str(root / s / "*" / "*.right.jpg"))) if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs - disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png"))) - disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png"))) + disparity_maps_left = sorted(glob(str(root / s / "*" / "*.left.depth.png"))) + disparity_maps_right = sorted(glob(str(root / s / "*" / "*.right.depth.png"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root)) disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: - depth = Image.Open(file_path) - with open(os.path.split(file_path)[0] + '_camera_settings.json', 'r') as f: + # (H, W) image + depth = np.array(Image.open(file_path)) + # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt + # in order to extract disparity from depth maps + with open(os.path.split(file_path)[0] + '/_camera_settings.json', 'r') as f: intrinsics = json.load(f) fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + # inverse of depth-from-disparity equation disparity = (fx * 6.0 * 100) / depth.astype(np.float32) valid = disparity > 0 + # unsqueeze disparity to (C, H, W) + disparity = disparity[None, :, :] return disparity, valid def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: @@ -789,7 +939,7 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: class InStereo2k(StereoMatchingDataset): - """InStereo2k ``_ dataset + """InStereo2k ``_ dataset The dataset is expected to have the following structre: :: @@ -813,6 +963,11 @@ class InStereo2k(StereoMatchingDataset): ... scene2 ... + + Args: + root (string): Root directory where InStereo2k is located. + split (string): Either "train" or "test". + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. 
""" def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -820,9 +975,10 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl root = Path(root) / "InStereo2k" / split + verify_str_arg(split, "split", valid_values=("train", "test")) + imgs_left = sorted(glob(str(root / "*" / "left.png"))) imgs_right = list(p.replace("left", "right") for p in imgs_left) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) @@ -832,10 +988,18 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) + if not any(os.path.exists(file_path) for file_path in disparity_maps_left): + raise FileNotFoundError("No disparity valid maps found in {}".format(root)) + + if not any(os.path.exists(file_path) for file_path in disparity_maps_right): + raise FileNotFoundError("No disparity valid maps found in {}".format(root)) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities = disparity_maps def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = np.ones_like(disparity) + valid = np.ones_like(disparity).astype(np.bool) + # unsqueeze disparity to (C, H, W) + disparity = disparity[None, :, :] return disparity, valid From bbb1c562c4435b4324e03f73bad5ed985b149e2a Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 10:12:06 +0100 Subject: [PATCH 09/35] Ran ufmt. (#6259) --- torchvision/datasets/__init__.py | 12 +- torchvision/datasets/_stereo_matching.py | 195 +++++++++++++++-------- 2 files changed, 138 insertions(+), 69 deletions(-) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 8b38ba73a85..973d5ca9f7e 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -1,5 +1,15 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K -from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereo, InStereo2k +from ._stereo_matching import ( + StereoETH3D, + StereoFallingThings, + StereoKitti2012, + StereoKitti2015, + StereoMiddlebury2014, + StereoSceneFlow, + StereoSintel, + CREStereo, + InStereo2k, +) from .caltech import Caltech101, Caltech256 from .celeba import CelebA from .cifar import CIFAR10, CIFAR100 diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 4de0b5b0532..3edb0f639a5 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,19 +1,21 @@ -from abc import ABC, abstractmethod -from glob import glob -from pathlib import Path +import json +import os import random import re import shutil -from typing import Callable, List, Optional, Tuple import warnings +from abc import ABC, abstractmethod +from glob import glob +from pathlib import Path +from typing import Callable, List, Optional, Tuple + +import numpy as np from jsonschema import ValidationError +from PIL import Image from torch import Tensor -from .vision import VisionDataset + from .utils import download_and_extract_archive, verify_str_arg -import os -import numpy as np -from PIL import Image -import json +from .vision import VisionDataset 
__all__ = ( "CREStereo" @@ -35,7 +37,7 @@ def read_pfm_file(file_path: str) -> np.array: if not header in [b"PF", b"Pf"]: raise ValidationError(f"Not a valid PFM file: {file_path}") - dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) + dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) if not dim_match: raise ValidationError(f"Malformed PFM header: {file_path}") @@ -45,11 +47,11 @@ def read_pfm_file(file_path: str) -> np.array: # check for endian type if scale < 0: scale = -scale - endian = '<' + endian = "<" else: - endian = '>' + endian = ">" - data = np.fromfile(file, endian + 'f') + data = np.fromfile(file, endian + "f") data = np.reshape(data, (height, width, channels)) data = np.flipud(data) @@ -126,7 +128,11 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: valid_masks = (valid_mask_left, valid_mask_right) if self.transforms is not None: - imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks) + ( + imgs, + dsp_maps, + valid_masks, + ) = self.transforms(imgs, dsp_maps, valid_masks) return imgs[0], imgs[1], dsp_maps[0], valid_masks[0] @@ -135,7 +141,7 @@ def __len__(self) -> int: class CREStereo(StereoMatchingDataset): - """Synthetic dataset used in training the `CREStereo `_ architecture. + """Synthetic dataset used in training the `CREStereo `_ architecture. Dataset details on the official paper `repo `_. @@ -179,10 +185,18 @@ class CREStereo(StereoMatchingDataset): transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (bool, optional): If true, downloads the dataset from the internet and puts it in the root directory. max_disparity (int, optional): Maximum disparity value. Used to compute the valid mask. - """ + """ + DOWNLOAD_SPACE = 400 * 1024 * 1024 * 1024 - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False, max_disparity: float = 256.): + def __init__( + self, + root: str, + split: str = "tree", + transforms: Optional[Callable] = None, + download: bool = False, + max_disparity: float = 256.0, + ): super().__init__(root, transforms) root = Path(root) / "CREStereo" @@ -234,7 +248,7 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = (disparity < self.max_disparity) & (disparity > 0.) + valid = (disparity < self.max_disparity) & (disparity > 0.0) # unsqueeze the disparity map into (C, H, W) format disparity = disparity[None, :, :] return disparity, valid @@ -261,33 +275,33 @@ class StereoMiddlebury2014(StereoMatchingDataset): Middlebury2014 train scene1-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm scene2-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm ... additional scene1-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm ... 
test @@ -305,15 +319,56 @@ class StereoMiddlebury2014(StereoMatchingDataset): split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional" use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. - calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. - download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. + download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. """ splits = { - "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"], - "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia", "DjembeL", "CrusadeP", "Crusade", "Hoops", "Bicycle2", "Staircase", "Newkuba", "AustraliaP", "Djembe", "Livingroom", "Computer"] + "train": [ + "Adirondack", + "Jadeplant", + "Motorcycle", + "Piano", + "Pipes", + "Playroom", + "Playtable", + "Recycle", + "Shelves", + "Vintage", + ], + "additional": [ + "Backpack", + "Bicycle1", + "Cable", + "Classroom1", + "Couch", + "Flowers", + "Mask", + "Shopvac", + "Sticks", + "Storage", + "Sword1", + "Sword2", + "Umbrella", + ], + "test": [ + "Plants", + "Classroom2E", + "Classroom2", + "Australia", + "DjembeL", + "CrusadeP", + "Crusade", + "Hoops", + "Bicycle2", + "Staircase", + "Newkuba", + "AustraliaP", + "Djembe", + "Livingroom", + "Computer", + ], } def __init__( @@ -323,7 +378,7 @@ def __init__( calibration: Optional[str] = "perfect", use_ambient_views: bool = False, transforms: Optional[Callable] = None, - download: bool = False + download: bool = False, ): super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) @@ -333,8 +388,7 @@ def __init__( if split == "test": calibration = None warnings.warn( - "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", - RuntimeWarning + "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", RuntimeWarning ) else: if split != "test": @@ -342,7 +396,7 @@ def __init__( warnings.warn( f"\nSplit '{split}' has calibration settings, however None was provided as an argument." f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.", - RuntimeWarning + RuntimeWarning, ) if download: @@ -351,15 +405,14 @@ def __init__( root = Path(root) / "Middlebury2014" if not os.path.exists(root / split): - raise FileNotFoundError( - f"The {split} directory was not found in the provided root directory" - ) + raise FileNotFoundError(f"The {split} directory was not found in the provided root directory") split_scenes = self.splits[split] # check that the provided root folder contains the scene splits if not any( # using startswith to account for perfect / imperfect calibrartion - scene.startswith(s) for scene in os.listdir(root / split) + scene.startswith(s) + for scene in os.listdir(root / split) for s in split_scenes ): raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.") @@ -429,7 +482,9 @@ def _download_dataset(self, root: str): scene_name = f"{scene}-{calibration}" for calibration in ["perfect", "imperfect"]: scene_url = f"{base_url}/{scene_name}.zip" - download_and_extract_archive(url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True) + download_and_extract_archive( + url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True + ) if any(s not in os.listdir(root) for s in self.splits["test"]): # test split is downloaded from a different location @@ -450,7 +505,7 @@ def _download_dataset(self, root: str): class StereoETH3D(StereoMatchingDataset): - """"ETH3D `Low-Res Two-View `_ dataset. + """ "ETH3D `Low-Res Two-View `_ dataset. The dataset is expected to have the following structure: :: @@ -458,13 +513,13 @@ class StereoETH3D(StereoMatchingDataset): ETH3D two_view_training scene1 - im1.png + im1.png im0.png images.txt cameras.txt calib.txt scene2 - im1.png + im1.png im0.png images.txt cameras.txt @@ -480,13 +535,13 @@ class StereoETH3D(StereoMatchingDataset): ... two_view_testing scene1 - im1.png + im1.png im0.png images.txt cameras.txt calib.txt scene2 - im1.png + im1.png im0.png images.txt cameras.txt @@ -496,7 +551,7 @@ class StereoETH3D(StereoMatchingDataset): Args: root (string): Root directory of the ETH3D Dataset. split (string, optional): The dataset split of scenes, either "train" (default) or "test". - calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ @@ -540,7 +595,7 @@ def __getitem__(self, index: int) -> Tuple: class StereoKitti2012(StereoMatchingDataset): - """"Kitti dataset from the `2012 `_ stereo evaluation benchmark. + """ "Kitti dataset from the `2012 `_ stereo evaluation benchmark. Uses the RGB images for consistency with Kitti 2015. The dataset is expected to have the following structure: :: @@ -560,7 +615,7 @@ class StereoKitti2012(StereoMatchingDataset): root (string): Root directory where Kitti2012 is located. split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. - download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. + download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
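Both KITTI loaders decode disparity from uint16 PNGs in which the stored value is the true disparity multiplied by 256 and zero marks pixels without ground truth. Under those assumptions, the decoding reduces to the following standalone sketch:

    import numpy as np
    from PIL import Image

    def read_kitti_disparity(file_path):
        # uint16 PNG, stored value = disparity * 256; zero means no ground truth
        disparity = np.array(Image.open(file_path), dtype=np.float32) / 256.0
        valid = disparity > 0.0
        return disparity[None, :, :], valid  # (1, H, W) map, (H, W) mask

Note that the Kitti2015 hunk earlier computes ``valid_mask = disparity_map < 0.0``; under this encoding the comparison presumably needs to be ``> 0.0``, as in the 2012 loader.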
""" def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -602,7 +657,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoKitti2015(StereoMatchingDataset): - """"Kitti dataset from the `2015 `_ stereo evaluation benchmark. + """ "Kitti dataset from the `2015 `_ stereo evaluation benchmark. The dataset is expected to have the following structure: :: @@ -663,7 +718,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoSintel(StereoMatchingDataset): - """"Sintel `Stereo Dataset `_. + """ "Sintel `Stereo Dataset `_. The dataset is expected to have the following structure: :: @@ -732,7 +787,7 @@ def _read_disparity(self, file_path: str) -> Tuple: # disparity decoding as per Sintel instructions disparity_map = np.array(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) - disparity_map = r * 4 + g / (2**6) + b / (2**14) + disparity_map = r * 4 + g / (2 ** 6) + b / (2 ** 14) # reshape into (C, H, W) format disparity_map = np.transpose(disparity_map, (2, 0, 1)) # occlusion mask @@ -797,7 +852,9 @@ class StereoSceneFlow(StereoMatchingDataset): transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ - def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None): + def __init__( + self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None + ): super().__init__(root, transforms) root = Path(root) / "SceneFlow" @@ -823,7 +880,9 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c self._images += imgs disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] - disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + disparity_maps_right = [ + file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right + ] if not any(os.path.exists(file_path) for file_path in disparity_maps_left): raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) @@ -924,9 +983,9 @@ def _read_disparity(self, file_path: str) -> Tuple: depth = np.array(Image.open(file_path)) # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt # in order to extract disparity from depth maps - with open(os.path.split(file_path)[0] + '/_camera_settings.json', 'r') as f: + with open(os.path.split(file_path)[0] + "/_camera_settings.json", "r") as f: intrinsics = json.load(f) - fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + fx = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] # inverse of depth-from-disparity equation disparity = (fx * 6.0 * 100) / depth.astype(np.float32) valid = disparity > 0 From 669611eab0681edf1ffef5796f7755150575b4a3 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 10:48:42 +0100 Subject: [PATCH 10/35] Adressed CI/CD errors --- torchvision/datasets/_stereo_matching.py | 41 ++++++++++++------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 3edb0f639a5..254d9d2624a 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -10,7 +10,6 @@ from typing import Callable, List, Optional, Tuple 
import numpy as np -from jsonschema import ValidationError from PIL import Image from torch import Tensor @@ -35,11 +34,11 @@ def read_pfm_file(file_path: str) -> np.array: with open(file_path, "rb") as file: header = file.readline().rstrip() if not header in [b"PF", b"Pf"]: - raise ValidationError(f"Not a valid PFM file: {file_path}") + raise ValueError(f"Not a valid PFM file: {file_path}") dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) if not dim_match: - raise ValidationError(f"Malformed PFM header: {file_path}") + raise ValueError(f"Malformed PFM header: {file_path}") width, height = map(int, dim_match.groups()) channels = 3 if header == b"PF" else 1 @@ -231,7 +230,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) @@ -243,7 +242,7 @@ def __init__( if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -432,7 +431,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - self._images += list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += list((left, right) for left, right in zip(imgs_left, imgs_right)) if split == "test": disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -442,7 +441,7 @@ def __init__( if not len(disparity_maps_left) or not len(disparity_maps_right): raise FileNotFoundError("No disparity maps found in {}".format(root / split)) - self._disparities += list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self.use_ambient_views = use_ambient_views @@ -578,8 +577,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not len(disparity_maps_left): raise FileNotFoundError("No disparity maps found in {}".format(root / anot_dir)) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -639,8 +638,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in 
zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -700,8 +699,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -777,8 +776,8 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): disparity_maps_right = list("" for _ in dps_masks_left) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(dps_masks_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(dps_masks_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -876,7 +875,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root / p)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] @@ -890,7 +889,7 @@ def __init__( if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -967,7 +966,7 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = sorted(glob(str(root / s / "*" / "*.left.depth.png"))) @@ -975,7 +974,7 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab if not len(disparity_maps_left) or not len(disparity_maps_right): raise FileNotFoundError("No disparity maps found in {}".format(root)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -1041,7 +1040,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, 
imgs_right)) self._images = imgs disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) @@ -1053,7 +1052,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities = disparity_maps def _read_disparity(self, file_path: str) -> Tuple: From d9d17a8ff5796ab2c79ce035525533d8d54dc7ed Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 11:21:36 +0100 Subject: [PATCH 11/35] Ran formatting pre-commit hook --- test/datasets_utils.py | 16 ++--- test/test_datasets.py | 76 +++++++++++++----------- torchvision/datasets/_stereo_matching.py | 2 +- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index f051e325968..9afd8f741fd 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -561,9 +561,11 @@ def test_feature_types(self, config): @test_all_configs def test_num_examples(self, config): with self.create_dataset(config) as (dataset, info): - assert len(dataset) == info["num_examples"], f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" + assert ( + len(dataset) == info["num_examples"] + ), f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" - @ test_all_configs + @test_all_configs def test_transforms(self, config): mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) for kwarg in self._TRANSFORM_KWARGS: @@ -587,7 +589,7 @@ class ImageDatasetTestCase(DatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, int) - @ contextlib.contextmanager + @contextlib.contextmanager def create_dataset( self, config: Optional[Dict[str, Any]] = None, @@ -610,7 +612,7 @@ def create_dataset( with self._force_load_images(): yield dataset, info - @ contextlib.contextmanager + @contextlib.contextmanager def _force_load_images(self): open = PIL.Image.open @@ -649,7 +651,7 @@ def _set_default_frames_per_clip(self, inject_fake_data): args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" - @ functools.wraps(inject_fake_data) + @functools.wraps(inject_fake_data) def wrapper(tmpdir, config): args = inject_fake_data(tmpdir, config) if frames_per_clip_last and len(args) == len(args_without_default) - 1: @@ -748,7 +750,7 @@ def size(idx: int) -> Tuple[int, int, int]: ] -@ requires_lazy_imports("av") +@requires_lazy_imports("av") def create_video_file( root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], @@ -790,7 +792,7 @@ def create_video_file( return file -@ requires_lazy_imports("av") +@requires_lazy_imports("av") def create_video_folder( root: Union[str, pathlib.Path], name: Union[str, pathlib.Path], diff --git a/test/test_datasets.py b/test/test_datasets.py index dd3c89b9bdc..5db3be40b4f 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -10,10 +10,10 @@ import random import shutil import string -from typing import List, Callable, Tuple import unittest import xml.etree.ElementTree as ET import zipfile +from typing import List, Callable, Tuple import datasets_utils import numpy as np @@ -28,26 +28,26 
@@ class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - @ staticmethod + @staticmethod def _make_binary_file(num_elements, root, name): file_name = os.path.join(root, name) np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - @ staticmethod + @staticmethod def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) - @ staticmethod + @staticmethod def _make_label_file(num_images, root, name): STL10TestCase._make_binary_file(num_images, root, name) - @ staticmethod + @staticmethod def _make_class_names_file(root, name="class_names.txt"): with open(os.path.join(root, name), "w") as fh: for cname in ("airplane", "bird"): fh.write(f"{cname}\n") - @ staticmethod + @staticmethod def _make_fold_indices_file(root): num_folds = 10 offset = 0 @@ -59,7 +59,7 @@ def _make_fold_indices_file(root): return tuple(range(1, num_folds + 1)) - @ staticmethod + @staticmethod def _make_train_files(root, num_unlabeled_images=1): num_images_in_fold = STL10TestCase._make_fold_indices_file(root) num_train_images = sum(num_images_in_fold) @@ -70,7 +70,7 @@ def _make_train_files(root, num_unlabeled_images=1): return dict(train=num_train_images, unlabeled=num_unlabeled_images) - @ staticmethod + @staticmethod def _make_test_files(root, num_images=2): STL10TestCase._make_image_file(num_images, root, "test_X.bin") STL10TestCase._make_label_file(num_images, root, "test_y.bin") @@ -888,7 +888,7 @@ def inject_fake_data(self, tmpdir, config): return num_images - @ contextlib.contextmanager + @contextlib.contextmanager def create_dataset(self, *args, **kwargs): with super().create_dataset(*args, **kwargs) as output: yield output @@ -1294,7 +1294,7 @@ def _create_archive(self, root, name, *files): return archive - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_feature_types(self, config): feature_types = self.FEATURE_TYPES self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES @@ -1572,7 +1572,7 @@ def _file_name_fn(self, cls, ext, idx): def _is_valid_file_to_extensions(self, is_valid_file): return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_is_valid_file(self, config): extensions = config.pop("extensions") # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the @@ -1582,7 +1582,7 @@ def test_is_valid_file(self, config): ) as (dataset, info): assert len(dataset) == info["num_examples"] - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1603,7 +1603,7 @@ def inject_fake_data(self, tmpdir, config): return dict(num_examples=num_examples_total, classes=classes) - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1702,32 +1702,32 @@ class Places365TestCase(datasets_utils.ImageDatasetTestCase): *((f"{category}/Places365_train_00000001.png", idx) for category, idx in _CATEGORIES_CONTENT), ) - @ staticmethod + @staticmethod def 
_make_txt(root, name, seq): file = os.path.join(root, name) with open(file, "w") as fh: for text, idx in seq: fh.write(f"{text} {idx}\n") - @ staticmethod + @staticmethod def _make_categories_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._CATEGORIES_CONTENT) - @ staticmethod + @staticmethod def _make_file_list_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._FILE_LIST_CONTENT) - @ staticmethod + @staticmethod def _make_image(file_name, size): os.makedirs(os.path.dirname(file_name), exist_ok=True) PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file_name) - @ staticmethod + @staticmethod def _make_devkit_archive(root, split): Places365TestCase._make_categories_txt(root, Places365TestCase._CATEGORIES) Places365TestCase._make_file_list_txt(root, Places365TestCase._FILE_LISTS[split]) - @ staticmethod + @staticmethod def _make_images_archive(root, split, small): folder_name = Places365TestCase._IMAGES[(split, small)] image_size = (256, 256) if small else (512, random.randint(512, 1024)) @@ -2042,7 +2042,7 @@ def inject_fake_data(self, tmpdir, config): return num_examples[config["split"]] - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_flow(self, config): # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images # Also make sure the flow is properly decoded @@ -2101,7 +2101,7 @@ def inject_fake_data(self, tmpdir, config): ) return num_examples - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_flow(self, config): h, w = self.FLOW_H, self.FLOW_W expected_flow = np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1) @@ -2726,7 +2726,9 @@ def inject_fake_data(self, tmpdir, config): def test_training_test_splits(self): with self.create_dataset(split="train") as (dataset, _): - assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" + assert dataset._images and len(dataset._images) == len( + dataset._disparities + ), "Training images do not match with training disparities" for _, _, disparity, valid_mask in dataset: assert len(disparity.shape) == 3 assert len(valid_mask.shape) == 2 @@ -2813,10 +2815,10 @@ def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: scene_dir = os.path.join(root_dir, f"{scene_name}{c}") os.makedirs(scene_dir, exist_ok=True) # make normal images first - datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1L.png", size=(3, 100, 100)) # these are going to end up being gray scale images datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) @@ -2827,7 +2829,7 @@ def inject_fake_data(self, tmpdir, config): split_scene_map = { "train": ["Adirondack", "Jadeplant", 
"Motorcycle", "Piano"], "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"], } middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") @@ -2895,7 +2897,7 @@ def test_warnings_train(self): with pytest.warns( RuntimeWarning, match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." - f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", + f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", ): with self.create_dataset(split=split, calibration=calibration): pass @@ -2905,8 +2907,7 @@ def test_warnings_test(self): split = "test" calibration = "perfect" with pytest.warns( - RuntimeWarning, - match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." + RuntimeWarning, match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." ): with self.create_dataset(split=split, calibration=calibration): pass @@ -3086,13 +3087,14 @@ def test_bad_input(self): class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StereoSceneFlow ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("FlyingThings3D", "Driving", "Monkaa"), - pass_name=("clean", "final") + split=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final") ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: + def _create_pfm_folder( + root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int] + ) -> List[str]: root = pathlib.Path(root) / name os.makedirs(root, exist_ok=True) @@ -3193,8 +3195,12 @@ def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> Lis paths.append(datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0]))) paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))) # single channel depth maps - paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))) - paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))) + paths.append( + StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1])) + ) + paths.append( + StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1])) + ) # camera settings json. 
Minimal example for _read_disparity function testing settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} with open(root / "_camera_settings.json", "w") as f: diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 254d9d2624a..8ef5f3e6e1a 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -33,7 +33,7 @@ def read_pfm_file(file_path: str) -> np.array: # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py with open(file_path, "rb") as file: header = file.readline().rstrip() - if not header in [b"PF", b"Pf"]: + if header not in [b"PF", b"Pf"]: raise ValueError(f"Not a valid PFM file: {file_path}") dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) From a31ee83e49802063dc1941f41dab49b511efc515 Mon Sep 17 00:00:00 2001 From: Ponku Date: Sun, 10 Jul 2022 17:05:50 +0100 Subject: [PATCH 12/35] Added Stereo Matching dataset interface and several classic datasets. --- torchvision/datasets/_stereo_matching.py | 479 +++++++++++++++++++++++ 1 file changed, 479 insertions(+) create mode 100644 torchvision/datasets/_stereo_matching.py diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py new file mode 100644 index 00000000000..42535c1623b --- /dev/null +++ b/torchvision/datasets/_stereo_matching.py @@ -0,0 +1,479 @@ +from abc import ABC, abstractmethod +from functools import reduce +from glob import glob +from pathlib import Path +from random import random +import re +import shutil +from typing import Callable, List, Optional, Tuple, Any +import lzma +from torch import Tensor +from .vision import VisionDataset +from .utils import download_and_extract_archive, download_url, verify_str_arg +import os +from torch.utils.model_zoo import tqdm +import numpy as np +from PIL import Image + +__all__ = ( + "CSEStereo" + "Middlebury2014" + "ETH3D" + "Kitti2012" + "Kitti2015" +) + + +def read_pfm_file(file_path: str) -> np.array: + # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py + with open(file_path, "rb") as file: + header = file.readline().rstrip() + assert header in ["PF", "Pf"], f"{file_path} is not a valid .pfm file" + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) + assert dim_match, f"{file_path} has a Malformed PFM header" + + width, height = map(int, dim_match.groups()) + channels = 3 if header == "PF" else 1 + scale = float(file.readline().rstrip()) + # check for endian type + if scale < 0: + scale = -scale + endian = '<' + else: + endian = '>' + + data = np.fromfile(file, endian + 'f') + data = np.reshape(data, (height, width, channels)) + data = np.flipud(data) + + return data + + +class StereoMatchingDataset(ABC, VisionDataset): + """Base interface for Stereo matching datasets""" + + def __init__(self, root: str, transforms: Optional[Callable] = None): + super().__init__(root=root) + self.transforms = transforms + + self._images: List[Tuple] = [] + self._disparities: List[Tuple] = [] + + def _read_img(self, file_path: str) -> Image.Image: + img = Image.open(file_path) + if img.mode != "RGB": + img = img.convert("RGB") + return img + + @abstractmethod + def _read_disparity(self, file_path: str) -> Tuple: + # function that returns a disparity map and an occlusion map + pass + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + img_left = self._read_img(self._images[index][0]) + img_right = self._read_img(self._images[index][1]) + + dsp_map_left, occ_mask_left = 
self._read_disparity(self._disparities[index][0])
+        dsp_map_right, occ_mask_right = self._read_disparity(self._disparities[index][1])
+
+        imgs = (img_left, img_right)
+        dsp_maps = (dsp_map_left, dsp_map_right)
+        occ_masks = (occ_mask_left, occ_mask_right)
+
+        if self.transforms is not None:
+            imgs, dsp_maps, occ_masks, = self.transforms(imgs, dsp_maps, occ_masks)
+
+        return imgs, dsp_maps, occ_masks
+
+    def __len__(self) -> int:
+        return len(self._images)
+
+
+class CRESSyntethicStereo(StereoMatchingDataset):
+    """Synthetic dataset used in training the `CREStereo `_ architecture.
+
+    Ported from the download script in the paper github `repo `_.
+    """
+    DOWNLOAD_SPACE = 4 * 1024 * 1024 * 1024  # downloading the dataset requires about 400 GB of free space
+
+    EXPERIMENTAL_RANGE = 1  # TODO: remove after validating dataset structure / flow
+
+    def __init__(self, root: str, transforms: Optional[Callable] = None, download: bool = True):
+        super().__init__(root, transforms)
+        # if the API user requests a dataset download check that the user can download it
+        if download:
+            statvfs = os.statvfs(root)
+            # measured in bytes
+            available_space = statvfs.f_frsize * statvfs.f_bavail
+            if available_space - self.DOWNLOAD_SPACE < 0:
+                raise ValueError(
+                    f"The storage device for {root} is too small to download the dataset, "
+                    f"an additional {(self.DOWNLOAD_SPACE - available_space) / (1024 ** 3):.2f} GB are required."
+                )
+            self._download_dataset(root)
+
+    def _download_dataset(self, root: str) -> None:
+        # TODO: remove before release, used only for testing purposes
+        dirs = ["tree", "shapenet", "reflective", "hole"]
+        # create directory subtree for the download
+        for d in dirs:
+            d_path = os.path.join(root, d)
+            if not os.path.exists(d_path):
+                os.makedirs(d_path)
+
+            for i in range(self.EXPERIMENTAL_RANGE):
+                url = f"https://data.megengine.org.cn/research/crestereo/dataset/{d}/{i}.tar"
+                download_and_extract_archive(url=url, download_root=d_path, remove_finished=True)
+
+
+class Middlebury2014(StereoMatchingDataset):
+    """Publicly available scenes from the Middlebury dataset `2014 version `.
+
+    The dataset mostly follows the original format, without containing the ambient subdirectories: ::
+
+        root
+            Middlebury2014
+                train
+                    scene1-{ ,perfect,imperfect}
+                        calib.txt
+                        im{0,1}.png
+                        im1E.png
+                        im1L.png
+                        disp{0,1}.pfm
+                        disp{0,1}-n.png
+                        disp{0,1}-sd.pfm
+                        disp{0,1}y.pfm
+                    scene2-{ ,perfect,imperfect}
+                        calib.txt
+                        im{0,1}.png
+                        im1E.png
+                        im1L.png
+                        disp{0,1}.pfm
+                        disp{0,1}-n.png
+                        disp{0,1}-sd.pfm
+                        disp{0,1}y.pfm
+                    ...
+                additional
+                    scene1-{ ,perfect,imperfect}
+                        calib.txt
+                        im{0,1}.png
+                        im1E.png
+                        im1L.png
+                        disp{0,1}.pfm
+                        disp{0,1}-n.png
+                        disp{0,1}-sd.pfm
+                        disp{0,1}y.pfm
+                    ...
+                test
+                    scene1
+                        calib.txt
+                        im{0,1}.png
+                    scene2
+                        calib.txt
+                        im{0,1}.png
+                    ...
+
+
+    Args:
+        root (string): Root directory of the Middlebury 2014 Dataset.
+        split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional".
+        use_ambient_views (boolean, optional): Whether to use different exposure or lighting views when possible.
+            Views are sampled with equal probability.
+        calibration (string, optional): Whether to use the calibrated (default) or uncalibrated scenes.
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
+        download (boolean, optional): Whether to download the dataset in the ``root`` directory.
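+
+    Example:
+        A minimal usage sketch, assuming the scenes are already downloaded or extracted
+        under ``root`` (``"datasets"`` below is a placeholder path)::
+
+            dataset = Middlebury2014(root="datasets", split="train")
+            imgs, dsp_maps, occ_masks = dataset[0]
+            img_left, img_right = imgs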
+ """ + + splits = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"], + "test": ['Plants', 'Classroom2E', 'Classroom2', 'Australia', 'DjembeL', 'CrusadeP', 'Crusade', 'Hoops', 'Bicycle2', 'Staircase', 'Newkuba', 'AustraliaP', 'Djembe', 'Livingroom', 'Computer'] + } + + def __init__( + self, + *, + root: str, + split: str = "train", + use_ambient_views: bool = False, + transforms: Optional[Callable] = None, + download: bool = False + ): + super().__init__(root, transforms) + verify_str_arg(split, "split", valid_values=("train", "test", "additional")) + + if download: + self._download_dataset(root) + + root = Path(root) / "FlyingChairs" + if not os.path.exists(root / split): + raise FileNotFoundError( + f"The {split} directory was not found in the provided root directory" + ) + + split_scenes = self.splits[split] + # check that the provided root folder contains the scene splits + if not all(s in os.listdir(root / split) for s in split_scenes): + raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.") + + imgs_left = sorted(glob(str(root / split / "*" / "im0.png"))) + imgs_right = sorted(glob(str(root / split / "*" / "im1.png"))) + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + + if split == "test": + dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + else: + + dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) + dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) + self._disparities = list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) + + self.use_ambient_views = use_ambient_views + + def __getitem__(self, index: int) -> Tuple: + return super().__getitem__(index) + + def _read_img(self, file_path: str) -> Image.Image: + if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: + # initialize sampleable container + ambient_file_paths = list(file_path.replace("im1.png", view_name) for view_name in ["im1E.png", "im1L.png"]) + # double check that we're not going to try to read from an invalid file path + ambient_file_paths = list(filter(lambda p: os.path.exists(p), ambient_file_paths)) + # keep the original image as an option as well for uniform sampling between base views + ambient_file_paths.append(file_path) + file_path = random.choice(ambient_file_paths) + return super()._read_img(file_path) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): # case when dealing with the test split + return None, None + dsp_mask = read_pfm_file(file_path) + occ_mask = dsp_mask < 1e3 + return dsp_mask, occ_mask + + def _download_dataset(self, root: str): + base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip" + # train and additional splits have 2 different calibration settings + root = Path(root) / "Middlebury2014" + for split_name, split_scenes in self.splits.values(): + if split_name == "test": + continue + split_root = root / split_name + for scene in split_scenes: + scene_name = f"{scene}-{calibration}" + for calibration in ["perfect", "imperfect"]: + scene_url = f"{base_url}/{scene_name}.zip" + download_and_extract_archive(url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True) + + if any(s not in os.listdir(root) for s 
in self.splits["test"]): + # test split is downloaded from a different location + test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip" + + # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF + # we want to move the contents from testF into the directory + download_and_extract_archive(url=test_set_url, download_root=root, remove_finished=True) + for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): + for scene in scene_names: + shutil.move(os.path.join(scene_dir, scene), os.path.join(root, scene)) + + # cleanup MiddEval3 directory + shutil.rmtree(os.path.join(root, "MiddEval3")) + + +class ETH3D(StereoMatchingDataset): + """"ETH3D `Low-Res Two-View `_ dataset. + + The dataset is expected to have the following structure: :: + + root + ETH3D + two_view_training + scene1 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + scene2 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + ... + two_view_training_gt + scene1 + disp0GT.pfm + mask0nocc.png + scene2 + disp0GT.pfm + mask0nocc.png + ... + two_view_testing + scene1 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + scene2 + im1.png + im0.png + images.txt + cameras.txt + calib.txt + ... + + Args: + root (string): Root directory of the ETH3D Dataset. + split (string, optional): The dataset split of scenes, either "train" (default) or "test". + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + transforms (callalbe, optional): A function/transform that takes in + ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("train", "test")) + + root = Path(root) / "ETH3D" + img_dir = "two_view_training" if split == "train" else "two_view_testing" + anot_dir = "two_view_training_gt" + + imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) + imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) + + if split == "test": + dsp_masks_left, dsp_masks_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + else: + dsp_masks_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm"))) + # no masks for the right view, always using left as reference + dsp_masks_right = list("" for _ in dsp_masks_left) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + dsp_mask = read_pfm_file(file_path) + occ_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) + occ_mask = np.array(occ_mask) + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) + + +class Kitti2012(StereoMatchingDataset): + """"Kitti dataset from the `2012 `_ stereo evaluation benchmark. + Uses the RGB images for consistency with Kitti 2015. + + The dataset is expected to have the following structure: :: + + root + Kitti2012 + testing + colored_0 + colored_1 + training + colored_0 + colored_1 + disp_noc + calib + + Args: + root (string): Root directory where Kitti2012 is located. 
+        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
+    """
+
+    def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None):
+        super().__init__(root, transforms)
+
+        verify_str_arg(split, "split", valid_values=("train", "test"))
+
+        root = Path(root) / "Kitti2012" / (split + "ing")
+        imgs_left = sorted(glob(str(root / "colored_0" / "*_10.png")))
+        imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png")))
+
+        if split == "train":
+            dsp_masks_left = sorted(glob(str(root / "disp_noc" / "*.png")))
+            dsp_masks_right = list("" for _ in dsp_masks_left)
+        else:
+            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+
+        self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
+        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+
+    def _read_disparity(self, file_path: str) -> Tuple:
+        if not os.path.exists(file_path):
+            return None, None
+
+        dsp_mask = np.array(Image.open(file_path)) / 256.0
+        occ_mask = dsp_mask > 0.0
+
+        return dsp_mask, occ_mask
+
+    def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
+        return super().__getitem__(index)
+
+
+class Kitti2015(StereoMatchingDataset):
+    """Kitti dataset from the `2015 `_ stereo evaluation benchmark.
+
+    The dataset is expected to have the following structure: ::
+
+        root
+            Kitti2015
+                testing
+                    image_2
+                    image_3
+                training
+                    image_2
+                    image_3
+                    disp_noc_0
+                    disp_noc_1
+                    calib
+
+    Args:
+        root (string): Root directory where Kitti2015 is located.
+        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
+        transforms (callable, optional): A function/transform that takes in
+            ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version.
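+
+    Example:
+        A minimal usage sketch, assuming ``root`` already contains a ``Kitti2015`` folder
+        laid out as above (``"datasets"`` below is a placeholder path)::
+
+            dataset = Kitti2015(root="datasets", split="train")
+            imgs, dsp_maps, occ_masks = dataset[0]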
+ """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("train", "test")) + + root = Path(root) / "Kitti2015" / (split + "ing") + imgs_left = sorted(glob(str(root / "image_2" / "*_10.png"))) + imgs_right = sorted(glob(str(root / "image_3" / "*_10.png"))) + + if split == "train": + dsp_masks_left = sorted(glob(str(root / "disp_noc_0" / "*.png"))) + dsp_masks_right = sorted(glob(str(root / "disp_noc_1" / "*.png"))) + else: + dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + dsp_mask = np.array(Image.open(file_path)) / 256.0 + occ_mask = dsp_mask > 0.0 + + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) From 4a5ac8931cb04d85d6bd833af2a0b0c8ebffcdd9 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 20:19:23 +0100 Subject: [PATCH 13/35] added SceneFlow, FallingThings and CREStereo --- torchvision/datasets/_stereo_matching.py | 47 +++++++++++++++++++++++- vision | 1 + 2 files changed, 47 insertions(+), 1 deletion(-) create mode 160000 vision diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 42535c1623b..960e443bd46 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -28,7 +28,8 @@ def read_pfm_file(file_path: str) -> np.array: # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py with open(file_path, "rb") as file: header = file.readline().rstrip() - assert header in ["PF", "Pf"], f"{file_path} is not a valid .pfm file" + assert header in [b"PF", b"Pf"], f"{file_path} is not a valid .pfm file" + dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) assert dim_match, f"{file_path} has a Malformed PFM header" @@ -477,3 +478,47 @@ def _read_disparity(self, file_path: str) -> Tuple: def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: return super().__getitem__(index) + + +class SintelDataset(StereoMatchingDataset): + """"Sintel `Stereo Dataset `_. + + Args: + root (string): Root directory where Sintel Stereo is located. + transforms (callalbe, optional): A function/transform that takes in + ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. 
+ """ + + def __init__(self, root: str, transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + root = Path(root) / "Sintel" + + imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) + imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) + + dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) + dsp_masks_right = list("" for _ in dps_masks_left) + + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._dsp_masks = list((l, r) for l, r in zip(dps_masks_left, dsp_masks_right)) + + def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + return None, None + + # disparity decoding as per Sintel instructions + dsp_mask = np.array(Image.open(file_path), dtype=np.float32) + r, g, b = np.split(dsp_mask, 3, axis=-1) + dsp_mask = r * 4 + g / (2**6) + b / (2**14) + + # occlusion mask + occ_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) > 0 + # out of frame mask + off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) > 0 + # combine the masks together + occ_mask = np.logical_or(off_mask, occ_mask) + return dsp_mask, occ_mask + + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + return super().__getitem__(index) diff --git a/vision b/vision new file mode 160000 index 00000000000..bd19fb8ea9b --- /dev/null +++ b/vision @@ -0,0 +1 @@ +Subproject commit bd19fb8ea9b1f67df2a2a1ee116874609ad3ee8c From a1fc699e18c0d1e6e541b7d1e49fc3397c6572c8 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 23:29:04 +0100 Subject: [PATCH 14/35] added SceneFlow, FallingThings and CREStereo --- torchvision/datasets/_stereo_matching.py | 228 ++++++++++++++++++----- 1 file changed, 183 insertions(+), 45 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 960e443bd46..65336503b87 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,26 +1,28 @@ from abc import ABC, abstractmethod -from functools import reduce from glob import glob from pathlib import Path from random import random import re import shutil from typing import Callable, List, Optional, Tuple, Any -import lzma from torch import Tensor from .vision import VisionDataset from .utils import download_and_extract_archive, download_url, verify_str_arg import os -from torch.utils.model_zoo import tqdm import numpy as np from PIL import Image +import json __all__ = ( - "CSEStereo" + "CREStereo" # waiting for download "Middlebury2014" "ETH3D" "Kitti2012" "Kitti2015" + "Sintel" + "SceneFlow" # need to find valid mask procedure + "FallingThings" + "InStereo2k" # waiting for download ) @@ -71,21 +73,21 @@ def _read_disparity(self, file_path: str) -> Tuple: # function that returns a disparity map and an occlusion map pass - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) - dsp_map_left, occ_mask_left = self._read_disparity(self._disparities[index][0]) - dsp_map_right, occ_mask_right = self._read_disparity(self._disparities[index][1]) + dsp_map_left, valid_mask_right = self._read_disparity(self._disparities[index][0]) + dsp_map_right, valid_mask_right = self._read_disparity(self._disparities[index][1]) imgs = (img_left, img_right) 
         dsp_maps = (dsp_map_left, dsp_map_right)
-        occ_masks = (occ_mask_left, occ_mask_right)
+        valid_masks = (valid_mask_left, valid_mask_right)
 
         if self.transforms is not None:
-            imgs, dsp_maps, occ_masks, = self.transforms(imgs, dsp_maps, occ_masks)
+            imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks)
 
-        return imgs, dsp_maps, occ_masks
+        return imgs[0], imgs[1], dsp_maps[0], valid_masks[0]
 
     def __len__(self) -> int:
         return len(self._images)
@@ -100,7 +102,9 @@ class CRESSyntethicStereo(StereoMatchingDataset):
 
     EXPERIMENTAL_RANGE = 1 # TODO: remove after validating dataset structure / flow
 
-    def __init__(self, root: str, transforms: Optional[Callable] = None, download: bool = True):
+    MAX_DISP = 256.
+
+    def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = True):
         super().__init__(root, transforms)
         # if the API user requests a dataset download check that the user can download it
         if download:
@@ -114,6 +118,32 @@ def __init__(self, root: str, transforms: Optional[Callable] = None, download: b
             )
             self._download_dataset(root)
 
+        verify_str_arg(split, "split", valid_values=("tree", "shapenet", "reflective", "hole", "all"))
+        root = Path(root)
+
+        splits = {
+            "tree": ["tree"],
+            "shapenet": ["shapenet"],
+            "reflective": ["reflective"],
+            "hole": ["hole"],
+            "all": ["tree", "shapenet", "reflective", "hole"],
+        }[split]
+
+        for s in splits:
+            imgs_left = sorted(glob(str(root / s / "*_left.jpg")))
+            imgs_right = [p.replace("_left", "_right") for p in imgs_left]
+            imgs = list((l, r) for l, r in zip(imgs_left, imgs_right))
+            self._images += imgs
+
+            disparity_maps_left = (p.replace("_left", "_left.disp") for p in imgs_left)
+            disparity_maps_right = (p.replace("_right", "_right.disp") for p in imgs_right)
+            disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
+            self._disparities += disparity_maps
+
+    def _read_disparity(self, file_path: str) -> Tuple:
+        disparity = np.array(Image.open(file_path), dtype=np.float32)
+        valid = (disparity < self.MAX_DISP) & (disparity > 0.)
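+        # ground-truth disparities are considered valid only inside the (0, MAX_DISP) range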
+        return disparity, valid
+
     def _download_dataset(self, root: str) -> None:
         # TODO: remove before release, used only for testing purposes
         dirs = ["tree", "shapenet", "reflective", "hole"]
@@ -249,9 +279,9 @@ def _read_img(self, file_path: str) -> Image.Image:
 
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path): # case when dealing with the test split
             return None, None
-        dsp_mask = read_pfm_file(file_path)
-        occ_mask = dsp_mask < 1e3
-        return dsp_mask, occ_mask
+        disparity_map = read_pfm_file(file_path)
+        valid_mask = disparity_map < 1e3
+        return disparity_map, valid_mask
 
     def _download_dataset(self, root: str):
         base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip"
@@ -347,23 +377,23 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png")))
 
         if split == "test":
-            dsp_masks_left, dsp_masks_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
         else:
-            dsp_masks_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm")))
+            disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm")))
             # no masks for the right view, always using left as reference
-            dsp_masks_right = list("" for _ in dsp_masks_left)
+            disparity_maps_right = list("" for _ in disparity_maps_left)
 
         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
 
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None
 
-        dsp_mask = read_pfm_file(file_path)
-        occ_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png"))
-        occ_mask = np.array(occ_mask)
-        return dsp_mask, occ_mask
+        disparity_map = read_pfm_file(file_path)
+        valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png"))
+        valid_mask = np.array(valid_mask)
+        return disparity_map, valid_mask
 
     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
@@ -404,22 +434,22 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png")))
 
         if split == "train":
-            dsp_masks_left = sorted(glob(str(root / "disp_noc" / "*.png")))
-            dsp_masks_right = list("" for _ in dsp_masks_left)
+            disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png")))
+            disparity_maps_right = list("" for _ in disparity_maps_left)
         else:
-            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
 
         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
 
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None
 
-        dsp_mask = np.array(Image.open(file_path)) / 256.0
-        occ_mask = dsp_mask > 0.0
+        disparity_map = np.array(Image.open(file_path)) / 256.0
+        valid_mask = disparity_map > 0.0
 
-        return dsp_mask, occ_mask
+        return disparity_map, valid_mask
 
     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
@@ -459,22 +489,22 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         imgs_right = sorted(glob(str(root / "image_3" / "*_10.png")))
 
         if split == "train":
-            dsp_masks_left = sorted(glob(str(root / "disp_noc_0" / "*.png")))
-            dsp_masks_right = sorted(glob(str(root / "disp_noc_1" / "*.png")))
+            disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png")))
+            disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png")))
         else:
-            dsp_masks_left, dsp_masks_right = list("" for _ in dsp_masks_left), list("" for _ in dsp_masks_right)
+            disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right)
 
         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dsp_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
 
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None
 
-        dsp_mask = np.array(Image.open(file_path)) / 256.0
-        occ_mask = dsp_mask > 0.0
+        disparity_map = np.array(Image.open(file_path)) / 256.0
+        valid_mask = disparity_map > 0.0
 
-        return dsp_mask, occ_mask
+        return disparity_map, valid_mask
 
     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
@@ -498,27 +528,135 @@ def __init__(self, root: str, transforms: Optional[Callable] = None):
         imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png")))
 
         dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png")))
-        dsp_masks_right = list("" for _ in dps_masks_left)
+        disparity_maps_right = list("" for _ in dps_masks_left)
 
         self._images = list((l, r) for l, r in zip(imgs_left, imgs_right))
-        self._dsp_masks = list((l, r) for l, r in zip(dps_masks_left, dsp_masks_right))
+        self._disparities = list((l, r) for l, r in zip(dps_masks_left, disparity_maps_right))
 
     def _read_disparity(self, file_path: str) -> Tuple:
         if not os.path.exists(file_path):
             return None, None
 
         # disparity decoding as per Sintel instructions
-        dsp_mask = np.array(Image.open(file_path), dtype=np.float32)
-        r, g, b = np.split(dsp_mask, 3, axis=-1)
-        dsp_mask = r * 4 + g / (2**6) + b / (2**14)
+        disparity_map = np.array(Image.open(file_path), dtype=np.float32)
+        r, g, b = np.split(disparity_map, 3, axis=-1)
+        disparity_map = r * 4 + g / (2**6) + b / (2**14)
 
         # occlusion mask
-        occ_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) > 0
+        valid_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) == 0
         # out of frame mask
-        off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) > 0
+        off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) == 0
         # combine the masks together
-        occ_mask = np.logical_or(off_mask, occ_mask)
-        return dsp_mask, occ_mask
+        valid_mask = np.logical_and(off_mask, valid_mask)
+        return disparity_map, valid_mask
 
     def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]:
         return super().__getitem__(index)
+
+
+class SceneFlowDataset(StereoMatchingDataset):
+    """Dataset interface for `Scene Flow `_ datasets."""
+
+    def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None):
+        super().__init__(root, transforms)
+
+        verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
"Monkaa")) + split = split.upper() + + verify_str_arg(split, "pass_name", valid_values=("clean", "final", "both")) + + passes = { + "clean": ["frames_cleanpass"], + "final": ["frames_finalpass"], + "both": ["frames_cleanpass, frames_finalpass"], + }[pass_name] + + root = Path(root) / split + + for p in passes: + imgs_left = sorted(glob(str(root / p / "left" / "*" / "*.png"))) + imgs_right = sorted(glob(str(root / p / "right" / "*" / "*.png"))) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += imgs + + disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] + disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + disparity = read_pfm_file(file_path) + valid = np.ones_like(disparity) + return disparity, valid + + +class FallingThingsDataset(StereoMatchingDataset): + """FallingThings ``_ dataset + + The dataset is expected to have the following structre: :: + + root + FallingThings + single + scene1 + _object_settings.json + _camera_settings.json + image1.left.depth.png + image1.right.depth.png + image1.left.jpg + image1.right.jpg + image2.left.depth.png + image2.right.depth.png + image2.left.jpg + image2.right + ... + scene2 + ... + mixed + scene1 + _object_settings.json + _camera_settings.json + image1.left.depth.png + image1.right.depth.png + image1.left.jpg + image1.right.jpg + image2.left.depth.png + image2.right.depth.png + image2.left.jpg + image2.right + ... + scene2 + ... + """ + + def __init__(self, root: str, split: str = "single", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + verify_str_arg(split, "split", valid_values=("single", "mixed", "both")) + split = split.upper() + + splits = { + "single": ["single"], + "mixed": ["mixed"], + "both": ["single", "mixed"], + }[split] + + for s in splits: + imgs_left = sorted(glob(str(root / s / "*.left.jpg"))) + imgs_right = sorted(glob(str(root / s / "*.right.jpg"))) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += imgs + + disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png"))) + disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png"))) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + depth = Image.Open(file_path) + with open(os.path.split(file_path)[0] + '_camera_settings.json', 'r') as f: + intrinsics = json.load(f) + fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + disparity = (fx * 6.0 * 100) / depth.astype(np.float32) + valid = disparity > 0 + return disparity, valid From 62368b1d1eb8e260d3ea89acfc245a940c44a700 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 11 Jul 2022 23:34:27 +0100 Subject: [PATCH 15/35] "removed duplicate folder" --- vision | 1 - 1 file changed, 1 deletion(-) delete mode 160000 vision diff --git a/vision b/vision deleted file mode 160000 index bd19fb8ea9b..00000000000 --- a/vision +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bd19fb8ea9b1f67df2a2a1ee116874609ad3ee8c From 33c52a5705a414ce2f47fe193e7cb9c0f48432d1 Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 11:22:29 +0100 Subject: [PATCH 16/35] Added InStereo2k. 
Started working on dataset tests --- test/datasets_utils.py | 14 +- test/test_datasets.py | 552 ++++++++++++++++++++++- torchvision/datasets/__init__.py | 1 + torchvision/datasets/_stereo_matching.py | 191 ++++++-- 4 files changed, 686 insertions(+), 72 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 88eb4e17823..f051e325968 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -561,9 +561,9 @@ def test_feature_types(self, config): @test_all_configs def test_num_examples(self, config): with self.create_dataset(config) as (dataset, info): - assert len(dataset) == info["num_examples"] + assert len(dataset) == info["num_examples"], f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" - @test_all_configs + @ test_all_configs def test_transforms(self, config): mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) for kwarg in self._TRANSFORM_KWARGS: @@ -587,7 +587,7 @@ class ImageDatasetTestCase(DatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, int) - @contextlib.contextmanager + @ contextlib.contextmanager def create_dataset( self, config: Optional[Dict[str, Any]] = None, @@ -610,7 +610,7 @@ def create_dataset( with self._force_load_images(): yield dataset, info - @contextlib.contextmanager + @ contextlib.contextmanager def _force_load_images(self): open = PIL.Image.open @@ -649,7 +649,7 @@ def _set_default_frames_per_clip(self, inject_fake_data): args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" - @functools.wraps(inject_fake_data) + @ functools.wraps(inject_fake_data) def wrapper(tmpdir, config): args = inject_fake_data(tmpdir, config) if frames_per_clip_last and len(args) == len(args_without_default) - 1: @@ -748,7 +748,7 @@ def size(idx: int) -> Tuple[int, int, int]: ] -@requires_lazy_imports("av") +@ requires_lazy_imports("av") def create_video_file( root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], @@ -790,7 +790,7 @@ def create_video_file( return file -@requires_lazy_imports("av") +@ requires_lazy_imports("av") def create_video_folder( root: Union[str, pathlib.Path], name: Union[str, pathlib.Path], diff --git a/test/test_datasets.py b/test/test_datasets.py index a108479aee3..d390c30cee9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,3 +1,4 @@ +from abc import abstractmethod import bz2 import contextlib import csv @@ -10,6 +11,7 @@ import random import shutil import string +from typing import List, Callable, Tuple import unittest import xml.etree.ElementTree as ET import zipfile @@ -23,30 +25,540 @@ from torchvision import datasets +class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoETH3D + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + # create the scene folder + image_paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with left right images + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) + 
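+            # im1.png is the right view of the rectified stereo pair (im0.png is the left view)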
+            image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100)))
+        return image_paths
+
+    @staticmethod
+    def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]:
+        paths = []
+        # make the root_dir if it does not exist
+        os.makedirs(root_dir, exist_ok=True)
+
+        # create scene directories
+        for i in range(num_examples):
+            scene_dir = os.path.join(root_dir, f"scene_{i}")
+            os.makedirs(scene_dir, exist_ok=True)
+            # populate with a random png file for occlusion mask, and a pfm file for disparity
+            paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100)))
+            pfm_path = os.path.join(scene_dir, "disp0GT.pfm")
+            datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path)
+            paths.append(pfm_path)
+        return paths
+
+    def inject_fake_data(self, tmpdir, config):
+        eth3d_dir = os.path.join(tmpdir, "ETH3D")
+
+        num_examples = 2 if config["split"] == "train" else 3
+
+        split_name = "two_view_training" if config["split"] == "train" else "two_view_testing"
+        split_dir = os.path.join(eth3d_dir, split_name)
+        self._create_scene_folder(num_examples, split_dir)
+
+        if config["split"] == "train":
+            annot_dir = os.path.join(eth3d_dir, "two_view_training_gt")
+            self._create_annotation_folder(num_examples, annot_dir)
+
+        return num_examples
+
+    def test_training_test_splits(self):
+        with self.create_dataset(split="train") as (dataset, _):
+            assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities"
+            for _, _, disparity, valid_mask in dataset:
+                assert len(disparity.shape) == 3
+                assert len(valid_mask.shape) == 2
+                dh, dw, _ = disparity.shape
+                mh, mw = valid_mask.shape
+                assert dh == mh
+                assert dw == mw
+
+        with self.create_dataset(split="test") as (dataset, _):
+            assert all(d == ("", "") for d in dataset._disparities)
+            for _, _, disparity, valid_mask in dataset:
+                assert disparity is None
+                assert valid_mask is None
+
+    def test_bad_input(self):
+        with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"):
+            with self.create_dataset(split="bad"):
+                pass
+
+
+class CREStereoSyntheticTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CREStereoSynthetic
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole"))
+    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        crestereo_dir = pathlib.Path(tmpdir) / "CREStereo"
+        os.makedirs(crestereo_dir, exist_ok=True)
+
+        split_dir = crestereo_dir / config["split"]
+        os.makedirs(split_dir, exist_ok=True)
+        num_examples = 4
+
+        for idx in range(num_examples):
+            datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100))
+            datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100))
+            # these are going to end up being gray scale images
+            datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100))
+            datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100))
+
+        return num_examples
+
+    def test_splits(self):
+        for split in ("tree", "shapenet", "reflective", "hole"):
+            with self.create_dataset(split=split) as (dataset, _):
+                for left, right, disparity, valid_mask in dataset:
+                    left_array = np.array(left)
+                    right_array = np.array(right)
+                    h, w, c = left_array.shape
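+                    # PIL images decode to (H, W, C) uint8 arrays, so left/right shapes can be compared directly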
# check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoMiddlebury2014 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "additional"), use_ambient_views=(True, False)) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: + calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] + scene_dirs = [] + for c in calibrations: + scene_dir = os.path.join(root_dir, f"{scene_name}{c}") + os.makedirs(scene_dir, exist_ok=True) + # make normal images first + datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + # these are going to end up being gray scale images + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) + scene_dirs.append(scene_dir) + return scene_dirs + + def inject_fake_data(self, tmpdir, config): + split_scene_map = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + } + + middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") + os.makedirs(middlebury_dir, exist_ok=True) + + split_dir = middlebury_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + for idx in range(num_examples): + # special case for test_bad_input + if config["split"] not in split_scene_map: + return 0 + + scene_name = split_scene_map[config["split"]][idx] + self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + + # account for perfect / imperfect calibrations + if config["split"] != "test": + num_examples *= 2 + + return num_examples + + def test_train_splits(self): + for split in ["train", "additional"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 3 + assert disparity.shape == (h, w, 3) + # check that valid mask is the same size as the disparity + dh, dw, c = disparity.shape + print(valid_mask.shape) + mh, mw, _ = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with 
self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_augmented_view_usage(self): + with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): + for left, right, _, _ in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2012 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + datasets_utils.create_image_folder( + root=split_dir, + name="colored_0", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="colored_1", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_noc", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2012 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2015 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, 
PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + datasets_utils.create_image_folder( + root=split_dir, + name="image_2", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="image_3", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_0", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSceneFlow + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("FlyingThings3D", "Driving", "Monkaa"), + pass_name=("clean", "final") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]): + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + for i in range(num_examples): + datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + + def inject_fake_data(self, tmpdir, config): + scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" + os.makedirs(scene_flow_dir, exist_ok=True) + + split_dir = scene_flow_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + pass_dir_map = { + "clean": "frames_cleanpass", + "final": "frames_finalpass", + } + + num_examples = 4 + pass_dir_name = pass_dir_map[config["pass_name"]] + # create pass 
directories + pass_dir = split_dir / pass_dir_name + disp_dir = split_dir / "disp" + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) + + # root / pass / direction / scene / .imgs + # root / disparity / direction / scene / .imgs + for direction in ["left", "right"]: + for scene_idx in range(num_examples): + # scene_dir = pass_dir / direction / f"scene_{scene_idx:06d}" + os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + datasets_utils.create_image_folder( + root=pass_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=3, + size=(3, 100, 200), + ) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) + self._create_pfm_folder( + root=disp_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.pfm", + num_examples=3, + size=(100, 200), + ) + + return num_examples * 3 + + def test_train_splits(self): + for split, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split, pass_name=pass_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w, 3) + # check that valid mask is the same size as the disparity + dh, dw, _ = disparity.shape + mh, mw, _ = valid_mask.shape + assert dh == mh + assert dw == mw + + +class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoFallingThings + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root: str, scene_name: str, num_examples: int, size: Tuple[int, int]): + root = pathlib.Path(root) / scene_name + os.makedirs(root, exist_ok=True) + + datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[0], size[1])) + # single channel depth maps + datasets_utils.create_image_file(root, "image1.left.depth.jpg", size=(1, size[0], size[1])) + datasets_utils.create_image_file(root, "image1.right.depth.jpg", size=(1, size[0], size[1])) + + def inject_fake_data(self, tmpdir, config): + fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" + + split_dir = pathlib.Path(fallingthings_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = 4 + + for i in range(num_examples): + self._make_scene_folder( + root=split_dir, + scene_name=f"scene_{i:06d}", + num_examples=num_examples, + size=(100, 200), + ) + + return num_examples + + class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - @staticmethod + @ staticmethod def _make_binary_file(num_elements, root, name): file_name = os.path.join(root, name) np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - @staticmethod + @ staticmethod def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): STL10TestCase._make_binary_file(num_images * num_channels * 
@staticmethod def _make_label_file(num_images, root, name): STL10TestCase._make_binary_file(num_images, root, name) @staticmethod def _make_class_names_file(root, name="class_names.txt"): with open(os.path.join(root, name), "w") as fh: for cname in ("airplane", "bird"): fh.write(f"{cname}\n") @staticmethod def _make_fold_indices_file(root): num_folds = 10 offset = 0 @@ -58,7 +570,7 @@ def _make_fold_indices_file(root): return tuple(range(1, num_folds + 1)) @staticmethod def _make_train_files(root, num_unlabeled_images=1): num_images_in_fold = STL10TestCase._make_fold_indices_file(root) num_train_images = sum(num_images_in_fold) @@ -69,7 +581,7 @@ def _make_train_files(root, num_unlabeled_images=1): return dict(train=num_train_images, unlabeled=num_unlabeled_images) @staticmethod def _make_test_files(root, num_images=2): STL10TestCase._make_image_file(num_images, root, "test_X.bin") STL10TestCase._make_label_file(num_images, root, "test_y.bin") @@ -887,7 +1399,7 @@ def inject_fake_data(self, tmpdir, config): return num_images @contextlib.contextmanager def create_dataset(self, *args, **kwargs): with super().create_dataset(*args, **kwargs) as output: yield output @@ -1293,7 +1805,7 @@ def _create_archive(self, root, name, *files): return archive @datasets_utils.test_all_configs def test_feature_types(self, config): feature_types = self.FEATURE_TYPES self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES @@ -1571,7 +2083,7 @@ def _file_name_fn(self, cls, ext, idx): def _is_valid_file_to_extensions(self, is_valid_file): return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} @datasets_utils.test_all_configs def test_is_valid_file(self, config): extensions = config.pop("extensions") # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the @@ -1581,7 +2093,7 @@ def test_is_valid_file(self, config): ) as (dataset, info): assert len(dataset) == info["num_examples"] @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1602,7 +2114,7 @@ def inject_fake_data(self, tmpdir, config): return dict(num_examples=num_examples_total, classes=classes) @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1701,32 +2213,32 @@ class Places365TestCase(datasets_utils.ImageDatasetTestCase): *((f"{category}/Places365_train_00000001.png", idx) for category, idx in _CATEGORIES_CONTENT), ) @staticmethod def _make_txt(root, name, seq): file = os.path.join(root, name) with open(file, "w") as fh: for text, idx in seq: fh.write(f"{text} {idx}\n") @staticmethod def _make_categories_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._CATEGORIES_CONTENT) @staticmethod def _make_file_list_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._FILE_LIST_CONTENT) @staticmethod def _make_image(file_name, size): os.makedirs(os.path.dirname(file_name), exist_ok=True) PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file_name)
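# editorial note (illustrative): the helper above saves an all-zero (black) RGB array;
# PIL infers the image encoder from the file extension, so the same helper can emit
# .png or .jpg fixtures as needed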
@staticmethod def _make_devkit_archive(root, split): Places365TestCase._make_categories_txt(root, Places365TestCase._CATEGORIES) Places365TestCase._make_file_list_txt(root, Places365TestCase._FILE_LISTS[split]) @staticmethod def _make_images_archive(root, split, small): folder_name = Places365TestCase._IMAGES[(split, small)] image_size = (256, 256) if small else (512, random.randint(512, 1024)) @@ -2041,7 +2553,7 @@ def inject_fake_data(self, tmpdir, config): return num_examples[config["split"]] @datasets_utils.test_all_configs def test_flow(self, config): # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images # Also make sure the flow is properly decoded @@ -2100,7 +2612,7 @@ def inject_fake_data(self, tmpdir, config): ) return num_examples @datasets_utils.test_all_configs def test_flow(self, config): h, w = self.FLOW_H, self.FLOW_W expected_flow = np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 295fe922478..a7dd8397bab 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -1,4 +1,5 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K +from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereoSynthetic from .caltech import Caltech101, Caltech256 from .celeba import CelebA from .cifar import CIFAR10, CIFAR100 diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 65336503b87..bcca2b12efb 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,10 +1,12 @@ from abc import ABC, abstractmethod from glob import glob from pathlib import Path -from random import random +import pathlib +import random import re import shutil from typing import Callable, List, Optional, Tuple, Any from torch import Tensor from .vision import VisionDataset from .utils import download_and_extract_archive, download_url, verify_str_arg @@ -14,15 +16,15 @@ import json __all__ = ( - "CREStereo" # waiting for download - "Middlebury2014" - "ETH3D" - "Kitti2012" - "Kitti2015" - "Sintel" - "SceneFlow" # need to find valid mask procedure - "FallingThings" - "InStereo2k" # waiting for download + "CREStereo",  # waiting for download / need to find valid mask procedure + "StereoMiddlebury2014", + "StereoETH3D", + "StereoKitti2012", + "StereoKitti2015", + "StereoSintel", + "StereoSceneFlow",  # need to find valid mask procedure + "StereoFallingThings", + "InStereo2k",  # need to find valid mask procedure ) @@ -30,13 +32,15 @@ def read_pfm_file(file_path: str) -> np.array: # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py with open(file_path, "rb") as file: header = file.readline().rstrip() - assert header in [b"PF", b"Pf"], f"{file_path} is not a valid .pfm file" + if header not in [b"PF", b"Pf"]: + raise ValueError(f"Not a valid PFM file: {file_path}") - dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline()) - assert dim_match, f"{file_path} has a Malformed PFM header" + dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) + if not dim_match: + raise ValueError(f"Malformed PFM
header: {file_path}") width, height = map(int, dim_match.groups()) - channels = 3 if header == "PF" else 1 + channels = 3 if header == b"PF" else 1 scale = float(file.readline().rstrip()) # check for endian type if scale < 0: @@ -77,12 +81,12 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) - dsp_map_left, valid_mask_right = self._read_disparity(self._disparities[index][0]) + dsp_map_left, valid_mask_left = self._read_disparity(self._disparities[index][0]) dsp_map_right, valid_mask_right = self._read_disparity(self._disparities[index][1]) imgs = (img_left, img_right) dsp_maps = (dsp_map_left, dsp_map_right) - valid_masks = (valid_mask_right, valid_mask_right) + valid_masks = (valid_mask_left, valid_mask_right) if self.transforms is not None: imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks) @@ -93,7 +97,7 @@ def __len__(self) -> int: return len(self._images) -class CRESSyntethicStereo(StereoMatchingDataset): +class CREStereoSynthetic(StereoMatchingDataset): """Synthetic dataset used in training the `CREStereo `_ architecture. Ported from the download script in the paper github `repo `_. @@ -104,8 +108,11 @@ class CRESSyntethicStereo(StereoMatchingDataset): MAX_DISP = 256. - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = True): + def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False): super().__init__(root, transforms) + + root = Path(root) / "CREStereo" + # if the API user requests a dataset download check that the user can download it if download: statvfs = os.statvfs(root) @@ -130,12 +137,17 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable for s in splits: imgs_left = sorted(glob(str(root / s / "*_left.jpg"))) - imgs_right = (p.replace("_left", "_right") for p in imgs_left) + imgs_right = list(p.replace("_left", "_right") for p in imgs_left) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs - disparity_maps_left = (p.replace("_left", "_left.disp") for p in imgs_left) - disparity_maps_right = (p.replace("_right", "_right.disp") for p in imgs_right) + disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) + disparity_maps_right = list(p.replace("_right", "_right.disp") for p in imgs_right) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps @@ -158,7 +170,7 @@ def _download_dataset(self, root: str) -> None: download_and_extract_archive(url=url, download_root=d_path, remove_finished=True) -class Middlebury2014(StereoMatchingDataset): +class StereoMiddlebury2014(StereoMatchingDataset): """Publicly available scenes from the Middlebury dataset `2014 version `. The dataset mostly follows the original format, without containing the ambient subdirectories. 
: :: @@ -219,12 +231,11 @@ class Middlebury2014(StereoMatchingDataset): splits = { "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"], "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"], - "test": ['Plants', 'Classroom2E', 'Classroom2', 'Australia', 'DjembeL', 'CrusadeP', 'Crusade', 'Hoops', 'Bicycle2', 'Staircase', 'Newkuba', 'AustraliaP', 'Djembe', 'Livingroom', 'Computer'] + "test": ["Plants", "Classroom2E", "Classroom2", "Australia", "DjembeL", "CrusadeP", "Crusade", "Hoops", "Bicycle2", "Staircase", "Newkuba", "AustraliaP", "Djembe", "Livingroom", "Computer"] } def __init__( self, - *, root: str, split: str = "train", use_ambient_views: bool = False, @@ -237,7 +248,7 @@ def __init__( if download: self._download_dataset(root) - root = Path(root) / "FlyingChairs" + root = Path(root) / "Middlebury2014" if not os.path.exists(root / split): raise FileNotFoundError( f"The {split} directory was not found in the provided root directory" ) split_scenes = self.splits[split] # check that the provided root folder contains the scene splits - if not all(s in os.listdir(root / split) for s in split_scenes): + if not any( + # using startswith to account for perfect / imperfect calibration + scene.startswith(s) for scene in os.listdir(root / split) + for s in split_scenes + ): raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.") imgs_left = sorted(glob(str(root / split / "*" / "im0.png"))) imgs_right = sorted(glob(str(root / split / "*" / "im1.png"))) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) if split == "test": @@ -312,7 +331,7 @@ def _download_dataset(self, root: str): shutil.rmtree(os.path.join(root, "MiddEval3"))
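# Editorial sketch (not part of the patch, assumes the scenes are already on disk): the
# common read path at this point in the series is
#     dataset = StereoMiddlebury2014(root="datasets", split="train")
#     (img_left, img_right), (disp_left, disp_right), (mask_left, mask_right) = dataset[0]
# i.e. __getitem__ still returns the three pair-tuples assembled in the base class.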
-class ETH3D(StereoMatchingDataset): +class StereoETH3D(StereoMatchingDataset): """ETH3D `Low-Res Two-View `_ dataset. The dataset is expected to have the following structure: :: @@ -370,16 +389,20 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl verify_str_arg(split, "split", valid_values=("train", "test")) root = Path(root) / "ETH3D" - img_dir = "two_view_training" if split == "train" else "two_view_testing" + + img_dir = "two_view_training" if split == "train" else "two_view_test" anot_dir = "two_view_training_gt" imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + if split == "test": disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) else: - disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*[0-1].pfm"))) + disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*0GT.pfm"))) # no masks for the right view, always using left as reference disparity_maps_right = list("" for _ in disparity_maps_left) @@ -395,11 +418,11 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = np.array(valid_mask) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple, Tuple]: return super().__getitem__(index) -class Kitti2012(StereoMatchingDataset): +class StereoKitti2012(StereoMatchingDataset): """Kitti dataset from the `2012 `_ stereo evaluation benchmark. Uses the RGB images for consistency with Kitti 2015. @@ -433,11 +456,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl imgs_left = sorted(glob(str(root / "colored_0" / "*_10.png"))) imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png"))) + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png"))) disparity_maps_right = list("" for _ in disparity_maps_left) else: - disparity_maps_left, disparity_maps_right = list("" for _ in disparity_maps_left), list("" for _ in disparity_maps_right) + disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) @@ -455,7 +481,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: return super().__getitem__(index)
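# editorial note (inferred from the tests): for split="test" the disparity paths are empty
# strings, so _read_disparity is expected to yield (None, None) and the last two fields of
# a sample are None rather than arrays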
-class Kitti2015(StereoMatchingDataset): +class StereoKitti2015(StereoMatchingDataset): """Kitti dataset from the `2015 `_ stereo evaluation benchmark. The dataset is expected to have the following structure: :: @@ -468,8 +494,8 @@ class Kitti2015(StereoMatchingDataset): testing image_2 image_3 training image_2 image_3 - disp_noc_0 - disp_noc_1 + disp_occ_0 + disp_occ_1 calib Args: @@ -488,11 +514,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl imgs_left = sorted(glob(str(root / "image_2" / "*_10.png"))) imgs_right = sorted(glob(str(root / "image_3" / "*_10.png"))) + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png"))) disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png"))) else: - disparity_maps_left, disparity_maps_right = list("" for _ in disparity_maps_left), list("" for _ in disparity_maps_right) + disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) @@ -510,7 +539,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: return super().__getitem__(index) -class SintelDataset(StereoMatchingDataset): +class StereoSintel(StereoMatchingDataset): """Sintel `Stereo Dataset `_. Args: @@ -527,6 +556,9 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) disparity_maps_right = list("" for _ in dps_masks_left) @@ -554,16 +586,16 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: return super().__getitem__(index) -class SceneFlowDataset(StereoMatchingDataset): +class StereoSceneFlow(StereoMatchingDataset): """Dataset interface for `Scene Flow `_ datasets.""" def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None): super().__init__(root, transforms) - verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa")) - split = split.upper() + root = Path(root) / "SceneFlow" - verify_str_arg(split, "pass_name", valid_values=("clean", "final", "both")) + verify_str_arg(split, "split", valid_values=("FlyingThings3D", "Driving", "Monkaa")) + verify_str_arg(pass_name, "pass_name", valid_values=("clean", "final", "both")) passes = { "clean": ["frames_cleanpass"], "final": ["frames_finalpass"], "both": ["frames_cleanpass", "frames_finalpass"], }[pass_name] - root = Path(root) / split + root = root / split for p in passes: - imgs_left = sorted(glob(str(root / p / "left" / "*" / "*.png"))) - imgs_right = sorted(glob(str(root / p / "right" / "*" / "*.png"))) + imgs_left = sorted(glob(str(root / p / "*" / "left" / "*.png"))) + imgs_right = sorted(glob(str(root / p / "*" / "right" / "*.png"))) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root / p)) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left]
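# editorial note: the disparity tree mirrors the image tree, so the ground-truth path is
# recovered purely by string substitution — swap the pass directory name for "disparity"
# and the .png extension for .pfm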
disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps @@ -589,8 +626,11 @@ def _read_disparity(self, file_path: str) -> Tuple: valid = np.ones_like(disparity) return disparity, valid + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + return super().__getitem__(index) -class FallingThingsDataset(StereoMatchingDataset): + +class StereoFallingThings(StereoMatchingDataset): """FallingThings ``_ dataset The dataset is expected to have the following structure: :: @@ -644,11 +684,16 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab for s in splits: imgs_left = sorted(glob(str(root / s / "*.left.jpg"))) imgs_right = sorted(glob(str(root / s / "*.right.jpg"))) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png"))) disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png"))) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps @@ -660,3 +705,59 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity = (fx * 6.0 * 100) / depth.astype(np.float32) valid = disparity > 0 return disparity, valid + + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + return super().__getitem__(index) + + +class InStereo2k(StereoMatchingDataset): + """InStereo2k ``_ dataset + + The dataset is expected to have the following structure: :: + + root + InStereo2k + train + scene1 + left.png + right.png + left_disp.png + right_disp.png + ... + scene2 + ... + test + scene1 + left.png + right.png + left_disp.png + right_disp.png + ... + scene2 + ... + """
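+    # editorial sketch (illustrative, assumes the scenes are on disk): every scene folder
+    # contributes exactly one sample —
+    #     ds = InStereo2k(root="datasets", split="train")
+    #     (left, right), (disp_left, disp_right), (mask_left, mask_right) = ds[0]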
+ """ + + def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): + super().__init__(root, transforms) + + root = Path(root) / "InStereo2k" / split + + imgs_left = sorted(glob(str(root / "*" / "left.png"))) + imgs_right = list(p.replace("left", "right") for p in imgs_left) + + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + + imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images = imgs + + disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) + disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) + + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities = disparity_maps + + def _read_disparity(self, file_path: str) -> Tuple: + disparity = np.array(Image.open(file_path), dtype=np.float32) + valid = np.ones_like(disparity) + return disparity, valid From 2deab62984a831a80ca9dc15bf81ae96ac21f434 Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 15:10:17 +0100 Subject: [PATCH 17/35] "Added calibrartion arg for Middlebury2014 (#6259)" --- test/test_datasets.py | 50 ++++++++--- torchvision/datasets/_stereo_matching.py | 107 +++++++++++++++++++---- 2 files changed, 127 insertions(+), 30 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index d390c30cee9..5d557020ac8 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -193,10 +193,7 @@ def inject_fake_data(self, tmpdir, config): scene_name = split_scene_map[config["split"]][idx] self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) - # account for perfect / imperfect calibrations - if config["split"] != "test": - num_examples *= 2 - + # TODO: add calibration argument test return num_examples def test_train_splits(self): @@ -428,12 +425,15 @@ class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]): + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: root = pathlib.Path(root) / name os.makedirs(root, exist_ok=True) + paths = [] for i in range(num_examples): datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + paths.append(str(root / file_name_fn(i))) + return paths def inject_fake_data(self, tmpdir, config): scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" @@ -447,27 +447,25 @@ def inject_fake_data(self, tmpdir, config): "final": "frames_finalpass", } - num_examples = 4 + num_examples = 1 pass_dir_name = pass_dir_map[config["pass_name"]] # create pass directories pass_dir = split_dir / pass_dir_name - disp_dir = split_dir / "disp" + disp_dir = split_dir / "disparity" os.makedirs(pass_dir, exist_ok=True) os.makedirs(disp_dir, exist_ok=True) - # root / pass / direction / scene / .imgs - # root / disparity / direction / scene / .imgs for direction in ["left", "right"]: for scene_idx in range(num_examples): - # scene_dir = pass_dir / direction / f"scene_{scene_idx:06d}" os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) datasets_utils.create_image_folder( root=pass_dir / f"scene_{scene_idx:06d}", name=direction, file_name_fn=lambda i: f"{i:06d}.png", num_examples=3, - size=(3, 
100, 200), + size=(3, 200, 100), ) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) self._create_pfm_folder( root=disp_dir / f"scene_{scene_idx:06d}", name=direction, file_name_fn=lambda i: f"{i:06d}.pfm", num_examples=3, size=(100, 200), ) @@ -480,18 +478,20 @@ def inject_fake_data(self, tmpdir, config): return num_examples * 3 def test_train_splits(self): - for split, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): - with self.create_dataset(split=split, pass_name=pass_name) as (dataset, _): + for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): for left, right, disparity, valid_mask in dataset: left_array = np.array(left) right_array = np.array(right) h, w, c = left_array.shape # check that left and right are the same size assert left_array.shape == right_array.shape # check general shapes assert c == 3 assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 + assert len(valid_mask.shape) == 3 assert disparity.shape == (h, w, 3) # check that valid mask is the same size as the disparity dh, dw, _ = disparity.shape @@ -534,6 +534,28 @@ def inject_fake_data(self, tmpdir, config): return num_examples + def test_splits(self): + for split_name in ["single", "mixed"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 2 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (h, w) + # check that valid mask is the same size as the disparity + dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index bcca2b12efb..0bd75fe82a4 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -6,6 +6,7 @@ import re import shutil from typing import Callable, List, Optional, Tuple, Any +import warnings from torch import Tensor from .vision import VisionDataset from .utils import download_and_extract_archive, download_url, verify_str_arg @@ -238,6 +239,7 @@ def __init__( self, root: str, split: str = "train", + calibration: Optional[str] = None, use_ambient_views: bool = False, transforms: Optional[Callable] = None, download: bool = False @@ super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) + if calibration: + verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", None)) + if split == "test": + warnings.warn( + "\nSplit 'test' has no calibration settings, ignoring calibration argument.", + RuntimeWarning + ) + else: + if split != "test": + calibration = "perfect" + warnings.warn( + f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", + RuntimeWarning + )
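+        # editorial summary (illustrative) of the argument handling above:
+        #   split="train", calibration=None     -> warns, then falls back to calibration="perfect"
+        #   split="test", calibration="perfect" -> warns that test scenes carry no calibration variants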
+ if download: self._download_dataset(root) @@ -263,25 +281,36 @@ def __init__( - imgs_left = sorted(glob(str(root / split / "*" / "im0.png"))) - imgs_right = sorted(glob(str(root / split / "*" / "im1.png"))) + calibration_suffixes = { + None: [""], + "perfect": ["-perfect"], + "imperfect": ["-imperfect"], + "both": ["-perfect", "-imperfect"], + }[calibration] - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) + for calibration_suffix in calibration_suffixes: + scene_pattern = "*" + calibration_suffix - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png"))) + imgs_right = sorted(glob(str(root / split / scene_pattern / "im1.png"))) - if split == "test": - dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - else: + if not len(imgs_left) or not len(imgs_right): + raise FileNotFoundError("No images found in {}".format(root)) + + self._images += list((l, r) for l, r in zip(imgs_left, imgs_right)) + + if split == "test": + dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + else: + + dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) + dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) - dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) - dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) - self._disparities = list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) + self._disparities += list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) self.use_ambient_views = use_ambient_views - def __getitem__(self, index: int) -> Tuple: + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: @@ -579,17 +608,60 @@ def _read_disparity(self, file_path: str) -> Tuple: # out of frame mask off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) == 0 # combine the masks together - valid_mask = np.logical_or(off_mask, valid_mask) + valid_mask = np.logical_and(off_mask, valid_mask) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: return super().__getitem__(index) class StereoSceneFlow(StereoMatchingDataset): - """Dataset interface for `Scene Flow `_ datasets.""" + """Dataset interface for `Scene Flow `_ datasets. + + The dataset is expected to have the following structure: :: + + root + SceneFlow + Monkaa + frames_cleanpass + scene1 + left + img1.png + img2.png + right + img1.png + img2.png + scene2 + left + img1.png + img2.png + right + img1.png + img2.png + frames_finalpass + scene1 + left + img1.png + img2.png + right + img1.png + img2.png + ... + ... + disparity + scene1 + left + img1.pfm + img2.pfm + right + img1.pfm + img2.pfm + FlyingThings3D + ... + ... + """
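+    # editorial sketch (illustrative): pass_name selects which rendering-pass directories are
+    # walked, e.g. StereoSceneFlow(root, split="Monkaa", pass_name="both") collects image pairs
+    # from frames_cleanpass and frames_finalpass and pairs each with its mirrored disparity .pfm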
+ """ - def __init__(self, root: str, split: str = "train", pass_name: str = "clean", transforms: Optional[Callable] = None): + def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None): super().__init__(root, transforms) root = Path(root) / "SceneFlow" @@ -622,6 +694,9 @@ def __init__(self, root: str, split: str = "train", pass_name: str = "clean", tr self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: + if not os.path.exists(file_path): + raise FileNotFoundError("Disparity map {} not found".format(file_path)) + disparity = read_pfm_file(file_path) valid = np.ones_like(disparity) return disparity, valid From cbc55f30e8adaaa20513c9076f52d317442f6c2b Mon Sep 17 00:00:00 2001 From: Ponku Date: Wed, 13 Jul 2022 15:58:46 +0100 Subject: [PATCH 18/35] "Fixed test calibration test Middlebury2014 (#6259)" --- test/test_datasets.py | 40 +++++++++++++++++++++--- torchvision/datasets/_stereo_matching.py | 7 +++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5d557020ac8..518a95362b9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -150,7 +150,11 @@ def test_bad_input(self): class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StereoMiddlebury2014 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "additional"), use_ambient_views=(True, False)) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "additional"), + calibration=("perfect", "imperfect", "both"), + use_ambient_views=(True, False), + ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod @@ -193,12 +197,15 @@ def inject_fake_data(self, tmpdir, config): scene_name = split_scene_map[config["split"]][idx] self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) - # TODO: add calibration argument test + print(f"Created {scene_name} for split {config['split']}") + + if config["calibration"] == "both": + num_examples *= 2 return num_examples def test_train_splits(self): - for split in ["train", "additional"]: - with self.create_dataset(split=split) as (dataset, _): + for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): + with self.create_dataset(split=split, calibration=calibration) as (dataset, _): for left, right, disparity, valid_mask in dataset: left_array = np.array(left) right_array = np.array(right) @@ -219,7 +226,7 @@ def test_train_splits(self): def test_test_split(self): for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): + with self.create_dataset(split=split, calibration=None) as (dataset, _): for left, right, disparity, valid_mask in dataset: left_array = np.array(left) right_array = np.array(right) @@ -239,6 +246,29 @@ def test_augmented_view_usage(self): # check that left and right are the same size assert left_array.shape == right_array.shape + def test_warnings_train(self): + # train set invalid + split = "train" + calibration = None + with pytest.warns( + RuntimeWarning, + match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. 
+ + def test_warnings_test(self): + # test set invalid + split = "test" + calibration = "perfect" + with pytest.warns( + RuntimeWarning, + match="\nSplit 'test' has no calibration settings, ignoring calibration argument." + ): + with self.create_dataset(split=split, calibration=calibration): + pass + def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): with self.create_dataset(split="bad"): pass diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 0bd75fe82a4..702386b05bd 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -239,7 +239,7 @@ def __init__( self, root: str, split: str = "train", - calibration: Optional[str] = None, + calibration: Optional[str] = "perfect", use_ambient_views: bool = False, transforms: Optional[Callable] = None, download: bool = False @@ super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) if calibration: - verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", None)) + verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None)) if split == "test": + calibration = None warnings.warn( "\nSplit 'test' has no calibration settings, ignoring calibration argument.", RuntimeWarning ) From 0759706aacda6e9aa93ff5140bbc5e906fd257f9 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 10:03:27 +0100 Subject: [PATCH 19/35] Clean-up. Disp map format to (C, H, W) & valid mask to (H, W).
(#6259) --- test/test_datasets.py | 3552 +++++++++++----------- torchvision/datasets/__init__.py | 11 +- torchvision/datasets/_stereo_matching.py | 288 +- 3 files changed, 2081 insertions(+), 1770 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 518a95362b9..dd3c89b9bdc 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,4 +1,3 @@ -from abc import abstractmethod import bz2 import contextlib import csv @@ -25,701 +24,542 @@ from torchvision import datasets -class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoETH3D - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - - @staticmethod - def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: - # create the scene folder - image_paths = [] - # make the root_dir if it does not exits - os.makedirs(root_dir, exist_ok=True) - - for i in range(num_examples): - scene_dir = os.path.join(root_dir, f"scene_{i}") - os.makedirs(scene_dir, exist_ok=True) - # populate with left right images - image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) - image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100))) - return image_paths - - @staticmethod - def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: - paths = [] - # make the root_dir if it does not exits - os.makedirs(root_dir, exist_ok=True) +class STL10TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.STL10 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - # create scene directories - for i in range(num_examples): - scene_dir = os.path.join(root_dir, f"scene_{i}") - os.makedirs(scene_dir, exist_ok=True) - # populate with a random png file for occlusion mask, and a pfm file for disparity - paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100))) - pfm_path = os.path.join(scene_dir, "disp0GT.pfm") - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) - paths.append(pfm_path) - return paths + @ staticmethod + def _make_binary_file(num_elements, root, name): + file_name = os.path.join(root, name) + np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - def inject_fake_data(self, tmpdir, config): - eth3d_dir = os.path.join(tmpdir, "ETH3D") + @ staticmethod + def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): + STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) - num_examples = 2 if config["split"] == "train" else 3 + @ staticmethod + def _make_label_file(num_images, root, name): + STL10TestCase._make_binary_file(num_images, root, name) - split_name = "two_view_training" if config["split"] == "train" else "two_view_test" - split_dir = os.path.join(eth3d_dir, split_name) - self._create_scene_folder(num_examples, split_dir) + @ staticmethod + def _make_class_names_file(root, name="class_names.txt"): + with open(os.path.join(root, name), "w") as fh: + for cname in ("airplane", "bird"): + fh.write(f"{cname}\n") - if config["split"] == "train": - annot_dir = os.path.join(eth3d_dir, "two_view_training_gt") - self._create_annotation_folder(num_examples, annot_dir) + @ staticmethod + def _make_fold_indices_file(root): + num_folds 
= 10 + offset = 0 + with open(os.path.join(root, "fold_indices.txt"), "w") as fh: + for fold in range(num_folds): + line = " ".join([str(idx) for idx in range(offset, offset + fold + 1)]) + fh.write(f"{line}\n") + offset += fold + 1 - return num_examples + return tuple(range(1, num_folds + 1)) - def test_training_test_splits(self): - with self.create_dataset(split="train") as (dataset, _): - assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" - for _, _, disparity, valid_mask in dataset: - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - dh, dw, _ = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + @ staticmethod + def _make_train_files(root, num_unlabeled_images=1): + num_images_in_fold = STL10TestCase._make_fold_indices_file(root) + num_train_images = sum(num_images_in_fold) - with self.create_dataset(split="test") as (dataset, _): - assert all(d == ("", "") for d in dataset._disparities) - for _, _, disparity, valid_mask in dataset: - assert disparity is None - assert valid_mask is None + STL10TestCase._make_image_file(num_train_images, root, "train_X.bin") + STL10TestCase._make_label_file(num_train_images, root, "train_y.bin") + STL10TestCase._make_image_file(1, root, "unlabeled_X.bin") - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return dict(train=num_train_images, unlabeled=num_unlabeled_images) + @ staticmethod + def _make_test_files(root, num_images=2): + STL10TestCase._make_image_file(num_images, root, "test_X.bin") + STL10TestCase._make_label_file(num_images, root, "test_y.bin") -class CREStereoSynthethicTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CREStereoSynthetic - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + return dict(test=num_images) def inject_fake_data(self, tmpdir, config): - crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" - os.makedirs(crestereo_dir, exist_ok=True) + root_folder = os.path.join(tmpdir, "stl10_binary") + os.mkdir(root_folder) - split_dir = crestereo_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) - num_examples = 4 + num_images_in_split = self._make_train_files(root_folder) + num_images_in_split.update(self._make_test_files(root_folder)) + self._make_class_names_file(root_folder) - for idx in range(num_examples): - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) - # these are going to end up being gray scale images - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + return sum(num_images_in_split[part] for part in config["split"].split("+")) - return num_examples + def test_folds(self): + for fold in range(10): + with self.create_dataset(split="train", folds=fold) as (dataset, _): + assert len(dataset) == fold + 1 - def test_splits(self): - for split in ("tree", "shapenet", "reflective", "hole"): - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array 
= np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity - dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + def test_unlabeled(self): + with self.create_dataset(split="unlabeled") as (dataset, _): + labels = [dataset[idx][1] for idx in range(len(dataset))] + assert all(label == -1 for label in labels) - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): + def test_invalid_folds1(self): + with pytest.raises(ValueError): + with self.create_dataset(folds=10): pass + def test_invalid_folds2(self): + with pytest.raises(ValueError): + with self.create_dataset(folds="0"): + pass + + +class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech101 + FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) -class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoMiddlebury2014 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("train", "additional"), - calibration=("perfect", "imperfect", "both"), - use_ambient_views=(True, False), + target_type=("category", "annotation", ["category", "annotation"]) ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - - @staticmethod - def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: - calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] - scene_dirs = [] - for c in calibrations: - scene_dir = os.path.join(root_dir, f"{scene_name}{c}") - os.makedirs(scene_dir, exist_ok=True) - # make normal images first - datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) - # these are going to end up being gray scale images - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) - datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) - scene_dirs.append(scene_dir) - return scene_dirs + REQUIRED_PACKAGES = ("scipy",) def inject_fake_data(self, tmpdir, config): - split_scene_map = { - "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], - "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] - } + root = pathlib.Path(tmpdir) / "caltech101" + images = root / "101_ObjectCategories" + annotations = root / "Annotations" - middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") - os.makedirs(middlebury_dir, exist_ok=True) + categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) + num_images_per_category = 2 - split_dir = middlebury_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) + for image_category, annotation_category in categories: + datasets_utils.create_image_folder( + root=images, + name=image_category, + 
file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + self._create_annotation_folder( + root=annotations, + name=annotation_category, + file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", + num_examples=num_images_per_category, + ) - num_examples = 4 - for idx in range(num_examples): - # special case for test_bad_input - if config["split"] not in split_scene_map: - return 0 + # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. + os.makedirs(images / "BACKGROUND_Google") - scene_name = split_scene_map[config["split"]][idx] - self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + return num_images_per_category * len(categories) - print(f"Created {scene_name} for split {config['split']}") + def _create_annotation_folder(self, root, name, file_name_fn, num_examples): + root = pathlib.Path(root) / name + os.makedirs(root) - if config["calibration"] == "both": - num_examples *= 2 - return num_examples + for idx in range(num_examples): + self._create_annotation_file(root, file_name_fn(idx)) - def test_train_splits(self): - for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): - with self.create_dataset(split=split, calibration=calibration) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 3 - assert disparity.shape == (h, w, 3) - # check that valid mask is the same size as the disparity - dh, dw, c = disparity.shape - print(valid_mask.shape) - mh, mw, _ = valid_mask.shape - assert dh == mh - assert dw == mw + def _create_annotation_file(self, root, name): + mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) + datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split, calibration=None) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None + def test_combined_targets(self): + target_types = ["category", "annotation"] - def test_augmented_view_usage(self): - with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): - for left, right, _, _ in dataset: - left_array = np.array(left) - right_array = np.array(right) - # check that left and right are the same size - assert left_array.shape == right_array.shape + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) - def test_warnings_train(self): - # train set invalid - split = "train" - calibration = None - with pytest.warns( - RuntimeWarning, - match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." - f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.", - ): - with self.create_dataset(split=split, calibration=calibration): - pass + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] - def test_warnings_test(self): - # test set invalid - split = "test" - calibration = "perfect" - with pytest.warns( - RuntimeWarning, - match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." - ): - with self.create_dataset(split=split, calibration=calibration): - pass + actual = len(individual_targets) + expected = len(combined_targets) + assert ( + actual == expected + ), "The number of the returned combined targets does not match the the number targets if requested " + f"individually: {actual} != {expected}", - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + assert ( + actual is expected + ), "Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", -class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2012 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) +class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech256 def inject_fake_data(self, tmpdir, config): - kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" - os.makedirs(kitti_dir, exist_ok=True) - - split_dir = kitti_dir / (config["split"] + "ing") - os.makedirs(split_dir, exist_ok=True) - - num_examples = 4 + tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" - datasets_utils.create_image_folder( - root=split_dir, - name="colored_0", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) - datasets_utils.create_image_folder( - root=split_dir, - name="colored_1", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) + categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter")) + num_images_per_category = 2 - if config["split"] == "train": + for idx, category in categories: datasets_utils.create_image_folder( - root=split_dir, - name="disp_noc", - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, - # Kitti2012 uses a single channel image for disparities - size=(1, 100, 200), + tmpdir, + name=f"{idx:03d}.{category}", + file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", + num_examples=num_images_per_category, ) - return num_examples - - def test_train_splits(self): - for split in ["train"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity 
- dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw - - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None - - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return num_images_per_category * len(categories) -class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2015 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) +class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.WIDERFace + FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) def inject_fake_data(self, tmpdir, config): - kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" - os.makedirs(kitti_dir, exist_ok=True) - - split_dir = kitti_dir / (config["split"] + "ing") - os.makedirs(split_dir, exist_ok=True) - - num_examples = 4 + widerface_dir = pathlib.Path(tmpdir) / "widerface" + annotations_dir = widerface_dir / "wider_face_split" + os.makedirs(annotations_dir) - datasets_utils.create_image_folder( - root=split_dir, - name="image_2", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) - datasets_utils.create_image_folder( - root=split_dir, - name="image_3", - file_name_fn=lambda i: f"{i:06d}_10.png", - num_examples=num_examples, - size=(3, 100, 200), - ) + split_to_idx = split_to_num_examples = { + "train": 1, + "val": 2, + "test": 3, + } - if config["split"] == "train": - datasets_utils.create_image_folder( - root=split_dir, - name="disp_occ_0", - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, - # Kitti2015 uses a single channel image for disparities - size=(1, 100, 200), - ) + # We need to create all folders regardless of the split in config + for split in ("train", "val", "test"): + split_idx = split_to_idx[split] + num_examples = split_to_num_examples[split] datasets_utils.create_image_folder( - root=split_dir, - name="disp_occ_1", - file_name_fn=lambda i: f"{i:06d}.png", + root=tmpdir, + name=widerface_dir / f"WIDER_{split}" / "images" / "0--Parade", + file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", num_examples=num_examples, - # Kitti2015 uses a single channel image for disparities - size=(1, 100, 200), ) - return num_examples + annotation_file_name = { + "train": annotations_dir / "wider_face_train_bbx_gt.txt", + "val": annotations_dir / "wider_face_val_bbx_gt.txt", + "test": annotations_dir / "wider_face_test_filelist.txt", + }[split] - def test_train_splits(self): - for split in ["train"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert 
left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 2 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (h, w) - # check that valid mask is the same size as the disparity - dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + annotation_content = { + "train": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "val": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "test": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" + for image_idx in range(num_examples) + ), + }[split] - def test_test_split(self): - for split in ["test"]: - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None + with open(annotation_file_name, "w") as annotation_file: + annotation_file.write(annotation_content) - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + return split_to_num_examples[config["split"]] -class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoSceneFlow - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("FlyingThings3D", "Driving", "Monkaa"), - pass_name=("clean", "final") +class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Cityscapes + TARGET_TYPES = ( + "instance", + "semantic", + "polygon", + "color", ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), + *datasets_utils.combinations_grid( + mode=("coarse",), + split=("train", "train_extra", "val"), + target_type=TARGET_TYPES, + ), + ) + FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) - @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: - root = pathlib.Path(root) / name - os.makedirs(root, exist_ok=True) + def inject_fake_data(self, tmpdir, config): - paths = [] - for i in range(num_examples): - datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) - paths.append(str(root / file_name_fn(i))) - return paths + tmpdir = pathlib.Path(tmpdir) - def inject_fake_data(self, tmpdir, config): - scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" - os.makedirs(scene_flow_dir, exist_ok=True) + mode_to_splits = { + "Coarse": ["train", "train_extra", "val"], + "Fine": ["train", "test", "val"], + } - split_dir = scene_flow_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) + if config["split"] == "train": # just for coverage of the number of samples + cities = ["bochum", "bremen"] + else: + cities = ["bochum"] - pass_dir_map = { - "clean": "frames_cleanpass", - "final": "frames_finalpass", + polygon_target = { + "imgHeight": 1024, + "imgWidth": 2048, + 
"objects": [ + { + "label": "sky", + "polygon": [ + [1241, 0], + [1234, 156], + [1478, 197], + [1611, 172], + [1606, 0], + ], + }, + { + "label": "road", + "polygon": [ + [0, 448], + [1331, 274], + [1473, 265], + [2047, 605], + [2047, 1023], + [0, 1023], + ], + }, + ], } - num_examples = 1 - pass_dir_name = pass_dir_map[config["pass_name"]] - # create pass directories - pass_dir = split_dir / pass_dir_name - disp_dir = split_dir / "disparity" - os.makedirs(pass_dir, exist_ok=True) - os.makedirs(disp_dir, exist_ok=True) + for mode in ["Coarse", "Fine"]: + gt_dir = tmpdir / f"gt{mode}" + for split in mode_to_splits[mode]: + for city in cities: - for direction in ["left", "right"]: - for scene_idx in range(num_examples): - os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + def make_image(name, size=10): + datasets_utils.create_image_folder( + root=gt_dir / split, + name=city, + file_name_fn=lambda _: name, + size=size, + num_examples=1, + ) + + make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") + make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") + make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + + polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" + with open(polygon_target_name, "w") as outfile: + json.dump(polygon_target, outfile) + + # Create leftImg8bit folder + for split in ["test", "train_extra", "train", "val"]: + for city in cities: datasets_utils.create_image_folder( - root=pass_dir / f"scene_{scene_idx:06d}", - name=direction, - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=3, - size=(3, 200, 100), + root=tmpdir / "leftImg8bit" / split, + name=city, + file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", + num_examples=1, ) - os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) - self._create_pfm_folder( - root=disp_dir / f"scene_{scene_idx:06d}", - name=direction, - file_name_fn=lambda i: f"{i:06d}.pfm", - num_examples=3, - size=(100, 200), - ) + info = {"num_examples": len(cities)} + if config["target_type"] == "polygon": + info["expected_polygon_target"] = polygon_target + return info - return num_examples * 3 + def test_combined_targets(self): + target_types = ["semantic", "polygon", "color"] - def test_train_splits(self): - for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): - with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - print(f"Split {split_name} pass {pass_name}") - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - print(left_array.shape) - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 3 - assert disparity.shape == (h, w, 3) - # check that valid mask is the same size as the disparity - dh, dw, _ = disparity.shape - mh, mw, _ = valid_mask.shape - assert dh == mh - assert dw == mw + with self.create_dataset(target_type=target_types) as (dataset, _): + output = dataset[0] + assert isinstance(output, tuple) + assert len(output) == 2 + assert isinstance(output[0], PIL.Image.Image) + assert isinstance(output[1], tuple) + assert len(output[1]) == 3 + assert isinstance(output[1][0], PIL.Image.Image) # semantic + assert isinstance(output[1][1], dict) # polygon + assert isinstance(output[1][2], PIL.Image.Image) # color 
+    def test_feature_types_target_color(self):
+        with self.create_dataset(target_type="color") as (dataset, _):
+            color_img, color_target = dataset[0]
+            assert isinstance(color_img, PIL.Image.Image)
+            assert np.array(color_target).shape[2] == 4

-class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase):
-    DATASET_CLASS = datasets.StereoFallingThings
-    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed"))
-    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+    def test_feature_types_target_polygon(self):
+        with self.create_dataset(target_type="polygon") as (dataset, info):
+            polygon_img, polygon_target = dataset[0]
+            assert isinstance(polygon_img, PIL.Image.Image)
+            assert polygon_target == info["expected_polygon_target"]

-    @staticmethod
-    def _make_scene_folder(root: str, scene_name: str, num_examples: int, size: Tuple[int, int]):
-        root = pathlib.Path(root) / scene_name
-        os.makedirs(root, exist_ok=True)
-        datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[0], size[1]))
-        datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[0], size[1]))
-        # single channel depth maps
-        datasets_utils.create_image_file(root, "image1.left.depth.jpg", size=(1, size[0], size[1]))
-        datasets_utils.create_image_file(root, "image1.right.depth.jpg", size=(1, size[0], size[1]))
+class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.ImageNet
+    REQUIRED_PACKAGES = ("scipy",)
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"))

     def inject_fake_data(self, tmpdir, config):
-        fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings"
-
-        split_dir = pathlib.Path(fallingthings_dir) / config["split"]
-        os.makedirs(split_dir, exist_ok=True)
-
-        num_examples = 4
+        tmpdir = pathlib.Path(tmpdir)

-        for i in range(num_examples):
-            self._make_scene_folder(
-                root=split_dir,
-                scene_name=f"scene_{i:06d}",
+        wnid = "n01234567"
+        if config["split"] == "train":
+            num_examples = 3
+            datasets_utils.create_image_folder(
+                root=tmpdir,
+                name=tmpdir / "train" / wnid / wnid,
+                file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG",
+                num_examples=num_examples,
+            )
+        else:
+            num_examples = 1
+            datasets_utils.create_image_folder(
+                root=tmpdir,
+                name=tmpdir / "val" / wnid,
+                file_name_fn=lambda image_idx: f"ILSVRC2012_val_0000000{image_idx}.JPEG",
                 num_examples=num_examples,
-                size=(100, 200),
             )

+        wnid_to_classes = {wnid: [1]}
+        torch.save((wnid_to_classes, None), tmpdir / "meta.bin")
         return num_examples

-    def test_splits(self):
-        for split_name in ["single", "mixed"]:
-            with self.create_dataset(split=split_name) as (dataset, _):
-                for left, right, disparity, valid_mask in dataset:
-                    print(f"Split {split_name}")
-                    left_array = np.array(left)
-                    right_array = np.array(right)
-                    h, w, c = left_array.shape
-                    # check that left and right are the same size
-                    assert left_array.shape == right_array.shape
-                    print(left_array.shape)
-                    # check general shapes
-                    assert c == 3
-                    assert len(disparity.shape) == 3
-                    assert len(valid_mask.shape) == 2
-                    assert disparity.shape == (h, w)
-                    # check that valid mask is the same size as the disparity
-                    dh, dw = disparity.shape
-                    mh, mw = valid_mask.shape
-                    assert dh == mh
-                    assert dw == mw
+class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.CIFAR10
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))

-class STL10TestCase(datasets_utils.ImageDatasetTestCase):
-    DATASET_CLASS = 
datasets.STL10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) + _VERSION_CONFIG = dict( + base_folder="cifar-10-batches-py", + train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), + test_files=("test_batch",), + labels_key="labels", + meta_file="batches.meta", + num_categories=10, + categories_key="label_names", + ) - @ staticmethod - def _make_binary_file(num_elements, root, name): - file_name = os.path.join(root, name) - np.zeros(num_elements, dtype=np.uint8).tofile(file_name) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] + os.makedirs(tmpdir) - @ staticmethod - def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): - STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) + num_images_per_file = 1 + for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): + self._create_batch_file(tmpdir, name, num_images_per_file) - @ staticmethod - def _make_label_file(num_images, root, name): - STL10TestCase._make_binary_file(num_images, root, name) + categories = self._create_meta_file(tmpdir) - @ staticmethod - def _make_class_names_file(root, name="class_names.txt"): - with open(os.path.join(root, name), "w") as fh: - for cname in ("airplane", "bird"): - fh.write(f"{cname}\n") + return dict( + num_examples=num_images_per_file + * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), + categories=categories, + ) - @ staticmethod - def _make_fold_indices_file(root): - num_folds = 10 - offset = 0 - with open(os.path.join(root, "fold_indices.txt"), "w") as fh: - for fold in range(num_folds): - line = " ".join([str(idx) for idx in range(offset, offset + fold + 1)]) - fh.write(f"{line}\n") - offset += fold + 1 + def _create_batch_file(self, root, name, num_images): + np_rng = np.random.RandomState(0) + data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) + labels = np_rng.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() + self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) - return tuple(range(1, num_folds + 1)) + def _create_meta_file(self, root): + categories = [ + f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" + for idx in range(self._VERSION_CONFIG["num_categories"]) + ] + self._create_binary_file( + root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + ) + return categories - @ staticmethod - def _make_train_files(root, num_unlabeled_images=1): - num_images_in_fold = STL10TestCase._make_fold_indices_file(root) - num_train_images = sum(num_images_in_fold) + def _create_binary_file(self, root, name, content): + with open(pathlib.Path(root) / name, "wb") as fh: + pickle.dump(content, fh) - STL10TestCase._make_image_file(num_train_images, root, "train_X.bin") - STL10TestCase._make_label_file(num_train_images, root, "train_y.bin") - STL10TestCase._make_image_file(1, root, "unlabeled_X.bin") + def test_class_to_idx(self): + with self.create_dataset() as (dataset, info): + expected = {category: label for label, category in enumerate(info["categories"])} + actual = dataset.class_to_idx + assert actual == expected - return dict(train=num_train_images, unlabeled=num_unlabeled_images) - @ staticmethod - def _make_test_files(root, num_images=2): - 
STL10TestCase._make_image_file(num_images, root, "test_X.bin") - STL10TestCase._make_label_file(num_images, root, "test_y.bin") +class CIFAR100(CIFAR10TestCase): + DATASET_CLASS = datasets.CIFAR100 - return dict(test=num_images) + _VERSION_CONFIG = dict( + base_folder="cifar-100-python", + train_files=("train",), + test_files=("test",), + labels_key="fine_labels", + meta_file="meta", + num_categories=100, + categories_key="fine_label_names", + ) - def inject_fake_data(self, tmpdir, config): - root_folder = os.path.join(tmpdir, "stl10_binary") - os.mkdir(root_folder) - num_images_in_split = self._make_train_files(root_folder) - num_images_in_split.update(self._make_test_files(root_folder)) - self._make_class_names_file(root_folder) +class CelebATestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CelebA + FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) - return sum(num_images_in_split[part] for part in config["split"].split("+")) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "valid", "test", "all"), + target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), + ) - def test_folds(self): - for fold in range(10): - with self.create_dataset(split="train", folds=fold) as (dataset, _): - assert len(dataset) == fold + 1 + _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) - def test_unlabeled(self): - with self.create_dataset(split="unlabeled") as (dataset, _): - labels = [dataset[idx][1] for idx in range(len(dataset))] - assert all(label == -1 for label in labels) + def inject_fake_data(self, tmpdir, config): + base_folder = pathlib.Path(tmpdir) / "celeba" + os.makedirs(base_folder) - def test_invalid_folds1(self): - with pytest.raises(ValueError): - with self.create_dataset(folds=10): - pass + num_images, num_images_per_split = self._create_split_txt(base_folder) - def test_invalid_folds2(self): - with pytest.raises(ValueError): - with self.create_dataset(folds="0"): - pass + datasets_utils.create_image_folder( + base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images + ) + attr_names = self._create_attr_txt(base_folder, num_images) + self._create_identity_txt(base_folder, num_images) + self._create_bbox_txt(base_folder, num_images) + self._create_landmarks_txt(base_folder, num_images) + return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) -class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Caltech101 - FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) + def _create_split_txt(self, root): + num_images_per_split = dict(train=4, valid=3, test=2) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - target_type=("category", "annotation", ["category", "annotation"]) - ) - REQUIRED_PACKAGES = ("scipy",) + data = [ + [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + ] + self._create_txt(root, "list_eval_partition.txt", data) - def inject_fake_data(self, tmpdir, config): - root = pathlib.Path(tmpdir) / "caltech101" - images = root / "101_ObjectCategories" - annotations = root / "Annotations" + num_images_per_split["all"] = num_images = sum(num_images_per_split.values()) + return num_images, num_images_per_split - categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) - num_images_per_category = 2 + def _create_attr_txt(self, root, num_images): + header = ("5_o_Clock_Shadow", "Young") + data = 
torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() + self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) + return header - for image_category, annotation_category in categories: - datasets_utils.create_image_folder( - root=images, - name=image_category, - file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", - num_examples=num_images_per_category, - ) - self._create_annotation_folder( - root=annotations, - name=annotation_category, - file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", - num_examples=num_images_per_category, - ) + def _create_identity_txt(self, root, num_images): + data = torch.randint(1, 4, size=(num_images, 1)).tolist() + self._create_txt(root, "identity_CelebA.txt", data) - # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. - os.makedirs(images / "BACKGROUND_Google") + def _create_bbox_txt(self, root, num_images): + header = ("x_1", "y_1", "width", "height") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt( + root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True + ) - return num_images_per_category * len(categories) + def _create_landmarks_txt(self, root, num_images): + header = ("lefteye_x", "rightmouth_y") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) - def _create_annotation_folder(self, root, name, file_name_fn, num_examples): - root = pathlib.Path(root) / name - os.makedirs(root) + def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + with open(pathlib.Path(root) / name, "w") as fh: + if add_num_examples: + fh.write(f"{len(data)}\n") - for idx in range(num_examples): - self._create_annotation_file(root, file_name_fn(idx)) + if header: + if add_image_id_to_header: + header = ("image_id", *header) + fh.write(f"{' '.join(header)}\n") - def _create_annotation_file(self, root, name): - mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) - datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) + for idx, line in enumerate(data, 1): + fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") def test_combined_targets(self): - target_types = ["category", "annotation"] + target_types = ["attr", "identity", "bbox", "landmarks"] individual_targets = [] for target_type in target_types: @@ -746,1062 +586,659 @@ def test_combined_targets(self): ), "Type of the combined target does not match the type of the corresponding individual target: " f"{actual} is not {expected}", + def test_no_target(self): + with self.create_dataset(target_type=[]) as (dataset, _): + _, target = dataset[0] -class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Caltech256 + assert target is None - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" + def test_attr_names(self): + with self.create_dataset() as (dataset, info): + assert tuple(dataset.attr_names) == info["attr_names"] - categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter")) - num_images_per_category = 2 + def test_images_names_split(self): + with self.create_dataset(split="all") as (dataset, _): + all_imgs_names = set(dataset.filename) - for idx, 
category in categories: - datasets_utils.create_image_folder( - tmpdir, - name=f"{idx:03d}.{category}", - file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", - num_examples=num_images_per_category, - ) + merged_imgs_names = set() + for split in ["train", "valid", "test"]: + with self.create_dataset(split=split) as (dataset, _): + merged_imgs_names.update(dataset.filename) - return num_images_per_category * len(categories) + assert merged_imgs_names == all_imgs_names -class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.WIDERFace - FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) +class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.VOCSegmentation + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) + + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid( + year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") + ), + dict(year="2007", image_set="test"), + dict(year="2007-test", image_set="test"), + ) def inject_fake_data(self, tmpdir, config): - widerface_dir = pathlib.Path(tmpdir) / "widerface" - annotations_dir = widerface_dir / "wider_face_split" - os.makedirs(annotations_dir) + year, is_test_set = ( + ("2007", True) + if config["year"] == "2007-test" or config["image_set"] == "test" + else (config["year"], False) + ) + image_set = config["image_set"] - split_to_idx = split_to_num_examples = { - "train": 1, - "val": 2, - "test": 3, - } + base_dir = pathlib.Path(tmpdir) + if year == "2011": + base_dir /= "TrainVal" + base_dir = base_dir / "VOCdevkit" / f"VOC{year}" + os.makedirs(base_dir) - # We need to create all folders regardless of the split in config - for split in ("train", "val", "test"): - split_idx = split_to_idx[split] - num_examples = split_to_num_examples[split] + num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) + datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) - datasets_utils.create_image_folder( - root=tmpdir, - name=widerface_dir / f"WIDER_{split}" / "images" / "0--Parade", - file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", - num_examples=num_examples, - ) + datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) + annotation = self._create_annotation_files(base_dir, "Annotations", num_images) - annotation_file_name = { - "train": annotations_dir / "wider_face_train_bbx_gt.txt", - "val": annotations_dir / "wider_face_val_bbx_gt.txt", - "test": annotations_dir / "wider_face_test_filelist.txt", - }[split] + return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) - annotation_content = { - "train": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" - for image_idx in range(num_examples) - ), - "val": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" - for image_idx in range(num_examples) - ), - "test": "".join( - f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" - for image_idx in range(num_examples) - ), - }[split] + def _create_image_set_files(self, root, name, is_test_set): + root = pathlib.Path(root) / name + src = pathlib.Path(root) / "Main" + 
os.makedirs(src, exist_ok=True) - with open(annotation_file_name, "w") as annotation_file: - annotation_file.write(annotation_content) + idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) + idcs["trainval"] = (*idcs["train"], *idcs["val"]) - return split_to_num_examples[config["split"]] + for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): + self._create_image_set_file(src, image_set, idcs[image_set]) + shutil.copytree(src, root / "Segmentation") -class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Cityscapes - TARGET_TYPES = ( - "instance", - "semantic", - "polygon", - "color", - ) - ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), - *datasets_utils.combinations_grid( - mode=("coarse",), - split=("train", "train_extra", "val"), - target_type=TARGET_TYPES, - ), - ) - FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) + num_images = max(itertools.chain(*idcs.values())) + 1 + num_images_per_image_set = {image_set: len(idcs_) for image_set, idcs_ in idcs.items()} + return num_images, num_images_per_image_set - def inject_fake_data(self, tmpdir, config): + def _create_image_set_file(self, root, image_set, idcs): + with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: + fh.writelines([f"{idx:06d}\n" for idx in idcs]) - tmpdir = pathlib.Path(tmpdir) + def _create_annotation_files(self, root, name, num_images): + root = pathlib.Path(root) / name + os.makedirs(root) - mode_to_splits = { - "Coarse": ["train", "train_extra", "val"], - "Fine": ["train", "test", "val"], - } + for idx in range(num_images): + annotation = self._create_annotation_file(root, f"{idx:06d}.xml") - if config["split"] == "train": # just for coverage of the number of samples - cities = ["bochum", "bremen"] - else: - cities = ["bochum"] + return annotation - polygon_target = { - "imgHeight": 1024, - "imgWidth": 2048, - "objects": [ - { - "label": "sky", - "polygon": [ - [1241, 0], - [1234, 156], - [1478, 197], - [1611, 172], - [1606, 0], - ], - }, - { - "label": "road", - "polygon": [ - [0, 448], - [1331, 274], - [1473, 265], - [2047, 605], - [2047, 1023], - [0, 1023], - ], - }, - ], - } + def _create_annotation_file(self, root, name): + def add_child(parent, name, text=None): + child = ET.SubElement(parent, name) + child.text = text + return child - for mode in ["Coarse", "Fine"]: - gt_dir = tmpdir / f"gt{mode}" - for split in mode_to_splits[mode]: - for city in cities: + def add_name(obj, name="dog"): + add_child(obj, "name", name) + return name - def make_image(name, size=10): - datasets_utils.create_image_folder( - root=gt_dir / split, - name=city, - file_name_fn=lambda _: name, - size=size, - num_examples=1, - ) + def add_bndbox(obj, bndbox=None): + if bndbox is None: + bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} - make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") - make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") - make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + obj = add_child(obj, "bndbox") + for name, text in bndbox.items(): + add_child(obj, name, text) - polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" - with open(polygon_target_name, "w") as outfile: - json.dump(polygon_target, outfile) + return bndbox - # Create leftImg8bit folder - for split in ["test", "train_extra", "train", "val"]: - for city in cities: - datasets_utils.create_image_folder( - 
root=tmpdir / "leftImg8bit" / split, - name=city, - file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", - num_examples=1, - ) + annotation = ET.Element("annotation") + obj = add_child(annotation, "object") + data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) - info = {"num_examples": len(cities)} - if config["target_type"] == "polygon": - info["expected_polygon_target"] = polygon_target - return info + with open(pathlib.Path(root) / name, "wb") as fh: + fh.write(ET.tostring(annotation)) - def test_combined_targets(self): - target_types = ["semantic", "polygon", "color"] + return data - with self.create_dataset(target_type=target_types) as (dataset, _): - output = dataset[0] - assert isinstance(output, tuple) - assert len(output) == 2 - assert isinstance(output[0], PIL.Image.Image) - assert isinstance(output[1], tuple) - assert len(output[1]) == 3 - assert isinstance(output[1][0], PIL.Image.Image) # semantic - assert isinstance(output[1][1], dict) # polygon - assert isinstance(output[1][2], PIL.Image.Image) # color - def test_feature_types_target_color(self): - with self.create_dataset(target_type="color") as (dataset, _): - color_img, color_target = dataset[0] - assert isinstance(color_img, PIL.Image.Image) - assert np.array(color_target).shape[2] == 4 +class VOCDetectionTestCase(VOCSegmentationTestCase): + DATASET_CLASS = datasets.VOCDetection + FEATURE_TYPES = (PIL.Image.Image, dict) - def test_feature_types_target_polygon(self): - with self.create_dataset(target_type="polygon") as (dataset, info): - polygon_img, polygon_target = dataset[0] - assert isinstance(polygon_img, PIL.Image.Image) - (polygon_target, info["expected_polygon_target"]) + def test_annotations(self): + with self.create_dataset() as (dataset, info): + _, target = dataset[0] + assert "annotation" in target + annotation = target["annotation"] -class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.ImageNet - REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val")) + assert "object" in annotation + objects = annotation["object"] - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + assert len(objects) == 1 + object = objects[0] - wnid = "n01234567" - if config["split"] == "train": - num_examples = 3 - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / "train" / wnid / wnid, - file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", - num_examples=num_examples, - ) - else: - num_examples = 1 - datasets_utils.create_image_folder( - root=tmpdir, - name=tmpdir / "val" / wnid, - file_name_fn=lambda image_ifx: "ILSVRC2012_val_0000000{image_idx}.JPEG", - num_examples=num_examples, - ) + assert object == info["annotation"] - wnid_to_classes = {wnid: [1]} - torch.save((wnid_to_classes, None), tmpdir / "meta.bin") - return num_examples +class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CocoDetection + FEATURE_TYPES = (PIL.Image.Image, list) -class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CIFAR10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + REQUIRED_PACKAGES = ("pycocotools",) - _VERSION_CONFIG = dict( - base_folder="cifar-10-batches-py", - train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), - test_files=("test_batch",), - labels_key="labels", - meta_file="batches.meta", - num_categories=10, - categories_key="label_names", - ) + _IMAGE_FOLDER = "images" + 
_ANNOTATIONS_FOLDER = "annotations" + _ANNOTATIONS_FILE = "annotations.json" - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] - os.makedirs(tmpdir) + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGE_FOLDER + annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE + return root, annotation_file - num_images_per_file = 1 - for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): - self._create_batch_file(tmpdir, name, num_images_per_file) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) - categories = self._create_meta_file(tmpdir) + num_images = 3 + num_annotations_per_image = 2 - return dict( - num_examples=num_images_per_file - * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), - categories=categories, + files = datasets_utils.create_image_folder( + tmpdir, name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images ) + file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] - def _create_batch_file(self, root, name, num_images): - np_rng = np.random.RandomState(0) - data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) - labels = np_rng.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() - self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) + annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotation_folder) + info = self._create_annotation_file( + annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image + ) - def _create_meta_file(self, root): - categories = [ - f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" - for idx in range(self._VERSION_CONFIG["num_categories"]) - ] - self._create_binary_file( - root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + info["num_examples"] = num_images + return info + + def _create_annotation_file(self, root, name, file_names, num_annotations_per_image): + image_ids = [int(file_name.stem) for file_name in file_names] + images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + + annotations, info = self._create_annotations(image_ids, num_annotations_per_image) + self._create_json(root, name, dict(images=images, annotations=annotations)) + + return info + + def _create_annotations(self, image_ids, num_annotations_per_image): + annotations = datasets_utils.combinations_grid( + image_id=image_ids, bbox=([1.0, 2.0, 3.0, 4.0],) * num_annotations_per_image ) - return categories + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict() - def _create_binary_file(self, root, name, content): - with open(pathlib.Path(root) / name, "wb") as fh: - pickle.dump(content, fh) + def _create_json(self, root, name, content): + file = pathlib.Path(root) / name + with open(file, "w") as fh: + json.dump(content, fh) + return file - def test_class_to_idx(self): - with self.create_dataset() as (dataset, info): - expected = {category: label for label, category in enumerate(info["categories"])} - actual = dataset.class_to_idx - assert actual == expected +class CocoCaptionsTestCase(CocoDetectionTestCase): + DATASET_CLASS = datasets.CocoCaptions -class CIFAR100(CIFAR10TestCase): - 
DATASET_CLASS = datasets.CIFAR100 + def _create_annotations(self, image_ids, num_annotations_per_image): + captions = [str(idx) for idx in range(num_annotations_per_image)] + annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict(captions=captions) - _VERSION_CONFIG = dict( - base_folder="cifar-100-python", - train_files=("train",), - test_files=("test",), - labels_key="fine_labels", - meta_file="meta", - num_categories=100, - categories_key="fine_label_names", - ) + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + assert tuple(captions) == tuple(info["captions"]) -class CelebATestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CelebA - FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) +class UCF101TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.UCF101 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("train", "valid", "test", "all"), - target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), - ) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) - _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) + _VIDEO_FOLDER = "videos" + _ANNOTATIONS_FOLDER = "annotations" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._ANNOTATIONS_FOLDER + return root, annotation_path def inject_fake_data(self, tmpdir, config): - base_folder = pathlib.Path(tmpdir) / "celeba" - os.makedirs(base_folder) + tmpdir = pathlib.Path(tmpdir) - num_images, num_images_per_split = self._create_split_txt(base_folder) + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) - datasets_utils.create_image_folder( - base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images - ) - attr_names = self._create_attr_txt(base_folder, num_images) - self._create_identity_txt(base_folder, num_images) - self._create_bbox_txt(base_folder, num_images) - self._create_landmarks_txt(base_folder, num_images) + annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotations_folder) + num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) - return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) + return num_examples - def _create_split_txt(self, root): - num_images_per_split = dict(train=4, valid=3, test=2) + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" - data = [ - [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + video_files = [ + datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) + for cls in ("ApplyEyeMakeup", "YoYo") ] - self._create_txt(root, "list_eval_partition.txt", data) - - num_images_per_split["all"] = num_images = sum(num_images_per_split.values()) - return num_images, num_images_per_split + return [path.relative_to(root) for path in itertools.chain(*video_files)] - def _create_attr_txt(self, root, num_images): - header = ("5_o_Clock_Shadow", "Young") - data = 
torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() - self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) - return header + def _create_annotation_files(self, root, video_files, fold, train): + current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) + current_annotation = self._annotation_file_name(fold, train) + self._create_annotation_file(root, current_annotation, current_videos) - def _create_identity_txt(self, root, num_images): - data = torch.randint(1, 4, size=(num_images, 1)).tolist() - self._create_txt(root, "identity_CelebA.txt", data) + other_videos = set(video_files) - set(current_videos) + other_annotations = [ + self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) + ] + other_annotations.remove(current_annotation) + for name in other_annotations: + self._create_annotation_file(root, name, other_videos) - def _create_bbox_txt(self, root, num_images): - header = ("x_1", "y_1", "width", "height") - data = torch.randint(10, size=(num_images, len(header))).tolist() - self._create_txt( - root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True - ) + return len(current_videos) - def _create_landmarks_txt(self, root, num_images): - header = ("lefteye_x", "rightmouth_y") - data = torch.randint(10, size=(num_images, len(header))).tolist() - self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) + def _annotation_file_name(self, fold, train): + return f"{'train' if train else 'test'}list{fold:02d}.txt" - def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + def _create_annotation_file(self, root, name, video_files): with open(pathlib.Path(root) / name, "w") as fh: - if add_num_examples: - fh.write(f"{len(data)}\n") + fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) - if header: - if add_image_id_to_header: - header = ("image_id", *header) - fh.write(f"{' '.join(header)}\n") - for idx, line in enumerate(data, 1): - fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") +class LSUNTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.LSUN - def test_combined_targets(self): - target_types = ["attr", "identity", "bbox", "landmarks"] + REQUIRED_PACKAGES = ("lmdb",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) + ) - individual_targets = [] - for target_type in target_types: - with self.create_dataset(target_type=target_type) as (dataset, _): - _, target = dataset[0] - individual_targets.append(target) + _CATEGORIES = ( + "bedroom", + "bridge", + "church_outdoor", + "classroom", + "conference_room", + "dining_room", + "kitchen", + "living_room", + "restaurant", + "tower", + ) - with self.create_dataset(target_type=target_types) as (dataset, _): - _, combined_targets = dataset[0] + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) - actual = len(individual_targets) - expected = len(combined_targets) - assert ( - actual == expected - ), "The number of the returned combined targets does not match the the number targets if requested " - f"individually: {actual} != {expected}", + num_images = 0 + for cls in self._parse_classes(config["classes"]): + num_images += self._create_lmdb(root, cls) - for target_type, 
combined_target, individual_target in zip(target_types, combined_targets, individual_targets): - with self.subTest(target_type=target_type): - actual = type(combined_target) - expected = type(individual_target) - assert ( - actual is expected - ), "Type of the combined target does not match the type of the corresponding individual target: " - f"{actual} is not {expected}", + return num_images - def test_no_target(self): - with self.create_dataset(target_type=[]) as (dataset, _): - _, target = dataset[0] + @ contextlib.contextmanager + def create_dataset(self, *args, **kwargs): + with super().create_dataset(*args, **kwargs) as output: + yield output + # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, + # this creates a number of _cache_* files in the current directory that will not be removed together + # with the temporary directory + for file in os.listdir(os.getcwd()): + if file.startswith("_cache_"): + try: + os.remove(file) + except FileNotFoundError: + # When the same test is run in parallel (in fb internal tests), a thread may remove another + # thread's file. We should be able to remove the try/except when + # https://github.com/pytorch/vision/issues/825 is fixed. + pass - assert target is None + def _parse_classes(self, classes): + if not isinstance(classes, str): + return classes - def test_attr_names(self): - with self.create_dataset() as (dataset, info): - assert tuple(dataset.attr_names) == info["attr_names"] + split = classes + if split == "test": + return [split] - def test_images_names_split(self): - with self.create_dataset(split="all") as (dataset, _): - all_imgs_names = set(dataset.filename) + return [f"{category}_{split}" for category in self._CATEGORIES] - merged_imgs_names = set() - for split in ["train", "valid", "test"]: - with self.create_dataset(split=split) as (dataset, _): - merged_imgs_names.update(dataset.filename) + def _create_lmdb(self, root, cls): + lmdb = datasets_utils.lazy_importer.lmdb + hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] - assert merged_imgs_names == all_imgs_names + folder = f"{cls}_lmdb" + num_images = torch.randint(1, 4, size=()).item() + format = "png" + files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) -class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.VOCSegmentation - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) - - ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid( - year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") - ), - dict(year="2007", image_set="test"), - dict(year="2007-test", image_set="test"), - ) - - def inject_fake_data(self, tmpdir, config): - year, is_test_set = ( - ("2007", True) - if config["year"] == "2007-test" or config["image_set"] == "test" - else (config["year"], False) - ) - image_set = config["image_set"] - - base_dir = pathlib.Path(tmpdir) - if year == "2011": - base_dir /= "TrainVal" - base_dir = base_dir / "VOCdevkit" / f"VOC{year}" - os.makedirs(base_dir) + with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: + for file in files: + key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() - num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) - datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) + buffer = io.BytesIO() + 
PIL.Image.open(file).save(buffer, format) + buffer.seek(0) + value = buffer.read() - datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) - annotation = self._create_annotation_files(base_dir, "Annotations", num_images) + txn.put(key, value) - return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) + os.remove(file) - def _create_image_set_files(self, root, name, is_test_set): - root = pathlib.Path(root) / name - src = pathlib.Path(root) / "Main" - os.makedirs(src, exist_ok=True) + return num_images - idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) - idcs["trainval"] = (*idcs["train"], *idcs["val"]) + def test_not_found_or_corrupted(self): + # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to + # RuntimeError or FileNotFoundError that are normally checked by this test. + with pytest.raises(datasets_utils.lazy_importer.lmdb.Error): + super().test_not_found_or_corrupted() - for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): - self._create_image_set_file(src, image_set, idcs[image_set]) - shutil.copytree(src, root / "Segmentation") +class KineticsTestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) - num_images = max(itertools.chain(*idcs.values())) + 1 - num_images_per_image_set = {image_set: len(idcs_) for image_set, idcs_ in idcs.items()} - return num_images, num_images_per_image_set + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 + tmpdir = pathlib.Path(tmpdir) / config["split"] + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", + num_videos_per_class, + ) + return num_videos_per_class * len(classes) - def _create_image_set_file(self, root, image_set, idcs): - with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: - fh.writelines([f"{idx:06d}\n" for idx in idcs]) - def _create_annotation_files(self, root, name, num_images): - root = pathlib.Path(root) / name - os.makedirs(root) +class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics400 - for idx in range(num_images): - annotation = self._create_annotation_file(root, f"{idx:06d}.xml") + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 - return annotation + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.avi", + num_videos_per_class, + ) - def _create_annotation_file(self, root, name): - def add_child(parent, name, text=None): - child = ET.SubElement(parent, name) - child.text = text - return child + return num_videos_per_class * len(classes) - def add_name(obj, name="dog"): - add_child(obj, "name", name) - return name - def add_bndbox(obj, bndbox=None): - if bndbox is None: - bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} +class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.HMDB51 - obj = add_child(obj, "bndbox") - for name, text in bndbox.items(): - add_child(obj, name, text) + ADDITIONAL_CONFIGS = 
datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) - return bndbox + _VIDEO_FOLDER = "videos" + _SPLITS_FOLDER = "splits" + _CLASSES = ("brush_hair", "wave") - annotation = ET.Element("annotation") - obj = add_child(annotation, "object") - data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._SPLITS_FOLDER + return root, annotation_path - with open(pathlib.Path(root) / name, "wb") as fh: - fh.write(ET.tostring(annotation)) + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) - return data + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + splits_folder = tmpdir / self._SPLITS_FOLDER + os.makedirs(splits_folder) + num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) -class VOCDetectionTestCase(VOCSegmentationTestCase): - DATASET_CLASS = datasets.VOCDetection - FEATURE_TYPES = (PIL.Image.Image, dict) + return num_examples - def test_annotations(self): - with self.create_dataset() as (dataset, info): - _, target = dataset[0] + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" - assert "annotation" in target - annotation = target["annotation"] + return [ + ( + cls, + datasets_utils.create_video_folder( + root, + cls, + lambda idx: file_name_fn(cls, idx), + num_examples_per_class, + ), + ) + for cls in self._CLASSES + ] - assert "object" in annotation - objects = annotation["object"] + def _create_split_files(self, root, video_files, fold, train): + num_videos = num_train_videos = 0 - assert len(objects) == 1 - object = objects[0] + for cls, videos in video_files: + num_videos += len(videos) - assert object == info["annotation"] + train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) + num_train_videos += len(train_videos) + with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: + fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) -class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CocoDetection - FEATURE_TYPES = (PIL.Image.Image, list) + return num_train_videos if train else (num_videos - num_train_videos) - REQUIRED_PACKAGES = ("pycocotools",) - _IMAGE_FOLDER = "images" - _ANNOTATIONS_FOLDER = "annotations" - _ANNOTATIONS_FILE = "annotations.json" +class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Omniglot - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._IMAGE_FOLDER - annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE - return root, annotation_file + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + target_folder = ( + pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" + ) + os.makedirs(target_folder) - num_images = 3 - num_annotations_per_image = 2 + num_images = 0 + for name in ("Alphabet_of_the_Magi", "Tifinagh"): + num_images += self._create_alphabet_folder(target_folder, name) - files = datasets_utils.create_image_folder( - tmpdir, 
name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images - ) - file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] + return num_images - annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER - os.makedirs(annotation_folder) - info = self._create_annotation_file( - annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image - ) + def _create_alphabet_folder(self, root, name): + num_images_total = 0 + for idx in range(torch.randint(1, 4, size=()).item()): + num_images = torch.randint(1, 4, size=()).item() + num_images_total += num_images - info["num_examples"] = num_images - return info + datasets_utils.create_image_folder( + root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images + ) - def _create_annotation_file(self, root, name, file_names, num_annotations_per_image): - image_ids = [int(file_name.stem) for file_name in file_names] - images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + return num_images_total - annotations, info = self._create_annotations(image_ids, num_annotations_per_image) - self._create_json(root, name, dict(images=images, annotations=annotations)) - return info +class SBUTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBU + FEATURE_TYPES = (PIL.Image.Image, str) - def _create_annotations(self, image_ids, num_annotations_per_image): - annotations = datasets_utils.combinations_grid( - image_id=image_ids, bbox=([1.0, 2.0, 3.0, 4.0],) * num_annotations_per_image - ) - for id, annotation in enumerate(annotations): - annotation["id"] = id - return annotations, dict() + def inject_fake_data(self, tmpdir, config): + num_images = 3 - def _create_json(self, root, name, content): - file = pathlib.Path(root) / name - with open(file, "w") as fh: - json.dump(content, fh) - return file + dataset_folder = pathlib.Path(tmpdir) / "dataset" + images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) + self._create_urls_txt(dataset_folder, images) + self._create_captions_txt(dataset_folder, num_images) -class CocoCaptionsTestCase(CocoDetectionTestCase): - DATASET_CLASS = datasets.CocoCaptions - - def _create_annotations(self, image_ids, num_annotations_per_image): - captions = [str(idx) for idx in range(num_annotations_per_image)] - annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) - for id, annotation in enumerate(annotations): - annotation["id"] = id - return annotations, dict(captions=captions) - - def test_captions(self): - with self.create_dataset() as (dataset, info): - _, captions = dataset[0] - assert tuple(captions) == tuple(info["captions"]) + return num_images + def _create_file_name(self, idx): + part1 = datasets_utils.create_random_string(10, string.digits) + part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) + return f"{part1}_{part2}.jpg" -class UCF101TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.UCF101 + def _create_urls_txt(self, root, images): + with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: + for image in images: + fh.write( + f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" + ) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + def _create_captions_txt(self, root, num_images): + with open(root / 
"SBU_captioned_photo_dataset_captions.txt", "w") as fh: + for _ in range(num_images): + fh.write(f"{datasets_utils.create_random_string(10)}\n") - _VIDEO_FOLDER = "videos" - _ANNOTATIONS_FOLDER = "annotations" - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._VIDEO_FOLDER - annotation_path = tmpdir / self._ANNOTATIONS_FOLDER - return root, annotation_path +class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SEMEION def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) + num_images = 3 - video_folder = tmpdir / self._VIDEO_FOLDER - os.makedirs(video_folder) - video_files = self._create_videos(video_folder) + images = torch.rand(num_images, 256) + labels = F.one_hot(torch.randint(10, size=(num_images,))) + with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: + for image, one_hot_labels in zip(images, labels): + image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) + labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) + fh.write(f"{image_columns} {labels_columns}\n") - annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER - os.makedirs(annotations_folder) - num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) + return num_images - return num_examples - def _create_videos(self, root, num_examples_per_class=3): - def file_name_fn(cls, idx, clips_per_group=2): - return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" +class USPSTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.USPS - video_files = [ - datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) - for cls in ("ApplyEyeMakeup", "YoYo") - ] - return [path.relative_to(root) for path in itertools.chain(*video_files)] + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - def _create_annotation_files(self, root, video_files, fold, train): - current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) - current_annotation = self._annotation_file_name(fold, train) - self._create_annotation_file(root, current_annotation, current_videos) + def inject_fake_data(self, tmpdir, config): + num_images = 2 if config["train"] else 1 - other_videos = set(video_files) - set(current_videos) - other_annotations = [ - self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) - ] - other_annotations.remove(current_annotation) - for name in other_annotations: - self._create_annotation_file(root, name, other_videos) + images = torch.rand(num_images, 256) * 2 - 1 + labels = torch.randint(1, 11, size=(num_images,)) - return len(current_videos) + with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: + for image, label in zip(images, labels): + line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) + fh.write(f"{line}\n".encode()) - def _annotation_file_name(self, fold, train): - return f"{'train' if train else 'test'}list{fold:02d}.txt" + return num_images - def _create_annotation_file(self, root, name, video_files): - with open(pathlib.Path(root) / name, "w") as fh: - fh.writelines(f"{str(file).replace(os.sep, '/')}\n" for file in sorted(video_files)) +class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBDataset + FEATURE_TYPES 
= (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) -class LSUNTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.LSUN + REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") - REQUIRED_PACKAGES = ("lmdb",) ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) + image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") ) - _CATEGORIES = ( - "bedroom", - "bridge", - "church_outdoor", - "classroom", - "conference_room", - "dining_room", - "kitchen", - "living_room", - "restaurant", - "tower", - ) + _NUM_CLASSES = 20 def inject_fake_data(self, tmpdir, config): - root = pathlib.Path(tmpdir) - - num_images = 0 - for cls in self._parse_classes(config["classes"]): - num_images += self._create_lmdb(root, cls) - - return num_images + num_images, num_images_per_image_set = self._create_split_files(tmpdir) - @ contextlib.contextmanager - def create_dataset(self, *args, **kwargs): - with super().create_dataset(*args, **kwargs) as output: - yield output - # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, - # this creates a number of _cache_* files in the current directory that will not be removed together - # with the temporary directory - for file in os.listdir(os.getcwd()): - if file.startswith("_cache_"): - try: - os.remove(file) - except FileNotFoundError: - # When the same test is run in parallel (in fb internal tests), a thread may remove another - # thread's file. We should be able to remove the try/except when - # https://github.com/pytorch/vision/issues/825 is fixed. - pass + sizes = self._create_target_folder(tmpdir, "cls", num_images) - def _parse_classes(self, classes): - if not isinstance(classes, str): - return classes + datasets_utils.create_image_folder( + tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] + ) - split = classes - if split == "test": - return [split] + return num_images_per_image_set[config["image_set"]] - return [f"{category}_{split}" for category in self._CATEGORIES] + def _create_split_files(self, root): + root = pathlib.Path(root) - def _create_lmdb(self, root, cls): - lmdb = datasets_utils.lazy_importer.lmdb - hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] + splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) - folder = f"{cls}_lmdb" + for split, idcs in splits.items(): + self._create_split_file(root, split, idcs) - num_images = torch.randint(1, 4, size=()).item() - format = "png" - files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) + num_images = max(itertools.chain(*splits.values())) + 1 + num_images_per_split = {split: len(idcs) for split, idcs in splits.items()} + return num_images, num_images_per_split - with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: - for file in files: - key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() + def _create_split_file(self, root, name, idcs): + with open(root / f"{name}.txt", "w") as fh: + fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) - buffer = io.BytesIO() - PIL.Image.open(file).save(buffer, format) - buffer.seek(0) - value = buffer.read() + def _create_target_folder(self, root, name, num_images): + io = datasets_utils.lazy_importer.scipy.io - txn.put(key, value) + target_folder = pathlib.Path(root) / name + os.makedirs(target_folder) - os.remove(file) + sizes = 
[torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] + for idx, size in enumerate(sizes): + content = dict( + GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) + ) + io.savemat(target_folder / f"{self._file_stem(idx)}.mat", content) - return num_images + return sizes - def test_not_found_or_corrupted(self): - # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to - # RuntimeError or FileNotFoundError that are normally checked by this test. - with pytest.raises(datasets_utils.lazy_importer.lmdb.Error): - super().test_not_found_or_corrupted() + def _create_boundaries(self, size): + sparse = datasets_utils.lazy_importer.scipy.sparse + return [ + [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] + for _ in range(self._NUM_CLASSES) + ] + def _create_segmentation(self, size): + return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() -class KineticsTestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.Kinetics - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) + def _file_stem(self, idx): + return f"2008_{idx:06d}" - def inject_fake_data(self, tmpdir, config): - classes = ("Abseiling", "Zumba") - num_videos_per_class = 2 - tmpdir = pathlib.Path(tmpdir) / config["split"] - digits = string.ascii_letters + string.digits + "-_" - for cls in classes: - datasets_utils.create_video_folder( - tmpdir, - cls, - lambda _: f"{datasets_utils.create_random_string(11, digits)}.mp4", - num_videos_per_class, - ) - return num_videos_per_class * len(classes) +class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FakeData + FEATURE_TYPES = (PIL.Image.Image, int) -class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.Kinetics400 + def dataset_args(self, tmpdir, config): + return () def inject_fake_data(self, tmpdir, config): - classes = ("Abseiling", "Zumba") - num_videos_per_class = 2 + return config["size"] - digits = string.ascii_letters + string.digits + "-_" - for cls in classes: - datasets_utils.create_video_folder( - tmpdir, - cls, - lambda _: f"{datasets_utils.create_random_string(11, digits)}.avi", - num_videos_per_class, - ) + def test_not_found_or_corrupted(self): + self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") - return num_videos_per_class * len(classes) +class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PhotoTour -class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): - DATASET_CLASS = datasets.HMDB51 + # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, + # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we + # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. 
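+    # A rough sketch of the override described above, assuming the
+    # `test_all_configs` decorator from the shared test utilities (the actual
+    # method lies outside this hunk's context; the sketch is shown only to make
+    # the note above concrete):
+    #
+    #     @datasets_utils.test_all_configs
+    #     def test_feature_types(self, config):
+    #         feature_types = self.FEATURE_TYPES
+    #         self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES
+    #         try:
+    #             super().test_feature_types.__wrapped__(self, config)
+    #         finally:
+    #             self.FEATURE_TYPES = feature_types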
+ FEATURE_TYPES = () + _TRAIN_FEATURE_TYPES = (torch.Tensor,) + _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - _VIDEO_FOLDER = "videos" - _SPLITS_FOLDER = "splits" - _CLASSES = ("brush_hair", "wave") - - def dataset_args(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - root = tmpdir / self._VIDEO_FOLDER - annotation_path = tmpdir / self._SPLITS_FOLDER - return root, annotation_path - - def inject_fake_data(self, tmpdir, config): - tmpdir = pathlib.Path(tmpdir) - - video_folder = tmpdir / self._VIDEO_FOLDER - os.makedirs(video_folder) - video_files = self._create_videos(video_folder) - - splits_folder = tmpdir / self._SPLITS_FOLDER - os.makedirs(splits_folder) - num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) - - return num_examples - - def _create_videos(self, root, num_examples_per_class=3): - def file_name_fn(cls, idx, clips_per_group=2): - return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" - - return [ - ( - cls, - datasets_utils.create_video_folder( - root, - cls, - lambda idx: file_name_fn(cls, idx), - num_examples_per_class, - ), - ) - for cls in self._CLASSES - ] - - def _create_split_files(self, root, video_files, fold, train): - num_videos = num_train_videos = 0 - - for cls, videos in video_files: - num_videos += len(videos) - - train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) - num_train_videos += len(train_videos) - - with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: - fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) - - return num_train_videos if train else (num_videos - num_train_videos) - - -class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Omniglot - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) - - def inject_fake_data(self, tmpdir, config): - target_folder = ( - pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" - ) - os.makedirs(target_folder) - - num_images = 0 - for name in ("Alphabet_of_the_Magi", "Tifinagh"): - num_images += self._create_alphabet_folder(target_folder, name) - - return num_images - - def _create_alphabet_folder(self, root, name): - num_images_total = 0 - for idx in range(torch.randint(1, 4, size=()).item()): - num_images = torch.randint(1, 4, size=()).item() - num_images_total += num_images - - datasets_utils.create_image_folder( - root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images - ) - - return num_images_total - - -class SBUTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SBU - FEATURE_TYPES = (PIL.Image.Image, str) - - def inject_fake_data(self, tmpdir, config): - num_images = 3 - - dataset_folder = pathlib.Path(tmpdir) / "dataset" - images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) - - self._create_urls_txt(dataset_folder, images) - self._create_captions_txt(dataset_folder, num_images) - - return num_images - - def _create_file_name(self, idx): - part1 = datasets_utils.create_random_string(10, string.digits) - part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) - return f"{part1}_{part2}.jpg" - - def _create_urls_txt(self, 
root, images): - with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: - for image in images: - fh.write( - f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" - ) - - def _create_captions_txt(self, root, num_images): - with open(root / "SBU_captioned_photo_dataset_captions.txt", "w") as fh: - for _ in range(num_images): - fh.write(f"{datasets_utils.create_random_string(10)}\n") - - -class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SEMEION - - def inject_fake_data(self, tmpdir, config): - num_images = 3 - - images = torch.rand(num_images, 256) - labels = F.one_hot(torch.randint(10, size=(num_images,))) - with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: - for image, one_hot_labels in zip(images, labels): - image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) - labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) - fh.write(f"{image_columns} {labels_columns}\n") - - return num_images - - -class USPSTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.USPS - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - - def inject_fake_data(self, tmpdir, config): - num_images = 2 if config["train"] else 1 - - images = torch.rand(num_images, 256) * 2 - 1 - labels = torch.randint(1, 11, size=(num_images,)) - - with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: - for image, label in zip(images, labels): - line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) - fh.write(f"{line}\n".encode()) - - return num_images - - -class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.SBDataset - FEATURE_TYPES = (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) - - REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") - ) - - _NUM_CLASSES = 20 - - def inject_fake_data(self, tmpdir, config): - num_images, num_images_per_image_set = self._create_split_files(tmpdir) - - sizes = self._create_target_folder(tmpdir, "cls", num_images) - - datasets_utils.create_image_folder( - tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] - ) - - return num_images_per_image_set[config["image_set"]] - - def _create_split_files(self, root): - root = pathlib.Path(root) - - splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) - - for split, idcs in splits.items(): - self._create_split_file(root, split, idcs) - - num_images = max(itertools.chain(*splits.values())) + 1 - num_images_per_split = {split: len(idcs) for split, idcs in splits.items()} - return num_images, num_images_per_split - - def _create_split_file(self, root, name, idcs): - with open(root / f"{name}.txt", "w") as fh: - fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) - - def _create_target_folder(self, root, name, num_images): - io = datasets_utils.lazy_importer.scipy.io - - target_folder = pathlib.Path(root) / name - os.makedirs(target_folder) - - sizes = [torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] - for idx, size in enumerate(sizes): - content = dict( - GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) - ) - io.savemat(target_folder / f"{self._file_stem(idx)}.mat", content) - - return 
sizes - - def _create_boundaries(self, size): - sparse = datasets_utils.lazy_importer.scipy.sparse - return [ - [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] - for _ in range(self._NUM_CLASSES) - ] - - def _create_segmentation(self, size): - return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() - - def _file_stem(self, idx): - return f"2008_{idx:06d}" - - -class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.FakeData - FEATURE_TYPES = (PIL.Image.Image, int) - - def dataset_args(self, tmpdir, config): - return () - - def inject_fake_data(self, tmpdir, config): - return config["size"] - - def test_not_found_or_corrupted(self): - self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") - - -class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.PhotoTour - - # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, - # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we - # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. - FEATURE_TYPES = () - _TRAIN_FEATURE_TYPES = (torch.Tensor,) - _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) - - datasets_utils.combinations_grid(train=(True, False)) - - _NAME = "liberty" + _NAME = "liberty" def dataset_args(self, tmpdir, config): return tmpdir, self._NAME @@ -2898,341 +2335,1042 @@ def inject_fake_data(self, tmpdir: str, config): ) ) - meta_folder = data_folder / "labels" - meta_folder.mkdir() - image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files] - image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2) - with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file: - file.write("\n".join(image_ids_in_config) + "\n") + meta_folder = data_folder / "labels" + meta_folder.mkdir() + image_ids = [str(path.relative_to(path.parents[1])).replace(os.sep, "/") for path in image_files] + image_ids_in_config = random.choices(image_ids, k=len(image_files) // 2) + with open(meta_folder / f"{config['split']}{config['partition']}.txt", "w") as file: + file.write("\n".join(image_ids_in_config) + "\n") + + return len(image_ids_in_config) + + +class FER2013TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FER2013 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "fer2013") + os.makedirs(base_folder) + + num_samples = 5 + with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file: + writer = csv.DictWriter( + file, + fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",), + quoting=csv.QUOTE_NONNUMERIC, + quotechar='"', + ) + writer.writeheader() + for _ in range(num_samples): + row = dict( + pixels=" ".join( + str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() + ) + ) + if config["split"] == "train": + row["emotion"] = str(int(torch.randint(0, 7, ()))) + + writer.writerow(row) + + return num_samples + + +class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.GTSRB + FEATURE_TYPES = (PIL.Image.Image, int) + + 
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir: str, config): + root_folder = os.path.join(tmpdir, "gtsrb") + os.makedirs(root_folder, exist_ok=True) + + # Train data + train_folder = os.path.join(root_folder, "GTSRB", "Training") + os.makedirs(train_folder, exist_ok=True) + + num_examples = 3 if config["split"] == "train" else 4 + classes = ("00000", "00042", "00012") + for class_idx in classes: + datasets_utils.create_image_folder( + train_folder, + name=class_idx, + file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + num_examples=num_examples, + ) + + total_number_of_examples = num_examples * len(classes) + # Test data + test_folder = os.path.join(root_folder, "GTSRB", "Final_Test", "Images") + os.makedirs(test_folder, exist_ok=True) + + with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file: + csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n") + + for _ in range(total_number_of_examples): + image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm" + datasets_utils.create_image_file(test_folder, image_file) + row = [ + image_file, + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(1, 100, size=()).item(), + torch.randint(0, 43, size=()).item(), + ] + csv_file.write(";".join(map(str, row)) + "\n") + + return total_number_of_examples + + +class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CLEVRClassification + FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + + def inject_fake_data(self, tmpdir, config): + data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" + + images_folder = data_folder / "images" + image_files = datasets_utils.create_image_folder( + images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5 + ) + + scenes_folder = data_folder / "scenes" + scenes_folder.mkdir() + if config["split"] != "test": + with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file: + json.dump( + dict( + info=dict(), + scenes=[ + dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ()))) + for image_file in image_files + ], + ), + file, + ) + + return len(image_files) + + +class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.OxfordIIITPet + FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("trainval", "test"), + target_types=("category", "segmentation", ["category", "segmentation"], []), + ) + + def inject_fake_data(self, tmpdir, config): + base_folder = os.path.join(tmpdir, "oxford-iiit-pet") + + classification_anns_meta = ( + dict(cls="Abyssinian", label=0, species="cat"), + dict(cls="Keeshond", label=18, species="dog"), + dict(cls="Yorkshire Terrier", label=37, species="dog"), + ) + split_and_classification_anns = [ + self._meta_to_split_and_classification_ann(meta, idx) + for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) + ] + image_ids, *_ = zip(*split_and_classification_anns) + + image_files = datasets_utils.create_image_folder( + base_folder, "images", file_name_fn=lambda idx: 
f"{image_ids[idx]}.jpg", num_examples=len(image_ids) + ) + + anns_folder = os.path.join(base_folder, "annotations") + os.makedirs(anns_folder) + split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2) + with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file: + writer = csv.writer(file, delimiter=" ") + for split_and_classification_ann in split_and_classification_anns_in_split: + writer.writerow(split_and_classification_ann) + + segmentation_files = datasets_utils.create_image_folder( + anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) + ) + + # The dataset has some rogue files + for path in image_files[:2]: + path.with_suffix(".mat").touch() + for path in segmentation_files: + path.with_name(f".{path.name}").touch() + + return len(split_and_classification_anns_in_split) + + def _meta_to_split_and_classification_ann(self, meta, idx): + image_id = "_".join( + [ + *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], + str(idx), + ] + ) + class_id = str(meta["label"] + 1) + species = "1" if meta["species"] == "cat" else "2" + breed_id = "-1" + return (image_id, class_id, species, breed_id) + + +class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StanfordCars + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + + def inject_fake_data(self, tmpdir, config): + import scipy.io as io + from numpy.core.records import fromarrays + + num_examples = {"train": 5, "test": 7}[config["split"]] + num_classes = 3 + base_folder = pathlib.Path(tmpdir) / "stanford_cars" + + devkit = base_folder / "devkit" + devkit.mkdir(parents=True) + + if config["split"] == "train": + images_folder_name = "cars_train" + annotations_mat_path = devkit / "cars_train_annos.mat" + else: + images_folder_name = "cars_test" + annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" + + datasets_utils.create_image_folder( + root=base_folder, + name=images_folder_name, + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, + ) + + classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) + fnames = [f"{i:5d}.jpg" for i in range(num_examples)] + rec_array = fromarrays( + [classes, fnames], + names=["class", "fname"], + ) + io.savemat(annotations_mat_path, {"annotations": rec_array}) + + random_class_names = ["random_name"] * num_classes + io.savemat(devkit / "cars_meta.mat", {"class_names": random_class_names}) + + return num_examples + + +class Country211TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Country211 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test")) + + def inject_fake_data(self, tmpdir: str, config): + split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] + split_folder.mkdir(parents=True, exist_ok=True) + + num_examples = { + "train": 3, + "valid": 4, + "test": 5, + }[config["split"]] + + classes = ("AD", "BS", "GR") + for cls in classes: + datasets_utils.create_image_folder( + split_folder, + name=cls, + file_name_fn=lambda idx: f"{idx}.jpg", + num_examples=num_examples, + ) + + return num_examples * len(classes) + + +class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Flowers102 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", 
"val", "test")) + REQUIRED_PACKAGES = ("scipy",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "flowers-102" + + num_classes = 3 + num_images_per_split = dict(train=5, val=4, test=3) + num_images_total = sum(num_images_per_split.values()) + datasets_utils.create_image_folder( + base_folder, + "jpg", + file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg", + num_examples=num_images_total, + ) + + label_dict = dict( + labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict) + + setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16) + np.random.shuffle(setid_mat) + setid_dict = dict( + trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1), + valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1), + tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1), + ) + datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict) + + return num_images_per_split[config["split"]] + + +class PCAMTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PCAM + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + REQUIRED_PACKAGES = ("h5py",) + + def inject_fake_data(self, tmpdir: str, config): + base_folder = pathlib.Path(tmpdir) / "pcam" + base_folder.mkdir() + + num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + + images_file = datasets.PCAM._FILES[config["split"]]["images"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / images_file), "w") as f: + f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + + targets_file = datasets.PCAM._FILES[config["split"]]["targets"][0] + with datasets_utils.lazy_importer.h5py.File(str(base_folder / targets_file), "w") as f: + f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + + return num_images + + +class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.RenderedSST2 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} + + def inject_fake_data(self, tmpdir: str, config): + root_folder = pathlib.Path(tmpdir) / "rendered-sst2" + image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]] + + num_images_per_class = {"train": 5, "test": 6, "val": 7} + sampled_classes = ["positive", "negative"] + for cls in sampled_classes: + datasets_utils.create_image_folder( + image_folder, + cls, + file_name_fn=lambda idx: f"{idx}.png", + num_examples=num_images_per_class[config["split"]], + ) + + return len(sampled_classes) * num_images_per_class[config["split"]] + + +class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoETH3D + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + # create the scene folder + image_paths = [] + # make the root_dir if it does not exits + os.makedirs(root_dir, exist_ok=True) + + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with left right 
images + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(100, 100))) + image_paths.append(datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(100, 100))) + return image_paths + + @staticmethod + def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]: + paths = [] + # make the root_dir if it does not exist + os.makedirs(root_dir, exist_ok=True) + + # create scene directories + for i in range(num_examples): + scene_dir = os.path.join(root_dir, f"scene_{i}") + os.makedirs(scene_dir, exist_ok=True) + # populate with a random png file for occlusion mask, and a pfm file for disparity + paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100))) + pfm_path = os.path.join(scene_dir, "disp0GT.pfm") + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) + paths.append(pfm_path) + return paths + + def inject_fake_data(self, tmpdir, config): + eth3d_dir = os.path.join(tmpdir, "ETH3D") + + num_examples = 2 if config["split"] == "train" else 3 + + split_name = "two_view_training" if config["split"] == "train" else "two_view_test" + split_dir = os.path.join(eth3d_dir, split_name) + self._create_scene_folder(num_examples, split_dir) + + if config["split"] == "train": + annot_dir = os.path.join(eth3d_dir, "two_view_training_gt") + self._create_annotation_folder(num_examples, annot_dir) + + return num_examples + + def test_training_test_splits(self): + with self.create_dataset(split="train") as (dataset, _): + assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match training disparities" + for _, _, disparity, valid_mask in dataset: + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + with self.create_dataset(split="test") as (dataset, _): + assert all(d == ("", "") for d in dataset._disparities) + for _, _, disparity, valid_mask in dataset: + assert disparity is None + assert valid_mask is None + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class CREStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CREStereo + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" + os.makedirs(crestereo_dir, exist_ok=True) + + split_dir = crestereo_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"tree": 2, "shapenet": 3, "reflective": 6, "hole": 5}.get(config["split"], 0) + + for idx in range(num_examples): + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) + # these are going to end up being gray scale images + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + + return num_examples + + def test_splits(self): + for split in ("tree", "shapenet", "reflective", "hole"): + with 
self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoMiddlebury2014 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "additional"), + calibration=("perfect", "imperfect", "both"), + use_ambient_views=(True, False), + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + @staticmethod + def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: + calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] + scene_dirs = [] + for c in calibrations: + scene_dir = os.path.join(root_dir, f"{scene_name}{c}") + os.makedirs(scene_dir, exist_ok=True) + # make normal images first + datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + # these are going to end up being gray scale images + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) + datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) + scene_dirs.append(scene_dir) + return scene_dirs + + def inject_fake_data(self, tmpdir, config): + split_scene_map = { + "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano"], + "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + } + + middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") + os.makedirs(middlebury_dir, exist_ok=True) + + split_dir = middlebury_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 2, "additional": 3, "test": 4}.get(config["split"], 0) + for idx in range(num_examples): + scene_name = split_scene_map[config["split"]][idx] + self._make_scene_folder(root_dir=split_dir, scene_name=scene_name, split=config["split"]) + + if config["calibration"] == "both": + num_examples *= 2 + return num_examples + + def test_train_splits(self): + for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): + with self.create_dataset(split=split, calibration=calibration) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert 
disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split, calibration=None) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def test_augmented_view_usage(self): + with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): + for left, right, _, _ in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + + def test_warnings_train(self): + # a train split with no calibration argument should warn and fall back to 'perfect' + split = "train" + calibration = None + with pytest.warns( + RuntimeWarning, + match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", + ): + with self.create_dataset(split=split, calibration=calibration): + pass + + def test_warnings_test(self): + # the test split has no calibration settings, so passing one should warn + split = "test" + calibration = "perfect" + with pytest.warns( + RuntimeWarning, + match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." + ): + with self.create_dataset(split=split, calibration=calibration): + pass + + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass + + +class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2012 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = pathlib.Path(tmpdir) / "Kitti2012" + os.makedirs(kitti_dir, exist_ok=True) + + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) + + num_examples = {"train": 4, "test": 3}.get(config["split"], 0) + + datasets_utils.create_image_folder( + root=split_dir, + name="colored_0", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="colored_1", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_noc", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2012 uses a single channel image for disparities + size=(1, 100, 200), + ) + + return num_examples + + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + 
assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + assert disparity is None + assert valid_mask is None - return len(image_ids_in_config) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass -class FER2013TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.FER2013 +class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoKitti2015 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - - FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): - base_folder = os.path.join(tmpdir, "fer2013") - os.makedirs(base_folder) - - num_samples = 5 - with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file: - writer = csv.DictWriter( - file, - fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",), - quoting=csv.QUOTE_NONNUMERIC, - quotechar='"', - ) - writer.writeheader() - for _ in range(num_samples): - row = dict( - pixels=" ".join( - str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist() - ) - ) - if config["split"] == "train": - row["emotion"] = str(int(torch.randint(0, 7, ()))) - - writer.writerow(row) - - return num_samples - + kitti_dir = pathlib.Path(tmpdir) / "Kitti2015" + os.makedirs(kitti_dir, exist_ok=True) -class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.GTSRB - FEATURE_TYPES = (PIL.Image.Image, int) + split_dir = kitti_dir / (config["split"] + "ing") + os.makedirs(split_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + num_examples = {"train": 4, "test": 6}.get(config["split"], 0) - def inject_fake_data(self, tmpdir: str, config): - root_folder = os.path.join(tmpdir, "gtsrb") - os.makedirs(root_folder, exist_ok=True) + datasets_utils.create_image_folder( + root=split_dir, + name="image_2", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) + datasets_utils.create_image_folder( + root=split_dir, + name="image_3", + file_name_fn=lambda i: f"{i:06d}_10.png", + num_examples=num_examples, + size=(3, 100, 200), + ) - # Train data - train_folder = os.path.join(root_folder, "GTSRB", "Training") - os.makedirs(train_folder, exist_ok=True) + if config["split"] == "train": + datasets_utils.create_image_folder( + root=split_dir, + name="disp_occ_0", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), + ) - num_examples = 3 if config["split"] == "train" else 4 - classes = ("00000", "00042", "00012") - for class_idx in classes: datasets_utils.create_image_folder( - train_folder, - name=class_idx, - 
file_name_fn=lambda image_idx: f"{class_idx}_{image_idx:05d}.ppm", + root=split_dir, + name="disp_occ_1", + file_name_fn=lambda i: f"{i:06d}.png", num_examples=num_examples, + # Kitti2015 uses a single channel image for disparities + size=(1, 100, 200), ) - total_number_of_examples = num_examples * len(classes) - # Test data - test_folder = os.path.join(root_folder, "GTSRB", "Final_Test", "Images") - os.makedirs(test_folder, exist_ok=True) + return num_examples - with open(os.path.join(root_folder, "GT-final_test.csv"), "w") as csv_file: - csv_file.write("Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId\n") + def test_train_splits(self): + for split in ["train"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - for _ in range(total_number_of_examples): - image_file = datasets_utils.create_random_string(5, string.digits) + ".ppm" - datasets_utils.create_image_file(test_folder, image_file) - row = [ - image_file, - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(1, 100, size=()).item(), - torch.randint(0, 43, size=()).item(), - ] - csv_file.write(";".join(map(str, row)) + "\n") + def test_test_split(self): + for split in ["test"]: + with self.create_dataset(split=split) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert disparity is None + assert valid_mask is None - return total_number_of_examples + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass -class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.CLEVRClassification - FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) +class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSceneFlow + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("FlyingThings3D", "Driving", "Monkaa"), + pass_name=("clean", "final") + ) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + @staticmethod + def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) - def inject_fake_data(self, tmpdir, config): - data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" + paths = [] + for i in range(num_examples): + datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i)) + paths.append(str(root / file_name_fn(i))) + return paths - 
images_folder = data_folder / "images" - image_files = datasets_utils.create_image_folder( - images_folder, config["split"], lambda idx: f"CLEVR_{config['split']}_{idx:06d}.png", num_examples=5 - ) + def inject_fake_data(self, tmpdir, config): + scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow" + os.makedirs(scene_flow_dir, exist_ok=True) - scenes_folder = data_folder / "scenes" - scenes_folder.mkdir() - if config["split"] != "test": - with open(scenes_folder / f"CLEVR_{config['split']}_scenes.json", "w") as file: - json.dump( - dict( - info=dict(), - scenes=[ - dict(image_filename=image_file.name, objects=[dict()] * int(torch.randint(10, ()))) - for image_file in image_files - ], - ), - file, - ) + split_dir = scene_flow_dir / config["split"] + os.makedirs(split_dir, exist_ok=True) - return len(image_files) + pass_dir_map = { + "clean": "frames_cleanpass", + "final": "frames_finalpass", + } + num_examples = 1 + pass_dir_name = pass_dir_map.get(config["pass_name"], None) -class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.OxfordIIITPet - FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) + # create pass directories + pass_dir = split_dir / pass_dir_name + disp_dir = split_dir / "disparity" + os.makedirs(pass_dir, exist_ok=True) + os.makedirs(disp_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("trainval", "test"), - target_types=("category", "segmentation", ["category", "segmentation"], []), - ) + num_examples = {"FlyingThings3D": 4, "Driving": 6, "Monkaa": 5}.get(config["split"], 0) - def inject_fake_data(self, tmpdir, config): - base_folder = os.path.join(tmpdir, "oxford-iiit-pet") + for direction in ["left", "right"]: + for scene_idx in range(num_examples): + os.makedirs(pass_dir / f"scene_{scene_idx:06d}", exist_ok=True) + datasets_utils.create_image_folder( + root=pass_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=1, + size=(3, 200, 100), + ) - classification_anns_meta = ( - dict(cls="Abyssinian", label=0, species="cat"), - dict(cls="Keeshond", label=18, species="dog"), - dict(cls="Yorkshire Terrier", label=37, species="dog"), - ) - split_and_classification_anns = [ - self._meta_to_split_and_classification_ann(meta, idx) - for meta, idx in itertools.product(classification_anns_meta, (1, 2, 10)) - ] - image_ids, *_ = zip(*split_and_classification_anns) + os.makedirs(disp_dir / f"scene_{scene_idx:06d}", exist_ok=True) + self._create_pfm_folder( + root=disp_dir / f"scene_{scene_idx:06d}", + name=direction, + file_name_fn=lambda i: f"{i:06d}.pfm", + num_examples=1, + size=(100, 200), + ) - image_files = datasets_utils.create_image_folder( - base_folder, "images", file_name_fn=lambda idx: f"{image_ids[idx]}.jpg", num_examples=len(image_ids) - ) + return num_examples - anns_folder = os.path.join(base_folder, "annotations") - os.makedirs(anns_folder) - split_and_classification_anns_in_split = random.choices(split_and_classification_anns, k=len(image_ids) // 2) - with open(os.path.join(anns_folder, f"{config['split']}.txt"), "w", newline="") as file: - writer = csv.writer(file, delimiter=" ") - for split_and_classification_ann in split_and_classification_anns_in_split: - writer.writerow(split_and_classification_ann) + def test_splits(self): + for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): + with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, 
_): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - segmentation_files = datasets_utils.create_image_folder( - anns_folder, "trimaps", file_name_fn=lambda idx: f"{image_ids[idx]}.png", num_examples=len(image_ids) - ) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass - # The dataset has some rogue files - for path in image_files[:2]: - path.with_suffix(".mat").touch() - for path in segmentation_files: - path.with_name(f".{path.name}").touch() - return len(split_and_classification_anns_in_split) +class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoFallingThings + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) - def _meta_to_split_and_classification_ann(self, meta, idx): - image_id = "_".join( - [ - *[(str.title if meta["species"] == "cat" else str.lower)(part) for part in meta["cls"].split()], - str(idx), - ] - ) - class_id = str(meta["label"] + 1) - species = "1" if meta["species"] == "cat" else "2" - breed_id = "-1" - return (image_id, class_id, species, breed_id) + @staticmethod + def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]): + file = pathlib.Path(root) / name + image = np.ones((size[0], size[1]), dtype=np.uint8) + PIL.Image.fromarray(image).save(file) + return file + + @staticmethod + def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> List[str]: + paths = [] + root = pathlib.Path(root) / scene_name + os.makedirs(root, exist_ok=True) + # jpg images + paths.append(datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0]))) + paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))) + # single channel depth maps + paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))) + paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))) + # camera settings json. 
Minimal example for _read_disparity function testing + settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} + with open(root / "_camera_settings.json", "w") as f: + json.dump(settings_json, f) -class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StanfordCars - REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + return paths def inject_fake_data(self, tmpdir, config): - import scipy.io as io - from numpy.core.records import fromarrays + fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" + os.makedirs(fallingthings_dir, exist_ok=True) - num_examples = {"train": 5, "test": 7}[config["split"]] - num_classes = 3 - base_folder = pathlib.Path(tmpdir) / "stanford_cars" + split_dir = pathlib.Path(fallingthings_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) - devkit = base_folder / "devkit" - devkit.mkdir(parents=True) + num_examples = {"single": 2, "mixed": 3}.get(config["split"], 0) - if config["split"] == "train": - images_folder_name = "cars_train" - annotations_mat_path = devkit / "cars_train_annos.mat" - else: - images_folder_name = "cars_test" - annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" + for i in range(num_examples): + self._make_scene_folder( + root=split_dir, + scene_name=f"scene_{i:06d}", + size=(100, 200), + ) - datasets_utils.create_image_folder( - root=base_folder, - name=images_folder_name, - file_name_fn=lambda image_index: f"{image_index:5d}.jpg", - num_examples=num_examples, - ) + return num_examples - classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) - fnames = [f"{i:5d}.jpg" for i in range(num_examples)] - rec_array = fromarrays( - [classes, fnames], - names=["class", "fname"], - ) - io.savemat(annotations_mat_path, {"annotations": rec_array}) + def test_splits(self): + for split_name in ["single", "mixed"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - random_class_names = ["random_name"] * num_classes - io.savemat(devkit / "cars_meta.mat", {"class_names": random_class_names}) + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass - return num_examples +class StereoSintelTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StereoSintel + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) -class Country211TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Country211 + def inject_fake_data(self, tmpdir, config): + sintel_dir = pathlib.Path(tmpdir) / "Sintel" + os.makedirs(sintel_dir, exist_ok=True) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test")) + split_dir = pathlib.Path(sintel_dir) / "training" + os.makedirs(split_dir, exist_ok=True) - def inject_fake_data(self, tmpdir: str, config): - 
split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] - split_folder.mkdir(parents=True, exist_ok=True) + # a single setting, since there are no splits + num_examples = 4 - num_examples = { - "train": 3, - "valid": 4, - "test": 5, - }[config["split"]] + for view in ["final_left", "final_right"]: + root = split_dir / view + os.makedirs(root, exist_ok=True) - classes = ("AD", "BS", "GR") - for cls in classes: datasets_utils.create_image_folder( - split_folder, - name=cls, - file_name_fn=lambda idx: f"{idx}.jpg", + root=root, + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", num_examples=num_examples, + size=(3, 100, 200), ) - return num_examples * len(classes) - - -class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.Flowers102 - - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - REQUIRED_PACKAGES = ("scipy",) - - def inject_fake_data(self, tmpdir: str, config): - base_folder = pathlib.Path(tmpdir) / "flowers-102" - - num_classes = 3 - num_images_per_split = dict(train=5, val=4, test=3) - num_images_total = sum(num_images_per_split.values()) datasets_utils.create_image_folder( - base_folder, - "jpg", - file_name_fn=lambda idx: f"image_{idx + 1:05d}.jpg", - num_examples=num_images_total, + root=split_dir / "occlusions", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(1, 100, 200), ) - label_dict = dict( - labels=np.random.randint(1, num_classes + 1, size=(1, num_images_total), dtype=np.uint8), + datasets_utils.create_image_folder( + root=split_dir / "outofframe", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(1, 100, 200), ) - datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "imagelabels.mat"), label_dict) - setid_mat = np.arange(1, num_images_total + 1, dtype=np.uint16) - np.random.shuffle(setid_mat) - setid_dict = dict( - trnid=setid_mat[: num_images_per_split["train"]].reshape(1, -1), - valid=setid_mat[num_images_per_split["train"] : -num_images_per_split["test"]].reshape(1, -1), - tstid=setid_mat[-num_images_per_split["test"] :].reshape(1, -1), + datasets_utils.create_image_folder( + root=split_dir / "disparities", + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples, + size=(3, 100, 200), ) - datasets_utils.lazy_importer.scipy.io.savemat(str(base_folder / "setid.mat"), setid_dict) - - return num_images_per_split[config["split"]] + return num_examples -class PCAMTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.PCAM + def test_splits(self): + with self.create_dataset() as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - REQUIRED_PACKAGES = ("h5py",) - def inject_fake_data(self, tmpdir: str, config): - base_folder = pathlib.Path(tmpdir) / "pcam" - base_folder.mkdir() +class InStereo2k(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = 
datasets.InStereo2k + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - num_images = {"train": 2, "test": 3, "val": 4}[config["split"]] + @staticmethod + def _make_scene_folder(root: str, name: str, size: Tuple[int, int]): + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) - images_file = datasets.PCAM._FILES[config["split"]]["images"][0] - with datasets_utils.lazy_importer.h5py.File(str(base_folder / images_file), "w") as f: - f["x"] = np.random.randint(0, 256, size=(num_images, 10, 10, 3), dtype=np.uint8) + datasets_utils.create_image_file(root=root, name="left.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right.png", size=(3, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="left_disp.png", size=(1, size[0], size[1])) + datasets_utils.create_image_file(root=root, name="right_disp.png", size=(1, size[0], size[1])) - targets_file = datasets.PCAM._FILES[config["split"]]["targets"][0] - with datasets_utils.lazy_importer.h5py.File(str(base_folder / targets_file), "w") as f: - f["y"] = np.random.randint(0, 2, size=(num_images, 1, 1, 1), dtype=np.uint8) + def inject_fake_data(self, tmpdir, config): + in_stereo_dir = pathlib.Path(tmpdir) / "InStereo2k" + os.makedirs(in_stereo_dir, exist_ok=True) - return num_images + split_dir = pathlib.Path(in_stereo_dir) / config["split"] + os.makedirs(split_dir, exist_ok=True) + num_examples = {"train": 4, "test": 5}.get(config["split"], 0) -class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.RenderedSST2 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) - SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} + for i in range(num_examples): + self._make_scene_folder(split_dir, f"scene_{i:06d}", (100, 200)) - def inject_fake_data(self, tmpdir: str, config): - root_folder = pathlib.Path(tmpdir) / "rendered-sst2" - image_folder = root_folder / self.SPLIT_TO_FOLDER[config["split"]] + return num_examples - num_images_per_class = {"train": 5, "test": 6, "val": 7} - sampled_classes = ["positive", "negative"] - for cls in sampled_classes: - datasets_utils.create_image_folder( - image_folder, - cls, - file_name_fn=lambda idx: f"{idx}.png", - num_examples=num_images_per_class[config["split"]], - ) + def test_splits(self): + for split_name in ["train", "test"]: + with self.create_dataset(split=split_name) as (dataset, _): + for left, right, disparity, valid_mask in dataset: + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw - return len(sampled_classes) * num_images_per_class[config["split"]] + def test_bad_input(self): + with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): + with self.create_dataset(split="bad"): + pass if __name__ == "__main__": diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index a7dd8397bab..8b38ba73a85 100644 --- a/torchvision/datasets/__init__.py +++ 
b/torchvision/datasets/__init__.py @@ -1,5 +1,5 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K -from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereoSynthetic +from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereo, InStereo2k from .caltech import Caltech101, Caltech256 from .celeba import CelebA from .cifar import CIFAR10, CIFAR100 @@ -106,4 +106,13 @@ "FGVCAircraft", "EuroSAT", "RenderedSST2", + "StereoETH3D", + "StereoFallingThings", + "StereoKitti2012", + "StereoKitti2015", + "StereoMiddlebury2014", + "StereoSceneFlow", + "StereoSintel", + "CREStereo", + "InStereo2k", ) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 702386b05bd..4de0b5b0532 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,31 +1,30 @@ from abc import ABC, abstractmethod from glob import glob from pathlib import Path -import pathlib import random import re import shutil -from typing import Callable, List, Optional, Tuple, Any +from typing import Callable, List, Optional, Tuple import warnings from jsonschema import ValidationError from torch import Tensor from .vision import VisionDataset -from .utils import download_and_extract_archive, download_url, verify_str_arg +from .utils import download_and_extract_archive, verify_str_arg import os import numpy as np from PIL import Image import json __all__ = ( - "CREStereo" # waiting for download / need to find valid mask procedure + "CREStereo" "StereoMiddlebury2014" "StereoETH3D" "StereoKitti2012" "StereoKitti2015" "StereoSintel" - "StereoSceneFlow" # need to find valid mask procedure + "StereoSceneFlow" "StereoFallingThings" - "InStereo2k" # need to find valid mask procedure + "InStereo2k" ) @@ -54,13 +53,38 @@ def read_pfm_file(file_path: str) -> np.array: data = np.reshape(data, (height, width, channels)) data = np.flipud(data) - return data + # PFM files for disparity maps should contain only a single channel + # they should also be returned in (C, H, W) format + return np.transpose(data[:, :, :1], (2, 0, 1)) class StereoMatchingDataset(ABC, VisionDataset): """Base interface for Stereo matching datasets""" def __init__(self, root: str, transforms: Optional[Callable] = None): + """ + + Args: + root(str): Root directory of the dataset. + transforms(callable, optional): A function/transform that takes in Tuples of + (images, disparities, valid_masks) and returns a transformed version of each of them. + images is a Tuple of (``PIL.Image``, ``PIL.Image``) + disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W) + valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W) + + In some cases, when a dataset does not provide disparties, the ``disparities`` and + ``valid_masks`` can be Tuples containing None values. 
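+
+            For illustration only (not part of the API), an identity ``transforms``
+            callable matching this contract would look like::
+
+                def identity_transform(images, disparities, valid_masks):
+                    # images: (PIL.Image, PIL.Image); disparities / valid_masks:
+                    # tuples of np.ndarray, or None on unannotated splits
+                    return images, disparities, valid_masks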
+ + For training splits generally the datasets provide a minimal guarantee of + images: (``PIL.Image``, ``PIL.Image``) + disparities: (``np.ndarray``, ``None``) with shape (1, H, W) + valid_masks: (``np.ndarray``, ``None``) with shape (H, W) + + For some test splits, the datasets provides outputs that look like: + imgaes: (``PIL.Image``, ``PIL.Image``) + disparities: (``None``, ``None``) + valid_masks: (``None``, ``None``) + """ super().__init__(root=root) self.transforms = transforms @@ -79,6 +103,18 @@ def _read_disparity(self, file_path: str) -> Tuple: pass def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Return example at given index. + + Args: + index(int): The index of the example to retrieve + + Returns: + tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` where ``valid_mask`` + is a numpy boolean mask of shape (H, W) + indicating which disparity values are valid. The disparity is a numpy array of + shape (1, H, W) and the images are PIL images. ``disparity`` and ``valid_mask`` are None for + datasets on which for ``split="test"`` the authors did not provide annotations. + """ img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) @@ -98,21 +134,59 @@ def __len__(self) -> int: return len(self._images) -class CREStereoSynthetic(StereoMatchingDataset): +class CREStereo(StereoMatchingDataset): """Synthetic dataset used in training the `CREStereo `_ architecture. - Ported from the download script in the paper github `repo `_. - """ - DOWNLOAD_SPACE = 4 * 1024 * 1024 * 1024 # dataset requires download requires about 400 GB of free space + Dataset details on the official paper `repo `_. - EXPERIMENTAL_RANGE = 1 # TODO: remove after validating dataset structure / flow + The dataset is expected to have the following structure: :: - MAX_DISP = 256. + root + CREStereo + tree + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + img2_left.jpg + img2_right.jpg + img2_left.disp.jpg + img2_right.disp.jpg + ... + shapenet + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... + reflective + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... + hole + img1_left.jpg + img1_right.jpg + img1_left.disp.jpg + img1_right.disp.jpg + ... - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False): + Args: + root (str): Root directory of the dataset. + split (str): The split of the dataset to use. One of ``"tree"``, ``"shapenet"``, ``"reflective"``, ``"hole"`` + or ``"all"``. The ``"all"`` split contains all of the above splits. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. + download (bool, optional): If true, downloads the dataset from the internet and puts it in the root directory. + max_disparity (int, optional): Maximum disparity value. Used to compute the valid mask. 
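+
+        Example (an illustrative sketch; assumes the dataset is already present under ``root``)::
+
+            dataset = CREStereo(root="datasets", split="tree")
+            img_left, img_right, disparity, valid_mask = dataset[0]
+            # disparity is a (1, H, W) np.ndarray; valid_mask is a boolean (H, W)
+            # np.ndarray marking disparities inside (0, max_disparity)
+            masked = disparity[0][valid_mask]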
+ """ + DOWNLOAD_SPACE = 400 * 1024 * 1024 * 1024 + + def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False, max_disparity: float = 256.): super().__init__(root, transforms) root = Path(root) / "CREStereo" + self.max_disparity = max_disparity # if the API user requests a dataset download check that the user can download it if download: @@ -149,16 +223,23 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) disparity_maps_right = list(p.replace("_right", "_right.disp") for p in imgs_right) + if not any(os.path.exists(file_path) for file_path in disparity_maps_left): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) + + if not any(os.path.exists(file_path) for file_path in disparity_maps_right): + raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = (disparity < self.MAX_DISP) & (disparity > 0.) + valid = (disparity < self.max_disparity) & (disparity > 0.) + # unsqueeze the disparity map into (C, H, W) format + disparity = disparity[None, :, :] return disparity, valid def _download_dataset(self, root: str) -> None: - # TODO: remove before release, used only for testing purposes dirs = ["tree", "shapenet", "reflective", "hole"] # create directory subtree for the download for d in dirs: @@ -221,11 +302,11 @@ class StereoMiddlebury2014(StereoMatchingDataset): Args: root (string): Root directory of the Middleburry 2014 Dataset. - split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" - use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. Sampled with equal probability. + split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional" + use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. + The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
""" @@ -268,7 +349,7 @@ def __init__( self._download_dataset(root) root = Path(root) / "Middlebury2014" - print(split) + if not os.path.exists(root / split): raise FileNotFoundError( f"The {split} directory was not found in the provided root directory" @@ -292,24 +373,23 @@ def __init__( for calibration_suffix in calibrartion_suffixes: scene_pattern = "*" + calibration_suffix - print(scene_pattern) imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png"))) imgs_right = sorted(glob(str(root / split / scene_pattern / "im1.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) self._images += list((l, r) for l, r in zip(imgs_left, imgs_right)) if split == "test": - dsp_maps_left, dsp_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) else: + disparity_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) + disparity_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root / split)) - dsp_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) - dsp_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) - - self._disparities += list((l, r) for l, r in zip(dsp_maps_left, dsp_maps_right)) + self._disparities += list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self.use_ambient_views = use_ambient_views @@ -317,6 +397,7 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: + """Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.""" if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: # initialize sampleable container ambient_file_paths = list(file_path.replace("im1.png", view_name) for view_name in ["im1E.png", "im1L.png"]) @@ -332,6 +413,8 @@ def _read_disparity(self, file_path: str) -> Tuple: return None, None disparity_map = read_pfm_file(file_path) valid_mask = disparity_map < 1e3 + # remove the channel dimension from the valid mask + valid_mask = valid_mask[0, :, :] return disparity_map, valid_mask def _download_dataset(self, root: str): @@ -357,10 +440,13 @@ def _download_dataset(self, root: str): download_and_extract_archive(url=test_set_url, download_root=root, remove_finished=True) for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): for scene in scene_names: - shutil.move(os.path.join(scene_dir, scene), os.path.join(root, scene)) + scene_dst_dir = root / "test" / scene + scene_src_dir = scene_dir / scene + os.makedirs(scene_dst_dir, exist_ok=True) + shutil.move(str(scene_src_dir), str(scene_dst_dir)) # cleanup MiddEval3 directory - shutil.rmtree(os.path.join(root, "MiddEval3")) + shutil.rmtree(str(root / "MiddEval3")) class StereoETH3D(StereoMatchingDataset): @@ -411,8 +497,7 @@ class StereoETH3D(StereoMatchingDataset): root (string): Root directory of the ETH3D Dataset. split (string, optional): The dataset split of scenes, either "train" (default) or "test". calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. 
- transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -427,7 +512,6 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) @@ -435,8 +519,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) else: disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*0GT.pfm"))) - # no masks for the right view, always using left as reference disparity_maps_right = list("" for _ in disparity_maps_left) + if not len(disparity_maps_left): + raise FileNotFoundError("No disparity maps found in {}".format(root / anot_dir)) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) @@ -447,10 +532,10 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = read_pfm_file(file_path) valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) - valid_mask = np.array(valid_mask) + valid_mask = np.array(valid_mask).astype(np.bool) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -474,8 +559,7 @@ class StereoKitti2012(StereoMatchingDataset): Args: root (string): Root directory where Kitti2012 is located. split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. """ @@ -494,6 +578,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png"))) disparity_maps_right = list("" for _ in disparity_maps_left) + if not len(disparity_maps_left): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -506,7 +593,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path)) / 256.0 valid_mask = disparity_map > 0.0 - + # unsqueeze the disparity map into (C, H, W) format + disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: @@ -533,8 +621,7 @@ class StereoKitti2015(StereoMatchingDataset): Args: root (string): Root directory where Kitti2015 is located. split (string, optional): The dataset split of scenes, either "train" (default) or test. 
- transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -552,6 +639,9 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if split == "train": disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png"))) disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -564,7 +654,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path)) / 256.0 valid_mask = disparity_map < 0.0 - + # unsqueeze the disparity map into (C, H, W) format + disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: @@ -574,10 +665,45 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoSintel(StereoMatchingDataset): """"Sintel `Stereo Dataset `_. + The dataset is expected to have the following structure: :: + + root + Sintel + training + final_left + scene1 + img1.png + img2.png + ... + ... + final_right + scene2 + img1.png + img2.png + ... + ... + disparities + scene1 + img1.png + img2.png + ... + ... + occlusions + scene1 + img1.png + img2.png + ... + ... + outofframe + scene1 + img1.png + img2.png + ... + ... + Args: root (string): Root directory where Sintel Stereo is located. - transforms (callalbe, optional): A function/transform that takes in - ``left_img, right_img, left_disparity, right_disparity`` and returns a transformed version. + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ def __init__(self, root: str, transforms: Optional[Callable] = None): @@ -587,11 +713,13 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) + if not len(dps_masks_left): + raise FileNotFoundError("No disparity maps found in {}".format(root)) + disparity_maps_right = list("" for _ in dps_masks_left) self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) @@ -605,7 +733,8 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = np.array(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) disparity_map = r * 4 + g / (2**6) + b / (2**14) - + # reshape into (C, H, W) format + disparity_map = np.transpose(disparity_map, (2, 0, 1)) # occlusion mask valid_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) == 0 # out of frame mask @@ -662,6 +791,10 @@ class StereoSceneFlow(StereoMatchingDataset): FlyingThings3D ... ... + + Args: + root (string): Root directory where SceneFlow is located. 
+        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
     """

     def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None):
@@ -683,7 +816,6 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c
         for p in passes:
             imgs_left = sorted(glob(str(root / p / "*" / "left" / "*.png")))
             imgs_right = sorted(glob(str(root / p / "*" / "right" / "*.png")))
-
             if not len(imgs_left) or not len(imgs_right):
                 raise FileNotFoundError("No images found in {}".format(root / p))

@@ -693,15 +825,19 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c
         disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left]
         disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right]

+        if not any(os.path.exists(file_path) for file_path in disparity_maps_left):
+            raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity"))
+
+        if not any(os.path.exists(file_path) for file_path in disparity_maps_right):
+            raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity"))
+
         disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right))
         self._disparities += disparity_maps

     def _read_disparity(self, file_path: str) -> Tuple:
-        if not os.path.exists(file_path):
-            raise FileNotFoundError("Disparity map {} not found".format(file_path))
-
         disparity = read_pfm_file(file_path)
-        valid = np.ones_like(disparity)
+        # keep valid mask with shape (H, W)
+        valid = np.ones(disparity.shape[1:]).astype(np.bool)
         return disparity, valid

     def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
@@ -745,13 +881,20 @@ class StereoFallingThings(StereoMatchingDataset):
             ...
         scene2
             ...
+
+    Args:
+        root (string): Root directory where FallingThings is located.
+        split (string): Either "single", "mixed", or "both".
+        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
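+
+        Note (illustrative): disparities are not shipped directly; they are derived
+        from the depth maps and the focal length ``fx`` read from each scene's
+        ``_camera_settings.json``, as done in ``_read_disparity`` below, roughly::
+
+            fx = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
+            disparity = (fx * 6.0 * 100) / depth.astype(np.float32)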
+ """ def __init__(self, root: str, split: str = "single", transforms: Optional[Callable] = None): super().__init__(root, transforms) + root = Path(root) / "FallingThings" + verify_str_arg(split, "split", valid_values=("single", "mixed", "both")) - split = split.upper() splits = { "single": ["single"], @@ -760,28 +903,35 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab }[split] for s in splits: - imgs_left = sorted(glob(str(root / s / "*.left.jpg"))) - imgs_right = sorted(glob(str(root / s / "*.right.jpg"))) - + imgs_left = sorted(glob(str(root / s / "*" / "*.left.jpg"))) + imgs_right = sorted(glob(str(root / s / "*" / "*.right.jpg"))) if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) self._images += imgs - disparity_maps_left = sorted(glob(str(root / s / "*.left.depth.png"))) - disparity_maps_right = sorted(glob(str(root / s / "*.right.depth.png"))) + disparity_maps_left = sorted(glob(str(root / s / "*" / "*.left.depth.png"))) + disparity_maps_right = sorted(glob(str(root / s / "*" / "*.right.depth.png"))) + if not len(disparity_maps_left) or not len(disparity_maps_right): + raise FileNotFoundError("No disparity maps found in {}".format(root)) disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: - depth = Image.Open(file_path) - with open(os.path.split(file_path)[0] + '_camera_settings.json', 'r') as f: + # (H, W) image + depth = np.array(Image.open(file_path)) + # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt + # in order to extract disparity from depth maps + with open(os.path.split(file_path)[0] + '/_camera_settings.json', 'r') as f: intrinsics = json.load(f) fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + # inverse of depth-from-disparity equation disparity = (fx * 6.0 * 100) / depth.astype(np.float32) valid = disparity > 0 + # unsqueeze disparity to (C, H, W) + disparity = disparity[None, :, :] return disparity, valid def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: @@ -789,7 +939,7 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: class InStereo2k(StereoMatchingDataset): - """InStereo2k ``_ dataset + """InStereo2k ``_ dataset The dataset is expected to have the following structre: :: @@ -813,6 +963,11 @@ class InStereo2k(StereoMatchingDataset): ... scene2 ... + + Args: + root (string): Root directory where InStereo2k is located. + split (string): Either "train" or "test". + transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. 
""" def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -820,9 +975,10 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl root = Path(root) / "InStereo2k" / split + verify_str_arg(split, "split", valid_values=("train", "test")) + imgs_left = sorted(glob(str(root / "*" / "left.png"))) imgs_right = list(p.replace("left", "right") for p in imgs_left) - if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) @@ -832,10 +988,18 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) + if not any(os.path.exists(file_path) for file_path in disparity_maps_left): + raise FileNotFoundError("No disparity valid maps found in {}".format(root)) + + if not any(os.path.exists(file_path) for file_path in disparity_maps_right): + raise FileNotFoundError("No disparity valid maps found in {}".format(root)) + disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) self._disparities = disparity_maps def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = np.ones_like(disparity) + valid = np.ones_like(disparity).astype(np.bool) + # unsqueeze disparity to (C, H, W) + disparity = disparity[None, :, :] return disparity, valid From de94c2c8acd7811cb272b05cc0f94ca77f965511 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 10:12:06 +0100 Subject: [PATCH 20/35] Ran ufmt. (#6259) --- torchvision/datasets/__init__.py | 12 +- torchvision/datasets/_stereo_matching.py | 195 +++++++++++++++-------- 2 files changed, 138 insertions(+), 69 deletions(-) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 8b38ba73a85..973d5ca9f7e 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -1,5 +1,15 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K -from ._stereo_matching import StereoETH3D, StereoFallingThings, StereoKitti2012, StereoKitti2015, StereoMiddlebury2014, StereoSceneFlow, StereoSintel, CREStereo, InStereo2k +from ._stereo_matching import ( + StereoETH3D, + StereoFallingThings, + StereoKitti2012, + StereoKitti2015, + StereoMiddlebury2014, + StereoSceneFlow, + StereoSintel, + CREStereo, + InStereo2k, +) from .caltech import Caltech101, Caltech256 from .celeba import CelebA from .cifar import CIFAR10, CIFAR100 diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 4de0b5b0532..3edb0f639a5 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,19 +1,21 @@ -from abc import ABC, abstractmethod -from glob import glob -from pathlib import Path +import json +import os import random import re import shutil -from typing import Callable, List, Optional, Tuple import warnings +from abc import ABC, abstractmethod +from glob import glob +from pathlib import Path +from typing import Callable, List, Optional, Tuple + +import numpy as np from jsonschema import ValidationError +from PIL import Image from torch import Tensor -from .vision import VisionDataset + from .utils import download_and_extract_archive, verify_str_arg -import os -import numpy as np -from PIL import Image -import json +from .vision import VisionDataset 
__all__ = ( "CREStereo" @@ -35,7 +37,7 @@ def read_pfm_file(file_path: str) -> np.array: if not header in [b"PF", b"Pf"]: raise ValidationError(f"Not a valid PFM file: {file_path}") - dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline()) + dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) if not dim_match: raise ValidationError(f"Malformed PFM header: {file_path}") @@ -45,11 +47,11 @@ def read_pfm_file(file_path: str) -> np.array: # check for endian type if scale < 0: scale = -scale - endian = '<' + endian = "<" else: - endian = '>' + endian = ">" - data = np.fromfile(file, endian + 'f') + data = np.fromfile(file, endian + "f") data = np.reshape(data, (height, width, channels)) data = np.flipud(data) @@ -126,7 +128,11 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: valid_masks = (valid_mask_left, valid_mask_right) if self.transforms is not None: - imgs, dsp_maps, valid_masks, = self.transforms(imgs, dsp_maps, valid_masks) + ( + imgs, + dsp_maps, + valid_masks, + ) = self.transforms(imgs, dsp_maps, valid_masks) return imgs[0], imgs[1], dsp_maps[0], valid_masks[0] @@ -135,7 +141,7 @@ def __len__(self) -> int: class CREStereo(StereoMatchingDataset): - """Synthetic dataset used in training the `CREStereo `_ architecture. + """Synthetic dataset used in training the `CREStereo `_ architecture. Dataset details on the official paper `repo `_. @@ -179,10 +185,18 @@ class CREStereo(StereoMatchingDataset): transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (bool, optional): If true, downloads the dataset from the internet and puts it in the root directory. max_disparity (int, optional): Maximum disparity value. Used to compute the valid mask. - """ + """ + DOWNLOAD_SPACE = 400 * 1024 * 1024 * 1024 - def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable] = None, download: bool = False, max_disparity: float = 256.): + def __init__( + self, + root: str, + split: str = "tree", + transforms: Optional[Callable] = None, + download: bool = False, + max_disparity: float = 256.0, + ): super().__init__(root, transforms) root = Path(root) / "CREStereo" @@ -234,7 +248,7 @@ def __init__(self, root: str, split: str = "tree", transforms: Optional[Callable def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = (disparity < self.max_disparity) & (disparity > 0.) + valid = (disparity < self.max_disparity) & (disparity > 0.0) # unsqueeze the disparity map into (C, H, W) format disparity = disparity[None, :, :] return disparity, valid @@ -261,33 +275,33 @@ class StereoMiddlebury2014(StereoMatchingDataset): Middlebury2014 train scene1-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm scene2-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm ... additional scene1-{ ,perfect,imperfect} - calib.txt - im{0,1}.png - im1E.png - im1L.png - disp{0,1}.pfm - disp{0,1}-n.png - disp{0,1}-sd.pfm + calib.txt + im{0,1}.png + im1E.png + im1L.png + disp{0,1}.pfm + disp{0,1}-n.png + disp{0,1}-sd.pfm disp{0,1}y.pfm ... 
test @@ -305,15 +319,56 @@ class StereoMiddlebury2014(StereoMatchingDataset): split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional" use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. - calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. - download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. + download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. """ splits = { - "train": ["Adirondack", "Jadeplant", "Motorcycle", "Piano", "Pipes", "Playroom", "Playtable", "Recycle", "Shelves", "Vintage"], - "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1", "Couch", "Flowers", "Mask", "Shopvac", "Sticks", "Storage", "Sword1", "Sword2", "Umbrella"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia", "DjembeL", "CrusadeP", "Crusade", "Hoops", "Bicycle2", "Staircase", "Newkuba", "AustraliaP", "Djembe", "Livingroom", "Computer"] + "train": [ + "Adirondack", + "Jadeplant", + "Motorcycle", + "Piano", + "Pipes", + "Playroom", + "Playtable", + "Recycle", + "Shelves", + "Vintage", + ], + "additional": [ + "Backpack", + "Bicycle1", + "Cable", + "Classroom1", + "Couch", + "Flowers", + "Mask", + "Shopvac", + "Sticks", + "Storage", + "Sword1", + "Sword2", + "Umbrella", + ], + "test": [ + "Plants", + "Classroom2E", + "Classroom2", + "Australia", + "DjembeL", + "CrusadeP", + "Crusade", + "Hoops", + "Bicycle2", + "Staircase", + "Newkuba", + "AustraliaP", + "Djembe", + "Livingroom", + "Computer", + ], } def __init__( @@ -323,7 +378,7 @@ def __init__( calibration: Optional[str] = "perfect", use_ambient_views: bool = False, transforms: Optional[Callable] = None, - download: bool = False + download: bool = False, ): super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) @@ -333,8 +388,7 @@ def __init__( if split == "test": calibration = None warnings.warn( - "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", - RuntimeWarning + "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", RuntimeWarning ) else: if split != "test": @@ -342,7 +396,7 @@ def __init__( warnings.warn( f"\nSplit '{split}' has calibration settings, however None was provided as an argument." f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.", - RuntimeWarning + RuntimeWarning, ) if download: @@ -351,15 +405,14 @@ def __init__( root = Path(root) / "Middlebury2014" if not os.path.exists(root / split): - raise FileNotFoundError( - f"The {split} directory was not found in the provided root directory" - ) + raise FileNotFoundError(f"The {split} directory was not found in the provided root directory") split_scenes = self.splits[split] # check that the provided root folder contains the scene splits if not any( # using startswith to account for perfect / imperfect calibrartion - scene.startswith(s) for scene in os.listdir(root / split) + scene.startswith(s) + for scene in os.listdir(root / split) for s in split_scenes ): raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.") @@ -429,7 +482,9 @@ def _download_dataset(self, root: str): scene_name = f"{scene}-{calibration}" for calibration in ["perfect", "imperfect"]: scene_url = f"{base_url}/{scene_name}.zip" - download_and_extract_archive(url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True) + download_and_extract_archive( + url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True + ) if any(s not in os.listdir(root) for s in self.splits["test"]): # test split is downloaded from a different location @@ -450,7 +505,7 @@ def _download_dataset(self, root: str): class StereoETH3D(StereoMatchingDataset): - """"ETH3D `Low-Res Two-View `_ dataset. + """ "ETH3D `Low-Res Two-View `_ dataset. The dataset is expected to have the following structure: :: @@ -458,13 +513,13 @@ class StereoETH3D(StereoMatchingDataset): ETH3D two_view_training scene1 - im1.png + im1.png im0.png images.txt cameras.txt calib.txt scene2 - im1.png + im1.png im0.png images.txt cameras.txt @@ -480,13 +535,13 @@ class StereoETH3D(StereoMatchingDataset): ... two_view_testing scene1 - im1.png + im1.png im0.png images.txt cameras.txt calib.txt scene2 - im1.png + im1.png im0.png images.txt cameras.txt @@ -496,7 +551,7 @@ class StereoETH3D(StereoMatchingDataset): Args: root (string): Root directory of the ETH3D Dataset. split (string, optional): The dataset split of scenes, either "train" (default) or "test". - calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. + calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ @@ -540,7 +595,7 @@ def __getitem__(self, index: int) -> Tuple: class StereoKitti2012(StereoMatchingDataset): - """"Kitti dataset from the `2012 `_ stereo evaluation benchmark. + """ "Kitti dataset from the `2012 `_ stereo evaluation benchmark. Uses the RGB images for consistency with Kitti 2015. The dataset is expected to have the following structure: :: @@ -560,7 +615,7 @@ class StereoKitti2012(StereoMatchingDataset): root (string): Root directory where Kitti2012 is located. split (string, optional): The dataset split of scenes, either "train" (default), test, or "additional" transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. - download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. + download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
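+
+        Example (an illustrative sketch of how the stored ground truth is decoded,
+        mirroring ``_read_disparity``)::
+
+            # KITTI disparities are uint16 PNGs scaled by 256; a stored value of
+            # 0 means no annotation, hence the strictly positive validity check
+            disparity_map = np.array(Image.open(file_path)) / 256.0
+            valid_mask = disparity_map > 0.0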
""" def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): @@ -602,7 +657,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoKitti2015(StereoMatchingDataset): - """"Kitti dataset from the `2015 `_ stereo evaluation benchmark. + """ "Kitti dataset from the `2015 `_ stereo evaluation benchmark. The dataset is expected to have the following structure: :: @@ -663,7 +718,7 @@ def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: class StereoSintel(StereoMatchingDataset): - """"Sintel `Stereo Dataset `_. + """ "Sintel `Stereo Dataset `_. The dataset is expected to have the following structure: :: @@ -732,7 +787,7 @@ def _read_disparity(self, file_path: str) -> Tuple: # disparity decoding as per Sintel instructions disparity_map = np.array(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) - disparity_map = r * 4 + g / (2**6) + b / (2**14) + disparity_map = r * 4 + g / (2 ** 6) + b / (2 ** 14) # reshape into (C, H, W) format disparity_map = np.transpose(disparity_map, (2, 0, 1)) # occlusion mask @@ -797,7 +852,9 @@ class StereoSceneFlow(StereoMatchingDataset): transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ - def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None): + def __init__( + self, root: str, split: str = "FlyingThings3D", pass_name: str = "clean", transforms: Optional[Callable] = None + ): super().__init__(root, transforms) root = Path(root) / "SceneFlow" @@ -823,7 +880,9 @@ def __init__(self, root: str, split: str = "FlyingThings3D", pass_name: str = "c self._images += imgs disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] - disparity_maps_right = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right] + disparity_maps_right = [ + file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right + ] if not any(os.path.exists(file_path) for file_path in disparity_maps_left): raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) @@ -924,9 +983,9 @@ def _read_disparity(self, file_path: str) -> Tuple: depth = np.array(Image.open(file_path)) # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt # in order to extract disparity from depth maps - with open(os.path.split(file_path)[0] + '/_camera_settings.json', 'r') as f: + with open(os.path.split(file_path)[0] + "/_camera_settings.json", "r") as f: intrinsics = json.load(f) - fx = intrinsics['camera_settings'][0]['intrinsic_settings']['fx'] + fx = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] # inverse of depth-from-disparity equation disparity = (fx * 6.0 * 100) / depth.astype(np.float32) valid = disparity > 0 From 4256ca455917ef4e480aeb2a7a8ca65609ca4dd4 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 10:48:42 +0100 Subject: [PATCH 21/35] Adressed CI/CD errors --- torchvision/datasets/_stereo_matching.py | 41 ++++++++++++------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 3edb0f639a5..254d9d2624a 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -10,7 +10,6 @@ from typing import Callable, List, Optional, Tuple 
import numpy as np -from jsonschema import ValidationError from PIL import Image from torch import Tensor @@ -35,11 +34,11 @@ def read_pfm_file(file_path: str) -> np.array: with open(file_path, "rb") as file: header = file.readline().rstrip() if not header in [b"PF", b"Pf"]: - raise ValidationError(f"Not a valid PFM file: {file_path}") + raise ValueError(f"Not a valid PFM file: {file_path}") dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) if not dim_match: - raise ValidationError(f"Malformed PFM header: {file_path}") + raise ValueError(f"Malformed PFM header: {file_path}") width, height = map(int, dim_match.groups()) channels = 3 if header == b"PF" else 1 @@ -231,7 +230,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) @@ -243,7 +242,7 @@ def __init__( if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -432,7 +431,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - self._images += list((l, r) for l, r in zip(imgs_left, imgs_right)) + self._images += list((left, right) for left, right in zip(imgs_left, imgs_right)) if split == "test": disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) @@ -442,7 +441,7 @@ def __init__( if not len(disparity_maps_left) or not len(disparity_maps_right): raise FileNotFoundError("No disparity maps found in {}".format(root / split)) - self._disparities += list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._disparities += list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self.use_ambient_views = use_ambient_views @@ -578,8 +577,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not len(disparity_maps_left): raise FileNotFoundError("No disparity maps found in {}".format(root / anot_dir)) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -639,8 +638,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in 
zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -700,8 +699,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl else: disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -777,8 +776,8 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): disparity_maps_right = list("" for _ in dps_masks_left) - self._images = list((l, r) for l, r in zip(imgs_left, imgs_right)) - self._disparities = list((l, r) for l, r in zip(dps_masks_left, disparity_maps_right)) + self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) + self._disparities = list((left, right) for left, right in zip(dps_masks_left, disparity_maps_right)) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -876,7 +875,7 @@ def __init__( if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root / p)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] @@ -890,7 +889,7 @@ def __init__( if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -967,7 +966,7 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) self._images += imgs disparity_maps_left = sorted(glob(str(root / s / "*" / "*.left.depth.png"))) @@ -975,7 +974,7 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab if not len(disparity_maps_left) or not len(disparity_maps_right): raise FileNotFoundError("No disparity maps found in {}".format(root)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: @@ -1041,7 +1040,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) - imgs = list((l, r) for l, r in zip(imgs_left, imgs_right)) + imgs = list((left, right) for left, right in zip(imgs_left, 
imgs_right)) self._images = imgs disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) @@ -1053,7 +1052,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl if not any(os.path.exists(file_path) for file_path in disparity_maps_right): raise FileNotFoundError("No disparity valid maps found in {}".format(root)) - disparity_maps = list((l, r) for l, r in zip(disparity_maps_left, disparity_maps_right)) + disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) self._disparities = disparity_maps def _read_disparity(self, file_path: str) -> Tuple: From d7882ca96175146c3e81424189f64f8cd4c4e8f4 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 11:21:36 +0100 Subject: [PATCH 22/35] Ran formatting pre-commit hook --- test/datasets_utils.py | 16 ++--- test/test_datasets.py | 76 +++++++++++++----------- torchvision/datasets/_stereo_matching.py | 2 +- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index f051e325968..9afd8f741fd 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -561,9 +561,11 @@ def test_feature_types(self, config): @test_all_configs def test_num_examples(self, config): with self.create_dataset(config) as (dataset, info): - assert len(dataset) == info["num_examples"], f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" + assert ( + len(dataset) == info["num_examples"] + ), f"The number of examples {len(dataset)} does not match the expected {info['num_examples']}" - @ test_all_configs + @test_all_configs def test_transforms(self, config): mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) for kwarg in self._TRANSFORM_KWARGS: @@ -587,7 +589,7 @@ class ImageDatasetTestCase(DatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, int) - @ contextlib.contextmanager + @contextlib.contextmanager def create_dataset( self, config: Optional[Dict[str, Any]] = None, @@ -610,7 +612,7 @@ def create_dataset( with self._force_load_images(): yield dataset, info - @ contextlib.contextmanager + @contextlib.contextmanager def _force_load_images(self): open = PIL.Image.open @@ -649,7 +651,7 @@ def _set_default_frames_per_clip(self, inject_fake_data): args_without_default = argspec.args[1 : (-len(argspec.defaults) if argspec.defaults else None)] frames_per_clip_last = args_without_default[-1] == "frames_per_clip" - @ functools.wraps(inject_fake_data) + @functools.wraps(inject_fake_data) def wrapper(tmpdir, config): args = inject_fake_data(tmpdir, config) if frames_per_clip_last and len(args) == len(args_without_default) - 1: @@ -748,7 +750,7 @@ def size(idx: int) -> Tuple[int, int, int]: ] -@ requires_lazy_imports("av") +@requires_lazy_imports("av") def create_video_file( root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], @@ -790,7 +792,7 @@ def create_video_file( return file -@ requires_lazy_imports("av") +@requires_lazy_imports("av") def create_video_folder( root: Union[str, pathlib.Path], name: Union[str, pathlib.Path], diff --git a/test/test_datasets.py b/test/test_datasets.py index dd3c89b9bdc..5db3be40b4f 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -10,10 +10,10 @@ import random import shutil import string -from typing import List, Callable, Tuple import unittest import xml.etree.ElementTree as ET import zipfile +from typing import List, Callable, Tuple import datasets_utils import numpy as np @@ -28,26 +28,26 
@@ class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) - @ staticmethod + @staticmethod def _make_binary_file(num_elements, root, name): file_name = os.path.join(root, name) np.zeros(num_elements, dtype=np.uint8).tofile(file_name) - @ staticmethod + @staticmethod def _make_image_file(num_images, root, name, num_channels=3, height=96, width=96): STL10TestCase._make_binary_file(num_images * num_channels * height * width, root, name) - @ staticmethod + @staticmethod def _make_label_file(num_images, root, name): STL10TestCase._make_binary_file(num_images, root, name) - @ staticmethod + @staticmethod def _make_class_names_file(root, name="class_names.txt"): with open(os.path.join(root, name), "w") as fh: for cname in ("airplane", "bird"): fh.write(f"{cname}\n") - @ staticmethod + @staticmethod def _make_fold_indices_file(root): num_folds = 10 offset = 0 @@ -59,7 +59,7 @@ def _make_fold_indices_file(root): return tuple(range(1, num_folds + 1)) - @ staticmethod + @staticmethod def _make_train_files(root, num_unlabeled_images=1): num_images_in_fold = STL10TestCase._make_fold_indices_file(root) num_train_images = sum(num_images_in_fold) @@ -70,7 +70,7 @@ def _make_train_files(root, num_unlabeled_images=1): return dict(train=num_train_images, unlabeled=num_unlabeled_images) - @ staticmethod + @staticmethod def _make_test_files(root, num_images=2): STL10TestCase._make_image_file(num_images, root, "test_X.bin") STL10TestCase._make_label_file(num_images, root, "test_y.bin") @@ -888,7 +888,7 @@ def inject_fake_data(self, tmpdir, config): return num_images - @ contextlib.contextmanager + @contextlib.contextmanager def create_dataset(self, *args, **kwargs): with super().create_dataset(*args, **kwargs) as output: yield output @@ -1294,7 +1294,7 @@ def _create_archive(self, root, name, *files): return archive - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_feature_types(self, config): feature_types = self.FEATURE_TYPES self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES @@ -1572,7 +1572,7 @@ def _file_name_fn(self, cls, ext, idx): def _is_valid_file_to_extensions(self, is_valid_file): return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_is_valid_file(self, config): extensions = config.pop("extensions") # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the @@ -1582,7 +1582,7 @@ def test_is_valid_file(self, config): ) as (dataset, info): assert len(dataset) == info["num_examples"] - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1603,7 +1603,7 @@ def inject_fake_data(self, tmpdir, config): return dict(num_examples=num_examples_total, classes=classes) - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_classes(self, config): with self.create_dataset(config) as (dataset, info): assert len(dataset.classes) == len(info["classes"]) @@ -1702,32 +1702,32 @@ class Places365TestCase(datasets_utils.ImageDatasetTestCase): *((f"{category}/Places365_train_00000001.png", idx) for category, idx in _CATEGORIES_CONTENT), ) - @ staticmethod + @staticmethod def 
_make_txt(root, name, seq): file = os.path.join(root, name) with open(file, "w") as fh: for text, idx in seq: fh.write(f"{text} {idx}\n") - @ staticmethod + @staticmethod def _make_categories_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._CATEGORIES_CONTENT) - @ staticmethod + @staticmethod def _make_file_list_txt(root, name): Places365TestCase._make_txt(root, name, Places365TestCase._FILE_LIST_CONTENT) - @ staticmethod + @staticmethod def _make_image(file_name, size): os.makedirs(os.path.dirname(file_name), exist_ok=True) PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file_name) - @ staticmethod + @staticmethod def _make_devkit_archive(root, split): Places365TestCase._make_categories_txt(root, Places365TestCase._CATEGORIES) Places365TestCase._make_file_list_txt(root, Places365TestCase._FILE_LISTS[split]) - @ staticmethod + @staticmethod def _make_images_archive(root, split, small): folder_name = Places365TestCase._IMAGES[(split, small)] image_size = (256, 256) if small else (512, random.randint(512, 1024)) @@ -2042,7 +2042,7 @@ def inject_fake_data(self, tmpdir, config): return num_examples[config["split"]] - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_flow(self, config): # Make sure flow always exists, and make sure there are as many flow values as (pairs of) images # Also make sure the flow is properly decoded @@ -2101,7 +2101,7 @@ def inject_fake_data(self, tmpdir, config): ) return num_examples - @ datasets_utils.test_all_configs + @datasets_utils.test_all_configs def test_flow(self, config): h, w = self.FLOW_H, self.FLOW_W expected_flow = np.arange(3 * h * w).reshape(h, w, 3).transpose(2, 0, 1) @@ -2726,7 +2726,9 @@ def inject_fake_data(self, tmpdir, config): def test_training_test_splits(self): with self.create_dataset(split="train") as (dataset, _): - assert dataset._images and len(dataset._images) == len(dataset._disparities), "Training images do not match with training disparities" + assert dataset._images and len(dataset._images) == len( + dataset._disparities + ), "Training images do not match with training disparities" for _, _, disparity, valid_mask in dataset: assert len(disparity.shape) == 3 assert len(valid_mask.shape) == 2 @@ -2813,10 +2815,10 @@ def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: scene_dir = os.path.join(root_dir, f"{scene_name}{c}") os.makedirs(scene_dir, exist_ok=True) # make normal images first - datasets_utils.create_image_file(root=scene_dir, name=f"im0.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1E.png", size=(3, 100, 100)) - datasets_utils.create_image_file(root=scene_dir, name=f"im1L.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1E.png", size=(3, 100, 100)) + datasets_utils.create_image_file(root=scene_dir, name="im1L.png", size=(3, 100, 100)) # these are going to end up being gray scale images datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm")) datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm")) @@ -2827,7 +2829,7 @@ def inject_fake_data(self, tmpdir, config): split_scene_map = { "train": ["Adirondack", "Jadeplant", 
"Motorcycle", "Piano"], "additional": ["Backpack", "Bicycle1", "Cable", "Classroom1"], - "test": ["Plants", "Classroom2E", "Classroom2", "Australia"] + "test": ["Plants", "Classroom2E", "Classroom2", "Australia"], } middlebury_dir = pathlib.Path(tmpdir, "Middlebury2014") @@ -2895,7 +2897,7 @@ def test_warnings_train(self): with pytest.warns( RuntimeWarning, match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." - f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", + f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", ): with self.create_dataset(split=split, calibration=calibration): pass @@ -2905,8 +2907,7 @@ def test_warnings_test(self): split = "test" calibration = "perfect" with pytest.warns( - RuntimeWarning, - match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." + RuntimeWarning, match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." ): with self.create_dataset(split=split, calibration=calibration): pass @@ -3086,13 +3087,14 @@ def test_bad_input(self): class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StereoSceneFlow ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - split=("FlyingThings3D", "Driving", "Monkaa"), - pass_name=("clean", "final") + split=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final") ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod - def _create_pfm_folder(root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]) -> List[str]: + def _create_pfm_folder( + root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int] + ) -> List[str]: root = pathlib.Path(root) / name os.makedirs(root, exist_ok=True) @@ -3193,8 +3195,12 @@ def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> Lis paths.append(datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0]))) paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))) # single channel depth maps - paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))) - paths.append(StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))) + paths.append( + StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1])) + ) + paths.append( + StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1])) + ) # camera settings json. 
Minimal example for _read_disparity function testing settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} with open(root / "_camera_settings.json", "w") as f: diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 254d9d2624a..8ef5f3e6e1a 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -33,7 +33,7 @@ def read_pfm_file(file_path: str) -> np.array: # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py with open(file_path, "rb") as file: header = file.readline().rstrip() - if not header in [b"PF", b"Pf"]: + if header not in [b"PF", b"Pf"]: raise ValueError(f"Not a valid PFM file: {file_path}") dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) From 5f291c292ca2611c21e6b5ac2d12b79f51e3cab4 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 16:12:41 +0100 Subject: [PATCH 23/35] Added reusable _pfm_read. Addressed CI issues. --- torchvision/datasets/_stereo_matching.py | 95 +++++++++--------------- 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 8ef5f3e6e1a..a8797d7d5c1 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -1,7 +1,7 @@ +import functools import json import os import random -import re import shutil import warnings from abc import ABC, abstractmethod @@ -11,9 +11,8 @@ import numpy as np from PIL import Image -from torch import Tensor -from .utils import download_and_extract_archive, verify_str_arg +from .utils import download_and_extract_archive, verify_str_arg, _read_pfm from .vision import VisionDataset __all__ = ( @@ -28,35 +27,7 @@ "InStereo2k" ) - -def read_pfm_file(file_path: str) -> np.array: - # adapted from https://github.com/ucbdrive/hd3/blob/master/utils/pfm.py - with open(file_path, "rb") as file: - header = file.readline().rstrip() - if header not in [b"PF", b"Pf"]: - raise ValueError(f"Not a valid PFM file: {file_path}") - - dim_match = re.match(rb"^(\d+)\s(\d+)\s$", file.readline()) - if not dim_match: - raise ValueError(f"Malformed PFM header: {file_path}") - - width, height = map(int, dim_match.groups()) - channels = 3 if header == b"PF" else 1 - scale = float(file.readline().rstrip()) - # check for endian type - if scale < 0: - scale = -scale - endian = "<" - else: - endian = ">" - - data = np.fromfile(file, endian + "f") - data = np.reshape(data, (height, width, channels)) - data = np.flipud(data) - - # PFM files for disparity maps should contain only a single channel - # they should also be returned in (C, H, W) format - return np.transpose(data[:, :, :1], (2, 0, 1)) +_read_pfm_file = functools.partial(_read_pfm, slice_channels=1) class StereoMatchingDataset(ABC, VisionDataset): @@ -103,7 +74,7 @@ def _read_disparity(self, file_path: str) -> Tuple: # function that returns a disparity map and an occlusion map pass - def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + def __getitem__(self, index: int) -> Tuple: """Return example at given index. Args: @@ -111,10 +82,10 @@ def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: Returns: tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` where ``valid_mask`` - is a numpy boolean mask of shape (H, W) - indicating which disparity values are valid. The disparity is a numpy array of - shape (1, H, W) and the images are PIL images. 
``disparity`` and ``valid_mask`` are None for - datasets on which for ``split="test"`` the authors did not provide annotations. + is a numpy boolean mask of shape (H, W) + indicating which disparity values are valid. The disparity is a numpy array of + shape (1, H, W) and the images are PIL images. ``disparity`` and ``valid_mask`` are None for + datasets on which for ``split="test"`` the authors did not provide annotations. """ img_left = self._read_img(self._images[index][0]) img_right = self._read_img(self._images[index][1]) @@ -180,7 +151,7 @@ class CREStereo(StereoMatchingDataset): Args: root (str): Root directory of the dataset. split (str): The split of the dataset to use. One of ``"tree"``, ``"shapenet"``, ``"reflective"``, ``"hole"`` - or ``"all"``. The ``"all"`` split contains all of the above splits. + or ``"all"``. The ``"all"`` split contains all of the above splits. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (bool, optional): If true, downloads the dataset from the internet and puts it in the root directory. max_disparity (int, optional): Maximum disparity value. Used to compute the valid mask. @@ -208,10 +179,10 @@ def __init__( available_space = statvfs.f_frsize * statvfs.f_bavail if available_space - self.DOWNLOAD_SPACE < 0: raise ValueError( - f"The storage device for {root} is too small to download the dataset), " - f"an additional {self.DOWNLOAD_SPACE - self.available_space:.2f} GB are required." + f"The storage device for {str(root)} is too small to download the dataset), " + f"an additional {self.DOWNLOAD_SPACE - available_space:.2f} GB are required." ) - self._download_dataset(root) + self._download_dataset(str(root)) verify_str_arg(split, "split", valid_values=("tree", "shapenet", "reflective", "hole", "all")) @@ -260,7 +231,7 @@ def _download_dataset(self, root: str) -> None: if not os.path.exists(d_path): os.makedirs(d_path) - for i in range(self.EXPERIMENTAL_RANGE): + for i in range(10): url = f"https://data.megengine.org.cn/research/crestereo/dataset/{d}/{i}.tar" download_and_extract_archive(url=url, download_root=d_path, remove_finished=True) @@ -317,7 +288,7 @@ class StereoMiddlebury2014(StereoMatchingDataset): root (string): Root directory of the Middleburry 2014 Dataset. split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional" use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible. - The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. + The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``. calibration (string, optional): Wether or not to use the calibrated (default) or uncalibrated scenes. transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. download (boolean, optional): Wether or not to download the dataset in the ``root`` directory. 
@@ -380,10 +351,11 @@ def __init__( download: bool = False, ): super().__init__(root, transforms) + verify_str_arg(split, "split", valid_values=("train", "test", "additional")) if calibration: - verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None)) + verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both")) # type: ignore if split == "test": calibration = None warnings.warn( @@ -445,7 +417,7 @@ def __init__( self.use_ambient_views = use_ambient_views - def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: @@ -463,7 +435,7 @@ def _read_img(self, file_path: str) -> Image.Image: def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): # case when dealing with the test split return None, None - disparity_map = read_pfm_file(file_path) + disparity_map = _read_pfm_file(file_path) valid_mask = disparity_map < 1e3 # remove the channel dimension from the valid mask valid_mask = valid_mask[0, :, :] @@ -478,8 +450,8 @@ def _download_dataset(self, root: str): continue split_root = root / split_name for scene in split_scenes: - scene_name = f"{scene}-{calibration}" for calibration in ["perfect", "imperfect"]: + scene_name = f"{scene}-{calibration}" scene_url = f"{base_url}/{scene_name}.zip" download_and_extract_archive( url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True @@ -491,11 +463,11 @@ def _download_dataset(self, root: str): # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF # we want to move the contents from testF into the directory - download_and_extract_archive(url=test_set_url, download_root=root, remove_finished=True) + download_and_extract_archive(url=test_set_url, download_root=str(root), remove_finished=True) for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): for scene in scene_names: scene_dst_dir = root / "test" / scene - scene_src_dir = scene_dir / scene + scene_src_dir = Path(scene_dir) / scene os.makedirs(scene_dst_dir, exist_ok=True) shutil.move(str(scene_src_dir), str(scene_dst_dir)) @@ -584,9 +556,9 @@ def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): return None, None - disparity_map = read_pfm_file(file_path) + disparity_map = _read_pfm_file(file_path) valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) - valid_mask = np.array(valid_mask).astype(np.bool) + valid_mask = np.array(valid_mask).astype(np.bool_) return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple: @@ -651,7 +623,7 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -712,7 +684,7 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tuple, Tuple, Tuple]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -797,7 +769,7 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = np.logical_and(off_mask, valid_mask) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple[Tensor, 
Tensor, Tensor, Tensor]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -893,12 +865,12 @@ def __init__( self._disparities += disparity_maps def _read_disparity(self, file_path: str) -> Tuple: - disparity = read_pfm_file(file_path) + disparity = _read_pfm_file(file_path) # keep valid mask with shape (H, W) - valid = np.ones(disparity.shape[1:]).astype(np.bool) + valid = np.ones(disparity.shape[1:]).astype(np.bool_) return disparity, valid - def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -992,7 +964,7 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity = disparity[None, :, :] return disparity, valid - def __getitem__(self, index: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) @@ -1037,11 +1009,12 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl imgs_left = sorted(glob(str(root / "*" / "left.png"))) imgs_right = list(p.replace("left", "right") for p in imgs_left) + if not len(imgs_left) or not len(imgs_right): raise FileNotFoundError("No images found in {}".format(root)) imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._images = imgs + self._images = imgs # type: ignore disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) @@ -1053,11 +1026,11 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl raise FileNotFoundError("No disparity valid maps found in {}".format(root)) disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) - self._disparities = disparity_maps + self._disparities = disparity_maps # type: ignore def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) - valid = np.ones_like(disparity).astype(np.bool) + valid = np.ones_like(disparity).astype(np.bool_) # unsqueeze disparity to (C, H, W) disparity = disparity[None, :, :] return disparity, valid From af6b343a019bfd1a9b158bf214f52c9bfea5a5cc Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 17:05:08 +0100 Subject: [PATCH 24/35] Removed duplicate test code for stereo dataset testcases --- test/datasets_utils.py | 33 +++++ test/test_datasets.py | 168 ++--------------------- torchvision/datasets/_stereo_matching.py | 2 +- 3 files changed, 49 insertions(+), 154 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index 9afd8f741fd..a643c43685a 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -16,6 +16,7 @@ from collections import defaultdict from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union +import numpy as np import PIL import PIL.Image import pytest @@ -933,6 +934,38 @@ def create_random_string(length: int, *digits: str) -> str: return "".join(random.choice(digits) for _ in range(length)) +def shape_test_for_stereo_disp( + left: PIL.Image.Image, right: PIL.Image.Image, disparity: np.ndarray, valid_mask: np.ndarray +): + left_array = np.array(left) + right_array = np.array(right) + h, w, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert len(disparity.shape) == 3 + assert len(valid_mask.shape) == 
2 + assert disparity.shape == (1, h, w) + # check that valid mask is the same size as the disparity + _, dh, dw = disparity.shape + mh, mw = valid_mask.shape + assert dh == mh + assert dw == mw + + +def shape_test_for_stereo_none(left: PIL.Image.Image, right: PIL.Image.Image, disparity: None, valid_mask: None): + left_array = np.array(left) + right_array = np.array(right) + _, _, c = left_array.shape + # check that left and right are the same size + assert left_array.shape == right_array.shape + # check general shapes + assert c == 3 + assert disparity is None + assert valid_mask is None + + def make_fake_pfm_file(h, w, file_name): values = list(range(3 * h * w)) # Note: we pack everything in little endian: -1.0, and "<" diff --git a/test/test_datasets.py b/test/test_datasets.py index 5db3be40b4f..8ba77244c2f 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2729,19 +2729,13 @@ def test_training_test_splits(self): assert dataset._images and len(dataset._images) == len( dataset._disparities ), "Training images do not match with training disparities" - for _, _, disparity, valid_mask in dataset: - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + for left, right, disparity, valid_mask in dataset: + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) with self.create_dataset(split="test") as (dataset, _): assert all(d == ("", "") for d in dataset._disparities) - for _, _, disparity, valid_mask in dataset: - assert disparity is None - assert valid_mask is None + for left, right, disparity, valid_mask in dataset: + datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -2776,21 +2770,7 @@ def test_splits(self): for split in ("tree", "shapenet", "reflective", "hole"): with self.create_dataset(split=split) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -2851,36 +2831,13 @@ def test_train_splits(self): for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): with self.create_dataset(split=split, calibration=calibration) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - print("disparities", disparity.shape, 
valid_mask.shape) - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split, calibration=None) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert disparity is None - assert valid_mask is None + datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) def test_augmented_view_usage(self): with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): @@ -2963,32 +2920,13 @@ def test_train_splits(self): for split in ["train"]: with self.create_dataset(split=split) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - # check that left and right are the same size - assert left_array.shape == right_array.shape - assert disparity is None - assert valid_mask is None + datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3050,33 +2988,13 @@ def test_train_splits(self): for split in ["train"]: with self.create_dataset(split=split) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert disparity is None - assert valid_mask is None + datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3153,21 +3071,7 @@ def test_splits(self): for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", 
"Monkaa"], ["clean", "final"]): with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3230,21 +3134,7 @@ def test_splits(self): for split_name in ["single", "mixed"]: with self.create_dataset(split=split_name) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3307,21 +3197,7 @@ def inject_fake_data(self, tmpdir, config): def test_splits(self): with self.create_dataset() as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) class InStereo2k(datasets_utils.ImageDatasetTestCase): @@ -3357,21 +3233,7 @@ def test_splits(self): for split_name in ["train", "test"]: with self.create_dataset(split=split_name) as (dataset, _): for left, right, disparity, valid_mask in dataset: - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape - # check that left and right are the same size - assert left_array.shape == right_array.shape - # check general shapes - assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 - assert disparity.shape == (1, h, w) - # check that valid mask is the same size as the disparity - _, dh, dw = disparity.shape - mh, mw = valid_mask.shape - assert dh == mh - assert dw == mw + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index a8797d7d5c1..991ec71ef53 100644 --- 
a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -445,7 +445,7 @@ def _download_dataset(self, root: str): base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip" # train and additional splits have 2 different calibration settings root = Path(root) / "Middlebury2014" - for split_name, split_scenes in self.splits.values(): + for split_name, split_scenes in self.splits.items(): if split_name == "test": continue split_root = root / split_name From 67eacf201265bd9fc00d997e312288a22f7e833e Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 18:08:51 +0100 Subject: [PATCH 25/35] Removed string replaces. Moved pattern matching in parent class. --- torchvision/datasets/_stereo_matching.py | 265 +++++++++++------------ 1 file changed, 121 insertions(+), 144 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 991ec71ef53..f7ced224bf6 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -69,6 +69,30 @@ def _read_img(self, file_path: str) -> Image.Image: img = img.convert("RGB") return img + def _scan_pairs(self, left_pattern: str, right_pattern: str, fill_empty: bool = False) -> List[Tuple[str, str]]: + left_paths = sorted(glob(left_pattern)) + right_paths = sorted(glob(right_pattern)) + + # used when dealing with inexistent disparity for the right image + if fill_empty: + right_paths = list("" for _ in left_paths) + + if not left_paths: + raise FileNotFoundError(f"Could not find any files matching the patterns: {left_pattern}") + + if not right_paths: + raise FileNotFoundError(f"Could not find any files matching the patterns: {right_pattern}") + + if len(left_paths) != len(right_paths): + raise ValueError( + f"Found {len(left_paths)} left files but {len(right_paths)} right files using:\n " + f"left pattern: {left_pattern}\n" + f"right pattern: {right_pattern}\n" + ) + + images = list((left, right) for left, right in zip(left_paths, right_paths)) + return images + @abstractmethod def _read_disparity(self, file_path: str) -> Tuple: # function that returns a disparity map and an occlusion map @@ -195,26 +219,15 @@ def __init__( }[split] for s in splits: - imgs_left = sorted(glob(str(root / s / "*_left.jpg"))) - imgs_right = list(p.replace("_left", "_right") for p in imgs_left) - - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) - - imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) + left_image_pattern = str(root / s / "*_left.jpg") + right_image_pattern = str(root / s / "*_right.jpg") + imgs = self._scan_pairs(left_image_pattern, right_image_pattern) self._images += imgs - disparity_maps_left = list(p.replace("_left", "_left.disp") for p in imgs_left) - disparity_maps_right = list(p.replace("_right", "_right.disp") for p in imgs_right) - - if not any(os.path.exists(file_path) for file_path in disparity_maps_left): - raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) - - if not any(os.path.exists(file_path) for file_path in disparity_maps_right): - raise FileNotFoundError("No disparity valid maps found in {}".format(root / s)) - - disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) - self._disparities += disparity_maps + left_disparity_pattern = str(root / s / "*_left.disp.jpg") + right_disparity_pattern = str(root / s / "*_right.disp.jpg") + disparities = 
self._scan_pairs(left_disparity_pattern, right_disparity_pattern) + self._disparities += disparities def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) @@ -397,23 +410,16 @@ def __init__( for calibration_suffix in calibrartion_suffixes: scene_pattern = "*" + calibration_suffix - - imgs_left = sorted(glob(str(root / split / scene_pattern / "im0.png"))) - imgs_right = sorted(glob(str(root / split / scene_pattern / "im1.png"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) - - self._images += list((left, right) for left, right in zip(imgs_left, imgs_right)) + left_img_pattern = str(root / split / scene_pattern / "im0.png") + right_img_pattern = str(root / split / scene_pattern / "im1.png") + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) if split == "test": - disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + self._disparities += list(("", "") for _ in self._images) else: - disparity_maps_left = sorted(glob(str(root / split / "*" / "disp0.pfm"))) - disparity_maps_right = sorted(glob(str(root / split / "*" / "disp1.pfm"))) - if not len(disparity_maps_left) or not len(disparity_maps_right): - raise FileNotFoundError("No disparity maps found in {}".format(root / split)) - - self._disparities += list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) + left_dispartity_pattern = str(root / split / "*" / "disp0.pfm") + right_dispartity_pattern = str(root / split / "*" / "disp1.pfm") + self._disparities += self._scan_pairs(left_dispartity_pattern, right_dispartity_pattern) self.use_ambient_views = use_ambient_views @@ -424,7 +430,8 @@ def _read_img(self, file_path: str) -> Image.Image: """Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.""" if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: # initialize sampleable container - ambient_file_paths = list(file_path.replace("im1.png", view_name) for view_name in ["im1E.png", "im1L.png"]) + base_path = os.path.basename(file_path)[0] + ambient_file_paths = list(os.path.join(base_path, view_name) for view_name in ["im1E.png", "im1L.png"]) # double check that we're not going to try to read from an invalid file path ambient_file_paths = list(filter(lambda p: os.path.exists(p), ambient_file_paths)) # keep the original image as an option as well for uniform sampling between base views @@ -454,7 +461,7 @@ def _download_dataset(self, root: str): scene_name = f"{scene}-{calibration}" scene_url = f"{base_url}/{scene_name}.zip" download_and_extract_archive( - url=scene_url, filename=scene_name, download_root=str(split_root), remove_finished=True + url=scene_url, filename=f"{scene_name}.zip", download_root=str(split_root), remove_finished=True ) if any(s not in os.listdir(root) for s in self.splits["test"]): @@ -536,28 +543,23 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl img_dir = "two_view_training" if split == "train" else "two_view_test" anot_dir = "two_view_training_gt" - imgs_left = sorted(glob(str(root / img_dir / "*" / "*im0.png"))) - imgs_right = sorted(glob(str(root / img_dir / "*" / "*im1.png"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) + left_img_pattern = str(root / img_dir / "*" / "im0.png") + right_img_pattern = str(root / img_dir / "*" 
/ "im1.png") + self._images = self._scan_pairs(left_img_pattern, right_img_pattern) if split == "test": - disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) + self._disparities = list(("", "") for _ in self._images) else: - disparity_maps_left = sorted(glob(str(root / anot_dir / "*" / "*0GT.pfm"))) - disparity_maps_right = list("" for _ in disparity_maps_left) - if not len(disparity_maps_left): - raise FileNotFoundError("No disparity maps found in {}".format(root / anot_dir)) - - self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) + disparity_pattern = str(root / anot_dir / "*" / "disp0GT.pfm") + self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): return None, None disparity_map = _read_pfm_file(file_path) - valid_mask = Image.open(file_path.replace("disp0GT.pfm", "mask0nocc.png")) + mask_path = os.path.join(os.path.split(file_path)[0], "mask0nocc.png") + valid_mask = Image.open(mask_path) valid_mask = np.array(valid_mask).astype(np.bool_) return disparity_map, valid_mask @@ -595,23 +597,16 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl verify_str_arg(split, "split", valid_values=("train", "test")) root = Path(root) / "Kitti2012" / (split + "ing") - imgs_left = sorted(glob(str(root / "colored_0" / "*_10.png"))) - imgs_right = sorted(glob(str(root / "colored_1" / "*_10.png"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) + left_img_pattern = str(root / "colored_0" / "*_10.png") + right_img_pattern = str(root / "colored_1" / "*_10.png") + self._images = self._scan_pairs(left_img_pattern, right_img_pattern) if split == "train": - disparity_maps_left = sorted(glob(str(root / "disp_noc" / "*.png"))) - disparity_maps_right = list("" for _ in disparity_maps_left) - if not len(disparity_maps_left): - raise FileNotFoundError("No disparity maps found in {}".format(root)) - + disparity_pattern = str(root / "disp_noc" / "*.png") + self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) else: - disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - - self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) + self._disparities = list(("", "") for _ in self._images) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -636,12 +631,30 @@ class StereoKitti2015(StereoMatchingDataset): Kitti2015 testing image_2 + img1.png + img2.png + ... image_3 + img1.png + img2.png + ... training image_2 + img1.png + img2.png + ... image_3 + img1.png + img2.png + ... disp_occ_0 + img1.png + img2.png + ... disp_occ_1 + img1.png + img2.png + ... 
calib Args: @@ -656,23 +669,16 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl verify_str_arg(split, "split", valid_values=("train", "test")) root = Path(root) / "Kitti2015" / (split + "ing") - imgs_left = sorted(glob(str(root / "image_2" / "*_10.png"))) - imgs_right = sorted(glob(str(root / "image_3" / "*_10.png"))) - - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) + left_img_pattern = str(root / "image_2" / "*.png") + right_img_pattern = str(root / "image_3" / "*.png") + self._images = self._scan_pairs(left_img_pattern, right_img_pattern) if split == "train": - disparity_maps_left = sorted(glob(str(root / "disp_occ_0" / "*.png"))) - disparity_maps_right = sorted(glob(str(root / "disp_occ_1" / "*.png"))) - if not len(disparity_maps_left) or not len(disparity_maps_right): - raise FileNotFoundError("No disparity maps found in {}".format(root)) - + left_disparity_pattern = str(root / "disp_occ_0" / "*.png") + right_disparity_pattern = str(root / "disp_occ_1" / "*.png") + self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) else: - disparity_maps_left, disparity_maps_right = list("" for _ in imgs_left), list("" for _ in imgs_right) - - self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._disparities = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) + self._disparities = list(("", "") for _ in self._images) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -737,19 +743,23 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): root = Path(root) / "Sintel" - imgs_left = sorted(glob(str(root / "training" / "final_left" / "*" / "*.png"))) - imgs_right = sorted(glob(str(root / "training" / "final_right" / "*" / "*.png"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) - - dps_masks_left = sorted(glob(str(root / "training" / "disparities" / "*" / "*.png"))) - if not len(dps_masks_left): - raise FileNotFoundError("No disparity maps found in {}".format(root)) - - disparity_maps_right = list("" for _ in dps_masks_left) - - self._images = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._disparities = list((left, right) for left, right in zip(dps_masks_left, disparity_maps_right)) + left_img_pattern = str(root / "training" / "final_left" / "*" / "*.png") + right_img_pattern = str(root / "training" / "final_right" / "*" / "*.png") + self._images = self._scan_pairs(left_img_pattern, right_img_pattern) + + disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png") + self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) + + def _get_oclussion_mask_paths(self, file_path: str) -> List[str]: + path_tokens = file_path.split(os.sep) + for idx in range(len(path_tokens) - 1): + if path_tokens[idx] == "training" and path_tokens[idx + 1] == "disparities": + pre_tokens = path_tokens[: idx + 1] + post_tokens = path_tokens[idx + 2 :] + return ( + "/".join(pre_tokens + ["occlusions"] + post_tokens), + "/".join(pre_tokens + ["outofframe"] + post_tokens), + ) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -761,10 +771,12 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = r * 4 + g / (2 ** 6) + b / (2 ** 14) # reshape into (C, H, W) format disparity_map = 
np.transpose(disparity_map, (2, 0, 1)) - # occlusion mask - valid_mask = np.array(Image.open(file_path.replace("disparities", "occlusions"))) == 0 - # out of frame mask - off_mask = np.array(Image.open(file_path.replace("disparities", "outofframe"))) == 0 + # find the appropriate file paths + occluded_mask_path, out_of_frame_mask_path = self._get_oclussion_mask_paths(file_path) + # occlusion masks + valid_mask = np.array(Image.open(occluded_mask_path)) == 0 + # out of frame masks + off_mask = np.array(Image.open(out_of_frame_mask_path)) == 0 # combine the masks together valid_mask = np.logical_and(off_mask, valid_mask) return disparity_map, valid_mask @@ -842,27 +854,13 @@ def __init__( root = root / split for p in passes: - imgs_left = sorted(glob(str(root / p / "*" / "left" / "*.png"))) - imgs_right = sorted(glob(str(root / p / "*" / "right" / "*.png"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root / p)) - - imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._images += imgs - - disparity_maps_left = [file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_left] - disparity_maps_right = [ - file_path.replace(p, "disparity").replace(".png", ".pfm") for file_path in imgs_right - ] + left_img_pattern = str(root / p / "*" / "left" / "*.png") + right_img_pattern = str(root / p / "*" / "right" / "*.png") + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) - if not any(os.path.exists(file_path) for file_path in disparity_maps_left): - raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) - - if not any(os.path.exists(file_path) for file_path in disparity_maps_right): - raise FileNotFoundError("No disparity valid maps found in {}".format(root / "disparity")) - - disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) - self._disparities += disparity_maps + left_disparity_pattern = str(root / "disparity" / "*" / "left" / "*.pfm") + right_disparity_pattern = str(root / "disparity" / "*" / "right" / "*.pfm") + self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) def _read_disparity(self, file_path: str) -> Tuple: disparity = _read_pfm_file(file_path) @@ -933,21 +931,13 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab }[split] for s in splits: - imgs_left = sorted(glob(str(root / s / "*" / "*.left.jpg"))) - imgs_right = sorted(glob(str(root / s / "*" / "*.right.jpg"))) - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) - - imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._images += imgs - - disparity_maps_left = sorted(glob(str(root / s / "*" / "*.left.depth.png"))) - disparity_maps_right = sorted(glob(str(root / s / "*" / "*.right.depth.png"))) - if not len(disparity_maps_left) or not len(disparity_maps_right): - raise FileNotFoundError("No disparity maps found in {}".format(root)) + left_img_pattern = str(root / s / "*" / "*.left.jpg") + right_img_pattern = str(root / s / "*" / "*.right.jpg") + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) - disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) - self._disparities += disparity_maps + left_disparity_pattern = str(root / s / "*" / "*.left.depth.png") + right_disparity_pattern = str(root / s / "*" / 
"*.right.depth.png") + self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) def _read_disparity(self, file_path: str) -> Tuple: # (H, W) image @@ -1007,26 +997,13 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl verify_str_arg(split, "split", valid_values=("train", "test")) - imgs_left = sorted(glob(str(root / "*" / "left.png"))) - imgs_right = list(p.replace("left", "right") for p in imgs_left) - - if not len(imgs_left) or not len(imgs_right): - raise FileNotFoundError("No images found in {}".format(root)) - - imgs = list((left, right) for left, right in zip(imgs_left, imgs_right)) - self._images = imgs # type: ignore - - disparity_maps_left = list(p.replace("left", "left_disp") for p in imgs_left) - disparity_maps_right = list(p.replace("right", "right_disp") for p in imgs_left) - - if not any(os.path.exists(file_path) for file_path in disparity_maps_left): - raise FileNotFoundError("No disparity valid maps found in {}".format(root)) - - if not any(os.path.exists(file_path) for file_path in disparity_maps_right): - raise FileNotFoundError("No disparity valid maps found in {}".format(root)) + left_img_pattern = str(root / "*" / "left.png") + right_img_pattern = str(root / "*" / "right.png") + self._images = self._scan_pairs(left_img_pattern, right_img_pattern) - disparity_maps = list((left, right) for left, right in zip(disparity_maps_left, disparity_maps_right)) - self._disparities = disparity_maps # type: ignore + left_disparity_pattern = str(root / "*" / "left_disp.png") + right_disparity_pattern = str(root / "*" / "right_disp.png") + self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) def _read_disparity(self, file_path: str) -> Tuple: disparity = np.array(Image.open(file_path), dtype=np.float32) From 07e00676f29de1f08d36466c08a3fc98271e80e1 Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 18:38:00 +0100 Subject: [PATCH 26/35] Addressed doc comments --- torchvision/datasets/_stereo_matching.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index f7ced224bf6..053195459d5 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -44,7 +44,7 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W) valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W) - In some cases, when a dataset does not provide disparties, the ``disparities`` and + In some cases, when a dataset does not provide disparities, the ``disparities`` and ``valid_masks`` can be Tuples containing None values. For training splits generally the datasets provide a minimal guarantee of @@ -427,10 +427,14 @@ def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: - """Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.""" + """ + Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True. + When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]`` + as the right image. 
+ """ if os.path.basename(file_path) == "im1.png" and self.use_ambient_views: # initialize sampleable container - base_path = os.path.basename(file_path)[0] + base_path = os.path.dirname(file_path) ambient_file_paths = list(os.path.join(base_path, view_name) for view_name in ["im1E.png", "im1L.png"]) # double check that we're not going to try to read from an invalid file path ambient_file_paths = list(filter(lambda p: os.path.exists(p), ambient_file_paths)) @@ -765,7 +769,7 @@ def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): return None, None - # disparity decoding as per Sintel instructions + # disparity decoding as per Sintel instructions in the README provided with the dataset disparity_map = np.array(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) disparity_map = r * 4 + g / (2 ** 6) + b / (2 ** 14) @@ -945,10 +949,11 @@ def _read_disparity(self, file_path: str) -> Tuple: # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt # in order to extract disparity from depth maps with open(os.path.split(file_path)[0] + "/_camera_settings.json", "r") as f: + # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constatnt) intrinsics = json.load(f) - fx = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] - # inverse of depth-from-disparity equation - disparity = (fx * 6.0 * 100) / depth.astype(np.float32) + focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] + baseline, pixel_constant = 6.0, 100.0 # pixel constant is inverted + disparity = (baseline * focal * pixel_constant) / depth.astype(np.float32) valid = disparity > 0 # unsqueeze disparity to (C, H, W) disparity = disparity[None, :, :] From ec550e84238f6e7d993469352fb0294d3dfdcedd Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 19:50:47 +0100 Subject: [PATCH 27/35] Middlebury disparity quickfix --- torchvision/datasets/_stereo_matching.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 053195459d5..6a4d2e48999 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -417,11 +417,12 @@ def __init__( if split == "test": self._disparities += list(("", "") for _ in self._images) else: - left_dispartity_pattern = str(root / split / "*" / "disp0.pfm") - right_dispartity_pattern = str(root / split / "*" / "disp1.pfm") + left_dispartity_pattern = str(root / split / scene_pattern / "disp0.pfm") + right_dispartity_pattern = str(root / split / scene_pattern / "disp1.pfm") self._disparities += self._scan_pairs(left_dispartity_pattern, right_dispartity_pattern) self.use_ambient_views = use_ambient_views + print(self._disparities[0], self._images[0]) def __getitem__(self, index: int) -> Tuple: return super().__getitem__(index) From 1dd17538d98f960bcdfa35cab7fd51efbc73cbbf Mon Sep 17 00:00:00 2001 From: Ponku Date: Thu, 14 Jul 2022 20:44:43 +0100 Subject: [PATCH 28/35] Fixed mypy errors. Addressed download checks. 
--- torchvision/datasets/_stereo_matching.py | 72 ++++++++++++++++-------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 6a4d2e48999..d40616cb835 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -60,8 +60,8 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): super().__init__(root=root) self.transforms = transforms - self._images: List[Tuple] = [] - self._disparities: List[Tuple] = [] + self._images: List[Tuple[str, str]] = [] + self._disparities: List[Tuple[str, str]] = [] def _read_img(self, file_path: str) -> Image.Image: img = Image.open(file_path) @@ -69,25 +69,27 @@ def _read_img(self, file_path: str) -> Image.Image: img = img.convert("RGB") return img - def _scan_pairs(self, left_pattern: str, right_pattern: str, fill_empty: bool = False) -> List[Tuple[str, str]]: - left_paths = sorted(glob(left_pattern)) - right_paths = sorted(glob(right_pattern)) + def _scan_pairs( + self, paths_left_pattern: str, paths_right_pattern: str, fill_empty: bool = False + ) -> List[Tuple[str, str]]: + left_paths: List[str] = sorted(glob(paths_left_pattern)) + right_paths: List[str] = sorted(glob(paths_right_pattern)) # used when dealing with inexistent disparity for the right image if fill_empty: right_paths = list("" for _ in left_paths) if not left_paths: - raise FileNotFoundError(f"Could not find any files matching the patterns: {left_pattern}") + raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_left_pattern}") if not right_paths: - raise FileNotFoundError(f"Could not find any files matching the patterns: {right_pattern}") + raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_right_pattern}") if len(left_paths) != len(right_paths): raise ValueError( f"Found {len(left_paths)} left files but {len(right_paths)} right files using:\n " - f"left pattern: {left_pattern}\n" - f"right pattern: {right_pattern}\n" + f"left pattern: {paths_left_pattern}\n" + f"right pattern: {paths_right_pattern}\n" ) images = list((left, right) for left, right in zip(left_paths, right_paths)) @@ -387,6 +389,7 @@ def __init__( self._download_dataset(root) root = Path(root) / "Middlebury2014" + self.split = split if not os.path.exists(root / split): raise FileNotFoundError(f"The {split} directory was not found in the provided root directory") @@ -457,7 +460,9 @@ def _download_dataset(self, root: str): base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip" # train and additional splits have 2 different calibration settings root = Path(root) / "Middlebury2014" - for split_name, split_scenes in self.splits.items(): + download_split = self.split + + for split_name, split_scenes in (download_split, self.splits[download_split]): if split_name == "test": continue split_root = root / split_name @@ -465,11 +470,16 @@ def _download_dataset(self, root: str): for calibration in ["perfect", "imperfect"]: scene_name = f"{scene}-{calibration}" scene_url = f"{base_url}/{scene_name}.zip" - download_and_extract_archive( - url=scene_url, filename=f"{scene_name}.zip", download_root=str(split_root), remove_finished=True - ) - - if any(s not in os.listdir(root) for s in self.splits["test"]): + # download the scene only if it doesn't exist + if not os.path.exists(split_root / scene_name): + download_and_extract_archive( + url=scene_url, + filename=f"{scene_name}.zip", + 
download_root=str(split_root), + remove_finished=True, + ) + + if any(s not in os.listdir(root / "test") for s in self.splits["test"]): # test split is downloaded from a different location test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip" @@ -550,13 +560,13 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl left_img_pattern = str(root / img_dir / "*" / "im0.png") right_img_pattern = str(root / img_dir / "*" / "im1.png") - self._images = self._scan_pairs(left_img_pattern, right_img_pattern) + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) if split == "test": self._disparities = list(("", "") for _ in self._images) else: disparity_pattern = str(root / anot_dir / "*" / "disp0GT.pfm") - self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) + self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): @@ -605,11 +615,11 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl left_img_pattern = str(root / "colored_0" / "*_10.png") right_img_pattern = str(root / "colored_1" / "*_10.png") - self._images = self._scan_pairs(left_img_pattern, right_img_pattern) + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) if split == "train": disparity_pattern = str(root / "disp_noc" / "*.png") - self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) + self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) else: self._disparities = list(("", "") for _ in self._images) @@ -676,12 +686,12 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl root = Path(root) / "Kitti2015" / (split + "ing") left_img_pattern = str(root / "image_2" / "*.png") right_img_pattern = str(root / "image_3" / "*.png") - self._images = self._scan_pairs(left_img_pattern, right_img_pattern) + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) if split == "train": left_disparity_pattern = str(root / "disp_occ_0" / "*.png") right_disparity_pattern = str(root / "disp_occ_1" / "*.png") - self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) + self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) else: self._disparities = list(("", "") for _ in self._images) @@ -750,21 +760,33 @@ def __init__(self, root: str, transforms: Optional[Callable] = None): left_img_pattern = str(root / "training" / "final_left" / "*" / "*.png") right_img_pattern = str(root / "training" / "final_right" / "*" / "*.png") - self._images = self._scan_pairs(left_img_pattern, right_img_pattern) + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png") - self._disparities = self._scan_pairs(disparity_pattern, "", fill_empty=True) + self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) - def _get_oclussion_mask_paths(self, file_path: str) -> List[str]: + def _get_oclussion_mask_paths(self, file_path: str) -> Tuple[str, str]: path_tokens = file_path.split(os.sep) + rets = None + for idx in range(len(path_tokens) - 1): if path_tokens[idx] == "training" and path_tokens[idx + 1] == "disparities": pre_tokens = path_tokens[: idx + 1] post_tokens = path_tokens[idx + 2 :] - return ( + rets = ( "/".join(pre_tokens + ["occlusions"] + post_tokens), 
"/".join(pre_tokens + ["outofframe"] + post_tokens), ) + break + + if rets is None: + raise ValueError("Malformed file path: {}".format(file_path)) + + for path in rets: + if not os.path.exists(path): + raise ValueError(f"Could not find file {path}") + + return rets def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): From 9f70687e055a30cd91439f99c12503202c9f0c2e Mon Sep 17 00:00:00 2001 From: Ponku Date: Fri, 15 Jul 2022 12:30:07 +0100 Subject: [PATCH 29/35] Dataset renaming. Test changes. getitem removed. Warnings removed. Middlebury per split download. --- test/datasets_utils.py | 22 ++-- test/test_datasets.py | 80 ++++++------ torchvision/datasets/__init__.py | 14 +-- torchvision/datasets/_stereo_matching.py | 150 +++++++++-------------- 4 files changed, 114 insertions(+), 152 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index a643c43685a..b0c31c71116 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -24,7 +24,7 @@ import torchvision.datasets import torchvision.io from common_utils import get_tmp_dir, disable_console_output - +from torchvision.transforms.functional import get_dimensions __all__ = [ "UsageError", @@ -937,15 +937,15 @@ def create_random_string(length: int, *digits: str) -> str: def shape_test_for_stereo_disp( left: PIL.Image.Image, right: PIL.Image.Image, disparity: np.ndarray, valid_mask: np.ndarray ): - left_array = np.array(left) - right_array = np.array(right) - h, w, c = left_array.shape + left_dims = get_dimensions(left) + right_dims = get_dimensions(right) + c, h, w = left_dims # check that left and right are the same size - assert left_array.shape == right_array.shape + assert left_dims == right_dims # check general shapes assert c == 3 - assert len(disparity.shape) == 3 - assert len(valid_mask.shape) == 2 + assert disparity.ndim == 3 + assert valid_mask.ndim == 2 assert disparity.shape == (1, h, w) # check that valid mask is the same size as the disparity _, dh, dw = disparity.shape @@ -955,11 +955,11 @@ def shape_test_for_stereo_disp( def shape_test_for_stereo_none(left: PIL.Image.Image, right: PIL.Image.Image, disparity: None, valid_mask: None): - left_array = np.array(left) - right_array = np.array(right) - _, _, c = left_array.shape + left_dims = get_dimensions(left) + right_dims = get_dimensions(right) + c, _, _ = left_dims # check that left and right are the same size - assert left_array.shape == right_array.shape + assert left_dims == right_dims # check general shapes assert c == 3 assert disparity is None diff --git a/test/test_datasets.py b/test/test_datasets.py index 8ba77244c2f..77f3ee4e019 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2672,8 +2672,8 @@ def inject_fake_data(self, tmpdir: str, config): return len(sampled_classes) * num_images_per_class[config["split"]] -class StereoETH3DTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoETH3D +class ETH3DTStereoestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ETH3DStereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @@ -2745,41 +2745,37 @@ def test_bad_input(self): class CREStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CREStereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("tree", "shapenet", "reflective", "hole")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, 
(np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" os.makedirs(crestereo_dir, exist_ok=True) - split_dir = crestereo_dir / config["split"] - os.makedirs(split_dir, exist_ok=True) + examples = {"tree": 2, "shapenet": 3, "reflective": 6, "hole": 5} - num_examples = {"tree": 2, "shapenet": 3, "reflective": 6, "hole": 5}.get(config["split"], 0) + for category_name in ["shapenet", "reflective", "tree", "hole"]: + split_dir = crestereo_dir / category_name + os.makedirs(split_dir, exist_ok=True) + num_examples = examples[category_name] - for idx in range(num_examples): - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) - # these are going to end up being gray scale images - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.jpg", size=(1, 100, 100)) - datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.jpg", size=(1, 100, 100)) + for idx in range(num_examples): + p = datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) + print(p) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) + # these are going to end up being gray scale images + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.png", size=(1, 100, 100)) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.disp.png", size=(1, 100, 100)) - return num_examples + return sum(examples.values()) def test_splits(self): - for split in ("tree", "shapenet", "reflective", "hole"): - with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) - - def test_bad_input(self): - with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): - with self.create_dataset(split="bad"): - pass + with self.create_dataset() as (dataset, _): + for left, right, disparity, valid_mask in dataset: + datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) -class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoMiddlebury2014 +class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Middlebury2014Stereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( split=("train", "additional"), calibration=("perfect", "imperfect", "both"), @@ -2789,7 +2785,7 @@ class StereoMiddlebury2014TestCase(datasets_utils.ImageDatasetTestCase): @staticmethod def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: - calibrations = [""] if split == "test" else ["-perfect", "-imperfect"] + calibrations = [None] if split == "test" else ["-perfect", "-imperfect"] scene_dirs = [] for c in calibrations: scene_dir = os.path.join(root_dir, f"{scene_name}{c}") @@ -2851,9 +2847,9 @@ def test_warnings_train(self): # train set invalid split = "train" calibration = None - with pytest.warns( - RuntimeWarning, - match=f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + with pytest.raises( + ValueError, + match=f"Split '{split}' has calibration settings, however None was provided as an argument." f"\nSetting calibration to 'perfect' for split '{split}'. 
Available calibration settings are: 'perfect', 'imperfect', 'both'.", ): with self.create_dataset(split=split, calibration=calibration): @@ -2863,8 +2859,8 @@ def test_warnings_test(self): # test set invalid split = "test" calibration = "perfect" - with pytest.warns( - RuntimeWarning, match="\nSplit 'test' has only no calibration settings, ignoring calibration argument." + with pytest.raises( + ValueError, match="Split 'test' has only no calibration settings, please set `calibration=None`." ): with self.create_dataset(split=split, calibration=calibration): pass @@ -2875,8 +2871,8 @@ def test_bad_input(self): pass -class StereoKitti2012TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2012 +class Kitti2012StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti2012Stereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @@ -2934,8 +2930,8 @@ def test_bad_input(self): pass -class StereoKitti2015TestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoKitti2015 +class Kitti2015StereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti2015Stereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @@ -3002,8 +2998,8 @@ def test_bad_input(self): pass -class StereoSceneFlowTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoSceneFlow +class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SceneFlowStereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( split=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final") ) @@ -3079,8 +3075,8 @@ def test_bad_input(self): pass -class StereoFallingThingsTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoFallingThings +class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FallingThingsStereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @@ -3100,10 +3096,10 @@ def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> Lis paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))) # single channel depth maps paths.append( - StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1])) + FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1])) ) paths.append( - StereoFallingThingsTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1])) + FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1])) ) # camera settings json. 
Minimal example for _read_disparity function testing settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} @@ -3142,8 +3138,8 @@ def test_bad_input(self): pass -class StereoSintelTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StereoSintel +class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SintelStereo FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 973d5ca9f7e..8e0e6f274d1 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -1,12 +1,12 @@ from ._optical_flow import KittiFlow, Sintel, FlyingChairs, FlyingThings3D, HD1K from ._stereo_matching import ( - StereoETH3D, - StereoFallingThings, - StereoKitti2012, - StereoKitti2015, - StereoMiddlebury2014, - StereoSceneFlow, - StereoSintel, + ETH3DStereo, + FallingThingsStereo, + Kitti2012Stereo, + Kitti2015Stereo, + Middlebury2014Stereo, + SceneFlowStereo, + SintelStereo, CREStereo, InStereo2k, ) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index d40616cb835..474b82adcc0 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -3,7 +3,6 @@ import os import random import shutil -import warnings from abc import ABC, abstractmethod from glob import glob from pathlib import Path @@ -17,13 +16,13 @@ __all__ = ( "CREStereo" - "StereoMiddlebury2014" - "StereoETH3D" - "StereoKitti2012" - "StereoKitti2015" - "StereoSintel" - "StereoSceneFlow" - "StereoFallingThings" + "Middlebury2014Stereo" + "ETH3DStereo" + "Kitti2012Stereo" + "Kitti2015Stereo" + "SintelStereo" + "SceneFlowStereo" + "FallingThingsStereo" "InStereo2k" ) @@ -188,7 +187,6 @@ class CREStereo(StereoMatchingDataset): def __init__( self, root: str, - split: str = "tree", transforms: Optional[Callable] = None, download: bool = False, max_disparity: float = 256.0, @@ -210,29 +208,22 @@ def __init__( ) self._download_dataset(str(root)) - verify_str_arg(split, "split", valid_values=("tree", "shapenet", "reflective", "hole", "all")) + dirs = ["shapenet", "reflective", "tree", "hole"] - splits = { - "tree": ["tree"], - "shapenet": ["shapenet"], - "reflective": ["reflective"], - "hole": ["hole"], - "all": ["hole", "shapenet", "reflective", "hole"], - }[split] - - for s in splits: + for s in dirs: left_image_pattern = str(root / s / "*_left.jpg") right_image_pattern = str(root / s / "*_right.jpg") + print(left_image_pattern, right_image_pattern) imgs = self._scan_pairs(left_image_pattern, right_image_pattern) self._images += imgs - left_disparity_pattern = str(root / s / "*_left.disp.jpg") - right_disparity_pattern = str(root / s / "*_right.disp.jpg") + left_disparity_pattern = str(root / s / "*_left.disp.png") + right_disparity_pattern = str(root / s / "*_right.disp.png") disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) self._disparities += disparities def _read_disparity(self, file_path: str) -> Tuple: - disparity = np.array(Image.open(file_path), dtype=np.float32) + disparity = np.asarray(Image.open(file_path), dtype=np.float32) valid = (disparity < self.max_disparity) & (disparity > 0.0) # unsqueeze the disparity map into (C, H, W) format disparity = disparity[None, :, :] @@ -251,7 +242,7 @@ def _download_dataset(self, root: str) -> None: 
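A side note on the ``__all__`` tuple touched earlier in this patch: the renamed entries still have no commas between them, so Python's implicit string-literal concatenation collapses them into one long string rather than a tuple of export names. A comma-separated tuple, as presumably intended, would read:

__all__ = (
    "CREStereo",
    "Middlebury2014Stereo",
    "ETH3DStereo",
    "Kitti2012Stereo",
    "Kitti2015Stereo",
    "SintelStereo",
    "SceneFlowStereo",
    "FallingThingsStereo",
    "InStereo2k",
)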
download_and_extract_archive(url=url, download_root=d_path, remove_finished=True) -class StereoMiddlebury2014(StereoMatchingDataset): +class Middlebury2014Stereo(StereoMatchingDataset): """Publicly available scenes from the Middlebury dataset `2014 version `. The dataset mostly follows the original format, without containing the ambient subdirectories. : :: @@ -368,28 +359,23 @@ def __init__( super().__init__(root, transforms) verify_str_arg(split, "split", valid_values=("train", "test", "additional")) + self.split = split if calibration: - verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both")) # type: ignore + verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None)) # type: ignore if split == "test": - calibration = None - warnings.warn( - "\nSplit 'test' has only no calibration settings, ignoring calibration argument.", RuntimeWarning - ) + raise ValueError("Split 'test' has only no calibration settings, please set `calibration=None`.") else: if split != "test": - calibration = "perfect" - warnings.warn( - f"\nSplit '{split}' has calibration settings, however None was provided as an argument." + raise ValueError( + f"Split '{split}' has calibration settings, however None was provided as an argument." f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.", - RuntimeWarning, ) if download: self._download_dataset(root) root = Path(root) / "Middlebury2014" - self.split = split if not os.path.exists(root / split): raise FileNotFoundError(f"The {split} directory was not found in the provided root directory") @@ -425,10 +411,6 @@ def __init__( self._disparities += self._scan_pairs(left_dispartity_pattern, right_dispartity_pattern) self.use_ambient_views = use_ambient_views - print(self._disparities[0], self._images[0]) - - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) def _read_img(self, file_path: str) -> Image.Image: """ @@ -460,16 +442,15 @@ def _download_dataset(self, root: str): base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip" # train and additional splits have 2 different calibration settings root = Path(root) / "Middlebury2014" - download_split = self.split + split_name = self.split - for split_name, split_scenes in (download_split, self.splits[download_split]): - if split_name == "test": - continue - split_root = root / split_name - for scene in split_scenes: + if split_name != "test": + for split_scene in self.splits[split_name]: + split_root = root / split_name for calibration in ["perfect", "imperfect"]: - scene_name = f"{scene}-{calibration}" + scene_name = f"{split_scene}-{calibration}" scene_url = f"{base_url}/{scene_name}.zip" + print(f"Downloading {scene_url}") # download the scene only if it doesn't exist if not os.path.exists(split_root / scene_name): download_and_extract_archive( @@ -478,26 +459,26 @@ def _download_dataset(self, root: str): download_root=str(split_root), remove_finished=True, ) - - if any(s not in os.listdir(root / "test") for s in self.splits["test"]): - # test split is downloaded from a different location - test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip" - - # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF - # we want to move the contents from testF into the directory - download_and_extract_archive(url=test_set_url, download_root=str(root), remove_finished=True) - for 
scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): - for scene in scene_names: - scene_dst_dir = root / "test" / scene - scene_src_dir = Path(scene_dir) / scene - os.makedirs(scene_dst_dir, exist_ok=True) - shutil.move(str(scene_src_dir), str(scene_dst_dir)) - - # cleanup MiddEval3 directory - shutil.rmtree(str(root / "MiddEval3")) - - -class StereoETH3D(StereoMatchingDataset): + else: + os.makedirs(root / "test") + if any(s not in os.listdir(root / "test") for s in self.splits["test"]): + # test split is downloaded from a different location + test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip" + # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF + # we want to move the contents from testF into the directory + download_and_extract_archive(url=test_set_url, download_root=str(root), remove_finished=True) + for scene_dir, scene_names, _ in os.walk(str(root / "MiddEval3/testF")): + for scene in scene_names: + scene_dst_dir = root / "test" + scene_src_dir = Path(scene_dir) / scene + os.makedirs(scene_dst_dir, exist_ok=True) + shutil.move(str(scene_src_dir), str(scene_dst_dir)) + + # cleanup MiddEval3 directory + shutil.rmtree(str(root / "MiddEval3")) + + +class ETH3DStereo(StereoMatchingDataset): """ "ETH3D `Low-Res Two-View `_ dataset. The dataset is expected to have the following structure: :: @@ -575,14 +556,11 @@ def _read_disparity(self, file_path: str) -> Tuple: disparity_map = _read_pfm_file(file_path) mask_path = os.path.join(os.path.split(file_path)[0], "mask0nocc.png") valid_mask = Image.open(mask_path) - valid_mask = np.array(valid_mask).astype(np.bool_) + valid_mask = np.asarray(valid_mask).astype(np.bool_) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) - -class StereoKitti2012(StereoMatchingDataset): +class Kitti2012Stereo(StereoMatchingDataset): """ "Kitti dataset from the `2012 `_ stereo evaluation benchmark. Uses the RGB images for consistency with Kitti 2015. @@ -627,17 +605,14 @@ def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): return None, None - disparity_map = np.array(Image.open(file_path)) / 256.0 + disparity_map = np.asarray(Image.open(file_path)) / 256.0 valid_mask = disparity_map > 0.0 # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) - -class StereoKitti2015(StereoMatchingDataset): +class Kitti2015Stereo(StereoMatchingDataset): """ "Kitti dataset from the `2015 `_ stereo evaluation benchmark. The dataset is expected to have the following structure: :: @@ -699,17 +674,14 @@ def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): return None, None - disparity_map = np.array(Image.open(file_path)) / 256.0 + disparity_map = np.asarray(Image.open(file_path)) / 256.0 valid_mask = disparity_map < 0.0 # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) - -class StereoSintel(StereoMatchingDataset): +class SintelStereo(StereoMatchingDataset): """ "Sintel `Stereo Dataset `_. 
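Sintel ships its ground-truth disparity as ordinary RGB PNGs; ``_read_disparity`` further down decodes them channel-wise, following the instructions in the README distributed with the dataset, as

    disparity = R * 4 + G / 2**6 + B / 2**14

so a single 8-bit image covers a disparity range of roughly 0 to 1024 pixels at sub-pixel precision.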
The dataset is expected to have the following structure: :: @@ -793,7 +765,7 @@ def _read_disparity(self, file_path: str) -> Tuple: return None, None # disparity decoding as per Sintel instructions in the README provided with the dataset - disparity_map = np.array(Image.open(file_path), dtype=np.float32) + disparity_map = np.asarray(Image.open(file_path), dtype=np.float32) r, g, b = np.split(disparity_map, 3, axis=-1) disparity_map = r * 4 + g / (2 ** 6) + b / (2 ** 14) # reshape into (C, H, W) format @@ -801,18 +773,15 @@ def _read_disparity(self, file_path: str) -> Tuple: # find the appropiate file paths occlued_mask_path, out_of_frame_mask_path = self._get_oclussion_mask_paths(file_path) # occlusion masks - valid_mask = np.array(Image.open(occlued_mask_path)) == 0 + valid_mask = np.asarray(Image.open(occlued_mask_path)) == 0 # out of frame masks - off_mask = np.array(Image.open(out_of_frame_mask_path)) == 0 + off_mask = np.asarray(Image.open(out_of_frame_mask_path)) == 0 # combine the masks together valid_mask = np.logical_and(off_mask, valid_mask) return disparity_map, valid_mask - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) - -class StereoSceneFlow(StereoMatchingDataset): +class SceneFlowStereo(StereoMatchingDataset): """Dataset interface for `Scene Flow `_ datasets. The dataset is expected to have the following structre: :: @@ -895,11 +864,8 @@ def _read_disparity(self, file_path: str) -> Tuple: valid = np.ones(disparity.shape[1:]).astype(np.bool_) return disparity, valid - def __getitem__(self, index: int) -> Tuple: - return super().__getitem__(index) - -class StereoFallingThings(StereoMatchingDataset): +class FallingThingsStereo(StereoMatchingDataset): """FallingThings ``_ dataset The dataset is expected to have the following structre: :: @@ -968,7 +934,7 @@ def __init__(self, root: str, split: str = "single", transforms: Optional[Callab def _read_disparity(self, file_path: str) -> Tuple: # (H, W) image - depth = np.array(Image.open(file_path)) + depth = np.asarray(Image.open(file_path)) # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt # in order to extract disparity from depth maps with open(os.path.split(file_path)[0] + "/_camera_settings.json", "r") as f: @@ -1034,7 +1000,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern) def _read_disparity(self, file_path: str) -> Tuple: - disparity = np.array(Image.open(file_path), dtype=np.float32) + disparity = np.asarray(Image.open(file_path), dtype=np.float32) valid = np.ones_like(disparity).astype(np.bool_) # unsqueeze disparity to (C, H, W) disparity = disparity[None, :, :] From 78f4a52a69605a6385d160fdef7814c63e4ccf3a Mon Sep 17 00:00:00 2001 From: Ponku Date: Fri, 15 Jul 2022 13:55:53 +0100 Subject: [PATCH 30/35] Forced disparity to be positive --- torchvision/datasets/_stereo_matching.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py index 474b82adcc0..66d8834c74c 100644 --- a/torchvision/datasets/_stereo_matching.py +++ b/torchvision/datasets/_stereo_matching.py @@ -433,6 +433,7 @@ def _read_disparity(self, file_path: str) -> Tuple: if not os.path.exists(file_path): # case when dealing with the test split return None, None disparity_map = _read_pfm_file(file_path) + disparity_map = np.abs(disparity_map) # ensure that 
the disparity is positive valid_mask = disparity_map < 1e3 # remove the channel dimension from the valid mask valid_mask = valid_mask[0, :, :] @@ -554,6 +555,7 @@ def _read_disparity(self, file_path: str) -> Tuple: return None, None disparity_map = _read_pfm_file(file_path) + disparity_map = np.abs(disparity_map) # ensure that the disparity is positive mask_path = os.path.join(os.path.split(file_path)[0], "mask0nocc.png") valid_mask = Image.open(mask_path) valid_mask = np.asarray(valid_mask).astype(np.bool_) @@ -675,7 +677,7 @@ def _read_disparity(self, file_path: str) -> Tuple: return None, None disparity_map = np.asarray(Image.open(file_path)) / 256.0 - valid_mask = disparity_map < 0.0 + valid_mask = disparity_map > 0.0 # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] return disparity_map, valid_mask @@ -859,10 +861,11 @@ def __init__( self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern) def _read_disparity(self, file_path: str) -> Tuple: - disparity = _read_pfm_file(file_path) + disparity_map = _read_pfm_file(file_path) + disparity_map = np.abs(disparity_map) # ensure that the disparity is positive # keep valid mask with shape (H, W) - valid = np.ones(disparity.shape[1:]).astype(np.bool_) - return disparity, valid + valid = np.ones(disparity_map.shape[1:]).astype(np.bool_) + return disparity_map, valid class FallingThingsStereo(StereoMatchingDataset): From e2ad8d21b4c69237c4b2230d3ad920d5484227cb Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 18 Jul 2022 15:53:56 +0100 Subject: [PATCH 31/35] Removed implicit mask creation. Added private built_in_mask flag similar to _optical_flow.py --- test/datasets_utils.py | 18 +++- test/test_datasets.py | 122 +++++++++++++---------- torchvision/datasets/_stereo_matching.py | 93 ++++++++++------- 3 files changed, 145 insertions(+), 88 deletions(-) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index b0c31c71116..ea85a853824 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -934,7 +934,7 @@ def create_random_string(length: int, *digits: str) -> str: return "".join(random.choice(digits) for _ in range(length)) -def shape_test_for_stereo_disp( +def shape_test_for_stereo_gt_w_mask( left: PIL.Image.Image, right: PIL.Image.Image, disparity: np.ndarray, valid_mask: np.ndarray ): left_dims = get_dimensions(left) @@ -945,7 +945,6 @@ def shape_test_for_stereo_disp( # check general shapes assert c == 3 assert disparity.ndim == 3 - assert valid_mask.ndim == 2 assert disparity.shape == (1, h, w) # check that valid mask is the same size as the disparity _, dh, dw = disparity.shape @@ -954,7 +953,19 @@ def shape_test_for_stereo_disp( assert dw == mw -def shape_test_for_stereo_none(left: PIL.Image.Image, right: PIL.Image.Image, disparity: None, valid_mask: None): +def shape_test_for_stereo_gt_no_mask(left: PIL.Image.Image, right: PIL.Image.Image, disparity: np.ndarray): + left_dims = get_dimensions(left) + right_dims = get_dimensions(right) + c, h, w = left_dims + # check that left and right are the same size + assert left_dims == right_dims + # check general shapes + assert c == 3 + assert disparity.ndim == 3 + assert disparity.shape == (1, h, w) + + +def shape_test_for_stereo_no_gt(left: PIL.Image.Image, right: PIL.Image.Image, disparity: None): left_dims = get_dimensions(left) right_dims = get_dimensions(right) c, _, _ = left_dims @@ -963,7 +974,6 @@ def shape_test_for_stereo_none(left: PIL.Image.Image, right: PIL.Image.Image, di # check general shapes 
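    # (editor's note) splits without ground truth are expected to yield disparity=None,
    # so beyond the RGB image geometry there is nothing further to validate here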
assert c == 3 assert disparity is None - assert valid_mask is None def make_fake_pfm_file(h, w, file_name): diff --git a/test/test_datasets.py b/test/test_datasets.py index 77f3ee4e019..a75e597c049 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2724,18 +2724,19 @@ def inject_fake_data(self, tmpdir, config): return num_examples - def test_training_test_splits(self): + def test_training_splits(self): with self.create_dataset(split="train") as (dataset, _): assert dataset._images and len(dataset._images) == len( dataset._disparities ), "Training images do not match with training disparities" for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + datasets_utils.shape_test_for_stereo_gt_w_mask(left, right, disparity, valid_mask) + def test_testing_splits(self): with self.create_dataset(split="test") as (dataset, _): assert all(d == ("", "") for d in dataset._disparities) - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) + for left, right, disparity, _ in dataset: + datasets_utils.shape_test_for_stereo_no_gt(left, right, disparity) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -2745,7 +2746,7 @@ def test_bad_input(self): class CREStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CREStereo - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, np.ndarray, type(None)) def inject_fake_data(self, tmpdir, config): crestereo_dir = pathlib.Path(tmpdir) / "CREStereo" @@ -2759,8 +2760,7 @@ def inject_fake_data(self, tmpdir, config): num_examples = examples[category_name] for idx in range(num_examples): - p = datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) - print(p) + datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.jpg", size=(100, 100)) datasets_utils.create_image_file(root=split_dir, name=f"{idx}_right.jpg", size=(100, 100)) # these are going to end up being gray scale images datasets_utils.create_image_file(root=split_dir, name=f"{idx}_left.disp.png", size=(1, 100, 100)) @@ -2770,8 +2770,8 @@ def inject_fake_data(self, tmpdir, config): def test_splits(self): with self.create_dataset() as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity, mask in dataset: + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): @@ -2781,7 +2781,7 @@ class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): calibration=("perfect", "imperfect", "both"), use_ambient_views=(True, False), ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @staticmethod def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]: @@ -2826,18 +2826,18 @@ def inject_fake_data(self, tmpdir, config): def test_train_splits(self): for split, calibration in itertools.product(["train", "additional"], ["perfect", "imperfect", "both"]): with self.create_dataset(split=split, calibration=calibration) as (dataset, _): - for left, right, disparity, 
valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split, calibration=None) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo_no_gt(left, right, disparity) def test_augmented_view_usage(self): with self.create_dataset(split="train", use_ambient_views=True) as (dataset, _): - for left, right, _, _ in dataset: + for left, right, _ in dataset: left_array = np.array(left) right_array = np.array(right) # check that left and right are the same size @@ -2915,14 +2915,16 @@ def inject_fake_data(self, tmpdir, config): def test_train_splits(self): for split in ["train"]: with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo_no_gt(left, right, disparity) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -2983,14 +2985,16 @@ def inject_fake_data(self, tmpdir, config): def test_train_splits(self): for split in ["train"]: with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) def test_test_split(self): for split in ["test"]: with self.create_dataset(split=split) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_none(left, right, disparity, valid_mask) + for left, right, disparity, mask in dataset: + assert mask is None + datasets_utils.shape_test_for_stereo_no_gt(left, right, disparity) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3003,7 +3007,7 @@ class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase): ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( split=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final") ) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @staticmethod def _create_pfm_folder( @@ -3066,8 +3070,8 @@ def inject_fake_data(self, tmpdir, config): def test_splits(self): for split_name, pass_name in itertools.product(["FlyingThings3D", "Driving", "Monkaa"], ["clean", "final"]): with self.create_dataset(split=split_name, pass_name=pass_name) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - 
datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3078,7 +3082,7 @@ def test_bad_input(self): class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FallingThingsStereo ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("single", "mixed")) - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @staticmethod def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]): @@ -3129,8 +3133,8 @@ def inject_fake_data(self, tmpdir, config): def test_splits(self): for split_name in ["single", "mixed"]: with self.create_dataset(split=split_name) as (dataset, _): - for left, right, disparity, valid_mask in dataset: - datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask) + for left, right, disparity in dataset: + datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity) def test_bad_input(self): with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"): @@ -3140,6 +3144,7 @@ def test_bad_input(self): class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.SintelStereo + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(pass_name=("final", "clean", "both")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): @@ -3150,25 +3155,31 @@ def inject_fake_data(self, tmpdir, config): os.makedirs(split_dir, exist_ok=True) # a single setting, since there are no splits - num_examples = 4 - - for view in ["final_left", "final_right"]: - root = split_dir / view - os.makedirs(root, exist_ok=True) + num_examples = {"final": 2, "clean": 2} + pass_names = { + "final": ["final"], + "clean": ["clean"], + "both": ["final", "clean"], + }.get(config["pass_name"], []) + + for p in pass_names: + for view in [f"{p}_left", f"{p}_right"]: + root = split_dir / view + os.makedirs(root, exist_ok=True) - datasets_utils.create_image_folder( - root=root, - name="scene1", - file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, - size=(3, 100, 200), - ) + datasets_utils.create_image_folder( + root=root, + name="scene1", + file_name_fn=lambda i: f"{i:06d}.png", + num_examples=num_examples[p], + size=(3, 100, 200), + ) datasets_utils.create_image_folder( root=split_dir / "occlusions", name="scene1", file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, + num_examples=2, size=(1, 100, 200), ) @@ -3176,7 +3187,7 @@ def inject_fake_data(self, tmpdir, config): root=split_dir / "outofframe", name="scene1", file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, + num_examples=2, size=(1, 100, 200), ) @@ -3184,21 +3195,32 @@ def inject_fake_data(self, tmpdir, config): root=split_dir / "disparities", name="scene1", file_name_fn=lambda i: f"{i:06d}.png", - num_examples=num_examples, + num_examples=2, size=(3, 100, 200), ) + if config["pass_name"] == "both": + num_examples = sum(num_examples.values()) + else: + num_examples = num_examples.get(config["pass_name"], 0) + return num_examples def test_splits(self): - with self.create_dataset() as (dataset, _): - for left, right, disparity, 
valid_mask in dataset:
-                datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask)
+        for pass_name in ["final", "clean", "both"]:
+            with self.create_dataset(pass_name=pass_name) as (dataset, _):
+                for left, right, disparity, valid_mask in dataset:
+                    datasets_utils.shape_test_for_stereo_gt_w_mask(left, right, disparity, valid_mask)
+
+    def test_bad_input(self):
+        with pytest.raises(ValueError, match="Unknown value 'bad' for argument pass_name"):
+            with self.create_dataset(pass_name="bad"):
+                pass
 
 
 class InStereo2k(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.InStereo2k
-    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
     ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
 
     @staticmethod
@@ -3228,8 +3250,8 @@ def inject_fake_data(self, tmpdir, config):
     def test_splits(self):
         for split_name in ["train", "test"]:
             with self.create_dataset(split=split_name) as (dataset, _):
-                for left, right, disparity, valid_mask in dataset:
-                    datasets_utils.shape_test_for_stereo_disp(left, right, disparity, valid_mask)
+                for left, right, disparity in dataset:
+                    datasets_utils.shape_test_for_stereo_gt_no_mask(left, right, disparity)
 
     def test_bad_input(self):
         with pytest.raises(ValueError, match="Unknown value 'bad' for argument split"):

diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index 66d8834c74c..14ce20a2d96 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -32,6 +32,8 @@
 class StereoMatchingDataset(ABC, VisionDataset):
     """Base interface for Stereo matching datasets"""
 
+    _has_built_in_disparity_mask = False
+
     def __init__(self, root: str, transforms: Optional[Callable] = None):
         """
@@ -49,11 +51,15 @@ def __init__(self, root: str, transforms: Optional[Callable] = None):
         For training splits generally the datasets provide a minimal guarantee of
         images: (``PIL.Image``, ``PIL.Image``)
         disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
-        valid_masks: (``np.ndarray``, ``None``) with shape (H, W)
+
+        Optionally, based on the dataset, it can return a ``mask`` as well:
+        valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
 
         For some test splits, the datasets provide outputs that look like:
         images: (``PIL.Image``, ``PIL.Image``)
         disparities: (``None``, ``None``)
+
+        Optionally, based on the dataset, it can return a ``mask`` as well:
         valid_masks: (``None``, ``None``)
         """
         super().__init__(root=root)
@@ -106,10 +112,10 @@ def __getitem__(self, index: int) -> Tuple:
             index(int): The index of the example to retrieve
 
         Returns:
-            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` where ``valid_mask``
-                is a numpy boolean mask of shape (H, W)
-                indicating which disparity values are valid. The disparity is a numpy array of
-                shape (1, H, W) and the images are PIL images. ``disparity`` and ``valid_mask`` are None for
+            tuple: A 3- or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
+                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
+                indicating which disparity pixels are valid. The disparity is a numpy array of
+                shape (1, H, W) and the images are PIL images. ``disparity`` is None for
+                datasets for which the authors did not provide ``split="test"`` annotations.
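            Example (editor's sketch, not part of the patch; ``Kitti2012Stereo`` is one
            subclass that sets ``_has_built_in_disparity_mask`` and therefore always
            returns the 4-tuple, though the mask may still be ``None``)::

                dataset = Kitti2012Stereo(root="datasets", split="train")
                left, right, disparity, valid_mask = dataset[0]
                # subclasses without the flag return a 3-tuple instead, unless the
                # transforms produce a valid_mask:
                # left, right, disparity = dataset[0]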
""" img_left = self._read_img(self._images[index][0]) @@ -129,7 +135,10 @@ def __getitem__(self, index: int) -> Tuple: valid_masks, ) = self.transforms(imgs, dsp_maps, valid_masks) - return imgs[0], imgs[1], dsp_maps[0], valid_masks[0] + if self._has_built_in_disparity_mask or valid_masks[0] is not None: + return imgs[0], imgs[1], dsp_maps[0], valid_masks[0] + else: + return imgs[0], imgs[1], dsp_maps[0] def __len__(self) -> int: return len(self._images) @@ -192,6 +201,7 @@ def __init__( max_disparity: float = 256.0, ): super().__init__(root, transforms) + self._has_built_in_disparity_mask = True root = Path(root) / "CREStereo" self.max_disparity = max_disparity @@ -213,7 +223,6 @@ def __init__( for s in dirs: left_image_pattern = str(root / s / "*_left.jpg") right_image_pattern = str(root / s / "*_right.jpg") - print(left_image_pattern, right_image_pattern) imgs = self._scan_pairs(left_image_pattern, right_image_pattern) self._images += imgs @@ -223,11 +232,11 @@ def __init__( self._disparities += disparities def _read_disparity(self, file_path: str) -> Tuple: - disparity = np.asarray(Image.open(file_path), dtype=np.float32) - valid = (disparity < self.max_disparity) & (disparity > 0.0) + disparity_map = np.asarray(Image.open(file_path), dtype=np.float32) # unsqueeze the disparity map into (C, H, W) format - disparity = disparity[None, :, :] - return disparity, valid + disparity_map = disparity_map[None, :, :] + valid_mask = None + return disparity_map, valid_mask def _download_dataset(self, root: str) -> None: dirs = ["tree", "shapenet", "reflective", "hole"] @@ -430,13 +439,13 @@ def _read_img(self, file_path: str) -> Image.Image: return super()._read_img(file_path) def _read_disparity(self, file_path: str) -> Tuple: - if not os.path.exists(file_path): # case when dealing with the test split + # test split has not disparity maps + if not os.path.exists(file_path): return None, None + disparity_map = _read_pfm_file(file_path) disparity_map = np.abs(disparity_map) # ensure that the disparity is positive - valid_mask = disparity_map < 1e3 - # remove the channel dimension from the valid mask - valid_mask = valid_mask[0, :, :] + valid_mask = None return disparity_map, valid_mask def _download_dataset(self, root: str): @@ -532,6 +541,9 @@ class ETH3DStereo(StereoMatchingDataset): def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): super().__init__(root, transforms) + # needed for output consistency, otherwise tests get fussy about + # variable sized FEATURE_TYPES based on dataset split + self._has_built_in_disparity_mask = True verify_str_arg(split, "split", valid_values=("train", "test")) @@ -551,6 +563,7 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) def _read_disparity(self, file_path: str) -> Tuple: + # test split has no disparity maps if not os.path.exists(file_path): return None, None @@ -588,6 +601,7 @@ class Kitti2012Stereo(StereoMatchingDataset): def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): super().__init__(root, transforms) + self._has_built_in_disparity_mask = True verify_str_arg(split, "split", valid_values=("train", "test")) @@ -604,13 +618,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl self._disparities = list(("", "") for _ in self._images) def _read_disparity(self, file_path: str) -> Tuple: + # test split has no disparity maps if not 
os.path.exists(file_path): return None, None disparity_map = np.asarray(Image.open(file_path)) / 256.0 - valid_mask = disparity_map > 0.0 # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] + valid_mask = None return disparity_map, valid_mask @@ -657,6 +672,7 @@ class Kitti2015Stereo(StereoMatchingDataset): def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None): super().__init__(root, transforms) + self._has_built_in_disparity_mask = True verify_str_arg(split, "split", valid_values=("train", "test")) @@ -673,13 +689,14 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl self._disparities = list(("", "") for _ in self._images) def _read_disparity(self, file_path: str) -> Tuple: + # test split has no disparity maps if not os.path.exists(file_path): return None, None disparity_map = np.asarray(Image.open(file_path)) / 256.0 - valid_mask = disparity_map > 0.0 # unsqueeze the disparity map into (C, H, W) format disparity_map = disparity_map[None, :, :] + valid_mask = None return disparity_map, valid_mask @@ -724,20 +741,29 @@ class SintelStereo(StereoMatchingDataset): Args: root (string): Root directory where Sintel Stereo is located. + pass_name (string): The name of the pass to use, either "final" or "clean". transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version. """ - def __init__(self, root: str, transforms: Optional[Callable] = None): + def __init__(self, root: str, pass_name: str = "final", transforms: Optional[Callable] = None): super().__init__(root, transforms) + verify_str_arg(pass_name, "pass_name", valid_values=("final", "clean", "both")) + root = Path(root) / "Sintel" + pass_names = { + "final": ["final"], + "clean": ["clean"], + "both": ["final", "clean"], + }[pass_name] - left_img_pattern = str(root / "training" / "final_left" / "*" / "*.png") - right_img_pattern = str(root / "training" / "final_right" / "*" / "*.png") - self._images += self._scan_pairs(left_img_pattern, right_img_pattern) + for p in pass_names: + left_img_pattern = str(root / "training" / f"{p}_left" / "*" / "*.png") + right_img_pattern = str(root / "training" / f"{p}_right" / "*" / "*.png") + self._images += self._scan_pairs(left_img_pattern, right_img_pattern) - disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png") - self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) + disparity_pattern = str(root / "training" / "disparities" / "*" / "*.png") + self._disparities += self._scan_pairs(disparity_pattern, "", fill_empty=True) def _get_oclussion_mask_paths(self, file_path: str) -> Tuple[str, str]: path_tokens = file_path.split(os.sep) @@ -863,9 +889,8 @@ def __init__( def _read_disparity(self, file_path: str) -> Tuple: disparity_map = _read_pfm_file(file_path) disparity_map = np.abs(disparity_map) # ensure that the disparity is positive - # keep valid mask with shape (H, W) - valid = np.ones(disparity_map.shape[1:]).astype(np.bool_) - return disparity_map, valid + valid_mask = None + return disparity_map, valid_mask class FallingThingsStereo(StereoMatchingDataset): @@ -945,11 +970,11 @@ def _read_disparity(self, file_path: str) -> Tuple: intrinsics = json.load(f) focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"] baseline, pixel_constant = 6.0, 100.0 # pixel constant is inverted - disparity = (baseline * focal * pixel_constant) / depth.astype(np.float32) - valid = disparity > 0 + 
disparity_map = (baseline * focal * pixel_constant) / depth.astype(np.float32)
             # unsqueeze disparity to (C, H, W)
             disparity_map = disparity_map[None, :, :]
             valid_mask = None
             return disparity_map, valid_mask
 
     def __getitem__(self, index: int) -> Tuple:
         return super().__getitem__(index)
@@ -1003,8 +1028,8 @@ def __init__(self, root: str, split: str = "train", transforms: Optional[Callabl
         self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern)
 
     def _read_disparity(self, file_path: str) -> Tuple:
-        disparity = np.asarray(Image.open(file_path), dtype=np.float32)
-        valid = np.ones_like(disparity).astype(np.bool_)
+        disparity_map = np.asarray(Image.open(file_path), dtype=np.float32)
         # unsqueeze disparity to (C, H, W)
-        disparity = disparity[None, :, :]
-        return disparity, valid
+        disparity_map = disparity_map[None, :, :]
+        valid_mask = None
+        return disparity_map, valid_mask

From 93f4b6c12800910ff77c12343c27905384339719 Mon Sep 17 00:00:00 2001
From: Ponku
Date: Mon, 18 Jul 2022 16:20:48 +0100
Subject: [PATCH 32/35] Added __getitem__ docs to document the supported
 multi-shape returns

---
 torchvision/datasets/_stereo_matching.py | 129 +++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index 14ce20a2d96..ff7d0183773 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -250,6 +250,20 @@ def _download_dataset(self, root: str) -> None:
                 url = f"https://data.megengine.org.cn/research/crestereo/dataset/{d}/{i}.tar"
                 download_and_extract_archive(url=url, download_root=d_path, remove_finished=True)
 
+    def __getitem__(self, index: int) -> Tuple:
+        """Return example at given index.
+
+        Args:
+            index(int): The index of the example to retrieve
+
+        Returns:
+            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
+                The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
+ ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not + generate a valid mask. + + Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test. + """ + return super().__getitem__(index) + class Kitti2012Stereo(StereoMatchingDataset): """ "Kitti dataset from the `2012 `_ stereo evaluation benchmark. @@ -628,6 +672,22 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = None return disparity_map, valid_mask + def __getitem__(self, index: int) -> Tuple: + """Return example at given index. + + Args: + index(int): The index of the example to retrieve + + Returns: + tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``. + The disparity is a numpy array of shape (1, H, W) and the images are PIL images. + ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not + generate a valid mask. + + Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test. + """ + return super().__getitem__(index) + class Kitti2015Stereo(StereoMatchingDataset): """ "Kitti dataset from the `2015 `_ stereo evaluation benchmark. @@ -699,6 +759,22 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = None return disparity_map, valid_mask + def __getitem__(self, index: int) -> Tuple: + """Return example at given index. + + Args: + index(int): The index of the example to retrieve + + Returns: + tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``. + The disparity is a numpy array of shape (1, H, W) and the images are PIL images. + ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not + generate a valid mask. + + Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test. + """ + return super().__getitem__(index) + class SintelStereo(StereoMatchingDataset): """ "Sintel `Stereo Dataset `_. @@ -808,6 +884,20 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = np.logical_and(off_mask, valid_mask) return disparity_map, valid_mask + def __getitem__(self, index: int) -> Tuple: + """Return example at given index. + + Args: + index(int): The index of the example to retrieve + + Returns: + tuple: A 3-tuple with ``(img_left, img_right, disparity)``. + The disparity is a numpy array of shape (1, H, W) and the images are PIL images. + If a ``valid_mask`` is generated within the ``transforms`` parameter, + a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned. + """ + return super().__getitem__(index) + class SceneFlowStereo(StereoMatchingDataset): """Dataset interface for `Scene Flow `_ datasets. @@ -892,6 +982,20 @@ def _read_disparity(self, file_path: str) -> Tuple: valid_mask = None return disparity_map, valid_mask + def __getitem__(self, index: int) -> Tuple: + """Return example at given index. + + Args: + index(int): The index of the example to retrieve + + Returns: + tuple: A 3-tuple with ``(img_left, img_right, disparity)``. + The disparity is a numpy array of shape (1, H, W) and the images are PIL images. + If a ``valid_mask`` is generated within the ``transforms`` parameter, + a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned. + """ + return super().__getitem__(index) + class FallingThingsStereo(StereoMatchingDataset): """FallingThings ``_ dataset @@ -977,6 +1081,17 @@ def _read_disparity(self, file_path: str) -> Tuple: return disparity_map, valid_mask def __getitem__(self, index: int) -> Tuple: + """Return example at given index. 
+
+        Args:
+            index (int): The index of the example to retrieve
+
+        Returns:
+            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
+            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
+            If a ``valid_mask`` is generated within the ``transforms`` parameter,
+            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
+        """
         return super().__getitem__(index)

@@ -1033,3 +1148,17 @@ def _read_disparity(self, file_path: str) -> Tuple:
         disparity_map = disparity_map[None, :, :]
         valid_mask = None
         return disparity_map, valid_mask
+
+    def __getitem__(self, index: int) -> Tuple:
+        """Return example at given index.
+
+        Args:
+            index (int): The index of the example to retrieve
+
+        Returns:
+            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
+            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
+            If a ``valid_mask`` is generated within the ``transforms`` parameter,
+            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
+        """
+        return super().__getitem__(index)

From c83bc8059ccea8b3abe60567a928791dcb391f6d Mon Sep 17 00:00:00 2001
From: Ponku
Date: Tue, 19 Jul 2022 11:20:28 +0100
Subject: [PATCH 33/35] removed path returns from helper test functions

---
 test/test_datasets.py | 32 +++++++++-----------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/test/test_datasets.py b/test/test_datasets.py
index a75e597c049..08c5be78649 100644
--- a/test/test_datasets.py
+++ b/test/test_datasets.py
@@ -2693,7 +2693,7 @@ def _create_scene_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]
         return image_paths

     @staticmethod
-    def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.Path]:
+    def _create_annotation_folder(num_examples: int, root_dir: str) -> None:
         paths = []
         # make the root_dir if it does not exist
         os.makedirs(root_dir, exist_ok=True)
@@ -2706,8 +2706,6 @@ def _create_annotation_folder(num_examples: int, root_dir: str) -> List[pathlib.
paths.append(datasets_utils.create_image_file(root=scene_dir, name="mask0nocc.png", size=(1, 100, 100)))
             pfm_path = os.path.join(scene_dir, "disp0GT.pfm")
             datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path)
-            paths.append(pfm_path)
-        return paths

     def inject_fake_data(self, tmpdir, config):
         eth3d_dir = os.path.join(tmpdir, "ETH3D")
@@ -2784,9 +2782,9 @@ class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase):
     FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))

     @staticmethod
-    def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> List[str]:
+    def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> None:
         calibrations = [None] if split == "test" else ["-perfect", "-imperfect"]
-        scene_dirs = []
+
         for c in calibrations:
             scene_dir = os.path.join(root_dir, f"{scene_name}{c}")
             os.makedirs(scene_dir, exist_ok=True)
             # make normal images first
             datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(3, 100, 100))
             # these are going to end up being grayscale images
             datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp0.pfm"))
             datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=os.path.join(scene_dir, "disp1.pfm"))
-            scene_dirs.append(scene_dir)
-        return scene_dirs

     def inject_fake_data(self, tmpdir, config):
         split_scene_map = {
@@ -3012,15 +3008,12 @@ class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase):
     @staticmethod
     def _create_pfm_folder(
         root: str, name: str, file_name_fn: Callable[..., str], num_examples: int, size: Tuple[int, int]
-    ) -> List[str]:
+    ) -> None:
         root = pathlib.Path(root) / name
         os.makedirs(root, exist_ok=True)

-        paths = []
         for i in range(num_examples):
             datasets_utils.make_fake_pfm_file(size[0], size[1], root / file_name_fn(i))
-            paths.append(str(root / file_name_fn(i)))
-        return paths

     def inject_fake_data(self, tmpdir, config):
         scene_flow_dir = pathlib.Path(tmpdir) / "SceneFlow"
@@ -3091,27 +3084,20 @@ def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]):
         PIL.Image.fromarray(image).save(file)

     @staticmethod
-    def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> List[str]:
-        paths = []
+    def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> None:
         root = pathlib.Path(root) / scene_name
         os.makedirs(root, exist_ok=True)
         # jpg images
-        paths.append(datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0])))
-        paths.append(datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0])))
+        datasets_utils.create_image_file(root, "image1.left.jpg", size=(3, size[1], size[0]))
+        datasets_utils.create_image_file(root, "image1.right.jpg", size=(3, size[1], size[0]))
         # single channel depth maps
-        paths.append(
-            FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))
-        )
-        paths.append(
-            FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))
-        )
+        FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.left.depth.png", size=(size[0], size[1]))
+        FallingThingsStereoTestCase._make_dummy_depth_map(root, "image1.right.depth.png", size=(size[0], size[1]))
        # camera settings json.
Minimal example for _read_disparity function testing settings_json = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]} with open(root / "_camera_settings.json", "w") as f: json.dump(settings_json, f) - return paths - def inject_fake_data(self, tmpdir, config): fallingthings_dir = pathlib.Path(tmpdir) / "FallingThings" os.makedirs(fallingthings_dir, exist_ok=True) From 650bf67c47aa54748ff81e69cd5134e9a36a29df Mon Sep 17 00:00:00 2001 From: Ponku Date: Tue, 19 Jul 2022 11:36:30 +0100 Subject: [PATCH 34/35] replaced os.path.join with pathlib in tests --- test/test_datasets.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 08c5be78649..ff1a418fbac 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2708,12 +2708,12 @@ def _create_annotation_folder(num_examples: int, root_dir: str) -> None: datasets_utils.make_fake_pfm_file(h=100, w=100, file_name=pfm_path) def inject_fake_data(self, tmpdir, config): - eth3d_dir = os.path.join(tmpdir, "ETH3D") + eth3d_dir = pathlib.Path(tmpdir) / "ETH3D" num_examples = 2 if config["split"] == "train" else 3 split_name = "two_view_training" if config["split"] == "train" else "two_view_test" - split_dir = os.path.join(eth3d_dir, split_name) + split_dir = eth3d_dir / split_name self._create_scene_folder(num_examples, split_dir) if config["split"] == "train": @@ -2784,9 +2784,10 @@ class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): @staticmethod def _make_scene_folder(root_dir: str, scene_name: str, split: str) -> None: calibrations = [None] if split == "test" else ["-perfect", "-imperfect"] + root_dir = pathlib.Path(root_dir) for c in calibrations: - scene_dir = os.path.join(root_dir, f"{scene_name}{c}") + scene_dir = root_dir / f"{scene_name}{c}" os.makedirs(scene_dir, exist_ok=True) # make normal images first datasets_utils.create_image_file(root=scene_dir, name="im0.png", size=(3, 100, 100)) From 39efae5772b8eba73e678d606a5f5720cbf7a977 Mon Sep 17 00:00:00 2001 From: Ponku Date: Mon, 25 Jul 2022 14:58:28 +0100 Subject: [PATCH 35/35] crestereo draft implementation --- test/test_prototype_models.py | 41 + .../models/depth/stereo/crestereo.py | 1007 +++++++++++++++++ 2 files changed, 1048 insertions(+) create mode 100644 torchvision/prototype/models/depth/stereo/crestereo.py diff --git a/test/test_prototype_models.py b/test/test_prototype_models.py index c76a84f8634..6ff1382010d 100644 --- a/test/test_prototype_models.py +++ b/test/test_prototype_models.py @@ -1,6 +1,7 @@ import pytest import test_models as TM import torch +import torchvision.prototype.models.depth.stereo.crestereo as crestereo import torchvision.prototype.models.depth.stereo.raft_stereo as raft_stereo from common_utils import set_rng_seed, cpu_and_gpu @@ -36,3 +37,43 @@ def test_raft_stereo(model_builder, model_mode, dev): # Test against expected file output TM._assert_expected(depth_pred, name=model_builder.__name__, atol=1e-2, rtol=1e-2) + + +@pytest.mark.parametrize("model_builder", (crestereo.crestereo_base,)) +@pytest.mark.parametrize("model_mode", ("standard", "scripted")) +@pytest.mark.parametrize("dev", cpu_and_gpu()) +def test_crestereo(model_builder, model_mode, dev): + set_rng_seed(0) + + model = model_builder().eval().to(dev) + + if model_mode == "scripted": + model = torch.jit.script(model) + + img1 = torch.rand(1, 3, 256, 256).to(dev) + img2 = torch.rand(1, 3, 256, 256).to(dev) + iterations = 3 + + preds = model(img1, img2, flow_init=None, iterations=iterations) 
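+    # the model returns one disparity estimate per refinement iteration; the last entry is the most refined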
+    disparity_pred = preds[-1]
+
+    # every pyramid level except the highest resolution runs only half the number of iterations
+    expected_iterations = (iterations // 2) * (len(model.resolutions) - 1)
+    expected_iterations += iterations
+    assert (
+        len(preds) == expected_iterations
+    ), "Number of predictions should match the total number of refinement iterations across all pyramid levels"
+
+    assert disparity_pred.shape == torch.Size(
+        [1, 2, 256, 256]
+    ), f"Predicted disparity should have the same spatial shape as the input. Inputs shape {img1.shape[2:]}, Prediction shape {disparity_pred.shape[2:]}"
+
+    assert all(
+        d.shape == torch.Size([1, 2, 256, 256]) for d in preds
+    ), "All predicted disparities are expected to have the same shape"
+
+    # test a backward pass with a dummy loss as well
+    preds = torch.stack(preds, dim=0)
+    targets = torch.ones_like(preds, requires_grad=False)
+    loss = torch.nn.functional.mse_loss(preds, targets)
+    loss.backward()
diff --git a/torchvision/prototype/models/depth/stereo/crestereo.py b/torchvision/prototype/models/depth/stereo/crestereo.py
new file mode 100644
index 00000000000..92a75d20ce3
--- /dev/null
+++ b/torchvision/prototype/models/depth/stereo/crestereo.py
@@ -0,0 +1,1007 @@
+import math
+from functools import partial
+from typing import Iterable, List, Optional, Callable, Tuple, Dict, Union
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.models.optical_flow.raft as raft
+from torch import Tensor
+from torchvision.models._api import WeightsEnum
+from torchvision.models.optical_flow._utils import make_coords_grid, grid_sample, upsample_flow
+from torchvision.ops import Conv2dNormActivation
+
+
+class ResidualBlock(raft.ResidualBlock):
+    def __init__(self, in_channels, out_channels, *, norm_layer, stride=1):
+        super().__init__(in_channels, out_channels, norm_layer=norm_layer, stride=stride)
+
+        # the CREStereo base architecture changes the number of channels
+        # even on grids with the same spatial resolution
+        if in_channels != out_channels:
+            self.downsample = Conv2dNormActivation(
+                in_channels,
+                out_channels,
+                norm_layer=norm_layer,
+                kernel_size=1,
+                stride=stride,
+                bias=True,
+                activation_layer=None,
+            )
+
+
+class FeatureEncoder(raft.FeatureEncoder):
+    """Base encoder for Feature Encoder and Context Encoder"""
+
+    def __init__(
+        self,
+        *,
+        block: Callable[..., nn.Module] = ResidualBlock,
+        layers: Tuple[int, int, int, int, int] = (64, 64, 96, 128, 256),
+        strides: Tuple[int, int, int, int] = (2, 1, 2, 1),
+        norm_layer: Callable[..., nn.Module] = nn.InstanceNorm2d,
+    ):
+        super().__init__(block=block, layers=layers, strides=strides, norm_layer=norm_layer)
+        for s in strides:
+            if s not in [1, 2]:
+                raise ValueError(f"FeatureEncoder unsupported stride size {s}.
Supported values are one of ``[1, 2]``.")
+
+        self.output_dim = layers[-1]
+        num_downsamples = len(list(filter(lambda s: s == 2, strides)))
+        self.downsample_factor = 2 ** num_downsamples
+
+
+class ConvexMaskPredictor(nn.Module):
+    def __init__(
+        self,
+        *,
+        in_channels: int,
+        hidden_size: int,
+        upsample_factor: int,
+        multiplier: float = 0.25,
+    ) -> None:
+
+        super().__init__()
+        self.mask_head = nn.Sequential(
+            Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3),
+            nn.Conv2d(hidden_size, upsample_factor ** 2 * 9, 1, padding=0),
+        )
+
+        self.multiplier = multiplier
+
+    def forward(self, x: Tensor) -> Tensor:
+        x = self.mask_head(x) * self.multiplier
+        return x
+
+
+class AdaptiveGroupCorrelationLayer(nn.Module):
+    """
+    Container for computing various correlation types between a left and right feature map.
+    This module does not contain any optimisable parameters, it's solely a collection of ops.
+    We wrap it in an nn.Module for torch.jit.script compatibility.
+
+    Adaptive Group Correlation operations from: https://openaccess.thecvf.com/content/CVPR2022/papers/Li_Practical_Stereo_Matching_via_Cascaded_Recurrent_Network_With_Adaptive_Correlation_CVPR_2022_paper.pdf
+
+    Canonical reference implementation: https://github.com/megvii-research/CREStereo/blob/master/nets/corr.py
+    """
+
+    def __init__(
+        self,
+        attention_module: Optional[nn.Module] = None,
+        groups: int = 4,
+        search_window_1d: Tuple[int, int] = (1, 9),
+        search_dilate_1d: Tuple[int, int] = (1, 1),
+        search_window_2d: Tuple[int, int] = (3, 3),
+        search_dilate_2d: Tuple[int, int] = (1, 1),
+    ) -> None:
+        super().__init__()
+        self.attention_module = attention_module
+
+        assert np.prod(search_window_1d) == np.prod(search_window_2d), (
+            f"The 1D and 2D windows should contain the same number of elements. "
+            f"1D shape: {search_window_1d} 2D shape: {search_window_2d}"
+        )
+
+        assert np.prod(search_window_1d) % 2 == 1, (
+            f"Search windows should contain an odd number of elements. "
+            f"Window of shape {search_window_1d} has {np.prod(search_window_1d)} elements."
+        )
+
+        assert any(
+            size == 1 for size in search_window_1d
+        ), f"The 1D search window should have at least one size equal to 1. 1D shape: {search_window_1d}"
+
+        assert all(
+            size != 1 for size in search_window_2d
+        ), f"The 2D search window should have all dimensions greater than 1.
2D shape: {search_window_2d}"

+        self.search_window_1d = search_window_1d
+        self.search_window_2d = search_window_2d
+
+        self.search_dilate_1d = search_dilate_1d
+        self.search_dilate_2d = search_dilate_2d
+
+        self.groups = groups
+
+        # two selection tables for dealing with the small_patch argument in the forward function
+        self.patch_sizes = {
+            True: [self.search_window_2d for _ in range(self.groups)],
+            False: [self.search_window_1d for _ in range(self.groups)],
+        }
+
+        self.dilate_sizes = {
+            True: [self.search_dilate_2d for _ in range(self.groups)],
+            False: [self.search_dilate_1d for _ in range(self.groups)],
+        }
+
+    def forward(
+        self,
+        left_features: Tensor,
+        right_features: Tensor,
+        flow: torch.Tensor,
+        extra_offset: Union[torch.Tensor, None],
+        use_small_patch: bool = False,
+        iter_mode: bool = False,
+    ):
+        if iter_mode or extra_offset is None:
+            corr = self.iterative_correlation(left_features, right_features, flow, use_small_patch)
+        else:
+            corr = self.attention_offset_correlation(left_features, right_features, flow, extra_offset, use_small_patch)  # type: ignore
+        return corr
+
+    def _make_coords(self, feature_map: Tensor) -> Tensor:
+        return make_coords_grid(feature_map.shape[0], feature_map.shape[2], feature_map.shape[3]).to(feature_map.device)
+
+    def get_correlation(
+        self,
+        left_feature: Tensor,
+        right_feature: Tensor,
+        window_size: Tuple[int, int] = (3, 3),
+        dilate: Tuple[int, int] = (1, 1),
+    ) -> Tensor:
+        """Function that computes a correlation product between the left and right features.
+
+        The correlation is computed in a sliding window fashion, namely the left features are fixed
+        and for each ``(i, j)`` location we compute the correlation with a sliding window anchored in
+        ``(i, j)`` from the right feature map. The sliding window selects pixels in the range
+        ``(i - window_size // 2, i + window_size // 2)``, respectively ``(j - window_size // 2, j + window_size // 2)``.
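+
+        For example, a ``window_size`` of ``(1, 9)`` with ``dilate=(1, 1)`` compares each left pixel
+        against 9 horizontally neighbouring candidates on the right feature map, producing a
+        correlation volume of shape ``[B, 9, H, W]``.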
+        """
+
+        B, C, H, W = left_feature.shape
+
+        di_y, di_x = dilate[0], dilate[1]
+        pad_y, pad_x = window_size[0] // 2 * di_y, window_size[1] // 2 * di_x
+
+        right_padded = F.pad(right_feature, (pad_x, pad_x, pad_y, pad_y), mode="replicate")
+        right_padded = right_padded.detach()
+        # in order to vectorize the correlation computation over all pixel candidates
+        # we create multiple shifted right images which we stack on an extra dimension
+        right_padded = F.unfold(right_padded, kernel_size=(H, W), dilation=dilate)
+        # torch unfold returns a tensor of shape [B, flattened_values, n_selections]
+        right_padded = right_padded.permute(0, 2, 1)
+        # then we reshape back into [B, n_views, C, H, W]
+        right_padded = right_padded.reshape(B, (window_size[0] * window_size[1]), C, H, W)
+        # we expand the left features for broadcasting
+        left_feature = left_feature.unsqueeze(1)
+        # this computes an element-wise product between [B, 1, C, H, W] * [B, n_views, C, H, W];
+        # to obtain correlations over the pixel candidates we perform a mean on the C dimension
+        correlation = torch.mean(left_feature * right_padded, dim=2, keepdim=False)
+        # the final correlation tensor shape will be [B, n_views, H, W]
+        # where on the i-th position of the n_views dimension we will have
+        # the correlation value between the left pixel
+        # and the i-th candidate on the right feature map
+        return correlation
+
+    def iterative_correlation(
+        self, left_feature: Tensor, right_feature: Tensor, flow: Tensor, use_small_patch: bool = False
+    ) -> Tensor:
+        """Function that computes 1 pass of non-offset group-wise correlation"""
+        coords = self._make_coords(left_feature)
+
+        # we offset the coordinate grid in the flow direction
+        coords = coords + flow
+        coords = coords.permute(0, 2, 3, 1)
+        # resample right features according to the offset grid
+        right_feature = grid_sample(right_feature, coords, mode="bilinear", align_corners=True)
+
+        # use_small_patch is a flag by which we decide on how many axes
+        # we perform candidate search. See section 3.1 ``Deformable search window`` & Figure 4 in the paper.
+        patch_size_list = self.patch_sizes[use_small_patch]
+        dilate_size_list = self.dilate_sizes[use_small_patch]
+
+        # chunk the left and right features to perform group-wise correlation,
+        # a mechanism similar to GroupNorm. See section 3.1 ``Group-wise correlation``.
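+        # e.g. with the base configuration of 256 feature channels and groups=4,
+        # each correlation group covers a 64-channel subset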
+        left_groups = torch.chunk(left_feature, self.groups, dim=1)
+        right_groups = torch.chunk(right_feature, self.groups, dim=1)
+
+        correlations = []
+        # rather than performing the correlation product over the entire C dimension,
+        # we use subsets of C to obtain multiple correlation sets
+        for i in range(len(patch_size_list)):
+            correlation = self.get_correlation(left_groups[i], right_groups[i], patch_size_list[i], dilate_size_list[i])
+            correlations.append(correlation)
+        final_correlations = torch.cat(correlations, dim=1)
+        return final_correlations
+
+    def attention_offset_correlation(
+        self,
+        left_feature: Tensor,
+        right_feature: Tensor,
+        flow: Tensor,
+        extra_offset: Tensor,
+        use_small_patch: bool = False,
+    ):
+        """Function that computes 1 pass of offset group-wise correlation
+
+        If the class was provided with an attention layer, the left and right feature maps
+        are passed through a transformer first
+        """
+        B, C, H, W = left_feature.shape
+
+        if self.attention_module is not None:
+            # prepare the input shapes required by the transformer
+            left_feature = left_feature.permute(0, 2, 3, 1).reshape(B, H * W, C)
+            right_feature = right_feature.permute(0, 2, 3, 1).reshape(B, H * W, C)
+            # this can be either self attention or cross attention, hence the tuple return
+            left_feature, right_feature = self.attention_module(left_feature, right_feature)
+            left_feature = left_feature.reshape(B, H, W, C).permute(0, 3, 1, 2)
+            right_feature = right_feature.reshape(B, H, W, C).permute(0, 3, 1, 2)
+
+        left_groups = torch.chunk(left_feature, self.groups, dim=1)
+        right_groups = torch.chunk(right_feature, self.groups, dim=1)
+
+        num_search_candidates = 9
+        # for each pixel (i, j) we have a number of search candidates
+        # thus, for each candidate we should have an X-axis and Y-axis offset value
+        extra_offset = extra_offset.reshape(B, num_search_candidates, 2, H, W).permute(0, 1, 3, 4, 2)
+
+        # see ``iterative_correlation`` for details on the patch / dilation selection
+        patch_size_list = self.patch_sizes[use_small_patch]
+        dilate_size_list = self.dilate_sizes[use_small_patch]
+
+        group_channels = C // self.groups
+        correlations = []
+
+        for i in range(len(patch_size_list)):
+            left_group, right_group = left_groups[i], right_groups[i]
+            patch_size, dilate = patch_size_list[i], dilate_size_list[i]
+
+            di_y, di_x = dilate
+            ps_y, ps_x = patch_size
+            # define the search range based on the window patch shape
+            ry, rx = ps_y // 2 * di_y, ps_x // 2 * di_x
+
+            # base offsets for search (i.e. where to look on the search index)
+            x_grid, y_grid = torch.meshgrid(
+                torch.arange(-rx, rx + 1, di_x), torch.arange(-ry, ry + 1, di_y), indexing="xy"
+            )
+            x_grid, y_grid = x_grid.to(flow.device), y_grid.to(flow.device)
+            offsets = torch.stack((x_grid, y_grid))
+            offsets = offsets.reshape(2, -1).permute(1, 0)
+
+            for d in (0, 2, 3):
+                offsets = offsets.unsqueeze(d)
+            # extra offsets for search (i.e. deformed search indexes.
Similar concept to deformable convolutions)
+            offsets = offsets + extra_offset
+
+            coords = self._make_coords(left_feature) + flow
+            coords = coords.permute(0, 2, 3, 1).unsqueeze(1)
+            coords = coords + offsets
+            coords = coords.reshape(B, -1, W, 2)
+
+            right_group = grid_sample(right_group, coords, mode="bilinear", align_corners=True)
+            # we do not need to perform any window shifting because the grid sample op
+            # will return a multi-view right based on the num_search_candidates dimension in the offsets
+            right_group = right_group.reshape(B, -1, group_channels, H, W)
+            left_group = left_group.reshape(B, -1, group_channels, H, W)
+            correlation = torch.mean(left_group * right_group, dim=2)
+            correlations.append(correlation)
+
+        final_correlation = torch.cat(correlations, dim=1)
+        return final_correlation
+
+
+def elu_feature_map(x: Tensor) -> Tensor:
+    """Elu feature map operation from: https://arxiv.org/pdf/2006.16236.pdf"""
+    return F.elu(x) + 1
+
+
+class LinearAttention(nn.Module):
+    """
+    Linear attention operation from: https://arxiv.org/pdf/2006.16236.pdf
+    Canonical implementation reference: https://github.com/idiap/fast-transformers/blob/master/fast_transformers/attention/linear_attention.py
+    LoFTR implementation reference: https://github.com/zju3dv/LoFTR/blob/2122156015b61fbb650e28b58a958e4d632b1058/src/loftr/loftr_module/linear_attention.py
+    """
+
+    def __init__(self, eps: float = 1e-6, feature_map_fn: Callable[[Tensor], Tensor] = elu_feature_map) -> None:
+        super().__init__()
+        self.eps = eps
+        self.feature_map_fn = feature_map_fn
+
+    def forward(
+        self,
+        queries: Tensor,
+        keys: Tensor,
+        values: Tensor,
+        q_mask: Optional[Tensor] = None,
+        kv_mask: Optional[Tensor] = None,
+    ):
+        """
+        Args:
+            queries (torch.Tensor): [N, S1, H, D]
+            keys (torch.Tensor): [N, S2, H, D]
+            values (torch.Tensor): [N, S2, H, D]
+            q_mask (torch.Tensor): [N, S1] (optional)
+            kv_mask (torch.Tensor): [N, S2] (optional)
+        Returns:
+            queried_values (torch.Tensor): [N, S1, H, D]
+        """
+        # per the linear attention formulation, the kernel feature map is applied to queries and keys
+        queries = self.feature_map_fn(queries)
+        keys = self.feature_map_fn(keys)
+
+        if q_mask is not None:
+            queries = queries * q_mask[:, :, None, None]
+        if kv_mask is not None:
+            keys = keys * kv_mask[:, :, None, None]
+            values = values * kv_mask[:, :, None, None]
+
+        # mitigates fp16 overflows
+        values_length = values.shape[1]
+        values = values / values_length
+        kv = torch.einsum("NSHD, NSHV -> NHDV", keys, values)
+        z = 1 / (torch.einsum("NLHD, NHD -> NLH", queries, keys.sum(dim=1)) + self.eps)
+        # rescale at the end to account for the fp16 mitigation
+        queried_values = torch.einsum("NLHD, NHDV, NLH -> NLHV", queries, kv, z) * values_length
+        return queried_values
+
+
+class SoftmaxAttention(nn.Module):
+    """
+    A simple softmax attention operation
+    LoFTR implementation reference: https://github.com/zju3dv/LoFTR/blob/2122156015b61fbb650e28b58a958e4d632b1058/src/loftr/loftr_module/linear_attention.py
+    """
+
+    def __init__(self, dropout: float = 0.0) -> None:
+        super().__init__()
+        self.dropout = nn.Dropout(dropout) if dropout else nn.Identity()
+
+    def forward(
+        self,
+        queries: Tensor,
+        keys: Tensor,
+        values: Tensor,
+        q_mask: Optional[Tensor] = None,
+        kv_mask: Optional[Tensor] = None,
+    ):
+        """
+        Computes classical softmax full-attention between all queries and keys.
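+        Unlike ``LinearAttention``, the full ``S1 x S2`` attention matrix is materialized here,
+        so time and memory scale quadratically with the sequence lengths.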
+
+        Args:
+            queries (torch.Tensor): [N, S1, H, D]
+            keys (torch.Tensor): [N, S2, H, D]
+            values (torch.Tensor): [N, S2, H, D]
+            q_mask (torch.Tensor): [N, S1] (optional)
+            kv_mask (torch.Tensor): [N, S2] (optional)
+        Returns:
+            queried_values: [N, S1, H, D]
+        """
+
+        scale_factor = 1.0 / queries.shape[3] ** 0.5  # 1 / sqrt(D) scaling
+        queries = queries * scale_factor
+
+        qk = torch.einsum("NLHD, NSHD -> NLSH", queries, keys)
+        if kv_mask is not None and q_mask is not None:
+            qk.masked_fill_(~(q_mask[:, :, None, None] * kv_mask[:, None, :, None]), float("-inf"))
+
+        attention = torch.softmax(qk, dim=2)
+        attention = self.dropout(attention)
+
+        queried_values = torch.einsum("NLSH, NSHD -> NLHD", attention, values)
+        return queried_values
+
+
+class PositionalEncodingSine(nn.Module):
+    """
+    Sinusoidal positional encodings
+
+    Using the scaling term from https://github.com/megvii-research/CREStereo/blob/master/nets/attention/position_encoding.py
+    Reference implementation from https://github.com/facebookresearch/detr/blob/8a144f83a287f4d3fece4acdf073f387c5af387d/models/position_encoding.py#L28-L48
+    """
+
+    def __init__(self, dim_model: int) -> None:
+        super().__init__()
+        self.dim_model = dim_model
+        self.scale_factor = -math.log(10_000) / (dim_model // 2)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Args:
+            x: [B, C, H, W]
+        """
+        torch._assert(
+            len(x.shape) == 4,
+            f"PositionalEncodingSine requires a 4-dimensional input. Provided tensor is of shape {x.shape}",
+        )
+
+        coords = torch.ones(size=x.shape[2:], dtype=x.dtype, device=x.device)
+        positions_y = coords.cumsum(0).unsqueeze(0).unsqueeze(-1)
+        positions_x = coords.cumsum(1).unsqueeze(0).unsqueeze(-1)
+
+        div_term = torch.exp(torch.arange(0, self.dim_model // 2, dtype=x.dtype, device=x.device) * self.scale_factor)
+        positions_x = positions_x * div_term
+        positions_y = positions_y * div_term
+
+        positions_x = torch.stack((positions_x[..., 0::2].sin(), positions_x[..., 1::2].cos()), dim=4).flatten(3)
+        positions_y = torch.stack((positions_y[..., 0::2].sin(), positions_y[..., 1::2].cos()), dim=4).flatten(3)
+
+        positional_embeddings = torch.cat((positions_x, positions_y), dim=3).permute(0, 3, 1, 2)
+        return x + positional_embeddings
+
+
+class LocalFeatureEncoderLayer(nn.Module):
+    """
+    LoFTR transformer module from: https://arxiv.org/pdf/2104.00680.pdf
+    Canonical implementation at: https://github.com/zju3dv/LoFTR/blob/master/src/loftr/loftr_module/transformer.py
+    """
+
+    def __init__(
+        self,
+        *,
+        dim_model: int,
+        num_heads: int,
+        attention_type: str = "linear",
+    ) -> None:
+        super().__init__()
+
+        if attention_type not in ["linear", "softmax"]:
+            raise ValueError(
+                f"Unsupported attention type {attention_type}.
LocalFeatureEncoderLayer supports one of ``[linear, softmax]``"
+            )
+
+        self.dim_head = dim_model // num_heads
+        self.num_heads = num_heads
+
+        # multi-head attention
+        self.query_proj = nn.Linear(dim_model, dim_model, bias=False)
+        self.key_proj = nn.Linear(dim_model, dim_model, bias=False)
+        self.value_proj = nn.Linear(dim_model, dim_model, bias=False)
+        self.attention_op = LinearAttention() if attention_type == "linear" else SoftmaxAttention()
+        self.merge = nn.Linear(dim_model, dim_model, bias=False)
+
+        # feed forward network
+        self.ffn = nn.Sequential(
+            nn.Linear(dim_model * 2, dim_model * 2, bias=False),
+            nn.ReLU(),
+            nn.Linear(dim_model * 2, dim_model, bias=False),
+        )
+
+        # norm layers
+        self.attention_norm = nn.LayerNorm(dim_model)
+        self.ffn_norm = nn.LayerNorm(dim_model)
+
+    def forward(self, x: Tensor, source: Tensor, x_mask: Optional[Tensor] = None, source_mask: Optional[Tensor] = None):
+        """
+        Args:
+            x (torch.Tensor): [B, S1, D]
+            source (torch.Tensor): [B, S2, D]
+            x_mask (torch.Tensor): [B, S1] (optional)
+            source_mask (torch.Tensor): [B, S2] (optional)
+        """
+        B, S, D = x.shape
+        queries, keys, values = x, source, source
+
+        queries = self.query_proj(queries).reshape(B, S, self.num_heads, self.dim_head)
+        keys = self.key_proj(keys).reshape(B, S, self.num_heads, self.dim_head)
+        values = self.value_proj(values).reshape(B, S, self.num_heads, self.dim_head)
+
+        # attention operation
+        message = self.attention_op(queries, keys, values, x_mask, source_mask)
+        # concatenate the attention heads before passing through the projection layer
+        message = self.merge(message.reshape(B, S, D))
+        message = self.attention_norm(message)
+
+        # ffn operation; the feed forward output goes through its own norm layer
+        message = self.ffn(torch.cat([x, message], dim=2))
+        message = self.ffn_norm(message)
+
+        return x + message
+
+
+class LocalFeatureTransformer(nn.Module):
+    """
+    LoFTR transformer module from: https://arxiv.org/pdf/2104.00680.pdf
+    Canonical implementation at: https://github.com/zju3dv/LoFTR/blob/master/src/loftr/loftr_module/transformer.py
+    """
+
+    def __init__(
+        self,
+        *,
+        dim_model: int,
+        num_heads: int,
+        attention_directions: List[str],
+        attention_type: str = "linear",
+    ) -> None:
+        super().__init__()
+
+        self.attention_directions = attention_directions
+        for direction in attention_directions:
+            if direction not in ["self", "cross"]:
+                raise ValueError(
+                    f"Attention direction {direction} unsupported. LocalFeatureTransformer accepts only ``attention_directions`` in ``[self, cross]``."
+                )
+
+        self.layers = nn.ModuleList(
+            [
+                LocalFeatureEncoderLayer(dim_model=dim_model, num_heads=num_heads, attention_type=attention_type)
+                for _ in attention_directions
+            ]
+        )
+
+    def forward(
+        self,
+        left_features: Tensor,
+        right_features: Tensor,
+        left_mask: Optional[Tensor] = None,
+        right_mask: Optional[Tensor] = None,
+    ):
+        """
+        Args:
+            left_features (torch.Tensor): [N, S1, D]
+            right_features (torch.Tensor): [N, S2, D]
+            left_mask (torch.Tensor): [N, S1] (optional)
+            right_mask (torch.Tensor): [N, S2] (optional)
+        Returns:
+            left_features (torch.Tensor): [N, S1, D]
+            right_features (torch.Tensor): [N, S2, D]
+        """
+
+        torch._assert(
+            left_features.shape[2] == right_features.shape[2],
+            f"left_features and right_features should have the same embedding dimensions.
left_features: {left_features.shape[2]} right_features: {right_features.shape[2]}",
+        )
+
+        # zip() over the ModuleList is not torch.jit.script friendly, so we index the directions instead
+        for idx, layer in enumerate(self.layers):
+            attention_direction = self.attention_directions[idx]
+
+            if attention_direction == "self":
+                left_features = layer(left_features, left_features, left_mask, left_mask)
+                right_features = layer(right_features, right_features, right_mask, right_mask)
+
+            elif attention_direction == "cross":
+                left_features = layer(left_features, right_features, left_mask, right_mask)
+                right_features = layer(right_features, left_features, right_mask, left_mask)
+
+        return left_features, right_features
+
+
+class PyramidDownsample(nn.Module):
+    """
+    A simple wrapper that returns an average-pooled feature pyramid based on the provided scales.
+    The input itself is implicitly returned as the first pyramid level.
+    """
+
+    def __init__(self, factors: Iterable[int]) -> None:
+        super().__init__()
+        self.factors = factors
+
+    def forward(self, x: torch.Tensor) -> List[Tensor]:
+        results = [x]
+        for factor in self.factors:
+            results.append(F.avg_pool2d(x, kernel_size=factor, stride=factor))
+        return results
+
+
+class CREStereo(nn.Module):
+    """
+    CREStereo network from: https://openaccess.thecvf.com/content/CVPR2022/papers/Li_Practical_Stereo_Matching_via_Cascaded_Recurrent_Network_With_Adaptive_Correlation_CVPR_2022_paper.pdf
+
+    Canonical implementation: https://github.com/megvii-research/CREStereo/blob/master/nets/crestereo.py
+    """
+
+    def __init__(
+        self,
+        *,
+        feature_encoder: FeatureEncoder,
+        update_block: raft.UpdateBlock,
+        flow_head: raft.FlowHead,
+        self_attn_block: LocalFeatureTransformer,
+        cross_attn_block: LocalFeatureTransformer,
+        feature_downsample_rates: Tuple[int, ...] = (2, 4),
+        correlation_groups: int = 4,
+        search_window_1d: Tuple[int, int] = (1, 9),
+        search_dilate_1d: Tuple[int, int] = (1, 1),
+        search_window_2d: Tuple[int, int] = (3, 3),
+        search_dilate_2d: Tuple[int, int] = (1, 1),
+    ) -> None:
+        super().__init__()
+
+        self.feature_encoder = feature_encoder
+        self.update_block = update_block
+        self.flow_head = flow_head
+        self.self_attn_block = self_attn_block
+
+        # average pooling for the feature encoder outputs
+        self.downsampling_pyramid = PyramidDownsample(feature_downsample_rates)
+        self.downsampling_factors: List[int] = [feature_encoder.downsample_factor]
+        base_downsample_factor: int = self.downsampling_factors[0]
+        for rate in feature_downsample_rates:
+            self.downsampling_factors.append(base_downsample_factor * rate)
+
+        # output resolution tracking
+        self.resolutions: List[str] = [f"1 / {factor}" for factor in self.downsampling_factors]
+        self.search_pixels = int(np.prod(search_window_1d))
+
+        # flow convex upsampling mask predictor
+        self.mask_predictor = ConvexMaskPredictor(
+            in_channels=feature_encoder.output_dim // 2,
+            hidden_size=feature_encoder.output_dim,
+            upsample_factor=4,
+            multiplier=0.25,
+        )
+
+        # offset modules for offset-based feature selection
+        self.offset_convs = nn.ModuleDict()
+        self.correlation_layers = nn.ModuleDict()
+
+        offset_conv_layer = partial(
+            Conv2dNormActivation,
+            in_channels=feature_encoder.output_dim,
+            out_channels=self.search_pixels * 2,
+            norm_layer=None,
+            activation_layer=None,
+        )
+
+        correlation_layer = partial(
+            AdaptiveGroupCorrelationLayer,
+            groups=correlation_groups,
+            search_window_1d=search_window_1d,
+            search_dilate_1d=search_dilate_1d,
+            search_window_2d=search_window_2d,
+            search_dilate_2d=search_dilate_2d,
+        )
+
+        # populate the dicts in top
to bottom order,
+        # which is the iteration order required by the forward pass under torch.jit.script
+        #
+        # Ignore the largest resolution. We handle that separately due to torch.jit.script
+        # not being able to access runtime-generated keys in ModuleDicts.
+        # This way, we can keep a generic way of processing all pyramid levels except
+        # the final one
+
+        for idx, resolution in enumerate(reversed(self.resolutions[1:])):
+            # the largest resolution does not use offset convolutions for sampling grid coords
+            offset_conv = None if idx == len(self.resolutions) - 1 else offset_conv_layer()
+            if offset_conv:
+                self.offset_convs[resolution] = offset_conv
+            # only the lowest resolution uses the cross attention module when computing correlation scores
+            self.correlation_layers[resolution] = (
+                correlation_layer(attention_module=cross_attn_block) if idx == 0 else correlation_layer()
+            )
+
+        # correlation layer for the largest resolution
+        self.max_res_correlation_layer = correlation_layer()
+
+        # simple 2D Positional Encodings
+        self.positional_encodings = PositionalEncodingSine(feature_encoder.output_dim)
+
+    def freeze_bn(self):
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.eval()
+
+    def unfreeze_bn(self):
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.train()
+
+    def forward(self, left_image: Tensor, right_image: Tensor, flow_init: Optional[Tensor], iterations: int = 10):
+        features = torch.cat([left_image, right_image], dim=0)
+        features = self.feature_encoder(features)
+        left_features, right_features = features.chunk(2, dim=0)
+
+        # update block network state and input context are derived from the left feature map
+        net, ctx = left_features.chunk(2, dim=1)
+        net = torch.tanh(net)
+        ctx = torch.relu(ctx)
+
+        # each call below outputs a list of tensors, one entry per pyramid level, finest first.
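+        # e.g. with the base configuration (1/4-resolution encoder features, downsample rates (2, 4))
+        # the pyramid holds the 1/4, 1/8 and 1/16 resolution feature maps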
+        l_pyramid = self.downsampling_pyramid(left_features)
+        r_pyramid = self.downsampling_pyramid(right_features)
+        net_pyramid = self.downsampling_pyramid(net)
+        ctx_pyramid = self.downsampling_pyramid(ctx)
+
+        # we re-key the pyramid levels by their resolution string for easier lookup
+        l_pyramid: Dict[str, Tensor] = {res: l_pyramid[idx] for idx, res in enumerate(self.resolutions)}
+        r_pyramid: Dict[str, Tensor] = {res: r_pyramid[idx] for idx, res in enumerate(self.resolutions)}
+        net_pyramid: Dict[str, Tensor] = {res: net_pyramid[idx] for idx, res in enumerate(self.resolutions)}
+        ctx_pyramid: Dict[str, Tensor] = {res: ctx_pyramid[idx] for idx, res in enumerate(self.resolutions)}
+
+        # offsets for sampling pixel candidates in the correlation ops
+        offsets: Dict[str, Tensor] = {}
+        for resolution, offset_conv in self.offset_convs.items():
+            feature_map = l_pyramid[resolution]
+            offset = offset_conv(feature_map)
+            offsets[resolution] = (torch.sigmoid(offset) - 0.5) * 2.0
+
+        # the smallest resolution is prepared for passing through self attention
+        min_res = self.resolutions[-1]
+        max_res = self.resolutions[0]
+
+        B, C, MIN_H, MIN_W = l_pyramid[min_res].shape
+        # add positional encodings
+        l_pyramid[min_res] = self.positional_encodings(l_pyramid[min_res])
+        r_pyramid[min_res] = self.positional_encodings(r_pyramid[min_res])
+        # reshaping for the transformer
+        l_pyramid[min_res] = l_pyramid[min_res].permute(0, 2, 3, 1).reshape(B, MIN_H * MIN_W, C)
+        r_pyramid[min_res] = r_pyramid[min_res].permute(0, 2, 3, 1).reshape(B, MIN_H * MIN_W, C)
+        # perform self attention
+        l_pyramid[min_res], r_pyramid[min_res] = self.self_attn_block(l_pyramid[min_res], r_pyramid[min_res])
+        # now we need to reshape back into [B, C, H, W] format
+        l_pyramid[min_res] = l_pyramid[min_res].reshape(B, MIN_H, MIN_W, C).permute(0, 3, 1, 2)
+        r_pyramid[min_res] = r_pyramid[min_res].reshape(B, MIN_H, MIN_W, C).permute(0, 3, 1, 2)
+
+        predictions: List[Tensor] = []
+        flow_estimates: Dict[str, Tensor] = {}
+        # we pre-allocate this because of torch.jit.script;
+        # also, the prediction prior always has the
+        # spatial size of the features output by the feature encoder
+        flow_pred_prior: Tensor = torch.empty(
+            size=(B, 2, left_features.shape[2], left_features.shape[3]),
+            dtype=l_pyramid[max_res].dtype,
+            device=l_pyramid[max_res].device,
+        )
+
+        if flow_init is not None:
+            scale = l_pyramid[max_res].shape[2] // flow_init.shape[2]
+            # the CREStereo implementation multiplies with -scale instead of scale;
+            # upsample_flow multiplies with scale, therefore we add the - in front
+            flow_estimates[max_res] = -upsample_flow(flow_init, up_mask=None, factor=scale)
+        # when not provided with a flow prior, we construct one using the lower resolution maps
+        else:
+            # initialize a zero flow at the smallest resolution
+            flow = torch.zeros(size=(B, 2, MIN_H, MIN_W), device=left_features.device, dtype=left_features.dtype)
+
+            # flows from coarse resolutions are refined similarly;
+            # we always need to fetch the next pyramid feature map as well
+            # when updating coarse resolutions, therefore we create a reversed
+            # view which has its order synced with the ModuleDict keys iterator
+            coarse_resolutions: List[str] = self.resolutions[::-1]  # using slicing because of torch.jit.script
+            fine_grained_resolution = max_res
+
+            # set the coarsest flow to the zero flow
+            flow_estimates[coarse_resolutions[0]] = flow
+
+            # the correlation layers in the ModuleDict are ordered from coarse to fine resolution
+            # and cover every resolution except the finest one,
+            # i.e. {"1 / 16": Module, "1 / 8": Module}
+            # for these resolutions we perform only half the number of refinement iterations
+            for idx, (resolution, correlation_layer) in enumerate(self.correlation_layers.items()):
+                # compute the scale difference between the first pyramid scale and the current pyramid scale
+                scale_to_base = l_pyramid[fine_grained_resolution].shape[2] // l_pyramid[resolution].shape[2]
+                for it in range(iterations // 2):
+                    # decide whether we search for correlation candidates on both (X, Y) axes or just on the X axis
+                    use_small_search_patch = (it % 2) == 1
+                    # we consider this a prior, therefore we do not want to back-propagate through it
+                    flow_estimates[resolution] = flow_estimates[resolution].detach()
+
+                    correlations = correlation_layer(
+                        l_pyramid[resolution],  # left
+                        r_pyramid[resolution],  # right
+                        flow_estimates[resolution],
+                        offsets[resolution],
+                        use_small_search_patch,
+                    )
+
+                    # update the recurrent network state and the flow deltas
+                    net_pyramid[resolution], delta_flow = self.update_block(
+                        net_pyramid[resolution], ctx_pyramid[resolution], correlations, flow_estimates[resolution]
+                    )
+
+                    # the convex upsampling weights are computed w.r.t.
+                    # the recurrent update state
+                    up_mask = self.mask_predictor(net_pyramid[resolution])
+                    flow_estimates[resolution] = flow_estimates[resolution] + delta_flow
+                    # convex upsampling with the initial feature encoder downsampling rate
+                    flow_pred_prior = upsample_flow(
+                        flow_estimates[resolution], up_mask, factor=self.downsampling_factors[0]
+                    )
+                    # we then bilinearly upsample to the final resolution,
+                    # using a factor equivalent to the difference between
+                    # the current downsample resolution and the base downsample resolution
+                    #
+                    # i.e. if a 1 / 16 flow is upsampled by 4 (base downsampling) we get a 1 / 4 flow;
+                    # therefore we have to further upscale it by the difference between
+                    # the current level 1 / 16 and the base level 1 / 4.
+                    flow_pred = -upsample_flow(flow_pred_prior, None, factor=scale_to_base)
+                    predictions.append(flow_pred)
+
+                # when constructing the next resolution prior, we resample w.r.t.
+                # the scale of the next level in the pyramid
+                next_resolution = coarse_resolutions[idx + 1]
+                scale_to_next = l_pyramid[next_resolution].shape[2] / flow_pred_prior.shape[2]
+                # we use flow_pred_prior because it is a more accurate estimate of the true flow
+                # due to the convex upsample, which resembles a learned super-resolution module.
+                # this is not necessarily an upsample, it can be a downsample, based on the provided configuration
+                flow_estimates[next_resolution] = -scale_to_next * F.interpolate(
+                    input=flow_pred_prior,
+                    size=l_pyramid[next_resolution].shape[2:],
+                    mode="bilinear",
+                    align_corners=True,
+                )
+
+        # finally we do a full pass through the fine-grained resolution,
+        # which coincides with the maximum resolution
+
+        # we keep a separate loop here in order to avoid Python control flow
+        # deciding how many iterations to run based on the current resolution;
+        # furthermore, if provided with an initial flow, there is no need to generate
+        # a prior estimate when moving into the final refinement stage
+
+        for it in range(iterations):
+            use_small_search_patch = (it % 2) == 1
+
+            flow_estimates[max_res] = flow_estimates[max_res].detach()
+            # we run the fine-grained resolution correlations in iterative mode,
+            # meaning that we use the fixed window pixel selections
+            # instead of the deformed ones as in the previous steps
+            correlations = self.max_res_correlation_layer(
+                l_pyramid[max_res],
+                r_pyramid[max_res],
+                flow_estimates[max_res],
+                extra_offset=None,
+                use_small_patch=use_small_search_patch,
+                iter_mode=True,
+            )
+
+            net_pyramid[max_res], delta_flow = self.update_block(
+                net_pyramid[max_res], ctx_pyramid[max_res], correlations, flow_estimates[max_res]
+            )
+
+            up_mask = self.mask_predictor(net_pyramid[max_res])
+            flow_estimates[max_res] = flow_estimates[max_res] + delta_flow
+            # at the final resolution we simply do a convex upsample using the base downsample rate
+            flow_pred = -upsample_flow(flow_estimates[max_res], up_mask, factor=self.downsampling_factors[0])
+            predictions.append(flow_pred)
+
+        return predictions
+
+
+def _crestereo(
+    *,
+    weights: Optional[WeightsEnum],
+    progress: bool,
+    # Feature Encoder
+    feature_encoder_layers: Tuple[int, int, int, int, int],
+    feature_encoder_strides: Tuple[int, int, int, int],
+    feature_encoder_block: Callable[..., nn.Module],
+    # Average Pooling Pyramid
+    feature_downsample_rates: Tuple[int, ...],
+    # Adaptive Correlation Layer
+    corr_groups: int,
+    corr_search_window_2d: Tuple[int, int],
+    corr_search_dilate_2d: Tuple[int, int],
+    corr_search_window_1d: Tuple[int, int],
+    corr_search_dilate_1d: Tuple[int, int],
+    # Flow head
+    flow_head_hidden_size: int,
+    # Recurrent block
+    recurrent_block_hidden_state_size: int,
+    recurrent_block_kernel_size: Tuple[Tuple[int, int], Tuple[int, int]],
+    recurrent_block_padding: Tuple[Tuple[int, int], Tuple[int, int]],
+    # Motion Encoder
+    motion_encoder_corr_layers: Tuple[int, int],
+    motion_encoder_flow_layers: Tuple[int, int],
+    motion_encoder_out_channels: int,
+    # Transformer Blocks
+    num_attention_heads: int,
+    num_self_attention_layers: int,
+    num_cross_attention_layers: int,
+    self_attention_type: str,
+    cross_attention_type: str,
+    **kwargs,
+) -> CREStereo:
+
+    feature_encoder = kwargs.pop("feature_encoder", None) or FeatureEncoder(
+        block=feature_encoder_block,
+        layers=feature_encoder_layers,
+        strides=feature_encoder_strides,
+        norm_layer=nn.InstanceNorm2d,
+    )
+
+    assert feature_encoder.output_dim % corr_groups == 0, (
+        f"Final ``feature_encoder_layers`` size should be divisible by the ``corr_groups`` argument. "
+        f"Feature encoder output size: {feature_encoder.output_dim}, Correlation groups: {corr_groups}."
+    )
+
+    motion_encoder = kwargs.pop("motion_encoder", None) or raft.MotionEncoder(
+        in_channels_corr=corr_groups * int(np.prod(corr_search_window_1d)),
+        corr_layers=motion_encoder_corr_layers,
+        flow_layers=motion_encoder_flow_layers,
+        out_channels=motion_encoder_out_channels,
+    )
+
+    out_channels_context = feature_encoder_layers[-1] - recurrent_block_hidden_state_size
+    recurrent_block = kwargs.pop("recurrent_block", None) or raft.RecurrentBlock(
+        input_size=motion_encoder.out_channels + out_channels_context,
+        hidden_size=recurrent_block_hidden_state_size,
+        kernel_size=recurrent_block_kernel_size,
+        padding=recurrent_block_padding,
+    )
+
+    flow_head = kwargs.pop("flow_head", None) or raft.FlowHead(
+        in_channels=out_channels_context, hidden_size=flow_head_hidden_size
+    )
+
+    update_block = raft.UpdateBlock(motion_encoder=motion_encoder, recurrent_block=recurrent_block, flow_head=flow_head)
+
+    self_attn_block = LocalFeatureTransformer(
+        dim_model=feature_encoder.output_dim,
+        num_heads=num_attention_heads,
+        attention_directions=["self"] * num_self_attention_layers,
+        attention_type=self_attention_type,
+    )
+
+    cross_attn_block = LocalFeatureTransformer(
+        dim_model=feature_encoder.output_dim,
+        num_heads=num_attention_heads,
+        attention_directions=["cross"] * num_cross_attention_layers,
+        attention_type=cross_attention_type,
+    )
+
+    model = CREStereo(
+        feature_encoder=feature_encoder,
+        update_block=update_block,
+        flow_head=flow_head,
+        self_attn_block=self_attn_block,
+        cross_attn_block=cross_attn_block,
+        feature_downsample_rates=feature_downsample_rates,
+        correlation_groups=corr_groups,
+        search_window_1d=corr_search_window_1d,
+        search_window_2d=corr_search_window_2d,
+        search_dilate_1d=corr_search_dilate_1d,
+        search_dilate_2d=corr_search_dilate_2d,
+    )
+
+    if weights is not None:
+        model.load_state_dict(weights.get_state_dict(progress=progress))
+
+    return model
+
+
+def crestereo_base(*, weights: Optional[WeightsEnum] = None, progress: bool = True, **kwargs) -> CREStereo:
+    return _crestereo(
+        weights=weights,
+        progress=progress,
+        # Feature encoder
+        feature_encoder_layers=(64, 64, 96, 128, 256),
+        feature_encoder_strides=(2, 1, 2, 1),
+        feature_encoder_block=ResidualBlock,
+        # Average pooling pyramid
+        feature_downsample_rates=(2, 4),
+        # Motion encoder
+        motion_encoder_corr_layers=(256, 192),
+        motion_encoder_flow_layers=(128, 64),
+        motion_encoder_out_channels=256,
+        # Recurrent block
+        recurrent_block_hidden_state_size=128,
+        recurrent_block_kernel_size=((1, 5), (5, 1)),
+        recurrent_block_padding=((0, 2), (2, 0)),
+        # Flow head
+        flow_head_hidden_size=256,
+        # Transformer blocks
+        num_attention_heads=8,
+        num_self_attention_layers=1,
+        num_cross_attention_layers=1,
+        self_attention_type="linear",
+        cross_attention_type="linear",
+        # Adaptive Correlation layer
+        corr_groups=4,
+        corr_search_window_2d=(3, 3),
+        corr_search_dilate_2d=(1, 1),
+        corr_search_window_1d=(1, 9),
+        corr_search_dilate_1d=(1, 1),
+    )
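A minimal usage sketch for the new builder, mirroring ``test_crestereo`` above (the output shape and the ``iterations`` argument are taken from that test; ``weights=None`` since this draft ships no pretrained weights):

    import torch
    from torchvision.prototype.models.depth.stereo.crestereo import crestereo_base

    model = crestereo_base(weights=None).eval()
    left = torch.rand(1, 3, 256, 256)
    right = torch.rand(1, 3, 256, 256)
    with torch.no_grad():
        # one prediction per refinement iteration, the last being the most refined
        preds = model(left, right, flow_init=None, iterations=3)
    print(preds[-1].shape)  # torch.Size([1, 2, 256, 256])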