From 710bd071208a35b0dff74bfc427c64a24cf19812 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 19 May 2022 10:16:57 +0100 Subject: [PATCH 1/5] Deactivate CelebA download --- torchvision/datasets/celeba.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/celeba.py b/torchvision/datasets/celeba.py index e9dd883b92e..241515e7c43 100644 --- a/torchvision/datasets/celeba.py +++ b/torchvision/datasets/celeba.py @@ -38,6 +38,12 @@ class CelebA(VisionDataset): download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. + + .. warning:: + + Downloading CelebA is not supported anymore as of 0.13. See + `this issue <https://github.com/pytorch/vision/issues/5705>`__ + for more details. """ base_folder = "celeba" @@ -146,10 +152,15 @@ def download(self) -> None: print("Files already downloaded and verified") return - for (file_id, md5, filename) in self.file_list: - download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5) + raise ValueError( + "Downloading CelebA is not supported anymore as of 0.13. See " + "https://github.com/pytorch/vision/issues/5705 for more details." + ) + # TODO maybe uncomment lines below to put back download if it ever gets fixed. 
+ # for (file_id, md5, filename) in self.file_list: + # download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5) - extract_archive(os.path.join(self.root, self.base_folder, "img_align_celeba.zip")) + # extract_archive(os.path.join(self.root, self.base_folder, "img_align_celeba.zip")) def __getitem__(self, index: int) -> Tuple[Any, Any]: X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index])) From ce6dcfd0d694be23bdd2b46c6ce98073e4a98b66 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 19 May 2022 10:20:12 +0100 Subject: [PATCH 2/5] flake8 --- torchvision/datasets/celeba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/celeba.py b/torchvision/datasets/celeba.py index 241515e7c43..e7a286f0b0a 100644 --- a/torchvision/datasets/celeba.py +++ b/torchvision/datasets/celeba.py @@ -6,7 +6,7 @@ import PIL import torch -from .utils import download_file_from_google_drive, check_integrity, verify_str_arg, extract_archive +from .utils import check_integrity, verify_str_arg from .vision import VisionDataset CSV = namedtuple("CSV", ["header", "index", "data"]) From 048cc3d2c26017b94836368cc59d7741df8ebfe8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 19 May 2022 14:10:29 +0100 Subject: [PATCH 3/5] Do proto version --- .../prototype/datasets/_builtin/celeba.py | 39 ++++++++++++------- .../prototype/datasets/utils/_resource.py | 6 +-- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/torchvision/prototype/datasets/_builtin/celeba.py b/torchvision/prototype/datasets/_builtin/celeba.py index 46ccf8de6f7..00a345a7b75 100644 --- a/torchvision/prototype/datasets/_builtin/celeba.py +++ b/torchvision/prototype/datasets/_builtin/celeba.py @@ -11,7 +11,7 @@ ) from torchvision.prototype.datasets.utils import ( Dataset, - GDriveResource, + ManualDownloadResource, OnlineResource, ) from torchvision.prototype.datasets.utils._internal 
import ( @@ -85,33 +85,46 @@ def __init__( super().__init__(root, skip_integrity_check=skip_integrity_check) def _resources(self) -> List[OnlineResource]: - splits = GDriveResource( - "0B7EVK8r0v71pY0NSMzRuSXJEVkk", + instructions = "Please download the file from https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html." + # splits = GDriveResource( + # "0B7EVK8r0v71pY0NSMzRuSXJEVkk", + splits = ManualDownloadResource( + instructions=instructions, sha256="fc955bcb3ef8fbdf7d5640d9a8693a8431b5f2ee291a5c1449a1549e7e073fe7", file_name="list_eval_partition.txt", ) - images = GDriveResource( - "0B7EVK8r0v71pZjFTYXZWM3FlRnM", + # images = GDriveResource( + # "0B7EVK8r0v71pZjFTYXZWM3FlRnM", + images = ManualDownloadResource( + instructions=instructions, sha256="46fb89443c578308acf364d7d379fe1b9efb793042c0af734b6112e4fd3a8c74", file_name="img_align_celeba.zip", ) - identities = GDriveResource( - "1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", + # identities = GDriveResource( + # "1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", + identities = ManualDownloadResource( + instructions=instructions, sha256="c6143857c3e2630ac2da9f782e9c1232e5e59be993a9d44e8a7916c78a6158c0", file_name="identity_CelebA.txt", ) - attributes = GDriveResource( - "0B7EVK8r0v71pblRyaVFSWGxPY0U", + # attributes = GDriveResource( + # "0B7EVK8r0v71pblRyaVFSWGxPY0U", + attributes = ManualDownloadResource( + instructions=instructions, sha256="f0e5da289d5ccf75ffe8811132694922b60f2af59256ed362afa03fefba324d0", file_name="list_attr_celeba.txt", ) - bounding_boxes = GDriveResource( - "0B7EVK8r0v71pbThiMVRxWXZ4dU0", + # bounding_boxes = GDriveResource( + # "0B7EVK8r0v71pbThiMVRxWXZ4dU0", + bounding_boxes = ManualDownloadResource( + instructions=instructions, sha256="7487a82e57c4bb956c5445ae2df4a91ffa717e903c5fa22874ede0820c8ec41b", file_name="list_bbox_celeba.txt", ) - landmarks = GDriveResource( - "0B7EVK8r0v71pd0FJY3Blby1HUTQ", + # landmarks = GDriveResource( + # "0B7EVK8r0v71pd0FJY3Blby1HUTQ", + landmarks = ManualDownloadResource( + 
instructions=instructions, sha256="6c02a87569907f6db2ba99019085697596730e8129f67a3d61659f198c48d43b", file_name="list_landmarks_align_celeba.txt", ) diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py index 3c9b95cb498..380ed3e97f0 100644 --- a/torchvision/prototype/datasets/utils/_resource.py +++ b/torchvision/prototype/datasets/utils/_resource.py @@ -216,9 +216,9 @@ def __init__(self, instructions: str, **kwargs: Any) -> None: def _download(self, root: pathlib.Path) -> NoReturn: raise RuntimeError( - f"The file {self.file_name} cannot be downloaded automatically. " - f"Please follow the instructions below and place it in {root}\n\n" - f"{self.instructions}" + f"The file {self.file_name} was not found, and cannot be downloaded automatically. " + f"{self.instructions} " + f"Once it is downloaded, please place the file in {root}\n\n" ) From b007eded8d46da403cdd75220dc45f4a696eece9 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 20 May 2022 10:00:35 +0100 Subject: [PATCH 4/5] Update torchvision/prototype/datasets/utils/_resource.py Co-authored-by: Philip Meier --- torchvision/prototype/datasets/utils/_resource.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py index 380ed3e97f0..9222c6e30a0 100644 --- a/torchvision/prototype/datasets/utils/_resource.py +++ b/torchvision/prototype/datasets/utils/_resource.py @@ -216,9 +216,9 @@ def __init__(self, instructions: str, **kwargs: Any) -> None: def _download(self, root: pathlib.Path) -> NoReturn: raise RuntimeError( - f"The file {self.file_name} was not found, and cannot be downloaded automatically. 
" - f"{self.instructions} " - f"Once it is downloaded, please place the file in {root}\n\n" + f"The file {self.file_name} was not found, and cannot be downloaded automatically.\n\n" + f"{self.instructions.strip()}\n\n" + f"Once it is downloaded, please place the file in {root}." ) From b5193a874fb0feb6ed12465e744df461b340d737 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 20 May 2022 10:00:45 +0100 Subject: [PATCH 5/5] address review --- torchvision/datasets/celeba.py | 17 ++++++++--------- .../prototype/datasets/_builtin/celeba.py | 12 ------------ 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/torchvision/datasets/celeba.py b/torchvision/datasets/celeba.py index e7a286f0b0a..4cfbeca2890 100644 --- a/torchvision/datasets/celeba.py +++ b/torchvision/datasets/celeba.py @@ -35,15 +35,16 @@ class CelebA(VisionDataset): and returns a transformed version. E.g, ``transforms.PILToTensor`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. - download (bool, optional): If true, downloads the dataset from the internet and - puts it in root directory. If dataset is already downloaded, it is not - downloaded again. + download (bool, optional): Unsupported. .. warning:: Downloading CelebA is not supported anymore as of 0.13. See `this issue <https://github.com/pytorch/vision/issues/5705>`__ for more details. + Please download the files from + https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html and extract + them in ``root/celeba``. """ base_folder = "celeba" @@ -154,13 +155,11 @@ def download(self) -> None: raise ValueError( "Downloading CelebA is not supported anymore as of 0.13. See " - "https://github.com/pytorch/vision/issues/5705 for more details." + "https://github.com/pytorch/vision/issues/5705 for more details. " + "Please download the files from " + "https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html and extract them " + "in ``root/celeba``." ) - # TODO maybe uncomment lines below to put back download if it ever gets fixed. 
- # for (file_id, md5, filename) in self.file_list: - # download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5) - - # extract_archive(os.path.join(self.root, self.base_folder, "img_align_celeba.zip")) def __getitem__(self, index: int) -> Tuple[Any, Any]: X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index])) diff --git a/torchvision/prototype/datasets/_builtin/celeba.py b/torchvision/prototype/datasets/_builtin/celeba.py index 00a345a7b75..db90aa057ff 100644 --- a/torchvision/prototype/datasets/_builtin/celeba.py +++ b/torchvision/prototype/datasets/_builtin/celeba.py @@ -86,43 +86,31 @@ def __init__( def _resources(self) -> List[OnlineResource]: instructions = "Please download the file from https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html." - # splits = GDriveResource( - # "0B7EVK8r0v71pY0NSMzRuSXJEVkk", splits = ManualDownloadResource( instructions=instructions, sha256="fc955bcb3ef8fbdf7d5640d9a8693a8431b5f2ee291a5c1449a1549e7e073fe7", file_name="list_eval_partition.txt", ) - # images = GDriveResource( - # "0B7EVK8r0v71pZjFTYXZWM3FlRnM", images = ManualDownloadResource( instructions=instructions, sha256="46fb89443c578308acf364d7d379fe1b9efb793042c0af734b6112e4fd3a8c74", file_name="img_align_celeba.zip", ) - # identities = GDriveResource( - # "1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", identities = ManualDownloadResource( instructions=instructions, sha256="c6143857c3e2630ac2da9f782e9c1232e5e59be993a9d44e8a7916c78a6158c0", file_name="identity_CelebA.txt", ) - # attributes = GDriveResource( - # "0B7EVK8r0v71pblRyaVFSWGxPY0U", attributes = ManualDownloadResource( instructions=instructions, sha256="f0e5da289d5ccf75ffe8811132694922b60f2af59256ed362afa03fefba324d0", file_name="list_attr_celeba.txt", ) - # bounding_boxes = GDriveResource( - # "0B7EVK8r0v71pbThiMVRxWXZ4dU0", bounding_boxes = ManualDownloadResource( instructions=instructions, 
sha256="7487a82e57c4bb956c5445ae2df4a91ffa717e903c5fa22874ede0820c8ec41b", file_name="list_bbox_celeba.txt", ) - # landmarks = GDriveResource( - # "0B7EVK8r0v71pd0FJY3Blby1HUTQ", landmarks = ManualDownloadResource( instructions=instructions, sha256="6c02a87569907f6db2ba99019085697596730e8129f67a3d61659f198c48d43b",