From 41c74c1bec6691f59637fa718741af7fe55411ee Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 1 Apr 2022 08:16:00 +0200 Subject: [PATCH 1/2] indicate md5 checksum is not used for security --- torchvision/datasets/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index dbc9cf2a6b4..3716fea1587 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -7,6 +7,7 @@ import os.path import pathlib import re +import sys import tarfile import urllib import urllib.error @@ -52,7 +53,7 @@ def bar_update(count, block_size, total_size): def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: - md5 = hashlib.md5() + md5 = hashlib.md5(**dict(usedforsecurity=False) if sys.version_info >= (3, 9) else dict()) with open(fpath, "rb") as f: for chunk in iter(lambda: f.read(chunk_size), b""): md5.update(chunk) From da0a427f63be094308d9160317540783049b66b0 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 4 Apr 2022 09:24:52 +0200 Subject: [PATCH 2/2] add explanation --- torchvision/datasets/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index 3716fea1587..35ea12dfea8 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -53,6 +53,9 @@ def bar_update(count, block_size, total_size): def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: + # Setting the `usedforsecurity` flag does not change anything about the functionality, but indicates that we are + # not using the MD5 checksum for cryptography. This enables its usage in restricted environments like FIPS. Without + # it torchvision.datasets is unusable in these environments since we perform an MD5 check everywhere. md5 = hashlib.md5(**dict(usedforsecurity=False) if sys.version_info >= (3, 9) else dict()) with open(fpath, "rb") as f: for chunk in iter(lambda: f.read(chunk_size), b""):