From 71abb5cccb37b3d5b81c6e9ed161c4e840aad80c Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 23 May 2020 12:24:35 -0700 Subject: [PATCH] bpo-38632: respect SOURCE_DATE_EPOCH when building .tar sdists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This currently only affects .tar, as compression may add further variability; gzip, for example, adds the current timestamp. With this change I am able, without further modification, to produce byte-for-byte reproducible builds of IPython with no source code changes in IPython itself. Adding reproducibility to other formats will need to be done on a per-format basis, which is currently tough as distutils seems to shell out to do the compression. I can do some refactoring to perform the tar creation and compression in-process – which should be faster and more robust – but that will be a separate pull request. --- Lib/distutils/archive_util.py | 17 ++++++++++++++++- Lib/distutils/tests/test_archive_util.py | 18 ++++++++++++++++++ .../2020-05-23-12-05-15.bpo-38632.nFTEqW.rst | 5 +++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-23-12-05-15.bpo-38632.nFTEqW.rst diff --git a/Lib/distutils/archive_util.py b/Lib/distutils/archive_util.py index 565a3117b4b5e1..478f8ed1671895 100644 --- a/Lib/distutils/archive_util.py +++ b/Lib/distutils/archive_util.py @@ -103,10 +103,25 @@ def _set_uid_gid(tarinfo): tarinfo.uname = owner return tarinfo + _filter = _set_uid_gid + + # SOURCE_DATE_EPOCH is defined here: + # https://reproducible-builds.org/specs/source-date-epoch/ + # we are at least sure that when it is set no timestamp can be later than + # this. 
+ if (sde:= os.environ.get('SOURCE_DATE_EPOCH')): + timestamp = int(sde) + + def _respect_SOURCE_DATE_EPOCH(tarinfo): + tarinfo.mtime = min(tarinfo.mtime, timestamp) + return tarinfo + + _filter = lambda x: _respect_SOURCE_DATE_EPOCH(_set_uid_gid(x)) + if not dry_run: tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) try: - tar.add(base_dir, filter=_set_uid_gid) + tar.add(base_dir, filter=_filter) finally: tar.close() diff --git a/Lib/distutils/tests/test_archive_util.py b/Lib/distutils/tests/test_archive_util.py index e9aad0e40fd14c..fbde589f5e7bfb 100644 --- a/Lib/distutils/tests/test_archive_util.py +++ b/Lib/distutils/tests/test_archive_util.py @@ -306,6 +306,24 @@ def test_make_archive_tar(self): self.assertEqual(os.path.basename(res), 'archive.tar') self.assertEqual(self._tarinfo(res), self._created_files) + def test_make_archive_tar_source_date_epoch(self): + ORIGINAL_SDE = os.environ.get('SOURCE_DATE_EPOCH') + try: + os.environ['SOURCE_DATE_EPOCH'] = '1337' + base_dir = self._create_files() + base_name = os.path.join(self.mkdtemp() , 'archive') + res = make_archive(base_name, 'tar', base_dir, 'dist') + + archive = tarfile.open(res,mode='r') + for item in archive: + self.assertLessEqual(item.mtime, 1337) + finally: + archive.close() + if ORIGINAL_SDE is None: + del os.environ['SOURCE_DATE_EPOCH'] + else: + os.environ['SOURCE_DATE_EPOCH'] = ORIGINAL_SDE + @unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run') def test_make_archive_gztar(self): base_dir = self._create_files() diff --git a/Misc/NEWS.d/next/Library/2020-05-23-12-05-15.bpo-38632.nFTEqW.rst b/Misc/NEWS.d/next/Library/2020-05-23-12-05-15.bpo-38632.nFTEqW.rst new file mode 100644 index 00000000000000..828f7640e675af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-23-12-05-15.bpo-38632.nFTEqW.rst @@ -0,0 +1,5 @@ +Partial support for ``SOURCE_DATE_EPOCH`` environment variable for sdists +has been added in distutils. 
When the ``SOURCE_DATE_EPOCH`` environment +variable is set, the ``mtime`` of the files in an sdist tar archive will not +be later than ``SOURCE_DATE_EPOCH``. This is a first step to simplify getting +byte-identical reproducibility of source dists.