Skip to content

Commit

Permalink
Merge pull request #1915 from pre-commit/reproducible-tar
Browse files Browse the repository at this point in the history
make tarfile creation reproducible
  • Loading branch information
asottile committed May 18, 2021
2 parents 7266936 + c2108d6 commit 9f2f405
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 10 deletions.
Binary file modified pre_commit/resources/rbenv.tar.gz
Binary file not shown.
Binary file modified pre_commit/resources/ruby-build.tar.gz
Binary file not shown.
Binary file modified pre_commit/resources/ruby-download.tar.gz
Binary file not shown.
36 changes: 26 additions & 10 deletions testing/make-archives
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
import gzip
import os.path
import shutil
import subprocess
Expand All @@ -24,15 +25,14 @@ REPOS = (
)


def reset(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
    """Normalizes the per-machine metadata of a tar member.

    Intended to be passed as the ``filter=`` callable of ``TarFile.add`` so
    that the archive contents do not depend on who created the archive or
    when it was created.

    :param tarinfo: The member about to be written.  It is modified in place.
    :returns: The same ``tarinfo`` object, with owner and mtime normalized.
    """
    # ownership otherwise reflects whichever user ran the script
    tarinfo.uid = tarinfo.gid = 0
    tarinfo.uname = tarinfo.gname = 'root'
    # modification times otherwise reflect when the checkout happened
    tarinfo.mtime = 0
    return tarinfo


def make_archive(name: str, repo: str, ref: str, destdir: str) -> str:
"""Makes an archive of a repository in the given destdir.
:param text name: Name to give the archive. For instance foo. The file
that is created will be called foo.tar.gz.
:param text repo: Repository to clone.
:param text ref: Tag/SHA/branch to check out.
:param text destdir: Directory to place archives in.
"""
output_path = os.path.join(destdir, f'{name}.tar.gz')
with tempfile.TemporaryDirectory() as tmpdir:
# this ensures that the root directory has umask permissions
Expand All @@ -47,8 +47,24 @@ def make_archive(name: str, repo: str, ref: str, destdir: str) -> str:
# runtime
shutil.rmtree(os.path.join(gitdir, '.git'))

with tarfile.open(output_path, 'w|gz') as tf:
tf.add(gitdir, name)
arcs = [(name, gitdir)]
for root, dirs, filenames in os.walk(gitdir):
for filename in dirs + filenames:
abspath = os.path.abspath(os.path.join(root, filename))
relpath = os.path.relpath(abspath, gitdir)
arcs.append((os.path.join(name, relpath), abspath))
arcs.sort()

with gzip.GzipFile(output_path, 'wb', mtime=0) as gzipf:
# https://github.com/python/typeshed/issues/5491
with tarfile.open(fileobj=gzipf, mode='w') as tf: # type: ignore
for arcname, abspath in arcs:
tf.add(
abspath,
arcname=arcname,
recursive=False,
filter=reset,
)

return output_path

Expand Down

0 comments on commit 9f2f405

Please sign in to comment.