Skip to content

Commit

Permalink
bpo-34022: Stop forcing of hash-based invalidation with SOURCE_DATE_E…
Browse files Browse the repository at this point in the history
…POCH (pythonGH-9607)

Unconditional forcing of ``CHECKED_HASH`` invalidation was introduced in
3.7.0 in bpo-29708.  The change is bad, as it unconditionally overrides
*invalidation_mode*, even if it was passed as an explicit argument to
``py_compile.compile()`` or ``compileall``.  An environment variable
should *never* override an explicit argument to a library function.
That change leads to multiple test failures if the ``SOURCE_DATE_EPOCH``
environment variable is set.

This changes ``py_compile.compile()`` to only look at
``SOURCE_DATE_EPOCH`` if no explicit *invalidation_mode* was specified.
I also made various relevant tests run with explicit control over the
value of ``SOURCE_DATE_EPOCH``.

While looking at this, I noticed that ``zipimport`` does not work
with hash-based .pycs _at all_, though I left the fixes for
subsequent commits.
(cherry picked from commit a6b3ec5)

Co-authored-by: Elvis Pranskevichus <elvis@magic.io>
  • Loading branch information
elprans authored and miss-islington committed Nov 28, 2018
1 parent c901045 commit c1c0066
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 30 deletions.
11 changes: 7 additions & 4 deletions Doc/library/compileall.rst
Expand Up @@ -85,13 +85,16 @@ compile Python sources.

.. cmdoption:: --invalidation-mode [timestamp|checked-hash|unchecked-hash]

Control how the generated pycs will be invalidated at runtime. The default
setting, ``timestamp``, means that ``.pyc`` files with the source timestamp
Control how the generated byte-code files are invalidated at runtime.
The ``timestamp`` value, means that ``.pyc`` files with the source timestamp
and size embedded will be generated. The ``checked-hash`` and
``unchecked-hash`` values cause hash-based pycs to be generated. Hash-based
pycs embed a hash of the source file contents rather than a timestamp. See
:ref:`pyc-invalidation` for more information on how Python validates bytecode
cache files at runtime.
:ref:`pyc-invalidation` for more information on how Python validates
bytecode cache files at runtime.
The default is ``timestamp`` if the :envvar:`SOURCE_DATE_EPOCH` environment
variable is not set, and ``checked-hash`` if the ``SOURCE_DATE_EPOCH``
environment variable is set.

.. versionchanged:: 3.2
Added the ``-i``, ``-b`` and ``-h`` options.
Expand Down
13 changes: 9 additions & 4 deletions Doc/library/py_compile.rst
Expand Up @@ -54,10 +54,10 @@ byte-code cache files in the directory containing the source code.
level of the current interpreter.

*invalidation_mode* should be a member of the :class:`PycInvalidationMode`
enum and controls how the generated ``.pyc`` files are invalidated at
runtime. If the :envvar:`SOURCE_DATE_EPOCH` environment variable is set,
*invalidation_mode* will be forced to
:attr:`PycInvalidationMode.CHECKED_HASH`.
enum and controls how the generated bytecode cache is invalidated at
runtime. The default is :attr:`PycInvalidationMode.CHECKED_HASH` if
the :envvar:`SOURCE_DATE_EPOCH` environment variable is set, otherwise
the default is :attr:`PycInvalidationMode.TIMESTAMP`.

.. versionchanged:: 3.2
Changed default value of *cfile* to be :PEP:`3147`-compliant. Previous
Expand All @@ -77,6 +77,11 @@ byte-code cache files in the directory containing the source code.
*invalidation_mode* will be forced to
:attr:`PycInvalidationMode.CHECKED_HASH`.

.. versionchanged:: 3.7.2
The :envvar:`SOURCE_DATE_EPOCH` environment variable no longer
overrides the value of the *invalidation_mode* argument, and determines
its default value instead.


.. class:: PycInvalidationMode

Expand Down
20 changes: 13 additions & 7 deletions Lib/compileall.py
Expand Up @@ -49,7 +49,7 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):

def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
invalidation_mode=None):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
Expand Down Expand Up @@ -100,7 +100,7 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,

def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
invalidation_mode=None):
"""Byte-compile one file.
Arguments (only fullname is required):
Expand Down Expand Up @@ -186,7 +186,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,

def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
invalidation_mode=None):
"""Byte-compile all module on sys.path.
Arguments (all optional):
Expand Down Expand Up @@ -259,9 +259,12 @@ def main():
type=int, help='Run compileall concurrently')
invalidation_modes = [mode.name.lower().replace('_', '-')
for mode in py_compile.PycInvalidationMode]
parser.add_argument('--invalidation-mode', default='timestamp',
parser.add_argument('--invalidation-mode',
choices=sorted(invalidation_modes),
help='How the pycs will be invalidated at runtime')
help=('set .pyc invalidation mode; defaults to '
'"checked-hash" if the SOURCE_DATE_EPOCH '
'environment variable is set, and '
'"timestamp" otherwise.'))

args = parser.parse_args()
compile_dests = args.compile_dest
Expand Down Expand Up @@ -290,8 +293,11 @@ def main():
if args.workers is not None:
args.workers = args.workers or None

ivl_mode = args.invalidation_mode.replace('-', '_').upper()
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
if args.invalidation_mode:
ivl_mode = args.invalidation_mode.replace('-', '_').upper()
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
else:
invalidation_mode = None

success = True
try:
Expand Down
13 changes: 10 additions & 3 deletions Lib/py_compile.py
Expand Up @@ -69,8 +69,15 @@ class PycInvalidationMode(enum.Enum):
UNCHECKED_HASH = 3


def _get_default_invalidation_mode():
if os.environ.get('SOURCE_DATE_EPOCH'):
return PycInvalidationMode.CHECKED_HASH
else:
return PycInvalidationMode.TIMESTAMP


def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
invalidation_mode=PycInvalidationMode.TIMESTAMP):
invalidation_mode=None):
"""Byte-compile one Python source file to Python bytecode.
:param file: The source file name.
Expand Down Expand Up @@ -112,8 +119,8 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
the resulting file would be regular and thus not the same type of file as
it was previously.
"""
if os.environ.get('SOURCE_DATE_EPOCH'):
invalidation_mode = PycInvalidationMode.CHECKED_HASH
if invalidation_mode is None:
invalidation_mode = _get_default_invalidation_mode()
if cfile is None:
if optimize >= 0:
optimization = optimize if optimize >= 1 else ''
Expand Down
50 changes: 44 additions & 6 deletions Lib/test/test_compileall.py
Expand Up @@ -22,7 +22,11 @@
from test import support
from test.support import script_helper

class CompileallTests(unittest.TestCase):
from .test_py_compile import without_source_date_epoch
from .test_py_compile import SourceDateEpochTestMeta


class CompileallTestsBase:

def setUp(self):
self.directory = tempfile.mkdtemp()
Expand All @@ -46,7 +50,7 @@ def add_bad_source_file(self):
with open(self.bad_source_path, 'w') as file:
file.write('x (\n')

def data(self):
def timestamp_metadata(self):
with open(self.bc_path, 'rb') as file:
data = file.read(12)
mtime = int(os.stat(self.source_path).st_mtime)
Expand All @@ -57,16 +61,18 @@ def data(self):
def recreation_check(self, metadata):
"""Check that compileall recreates bytecode when the new metadata is
used."""
if os.environ.get('SOURCE_DATE_EPOCH'):
raise unittest.SkipTest('SOURCE_DATE_EPOCH is set')
py_compile.compile(self.source_path)
self.assertEqual(*self.data())
self.assertEqual(*self.timestamp_metadata())
with open(self.bc_path, 'rb') as file:
bc = file.read()[len(metadata):]
with open(self.bc_path, 'wb') as file:
file.write(metadata)
file.write(bc)
self.assertNotEqual(*self.data())
self.assertNotEqual(*self.timestamp_metadata())
compileall.compile_dir(self.directory, force=False, quiet=True)
self.assertTrue(*self.data())
self.assertTrue(*self.timestamp_metadata())

def test_mtime(self):
# Test a change in mtime leads to a new .pyc.
Expand Down Expand Up @@ -189,6 +195,21 @@ def test_compile_missing_multiprocessing(self, compile_file_mock):
compileall.compile_dir(self.directory, quiet=True, workers=5)
self.assertTrue(compile_file_mock.called)


class CompileallTestsWithSourceEpoch(CompileallTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass


class CompileallTestsWithoutSourceEpoch(CompileallTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass


class EncodingTest(unittest.TestCase):
"""Issue 6716: compileall should escape source code when printing errors
to stdout."""
Expand All @@ -212,7 +233,7 @@ def test_error(self):
sys.stdout = orig_stdout


class CommandLineTests(unittest.TestCase):
class CommandLineTestsBase:
"""Test compileall's CLI."""

@classmethod
Expand Down Expand Up @@ -285,6 +306,7 @@ def test_no_args_compiles_path(self):
self.assertNotCompiled(self.initfn)
self.assertNotCompiled(self.barfn)

@without_source_date_epoch # timestamp invalidation test
def test_no_args_respects_force_flag(self):
self._skip_if_sys_path_not_writable()
bazfn = script_helper.make_script(self.directory, 'baz', '')
Expand Down Expand Up @@ -353,6 +375,7 @@ def test_multiple_runs(self):
self.assertTrue(os.path.exists(self.pkgdir_cachedir))
self.assertFalse(os.path.exists(cachecachedir))

@without_source_date_epoch # timestamp invalidation test
def test_force(self):
self.assertRunOK('-q', self.pkgdir)
pycpath = importlib.util.cache_from_source(self.barfn)
Expand Down Expand Up @@ -556,5 +579,20 @@ def test_workers_available_cores(self, compile_dir):
self.assertEqual(compile_dir.call_args[-1]['workers'], None)


class CommmandLineTestsWithSourceEpoch(CommandLineTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass


class CommmandLineTestsNoSourceEpoch(CommandLineTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass



if __name__ == "__main__":
unittest.main()
15 changes: 15 additions & 0 deletions Lib/test/test_importlib/source/test_file_loader.py
Expand Up @@ -19,6 +19,9 @@

from test.support import make_legacy_pyc, unload

from test.test_py_compile import without_source_date_epoch
from test.test_py_compile import SourceDateEpochTestMeta


class SimpleTest(abc.LoaderTests):

Expand Down Expand Up @@ -359,6 +362,17 @@ def test_overiden_unchecked_hash_based_pyc(self):
abc=importlib_abc, util=importlib_util)


class SourceDateEpochTestMeta(SourceDateEpochTestMeta,
type(Source_SimpleTest)):
pass


class SourceDateEpoch_SimpleTest(Source_SimpleTest,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass


class BadBytecodeTest:

def import_(self, file, module_name):
Expand Down Expand Up @@ -617,6 +631,7 @@ def test_bad_marshal(self):

# [bad timestamp]
@util.writes_bytecode_files
@without_source_date_epoch
def test_old_timestamp(self):
# When the timestamp is older than the source, bytecode should be
# regenerated.
Expand Down
66 changes: 60 additions & 6 deletions Lib/test/test_py_compile.py
@@ -1,3 +1,4 @@
import functools
import importlib.util
import os
import py_compile
Expand All @@ -10,7 +11,44 @@
from test import support


class PyCompileTests(unittest.TestCase):
def without_source_date_epoch(fxn):
"""Runs function with SOURCE_DATE_EPOCH unset."""
@functools.wraps(fxn)
def wrapper(*args, **kwargs):
with support.EnvironmentVarGuard() as env:
env.unset('SOURCE_DATE_EPOCH')
return fxn(*args, **kwargs)
return wrapper


def with_source_date_epoch(fxn):
"""Runs function with SOURCE_DATE_EPOCH set."""
@functools.wraps(fxn)
def wrapper(*args, **kwargs):
with support.EnvironmentVarGuard() as env:
env['SOURCE_DATE_EPOCH'] = '123456789'
return fxn(*args, **kwargs)
return wrapper


# Run tests with SOURCE_DATE_EPOCH set or unset explicitly.
class SourceDateEpochTestMeta(type(unittest.TestCase)):
def __new__(mcls, name, bases, dct, *, source_date_epoch):
cls = super().__new__(mcls, name, bases, dct)

for attr in dir(cls):
if attr.startswith('test_'):
meth = getattr(cls, attr)
if source_date_epoch:
wrapper = with_source_date_epoch(meth)
else:
wrapper = without_source_date_epoch(meth)
setattr(cls, attr, wrapper)

return cls


class PyCompileTestsBase:

def setUp(self):
self.directory = tempfile.mkdtemp()
Expand Down Expand Up @@ -99,16 +137,18 @@ def test_bad_coding(self):
importlib.util.cache_from_source(bad_coding)))

def test_source_date_epoch(self):
testtime = 123456789
with support.EnvironmentVarGuard() as env:
env["SOURCE_DATE_EPOCH"] = str(testtime)
py_compile.compile(self.source_path, self.pyc_path)
py_compile.compile(self.source_path, self.pyc_path)
self.assertTrue(os.path.exists(self.pyc_path))
self.assertFalse(os.path.exists(self.cache_path))
with open(self.pyc_path, 'rb') as fp:
flags = importlib._bootstrap_external._classify_pyc(
fp.read(), 'test', {})
self.assertEqual(flags, 0b11)
if os.environ.get('SOURCE_DATE_EPOCH'):
expected_flags = 0b11
else:
expected_flags = 0b00

self.assertEqual(flags, expected_flags)

@unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O')
def test_double_dot_no_clobber(self):
Expand Down Expand Up @@ -153,5 +193,19 @@ def test_invalidation_mode(self):
self.assertEqual(flags, 0b1)


class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass


class PyCompileTestsWithoutSourceEpoch(PyCompileTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass


if __name__ == "__main__":
unittest.main()
@@ -0,0 +1,3 @@
The :envvar:`SOURCE_DATE_EPOCH` environment variable no longer overrides the
value of the *invalidation_mode* argument to :func:`py_compile.compile`, and
determines its default value instead.

0 comments on commit c1c0066

Please sign in to comment.