Skip to content

Commit

Permalink
Issue #13645: pyc files now contain the size of the corresponding source
Browse files Browse the repository at this point in the history
code, to avoid timestamp collisions (especially on filesystems with a low
timestamp resolution) when checking for freshness of the bytecode.
  • Loading branch information
pitrou committed Jan 13, 2012
1 parent 1f918c1 commit 5136ac0
Show file tree
Hide file tree
Showing 14 changed files with 166 additions and 48 deletions.
19 changes: 19 additions & 0 deletions Doc/library/importlib.rst
Expand Up @@ -239,11 +239,30 @@ are also provided to help in implementing the core ABCs.
optimization to speed up loading by removing the parsing step of Python's
compiler, and so no bytecode-specific API is exposed.

.. method:: path_stats(self, path)

Optional abstract method which returns a :class:`dict` containing
metadata about the specified path. Supported dictionary keys are:

- ``'mtime'`` (mandatory): an integer or floating-point number
representing the modification time of the source code;
- ``'size'`` (optional): the size in bytes of the source code.

Any other keys in the dictionary are ignored, to allow for future
extensions.

.. versionadded:: 3.3

.. method:: path_mtime(self, path)

Optional abstract method which returns the modification time for the
specified path.

.. deprecated:: 3.3
This method is deprecated in favour of :meth:`path_stats`. You don't
have to implement it, but it is still available for compatibility
purposes.

.. method:: set_data(self, path, data)

Optional abstract method which writes the specified bytes to a file
Expand Down
57 changes: 42 additions & 15 deletions Lib/importlib/_bootstrap.py
Expand Up @@ -331,25 +331,40 @@ def is_package(self, fullname):
filename = self.get_filename(fullname).rpartition(path_sep)[2]
return filename.rsplit('.', 1)[0] == '__init__'

def _bytes_from_bytecode(self, fullname, data, source_mtime):
def _bytes_from_bytecode(self, fullname, data, source_stats):
"""Return the marshalled bytes from bytecode, verifying the magic
number and timestamp along the way.
number, timestamp and source size along the way.
If source_mtime is None then skip the timestamp check.
If source_stats is None then skip the timestamp and size checks.
"""
magic = data[:4]
raw_timestamp = data[4:8]
raw_size = data[8:12]
if len(magic) != 4 or magic != imp.get_magic():
raise ImportError("bad magic number in {}".format(fullname))
elif len(raw_timestamp) != 4:
raise EOFError("bad timestamp in {}".format(fullname))
elif source_mtime is not None:
if marshal._r_long(raw_timestamp) != source_mtime:
raise ImportError("bytecode is stale for {}".format(fullname))
elif len(raw_size) != 4:
raise EOFError("bad size in {}".format(fullname))
if source_stats is not None:
try:
source_mtime = int(source_stats['mtime'])
except KeyError:
pass
else:
if marshal._r_long(raw_timestamp) != source_mtime:
raise ImportError("bytecode is stale for {}".format(fullname))
try:
source_size = source_stats['size'] & 0xFFFFFFFF
except KeyError:
pass
else:
if marshal._r_long(raw_size) != source_size:
raise ImportError("bytecode is stale for {}".format(fullname))
# Can't return the code object as errors from marshal loading need to
# propagate even when source is available.
return data[8:]
return data[12:]

@module_for_loader
def _load_module(self, module, *, sourceless=False):
Expand Down Expand Up @@ -377,11 +392,20 @@ class SourceLoader(_LoaderBasics):
def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path, where path is a str.
"""
raise NotImplementedError

Implementing this method allows the loader to read bytecode files.
def path_stats(self, path):
"""Optional method returning a metadata dict for the specified path
(a str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
Implementing this method allows the loader to read bytecode files.
"""
raise NotImplementedError
return {'mtime': self.path_mtime(path)}

def set_data(self, path, data):
"""Optional method which writes data (bytes) to a file path (a str).
Expand All @@ -407,7 +431,7 @@ def get_source(self, fullname):
def get_code(self, fullname):
"""Concrete implementation of InspectLoader.get_code.
Reading of bytecode requires path_mtime to be implemented. To write
Reading of bytecode requires path_stats to be implemented. To write
bytecode, set_data must also be implemented.
"""
Expand All @@ -416,18 +440,19 @@ def get_code(self, fullname):
source_mtime = None
if bytecode_path is not None:
try:
source_mtime = self.path_mtime(source_path)
st = self.path_stats(source_path)
except NotImplementedError:
pass
else:
source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except IOError:
pass
else:
try:
bytes_data = self._bytes_from_bytecode(fullname, data,
source_mtime)
st)
except (ImportError, EOFError):
pass
else:
Expand All @@ -448,6 +473,7 @@ def get_code(self, fullname):
# throw an exception.
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_mtime))
data.extend(marshal._w_long(len(source_bytes)))
data.extend(marshal.dumps(code_object))
try:
self.set_data(bytecode_path, data)
Expand Down Expand Up @@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):

"""Concrete implementation of SourceLoader using the file system."""

def path_mtime(self, path):
"""Return the modification time for the path."""
return int(_os.stat(path).st_mtime)
def path_stats(self, path):
"""Return the metadata for the path."""
st = _os.stat(path)
return {'mtime': st.st_mtime, 'size': st.st_size}

def set_data(self, path, data):
"""Write bytes data to a file."""
Expand Down
15 changes: 14 additions & 1 deletion Lib/importlib/abc.py
Expand Up @@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):

def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
raise NotImplementedError
if self.path_stats.__func__ is SourceLoader.path_stats:
raise NotImplementedError
return int(self.path_stats(path)['mtime'])

def path_stats(self, path):
"""Return a metadata dict for the source pointed to by the path (str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
"""
if self.path_mtime.__func__ is SourceLoader.path_mtime:
raise NotImplementedError
return {'mtime': self.path_mtime(path)}

def set_data(self, path, data):
"""Write the bytes to the path (if possible).
Expand Down
10 changes: 7 additions & 3 deletions Lib/importlib/test/source/test_abc_loader.py
Expand Up @@ -5,6 +5,7 @@
from .. import util
from . import util as source_util

import collections
import imp
import inspect
import io
Expand Down Expand Up @@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
def __init__(self, path, magic=imp.get_magic()):
super().__init__(path)
self.bytecode_path = imp.cache_from_source(self.path)
self.source_size = len(self.source)
data = bytearray(magic)
data.extend(marshal._w_long(self.source_mtime))
data.extend(marshal._w_long(self.source_size))
code_object = compile(self.source, self.path, 'exec',
dont_inherit=True)
data.extend(marshal.dumps(code_object))
Expand All @@ -56,9 +59,9 @@ def get_data(self, path):
else:
raise IOError

def path_mtime(self, path):
def path_stats(self, path):
assert path == self.path
return self.source_mtime
return {'mtime': self.source_mtime, 'size': self.source_size}

def set_data(self, path, data):
self.written[path] = bytes(data)
Expand Down Expand Up @@ -657,6 +660,7 @@ def verify_code(self, code_object, *, bytecode_written=False):
self.assertIn(self.cached, self.loader.written)
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(self.loader.source_mtime))
data.extend(marshal._w_long(self.loader.source_size))
data.extend(marshal.dumps(code_object))
self.assertEqual(self.loader.written[self.cached], bytes(data))

Expand Down Expand Up @@ -847,7 +851,7 @@ def test_SourceLoader(self):
# Required abstractmethods.
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
# Optional abstractmethods.
self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
self.raises_NotImplementedError(ins,'path_stats', 'set_data')

def test_PyLoader(self):
self.raises_NotImplementedError(self.PyLoader(), 'source_path',
Expand Down
44 changes: 32 additions & 12 deletions Lib/importlib/test/source/test_file_loader.py
Expand Up @@ -70,11 +70,6 @@ def test_module_reuse(self):
module_dict_id = id(module.__dict__)
with open(mapping['_temp'], 'w') as file:
file.write("testing_var = 42\n")
# For filesystems where the mtime is only to a second granularity,
# everything that has happened above can be too fast;
# force an mtime on the source that is guaranteed to be different
# than the original mtime.
loader.path_mtime = self.fake_mtime(loader.path_mtime)
module = loader.load_module('_temp')
self.assertTrue('testing_var' in module.__dict__,
"'testing_var' not in "
Expand Down Expand Up @@ -190,10 +185,17 @@ def _test_partial_timestamp(self, test, *, del_source=False):
del_source=del_source)
test('_temp', mapping, bc_path)

def _test_partial_size(self, test, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:11],
del_source=del_source)
test('_temp', mapping, bc_path)

def _test_no_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8],
lambda bc: bc[:12],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
Expand All @@ -202,7 +204,7 @@ def _test_no_marshal(self, *, del_source=False):
def _test_non_code_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + marshal.dumps(b'abcd'),
lambda bc: bc[:12] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError):
Expand All @@ -211,7 +213,7 @@ def _test_non_code_marshal(self, *, del_source=False):
def _test_bad_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + b'<test>',
lambda bc: bc[:12] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
Expand All @@ -235,15 +237,15 @@ def test_empty_file(self):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)

self._test_empty_file(test)

def test_partial_magic(self):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)

self._test_partial_magic(test)

Expand All @@ -254,7 +256,7 @@ def test_magic_only(self):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)

self._test_magic_only(test)

Expand All @@ -276,10 +278,21 @@ def test_partial_timestamp(self):
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)

self._test_partial_timestamp(test)

@source_util.writes_bytecode_files
def test_partial_size(self):
# When the size is partial, regenerate the .pyc, else
# raise EOFError.
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 12)

self._test_partial_size(test)

@source_util.writes_bytecode_files
def test_no_marshal(self):
# When there is only the magic number, timestamp and size, raise EOFError.
Expand Down Expand Up @@ -375,6 +388,13 @@ def test(name, mapping, bytecode_path):

self._test_partial_timestamp(test, del_source=True)

def test_partial_size(self):
def test(name, mapping, bytecode_path):
with self.assertRaises(EOFError):
self.import_(bytecode_path, name)

self._test_partial_size(test, del_source=True)

def test_no_marshal(self):
self._test_no_marshal(del_source=True)

Expand Down
2 changes: 1 addition & 1 deletion Lib/pkgutil.py
Expand Up @@ -21,7 +21,7 @@ def read_code(stream):
if magic != imp.get_magic():
return None

stream.read(4) # Skip timestamp
stream.read(8) # Skip timestamp and size
return marshal.load(stream)


Expand Down
7 changes: 5 additions & 2 deletions Lib/py_compile.py
Expand Up @@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
"""
with tokenize.open(file) as f:
try:
timestamp = int(os.fstat(f.fileno()).st_mtime)
st = os.fstat(f.fileno())
except AttributeError:
timestamp = int(os.stat(file).st_mtime)
st = os.stat(file)
timestamp = int(st.st_mtime)
size = st.st_size & 0xFFFFFFFF
codestring = f.read()
try:
codeobject = builtins.compile(codestring, dfile or file, 'exec',
Expand All @@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
with open(cfile, 'wb') as fc:
fc.write(b'\0\0\0\0')
wr_long(fc, timestamp)
wr_long(fc, size)
marshal.dump(codeobject, fc)
fc.flush()
fc.seek(0, 0)
Expand Down
12 changes: 11 additions & 1 deletion Lib/test/test_import.py
Expand Up @@ -380,7 +380,7 @@ def test_module_without_source(self):
def test_foreign_code(self):
py_compile.compile(self.file_name)
with open(self.compiled_name, "rb") as f:
header = f.read(8)
header = f.read(12)
code = marshal.load(f)
constants = list(code.co_consts)
foreign_code = test_main.__code__
Expand Down Expand Up @@ -644,6 +644,16 @@ def cleanup():
self.assertEqual(sys.modules['pep3147.foo'].__cached__,
os.path.join(os.curdir, foo_pyc))

def test_recompute_pyc_same_second(self):
# Even when the source file doesn't change timestamp, a change in
# source size is enough to trigger recomputation of the pyc file.
__import__(TESTFN)
unload(TESTFN)
with open(self.source, 'a') as fp:
print("x = 5", file=fp)
m = __import__(TESTFN)
self.assertEqual(m.x, 5)


class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports):

Expand Down

0 comments on commit 5136ac0

Please sign in to comment.