Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-114763: Protect lazy loading modules from attribute access race #114781

Merged
merged 12 commits into from
Feb 24, 2024
81 changes: 51 additions & 30 deletions Lib/importlib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import _imp
import sys
import threading
import types


Expand Down Expand Up @@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):

def __getattribute__(self, attr):
"""Trigger the load of the module and return the attribute."""
# All module metadata must be garnered from __spec__ in order to avoid
# using mutated values.
# Stop triggering this method.
self.__class__ = types.ModuleType
# Get the original name to make sure no object substitution occurred
# in sys.modules.
original_name = self.__spec__.name
# Figure out exactly what attributes were mutated between the creation
# of the module and now.
attrs_then = self.__spec__.loader_state['__dict__']
attrs_now = self.__dict__
attrs_updated = {}
for key, value in attrs_now.items():
# Code that set the attribute may have kept a reference to the
# assigned object, making identity more important than equality.
if key not in attrs_then:
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]):
attrs_updated[key] = value
self.__spec__.loader.exec_module(self)
# If exec_module() was used directly there is no guarantee the module
# object was put into sys.modules.
if original_name in sys.modules:
if id(self) != id(sys.modules[original_name]):
raise ValueError(f"module object for {original_name!r} "
"substituted in sys.modules during a lazy "
"load")
# Update after loading since that's what would happen in an eager
# loading situation.
self.__dict__.update(attrs_updated)
__spec__ = object.__getattribute__(self, '__spec__')
loader_state = __spec__.loader_state
with loader_state['lock']:
# Only the first thread to get the lock should trigger the load
# and reset the module's class. The rest can now getattr().
if object.__getattribute__(self, '__class__') is _LazyModule:
effigies marked this conversation as resolved.
Show resolved Hide resolved
# The first thread comes here multiple times as it descends the
# call stack. The first time, it sets is_loading and triggers
# exec_module(), which will access module.__dict__, module.__name__,
# and/or module.__spec__, reentering this method. These accesses
# need to be allowed to proceed without triggering the load again.
if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
return object.__getattribute__(self, attr)
loader_state['is_loading'] = True

__dict__ = object.__getattribute__(self, '__dict__')

# All module metadata must be gathered from __spec__ in order to avoid
# using mutated values.
# Get the original name to make sure no object substitution occurred
# in sys.modules.
original_name = __spec__.name
# Figure out exactly what attributes were mutated between the creation
# of the module and now.
attrs_then = loader_state['__dict__']
attrs_now = __dict__
attrs_updated = {}
for key, value in attrs_now.items():
# Code that set an attribute may have kept a reference to the
# assigned object, making identity more important than equality.
if key not in attrs_then:
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]):
attrs_updated[key] = value
__spec__.loader.exec_module(self)
# If exec_module() was used directly there is no guarantee the module
# object was put into sys.modules.
if original_name in sys.modules:
if id(self) != id(sys.modules[original_name]):
raise ValueError(f"module object for {original_name!r} "
"substituted in sys.modules during a lazy "
"load")
# Update after loading since that's what would happen in an eager
# loading situation.
__dict__.update(attrs_updated)
# Finally, stop triggering this method.
self.__class__ = types.ModuleType

return getattr(self, attr)

def __delattr__(self, attr):
Expand Down Expand Up @@ -244,5 +263,7 @@ def exec_module(self, module):
loader_state = {}
loader_state['__dict__'] = module.__dict__.copy()
loader_state['__class__'] = module.__class__
loader_state['lock'] = threading.RLock()
loader_state['is_loading'] = False
module.__spec__.loader_state = loader_state
module.__class__ = _LazyModule
42 changes: 40 additions & 2 deletions Lib/test/test_importlib/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from importlib import abc
from importlib import util
import sys
import time
import threading
import types
import unittest

from test.support import threading_helper
from test.test_importlib import util as test_util


Expand Down Expand Up @@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
module_name = 'lazy_loader_test'
mutated_name = 'changed'
loaded = None
load_count = 0
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)

def find_spec(self, name, path, target=None):
Expand All @@ -48,8 +52,10 @@ def find_spec(self, name, path, target=None):
return util.spec_from_loader(name, util.LazyLoader(self))

def exec_module(self, module):
time.sleep(0.01) # Simulate a slow load.
brettcannon marked this conversation as resolved.
Show resolved Hide resolved
exec(self.source_code, module.__dict__)
self.loaded = module
self.load_count += 1


class LazyLoaderTests(unittest.TestCase):
Expand All @@ -59,8 +65,9 @@ def test_init(self):
# Classes that don't define exec_module() trigger TypeError.
util.LazyLoader(object)

def new_module(self, source_code=None):
loader = TestingImporter()
def new_module(self, source_code=None, loader=None):
if loader is None:
loader = TestingImporter()
if source_code is not None:
loader.source_code = source_code
spec = util.spec_from_loader(TestingImporter.module_name,
Expand Down Expand Up @@ -140,6 +147,37 @@ def test_module_already_in_sys(self):
# Force the load; just care that no exception is raised.
module.__name__

@threading_helper.requires_working_threading()
def test_module_load_race(self):
effigies marked this conversation as resolved.
Show resolved Hide resolved
with test_util.uncache(TestingImporter.module_name):
loader = TestingImporter()
module = self.new_module(loader=loader)
self.assertEqual(loader.load_count, 0)

class RaisingThread(threading.Thread):
exc = None
def run(self):
try:
super().run()
except Exception as exc:
self.exc = exc

def access_module():
return module.attr

threads = []
for _ in range(2):
threads.append(thread := RaisingThread(target=access_module))
thread.start()

# Races could cause errors
for thread in threads:
thread.join()
self.assertIsNone(thread.exc)

# Or multiple load attempts
self.assertEqual(loader.load_count, 1)


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
conditions when multiple threads try to access attributes before the loading
is complete.