diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index 3ad71d31c2f438..ff4f12fb1af70c 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -13,6 +13,7 @@ import _imp import sys +import threading import types @@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType): def __getattribute__(self, attr): """Trigger the load of the module and return the attribute.""" - # All module metadata must be garnered from __spec__ in order to avoid - # using mutated values. - # Stop triggering this method. - self.__class__ = types.ModuleType - # Get the original name to make sure no object substitution occurred - # in sys.modules. - original_name = self.__spec__.name - # Figure out exactly what attributes were mutated between the creation - # of the module and now. - attrs_then = self.__spec__.loader_state['__dict__'] - attrs_now = self.__dict__ - attrs_updated = {} - for key, value in attrs_now.items(): - # Code that set the attribute may have kept a reference to the - # assigned object, making identity more important than equality. - if key not in attrs_then: - attrs_updated[key] = value - elif id(attrs_now[key]) != id(attrs_then[key]): - attrs_updated[key] = value - self.__spec__.loader.exec_module(self) - # If exec_module() was used directly there is no guarantee the module - # object was put into sys.modules. - if original_name in sys.modules: - if id(self) != id(sys.modules[original_name]): - raise ValueError(f"module object for {original_name!r} " - "substituted in sys.modules during a lazy " - "load") - # Update after loading since that's what would happen in an eager - # loading situation. - self.__dict__.update(attrs_updated) + __spec__ = object.__getattribute__(self, '__spec__') + loader_state = __spec__.loader_state + with loader_state['lock']: + # Only the first thread to get the lock should trigger the load + # and reset the module's class. The rest can now getattr(). + if object.__getattribute__(self, '__class__') is _LazyModule: + # The first thread comes here multiple times as it descends the + # call stack. The first time, it sets is_loading and triggers + # exec_module(), which will access module.__dict__, module.__name__, + # and/or module.__spec__, reentering this method. These accesses + # need to be allowed to proceed without triggering the load again. + if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'): + return object.__getattribute__(self, attr) + loader_state['is_loading'] = True + + __dict__ = object.__getattribute__(self, '__dict__') + + # All module metadata must be gathered from __spec__ in order to avoid + # using mutated values. + # Get the original name to make sure no object substitution occurred + # in sys.modules. + original_name = __spec__.name + # Figure out exactly what attributes were mutated between the creation + # of the module and now. + attrs_then = loader_state['__dict__'] + attrs_now = __dict__ + attrs_updated = {} + for key, value in attrs_now.items(): + # Code that set an attribute may have kept a reference to the + # assigned object, making identity more important than equality. + if key not in attrs_then: + attrs_updated[key] = value + elif id(attrs_now[key]) != id(attrs_then[key]): + attrs_updated[key] = value + __spec__.loader.exec_module(self) + # If exec_module() was used directly there is no guarantee the module + # object was put into sys.modules. + if original_name in sys.modules: + if id(self) != id(sys.modules[original_name]): + raise ValueError(f"module object for {original_name!r} " + "substituted in sys.modules during a lazy " + "load") + # Update after loading since that's what would happen in an eager + # loading situation. + __dict__.update(attrs_updated) + # Finally, stop triggering this method. + self.__class__ = types.ModuleType + return getattr(self, attr) def __delattr__(self, attr): @@ -244,5 +263,7 @@ def exec_module(self, module): loader_state = {} loader_state['__dict__'] = module.__dict__.copy() loader_state['__class__'] = module.__class__ + loader_state['lock'] = threading.RLock() + loader_state['is_loading'] = False module.__spec__.loader_state = loader_state module.__class__ = _LazyModule diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py index cc993f333e355a..38ab21907b58d9 100644 --- a/Lib/test/test_importlib/test_lazy.py +++ b/Lib/test/test_importlib/test_lazy.py @@ -2,9 +2,12 @@ from importlib import abc from importlib import util import sys +import time +import threading import types import unittest +from test.support import threading_helper from test.test_importlib import util as test_util @@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader): module_name = 'lazy_loader_test' mutated_name = 'changed' loaded = None + load_count = 0 source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name) def find_spec(self, name, path, target=None): @@ -48,8 +52,10 @@ def find_spec(self, name, path, target=None): return util.spec_from_loader(name, util.LazyLoader(self)) def exec_module(self, module): + time.sleep(0.01) # Simulate a slow load. exec(self.source_code, module.__dict__) self.loaded = module + self.load_count += 1 class LazyLoaderTests(unittest.TestCase): @@ -59,8 +65,9 @@ def test_init(self): # Classes that don't define exec_module() trigger TypeError. util.LazyLoader(object) - def new_module(self, source_code=None): - loader = TestingImporter() + def new_module(self, source_code=None, loader=None): + if loader is None: + loader = TestingImporter() if source_code is not None: loader.source_code = source_code spec = util.spec_from_loader(TestingImporter.module_name, @@ -140,6 +147,37 @@ def test_module_already_in_sys(self): # Force the load; just care that no exception is raised. module.__name__ + @threading_helper.requires_working_threading() + def test_module_load_race(self): + with test_util.uncache(TestingImporter.module_name): + loader = TestingImporter() + module = self.new_module(loader=loader) + self.assertEqual(loader.load_count, 0) + + class RaisingThread(threading.Thread): + exc = None + def run(self): + try: + super().run() + except Exception as exc: + self.exc = exc + + def access_module(): + return module.attr + + threads = [] + for _ in range(2): + threads.append(thread := RaisingThread(target=access_module)) + thread.start() + + # Races could cause errors + for thread in threads: + thread.join() + self.assertIsNone(thread.exc) + + # Or multiple load attempts + self.assertEqual(loader.load_count, 1) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst new file mode 100644 index 00000000000000..e8bdb83dde61fb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst @@ -0,0 +1,3 @@ +Protect modules loaded with :class:`importlib.util.LazyLoader` from race +conditions when multiple threads try to access attributes before the loading +is complete.