Skip to content

Commit

Permalink
Merge pull request #159 from lsst/tickets/DM-39402
Browse files Browse the repository at this point in the history
DM-39402: Refactor python package determination to use a hierarchy
  • Loading branch information
timj committed Jun 1, 2023
2 parents a34f01d + 7c21978 commit 5c40a46
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 42 deletions.
2 changes: 2 additions & 0 deletions doc/changes/DM-39402.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improved the performance of ``lsst.utils.packages.getPythonPackages()`` to use the namespace hierarchy so it now only needs to check as deep into the hierarchy as is needed to find a version.
Additionally, the code no longer tries to extract versions from Python standard library packages.
143 changes: 105 additions & 38 deletions python/lsst/utils/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@
# version. We need to guess the version from the environment
ENVIRONMENT = set(["astrometry_net", "astrometry_net_data", "minuit2", "xpa"])

try:
# Python 3.10 includes a list of standard library modules.
# These will all have the same version number as Python itself.
_STDLIB = sys.stdlib_module_names
except AttributeError:
_STDLIB = frozenset()


def getVersionFromPythonModule(module: types.ModuleType) -> str:
"""Determine the version of a python module.
Expand Down Expand Up @@ -114,55 +121,115 @@ def getPythonPackages() -> Dict[str, str]:
pass # It's not available, so don't care

packages = {"python": sys.version}

# Not iterating with sys.modules.iteritems() because it's not atomic and
# subject to race conditions
moduleNames = list(sys.modules.keys())
for name in moduleNames:
try:
# This is the Python standard way to find a package version.
# It can be slow.
ver = importlib.metadata.version(name)
except Exception:
# Fall back to using the module itself.
module = sys.modules[name]
try:
ver = getVersionFromPythonModule(module)
except Exception:
continue # Can't get a version from it, don't care

# Remove "foo.bar.version" in favor of "foo.bar"
# This prevents duplication when the __init__.py includes
# "from .version import *"
modified = False
for ending in (".version", "._version"):
if name.endswith(ending):
name = name[: -len(ending)]
modified = True
break

# Check if this name has already been registered.
# This can happen if x._version is encountered before x.
if name in packages:
if ver != packages[name]:
# There is an inconsistency between this version
# and that previously calculated. Raising an exception
# would go against the ethos of this package. If this
# is the stripped package name we should drop it and
# trust the primary version. Else if this was not
# the modified version we should use it in preference.
if modified:
continue
module_names = list(sys.modules.keys())

# Use knowledge of package hierarchy to find the versions rather than
# using each name independently. Group all the module names into the
# hierarchy, splitting on dot, and skipping any component that starts
# with an underscore.

# Sorting the module names gives us:
# lsst
# lsst.afw
# lsst.afw.cameraGeom
# ...
# lsst.daf
# lsst.daf.butler
#
# and so we can use knowledge of the previous version to inform whether
# we need to look at the subsequent line.
n_versions = 0
n_checked = 0
previous = ""
for name in sorted(module_names):
if name.startswith("_") or "._" in name:
# Refers to a private module so we can ignore it and assume
# version has been lifted into parent or, if top level, not
# relevant for versioning. This applies also to standard library
# packages such as _abc and __future__.
continue

if name in _STDLIB:
# Assign all standard library packages the python version
# since they almost all lack explicit versions.
packages[name] = sys.version
previous = name
continue

if name.startswith(previous + ".") and previous in packages:
# Already have this version. Use the same previous name
# for the line after this.
continue

# Look for a version.
ver = _get_python_package_version(name, packages)

n_checked += 1
if ver is not None:
n_versions += 1
previous = name

log.debug(
"Given %d modules but checked %d in hierarchy and found versions for %d",
len(module_names),
n_checked,
n_versions,
)

for name in list(packages.keys()):
# Use LSST package names instead of python module names
# This matches the names we get from the environment (i.e., EUPS)
# so we can clobber these build-time versions if the environment
# reveals that we're not using the packages as-built.
if name.startswith("lsst."):
name = name.replace("lsst.", "").replace(".", "_")
new_name = name.replace("lsst.", "").replace(".", "_")
packages[new_name] = packages[name]
del packages[name]

return packages


def _get_python_package_version(name: str, packages: dict[str, str]) -> str | None:
"""Given a package or module name, try to determine the version.
Parameters
----------
name : `str`
The name of the package or module to try.
packages : `dict`[`str`, `str`]
A dictionary mapping a name to a version. Modified in place.
The key used might not match exactly the given key.
Returns
-------
ver : `str` or `None`
The version string stored in ``packages``. Nothing is stored if the
value here is `None`.
"""
try:
# This is the Python standard way to find a package version.
# It can be slow.
ver = importlib.metadata.version(name)
except Exception:
# Fall back to using the module itself. There is no guarantee
# that "a" exists for module "a.b" so if hierarchy has been expanded
# this might fail. Check first.
if name not in sys.modules:
return None
module = sys.modules[name]
try:
ver = getVersionFromPythonModule(module)
except Exception:
return None # Can't get a version from it, don't care

# Update the package information.
if ver is not None:
packages[name] = ver

return packages
return ver


_eups: Optional[Any] = None # Singleton Eups object
Expand Down
21 changes: 17 additions & 4 deletions tests/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#

import os
import sys
import unittest
from collections.abc import Mapping

Expand Down Expand Up @@ -112,22 +113,34 @@ def testPackages(self):
# Now load an obscure python package and the list of packages should
# change
# Shouldn't be used by anything we've previously imported
import smtpd # noqa: F401
# smtpd can be used on 3.8 since it does have a version string but
# it is also a deprecated package so should not be used in tests
# for python 3.10 and newer.
# chunk does not have a version string but is handled as a stdlib
# package on 3.10 and newer.
if sys.version_info < (3, 10, 0):
import smtpd # noqa: F401

new_package = "smtpd"
else:
import chunk # noqa: F401

new_package = "chunk"

new = lsst.utils.packages.Packages.fromSystem()
self.assertDictEqual(packages.difference(new), {}) # No inconsistencies
self.assertDictEqual(packages.extra(new), {}) # Nothing in 'packages' that's not in 'new'
missing = packages.missing(new)
self.assertGreater(len(missing), 0) # 'packages' should be missing some stuff in 'new'
self.assertIn("smtpd", missing)
self.assertIn(new_package, missing)

# Inverted comparisons
self.assertDictEqual(new.difference(packages), {})
self.assertDictEqual(new.missing(packages), {}) # Nothing in 'new' that's not in 'packages'
extra = new.extra(packages)
self.assertGreater(len(extra), 0) # 'new' has extra stuff compared to 'packages'
self.assertIn("smtpd", extra)
self.assertIn("smtpd", new)
self.assertIn(new_package, extra)
self.assertIn(new_package, new)

# Run with both a Packages and a dict
for new_pkg in (new, dict(new)):
Expand Down

0 comments on commit 5c40a46

Please sign in to comment.