Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-25431: Include conda environment in package version detection #45

Merged
merged 5 commits into from
Aug 28, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
102 changes: 76 additions & 26 deletions python/lsst/base/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import subprocess
import logging
import pickle as pickle
import re
import yaml
from collections.abc import Mapping

Expand All @@ -42,11 +43,12 @@
BUILDTIME = set(["boost", "eigen", "tmv"])

# Python modules to attempt to load so we can try to get the version
# We do this because the version only appears to be available from python, but we use the library
# We do this because the version only appears to be available from python,
# but we use the library
PYTHON = set(["galsim"])

# Packages that don't seem to have a mechanism for reporting the runtime version
# We need to guess the version from the environment
# Packages that don't seem to have a mechanism for reporting the runtime
# version. We need to guess the version from the environment
ENVIRONMENT = set(["astrometry_net", "astrometry_net_data", "minuit2", "xpa"])


Expand Down Expand Up @@ -110,7 +112,8 @@ def getPythonPackages():
pass # It's not available, so don't care

packages = {"python": sys.version}
# Not iterating with sys.modules.iteritems() because it's not atomic and subject to race conditions
# Not iterating with sys.modules.iteritems() because it's not atomic and
# subject to race conditions
moduleNames = list(sys.modules.keys())
for name in moduleNames:
module = sys.modules[name]
Expand All @@ -120,7 +123,8 @@ def getPythonPackages():
continue # Can't get a version from it, don't care

# Remove "foo.bar.version" in favor of "foo.bar"
# This prevents duplication when the __init__.py includes "from .version import *"
# This prevents duplication when the __init__.py includes
# "from .version import *"
for ending in (".version", "._version"):
if name.endswith(ending):
name = name[:-len(ending)]
Expand All @@ -130,8 +134,9 @@ def getPythonPackages():
assert ver == packages[name]

# Use LSST package names instead of python module names
# This matches the names we get from the environment (i.e., EUPS) so we can clobber these build-time
# versions if the environment reveals that we're not using the packages as-built.
# This matches the names we get from the environment (i.e., EUPS)
# so we can clobber these build-time versions if the environment
# reveals that we're not using the packages as-built.
if "lsst" in name:
name = name.replace("lsst.", "").replace(".", "_")

Expand Down Expand Up @@ -173,21 +178,25 @@ def getEnvironmentPackages():
products = _eups.findProducts(tags=["setup"])

# Get versions for things we can't determine via runtime mechanisms
# XXX Should we just grab everything we can, rather than just a predetermined set?
# XXX Should we just grab everything we can, rather than just a
# predetermined set?
packages = {prod.name: prod.version for prod in products if prod in ENVIRONMENT}

# The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the version name indicates uninstalled
# code, so the version could be different than what's being reported by the runtime environment (because
# we don't tend to run "scons" every time we update some python file, and even if we did sconsUtils
# probably doesn't check to see if the repo is clean).
# The string 'LOCAL:' (the value of Product.LocalVersionPrefix) in the
# version name indicates uninstalled code, so the version could be
# different than what's being reported by the runtime environment (because
# we don't tend to run "scons" every time we update some python file,
# and even if we did sconsUtils probably doesn't check to see if the repo
# is clean).
for prod in products:
if not prod.version.startswith(Product.LocalVersionPrefix):
continue
ver = prod.version

gitDir = os.path.join(prod.dir, ".git")
if os.path.exists(gitDir):
# get the git revision and an indication if the working copy is clean
# get the git revision and an indication if the working copy is
# clean
revCmd = ["git", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "rev-parse", "HEAD"]
diffCmd = ["git", "--no-pager", "--git-dir=" + gitDir, "--work-tree=" + prod.dir, "diff",
"--patch"]
Expand All @@ -207,27 +216,67 @@ def getEnvironmentPackages():
return packages


def getCondaPackages():
"""Get products and their versions from the conda environment.

Returns
-------
packages : `dict`
Keys (type `str`) are product names; values (type `str`) are their
versions.

Notes
-----
Returns empty result if a conda environment is not in use or can not
be queried.
"""

try:
import json
from conda.cli.python_api import Commands, run_command
except ImportError:
return {}

# Get the installed package list
versions_json = run_command(Commands.LIST, "--json")
packages = {pkg["name"]: pkg["version"] for pkg in json.loads(versions_json[0])}

# Try to work out the conda environment name and include it as a fake
# package. The "obvious" way of running "conda info --json" does give
# access to the active_prefix but takes about 2 seconds to run.
# The equivalent to the code above would be:
# info_json = run_command(Commands.INFO, "--json")
# As a compromise, look for the env name in the path to the python
# executable
match = re.search(r"/envs/(.*?)/bin/", sys.executable)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tried to see if there was a slightly more elegant way of doing this, without much luck. This may be about as good as it gets, unfortunately. Conda does seem to know some information about which environment(s) were activated, but I wasn't able to find a much better way than this.

if match:
packages["conda_env"] = match.group(1)

return packages


class Packages:
"""A table of packages and their versions.

There are a few different types of packages, and their versions are collected
in different ways:
There are a few different types of packages, and their versions are
collected in different ways:

1. Run-time libraries (e.g., cfitsio, fftw): we get their version from
interrogating the dynamic library
2. Python modules (e.g., afw, numpy; galsim is also in this group even though
we only use it through the library, because no version information is
currently provided through the library): we get their version from the
``__version__`` module variable. Note that this means that we're only aware
of modules that have already been imported.
2. Python modules (e.g., afw, numpy; galsim is also in this group even
though we only use it through the library, because no version
information is currently provided through the library): we get their
version from the ``__version__`` module variable. Note that this means
that we're only aware of modules that have already been imported.
3. Other packages provide no run-time accessible version information (e.g.,
astrometry_net): we get their version from interrogating the environment.
Currently, that means EUPS; if EUPS is replaced or dropped then we'll need
to consider an alternative means of getting this version information.
astrometry_net): we get their version from interrogating the
environment. Currently, that means EUPS; if EUPS is replaced or dropped
then we'll need to consider an alternative means of getting this version
information.
4. Local versions of packages (a non-installed EUPS package, selected with
``setup -r /path/to/package``): we identify these through the environment
(EUPS again) and use as a version the path supplemented with the ``git``
SHA and, if the git repo isn't clean, an MD5 of the diff.
``setup -r /path/to/package``): we identify these through the
environment (EUPS again) and use as a version the path supplemented with
the ``git`` SHA and, if the git repo isn't clean, an MD5 of the diff.

These package versions are collected and stored in a Packages object, which
provides useful comparison and persistence features.
Expand Down Expand Up @@ -278,6 +327,7 @@ def fromSystem(cls):
packages : `Packages`
"""
packages = {}
packages.update(getCondaPackages())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mentioned a slowdown; does fromSystem need to be cached in some way? Or is it generally called only once per process?

Copy link
Member Author

@timj timj Aug 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only expect this to be called once but on the other hand we can be pretty confident that it's not going to change whilst this process is running. It might be best to add some lru_cache around each of the get functions just in case it is called multiple times. @ktlim can you see a downside for doing that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although there is a special case: the routine that gets the python packages only reports packages that have been loaded, so if you call it, then do an import, then call it again, you will get a different answer. That won't be a problem for the Conda stuff or, I imagine, the EUPS one.

packages.update(getPythonPackages())
packages.update(getRuntimeVersions())
packages.update(getEnvironmentPackages()) # Should be last, to override products with LOCAL versions
Expand Down
4 changes: 2 additions & 2 deletions python/lsstDebug.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ class Info:

lsstDebug.Info(__name__).display = True

Why is this interesting? Because you can replace `lsstDebug.Info` with your
own version, e.g.
Why is this interesting? Because you can replace `lsstDebug.Info` with
your own version, e.g.

.. code-block:: python

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[flake8]
max-line-length = 110
max-doc-length = 79
ignore = E133, E226, E228, N802, N803, N806, N816, W503
exclude = __init__.py, tests/.tests

Expand Down
3 changes: 2 additions & 1 deletion tests/testModuleImporter2.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
class ModuleImporterTestCase(unittest.TestCase):

def testImporter(self):
# Before we import lsst, the functionality to import Python modules from C++ should not work.
# Before we import lsst, the functionality to import Python modules
# from C++ should not work.
self.assertFalse(testModuleImporterLib.doImport("math"))
# ...but after we import lsst.base.cppimport, it should.
import lsst.base.cppimport # noqa F401
Expand Down
21 changes: 12 additions & 9 deletions tests/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,27 @@ def testPython(self):
def testEnvironment(self):
"""Test getting versions from the environment

Unfortunately, none of the products that need their versions divined from the
environment are dependencies of this package, and so all we can do is test
that this doesn't fall over.
Unfortunately, none of the products that need their versions divined
from the environment are dependencies of this package, and so all we
can do is test that this doesn't fall over.
"""
lsst.base.getEnvironmentPackages()

def testRuntime(self):
"""Test getting versions from runtime libraries

Unfortunately, none of the products that we get runtime versions from are
dependencies of this package, and so all we can do is test that this doesn't
fall over.
Unfortunately, none of the products that we get runtime versions from
are dependencies of this package, and so all we can do is test that
this doesn't fall over.
"""
lsst.base.getRuntimeVersions()

def _writeTempFile(self, packages, suffix):
"""Write packages to a temp file using the supplied suffix and read
back.
"""
# Can't use lsst.utils.tests.getTempFilePath because we're its dependency
# Can't use lsst.utils.tests.getTempFilePath because we're its
# dependency
temp = tempfile.NamedTemporaryFile(prefix="packages.", suffix=suffix, delete=False)
tempName = temp.name
temp.close() # We don't use the fd, just want a filename
Expand Down Expand Up @@ -112,8 +113,10 @@ def testPackages(self):
self.assertDictEqual(new.missing(packages), {})
self.assertDictEqual(new.extra(packages), {})

# Now load an obscure python package and the list of packages should change
import smtpd # noqa Shouldn't be used by anything we've previously imported
# Now load an obscure python package and the list of packages should
# change
# Shouldn't be used by anything we've previously imported
import smtpd # noqa: F401
new = lsst.base.Packages.fromSystem()
self.assertDictEqual(packages.difference(new), {}) # No inconsistencies
self.assertDictEqual(packages.extra(new), {}) # Nothing in 'packages' that's not in 'new'
Expand Down