Skip to content

Commit

Permalink
Merge pull request #1654 from braingram/class_path
Browse files Browse the repository at this point in the history
index converters by type when no converter is found
  • Loading branch information
braingram committed May 10, 2024
2 parents 1722dcf + 7bdf1a7 commit 097dffa
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Expand Up @@ -20,6 +20,10 @@
extension was created from a manifest registered with a uri that
does not match the id in the manifest [#1785]

- Allow converters to provide types as strings that can
resolve to public classes (even if the class is implemented
in a private module). [#1654]


3.2.0 (2024-04-05)
------------------
Expand Down
65 changes: 65 additions & 0 deletions asdf/_tests/test_extension.py
@@ -1,4 +1,5 @@
import fractions
import sys

import pytest
from packaging.specifiers import SpecifierSet
Expand All @@ -18,6 +19,7 @@
Validator,
get_cached_extension_manager,
)
from asdf.extension._manager import _resolve_type
from asdf.testing.helpers import roundtrip_object


Expand Down Expand Up @@ -982,3 +984,66 @@ def from_yaml_tree(self, *args):
fn = tmp_path / "foo.asdf"
with pytest.warns(AsdfManifestURIMismatchWarning):
af.write_to(fn)


def test_resolve_type_not_imported():
path = "mailbox.Mailbox"

if "mailbox" in sys.modules:
del sys.modules["mailbox"]

assert _resolve_type(path) is None

import mailbox

assert _resolve_type(path) is mailbox.Mailbox


@pytest.mark.parametrize(
"path, obj", (("sys", sys), ("asdf.AsdfFile", AsdfFile), ("asdf.Missing", None), ("not_a_module", None))
)
def test_resolve_type(path, obj):
assert _resolve_type(path) is obj


def test_extension_converter_by_class_path():
class MailboxConverter:
tags = ["asdf://example.com/tags/mailbox-1.0.0"]
types = ["mailbox.Mailbox"]

def to_yaml_tree(self, obj, tag, ctx):
return {}

def from_yaml_tree(self, node, tag, ctx):
return None

class MailboxExtension:
tags = MailboxConverter.tags
converters = [MailboxConverter()]
extension_uri = "asdf://example.com/extensions/mailbox-1.0.0"

# grab the type so we can use it for extension_manager.get_converter_for_type
import mailbox

typ = mailbox.Mailbox
del sys.modules["mailbox"], mailbox

with config_context() as cfg:
cfg.add_extension(MailboxExtension())
extension_manager = AsdfFile().extension_manager

# make sure that registering the extension did not load the module
assert "mailbox" not in sys.modules

# as the module hasn't been loaded, the converter shouldn't be found
with pytest.raises(KeyError, match="No support available for Python type 'mailbox.Mailbox'"):
extension_manager.get_converter_for_type(typ)

# make sure inspecting the type didn't import the module
assert "mailbox" not in sys.modules

# finally, import the module and check that the converter can now be found
import mailbox

converter = extension_manager.get_converter_for_type(mailbox.Mailbox)
assert isinstance(converter.delegate, MailboxConverter)
132 changes: 112 additions & 20 deletions asdf/extension/_manager.py
@@ -1,3 +1,4 @@
import sys
from functools import lru_cache

from asdf.tagged import Tagged
Expand All @@ -6,6 +7,42 @@
from ._extension import ExtensionProxy


def _resolve_type(path):
"""
Convert a class path (like the string "asdf.AsdfFile") to a
class (``asdf.AsdfFile``) only if the module implementing the
class has already been imported.
Parameters
----------
path : str
Path/name of class (for example, "asdf.AsdfFile")
Returns
-------
typ : class or None
The class (if it's already been imported) or None
"""
if "." not in path:
# check if this path is a module
if path in sys.modules:
return sys.modules[path]
return None
# this type is part of a module
module_name, type_name = path.rsplit(".", maxsplit=1)
# if the module is not imported, don't index it
if module_name not in sys.modules:
return None
module = sys.modules[module_name]
if not hasattr(module, type_name):
# the imported module does not have this class, perhaps
# it is dynamically created so do not index it yet
return None
return getattr(module, type_name)


class ExtensionManager:
"""
Wraps a list of extensions and indexes their converters
Expand All @@ -23,9 +60,37 @@ def __init__(self, extensions):

self._tag_defs_by_tag = {}
self._converters_by_tag = {}
# This dict has both str and type keys:
self._converters_by_type = {}

# To optimize performance converters can be registered using either:
# - the class/type they convert
# - the name/path (string) of the class/type they convert
# This allows the registration to continue without importing
# every module for every extension (which would be needed to turn
# the class paths into proper classes). Using class paths can be
# complicated by packages that have private implementations of
# classes that are exposed at a different 'public' location.
# These private classes may change between minor versions
# and would break converters that are registered using the private
# class path. However, often libraries do not modify the module
# of the 'public' class (so inspecting the class path returns
# the private class path). One example of this in asdf is
# Converter (exposed as ``asdf.extension.Converter`` but with
# a class path of ``asdf.extension._converter.Converter``).
# To allow converters to be registered with the public location
# we will need to attempt to import the public class path
# and then register the private class path after the class is
# imported. We don't want to do this unnecessarily and since
# class instances do not contain the public class path
# we adopt a strategy of checking class paths and only
# registering those that have already been imported. Thiss
# is ok because asdf will only use the converter type
# when attempting to serialize an object in memory (so the
# public class path will already be imported at the time
# the converter is needed).

# first we store the converters in the order they are discovered
# the key here can either be a class path (str) or class (type)
converters_by_type = {}
validators = set()

for extension in self._extensions:
Expand All @@ -37,17 +102,27 @@ def __init__(self, extensions):
if tag not in self._converters_by_tag:
self._converters_by_tag[tag] = converter
for typ in converter.types:
if isinstance(typ, str):
if typ not in self._converters_by_type:
self._converters_by_type[typ] = converter
else:
type_class_name = get_class_name(typ, instance=False)
if typ not in self._converters_by_type and type_class_name not in self._converters_by_type:
self._converters_by_type[typ] = converter
self._converters_by_type[type_class_name] = converter
if typ not in converters_by_type:
converters_by_type[typ] = converter

validators.update(extension.validators)

self._converters_by_class_path = {}
self._converters_by_type = {}

for type_or_path, converter in converters_by_type.items():
if isinstance(type_or_path, str):
path = type_or_path
typ = _resolve_type(path)
if typ is None:
if path not in self._converters_by_class_path:
self._converters_by_class_path[path] = converter
continue
else:
typ = type_or_path
if typ not in self._converters_by_type:
self._converters_by_type[typ] = converter

self._validator_manager = _get_cached_validator_manager(tuple(validators))

@property
Expand Down Expand Up @@ -90,7 +165,10 @@ def handles_type(self, typ):
-------
bool
"""
return typ in self._converters_by_type or get_class_name(typ, instance=False) in self._converters_by_type
if typ in self._converters_by_type:
return True
self._index_converters()
return typ in self._converters_by_type

def handles_tag_definition(self, tag):
"""
Expand Down Expand Up @@ -172,18 +250,32 @@ def get_converter_for_type(self, typ):
KeyError
Unrecognized type.
"""
if typ not in self._converters_by_type:
self._index_converters()
try:
return self._converters_by_type[typ]
except KeyError:
class_name = get_class_name(typ, instance=False)
try:
return self._converters_by_type[class_name]
except KeyError:
msg = (
f"No support available for Python type '{get_class_name(typ, instance=False)}'. "
"You may need to install or enable an extension."
)
raise KeyError(msg) from None
msg = (
f"No support available for Python type '{get_class_name(typ, instance=False)}'. "
"You may need to install or enable an extension."
)
raise KeyError(msg) from None

def _index_converters(self):
"""
Search _converters_by_class_path for paths (strings) that
refer to classes that are currently imported. For imported
classes, add them to _converters_by_class (if the class
doesn't already have a converter).
"""
# search class paths to find ones that are imported
for class_path in list(self._converters_by_class_path):
typ = _resolve_type(class_path)
if typ is None:
continue
if typ not in self._converters_by_type:
self._converters_by_type[typ] = self._converters_by_class_path[class_path]
del self._converters_by_class_path[class_path]

@property
def validator_manager(self):
Expand Down
12 changes: 6 additions & 6 deletions docs/asdf/extending/converters.rst
Expand Up @@ -26,12 +26,12 @@ characters up to a ``/``, or ``**``, which matches any sequence of characters.
The `~asdf.util.uri_match` method can be used to test URI patterns.

`Converter.types` - a list of Python types or fully-qualified Python type names handled
by the converter. Note that a string name must reflect the actual location of the
class's implementation and not just a module where it is imported for convenience.
For example, if class ``Foo`` is implemented in ``example_package.foo.Foo`` but
imported as ``example_package.Foo`` for convenience, it is the former name that
must be used. The `~asdf.util.get_class_name` method will return the name that
`asdf` expects.
by the converter. For strings, the private or public path can be used. For example,
if class ``Foo`` is implemented in ``example_package.foo.Foo`` but imported
as ``example_package.Foo`` for convenience either ``example_package.foo.Foo``
or ``example_package.Foo`` can be used. As most libraries do not consider moving
where a class is implemented it is preferred to use the "public" location
where the class is imported (in this example ``example_package.Foo``).

The string type name is recommended over a type object for performance reasons,
see :ref:`extending_converters_performance`.
Expand Down

0 comments on commit 097dffa

Please sign in to comment.