Skip to content

Commit

Permalink
bpo-33453: Handle string type annotations in dataclasses. (GH-6768)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericvsmith committed May 16, 2018
1 parent d8dcd57 commit 2a7bacb
Show file tree
Hide file tree
Showing 7 changed files with 399 additions and 20 deletions.
134 changes: 116 additions & 18 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import sys
import copy
import types
Expand Down Expand Up @@ -187,6 +188,10 @@ def __repr__(self):
# __init__.
_POST_INIT_NAME = '__post_init__'

# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')

class _InitVarMeta(type):
def __getitem__(self, params):
Expand Down Expand Up @@ -532,6 +537,80 @@ def _hash_fn(fields):
[f'return hash({self_tuple})'])


def _is_classvar(a_type, typing):
if typing:
# This test uses a typing internal class, but it's the best
# way to test if this is a ClassVar.
return (a_type is typing.ClassVar
or (type(a_type) is typing._GenericAlias
and a_type.__origin__ is typing.ClassVar))


def _is_initvar(a_type, dataclasses):
# The module we're checking against is the module we're
# currently in (dataclasses.py).
return a_type is dataclasses.InitVar


def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
# Given a type annotation string, does it refer to a_type in
# a_module? For example, when checking that annotation denotes a
# ClassVar, then a_module is typing, and a_type is
# typing.ClassVar.

# It's possible to look up a_module given a_type, but it involves
# looking in sys.modules (again!), and seems like a waste since
# the caller already knows a_module.

# - annotation is a string type annotation
# - cls is the class that this annotation was found in
# - a_module is the module we want to match
# - a_type is the type in that module we want to match
# - is_type_predicate is a function called with (obj, a_module)
# that determines if obj is of the desired type.

# Since this test does not do a local namespace lookup (and
# instead only a module (global) lookup), there are some things it
# gets wrong.

# With string annotations, this will work:
# CV = ClassVar
# @dataclass
# class C0:
# cv0: CV

# But this will not:
# @dataclass
# class C1:
# CV = ClassVar
# cv1: CV

# In C1, the code in this function will look up "CV" in the module
# and not find it, so it will not consider cv1 as a ClassVar.
# This is a fairly obscure corner case, and the best way to fix it
# would be to eval() the string "CV" with the correct global and
# local namespaces. However that would involve a eval() penalty
# for every single field of every dataclass that's defined. It
# was judged not worth it.

match = _MODULE_IDENTIFIER_RE.match(annotation)
if match:
ns = None
module_name = match.group(1)
if not module_name:
# No module name, assume the class's module did
# "from dataclasses import InitVar".
ns = sys.modules.get(cls.__module__).__dict__
else:
# Look up module_name in the class's module.
module = sys.modules.get(cls.__module__)
if module and module.__dict__.get(module_name) is a_module:
ns = sys.modules.get(a_type.__module__).__dict__
if ns and is_type_predicate(ns.get(match.group(2)), a_module):
return True
return False


def _get_field(cls, a_name, a_type):
# Return a Field object for this field name and type. ClassVars
# and InitVars are also returned, but marked as such (see
Expand All @@ -548,34 +627,54 @@ def _get_field(cls, a_name, a_type):
default = MISSING
f = field(default=default)

# Assume it's a normal field until proven otherwise.
f._field_type = _FIELD

# Only at this point do we know the name and the type. Set them.
f.name = a_name
f.type = a_type

# If typing has not been imported, then it's impossible for
# any annotation to be a ClassVar. So, only look for ClassVar
# if typing has been imported.
# Assume it's a normal field until proven otherwise. We're next
# going to decide if it's a ClassVar or InitVar, everything else
# is just a normal field.
f._field_type = _FIELD

# In addition to checking for actual types here, also check for
# string annotations. get_type_hints() won't always work for us
# (see https://github.com/python/typing/issues/508 for example),
# plus it's expensive and would require an eval for every stirng
# annotation. So, make a best effort to see if this is a
# ClassVar or InitVar using regex's and checking that the thing
# referenced is actually of the correct type.

# For the complete discussion, see https://bugs.python.org/issue33453

# If typing has not been imported, then it's impossible for any
# annotation to be a ClassVar. So, only look for ClassVar if
# typing has been imported by any module (not necessarily cls's
# module).
typing = sys.modules.get('typing')
if typing is not None:
if typing:
# This test uses a typing internal class, but it's the best
# way to test if this is a ClassVar.
if (type(a_type) is typing._GenericAlias and
a_type.__origin__ is typing.ClassVar):
# This field is a ClassVar, so it's not a field.
if (_is_classvar(a_type, typing)
or (isinstance(f.type, str)
and _is_type(f.type, cls, typing, typing.ClassVar,
_is_classvar))):
f._field_type = _FIELD_CLASSVAR

# If the type is InitVar, or if it's a matching string annotation,
# then it's an InitVar.
if f._field_type is _FIELD:
# Check if this is an InitVar.
if a_type is InitVar:
# InitVars are not fields, either.
# The module we're checking against is the module we're
# currently in (dataclasses.py).
dataclasses = sys.modules[__name__]
if (_is_initvar(a_type, dataclasses)
or (isinstance(f.type, str)
and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
_is_initvar))):
f._field_type = _FIELD_INITVAR

# Validations for fields. This is delayed until now, instead of
# in the Field() constructor, since only here do we know the field
# name, which allows better error reporting.
# Validations for individual fields. This is delayed until now,
# instead of in the Field() constructor, since only here do we
# know the field name, which allows for better error reporting.

# Special restrictions for ClassVar and InitVar.
if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
Expand Down Expand Up @@ -605,7 +704,6 @@ def _set_new_attribute(cls, name, value):
return False



# Decide if/how we're going to create a hash function. Key is
# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
# take. The common case is to do nothing, so instead of providing a
Expand Down Expand Up @@ -865,7 +963,7 @@ def fields(class_or_instance):

# Might it be worth caching this, per class?
try:
fields = getattr(class_or_instance, _FIELDS)
fields = getattr(class_or_instance, _FIELDS)
except AttributeError:
raise TypeError('must be called with a dataclass type or instance')

Expand Down
32 changes: 32 additions & 0 deletions Lib/test/dataclass_module_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#from __future__ import annotations
USING_STRINGS = False

# dataclass_module_1.py and dataclass_module_1_str.py are identical
# except only the latter uses string annotations.

import dataclasses
import typing

T_CV2 = typing.ClassVar[int]
T_CV3 = typing.ClassVar

T_IV2 = dataclasses.InitVar[int]
T_IV3 = dataclasses.InitVar

@dataclasses.dataclass
class CV:
T_CV4 = typing.ClassVar
cv0: typing.ClassVar[int] = 20
cv1: typing.ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.

@dataclasses.dataclass
class IV:
T_IV4 = dataclasses.InitVar
iv0: dataclasses.InitVar[int]
iv1: dataclasses.InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
32 changes: 32 additions & 0 deletions Lib/test/dataclass_module_1_str.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations
USING_STRINGS = True

# dataclass_module_1.py and dataclass_module_1_str.py are identical
# except only the latter uses string annotations.

import dataclasses
import typing

T_CV2 = typing.ClassVar[int]
T_CV3 = typing.ClassVar

T_IV2 = dataclasses.InitVar[int]
T_IV3 = dataclasses.InitVar

@dataclasses.dataclass
class CV:
T_CV4 = typing.ClassVar
cv0: typing.ClassVar[int] = 20
cv1: typing.ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.

@dataclasses.dataclass
class IV:
T_IV4 = dataclasses.InitVar
iv0: dataclasses.InitVar[int]
iv1: dataclasses.InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
32 changes: 32 additions & 0 deletions Lib/test/dataclass_module_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#from __future__ import annotations
USING_STRINGS = False

# dataclass_module_2.py and dataclass_module_2_str.py are identical
# except only the latter uses string annotations.

from dataclasses import dataclass, InitVar
from typing import ClassVar

T_CV2 = ClassVar[int]
T_CV3 = ClassVar

T_IV2 = InitVar[int]
T_IV3 = InitVar

@dataclass
class CV:
T_CV4 = ClassVar
cv0: ClassVar[int] = 20
cv1: ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.

@dataclass
class IV:
T_IV4 = InitVar
iv0: InitVar[int]
iv1: InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
32 changes: 32 additions & 0 deletions Lib/test/dataclass_module_2_str.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations
USING_STRINGS = True

# dataclass_module_2.py and dataclass_module_2_str.py are identical
# except only the latter uses string annotations.

from dataclasses import dataclass, InitVar
from typing import ClassVar

T_CV2 = ClassVar[int]
T_CV3 = ClassVar

T_IV2 = InitVar[int]
T_IV3 = InitVar

@dataclass
class CV:
T_CV4 = ClassVar
cv0: ClassVar[int] = 20
cv1: ClassVar = 30
cv2: T_CV2
cv3: T_CV3
not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.

@dataclass
class IV:
T_IV4 = InitVar
iv0: InitVar[int]
iv1: InitVar
iv2: T_IV2
iv3: T_IV3
not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
Loading

0 comments on commit 2a7bacb

Please sign in to comment.