/
datadumper.py
304 lines (254 loc) · 11.2 KB
/
datadumper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
"""
typedload
This module is the inverse of dataloader. It converts typed
data structures to things that json can serialize.
"""
# Copyright (C) 2018-2023 Salvo "LtWorf" Tomaselli
#
# typedload is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
import datetime
import ipaddress
from inspect import signature
from enum import Enum
import pathlib
import re
from typing import *
import uuid
from .exceptions import TypedloadValueError
from .typechecks import is_attrs, NONETYPE
# Public API of this module: only the Dumper class is exported.
__all__ = ['Dumper']
class Dumper:
    """
    This dumps data structures recursively using only
    basic types, lists and dictionaries.

    A value dumped in this way from a typed data structure
    can be loaded back using dataloader.

    hidedefault: Enabled by default.
        When enabled, does not include fields that have the
        same value as the default in the dump.

    isodates: Disabled by default.
        Will be enabled by default from version 3.
        When disabled, datetime.datetime, datetime.time, datetime.date
        are dumped as lists of ints.
        When enabled they are dumped as strings in ISO 8601 format.
        When enabled, timezone information will work.

    raiseconditionerrors: Enabled by default.
        Raises exceptions when evaluating a condition from a
        handler. When disabled, the exceptions are not raised
        and the condition is considered False.

    mangle_key: Defaults to 'name'
        Specifies which key is used in the metadata dictionaries
        to perform name-mangling.

    handlers: This is the list that the dumper uses to
        perform its task.
        The type is:
        List[
            Tuple[
                Callable[[Any], bool],
                Callable[['Dumper', Any, Any], Any]
            ]
        ]

        The elements are: Tuple[Condition, Dumper]
            Condition(value) -> Bool
            Dumper(dumper, value, value_type) -> simpler_value

        In most cases, it is sufficient to append new elements
        at the end, to handle more types.

    strconstructed: Set of types to dump to a string.

    These parameters can be set as named arguments in the constructor
    or they can be set later on.

    The constructor will accept any named argument, but only the documented
    ones have any effect. This is to allow custom handlers to have their
    own parameters as well.

    Because internal caches are used, after the first call to dump() these properties
    should no longer be modified.

    There is support for:
        * Basic python types (int, str, bool, float, NoneType)
        * NamedTuple, dataclasses, attrs, TypedDict
        * Dict[TypeA, TypeB]
        * Enum
        * List
        * Tuple
        * Set
        * FrozenSet
        * Path
        * IPv4Address, IPv6Address, IPv4Network, IPv6Network, IPv4Interface, IPv6Interface
        * datetime
    """

    def __init__(self, **kwargs) -> None:
        """
        Set the default options and handlers, then apply every
        remaining keyword argument as an attribute of the instance.
        """
        # Values whose exact type is in this set are returned unchanged
        self.basictypes = {int, bool, float, str, NONETYPE}

        self.hidedefault = True
        self.isodates = False

        # Which key is used in metadata to perform name mangling
        self.mangle_key = 'name'

        # Raise errors if the condition fails
        self.raiseconditionerrors = True

        # Things that become str. Needs to be done before handlers are created
        if 'strconstructed' in kwargs:
            self.strconstructed = kwargs.pop('strconstructed')
        else:
            self.strconstructed = {
                pathlib.Path,
                pathlib.PosixPath,
                pathlib.WindowsPath,
                ipaddress.IPv4Address,
                ipaddress.IPv6Address,
                ipaddress.IPv4Network,
                ipaddress.IPv6Network,
                ipaddress.IPv4Interface,
                ipaddress.IPv6Interface,
                uuid.UUID,
            }

        # index() returns the first matching entry, so the order matters:
        # e.g. the namedtuple check must come before the generic tuple one.
        self.handlers = [
            (lambda value: type(value) in self.basictypes, _identitydump),
            (lambda value: isinstance(value, tuple) and hasattr(value, '_fields') and hasattr(value, '_asdict'), _namedtupledump),
            (lambda value: '__dataclass_fields__' in dir(value), _dataclassdump),
            (lambda value: isinstance(value, (list, tuple, set, frozenset)), _iteratordump),
            (lambda value: isinstance(value, Enum), lambda l, value, t: l.dump(value.value)),
            (lambda value: isinstance(value, Dict), lambda l, value, t: {l.dump(k): l.dump(v) for k, v in value.items()}),
            (is_attrs, _attrdump),
            (lambda value: isinstance(value, (datetime.date, datetime.time)), _datetimedump),
            (lambda value: isinstance(value, datetime.timedelta), _timedeltadump),
            (lambda value: isinstance(value, re.Pattern), _patterndump),
            (lambda value: type(value) in self.strconstructed, lambda l, value, t: str(value)),
        ]  # type: List[Tuple[Callable[[Any], bool], Callable[['Dumper', Any, Any], Any]|Callable[['Dumper', Any], Any]]]

        # Maps the type of a value to the handler chosen for it by dump()
        self._handlerscache = {}  # type: Dict[Type[Any], Callable[['Dumper', Any, Any], Any]]
        # Per dataclass type: (field names, default values, type hints)
        self._dataclasscache = {}  # type: Dict[Type[Any], Tuple[Set[str], Dict[str, Any], Dict[str, Any]]]

        # Any other named argument becomes an attribute, so that custom
        # handlers can have their own parameters.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def index(self, value: Any) -> int:
        """
        Returns the index in the handlers list
        that matches the given value.

        If no condition matches, TypedloadValueError is raised.

        An exception raised by a condition is propagated when
        raiseconditionerrors is set; otherwise that condition
        is considered as not matching.
        """
        for i, handler in enumerate(self.handlers):
            cond = handler[0]
            try:
                match = cond(value)
            except Exception:
                if self.raiseconditionerrors:
                    raise
                match = False
            if match:
                return i
        # The value is wrapped in a 1-tuple because the % operator would
        # otherwise unpack a tuple value as the list of format arguments.
        raise TypedloadValueError('Unable to dump %s' % (value,), value=value, type_=type(value))

    def dump(self, value: Any, annotated_type=Any) -> Any:
        """
        Dump the typed data structure into its
        untyped equivalent.

        annotated_type contains the annotation for the value.
        It is not needed to provide it, but it can enable some faster code paths.
        """
        t = type(value)
        func = self._handlerscache.get(t)

        if func is None:
            # First value of this type: find its handler and remember it
            index = self.index(value)
            f = self.handlers[index][1]

            # It has no type parameter, make a lambda
            if len(signature(f).parameters) == 2:
                import warnings
                warnings.warn(
                    'The type signature for the dump handlers has changed to include type hints\n'
                    'new handlers are: f(dumper, value, annotated_type)',
                    DeprecationWarning
                )
                func = lambda d, v, _: f(d, v)  # type: ignore
            else:
                func = f  # type: ignore
            self._handlerscache[t] = func  # type: ignore

        return func(self, value, annotated_type)  # type: ignore
def _attrdump(d, value, t) -> Dict[str, Any]:
r = {}
for attr in value.__attrs_attrs__:
attrval = getattr(value, attr.name)
if not attr.repr:
continue
if d.hidedefault:
if attrval == attr.default:
continue
elif hasattr(attr.default, 'factory') and attrval == attr.default.factory():
continue
name = attr.metadata.get(d.mangle_key, attr.name)
r[name] = d.dump(attrval)
return r
def _datetimedump(d: Dumper, value: Union[datetime.time, datetime.date, datetime.datetime], t):
    """
    Dump a date, time or datetime object.

    When d.isodates is set, the result is the string
    returned by isoformat().

    Otherwise a DeprecationWarning is issued and the result
    is a list of ints:
        date: [year, month, day]
        time: [hour, minute, second, microsecond]
        datetime: the date fields followed by the time fields

    In the list form, a time or datetime carrying a tzinfo
    raises NotImplementedError.

    t is the annotated type and is not used.
    """
    if d.isodates:
        return value.isoformat()

    import warnings
    warnings.warn(
        'Dumping datetime classes as list of values is deprecated.\n'
        'You are encouraged to dump with isodates=True\n'
        'This will become the default in the next major version.',
        DeprecationWarning
    )

    # datetime is a subclass of date, so it has to be excluded explicitly
    plain_date = isinstance(value, datetime.date) and not isinstance(value, datetime.datetime)
    if plain_date:
        return [value.year, value.month, value.day]

    if value.tzinfo is not None:
        raise NotImplementedError('Dumping of tzdata object is not supported')

    clock = [value.hour, value.minute, value.second, value.microsecond]
    if isinstance(value, datetime.time):
        return clock

    # datetime.datetime
    return [value.year, value.month, value.day] + clock
def _timedeltadump(d: Dumper, value: datetime.timedelta, t) -> float:
    """
    Dump a timedelta as the value of its total_seconds().

    d and t are part of the handler signature and are not used.
    """
    seconds = value.total_seconds()
    return seconds
def _patterndump(d: Dumper, value: re.Pattern, t):
    """
    Dump a compiled regular expression as its pattern attribute.

    d and t are part of the handler signature and are not used.
    """
    source = value.pattern
    return source
def _namedtupledump(d: Dumper, value, t) -> Dict[str, Any]:
    """
    Dump a named tuple into a dictionary, one key per field.

    When d.hidedefault is set, fields listed in _field_defaults
    whose value does not differ from the default are left out.

    t is the annotated type and is not used.
    """
    defaults = getattr(value, '_field_defaults', {})
    dumped = {}
    for name, item in value._asdict().items():
        if d.hidedefault and name in defaults:
            # Same value as the default, skip it
            if not (defaults[name] != item):
                continue
        dumped[name] = d.dump(item)
    return dumped
def _dataclassdump(d: Dumper, value, t) -> Dict[str, Any]:
    """
    Dump a dataclass instance into a dictionary.

    d: the dumper; hidedefault, mangle_key, dump() and the
        _dataclasscache dictionary are used.
    value: the dataclass instance to dump.
    t: annotated type, ignored; the actual type of value is used.

    The field names, default values and type hints are computed once per
    dataclass type and stored in d._dataclasscache. This includes the
    values of the default factories, which are called only the first time.

    When hidedefault is set, fields whose value is equal to the default
    are left out.
    The key is taken from the field metadata under mangle_key,
    falling back to the field name.
    """
    t = type(value)
    cached = d._dataclasscache.get(t)

    if cached is None:
        from dataclasses import _MISSING_TYPE as DT_MISSING_TYPE
        dc_fields = value.__dataclass_fields__
        fields = set(dc_fields.keys())
        field_defaults = {k: v.default for k, v in dc_fields.items() if not isinstance(v.default, DT_MISSING_TYPE)}
        field_factories = {k: v.default_factory() for k, v in dc_fields.items() if not isinstance(v.default_factory, DT_MISSING_TYPE)}
        defaults = {**field_defaults, **field_factories}  # Merge the two dictionaries

        # The hints are resolved on the class and not on the instance:
        # get_type_hints() only treats classes specially (module globals
        # to evaluate string annotations, annotations of the base classes).
        try:
            type_hints = get_type_hints(t)
        except (NameError, TypeError):
            # The hints only enable faster code paths in dump(),
            # so failing to resolve them must not prevent dumping.
            type_hints = {}
        d._dataclasscache[t] = (fields, defaults, type_hints)
    else:
        fields, defaults, type_hints = cached

    r = {
        value.__dataclass_fields__[f].metadata.get(d.mangle_key, f): d.dump(getattr(value, f), type_hints.get(f, Any))
        for f in fields
        if not d.hidedefault or f not in defaults or defaults[f] != getattr(value, f)
    }
    return r
def _iteratordump(d: Dumper, value: Any, t: Any) -> List[Any]:
    """
    Dump an iterable into a list.

    t is the annotated type of value. When its __args__ describe a
    single element type (one argument, or two arguments with the second
    being an ellipsis) that is in d.basictypes, and the first handler of
    the dumper is still _identitydump, the elements are placed in a new
    list without being dumped one by one.

    In every other case each element is passed to d.dump().
    """
    args = getattr(t, '__args__', (Any, ))

    # list[T] or tuple[T, ...]; tuples with one type per position are excluded
    homogeneous = (len(args) == 1) or (len(args) == 2 and args[1] == ...)  # type: ignore
    element_type = args[0] if homogeneous else Any

    if element_type in d.basictypes and d.handlers[0][1] == _identitydump:
        # Iterable of basic types, unchanged default handler for basic types:
        # a shallow copy is all that is needed
        return value.copy() if isinstance(value, list) else list(value)

    return [d.dump(item) for item in value]
def _identitydump(d: Dumper, value: Any, t: Any) -> Any:
    """
    Return value as it is.

    d and t are part of the handler signature and are not used.
    """
    return value