-
Notifications
You must be signed in to change notification settings - Fork 60
/
core.py
2595 lines (2110 loc) · 87.8 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""*glom gets results.*
The ``glom`` package has one central entrypoint,
:func:`glom.glom`. Everything else in the package revolves around that
one function. Sometimes, big things come in small packages.
A couple of conventional terms you'll see repeated many times below:
* **target** - glom is built to work on any data, so we simply
refer to the object being accessed as the *"target"*
* **spec** - *(aka "glomspec", short for specification)* The
accompanying template used to specify the structure of the return
value.
Now that you know the terms, let's take a look around glom's powerful
semantics.
"""
from __future__ import print_function
import os
import sys
import pdb
import copy
import warnings
import weakref
import operator
from abc import ABCMeta
from pprint import pprint
import string
from collections import OrderedDict
import traceback
from face.helpers import get_wrap_width
from boltons.typeutils import make_sentinel
from boltons.iterutils import is_iterable
#from boltons.funcutils import format_invocation
PY2 = (sys.version_info[0] == 2)
if PY2:
_AbstractIterableBase = object
from .chainmap_backport import ChainMap
from repr import Repr
from .reprlib_backport import recursive_repr
else:
basestring = str
_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})
from collections import ChainMap
from reprlib import Repr, recursive_repr
# Debug switch, read once at import time from the GLOM_DEBUG environment
# variable. Unset, empty, "0" and "false" (any case, surrounding whitespace
# ignored) leave it off; every other value turns it on.
GLOM_DEBUG = os.getenv('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')
# Default width used when formatting target-spec traces; never narrower
# than 50 columns. (max_width=110 is handed to face's get_wrap_width --
# presumably an upper cap on the detected width; confirm in face.helpers.)
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50) # min width

PATH_STAR = False
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# (will change to True in a later version)

# Alias for the builtin ``type``.
# NOTE(review): presumably kept so code can still reach the builtin where
# ``type`` is shadowed by a parameter name -- confirm against usages.
_type_type = type

# Private "no value supplied" marker, used wherever None is a legal value.
_MISSING = make_sentinel('_MISSING')

SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.
>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}
Mostly used to drop keys from dicts (as above) or filter objects from
lists.
.. note::
SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
will remove the OMIT alias entirely.
"""
OMIT = SKIP # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.
>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

# The sentinels below are used as keys into glom's scope mapping (see
# _unpack_stack, which reads LAST_CHILD_SCOPE, CHILD_ERRORS and CUR_ERROR).
LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed. Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""
NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

# NOTE(review): MODE and MIN_MODE carry no documentation in this part of
# the file; presumably scope keys selecting the active evaluation mode --
# confirm against the code that reads them.
MODE = make_sentinel('MODE')
MIN_MODE = make_sentinel('MIN_MODE')

CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""
CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Absolute directory of this module; GlomError._finalize looks for it in
# traceback lines to find where glom's own frames end.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))
class GlomError(Exception):
    """Root of the exception hierarchy raised by :func:`glom`
    processing logic.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Build an exception that is both a GlomError and an instance of
        ``type(exc)``, carrying the same ``args`` as *exc*.

        Returns *exc* itself, unchanged, if the wrapper cannot be
        constructed from ``exc.args``.
        """
        # TODO: need to test this against a wide array of exception types
        # this approach to wrapping errors works for exceptions
        # defined in pure-python as well as C
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            # GlomError already is-a orig_type (e.g. Exception); listing
            # orig_type ahead of it could not produce a consistent MRO.
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_name = "GlomError.wrap({})".format(orig_type.__name__)
        wrapper_type = type(wrapper_name, bases, {})
        try:
            wrapper = wrapper_type(*exc.args)
            wrapper.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapper

    def _set_wrapped(self, exc):
        """Record *exc* as the original error this instance stands in for."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the current traceback text and *scope* so that
        ``__str__`` can later render the full target-spec trace.
        """
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        etype, evalue, _ = sys.exc_info()
        tb_lines = traceback.format_exc().strip().splitlines()
        # Walk the traceback bottom-up, counting lines until the first one
        # that mentions the package directory; that hit takes one back off
        # the count. The count is then used as a tail length.
        limit = 0
        for line in reversed(tb_lines):
            if _PKG_DIR_PATH in line:
                limit -= 1
                break
            limit += 1
        self._tb_lines = tb_lines[-limit:]
        self._scope = scope

    def __str__(self):
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return cached

        if getattr(self, '_scope', None) is not None:
            # finalized: render once, then serve the cached text
            trace = format_target_spec_trace(self._scope, self.__wrapped)
            self._target_spec_trace = trace
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     trace]
            lines.extend(self._tb_lines)
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # not finalized: prefer the subclass's get_message(), falling back
        # to the plain Exception text
        render = getattr(self, 'get_message', super(GlomError, self).__str__)
        return render()
def _unpack_stack(scope, only_errors=True):
    """
    convert scope to [[scope, spec, target, error, [children]]]

    this is a convenience method for printing stacks

    The walk starts at ``scope.maps[0]`` and follows ``LAST_CHILD_SCOPE``
    links downward, emitting one five-item list per level.

    only_errors=True means ignore branches which may still be hanging around
    which were not involved in the stack trace of the error

    only_errors=False could be useful for debugger / introspection (similar
    to traceback.print_stack())
    """
    stack = []
    scope = scope.maps[0]
    while LAST_CHILD_SCOPE in scope:
        child = scope[LAST_CHILD_SCOPE]
        branches = scope[CHILD_ERRORS]
        if branches == [child]:
            branches = []  # if there's only one branch, count it as linear
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), branches])

        # NB: this id() business is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if id(child) in [id(b) for b in branches]:
            break  # if child already covered by branches, stop the linear descent

        scope = child.maps[0]
    else:  # if break executed above, cur scope was already added
        # loop ended without break: `scope` is the innermost level (it has
        # no LAST_CHILD_SCOPE) and has not been recorded yet
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), []])

    # push errors "down" to where they were first raised / first observed
    for i in range(len(stack) - 1):
        cur, nxt = stack[i], stack[i + 1]
        if cur[3] == nxt[3]:
            cur[3] = None

    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(stack) > 1 and stack[-1][3] is None:
            stack.pop()

    return stack
def _format_trace_value(value, maxlen):
    """Render *value* with ``bbrepr`` for a trace line, cut down to
    *maxlen* characters.

    When the text is too long it is truncated and given a ``...`` suffix,
    which also reports ``len(value)`` when the value supports ``len()``.
    """
    text = bbrepr(value).replace("\\'", "'")
    if len(text) <= maxlen:
        return text

    try:
        tail = '... (len=%s)' % len(value)
    except Exception:
        # value has no usable len(); fall back to a bare ellipsis
        tail = '...'
    return text[:maxlen - len(tail)] + tail
def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    unpack a scope into a multi-line but short summary

    Args:
        scope: the scope to unpack (passed to ``_unpack_stack``).
        root_error: an error recorded on a level is only printed when it
            is not this object.
        width: maximum width of each formatted Target/Spec line.
        depth: nesting level; each level adds one ``|`` to the indent.
            Recursive calls for branches pass ``depth + 1``.
        prev_target: the target most recently printed; a level's target is
            only printed when it is not this same object.
        last_branch: whether this call renders the last branch of its
            parent; together with a trailing error line it decides
            whether the final line gets an ``X`` marker.

    Returns:
        The trace as a single newline-joined string.
    """
    segments = []
    indent = " " + "|" * depth
    tick = "| " if depth else "- "

    def mk_fmt(label, t=None):
        # build a one-argument formatter whose prefix is fixed up front and
        # whose value is truncated to fit the remaining width
        pre = indent + (t or tick) + label + ": "
        fmt_width = width - len(pre)
        return lambda v: pre + _format_trace_value(v, fmt_width)

    fmt_t = mk_fmt("Target")
    fmt_s = mk_fmt("Spec")
    fmt_b = mk_fmt("Spec", "+ ")  # spec line for a level that has branches
    # NB: prev_target is looked up when recurse() is called, not here, so
    # branches see the target most recently printed by the loop below
    recurse = lambda s, last=False: format_target_spec_trace(s, root_error, width, depth + 1, prev_target, last)
    # format_exception_only() output ends with a newline; [:-1] drops it
    tb_exc_line = lambda e: "".join(traceback.format_exception_only(type(e), e))[:-1]
    fmt_e = lambda e: indent + tick + tb_exc_line(e)

    for scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            segments.append(fmt_t(target))
        prev_target = target
        if branches:
            segments.append(fmt_b(spec))
            segments.extend([recurse(s) for s in branches[:-1]])
            segments.append(recurse(branches[-1], last_branch))
        else:
            segments.append(fmt_s(spec))
        if error is not None and error is not root_error:
            last_line_error = True
            segments.append(fmt_e(error))
        else:
            last_line_error = False

    if depth:  # \ on first line, X on last line
        # overwrite the single character at column depth + 1 with the marker
        remark = lambda s, m: s[:depth + 1] + m + s[depth + 2:]
        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)
# TODO: not used (yet)
def format_oneline_trace(scope):
    """
    Collapse a scope into the shortest possible one-line summary.

    Each level contributes ``/`` plus its spec (the bbrepr for T and Path
    specs, otherwise just the spec's type name), then ``!`` plus the
    target's type name when the target changed, then the scope's path as
    ``<a->b>`` when the scope has one.
    """
    # a form of delta-compression: a target that compares equal to the
    # previous level's target is not repeated
    pieces = []
    last_target = _MISSING
    for level_scope, spec, target, _error, _branches in _unpack_stack(scope, only_errors=False):
        pieces.append('/')
        if type(spec) in (TType, Path):
            spec_text = bbrepr(spec)
        else:
            spec_text = type(spec).__name__
        pieces.append(spec_text)

        if target != last_target:
            pieces.extend(['!', type(target).__name__])

        if Path in level_scope:
            pieces.extend(['<', '->'.join([str(p) for p in level_scope[Path]]), '>'])

        last_target = target

    return "".join(pieces)
class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """This :exc:`GlomError` subtype represents a failure to access an
    attribute as dictated by the spec. The most commonly-seen error
    when using glom, it maintains a copy of the original exception and
    produces a readable error message for easy debugging.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc = exc
        self.path = path
        self.part_idx = part_idx

    def __copy__(self):
        # py27 struggles to copy PAE without this method
        cls = type(self)
        return cls(self.exc, self.path, self.part_idx)

    def get_message(self):
        """Describe which part of the path failed and the error it hit."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return 'could not access %r, part %r of %r, got error: %r' % details

    def __repr__(self):
        fields = (self.__class__.__name__, self.exc, self.path, self.part_idx)
        return '%s(%r, %r, %r)' % fields
class PathAssignError(GlomError):
    """This :exc:`GlomError` subtype is raised when an assignment fails,
    stemming from an :func:`~glom.assign` call or other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc = exc
        self.path = path
        self.dest_name = dest_name

    def __copy__(self):
        # py27 struggles to copy PAE without this method
        cls = type(self)
        return cls(self.exc, self.path, self.dest_name)

    def get_message(self):
        """Describe the failed destination, its path, and the error hit."""
        details = (self.dest_name, self.path, self.exc)
        return 'could not assign %r on object at %r, got error: %r' % details

    def __repr__(self):
        fields = (self.__class__.__name__, self.exc, self.path, self.dest_name)
        return '%s(%r, %r, %r)' % fields
class CoalesceError(GlomError):
    """This :exc:`GlomError` subtype is raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj = coal_obj
        self.skipped = skipped
        self.path = path

    def __copy__(self):
        # py27 struggles to copy PAE without this method
        cls = type(self)
        return cls(self.coal_obj, self.skipped, self.path)

    def __repr__(self):
        fields = (self.__class__.__name__, self.coal_obj, self.skipped, self.path)
        return '%s(%r, %r, %r)' % fields

    def get_message(self):
        """Summarize the subspecs tried, what each one produced, and any
        non-default ``skip`` / ``skip_exc`` settings of the Coalesce.
        """
        coal = self.coal_obj
        tried = tuple(coal.subspecs)

        outcomes = []
        for item in self.skipped:
            type_name = item.__class__.__name__
            if isinstance(item, coal.skip_exc):
                # a caught exception is reported by its type name alone
                outcomes.append(type_name)
            else:
                # anything else is a value rejected by the skip setting
                outcomes.append('<skipped %s>' % type_name)

        msg = ('no valid values found. Tried %r and got (%s)'
               % (tried, ', '.join(outcomes)))
        if coal.skip is not _MISSING:
            msg += ', skip set to %r' % (coal.skip,)
        if coal.skip_exc is not GlomError:
            msg += ', skip_exc set to %r' % (coal.skip_exc,)
        if self.path is not None:
            msg += ' (at path %r)' % (self.path,)
        return msg
class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Because it derives from both :exc:`GlomError` and :exc:`TypeError`,
    it is caught by handlers for either.
    """
class UnregisteredTarget(GlomError):
    """This :class:`GlomError` subtype is raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op = op
        self.target_type = target_type
        self.type_map = type_map
        self.path = path
        super(UnregisteredTarget, self).__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (self.__class__.__name__, self.op, self.target_type.__name__,
                  self.type_map, self.path)
        return '%s(%r, <type %r>, %r, %r)' % fields

    def get_message(self):
        """Explain which operation failed for which target type, listing
        the type names that do have a handler for that operation.
        """
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))

        # only types mapped to a truthy handler count as registered
        registered = sorted([typ.__name__
                             for typ, handler in self.type_map.items()
                             if handler])
        # an empty list joins to '', so this renders '()' with no special case
        registered_text = '(%s)' % ', '.join(registered)

        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, registered_text))
        if self.path:
            msg += ' (at %r)' % (self.path,)
        return msg
if getattr(__builtins__, '__dict__', None) is not None:
    # pypy's __builtins__ is a module, as is CPython's REPL, but at
    # normal execution time it's a dict?
    __builtins__ = __builtins__.__dict__


# Reverse lookup from id(builtin object) to its builtin name, so that
# _BBRepr can print a builtin by name instead of its <angle-bracket> repr.
_BUILTIN_ID_NAME_MAP = {id(obj): name for name, obj in __builtins__.items()}
# on py27, Repr is an old-style class, hence the lack of super() below
class _BBRepr(Repr):
    """A better repr for builtins, when the built-in repr isn't
    roundtrippable.
    """
    def __init__(self):
        Repr.__init__(self)
        # turn up all the length limits very high. Only integer-valued
        # attributes are limits: newer Pythons also keep non-integer
        # settings on the instance (``fillvalue``, a string, and ``indent``,
        # None by default), and overwriting those with 1024 corrupts the
        # output.
        for name, value in list(self.__dict__.items()):
            if isinstance(value, int) and not isinstance(value, bool):
                setattr(self, name, 1024)

    def repr1(self, x, level):
        """Return the stock repr of *x*, unless it is an angle-bracket
        placeholder for an object that is a builtin, in which case the
        builtin's name is returned instead.
        """
        ret = Repr.repr1(self, x, level)
        if not ret.startswith('<'):
            return ret
        return _BUILTIN_ID_NAME_MAP.get(id(x), ret)


bbrepr = recursive_repr()(_BBRepr().repr)
class _BBReprFormatter(string.Formatter):
    """
    A ``string.Formatter`` whose ``!r`` conversion goes through bbrepr
    rather than the builtin repr; all other conversions are unchanged.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super(_BBReprFormatter, self).convert_field(value, conversion)
        # un-escape single quotes that the repr escaped
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format
# TODO: push this back up to boltons with repr kwarg
def format_invocation(name='', args=(), kwargs=None, **kw):
    """Given a name, positional arguments, and keyword arguments, format
    a basic Python-style function call.

    *kwargs* may be a dict (rendered in sorted key order) or an iterable
    of ``(key, value)`` pairs (rendered in the given order). The only
    extra keyword accepted is ``repr``, the callable used to render each
    value; it defaults to ``bbrepr``. Any other extra keyword raises
    :exc:`TypeError`.

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)
    """
    try:
        _repr = kw.pop('repr')
    except KeyError:
        _repr = bbrepr
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))

    kwargs = kwargs or {}
    positional_text = ', '.join([_repr(arg) for arg in args])

    if isinstance(kwargs, dict):
        pairs = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        pairs = kwargs
    keyword_text = ', '.join(['%s=%s' % (key, _repr(val)) for key, val in pairs])

    # join only the non-empty halves so no stray separator appears
    arg_text = ', '.join([text for text in (positional_text, keyword_text) if text])
    return '%s(%s)' % (name, arg_text)
class Path(object):
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Indexing with an integer outside the path raises :exc:`IndexError`.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        if isinstance(path_parts[0], TType):
            # a leading T-like object becomes the root of the new path
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # splice every (op, arg) pair of the sub-path onto ours
                sub_parts = _T_PATHS[part]
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # any other value is a single 'P' (generic path) step
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    # set once the '*' / '**' behavior-change warning has been issued
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        Results are cached by *text*; once the cache holds more than
        ``_MAX_CACHE`` entries, new texts are built without being cached.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' will changed behavior in a future glom version."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # the stored tuple is (root, op, arg, op, arg, ...)
        return (len(_T_PATHS[self.path_t]) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return _T_PATHS[self.path_t] == _T_PATHS[other.path_t]
        elif type(other) is TType:
            return _T_PATHS[self.path_t] == _T_PATHS[other]
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = _T_PATHS[self.path_t]
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = _T_PATHS[self.path_t]
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path begins with *other*, which may be a
        string, a Path, or a T object. Raises :exc:`TypeError` otherwise.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = _T_PATHS[other]
        return _T_PATHS[self.path_t][:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = _T_PATHS[self.path_t]
        if t_path[0] is S:
            new_t = TType()
            _T_PATHS[new_t] = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the step(s) selected by the integer
        or slice *i*. An integer outside the path raises IndexError.
        """
        cur_t_path = _T_PATHS[self.path_t]
        # Step n of the path lives at tuple positions 2n + 1 (op) and
        # 2n + 2 (arg); position 0 is the root. Indices are translated to
        # tuple offsets accordingly.
        try:
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            start = (start * 2) + 1 if start >= 0 else (start * 2) + len(cur_t_path)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else (stop * 2) + len(cur_t_path)
        except AttributeError:
            # not a slice: treat i as a single integer index
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + len(cur_t_path)
            # start == len(cur_t_path) is one past the final step, so it is
            # out of range too (otherwise path[len(path)] would quietly
            # produce an empty Path)
            if start < 0 or start >= len(cur_t_path):
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # apply the step to (op, arg) pairs, then flatten back out
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        _T_PATHS[new_t] = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(_T_PATHS[self.path_t][1:])
def _format_path(t_path):
    """Render a flat ``(op, arg, op, arg, ...)`` sequence as text.

    'P' steps become plain ``Path(...)`` arguments; runs of any other
    operations are grouped and rendered with ``_format_t``. A sequence
    with no 'P' step at all (and at least one other step) is rendered
    entirely by ``_format_t``.
    """
    rendered_args = []  # 'P' args, plus lists holding runs of T-style steps
    t_run = []          # the run of non-'P' steps currently being collected
    for pos in range(0, len(t_path), 2):
        op, arg = t_path[pos], t_path[pos + 1]
        if op != 'P':
            t_run.extend((op, arg))
            continue
        if t_run:
            # a 'P' step closes the run collected so far
            rendered_args.append(t_run)
            t_run = []
        rendered_args.append(arg)

    if rendered_args and t_run:
        rendered_args.append(t_run)

    if not rendered_args and t_run:
        return _format_t(t_run)

    texts = []
    for part in rendered_args:
        if type(part) is list:
            texts.append(_format_t(part))
        else:
            texts.append(repr(part))
    return 'Path(%s)' % ', '.join(texts)
class Spec(object):
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope or {}

    def glom(self, target, **kw):
        """Run this Spec against *target*. A ``scope`` keyword, if given,
        is layered over the Spec's own scope values.
        """
        merged = dict(self.scope)
        merged.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(merged)
        # the scope may supply its own glom callable under the ``glom`` key
        runner = merged.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        scope.update(self.scope)
        glom_ = scope[glom]
        return glom_(target, self.spec, scope)

    def __repr__(self):
        cn = self.__class__.__name__
        spec_text = bbrepr(self.spec)
        if not self.scope:
            return '%s(%s)' % (cn, spec_text)
        return '%s(%s, scope=%r)' % (cn, spec_text, self.scope)
class Coalesce(object):
    """Coalesce objects specify fallback behavior for a list of
    subspecs.

    Subspecs are passed as positional arguments, and keyword arguments
    control defaults. Each subspec is evaluated in turn, and if none
    match, a :exc:`CoalesceError` is raised, or a default is returned,
    depending on the options used.

    .. note::

       This operation may seem very familar if you have experience with
       `SQL`_ or even `C# and others`_.


    In practice, this fallback behavior's simplicity is only surpassed
    by its utility:

    >>> target = {'c': 'd'}
    >>> glom(target, Coalesce('a', 'b', 'c'))
    'd'

    glom tries to get ``'a'`` from ``target``, but gets a
    KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,
    glom *coalesces* into the next subspec, ``'b'``. The process
    repeats until it gets to ``'c'``, which returns our value,
    ``'d'``. If our value weren't present, we'd see:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    Same process, but because ``target`` is empty, we get a
    :exc:`CoalesceError`.

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.


    If we want to avoid an exception, and we know which value we want
    by default, we can set *default*:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')
    'd-fault'

    ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

    Args:

       subspecs: One or more glommable subspecs
       default: A value to return if no subspec results in a valid value
       default_factory: A callable whose result will be returned as a default
       skip: A value, tuple of values, or predicate function
         representing values to ignore
       skip_exc: An exception or tuple of exception types to catch and
         move on to the next subspec. Defaults to :exc:`GlomError`, the
         parent type of all glom runtime exceptions.

    Raises:

       ValueError: if both *default* and *default_factory* are passed
       TypeError: if any other keyword argument is passed

    If all subspecs produce skipped values or exceptions, a
    :exc:`CoalesceError` will be raised. For more examples, check out
    the :doc:`tutorial`, which makes extensive use of Coalesce.

    .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE
    .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

    """
    def __init__(self, *subspecs, **kwargs):
        self.subspecs = subspecs
        self._orig_kwargs = dict(kwargs)  # kept verbatim for __repr__
        self.default = kwargs.pop('default', _MISSING)
        self.default_factory = kwargs.pop('default_factory', _MISSING)
        # compare against the sentinel rather than relying on truthiness,
        # so that a falsy default (None, 0, '') passed together with
        # default_factory is still reported as a conflict
        if self.default is not _MISSING and self.default_factory is not _MISSING:
            raise ValueError('expected one of "default" or "default_factory", not both')
        self.skip = kwargs.pop('skip', _MISSING)
        # normalize every accepted form of skip into one predicate
        if self.skip is _MISSING:
            self.skip_func = lambda v: False
        elif callable(self.skip):
            self.skip_func = self.skip
        elif isinstance(self.skip, tuple):
            self.skip_func = lambda v: v in self.skip
        else:
            self.skip_func = lambda v: v == self.skip
        self.skip_exc = kwargs.pop('skip_exc', GlomError)
        if kwargs:
            raise TypeError('unexpected keyword args: %r' % (sorted(kwargs.keys()),))

    def glomit(self, target, scope):
        skipped = []
        for subspec in self.subspecs:
            try:
                ret = scope[glom](target, subspec, scope)
                if not self.skip_func(ret):
                    break
                skipped.append(ret)
            except self.skip_exc as e:
                skipped.append(e)
                continue
        else:
            # no subspec produced an acceptable value
            if self.default is not _MISSING:
                ret = arg_val(target, self.default, scope)
            elif self.default_factory is not _MISSING:
                ret = self.default_factory()
            else:
                raise CoalesceError(self, skipped, scope[Path])
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        return format_invocation(cn, self.subspecs, self._orig_kwargs, repr=bbrepr)
class Inspect(object):
"""The :class:`~glom.Inspect` specifier type provides a way to get
visibility into glom's evaluation of a specification, enabling
debugging of those tricky problems that may arise with unexpected
data.
:class:`~glom.Inspect` can be inserted into an existing spec in one of two
ways. First, as a wrapper around the spec in question, or second,
as an argument-less placeholder wherever a spec could be.
:class:`~glom.Inspect` supports several modes, controlled by
keyword arguments. Its default, no-argument mode, simply echos the
state of the glom at the point where it appears:
>>> target = {'a': {'b': {}}}
>>> val = glom(target, Inspect('a.b')) # wrapping a spec
---
path: ['a.b']
target: {'a': {'b': {}}}
output: {}
---
Debugging behavior aside, :class:`~glom.Inspect` has no effect on
values in the target, spec, or result.
Args:
echo (bool): Whether to print the path, target, and output of
each inspected glom. Defaults to True.
recursive (bool): Whether or not the Inspect should be applied
at every level, at or below the spec that it wraps. Defaults
to False.
breakpoint (bool): This flag controls whether a debugging prompt
should appear before evaluating each inspected spec. Can also
take a callable. Defaults to False.
post_mortem (bool): This flag controls whether exceptions
should be caught and interactively debugged with :mod:`pdb` on
inspected specs.
All arguments above are keyword-only to avoid overlap with a
wrapped spec.
.. note::
Just like ``pdb.set_trace()``, be careful about leaving stray
``Inspect()`` instances in production glom specs.
"""