Skip to content

Commit

Permalink
Merge pull request #198 from mahmoud/perf
Browse files Browse the repository at this point in the history
Perf
  • Loading branch information
kurtbrose committed Sep 18, 2020
2 parents d99ccaa + 404abae commit 55bbcd5
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 23 deletions.
1 change: 1 addition & 0 deletions .tox-coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ source =
omit =
*/flycheck_*.py
*/chainmap_backport.py
*/perf_report.py

[paths]
source =
Expand Down
58 changes: 40 additions & 18 deletions glom/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,9 @@ def __init__(self, *path_parts):
path_t = _t_child(path_t, 'P', part)
self.path_t = path_t

_CACHE = {}
_MAX_CACHE = 10000

@classmethod
def from_text(cls, text):
"""Make a Path from .-delimited text:
Expand All @@ -633,7 +636,11 @@ def from_text(cls, text):
Path('a', 'b', 'c')
"""
return cls(*text.split('.'))
if text not in cls._CACHE:
if len(cls._CACHE) > cls._MAX_CACHE:
return cls(*text.split('.'))
cls._CACHE[text] = cls(*text.split('.'))
return cls._CACHE[text]

def glomit(self, target, scope):
# The entrypoint for the Path extension
Expand Down Expand Up @@ -987,9 +994,13 @@ def _trace(self, target, spec, scope):
scope[glom] = scope[Inspect]
if self.echo:
print('---')
# TODO: switch from scope[Path] to the Target-Spec format trace above
# ... but maybe be smart about only printing deltas instead of the whole
# thing
print('path: ', scope[Path] + [spec])
print('target:', target)
if self.breakpoint:
# TODO: real debugger here?
self.breakpoint()
try:
ret = scope[Inspect](target, spec, scope)
Expand Down Expand Up @@ -1807,6 +1818,7 @@ class TargetRegistry(object):
def __init__(self, register_default_types=True):
self._op_type_map = {}
self._op_type_tree = {} # see _register_fuzzy_type for details
self._type_cache = {}

self._op_auto_map = OrderedDict() # op name to function that returns handler function

Expand All @@ -1825,22 +1837,26 @@ def get_handler(self, op, obj, path=None, raise_exc=True):
"""
ret = False
obj_type = type(obj)
type_map = self.get_type_map(op)
if type_map:
try:
ret = type_map[obj_type]
except KeyError:
type_tree = self._op_type_tree.get(op, {})
closest = self._get_closest_type(obj, type_tree=type_tree)
if closest is None:
ret = False
else:
ret = type_map[closest]
cache_key = (obj_type, op)
if cache_key not in self._type_cache:
type_map = self.get_type_map(op)
if type_map:
try:
ret = type_map[obj_type]
except KeyError:
type_tree = self._op_type_tree.get(op, {})
closest = self._get_closest_type(obj, type_tree=type_tree)
if closest is None:
ret = False
else:
ret = type_map[closest]

if ret is False and raise_exc:
raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)
if ret is False and raise_exc:
raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)

return ret
self._type_cache[cache_key] = ret

return self._type_cache[cache_key]

def get_type_map(self, op):
try:
Expand Down Expand Up @@ -1928,6 +1944,8 @@ def register(self, target_type, **kwargs):
for op_name in new_op_map:
self._register_fuzzy_type(op_name, target_type)

self._type_cache = {} # reset type cache

return

def register_op(self, op_name, auto_func=None, exact=False):
Expand Down Expand Up @@ -2119,21 +2137,23 @@ def _has_callable_glomit(obj):

def _glom(target, spec, scope):
parent = scope
pmap = parent.maps[0]
scope = scope.new_child({
T: target,
Spec: spec,
UP: parent,
CHILD_ERRORS: [],
MODE: pmap[MODE],
})
parent[LAST_CHILD_SCOPE] = scope
pmap[LAST_CHILD_SCOPE] = scope

try:
if isinstance(spec, TType): # must go first, due to callability
if type(spec) is TType: # must go first, due to callability
return _t_eval(target, spec, scope)
elif _has_callable_glomit(spec):
return spec.glomit(target, scope)

return scope[MODE](target, spec, scope)
return scope.maps[0][MODE](target, spec, scope)
except Exception as e:
scope.maps[1][CHILD_ERRORS].append(scope)
scope.maps[0][CUR_ERROR] = e
Expand All @@ -2147,6 +2167,8 @@ def _glom(target, spec, scope):


def AUTO(target, spec, scope):
if type(spec) is str: # shortcut to make deep-get use case faster
return _t_eval(target, Path.from_text(spec).path_t, scope)
if isinstance(spec, dict):
return _handle_dict(target, spec, scope)
elif isinstance(spec, list):
Expand Down
3 changes: 0 additions & 3 deletions glom/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,9 +1003,6 @@ def glomit(self, target, scope):
type(target).__name__))

if errs:
# TODO: due to the usage of basic path (not a Path
# object), the format can be a bit inconsistent here
# (e.g., 'a.b' and ['a', 'b'])
raise CheckError(errs, self, scope[Path])
return ret

Expand Down
87 changes: 87 additions & 0 deletions glom/test/perf_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
slow gloms that came up organically, used as performance metrics
"""
import time
import gc

import attr

from glom import glom, T



# glom spec that names every source field with a plain string; the 'id'
# entry is a tuple whose second step is ``str``. This is the spec that the
# __main__ section times and compares against func().
STR_SPEC = [{
'id': ('id', str),
'name': 'short_name',
'external_id': 'external_id',
'created_date': 'created_date',
}]


# Same output keys and source fields as STR_SPEC, written with T attribute
# access instead of strings. Not referenced by the __main__ section below.
T_SPEC = [{
'id': (T.id, str),
'name': T.short_name,
'external_id': T.external_id,
'created_date': T.created_date,
}]


def func(data):
    """Hand-written Python counterpart of the specs above.

    Returns one dict per element of ``data``, built from the element's
    ``id`` (passed through ``str``), ``short_name``, ``external_id`` and
    ``created_date`` attributes. ``ratio`` uses it as the baseline that
    glom is timed against, so it stays a single list comprehension.
    """
    return [
        {
            'id': str(obj.id),
            'name': obj.short_name,
            'external_id': obj.external_id,
            'created_date': obj.created_date,
        }
        for obj in data
    ]


def setup_list_of_dict(num=100):
    """
    Build the input for the list-of-dicts benchmark.

    A common use case is turning a list of internal objects into a list
    of dicts ahead of JSON serialization; this returns ``num`` such
    objects, instances of a class created with ``attr.make_class``.
    """
    field_names = ['id', 'short_name', 'external_id', 'created_date']
    Obj = attr.make_class('Obj', field_names)

    return [
        Obj(idx, 'name' + str(idx), 'external' + str(idx), 'now')
        for idx in range(num)
    ]


def run(spec, data):
    """Time a single ``glom(data, spec)`` call and print the cost per object.

    The elapsed time is divided by ``len(data)`` and printed in
    microseconds. Nothing is returned.
    """
    # perf_counter is the clock intended for measuring short durations: it
    # is monotonic and high-resolution, unlike the wall clock (time.time),
    # and matches the perf_counter_ns timing used by ratio().
    start = time.perf_counter()
    glom(data, spec)
    elapsed = time.perf_counter() - start
    print("{} us per object".format(elapsed / len(data) * 1e6))


def ratio(spec, func, data):
    """Return the time of ``glom(data, spec)`` relative to ``func(data)``.

    Both calls are timed back to back for 10 rounds with
    ``time.perf_counter_ns``. For each series the two smallest and two
    largest samples are dropped, and the quotient of the remaining totals
    (glom over plain Python) is returned as a float.
    """
    glom_samples, py_samples = [], []
    for _ in range(10):
        glom_start = time.perf_counter_ns()
        glom(data, spec)
        midpoint = time.perf_counter_ns()
        func(data)
        py_end = time.perf_counter_ns()
        glom_samples.append(midpoint - glom_start)
        py_samples.append(py_end - midpoint)

    # Trim outliers: discard the 2 lowest and 2 highest samples per series.
    glom_total = sum(sorted(glom_samples)[2:-2])
    py_total = sum(sorted(py_samples)[2:-2])

    return 1.0 * glom_total / py_total


if __name__ == "__main__":
    # Two timing passes of the string spec over one large data set.
    data = setup_list_of_dict(100000)
    run(STR_SPEC, data)
    run(STR_SPEC, data)
    # glom-vs-plain-Python ratio, each measured on a freshly built,
    # smaller data set.
    print(ratio(STR_SPEC, func, setup_list_of_dict(1000)))
    print(ratio(STR_SPEC, func, setup_list_of_dict(1000)))


# suggest using scalene to profile with:
# $ scalene glom/test/perf_report.py --profile-all --reduced-profile --cpu-only --outfile SCALENE-CPU.txt
7 changes: 7 additions & 0 deletions glom/test/test_path_and_t.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,10 @@ def test_t_dunders():
assert 'use T.__("name__")' in str(exc_info.value)

assert glom(1, T.__('class__')) is int


def test_path_cache():
    """Path.from_text reuses cached Path objects until the cache is full."""
    assert Path.from_text('a.b.c') is Path.from_text('a.b.c')
    pre = Path._MAX_CACHE
    Path._MAX_CACHE = 0
    try:
        # With the limit at 0 a text that is not yet cached must produce a
        # fresh Path on every call.
        assert Path.from_text('d.e.f') is not Path.from_text('d.e.f')
    finally:
        # _MAX_CACHE is a class attribute shared by every test in the
        # process; put it back so later tests still run with caching on.
        Path._MAX_CACHE = pre
4 changes: 2 additions & 2 deletions glom/test/test_target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def test_types_bare():
with pytest.raises(UnregisteredTarget) as exc_info:
glommer.glom({'test': [{'hi': 'hi'}]}, ('test', ['hi']))
# feel free to update the "(at ['test'])" part to improve path display
assert str(exc_info.value).find(
assert (
"target type 'list' not registered for 'iterate', "
"expected one of registered types: (dict) (at ['test'])") != -1
"expected one of registered types: (dict)" in str(exc_info.value))
return


Expand Down

0 comments on commit 55bbcd5

Please sign in to comment.