From a825662a28ed2e3bab80804fce9f60ddee97e3eb Mon Sep 17 00:00:00 2001
From: Scott Sanderson
Date: Fri, 4 Oct 2019 15:28:59 -0400
Subject: [PATCH] PERF: Avoid slow exception repr in blaze.

The error raised by blaze when you do a field lookup with a nonexistent
key on an expression shows the repr of the expression, which contains
the expression's entire dshape. For exprs that are part of a very large
tree, rendering the dshape can take a significant amount of time.

Band-aid fix this for now by checking if the field exists before doing
the lookup. This saves 30-40 seconds (!) of startup time on Quantopian
research.
---
 zipline/pipeline/loaders/blaze/core.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/zipline/pipeline/loaders/blaze/core.py b/zipline/pipeline/loaders/blaze/core.py
index 57a091050c..2c9886af54 100644
--- a/zipline/pipeline/loaders/blaze/core.py
+++ b/zipline/pipeline/loaders/blaze/core.py
@@ -452,7 +452,13 @@ def _get_metadata(field, expr, metadata_expr, no_metadata_rule):
         return metadata_expr
 
     try:
-        return expr._child['_'.join(((expr._name or ''), field))]
+        # The error produced by expr[field_name] when field_name doesn't exist
+        # is very expensive. Avoid that cost by doing the check ourselves.
+        field_name = '_'.join(((expr._name or ''), field))
+        child = expr._child
+        if field_name not in child.fields:
+            raise AttributeError(field_name)
+        return child[field_name]
     except (ValueError, AttributeError):
         if no_metadata_rule == 'raise':
             raise ValueError(