From f169bb00aa3ab55edbefd338e0d79d10e822b699 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 20 Nov 2015 20:12:24 -0500 Subject: [PATCH 01/10] MAINT: Re-export USEquityPricingLoader. --- zipline/pipeline/loaders/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zipline/pipeline/loaders/__init__.py b/zipline/pipeline/loaders/__init__.py index e69de29bb2..39aa2592e7 100644 --- a/zipline/pipeline/loaders/__init__.py +++ b/zipline/pipeline/loaders/__init__.py @@ -0,0 +1,3 @@ +from .equity_pricing_loader import USEquityPricingLoader + +__all__ = ['USEquityPricingLoader'] From ac7b44af23524917f2cb232e4f427298c38458d1 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Sat, 7 Nov 2015 15:50:30 -0500 Subject: [PATCH 02/10] ENH: Add USEquityPricingLoader.from_files. --- .../pipeline/loaders/equity_pricing_loader.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/zipline/pipeline/loaders/equity_pricing_loader.py b/zipline/pipeline/loaders/equity_pricing_loader.py index 8d00f60261..6494e97749 100644 --- a/zipline/pipeline/loaders/equity_pricing_loader.py +++ b/zipline/pipeline/loaders/equity_pricing_loader.py @@ -16,6 +16,10 @@ uint32, ) +from zipline.data.us_equity_pricing import ( + BcolzDailyBarReader, + SQLiteAdjustmentReader, +) from zipline.lib.adjusted_array import ( adjusted_array, ) @@ -40,6 +44,24 @@ def __init__(self, raw_price_loader, adjustments_loader): self._calendar = self.raw_price_loader._calendar self.adjustments_loader = adjustments_loader + @classmethod + def from_files(cls, pricing_path, adjustments_path): + """ + Create a loader from a bcolz equity pricing dir and a SQLite + adjustments path. + + Parameters + ---------- + pricing_path : str + Path to a bcolz directory written by a BcolzDailyBarWriter. + adjusments_path : str + Path to an adjusments db written by a SQLiteAdjustmentWriter. + """ + return cls( + BcolzDailyBarReader(pricing_path), + SQLiteAdjustmentReader(adjustments_path) + ) + def load_adjusted_array(self, columns, dates, assets, mask): # load_adjusted_array is called with dates on which the user's algo # will be shown data, which means we need to return the data that would From 7aa04a2e1722cd71010efa013dc40d79a199e7b2 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Tue, 10 Nov 2015 11:07:25 -0500 Subject: [PATCH 03/10] ENH: Add pipeline.engine_from_files. --- zipline/pipeline/__init__.py | 42 ++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/zipline/pipeline/__init__.py b/zipline/pipeline/__init__.py index 6cddb7ea4d..451466b573 100644 --- a/zipline/pipeline/__init__.py +++ b/zipline/pipeline/__init__.py @@ -1,16 +1,54 @@ +from zipline.assets import AssetFinder + from .classifier import Classifier +from .engine import SimplePipelineEngine from .factors import Factor, CustomFactor from .filters import Filter from .term import Term from .graph import TermGraph from .pipeline import Pipeline +from .loaders import USEquityPricingLoader + + +def engine_from_files(daily_bar_path, + adjustments_path, + asset_db_path, + calendar): + """ + Construct a SimplePipelineEngine from local filesystem resources. + + Parameters + ---------- + daily_bar_path : str + Path to pass to `BcolzDailyBarReader`. + adjustments_path : str + Path to pass to SQLiteAdjustmentReader. + asset_db_path : str + Path to pass to `AssetFinder`. + calendar : pd.DatetimeIndex + Calendar to use for the loader. + """ + loader = USEquityPricingLoader.from_files(daily_bar_path, adjustments_path) + + if not asset_db_path.startswith("sqlite:"): + asset_db_path = "sqlite:///" + asset_db_path + asset_finder = AssetFinder(asset_db_path) + + return SimplePipelineEngine( + lambda _: loader, + calendar, + asset_finder, + ) + -__all__ = [ +__all__ = ( 'Classifier', 'CustomFactor', + 'engine_from_files', 'Factor', 'Filter', 'Pipeline', + 'SimplePipelineEngine', 'Term', 'TermGraph', -] +) From 5d8a915d15b7ccebcf01eb6781a15c376e329f77 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Tue, 10 Nov 2015 16:34:23 -0500 Subject: [PATCH 04/10] ENH: Add inspect() function to adjusted_array. --- tests/pipeline/test_adjusted_array.py | 27 ++++++++++++++++++++++++ zipline/lib/adjusted_array.pyx | 30 +++++++++++++++++++++------ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/tests/pipeline/test_adjusted_array.py b/tests/pipeline/test_adjusted_array.py index 858db64867..a047469c9d 100644 --- a/tests/pipeline/test_adjusted_array.py +++ b/tests/pipeline/test_adjusted_array.py @@ -1,6 +1,7 @@ """ Tests for chunked adjustments. """ +from textwrap import dedent from unittest import TestCase from nose_parameterized import parameterized @@ -323,3 +324,29 @@ def test_bad_input(self): with self.assertRaisesRegexp(ValueError, msg): adjusted_array(data, bad_mask, {}) + + def test_inspect(self): + data = arange(15, dtype=float).reshape(5, 3) + adj_array = adjusted_array( + data, + NOMASK, + {4: [Float64Multiply(2, 3, 0, 0, 4.0)]}, + ) + + expected = dedent( + """\ + Adjusted Array: + + Data: + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [ 6., 7., 8.], + [ 9., 10., 11.], + [ 12., 13., 14.]]) + + Adjustments: + {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \ +last_col=0, value=4.000000)]} + """ + ) + self.assertEqual(expected, adj_array.inspect()) diff --git a/zipline/lib/adjusted_array.pyx b/zipline/lib/adjusted_array.pyx index 525af7723e..64a8a87db1 100644 --- a/zipline/lib/adjusted_array.pyx +++ b/zipline/lib/adjusted_array.pyx @@ -5,6 +5,8 @@ from cpython cimport ( Py_EQ, PyObject_RichCompare, ) +from pprint import pformat + from numpy import ( asarray, bool_, @@ -128,6 +130,15 @@ cdef class Float64AdjustedArray(AdjustedArray): self._data = data self.adjustments = adjustments + def inspect(self): + return ( + "Adjusted Array:\n\nData:\n" + "{data}\n\nAdjustments:\n{adjustments}\n".format( + data=repr(asarray(self._data)), + adjustments=pformat(self.adjustments), + ) + ) + property dtype: def __get__(self): return float64 @@ -216,10 +227,17 @@ cdef class _Float64AdjustedArrayWindow: self.anchor += 1 return out - def __repr__(self): - return "%s(window_length=%d, anchor=%d, max_anchor=%d)" % ( - type(self).__name__, - self.window_length, - self.anchor, - self.max_anchor, + def inspect(self): + return ( + "{type_}\n" + "Window Length: {window_length}\n" + "Current Buffer:\n" + "{data}\n" + "Remaining Adjustments:\n" + "{adjustments}\n" + ).format( + type_=type(self).__name__, + window_length=self.window_length, + data=asarray(self.data[self.anchor - self.window_length:self.anchor]), + adjustments=pformat(self.adjustments), ) From 0b2787a86b53094fe46a2d98fcd3cc335711a8b0 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Tue, 10 Nov 2015 16:34:53 -0500 Subject: [PATCH 05/10] MAINT: Add show_graph to Pipeline. --- zipline/pipeline/pipeline.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/zipline/pipeline/pipeline.py b/zipline/pipeline/pipeline.py index dbd627aa47..b5173d5e0a 100644 --- a/zipline/pipeline/pipeline.py +++ b/zipline/pipeline/pipeline.py @@ -1,6 +1,6 @@ from zipline.utils.input_validation import expect_types, optional -from .term import Term +from .term import Term, AssetExists from .filters import Filter from .graph import TermGraph @@ -145,3 +145,20 @@ def to_graph(self, screen_name, default_screen): columns[screen_name] = screen return TermGraph(columns) + + def show_graph(self, format='svg'): + """ + Render this Pipeline as a DAG. + + Parameters + ---------- + format : str, optional + Image format to render with. Default is 'svg'. + """ + g = self.to_graph('', AssetExists()) + if format == 'svg': + return g.svg + elif format == 'png': + return g.png + else: + return g.jpeg From 01b820d96c2a5b8c60e06aff167472f347ef95f7 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 11 Nov 2015 18:14:51 -0500 Subject: [PATCH 06/10] DOC: Add repr for CustomFactor. --- zipline/pipeline/term.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zipline/pipeline/term.py b/zipline/pipeline/term.py index 6e647e0861..8e148bbea9 100644 --- a/zipline/pipeline/term.py +++ b/zipline/pipeline/term.py @@ -245,6 +245,9 @@ def _compute(self, windows, dates, assets, mask): out[~mask] = nan return out + def short_repr(self): + return type(self).__name__ + '(%d)' % self.window_length + class CompositeTerm(Term): inputs = NotSpecified From f1d33aed961ff70683a900a31af2c840a07add79 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 11 Nov 2015 18:17:07 -0500 Subject: [PATCH 07/10] ENH: Add warmup_assets to equity_pricing_loader. --- zipline/pipeline/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/zipline/pipeline/__init__.py b/zipline/pipeline/__init__.py index 451466b573..bac3f670de 100644 --- a/zipline/pipeline/__init__.py +++ b/zipline/pipeline/__init__.py @@ -1,3 +1,4 @@ +from __future__ import print_function from zipline.assets import AssetFinder from .classifier import Classifier @@ -13,7 +14,8 @@ def engine_from_files(daily_bar_path, adjustments_path, asset_db_path, - calendar): + calendar, + warmup_assets=False): """ Construct a SimplePipelineEngine from local filesystem resources. @@ -27,12 +29,19 @@ def engine_from_files(daily_bar_path, Path to pass to `AssetFinder`. calendar : pd.DatetimeIndex Calendar to use for the loader. + warmup_assets : bool + Whether or not to populate AssetFinder caches. This can speed up + initial latency on subsequent pipeline runs, at the cost of extra + memory consumption. """ loader = USEquityPricingLoader.from_files(daily_bar_path, adjustments_path) if not asset_db_path.startswith("sqlite:"): asset_db_path = "sqlite:///" + asset_db_path asset_finder = AssetFinder(asset_db_path) + if warmup_assets: + results = asset_finder.retrieve_all(asset_finder.sids) + print("Warmed up %d assets." % len(results)) return SimplePipelineEngine( lambda _: loader, From 667254206b814158f04646324f651821a5a36577 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 20 Nov 2015 21:06:48 -0500 Subject: [PATCH 08/10] DOC: warmup_assets is an optional arg. --- zipline/pipeline/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zipline/pipeline/__init__.py b/zipline/pipeline/__init__.py index bac3f670de..d9ca191852 100644 --- a/zipline/pipeline/__init__.py +++ b/zipline/pipeline/__init__.py @@ -29,10 +29,10 @@ def engine_from_files(daily_bar_path, Path to pass to `AssetFinder`. calendar : pd.DatetimeIndex Calendar to use for the loader. - warmup_assets : bool + warmup_assets : bool, optional Whether or not to populate AssetFinder caches. This can speed up initial latency on subsequent pipeline runs, at the cost of extra - memory consumption. + memory consumption. Default is False """ loader = USEquityPricingLoader.from_files(daily_bar_path, adjustments_path) From 26cef8d959d243f577639144cd886b31e2336f90 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 20 Nov 2015 21:09:12 -0500 Subject: [PATCH 09/10] DOC: Better doc and error messages for show_graph. --- zipline/pipeline/pipeline.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zipline/pipeline/pipeline.py b/zipline/pipeline/pipeline.py index b5173d5e0a..7bfaa4b1b8 100644 --- a/zipline/pipeline/pipeline.py +++ b/zipline/pipeline/pipeline.py @@ -152,7 +152,7 @@ def show_graph(self, format='svg'): Parameters ---------- - format : str, optional + format : {'svg', 'png', 'jpeg'} Image format to render with. Default is 'svg'. """ g = self.to_graph('', AssetExists()) @@ -160,5 +160,7 @@ def show_graph(self, format='svg'): return g.svg elif format == 'png': return g.png - else: + elif format == 'jpeg': return g.jpeg + else: + raise ValueError("Unknown graph format %r." % format) From 121632e814e118b7f194a59dc6b7ade3e8222af5 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 20 Nov 2015 21:12:34 -0500 Subject: [PATCH 10/10] DOC: Add a whatsnew entry. --- docs/source/whatsnew/0.8.4.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/whatsnew/0.8.4.txt b/docs/source/whatsnew/0.8.4.txt index 692741bb86..dde16a16a0 100644 --- a/docs/source/whatsnew/0.8.4.txt +++ b/docs/source/whatsnew/0.8.4.txt @@ -82,6 +82,8 @@ Documentation Miscellaneous ~~~~~~~~~~~~~ +* Added a :meth:`~zipline.pipeline.pipeline.Pipeline.show_graph` method to render + a Pipeline as an image (:issue:`836`). * Adds :func:`~zipline.utils.test_utils.subtest` decorator for creating subtests without ``nose_parameterized.expand`` which bloats the test output (:issue:`833`).