Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interactive conveniences #836

Merged
merged 10 commits into from Nov 21, 2015
2 changes: 2 additions & 0 deletions docs/source/whatsnew/0.8.4.txt
Expand Up @@ -82,6 +82,8 @@ Documentation
Miscellaneous
~~~~~~~~~~~~~

* Added a :meth:`~zipline.pipeline.pipeline.Pipeline.show_graph` method to render
a Pipeline as an image (:issue:`836`).
* Adds :func:`~zipline.utils.test_utils.subtest` decorator for creating subtests
without ``nose_parameterized.expand`` which bloats the test output
(:issue:`833`).
Expand Down
27 changes: 27 additions & 0 deletions tests/pipeline/test_adjusted_array.py
@@ -1,6 +1,7 @@
"""
Tests for chunked adjustments.
"""
from textwrap import dedent
from unittest import TestCase

from nose_parameterized import parameterized
Expand Down Expand Up @@ -323,3 +324,29 @@ def test_bad_input(self):

with self.assertRaisesRegexp(ValueError, msg):
adjusted_array(data, bad_mask, {})

def test_inspect(self):
data = arange(15, dtype=float).reshape(5, 3)
adj_array = adjusted_array(
data,
NOMASK,
{4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
)

expected = dedent(
"""\
Adjusted Array:

Data:
array([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.],
[ 9., 10., 11.],
[ 12., 13., 14.]])

Adjustments:
{4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
"""
)
self.assertEqual(expected, adj_array.inspect())
30 changes: 24 additions & 6 deletions zipline/lib/adjusted_array.pyx
Expand Up @@ -5,6 +5,8 @@ from cpython cimport (
Py_EQ,
PyObject_RichCompare,
)
from pprint import pformat

from numpy import (
asarray,
bool_,
Expand Down Expand Up @@ -128,6 +130,15 @@ cdef class Float64AdjustedArray(AdjustedArray):
self._data = data
self.adjustments = adjustments

def inspect(self):
return (
"Adjusted Array:\n\nData:\n"
"{data}\n\nAdjustments:\n{adjustments}\n".format(
data=repr(asarray(self._data)),
adjustments=pformat(self.adjustments),
)
)

property dtype:
def __get__(self):
return float64
Expand Down Expand Up @@ -216,10 +227,17 @@ cdef class _Float64AdjustedArrayWindow:
self.anchor += 1
return out

def __repr__(self):
return "%s(window_length=%d, anchor=%d, max_anchor=%d)" % (
type(self).__name__,
self.window_length,
self.anchor,
self.max_anchor,
def inspect(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this not the repr? Same above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had it as the repr originally, but that's really annoying if you have a list of these or something. Too much text I think.

return (
"{type_}\n"
"Window Length: {window_length}\n"
"Current Buffer:\n"
"{data}\n"
"Remaining Adjustments:\n"
"{adjustments}\n"
).format(
type_=type(self).__name__,
window_length=self.window_length,
data=asarray(self.data[self.anchor - self.window_length:self.anchor]),
adjustments=pformat(self.adjustments),
)
51 changes: 49 additions & 2 deletions zipline/pipeline/__init__.py
@@ -1,16 +1,63 @@
from __future__ import print_function
from zipline.assets import AssetFinder

from .classifier import Classifier
from .engine import SimplePipelineEngine
from .factors import Factor, CustomFactor
from .filters import Filter
from .term import Term
from .graph import TermGraph
from .pipeline import Pipeline
from .loaders import USEquityPricingLoader


def engine_from_files(daily_bar_path,
adjustments_path,
asset_db_path,
calendar,
warmup_assets=False):
"""
Construct a SimplePipelineEngine from local filesystem resources.

Parameters
----------
daily_bar_path : str
Path to pass to `BcolzDailyBarReader`.
adjustments_path : str
Path to pass to SQLiteAdjustmentReader.
asset_db_path : str
Path to pass to `AssetFinder`.
calendar : pd.DatetimeIndex
Calendar to use for the loader.
warmup_assets : bool, optional
Whether or not to populate AssetFinder caches. This can speed up
initial latency on subsequent pipeline runs, at the cost of extra
memory consumption. Default is False
"""
loader = USEquityPricingLoader.from_files(daily_bar_path, adjustments_path)

if not asset_db_path.startswith("sqlite:"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it need to be sqlite? the backend supports any sqldb no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is better as:

if os.path.isfile(asset_db_path):?

asset_db_path = "sqlite:///" + asset_db_path
asset_finder = AssetFinder(asset_db_path)
if warmup_assets:
results = asset_finder.retrieve_all(asset_finder.sids)
print("Warmed up %d assets." % len(results))

return SimplePipelineEngine(
lambda _: loader,
calendar,
asset_finder,
)


__all__ = [
__all__ = (
'Classifier',
'CustomFactor',
'engine_from_files',
'Factor',
'Filter',
'Pipeline',
'SimplePipelineEngine',
'Term',
'TermGraph',
]
)
3 changes: 3 additions & 0 deletions zipline/pipeline/loaders/__init__.py
@@ -0,0 +1,3 @@
from .equity_pricing_loader import USEquityPricingLoader

__all__ = ['USEquityPricingLoader']
22 changes: 22 additions & 0 deletions zipline/pipeline/loaders/equity_pricing_loader.py
Expand Up @@ -16,6 +16,10 @@
uint32,
)

from zipline.data.us_equity_pricing import (
BcolzDailyBarReader,
SQLiteAdjustmentReader,
)
from zipline.lib.adjusted_array import (
adjusted_array,
)
Expand All @@ -40,6 +44,24 @@ def __init__(self, raw_price_loader, adjustments_loader):
self._calendar = self.raw_price_loader._calendar
self.adjustments_loader = adjustments_loader

@classmethod
def from_files(cls, pricing_path, adjustments_path):
"""
Create a loader from a bcolz equity pricing dir and a SQLite
adjustments path.

Parameters
----------
pricing_path : str
Path to a bcolz directory written by a BcolzDailyBarWriter.
adjusments_path : str
Path to an adjusments db written by a SQLiteAdjustmentWriter.
"""
return cls(
BcolzDailyBarReader(pricing_path),
SQLiteAdjustmentReader(adjustments_path)
)

def load_adjusted_array(self, columns, dates, assets, mask):
# load_adjusted_array is called with dates on which the user's algo
# will be shown data, which means we need to return the data that would
Expand Down
21 changes: 20 additions & 1 deletion zipline/pipeline/pipeline.py
@@ -1,6 +1,6 @@
from zipline.utils.input_validation import expect_types, optional

from .term import Term
from .term import Term, AssetExists
from .filters import Filter
from .graph import TermGraph

Expand Down Expand Up @@ -145,3 +145,22 @@ def to_graph(self, screen_name, default_screen):
columns[screen_name] = screen

return TermGraph(columns)

def show_graph(self, format='svg'):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe expect_value to check that I don't pass format='ayy'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an explicit elif and a default branch that raises.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kk

"""
Render this Pipeline as a DAG.

Parameters
----------
format : {'svg', 'png', 'jpeg'}
Image format to render with. Default is 'svg'.
"""
g = self.to_graph('', AssetExists())
if format == 'svg':
return g.svg
elif format == 'png':
return g.png
elif format == 'jpeg':
return g.jpeg
else:
raise ValueError("Unknown graph format %r." % format)
3 changes: 3 additions & 0 deletions zipline/pipeline/term.py
Expand Up @@ -245,6 +245,9 @@ def _compute(self, windows, dates, assets, mask):
out[~mask] = nan
return out

def short_repr(self):
return type(self).__name__ + '(%d)' % self.window_length


class CompositeTerm(Term):
inputs = NotSpecified
Expand Down