Skip to content

Commit

Permalink
BUG: When increasing the length dynamically, the rolling panel was
getting filled with the wrong datetimes and causing errors.
Browse files Browse the repository at this point in the history

Updates the logic for addressing missing datetimes and adds unit tests
for the 2 main cases (no missing datetimes, and some missing datetimes).
  • Loading branch information
llllllllll committed Nov 10, 2014
1 parent ca59abc commit 03485e9
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 22 deletions.
108 changes: 108 additions & 0 deletions tests/test_history.py
Expand Up @@ -935,6 +935,114 @@ def handle_data(context, data):
' HistorySpec',
)

@parameterized.expand([
    ('zero', 1),
    ('non-zero', 2),
])
def test_history_grow_length_inter_bar_date_buf_delta(self, name, incr):
    """
    Test growing the length of a digest panel between bars, with
    different date_buf deltas.

    The algorithm asks for ``context.bar_count`` daily bars of price
    history on every ``handle_data`` call and then bumps ``bar_count`` by
    ``incr``, so the next bar requests a longer window than the last one.

    When incr=1, you will not be missing any dates, so there is nothing to
    fill in and the date_buf delta is zero.
    With a larger increment (incr=2 here) the window peeks further back in
    time, and the missing dates need to be filled in.

    Parameters
    ----------
    name : str
        Case label supplied by ``parameterized.expand``; not used in the
        test body.
    incr : int
        Amount by which the requested bar count grows after each bar.
    """
    # The script is formatted with the increment under test. The assertion
    # inside it runs against this test case via ``context.test_case``.
    algo_text = dedent(
        """\
        from zipline.api import history
        def initialize(context):
            context.bar_count = 1
        def handle_data(context, data):
            prices = history(context.bar_count, '1d', 'price')
            context.test_case.assertEqual(len(prices), context.bar_count)
            context.bar_count += {incr}
        """
    ).format(incr=incr)
    start = pd.Timestamp('2007-04-05', tz='UTC')
    end = pd.Timestamp('2007-04-10', tz='UTC')

    sim_params = SimulationParameters(
        period_start=start,
        period_end=end,
        capital_base=1.0e5,
        data_frequency='minute',
        emission_rate='daily'
    )

    test_algo = TradingAlgorithm(
        script=algo_text,
        data_frequency='minute',
        sim_params=sim_params
    )
    # Expose the TestCase so the algorithm script can assert on it.
    test_algo.test_case = self

    source = RandomWalkSource(start=start, end=end)

    # No history container should exist before the algorithm has run.
    self.assertIsNone(test_algo.history_container)
    test_algo.run(source)

@parameterized.expand([
    ('zero', 1),
    ('non-zero', 2),
])
def test_history_grow_length_intra_bar_date_buf_delta(self, name, incr):
    """
    Test growing the length of a digest panel within a single bar, with
    different date_buf deltas.

    The algorithm calls ``history`` twice inside the same ``handle_data``
    call: once with the current ``context.bar_count`` and once more after
    bumping ``bar_count`` by ``incr``, so the window has to grow without a
    new bar arriving in between.

    When incr=1, you will not be missing any dates, so there is nothing to
    fill in and the date_buf delta is zero.
    With a larger increment (incr=2 here) the window peeks further back in
    time, and the missing dates need to be filled in.

    Parameters
    ----------
    name : str
        Case label supplied by ``parameterized.expand``; not used in the
        test body.
    incr : int
        Amount by which the requested bar count grows within each bar.
    """
    # The script is formatted with the increment under test. The assertions
    # inside it run against this test case via ``context.test_case``.
    algo_text = dedent(
        """\
        from zipline.api import history
        def initialize(context):
            context.bar_count = 1
        def handle_data(context, data):
            prices = history(context.bar_count, '1d', 'price')
            context.test_case.assertEqual(len(prices), context.bar_count)
            context.bar_count += {incr}
            prices = history(context.bar_count, '1d', 'price')
            context.test_case.assertEqual(len(prices), context.bar_count)
        """
    ).format(incr=incr)
    start = pd.Timestamp('2007-04-05', tz='UTC')
    end = pd.Timestamp('2007-04-10', tz='UTC')

    sim_params = SimulationParameters(
        period_start=start,
        period_end=end,
        capital_base=1.0e5,
        data_frequency='minute',
        emission_rate='daily'
    )

    test_algo = TradingAlgorithm(
        script=algo_text,
        data_frequency='minute',
        sim_params=sim_params
    )
    # Expose the TestCase so the algorithm script can assert on it.
    test_algo.test_case = self

    source = RandomWalkSource(start=start, end=end)

    # No history container should exist before the algorithm has run.
    self.assertIsNone(test_algo.history_container)
    test_algo.run(source)


class TestHistoryContainerResize(TestCase):
@parameterized.expand(
Expand Down
15 changes: 5 additions & 10 deletions zipline/history/history_container.py
Expand Up @@ -387,23 +387,18 @@ def _resize_panel(self, panel, size, dt, freq, env=None):
"""
# This is the oldest datetime that will be shown in the current window
# of the panel.
oldest_idx = panel._oldest_frame_idx
oldest_dt = pd.Timestamp(
panel.date_buf[oldest_idx], tz='utc',
panel.date_buf[panel.start_index], tz='utc',
)
old_cap = panel.cap
panel.resize(size)
delta = panel.extend(size)

delta = (old_cap - oldest_idx) - panel._oldest_frame_idx

# Backfill the missing dates of the new current window.
# Construct the missing dates.
missing_dts = self._create_window_date_buf(
delta, freq.unit_str, freq.data_frequency, oldest_dt,
)

# Fill the dates in between the new oldest index and adjusted oldest
# index.
where = slice(panel._oldest_frame_idx, -(old_cap - oldest_idx))
# Fill the delta with the dates we calculated.
where = slice(panel.start_index, panel.start_index + delta)
panel.date_buf[where] = missing_dts

@with_environment()
Expand Down
30 changes: 18 additions & 12 deletions zipline/utils/data.py
Expand Up @@ -47,7 +47,6 @@ def __init__(self,
self.minor_axis = _ensure_index(sids)

self.cap_multiple = cap_multiple
self.cap = cap_multiple * window

self.dtype = dtype
self.date_buf = np.empty(self.cap, dtype='M8[ns]') \
Expand All @@ -56,14 +55,18 @@ def __init__(self,
self.buffer = self._create_buffer()

@property
def _oldest_frame_idx(self):
def cap(self):
return self.cap_multiple * self._window

@property
def start_index(self):
return self._pos - self._window

def oldest_frame(self):
"""
Get the oldest frame in the panel.
"""
return self.buffer.iloc[:, self._oldest_frame_idx, :]
return self.buffer.iloc[:, self.start_index, :]

def set_minor_axis(self, minor_axis):
self.minor_axis = _ensure_index(minor_axis)
Expand All @@ -82,16 +85,18 @@ def _create_buffer(self):
)
return panel

def resize(self, window):
def extend(self, window):
"""
Resizes the buffer to hold a new window with a new cap_multiple.
If cap_multiple is None, then the old cap_multiple is used.
"""
self._window = window
if window < self._window:
raise ValueError(
'Cannot extend rolling panel with a smaller window',
)

pre = self.cap
self.cap = self.cap_multiple * window
delta = self.cap - pre
delta = window - self._window
self._window = window

self._pos += delta

Expand All @@ -104,14 +109,15 @@ def resize(self, window):
pd.Panel(
items=self.items,
minor_axis=self.minor_axis,
major_axis=np.arange(delta),
major_axis=np.arange(delta * self.cap_multiple),
dtype=self.dtype,
),
self.buffer
],
axis=1,
)
self.buffer.major_axis = pd.Int64Index(range(self.cap))
return delta

def add_frame(self, tick, frame):
"""
Expand All @@ -130,7 +136,7 @@ def get_current(self):
these objects because internal data might change
"""

where = slice(self._oldest_frame_idx, self._pos)
where = slice(self.start_index, self._pos)
major_axis = pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')
return pd.Panel(self.buffer.values[:, where, :], self.items,
major_axis, self.minor_axis, dtype=self.dtype)
Expand All @@ -141,11 +147,11 @@ def set_current(self, panel):
passed panel. The passed panel must have the same indices as the panel
that would be returned by self.get_current.
"""
where = slice(self._oldest_frame_idx, self._pos)
where = slice(self.start_index, self._pos)
self.buffer.values[:, where, :] = panel.values

def current_dates(self):
where = slice(self._oldest_frame_idx, self._pos)
where = slice(self.start_index, self._pos)
return pd.DatetimeIndex(deepcopy(self.date_buf[where]), tz='utc')

def _roll_data(self):
Expand Down

0 comments on commit 03485e9

Please sign in to comment.