Skip to content

Commit

Permalink
Merge 6e4060f into 36e65d6
Browse files Browse the repository at this point in the history
  • Loading branch information
Maya Tydykov committed Mar 15, 2017
2 parents 36e65d6 + 6e4060f commit 0e1ea43
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 5 deletions.
55 changes: 53 additions & 2 deletions tests/pipeline/test_blaze.py
Expand Up @@ -776,8 +776,9 @@ def _test_id(self, df, dshape, expected, finder, add):
check_dtype=False,
)

def _test_id_macro(self, df, dshape, expected, finder, add):
dates = self.dates
def _test_id_macro(self, df, dshape, expected, finder, add, dates=None):
if dates is None:
dates = self.dates
expr = bz.data(df, name='expr', dshape=dshape)
loader = BlazeLoader()
ds = from_blaze(
Expand Down Expand Up @@ -1875,6 +1876,56 @@ def test_checkpoints_out_of_bounds(self):

self._test_checkpoints(checkpoints)

def test_id_take_last_in_group_sorted(self):
    """
    Rows that share a timestamp must resolve to the one with the latest
    asof_date, even when the input lists them out of asof_date order.

    input
    asof_date     timestamp        other  value
    2014-01-03    2014-01-04 00        3      3
    2014-01-02    2014-01-04 00        2      2

    output (expected):
                  other  value
    2014-01-02      NaN    NaN
    2014-01-03      NaN    NaN
    2014-01-06        3      3
    """
    ts = pd.Timestamp
    calendar = pd.DatetimeIndex(
        [ts(day) for day in ('2014-01-02', '2014-01-03', '2014-01-06')]
    )

    # The rows are deliberately listed with the later asof_date first:
    # unless the loader sorts on asof_date before taking the last row of
    # each group, the 2014-01-02 values would be picked by mistake.
    rows = [
        [ts('2014-01-03'), ts('2014-01-04 00'), 3, 3],
        [ts('2014-01-02'), ts('2014-01-04 00'), 2, 2],
    ]
    raw = pd.DataFrame(
        data=rows,
        columns=['asof_date', 'timestamp', 'other', 'value'],
    )

    # Extend the macro dshape with an 'other' column typed like 'value'.
    measure_fields = OrderedDict(self.macro_dshape.measure.fields)
    measure_fields['other'] = measure_fields['value']

    # Nothing is known until the shared timestamp has passed, so the first
    # two sessions are empty and the last one carries the newest asof_date.
    want = pd.DataFrame(
        data=[
            [np.nan, np.nan],  # 2014-01-02
            [np.nan, np.nan],  # 2014-01-03
            [3, 3],            # 2014-01-06
        ],
        columns=['other', 'value'],
        index=calendar,
    )

    self._test_id_macro(
        raw,
        var * Record(measure_fields),
        want,
        self.asset_finder,
        ('other', 'value'),
        dates=calendar,
    )


class MiscTestCase(ZiplineTestCase):
def test_exprdata_repr(self):
Expand Down
5 changes: 5 additions & 0 deletions zipline/pipeline/loaders/blaze/core.py
Expand Up @@ -1104,6 +1104,11 @@ def collect_expr(e, lower):
materialized_deltas,
dates,
)
# If data for several asof_dates ever becomes known at the same timestamp,
# sort by asof_date so that last_in_date_group selects the value belonging
# to the latest asof_date.
sparse_output.sort_values(AD_FIELD_NAME, inplace=True)
non_novel_deltas.sort_values(AD_FIELD_NAME, inplace=True)
if AD_FIELD_NAME not in requested_columns:
sparse_output.drop(AD_FIELD_NAME, axis=1, inplace=True)

Expand Down
10 changes: 7 additions & 3 deletions zipline/pipeline/loaders/utils.py
Expand Up @@ -281,15 +281,17 @@ def last_in_date_group(df,
assets,
reindex=True,
have_sids=True,
extra_groupers=[]):
extra_groupers=None):
"""
Determine the last piece of information known on each date in the date
index for each group.
index for each group. Input df MUST be sorted such that the correct last
item is chosen from each group.
Parameters
----------
df : pd.DataFrame
The DataFrame containing the data to be grouped.
The DataFrame containing the data to be grouped. Must be sorted so that
the correct last item is chosen from each group.
dates : pd.DatetimeIndex
The dates to use for grouping and reindexing.
assets : pd.Int64Index
Expand All @@ -316,6 +318,8 @@ def last_in_date_group(df,
)]]
if have_sids:
idx += [SID_FIELD_NAME]
if extra_groupers is None:
extra_groupers = []
idx += extra_groupers

last_in_group = df.drop(TS_FIELD_NAME, axis=1).groupby(
Expand Down

0 comments on commit 0e1ea43

Please sign in to comment.