Merge pull request #1525 from quantopian/fix_estimates_overwrites_bug
Fix estimates overwrites bug
Maya Tydykov committed Oct 10, 2016
2 parents 00a053c + 4efe990 commit fe00452
Showing 2 changed files with 86 additions and 44 deletions.
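
As the diff below shows, the loader previously assigned a fresh one-element list at each next-quarter start index (`col_to_overwrites[column_name][next_qtr_start_idx] = [...]`), so when two sids had a quarter boundary on the same date, the later sid's overwrite silently replaced the earlier one's. The fix seeds every column with a `defaultdict(list)` up front and appends overwrites instead, and the tests add a third sid whose quarter boundaries deliberately collide with sid 0's.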
118 changes: 78 additions & 40 deletions tests/pipeline/test_quarters_estimates.py
@@ -819,7 +819,7 @@ def make_events(cls):
                 pd.Timestamp('2015-01-10'),
                 pd.Timestamp('2015-01-20'),
                 pd.Timestamp('2015-01-20')],
-            'estimate': [10., 11.] + [20., 21.],
+            'estimate': [100., 101.] + [200., 201.],
             FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
             FISCAL_YEAR_FIELD_NAME: 2015,
             SID_FIELD_NAME: 0,
@@ -833,12 +833,30 @@ def make_events(cls):
             EVENT_DATE_FIELD_NAME:
                 [pd.Timestamp('2015-01-12'), pd.Timestamp('2015-01-12'),
                  pd.Timestamp('2015-01-15'), pd.Timestamp('2015-01-15')],
-            'estimate': [10., 11.] + [30., 31.],
+            'estimate': [110., 111.] + [310., 311.],
             FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [3] * 2,
             FISCAL_YEAR_FIELD_NAME: 2015,
             SID_FIELD_NAME: 1
         })
-        return pd.concat([sid_0_timeline, sid_1_timeline])
+
+        # Extra sid to make sure we have correct overwrites when sid quarter
+        # boundaries collide.
+        sid_3_timeline = pd.DataFrame({
+            TS_FIELD_NAME: [pd.Timestamp('2015-01-05'),
+                            pd.Timestamp('2015-01-07'),
+                            pd.Timestamp('2015-01-05'),
+                            pd.Timestamp('2015-01-17')],
+            EVENT_DATE_FIELD_NAME:
+                [pd.Timestamp('2015-01-10'),
+                 pd.Timestamp('2015-01-10'),
+                 pd.Timestamp('2015-01-20'),
+                 pd.Timestamp('2015-01-20')],
+            'estimate': [120., 121.] + [220., 221.],
+            FISCAL_QUARTER_FIELD_NAME: [1] * 2 + [2] * 2,
+            FISCAL_YEAR_FIELD_NAME: 2015,
+            SID_FIELD_NAME: 2
+        })
+        return pd.concat([sid_0_timeline, sid_1_timeline, sid_3_timeline])

     @classmethod
     def make_expected_timelines(cls):
@@ -925,50 +943,59 @@ def make_expected_timelines(cls):
         oneq_previous = pd.concat([
             cls.create_expected_df(
                 [(0, np.NaN, cls.window_test_start_date),
-                 (1, np.NaN, cls.window_test_start_date)],
+                 (1, np.NaN, cls.window_test_start_date),
+                 (2, np.NaN, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-09')
             ),
             cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-10')),
-                 (1, 11, pd.Timestamp('2015-01-12'))],
+                [(0, 101, pd.Timestamp('2015-01-10')),
+                 (1, 111, pd.Timestamp('2015-01-12')),
+                 (2, 121, pd.Timestamp('2015-01-10'))],
                 pd.Timestamp('2015-01-12')
             ),
             cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-10')),
-                 (1, 11, pd.Timestamp('2015-01-12'))],
+                [(0, 101, pd.Timestamp('2015-01-10')),
+                 (1, 111, pd.Timestamp('2015-01-12')),
+                 (2, 121, pd.Timestamp('2015-01-10'))],
                 pd.Timestamp('2015-01-13')
             ),
             cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-10')),
-                 (1, 11, pd.Timestamp('2015-01-12'))],
+                [(0, 101, pd.Timestamp('2015-01-10')),
+                 (1, 111, pd.Timestamp('2015-01-12')),
+                 (2, 121, pd.Timestamp('2015-01-10'))],
                 pd.Timestamp('2015-01-14')
             ),
             cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-10')),
-                 (1, 31, pd.Timestamp('2015-01-15'))],
+                [(0, 101, pd.Timestamp('2015-01-10')),
+                 (1, 311, pd.Timestamp('2015-01-15')),
+                 (2, 121, pd.Timestamp('2015-01-10'))],
                 pd.Timestamp('2015-01-15')
             ),
             cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-10')),
-                 (1, 31, pd.Timestamp('2015-01-15'))],
+                [(0, 101, pd.Timestamp('2015-01-10')),
+                 (1, 311, pd.Timestamp('2015-01-15')),
+                 (2, 121, pd.Timestamp('2015-01-10'))],
                 pd.Timestamp('2015-01-16')
             ),
             cls.create_expected_df(
-                [(0, 21, pd.Timestamp('2015-01-17')),
-                 (1, 31, pd.Timestamp('2015-01-15'))],
+                [(0, 201, pd.Timestamp('2015-01-17')),
+                 (1, 311, pd.Timestamp('2015-01-15')),
+                 (2, 221, pd.Timestamp('2015-01-17'))],
                 pd.Timestamp('2015-01-20')
             ),
         ])

         twoq_previous = pd.concat(
             [cls.create_expected_df(
                 [(0, np.NaN, cls.window_test_start_date),
-                 (1, np.NaN, cls.window_test_start_date)],
+                 (1, np.NaN, cls.window_test_start_date),
+                 (2, np.NaN, cls.window_test_start_date)],
                 end_date
             ) for end_date in pd.date_range('2015-01-09', '2015-01-19')] +
             [cls.create_expected_df(
-                [(0, 11, pd.Timestamp('2015-01-20')),
-                 (1, np.NaN, cls.window_test_start_date)],
+                [(0, 101, pd.Timestamp('2015-01-20')),
+                 (1, np.NaN, cls.window_test_start_date),
+                 (2, 121, pd.Timestamp('2015-01-20'))],
                 pd.Timestamp('2015-01-20')
             )]
         )
@@ -987,55 +1014,66 @@ def make_loader(cls, events, columns):
     def make_expected_timelines(cls):
         oneq_next = pd.concat([
             cls.create_expected_df(
-                [(0, 10, cls.window_test_start_date),
-                 (0, 11, pd.Timestamp('2015-01-07')),
-                 (1, 10, pd.Timestamp('2015-01-09'))],
+                [(0, 100, cls.window_test_start_date),
+                 (0, 101, pd.Timestamp('2015-01-07')),
+                 (1, 110, pd.Timestamp('2015-01-09')),
+                 (2, 120, cls.window_test_start_date),
+                 (2, 121, pd.Timestamp('2015-01-07'))],
                 pd.Timestamp('2015-01-09')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (1, 10, pd.Timestamp('2015-01-09')),
-                 (1, 11, pd.Timestamp('2015-01-12'))],
+                [(0, 200, cls.window_test_start_date),
+                 (1, 110, pd.Timestamp('2015-01-09')),
+                 (1, 111, pd.Timestamp('2015-01-12')),
+                 (2, 220, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-12')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (1, 30, pd.Timestamp('2015-01-09'))],
+                [(0, 200, cls.window_test_start_date),
+                 (1, 310, pd.Timestamp('2015-01-09')),
+                 (2, 220, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-13')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (1, 30, pd.Timestamp('2015-01-09'))],
+                [(0, 200, cls.window_test_start_date),
+                 (1, 310, pd.Timestamp('2015-01-09')),
+                 (2, 220, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-14')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (1, 30, pd.Timestamp('2015-01-09')),
-                 (1, 31, pd.Timestamp('2015-01-15'))],
+                [(0, 200, cls.window_test_start_date),
+                 (1, 310, pd.Timestamp('2015-01-09')),
+                 (1, 311, pd.Timestamp('2015-01-15')),
+                 (2, 220, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-15')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (1, np.NaN, cls.window_test_start_date)],
+                [(0, 200, cls.window_test_start_date),
+                 (1, np.NaN, cls.window_test_start_date),
+                 (2, 220, cls.window_test_start_date)],
                 pd.Timestamp('2015-01-16')
             ),
             cls.create_expected_df(
-                [(0, 20, cls.window_test_start_date),
-                 (0, 21, pd.Timestamp('2015-01-17')),
-                 (1, np.NaN, cls.window_test_start_date)],
+                [(0, 200, cls.window_test_start_date),
+                 (0, 201, pd.Timestamp('2015-01-17')),
+                 (1, np.NaN, cls.window_test_start_date),
+                 (2, 220, cls.window_test_start_date),
+                 (2, 221, pd.Timestamp('2015-01-17'))],
                 pd.Timestamp('2015-01-20')
             ),
         ])

         twoq_next = pd.concat(
             [cls.create_expected_df(
-                [(0, 20, pd.Timestamp(cls.window_test_start_date)),
-                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date))],
+                [(0, 200, pd.Timestamp(cls.window_test_start_date)),
+                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date)),
+                 (2, 220, pd.Timestamp(cls.window_test_start_date))],
                 pd.Timestamp('2015-01-09')
             )] +
             [cls.create_expected_df(
                 [(0, np.NaN, pd.Timestamp(cls.window_test_start_date)),
-                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date))],
+                 (1, np.NaN, pd.Timestamp(cls.window_test_start_date)),
+                 (2, np.NaN, pd.Timestamp(cls.window_test_start_date))],
                 end_date
             ) for end_date in pd.date_range('2015-01-12', '2015-01-20')]
         )
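
The new sid (SID 2, built as `sid_3_timeline`) deliberately shares sid 0's event dates of 2015-01-10 and 2015-01-20, so the two sids' quarter boundaries land on the same next-quarter start indices, and the expected timelines above now carry overwrites for both sids on those dates. The estimate values are also renumbered (10., 11., 20., 21. become 100., 101., 200., 201., and so on) so that every sid/quarter pair has a distinct value in the expected output.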
12 changes: 8 additions & 4 deletions zipline/pipeline/loaders/earnings_estimates.py
@@ -280,6 +280,10 @@ def get_adjustments(self,

         sid_to_idx = dict(zip(assets, range(len(assets))))

+        for column in columns:
+            column_name = self.name_map[column.name]
+            col_to_overwrites[column_name] = defaultdict(list)
+
         def collect_adjustments(group):
             next_qtr_start_indices = dates.searchsorted(
                 group[EVENT_DATE_FIELD_NAME].values,
@@ -358,7 +362,7 @@ def create_overwrite_for_quarter(self,
             # overwrite all values going up to the starting index of
             # that quarter with estimates for that quarter.
             if requested_quarter in quarters_with_estimates_for_sid:
-                col_to_overwrites[column_name][next_qtr_start_idx] = [
+                col_to_overwrites[column_name][next_qtr_start_idx].append(
                     self.create_overwrite_for_estimate(
                         col,
                         column_name,
@@ -368,19 +372,19 @@ def create_overwrite_for_quarter(self,
                         sid,
                         sid_idx
                     ),
-                ]
+                )
             # There are no estimates for the quarter. Overwrite all
             # values going up to the starting index of that quarter
             # with the missing value for this column.
             else:
-                col_to_overwrites[column_name][next_qtr_start_idx] = [
+                col_to_overwrites[column_name][next_qtr_start_idx].append(
                     self.overwrite_with_null(
                         col,
                         last_per_qtr.index,
                         next_qtr_start_idx,
                         sid_idx
                     ),
-                ]
+                )

     def overwrite_with_null(self,
                             column,
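
Below is a minimal sketch of the pattern these hunks change. The column name 'estimate' matches the tests, but the index 5 and the string payloads are made-up stand-ins for the loader's real adjustment objects; only the assignment-versus-append structure mirrors the diff.

from collections import defaultdict

col_to_overwrites = {}

# Old pattern: plain assignment. If sid 0 and sid 2 share a quarter
# boundary, the second assignment at index 5 discards sid 0's overwrite.
col_to_overwrites['estimate'] = {}
col_to_overwrites['estimate'][5] = ['overwrite-sid-0']
col_to_overwrites['estimate'][5] = ['overwrite-sid-2']
assert col_to_overwrites['estimate'][5] == ['overwrite-sid-2']  # sid 0 lost

# New pattern: pre-seed each column with a defaultdict(list) and append,
# so colliding quarter boundaries accumulate overwrites for every sid.
col_to_overwrites['estimate'] = defaultdict(list)
col_to_overwrites['estimate'][5].append('overwrite-sid-0')
col_to_overwrites['estimate'][5].append('overwrite-sid-2')
assert col_to_overwrites['estimate'][5] == ['overwrite-sid-0',
                                            'overwrite-sid-2']  # both kept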