Skip to content

Commit

Permalink
ENH: vectorize dividend ratio calculation
Browse the repository at this point in the history
  • Loading branch information
Joe Jevnik committed Sep 24, 2018
1 parent a3739c3 commit fbdc5c9
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 81 deletions.
11 changes: 7 additions & 4 deletions tests/pipeline/test_us_equity_pricing_loader.py
Expand Up @@ -283,7 +283,9 @@ def make_dividends_data(cls):

@classmethod
def make_adjustment_writer_equity_daily_bar_reader(cls):
    """Build the daily bar reader handed to the adjustment writer.

    ``MockDailyBarReader`` now requires the session dates it should
    cover, so pass the full test date range explicitly.
    """
    return MockDailyBarReader(
        dates=cls.calendar_days_between(cls.START_DATE, cls.END_DATE),
    )

@classmethod
def make_equity_daily_bar_data(cls, country_code, sids):
Expand All @@ -310,14 +312,15 @@ def test_input_sanity(self):
self.assertGreaterEqual(eff_date, asset_start)
self.assertLessEqual(eff_date, asset_end)

@classmethod
def calendar_days_between(cls, start_date, end_date, shift=0):
    """Return the calendar sessions between two dates, inclusive.

    Parameters
    ----------
    start_date, end_date : pd.Timestamp
        Inclusive endpoints of the range to slice out of
        ``cls.equity_daily_bar_days``.
    shift : int, optional
        Offset (in sessions) applied to both ends of the slice.

    Returns
    -------
    pd.DatetimeIndex
        The selected sessions.

    Raises
    ------
    KeyError
        If the shifted slice would start before the first session.
    """
    slice_ = cls.equity_daily_bar_days.slice_indexer(start_date, end_date)
    start = slice_.start + shift
    stop = slice_.stop + shift
    if start < 0:
        # A negative start would silently wrap around to the end of the
        # index; reject it instead.
        raise KeyError(start_date, shift)

    return cls.equity_daily_bar_days[start:stop]

def expected_adjustments(self, start_date, end_date):
price_adjustments = {}
Expand Down
7 changes: 6 additions & 1 deletion tests/test_api_shim.py
Expand Up @@ -168,7 +168,12 @@ def make_splits_data(cls):

@classmethod
def make_adjustment_writer_equity_daily_bar_reader(cls):
    """Build the daily bar reader handed to the adjustment writer.

    ``MockDailyBarReader`` now requires the session dates it should
    cover, so pass the NYSE sessions spanning the test date range.
    """
    return MockDailyBarReader(
        dates=cls.nyse_calendar.sessions_in_range(
            cls.START_DATE,
            cls.END_DATE,
        ),
    )

@classmethod
def init_class_fixtures(cls):
Expand Down
7 changes: 6 additions & 1 deletion tests/test_bar_data.py
Expand Up @@ -950,7 +950,12 @@ def make_dividends_data(cls):

@classmethod
def make_adjustment_writer_equity_daily_bar_reader(cls):
    """Build the daily bar reader handed to the adjustment writer.

    ``MockDailyBarReader`` now requires the session dates it should
    cover, so pass the trading-calendar sessions spanning the test
    date range.
    """
    return MockDailyBarReader(
        dates=cls.trading_calendar.sessions_in_range(
            cls.START_DATE,
            cls.END_DATE,
        ),
    )

@classmethod
def make_equity_daily_bar_data(cls, country_code, sids):
Expand Down
7 changes: 6 additions & 1 deletion tests/test_benchmark.py
Expand Up @@ -75,7 +75,12 @@ def make_equity_info(cls):

@classmethod
def make_adjustment_writer_equity_daily_bar_reader(cls):
    """Build the daily bar reader handed to the adjustment writer.

    ``MockDailyBarReader`` now requires the session dates it should
    cover, so pass the trading-calendar sessions spanning the test
    date range.
    """
    return MockDailyBarReader(
        dates=cls.trading_calendar.sessions_in_range(
            cls.START_DATE,
            cls.END_DATE,
        ),
    )

@classmethod
def make_stock_dividends_data(cls):
Expand Down
7 changes: 6 additions & 1 deletion tests/test_history.py
Expand Up @@ -234,7 +234,12 @@ def make_dividends_data(cls):

@classmethod
def make_adjustment_writer_equity_daily_bar_reader(cls):
    """Build the daily bar reader handed to the adjustment writer.

    ``MockDailyBarReader`` now requires the session dates it should
    cover, so pass the trading-calendar sessions spanning the trading
    date range used by these tests.
    """
    return MockDailyBarReader(
        dates=cls.trading_calendar.sessions_in_range(
            cls.TRADING_START_DT,
            cls.TRADING_END_DT,
        ),
    )

def verify_regular_dt(self, idx, dt, mode, fields=None, assets=None):
if mode == 'daily':
Expand Down
111 changes: 51 additions & 60 deletions zipline/data/adjustments.py
Expand Up @@ -16,6 +16,7 @@
float64_dtype,
int64_dtype,
uint32_dtype,
uint64_dtype,
)
from zipline.utils.sqlite_utils import group_into_chunks, coerce_string_to_conn
from ._adjustments import load_adjustments_from_sqlite
Expand Down Expand Up @@ -349,72 +350,62 @@ def calc_dividend_ratios(self, dividends):
return pd.DataFrame(np.array(
[],
dtype=[
('sid', uint32_dtype),
('sid', uint64_dtype),
('effective_date', uint32_dtype),
('ratio', float64_dtype),
],
))
ex_dates = dividends.ex_date.values

sids = dividends.sid.values
amounts = dividends.amount.values

ratios = np.full(len(amounts), np.nan)

equity_daily_bar_reader = self._equity_daily_bar_reader

effective_dates = np.full(len(amounts), -1, dtype=int64_dtype)

calendar = self._calendar

# Calculate locs against a tz-naive cal, as the ex_dates are tz-
# naive.
#
# TODO: A better approach here would be to localize ex_date to
# the tz of the calendar, but currently get_indexer does not
# preserve tz of the target when method='bfill', which throws
# off the comparison.
tz_naive_calendar = calendar.tz_localize(None)
day_locs = tz_naive_calendar.get_indexer(ex_dates, method='bfill')

isnull = pd.isnull

for i, amount in enumerate(amounts):
sid = sids[i]
ex_date = ex_dates[i]
day_loc = day_locs[i]

prev_close_date = calendar[day_loc - 1]

try:
prev_close = equity_daily_bar_reader.get_value(
sid, prev_close_date, 'close')
if not isnull(prev_close):
ratio = 1.0 - amount / prev_close
ratios[i] = ratio
# only assign effective_date when data is found
effective_dates[i] = ex_date
except NoDataOnDate:
log.warn("Couldn't compute ratio for dividend %s" % {
'sid': sid,
'ex_date': ex_date,
'amount': amount,
})
continue

# Create a mask to filter out indices in the effective_date, sid, and
# ratio vectors for which a ratio was not calculable.
effective_mask = effective_dates != -1
effective_dates = effective_dates[effective_mask]
effective_dates = effective_dates.astype('datetime64[ns]').\
astype('datetime64[s]').astype(uint32_dtype)
sids = sids[effective_mask]
ratios = ratios[effective_mask]

pricing_reader = self._equity_daily_bar_reader
input_sids = dividends.sid.values
unique_sids, sids_ix = np.unique(input_sids, return_inverse=True)
dates = pricing_reader.sessions.values

close, = pricing_reader.load_raw_arrays(
['close'],
pd.Timestamp(dates[0], tz='UTC'),
pd.Timestamp(dates[-1], tz='UTC'),
unique_sids,
)
date_ix = np.searchsorted(dates, dividends.ex_date.values)
mask = date_ix > 0

date_ix = date_ix[mask]
sids_ix = sids_ix[mask]
input_dates = dividends.ex_date.values[mask]

# subtract one day to get the close on the day prior to the merger
previous_close = close[date_ix - 1, sids_ix]
input_sids = input_sids[mask]

amount = dividends.amount.values[mask]
ratio = 1.0 - amount / previous_close

non_nan_ratio_mask = ~np.isnan(ratio)
for ix in np.flatnonzero(~non_nan_ratio_mask):
log.warn(
"Couldn't compute ratio for dividend "
" sid={sid}, ex_date={ex_date!s}, amount={amount}",
sid=input_sids[ix],
ex_date=dates[date_ix[ix]],
amount=amount[ix],
)

positive_ratio_mask = ratio > 0
for ix in np.flatnonzero(~positive_ratio_mask):
log.warn(
"Negative dividend ratio for dividend "
" sid={sid}, ex_date={ex_date!s}, amount={amount}",
sid=input_sids[ix],
ex_date=dates[date_ix[ix]],
amount=amount[ix],
)

valid_ratio_mask = non_nan_ratio_mask & positive_ratio_mask
return pd.DataFrame({
'sid': sids,
'effective_date': effective_dates,
'ratio': ratios,
'sid': input_sids[valid_ratio_mask],
'effective_date': input_dates[valid_ratio_mask],
'ratio': ratio[valid_ratio_mask],
})

def _write_dividends(self, dividends):
Expand Down
3 changes: 1 addition & 2 deletions zipline/data/hdf5_daily_bars.py
Expand Up @@ -457,8 +457,7 @@ def dates(self):

@lazyval
def sids(self):
    # View the on-disk sid index as int64 for the rest of the pipeline.
    # ``copy=False`` lets numpy skip the copy when the stored dtype is
    # already int64 (see numpy.ndarray.astype).
    return self._country_group[INDEX][SID][:].astype('int64', copy=False)

@lazyval
def asset_start_dates(self):
Expand Down
1 change: 0 additions & 1 deletion zipline/data/session_bars.py
Expand Up @@ -33,4 +33,3 @@ def sessions(self):
All session labels (unioning the range for all assets) which the
reader can provide.
"""
pass
22 changes: 12 additions & 10 deletions zipline/testing/core.py
Expand Up @@ -981,8 +981,19 @@ def wrapped(*args, **kwargs):


class MockDailyBarReader(object):
    """Minimal stand-in for a daily bar reader used in tests.

    Every price it reports is a constant 100.0, which makes expected
    adjustment ratios trivial to compute in tests.

    Parameters
    ----------
    dates : iterable of datetime-like
        The sessions this reader claims to cover; exposed as
        ``self.sessions`` (a ``pd.DatetimeIndex``).
    """

    def __init__(self, dates):
        self.sessions = pd.DatetimeIndex(dates)

    def load_raw_arrays(self, columns, start, stop, sids):
        """Return one constant-valued array per requested column.

        Each array has shape (n_sessions_in_range, n_sids) and is
        filled with 100.0.
        """
        dates = self.sessions
        output_dates = dates[(dates >= start) & (dates <= stop)]
        return [
            np.full((len(output_dates), len(sids)), 100.0)
            for _ in columns
        ]

    def get_value(self, col, sid, dt):
        # Constant price; float to match load_raw_arrays' dtype.
        return 100.0


def create_mock_adjustment_data(splits=None, dividends=None, mergers=None):
Expand All @@ -1004,15 +1015,6 @@ def create_mock_adjustment_data(splits=None, dividends=None, mergers=None):
return splits, mergers, dividends


def create_mock_adjustments(tempdir, days, splits=None, dividends=None,
                            mergers=None):
    """Write mock adjustment data into a fresh SQLite db under ``tempdir``.

    Returns the path of the database that was written.
    """
    # NOTE(review): MockDailyBarReader now takes a required ``dates``
    # argument; confirm this helper's zero-argument call is still valid.
    db_path = tempdir.getpath("test_adjustments.db")
    adjustment_data = create_mock_adjustment_data(splits, dividends, mergers)
    writer = SQLiteAdjustmentWriter(db_path, MockDailyBarReader(), days)
    writer.write(*adjustment_data)
    return db_path


def assert_timestamp_equal(left, right, compare_nat_equal=True, msg=""):
"""
Assert that two pandas Timestamp objects are the same.
Expand Down
1 change: 1 addition & 0 deletions zipline/utils/numpy_utils.py
Expand Up @@ -33,6 +33,7 @@
# Canonical numpy dtype singletons shared across the codebase, so callers
# can compare against a single object instead of re-constructing dtypes.
bool_dtype = dtype('bool')

# uint64 was added so sid columns can hold any unsigned 64-bit id;
# presumably uint32 remains for more compact on-disk fields — confirm
# against zipline/data/adjustments.py usage.
uint32_dtype = dtype('uint32')
uint64_dtype = dtype('uint64')
int64_dtype = dtype('int64')

float32_dtype = dtype('float32')
Expand Down

0 comments on commit fbdc5c9

Please sign in to comment.