From a98b690c8a9d6783b322b4cd6acc9d9ade9aa1b1 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Fri, 30 May 2014 18:24:24 -0400 Subject: [PATCH] ENH: Enhancements to `TradingEnvironment`. Adds a suite of new functions for querying data from the trading calendar. These include: `previous_trading_day` `minutes_for_days_in_range` (minutely version of `days_in_range`) `previous_open_and_close` (inverse of `next_open_and_close`) `next_market_minute` `previous_market_minute` `open_close_window` (get a range of opens/closes with slicing semantics) `market_minute_window` (get a range of minutes with slicing semantics) Also refactors `test_finance` to move `TradingEnvironment` tests into their own TestCase. --- tests/test_finance.py | 263 +++++++++++++++++++++++++------------ zipline/finance/trading.py | 125 +++++++++++++++++- 2 files changed, 302 insertions(+), 86 deletions(-) diff --git a/tests/test_finance.py b/tests/test_finance.py index 7eec21b828..a9fa724d19 100644 --- a/tests/test_finance.py +++ b/tests/test_finance.py @@ -40,6 +40,7 @@ from zipline.gens.composites import date_sorted_sources from zipline.finance import trading +from zipline.finance.trading import TradingEnvironment from zipline.finance.execution import MarketOrder, LimitOrder from zipline.finance.trading import SimulationParameters @@ -80,88 +81,6 @@ def test_factory_daily(self): self.assertTrue(trade.dt > prev.dt) prev = trade - @timed(DEFAULT_TIMEOUT) - def test_trading_environment(self): - # holidays taken from: http://www.nyse.com/press/1191407641943.html - new_years = datetime(2008, 1, 1, tzinfo=pytz.utc) - mlk_day = datetime(2008, 1, 21, tzinfo=pytz.utc) - presidents = datetime(2008, 2, 18, tzinfo=pytz.utc) - good_friday = datetime(2008, 3, 21, tzinfo=pytz.utc) - memorial_day = datetime(2008, 5, 26, tzinfo=pytz.utc) - july_4th = datetime(2008, 7, 4, tzinfo=pytz.utc) - labor_day = datetime(2008, 9, 1, tzinfo=pytz.utc) - tgiving = datetime(2008, 11, 27, tzinfo=pytz.utc) - christmas = 
datetime(2008, 5, 25, tzinfo=pytz.utc) - a_saturday = datetime(2008, 8, 2, tzinfo=pytz.utc) - a_sunday = datetime(2008, 10, 12, tzinfo=pytz.utc) - holidays = [ - new_years, - mlk_day, - presidents, - good_friday, - memorial_day, - july_4th, - labor_day, - tgiving, - christmas, - a_saturday, - a_sunday - ] - - for holiday in holidays: - self.assertTrue(not trading.environment.is_trading_day(holiday)) - - first_trading_day = datetime(2008, 1, 2, tzinfo=pytz.utc) - last_trading_day = datetime(2008, 12, 31, tzinfo=pytz.utc) - workdays = [first_trading_day, last_trading_day] - - for workday in workdays: - self.assertTrue(trading.environment.is_trading_day(workday)) - - def test_simulation_parameters(self): - env = SimulationParameters( - period_start=datetime(2008, 1, 1, tzinfo=pytz.utc), - period_end=datetime(2008, 12, 31, tzinfo=pytz.utc), - capital_base=100000, - ) - - self.assertTrue(env.last_close.month == 12) - self.assertTrue(env.last_close.day == 31) - - @timed(DEFAULT_TIMEOUT) - def test_sim_params_days_in_period(self): - - # January 2008 - # Su Mo Tu We Th Fr Sa - # 1 2 3 4 5 - # 6 7 8 9 10 11 12 - # 13 14 15 16 17 18 19 - # 20 21 22 23 24 25 26 - # 27 28 29 30 31 - - env = SimulationParameters( - period_start=datetime(2007, 12, 31, tzinfo=pytz.utc), - period_end=datetime(2008, 1, 7, tzinfo=pytz.utc), - capital_base=100000, - ) - - expected_trading_days = ( - datetime(2007, 12, 31, tzinfo=pytz.utc), - # Skip new years - # holidays taken from: http://www.nyse.com/press/1191407641943.html - datetime(2008, 1, 2, tzinfo=pytz.utc), - datetime(2008, 1, 3, tzinfo=pytz.utc), - datetime(2008, 1, 4, tzinfo=pytz.utc), - # Skip Saturday - # Skip Sunday - datetime(2008, 1, 7, tzinfo=pytz.utc) - ) - - num_expected_trading_days = 5 - self.assertEquals(num_expected_trading_days, env.days_in_period) - np.testing.assert_array_equal(expected_trading_days, - env.trading_days.tolist()) - @timed(EXTENDED_TIMEOUT) def test_full_zipline(self): # provide enough trades to ensure all 
orders are filled. @@ -429,3 +348,183 @@ def test_blotter_processes_splits(self): self.assertEqual(300, fls_order['amount']) self.assertEqual(3.33, fls_order['limit']) self.assertEqual(2, fls_order['sid']) + + +class TradingEnvironmentTestCase(TestCase): + """ + Tests for date management utilities in zipline.finance.trading. + """ + + def setUp(self): + setup_logger(self) + + def tearDown(self): + teardown_logger(self) + + @classmethod + def setUpClass(cls): + cls.env = TradingEnvironment() + + @timed(DEFAULT_TIMEOUT) + def test_is_trading_day(self): + # holidays taken from: http://www.nyse.com/press/1191407641943.html + new_years = datetime(2008, 1, 1, tzinfo=pytz.utc) + mlk_day = datetime(2008, 1, 21, tzinfo=pytz.utc) + presidents = datetime(2008, 2, 18, tzinfo=pytz.utc) + good_friday = datetime(2008, 3, 21, tzinfo=pytz.utc) + memorial_day = datetime(2008, 5, 26, tzinfo=pytz.utc) + july_4th = datetime(2008, 7, 4, tzinfo=pytz.utc) + labor_day = datetime(2008, 9, 1, tzinfo=pytz.utc) + tgiving = datetime(2008, 11, 27, tzinfo=pytz.utc) + christmas = datetime(2008, 12, 25, tzinfo=pytz.utc) + a_saturday = datetime(2008, 8, 2, tzinfo=pytz.utc) + a_sunday = datetime(2008, 10, 12, tzinfo=pytz.utc) + holidays = [ + new_years, + mlk_day, + presidents, + good_friday, + memorial_day, + july_4th, + labor_day, + tgiving, + christmas, + a_saturday, + a_sunday + ] + + for holiday in holidays: + self.assertFalse(self.env.is_trading_day(holiday)) + + first_trading_day = datetime(2008, 1, 2, tzinfo=pytz.utc) + last_trading_day = datetime(2008, 12, 31, tzinfo=pytz.utc) + workdays = [first_trading_day, last_trading_day] + + for workday in workdays: + self.assertTrue(self.env.is_trading_day(workday)) + + def test_simulation_parameters(self): + env = SimulationParameters( + period_start=datetime(2008, 1, 1, tzinfo=pytz.utc), + period_end=datetime(2008, 12, 31, tzinfo=pytz.utc), + capital_base=100000, + ) + + self.assertTrue(env.last_close.month == 12) +
self.assertTrue(env.last_close.day == 31) + + @timed(DEFAULT_TIMEOUT) + def test_sim_params_days_in_period(self): + + # January 2008 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 5 + # 6 7 8 9 10 11 12 + # 13 14 15 16 17 18 19 + # 20 21 22 23 24 25 26 + # 27 28 29 30 31 + + env = SimulationParameters( + period_start=datetime(2007, 12, 31, tzinfo=pytz.utc), + period_end=datetime(2008, 1, 7, tzinfo=pytz.utc), + capital_base=100000, + ) + + expected_trading_days = ( + datetime(2007, 12, 31, tzinfo=pytz.utc), + # Skip new years + # holidays taken from: http://www.nyse.com/press/1191407641943.html + datetime(2008, 1, 2, tzinfo=pytz.utc), + datetime(2008, 1, 3, tzinfo=pytz.utc), + datetime(2008, 1, 4, tzinfo=pytz.utc), + # Skip Saturday + # Skip Sunday + datetime(2008, 1, 7, tzinfo=pytz.utc) + ) + + num_expected_trading_days = 5 + self.assertEqual(num_expected_trading_days, env.days_in_period) + np.testing.assert_array_equal(expected_trading_days, + env.trading_days.tolist()) + + @timed(DEFAULT_TIMEOUT) + def test_market_minute_window(self): + + # January 2008 + # Su Mo Tu We Th Fr Sa + # 1 2 3 4 5 + # 6 7 8 9 10 11 12 + # 13 14 15 16 17 18 19 + # 20 21 22 23 24 25 26 + # 27 28 29 30 31 + + us_east = pytz.timezone('US/Eastern') + utc = pytz.utc + + # 10:01 AM Eastern on January 7th. + start = us_east.localize(datetime(2008, 1, 7, 10, 1)) + utc_start = start.astimezone(utc) + + # Get the next 10 minutes + minutes = self.env.market_minute_window( + utc_start, 10, + ) + self.assertEqual(len(minutes), 10) + for i in range(10): + self.assertEqual(minutes[i], utc_start + timedelta(minutes=i)) + + # Get the previous 10 minutes. + minutes = self.env.market_minute_window( + utc_start, 10, step=-1, + ) + self.assertEqual(len(minutes), 10) + for i in range(10): + self.assertEqual(minutes[i], utc_start + timedelta(minutes=-i)) + + # Get the next 900 minutes, including utc_start, rolling over into the + # next two days.
+ # Should include: + # Today: 10:01 AM -> 4:00 PM (360 minutes) + # Tomorrow: 9:31 AM -> 4:00 PM (390 minutes, 750 total) + # Last Day: 9:31 AM -> 12:00 PM (150 minutes, 900 total) + minutes = self.env.market_minute_window( + utc_start, 900, + ) + today = self.env.market_minutes_for_day(start)[30:] + tomorrow = self.env.market_minutes_for_day( + start + timedelta(days=1) + ) + last_day = self.env.market_minutes_for_day( + start + timedelta(days=2))[:150] + + self.assertEqual(len(minutes), 900) + self.assertEqual(minutes[0], utc_start) + self.assertTrue(all(today == minutes[:360])) + self.assertTrue(all(tomorrow == minutes[360:750])) + self.assertTrue(all(last_day == minutes[750:])) + + # Get the previous 801 minutes, including utc_start, rolling over into + # Friday the 4th and Thursday the 3rd. + # Should include: + # Today: 10:01 AM -> 9:31 AM (31 minutes) + # Friday: 4:00 PM -> 9:31 AM (390 minutes, 421 total) + # Thursday: 4:00 PM -> 9:41 AM (380 minutes, 801 total) + minutes = self.env.market_minute_window( + utc_start, 801, step=-1, + ) + + today = self.env.market_minutes_for_day(start)[30::-1] + # minus an extra two days from each of these to account for the two + # weekend days we skipped + friday = self.env.market_minutes_for_day( + start + timedelta(days=-3), + )[::-1] + thursday = self.env.market_minutes_for_day( + start + timedelta(days=-4), + )[:9:-1] + + self.assertEqual(len(minutes), 801) + self.assertEqual(minutes[0], utc_start) + self.assertTrue(all(today == minutes[:31])) + self.assertTrue(all(friday == minutes[31:421])) + self.assertTrue(all(thursday == minutes[421:])) diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py index f4ace1f55e..c9cb71054c 100644 --- a/zipline/finance/trading.py +++ b/zipline/finance/trading.py @@ -17,12 +17,12 @@ import logbook import datetime -import pandas as pd - from zipline.data.loader import load_market_data from zipline.utils import tradingcalendar from zipline.utils.tradingcalendar import 
get_early_closes +import pandas as pd +import numpy as np log = logbook.Logger('Trading') @@ -166,11 +166,39 @@ def next_trading_day(self, test_date): return None + def previous_trading_day(self, test_date): + dt = self.normalize_date(test_date) + delta = datetime.timedelta(days=-1) + + while self.first_trading_day < dt: + dt += delta + if dt in self.trading_days: + return dt + + return None + def days_in_range(self, start, end): mask = ((self.trading_days >= start) & (self.trading_days <= end)) return self.trading_days[mask] + def minutes_for_days_in_range(self, start, end): + """ + Get all market minutes for the days between start and end, inclusive. + """ + start_date = self.normalize_date(start) + end_date = self.normalize_date(end) + + all_minutes = [] + for day in self.days_in_range(start_date, end_date): + day_minutes = self.market_minutes_for_day(day) + all_minutes.append(day_minutes) + + # Concatenate each day's minutes into a single UTC index. + return pd.DatetimeIndex( + np.concatenate(all_minutes), copy=False, tz='UTC', + ) + def next_open_and_close(self, start_date): """ Given the start_date, returns the next open and close of @@ -185,15 +213,104 @@ def next_open_and_close(self, start_date): return self.get_open_and_close(next_open) + def previous_open_and_close(self, start_date): + """ + Given the start_date, returns the previous open and close of the + market. + """ + previous = self.previous_trading_day(start_date) + + if previous is None: + raise NoFurtherDataError( + "Attempt to backtest beyond available history. " + "First successful date: %s" % self.first_trading_day) + return self.get_open_and_close(previous) + + def next_market_minute(self, start): + """ + Get the next market minute after @start. This is either the immediate + next minute, or the open of the next market day after start.
+ """ + next_minute = start + datetime.timedelta(minutes=1) + if self.is_market_hours(next_minute): + return next_minute + return self.next_open_and_close(start)[0] + + def previous_market_minute(self, start): + """ + Get the previous market minute before @start. This is either the immediate + previous minute, or the close of the market day before start. + """ + prev_minute = start - datetime.timedelta(minutes=1) + if self.is_market_hours(prev_minute): + return prev_minute + return self.previous_open_and_close(start)[1] + def get_open_and_close(self, day): todays_minutes = self.open_and_closes.ix[day.date()] return todays_minutes['market_open'], todays_minutes['market_close'] - def market_minutes_for_day(self, midnight): - market_open, market_close = self.get_open_and_close(midnight) + def market_minutes_for_day(self, stamp): + market_open, market_close = self.get_open_and_close(stamp) return pd.date_range(market_open, market_close, freq='T') + def open_close_window(self, start, count, offset=0, step=1): + """ + Return a DataFrame containing `count` market opens and closes, + beginning with `start` + `offset` days and continuing `step` days at + a time. + """ + # TODO: Correctly handle end of data. + start_idx = self.get_index(start) + offset + stop_idx = start_idx + (count * step) + + index = np.arange(start_idx, stop_idx, step) + + return self.open_and_closes.iloc[index] + + def market_minute_window(self, start, count, step=1): + """ + Return a DatetimeIndex containing `count` market minutes, starting with + `start` and continuing `step` minutes at a time.
+ """ + if not self.is_market_hours(start): + raise ValueError("market_minute_window starting at " + "non-market time {minute}".format(minute=start)) + + all_minutes = [] + + current_day_minutes = self.market_minutes_for_day(start) + first_minute_idx = current_day_minutes.searchsorted(start) + minutes_in_range = current_day_minutes[first_minute_idx::step] + + # Build up list of lists of days' market minutes until we have count + # minutes stored altogether. + while True: + + if len(minutes_in_range) >= count: + # Truncate off extra minutes + minutes_in_range = minutes_in_range[:count] + + all_minutes.append(minutes_in_range) + count -= len(minutes_in_range) + if count <= 0: + break + + if step > 0: + start, _ = self.next_open_and_close(start) + current_day_minutes = self.market_minutes_for_day(start) + else: + _, start = self.previous_open_and_close(start) + current_day_minutes = self.market_minutes_for_day(start) + + minutes_in_range = current_day_minutes[::step] + + # Concatenate all the accumulated minutes. + return pd.DatetimeIndex( + np.concatenate(all_minutes), copy=False, tz='UTC', + ) + def trading_day_distance(self, first_date, second_date): first_date = self.normalize_date(first_date) second_date = self.normalize_date(second_date)