Skip to content

Commit

Permalink
Merge pull request #1471 from quantopian/fix-slow-startup
Browse files Browse the repository at this point in the history
PERF: Remove import-time calendar creations.
  • Loading branch information
Scott Sanderson committed Sep 8, 2016
2 parents 9c8f0ce + 40fa6ae commit 1ccc9e4
Show file tree
Hide file tree
Showing 13 changed files with 345 additions and 109 deletions.
95 changes: 95 additions & 0 deletions tests/calendars/test_calendar_dispatcher.py
@@ -0,0 +1,95 @@
"""
Tests for TradingCalendarDispatcher.
"""
from zipline.errors import (
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.testing import ZiplineTestCase
from zipline.utils.calendars.calendar_utils import TradingCalendarDispatcher
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar


class CalendarAliasTestCase(ZiplineTestCase):
    """
    Tests for alias registration, lookup, removal, and cycle detection on
    TradingCalendarDispatcher.
    """

    @classmethod
    def init_class_fixtures(cls):
        """Build the shared dispatcher constructor arguments once."""
        super(CalendarAliasTestCase, cls).init_class_fixtures()
        # Instantiating a calendar is slow, so construct a single ICE calendar
        # for the whole class instead of one per test.
        cls.dispatcher_kwargs = {
            'calendars': {'ICE': ICEExchangeCalendar()},
            'calendar_factories': {},
            'aliases': {
                'ICE_ALIAS': 'ICE',
                'ICE_ALIAS_ALIAS': 'ICE_ALIAS',
            },
        }

    def init_instance_fixtures(self):
        """Create a fresh dispatcher for each test."""
        super(CalendarAliasTestCase, self).init_instance_fixtures()
        # Copy every dict so that a test which mutates the dispatcher's
        # state cannot leak changes into another test.
        fresh_kwargs = {}
        for key, value in self.dispatcher_kwargs.items():
            fresh_kwargs[key] = value.copy()
        self.dispatcher = TradingCalendarDispatcher(**fresh_kwargs)

    def test_follow_alias_chain(self):
        """Single- and double-hop aliases resolve to the ICE calendar."""
        for alias in ('ICE_ALIAS', 'ICE_ALIAS_ALIAS'):
            self.assertIs(
                self.dispatcher.get_calendar(alias),
                self.dispatcher.get_calendar('ICE'),
            )

    def test_add_new_aliases(self):
        """Newly registered aliases resolve, including aliases of aliases."""
        dispatcher = self.dispatcher

        # The name is unknown until an alias is registered for it.
        self.assertRaises(
            InvalidCalendarName,
            dispatcher.get_calendar,
            'NOT_ICE',
        )

        dispatcher.register_calendar_alias('NOT_ICE', 'ICE')
        self.assertIs(
            dispatcher.get_calendar('NOT_ICE'),
            dispatcher.get_calendar('ICE'),
        )

        # Extend the existing alias chain by one more hop.
        dispatcher.register_calendar_alias(
            'ICE_ALIAS_ALIAS_ALIAS',
            'ICE_ALIAS_ALIAS',
        )
        self.assertIs(
            dispatcher.get_calendar('ICE_ALIAS_ALIAS_ALIAS'),
            dispatcher.get_calendar('ICE'),
        )

    def test_remove_aliases(self):
        """A deregistered alias can no longer be looked up."""
        removed = 'ICE_ALIAS_ALIAS'
        self.dispatcher.deregister_calendar(removed)
        self.assertRaises(
            InvalidCalendarName,
            self.dispatcher.get_calendar,
            removed,
        )

    def test_reject_alias_that_already_exists(self):
        """Aliasing over a taken calendar or alias name is a collision."""
        for taken_name in ('ICE', 'ICE_ALIAS'):
            with self.assertRaises(CalendarNameCollision):
                self.dispatcher.register_calendar_alias(taken_name, 'NOT_ICE')

    def test_allow_alias_override_with_force(self):
        """With force=True an existing name may be re-pointed by an alias."""
        self.dispatcher.register_calendar_alias('ICE', 'NOT_ICE', force=True)
        # 'ICE' is now expected to resolve through 'NOT_ICE', which was never
        # registered, so the lookup must fail.
        self.assertRaises(
            InvalidCalendarName,
            self.dispatcher.get_calendar,
            'ICE',
        )

    def test_reject_cyclic_aliases(self):
        """Closing an alias loop raises with the full cycle in the message."""
        register = self.dispatcher.register_calendar_alias

        for alias, target in (('A', 'B'), ('B', 'C')):
            register(alias, target)

        # C -> A would complete the loop A -> B -> C -> A.
        with self.assertRaises(CyclicCalendarAlias) as caught:
            register('C', 'A')

        self.assertEqual(
            str(caught.exception),
            "Cycle in calendar aliases: ['C' -> 'A' -> 'B' -> 'C']",
        )
11 changes: 8 additions & 3 deletions tests/calendars/test_trading_calendar.py
Expand Up @@ -39,8 +39,12 @@
deregister_calendar,
get_calendar,
)
from zipline.utils.calendars.calendar_utils import register_calendar_type, \
_default_calendar_factories
from zipline.utils.calendars.calendar_utils import (
_default_calendar_aliases,
_default_calendar_factories,
register_calendar_type,

)
from zipline.utils.calendars.trading_calendar import days_at_time, \
TradingCalendar

Expand Down Expand Up @@ -123,7 +127,8 @@ def test_force_registration(self):

class DefaultsTestCase(TestCase):
def test_default_calendars(self):
for name in concat(_default_calendar_factories):
for name in concat([_default_calendar_factories,
_default_calendar_aliases]):
self.assertIsNotNone(get_calendar(name),
"get_calendar(%r) returned None" % name)

Expand Down
4 changes: 2 additions & 2 deletions tests/data/bundles/test_core.py
Expand Up @@ -155,7 +155,7 @@ def test_ingest(self):

@self.register(
'bundle',
calendar=calendar,
calendar_name='NYSE',
start_session=self.START_DATE,
end_session=self.END_DATE,
)
Expand Down Expand Up @@ -369,7 +369,7 @@ def _empty_ingest(self, _wrote_to=[]):
"""
if not self.bundles:
@self.register('bundle',
calendar=get_calendar('NYSE'),
calendar_name='NYSE',
start_session=pd.Timestamp('2014', tz='UTC'),
end_session=pd.Timestamp('2014', tz='UTC'))
def _(environ,
Expand Down
7 changes: 4 additions & 3 deletions tests/data/bundles/test_quandl.py
Expand Up @@ -5,6 +5,7 @@
from toolz import merge
import toolz.curried.operator as op

from zipline import get_calendar
from zipline.data.bundles import ingest, load, bundles
from zipline.data.bundles.quandl import (
format_wiki_url,
Expand All @@ -28,9 +29,9 @@ class QuandlBundleTestCase(ZiplineTestCase):
asset_start = pd.Timestamp('2014-01', tz='utc')
asset_end = pd.Timestamp('2015-01', tz='utc')
bundle = bundles['quandl']
calendar = bundle.calendar
start_date = bundle.start_session
end_date = bundle.end_session
calendar = get_calendar(bundle.calendar_name)
start_date = calendar.first_session
end_date = calendar.last_session
api_key = 'ayylmao'
columns = 'open', 'high', 'low', 'close', 'volume'

Expand Down
2 changes: 1 addition & 1 deletion tests/data/bundles/test_yahoo.py
Expand Up @@ -157,7 +157,7 @@ def adjustments_callback(request):
self.register(
'bundle',
yahoo_equities(self.symbols),
calendar=self.calendar,
calendar_name='NYSE',
start_session=self.asset_start,
end_session=self.asset_end,
)
Expand Down
18 changes: 18 additions & 0 deletions zipline/__init__.py
Expand Up @@ -38,12 +38,15 @@ def cleanup_tempdir():
from . import finance
from . import gens
from . import utils
from .utils.calendars import get_calendar
from .utils.run_algo import run_algorithm
from ._version import get_versions

# These need to happen after the other imports.
from . algorithm import TradingAlgorithm
from . import api


__version__ = get_versions()['version']
del get_versions

Expand All @@ -53,11 +56,26 @@ def load_ipython_extension(ipython):
ipython.register_magic_function(zipline_magic, 'line_cell', 'zipline')


# PERF: Fire a warning if calendars were instantiated during zipline import.
# Having calendars doesn't break anything per se, but it makes zipline imports
# noticeably slower, which is especially apparent in the Zipline CLI.
from zipline.utils.calendars.calendar_utils import global_calendar_dispatcher
if global_calendar_dispatcher._calendars:
import warnings
warnings.warn(
"Found TradingCalendar instances after zipline import.\n"
"Zipline startup will be much slower until this is fixed!",
)
del warnings
del global_calendar_dispatcher


__all__ = [
'TradingAlgorithm',
'api',
'data',
'finance',
'get_calendar',
'gens',
'run_algorithm',
'utils',
Expand Down
87 changes: 46 additions & 41 deletions zipline/data/bundles/core.py
Expand Up @@ -7,7 +7,6 @@
from contextlib2 import ExitStack
import click
import pandas as pd
from six import string_types
from toolz import curry, complement, take

from ..us_equity_pricing import (
Expand All @@ -31,7 +30,7 @@
from zipline.utils.input_validation import ensure_timestamp, optionally
import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar
from zipline.utils.calendars import get_calendar


def asset_db_path(bundle_name, timestr, environ=None, db_version=None):
Expand Down Expand Up @@ -133,9 +132,14 @@ def ingestions_for_bundle(bundle, environ=None):
)


_BundlePayload = namedtuple(
'_BundlePayload',
'calendar start_session end_session minutes_per_day ingest create_writers',
RegisteredBundle = namedtuple(
'RegisteredBundle',
['calendar_name',
'start_session',
'end_session',
'minutes_per_day',
'ingest',
'create_writers']
)

BundleData = namedtuple(
Expand Down Expand Up @@ -220,7 +224,7 @@ def _make_bundle_core():
@curry
def register(name,
f,
calendar='NYSE',
calendar_name='NYSE',
start_session=None,
end_session=None,
minutes_per_day=390,
Expand Down Expand Up @@ -257,10 +261,9 @@ def register(name,
successful load.
show_progress : bool
Show the progress for the current load where possible.
calendar : zipline.utils.calendars.TradingCalendar or str, optional
The trading calendar to align the data to, or the name of a trading
calendar. This defaults to 'NYSE', in which case we use the NYSE
calendar.
calendar_name : str, optional
The name of a calendar used to align bundle data.
Default is 'NYSE'.
start_session : pd.Timestamp, optional
The first session for which we want data. If not provided,
or if the date lies outside the range supported by the
Expand Down Expand Up @@ -296,24 +299,17 @@ def quandl_ingest_function(...):
stacklevel=3,
)

if isinstance(calendar, string_types):
calendar = get_calendar(calendar)

# If the start and end sessions are not provided or lie outside
# the bounds of the calendar being used, set them to the first
# and last sessions of the calendar.
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session

_bundles[name] = _BundlePayload(
calendar,
start_session,
end_session,
minutes_per_day,
f,
create_writers,
# NOTE: We don't eagerly compute calendar values here because
# `register` is called at module scope in zipline, and creating a
# calendar currently takes between 0.5 and 1 seconds, which causes a
# noticeable delay on the zipline CLI.
_bundles[name] = RegisteredBundle(
calendar_name=calendar_name,
start_session=start_session,
end_session=end_session,
minutes_per_day=minutes_per_day,
ingest=f,
create_writers=create_writers,
)
return f

Expand Down Expand Up @@ -365,9 +361,21 @@ def ingest(name,
except KeyError:
raise UnknownBundle(name)

calendar = get_calendar(bundle.calendar_name)

start_session = bundle.start_session
end_session = bundle.end_session

if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session

if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session

if timestamp is None:
timestamp = pd.Timestamp.utcnow()
timestamp = timestamp.tz_convert('utc').tz_localize(None)

timestr = to_bundle_ingest_dirname(timestamp)
cachepath = cache_path(name, environ=environ)
pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
Expand All @@ -387,9 +395,9 @@ def ingest(name,
)
daily_bar_writer = BcolzDailyBarWriter(
daily_bars_path,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
)
# Do an empty write to ensure that the daily ctables exist
# when we create the SQLiteAdjustmentWriter below. The
Expand All @@ -401,9 +409,9 @@ def ingest(name,
wd.ensure_dir(*minute_equity_relative(
name, timestr, environ=environ)
),
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
minutes_per_day=bundle.minutes_per_day,
)
assets_db_path = wd.getpath(*asset_db_relative(
Expand All @@ -416,7 +424,7 @@ def ingest(name,
wd.getpath(*adjustment_db_relative(
name, timestr, environ=environ)),
BcolzDailyBarReader(daily_bars_path),
bundle.calendar.all_sessions,
calendar.all_sessions,
overwrite=True,
)
)
Expand All @@ -435,9 +443,9 @@ def ingest(name,
minute_bar_writer,
daily_bar_writer,
adjustment_db_writer,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
cache,
show_progress,
pth.data_path([name, timestr], environ=environ),
Expand Down Expand Up @@ -611,6 +619,3 @@ def should_clean(name):
return BundleCore(bundles, register, unregister, ingest, load, clean)

bundles, register, unregister, ingest, load, clean = _make_bundle_core()

register_calendar("YAHOO", get_calendar("NYSE"))
register_calendar("QUANDL", get_calendar("NYSE"))

0 comments on commit 1ccc9e4

Please sign in to comment.