Skip to content

Commit

Permalink
PERF: Remove module-scope calendar creations.
Browse files Browse the repository at this point in the history
Remove module scope invocations of `get_calendar('NYSE')`, which cuts
zipline import time in half on my machine. This makes the zipline CLI
noticeably more responsive, and it reduces memory consumed at import
time from 130MB to 90MB.

Before:

$ time python -c 'import zipline'

real    0m1.262s
user    0m1.128s
sys     0m0.120s

After:

$ time python -c 'import zipline'

real    0m0.676s
user    0m0.536s
sys     0m0.132s
  • Loading branch information
Scott Sanderson committed Sep 5, 2016
1 parent 9a301dc commit eb5c4d5
Show file tree
Hide file tree
Showing 8 changed files with 325 additions and 102 deletions.
95 changes: 95 additions & 0 deletions tests/calendars/test_calendar_dispatcher.py
@@ -0,0 +1,95 @@
"""
Tests for TradingCalendarDispatcher.
"""
from zipline.errors import (
CalendarNameCollision,
CyclicCalendarAlias,
InvalidCalendarName,
)
from zipline.testing import ZiplineTestCase
from zipline.utils.calendars.calendar_utils import TradingCalendarDispatcher
from zipline.utils.calendars.exchange_calendar_ice import ICEExchangeCalendar


class CalendarAliasTestCase(ZiplineTestCase):
    """
    Tests for alias registration, removal, and resolution on a
    TradingCalendarDispatcher seeded with one calendar ('ICE') and a
    two-link alias chain ('ICE_ALIAS_ALIAS' -> 'ICE_ALIAS' -> 'ICE').
    """

    @classmethod
    def init_class_fixtures(cls):
        super(CalendarAliasTestCase, cls).init_class_fixtures()
        # Build the calendar a single time for the whole class so that
        # individual tests don't each pay for instantiating one.
        ice_calendar = ICEExchangeCalendar()
        alias_map = {
            'ICE_ALIAS': 'ICE',
            'ICE_ALIAS_ALIAS': 'ICE_ALIAS',
        }
        cls.dispatcher_kwargs = {
            'calendars': {'ICE': ice_calendar},
            'calendar_factories': {},
            'aliases': alias_map,
        }

    def init_instance_fixtures(self):
        super(CalendarAliasTestCase, self).init_instance_fixtures()
        # Hand each test its own copies of the dicts so that a test which
        # mutates the dispatcher cannot affect any other test.
        fresh_kwargs = {}
        for key, mapping in self.dispatcher_kwargs.items():
            fresh_kwargs[key] = mapping.copy()
        self.dispatcher = TradingCalendarDispatcher(**fresh_kwargs)

    def test_follow_alias_chain(self):
        """Every link of the alias chain resolves to the 'ICE' calendar."""
        lookup = self.dispatcher.get_calendar
        for alias in ('ICE_ALIAS', 'ICE_ALIAS_ALIAS'):
            self.assertIs(lookup('ICE'), lookup(alias))

    def test_add_new_aliases(self):
        """Aliases registered after construction resolve like seeded ones."""
        lookup = self.dispatcher.get_calendar
        add_alias = self.dispatcher.register_calendar_alias

        # The name is unknown until an alias is registered for it.
        with self.assertRaises(InvalidCalendarName):
            lookup('NOT_ICE')

        add_alias('NOT_ICE', 'ICE')
        self.assertIs(lookup('ICE'), lookup('NOT_ICE'))

        # An alias may also point at the end of an existing alias chain.
        add_alias('ICE_ALIAS_ALIAS_ALIAS', 'ICE_ALIAS_ALIAS')
        self.assertIs(lookup('ICE'), lookup('ICE_ALIAS_ALIAS_ALIAS'))

    def test_remove_aliases(self):
        """A deregistered alias can no longer be looked up."""
        self.dispatcher.deregister_calendar('ICE_ALIAS_ALIAS')
        with self.assertRaises(InvalidCalendarName):
            self.dispatcher.get_calendar('ICE_ALIAS_ALIAS')

    def test_reject_alias_that_already_exists(self):
        """Registering over a calendar name or an alias name is rejected."""
        for taken_name in ('ICE', 'ICE_ALIAS'):
            with self.assertRaises(CalendarNameCollision):
                self.dispatcher.register_calendar_alias(taken_name, 'NOT_ICE')

    def test_allow_alias_override_with_force(self):
        """With force=True an existing name can be re-pointed."""
        self.dispatcher.register_calendar_alias('ICE', 'NOT_ICE', force=True)
        # 'ICE' now refers to 'NOT_ICE', which was never registered.
        with self.assertRaises(InvalidCalendarName):
            self.dispatcher.get_calendar('ICE')

    def test_reject_cyclic_aliases(self):
        """Closing an alias loop raises and reports the full cycle."""
        register_alias = self.dispatcher.register_calendar_alias

        register_alias('A', 'B')
        register_alias('B', 'C')

        # 'C' -> 'A' would complete the loop A -> B -> C -> A.
        with self.assertRaises(CyclicCalendarAlias) as caught:
            register_alias('C', 'A')

        expected = "Cycle in calendar aliases: ['C' -> 'A' -> 'B' -> 'C']"
        self.assertEqual(str(caught.exception), expected)
3 changes: 2 additions & 1 deletion tests/data/bundles/test_quandl.py
Expand Up @@ -5,6 +5,7 @@
from toolz import merge
import toolz.curried.operator as op

from zipline import get_calendar
from zipline.data.bundles import ingest, load, bundles
from zipline.data.bundles.quandl import (
format_wiki_url,
Expand All @@ -28,7 +29,7 @@ class QuandlBundleTestCase(ZiplineTestCase):
asset_start = pd.Timestamp('2014-01', tz='utc')
asset_end = pd.Timestamp('2015-01', tz='utc')
bundle = bundles['quandl']
calendar = bundle.calendar
calendar = get_calendar(bundle.calendar_name)
start_date = bundle.start_session
end_date = bundle.end_session
api_key = 'ayylmao'
Expand Down
23 changes: 20 additions & 3 deletions zipline/__init__.py
Expand Up @@ -38,26 +38,43 @@ def cleanup_tempdir():
from . import finance
from . import gens
from . import utils
from .utils.calendars import get_calendar
from .utils.run_algo import run_algorithm
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions

# These need to happen after the other imports.
from . algorithm import TradingAlgorithm
from . import api

__version__ = get_versions()['version']
del get_versions


def load_ipython_extension(ipython):
    """
    Register the ``zipline`` magic with an IPython shell.

    Parameters
    ----------
    ipython
        Object exposing ``register_magic_function``; the magic is
        registered under the name 'zipline' with kind 'line_cell'.
    """
    # Imported here rather than at module scope, so ``zipline.__main__`` is
    # only loaded when the extension is actually requested.
    from .__main__ import zipline_magic as magic_impl

    ipython.register_magic_function(
        magic_impl,
        'line_cell',
        'zipline',
    )


# PERF: Fire a warning if calendars were instantiated during zipline import.
# Having calendars doesn't break anything per-se, but it makes zipline imports
# noticeably slower, which becomes particularly noticeable in the Zipline CLI.
from zipline.utils.calendars.calendar_utils import global_calendar_dispatcher
if global_calendar_dispatcher._calendars:
import warnings
warnings.warn(
"Found TradingCalendar instances after zipline import.\n"
"Zipline startup will be much slower until this is fixed!",
stacklevel=0,
)
del warnings
del global_calendar_dispatcher


__all__ = [
'TradingAlgorithm',
'api',
'data',
'finance',
'get_calendar',
'gens',
'run_algorithm',
'utils',
Expand Down
87 changes: 47 additions & 40 deletions zipline/data/bundles/core.py
Expand Up @@ -7,7 +7,6 @@
from contextlib2 import ExitStack
import click
import pandas as pd
from six import string_types
from toolz import curry, complement, take

from ..us_equity_pricing import (
Expand All @@ -31,7 +30,7 @@
from zipline.utils.input_validation import ensure_timestamp, optionally
import zipline.utils.paths as pth
from zipline.utils.preprocess import preprocess
from zipline.utils.calendars import get_calendar, register_calendar
from zipline.utils.calendars import get_calendar, register_calendar_alias


def asset_db_path(bundle_name, timestr, environ=None, db_version=None):
Expand Down Expand Up @@ -133,9 +132,14 @@ def ingestions_for_bundle(bundle, environ=None):
)


_BundlePayload = namedtuple(
'_BundlePayload',
'calendar start_session end_session minutes_per_day ingest create_writers',
RegisteredBundle = namedtuple(
'RegisteredBundle',
['calendar_name',
'start_session',
'end_session',
'minutes_per_day',
'ingest',
'create_writers']
)

BundleData = namedtuple(
Expand Down Expand Up @@ -220,7 +224,7 @@ def _make_bundle_core():
@curry
def register(name,
f,
calendar='NYSE',
calendar_name='NYSE',
start_session=None,
end_session=None,
minutes_per_day=390,
Expand Down Expand Up @@ -257,10 +261,9 @@ def register(name,
successful load.
show_progress : bool
Show the progress for the current load where possible.
calendar : zipline.utils.calendars.TradingCalendar or str, optional
The trading calendar to align the data to, or the name of a trading
calendar. This defaults to 'NYSE', in which case we use the NYSE
calendar.
calendar_name : str, optional
The name of a calendar used to align bundle data.
Default is 'NYSE'.
start_session : pd.Timestamp, optional
The first session for which we want data. If not provided,
or if the date lies outside the range supported by the
Expand Down Expand Up @@ -296,24 +299,17 @@ def quandl_ingest_function(...):
stacklevel=3,
)

if isinstance(calendar, string_types):
calendar = get_calendar(calendar)

# If the start and end sessions are not provided or lie outside
# the bounds of the calendar being used, set them to the first
# and last sessions of the calendar.
if start_session is None or start_session < calendar.first_session:
start_session = calendar.first_session
if end_session is None or end_session > calendar.last_session:
end_session = calendar.last_session

_bundles[name] = _BundlePayload(
calendar,
start_session,
end_session,
minutes_per_day,
f,
create_writers,
# NOTE: We don't eagerly compute calendar values here because
# `register` is called at module scope in zipline, and creating a
# calendar currently takes between 0.5 and 1 seconds, which causes a
# noticeable delay on the zipline CLI.
_bundles[name] = RegisteredBundle(
calendar_name=calendar_name,
start_session=start_session,
end_session=end_session,
minutes_per_day=minutes_per_day,
ingest=f,
create_writers=create_writers,
)
return f

Expand Down Expand Up @@ -365,9 +361,20 @@ def ingest(name,
except KeyError:
raise UnknownBundle(name)

# Resolve the calendar lazily, at ingest time, from the name stored on the
# registered bundle.
calendar = get_calendar(bundle.calendar_name)
start_session = bundle.start_session
# Read the bundle's configured *end* session. Taking ``start_session`` here
# would collapse every explicitly-bounded bundle to a single session and
# silently ignore the registered ``end_session``.
end_session = bundle.end_session

# If either bound was not provided, or lies outside the range covered by the
# calendar, fall back to the calendar's first/last session.
if start_session is None or start_session < calendar.first_session:
    start_session = calendar.first_session

if end_session is None or end_session > calendar.last_session:
    end_session = calendar.last_session

if timestamp is None:
timestamp = pd.Timestamp.utcnow()
timestamp = timestamp.tz_convert('utc').tz_localize(None)

timestr = to_bundle_ingest_dirname(timestamp)
cachepath = cache_path(name, environ=environ)
pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
Expand All @@ -387,9 +394,9 @@ def ingest(name,
)
daily_bar_writer = BcolzDailyBarWriter(
daily_bars_path,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
)
# Do an empty write to ensure that the daily ctables exist
# when we create the SQLiteAdjustmentWriter below. The
Expand All @@ -401,9 +408,9 @@ def ingest(name,
wd.ensure_dir(*minute_equity_relative(
name, timestr, environ=environ)
),
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
minutes_per_day=bundle.minutes_per_day,
)
assets_db_path = wd.getpath(*asset_db_relative(
Expand All @@ -416,7 +423,7 @@ def ingest(name,
wd.getpath(*adjustment_db_relative(
name, timestr, environ=environ)),
BcolzDailyBarReader(daily_bars_path),
bundle.calendar.all_sessions,
calendar.all_sessions,
overwrite=True,
)
)
Expand All @@ -435,9 +442,9 @@ def ingest(name,
minute_bar_writer,
daily_bar_writer,
adjustment_db_writer,
bundle.calendar,
bundle.start_session,
bundle.end_session,
calendar,
start_session,
end_session,
cache,
show_progress,
pth.data_path([name, timestr], environ=environ),
Expand Down Expand Up @@ -612,5 +619,5 @@ def should_clean(name):

bundles, register, unregister, ingest, load, clean = _make_bundle_core()

register_calendar("YAHOO", get_calendar("NYSE"))
register_calendar("QUANDL", get_calendar("NYSE"))
register_calendar_alias("YAHOO", "NYSE")
register_calendar_alias("QUANDL", "NYSE")
13 changes: 6 additions & 7 deletions zipline/data/loader.py
Expand Up @@ -45,10 +45,6 @@

ONE_HOUR = pd.Timedelta(hours=1)

nyse_cal = get_calendar('NYSE')
trading_day_nyse = nyse_cal.day
trading_days_nyse = nyse_cal.all_sessions


def last_modified_time(path):
"""
Expand Down Expand Up @@ -95,9 +91,7 @@ def has_data_for_dates(series_or_df, first_date, last_date):
return (first <= first_date) and (last >= last_date)


def load_market_data(trading_day=trading_day_nyse,
trading_days=trading_days_nyse,
bm_symbol='^GSPC'):
def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC'):
"""
Load benchmark returns and treasury yield curves for the given calendar and
benchmark symbol.
Expand Down Expand Up @@ -136,6 +130,11 @@ def load_market_data(trading_day=trading_day_nyse,
'1month', '3month', '6month',
'1year','2year','3year','5year','7year','10year','20year','30year'
"""
if trading_day is None:
trading_day = get_calendar('NYSE').trading_day
if trading_days is None:
trading_days = get_calendar('NYSE').all_sessions

first_date = trading_days[0]
now = pd.Timestamp.utcnow()

Expand Down
7 changes: 7 additions & 0 deletions zipline/errors.py
Expand Up @@ -680,6 +680,13 @@ class CalendarNameCollision(ZiplineError):
)


class CyclicCalendarAlias(ZiplineError):
    """
    Raised when calendar aliases form a cycle.

    ``msg`` is a template with a single ``{cycle}`` placeholder for the
    offending alias chain, giving messages such as
    ``Cycle in calendar aliases: ['C' -> 'A' -> 'B' -> 'C']``.
    """
    msg = "Cycle in calendar aliases: [{cycle}]"


class ScheduleFunctionWithoutCalendar(ZiplineError):
"""
Raised when schedule_function is called but there is not a calendar to be
Expand Down

0 comments on commit eb5c4d5

Please sign in to comment.