Skip to content

Commit

Permalink
Merge 9984d84 into 9fe8076
Browse files Browse the repository at this point in the history
  • Loading branch information
freddiev4 committed Jun 2, 2017
2 parents 9fe8076 + 9984d84 commit 346f4dc
Show file tree
Hide file tree
Showing 12 changed files with 6,190 additions and 6,922 deletions.
Binary file modified tests/resources/example_data.tar.gz
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/resources/rebuild_example_data
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import shutil
import tarfile

import click
import matplotlib
import numpy as np
import pandas as pd

Expand All @@ -14,6 +15,8 @@ from zipline.testing import test_resource_path, tmp_dir
from zipline.utils.cache import dataframe_cache


matplotlib.use('Agg')

INPUT_DATA_START_DATE = pd.Timestamp('2004-01-02')
INPUT_DATA_END_DATE = pd.Timestamp('2014-12-31')
INPUT_DATA_SYMBOLS = (
Expand Down
2 changes: 1 addition & 1 deletion tests/risk/test_risk_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def test_benchmarkrange(self):
def test_partial_month(self):

start_session = self.trading_calendar.minute_to_session_label(
pd.Timestamp("1991-01-01", tz='UTC')
pd.Timestamp("1993-02-01", tz='UTC')
)

# 1992 and 1996 were leap years
Expand Down
10 changes: 3 additions & 7 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@

from zipline import examples
from zipline.data.bundles import register, unregister
from zipline.testing import test_resource_path, copy_market_data
from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase, \
WithTradingEnvironment
from zipline.testing import test_resource_path
from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase
from zipline.testing.predicates import assert_equal
from zipline.utils.cache import dataframe_cache

Expand Down Expand Up @@ -54,10 +53,7 @@ def init_class_fixtures(cls):
serialization='pickle',
)

copy_market_data(WithTradingEnvironment.MARKET_DATA_DIR,
cls.tmpdir.getpath('example_data/root'))

@parameterized.expand(examples.EXAMPLE_MODULES)
@parameterized.expand(sorted(examples.EXAMPLE_MODULES))
def test_example(self, example_name):
actual_perf = examples.run_example(
example_name,
Expand Down
70 changes: 34 additions & 36 deletions zipline/data/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,50 +12,48 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

from six.moves.urllib_parse import urlencode
import pandas_datareader.data as pd_reader


def format_yahoo_index_url(symbol, start_date, end_date):
def get_benchmark_returns(symbol, first_date, last_date):
"""
Format a URL for querying Yahoo Finance for Index data.
"""
return (
'https://ichart.finance.yahoo.com/table.csv?' + urlencode({
's': symbol,
# start_date month, zero indexed
'a': start_date.month - 1,
# start_date day
'b': start_date.day,
# start_date year
'c': start_date.year,
# end_date month, zero indexed
'd': end_date.month - 1,
# end_date day
'e': end_date.day,
# end_date year
'f': end_date.year,
# daily frequency
'g': 'd',
})
)
Get a Series of benchmark returns from Google associated with `symbol`.
`symbol` has no default here; callers such as `load_market_data` pass `SPY`
by default.
Parameters
----------
symbol : str
Benchmark symbol for which we're getting the returns.
first_date : pd.Timestamp
First date for which we want to get data.
last_date : pd.Timestamp
Last date for which we want to get data.
def get_benchmark_returns(symbol, start_date, end_date):
"""
Get a Series of benchmark returns from Yahoo.
The earliest date for which Google has data is 2001-06-26. Google is also
missing data for 2008-12-15, 2009-08-11, and 2012-02-02, so we insert
entries for those dates and fill them in with a forward fill.
Returns a Series with returns from (start_date, end_date].
We're also limited to the last 4000 days worth of data.
start_date is **not** included because we need the close from day N - 1 to
first_date is **not** included because we need the close from day N - 1 to
compute the returns for day N.
"""
return pd.read_csv(
format_yahoo_index_url(symbol, start_date, end_date),
parse_dates=['Date'],
index_col='Date',
usecols=["Adj Close", "Date"],
squeeze=True, # squeeze tells pandas to make this a Series
# instead of a 1-column DataFrame
).sort_index().tz_localize('UTC').pct_change(1).iloc[1:]
data = pd_reader.DataReader(
symbol,
'google',
first_date,
last_date
)

data = data['Close']

data[pd.Timestamp('2008-12-15')] = np.nan
data[pd.Timestamp('2009-08-11')] = np.nan
data[pd.Timestamp('2012-02-02')] = np.nan

data = data.fillna(method='ffill')

return data.sort_index().tz_localize('UTC').pct_change(1).iloc[1:]
27 changes: 18 additions & 9 deletions zipline/data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@
)
from ..utils.deprecate import deprecated
from zipline.utils.calendars import get_calendar
from zipline.utils.paths import ensure_file

logger = logbook.Logger('Loader')

# Mapping from index symbol to appropriate bond data
INDEX_MAPPING = {
'^GSPC':
'SPY':
(treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'),
'^GSPTSE':
(treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'),
Expand All @@ -50,6 +51,7 @@ def last_modified_time(path):
"""
Get the last modified time of path as a Timestamp.
"""
ensure_file(path)
return pd.Timestamp(os.path.getmtime(path), unit='s', tz='UTC')


Expand Down Expand Up @@ -91,13 +93,13 @@ def has_data_for_dates(series_or_df, first_date, last_date):
return (first <= first_date) and (last >= last_date)


def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC',
def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
environ=None):
"""
Load benchmark returns and treasury yield curves for the given calendar and
benchmark symbol.
Benchmarks are downloaded as a Series from Yahoo Finance. Treasury curves
Benchmarks are downloaded as a Series from Google Finance. Treasury curves
are US Treasury Bond rates and are downloaded from 'www.federalreserve.gov'
by default. For Canadian exchanges, a loader for Canadian bonds from the
Bank of Canada is also available.
Expand All @@ -115,7 +117,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC',
A calendar of trading days. Also used for determining what cached
dates we should expect to have cached. Defaults to the NYSE calendar.
bm_symbol : str, optional
Symbol for the benchmark index to load. Defaults to '^GSPC', the Yahoo
Symbol for the benchmark to load. Defaults to 'SPY', the Google ticker
for the SPDR S&P 500 ETF, which tracks the S&P 500.
Returns
Expand Down Expand Up @@ -215,7 +217,13 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day,

# If no cached data was found or it was missing any dates then download the
# necessary data.
logger.info('Downloading benchmark data for {symbol!r}.', symbol=symbol)
logger.info(
('Downloading benchmark data for {symbol!r} '
'from {first_date} to {last_date}'),
symbol=symbol,
first_date=first_date - trading_day,
last_date=last_date
)

try:
data = get_benchmark_returns(
Expand All @@ -225,7 +233,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day,
)
data.to_csv(get_data_filepath(filename, environ))
except (OSError, IOError, HTTPError):
logger.exception('failed to cache the new benchmark returns')
logger.exception('Failed to cache the new benchmark returns')
raise
if not has_data_for_dates(data, first_date, last_date):
logger.warn("Still don't have expected data after redownload!")
Expand Down Expand Up @@ -260,7 +268,7 @@ def ensure_treasury_data(symbol, first_date, last_date, now, environ=None):
path.
"""
loader_module, filename, source = INDEX_MAPPING.get(
symbol, INDEX_MAPPING['^GSPC'],
symbol, INDEX_MAPPING['SPY'],
)
first_date = max(first_date, loader_module.earliest_possible_date())

Expand Down Expand Up @@ -297,7 +305,8 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name,
# yet, so don't try to read from 'path'.
if os.path.exists(path):
try:
data = from_csv(path).tz_localize('UTC')
data = from_csv(path)
data.index = data.index.to_datetime().tz_localize('UTC')
if has_data_for_dates(data, first_date, last_date):
return data

Expand Down Expand Up @@ -335,7 +344,7 @@ def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
"""Load closing prices from yahoo finance.
:Optional:
indexes : dict (Default: {'SPX': '^GSPC'})
indexes : dict (Default: {'SPX': '^SPY'})
Financial indexes to load.
stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
Expand Down
3 changes: 2 additions & 1 deletion zipline/data/treasuries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ def get_treasury_data(start_date, end_date):
"&from=" # An unbounded query is ~2x faster than specifying dates.
"&to="
"&filetype=csv"
"&label=include"
"&layout=seriescolumn"
"&type=package",
skiprows=1, # First row is a useless header.
skiprows=5, # First 5 rows are useless headers.
parse_dates=['Time Period'],
na_values=['ND'], # Presumably this stands for "No Data".
index_col=0,
Expand Down
2 changes: 1 addition & 1 deletion zipline/finance/trading.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class TradingEnvironment(object):
def __init__(
self,
load=None,
bm_symbol='^GSPC',
bm_symbol='SPY',
exchange_tz="US/Eastern",
trading_calendar=None,
asset_db_path=':memory:',
Expand Down
Loading

0 comments on commit 346f4dc

Please sign in to comment.