Merge 9984d84 into 9fe8076

quantopian · Jun 2, 2017 · 346f4dc · 346f4dc
2 parents 9fe8076 + 9984d84
commit 346f4dc
Show file tree

Hide file tree

Showing 12 changed files with 6,190 additions and 6,922 deletions.
diff --git a/tests/resources/example_data.tar.gz b/tests/resources/example_data.tar.gz
diff --git a/tests/resources/rebuild_example_data b/tests/resources/rebuild_example_data
@@ -5,6 +5,7 @@ import shutil
 import tarfile
 
 import click
+import matplotlib
 import numpy as np
 import pandas as pd
 
@@ -14,6 +15,8 @@ from zipline.testing import test_resource_path, tmp_dir
 from zipline.utils.cache import dataframe_cache
 
 
+matplotlib.use('Agg')
+
 INPUT_DATA_START_DATE = pd.Timestamp('2004-01-02')
 INPUT_DATA_END_DATE = pd.Timestamp('2014-12-31')
 INPUT_DATA_SYMBOLS = (

diff --git a/tests/risk/test_risk_period.py b/tests/risk/test_risk_period.py
@@ -385,7 +385,7 @@ def test_benchmarkrange(self):
     def test_partial_month(self):
 
         start_session = self.trading_calendar.minute_to_session_label(
-            pd.Timestamp("1991-01-01", tz='UTC')
+            pd.Timestamp("1993-02-01", tz='UTC')
         )
 
         # 1992 and 1996 were leap years

diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -21,9 +21,8 @@
 
 from zipline import examples
 from zipline.data.bundles import register, unregister
-from zipline.testing import test_resource_path, copy_market_data
-from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase, \
-    WithTradingEnvironment
+from zipline.testing import test_resource_path
+from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase
 from zipline.testing.predicates import assert_equal
 from zipline.utils.cache import dataframe_cache
 
@@ -54,10 +53,7 @@ def init_class_fixtures(cls):
             serialization='pickle',
         )
 
-        copy_market_data(WithTradingEnvironment.MARKET_DATA_DIR,
-                         cls.tmpdir.getpath('example_data/root'))
-
-    @parameterized.expand(examples.EXAMPLE_MODULES)
+    @parameterized.expand(sorted(examples.EXAMPLE_MODULES))
     def test_example(self, example_name):
         actual_perf = examples.run_example(
             example_name,

diff --git a/zipline/data/benchmarks.py b/zipline/data/benchmarks.py
@@ -12,50 +12,48 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numpy as np
 import pandas as pd
 
-from six.moves.urllib_parse import urlencode
+import pandas_datareader.data as pd_reader
 
 
-def format_yahoo_index_url(symbol, start_date, end_date):
+def get_benchmark_returns(symbol, first_date, last_date):
     """
-    Format a URL for querying Yahoo Finance for Index data.
-    """
-    return (
-        'https://ichart.finance.yahoo.com/table.csv?' + urlencode({
-            's': symbol,
-            # start_date month, zero indexed
-            'a': start_date.month - 1,
-            # start_date day
-            'b': start_date.day,
-            # start_date year
-            'c': start_date.year,
-            # end_date month, zero indexed
-            'd': end_date.month - 1,
-            # end_date day
-            'e': end_date.day,
-            # end_date year
-            'f': end_date.year,
-            # daily frequency
-            'g': 'd',
-        })
-    )
+    Get a Series of benchmark returns from Google associated with `symbol`.
+    No default here; `load_market_data` defaults its `bm_symbol` to `SPY`.
 
+    Parameters
+    ----------
+    symbol : str
+        Benchmark symbol for which we're getting the returns.
+    first_date : pd.Timestamp
+        First date for which we want to get data.
+    last_date : pd.Timestamp
+        Last date for which we want to get data.
 
-def get_benchmark_returns(symbol, start_date, end_date):
-    """
-    Get a Series of benchmark returns from Yahoo.
+    The furthest date that Google goes back to is 2001-06-26. It has missing
+    data for 2008-12-15, 2009-08-11, and 2012-02-02, so we add NaN entries for
+    those dates and forward-fill them.
 
-    Returns a Series with returns from (start_date, end_date].
+    We're also limited to the last 4000 days worth of data.
 
-    start_date is **not** included because we need the close from day N - 1 to
+    first_date is **not** included because we need the close from day N - 1 to
     compute the returns for day N.
     """
-    return pd.read_csv(
-        format_yahoo_index_url(symbol, start_date, end_date),
-        parse_dates=['Date'],
-        index_col='Date',
-        usecols=["Adj Close", "Date"],
-        squeeze=True,  # squeeze tells pandas to make this a Series
-                       # instead of a 1-column DataFrame
-    ).sort_index().tz_localize('UTC').pct_change(1).iloc[1:]
+    data = pd_reader.DataReader(
+        symbol,
+        'google',
+        first_date,
+        last_date
+    )
+
+    data = data['Close']
+
+    data[pd.Timestamp('2008-12-15')] = np.nan
+    data[pd.Timestamp('2009-08-11')] = np.nan
+    data[pd.Timestamp('2012-02-02')] = np.nan
+
+    data = data.fillna(method='ffill')
+
+    return data.sort_index().tz_localize('UTC').pct_change(1).iloc[1:]
diff --git a/zipline/data/loader.py b/zipline/data/loader.py
@@ -30,12 +30,13 @@
 )
 from ..utils.deprecate import deprecated
 from zipline.utils.calendars import get_calendar
+from zipline.utils.paths import ensure_file
 
 logger = logbook.Logger('Loader')
 
 # Mapping from index symbol to appropriate bond data
 INDEX_MAPPING = {
-    '^GSPC':
+    'SPY':
     (treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'),
     '^GSPTSE':
     (treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'),
@@ -50,6 +51,7 @@ def last_modified_time(path):
     """
     Get the last modified time of path as a Timestamp.
     """
+    ensure_file(path)
     return pd.Timestamp(os.path.getmtime(path), unit='s', tz='UTC')
 
 
@@ -91,13 +93,13 @@ def has_data_for_dates(series_or_df, first_date, last_date):
     return (first <= first_date) and (last >= last_date)
 
 
-def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC',
+def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
                      environ=None):
     """
     Load benchmark returns and treasury yield curves for the given calendar and
     benchmark symbol.
 
-    Benchmarks are downloaded as a Series from Yahoo Finance.  Treasury curves
+    Benchmarks are downloaded as a Series from Google Finance.  Treasury curves
     are US Treasury Bond rates and are downloaded from 'www.federalreserve.gov'
     by default.  For Canadian exchanges, a loader for Canadian bonds from the
     Bank of Canada is also available.
@@ -115,7 +117,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='^GSPC',
         A calendar of trading days.  Also used for determining what cached
         dates we should expect to have cached. Defaults to the NYSE calendar.
     bm_symbol : str, optional
-        Symbol for the benchmark index to load.  Defaults to '^GSPC', the Yahoo
+        Symbol for the benchmark index to load.  Defaults to 'SPY', the Google
         ticker for the S&P 500.
 
     Returns
@@ -215,7 +217,13 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day,
 
     # If no cached data was found or it was missing any dates then download the
     # necessary data.
-    logger.info('Downloading benchmark data for {symbol!r}.', symbol=symbol)
+    logger.info(
+        ('Downloading benchmark data for {symbol!r} '
+            'from {first_date} to {last_date}'),
+        symbol=symbol,
+        first_date=first_date - trading_day,
+        last_date=last_date
+    )
 
     try:
         data = get_benchmark_returns(
@@ -225,7 +233,7 @@ def ensure_benchmark_data(symbol, first_date, last_date, now, trading_day,
         )
         data.to_csv(get_data_filepath(filename, environ))
     except (OSError, IOError, HTTPError):
-        logger.exception('failed to cache the new benchmark returns')
+        logger.exception('Failed to cache the new benchmark returns')
         raise
     if not has_data_for_dates(data, first_date, last_date):
         logger.warn("Still don't have expected data after redownload!")
@@ -260,7 +268,7 @@ def ensure_treasury_data(symbol, first_date, last_date, now, environ=None):
     path.
     """
     loader_module, filename, source = INDEX_MAPPING.get(
-        symbol, INDEX_MAPPING['^GSPC'],
+        symbol, INDEX_MAPPING['SPY'],
     )
     first_date = max(first_date, loader_module.earliest_possible_date())
 
@@ -297,7 +305,8 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name,
     # yet, so don't try to read from 'path'.
     if os.path.exists(path):
         try:
-            data = from_csv(path).tz_localize('UTC')
+            data = from_csv(path)
+            data.index = data.index.to_datetime().tz_localize('UTC')
             if has_data_for_dates(data, first_date, last_date):
                 return data
 
@@ -335,7 +344,7 @@ def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
     """Load closing prices from yahoo finance.
 
     :Optional:
-        indexes : dict (Default: {'SPX': '^GSPC'})
+        indexes : dict (Default: {'SPX': '^SPY'})
             Financial indexes to load.
         stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                  'XOM', 'AA', 'JNJ', 'PEP', 'KO'])

diff --git a/zipline/data/treasuries.py b/zipline/data/treasuries.py
@@ -67,9 +67,10 @@ def get_treasury_data(start_date, end_date):
         "&from="  # An unbounded query is ~2x faster than specifying dates.
         "&to="
         "&filetype=csv"
+        "&label=include"
         "&layout=seriescolumn"
         "&type=package",
-        skiprows=1,  # First row is a useless header.
+        skiprows=5,  # First 5 rows are useless headers.
         parse_dates=['Time Period'],
         na_values=['ND'],  # Presumably this stands for "No Data".
         index_col=0,

diff --git a/zipline/finance/trading.py b/zipline/finance/trading.py
@@ -78,7 +78,7 @@ class TradingEnvironment(object):
     def __init__(
         self,
         load=None,
-        bm_symbol='^GSPC',
+        bm_symbol='SPY',
         exchange_tz="US/Eastern",
         trading_calendar=None,
         asset_db_path=':memory:',