
Commit

Merge 1da88f8 into 9c90e66
Ayoub ENNASSIRI committed Apr 20, 2020
2 parents 9c90e66 + 1da88f8 commit ee30d5f
Showing 16 changed files with 568 additions and 331 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -99,7 +99,7 @@ Once you have your key, run the following from the command line:
.. code:: bash
$ QUANDL_API_KEY=<yourkey> zipline ingest -b quandl
- $ zipline run -f dual_moving_average.py --start 2014-1-1 --end 2018-1-1 -o dma.pickle
+ $ zipline run -f dual_moving_average.py --start 2014-1-1 --end 2018-1-1 -o dma.pickle --no-benchmark
This will download asset pricing data from `quandl`, and stream it through the algorithm
over the specified time range. Then, the resulting performance DataFrame is saved in `dma.pickle`, which you
can load and analyze from within Python.
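For example (an illustration, not part of this diff), the saved pickle is an ordinary pandas object. A minimal sketch of inspecting it, assuming the run above produced ``dma.pickle`` and using ``portfolio_value``, one of the columns zipline's performance DataFrame conventionally contains:

.. code:: python

    import pandas as pd

    # Load the performance DataFrame written by ``zipline run ... -o dma.pickle``.
    perf = pd.read_pickle('dma.pickle')

    # One row per simulated trading day; inspect the simulated portfolio value.
    print(perf['portfolio_value'].tail())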
21 changes: 17 additions & 4 deletions docs/source/beginner-tutorial.rst
@@ -157,13 +157,27 @@ on OSX):
                                  The calendar you want to use e.g. LSE. NYSE
                                  is the default.
  --print-algo / --no-print-algo  Print the algorithm to stdout.
  --benchmark-file                The csv file that contains the benchmark
                                  returns (date, returns columns)
  --benchmark-symbol              The instrument's symbol to be used as
                                  a benchmark.
                                  (should exist in the ingested bundle)
  --benchmark-sid                 The sid of the instrument to be used as a
                                  benchmark.
                                  (should exist in the ingested bundle)
  --no-benchmark                  This flag is used to set the benchmark to
                                  zero. Alpha, beta and benchmark metrics
                                  are not calculated
  --help                          Show this message and exit.
As you can see, there are a couple of flags that specify where to find your
algorithm (``-f``) as well as parameters specifying which data to use,
defaulting to ``quandl``. There are also arguments for
-the date range to run the algorithm over (``--start`` and ``--end``). Finally,
-you'll want to save the performance metrics of your algorithm so that you can
+the date range to run the algorithm over (``--start`` and ``--end``). To use a
+benchmark, you need to choose one of the benchmark options listed above. You can
+always use the ``--no-benchmark`` option, which uses zero returns as the benchmark
+(alpha, beta, and benchmark metrics are not calculated in this case).
+Finally, you'll want to save the performance metrics of your algorithm so that you can
analyze how it performed. This is done via the ``--output`` flag and will cause
it to write the performance ``DataFrame`` in the pickle Python file format.
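As an illustration (not part of this diff), a file suitable for ``--benchmark-file`` can be produced with pandas. The tests added below exercise a ``date,return`` header; ``my_benchmark.csv`` is a hypothetical filename. A minimal sketch:

.. code:: python

    import pandas as pd

    # Hypothetical daily benchmark returns in the (date, return) layout
    # exercised by the new tests in this commit.
    frame = pd.DataFrame({
        'date': pd.date_range('2016-01-04', periods=5, freq='B', tz='utc'),
        'return': [0.001, -0.002, 0.0005, 0.003, 0.0],
    })
    frame.to_csv('my_benchmark.csv', index=False)

    # Then, for example:
    # zipline run -f buyapple.py --start 2016-1-1 --end 2018-1-1 --benchmark-file my_benchmark.csv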
Note that you can also define a configuration file with these parameters that
@@ -177,13 +191,12 @@ Thus, to execute our algorithm from above and save the results to
.. code-block:: python
zipline run -f ../../zipline/examples/buyapple.py --start 2016-1-1 --end 2018-1-1 -o buyapple_out.pickle --no-benchmark
.. parsed-literal::
AAPL
[2018-01-03 04:30:50.150039] WARNING: Loader: Refusing to download new benchmark data because a download succeeded at 2018-01-03 04:01:34+00:00.
[2018-01-03 04:30:50.191479] WARNING: Loader: Refusing to download new treasury data because a download succeeded at 2018-01-03 04:01:35+00:00.
[2018-01-03 04:30:51.843465] INFO: Performance: Simulated 503 trading days out of 503.
[2018-01-03 04:30:51.843598] INFO: Performance: first open: 2016-01-04 14:31:00+00:00
[2018-01-03 04:30:51.843672] INFO: Performance: last close: 2017-12-29 21:00:00+00:00
181 changes: 181 additions & 0 deletions tests/test_benchmark.py
@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logbook
import numpy as np
import pandas as pd
from pandas.util.testing import assert_series_equal
@@ -23,17 +24,24 @@
    InvalidBenchmarkAsset)

from zipline.sources.benchmark_source import BenchmarkSource
from zipline.utils.run_algo import BenchmarkSpec

from zipline.testing import (
    MockDailyBarReader,
    create_minute_bar_data,
    parameter_space,
    tmp_bcolz_equity_minute_bar_reader,
)
from zipline.testing.predicates import assert_equal
from zipline.testing.fixtures import (
    WithAssetFinder,
    WithDataPortal,
    WithSimParams,
    WithTmpDir,
    WithTradingCalendars,
    ZiplineTestCase,
)
from zipline.testing.core import make_test_handler


class TestBenchmark(WithDataPortal, WithSimParams, WithTradingCalendars,
@@ -230,3 +238,176 @@ def test_no_stock_dividends_allowed(self):
"00:00:00. Choose another asset to use as the "
"benchmark.",
exc.exception.message)


class BenchmarkSpecTestCase(WithTmpDir,
                            WithAssetFinder,
                            ZiplineTestCase):

    @classmethod
    def init_class_fixtures(cls):
        super(BenchmarkSpecTestCase, cls).init_class_fixtures()

        zero_returns_index = pd.date_range(
            cls.START_DATE,
            cls.END_DATE,
            freq='D',
            tz='utc',
        )
        cls.zero_returns = pd.Series(index=zero_returns_index, data=0.0)

    def init_instance_fixtures(self):
        super(BenchmarkSpecTestCase, self).init_instance_fixtures()
        self.log_handler = self.enter_instance_context(make_test_handler(self))

    @classmethod
    def make_equity_info(cls):
        return pd.DataFrame.from_dict(
            {
                1: {
                    'symbol': 'A',
                    'start_date': cls.START_DATE,
                    'end_date': cls.END_DATE + pd.Timedelta(days=1),
                    "exchange": "TEST",
                },
                2: {
                    'symbol': 'B',
                    'start_date': cls.START_DATE,
                    'end_date': cls.END_DATE + pd.Timedelta(days=1),
                    "exchange": "TEST",
                }
            },
            orient='index',
        )

    def logs_at_level(self, level):
        return [
            r.message for r in self.log_handler.records if r.level == level
        ]

    def resolve_spec(self, spec):
        return spec.resolve(self.asset_finder, self.START_DATE, self.END_DATE)

    def test_no_benchmark(self):
        """Test running with no benchmark provided.

        We should get no benchmark sid and no returns series, and we should
        warn the user about how to configure a benchmark.
        """
        spec = BenchmarkSpec.from_cli_params(
            no_benchmark=False,
            benchmark_sid=None,
            benchmark_symbol=None,
            benchmark_file=None,
        )

        sid, returns = self.resolve_spec(spec)

        self.assertIs(sid, None)
        self.assertIs(returns, None)

        warnings = self.logs_at_level(logbook.WARNING)
        expected = [
            'No benchmark configured. Assuming algorithm calls set_benchmark.',
            'Pass --benchmark-sid, --benchmark-symbol, or --benchmark-file to set a source of benchmark returns.',  # noqa
            "Pass --no-benchmark to use a dummy benchmark of zero returns.",
        ]
        assert_equal(warnings, expected)

    def test_no_benchmark_explicitly_disabled(self):
        """Test running with no benchmark provided, with no_benchmark flag.
        """
        spec = BenchmarkSpec.from_cli_params(
            no_benchmark=True,
            benchmark_sid=None,
            benchmark_symbol=None,
            benchmark_file=None,
        )

        sid, returns = self.resolve_spec(spec)

        self.assertIs(sid, None)
        assert_series_equal(returns, self.zero_returns)

        warnings = self.logs_at_level(logbook.WARNING)
        expected = []
        assert_equal(warnings, expected)

    @parameter_space(case=[('A', 1), ('B', 2)])
    def test_benchmark_symbol(self, case):
        """Test running with a benchmark symbol, which should resolve to the
        sid of the matching asset.
        """
        symbol, expected_sid = case

        spec = BenchmarkSpec.from_cli_params(
            no_benchmark=False,
            benchmark_sid=None,
            benchmark_symbol=symbol,
            benchmark_file=None,
        )

        sid, returns = self.resolve_spec(spec)

        assert_equal(sid, expected_sid)
        self.assertIs(returns, None)

        warnings = self.logs_at_level(logbook.WARNING)
        expected = []
        assert_equal(warnings, expected)

    @parameter_space(input_sid=[1, 2])
    def test_benchmark_sid(self, input_sid):
        """Test running with an explicit benchmark sid, which should be
        passed through unchanged.
        """
        spec = BenchmarkSpec.from_cli_params(
            no_benchmark=False,
            benchmark_sid=input_sid,
            benchmark_symbol=None,
            benchmark_file=None,
        )

        sid, returns = self.resolve_spec(spec)

        assert_equal(sid, input_sid)
        self.assertIs(returns, None)

        warnings = self.logs_at_level(logbook.WARNING)
        expected = []
        assert_equal(warnings, expected)

    def test_benchmark_file(self):
        """Test running with a benchmark file.
        """
        csv_file_path = self.tmpdir.getpath('b.csv')
        with open(csv_file_path, 'w') as csv_file:
            csv_file.write("date,return\n"
                           "2020-01-03 00:00:00+00:00,-0.1\n"
                           "2020-01-06 00:00:00+00:00,0.333\n"
                           "2020-01-07 00:00:00+00:00,0.167\n"
                           "2020-01-08 00:00:00+00:00,0.143\n"
                           "2020-01-09 00:00:00+00:00,6.375\n")

        spec = BenchmarkSpec.from_cli_params(
            no_benchmark=False,
            benchmark_sid=None,
            benchmark_symbol=None,
            benchmark_file=csv_file_path,
        )

        sid, returns = self.resolve_spec(spec)

        self.assertIs(sid, None)

        expected_dates = pd.to_datetime(
            ['2020-01-03', '2020-01-06', '2020-01-07',
             '2020-01-08', '2020-01-09'],
            utc=True,
        )
        expected_values = [-0.1, 0.333, 0.167, 0.143, 6.375]
        expected_returns = pd.Series(index=expected_dates,
                                     data=expected_values)

        assert_series_equal(returns, expected_returns, check_names=False)

        warnings = self.logs_at_level(logbook.WARNING)
        expected = []
        assert_equal(warnings, expected)
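Taken together, these tests pin down the ``BenchmarkSpec`` contract that the CLI relies on. A minimal sketch of that flow, assuming ``asset_finder``, ``start_date``, and ``end_date`` come from an ingested bundle and the simulation parameters:

.. code:: python

    from zipline.utils.run_algo import BenchmarkSpec

    # Build a spec from CLI-style parameters; at most one benchmark source
    # (or the no_benchmark flag) should be supplied.
    spec = BenchmarkSpec.from_cli_params(
        no_benchmark=True,
        benchmark_sid=None,
        benchmark_symbol=None,
        benchmark_file=None,
    )

    # Resolve to a (sid, returns) pair. With no_benchmark=True we expect
    # sid=None and a series of zero returns over the simulation dates.
    sid, returns = spec.resolve(asset_finder, start_date, end_date)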
85 changes: 84 additions & 1 deletion tests/test_cmdline.py
@@ -1,6 +1,9 @@
import mock

import zipline.__main__ as main
import zipline
from zipline.testing import ZiplineTestCase
from zipline.testing.fixtures import WithTmpDir
from zipline.testing.predicates import (
    assert_equal,
    assert_raises_str,
@@ -13,7 +16,7 @@
)


-class CmdLineTestCase(ZiplineTestCase):
+class CmdLineTestCase(WithTmpDir, ZiplineTestCase):

    def init_instance_fixtures(self):
        super(CmdLineTestCase, self).init_instance_fixtures()
@@ -136,3 +139,83 @@ def test_user_input(self):
        assert_equal(zipline.extension_args.second.b.a, 'blah5')
        assert_equal(zipline.extension_args.a1, 'value1')
        assert_equal(zipline.extension_args.b_, 'value2')

    def test_benchmark_argument_handling(self):
        runner = CliRunner()

        # CLI validates that the algo file exists, so create an empty file.
        algo_path = self.tmpdir.getpath('dummy_algo.py')
        with open(algo_path, 'w'):
            pass

        def run_and_get_benchmark_spec(benchmark_args):
            """
            Run the cli, mocking out ``main._run``, and return the
            benchmark_spec passed to ``main._run``.
            """
            args = [
                '--no-default-extension',
                'run',
                '-s', '2014-01-02',
                '-e 2015-01-02',
                '--algofile', algo_path,
            ] + benchmark_args

            mock_spec = mock.create_autospec(main._run)

            with mock.patch.object(main, '_run', spec=mock_spec) as mock_run:
                result = runner.invoke(main.main, args, catch_exceptions=False)

            if result.exit_code != 0:
                raise AssertionError(
                    "Cli run failed with {exc}\n\n"
                    "Output was:\n\n"
                    "{output}".format(exc=result.exception,
                                      output=result.output),
                )

            mock_run.assert_called_once()

            return mock_run.call_args[1]['benchmark_spec']

        spec = run_and_get_benchmark_spec([])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, None)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, False)

        spec = run_and_get_benchmark_spec(['--no-benchmark'])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, None)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, True)

        for symbol in 'AAPL', 'SPY':
            spec = run_and_get_benchmark_spec(['--benchmark-symbol', symbol])
            assert_equal(spec.benchmark_returns, None)
            assert_equal(spec.benchmark_file, None)
            assert_equal(spec.benchmark_sid, None)
            assert_equal(spec.benchmark_symbol, symbol)
            assert_equal(spec.no_benchmark, False)

        for sid in 2, 3:
            spec = run_and_get_benchmark_spec(['--benchmark-sid', str(sid)])
            assert_equal(spec.benchmark_returns, None)
            assert_equal(spec.benchmark_file, None)
            assert_equal(spec.benchmark_sid, sid)
            assert_equal(spec.benchmark_symbol, None)
            assert_equal(spec.no_benchmark, False)

        # CLI also validates the returns file exists.
        bm_path = self.tmpdir.getpath('returns.csv')
        with open(bm_path, 'w'):
            pass

        spec = run_and_get_benchmark_spec(['--benchmark-file', bm_path])
        assert_equal(spec.benchmark_returns, None)
        assert_equal(spec.benchmark_file, bm_path)
        assert_equal(spec.benchmark_sid, None)
        assert_equal(spec.benchmark_symbol, None)
        assert_equal(spec.no_benchmark, False)
7 changes: 3 additions & 4 deletions tests/test_examples.py
@@ -26,10 +26,10 @@
from zipline.testing.fixtures import (
    WithTmpDir,
    ZiplineTestCase,
+   read_checked_in_benchmark_data,
)
from zipline.testing.predicates import assert_equal
from zipline.utils.cache import dataframe_cache
-from zipline.utils.paths import update_modified_time

# Otherwise the next line sometimes complains about being run too late.
@@ -61,9 +61,7 @@ def init_class_fixtures(cls):
            serialization='pickle',
        )

-        update_modified_time(
-            cls.tmpdir.getpath('example_data/root/data/SPY_benchmark.csv'),
-        )
+        cls.benchmark_returns = read_checked_in_benchmark_data()

    @parameterized.expand(sorted(EXAMPLE_MODULES))
    def test_example(self, example_name):
Expand All @@ -75,6 +73,7 @@ def test_example(self, example_name):
            environ={
                'ZIPLINE_ROOT': self.tmpdir.getpath('example_data/root'),
            },
+           benchmark_returns=self.benchmark_returns,
        )
        expected_perf = self.expected_perf[example_name]
        # Exclude positions column as the positions do not always have the
