Merge 412870a into 4386895

quantopian · Oct 14, 2016 · a07cfcb · a07cfcb
2 parents 4386895 + 412870a
commit a07cfcb
Show file tree

Hide file tree

Showing 17 changed files with 742 additions and 49 deletions.
diff --git a/tests/pipeline/test_alias.py b/tests/pipeline/test_alias.py
@@ -0,0 +1,65 @@
+from nose.tools import nottest
+import numpy as np
+
+from zipline.testing.predicates import assert_equal
+from zipline.pipeline import Classifier, Factor, Filter
+from zipline.utils.numpy_utils import float64_dtype, int64_dtype
+
+from .base import BasePipelineTestCase
+
+
+@nottest
+class BaseAliasTestCase(BasePipelineTestCase):
+    """Alias checks shared by every concrete ``*Alias`` test case.
+
+    Subclasses are expected to provide a nested ``Term`` class; the methods
+    below instantiate it through ``self.Term``.
+    """
+
+    def test_alias(self):
+        # Seed the workspace with values for the un-aliased term and check
+        # that the alias produces exactly those values.
+        term = self.Term()
+        aliased = term.alias('ayy lmao')
+        values = np.random.RandomState(5).randn(5, 5)
+        all_ones_mask = self.build_mask(np.ones((5, 5)))
+
+        self.check_terms(
+            terms={'f_alias': aliased},
+            expected={'f_alias': values},
+            initial_workspace={term: values},
+            mask=all_ones_mask,
+        )
+
+    def test_repr(self):
+        # The expected repr embeds the name of the class referenced by
+        # ``self.Term.__base__``.
+        actual = repr(self.Term().alias('ayy lmao'))
+        base_name = self.Term.__base__.__name__
+        expected = "Aliased%s(Term(...), name='ayy lmao')" % (base_name,)
+        assert_equal(actual, expected)
+
+    def test_short_repr(self):
+        # short_repr() of an alias should be exactly the alias name.
+        for alias_name in ('a', 'b'):
+            aliased = self.Term().alias(alias_name)
+            assert_equal(aliased.short_repr(), alias_name)
+
+
+class TestFactorAlias(BaseAliasTestCase):
+    """Run the ``BaseAliasTestCase`` checks against a Factor term."""
+
+    # Minimal Factor: float64 output, no inputs, zero window length.
+    class Term(Factor):
+        dtype = float64_dtype
+        inputs = ()
+        window_length = 0
+
+
+class TestFilterAlias(BaseAliasTestCase):
+    """Run the ``BaseAliasTestCase`` checks against a Filter term."""
+
+    # Minimal Filter: no inputs, zero window length; no dtype is set here.
+    class Term(Filter):
+        inputs = ()
+        window_length = 0
+
+
+class TestClassifierAlias(BaseAliasTestCase):
+    """Run the ``BaseAliasTestCase`` checks against a Classifier term."""
+
+    # Minimal Classifier: int64 output with -1 as the missing value, no
+    # inputs, zero window length.
+    class Term(Classifier):
+        dtype = int64_dtype
+        inputs = ()
+        window_length = 0
+        missing_value = -1
diff --git a/tests/pipeline/test_classifier.py b/tests/pipeline/test_classifier.py
@@ -2,10 +2,13 @@
 from operator import or_
 
 import numpy as np
+import pandas as pd
 
 from zipline.lib.labelarray import LabelArray
 from zipline.pipeline import Classifier
 from zipline.testing import parameter_space
+from zipline.testing.fixtures import ZiplineTestCase
+from zipline.testing.predicates import assert_equal
 from zipline.utils.numpy_utils import (
     categorical_dtype,
     int64_dtype,
@@ -464,3 +467,81 @@ class C(Classifier):
             "TypeError(\"unhashable type: 'dict'\",)."
         )
         self.assertEqual(errmsg, expected)
+
+
+class TestPostProcessAndToWorkSpaceValue(ZiplineTestCase):
+    """Check ``postprocess`` and ``to_workspace_value`` on Classifier terms.
+
+    Each test pushes a flattened 3x2 block through ``postprocess`` and then
+    rebuilds the same block with ``to_workspace_value`` from a Series that
+    leaves out the missing entries.
+    """
+
+    def test_reversability_categorical(self):
+        class F(Classifier):
+            inputs = ()
+            window_length = 0
+            dtype = categorical_dtype
+            missing_value = '<missing>'
+
+        classifier = F()
+        missing = classifier.missing_value
+        labels = np.array(
+            [['a', missing],
+             ['b', missing],
+             ['c', 'd']],
+        )
+        column_data = LabelArray(labels, missing_value=missing)
+
+        flattened = classifier.postprocess(column_data.ravel())
+        expected_flat = pd.Categorical(
+            ['a', missing, 'b', missing, 'c', 'd'],
+        )
+        assert_equal(flattened, expected_flat)
+
+        # only include the non-missing data
+        days = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
+        sids = [0, 0, 0, 1]
+        index = pd.MultiIndex.from_arrays([
+            [pd.Timestamp(day) for day in days],
+            sids,
+        ])
+        pipeline_output = pd.Series(
+            data=['a', 'b', 'c', 'd'],
+            index=index,
+            dtype='category',
+        )
+
+        rebuilt = classifier.to_workspace_value(
+            pipeline_output,
+            pd.Index([0, 1]),
+        )
+        assert_equal(rebuilt, column_data)
+
+    def test_reversability_int64(self):
+        class F(Classifier):
+            inputs = ()
+            window_length = 0
+            dtype = int64_dtype
+            missing_value = -1
+
+        classifier = F()
+        missing = classifier.missing_value
+        column_data = np.array(
+            [[0, missing],
+             [1, missing],
+             [2, 3]],
+        )
+
+        flattened = classifier.postprocess(column_data.ravel())
+        assert_equal(flattened, column_data.ravel())
+
+        # only include the non-missing data
+        days = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
+        sids = [0, 0, 0, 1]
+        index = pd.MultiIndex.from_arrays([
+            [pd.Timestamp(day) for day in days],
+            sids,
+        ])
+        pipeline_output = pd.Series(
+            data=[0, 1, 2, 3],
+            index=index,
+            dtype=int64_dtype,
+        )
+
+        rebuilt = classifier.to_workspace_value(
+            pipeline_output,
+            pd.Index([0, 1]),
+        )
+        assert_equal(rebuilt, column_data)
diff --git a/tests/pipeline/test_engine.py b/tests/pipeline/test_engine.py
@@ -14,6 +14,7 @@
     float32,
     float64,
     full,
+    full_like,
     log,
     nan,
     tile,
@@ -66,6 +67,7 @@
 from zipline.testing import (
     AssetID,
     AssetIDPlusDay,
+    ExplodingObject,
     check_arrays,
     make_alternating_boolean_array,
     make_cascading_boolean_array,
@@ -78,6 +80,7 @@
     WithTradingEnvironment,
     ZiplineTestCase,
 )
+from zipline.testing.predicates import assert_equal
 from zipline.utils.memoize import lazyval
 from zipline.utils.numpy_utils import bool_dtype, datetime64ns_dtype
 
@@ -163,14 +166,14 @@ def compute(self, today, assets, out, *inputs):
         out[:] = sum(inputs).sum(axis=0)
 
 
-class ConstantInputTestCase(WithTradingEnvironment, ZiplineTestCase):
+class WithConstantInputs(WithTradingEnvironment):
     asset_ids = ASSET_FINDER_EQUITY_SIDS = 1, 2, 3, 4
     START_DATE = Timestamp('2014-01-01', tz='utc')
     END_DATE = Timestamp('2014-03-01', tz='utc')
 
     @classmethod
     def init_class_fixtures(cls):
-        super(ConstantInputTestCase, cls).init_class_fixtures()
+        super(WithConstantInputs, cls).init_class_fixtures()
         cls.constants = {
             # Every day, assume every stock starts at 2, goes down to 1,
             # goes up to 4, and finishes at 3.
@@ -192,6 +195,8 @@ def init_class_fixtures(cls):
         )
         cls.assets = cls.asset_finder.retrieve_all(cls.asset_ids)
 
+
+class ConstantInputTestCase(WithConstantInputs, ZiplineTestCase):
     def test_bad_dates(self):
         loader = self.loader
         engine = SimplePipelineEngine(
@@ -1315,3 +1320,111 @@ def test_string_classifiers_produce_categoricals(self):
             columns=self.asset_finder.retrieve_all(self.asset_finder.sids),
         )
         assert_frame_equal(result.c.unstack(), expected_final_result)
+
+
+class PopulateInitialWorkspaceTestCase(WithConstantInputs, ZiplineTestCase):
+    """Tests for the ``populate_initial_workspace`` engine hook."""
+
+    def test_populate_default_workspace(self):
+        # Two aliased terms are seeded into the workspace with negated
+        # constants rather than the values their expressions describe. The
+        # pipeline output, and the terms built on top of them, must reflect
+        # the seeded values.
+        window_length = 5
+        column = USEquityPricing.low
+        base_term = column.latest
+        precomputed_term = (base_term + 1).alias('precomputed_term')
+        precomputed_term_with_window = SimpleMovingAverage(
+            inputs=(column,),
+            window_length=window_length,
+        ).alias('precomputed_term_with_window')
+        depends_on_precomputed_term = precomputed_term + 1
+        depends_on_precomputed_term_with_window = (
+            precomputed_term_with_window + 1
+        )
+        column_value = self.constants[column]
+        precomputed_term_value = -column_value
+        precomputed_term_with_window_value = -(column_value + 1)
+
+        def populate_initial_workspace(initial_workspace,
+                                       root_mask_term,
+                                       execution_plan,
+                                       dates,
+                                       assets):
+            def constant_for(term, value):
+                # Size the array by the dates the plan reports for this
+                # particular term, so each seeded term is looked up under
+                # its own key.
+                _, term_dates = execution_plan.mask_and_dates_for_term(
+                    term,
+                    root_mask_term,
+                    initial_workspace,
+                    dates,
+                )
+                return full(
+                    (len(term_dates), len(assets)),
+                    value,
+                    dtype=float64,
+                )
+
+            # Work on a copy so the workspace passed in is left untouched.
+            ws = initial_workspace.copy()
+            ws[precomputed_term] = constant_for(
+                precomputed_term,
+                precomputed_term_value,
+            )
+            ws[precomputed_term_with_window] = constant_for(
+                precomputed_term_with_window,
+                precomputed_term_with_window_value,
+            )
+            return ws
+
+        def dispatcher(c):
+            if c is column:
+                # The raw column should never be loaded because both terms
+                # that read it are already in the workspace, so its initial
+                # refcount should be zero. ExplodingObject presumably fails
+                # on any use -- confirm in zipline.testing.
+                return ExplodingObject()
+            return self.loader
+
+        engine = SimplePipelineEngine(
+            dispatcher,
+            self.dates,
+            self.asset_finder,
+            populate_initial_workspace=populate_initial_workspace,
+        )
+
+        results = engine.run_pipeline(
+            Pipeline({
+                'precomputed_term': precomputed_term,
+                'precomputed_term_with_window': precomputed_term_with_window,
+                'depends_on_precomputed_term': depends_on_precomputed_term,
+                'depends_on_precomputed_term_with_window':
+                    depends_on_precomputed_term_with_window,
+            }),
+            self.dates[window_length - 1],
+            self.dates[-1],
+        )
+
+        # Each output column must be filled with a single constant: the
+        # seeded value itself, or that value plus one for dependent terms.
+        expected_constants = (
+            ('precomputed_term', precomputed_term_value),
+            ('precomputed_term_with_window',
+             precomputed_term_with_window_value),
+            ('depends_on_precomputed_term', precomputed_term_value + 1),
+            ('depends_on_precomputed_term_with_window',
+             precomputed_term_with_window_value + 1),
+        )
+        for output_name, constant in expected_constants:
+            assert_equal(
+                results[output_name].values,
+                full_like(results[output_name], constant),
+            )
diff --git a/tests/pipeline/test_factor.py b/tests/pipeline/test_factor.py
@@ -21,6 +21,7 @@
     where,
 )
 from numpy.random import randn, seed
+import pandas as pd
 
 from zipline.errors import UnknownRankMethod
 from zipline.lib.labelarray import LabelArray
@@ -37,6 +38,8 @@
     parameter_space,
     permute_rows,
 )
+from zipline.testing.fixtures import ZiplineTestCase
+from zipline.testing.predicates import assert_equal
 from zipline.utils.numpy_utils import (
     categorical_dtype,
     datetime64ns_dtype,
@@ -1058,3 +1061,39 @@ def test_demean_is_window_safe_if_input_is_window_safe(self):
         self.assertFalse(F().demean().window_safe)
         self.assertFalse(F(window_safe=False).demean().window_safe)
         self.assertTrue(F(window_safe=True).demean().window_safe)
+
+
+class TestPostProcessAndToWorkSpaceValue(ZiplineTestCase):
+    """Check ``postprocess`` and ``to_workspace_value`` on Factor terms."""
+
+    @parameter_space(dtype_=(float64_dtype, datetime64ns_dtype))
+    def test_reversability(self, dtype_):
+        class F(Factor):
+            inputs = ()
+            dtype = dtype_
+            window_length = 0
+
+        factor = F()
+        missing = factor.missing_value
+        column_data = array(
+            [[0, missing],
+             [1, missing],
+             [2, 3]],
+            dtype=dtype_,
+        )
+
+        # The flattened block is expected to come back from postprocess
+        # unchanged.
+        flattened = factor.postprocess(column_data.ravel())
+        assert_equal(flattened, column_data.ravel())
+
+        # only include the non-missing data
+        days = ('2014-01-01', '2014-01-02', '2014-01-03', '2014-01-03')
+        sids = [0, 0, 0, 1]
+        index = pd.MultiIndex.from_arrays([
+            [pd.Timestamp(day) for day in days],
+            sids,
+        ])
+        pipeline_output = pd.Series(
+            data=array([0, 1, 2, 3], dtype=dtype_),
+            index=index,
+        )
+
+        rebuilt = factor.to_workspace_value(
+            pipeline_output,
+            pd.Index([0, 1]),
+        )
+        assert_equal(rebuilt, column_data)