Merge pull request #1358 from quantopian/smoothing

ENH: added smoothing to zipline
quantopian · Aug 2, 2016 · 129d16f · 129d16f
2 parents 7761c73 + 499703d
commit 129d16f
Show file tree

Hide file tree

Showing 4 changed files with 146 additions and 0 deletions.
diff --git a/docs/source/whatsnew/1.0.2.txt b/docs/source/whatsnew/1.0.2.txt
@@ -26,6 +26,11 @@ Enhancements
   :meth:`~zipline.pipeline.factors.Factor.top`, and
   :meth:`~zipline.pipeline.factors.Factor.bottom`. (:issue:`1349`).
 
+- Added new pipeline filters, :class:`~zipline.pipeline.filters.All` and
+  :class:`~zipline.pipeline.filters.Any`, which take another filter and
+  return True if an asset produced a True for all/any days in the previous
+  ``window_length`` days (:issue:`1358`).
+
 Bug Fixes
 ~~~~~~~~~
 

diff --git a/tests/pipeline/test_filter.py b/tests/pipeline/test_filter.py
@@ -30,6 +30,7 @@
 from zipline.pipeline import Filter, Factor, TermGraph
 from zipline.pipeline.classifiers import Classifier
 from zipline.pipeline.factors import CustomFactor
+from zipline.pipeline.filters import All, Any
 from zipline.testing import check_arrays, parameter_space, permute_rows
 from zipline.utils.numpy_utils import float64_dtype, int64_dtype
 from .base import BasePipelineTestCase, with_default_shape
@@ -395,6 +396,108 @@ def test_isfinite(self):
         )
         check_arrays(results['isfinite'], isfinite(data))
 
+    def test_all(self):
+
+        data = array([[1, 1, 1, 1, 1, 1],
+                      [0, 1, 1, 1, 1, 1],
+                      [1, 0, 1, 1, 1, 1],
+                      [1, 1, 0, 1, 1, 1],
+                      [1, 1, 1, 0, 1, 1],
+                      [1, 1, 1, 1, 0, 1],
+                      [1, 1, 1, 1, 1, 0]], dtype=bool)
+
+        # With a window_length of N, 0's should be "sticky" for the (N - 1)
+        # days after the 0 in the base data.
+
+        # Note that, the way ``self.run_graph`` works, we compute the same
+        # number of output rows for all inputs, so we only get the last 4
+        # outputs for expected_3 even though we have enough input data to
+        # compute 5 rows.
+        expected_3 = array([[0, 0, 0, 1, 1, 1],
+                            [1, 0, 0, 0, 1, 1],
+                            [1, 1, 0, 0, 0, 1],
+                            [1, 1, 1, 0, 0, 0]], dtype=bool)
+
+        expected_4 = array([[0, 0, 0, 1, 1, 1],
+                            [0, 0, 0, 0, 1, 1],
+                            [1, 0, 0, 0, 0, 1],
+                            [1, 1, 0, 0, 0, 0]], dtype=bool)
+
+        class Input(Filter):
+            inputs = ()
+            window_length = 0
+
+        results = self.run_graph(
+            TermGraph({
+                '3': All(inputs=[Input()], window_length=3),
+                '4': All(inputs=[Input()], window_length=4),
+            }),
+            initial_workspace={Input(): data},
+            mask=self.build_mask(ones(shape=data.shape)),
+        )
+
+        check_arrays(results['3'], expected_3)
+        check_arrays(results['4'], expected_4)
+
+    def test_any(self):
+
+        # FUN FACT: The inputs and outputs here are exactly the negation of
+        # the inputs and outputs for test_all above. This isn't a coincidence.
+        #
+        # By de Morgan's Laws, we have::
+        #
+        #     ~(a & b) == (~a | ~b)
+        #
+        # negating both sides, we have::
+        #
+        #      (a & b) == ~(~a | ~b)
+        #
+        # Since all(a, b) is isomorphic to (a & b), and any(a, b) is isomorphic
+        # to (a | b), we have::
+        #
+        #     all(a, b) == ~(any(~a, ~b))
+        #
+        data = array([[0, 0, 0, 0, 0, 0],
+                      [1, 0, 0, 0, 0, 0],
+                      [0, 1, 0, 0, 0, 0],
+                      [0, 0, 1, 0, 0, 0],
+                      [0, 0, 0, 1, 0, 0],
+                      [0, 0, 0, 0, 1, 0],
+                      [0, 0, 0, 0, 0, 1]], dtype=bool)
+
+        # With a window_length of N, 1's should be "sticky" for the (N - 1)
+        # days after the 1 in the base data.
+
+        # Note that, the way ``self.run_graph`` works, we compute the same
+        # number of output rows for all inputs, so we only get the last 4
+        # outputs for expected_3 even though we have enough input data to
+        # compute 5 rows.
+        expected_3 = array([[1, 1, 1, 0, 0, 0],
+                            [0, 1, 1, 1, 0, 0],
+                            [0, 0, 1, 1, 1, 0],
+                            [0, 0, 0, 1, 1, 1]], dtype=bool)
+
+        expected_4 = array([[1, 1, 1, 0, 0, 0],
+                            [1, 1, 1, 1, 0, 0],
+                            [0, 1, 1, 1, 1, 0],
+                            [0, 0, 1, 1, 1, 1]], dtype=bool)
+
+        class Input(Filter):
+            inputs = ()
+            window_length = 0
+
+        results = self.run_graph(
+            TermGraph({
+                '3': Any(inputs=[Input()], window_length=3),
+                '4': Any(inputs=[Input()], window_length=4),
+            }),
+            initial_workspace={Input(): data},
+            mask=self.build_mask(ones(shape=data.shape)),
+        )
+
+        check_arrays(results['3'], expected_3)
+        check_arrays(results['4'], expected_4)
+
     @parameter_space(factor_len=[2, 3, 4])
     def test_window_safe(self, factor_len):
         # all true data set of (days, securities)

diff --git a/zipline/pipeline/filters/__init__.py b/zipline/pipeline/filters/__init__.py
@@ -9,8 +9,11 @@
     PercentileFilter,
     SingleAsset,
 )
+from .smoothing import All, Any
 
 __all__ = [
+    'All',
+    'Any',
     'ArrayPredicate',
     'CustomFilter',
     'Filter',

diff --git a/zipline/pipeline/filters/smoothing.py b/zipline/pipeline/filters/smoothing.py
@@ -0,0 +1,35 @@
+"""
+Filters that apply smoothing operations on other filters.
+
+These are generally useful for controlling/minimizing turnover on existing
+Filters.
+"""
+from .filter import CustomFilter
+
+
+class All(CustomFilter):
+    """
+    A Filter requiring that assets produce True for ``window_length``
+    consecutive days.
+
+    **Default Inputs:** None
+
+    **Default Window Length:** None
+    """
+
+    def compute(self, today, assets, out, arg):
+        out[:] = (arg.sum(axis=0) == self.window_length)
+
+
+class Any(CustomFilter):
+    """
+    A Filter requiring that assets produce True for at least one day in the
+    last ``window_length`` days.
+
+    **Default Inputs:** None
+
+    **Default Window Length:** None
+    """
+
+    def compute(self, today, assets, out, arg):
+        out[:] = (arg.sum(axis=0) > 0)