Merge pull request #1603 from quantopian/randc-built-in-factors-twekas

Randc built in factors twekas
quantopian · Nov 28, 2016 · 37f5826 · 37f5826
2 parents 3363237 + ee230c8
commit 37f5826
Show file tree

Hide file tree

Showing 3 changed files with 136 additions and 59 deletions.
diff --git a/tests/pipeline/test_technical.py b/tests/pipeline/test_technical.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pandas as pd
 import talib
-from numpy.random import random_integers
+from numpy.random import RandomState
 
 from zipline.lib.adjusted_array import AdjustedArray
 from zipline.pipeline.data import USEquityPricing
@@ -421,25 +421,67 @@ def expected_ewma(self, data_df, window):
             .mean()
             .values[-1])
 
-    def test_MACD_window_length_generation(self):
-        signal_period = random_integers(1, 90)
-        fast_period = random_integers(signal_period+1, signal_period+100)
-        slow_period = random_integers(fast_period+1, fast_period+100)
+    @parameter_space(seed=range(5))
+    def test_MACD_window_length_generation(self, seed):
+        rng = RandomState(seed)
+
+        signal_period = rng.randint(1, 90)
+        fast_period = rng.randint(signal_period + 1, signal_period + 100)
+        slow_period = rng.randint(fast_period + 1, fast_period + 100)
         ewma = MovingAverageConvergenceDivergenceSignal(
             fast_period=fast_period,
             slow_period=slow_period,
             signal_period=signal_period,
         )
         assert_equal(
             ewma.window_length,
-            slow_period+signal_period-1,
+            slow_period + signal_period - 1,
+        )
+
+    def test_bad_inputs(self):
+        template = (
+            "MACDSignal() expected a value greater than or equal to 1"
+            " for argument %r, but got 0 instead."
+        )
+        with self.assertRaises(ValueError) as e:
+            MovingAverageConvergenceDivergenceSignal(fast_period=0)
+        self.assertEqual(template % 'fast_period', str(e.exception))
+
+        with self.assertRaises(ValueError) as e:
+            MovingAverageConvergenceDivergenceSignal(slow_period=0)
+        self.assertEqual(template % 'slow_period', str(e.exception))
+
+        with self.assertRaises(ValueError) as e:
+            MovingAverageConvergenceDivergenceSignal(signal_period=0)
+        self.assertEqual(template % 'signal_period', str(e.exception))
+
+        with self.assertRaises(ValueError) as e:
+            MovingAverageConvergenceDivergenceSignal(
+                fast_period=5,
+                slow_period=4,
+            )
+
+        expected = (
+            "'slow_period' must be greater than 'fast_period', but got\n"
+            "slow_period=4, fast_period=5"
         )
+        self.assertEqual(expected, str(e.exception))
+
+    @parameter_space(
+        seed=range(2),
+        fast_period=[3, 5],
+        slow_period=[8, 10],
+        signal_period=[3, 9],
+        __fail_fast=True,
+    )
+    def test_moving_average_convergence_divergence(self,
+                                                   seed,
+                                                   fast_period,
+                                                   slow_period,
+                                                   signal_period):
+        rng = RandomState(seed)
 
-    def test_moving_average_convergence_divergence(self):
         nassets = 3
-        fast_period = 3
-        slow_period = 8
-        signal_period = 2
 
         macd = MovingAverageConvergenceDivergenceSignal(
             fast_period=fast_period,
@@ -450,7 +492,7 @@ def test_moving_average_convergence_divergence(self):
         today = pd.Timestamp('2016', tz='utc')
         assets = pd.Index(np.arange(nassets))
         out = np.empty(shape=(nassets,), dtype=np.float64)
-        close = np.random.rand(macd.window_length, nassets)
+        close = rng.rand(macd.window_length, nassets)
 
         macd.compute(
             today,
@@ -465,14 +507,23 @@ def test_moving_average_convergence_divergence(self):
         close_df = pd.DataFrame(close)
         fast_ewma = self.expected_ewma(
             close_df,
-            fast_period)
+            fast_period,
+        )
         slow_ewma = self.expected_ewma(
             close_df,
-            slow_period)
-        expected_signal = self.expected_ewma(
-            fast_ewma-slow_ewma,
+            slow_period,
+        )
+        signal_ewma = self.expected_ewma(
+            fast_ewma - slow_ewma,
             signal_period
-        ).values[-1]
+        )
+
+        # Everything but the last row should be NaN.
+        self.assertTrue(signal_ewma.iloc[:-1].isnull().all().all())
+
+        # We're testing a single compute call, which we expect to be equivalent
+        # to the last row of the frame we calculated with pandas.
+        expected_signal = signal_ewma.values[-1]
 
         np.testing.assert_almost_equal(
             out,
@@ -492,7 +543,7 @@ def test_simple_volatility(self):
         nassets = 3
         ann_vol = AnnualizedVolatility()
         today = pd.Timestamp('2016', tz='utc')
-        assets = np.arange(nassets, dtype=np.float)
+        assets = np.arange(nassets, dtype=np.float64)
         returns = np.full((ann_vol.window_length, nassets),
                           0.004,
                           dtype=np.float64)
@@ -514,7 +565,7 @@ def test_volatility(self):
         nassets = 3
         ann_vol = AnnualizedVolatility()
         today = pd.Timestamp('2016', tz='utc')
-        assets = np.arange(nassets, dtype=np.float)
+        assets = np.arange(nassets, dtype=np.float64)
         returns = np.random.normal(loc=0.001,
                                    scale=0.01,
                                    size=(ann_vol.window_length, nassets))

diff --git a/zipline/pipeline/factors/technical.py b/zipline/pipeline/factors/technical.py
@@ -14,6 +14,7 @@
     dstack,
     exp,
     fmax,
+    full,
     inf,
     isnan,
     log,
@@ -25,8 +26,7 @@
 
 from zipline.pipeline.data import USEquityPricing
 from zipline.pipeline.mixins import SingleInputMixin
-from zipline.utils.numpy_utils import ignore_nanwarnings
-from zipline.utils.input_validation import expect_types
+from zipline.utils.input_validation import expect_bounded, expect_types
 from zipline.utils.math_utils import (
     nanargmax,
     nanargmin,
@@ -35,9 +35,12 @@
     nanstd,
     nansum,
     nanmin,
-    exponential_weights,
 )
-from zipline.utils.numpy_utils import rolling_window
+from zipline.utils.numpy_utils import (
+    float64_dtype,
+    ignore_nanwarnings,
+    rolling_window,
+)
 from .factor import CustomFactor
 
 
@@ -161,6 +164,28 @@ def compute(self, today, assets, out, close, volume):
         out[:] = nansum(close * volume, axis=0) / len(close)
 
 
+def exponential_weights(length, decay_rate):
+    """
+    Build a weight vector for an exponentially-weighted statistic.
+
+    The resulting ndarray is of the form::
+
+        [decay_rate ** (length + 1), ..., decay_rate ** 3, decay_rate ** 2]
+
+    Parameters
+    ----------
+    length : int
+        The length of the desired weight vector.
+    decay_rate : float
+        The rate at which entries in the weight vector increase or decrease.
+
+    Returns
+    -------
+    weights : ndarray[float64]
+    """
+    return full(length, decay_rate, float64_dtype) ** arange(length + 1, 1, -1)
+
+
 class _ExponentialWeightedFactor(SingleInputMixin, CustomFactor):
     """
     Base class for factors implementing exponential-weighted operations.
@@ -379,13 +404,13 @@ class LinearWeightedMovingAverage(CustomFactor, SingleInputMixin):
     ctx = ignore_nanwarnings()
 
     def compute(self, today, assets, out, data):
-        num_days = data.shape[0]
+        ndays = data.shape[0]
 
         # Initialize weights array
-        weights = arange(1, num_days + 1, dtype=float).reshape(num_days, 1)
+        weights = arange(1, ndays + 1, dtype=float64_dtype).reshape(ndays, 1)
 
         # Compute normalizer
-        normalizer = (num_days * (num_days + 1)) / 2
+        normalizer = (ndays * (ndays + 1)) / 2
 
         # Weight the data
         weighted_data = data * weights
@@ -684,40 +709,48 @@ class MovingAverageConvergenceDivergenceSignal(CustomFactor):
     trend in a stock's price.
 
     **Default Inputs:** :data:`zipline.pipeline.data.USEquityPricing.close`
-    **Default Window Length:** Window length is automatically calculated as the
-    sum of slow_period and signal_period.
 
     Parameters
     ----------
-    fast_period : int > 0
+    fast_period : int > 0, optional
         The window length for the "fast" EWMA. Default is 12.
-    slow_period : int > 0, > fast_period
+    slow_period : int > 0, > fast_period, optional
         The window length for the "slow" EWMA. Default is 26.
-    signal_period' : int > 0, < fast_period
+    signal_period : int > 0, optional
         The window length for the signal line. Default is 9.
 
-    Returns
-    -------
-    The EWMA of the difference between "fast" EWMA and "slow" EWMA line using
-    `signal_period` as span.
+    Notes
+    -----
+    Unlike most pipeline expressions, this factor does not accept a
+    ``window_length`` parameter. ``window_length`` is inferred from
+    ``slow_period`` and ``signal_period``.
     """
-
-    inputs = [USEquityPricing.close]
+    inputs = (USEquityPricing.close,)
+    # We don't use the default form of `params` here because we want to
+    # dynamically calculate `window_length` from the period lengths in our
+    # __new__.
     params = ('fast_period', 'slow_period', 'signal_period')
 
+    @expect_bounded(
+        __funcname='MACDSignal',
+        fast_period=(1, None),  # These must all be >= 1.
+        slow_period=(1, None),
+        signal_period=(1, None),
+    )
     def __new__(cls,
                 fast_period=12,
                 slow_period=26,
                 signal_period=9,
                 *args,
                 **kwargs):
 
-        if signal_period <= 0:
-            raise ValueError("'signal_period' must be larger than 0.")
-        if slow_period <= fast_period or fast_period <= signal_period:
+        if slow_period <= fast_period:
             raise ValueError(
-                "'slow_period' must be larger than 'fast_period'."
-                "'fast_period' must be larger than 'signal_period'."
+                "'slow_period' must be greater than 'fast_period', but got\n"
+                "slow_period={slow}, fast_period={fast}".format(
+                    slow=slow_period,
+                    fast=fast_period,
+                )
             )
 
         return super(MovingAverageConvergenceDivergenceSignal, cls).__new__(
@@ -731,10 +764,11 @@ def __new__(cls,
 
     def _ewma(self, data, length):
         decay_rate = 1.0 - (2.0 / (1.0 + length))
-        return average(data,
-                       axis=1,
-                       weights=exponential_weights(length, decay_rate)
-                       )
+        return average(
+            data,
+            axis=1,
+            weights=exponential_weights(length, decay_rate)
+        )
 
     def compute(self, today, assets, out, close, fast_period, slow_period,
                 signal_period):
@@ -756,19 +790,19 @@ class AnnualizedVolatility(CustomFactor):
     https://en.wikipedia.org/wiki/Volatility_(finance)
 
     The degree of variation of a series over time as measured by the standard
-    deviation of returns.
+    deviation of daily returns.
 
     **Default Inputs:**
         :data:`zipline.pipeline.factors.Returns(window_length=2)`
 
     Parameters
     ----------
-    annualization_factor :
-        The number of time units per year. Defaults to average number of NYSE
-        trading days per year, 252.
+    annualization_factor : float, optional
+        The number of time units per year. Default is 252, the number of NYSE
+        trading days in a normal year.
     """
     inputs = [Returns(window_length=2)]
-    params = {'annualization_factor': 252}
+    params = {'annualization_factor': 252.0}
     window_length = 252
 
     def compute(self, today, assets, out, returns, annualization_factor):

diff --git a/zipline/utils/math_utils.py b/zipline/utils/math_utils.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import math
 
-from numpy import isnan, full, arange
+from numpy import isnan
 
 
 def tolerant_equals(a, b, atol=10e-7, rtol=10e-7, equal_nan=False):
@@ -77,11 +77,3 @@ def round_if_near_integer(a, epsilon=1e-4):
         return round(a)
     else:
         return a
-
-
-def exponential_weights(length, decay_rate):
-    """
-    Return weighting vector for an exponential moving statistic on `length`
-    rows with a decay rate of `decay_rate`.
-    """
-    return full(length, decay_rate, float) ** arange(length + 1, 1, -1)