Skip to content

Commit

Permalink
Use broadcasting to remove shared code
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichalowicz committed Jul 19, 2016
1 parent 790e0d8 commit c9058e8
Showing 1 changed file with 22 additions and 27 deletions.
49 changes: 22 additions & 27 deletions zipline/pipeline/factors/statistical.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

from numpy import broadcast_arrays
from scipy.stats import (
linregress,
pearsonr,
Expand Down Expand Up @@ -73,15 +74,13 @@ class RollingPearson(_RollingCorrelation):
instance of this class.
"""
def compute(self, today, assets, out, base_data, target_data):
if target_data.shape[1] > 1:
# Both inputs are 2D, so compute sid-by-sid.
for i in range(len(out)):
out[i] = pearsonr(base_data[:, i], target_data[:, i])[0]
else:
# Second input is a slice, so always compute with its only column.
slice_data = target_data[:, 0]
for i in range(len(out)):
out[i] = pearsonr(base_data[:, i], slice_data)[0]
# If `target_data` is a Slice or single column of data, broadcast it
# out to the same shape as `base_data`, then compute column-wise. This
# is efficient because broadcasting returns a view rather than a copy:
# every column of the broadcasted array refers to the same memory.
target_data = broadcast_arrays(target_data, base_data)[0]
for i in range(len(out)):
out[i] = pearsonr(base_data[:, i], target_data[:, i])[0]


class RollingSpearman(_RollingCorrelation):
Expand Down Expand Up @@ -119,15 +118,13 @@ class RollingSpearman(_RollingCorrelation):
instance of this class.
"""
def compute(self, today, assets, out, base_data, target_data):
if target_data.shape[1] > 1:
# Both inputs are 2D, so compute sid-by-sid.
for i in range(len(out)):
out[i] = spearmanr(base_data[:, i], target_data[:, i])[0]
else:
# Second input is a slice, so always compute with its only column.
slice_data = target_data[:, 0]
for i in range(len(out)):
out[i] = spearmanr(base_data[:, i], slice_data)[0]
# If `target_data` is a Slice or single column of data, broadcast it
# out to the same shape as `base_data`, then compute column-wise. This
# is efficient because broadcasting returns a view rather than a copy:
# every column of the broadcasted array refers to the same memory.
target_data = broadcast_arrays(target_data, base_data)[0]
for i in range(len(out)):
out[i] = spearmanr(base_data[:, i], target_data[:, i])[0]


class RollingLinearRegression(CustomFactor, SingleInputMixin):
Expand Down Expand Up @@ -201,15 +198,13 @@ def regress(y, x):
p_value[i] = regr_results[3]
stderr[i] = regr_results[4]

if independent.shape[1] > 1:
# Both inputs are 2D, so compute sid-by-sid.
for i in range(len(out)):
regress(y=dependent[:, i], x=independent[:, i])
else:
# Second input is a slice, so always compute with its only column.
slice_data = independent[:, 0]
for i in range(len(out)):
regress(y=dependent[:, i], x=slice_data)
# If `independent` is a Slice or single column of data, broadcast it
# out to the same shape as `dependent`, then compute column-wise. This
# is efficient because broadcasting returns a view rather than a copy:
# every column of the broadcasted array refers to the same memory.
independent = broadcast_arrays(independent, dependent)[0]
for i in range(len(out)):
regress(y=dependent[:, i], x=independent[:, i])


class RollingPearsonOfReturns(RollingPearson):
Expand Down

0 comments on commit c9058e8

Please sign in to comment.