In [1]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import talib
import matplotlib.pyplot as plt
import numexpr as ne
import timeit
import warnings
from IPython import get_ipython
ipython = get_ipython()
# pip install line_profiler
%load_ext line_profiler
%matplotlib inline

In [2]:
import numba as nb
@nb.jit
def nancumlast_numba(arr, copy=True): # ffill in pandas parlance
    '''Forward-fill NaNs down every column of a 2-D array with explicit loops.

    Numba decorator solution provided by shx2.

    arr  : array indexed as ``[row, column]``.
    copy : if True (default) the fill is performed on ``arr.copy()``;
           if False ``arr`` itself is overwritten.

    Returns the filled array. Row 0 is never written, so NaNs at the top of a
    column stay NaN.
    '''
    filled = arr
    if copy:
        filled = arr.copy()
    n_rows = filled.shape[0]
    n_cols = filled.shape[1]
    for j in range(n_cols):
        for i in range(1, n_rows):
            # a NaN cell takes the (already filled) value directly above it
            if np.isnan(filled[i, j]):
                filled[i, j] = filled[i - 1, j]
    return filled

def nancumlast_np1(arr):
    '''Forward-fill NaNs down the columns of a 2-D array without Python loops.

    Solution provided by Divakar.

    For each cell, the row index of the most recent non-NaN entry in its
    column is found with a running maximum, and the values are then gathered
    from ``arr`` by fancy indexing. ``arr`` is not modified. Cells that have
    no earlier valid entry point at row 0 and therefore keep row 0's value.
    '''
    nan_mask = np.isnan(arr)
    row_ids = np.arange(nan_mask.shape[0])[:, None]
    # NaN cells contribute 0 so that the running max carries the last valid row
    last_valid = np.where(nan_mask, 0, row_ids)
    np.maximum.accumulate(last_valid, axis=0, out=last_valid)
    col_ids = np.arange(last_valid.shape[1])[None, :]
    return arr[last_valid, col_ids]

def nancumlast_np2(arr, axis=0, copy=True):
    '''Forward-fill NaNs along ``axis`` by applying a 1-D fill to every slice.

    arr  : input array.
    axis : axis along which values are carried forward (default 0).
    copy : forwarded to the 1-D helper; if True each slice is copied before
           being filled, if False the slice handed over by
           ``np.apply_along_axis`` is written to directly.

    Returns the array assembled by ``np.apply_along_axis``.
    '''
    def fillna1d(v, copy=True):
        # Fill NaNs in one vector with the value at the closest earlier
        # non-NaN position.
        vec = v.copy() if copy else v
        is_nan = np.isnan(vec)
        # position 0 is always treated as a valid source, even when it is NaN
        is_nan[0] = False
        valid_at = np.flatnonzero(~is_nan)
        nan_at = np.flatnonzero(is_nan)
        # index (into valid_at) of the last valid position before each NaN
        src = np.searchsorted(valid_at, nan_at, side='right') - 1
        vec[nan_at] = vec[valid_at[src]]
        return vec

    return np.apply_along_axis(fillna1d, axis, arr, copy=copy)

In [3]:
def returns(x, periods=1, step=1):
    '''Baseline relative change ``(x[t] - x[t-periods]) / x[t-periods]``.

    x       : array; the slicing is applied to its first axis.
    periods : lag between the two compared rows.
    step    : stride used for both slices.

    Written as plain array arithmetic; ``x`` is not modified.
    '''
    later = x[periods::step]
    return (later - x[:-periods:step,]) / x[:-periods:step,]

def returns_codesplit(x, periods=1, step=1):
    '''Same computation as ``returns`` with every step on its own line.

    x       : array; the slicing is applied to its first axis.
    periods : lag between the two compared rows.
    step    : stride used for both slices.

    ``x`` is not modified.
    '''
    later = x[periods::step]
    base = x[:-periods:step]

    diff = later - base  # allocates a new array
    # plain division: this allocates a second array, it is NOT in place
    return diff / base

def returns_inplace(x, periods=1, step=1, copy=True):
    '''Relative change computed as ``x[t] / x[t-periods] - 1`` inside one buffer.

    x       : array; the slicing is applied to its first axis.
    periods : lag between the two compared rows.
    step    : stride used for both slices.
    copy    : if True (default) the arithmetic runs on ``x.copy()``; if False
              the rows ``x[periods::step]`` of the caller's array are
              overwritten with the result.

    Returns a view into the working buffer.
    '''
    buf = x.copy() if copy else x
    out = buf[periods::step]       # view into buf
    out /= buf[:-periods:step,]    # divide in place by the lagged rows
    out -= 1                       # ratio -> relative change, in place
    return out

def returns_fast(x, periods=1, step=1):
    '''Relative change with a single temporary array.

    x       : array; the slicing is applied to its first axis.
    periods : lag between the two compared rows.
    step    : stride used for both slices.

    The difference allocates the result array and the division then reuses
    it in place. ``x`` is not modified.
    '''
    lagged = x[:-periods:step,]
    out = x[periods::step] - lagged
    out /= lagged
    return out

def returns_numexpr(x, periods=1, step=1):
    '''Relative change ``a/b - 1`` evaluated by numexpr.

    x       : array; the slicing is applied to its first axis.
    periods : lag between the two compared rows.
    step    : stride used for both slices.
    '''
    # The expression string refers to these two locals by name, so they must
    # stay called `a` and `b`.
    b = x[:-periods:step]
    a = x[periods::step]
    return ne.evaluate('a/b-1')

In [4]:
def returns_fast_propagate(x, periods=1, step=1, propagate=True):
    '''Relative change where NaNs in the lagged rows are forward-filled first.

    x         : array; the slicing is applied to its first axis.
    periods   : lag between the two compared rows.
    step      : stride used for both slices.
    propagate : if True (default) the copied lagged rows are forward-filled
                with ``nancumlast_numba`` before the arithmetic, and result
                cells whose current value is NaN are set to NaN. If False the
                function computes the plain ``(a - b) / b``.

    ``x`` is not modified; the lagged rows are copied up front.
    '''
    later = x[periods::step]
    base = x[:-periods:step].copy()

    gaps = None
    if propagate:
        gaps = np.isnan(later)
        nancumlast_numba(base, copy=False)  # fills the private copy in place

    out = later - base  # allocates the result
    out /= base

    if propagate:
        out[gaps] = np.nan
    return out

def returns_fast_propagate_inplace(x, periods=1, step=1, propagate=True):
    '''NaN-propagating relative change that reuses the lagged copy as output.

    x         : array; the slicing is applied to its first axis.
    periods   : lag between the two compared rows.
    step      : stride used for both slices.
    propagate : if True (default) the copied lagged rows are forward-filled
                with ``nancumlast_numba`` first, and result cells whose
                current value is NaN are set to NaN afterwards.

    The result is computed as ``a / b - 1`` directly inside the copy of the
    lagged rows. ``x`` is not modified.
    '''
    later = x[periods::step]
    out = x[:-periods:step].copy()  # starts as the lagged rows, ends as the result

    gaps = None
    if propagate:
        gaps = np.isnan(later)
        nancumlast_numba(out, copy=False)

    np.divide(later, out, out=out)  # out = later / out
    out -= 1

    if propagate:
        out[gaps] = np.nan
    return out

def returns_fast_propagate_numexpr(x, periods=1, step=1, propagate=True):
    '''NaN-propagating relative change with the arithmetic done by numexpr.

    x         : array; the slicing is applied to its first axis.
    periods   : lag between the two compared rows.
    step      : stride used for both slices.
    propagate : if True (default) the copied lagged rows are forward-filled
                with ``nancumlast_numba`` first, and result cells whose
                current value is NaN are set to NaN afterwards.

    ``x`` is not modified; the copy of the lagged rows doubles as the output
    buffer.
    '''
    # The expression string below refers to the locals `a` and `r` by name,
    # so these two names must not change.
    a = x[periods::step]
    r = x[:-periods:step].copy()

    mask = None
    if propagate:
        mask = np.isnan(a)
        nancumlast_numba(r, copy=False)

    ne.evaluate('a/r-1', out=r)  # result written back into r

    if propagate:
        r[mask] = np.nan
    return r

In [5]:
# Generate prices
# A fixed seed on a local generator makes the benchmark input identical on
# every Restart & Run All, without touching numpy's global random state.
N = 1000  # number of rows (the axis the random walk accumulates along)
M = 30    # number of columns
SEED = 0
rng = np.random.default_rng(SEED)
p = 100 + np.cumsum(rng.standard_normal((N, M)), axis=0)

# Knock out a handful of cells (paired row/column indices) so that the
# NaN-handling code paths are exercised, including the first and last rows.
nan_rows = [0, 0, 1, 4, 5, 5, -2, -1, -1, 1]
nan_cols = [0, 1, 1, 0, 0, 1, 0, 0, 1, 2]
p[nan_rows, nan_cols] = np.nan

# Forward-filled version of p, computed with pandas.
fp = pd.DataFrame(p).ffill().values

In [6]:
# returns_fast_propagate with NaN propagation switched off.
rfpf = lambda x: returns_fast_propagate(x, propagate=False)

# Benchmark candidates. The order matters: eval_array_funcs compares each
# result with the result of the entry just before it.
func_list = [
    returns,
    returns_codesplit,
    returns_inplace,
    returns_fast,
    returns_numexpr,
    rfpf,
    returns_fast_propagate,
    returns_fast_propagate_inplace,
    returns_fast_propagate_numexpr,
]

def eval_array_funcs(func_list, *args):
    '''Run and time every callable in ``func_list`` on the same arguments.

    func_list : iterable of callables, each invoked as ``f(*args)``.
    *args     : positional arguments forwarded unchanged to every callable.

    For each callable, in order:
      * it is called once and its result compared (``np.allclose`` with
        ``equal_nan=True``) with the previous callable's result; a banner is
        printed when the two differ;
      * it is timed with ``timeit.Timer.autorange`` and the mean time per
        call is printed in milliseconds.

    Nothing is returned; all reporting goes to stdout.
    '''
    prevr = None
    for f in func_list:
        r = f(*args)
        if prevr is not None and not np.allclose(r, prevr, equal_nan=True):
            print('----------------------- output changed!!! ------------------')
        prevr = r

        # functools.partial objects and other callable instances have no
        # __name__; fall back to their repr instead of raising AttributeError.
        label = getattr(f, '__name__', None) or repr(f)

        # Give the timed statement exactly the two names it needs.
        timer = timeit.Timer('f(*args)', globals={'f': f, 'args': args})
        n, elapsed = timer.autorange()
        print('{:>40}: {:.6f} ms'.format(label, 1000*elapsed/n))
         
# Benchmark every variant in func_list on the price matrix p (which contains NaNs).
eval_array_funcs(func_list,p)

                                 returns: 0.164982 ms
                       returns_codesplit: 0.159429 ms
                         returns_inplace: 0.221371 ms
                            returns_fast: 0.166307 ms
                         returns_numexpr: 0.220305 ms
                                <lambda>: 0.193243 ms
----------------------- output changed!!! ------------------
                  returns_fast_propagate: 0.312725 ms
          returns_fast_propagate_inplace: 0.296076 ms
          returns_fast_propagate_numexpr: 0.376868 ms


In [7]:
# Where is time spent?
# Line-by-line profile (the %lprun magic from the line_profiler extension) of a
# single call to returns_fast_propagate_inplace on p with propagation enabled.
%lprun -f returns_fast_propagate_inplace returns_fast_propagate_inplace(p,propagate=True)