In [1]:
import sys
sys.path.append('..')

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline

from pipelinetweak.lagged import val_to_roc, roc_to_val, RoC

Generate some random variables $x>0$.
Assume `x` is a time series with 3 variables and 7 observations.
The oldest observation is in the first row.

In [2]:
# Pin the legacy global RNG so the sample printed below is reproducible.
np.random.seed(42)
# 7 observations (rows) of 3 variables (columns): standard normal draws
# rescaled to a spread of 5 around a level of 75.
x = 75 + 5 * np.random.normal(size=(7, 3))
x

array([[77.48357077, 74.30867849, 78.23844269],
       [82.61514928, 73.82923313, 73.82931522],
       [82.89606408, 78.83717365, 72.65262807],
       [77.71280022, 72.68291154, 72.67135123],
       [76.20981136, 65.43359878, 66.37541084],
       [72.18856235, 69.9358444 , 76.57123666],
       [70.45987962, 67.93848149, 82.32824384]])

## To Rate of Change

The rate of change (RoC), i.e. the quotient of $x_t$ and the previous observation $x_{t-1}$, is

$$
r_t
= \frac{x_t}{x_{t-1}}
$$


In [3]:
# Divide every row by the row before it. The first row has no predecessor,
# so it comes back as NaN.
ret = val_to_roc(x)
ret

array([[       nan,        nan,        nan],
       [1.06622796, 0.99354792, 0.943645  ],
       [1.00340028, 1.0678314 , 0.98406206],
       [0.93747274, 0.92193705, 1.00025771],
       [0.9806597 , 0.90026111, 0.9133642 ],
       [0.9472345 , 1.06880633, 1.15360848],
       [0.97605323, 0.97144007, 1.07518498]])

## Compounding
The relation between two observations is

$$
x_t = x_{t-1} \, r_t
$$

or for multiple time steps

$$
x_T = x_0 \prod_{t=1}^T r_t
$$

The user must provide the initial values $x_0$.
You could use the first row `x[0,:]` from the original dataset.

In [4]:
# Compound the ratios starting from the first row of the original data;
# the result reproduces `x`.
roc_to_val(ret, initial=x[0,:])

array([[77.48357077, 74.30867849, 78.23844269],
       [82.61514928, 73.82923313, 73.82931522],
       [82.89606408, 78.83717365, 72.65262807],
       [77.71280022, 72.68291154, 72.67135123],
       [76.20981136, 65.43359878, 66.37541084],
       [72.18856235, 69.9358444 , 76.57123666],
       [70.45987962, 67.93848149, 82.32824384]])

## Index to 1
However, an initial value of 1 is often used to compare multiple time series.
This is called "indexed to 1" or "indexed to 100 percent".

In [5]:
# A scalar `initial` is used for every column, so each series starts at 1.
roc_to_val(ret, initial=1)

array([[1.        , 1.        , 1.        ],
       [1.06622796, 0.99354792, 0.943645  ],
       [1.06985343, 1.06094167, 0.92860524],
       [1.00295843, 0.97812144, 0.92884455],
       [0.98356091, 0.88056469, 0.84837336],
       [0.93166282, 0.94115312, 0.97869071],
       [0.90935251, 0.91427385, 1.05227355]])

## sklearn API
* Set `RoC(initial=value)` if `inverse_transform` should always use certain initial values. You can temporarily override this behavior with `inverse_transform(initial=othervalue)`.
* If no initial values are specified, e.g. `RoC()`, then `RoC().fit(X)` will store the first row of `X`, i.e. `X[0,:]`, as the initial values. Again, you can temporarily override these initial values with `inverse_transform(initial=othervalue)`.

transform, inverse_transform

* It is assumed that neither `X` nor `Z` has any missing values, i.e. `NaN`.
* Suggested approaches: a) use "previous tick" interpolation before `transform`, b) impute `0.0` before `inverse_transform`.


In [6]:
# No `initial` is passed, so fit() stores the first row of `x` for later use
# by inverse_transform.
obj = RoC()
obj.fit(x)
# Unlike val_to_roc above, the result has no leading NaN row:
# 7 input rows yield 6 rows of ratios.
z = obj.transform(x)
z

array([[1.06622796, 0.99354792, 0.943645  ],
       [1.00340028, 1.0678314 , 0.98406206],
       [0.93747274, 0.92193705, 1.00025771],
       [0.9806597 , 0.90026111, 0.9133642 ],
       [0.9472345 , 1.06880633, 1.15360848],
       [0.97605323, 0.97144007, 1.07518498]])

In [7]:
# Uses the initial values stored by fit(), so all 7 original rows are recovered.
obj.inverse_transform(z)

array([[77.48357077, 74.30867849, 78.23844269],
       [82.61514928, 73.82923313, 73.82931522],
       [82.89606408, 78.83717365, 72.65262807],
       [77.71280022, 72.68291154, 72.67135123],
       [76.20981136, 65.43359878, 66.37541084],
       [72.18856235, 69.9358444 , 76.57123666],
       [70.45987962, 67.93848149, 82.32824384]])

In [8]:
# Override the stored initial values for this call: every column starts at 100.
obj.inverse_transform(z, initial=100)

array([[100.        , 100.        , 100.        ],
       [106.6227956 ,  99.35479223,  94.36449995],
       [106.98534316, 106.09416725,  92.86052428],
       [100.29584265,  97.8121439 ,  92.88445518],
       [ 98.35609098,  88.05646945,  84.83733642],
       [ 93.1662824 ,  94.11531172,  97.86907054],
       [ 90.93525108,  91.42738489, 105.22735501]])

## Pipeline example

In [9]:
# Feature engineering (rate of change) followed by a ridge regression.
pipe = Pipeline(steps=[
    ("feng", RoC()),
    ("model", Ridge())
])

# Features are columns 1-2, the target is column 0.
# The features are passed with all 7 rows while the target skips the first
# row: the RoC step returns one row fewer than it receives (the first
# observation has no predecessor), so the model sees 6 feature rows that line
# up with the 6 target values.
m = pipe.fit(x[:,1:], x[1:,0])
# predict() applies the same RoC step, hence 6 predictions for 7 input rows.
yhat = m.predict(x[:,1:])
yhat

array([77.1186619 , 77.07684397, 77.0138636 , 77.14008947, 76.8196796 ,
       76.91312837])

In [10]:
# Same model as the pipeline above, assembled by hand: transform all columns,
# then regress column 0 of the levels on the ratios of columns 1-2.
obj = RoC(trimnan=True)
r = obj.fit_transform(x)

# The ratios have one row fewer than `x`, so the target skips the first row.
features, target = r[:, 1:], x[1:, 0]

lm = Ridge()
lm.fit(features, target)
yhat = lm.predict(features)
yhat

array([77.1186619 , 77.07684397, 77.0138636 , 77.14008947, 76.8196796 ,
       76.91312837])