## Example for VOM piplines

In [2]:
import numpy as np
import pandas as pd

Create data

In [3]:
cycles = 2 # how many sine cycles
resolution = 25 # how many datapoints to generate

length = np.pi * 2 * cycles
my_wave = np.sin(np.arange(0, length, length / resolution)) + 0.25  # move the wave up by 0.25
wavedf = pd.DataFrame(my_wave).reset_index()
wavedf.columns=['time', 'vib']

In [4]:
# Check format
wavedf.head(3)

Unnamed: 0,time,vib
0,0,0.25
1,1,0.731754
2,2,1.094328


In [5]:
# Check median
wavedf['vib'].median()

0.25

# Set up custom transformer and estimator

In [6]:
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

class fpTransform(BaseEstimator, TransformerMixin):
    def fit(self, X, y):
        self.params = y.median()  #imaging we're finding the curve fit instead of simple median
        return self

    def transform(self, X, y=None):
        X['pred'] = self.params['vib']  #create another column 'pred' in X. Store calculated median in this new column
        return X

class residualRegressor(BaseEstimator, RegressorMixin):
     def __init__(self):
         self.model = RandomForestRegressor()

     def fit(self, X, y):
         res = y['vib'] - X['pred']
         self.model.fit(pd.DataFrame(X['time']), pd.DataFrame(res))
         return self
     
     def predict(self, X):
         res = self.model.predict(pd.DataFrame(X['time']))
         vib = res + X['pred']
         return pd.DataFrame(vib)



Set up X,y datasets

In [21]:
X = pd.DataFrame(wavedf['time'])
y = pd.DataFrame(wavedf['vib'])

No pipleine run (to validate everything runs fine)

In [8]:
#transformer
fp = fpTransform()
fp.fit(X,y)
Xt = fp.transform(X)

#regressor
reg = residualRegressor()
reg.fit(Xt,y)
predf = reg.predict(Xt)

  self.model.fit(pd.DataFrame(X['time']), pd.DataFrame(res))


Pipeline

In [9]:
vib_pipe = Pipeline(
    steps=[
        ("first-principle transformer", fpTransform()),
        ("residual regressor", residualRegressor())

    ]
)

In [18]:
vib_pipe.fit(X,y)

  self.model.fit(pd.DataFrame(X['time']), pd.DataFrame(res))


In [22]:
vib_pipe.predict(X).head()

Unnamed: 0,pred
0,0.456565
1,0.649266
2,1.013753
3,1.188451
4,1.167128


Check results

In [13]:
predf.head()

Unnamed: 0,pred
0,0.423325
1,0.63048
2,1.018119
3,1.181667
4,1.156261


In [14]:
y['vib'].head()

0    0.250000
1    0.731754
2    1.094328
3    1.248027
4    1.154827
Name: vib, dtype: float64

In [15]:
median_percent_diff = (
    (predf['pred'] - y['vib'])*100 / y['vib']
    ).median()
median_percent_diff

-5.317198688031737