In [15]:
import sys
# Put the parent directory on the import path; presumably that is where the
# `base` package imported below lives -- confirm against the repo layout.
sys.path.append('../')

from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn import set_config
import polars as pl

from base.impactpy.model.transformers import (
	ConstantIntradayVolatilityTransformer,
	AverageDailyVolumeTransformer
)

# Ask scikit-learn transformers for polars output and switch on metadata
# routing, which the `cols=` keyword passed to fit/transform later in this
# notebook depends on (set_fit_request / set_transform_request need it).
set_config(transform_output='polars', enable_metadata_routing=True)

In [2]:
from pathlib import Path

# The sample file sits under data/raw_bin_samples/ one level above the
# working directory; scan_ipc only opens it lazily (it is collected later).
p = Path().resolve().parents[0].joinpath('data', 'raw_bin_samples', 'clean.arrow')
lf = pl.scan_ipc(p)

In [3]:
# Execute the lazy scan and keep the result in memory as `df`.
df = lf.collect()

In [4]:
# Display the loaded frame (columns: symbol, timestamp, price, trade).
df

symbol,timestamp,price,trade
cat,datetime[μs],f64,i32
"""ADSK""",2019-01-02 09:30:00,125.205,-287
"""ADSK""",2019-01-02 09:30:10,124.61,20
"""ADSK""",2019-01-02 09:30:20,124.61,0
"""ADSK""",2019-01-02 09:30:30,125.155,200
"""ADSK""",2019-01-02 09:30:40,125.705,-385
…,…,…,…
"""ADS""",2019-12-27 15:59:20,110.915,0
"""ADS""",2019-12-27 15:59:30,110.95,0
"""ADS""",2019-12-27 15:59:40,110.92,0
"""ADS""",2019-12-27 15:59:50,110.91,-2


In [17]:
# Window length handed to both transformers constructed in this cell; its unit
# is defined by the transformers themselves (not visible in this notebook).
window = 30

def set_col_routing(trans):
	"""Request that the ``cols`` metadata be routed to ``trans``.

	Calls ``set_fit_request(cols=True)`` on ``trans`` and then
	``set_transform_request(cols=True)`` on whatever that call returned.

	Parameters
	----------
	trans
		Object exposing ``set_fit_request`` and ``set_transform_request``.

	Returns
	-------
	The value returned by the ``set_transform_request`` call.
	"""
	fit_routed = trans.set_fit_request(cols=True)
	return fit_routed.set_transform_request(cols=True)

# Both estimators share the same window and are set up to receive `cols`.
# The second argument ('vola' / 'volu') presumably names the column each one
# adds -- it matches the extra columns seen in the outputs.
vola_trans = set_col_routing(
	ConstantIntradayVolatilityTransformer(window, 'vola')
)
volu_trans = set_col_routing(
	AverageDailyVolumeTransformer(window, 'volu')
)

# Side-by-side combination: the input columns passed through unchanged, plus
# the output of each estimator.
union = FeatureUnion(transformer_list=[
	('original', 'passthrough'),
	('vola_trans', vola_trans),
	('volu_trans', volu_trans),
])

def drop_nulls(df: pl.DataFrame) -> pl.DataFrame:
	"""Return the result of ``df.drop_nulls()`` (rows containing nulls removed)."""
	without_nulls = df.drop_nulls()
	return without_nulls
# Wrap the null-dropping function so it can be used as a Pipeline step.
drop_null_trans = FunctionTransformer(func=drop_nulls)

# Full chain: attach the estimates first, then drop the rows that still
# contain nulls.
pipeline = Pipeline(steps=[
	('rolling_estimates', union),
	('drop_nulls', drop_null_trans),
])

# Mapping passed to fit/transform as the `cols` metadata. Each key maps to the
# identically named column of `df`; presumably key = role, value = column
# name -- confirm against the transformers.
cols = dict(
	timestamp='timestamp',
	price='price',
	symbol='symbol',
)

In [18]:
# Fit every step and transform `df`, passing `cols` along as metadata; the
# final step removes the rows that contain nulls.
pipeline.fit_transform(df, cols=cols)

symbol,timestamp,price,trade,vola,volu
cat,datetime[μs],f64,i32,f64,f64
"""ADSK""",2019-02-01 09:30:00,147.67,0,0.034882,250607.285714
"""ADSK""",2019-02-01 09:30:10,147.66,0,0.034882,250607.285714
"""ADSK""",2019-02-01 09:30:20,147.66,0,0.034882,250607.285714
"""ADSK""",2019-02-01 09:30:30,147.66,0,0.034882,250607.285714
"""ADSK""",2019-02-01 09:30:40,147.665,0,0.034882,250607.285714
…,…,…,…,…,…
"""ADS""",2019-12-27 15:59:20,110.915,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:30,110.95,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:40,110.92,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:50,110.91,-2,0.02861,81498.9


In [9]:
# Run only the FeatureUnion stage, so rows whose estimates are null are still
# present in the result.
union.fit_transform(df, cols=cols)

symbol,timestamp,price,trade,vola,volu
cat,datetime[μs],f64,i32,f64,f64
"""ADSK""",2019-01-02 09:30:00,125.205,-287,,
"""ADSK""",2019-01-02 09:30:10,124.61,20,,
"""ADSK""",2019-01-02 09:30:20,124.61,0,,
"""ADSK""",2019-01-02 09:30:30,125.155,200,,
"""ADSK""",2019-01-02 09:30:40,125.705,-385,,
…,…,…,…,…,…
"""ADS""",2019-12-27 15:59:20,110.915,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:30,110.95,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:40,110.92,0,0.02861,81498.9
"""ADS""",2019-12-27 15:59:50,110.91,-2,0.02861,81498.9


In [13]:
# Look up the 'volu_trans' step by name through get_params() and inspect its
# `estimates_` attribute.
union.get_params()['volu_trans'].estimates_

symbol,date,volu
cat,date,f64
"""ADSK""",2019-02-01,250607.285714
"""ADSK""",2019-02-04,251257.714286
"""ADSK""",2019-02-05,237350.7
"""ADSK""",2019-02-06,235164.47619
"""ADSK""",2019-02-07,238315.761905
…,…,…
"""ADS""",2019-12-20,90423.619048
"""ADS""",2019-12-23,88878.952381
"""ADS""",2019-12-24,85781.4
"""ADS""",2019-12-26,82936.666667
