# Example: How to Use the Window Feature Extractor
The window feature extractor is a framework for calculating features over specified windows of time-series data.

In [1]:
import os
import sys
import numpy as np
import pandas as pd

sys.path.append('..')

from odutils.feature_extractors import StatWindowFeatsExtractor
from odutils.window import WindowGenerator

## Build toy data
We will calculate basic statistical features of `value_a` and `value_b`.

In [2]:
# Seed NumPy's global RNG so that Restart & Run All reproduces the same toy data.
np.random.seed(0)

# Toy data: two series ('A' and 'B'), each with 50 rows at time = 1..50.
data = pd.DataFrame(
    dict(
        id=['A'] * 50 + ['B'] * 50,
        time=np.concatenate([np.arange(1, 51), np.arange(1, 51)], axis=0),
        value_a=np.random.randint(100, size=100),  # random integers in [0, 100)
        value_b=np.random.normal(size=100),        # standard-normal floats
    )
)

data

Unnamed: 0,id,time,value_a,value_b
0,A,1,89,-0.459619
1,A,2,97,-0.618345
2,A,3,22,0.226129
3,A,4,22,1.423755
4,A,5,96,-0.717490
...,...,...,...,...
95,B,46,24,0.861503
96,B,47,46,0.111006
97,B,48,28,-0.747266
98,B,49,77,-0.194669


## Configure the target columns and feature extractor

In [7]:
# Columns of `data` for which the features are calculated.
target_cols = ['value_a', 'value_b']

# Feature extractor for the target columns.
# It calculates the mean, standard deviation and median of each target column
# (reported as `<column>_mean`, `<column>_std` and `<column>_median`).
ext = StatWindowFeatsExtractor(target_cols)

## Apply calculation on whole DataFrame
`transform()` applies the calculation to the whole DataFrame.

In [4]:
# Treat the whole DataFrame as one single large window:
# transform() returns one value per feature (displayed below as a dict).
feats = ext.transform(data)
feats

{'value_a_mean': 48.61,
 'value_a_std': 27.51541204488859,
 'value_a_median': 42.5,
 'value_b_mean': 0.02487707291037414,
 'value_b_std': 0.9964125828853809,
 'value_b_median': -0.1748913718374986}

## Apply the calculation per window, with one window ending at each `end_time` in `end_times`
Use `transform_generator()`.

In [8]:
# One window is generated for each end time listed here.
end_times = np.array([12, 24, 36, 45])
# NOTE(review): min_length looks like the minimum number of rows a window must
# contain (see the "Too few rows exist" message further below) - confirm
# against WindowGenerator.
wg = WindowGenerator(window_size=10, col_time='time', min_length=1)
# `data` is passed unfiltered, so the windows are not separated by `id` here.
gen = wg.generate(end_times, data)

feats = ext.transform_generator(gen)
# Read only after transform_generator() has run; presumably the list is filled
# while `gen` is being consumed - TODO confirm.
print('missings:', wg.missing_data_end_times)
feats

missings: []


Unnamed: 0_level_0,value_a_mean,value_a_std,value_a_median,value_b_mean,value_b_std,value_b_median
end_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12,49.45,26.807602,50.5,0.165655,0.958402,-0.061004
24,53.45,25.908445,55.0,0.025201,0.950961,-0.112738
36,36.0,24.617067,32.0,-0.324845,0.996426,-0.57469
45,47.8,28.154218,43.0,0.270942,0.985836,0.259173


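## Apply `transform_generator()` per group, using the keys passed
Use an `end_time_group_generator()`-style generator that yields `(keys, end_times, data)` for each group.
`transform_generator_group()` applies `transform_generator()` to every group yielded by that generator.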

In [9]:
def get_end_time_data_generator(data, end_times):
    """Yield one ``(keys, end_times, rows)`` triple per distinct ``id`` in ``data``.

    Args:
        data: DataFrame with an ``'id'`` column to group by.
        end_times: Mapping from each id value to the end times for that id.
            It is indexed with every id found in ``data``.

    Yields:
        A tuple of a one-element list holding the id, ``end_times[id]``, and
        the sub-DataFrame of ``data`` containing the rows of that id.
    """
    for group_id, group_rows in data.groupby('id'):
        group_key = [group_id]
        group_end_times = end_times[group_id]
        yield group_key, group_end_times, group_rows


# End times per id. 100 (for 'A') and 0 (for 'B') lie outside the time range
# 1..50 of the toy data, so they demonstrate how empty windows are reported.
end_times = {'A': np.array([12, 24, 36, 45, 100]),
              'B': np.array([0, 12, 24, 36, 45])}
# Generator yielding (keys, end_times, rows) for each id in `data`.
end_time_group_gen = get_end_time_data_generator(data, end_times)
wg = WindowGenerator(window_size=10, col_time='time', min_length=1)


feats = ext.transform_generator_group(wg, end_time_group_gen)
# Here the missing end times are read from the extractor (not from `wg`);
# the printed value is keyed by group.
print('missings:', ext.missing_data_end_times)

feats

missings: {'A': [100], 'B': [0]}


Too few rows exist in the window sliced.
dat_win.shape[0]: 0
self.min_length: 1
start_time, end_time: 90, 100
Too few rows exist in the window sliced.
dat_win.shape[0]: 0
self.min_length: 1
start_time, end_time: -10, 0


Unnamed: 0,Unnamed: 1,value_a_mean,value_a_std,value_a_median,value_b_mean,value_b_std,value_b_median
A,12,49.2,26.236616,42.0,0.197429,0.996201,-0.052863
A,24,55.8,27.432827,62.0,0.521156,0.970844,0.339901
A,36,35.7,28.565889,26.0,-0.498521,0.740489,-0.665583
A,45,40.2,21.84857,36.0,-0.197738,0.871352,-0.480141
A,100,,,,,,
B,0,,,,,,
B,12,49.7,27.364393,60.5,0.133882,0.917952,-0.07107
B,24,51.1,24.060133,46.5,-0.470753,0.611698,-0.300011
B,36,36.3,19.894974,34.0,-0.151169,1.17349,-0.258505
B,45,55.4,31.503016,66.0,0.739622,0.863232,0.555623
