# unstack_double_lstm_timeseries_classifier

In [1]:
from draco.demo import load_demo

# Load the demo dataset: the target times table and the signal readings table
# that are inspected below and later passed to the pipeline.
target_times, readings = load_demo()

In [2]:
# Name of the pipeline template that this notebook executes step by step.
pipeline_name = 'classes.unstack_double_lstm_timeseries_classifier'

In [3]:
from draco.pipeline import DracoPipeline

# Build the pipeline object from the template name chosen above.
pipeline = DracoPipeline(pipeline_name)

In [4]:
pipeline.template['primitives']

['pandas.DataFrame.resample',
 'pandas.DataFrame.unstack',
 'pandas.DataFrame.pop',
 'pandas.DataFrame.pop',
 'sklearn.impute.SimpleImputer',
 'sklearn.preprocessing.MinMaxScaler',
 'pandas.DataFrame',
 'pandas.DataFrame.set',
 'pandas.DataFrame.set',
 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',
 'keras.Sequential.DoubleLSTMTimeSeriesClassifier']

# Step-by-step execution

## Input Data

In [5]:
readings.head()

Unnamed: 0,turbine_id,signal_id,timestamp,value
0,T001,S01,2013-01-10,323.0
1,T001,S02,2013-01-10,320.0
2,T001,S03,2013-01-10,284.0
3,T001,S04,2013-01-10,348.0
4,T001,S05,2013-01-10,273.0


In [6]:
target_times.head()

Unnamed: 0,turbine_id,cutoff_time,target
0,T001,2013-01-12,0
1,T001,2013-01-13,0
2,T001,2013-01-14,0
3,T001,2013-01-15,1
4,T001,2013-01-16,0


## Data Preparation (part of Draco Pipeline)

* Input: target_times, readings, turbines
* Output: X, y, readings, turbines
* Effect: target_times has been split into X and y

## pandas.DataFrame.resample

* Input: readings
* Output: readings (resampled)
* Effect: readings have been resampled according to the configured resample rule, and
  turbine_id, signal_id and timestamp have been set as a multi-index

In [7]:
# Run the pipeline only up to step 0 (pandas.DataFrame.resample) and capture the
# intermediate context (a dict of variables) so it can be inspected and then
# fed back into the following steps.
context = pipeline.fit(target_times, readings, output_=0)

In [8]:
context.keys()

dict_keys(['readings', 'turbines', 'X', 'y'])

In [9]:
context['readings'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
turbine_id,signal_id,timestamp,Unnamed: 3_level_1
T001,S01,2013-01-10 00:00:00,313.333333
T001,S01,2013-01-10 01:00:00,197.5
T001,S01,2013-01-10 02:00:00,248.166667
T001,S01,2013-01-10 03:00:00,253.166667
T001,S01,2013-01-10 04:00:00,305.0


## pandas.DataFrame.unstack

* Input: readings (resampled)
* Output: readings (unstacked)
* Effect: readings have been unstacked into one `value_<signal_id>` column per signal,
  with turbine_id and timestamp as regular columns

In [10]:
# Step 1 is pandas.DataFrame.unstack: resume from the previous context and
# stop right after this single step.
step = 1
context = pipeline.fit(start_=step, output_=step, **context)

In [11]:
context.keys()

dict_keys(['readings', 'turbines', 'X', 'y'])

In [12]:
context['readings'].head()

Unnamed: 0,turbine_id,timestamp,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,T001,2013-01-10 00:00:00,313.333333,323.833333,336.0,364.666667,286.5,314.0,243.166667,3197980.0,...,10.383333,3131958.0,52.666667,54.333333,56.166667,61.0,47.666667,52.666667,40.833333,357.333333
1,T001,2013-01-10 01:00:00,197.5,221.333333,216.0,260.666667,206.833333,235.833333,186.666667,3198221.0,...,8.666667,3133668.0,33.166667,37.0,36.166667,43.666667,34.5,39.333333,31.166667,249.666667
2,T001,2013-01-10 02:00:00,248.166667,271.666667,277.5,298.0,233.666667,271.166667,216.333333,3198448.0,...,8.833333,3135413.0,41.5,45.666667,46.5,49.666667,39.333333,45.5,36.166667,297.666667
3,T001,2013-01-10 03:00:00,253.166667,256.166667,242.666667,265.333333,211.666667,226.666667,181.0,3198691.0,...,8.433333,3137001.0,42.333333,42.833333,40.5,44.166667,35.333333,37.833333,30.333333,268.0
4,T001,2013-01-10 04:00:00,305.0,312.333333,346.166667,329.833333,280.666667,308.833333,271.833333,3198978.0,...,9.083333,3138843.0,50.5,51.166667,55.5,53.666667,46.166667,49.666667,41.166667,341.833333


## pandas.DataFrame.pop

* Input: readings (unstacked)
* Output: readings (without turbine_id), turbine_id
* Effect: turbine_id has been popped from readings

In [13]:
# Step 2 is the first pandas.DataFrame.pop: resume from the previous context
# and execute only this step.
step = 2
context = pipeline.fit(start_=step, output_=step, **context)

In [14]:
context.keys()

dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])

In [15]:
context['turbine_id'].head()

0    T001
1    T001
2    T001
3    T001
4    T001
Name: turbine_id, dtype: object

In [16]:
context['readings'].head()

Unnamed: 0,timestamp,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,value_S09,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,2013-01-10 00:00:00,313.333333,323.833333,336.0,364.666667,286.5,314.0,243.166667,3197980.0,695143.166667,...,10.383333,3131958.0,52.666667,54.333333,56.166667,61.0,47.666667,52.666667,40.833333,357.333333
1,2013-01-10 01:00:00,197.5,221.333333,216.0,260.666667,206.833333,235.833333,186.666667,3198221.0,695403.666667,...,8.666667,3133668.0,33.166667,37.0,36.166667,43.666667,34.5,39.333333,31.166667,249.666667
2,2013-01-10 02:00:00,248.166667,271.666667,277.5,298.0,233.666667,271.166667,216.333333,3198448.0,695656.5,...,8.833333,3135413.0,41.5,45.666667,46.5,49.666667,39.333333,45.5,36.166667,297.666667
3,2013-01-10 03:00:00,253.166667,256.166667,242.666667,265.333333,211.666667,226.666667,181.0,3198691.0,695911.333333,...,8.433333,3137001.0,42.333333,42.833333,40.5,44.166667,35.333333,37.833333,30.333333,268.0
4,2013-01-10 04:00:00,305.0,312.333333,346.166667,329.833333,280.666667,308.833333,271.833333,3198978.0,696195.833333,...,9.083333,3138843.0,50.5,51.166667,55.5,53.666667,46.166667,49.666667,41.166667,341.833333


## pandas.DataFrame.pop

* Input: readings (without turbine_id)
* Output: readings (without timestamp), timestamp
* Effect: timestamp has been popped from readings

In [17]:
# Step 3 is the second pandas.DataFrame.pop: resume from the previous context
# and execute only this step.
step = 3
context = pipeline.fit(start_=step, output_=step, **context)

In [18]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])

In [19]:
context['timestamp'].head()

0   2013-01-10 00:00:00
1   2013-01-10 01:00:00
2   2013-01-10 02:00:00
3   2013-01-10 03:00:00
4   2013-01-10 04:00:00
Name: timestamp, dtype: datetime64[ns]

In [20]:
context['readings'].head()

Unnamed: 0,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,value_S09,value_S10,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,313.333333,323.833333,336.0,364.666667,286.5,314.0,243.166667,3197980.0,695143.166667,3348384.0,...,10.383333,3131958.0,52.666667,54.333333,56.166667,61.0,47.666667,52.666667,40.833333,357.333333
1,197.5,221.333333,216.0,260.666667,206.833333,235.833333,186.666667,3198221.0,695403.666667,3348651.0,...,8.666667,3133668.0,33.166667,37.0,36.166667,43.666667,34.5,39.333333,31.166667,249.666667
2,248.166667,271.666667,277.5,298.0,233.666667,271.166667,216.333333,3198448.0,695656.5,3348910.0,...,8.833333,3135413.0,41.5,45.666667,46.5,49.666667,39.333333,45.5,36.166667,297.666667
3,253.166667,256.166667,242.666667,265.333333,211.666667,226.666667,181.0,3198691.0,695911.333333,3349157.0,...,8.433333,3137001.0,42.333333,42.833333,40.5,44.166667,35.333333,37.833333,30.333333,268.0
4,305.0,312.333333,346.166667,329.833333,280.666667,308.833333,271.833333,3198978.0,696195.833333,3349452.0,...,9.083333,3138843.0,50.5,51.166667,55.5,53.666667,46.166667,49.666667,41.166667,341.833333


## sklearn.impute.SimpleImputer

* Input: readings (unstacked, no turbine_id, no timestamp)
* Output: readings (imputed, numpy array)
* Effect: readings have been imputed and converted to a numpy array

In [21]:
# Step 4 is sklearn.impute.SimpleImputer: resume from the previous context
# and execute only this step.
step = 4
context = pipeline.fit(start_=step, output_=step, **context)

In [22]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [23]:
context['readings'][0:5]

array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,
        2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,
        6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,
        3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,
        1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,
        5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,
        4.08333333e+01, 3.57333333e+02],
       [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,
        2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,
        6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,
        3.35834000e+06, 3.22409567e+06, 6.83333333e+00, 5.15000000e+00,
        8.66666667e+00, 3.13366817e+06, 3.31666667e+01, 3.70000000e+01,
        3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,
        3.11666667e+01, 2.49666667e+02],
       [2.48166667e+02, 2.71666667e+02, 2.77500000e+02

## sklearn.preprocessing.MinMaxScaler

* Input: readings (imputed, array)
* Output: readings (scaled, array)
* Effect: readings have been scaled to the [-1, 1] range

In [24]:
# Step 5 is sklearn.preprocessing.MinMaxScaler: resume from the previous
# context and execute only this step.
step = 5
context = pipeline.fit(start_=step, output_=step, **context)

In [25]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [26]:
context['readings'][0:5]

array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,
        -0.25969448, -0.42198789, -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -0.11007463,
        -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,
        -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,
        -0.25697453],
       [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,
        -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,
        -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,
        -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726  ,
        -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,
        -0.48085254],
       [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,
        -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,
        -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,
       

## pandas.DataFrame

* Input: readings (scaled, array)
* Output: readings (dataframe)
* Effect: readings have been converted into a dataframe

In [27]:
# Step 6 is pandas.DataFrame: resume from the previous context and execute
# only this step.
step = 6
context = pipeline.fit(start_=step, output_=step, **context)

In [28]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [29]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,-0.261261,-0.237069,-0.208701,-0.141066,-0.323288,-0.259694,-0.421988,-1.0,-1.0,-1.0,...,-0.104242,-0.373977,-0.252336,-0.227166,-0.201405,-0.134818,-0.322392,-0.253801,-0.418224,-0.256975
1,-0.53349,-0.478056,-0.490888,-0.38558,-0.510372,-0.4434,-0.554384,-0.99983,-0.999825,-0.999825,...,-0.252336,-0.373635,-0.525701,-0.470726,-0.482436,-0.378664,-0.50762,-0.440936,-0.553738,-0.480853
2,-0.414414,-0.359718,-0.346267,-0.297806,-0.447358,-0.36036,-0.484866,-0.99967,-0.999656,-0.999656,...,-0.237958,-0.373286,-0.408879,-0.348946,-0.337237,-0.294256,-0.439625,-0.354386,-0.483645,-0.381043
3,-0.402664,-0.39616,-0.42818,-0.374608,-0.499022,-0.464943,-0.567663,-0.999499,-0.999485,-0.999494,...,-0.272466,-0.372968,-0.397196,-0.388759,-0.421546,-0.37163,-0.495897,-0.461988,-0.565421,-0.442731
4,-0.280846,-0.264107,-0.184793,-0.222962,-0.336986,-0.271837,-0.354814,-0.999296,-0.999295,-0.999301,...,-0.216391,-0.372599,-0.28271,-0.271663,-0.210773,-0.237984,-0.343494,-0.295906,-0.413551,-0.289205


## pandas.DataFrame.set

* Input: readings (dataframe)
* Output: readings (dataframe with turbine_id)
* Effect: turbine_id has been set as a readings column

In [30]:
# Step 7 is the first pandas.DataFrame.set: resume from the previous context
# and execute only this step.
step = 7
context = pipeline.fit(start_=step, output_=step, **context)

In [31]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [32]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,turbine_id
0,-0.261261,-0.237069,-0.208701,-0.141066,-0.323288,-0.259694,-0.421988,-1.0,-1.0,-1.0,...,-0.373977,-0.252336,-0.227166,-0.201405,-0.134818,-0.322392,-0.253801,-0.418224,-0.256975,T001
1,-0.53349,-0.478056,-0.490888,-0.38558,-0.510372,-0.4434,-0.554384,-0.99983,-0.999825,-0.999825,...,-0.373635,-0.525701,-0.470726,-0.482436,-0.378664,-0.50762,-0.440936,-0.553738,-0.480853,T001
2,-0.414414,-0.359718,-0.346267,-0.297806,-0.447358,-0.36036,-0.484866,-0.99967,-0.999656,-0.999656,...,-0.373286,-0.408879,-0.348946,-0.337237,-0.294256,-0.439625,-0.354386,-0.483645,-0.381043,T001
3,-0.402664,-0.39616,-0.42818,-0.374608,-0.499022,-0.464943,-0.567663,-0.999499,-0.999485,-0.999494,...,-0.372968,-0.397196,-0.388759,-0.421546,-0.37163,-0.495897,-0.461988,-0.565421,-0.442731,T001
4,-0.280846,-0.264107,-0.184793,-0.222962,-0.336986,-0.271837,-0.354814,-0.999296,-0.999295,-0.999301,...,-0.372599,-0.28271,-0.271663,-0.210773,-0.237984,-0.343494,-0.295906,-0.413551,-0.289205,T001


## pandas.DataFrame.set

* Input: readings (dataframe with turbine_id)
* Output: readings (dataframe with turbine_id and timestamp)
* Effect: timestamp has been set as a readings column

In [33]:
# Step 8 is the second pandas.DataFrame.set: resume from the previous context
# and execute only this step.
step = 8
context = pipeline.fit(start_=step, output_=step, **context)

In [34]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [35]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,turbine_id,timestamp
0,-0.261261,-0.237069,-0.208701,-0.141066,-0.323288,-0.259694,-0.421988,-1.0,-1.0,-1.0,...,-0.252336,-0.227166,-0.201405,-0.134818,-0.322392,-0.253801,-0.418224,-0.256975,T001,2013-01-10 00:00:00
1,-0.53349,-0.478056,-0.490888,-0.38558,-0.510372,-0.4434,-0.554384,-0.99983,-0.999825,-0.999825,...,-0.525701,-0.470726,-0.482436,-0.378664,-0.50762,-0.440936,-0.553738,-0.480853,T001,2013-01-10 01:00:00
2,-0.414414,-0.359718,-0.346267,-0.297806,-0.447358,-0.36036,-0.484866,-0.99967,-0.999656,-0.999656,...,-0.408879,-0.348946,-0.337237,-0.294256,-0.439625,-0.354386,-0.483645,-0.381043,T001,2013-01-10 02:00:00
3,-0.402664,-0.39616,-0.42818,-0.374608,-0.499022,-0.464943,-0.567663,-0.999499,-0.999485,-0.999494,...,-0.397196,-0.388759,-0.421546,-0.37163,-0.495897,-0.461988,-0.565421,-0.442731,T001,2013-01-10 03:00:00
4,-0.280846,-0.264107,-0.184793,-0.222962,-0.336986,-0.271837,-0.354814,-0.999296,-0.999295,-0.999301,...,-0.28271,-0.271663,-0.210773,-0.237984,-0.343494,-0.295906,-0.413551,-0.289205,T001,2013-01-10 04:00:00


## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences

* Input: X, readings (dataframe with turbine_id and timestamp)
* Output: X
* Effect: X has been converted to a 3D numpy array that contains one matrix of shape
  (window_size x num_signals) for each of the target times.

In [36]:
# Display the hyperparameters of the cutoff_window_sequences primitive
# (window size and the names of the cutoff-time and time-index columns).
# NOTE(review): this reaches into the private `_pipeline` attribute of the
# pipeline object; confirm whether a public accessor exists.
pipeline._pipeline.get_hyperparameters()[
    'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']

{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}

In [37]:
# Step 9 is mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences:
# resume from the previous context and execute only this step.
step = 9
context = pipeline.fit(start_=step, output_=step, **context)

In [38]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [39]:
context['readings'].shape

(8521, 28)

In [40]:
context['y'].shape

(353,)

In [41]:
context['X'].shape

(353, 24, 26)

In [42]:
context['X'][0]

array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,
        -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,
        -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,
        -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,
        -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,
        -0.63853752],
       [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,
        -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,
        -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,
        -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,
        -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,
        -0.63645815],
       [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,
        -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,
        -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,
       

## keras.Sequential.DoubleLSTMTimeSeriesClassifier

* Input: X, y
* Output: none
* Effect: DoubleLSTM has been fitted.

In [43]:
# Step 10 is keras.Sequential.DoubleLSTMTimeSeriesClassifier, the last
# primitive in the pipeline: resume from the previous context and fit it.
step = 10
context = pipeline.fit(start_=step, output_=step, **context)