# double_lstm_with_unstack

In [1]:
# Load the demo data shipped in draco.demo: the target times (labels) and the sensor readings.
from draco.demo import load_demo

target_times, readings = load_demo()

In [2]:
# Name of the pipeline template to load; it is passed to DracoPipeline in the next cell.
pipeline_name = 'double_lstm_with_unstack'

In [3]:
# Build the pipeline object from the template name chosen above.
from draco.pipeline import DracoPipeline

pipeline = DracoPipeline(pipeline_name)

In [4]:
# List the primitives of the template. The position of each entry matches the step
# index passed as start_/output_ in the step-by-step cells below.
pipeline.template['primitives']

['mlblocks.MLPipeline',
 'pandas.DataFrame.pop',
 'pandas.DataFrame.pop',
 'sklearn.impute.SimpleImputer',
 'sklearn.preprocessing.MinMaxScaler',
 'pandas.DataFrame',
 'pandas.DataFrame.set',
 'pandas.DataFrame.set',
 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',
 'keras.Sequential.DoubleLSTMTimeSeriesClassifier']

# Step-by-step execution

## Input Data

In [5]:
# Peek at the raw readings: long format, one row per (turbine_id, signal_id, timestamp) with its value.
readings.head()

Unnamed: 0,turbine_id,signal_id,timestamp,value
0,T001,S01,2013-01-10,323.0
1,T001,S02,2013-01-10,320.0
2,T001,S03,2013-01-10,284.0
3,T001,S04,2013-01-10,348.0
4,T001,S05,2013-01-10,273.0


In [6]:
# Peek at the target times: one row per (turbine_id, cutoff_time) with its target label.
target_times.head()

Unnamed: 0,turbine_id,cutoff_time,target
0,T001,2013-01-12,0
1,T001,2013-01-13,0
2,T001,2013-01-14,0
3,T001,2013-01-15,1
4,T001,2013-01-16,0


## Data Preparation (part of Draco Pipeline)

* Input: target_times, readings, turbines
* Output: X, y, readings, turbines
* Effect: target_times has been split into X and y

## mlblocks.MLPipeline

### pandas.DataFrame.resample

* Input: readings
* Output: readings (resampled)
* Effect: readings have been resampled according to the configured resample rule, and
  turbine_id, signal_id and timestamp have been set as a multi-index
  
### pandas.DataFrame.unstack

* Input: readings (resampled)
* Output: readings (unstacked)
* Effect: readings have been unstacked, so each signal_id becomes its own `value_<signal_id>` column

In [7]:
# Run the data preparation and step 0 only (output_=0), capturing the intermediate
# context dict instead of fitting the whole pipeline.
context = pipeline.fit(target_times, readings, output_=0)

In [8]:
context.keys()

dict_keys(['readings', 'turbines', 'X', 'y'])

In [9]:
# After step 0 the readings are in wide format: one value_<signal_id> column per signal.
context['readings'].head()

Unnamed: 0,turbine_id,timestamp,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,T001,2013-01-10 00:00:00,323.0,320.0,284.0,348.0,273.0,342.0,280.0,3197842.0,...,11.7,3131020.0,55.0,55.0,47.0,58.0,45.0,58.0,47.0,356.0
1,T001,2013-01-10 00:10:00,346.0,384.0,367.0,411.0,331.0,360.0,249.0,3197900.0,...,10.2,3131420.0,58.0,63.0,62.0,67.0,55.0,61.0,42.0,400.0
2,T001,2013-01-10 00:20:00,407.0,363.0,407.0,393.0,275.0,335.0,270.0,3197968.0,...,9.5,3131822.0,68.0,61.0,67.0,66.0,46.0,55.0,45.0,402.0
3,T001,2013-01-10 00:30:00,257.0,307.0,315.0,361.0,317.0,354.0,271.0,3198011.0,...,10.5,3132179.0,43.0,51.0,53.0,62.0,53.0,60.0,45.0,357.0
4,T001,2013-01-10 00:40:00,267.0,309.0,314.0,355.0,262.0,246.0,212.0,3198056.0,...,9.6,3132501.0,45.0,51.0,54.0,59.0,43.0,41.0,36.0,322.0


## pandas.DataFrame.pop

* Input: readings (unstacked)
* Output: readings (without turbine_id), turbine_id
* Effect: turbine_id has been popped from readings

In [10]:
# Step 1 (first pandas.DataFrame.pop): start_ and output_ are the same index, so only
# this step runs on the previous context and the updated context is returned.
step = 1
context = pipeline.fit(**context, output_=step, start_=step)

In [11]:
context.keys()

dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])

In [12]:
context['turbine_id'].head()

0    T001
1    T001
2    T001
3    T001
4    T001
Name: turbine_id, dtype: object

In [13]:
context['readings'].head()

Unnamed: 0,timestamp,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,value_S09,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,2013-01-10 00:00:00,323.0,320.0,284.0,348.0,273.0,342.0,280.0,3197842.0,695000.0,...,11.7,3131020.0,55.0,55.0,47.0,58.0,45.0,58.0,47.0,356.0
1,2013-01-10 00:10:00,346.0,384.0,367.0,411.0,331.0,360.0,249.0,3197900.0,695063.0,...,10.2,3131420.0,58.0,63.0,62.0,67.0,55.0,61.0,42.0,400.0
2,2013-01-10 00:20:00,407.0,363.0,407.0,393.0,275.0,335.0,270.0,3197968.0,695124.0,...,9.5,3131822.0,68.0,61.0,67.0,66.0,46.0,55.0,45.0,402.0
3,2013-01-10 00:30:00,257.0,307.0,315.0,361.0,317.0,354.0,271.0,3198011.0,695175.0,...,10.5,3132179.0,43.0,51.0,53.0,62.0,53.0,60.0,45.0,357.0
4,2013-01-10 00:40:00,267.0,309.0,314.0,355.0,262.0,246.0,212.0,3198056.0,695226.0,...,9.6,3132501.0,45.0,51.0,54.0,59.0,43.0,41.0,36.0,322.0


## pandas.DataFrame.pop

* Input: readings (without turbine_id)
* Output: readings (without timestamp), timestamp
* Effect: timestamp has been popped from readings

In [14]:
# Step 2 (second pandas.DataFrame.pop): run only this step on the previous context;
# it moves the timestamp column out of readings into its own context entry.
step = 2
context = pipeline.fit(**context, output_=step, start_=step)

In [15]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])

In [16]:
context['timestamp'].head()

0   2013-01-10 00:00:00
1   2013-01-10 00:10:00
2   2013-01-10 00:20:00
3   2013-01-10 00:30:00
4   2013-01-10 00:40:00
Name: timestamp, dtype: datetime64[ns]

In [17]:
context['readings'].head()

Unnamed: 0,value_S01,value_S02,value_S03,value_S04,value_S05,value_S06,value_S07,value_S08,value_S09,value_S10,...,value_S17,value_S18,value_S19,value_S20,value_S21,value_S22,value_S23,value_S24,value_S25,value_S26
0,323.0,320.0,284.0,348.0,273.0,342.0,280.0,3197842.0,695000.0,3348234.0,...,11.7,3131020.0,55.0,55.0,47.0,58.0,45.0,58.0,47.0,356.0
1,346.0,384.0,367.0,411.0,331.0,360.0,249.0,3197900.0,695063.0,3348296.0,...,10.2,3131420.0,58.0,63.0,62.0,67.0,55.0,61.0,42.0,400.0
2,407.0,363.0,407.0,393.0,275.0,335.0,270.0,3197968.0,695124.0,3348363.0,...,9.5,3131822.0,68.0,61.0,67.0,66.0,46.0,55.0,45.0,402.0
3,257.0,307.0,315.0,361.0,317.0,354.0,271.0,3198011.0,695175.0,3348416.0,...,10.5,3132179.0,43.0,51.0,53.0,62.0,53.0,60.0,45.0,357.0
4,267.0,309.0,314.0,355.0,262.0,246.0,212.0,3198056.0,695226.0,3348470.0,...,9.6,3132501.0,45.0,51.0,54.0,59.0,43.0,41.0,36.0,322.0


## sklearn.impute.SimpleImputer

* Input: readings (unstacked, no turbine_id, no timestamp)
* Output: readings (imputed, numpy array)
* Effect: readings have been imputed and converted to numpy array

In [18]:
# Step 3 (sklearn.impute.SimpleImputer): run only this step on the previous context;
# readings comes back as a numpy array instead of a DataFrame.
step = 3
context = pipeline.fit(**context, output_=step, start_=step)

In [19]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [20]:
# readings is now a numpy array (no .head()), so slice the first five rows instead.
context['readings'][0:5]

array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,
        2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,
        6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,
        3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,
        1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,
        4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,
        4.700000e+01, 3.560000e+02],
       [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,
        3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,
        6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,
        3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,
        1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,
        6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,
        4.200000e+01, 4.000000e+02],
       [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,
        2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,
        6.951240e+05, 3.348363

## sklearn.preprocessing.MinMaxScaler

* Input: readings (imputed, array)
* Output: readings (scaled, array)
* Effect: readings have been scaled to the [-1, 1] range

In [21]:
# Step 4 (sklearn.preprocessing.MinMaxScaler): run only this step on the previous
# context; readings stays a numpy array, now with scaled values.
step = 4
context = pipeline.fit(**context, output_=step, start_=step)

In [22]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [23]:
context['readings'][0:5]

array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,
        -0.19248826, -0.3317757 , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -0.11702128,
        -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,
        -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,
        -0.26141079],
       [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,
        -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,
        -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,
        -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,
        -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,
        -0.17012448],
       [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,
        -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,
        -0.999915  , -0.99993254, -0.99992474, -0.99994125, -0.17021277,
       

## pandas.DataFrame

* Input: readings (scaled, array)
* Output: readings (dataframe)
* Effect: readings have been converted into a dataframe

In [24]:
# Step 5 (pandas.DataFrame): run only this step on the previous context; the scaled
# array is wrapped back into a DataFrame with integer column labels.
step = 5
context = pipeline.fit(**context, output_=step, start_=step)

In [25]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [26]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,-0.235639,-0.242673,-0.328638,-0.177022,-0.352872,-0.192488,-0.331776,-1.0,-1.0,-1.0,...,-0.257143,-0.373788,-0.227586,-0.227586,-0.319728,-0.186207,-0.369863,-0.186207,-0.337931,-0.261411
1,-0.181712,-0.092614,-0.133803,-0.029308,-0.216882,-0.150235,-0.404206,-0.999959,-0.999958,-0.999959,...,-0.352381,-0.373708,-0.186207,-0.117241,-0.115646,-0.062069,-0.232877,-0.144828,-0.406897,-0.170124
2,-0.038687,-0.141852,-0.039906,-0.071512,-0.348183,-0.20892,-0.35514,-0.999911,-0.999917,-0.999916,...,-0.396825,-0.373627,-0.048276,-0.144828,-0.047619,-0.075862,-0.356164,-0.227586,-0.365517,-0.165975
3,-0.390387,-0.273154,-0.255869,-0.146542,-0.249707,-0.164319,-0.352804,-0.999881,-0.999883,-0.999881,...,-0.333333,-0.373556,-0.393103,-0.282759,-0.238095,-0.131034,-0.260274,-0.158621,-0.365517,-0.259336
4,-0.36694,-0.268464,-0.258216,-0.16061,-0.378664,-0.41784,-0.490654,-0.999849,-0.999849,-0.999846,...,-0.390476,-0.373492,-0.365517,-0.282759,-0.22449,-0.172414,-0.39726,-0.42069,-0.489655,-0.33195


## pandas.DataFrame.set

* Input: readings (dataframe)
* Output: readings (dataframe with turbine_id)
* Effect: turbine_id has been set as a readings column

In [27]:
# Step 6 (first pandas.DataFrame.set): run only this step on the previous context;
# the turbine_id popped in step 1 is added back to readings as a column.
step = 6
context = pipeline.fit(**context, output_=step, start_=step)

In [28]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [29]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,turbine_id
0,-0.235639,-0.242673,-0.328638,-0.177022,-0.352872,-0.192488,-0.331776,-1.0,-1.0,-1.0,...,-0.373788,-0.227586,-0.227586,-0.319728,-0.186207,-0.369863,-0.186207,-0.337931,-0.261411,T001
1,-0.181712,-0.092614,-0.133803,-0.029308,-0.216882,-0.150235,-0.404206,-0.999959,-0.999958,-0.999959,...,-0.373708,-0.186207,-0.117241,-0.115646,-0.062069,-0.232877,-0.144828,-0.406897,-0.170124,T001
2,-0.038687,-0.141852,-0.039906,-0.071512,-0.348183,-0.20892,-0.35514,-0.999911,-0.999917,-0.999916,...,-0.373627,-0.048276,-0.144828,-0.047619,-0.075862,-0.356164,-0.227586,-0.365517,-0.165975,T001
3,-0.390387,-0.273154,-0.255869,-0.146542,-0.249707,-0.164319,-0.352804,-0.999881,-0.999883,-0.999881,...,-0.373556,-0.393103,-0.282759,-0.238095,-0.131034,-0.260274,-0.158621,-0.365517,-0.259336,T001
4,-0.36694,-0.268464,-0.258216,-0.16061,-0.378664,-0.41784,-0.490654,-0.999849,-0.999849,-0.999846,...,-0.373492,-0.365517,-0.282759,-0.22449,-0.172414,-0.39726,-0.42069,-0.489655,-0.33195,T001


## pandas.DataFrame.set

* Input: readings (dataframe with turbine_id)
* Output: readings (dataframe with turbine_id and timestamp)
* Effect: timestamp has been set as a readings column

In [30]:
# Step 7 (second pandas.DataFrame.set): run only this step on the previous context;
# the timestamp popped in step 2 is added back to readings as a column.
step = 7
context = pipeline.fit(**context, output_=step, start_=step)

In [31]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [32]:
context['readings'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,turbine_id,timestamp
0,-0.235639,-0.242673,-0.328638,-0.177022,-0.352872,-0.192488,-0.331776,-1.0,-1.0,-1.0,...,-0.227586,-0.227586,-0.319728,-0.186207,-0.369863,-0.186207,-0.337931,-0.261411,T001,2013-01-10 00:00:00
1,-0.181712,-0.092614,-0.133803,-0.029308,-0.216882,-0.150235,-0.404206,-0.999959,-0.999958,-0.999959,...,-0.186207,-0.117241,-0.115646,-0.062069,-0.232877,-0.144828,-0.406897,-0.170124,T001,2013-01-10 00:10:00
2,-0.038687,-0.141852,-0.039906,-0.071512,-0.348183,-0.20892,-0.35514,-0.999911,-0.999917,-0.999916,...,-0.048276,-0.144828,-0.047619,-0.075862,-0.356164,-0.227586,-0.365517,-0.165975,T001,2013-01-10 00:20:00
3,-0.390387,-0.273154,-0.255869,-0.146542,-0.249707,-0.164319,-0.352804,-0.999881,-0.999883,-0.999881,...,-0.393103,-0.282759,-0.238095,-0.131034,-0.260274,-0.158621,-0.365517,-0.259336,T001,2013-01-10 00:30:00
4,-0.36694,-0.268464,-0.258216,-0.16061,-0.378664,-0.41784,-0.490654,-0.999849,-0.999849,-0.999846,...,-0.365517,-0.282759,-0.22449,-0.172414,-0.39726,-0.42069,-0.489655,-0.33195,T001,2013-01-10 00:40:00


## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences

* Input: X, readings (dataframe with turbine_id and timestamp)
* Output: X
* Effect: X has been converted to a 3D numpy array that contains one matrix of shape
  (window_size x num_signals) for each one of the target times.

In [33]:
# Show the hyperparameters currently set for the windowing primitive.
# NOTE(review): this reaches into the private `_pipeline` attribute (presumably the
# underlying MLPipeline); prefer a public accessor if DracoPipeline exposes one.
pipeline._pipeline.get_hyperparameters()[
    'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']

{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}

In [34]:
# Step 8 (cutoff_window_sequences): run only this step on the previous context;
# X is replaced by the windows of readings extracted for each target time.
step = 8
context = pipeline.fit(**context, output_=step, start_=step)

In [35]:
context.keys()

dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])

In [36]:
# readings still carries the 26 signal columns plus turbine_id and timestamp.
context['readings'].shape

(51121, 28)

In [37]:
# One label per target time; the length should match the first dimension of X.
context['y'].shape

(353,)

In [38]:
# Expected layout: (number of target times, window_size, number of signals).
context['X'].shape

(353, 24, 26)

In [39]:
# First window: window_size rows, each holding one scaled value per signal.
context['X'][0]

array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,
        -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,
        -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,
        -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,
        -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,
        -0.59751037],
       [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,
        -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,
        -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,
        -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,
        -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,
        -0.5560166 ],
       [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,
        -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,
        -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,
       

## keras.Sequential.DoubleLSTMTimeSeriesClassifier

* Input: X, y
* Output: 
* Effect: DoubleLSTM has been fitted.

In [40]:
# Step 9 (keras.Sequential.DoubleLSTMTimeSeriesClassifier): run only this step, fitting
# the classifier on the windowed X and the labels y. It is the last primitive in the template.
step = 9
context = pipeline.fit(**context, output_=step, start_=step)

2022-01-18 05:32:48.464559: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-18 05:32:48.495873: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fba31d9b0c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-01-18 05:32:48.495892: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
