# Notebook: Iterators

In [1]:
from ceruleo.dataset.catalog.PHMDataset2018 import PHMDataset2018, FailureType

2022-08-08 22:13:48.320996: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
  from .autonotebook import tqdm as notebook_tqdm


## Load dataset

In [2]:
# Load the PHM 2018 dataset, restricted to the two tools named below.
dataset = PHMDataset2018(tools=['01_M01', '04_M01'])

### Create a transformer for a dataset

In [3]:
from ceruleo.dataset.analysis.numerical_features import analysis
from ceruleo.transformation.functional.transformers import Transformer
from ceruleo.transformation.features.selection import ByNameFeatureSelector, ByTypeFeatureSelector
from ceruleo.iterators.iterators import RelativeToEnd
from ceruleo.transformation.features.slicing import SliceRows
from ceruleo.transformation.functional.pipeline.pipeline import make_pipeline
from ceruleo.transformation.features.resamplers import IndexMeanResampler
from ceruleo.transformation.features.transformation import Clip
from ceruleo.transformation.features.slicing import SliceRows
from ceruleo.iterators.iterators import RelativeToEnd

In [4]:
# Names of the columns selected as model inputs.
FEATURES = [
    'IONGAUGEPRESSURE',
    'ETCHBEAMVOLTAGE',
    'ETCHBEAMCURRENT',
    'ETCHSUPPRESSORVOLTAGE',
    'ETCHSUPPRESSORCURRENT',
    'FLOWCOOLFLOWRATE',
    'FLOWCOOLPRESSURE',
    'ETCHGASCHANNEL1READBACK',
    'ETCHPBNGASREADBACK',
]

# Input (X) pipeline: select the FEATURES columns, clip with bounds -6 and 6,
# resample with a '120s' rule, then slice rows.
# NOTE(review): RelativeToEnd(1500) presumably keeps the last 1500 rows of each
# run -- confirm against the SliceRows / RelativeToEnd documentation.
x_pipeline = make_pipeline(
    ByNameFeatureSelector(features=FEATURES),
    Clip(lower=-6, upper=6),
    IndexMeanResampler(rule='120s'),
    SliceRows(initial=RelativeToEnd(1500)),
)

# Target (Y) pipeline: select the 'RUL' column and apply the same resampling
# and row slicing as the input pipeline (no clipping step here).
y_pipeline = make_pipeline(
    ByNameFeatureSelector(features=['RUL']),
    IndexMeanResampler(rule='120s'),
    SliceRows(initial=RelativeToEnd(1500)),
)

transformer = Transformer(pipelineX=x_pipeline, pipelineY=y_pipeline)

# Fit the transformer on the dataset and obtain the transformed dataset
# that the iterators and batcher consume.
transformed_dataset = transformer.fit_map(dataset)

## Iterator

In [5]:
from ceruleo.iterators.iterators import WindowedDatasetIterator, IterationType

### Forecast iterator

The forecast iterator produces, as its target, the values from the Y transformer that start where the X window ends.

In [6]:
# Forecast-style windowed iterator over the transformed dataset.
# With these settings the first sample (shown in the next cell) has an X of
# shape (150, 9) and a y of shape (5, 1), i.e. window_size rows of input and
# horizon rows of target.
iterator = WindowedDatasetIterator(
    transformed_dataset,
    window_size=150,
    step=15,
    horizon=5,
    iteration_type=IterationType.FORECAST # The default value
)

In [7]:
# Pull a single sample from the iterator; each item unpacks into three values.
# NOTE(review): `sw` is presumably the sample weight -- confirm.
X, y, sw = next(iterator)
# Last expression of the cell: display the shapes of the input and target.
(X.shape, y.shape)

((150, 9), (5, 1))

It is possible to obtain all the data, following the order of the shuffler, in a numpy matrix. By default, all the data is flattened.

In [8]:
# Materialise every sample at once, using get_data()'s default arguments.
X, y, sw = iterator.get_data()
# Display the shapes of the three returned arrays.
(X.shape, y.shape, sw.shape)

((1679, 1350), (1679, 5), (1679,))

If `flatten` is False, the data keeps its windowed shape: X has 1679 samples, each a window of 150 rows with 9 features.

In [9]:
# Same call as before, but with flatten=False so X is not flattened.
X, y, sw = iterator.get_data(flatten=False)
# Display the shapes of the three returned arrays.
(X.shape, y.shape, sw.shape)

((1679, 150, 9), (1679, 5), (1679,))

### Seq to Seq Iterator

The seq-to-seq iterator returns, as its target, a window of the same size as the input and aligned with it.

In [10]:
# Rebind `iterator` to a sequence-to-sequence iterator over the same dataset.
# No horizon is passed here; only the window size, the step and the
# iteration type are set.
iterator = WindowedDatasetIterator(
    transformed_dataset,
    window_size=150,
    step=15,
    iteration_type=IterationType.SEQ_TO_SEQ 
)

In [11]:
# Pull a single sample from the seq-to-seq iterator (three values per item).
X, y, sw = next(iterator)
# Display the shapes of the input window and of the target.
(X.shape, y.shape)

((150, 9), (150, 1))

## Batcher

In [12]:
from ceruleo.iterators.batcher import Batcher

In [13]:
# Build a batcher over the transformed dataset.
# Note that the window length keyword here is `window`, whereas
# WindowedDatasetIterator above takes `window_size`.
batcher = Batcher.new(
    transformed_dataset,
    batch_size=64,
    window=150,
    step=15,
    horizon=5
)
# Pull one batch; it unpacks into three arrays.
X, y, sw = next(batcher)
# Display the shapes of the three arrays in the batch.
(X.shape, y.shape, sw.shape)

((64, 150, 9), (64, 5, 1), (64, 1))