In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Notebook: Models

In [2]:
import matplotlib.pyplot as plt
import seaborn as sbn

# Apply seaborn's default theme to every matplotlib figure in this notebook.
# `set_theme()` is the documented entry point; `set()` is only a legacy alias.
sbn.set_theme()

## Load the dataset

In [3]:
from ceruleo.dataset.catalog.PHMDataset2018 import PHMDataset2018, FailureType

2022-08-09 12:29:55.736172: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Tool identifiers passed to the dataset loader via its `tools` argument.
SELECTED_TOOLS = ['01_M01', '04_M01']

dataset = PHMDataset2018(tools=SELECTED_TOOLS)

### Create a transformer for a dataset

In [5]:
from ceruleo.dataset.analysis.numerical_features import analysis
from ceruleo.transformation.functional.transformers import Transformer
from ceruleo.transformation.features.selection import ByNameFeatureSelector, ByTypeFeatureSelector
from ceruleo.iterators.iterators import RelativeToEnd
from ceruleo.transformation.features.slicing import SliceRows
from ceruleo.transformation.functional.pipeline.pipeline import make_pipeline
from ceruleo.transformation.features.resamplers import IndexMeanResampler
from ceruleo.transformation.features.transformation import Clip
from ceruleo.transformation.features.slicing import SliceRows
from ceruleo.iterators.iterators import RelativeToEnd

In [6]:
# Columns selected by name as inputs of the X pipeline.
FEATURES = [
    'IONGAUGEPRESSURE',
    'ETCHBEAMVOLTAGE',
    'ETCHBEAMCURRENT',
    'ETCHSUPPRESSORVOLTAGE',
    'ETCHSUPPRESSORCURRENT',
    'FLOWCOOLFLOWRATE',
    'FLOWCOOLPRESSURE',
    'ETCHGASCHANNEL1READBACK',
    'ETCHPBNGASREADBACK',
]

# Inputs and target are resampled with the same rule; keeping it in one place
# prevents the two pipelines from drifting apart.
RESAMPLE_RULE = '500s'

# X pipeline: select the input columns, clip them to [-6, 6], then resample.
features_pipeline = make_pipeline(
    ByNameFeatureSelector(features=FEATURES),
    Clip(lower=-6, upper=6),
    IndexMeanResampler(rule=RESAMPLE_RULE),
)

# Y pipeline: select the 'RUL' column and resample it like the inputs.
target_pipeline = make_pipeline(
    ByNameFeatureSelector(features=['RUL']),
    IndexMeanResampler(rule=RESAMPLE_RULE),
)

transformer = Transformer(pipelineX=features_pipeline, pipelineY=target_pipeline)



## Split into train, validation, and test sets

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the dataset for testing. A fixed random_state makes the
# split identical on every Restart & Run All.
train_dataset, test_dataset = train_test_split(dataset, train_size=0.8, random_state=42)

In [9]:
# Carve a validation set out of the training portion (80/20), with a fixed
# random_state so the split is reproducible.
# NOTE: this rebinds `train_dataset`, so re-running only this cell shrinks the
# training set again; re-run the previous split cell first.
train_dataset, val_dataset = train_test_split(train_dataset, train_size=0.8, random_state=42)

In [10]:
# Sizes of the train, validation and test splits, in that order.
tuple(len(split) for split in (train_dataset, val_dataset, test_dataset))

(13, 4, 5)

## Create iterators

# Models

## Scikit-learn

In [11]:
import sklearn.pipeline as sk_pipeline
from sklearn.ensemble import RandomForestRegressor
from ceruleo.models.sklearn import EstimatorWrapper, TimeSeriesWindowTransformer


In [16]:

from sklearn.compose import TransformedTargetRegressor
from sklearn.base import RegressorMixin, BaseEstimator

transformer = TimeSeriesWindowTransformer(transformer,
                                          window_size=15,
                                          step=8)
    
class CeruleoRegressor(RegressorMixin, BaseEstimator):
    def __init__(self, features_transformer : TimeSeriesWindowTransformer, regressor):
        self.pipe = sk_pipeline.make_pipeline(
                features_transformer,
                EstimatorWrapper(RandomForestRegressor()))
        
    def fit(self, dataset):
        self.pipe.fit(dataset)
        return self
    
    def predict(self, dataset):
        return self.pipe.predict(dataset)
    
    def get_params(self):
        return self.pipe.get_params()

In [17]:
regressor = CeruleoRegressor(transformer, RandomForestRegressor())

In [18]:
# Fit on the training split; the trailing `;` keeps the returned estimator
# from being rendered as cell output.
regressor.fit(train_dataset);

NotFittedError: This TimeSeriesWindowTransformer instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [None]:
# Predict with the fitted regressor; no notebook-level `pipe` is ever defined,
# so the pipeline is reached through `regressor`.
y_pred = regressor.predict(test_dataset)

In [None]:
# The first pipeline step is the window transformer, which exposes the target
# values for a dataset through `true_values`.
y_true = regressor.pipe.steps[0][1].true_values(test_dataset)

fig, ax = plt.subplots(figsize=(17, 5))
ax.plot(y_pred, label='Predicted RUL')
ax.plot(y_true, label='True RUL')
ax.set(
    title='Random forest: predicted vs. true RUL on the test set',
    xlabel='Sample index',
    ylabel='RUL',
)
ax.legend();

## Keras

## PyTorch