In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.impute import SimpleImputer

import rul_pm.utils 
from rul_pm.dataset.lives_dataset import AbstractLivesDataset
from rul_pm.dataset.CMAPSS import (CMAPSSDataset, sensor_indices)
from rul_pm.iterators.iterators import LifeDatasetIterator
from rul_pm.transformation.transformers import Transformer, transformation_pipeline, numericals_pipeline
from rul_pm.transformation.features.selection import (
    ByNameFeatureSelector,
    PandasVarianceThreshold,
    PandasNullProportionSelector)
from rul_pm.transformation.features.generation import (
    OneHotCategoricalPandas,
    EWMAOutOfRange,
    Accumulate,
    RollingStatistics)
from rul_pm.transformation.transformerstep import Concatenate as ConcatenateStep
from rul_pm.transformation.utils import PandasTransformerWrapper
from rul_pm.transformation.outliers import IQROutlierRemover, ZScoreOutlierRemover
from rul_pm.transformation.imputers import ForwardFillImputer, PandasMeanImputer
from rul_pm.transformation.features.scalers import PandasMinMaxScaler

from rul_pm.transformation.resamplers import ResamplerTransformer
from rul_pm.iterators.iterators import WindowedDatasetIterator, LifeDatasetIterator
from rul_pm.iterators.batcher import get_batcher
from rul_pm.models.sklearn import SKLearnModel


from rul_pm.transformation.transformers import Transformer, transformation_pipeline, LivesPipeline
from rul_pm.transformation.utils import PandasTransformerWrapper
from rul_pm.transformation.imputers import ForwardFillImputer, PandasRemoveInf, PerColumnImputer
from rul_pm.transformation.utils import IdentityTransformer
from rul_pm.graphics.plots import plot_true_vs_predicted


# 1. Loading data

In [2]:
# Build two CMAPSSDataset views restricted to the 'FD001' model: one with
# train=True and one with train=False (the latter is what this notebook
# calls the validation set).
train_dataset = CMAPSSDataset(
    train=True,
    models=['FD001'],
)
validation_dataset = CMAPSSDataset(
    train=False,
    models=['FD001'],
)

# 2. Transformation

In [3]:
# Names of the sensor columns: `sensor_indices` holds column positions, which
# are looked up in the columns of the first item of the training dataset.
features = [train_dataset[0].columns[i] for i in sensor_indices]

# Input (X) pipeline. Each step is instantiated and then called on the
# pipeline built so far, and the result is rebound to `pipe`; presumably this
# chains the steps in the order written (verify against rul_pm's step API).
pipe = ByNameFeatureSelector(features)
# NOTE(review): the meaning of the (1.5, 1) arguments is defined by
# IQROutlierRemover; 1.5 looks like the usual IQR multiplier - confirm.
pipe = IQROutlierRemover(1.5, 1)(pipe)
pipe = ForwardFillImputer()(pipe)
pipe = PandasMinMaxScaler((-1,1), name='RawFeatures')(pipe)
# Constant imputation with -2, a value outside the (-1, 1) range requested
# from the scaler above, so filled-in entries cannot collide with scaled data.
pipe = PandasTransformerWrapper(SimpleImputer(fill_value=-2, strategy='constant'))(pipe)


# Target (Y) pipeline: selects only the 'RUL' column.
target_pipe = ByNameFeatureSelector(['RUL'])

In [4]:
# Materialise both pipelines (X first, then Y) and bundle them into the
# Transformer that the models below receive.
x_pipeline = pipe.build()
y_pipeline = target_pipe.build()
transformer = Transformer(transformerX=x_pipeline, transformerY=y_pipeline)

# 3. Clustering

In [5]:
# Turn on rul_pm's progress bars (module-level flag).
rul_pm.utils.show_progressbar = True

# K-Means starts from randomly chosen centroids. Without a fixed random_state
# the cluster ids (and potentially the partition itself) differ on every
# "Restart & Run All", so the labels displayed further down could not be
# reproduced. n_clusters=8 is scikit-learn's default, spelled out here so the
# number of clusters is visible to the reader.
clus_model = SKLearnModel(
    model=KMeans(n_clusters=8, random_state=0),
    window=10,
    transformer=transformer
)
clus_model.fit(train_dataset)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19731.0), HTML(value='')))




<rul_pm.models.sklearn.SKLearnModel at 0x7f7936b07f70>

In [6]:
# Predict cluster labels for the subset of the training data selected by the
# index list [5] (presumably the single run-to-failure life at position 5 -
# confirm against the dataset's __getitem__).
life_subset = train_dataset[[5]]
clusters = clus_model.predict(life_subset)
clusters

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=179.0), HTML(value='')))




array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3], dtype=int32)

# 4. Outlier detection

In [7]:
# IsolationForest draws random sub-samples and random split points, so an
# unseeded model yields different inlier/outlier labels on each run. Fixing
# random_state makes the labels shown below reproducible.
# NOTE(review): this reuses the same `transformer` object that was passed to
# the clustering model; whether fit() refits it is not visible here - confirm.
out_model = SKLearnModel(
    model=IsolationForest(random_state=0),
    window=5,
    transformer=transformer
)
out_model.fit(train_dataset)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20231.0), HTML(value='')))




<rul_pm.models.sklearn.SKLearnModel at 0x7f7936a78130>

In [8]:
# Outlier labels for the subset of the training data selected by the index
# list [5]; scikit-learn's IsolationForest.predict returns +1 for inliers and
# -1 for outliers.
selected_lives = train_dataset[[5]]
y_pred = out_model.predict(selected_lives)
y_pred

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=184.0), HTML(value='')))




array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1, -1, -1, -1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])