In [8]:
# Load the autoreload extension and enable mode 2, so that edited project modules
# are re-imported automatically before code is executed.
%load_ext autoreload
%autoreload 2

In [9]:
# Render matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Topic: EX2 - Turbofan RUL Prediction
**Task**: Predict the remaining useful life (RUL) of turbofan engines based on given sensor data (time series data). It is a forecasting problem, where the goal is to predict the number of cycles an engine will last before it fails.
**Data**: Turbofan engine degradation simulation data (NASA) - [Link](https://data.nasa.gov/dataset/Turbofan-Engine-Degradation-Simulation-Data-Set/vrks-gjie). See also in the topic [introduction notebook](https://github.com/nina-prog/damage-propagation-modeling/blob/2fb8c1a1102a48d7abbf04e4031807790a913a99/notebooks/Turbofan%20remaining%20useful%20life%20Prediction.ipynb).

**Subtasks**:
1. Perform a deep **exploratory data analysis (EDA)** on the given data.
2. Implement a more efficient **sliding window method** for time series data analysis. -> 🎯 **Focus on this task**
3. Apply **traditional machine learning methods** (SOTA) to predict the remaining useful life. Includes data preparation, feature extraction, feature selection, model selection, and model parameter optimization.
4. Create **neural network models** to predict the remaining useful life. Includes different architectures like Convolutional Neural Networks (CNN), Recurrent Neural Networks (RNN), or Attention Models. Note: You can search for SOTA research papers and reproduce current state-of-the-art models.


# Imports + Settings

In [42]:
# third-party libraries
import pandas as pd
import numpy as np
import os
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

import time

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# source code
from src.utils import load_data, load_config, train_val_split_by_group
from src.nn_utils import scale_data, create_sliding_window
from src.nn_util.nn_models.ligthning.cnnModel1 import CNNModel1 as CNNModel
from src.nn_util.nn_models.ligthning.exampleLSTMModel import ExampleLSTMModel as LSTMModel
from src.nn_util.datamodule.lightning.turbofanDatamodule import TurbofanDatamodule

In [12]:
# settings
# Apply context, style, palette and rc overrides in ONE call. `sns.set` re-applies a
# complete theme (including the default style and palette) every time it is called,
# so calling it after `set_style` / `set_palette` would discard the "whitegrid" style
# and the "Set2" palette chosen for this notebook.
sns.set(
    context="notebook",
    style="whitegrid",
    palette="Set2",
    rc={"figure.dpi": 100, "savefig.dpi": 200},
)

In [13]:
# Seed NumPy's global random number generator.
# NOTE(review): `pl.seed_everything(17)` is called further down in this notebook; per the
# Lightning docs it seeds NumPy as well, so it presumably supersedes this value — confirm
# which seed is intended.
np.random.seed(42)

# Paths

In [14]:
# Set the working directory to the root of the project (the parent of the notebook folder).
# The move is guarded so that re-running this cell in the same kernel session does not
# climb one directory higher each time: it is skipped as soon as the `configs` folder
# (which holds the config file referenced by PATH_TO_CONFIG) is visible from the
# current working directory.
if not os.path.isdir("configs"):
    os.chdir("../")  # set working directory to root of project
os.getcwd()  # check current working directory

In [15]:
# Path of the project configuration file, relative to the current working directory.
PATH_TO_CONFIG = "configs/config.yaml"

# Load Config + Data

In [16]:
# Load the project configuration; later cells read entries such as config['paths'][...] from it.
config = load_config(PATH_TO_CONFIG) # config is dict

In [17]:
# Load the three data objects returned by the project helper for dataset number 1
# (bound here as training data, test data and test RUL data).
train_data, test_data, test_RUL_data = load_data(config_path=PATH_TO_CONFIG, dataset_num=1)

# 📍 << Subtask X: TOPIC >>

[TEMPLATE]

Findings:
* Interpretation of plots
* or other key takeaways from previous code

In [17]:
# [TEMPLATE] - save processed data (as pickle)
df = pd.DataFrame()
timestamp = time.strftime("%Y%m%d-%H%M%S")
# Join directory and file name with os.path.join so that the resulting path is correct
# whether or not the configured directory ends with a path separator.
processed_path = os.path.join(config['paths']['processed_data_dir'], f"ex2_topic_{timestamp}.pkl")
df.to_pickle(processed_path)

In [12]:
# [TEMPLATE] - save data predictions (as csv)
df = pd.DataFrame()
timestamp = time.strftime("%Y%m%d-%H%M%S")
# Join directory and file name with os.path.join so that the resulting path is correct
# whether or not the configured directory ends with a path separator.
prediction_path = os.path.join(config['paths']['prediction_dir'], f"ex2_topic_{timestamp}.csv")
df.to_csv(prediction_path, sep=',', decimal='.')

In [13]:
# [TEMPLATE] - save plot results (as png)
fig = plt.figure(figsize=(9, 6))
timestamp = time.strftime("%Y%m%d-%H%M%S")
# Join directory and file name with os.path.join so that the resulting path is correct
# whether or not the configured directory ends with a path separator.
plot_path = os.path.join(config['paths']['plot_dir'], f"ex2_topic_{timestamp}.png")
fig.savefig(plot_path)

## Only CNN

Inspiration: the paper "Dynamic predictive maintenance for multiple components using data-driven
probabilistic RUL prognostics: The case of turbofan engines".

In [18]:
# Number of rows per sliding window; used when building the train/val/test windows
# and passed to the models as `window_size`.
window_size = 30

In [19]:
# Setting the seed
# (per the Lightning docs, seed_everything seeds the pseudo-random number generators
# of Python, NumPy and PyTorch)
pl.seed_everything(17)

In [20]:
# NOTE(review): disabled scoring dictionary, kept for reference only. `Dict`, `make_scorer`
# and the metric functions are not imported in the visible import cells, so the imports
# would have to be added before re-enabling this.
#scoring: Dict[str, make_scorer] = {
#    'mae': make_scorer(mean_absolute_error),
#    'mse': make_scorer(mean_squared_error),
#    'r2': make_scorer(r2_score)
#}

In [21]:
# Scale the training and the test frame with the project helper.
# NOTE(review): each call only receives its own frame, so train and test are presumably
# scaled independently — confirm that scale_data applies the same scaling to both.
# NOTE: this cell overwrites its own inputs, so executing it twice applies scale_data twice.
train_data = scale_data(train_data)
test_data = scale_data(test_data)

In [22]:
# TODO: Remove and use Ninas rolling window
# Add RUL to train datasets
from src.rolling_window_creator import calculate_RUL

# Column names handed to calculate_RUL: the time column and the column identifying each unit.
time_column = 'Cycle'
group_column = 'UnitNumber'

# NOTE: overwrites `train_data` with the frame returned by calculate_RUL.
train_data = calculate_RUL(train_data, time_column, group_column)

In [23]:
# Build one input window per test engine from its LAST `window_size` rows.
#
# The rows are selected by position (sort by 'Cycle', then take the tail) rather than by
# comparing the 'Cycle' values against a row count. The value comparison is only correct
# when the cycles of an engine are exactly 1..n; it silently selects the wrong rows (or
# none) if the cycles have gaps, start at another offset, or were rescaled beforehand.
test_list = []
for engine_number, engine_dataframe in test_data.groupby("UnitNumber", sort=False):
    # Every window must have the same length, otherwise the windows cannot be stacked
    # into one regular array further down.
    if len(engine_dataframe) < window_size:
        raise ValueError(
            f"Engine {engine_number} has only {len(engine_dataframe)} rows, "
            f"fewer than window_size={window_size}."
        )
    last_window = engine_dataframe.sort_values("Cycle", kind="stable").tail(window_size)
    # The identifier and time columns are dropped so that only the remaining columns
    # are used as model input.
    test_list.append(last_window.drop(columns=["UnitNumber", "Cycle"]).values)

# Stack to one array with one entry per engine (engines in order of first appearance).
X_test = np.array(test_list)
y_test = test_RUL_data.values

In [24]:
# Split the training frame into a training and a validation part with the project helper
# (presumably all rows of one engine end up in the same part — confirm in src.utils).
train, val = train_val_split_by_group(train_data)

In [25]:
# Turn the training and validation frames into (inputs, targets) pairs using
# sliding windows of `window_size` rows.
X_train, y_train = create_sliding_window(train, window_size=window_size)
X_val, y_val = create_sliding_window(val, window_size=window_size)

Change types of arrays and swap axes:

In [26]:
# When True, axes 1 and 2 of every input array are swapped before the dtype conversion.
train_CNN = True


def _prepare_arrays(X, y, swap_axes):
    """Convert one (X, y) pair to float32 arrays, optionally swapping axes 1 and 2 of X.

    The shape of X is printed before and after the conversion.

    Args:
        X: Input array; needs at least three dimensions when ``swap_axes`` is True.
        y: Target values (array-like).
        swap_axes: If True, axes 1 and 2 of ``X`` are swapped.

    Returns:
        Tuple ``(X, y)`` where both entries are ``np.float32`` arrays.
    """
    print(X.shape)
    if swap_axes:
        X = np.swapaxes(X, 1, 2)
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.float32)
    print(X.shape)
    return X, y


# NOTE: the arrays are overwritten in place, so executing this cell a second time
# swaps the axes back again.
X_train, y_train = _prepare_arrays(X_train, y_train, swap_axes=train_CNN)
X_val, y_val = _prepare_arrays(X_val, y_val, swap_axes=train_CNN)
X_test, y_test = _prepare_arrays(X_test, y_test, swap_axes=train_CNN)

In [43]:
# Select hyperparameters of trainer!
# Checkpointing monitors "val_loss"; the best checkpoint is reloaded through
# ckpt_path='best' when testing (assumes the model logs "val_loss" — TODO confirm).
checkpoint_callback = ModelCheckpoint(monitor="val_loss")
trainer = Trainer(min_epochs=1, max_epochs=150, callbacks=[checkpoint_callback])
# Hand the prepared arrays to the datamodule; X_test is used for both the predict
# and the test dataset.
datamodule = TurbofanDatamodule()
datamodule.set_train_dataset(X_train, y_train)
datamodule.set_val_dataset(X_val, y_val)
datamodule.set_predict_dataset(X_test)
datamodule.set_test_dataset(X_test, y_test)
# CNN model with learning rate 1e-6.
model = CNNModel(lr=0.000001, window_size=window_size, dropout_rate=0.2)

In [44]:
%%capture
# Train the CNN model; the %%capture cell magic captures (hides) the output of this cell.
trainer.fit(model, datamodule=datamodule)

In [45]:
# Evaluate the best checkpoint on the test dataset.
# NOTE(review): per the Lightning docs, Trainer.test returns the metrics logged during
# testing (a list of dicts), not model predictions, despite the variable name `pred`.
pred = trainer.test(model, datamodule=datamodule, ckpt_path='best')

## Only LSTM

In [71]:
# Select hyperparameters of trainer!
# Checkpointing monitors "val_loss"; the best checkpoint is reloaded through
# ckpt_path='best' when testing (assumes the model logs "val_loss" — TODO confirm).
checkpoint_callback = ModelCheckpoint(monitor="val_loss")
trainer = Trainer(min_epochs=1, max_epochs=150, callbacks=[checkpoint_callback])
# NOTE(review): the same X_train / X_val / X_test arrays as in the CNN section are reused.
# With train_CNN = True their axes 1 and 2 have been swapped — confirm that LSTMModel
# expects this layout.
datamodule = TurbofanDatamodule()
datamodule.set_train_dataset(X_train, y_train)
datamodule.set_val_dataset(X_val, y_val)
datamodule.set_predict_dataset(X_test)
datamodule.set_test_dataset(X_test, y_test)
# LSTM model with learning rate 1e-4.
model = LSTMModel(lr=0.0001, window_size=window_size, dropout_rate=0.2)

In [64]:
%%capture
# Train the LSTM model; the %%capture cell magic captures (hides) the output of this cell.
trainer.fit(model, datamodule=datamodule)

In [65]:
# Evaluate the best checkpoint on the test dataset.
# NOTE(review): per the Lightning docs, Trainer.test returns the metrics logged during
# testing (a list of dicts), not model predictions, despite the variable name `pred`.
pred = trainer.test(model, datamodule=datamodule, ckpt_path='best')