<a href="https://colab.research.google.com/github/nguyenthong11/LSTM-CMAPSS-keras/blob/main/RUL_CMAPSS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/MyDrive/Colab Notebooks/RUL_CMAPSS')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
import pandas as pd
import numpy as np
import tensorflow as tf

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

In [43]:
# column names of CMAPSS Dataset
columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3']+[f's{i}' for i in range(1, 22)]

feature_columns = ['setting1', 'setting2', 'setting3']+[f's{i}' for i in range(1, 22)] + ['cycle_norm']

class CMAPSSDataset():
    def __init__(self, fd_number, batch_size, sequence_length):
      super(CMAPSSDataset).__init__()
      self.batch_size = batch_size
      self.sequence_length = sequence_length
      self.train_data = None
      self.test_data = None

      # \s+ one or more space
      data = pd.read_csv("CMAPSSData/train_FD00" + fd_number + ".txt", delimiter="\s+", header=None)
      data.columns = columns

      self.engine_size = data['id'].max()

      # Compute RUL with cycle
      rul = pd.DataFrame(data.groupby('id')['cycle'].max()).reset_index()
      rul.columns = ['id', 'max']
      data = data.merge(rul, on=['id'], how='left')
      data['RUL'] = data['max'] - data['cycle']
      data.drop(['max'], axis=1, inplace=True)

      # normalize to N(0,1)
      self.std = StandardScaler()
      data['cycle_norm'] = data['cycle']
      cols_normalize = data.columns.difference(['id', 'cycle', 'RUL'])
      norm_data = pd.DataFrame(self.std.fit_transform(data[cols_normalize]), columns=cols_normalize, index=data.index)
      join_data = data[data.columns.difference(cols_normalize)].join(norm_data)
      self.train_data = join_data.reindex(columns=data.columns)

      # on test data
      test_data = pd.read_csv("CMAPSSData/test_FD00" + fd_number + ".txt", delimiter="\s+", header=None)
      test_data.columns = columns
      truth_data = pd.read_csv("CMAPSSData/RUL_FD00" + fd_number + ".txt", delimiter="\s+", header=None)
      truth_data.columns = ['truth']
      truth_data['id'] = truth_data.index + 1

      test_rul = pd.DataFrame(test_data.groupby('id')['cycle'].max()).reset_index()
      test_rul.columns = ['id', 'elapsed']
      test_rul = test_rul.merge(truth_data, on=['id'], how='left')
      test_rul['max'] = test_rul['elapsed'] + test_rul['truth']

      test_data = test_data.merge(test_rul, on=['id'], how='left')
      test_data['RUL'] = test_data['max'] - test_data['cycle']
      test_data.drop(['max'], axis=1, inplace=True)

      test_data['cycle_norm'] = test_data['cycle']
      norm_test_data = pd.DataFrame(self.std.fit_transform(test_data[cols_normalize]), columns=cols_normalize, index=test_data.index)
      join_test_data = test_data[test_data.columns.difference(cols_normalize)].join(norm_test_data)
      self.test_data = join_test_data.reindex(columns=test_data.columns)

    def get_train_data(self):
      return self.train_data

    def get_test_data(self):
      return self.test_data

    def get_feature_slice(self, input_data):
        # Reshape the data to (samples, features, time steps)
      def reshapeFeatures(input, columns, sequence_length):
        data = input[columns].values
        num_elements = data.shape[0]
        for start, stop in zip(range(0, num_elements-sequence_length), range(sequence_length, num_elements)):
          yield(data[start:stop, :])

      feature_list = [list(reshapeFeatures(input_data[input_data['id'] == i], feature_columns, self.sequence_length))
                      for i in range(1, self.engine_size + 1) if len(input_data[input_data['id']  == i]) > self.sequence_length]
      feature_array = np.concatenate(list(feature_list), axis=0).astype(np.float32)
      length = len(feature_array) // self.batch_size
      return feature_array[:length*self.batch_size]

    def get_label_slice(self, input_data):
      def reshapeLabels(input, sequence_length, columns=['RUL']):
        data = input[columns].values
        num_elements = data.shape[0]
        return(data[sequence_length:num_elements, :])

      label_list = [reshapeLabels(input_data[input_data['id'] == i], self.sequence_length)
            for i in range(1, self.engine_size+1)]
      label_array = np.concatenate(label_list).astype(np.float32)
      length = len(label_array) // self.batch_size
      return label_array[:length*self.batch_size]


    def get_last_data_slice(self, input_data):
      num_engine = input_data['id'].unique().max()
      test_feature_list = [input_data[input_data['id'] == i][feature_columns].values[-self.sequence_length:]
                            for i in range(1, num_engine+1) if len(input_data[input_data['id']  == i]) >= self.sequence_length]
      test_feature_array = np.asarray(test_feature_list).astype(np.float32)
      length_test = len(test_feature_array) // self.batch_size

      test_rul_list = [input_data[input_data['id'] == i]['RUL'].values[-1:]
                          for i in range(1, num_engine+1) if len(input_data[input_data['id'] == i]) >= self.sequence_length]
      test_rul_array = np.asarray(test_rul_list).astype(np.float32)
      length_rul = len(test_rul_array) // self.batch_size

      return test_feature_array[:length_test*self.batch_size], test_rul_array[:length_rul*self.batch_size]


In [44]:
sequence_length=32
batch_size=32
epochs=1000
N = '4'

datasets = CMAPSSDataset(fd_number=N, batch_size=batch_size, sequence_length=sequence_length)

train_data = datasets.get_train_data()
train_feature_slice = datasets.get_feature_slice(train_data)
train_rul_slice = datasets.get_label_slice(train_data)
print("train_data.shape: {}".format(train_data.shape))
print("train_feature_slice.shape: {}".format(train_feature_slice.shape))
print("train_rul_slice.shape: {}".format(train_rul_slice.shape))

test_data = datasets.get_test_data()
test_feature_slice, test_rul_slice = datasets.get_last_data_slice(test_data)
print("test_data.shape: {}".format(test_data.shape))
print("test_feature_slice.shape: {}".format(test_feature_slice.shape))
print("test_rul_slice.shape: {}".format(test_rul_slice.shape))

timesteps = train_feature_slice.shape[1]
nb_features = train_feature_slice.shape[2]

train_data.shape: (61249, 28)
train_feature_slice.shape: (53280, 32, 25)
train_rul_slice.shape: (53280, 1)
test_data.shape: (41214, 30)
test_feature_slice.shape: (224, 32, 25)
test_rul_slice.shape: (224, 1)


In [46]:
model = Sequential()
model.add(LSTM(input_shape=(timesteps, nb_features), units=32, return_sequences=True, name="lstm_0"))
model.add(Dropout(0.2, name="dropout_0"))
model.add(LSTM(units=64, return_sequences=False, name="lstm_1"))
model.add(Dropout(0.2, name="dropout_1"))
# model.add(LSTM(units=25, return_sequences=False, name="lstm_2"))
# model.add(Dropout(0.2, name="dropout_2"))
model.add(Dense(units=16, name="dense_0"))
model.add(Activation("relu", name="activation_0"))
model.add(Dropout(0.2, name="dropout_2"))
model.add(Dense(units=16, name="dense_1"))
model.add(Activation("relu", name="activation_1"))
model.add(Dropout(0.2, name="dropout_3"))
model.add(Dense(units=1, name="dense_2"))
model.add(Activation("linear", name="activation_2"))
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
print(model.summary())

  super().__init__(**kwargs)


None


In [45]:
# log_filepath = "tensorboard-logs"
# tb_cb = tf.keras.callbacks.TensorBoard(log_dir=log_filepath, write_images=1, histogram_freq=1)
es_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0, patience=10, verbose=0, mode='auto')

# es_cb = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min')
# tb_cb = keras.callbacks.ModelCheckpoint(weights_save_path, monitor='val_loss', save_best_only=True, mode='min', verbose=0)

In [47]:
history = model.fit(train_feature_slice, train_rul_slice,
            batch_size = batch_size,
            epochs = epochs,
            validation_split=0.05,
            verbose = 2,
            callbacks = [es_cb])

Epoch 1/1000
1582/1582 - 61s - 38ms/step - loss: 5179.9600 - mae: 50.7949 - val_loss: 2686.5034 - val_mae: 42.6051
Epoch 2/1000
1582/1582 - 75s - 47ms/step - loss: 3423.4275 - mae: 41.2633 - val_loss: 2335.4060 - val_mae: 37.1617
Epoch 3/1000
1582/1582 - 83s - 52ms/step - loss: 3215.2615 - mae: 39.8594 - val_loss: 2385.6064 - val_mae: 38.2056
Epoch 4/1000
1582/1582 - 81s - 51ms/step - loss: 3137.6626 - mae: 39.2153 - val_loss: 2179.6494 - val_mae: 36.2301
Epoch 5/1000
1582/1582 - 49s - 31ms/step - loss: 2999.1853 - mae: 38.2111 - val_loss: 2381.4155 - val_mae: 38.7584
Epoch 6/1000
1582/1582 - 82s - 52ms/step - loss: 2914.9941 - mae: 37.5474 - val_loss: 2518.4541 - val_mae: 38.0267
Epoch 7/1000
1582/1582 - 81s - 51ms/step - loss: 2797.1819 - mae: 36.9300 - val_loss: 2547.0256 - val_mae: 36.9344
Epoch 8/1000
1582/1582 - 51s - 32ms/step - loss: 2678.4631 - mae: 36.1821 - val_loss: 2213.5869 - val_mae: 35.2606
Epoch 9/1000
1582/1582 - 48s - 30ms/step - loss: 2564.2556 - mae: 35.3795 - val_

In [34]:
weights_save_path = 'vanilla-lstm-cmapss-weights_v0.keras'
model.save(weights_save_path)

In [7]:
model = keras.saving.load_model(weights_save_path, custom_objects=None, compile=True, safe_mode=True)

In [48]:
y_pred = model.predict(test_feature_slice)
y_truth = test_rul_slice

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [49]:
from sklearn.metrics import mean_squared_error as mse
mse(y_pred, y_truth, squared=False)

38.792118