In [None]:
import pandas as pd
import numpy as np
import zipfile
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from keras.layers import LSTM, Dense, Dropout
from keras.losses import Huber
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint


import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline  

In [None]:
# create column names for the dataset
meta_cols = ['engine_id', 'cycle']
setting_cols = ['setting_{}'.format(i+1) for i in range(3)]
sensor_cols = ['sensor_{}'.format(i+1) for i in range(21)]
feature_cols = setting_cols + sensor_cols
target_col = 'RUL'
cols = meta_cols + setting_cols + sensor_cols

In [73]:
# Training dataset
train_data = pd.read_csv('sensordata/PM_train.txt', sep = ' ',header=None).drop([26,27], axis=1)
train_data.columns = cols
print('shape of train_data:', train_data.shape)

shape of train_data: (20631, 26)


In [74]:
# Test dataset
test_data = pd.read_csv('sensordata/PM_test.txt', sep = ' ',header=None).drop([26,27], axis=1)
test_data.columns = cols
print('shape of test_data:', test_data.shape)

shape of test_data: (13096, 26)


In [75]:
const_cols = [c for c in train_data.columns if len(train_data[c].drop_duplicates()) <=2]

In [76]:
train_data.drop(columns = const_cols, axis=1, inplace=True)
test_data.drop(columns = const_cols, axis=1, inplace=True)

In [77]:
# Truth values with remaining useful life
truth_data = pd.read_csv('sensordata/PM_truth.txt', sep=' ', header=None).drop([1], axis=1)
truth_data.columns = ['cycle_RUL']
truth_data['engine_id'] = truth_data.index+1
print('shape of truth_data:',truth_data.shape)

shape of truth_data: (100, 2)


In [78]:
# Find the max value for the test dataset
rul = pd.DataFrame(test_data.groupby('engine_id')['cycle'].max()).reset_index()
rul.columns = ['engine_id', 'cycle_max']
# calcuate the run to failure cycles
truth_data['rtf'] = truth_data['cycle_RUL'] + rul['cycle_max']
truth_data.tail()

Unnamed: 0,cycle_RUL,engine_id,rtf
95,137,96,234
96,82,97,216
97,59,98,180
98,117,99,214
99,20,100,218


In [79]:
truth_data.drop('cycle_RUL', axis=1, inplace=True)

In [80]:
# calculate the total time to failure(ttf) in test data for each row
test_data = test_data.merge(truth_data, on=['engine_id'], how='left')
test_data['RUL'] = test_data['rtf'] - test_data['cycle']
test_data.drop(['rtf'], axis=1, inplace=True)
test_data.tail()

Unnamed: 0,engine_id,cycle,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
13091,100,194,0.0049,0.0,643.24,1599.45,1415.79,553.41,2388.02,9142.37,47.69,520.69,2388.0,8213.28,8.4715,394,38.65,23.1974,24
13092,100,195,-0.0011,-0.0001,643.22,1595.69,1422.05,553.22,2388.05,9140.68,47.6,521.05,2388.09,8210.85,8.4512,395,38.57,23.2771,23
13093,100,196,-0.0006,-0.0003,643.44,1593.15,1406.82,553.04,2388.11,9146.81,47.57,521.18,2388.04,8217.24,8.4569,395,38.62,23.2051,22
13094,100,197,-0.0038,0.0001,643.26,1594.99,1419.36,553.37,2388.07,9148.85,47.61,521.33,2388.08,8220.48,8.4711,395,38.66,23.2699,21
13095,100,198,0.0013,0.0003,642.95,1601.62,1424.99,552.48,2388.06,9155.03,47.8,521.07,2388.05,8214.64,8.4903,396,38.7,23.1855,20


In [81]:
# Calculate the RUL for training dataset
train_data['RUL'] = train_data.groupby(['engine_id'])['cycle'].transform(max)-train_data['cycle']
train_data.tail()

Unnamed: 0,engine_id,cycle,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
20626,100,196,-0.0004,-0.0003,643.49,1597.98,1428.63,551.43,2388.19,9065.52,48.07,519.49,2388.26,8137.6,8.4956,397,38.49,22.9735,4
20627,100,197,-0.0016,-0.0005,643.54,1604.5,1433.58,550.86,2388.23,9065.11,48.04,519.68,2388.22,8136.5,8.5139,395,38.3,23.1594,3
20628,100,198,0.0004,0.0,643.42,1602.46,1428.18,550.94,2388.24,9065.9,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333,2
20629,100,199,-0.0011,0.0003,643.23,1605.26,1426.53,550.68,2388.25,9073.72,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.064,1
20630,100,200,-0.0032,-0.0005,643.85,1600.38,1432.14,550.79,2388.26,9061.48,48.2,519.3,2388.26,8137.33,8.5036,396,38.37,23.0522,0


In [115]:
include_cols = [x for x in feature_cols if x not in const_cols]

In [58]:
train_data.head()

Unnamed: 0,engine_id,cycle,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
0,1,1,-0.0007,-0.0004,641.82,1589.7,1400.6,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.419,191
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.0,23.4236,190
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.2,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,189
3,1,4,0.0007,0.0,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,554.0,2388.06,9055.15,47.28,522.19,2388.04,8133.8,8.4294,393,38.9,23.4044,187


In [83]:
df_train = train_data.copy()
df_test = test_data.copy()
# Perform feature scaling on the feature columns
sc = MinMaxScaler()
df_train[include_cols] = sc.fit_transform(df_train[include_cols])
df_test[include_cols] = sc.transform(df_test[include_cols])

In [31]:
df_train.head()

Unnamed: 0,engine_id,cycle,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_7,sensor_8,sensor_9,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_17,sensor_20,sensor_21,RUL
0,1,1,-0.0007,-0.0004,641.82,1589.7,1400.6,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.419,191
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.0,23.4236,190
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.2,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,189
3,1,4,0.0007,0.0,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,554.0,2388.06,9055.15,47.28,522.19,2388.04,8133.8,8.4294,393,38.9,23.4044,187


In [84]:
df_test.to_csv('C://Users/rajas/PycharmProjects/Flask_PM/data/df_rul_test.csv')

In [114]:
include_cols

['setting_1',
 'setting_2',
 'sensor_2',
 'sensor_3',
 'sensor_4',
 'sensor_7',
 'sensor_8',
 'sensor_9',
 'sensor_11',
 'sensor_12',
 'sensor_13',
 'sensor_14',
 'sensor_15',
 'sensor_17',
 'sensor_20',
 'sensor_21']

In [101]:
# timestamp or window size
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length + 1):
        sequence = data[i:i+sequence_length]
        sequences.append(sequence)
    return np.array(sequences)


In [85]:
def gen_sequence(id_df, seq_length, seq_cols):
    df_zeros=pd.DataFrame(np.zeros((seq_length-1,id_df.shape[1])),columns=id_df.columns)
    # id_df=df_zeros.append(id_df,ignore_index=True)
    id_df = pd.concat([df_zeros,id_df], ignore_index=True)
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    lstm_array=[]
    for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
        lstm_array.append(data_array[start:stop, :])
    return np.array(lstm_array)

# function to generate labels
def gen_label(id_df, seq_length, seq_cols,label):
    df_zeros=pd.DataFrame(np.zeros((seq_length-1,id_df.shape[1])),columns=id_df.columns)
    # id_df=df_zeros.append(id_df,ignore_index=True)
    id_df = pd.concat([df_zeros,id_df], ignore_index=True)
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    y_label=[]
    for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
        y_label.append(id_df[label][stop])
    return np.array(y_label)

In [86]:
seq_length = 30 
seq_cols=include_cols

In [87]:
# generate X_train
X_train=np.concatenate(list(list(gen_sequence(df_train[df_train['engine_id']==id], seq_length, seq_cols)) for id in df_train['engine_id'].unique()))
print(X_train.shape)
# generate y_train
y_train=np.concatenate(list(list(gen_label(df_train[df_train['engine_id']==id], 30, seq_cols,'RUL')) for id in df_train['engine_id'].unique()))
print(y_train.shape)
# generate X_test
X_test=np.concatenate(list(list(gen_sequence(df_test[df_test['engine_id']==id], seq_length, seq_cols)) for id in df_test['engine_id'].unique()))
print(X_test.shape)
# generate y_test
y_test=np.concatenate(list(list(gen_label(df_test[df_test['engine_id']==id], 30, seq_cols,'RUL')) for id in df_test['engine_id'].unique()))
print(y_test.shape)


(20531, 30, 16)
(20531,)
(12996, 30, 16)
(12996,)


In [165]:
'''
# Convert data to numpy arrays
X_train = df_train.drop('RUL', axis=1).values
y_train = df_train['RUL'].values
X_test = df_test.drop('RUL', axis=1).values
y_test = df_test['RUL'].values
'''

In [167]:
#X_train = create_sequences(X_train, sequence_length)
#X_test = create_sequences(X_test, sequence_length)

In [96]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.3))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation='linear'))

# Compile the model with Huber loss 
model.compile(loss=Huber(), optimizer=Adam(learning_rate=0.001))
#model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))

In [None]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=0)

In [97]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, save_weights_only=True, monitor='val_loss', mode='min')

In [98]:
#history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1,callbacks=[early_stopping, model_checkpoint])
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1,callbacks=[early_stopping, model_checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50


In [99]:
# save the model to a file
model.save('C://Users/rajas/PycharmProjects/Flask_PM/models/model_calculate_rul')



INFO:tensorflow:Assets written to: C://Users/rajas/PycharmProjects/Flask_PM/models/model_calculate_rul/assets


INFO:tensorflow:Assets written to: C://Users/rajas/PycharmProjects/Flask_PM/models/model_calculate_rul/assets


In [104]:
scores = model.evaluate(X_train, y_train, verbose=1, batch_size=200)
print('Loss: {}',scores)

Loss: {} 20.21293067932129


In [159]:
y2_pred=model.predict(X_test)
result2 = np.vectorize(y2_pred)



In [111]:
def cal_rul(machine_id):
    machine_df=df_test[df_test.engine_id==machine_id]
    machine_test=gen_sequence(machine_df,seq_length,seq_cols)
    m_pred=model.predict(machine_test, verbose=0)
    failure_prob=list(m_pred[-1]*1)[0]
    return int(failure_prob)

In [113]:
machine_id=98
print('RUL of the machine is: ',cal_rul(machine_id))

RUL of the machine is:  62


In [151]:
len(y2_pred)

13067

In [160]:
X_test

array([[[1.00000000e+00, 1.00000000e+00, 6.32183908e-01, ...,
         3.33333333e-01, 5.58139535e-01, 6.61833748e-01],
        [1.00000000e+00, 2.00000000e+00, 3.44827586e-01, ...,
         4.16666667e-01, 6.82170543e-01, 6.86826843e-01],
        [1.00000000e+00, 3.00000000e+00, 5.17241379e-01, ...,
         4.16666667e-01, 7.28682171e-01, 7.21347694e-01],
        ...,
        [1.00000000e+00, 2.80000000e+01, 6.26436782e-01, ...,
         3.33333333e-01, 5.34883721e-01, 6.29660315e-01],
        [1.00000000e+00, 2.90000000e+01, 5.80459770e-01, ...,
         3.33333333e-01, 6.82170543e-01, 6.46092240e-01],
        [1.00000000e+00, 3.00000000e+01, 3.56321839e-01, ...,
         2.50000000e-01, 7.36434109e-01, 7.07953604e-01]],

       [[1.00000000e+00, 2.00000000e+00, 3.44827586e-01, ...,
         4.16666667e-01, 6.82170543e-01, 6.86826843e-01],
        [1.00000000e+00, 3.00000000e+00, 5.17241379e-01, ...,
         4.16666667e-01, 7.28682171e-01, 7.21347694e-01],
        [1.00000000e+00, 

In [186]:
def prob_failure(machine_id):
    machine_df=df_test[df_test.engine_id==machine_id]
    machine_test=create_sequences(machine_df,seq_length)
    m_pred=model.predict(machine_test, verbose=0)
    return m_pred

In [188]:
machine_id=1
print('calculated RUL ',prob_failure(machine_id))

InvalidArgumentError: Graph execution error:

Specified a list with shape [?,18] from a tensor with shape [2,19]
	 [[{{node TensorArrayUnstack/TensorListFromTensor}}]]
	 [[sequential_11/lstm_24/PartitionedCall]] [Op:__inference_predict_function_362698]