In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
%matplotlib inline

#### Data Understanding

In [2]:
# Load the FD002 training set: a space-separated text file without a header row.
data_training_FD2 = pd.read_csv('train_FD002.txt', sep=" ", header=None)

# Column names, in file order: engine id and cycle counter, three settings,
# 21 sensor channels, then two trailing columns that are dropped right away
# (presumably empty columns produced by trailing separators -- confirm).
engine_cycle = ['engine_id', 'cycle']
settings = ['setting1', 'setting2', 'setting3']
sensors = ['s%s' % i for i in range(1, 22)]  # 's1' .. 's21'
nan_cols = ['NaN1', 'NaN2']
data_training_FD2.columns = engine_cycle + settings + sensors + nan_cols
data_training_FD2 = data_training_FD2.drop(nan_cols, axis=1)

# check number of units
# (single pre-formatted argument: prints identically on Python 2 and Python 3)
print('Number of engines: %d' % len(data_training_FD2['engine_id'].unique()))

Number of engines: 260


#### Time of event for each engine = the number of its last recorded cycle

In [3]:
# suppress warning message
# NOTE: this filter is process-wide and stays active for every later cell.
warnings.filterwarnings("ignore")

# Time of event = the highest cycle number recorded for an engine, repeated on
# every row of that engine.
# groupby/transform returns a Series aligned with the frame's index, so the
# column is written in a single assignment. This replaces the chained
# `frame[col][mask] = value` form, which may write to a temporary copy and
# leave the frame untouched, and which rescanned the frame once per engine.
# The cast keeps the column floating point, as it was when initialised to NaN.
data_training_FD2['time_of_event'] = (
    data_training_FD2.groupby('engine_id')['cycle'].transform('max').astype(float)
)

#### Time to event (RUL) for each row = the engine's last cycle number - the current cycle number

In [4]:
# Cycles remaining for each row: the engine's time_of_event minus the row's
# own cycle number.
data_training_FD2['time_to_event'] = data_training_FD2['time_of_event'].sub(
    data_training_FD2['cycle']
)
# Show the first rows, including the two derived columns.
data_training_FD2.head()

Unnamed: 0,engine_id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s14,s15,s16,s17,s18,s19,s20,s21,time_of_event,time_to_event
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071,149.0,148.0
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665,149.0,147.0
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723,149.0,146.0
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701,149.0,145.0
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286,149.0,144.0


#### Input Centering and Target Normalization

In [5]:
def _center_and_range_scale(frame, columns):
    """Mean-center `columns` of `frame` and divide by each column's (max - min).

    Returns a tuple (scaled, col_min, col_max, col_mean): `scaled` is a new
    DataFrame holding only `columns`; the other three are per-column Series
    of the statistics computed from `frame`.
    """
    col_min = frame[columns].min()
    col_max = frame[columns].max()
    col_mean = frame[columns].mean()
    # A constant column has max == min; dividing by that zero range would
    # produce NaN/inf features. Dividing by 1 instead leaves such a column
    # at (value - mean), i.e. zero.
    col_range = (col_max - col_min).replace(0, 1)
    scaled = (frame[columns] - col_mean) / col_range
    return scaled, col_min, col_max, col_mean


# Work on a copy so the raw frame stays available unscaled.
data_training = data_training_FD2.copy()

# The per-column statistics stay bound to their own names after scaling.
data_training[settings], settings_min, settings_max, settings_mean = \
    _center_and_range_scale(data_training_FD2, settings)
data_training[sensors], sensors_min, sensors_max, sensors_mean = \
    _center_and_range_scale(data_training_FD2, sensors)

# Target: time_to_event divided by the largest time_of_event in the frame.
time_of_event_max = data_training_FD2['time_of_event'].max()
data_training['time_to_event'] = data_training_FD2['time_to_event']/time_of_event_max

In [6]:
# Display the constant used to scale the target: the largest time_of_event
# found in the training frame.
time_of_event_max

378.0

In [7]:
# Preview the scaled frame: setting/sensor columns are mean-centered and
# divided by their range, time_to_event is divided by time_of_event_max, and
# the remaining columns are carried over from the raw frame.
data_training.head()

Unnamed: 0,engine_id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s14,s15,s16,s17,s18,s19,s20,s21,time_of_event,time_to_event
0,1,1,0.261852,0.318223,0.148849,-0.318586,-0.223437,-0.166222,-0.164184,-0.238281,...,-0.042933,0.006022,-0.332595,-0.149057,-0.012276,0.148849,-0.207795,-0.208556,149.0,0.391534
1,1,2,0.428485,0.319173,0.148849,-0.378854,-0.273166,-0.180824,-0.191744,-0.384873,...,0.013572,0.017482,-0.332595,-0.190724,-0.035531,0.148849,-0.355943,-0.353075,149.0,0.388889
2,1,3,0.023814,0.059078,-0.851151,-0.140766,-0.388682,-0.442127,-0.380282,-0.091689,...,-0.480144,0.572806,-0.332595,-0.409474,-0.663438,-0.851151,-0.230086,-0.216224,149.0,0.386243
3,1,4,0.428711,0.320123,0.148849,-0.378854,-0.276745,-0.178629,-0.1903,-0.384873,...,0.004909,0.008475,-0.332595,-0.201141,-0.035531,0.148849,-0.34977,-0.341494,149.0,0.383598
4,1,5,0.023855,0.057296,-0.851151,-0.140766,-0.390884,-0.439553,-0.379127,-0.091689,...,-0.488808,0.573611,-0.332595,-0.409474,-0.663438,-0.851151,-0.228371,-0.224398,149.0,0.380952


#### Set a look-back time range to train the model

In [8]:
# Look-back length: each sample is a window of `dt` consecutive rows of one
# engine, labelled with the time_to_event of the window's last row.
dt = 10

#### Prepare the training data

In [8]:
# Build sliding-window samples: every run of `dt` consecutive rows of one
# engine is an input, and the time_to_event of the run's last row is its label.
feature_cols = settings + sensors
Xtraining, Ytraining = [], []
# One groupby pass instead of re-filtering the whole frame for every window;
# sort=False keeps engines in order of first appearance, as .unique() did.
for _, engine_rows in data_training.groupby('engine_id', sort=False):
    # .values works on both old and current pandas (.as_matrix() was removed).
    features = engine_rows[feature_cols].values
    targets = engine_rows['time_to_event'].values
    # The windows are positional slices, so the bound is the engine's row
    # count; engines with fewer than dt rows contribute nothing.
    for start in range(len(engine_rows) - dt + 1):
        stop = start + dt
        Xtraining.append(features[start:stop])
        Ytraining.append(targets[stop - 1])
Xtraining = np.array(Xtraining)
Ytraining = np.array(Ytraining)

#### Prepare training data for testing (the same windows, grouped per engine)

In [9]:
def _as_object_array(items):
    """Pack `items` into a 1-D object array with one element per item.

    Filling element by element keeps differently sized per-engine arrays
    separate, whatever their shapes; np.array() on such a list either fails
    or changes shape depending on the NumPy version and the array lengths.
    """
    packed = np.empty(len(items), dtype=object)
    for position, item in enumerate(items):
        packed[position] = item
    return packed


# Same sliding windows as the training samples, but kept separate per engine.
feature_cols = settings + sensors
Xtraining_to_testing, Ytraining_to_testing, engines_training_to_testing = [], [], []
# One groupby pass; sort=False keeps engines in order of first appearance.
for engine, engine_rows in data_training.groupby('engine_id', sort=False):
    n_rows = len(engine_rows)
    # Keep only engines with more than dt + 5 rows, i.e. at least 7 windows.
    if n_rows > (dt + 5):
        features = engine_rows[feature_cols].values
        targets = engine_rows['time_to_event'].values
        Xtest_engine, Ytest_engine = [], []
        for start in range(n_rows - dt + 1):
            stop = start + dt
            Xtest_engine.append(features[start:stop])
            Ytest_engine.append(targets[stop - 1])
        Xtraining_to_testing.append(np.array(Xtest_engine))
        Ytraining_to_testing.append(np.array(Ytest_engine))
        engines_training_to_testing.append(engine)
Xtraining_to_testing = _as_object_array(Xtraining_to_testing)
Ytraining_to_testing = _as_object_array(Ytraining_to_testing)
engines_training_to_testing = np.array(engines_training_to_testing)

#### Save the training data and the per-engine version used for testing

In [10]:
# Single pre-formatted argument: prints identically on Python 2 and Python 3.
print('%s %s' % (Xtraining.shape, Ytraining.shape))

# File names carry the look-back length, so data built with a different `dt`
# is not written over the dt = 10 files.
_dt_tag = 'dt_%d' % dt
np.save('Xtraining_data_' + _dt_tag, Xtraining)  # reload (dt = 10): Xtraining = np.load('Xtraining_data_dt_10.npy')
np.save('Ytraining_data_' + _dt_tag, Ytraining)  # reload (dt = 10): Ytraining = np.load('Ytraining_data_dt_10.npy')

# NOTE: if the per-engine arrays are object arrays, np.save pickles them and
# recent NumPy needs np.load(..., allow_pickle=True) to read them back.
np.save('Xtraining_to_testing_' + _dt_tag, Xtraining_to_testing)
np.save('Ytraining_to_testing_' + _dt_tag, Ytraining_to_testing)
np.save('engines_training_to_testing_' + _dt_tag, engines_training_to_testing)

### Modeling
#### Import modules

In [10]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

Using TensorFlow backend.


#### Design RNN model architecture

In [11]:
# NOTE(review): the model definition below is wrapped in a triple-quoted
# string, so none of it executes; this cell only evaluates a string literal.
# The text describes two stacked LSTM layers (32 units returning sequences,
# then 64 units) followed by Dense(8), Dense(8), Dense(1), compiled with MAE
# loss and the Adam optimizer. The training cell obtains `model` from
# 'model_dt_10.h5' instead. The layer summary recorded under this cell
# presumably comes from an earlier run in which the code was live -- confirm.
"""model = Sequential()
model.add(LSTM(32, return_sequences = True, input_shape = (dt,len(settings+sensors))))
model.add(LSTM(64))
model.add(Dense(8))
model.add(Dense(8))
model.add(Dense(1))
model.compile(loss='mae',optimizer='adam')
print model.summary()"""

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 32)            7296      
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                24832     
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 520       
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 9         
Total params: 32,729
Trainable params: 32,729
Non-trainable params: 0
_________________________________________________________________
None


#### Model training

In [None]:
import os

from keras.layers import LSTM, Dense
from keras.models import Sequential, load_model

model_path = 'model_dt_10.h5'
if os.path.exists(model_path):
    # Resume training from the previously saved model.
    model = load_model(model_path)
else:
    # No saved model yet (e.g. a fresh checkout): build the network from the
    # architecture documented in the model-design cell, so that a full
    # Restart & Run All does not stop at a missing file.
    model = Sequential()
    model.add(LSTM(32, return_sequences=True, input_shape=(dt, len(settings + sensors))))
    model.add(LSTM(64))
    model.add(Dense(8))
    model.add(Dense(8))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

history = model.fit(Xtraining, Ytraining, epochs=500, batch_size=32)
# save the model (overwrites the file it may have been loaded from)
model.save(model_path)
# per-epoch training loss of this run
mae_loss = pd.DataFrame(history.history['loss'], columns=['loss'])
# save the loss
mae_loss.to_csv('mae_loss_dt_10_3.csv', index=False)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 38/500
Epoch 39/500
Epoch 53/500
Epoch 54/500
Epoch 72/500
Epoch 73/500
Epoch 93/500
Epoch 94/500
Epoch 116/500
Epoch 117/500

In [None]:
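# 0.0764  -- NOTE(review): presumably the final training loss (MAE on the normalized target) of the run above; confirm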