# Import libraries

In [46]:
import os
import numpy as np
import pandas as pd

# Loading train/test data from local files

In [47]:
# Column-name groups, in the order the columns are assigned to the raw data files.
id_col = ['id']
cycle_col = ['cycle']
setting_cols = ['setting1', 'setting2', 'setting3']
# 'sensor1' ... 'sensor21'
sensor_cols = [f'sensor{number}' for number in range(1, 22)]
rul_col = ['RUL']
# Every column of a labelled frame: identifiers, settings, sensors, then the RUL target.
total_cols = [*id_col, *cycle_col, *setting_cols, *sensor_cols, *rul_col]

## Loading the training data

In [48]:
def load_data(file_path):
    """
    Read one space-separated, header-less data file into a DataFrame.

    Columns 26 and 27 of the parsed file are dropped (per the original
    author's note they contain only missing values), and the remaining
    columns are named: id, cycle, the setting columns, then the sensor columns.
    """
    frame = pd.read_csv(file_path, sep=" ", header=None)
    # Discard the two all-missing columns at positions 26 and 27.
    frame = frame.drop(columns=[26, 27])
    frame.columns = id_col + cycle_col + setting_cols + sensor_cols
    return frame

In [49]:
# Add the RUL (remaining useful life) label to the training data
def add_train_RUL(data):
    """
    Return a copy of `data` with an added 'RUL' column.

    For every row, RUL is the largest 'cycle' value recorded for that row's
    'id' minus the row's own 'cycle'. Only rows with cycle > 0 are returned;
    the input frame is not modified.
    """
    # Largest cycle per id, as a two-column frame: ['id', 'max_cycle'].
    last_cycle = (
        data.groupby('id')['cycle']
        .max()
        .rename('max_cycle')
        .reset_index()
    )
    labelled = data.merge(last_cycle, on=['id'], how='left')
    labelled['RUL'] = labelled['max_cycle'] - labelled['cycle']
    # 'max_cycle' was only a helper for computing RUL.
    labelled = labelled.drop(columns=['max_cycle'])
    return labelled[labelled['cycle'] > 0]

In [50]:
# Load train data
def load_training_data(dir_path, ID_dataset):
    """
    Load the training file 'train_FD00<ID_dataset>.txt' from `dir_path`
    and return it with the 'RUL' column added by add_train_RUL.
    """
    file_name = f'train_FD00{ID_dataset}.txt'
    raw_frame = load_data(os.path.join(dir_path, file_name))
    return add_train_RUL(raw_frame)

In [51]:
# Directory holding the data files, and the sub-dataset to work with (1 -> FD001).
dir_path = "../../datasets/CMAPSS_JetEngine"
ID_dataset = 1
df_train = load_training_data(dir_path=dir_path, ID_dataset=ID_dataset)
df_train

Unnamed: 0,id,cycle,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,100.0,518.67,643.49,1597.98,1428.63,14.62,...,2388.26,8137.60,8.4956,0.03,397,2388,100.0,38.49,22.9735,4
20627,100,197,-0.0016,-0.0005,100.0,518.67,643.54,1604.50,1433.58,14.62,...,2388.22,8136.50,8.5139,0.03,395,2388,100.0,38.30,23.1594,3
20628,100,198,0.0004,0.0000,100.0,518.67,643.42,1602.46,1428.18,14.62,...,2388.24,8141.05,8.5646,0.03,398,2388,100.0,38.44,22.9333,2
20629,100,199,-0.0011,0.0003,100.0,518.67,643.23,1605.26,1426.53,14.62,...,2388.23,8139.29,8.5389,0.03,395,2388,100.0,38.29,23.0640,1


In [52]:
# Some measurements are constant and therefore provide no useful information for the RUL prediction.
# Remove these columns: ['setting3','sensor1','sensor5','sensor10','sensor16', 'sensor18','sensor19']

# Remove the properties that correlate only weakly with the RUL: ['id', 'setting1', 'setting2', 'sensor6']

# Remove the cycle column, as in this paper: ['cycle']

# Rebind instead of dropping in place, and ignore columns that are already gone,
# so that re-running this cell is a no-op instead of a KeyError.
df_train = df_train.drop(
    columns=['setting3','sensor1','sensor5','sensor10','sensor16', 'sensor18','sensor19', 'id', 'setting1', 'setting2', 'sensor6', 'cycle'],
    errors='ignore',
)

In [53]:
# Display df_train to check which columns remain.
df_train

Unnamed: 0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,RUL
0,641.82,1589.70,1400.60,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.4190,191
1,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.00,23.4236,190
2,642.35,1587.99,1404.20,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,189
3,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,188
4,642.37,1582.85,1406.22,554.00,2388.06,9055.15,47.28,522.19,2388.04,8133.80,8.4294,393,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,643.49,1597.98,1428.63,551.43,2388.19,9065.52,48.07,519.49,2388.26,8137.60,8.4956,397,38.49,22.9735,4
20627,643.54,1604.50,1433.58,550.86,2388.23,9065.11,48.04,519.68,2388.22,8136.50,8.5139,395,38.30,23.1594,3
20628,643.42,1602.46,1428.18,550.94,2388.24,9065.90,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333,2
20629,643.23,1605.26,1426.53,550.68,2388.25,9073.72,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.0640,1


## Loading the testing data

In [25]:
def load_testing_data(dir_path, ID_dataset):
    """
    Load the testing file 'test_FD00<ID_dataset>.txt' from `dir_path`
    via load_data and return it unchanged (no RUL column is added).
    """
    file_name = f'test_FD00{ID_dataset}.txt'
    return load_data(os.path.join(dir_path, file_name))

In [120]:
# Directory holding the data files, and the sub-dataset to work with (1 -> FD001).
dir_path = "../../datasets/CMAPSS_JetEngine"
ID_dataset = 1
df_test = load_testing_data(dir_path, ID_dataset)
# Keep an untouched copy (still containing 'id' and 'cycle') for later merging.
df_test_cp = df_test.copy()
# Largest recorded cycle per id, as a two-column frame: ['id', 'max'].
test_max = df_test.groupby('id')['cycle'].max().rename('max').reset_index()
# Attach 'max' to every row, then drop the same columns that were removed from the training frame.
df_test = (
    df_test
    .merge(test_max, on=['id'], how='left')
    .drop(columns=['setting3','sensor1','sensor5','sensor10','sensor16', 'sensor18','sensor19', 'id', 'setting1', 'setting2', 'sensor6', 'cycle'])
)
df_test

Unnamed: 0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,max
0,643.02,1585.29,1398.21,553.90,2388.04,9050.17,47.20,521.72,2388.03,8125.55,8.4052,392,38.86,23.3735,31
1,641.71,1588.45,1395.42,554.85,2388.01,9054.42,47.50,522.16,2388.06,8139.62,8.3803,393,39.02,23.3916,31
2,642.46,1586.94,1401.34,554.11,2388.05,9056.96,47.50,521.97,2388.03,8130.10,8.4441,393,39.08,23.4166,31
3,642.44,1584.12,1406.42,554.07,2388.03,9045.29,47.28,521.38,2388.05,8132.90,8.3917,391,39.00,23.3737,31
4,642.51,1587.19,1401.92,554.16,2388.01,9044.55,47.31,522.15,2388.03,8129.54,8.4031,390,38.99,23.4130,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,643.24,1599.45,1415.79,553.41,2388.02,9142.37,47.69,520.69,2388.00,8213.28,8.4715,394,38.65,23.1974,198
13092,643.22,1595.69,1422.05,553.22,2388.05,9140.68,47.60,521.05,2388.09,8210.85,8.4512,395,38.57,23.2771,198
13093,643.44,1593.15,1406.82,553.04,2388.11,9146.81,47.57,521.18,2388.04,8217.24,8.4569,395,38.62,23.2051,198
13094,643.26,1594.99,1419.36,553.37,2388.07,9148.85,47.61,521.33,2388.08,8220.48,8.4711,395,38.66,23.2699,198


## Loading RUL testing data

In [84]:
# Build the file name from ID_dataset, like the train/test loaders do, so the
# RUL labels always belong to the same sub-dataset as df_train / df_test.
RUL = pd.read_csv(os.path.join(dir_path, f'RUL_FD00{ID_dataset}.txt'), sep=" ", header=None)
# Drop the column at position 1 and keep only the first column.
# NOTE(review): presumably an empty column produced by a trailing separator - confirm.
RUL = RUL.drop(columns=RUL.columns[[1]])
RUL

Unnamed: 0,0
0,112
1,98
2,69
3,82
4,91
...,...
95,137
96,82
97,59
98,117


# Visualize the correlation matrix data

In [70]:
# NOTE(review): the recorded output of this cell is "NameError: name 'sns' is not defined".
# Neither `sns` nor `plt` is imported in the cells visible here - presumably seaborn and
# matplotlib.pyplot; confirm and import them before this cell.
# NOTE(review): `df` is not defined in the visible cells either - presumably df_train was intended; confirm.
# Draw an annotated heatmap of the pairwise column correlations of `df`.
sns.heatmap(df.corr(), annot=True, cmap='RdYlGn',linewidths=0.2)
# Resize the current figure to 18 x 10 inches before showing it.
fig=plt.gcf()
fig.set_size_inches(18,10)
plt.show()

NameError: name 'sns' is not defined

# Evaluation Metrics

In [22]:
# Equation (21)
def calculate_s_score(y_ground_truth, y_predicted_value, positive_exp_factor=10, negative_exp_factor=13):
    """
    Asymmetric exponential scoring function (Equation (21)).

    For each sample the error is d = predicted - ground truth, and the penalty is
        exp(d / positive_exp_factor) - 1     if d >= 0
        exp(-d / negative_exp_factor) - 1    if d < 0
    The score is the sum of the penalties over all samples.

    Parameters
    ----------
    y_ground_truth, y_predicted_value : array-like
        True and predicted values. Lists, Series and arrays are accepted; both
        are flattened first, so an (n, 1) prediction array pairs element-wise
        with an (n,) target instead of broadcasting to (n, n).
    positive_exp_factor : float, default 10
        Divisor used in the exponent when d >= 0.
    negative_exp_factor : float, default 13
        Divisor used in the exponent when d < 0.

    Returns
    -------
    float
        The summed score (0.0 for empty input).
    """
    # NumPy (already imported as np) is used instead of the math module, which is
    # not imported in the notebook cells shown; it also removes the Python-level loop.
    truth = np.asarray(y_ground_truth, dtype=float).ravel()
    predicted = np.asarray(y_predicted_value, dtype=float).ravel()
    losses = predicted - truth
    penalties = np.where(
        losses >= 0,
        np.exp(losses / positive_exp_factor),
        np.exp(-losses / negative_exp_factor),
    ) - 1
    return float(penalties.sum())

In [23]:
def evaluate_model(y_ground_truth, y_predicted_value):
    """
    Return RMSE and s_score

    The RMSE is computed directly with NumPy instead of
    ``mean_squared_error(..., squared=False)``: that function is not imported
    in the notebook cells shown, and its ``squared`` argument is deprecated in
    recent scikit-learn releases. Both inputs are flattened, so this is the
    RMSE of a single-output target.

    Parameters
    ----------
    y_ground_truth, y_predicted_value : array-like
        True and predicted values with the same number of elements.

    Returns
    -------
    (float, float)
        The pair (rmse, s_score).

    Raises
    ------
    ValueError
        If the inputs are empty or contain different numbers of elements.
    """
    truth = np.asarray(y_ground_truth, dtype=float).ravel()
    predicted = np.asarray(y_predicted_value, dtype=float).ravel()
    # Fail loudly instead of letting NumPy broadcast mismatched lengths.
    if truth.size == 0 or truth.shape != predicted.shape:
        raise ValueError(
            f"Inconsistent or empty inputs: {truth.size} ground-truth values, "
            f"{predicted.size} predicted values"
        )
    rmse = float(np.sqrt(np.mean((predicted - truth) ** 2)))
    s_score = calculate_s_score(y_ground_truth, y_predicted_value)
    return rmse, s_score

# LSTM (Long Short-Term Memory)

## Data Preprocessing Functions

In [54]:
# Display df_train again for reference.
df_train

Unnamed: 0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,RUL
0,641.82,1589.70,1400.60,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.4190,191
1,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.00,23.4236,190
2,642.35,1587.99,1404.20,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,189
3,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,188
4,642.37,1582.85,1406.22,554.00,2388.06,9055.15,47.28,522.19,2388.04,8133.80,8.4294,393,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,643.49,1597.98,1428.63,551.43,2388.19,9065.52,48.07,519.49,2388.26,8137.60,8.4956,397,38.49,22.9735,4
20627,643.54,1604.50,1433.58,550.86,2388.23,9065.11,48.04,519.68,2388.22,8136.50,8.5139,395,38.30,23.1594,3
20628,643.42,1602.46,1428.18,550.94,2388.24,9065.90,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333,2
20629,643.23,1605.26,1426.53,550.68,2388.25,9073.72,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.0640,1


In [97]:
# Display df_test again for reference.
df_test

Unnamed: 0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,max
0,643.02,1585.29,1398.21,553.90,2388.04,9050.17,47.20,521.72,2388.03,8125.55,8.4052,392,38.86,23.3735,31
1,641.71,1588.45,1395.42,554.85,2388.01,9054.42,47.50,522.16,2388.06,8139.62,8.3803,393,39.02,23.3916,31
2,642.46,1586.94,1401.34,554.11,2388.05,9056.96,47.50,521.97,2388.03,8130.10,8.4441,393,39.08,23.4166,31
3,642.44,1584.12,1406.42,554.07,2388.03,9045.29,47.28,521.38,2388.05,8132.90,8.3917,391,39.00,23.3737,31
4,642.51,1587.19,1401.92,554.16,2388.01,9044.55,47.31,522.15,2388.03,8129.54,8.4031,390,38.99,23.4130,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,643.24,1599.45,1415.79,553.41,2388.02,9142.37,47.69,520.69,2388.00,8213.28,8.4715,394,38.65,23.1974,198
13092,643.22,1595.69,1422.05,553.22,2388.05,9140.68,47.60,521.05,2388.09,8210.85,8.4512,395,38.57,23.2771,198
13093,643.44,1593.15,1406.82,553.04,2388.11,9146.81,47.57,521.18,2388.04,8217.24,8.4569,395,38.62,23.2051,198
13094,643.26,1594.99,1419.36,553.37,2388.07,9148.85,47.61,521.33,2388.08,8220.48,8.4711,395,38.66,23.2699,198


In [106]:
# Display the RUL frame again for reference.
RUL

Unnamed: 0,0
0,112
1,98
2,69
3,82
4,91
...,...
95,137
96,82
97,59
98,117


In [109]:
def process_data_lstm(training_data, testing_data, RUL_data):
    """
    Min-max normalize the training and testing features and prepare the
    ground-truth frame for the test set.

    Parameters
    ----------
    training_data : pandas.DataFrame
        Training frame containing the feature columns and an 'RUL' column.
    testing_data : pandas.DataFrame
        Testing frame containing the same feature columns plus a 'max' column.
    RUL_data : pandas.DataFrame
        Single-column frame of ground-truth values for the test set.

    Returns
    -------
    pandas.DataFrame
        A copy of `RUL_data` whose only column is renamed to 'more', with an
        added 'id' column equal to the row index + 1.

    Notes
    -----
    The normalized training and testing frames are computed here but are not
    returned yet; only the shape of the normalized training frame is printed.
    Requires `MinMaxScaler` (sklearn.preprocessing) to be imported before the call.
    """
    train_df = training_data.copy()
    truth_df = RUL_data.copy()

    # =======================================================
    # Training
    # =======================================================
    # MinMax normalization of every column except the target.
    cols_normalize = train_df.columns.difference(['RUL'])
    min_max_scaler = MinMaxScaler()

    norm_train_df = pd.DataFrame(
        min_max_scaler.fit_transform(train_df[cols_normalize]),
        columns=cols_normalize,
        index=train_df.index,
    )
    # Re-attach the target and restore the column order from the frame that was
    # passed in, not from the notebook-level df_train, so the function works on
    # whatever training frame the caller supplies.
    train_df = norm_train_df.join(train_df['RUL']).reindex(columns=train_df.columns)
    print("train_df >> ",train_df.shape)

    # =======================================================
    # Testing
    # =======================================================
    # MinMax normalization
    test_df = testing_data.drop(columns=['max'])

    # Reuse the scaler fitted on the training data (transform, not fit_transform),
    # so that train and test features share the same scale.
    norm_test_df = pd.DataFrame(
        min_max_scaler.transform(test_df[cols_normalize]),
        columns=cols_normalize,
        index=test_df.index
    )

    # We can use ground truth dataset to generate labels for the test data.
    truth_df.columns = ['more']
    truth_df['id'] = truth_df.index + 1
    return truth_df

In [110]:
from sklearn.preprocessing import MinMaxScaler
# Run the LSTM preprocessing; the returned frame has the columns 'more' and 'id'.
norm = process_data_lstm(training_data=df_train, testing_data=df_test, RUL_data=RUL)
norm

train_df >>  (20631, 15)


Unnamed: 0,more,id
0,112,1
1,98,2
2,69,3
3,82,4
4,91,5
...,...,...
95,137,96
96,82,97
97,59,98
98,117,99


In [113]:
# Store the row-wise sum of test_max['max'] and norm['more'] (aligned on the row index)
# as the 'max' column, then remove the 'more' column.
norm = norm.assign(max=test_max['max'] + norm['more']).drop(columns=['more'])

In [111]:
# Display test_max (columns 'id' and 'max').
test_max

Unnamed: 0,id,max
0,1,31
1,2,49
2,3,126
3,4,106
4,5,98
...,...,...
95,96,97
96,97,134
97,98,121
98,99,97


In [114]:
# Display norm (columns 'id' and 'max').
norm

Unnamed: 0,id,max
0,1,143
1,2,147
2,3,195
3,4,188
4,5,189
...,...,...
95,96,234
96,97,216
97,98,180
98,99,214


In [118]:
# Attach norm's 'max' value to every row of the untouched test copy, matching on 'id'.
df_test_cp = pd.merge(df_test_cp, norm, how='right', on=['id'])

In [119]:
# Display df_test_cp to inspect the result.
df_test_cp

Unnamed: 0,id,cycle,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,max
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735,143
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916,143
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,2388.03,8130.10,8.4441,0.03,393,2388,100.0,39.08,23.4166,143
3,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,...,2388.05,8132.90,8.3917,0.03,391,2388,100.0,39.00,23.3737,143
4,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,...,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.4130,143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,...,2388.00,8213.28,8.4715,0.03,394,2388,100.0,38.65,23.1974,218
13092,100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,...,2388.09,8210.85,8.4512,0.03,395,2388,100.0,38.57,23.2771,218
13093,100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,...,2388.04,8217.24,8.4569,0.03,395,2388,100.0,38.62,23.2051,218
13094,100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,...,2388.08,8220.48,8.4711,0.03,395,2388,100.0,38.66,23.2699,218
