In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
column_names = ['engine_id', 'cycle', 'setting1', 'setting2', 'setting3'] + [f'sensor{i}' for i in range(1, 27)]
train_df = pd.read_csv('./dataset/train_FD001.txt', sep='\s+', header=None, names=column_names)
test_df = pd.read_csv('./dataset/test_FD001.txt', sep='\s+', header=None, names=column_names)
true_rul = pd.read_csv('./dataset/RUL_FD001.txt', header=None)
train_df = train_df.dropna(axis=1, how="all")
test_df = test_df.dropna(axis=1, how="all")

In [2]:
# RNG
rng = np.random.RandomState(42)

In [3]:
columns_to_drop = ["setting3", "sensor1", "sensor5", "sensor6", "sensor10", "sensor16", "sensor18", "sensor19"]
train_df_dropped = train_df.drop(columns=columns_to_drop)
test_df_dropped = test_df.drop(columns=columns_to_drop)

In [4]:
# Normalization
from sklearn.preprocessing import MinMaxScaler

# Initialize the scaler
scaler = MinMaxScaler()

# Separate the columns to normalize and the columns to skip
columns_to_skip = train_df_dropped.columns[:2]
columns_to_normalize = train_df_dropped.columns[2:]

# Normalize only the selected columns
normalized_data = scaler.fit_transform(train_df_dropped[columns_to_normalize])

# Combine the normalized and unnormalized columns
train_df_normalized = pd.DataFrame(train_df_dropped[columns_to_skip].values, columns=columns_to_skip)
train_df_normalized = pd.concat([train_df_normalized, pd.DataFrame(normalized_data, columns=columns_to_normalize)], axis=1)

# Display the normalized DataFrame
print("Normalized Data (0-1 range):")
print(train_df_normalized.head())

Normalized Data (0-1 range):
   engine_id  cycle  setting1  setting2   sensor2   sensor3   sensor4  \
0          1      1  0.459770  0.166667  0.183735  0.406802  0.309757   
1          1      2  0.609195  0.250000  0.283133  0.453019  0.352633   
2          1      3  0.252874  0.750000  0.343373  0.369523  0.370527   
3          1      4  0.540230  0.500000  0.343373  0.256159  0.331195   
4          1      5  0.390805  0.333333  0.349398  0.257467  0.404625   

    sensor7   sensor8   sensor9  sensor11  sensor12  sensor13  sensor14  \
0  0.726248  0.242424  0.109755  0.369048  0.633262  0.205882  0.199608   
1  0.628019  0.212121  0.100242  0.380952  0.765458  0.279412  0.162813   
2  0.710145  0.272727  0.140043  0.250000  0.795309  0.220588  0.171793   
3  0.740741  0.318182  0.124518  0.166667  0.889126  0.294118  0.174889   
4  0.668277  0.242424  0.149960  0.255952  0.746269  0.235294  0.174734   

   sensor15  sensor17  sensor20  sensor21  
0  0.363986  0.333333  0.713178  0.72

In [5]:
# Apply column dropping to test data
test_df_dropped = test_df.drop(columns=columns_to_drop)

# Normalize test data using the same scaler
normalized_test_data = scaler.transform(test_df_dropped[columns_to_normalize])

# Combine normalized and unnormalized columns in test data
test_df_normalized = pd.DataFrame(test_df_dropped[columns_to_skip].values, columns=columns_to_skip)
test_df_normalized = pd.concat([test_df_normalized, pd.DataFrame(normalized_test_data, columns=columns_to_normalize)], axis=1)

In [6]:
# Labelling RUL
train_df_normalized['RUL'] = train_df_normalized.groupby('engine_id')['cycle'].transform(lambda x: x.max() - x)

In [7]:
# PWRUL
# Set the early RUL threshold
early_rul_threshold = 120

# Define the piecewise linear degradation function
def piecewise_rul(cycle, max_cycle):
    remaining_life = max_cycle - cycle
    if remaining_life > early_rul_threshold:
        return early_rul_threshold  # slower degradation in the early phase
    else:
        return remaining_life  # direct linear degradation after threshold
    
train_df_normalized["PWRUL"] = train_df_normalized.apply(lambda row: piecewise_rul(row['cycle'], row['cycle'] + row['RUL']), axis=1)

In [8]:
# Define sequence length
sequence_length = 30

# Identify feature columns
feature_columns = [col for col in train_df_normalized.columns if col not in ['engine_id', 'cycle', 'RUL', 'PWRUL']]

# Initialize lists for sequences and labels
X = []
y = []

# Generate sequences and labels
for engine_id in train_df_normalized['engine_id'].unique():
    engine_data = train_df_normalized[train_df_normalized['engine_id'] == engine_id].reset_index(drop=True)
    for i in range(sequence_length, len(engine_data)):
        # Extract sequence of sensor readings
        seq_x = engine_data[feature_columns].iloc[i-sequence_length:i].values
        # Extract the RUL value at the end of the sequence
        seq_y = engine_data['RUL'].iloc[i]
        X.append(seq_x)
        y.append(seq_y)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("Input shape:", X.shape)
print("Labels shape:", y.shape)



Input shape: (17631, 30, 16)
Labels shape: (17631,)


In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense

# Define the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length, len(feature_columns))))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(units=100))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 28, 64)            3136      
                                                                 
 conv1d_1 (Conv1D)           (None, 26, 64)            12352     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 13, 64)           0         
 )                                                               
                                                                 
 lstm (LSTM)                 (None, 100)               66000     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 81,589
Trainable params: 81,589
Non-trainable params: 0
____________________________________________________

In [10]:
from sklearn.model_selection import train_test_split

# Train the model
history = model.fit(X, y, 
                    epochs=50, 
                    batch_size=64)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# from sklearn.metrics import mean_squared_error

# # Predict RUL on validation data
# y_pred = model.predict(X_val)

# # Calculate Mean Squared Error
# mse = mean_squared_error(y_val, y_pred)
# print("Validation Mean Squared Error:", mse)

In [12]:
from sklearn.metrics import mean_squared_error
X_test = []
y_test = []

for engine_id in test_df_normalized['engine_id'].unique():
    engine_data = test_df_normalized[test_df_normalized['engine_id'] == engine_id].reset_index(drop=True)
    if len(engine_data) >= sequence_length:
        # Use only the last sequence
        seq_x = engine_data[feature_columns].iloc[-sequence_length:].values
        X_test.append(seq_x)
        # Get the true RUL for this engine
        seq_y = true_rul.loc[engine_id - 1].values[0]
        y_test.append(seq_y)
    else:
        print(f"Engine {engine_id} has insufficient data for the defined sequence length.")

X_test = np.array(X_test)
y_test = np.array(y_test)

# Predict RUL on test data
y_test_pred = model.predict(X_test)

# Evaluate
test_mse = mean_squared_error(y_test, y_test_pred)
print("Test Mean Squared Error:", test_mse)

Test Mean Squared Error: 984.0605567679921


In [13]:
from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(y_test, y_test_pred, squared=False)
print("Test Root Mean Squared Error:", rmse)

Test Root Mean Squared Error: 31.3697395074934




In [14]:
# model.save('rul_prediction_model.h5')