In [6]:
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [7]:
# Define column headers
headers = ['unit_number','time_in_cycles','setting_1','setting_2','setting_3','T2','T24','T30','T50','P2','P15','P30','Nf',
           'Nc','epr','Ps30','phi','NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

In [8]:
##Swap out the file names below with the FD you would like to e
# Read the training data
train_file_path = 'train_FD001.txt'
train_data = pd.read_csv(train_file_path, delim_whitespace=True, header=None, names=headers)

# Read the test data
test_file_path = 'test_FD001.txt'
test_data = pd.read_csv(test_file_path, delim_whitespace=True, header=None, names=headers)

# Read the RUL data
rul_file_path = 'RUL_FD001.txt'
rul_data = pd.read_csv(rul_file_path, header=None, names=['RUL'])

In [9]:
# Calculate the maximum cycle number for each unit in the training data
max_cycle_train = train_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
max_cycle_train.columns = ['unit_number', 'max_cycle']

# Merge the max_cycle_train dataframe with the train data to calculate the RUL for each entry
train_data = train_data.merge(max_cycle_train, on='unit_number', how='left')

# Calculate the RUL for each entry in the train data
train_data['RUL'] = train_data['max_cycle'] - train_data['time_in_cycles']

# Drop the temporary max_cycle column
train_data = train_data.drop(columns=['max_cycle'])

In [11]:
train_data[train_data['unit_number']==1] 

Unnamed: 0,unit_number,time_in_cycles,setting_1,setting_2,setting_3,T2,T24,T30,T50,P2,...,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,1,188,-0.0067,0.0003,100.0,518.67,643.75,1602.38,1422.78,14.62,...,2388.23,8117.69,8.5207,0.03,396,2388,100.0,38.51,22.9588,4
188,1,189,-0.0006,0.0002,100.0,518.67,644.18,1596.17,1428.01,14.62,...,2388.33,8117.51,8.5183,0.03,395,2388,100.0,38.48,23.1127,3
189,1,190,-0.0027,0.0001,100.0,518.67,643.64,1599.22,1425.95,14.62,...,2388.35,8112.58,8.5223,0.03,398,2388,100.0,38.49,23.0675,2
190,1,191,-0.0000,-0.0004,100.0,518.67,643.34,1602.36,1425.77,14.62,...,2388.30,8114.61,8.5174,0.03,394,2388,100.0,38.45,23.1295,1


In [13]:
rul_data.columns = ['RUL']
rul_data['unit_number'] = rul_data.index + 1

In [14]:
# Calculate the maximum cycle number for each unit in the test data
max_cycle_test = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
max_cycle_test.columns = ['unit_number', 'max_cycle']

# Merge the max_cycle_test dataframe with the test data to calculate the RUL for each entry
test_data = test_data.merge(max_cycle_test, on='unit_number', how='left')

# Assign the RUL values to the test data based on unit number
# The provided RUL is at the end of the data collection, add this RUL to the difference between max cycle and current cycle
test_data['RUL'] = test_data.apply(lambda row: rul_data.loc[rul_data['unit_number'] == row['unit_number'], 'RUL'].values[0] + (row['max_cycle'] - row['time_in_cycles']), axis=1)

# Drop the temporary max_cycle column
test_data = test_data.drop(columns=['max_cycle'])

test_data

Unnamed: 0,unit_number,time_in_cycles,setting_1,setting_2,setting_3,T2,T24,T30,T50,P2,...,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32,RUL
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735,142.0
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916,141.0
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,2388.03,8130.10,8.4441,0.03,393,2388,100.0,39.08,23.4166,140.0
3,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,...,2388.05,8132.90,8.3917,0.03,391,2388,100.0,39.00,23.3737,139.0
4,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,...,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.4130,138.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,...,2388.00,8213.28,8.4715,0.03,394,2388,100.0,38.65,23.1974,24.0
13092,100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,...,2388.09,8210.85,8.4512,0.03,395,2388,100.0,38.57,23.2771,23.0
13093,100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,...,2388.04,8217.24,8.4569,0.03,395,2388,100.0,38.62,23.2051,22.0
13094,100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,...,2388.08,8220.48,8.4711,0.03,395,2388,100.0,38.66,23.2699,21.0


In [19]:
# Prepare the data for training
X_train = train_data.drop(columns=['unit_number', 'time_in_cycles', 'RUL'])
y_train = train_data[['unit_number', 'time_in_cycles', 'RUL']]
X_test = test_data.drop(columns=['unit_number', 'time_in_cycles', 'RUL'])
y_test = test_data[['unit_number', 'time_in_cycles', 'RUL']]

In [20]:
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for Lasso
lasso_params = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# Lasso Regression with GridSearchCV
lasso = Lasso()
lasso_cv = GridSearchCV(lasso, lasso_params, cv=5, scoring='r2')
lasso_cv.fit(X_train, y_train['RUL'])

# Best alpha and R^2 score for Lasso
print(f"Best alpha for Lasso: {lasso_cv.best_params_}")
print(f"Best R^2 for Lasso: {lasso_cv.best_score_}")

# Filter test data to get the rows with the maximum time_in_cycles for each unit_number
max_cycles = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
test_max_cycles = pd.merge(test_data, max_cycles, on=['unit_number', 'time_in_cycles'], how='inner')

# Evaluate Lasso on filtered test data
test_max_cycles.loc[:, 'predicted'] = lasso_cv.predict(test_max_cycles.drop(columns=['unit_number', 'time_in_cycles', 'RUL']))
mae_lasso = mean_absolute_error(test_max_cycles['RUL'], test_max_cycles['predicted'])
mse_lasso = mean_squared_error(test_max_cycles['RUL'], test_max_cycles['predicted'])
r2_lasso = r2_score(test_max_cycles['RUL'], test_max_cycles['predicted'])

print(f"Lasso Regression - MAE: {mae_lasso}, MSE: {mse_lasso}, R^2: {r2_lasso}")

Best alpha for Lasso: {'alpha': 0.01}
Best R^2 for Lasso: 0.5691915070178621
Lasso Regression - MAE: 25.655149278725222, MSE: 1029.813594041371, R^2: 0.40365294864107215


In [21]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for Ridge
ridge_params = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# Ridge Regression with GridSearchCV
ridge = Ridge()
ridge_cv = GridSearchCV(ridge, ridge_params, cv=5, scoring='r2')
ridge_cv.fit(X_train, y_train['RUL'])

# Best alpha and R^2 score for Ridge
print(f"Best alpha for Ridge: {ridge_cv.best_params_}")
print(f"Best R^2 for Ridge: {ridge_cv.best_score_}")

# Filter test data to get the rows with the maximum time_in_cycles for each unit_number
max_cycles = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
test_max_cycles = pd.merge(test_data, max_cycles, on=['unit_number', 'time_in_cycles'], how='inner')

# Evaluate Ridge on filtered test data
test_max_cycles.loc[:, 'predicted'] = ridge_cv.predict(test_max_cycles.drop(columns=['unit_number', 'time_in_cycles', 'RUL']))
mae_ridge = mean_absolute_error(test_max_cycles['RUL'], test_max_cycles['predicted'])
mse_ridge = mean_squared_error(test_max_cycles['RUL'], test_max_cycles['predicted'])
r2_ridge = r2_score(test_max_cycles['RUL'], test_max_cycles['predicted'])

print(f"Ridge Regression - MAE: {mae_ridge}, MSE: {mse_ridge}, R^2: {r2_ridge}")

Best alpha for Ridge: {'alpha': 1}
Best R^2 for Ridge: 0.5692086558760463
Ridge Regression - MAE: 25.554324770372187, MSE: 1021.8607774746306, R^2: 0.40825828570111444


In [22]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train['RUL'])

# Filter test data to get the rows with the maximum time_in_cycles for each unit_number
max_cycles = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
test_max_cycles = pd.merge(test_data, max_cycles, on=['unit_number', 'time_in_cycles'], how='inner')

# Predict on the filtered test set
y_pred = model.predict(test_max_cycles.drop(columns=['unit_number', 'time_in_cycles', 'RUL']))

# Evaluate the model
mse = mean_squared_error(test_max_cycles['RUL'], y_pred)
r2 = r2_score(test_max_cycles['RUL'], y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Squared Error: 1026.631754812678
R^2 Score: 0.4054954961204492


In [23]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [24]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = Sequential()
model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))  # Output layer for regression task
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
history = model.fit(X_train_scaled, y_train['RUL'], epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 8767.8252 - mae: 71.3803 - val_loss: 2811.3057 - val_mae: 39.7318
Epoch 2/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 800us/step - loss: 1735.1287 - mae: 30.6692 - val_loss: 2762.9495 - val_mae: 38.3313
Epoch 3/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 788us/step - loss: 1576.4919 - mae: 28.5086 - val_loss: 2670.9431 - val_mae: 37.1054
Epoch 4/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 779us/step - loss: 1556.2882 - mae: 28.0270 - val_loss: 2748.1692 - val_mae: 37.3693
Epoch 5/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 800us/step - loss: 1491.5320 - mae: 27.3344 - val_loss: 2786.1599 - val_mae: 37.6204
Epoch 6/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 798us/step - loss: 1547.7172 - mae: 27.7833 - val_loss: 2529.0029 - val_mae: 36.3914
Epoch 7/50
[1m516/516[0m [3

In [26]:
# Filter test data to get the rows with the maximum time_in_cycles for each unit_number
max_cycles = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
test_max_cycles = pd.merge(test_data, max_cycles, on=['unit_number', 'time_in_cycles'], how='inner')

# Assuming X_test_scaled is the scaled version of X_test, you need to scale test_max_cycles accordingly
# Note: Make sure to use the same scaling method used on X_train to scale test_max_cycles
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)
X_test_max_scaled = scaler.transform(test_max_cycles.drop(columns=['unit_number', 'time_in_cycles', 'RUL']))

# Evaluate the model
loss, accuracy = model.evaluate(X_test_max_scaled, test_max_cycles['RUL'])
print(f'Test Accuracy: {accuracy:.4f}')

loss, mae = model.evaluate(X_test_max_scaled, test_max_cycles['RUL'])
print(f'Test Mean Absolute Error: {mae:.4f}')

# Make predictions
predictions = model.predict(X_test_max_scaled)

# Calculate the R² value
r2 = r2_score(test_max_cycles['RUL'], predictions)
print(f'R² Value: {r2:.4f}')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 959.9179 - mae: 23.3485  
Test Accuracy: 22.5759
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 959.9179 - mae: 23.3485  
Test Mean Absolute Error: 22.5759
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
R² Value: 0.4696


In [30]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler


# Filter test data to get the rows with the maximum time_in_cycles for each unit_number
max_cycles = test_data.groupby('unit_number')['time_in_cycles'].max().reset_index()
test_max_cycles = pd.merge(test_data, max_cycles, on=['unit_number', 'time_in_cycles'], how='inner')

# Scale the data
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_max_scaled = scaler.transform(test_max_cycles.drop(columns=['unit_number', 'time_in_cycles', 'RUL']))

# Train the Model
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,
                max_depth = 5, alpha = 10, n_estimators = 100)

xg_reg.fit(X_train_scaled, y_train['RUL'])

# Evaluate the Model
predictions = xg_reg.predict(X_test_max_scaled)
rmse = mean_squared_error(test_max_cycles['RUL'], predictions, squared=False)
mae = mean_absolute_error(test_max_cycles['RUL'], predictions)
r2 = r2_score(test_max_cycles['RUL'], predictions)

print(f"RMSE: {rmse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"R² Value: {r2:.4f}")

RMSE: 31.3662
Mean Absolute Error: 23.0233
R² Value: 0.4303
