In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [13]:
# Define file paths
train_file_path = 'train_FD001.txt'
test_file_path = 'test_FD001.txt'
rul_file_path = 'RUL_FD001.txt'

# Load the data
train_df = pd.read_csv(train_file_path, sep=' ', header=None)
test_df = pd.read_csv(test_file_path, sep=' ', header=None)
rul_df = pd.read_csv(rul_file_path, sep=' ', header=None)

In [14]:
# Drop the last two columns which are all NaN due to trailing space in files
train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)
test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)

In [16]:
# Assign column names
column_names = ['unit_number', 'time_(cycles)', 'operational_setting_1', 'operational_setting_2', 'operational_setting_3',
                'T2_Total_temperature_at_fan_inlet_(°R)', 'T24_Total_temperature_at_LPC_outlet_(°R)', 'T30_Total_temperature_at_HPC_outlet_(°R)',
                'T50_Total_temperature_at_LPT_outlet_(°R)', 'P2_Pressure_at_fan_inlet_(psia)', 'P15_Total_pressure_in_bypass-duct_(psia)',
                'P30_Total_pressure_at_HPC_outlet_(psia)', 'Nf_Physical_fan_speed_(rpm)', 'Nc_Physical_core_speed_(rpm)', 'epr_Engine_pressure_ratio_(P50/P2)',
                'Ps30_Static_pressure_at_HPC_outlet_(psia)', 'phi_Ratio_of_fuel_flow_to_Ps30_(pps/psi)', 'NRf_Corrected_fan_speed_(rpm)', 
                'NRc_Corrected_core_speed_(rpm)', 'BPR_Bypass_Ratio', 'farB_Burner_fuel-air_ratio', 'htBleed_Bleed_Enthalpy', 
                'Nf_dmd_Demanded_fan_speed_(rpm)', 'PCNfR_dmd_Demanded_corrected_fan_speed_(rpm)', 'W31_HPT_coolant_bleed_(lbm/s)', 
                'W32_LPT_coolant_bleed_(lbm/s)']

train_df.columns = column_names
test_df.columns = column_names


In [17]:
# Normalize the data
scaler = MinMaxScaler()
train_df.iloc[:, 2:] = scaler.fit_transform(train_df.iloc[:, 2:])
test_df.iloc[:, 2:] = scaler.transform(test_df.iloc[:, 2:])

  train_df.iloc[:, 2:] = scaler.fit_transform(train_df.iloc[:, 2:])
  test_df.iloc[:, 2:] = scaler.transform(test_df.iloc[:, 2:])


In [18]:
# Add remaining useful life to the training data
rul = pd.DataFrame(train_df.groupby('unit_number')['time_(cycles)'].max()).reset_index()
rul.columns = ['unit_number', 'max_time_in_cycles']
train_df = train_df.merge(rul, on=['unit_number'], how='left')
train_df['remaining_useful_life'] = train_df['max_time_in_cycles'] - train_df['time_(cycles)']
train_df.drop(columns=['max_time_in_cycles'], inplace=True)

In [19]:
print(train_df.head())
print(test_df.head())
print(rul_df.head())

   unit_number  time_(cycles)  operational_setting_1  operational_setting_2  \
0            1              1               0.459770               0.166667   
1            1              2               0.609195               0.250000   
2            1              3               0.252874               0.750000   
3            1              4               0.540230               0.500000   
4            1              5               0.390805               0.333333   

   operational_setting_3  T2_Total_temperature_at_fan_inlet_(°R)  \
0                    0.0                                     0.0   
1                    0.0                                     0.0   
2                    0.0                                     0.0   
3                    0.0                                     0.0   
4                    0.0                                     0.0   

   T24_Total_temperature_at_LPC_outlet_(°R)  \
0                                  0.183735   
1                     