# **1) Imports & Settings**

In [None]:
import random
import warnings

import numpy as np
import pandas as pd

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')
# Seed the stdlib and NumPy global generators with the same fixed value.
random.seed(34)
np.random.seed(34)

2025-09-24 20:33:20.759341: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758746000.997609      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758746001.063081      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# **2) Data loading**

In [2]:
# Column layout of the raw files: 2 identifier columns, 3 setting columns,
# then 21 sensor columns, in this order.
index_names = ['engine', 'cycle']
setting_names = [f'setting_{i}' for i in range(1, 4)]
# The sensor labels are used verbatim as DataFrame column keys, so their exact
# spelling (including spacing and parentheses) must not be altered.
sensor_names = [
    "(Fan inlet temperature) (◦R)",
    "(LPC outlet temperature) (◦R)",
    "(HPC outlet temperature) (◦R)",
    "(LPT outlet temperature) (◦R)",
    "(Fan inlet Pressure) (psia)",
    "(bypass-duct pressure) (psia)",
    "(HPC outlet pressure) (psia)",
    "(Physical fan speed) (rpm)",
    "(Physical core speed) (rpm)",
    "(Engine pressure ratio(P50/P2)",
    "(HPC outlet Static pressure) (psia)",
    "(Ratio of fuel flow to Ps30) (pps/psia)",
    "(Corrected fan speed) (rpm)",
    "(Corrected core speed) (rpm)",
    "(Bypass Ratio) ",
    "(Burner fuel-air ratio)",
    "(Bleed Enthalpy)",
    "(Required fan speed)",
    "(Required fan conversion speed)",
    "(High-pressure turbines Cool air flow)",
    "(Low-pressure turbines Cool air flow)",
]
col_names = [*index_names, *setting_names, *sensor_names]


# adjust DATA_DIR as necessary; the three FD001 files are read from inside it
DATA_DIR = '/kaggle/input/nasa-cmaps/CMaps'
TRAIN_PATH = f'{DATA_DIR}/train_FD001.txt'
TEST_PATH = f'{DATA_DIR}/test_FD001.txt'
RUL_PATH = f'{DATA_DIR}/RUL_FD001.txt'


# load
# The files are parsed as whitespace-delimited with no header row. The
# separator is a raw string so that `\s` reaches the regex engine untouched;
# in a plain string literal it is an invalid escape sequence, which Python
# reports as a DeprecationWarning (SyntaxWarning from 3.12 on).
print("Loading data...")
df_train = pd.read_csv(TRAIN_PATH, sep=r'\s+', header=None, names=col_names)
df_test = pd.read_csv(TEST_PATH, sep=r'\s+', header=None, names=col_names)
df_test_RUL = pd.read_csv(RUL_PATH, sep=r'\s+', header=None, names=['RUL'])


print(f"train shape: {df_train.shape}, test shape: {df_test.shape}, test_RUL shape: {df_test_RUL.shape}")

Loading data...
train shape: (20631, 26), test shape: (13096, 26), test_RUL shape: (100, 1)


# **3) Preprocessing & RUL computation**

In [3]:
def remove_constant_columns(df_train, df_test):
    """Drop columns that hold a single distinct value in the training frame.

    The constant columns are detected on ``df_train`` only (using
    ``Series.nunique()``, which does not count missing values) and the same
    column names are then removed from both frames. Names that are absent
    from a frame are skipped silently. The detected names are printed.

    Returns the pair ``(trimmed_train, trimmed_test)``; the input frames are
    not modified.
    """
    constant_cols = []
    for name in df_train.columns:
        if df_train[name].nunique() == 1:
            constant_cols.append(name)
    print("Columns with constant values:", constant_cols)

    trimmed_train = df_train.drop(columns=constant_cols, errors='ignore')
    trimmed_test = df_test.drop(columns=constant_cols, errors='ignore')
    return trimmed_train, trimmed_test


def compute_rul(df, max_rul=125):
    """Return a copy of ``df`` with a remaining-useful-life (``RUL``) column.

    For every row, RUL is the largest ``cycle`` recorded for that row's
    ``engine`` minus the row's own ``cycle``, so the last recorded cycle of
    each engine gets 0. The values are then capped at ``max_rul``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``engine`` and ``cycle`` columns. It is not modified.
    max_rul : number or None, default 125
        Upper bound applied to the RUL values; ``None`` disables the cap.

    Returns
    -------
    pandas.DataFrame
        The rows and columns of ``df`` (in the same order) plus ``RUL``, on a
        fresh 0..n-1 index. An existing ``RUL`` column is overwritten.
    """
    # reset_index yields a new frame (the caller's frame stays untouched) with
    # the same fresh RangeIndex the earlier merge-based version returned.
    out = df.reset_index(drop=True)
    # Broadcast each engine's last cycle onto its rows directly. No temporary
    # 'life' column is created, so an input that already carries a column of
    # that name neither breaks the computation nor loses the column.
    life = out.groupby('engine')['cycle'].transform('max')
    rul = life - out['cycle']
    if max_rul is not None:
        rul = rul.clip(upper=max_rul)
    out['RUL'] = rul
    return out

In [4]:
# Drop the columns that never vary in the training set from both frames
# (copies are handed to the helper).
df_train, df_test = remove_constant_columns(df_train.copy(), df_test.copy())
# Attach the RUL target to the training frame.
df_train = compute_rul(df_train)

# For the test set keep only each engine's final recorded cycle
# (the ground truth RUL is provided separately).
df_test_cycle = df_test.groupby('engine')['cycle'].max().to_frame('life')
df_test_max = (
    df_test
    .merge(df_test_cycle, how='left', on='engine')
    .loc[lambda merged: merged['cycle'] == merged['life']]
    .drop(columns='life')
)

Columns with constant values: ['setting_3', '(Fan inlet temperature) (◦R)', '(Fan inlet Pressure) (psia)', '(Engine pressure ratio(P50/P2)', '(Burner fuel-air ratio)', '(Required fan speed)', '(Required fan conversion speed)']


In [5]:
# Sanity check: preview the first rows of the prepared frames.
print('\nTrain head:', df_train.head(), sep='\n')
print('\nTest head (last cycle of each engine):', df_test_max.head(), sep='\n')


Train head:
   engine  cycle  setting_1  setting_2  (LPC outlet temperature) (◦R)  \
0       1      1    -0.0007    -0.0004                         641.82   
1       1      2     0.0019    -0.0003                         642.15   
2       1      3    -0.0043     0.0003                         642.35   
3       1      4     0.0007     0.0000                         642.35   
4       1      5    -0.0019    -0.0002                         642.37   

   (HPC outlet temperature) (◦R)  (LPT outlet temperature) (◦R)  \
0                        1589.70                        1400.60   
1                        1591.82                        1403.14   
2                        1587.99                        1404.20   
3                        1582.79                        1401.87   
4                        1582.85                        1406.22   

   (bypass-duct pressure) (psia)  (HPC outlet pressure) (psia)  \
0                          21.61                        554.36   
1            