**Phase 0: Imports and Reading Data**

- Upload dataset.

- Import essential libraries: pandas, numpy, matplotlib, seaborn, etc.

- Load the dataset into a DataFrame.

In [215]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler 
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
import tensorflow as tf
from tensorflow.keras import layers, models
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

In [216]:
# Load the FD004 train/test splits and the RUL file. All three are parsed as
# single-space-separated text without a header row, so pandas assigns integer
# column labels.
read_kwargs = {'sep': ' ', 'header': None}
df_train = pd.read_csv('/kaggle/input/nasa-cmaps/CMaps/train_FD004.txt', **read_kwargs)
df_test = pd.read_csv('/kaggle/input/nasa-cmaps/CMaps/test_FD004.txt', **read_kwargs)
rul = pd.read_csv('/kaggle/input/nasa-cmaps/CMaps/RUL_FD004.txt', **read_kwargs)

**Phase 1: Data Understanding & Preprocessing**

- Explore dataset structure and dimensions.

- Assign column names, drop empty/constant columns.

- Check for missing values and duplicates, and handle any that are found.

- Verify data types and correct inconsistencies.

In [217]:
# Keep only the leading data columns: the first 26 of the train/test frames and
# the first column of the RUL frame. The discarded trailing columns (positions
# 26-27, and position 1 in `rul`) are presumably empty artefacts of parsing with
# a single-space separator -- TODO confirm against the raw files.
#
# Slicing by position into a fresh copy keeps this cell idempotent: re-running
# it is a no-op, whereas an in-place drop by position raises IndexError once the
# extra columns are already gone.
N_DATA_COLUMNS = 26
df_train = df_train.iloc[:, :N_DATA_COLUMNS].copy()
df_test = df_test.iloc[:, :N_DATA_COLUMNS].copy()
rul = rul.iloc[:, :1].copy()

# Report the resulting (rows, columns) of each frame.
print("df_train:" ,df_train.shape)
print("df_test:",df_test.shape)
print("rul:" ,rul.shape)

df_train: (61249, 26)
df_test: (41214, 26)
rul: (248, 1)


In [218]:
# Column layout applied to the train/test frames: two identifier columns,
# three operating-setting columns, then 21 sensor columns (26 in total).
index_names = ['engine', 'cycle']
setting_names = ['setting_1', 'setting_2', 'setting_3']
# NOTE: these labels are used verbatim as DataFrame column names; keep the
# exact spelling (including spacing and brackets) so lookups by label match.
sensor_names = [
    "(Fan inlet temperature) (◦R)",
    "(LPC outlet temperature) (◦R)",
    "(HPC outlet temperature) (◦R)",
    "(LPT outlet temperature) (◦R)",
    "(Fan inlet Pressure) (psia)",
    "(bypass-duct pressure) (psia)",
    "(HPC outlet pressure) (psia)",
    "(Physical fan speed) (rpm)",
    "(Physical core speed) (rpm)",
    "(Engine pressure ratio(P50/P2)",
    "(HPC outlet Static pressure) (psia)",
    "(Ratio of fuel flow to Ps30) (pps/psia)",
    "(Corrected fan speed) (rpm)",
    "(Corrected core speed) (rpm)",
    "(Bypass Ratio) ",
    "(Burner fuel-air ratio)",
    "(Bleed Enthalpy)",
    "(Required fan speed)",
    "(Required fan conversion speed)",
    "(High-pressure turbines Cool air flow)",
    "(Low-pressure turbines Cool air flow)",
]
col_names = [*index_names, *setting_names, *sensor_names]

# The train and test frames share one schema; the RUL frame has a single column.
for frame in (df_train, df_test):
    frame.columns = col_names
rul.columns = ['RUL']

In [219]:
# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" after each report.
df_test.info()
print("---------------------------------")
df_train.info()
print("---------------------------------")
rul.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41214 entries, 0 to 41213
Data columns (total 26 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   engine                                   41214 non-null  int64  
 1   cycle                                    41214 non-null  int64  
 2   setting_1                                41214 non-null  float64
 3   setting_2                                41214 non-null  float64
 4   setting_3                                41214 non-null  float64
 5   (Fan inlet temperature) (◦R)             41214 non-null  float64
 6   (LPC outlet temperature) (◦R)            41214 non-null  float64
 7   (HPC outlet temperature) (◦R)            41214 non-null  float64
 8   (LPT outlet temperature) (◦R)            41214 non-null  float64
 9   (Fan inlet Pressure) (psia)              41214 non-null  float64
 10  (bypass-duct pressure) (psia)            41214

In [220]:
# Summary statistics of the training frame, transposed so that each original
# column becomes one row of the table.
df_train.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
engine,61249.0,124.325181,71.99535,1.0,60.0,126.0,185.0,249.0
cycle,61249.0,134.311417,89.783389,1.0,62.0,123.0,191.0,543.0
setting_1,61249.0,23.999823,14.780722,0.0,10.0046,25.0014,41.9981,42.008
setting_2,61249.0,0.571347,0.310703,0.0,0.2507,0.7,0.84,0.842
setting_3,61249.0,94.031576,14.251954,60.0,100.0,100.0,100.0,100.0
(Fan inlet temperature) (◦R),61249.0,472.882435,26.436832,445.0,445.0,462.54,491.19,518.67
(LPC outlet temperature) (◦R),61249.0,579.420056,37.342647,535.48,549.33,555.74,607.07,644.42
(HPC outlet temperature) (◦R),61249.0,1417.8966,106.167598,1242.67,1350.55,1367.68,1497.42,1613.0
(LPT outlet temperature) (◦R),61249.0,1201.915359,119.327591,1024.42,1119.49,1136.92,1302.62,1440.77
(Fan inlet Pressure) (psia),61249.0,8.031626,3.622872,3.91,3.91,7.05,10.52,14.62


In [221]:
# Count fully duplicated rows in the training frame; the first occurrence of a
# row is not counted, only its later repeats.
df_train.duplicated(keep='first').sum()

0

In [222]:
# Number of missing values in each column of the training frame.
df_train.isna().sum()

engine                                     0
cycle                                      0
setting_1                                  0
setting_2                                  0
setting_3                                  0
(Fan inlet temperature) (◦R)               0
(LPC outlet temperature) (◦R)              0
(HPC outlet temperature) (◦R)              0
(LPT outlet temperature) (◦R)              0
(Fan inlet Pressure) (psia)                0
(bypass-duct pressure) (psia)              0
(HPC outlet pressure) (psia)               0
(Physical fan speed) (rpm)                 0
(Physical core speed) (rpm)                0
(Engine pressure ratio(P50/P2)             0
(HPC outlet Static pressure) (psia)        0
(Ratio of fuel flow to Ps30) (pps/psia)    0
(Corrected fan speed) (rpm)                0
(Corrected core speed) (rpm)               0
(Bypass Ratio)                             0
(Burner fuel-air ratio)                    0
(Bleed Enthalpy)                           0
(Required 