# 01 — EDA & Baseline (CMAPSS FD001)

This notebook performs a descriptive analysis of the data, preprocesses it, and fits a baseline linear regression model to predict remaining useful life (RUL).

In [4]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Apply the 'seaborn-v0_8' matplotlib style sheet to figures created after this point.
plt.style.use('seaborn-v0_8')

In [3]:
# Load data (ensure files are in data/raw/)
# DATA_DIR is resolved relative to the kernel's working directory; adjust it here
# if the notebook is started from a different folder.
DATA_DIR = 'data/raw'

# Column layout applied to the train/test files: engine id, cycle counter,
# 3 operating settings and 21 sensor readings (26 columns in total).
cols = ['engine_id','cycle'] + [f'op_setting_{i}' for i in range(1,4)] + [f'sensor_{i}' for i in range(1,22)]

# The files are read as whitespace-delimited text without a header row.
# The separator is a raw string: '\s' inside a plain string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
train = pd.read_csv(f'{DATA_DIR}/train_FD001.txt', sep=r'\s+', header=None)
train.columns = cols
test = pd.read_csv(f'{DATA_DIR}/test_FD001.txt', sep=r'\s+', header=None)
test.columns = cols
# The RUL file is loaded as-is (integer column labels, no names assigned).
rul = pd.read_csv(f'{DATA_DIR}/RUL_FD001.txt', sep=r'\s+', header=None)

FileNotFoundError: [Errno 2] No such file or directory: 'data/raw/train_FD001.txt'

In [None]:
# Descriptive stats: preview the first rows, then show per-column summary statistics.
display(
    train.head(),
    train.describe(),
)

In [None]:
# Compute RUL: for every row, RUL = (largest cycle recorded for that engine) - (current cycle).

# Per-engine maximum cycle as a two-column frame: engine_id, max_cycle.
last = (
    train.groupby('engine_id')['cycle']
    .max()
    .reset_index()
    .rename(columns={'cycle': 'max_cycle'})
)

# Attach max_cycle to each row, derive RUL from it, then discard the helper column.
train = train.merge(last, on='engine_id', how='left')
train['RUL'] = train['max_cycle'] - train['cycle']
train = train.drop(columns=['max_cycle'])

display(train[['engine_id', 'cycle', 'RUL']].head())

In [None]:
# Scale: min-max scale the operating-setting and sensor columns of the training frame.
features = [f'op_setting_{i}' for i in range(1, 4)]
features += [f'sensor_{i}' for i in range(1, 22)]

# The scaler is fitted on the training features and the scaled values are
# written back into the same columns of `train`.
scaler = MinMaxScaler()
scaler.fit(train[features])
train[features] = scaler.transform(train[features])

In [None]:
# Baseline model: linear regression from the feature columns to RUL.
# NOTE: the model is fitted and scored on the same rows of `train`, so the
# metrics printed below are in-sample (training) scores, not held-out estimates.
X, y = train[features], train['RUL']
lr = LinearRegression().fit(X, y)
pred = lr.predict(X)

# numpy is already imported as `np` in the notebook's import cell, so it is not re-imported here.
mae = mean_absolute_error(y, pred)
rmse = np.sqrt(mean_squared_error(y, pred))  # RMSE is the square root of the MSE
r2 = r2_score(y, pred)
print({'MAE':mae, 'RMSE':rmse, 'R2':r2})

In [None]:
# Plot: scatter of the baseline predictions against the true RUL values.
plt.figure(figsize=(6,6))
sns.scatterplot(x=y, y=pred, alpha=0.4)  # semi-transparent markers so overlapping points stay visible

# Label the figure so it can be read on its own.
plt.xlabel('True RUL')
plt.ylabel('Predicted RUL')
plt.title('Predicted vs True (Baseline)')
plt.grid(True)
plt.show()