# FD001 — RUL Predictions Visualization

This notebook loads the saved LSTM sequence model and test data, computes per-window predictions, and visualizes predicted vs true Remaining Useful Life (RUL) for dataset **FD001**.

**How to use:** make sure you have run the training and the following files exist in the repo root:

- `data/processed/for_model/FD001_test_labeled.csv`
- `models/sequence/FD001/lstm_best.pth`
- `models/sequence/FD001/features.joblib`

Then run the cells.


In [1]:
# Standard imports
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import torch
import torch.nn as nn
from tqdm import tqdm
%matplotlib inline
print('Imports ready')


Imports ready


In [2]:
# Configuration - adjust paths if your repo is elsewhere
ROOT = Path.cwd()  # adjust if executing from a different working directory
PROC_DIR = ROOT / 'data' / 'processed' / 'for_model'
MODEL_DIR = ROOT / 'models' / 'sequence' / 'FD001'
TEST_CSV = PROC_DIR / 'FD001_test_labeled.csv'
FEATURES_PKL = MODEL_DIR / 'features.joblib'
CKPT_PATH = MODEL_DIR / 'lstm_best.pth'

print('Paths:')
print('TEST_CSV ->', TEST_CSV)
print('FEATURES_PKL ->', FEATURES_PKL)
print('CKPT_PATH ->', CKPT_PATH)


Paths:
TEST_CSV -> c:\Users\user\BMW Projects\auto-predictive-maintenance-system\notebooks\data\processed\for_model\FD001_test_labeled.csv
FEATURES_PKL -> c:\Users\user\BMW Projects\auto-predictive-maintenance-system\notebooks\models\sequence\FD001\features.joblib
CKPT_PATH -> c:\Users\user\BMW Projects\auto-predictive-maintenance-system\notebooks\models\sequence\FD001\lstm_best.pth


In [6]:
# Load test dataframe and feature names
assert TEST_CSV.exists(), f"Test CSV not found: {TEST_CSV}"
assert FEATURES_PKL.exists(), f"Features not found: {FEATURES_PKL}"
assert CKPT_PATH.exists(), f"Checkpoint not found: {CKPT_PATH}"

df_test = pd.read_csv(TEST_CSV)
feature_names = joblib.load(FEATURES_PKL)
print('Test rows:', len(df_test))
print('Feature count:', len(feature_names))
df_test.head()


AttributeError: 'NoneType' object has no attribute 'exists'

In [None]:
# Build sliding windows per unit and normalization similar to training
SEQ_LEN = 50  # must match training seq_len

# Build units grouped arrays
units = []
for unit, g in df_test.groupby('unit'):
    g = g.sort_values('cycle').reset_index(drop=True)
    feats = g[feature_names].to_numpy(dtype=np.float32)
    targ = g['RUL_clipped'].to_numpy(dtype=np.float32) if 'RUL_clipped' in g.columns else g['RUL'].to_numpy(dtype=np.float32)
    units.append({'unit': int(unit), 'cycles': g['cycle'].to_numpy(), 'features': feats, 'target': targ})

# compute global mean/std from test units (we assume training saved mean/std but it wasn't persisted; using test-set stats is ok for quick viz)
all_feats = np.vstack([u['features'] for u in units])
mean = all_feats.mean(axis=0)
std = all_feats.std(axis=0) + 1e-8

print('Units found:', len(units))
print('Feature dim:', units[0]['features'].shape[1])


In [None]:
# Define LSTMRegressor (must match architecture used in training)
class LSTMRegressor(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, num_layers=2, dropout=0.2, bidirectional=False):
        super().__init__()
        self.num_directions = 2 if bidirectional else 1
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout, bidirectional=bidirectional)
        self.head = nn.Sequential(
            nn.Linear(hidden_dim * self.num_directions, hidden_dim//2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim//2, 1)
        )
    def forward(self, x):
        out, _ = self.lstm(x)
        last = out[:, -1, :]
        return self.head(last).squeeze(1)

print('Model class defined')


In [None]:
# Load checkpoint, infer input dim from features
input_dim = len(feature_names)
print('Input dim:', input_dim)

ckpt = torch.load(CKPT_PATH, map_location='cpu')
# Attempt to detect hidden_dim & num_layers from state dict shapes
state = ckpt.get('model_state', ckpt)
# default hyperparams (fallbacks)
hidden_dim = 128
num_layers = 2
bidirectional = False

# instantiate model
model = LSTMRegressor(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers, dropout=0.2, bidirectional=bidirectional)
try:
    model.load_state_dict(state)
    print('Loaded state dict directly')
except Exception as e:
    # handle wrapper with 'model_state'
    if 'model_state' in ckpt:
        model.load_state_dict(ckpt['model_state'])
        print('Loaded model_state from checkpoint')
    else:
        raise

model.eval()
print('Model ready')


In [None]:
# Generate predictions for each possible window per unit and align with cycles
all_unit_preds = []

with torch.no_grad():
    for u in units:
        feats = u['features']
        L, F = feats.shape
        preds_per_cycle = np.full(L, np.nan, dtype=np.float32)
        # sliding windows
        for end in range(L):
            start = max(0, end - SEQ_LEN + 1)
            seq = feats[start:end+1]
            # pad if shorter
            if seq.shape[0] < SEQ_LEN:
                pad_len = SEQ_LEN - seq.shape[0]
                pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
                seq = np.vstack([pad, seq])
            # normalize
            seq = (seq - mean) / std
            seq_tensor = torch.from_numpy(seq).unsqueeze(0)  # (1, T, F)
            pred = model(seq_tensor).cpu().numpy().item()
            preds_per_cycle[end] = pred
        all_unit_preds.append({'unit': u['unit'], 'cycles': u['cycles'], 'preds': preds_per_cycle, 'targets': u['target']})

print('Predictions generated for all units')


In [None]:
# For each unit, take the prediction at the last observed cycle and compare to true RUL at last cycle
preds = []
trues = []
units_list = []
for u in all_unit_preds:
    last_pred = float(u['preds'][-1])
    last_true = float(u['targets'][-1])
    preds.append(last_pred)
    trues.append(last_true)
    units_list.append(u['unit'])

preds = np.array(preds)
trues = np.array(trues)

# Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae = mean_absolute_error(trues, preds)
rmse = np.sqrt(mean_squared_error(trues, preds))
r2 = r2_score(trues, preds)
print('Per-unit last-cycle metrics:')
print('MAE =', mae)
print('RMSE =', rmse)
print('R2 =', r2)

# Build DataFrame for plotting
df_eval = pd.DataFrame({'unit': units_list, 'true_rul': trues, 'pred_rul': preds})
df_eval = df_eval.sort_values('unit').reset_index(drop=True)
df_eval.head()


In [None]:
# Scatter: True vs Predicted RUL (last observed cycle)
plt.figure(figsize=(6,6))
plt.scatter(df_eval['true_rul'], df_eval['pred_rul'], alpha=0.7)
plt.plot([df_eval['true_rul'].min(), df_eval['true_rul'].max()], [df_eval['true_rul'].min(), df_eval['true_rul'].max()], 'r--')
plt.xlabel('True RUL')
plt.ylabel('Predicted RUL')
plt.title('FD001: True vs Predicted (last observed cycle)')
plt.grid(True)
plt.show()


In [None]:
# Error histogram
errors = df_eval['pred_rul'] - df_eval['true_rul']
plt.figure(figsize=(6,4))
plt.hist(errors, bins=30)
plt.title('Prediction Error Histogram (pred - true)')
plt.xlabel('Error (cycles)')
plt.ylabel('Count')
plt.show()

# Show top worst units
df_eval['abs_err'] = errors.abs()
print('Top 10 worst units by absolute error:')
print(df_eval.sort_values('abs_err', ascending=False).head(10))


In [None]:
# Plot time-series predictions vs true RUL for a few example units (worst, median, best)
worst_units = df_eval.sort_values('abs_err', ascending=False).head(3)['unit'].tolist()
median_unit = df_eval.sort_values('abs_err').iloc[len(df_eval)//2]['unit']
example_units = worst_units + [median_unit]

for u_id in example_units:
    u = next(x for x in all_unit_preds if x['unit'] == u_id)
    cycles = u['cycles']
    plt.figure(figsize=(8,3))
    plt.plot(cycles, u['targets'], label='true RUL')
    plt.plot(cycles, u['preds'], label='predicted RUL')
    plt.xlabel('Cycle')
    plt.ylabel('RUL')
    plt.title(f'Unit {u_id} — True vs Predicted RUL (all cycles)')
    plt.legend()
    plt.grid(True)
    plt.show()


In [None]:
# Save per-unit evaluation to CSV for later reporting
out_path = ROOT / 'analysis' / 'fd001_unit_predictions.csv'
out_path.parent.mkdir(parents=True, exist_ok=True)
df_eval.to_csv(out_path, index=False)
print('Saved per-unit eval to', out_path)
