<a href="https://colab.research.google.com/github/kiran1465313/AI_Hackathon-TURBOFAN/blob/main/AI_Hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install numpy pandas scikit-learn tensorflow

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow import keras


In [None]:
# CMAPSS FD001 has: id, cycle, 3 settings, 21 sensors = 26 columns
col_names = (
    ["engine_id", "cycle"] +
    [f"setting{i}" for i in range(1, 4)] +
    [f"s{i}" for i in range(1, 22)]
)

train = pd.read_csv("train_FD001.txt", sep=r"\s+", header=None)
test  = pd.read_csv("test_FD001.txt",  sep=r"\s+", header=None)

train.columns = col_names
test.columns  = col_names

rul_test = np.loadtxt("RUL_FD001.txt")  # 100 values for FD001 test engines

print("train shape:", train.shape)
print("test shape :", test.shape)
print("train engines:", train.engine_id.nunique())
print("test engines :", test.engine_id.nunique())
print("RUL_test len :", len(rul_test), "range:", (rul_test.min(), rul_test.max()))


train shape: (20631, 26)
test shape : (13096, 26)
train engines: 100
test engines : 100
RUL_test len : 100 range: (np.float64(7.0), np.float64(145.0))


In [None]:
# RUL = max_cycle(engine) - current_cycle
max_cycle = train.groupby("engine_id")["cycle"].max()
train = train.merge(max_cycle.rename("max_cycle"), on="engine_id", how="left")
train["RUL"] = train["max_cycle"] - train["cycle"]
train.drop(columns=["max_cycle"], inplace=True)

train[["engine_id","cycle","RUL"]].head(), train[train.engine_id==1][["cycle","RUL"]].tail()


(   engine_id  cycle  RUL
 0          1      1  191
 1          1      2  190
 2          1      3  189
 3          1      4  188
 4          1      5  187,
      cycle  RUL
 187    188    4
 188    189    3
 189    190    2
 190    191    1
 191    192    0)

In [None]:
feature_cols = [c for c in col_names if c not in ["engine_id", "cycle"]]

# remove near-constant columns based on training variance
variances = train[feature_cols].var()
keep_cols = variances[variances > 1e-6].index.tolist()

print("Features before:", len(feature_cols))
print("Features kept  :", len(keep_cols))

feature_cols = keep_cols


Features before: 24
Features kept  : 16


In [None]:
scaler = StandardScaler()
train_scaled = train.copy()
test_scaled  = test.copy()

train_scaled[feature_cols] = scaler.fit_transform(train_scaled[feature_cols])
test_scaled[feature_cols]  = scaler.transform(test_scaled[feature_cols])

train_scaled.head()



Unnamed: 0,engine_id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s13,s14,s15,s16,s17,s18,s19,s20,s21,RUL
0,1,1,-0.31598,-0.0004,100.0,518.67,-1.721725,-0.134255,-0.925936,14.62,...,-1.05889,-0.269071,-0.603816,0.03,-0.78171,2388,100.0,1.348493,1.194427,191
1,1,2,0.872722,-0.0003,100.0,518.67,-1.06178,0.211528,-0.643726,14.62,...,-0.363646,-0.642845,-0.275852,0.03,-0.78171,2388,100.0,1.016528,1.236922,190
2,1,3,-1.961874,0.0003,100.0,518.67,-0.661813,-0.413166,-0.525953,14.62,...,-0.919841,-0.551629,-0.649144,0.03,-2.073094,2388,100.0,0.739891,0.503423,189
3,1,4,0.32409,0.0,100.0,518.67,-0.661813,-1.261314,-0.784831,14.62,...,-0.224597,-0.520176,-1.971665,0.03,-0.78171,2388,100.0,0.352598,0.777792,188
4,1,5,-0.864611,-0.0002,100.0,518.67,-0.621816,-1.251528,-0.301518,14.62,...,-0.780793,-0.521748,-0.339845,0.03,-0.136018,2388,100.0,0.463253,1.059552,187


In [None]:
SEQ_LEN = 30

def make_train_sequences(df, feature_cols, seq_len=30):
    X, y = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        rul = g["RUL"].values
        if len(g) <= seq_len:
            continue
        for i in range(len(g) - seq_len):
            X.append(feats[i:i+seq_len])
            y.append(rul[i+seq_len])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X, y = make_train_sequences(train_scaled, feature_cols, SEQ_LEN)
print("X:", X.shape, "y:", y.shape)


X: (17631, 30, 16) y: (17631,)


In [None]:
from sklearn.model_selection import train_test_split
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train:", X_tr.shape, y_tr.shape)
print("Val  :", X_val.shape, y_val.shape)


Train: (14104, 30, 16) (14104,)
Val  : (3527, 30, 16) (3527,)


In [None]:
tf.keras.utils.set_random_seed(42)

model = keras.Sequential([
    keras.layers.Input(shape=(SEQ_LEN, X.shape[2])),
    keras.layers.LSTM(64, return_sequences=True),
    keras.layers.Dropout(0.2),
    keras.layers.LSTM(32),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(optimizer=keras.optimizers.Adam(1e-3), loss="mse", metrics=["mae"])
model.summary()


In [None]:
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

history = model.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    epochs=40,
    batch_size=256,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 103ms/step - loss: 12028.4434 - mae: 91.0819 - val_loss: 10385.5303 - val_mae: 82.6070
Epoch 2/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 110ms/step - loss: 9805.5273 - mae: 78.9796 - val_loss: 8066.9360 - val_mae: 69.9615
Epoch 3/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 89ms/step - loss: 7475.6069 - mae: 65.7507 - val_loss: 5707.9087 - val_mae: 55.0765
Epoch 4/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 111ms/step - loss: 5226.7109 - mae: 51.5191 - val_loss: 3780.8928 - val_mae: 41.9123
Epoch 5/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 91ms/step - loss: 3473.9429 - mae: 39.3974 - val_loss: 2469.2510 - val_mae: 32.0113
Epoch 6/40
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 95ms/step - loss: 2334.2461 - mae: 31.1099 - val_loss: 1722.5458 - val_mae: 26.4077
Epoch 7/40
[1m56/56[0m [32m━━━━━━━━━━

In [None]:
pred_val = model.predict(X_val, verbose=0).ravel()
rmse = np.sqrt(mean_squared_error(y_val, pred_val))
mae  = mean_absolute_error(y_val, pred_val)

print("Validation RMSE:", rmse)
print("Validation MAE :", mae)


Validation RMSE: 14.569553746162052
Validation MAE : 9.909929275512695


In [None]:
def make_test_last_sequences(df, feature_cols, seq_len=30):
    X_last = []
    engine_ids = []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        if len(g) < seq_len:
            # pad from the beginning if very short (rare)
            pad = np.repeat(feats[:1], repeats=(seq_len - len(g)), axis=0)
            feats = np.vstack([pad, feats])
        X_last.append(feats[-seq_len:])
        engine_ids.append(eid)
    return np.array(X_last, dtype=np.float32), np.array(engine_ids)

X_test_last, eids = make_test_last_sequences(test_scaled, feature_cols, SEQ_LEN)
pred_test = model.predict(X_test_last, verbose=0).ravel()

# ensure ordering matches RUL file: typically engine_id 1..100
order = np.argsort(eids)
pred_test = pred_test[order]
eids = eids[order]

rmse_test = np.sqrt(mean_squared_error(rul_test, pred_test))
mae_test  = mean_absolute_error(rul_test, pred_test)

print("Test RMSE:", rmse_test)
print("Test MAE :", mae_test)
print("Engine IDs head:", eids[:10])


Test RMSE: 29.530152092358183
Test MAE : 18.344799089431763
Engine IDs head: [ 1  2  3  4  5  6  7  8  9 10]


In [None]:
import joblib, os

model.save("lstm_rul_fd001.h5")
joblib.dump(scaler, "scaler_fd001.pkl")
joblib.dump(feature_cols, "feature_cols_fd001.pkl")

print("Saved:", os.listdir("."))




Saved: ['.config', 'lstm_rul_fd001.h5', 'train_FD001.txt', 'feature_cols_fd001.pkl', 'test_FD001.txt', 'scaler_fd001.pkl', 'RUL_FD001.txt', 'sample_data']


In [None]:
# =========================
# Model improving block
# =========================
# (A) Clip RUL targets (very common for CMAPSS FD001)
RUL_CAP = 125
train_scaled["RUL_clipped"] = train_scaled["RUL"].clip(upper=RUL_CAP)

def make_train_sequences_target(df, feature_cols, target_col, seq_len=30):
    X, y = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        tgt = g[target_col].values
        if len(g) <= seq_len:
            continue
        for i in range(len(g) - seq_len):
            X.append(feats[i:i+seq_len])
            y.append(tgt[i+seq_len])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

# (B) Better split: by engine_id (avoid leakage of same engine into train+val)
engine_ids = train_scaled["engine_id"].unique()
rng = np.random.default_rng(42)
rng.shuffle(engine_ids)
split = int(0.8 * len(engine_ids))
tr_ids, val_ids = engine_ids[:split], engine_ids[split:]

train_tr = train_scaled[train_scaled.engine_id.isin(tr_ids)]
train_val = train_scaled[train_scaled.engine_id.isin(val_ids)]

X_tr2, y_tr2 = make_train_sequences_target(train_tr, feature_cols, "RUL_clipped", SEQ_LEN)
X_val2, y_val2 = make_train_sequences_target(train_val, feature_cols, "RUL_clipped", SEQ_LEN)

print("Train2:", X_tr2.shape, y_tr2.shape)
print("Val2  :", X_val2.shape, y_val2.shape)

# (C) Slightly improved model compile/training setup
tf.keras.utils.set_random_seed(42)

model2 = keras.Sequential([
    keras.layers.Input(shape=(SEQ_LEN, X_tr2.shape[2])),
    keras.layers.LSTM(128, return_sequences=True),
    keras.layers.Dropout(0.25),
    keras.layers.LSTM(64),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(1)
])

model2.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=["mae"]
)

callbacks2 = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-5),
]

history2 = model2.fit(
    X_tr2, y_tr2,
    validation_data=(X_val2, y_val2),
    epochs=60,
    batch_size=256,
    callbacks=callbacks2,
    verbose=1
)

pred_val2 = model2.predict(X_val2, verbose=0).ravel()
rmse2 = np.sqrt(mean_squared_error(y_val2, pred_val2))
mae2 = mean_absolute_error(y_val2, pred_val2)
print("Improved Validation RMSE:", rmse2)
print("Improved Validation MAE :", mae2)

# Use improved model going forward
model = model2


Train2: (14127, 30, 16) (14127,)
Val2  : (3504, 30, 16) (3504,)
Epoch 1/60
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 240ms/step - loss: 7525.5229 - mae: 76.3409 - val_loss: 4544.5698 - val_mae: 57.3047 - learning_rate: 0.0010
Epoch 2/60
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 229ms/step - loss: 3674.7429 - mae: 51.5262 - val_loss: 1903.7062 - val_mae: 39.0037 - learning_rate: 0.0010
Epoch 3/60
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 232ms/step - loss: 1535.8871 - mae: 34.2858 - val_loss: 731.1323 - val_mae: 22.5131 - learning_rate: 0.0010
Epoch 4/60
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 228ms/step - loss: 827.2211 - mae: 23.6402 - val_loss: 502.3005 - val_mae: 18.9925 - learning_rate: 0.0010
Epoch 5/60
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 230ms/step - loss: 473.2146 - mae: 17.9546 - val_loss: 325.2785 - val_mae: 14.3232 - learning_rate: 0.0010
Epoch 6/60
[1m5

In [26]:
# =========================
# Random Forest + Ensemble block (CPU Colab 12.7GB friendly)
# - Visible progress (sklearn verbose + chunk prints)
# - Timers for each stage
# - Uses max_depth + max_samples to speed up on CPU
# =========================
!pip -q install tqdm

import time
import numpy as np
import joblib
from tqdm.auto import tqdm

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# ---- Timer helper (elapsed time using time.time) ----
class Timer:
    def __init__(self, name): self.name = name
    def __enter__(self):
        self.t0 = time.time()
        print(f"\n[{self.name}] started...")
        return self
    def __exit__(self, exc_type, exc, tb):
        print(f"[{self.name}] done in {time.time() - self.t0:.2f} sec.")  # time.time usage [web:163]

# ---- Helpers: window builders ----
def make_train_sequences_flat(df, feature_cols, seq_len=30, target_col="RUL_clipped"):
    X, y = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        tgt = g[target_col].values
        if len(g) <= seq_len:
            continue
        for i in range(len(g) - seq_len):
            X.append(feats[i:i+seq_len].reshape(-1))  # flatten
            y.append(tgt[i+seq_len])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

def make_train_sequences_seq(df, feature_cols, seq_len=30, target_col="RUL_clipped"):
    X, y = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        tgt = g[target_col].values
        if len(g) <= seq_len:
            continue
        for i in range(len(g) - seq_len):
            X.append(feats[i:i+seq_len])
            y.append(tgt[i+seq_len])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

def make_test_last_sequences_flat(df, feature_cols, seq_len=30):
    X_last, engine_ids = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        if len(g) < seq_len:
            pad = np.repeat(feats[:1], repeats=(seq_len - len(g)), axis=0)
            feats = np.vstack([pad, feats])
        X_last.append(feats[-seq_len:].reshape(-1))
        engine_ids.append(int(eid))
    X_last = np.array(X_last, dtype=np.float32)
    engine_ids = np.array(engine_ids, dtype=int)
    order = np.argsort(engine_ids)
    return X_last[order], engine_ids[order]

def make_test_last_sequences_seq(df, feature_cols, seq_len=30):
    X_last, engine_ids = [], []
    for eid, g in df.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        if len(g) < seq_len:
            pad = np.repeat(feats[:1], repeats=(seq_len - len(g)), axis=0)
            feats = np.vstack([pad, feats])
        X_last.append(feats[-seq_len:])
        engine_ids.append(int(eid))
    X_last = np.array(X_last, dtype=np.float32)
    engine_ids = np.array(engine_ids, dtype=int)
    order = np.argsort(engine_ids)
    return X_last[order], engine_ids[order]

# =========================
# 0) Pre-check prints
# =========================
print("SEQ_LEN:", SEQ_LEN)
print("feature_cols:", len(feature_cols))
print("train_scaled rows:", len(train_scaled), "| engines:", train_scaled.engine_id.nunique())

# =========================
# 1) Clip target
# =========================
RUL_CAP = 125
if "RUL_clipped" not in train_scaled.columns:
    train_scaled["RUL_clipped"] = train_scaled["RUL"].clip(upper=RUL_CAP)

# =========================
# 2) Engine-wise split (no leakage)
# =========================
engine_ids_all = train_scaled["engine_id"].unique()
rng = np.random.default_rng(42)
rng.shuffle(engine_ids_all)
split = int(0.8 * len(engine_ids_all))
tr_ids, val_ids = engine_ids_all[:split], engine_ids_all[split:]

train_tr = train_scaled[train_scaled.engine_id.isin(tr_ids)]
train_val = train_scaled[train_scaled.engine_id.isin(val_ids)]
print("Train engines:", len(tr_ids), "| Val engines:", len(val_ids))

# =========================
# 3) Build RF windows
# =========================
with Timer("Build RF windows (flat)"):
    X_rf_tr, y_rf_tr = make_train_sequences_flat(train_tr, feature_cols, SEQ_LEN, target_col="RUL_clipped")
    X_rf_val, y_rf_val = make_train_sequences_flat(train_val, feature_cols, SEQ_LEN, target_col="RUL_clipped")

print("RF Train X:", X_rf_tr.shape, "Val X:", X_rf_val.shape)

# =========================
# 4) Train Random Forest (CPU-friendly)
#    - warm_start incremental trees (supported) [web:125]
#    - max_depth speeds up training [web:125]
#    - max_samples subsamples rows per tree if bootstrap=True (default) [web:125]
# =========================
N_EST_TOTAL = 100   # good starting point on free CPU
CHUNK = 20          # prints every 20 trees

rf = RandomForestRegressor(
    n_estimators=0,
    warm_start=True,        # incremental trees [web:125]
    n_jobs=-1,
    random_state=42,
    min_samples_leaf=2,
    max_depth=12,           # CPU speed-up [web:125]
    max_samples=0.7,        # CPU speed-up [web:125]
    verbose=2               # show progress logs [web:125]
)

print("\n[Train RF] starting... you should see sklearn logs + chunk prints.")
start_all = time.time()

for n in range(CHUNK, N_EST_TOTAL + 1, CHUNK):
    rf.set_params(n_estimators=n)
    t0 = time.time()
    rf.fit(X_rf_tr, y_rf_tr)
    print(f"\n>>> RF progress: {n}/{N_EST_TOTAL} trees finished | chunk time: {time.time()-t0:.2f}s | total: {time.time()-start_all:.2f}s")

# RF validation
with Timer("RF validation predict"):
    pred_rf_val = rf.predict(X_rf_val)

rmse_rf = np.sqrt(mean_squared_error(y_rf_val, pred_rf_val))
mae_rf = mean_absolute_error(y_rf_val, pred_rf_val)
print("RF Validation RMSE:", rmse_rf)
print("RF Validation MAE :", mae_rf)

# =========================
# 5) LSTM validation (same val engines)
# =========================
with Timer("Build LSTM val windows (seq)"):
    X_lstm_val, y_lstm_val = make_train_sequences_seq(train_val, feature_cols, SEQ_LEN, target_col="RUL_clipped")

with Timer("LSTM validation predict"):
    pred_lstm_val = model.predict(X_lstm_val, verbose=0).ravel()

rmse_lstm = np.sqrt(mean_squared_error(y_lstm_val, pred_lstm_val))
mae_lstm  = mean_absolute_error(y_lstm_val, pred_lstm_val)
print("LSTM Validation RMSE:", rmse_lstm)
print("LSTM Validation MAE :", mae_lstm)

# =========================
# 6) Find best ensemble weight (prints progress)
# =========================
weights = np.linspace(0, 1, 41)
best_w, best_rmse = None, 1e18

with Timer("Ensemble weight search"):
    for w in tqdm(weights, desc="Searching weight w (LSTM share)"):
        ens = w * pred_lstm_val + (1 - w) * pred_rf_val[:len(pred_lstm_val)]
        rmse = np.sqrt(mean_squared_error(y_lstm_val, ens))
        if rmse < best_rmse:
            best_rmse = rmse
            best_w = float(w)

print("Best w (LSTM):", best_w, "| Best Val RMSE:", best_rmse)

# =========================
# 7) TEST: last-window predictions + ensemble
# =========================
with Timer("Build test last windows (LSTM + RF)"):
    X_test_last_seq, eids_test = make_test_last_sequences_seq(test_scaled, feature_cols, SEQ_LEN)
    X_test_last_flat, eids_test2 = make_test_last_sequences_flat(test_scaled, feature_cols, SEQ_LEN)

assert np.all(eids_test == eids_test2)

with Timer("Predict test (LSTM)"):
    pred_lstm_test = model.predict(X_test_last_seq, verbose=0).ravel()
    pred_lstm_test = np.maximum(pred_lstm_test, 0.0)

with Timer("Predict test (RF)"):
    pred_rf_test = rf.predict(X_test_last_flat)
    pred_rf_test = np.maximum(pred_rf_test, 0.0)

pred_ens_test = best_w * pred_lstm_test + (1 - best_w) * pred_rf_test

rmse_ens = np.sqrt(mean_squared_error(rul_test, pred_ens_test))
mae_ens = mean_absolute_error(rul_test, pred_ens_test)
print("Ensemble Test RMSE:", rmse_ens)
print("Ensemble Test MAE :", mae_ens)

# =========================
# 8) Save artifacts for dashboard inference
# =========================
joblib.dump(rf, "rf_rul_fd001.pkl")
joblib.dump({"w_lstm": best_w, "seq_len": int(SEQ_LEN)}, "ensemble_cfg.pkl")
print("Saved: rf_rul_fd001.pkl, ensemble_cfg.pkl")


SEQ_LEN: 30
feature_cols: 16
train_scaled rows: 20631 | engines: 100
Train engines: 80 | Val engines: 20

[Build RF windows (flat)] started...
[Build RF windows (flat)] done in 0.42 sec.
RF Train X: (14127, 480) Val X: (3504, 480)

[Train RF] starting... you should see sklearn logs + chunk prints.
building tree 1 of 20
building tree 2 of 20


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.


building tree 3 of 20
building tree 4 of 20
building tree 5 of 20
building tree 6 of 20
building tree 7 of 20
building tree 8 of 20
building tree 9 of 20
building tree 10 of 20
building tree 11 of 20
building tree 12 of 20
building tree 13 of 20
building tree 14 of 20
building tree 15 of 20
building tree 16 of 20
building tree 17 of 20
building tree 18 of 20
building tree 19 of 20
building tree 20 of 20


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   57.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.



>>> RF progress: 20/100 trees finished | chunk time: 57.01s | total: 57.01s
building tree 1 of 20
building tree 2 of 20
building tree 3 of 20
building tree 4 of 20
building tree 5 of 20
building tree 6 of 20
building tree 7 of 20
building tree 8 of 20
building tree 9 of 20
building tree 10 of 20
building tree 11 of 20
building tree 12 of 20
building tree 13 of 20
building tree 14 of 20
building tree 15 of 20
building tree 16 of 20
building tree 17 of 20
building tree 18 of 20
building tree 19 of 20
building tree 20 of 20


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   57.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.



>>> RF progress: 40/100 trees finished | chunk time: 57.02s | total: 114.03s
building tree 1 of 20
building tree 2 of 20
building tree 3 of 20
building tree 4 of 20
building tree 5 of 20
building tree 6 of 20
building tree 7 of 20
building tree 8 of 20
building tree 9 of 20
building tree 10 of 20
building tree 11 of 20
building tree 12 of 20
building tree 13 of 20
building tree 14 of 20
building tree 15 of 20
building tree 16 of 20
building tree 17 of 20
building tree 18 of 20
building tree 19 of 20
building tree 20 of 20


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   58.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.



>>> RF progress: 60/100 trees finished | chunk time: 58.95s | total: 172.98s
building tree 1 of 20
building tree 2 of 20
building tree 3 of 20
building tree 4 of 20
building tree 5 of 20
building tree 6 of 20
building tree 7 of 20
building tree 8 of 20
building tree 9 of 20
building tree 10 of 20
building tree 11 of 20
building tree 12 of 20
building tree 13 of 20
building tree 14 of 20
building tree 15 of 20
building tree 16 of 20
building tree 17 of 20
building tree 18 of 20
building tree 19 of 20
building tree 20 of 20


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   56.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.



>>> RF progress: 80/100 trees finished | chunk time: 56.43s | total: 229.41s
building tree 1 of 20
building tree 2 of 20
building tree 3 of 20
building tree 4 of 20
building tree 5 of 20
building tree 6 of 20
building tree 7 of 20
building tree 8 of 20
building tree 9 of 20
building tree 10 of 20
building tree 11 of 20
building tree 12 of 20
building tree 13 of 20
building tree 14 of 20
building tree 15 of 20
building tree 16 of 20
building tree 17 of 20
building tree 18 of 20
building tree 19 of 20
building tree 20 of 20


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   58.3s finished
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  37 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    0.1s finished



>>> RF progress: 100/100 trees finished | chunk time: 58.32s | total: 287.73s

[RF validation predict] started...
[RF validation predict] done in 0.10 sec.
RF Validation RMSE: 18.83441871742764
RF Validation MAE : 14.398413610684681

[Build LSTM val windows (seq)] started...
[Build LSTM val windows (seq)] done in 0.04 sec.

[LSTM validation predict] started...
[LSTM validation predict] done in 0.98 sec.
LSTM Validation RMSE: 31.173988610987347
LSTM Validation MAE : 17.420494079589844

[Ensemble weight search] started...


Searching weight w (LSTM share):   0%|          | 0/41 [00:00<?, ?it/s]

[Ensemble weight search] done in 0.05 sec.
Best w (LSTM): 0.325 | Best Val RMSE: 13.679853817151937

[Build test last windows (LSTM + RF)] started...
[Build test last windows (LSTM + RF)] done in 0.21 sec.

[Predict test (LSTM)] started...
[Predict test (LSTM)] done in 0.11 sec.

[Predict test (RF)] started...
[Predict test (RF)] done in 0.05 sec.
Ensemble Test RMSE: 18.2193932933568
Ensemble Test MAE : 13.543856155544331
Saved: rf_rul_fd001.pkl, ensemble_cfg.pkl


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  37 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    0.0s finished


In [None]:
# =========================
# Dashboard block (Colab)
# =========================
!pip -q install ipywidgets
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np

# Helper: create last sequence for ONE engine from already-scaled test df
def get_last_sequence_for_engine(test_scaled_df, engine_id, feature_cols, seq_len=30):
    g = test_scaled_df[test_scaled_df.engine_id == engine_id].sort_values("cycle")
    if g.empty:
        return None, None
    feats = g[feature_cols].values
    cycles = g["cycle"].values
    if len(g) < seq_len:
        pad = np.repeat(feats[:1], repeats=(seq_len - len(g)), axis=0)
        feats = np.vstack([pad, feats])
    x = feats[-seq_len:]
    return x.astype(np.float32), cycles

def predict_rul_for_engine(engine_id):
    x, cycles = get_last_sequence_for_engine(test_scaled, engine_id, feature_cols, SEQ_LEN)
    if x is None:
        return None
    pred = float(model.predict(x[None, ...], verbose=0).ravel()[0])
    # Optional safety: RUL should not be negative
    pred = max(0.0, pred)
    return pred, cycles

# UI
title = widgets.HTML("<h3>CMAPSS FD001 RUL Dashboard</h3>")
engine_in = widgets.BoundedIntText(
    value=1, min=int(test_scaled.engine_id.min()), max=int(test_scaled.engine_id.max()),
    description="Engine #", layout=widgets.Layout(width="300px")
)
btn = widgets.Button(description="Predict RUL", button_style="success")
out = widgets.Output()

def on_click(_):
    with out:
        clear_output(wait=True)
        engine_id = int(engine_in.value)
        result = predict_rul_for_engine(engine_id)
        if result is None:
            print(f"Engine #{engine_id} not found in test set.")
            return
        pred, cycles = result
        print(f"Engine #{engine_id}  |  Predicted RUL: {pred:.2f} cycles")

        # Simple visualization: show last SEQ_LEN cycles window
        g = test_scaled[test_scaled.engine_id == engine_id].sort_values("cycle")
        tail = g.tail(SEQ_LEN)
        plt.figure(figsize=(10, 3))
        plt.plot(tail["cycle"].values, np.arange(len(tail)), label="Window index (last 30 cycles)")
        plt.title(f"Engine #{engine_id} - last {SEQ_LEN} cycles window")
        plt.xlabel("Cycle")
        plt.ylabel("Index")
        plt.grid(True)
        plt.legend()
        plt.show()

btn.on_click(on_click)

display(title, widgets.HBox([engine_in, btn]), out)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m1.0/1.6 MB[0m [31m23.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25h

HTML(value='<h3>CMAPSS FD001 RUL Dashboard</h3>')

HBox(children=(BoundedIntText(value=1, description='Engine #', layout=Layout(width='300px'), min=1), Button(bu…

Output()

In [27]:
# =========================
# Gradio Web Dashboard (LSTM + RF Ensemble)
# Shows: Ensemble + LSTM-only outputs side-by-side
# =========================
!pip -q install gradio plotly pandas numpy scikit-learn tensorflow joblib

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import gradio as gr
import joblib
from tensorflow import keras

# -------- Load artifacts --------
LSTM_MODEL_PATH = "lstm_rul_fd001.h5"
SCALER_PATH     = "scaler_fd001.pkl"
FEATS_PATH      = "feature_cols_fd001.pkl"

RF_PATH         = "rf_rul_fd001.pkl"
ENS_CFG_PATH    = "ensemble_cfg.pkl"

# FIX: inference load avoids 'mse' deserialization issues [web:77]
lstm = keras.models.load_model(LSTM_MODEL_PATH, compile=False)  # [web:77]
scaler = joblib.load(SCALER_PATH)
feature_cols = joblib.load(FEATS_PATH)

rf = joblib.load(RF_PATH)
ens_cfg = joblib.load(ENS_CFG_PATH)
W_LSTM = float(ens_cfg.get("w_lstm", 0.5))
SEQ_LEN = int(ens_cfg.get("seq_len", 30))

# CMAPSS FD001 column schema
col_names = (
    ["engine_id", "cycle"] +
    [f"setting{i}" for i in range(1, 4)] +
    [f"s{i}" for i in range(1, 22)]
)

# --- Status thresholds (edit as needed) ---
GREEN_TH = 50
YELLOW_TH = 20

def status_from_rul(rul):
    if rul >= GREEN_TH:
        return "GREEN", "#16a34a", "Airworthy / OK"
    if rul >= YELLOW_TH:
        return "YELLOW", "#f59e0b", "Warning / Schedule maintenance"
    return "RED", "#dc2626", "Critical / Ground soon"

def _read_cmapss_txt(file_obj) -> pd.DataFrame:
    path = file_obj.name
    df = pd.read_csv(path, sep=r"\s+", header=None)
    df = df.iloc[:, :len(col_names)]
    df.columns = col_names
    return df

def _scale(df: pd.DataFrame) -> pd.DataFrame:
    df2 = df.copy()
    df2[feature_cols] = scaler.transform(df2[feature_cols])
    return df2

def _seq_rolling_engine(g_scaled: pd.DataFrame, seq_len=30):
    g_scaled = g_scaled.sort_values("cycle")
    feats = g_scaled[feature_cols].values
    cycles = g_scaled["cycle"].values

    if len(g_scaled) < seq_len:
        pad = np.repeat(feats[:1], repeats=(seq_len - len(g_scaled)), axis=0)
        feats = np.vstack([pad, feats])
        cycles = np.concatenate([np.full(seq_len - len(g_scaled), cycles[0]), cycles])

    X, cycles_y = [], []
    for i in range(seq_len, len(feats) + 1):
        X.append(feats[i-seq_len:i])
        cycles_y.append(cycles[i-1])
    return np.array(cycles_y, dtype=int), np.array(X, dtype=np.float32)

def _rf_predict_from_seq(X_seq: np.ndarray) -> np.ndarray:
    # X_seq: (N, seq_len, F) -> flatten for RF: (N, seq_len*F)
    X_flat = X_seq.reshape((X_seq.shape[0], -1))
    return rf.predict(X_flat)

def _fleet_last_windows(test_scaled: pd.DataFrame, seq_len=30):
    X_last, eids = [], []
    for eid, g in test_scaled.groupby("engine_id"):
        g = g.sort_values("cycle")
        feats = g[feature_cols].values
        if len(g) < seq_len:
            pad = np.repeat(feats[:1], repeats=(seq_len - len(g)), axis=0)
            feats = np.vstack([pad, feats])
        X_last.append(feats[-seq_len:])
        eids.append(int(eid))
    X_last = np.array(X_last, dtype=np.float32)
    eids = np.array(eids, dtype=int)
    order = np.argsort(eids)
    return X_last[order], eids[order]

def build_dashboard(file_obj, engine_choice, top_k, sensor_choice):
    if file_obj is None:
        return (
            "### Upload a test file to begin.",
            None, None, None, None, None, None,
            pd.DataFrame()
        )

    # Read + validate + scale
    test_df = _read_cmapss_txt(file_obj)
    missing = [c for c in feature_cols if c not in test_df.columns]
    if missing:
        return (
            f"### Error\nMissing required columns: {missing[:10]} ...",
            None, None, None, None, None, None,
            pd.DataFrame()
        )

    test_scaled = _scale(test_df)

    # -------- Fleet overview (Ensemble) --------
    X_last, eids = _fleet_last_windows(test_scaled, SEQ_LEN)

    pred_lstm_fleet = lstm.predict(X_last, verbose=0).ravel()
    pred_rf_fleet = _rf_predict_from_seq(X_last)
    pred_ens_fleet = W_LSTM * pred_lstm_fleet + (1 - W_LSTM) * pred_rf_fleet

    pred_lstm_fleet = np.maximum(pred_lstm_fleet, 0.0)
    pred_ens_fleet = np.maximum(pred_ens_fleet, 0.0)

    fleet = pd.DataFrame({
        "engine_id": eids,
        "RUL_ensemble": pred_ens_fleet,
        "RUL_lstm": pred_lstm_fleet
    }).sort_values("RUL_ensemble", ascending=True).reset_index(drop=True)

    k = max(1, min(int(top_k), len(fleet)))
    top_df = fleet.head(k).sort_values("RUL_ensemble", ascending=True)

    fig_topk = px.bar(
        top_df, x="engine_id", y="RUL_ensemble",
        title=f"Top {k} Critical Engines (Ensemble RUL)",
        labels={"engine_id": "Engine ID", "RUL_ensemble": "Predicted RUL (cycles)"}
    )

    fig_hist = px.histogram(
        fleet, x="RUL_ensemble", nbins=30,
        title="Fleet RUL Distribution (Ensemble)",
        labels={"RUL_ensemble": "Predicted RUL (cycles)"}
    )

    fig_scatter = px.scatter(
        fleet, x="engine_id", y="RUL_ensemble",
        title="Fleet Overview: Ensemble RUL by Engine",
        labels={"engine_id": "Engine ID", "RUL_ensemble": "Predicted RUL (cycles)"}
    )

    # -------- Selected engine details --------
    engine_choice = int(engine_choice)
    eng_raw = test_df[test_df.engine_id == engine_choice].sort_values("cycle")
    eng_scaled = test_scaled[test_scaled.engine_id == engine_choice].sort_values("cycle")

    if eng_raw.empty:
        return (
            f"### Engine #{engine_choice} not found in uploaded file.",
            fig_topk, fig_hist, fig_scatter,
            None, None, None,
            top_df
        )

    # Single-point predictions (last window)
    x_last = X_last[np.where(eids == engine_choice)[0][0]][None, ...]
    rul_lstm = float(np.maximum(lstm.predict(x_last, verbose=0).ravel()[0], 0.0))
    rul_rf = float(np.maximum(_rf_predict_from_seq(x_last)[0], 0.0))
    rul_ens = float(np.maximum(W_LSTM * rul_lstm + (1 - W_LSTM) * rul_rf, 0.0))

    status_name, status_color, status_msg = status_from_rul(rul_ens)

    details_md = f"""
### Engine #{engine_choice} — Details

**Ensemble RUL:** `{rul_ens:.2f}` cycles
**LSTM-only RUL:** `{rul_lstm:.2f}` cycles
**Ensemble weight:** `w_lstm={W_LSTM:.3f}` (Ensemble = w\*LSTM + (1-w)\*RF)

**Status (based on Ensemble):**
<span style="background:{status_color}; color:white; padding:6px 10px; border-radius:10px; font-weight:700;">
{status_name}
</span>

**Action:** {status_msg}
**Cycles in uploaded file:** `{int(eng_raw.cycle.max())}`
"""

    # Rolling RUL trend curves (LSTM vs Ensemble)
    cycles_y, X_roll = _seq_rolling_engine(eng_scaled, SEQ_LEN)
    roll_lstm = np.maximum(lstm.predict(X_roll, verbose=0).ravel(), 0.0)
    roll_rf = np.maximum(_rf_predict_from_seq(X_roll), 0.0)
    roll_ens = np.maximum(W_LSTM * roll_lstm + (1 - W_LSTM) * roll_rf, 0.0)

    trend_df = pd.DataFrame({
        "cycle": cycles_y,
        "RUL_lstm": roll_lstm,
        "RUL_ensemble": roll_ens
    })

    # One chart with both lines
    fig_rul_trend = go.Figure()
    fig_rul_trend.add_trace(go.Scatter(x=trend_df["cycle"], y=trend_df["RUL_ensemble"],
                                       mode="lines", name="Ensemble RUL", line=dict(width=3)))
    fig_rul_trend.add_trace(go.Scatter(x=trend_df["cycle"], y=trend_df["RUL_lstm"],
                                       mode="lines", name="LSTM RUL", line=dict(width=2, dash="dot")))
    fig_rul_trend.update_layout(
        title=f"Engine #{engine_choice}: RUL Trend (Ensemble vs LSTM)",
        xaxis_title="Cycle",
        yaxis_title="Predicted RUL (cycles)"
    )
    # Risk bands
    fig_rul_trend.add_hrect(y0=0, y1=YELLOW_TH, fillcolor="rgba(220,38,38,0.15)", line_width=0)
    fig_rul_trend.add_hrect(y0=YELLOW_TH, y1=GREEN_TH, fillcolor="rgba(245,158,11,0.15)", line_width=0)
    fig_rul_trend.add_hrect(y0=GREEN_TH, y1=max(200, float(trend_df[["RUL_lstm","RUL_ensemble"]].max().max()) + 10),
                            fillcolor="rgba(22,163,74,0.12)", line_width=0)

    # Sensor plot
    sensor_choice = str(sensor_choice)
    if sensor_choice not in eng_raw.columns:
        sensor_choice = "s12"

    fig_sensor = px.line(
        eng_raw, x="cycle", y=sensor_choice,
        title=f"Engine #{engine_choice}: {sensor_choice} vs Cycle (Raw)",
        labels={"cycle": "Cycle", sensor_choice: sensor_choice}
    )

    # Gauge uses Ensemble RUL
    max_gauge = 150
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=rul_ens,
        title={"text": f"Engine #{engine_choice} Health (Ensemble RUL)"},
        gauge={
            "axis": {"range": [0, max_gauge]},
            "bar": {"color": status_color},
            "steps": [
                {"range": [0, YELLOW_TH], "color": "rgba(220,38,38,0.25)"},
                {"range": [YELLOW_TH, GREEN_TH], "color": "rgba(245,158,11,0.25)"},
                {"range": [GREEN_TH, max_gauge], "color": "rgba(22,163,74,0.20)"},
            ],
        }
    ))
    fig_gauge.update_layout(height=260, margin=dict(l=20, r=20, t=60, b=20))

    # Return: engine details + fleet plots + engine plots + small top-k table
    return details_md, fig_topk, fig_hist, fig_scatter, fig_rul_trend, fig_sensor, fig_gauge, top_df

# ---------------- UI ----------------
sensor_options = [f"s{i}" for i in range(1, 22)]

with gr.Blocks(title="Turbofan RUL Dashboard (FD001) - Ensemble") as demo:  # multi-output events [web:85]
    gr.Markdown("## Turbofan Engine RUL Predictor (FD001)\nUploads → Engine details (Ensemble + LSTM) + Fleet overview.")

    with gr.Row():
        file_in = gr.File(label="Upload test file (.txt)", file_types=[".txt", ".csv"])
        engine_in = gr.Number(value=1, label="Engine #X", precision=0)
        topk_in = gr.Slider(minimum=3, maximum=30, value=10, step=1, label="Top-K critical engines")
        sensor_in = gr.Dropdown(choices=sensor_options, value="s12", label="Sensor to display")

    btn = gr.Button("Run / Refresh", variant="primary")

    # Engine details (shows both outputs)
    engine_md = gr.Markdown()  # [web:106]

    with gr.Row():
        engine_rul_plot = gr.Plot(label="Selected Engine: RUL Trend (Ensemble vs LSTM)")  # [web:86]
        engine_sensor_plot = gr.Plot(label="Selected Engine: Sensor Trend")              # [web:86]

    engine_gauge = gr.Plot(label="Selected Engine: Status (Ensemble)")  # [web:86]

    gr.Markdown("### Fleet overview (Ensemble)")
    with gr.Row():
        fleet_topk_plot = gr.Plot(label="Top-K Critical Engines (Ensemble)")  # [web:86]
        fleet_hist_plot = gr.Plot(label="Fleet RUL Distribution (Ensemble)")  # [web:86]

    fleet_scatter_plot = gr.Plot(label="Fleet Scatter (Ensemble)")           # [web:86]
    topk_table = gr.Dataframe(label="Top-K Table", interactive=False)        # [web:55]

    btn.click(
        fn=build_dashboard,
        inputs=[file_in, engine_in, topk_in, sensor_in],
        outputs=[
            engine_md,
            fleet_topk_plot, fleet_hist_plot, fleet_scatter_plot,
            engine_rul_plot, engine_sensor_plot, engine_gauge,
            topk_table
        ]
    )

demo.launch(share=True, debug=True)



invalid escape sequence '\*'


invalid escape sequence '\*'


invalid escape sequence '\*'



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://789c74cb14eb61e189.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 416, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1139, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 107, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/error

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://789c74cb14eb61e189.gradio.live


