In [None]:
import numpy as np
import pandas as pd
import pickle

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import BaseEstimator, TransformerMixin

In [None]:
# -------------------------------
# CONFIG
# -------------------------------
# Number of consecutive rows in one feature window. Used by the training
# feature extractor for its sliding windows and by the inference extractor as
# both the minimum input length and the size of the tail slice it reads.
WINDOW_SIZE = 30

In [None]:
# Sensor columns fed to the feature extractors, listed by sensor number.
# The order matters: it fixes the column order of the feature matrix.
SENSORS = [
    f's{number}'
    for number in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16)
]

In [None]:
# -------------------------------
# LOAD CMAPSS DATA
# -------------------------------
def load_cmapss(path):
    """Read a headerless, whitespace-delimited C-MAPSS text file.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        26 columns: ``engine_id``, ``cycle``, ``op1``..``op3`` and
        ``s1``..``s21``, in that order.
    """
    cols = ['engine_id', 'cycle', 'op1', 'op2', 'op3'] + \
           [f's{i}' for i in range(1, 22)]
    # Split on runs of whitespace rather than on a single space. With sep=' ',
    # a line that ends in spaces yields extra empty fields; because `names`
    # is then shorter than the field count, pandas silently uses the leading
    # columns as the index and every column name ends up shifted.
    # NOTE(review): assumes the data files may carry trailing spaces - confirm
    # against the actual train_FD001.txt.
    return pd.read_csv(path, sep=r'\s+', header=None, names=cols)

In [None]:
# Read train_FD001.txt; the relative path is resolved against the kernel's
# current working directory.
df = load_cmapss("train_FD001.txt")

In [None]:
# -------------------------------
# COMPUTE RUL
# -------------------------------
# Last recorded cycle of every engine, indexed by engine_id.
max_cycle = df.groupby('engine_id')['cycle'].max()
# RUL = that engine's last recorded cycle minus the row's own cycle.
# Mapping engine_id through `max_cycle` is vectorized; it replaces a row-wise
# DataFrame.apply that made one Python call per row and upcast each row to
# float before the lookup.
df['RUL'] = df['engine_id'].map(max_cycle) - df['cycle']

In [None]:
def rul_to_label(rul):
    """Bucket a remaining-useful-life value into a class index.

    Returns 0 when ``rul`` is greater than 50, 1 when it is greater than 20
    (but not greater than 50), and 2 for everything else - including values
    for which both comparisons are false, such as NaN.
    """
    # Thresholds are tested from highest to lowest; the position of the first
    # threshold that `rul` exceeds is the class index.
    for class_index, threshold in enumerate((50, 20)):
        if rul > threshold:
            return class_index
    return 2

In [None]:
# Element-wise conversion of each RUL value into its class index.
df['label'] = df['RUL'].map(rul_to_label)

In [None]:
# -------------------------------
# FEATURE EXTRACTION FOR TRAINING
# -------------------------------
def extract_training_features(df, window_size=None, sensors=None):
    """Build sliding-window training samples and labels for every engine.

    For each engine, every run of ``window_size`` consecutive rows (in the
    order the rows appear in ``df``) yields one sample holding three
    statistics per sensor - mean, standard deviation (NumPy default, ddof=0)
    and last-minus-first value. The sample's target is the ``label`` of the
    row immediately following the window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``engine_id``, ``label`` and every sensor column.
    window_size : int, optional
        Rows per window. Defaults to the module-level ``WINDOW_SIZE``.
    sensors : sequence of str, optional
        Sensor column names, in feature order. Defaults to the module-level
        ``SENSORS``.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, 3 * len(sensors))
    y : numpy.ndarray, shape (n_samples,)
        When no engine has more than ``window_size`` rows, ``X`` is an empty
        array that still has the correct number of columns.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if sensors is None:
        sensors = SENSORS
    sensors = list(sensors)

    X, y = [], []

    # One groupby pass instead of re-filtering the whole frame per engine;
    # sort=False keeps engines in order of first appearance.
    for _, eng in df.groupby('engine_id', sort=False):
        labels = eng['label'].values
        # Pull each sensor column out once per engine; windows below are
        # plain array slices rather than repeated DataFrame slicing.
        columns = [eng[s].values for s in sensors]

        for i in range(window_size, len(eng)):
            features = []
            for col in columns:
                v = col[i - window_size:i]
                features.extend([v.mean(), v.std(), v[-1] - v[0]])

            X.append(features)
            # Target is the row right after the window (position i).
            y.append(labels[i])

    if not X:
        # Keep the feature axis so downstream code sees a 2-D array.
        return np.empty((0, 3 * len(sensors))), np.array(y)
    return np.array(X), np.array(y)

In [None]:
# Feature matrix and label vector built from every engine in `df`.
X_train, y_train = extract_training_features(df)

In [None]:
# -------------------------------
# TRAIN SCALER + MODEL
# -------------------------------
# Fit the scaler on the training features first, then apply the fitted
# scaler to those same features to obtain the model's training input.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

In [None]:
# Random-forest classifier over the three label classes (0, 1, 2).
model = RandomForestClassifier(
    n_estimators=300,         # number of trees in the forest
    max_depth=12,             # cap on the depth of each tree
    class_weight="balanced",  # weight classes inversely to their frequency
    random_state=42           # fixed seed so repeated runs build the same forest
)

In [None]:
# Train on the standardized features; inference inputs must go through the
# same fitted `scaler` before being passed to the model.
model.fit(X_train_scaled, y_train)

In [None]:
# -------------------------------
# CSV FEATURE EXTRACTOR (INFERENCE ONLY)
# -------------------------------
class CSVFeatureExtractor(BaseEstimator, TransformerMixin):
    """Turn the tail of a raw sensor DataFrame into a single feature row.

    The last ``window_size`` rows of the input are summarized, per sensor, as
    mean, standard deviation (NumPy default, ddof=0) and last-minus-first
    value - the same three statistics, in the same order, that the training
    feature extractor computes for each window.

    Parameters
    ----------
    window_size : int, optional
        Rows taken from the end of the input. When left as ``None`` the
        module-level ``WINDOW_SIZE`` is read at transform time.
    sensors : sequence of str, optional
        Sensor column names, in feature order. When left as ``None`` the
        module-level ``SENSORS`` is read at transform time.
    """

    def __init__(self, window_size=None, sensors=None):
        # scikit-learn convention: __init__ stores its arguments untouched so
        # get_params / set_params / clone keep working.
        self.window_size = window_size
        self.sensors = sensors

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def transform(self, X):
        """Compute the feature row for the last ``window_size`` rows of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Raw sensor table (for example a parsed CSV), one row per reading.

        Returns
        -------
        numpy.ndarray, shape (1, 3 * number_of_sensors)

        Raises
        ------
        ValueError
            If ``X`` has fewer rows than the window size, or if a required
            sensor column is missing.
        """
        # getattr keeps instances unpickled from files written before these
        # attributes existed working; they fall back to the module globals.
        window_size = getattr(self, 'window_size', None)
        if window_size is None:
            window_size = WINDOW_SIZE
        sensors = getattr(self, 'sensors', None)
        if sensors is None:
            sensors = SENSORS

        if len(X) < window_size:
            # Report the configured size instead of a hard-coded "30".
            raise ValueError(f"CSV must contain at least {window_size} rows")

        # Validate every column up front so nothing is computed on bad input.
        for s in sensors:
            if s not in X.columns:
                raise ValueError(f"Missing sensor: {s}")

        window = X.iloc[-window_size:]
        features = []

        for s in sensors:
            v = window[s].values
            features.extend([v.mean(), v.std(), v[-1] - v[0]])

        # A single sample, shaped as a 2-D row.
        return np.array(features).reshape(1, -1)

In [None]:
# -------------------------------
# SAVE SINGLE PIPELINE OBJECT
# -------------------------------
# Plain dict bundling the inference pieces: the raw-frame feature extractor,
# the scaler fitted on the training features, the trained classifier, and a
# mapping from class index to a human-readable status.
# NOTE(review): presumably the consumer applies them in this order
# (feature_extractor -> scaler -> model -> label_map) - confirm against the
# loading code.
pipeline = {
    "feature_extractor": CSVFeatureExtractor(),
    "scaler": scaler,
    "model": model,
    # Keys are the class indices produced by rul_to_label.
    "label_map": {
        0: "HEALTHY",
        1: "MAINTENANCE",
        2: "REPLACE"
    }
}

In [None]:
# Serialize the whole bundle to one file in the working directory.
# pickle stores classes by reference (module path + class name), so whatever
# process loads this file must be able to resolve CSVFeatureExtractor under
# the module it was defined in here. Only unpickle files from trusted sources.
with open("engine_maintenance_pipeline.pkl", "wb") as f:
    pickle.dump(pipeline, f)

In [None]:
# Confirmation message; the leading character was a mis-decoded check-mark emoji.
print("✅ Pipeline saved as engine_maintenance_pipeline.pkl")