In [1]:
# ============================================
# Phase 4: Modeling & Evaluation
# ============================================

import os
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# --- Configuration ---------------------------------------------------------
# Project root. Set the PREDICTIVE_MAINTENANCE_ROOT environment variable to run
# this cell on another machine; otherwise the original local path is used.
BASE = Path(
    os.environ.get(
        "PREDICTIVE_MAINTENANCE_ROOT",
        r"C:\Users\lavan\OneDrive\Desktop\PredictiveMaintenanceProject",
    )
)
ST   = BASE / "data" / "staging"

FEATURE_COLS = ["HoursUsed", "Hours_7d_mean", "Temperature", "Vibration", "DaysSinceLastMaint"]
TARGET_COL = "label_fail_next7d"
TRAIN_DATE_QUANTILE = 0.8  # dates up to this quantile are used for training

# --- Load features ---------------------------------------------------------
feat = pd.read_csv(ST / "features_for_model.csv", parse_dates=["Date"])

# --- Features and target ---------------------------------------------------
# Missing feature values are replaced with 0.
# NOTE(review): 0 may not be a neutral value for every column (e.g. sensor
# readings) -- confirm this imputation is intended.
X = feat[FEATURE_COLS].fillna(0)
y = feat[TARGET_COL]

# --- Time-based split (latest ~20% of dates = test) ------------------------
# NOTE(review): the target name suggests a 7-day look-ahead label. If so,
# training rows dated within 7 days of `cutoff` carry labels derived from the
# test window; consider an embargo gap. TODO confirm against the labelling code.
cutoff = feat["Date"].quantile(TRAIN_DATE_QUANTILE)
mask_train = feat["Date"] <= cutoff
# Explicit comparison instead of `~mask_train`: rows with a missing Date compare
# False on both sides, so they are left out of the split rather than silently
# being treated as "future" test rows.
mask_test = feat["Date"] > cutoff
X_train, y_train = X[mask_train], y[mask_train]
X_test, y_test   = X[mask_test], y[mask_test]

if X_train.empty or X_test.empty:
    raise ValueError(
        f"Time-based split at {cutoff} produced an empty partition "
        f"(train={len(X_train)}, test={len(X_test)}); check the 'Date' column."
    )

# --- Train model -----------------------------------------------------------
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# --- Predictions -----------------------------------------------------------
pred = model.predict(X_test)
prob = model.predict_proba(X_test)[:, 1]  # second column of predict_proba

# --- Metrics ---------------------------------------------------------------
# NOTE(review): the recorded run of this cell printed ROC-AUC ~0.46, i.e. below
# the 0.5 chance level -- investigate features/labels before relying on the
# saved model.
print("Classification Report:\n", classification_report(y_test, pred))
if y_test.nunique() > 1:
    print("ROC-AUC:", roc_auc_score(y_test, prob))
else:
    # ROC-AUC needs both classes in y_true; skip it rather than abort the cell.
    print("ROC-AUC: undefined (test split contains a single class)")

# --- Save model ------------------------------------------------------------
MODEL_DIR = BASE / "models"
MODEL_DIR.mkdir(parents=True, exist_ok=True)  # also create missing parent dirs
MODEL_PATH = MODEL_DIR / "rf_model.pkl"
joblib.dump(model, MODEL_PATH)
print("✅ Model saved to:", MODEL_PATH)


Classification Report:
               precision    recall  f1-score   support

           0       0.28      0.26      0.27       109
           1       0.69      0.71      0.70       251

    accuracy                           0.57       360
   macro avg       0.48      0.48      0.48       360
weighted avg       0.56      0.57      0.57       360

ROC-AUC: 0.45902628020029973
✅ Model saved to: C:\Users\lavan\OneDrive\Desktop\PredictiveMaintenanceProject\models\rf_model.pkl
