In [1]:
# ============================================
# Phase 3: Feature Engineering
# ============================================

import os
from pathlib import Path

import pandas as pd

# Project root. Defaults to the original local checkout, but can be redirected
# by setting the PM_PROJECT_ROOT environment variable so the notebook runs on
# other machines without editing this cell.
BASE = Path(os.environ.get(
    "PM_PROJECT_ROOT",
    r"C:\Users\lavan\OneDrive\Desktop\PredictiveMaintenanceProject",
))
# Staging directory used for the CSV files read and written by this notebook.
ST = BASE / "data" / "staging"

# Load the cleaned staging tables.
def _read_staged(filename):
    """Read one CSV from the staging directory, parsing its Date column."""
    return pd.read_csv(ST / filename, parse_dates=["Date"])


usage = _read_staged("hostel_usage_logs_clean.csv")
maint = _read_staged("hostel_maintenance_logs_clean.csv")

# Put each device's rows in chronological order so that row-based windows
# below walk forward in time.
usage = usage.sort_values(by=["DeviceID", "Date"])


def _trailing_mean_7(hours):
    """Mean over the current row and up to six preceding rows of one device."""
    # The window counts rows, not calendar days; min_periods=1 lets the first
    # rows of each device average over whatever history exists so far.
    return hours.rolling(window=7, min_periods=1).mean()


# Rolling usage-hours mean, computed independently per device.
usage["Hours_7d_mean"] = (
    usage.groupby("DeviceID")["HoursUsed"].transform(_trailing_mean_7)
)

# Days since last maintenance.
# Each usage row is matched to the most recent maintenance visit of the same
# device that happened on or before that row's Date. Using the device's single
# latest maintenance date for every row (the previous approach) produced
# negative values for rows that precede that visit and leaked future
# information into the feature.
maint_events = (
    maint[["DeviceID", "Date"]]
    .dropna()
    .drop_duplicates()
    .rename(columns={"Date": "LastMaintDate"})
    .sort_values("LastMaintDate")
)
# merge_asof needs both frames sorted by their date key and rejects null keys
# on the left side; assumes the cleaned usage log has no missing Date values
# - TODO confirm against the cleaning step.
usage = (
    pd.merge_asof(
        usage.sort_values("Date", kind="stable"),
        maint_events,
        left_on="Date",
        right_on="LastMaintDate",
        by="DeviceID",
        direction="backward",
    )
    # Restore the per-device chronological order used by the rest of the cell.
    .sort_values(["DeviceID", "Date"])
    .reset_index(drop=True)
)
# Rows with no earlier maintenance visit have LastMaintDate = NaT; they keep
# the 9999 sentinel so the column stays an integer.
usage["DaysSinceLastMaint"] = (
    (usage["Date"] - usage["LastMaintDate"]).dt.days.fillna(9999).astype(int)
)

# Label: does the device fail within the NEXT 7 rows (strictly after this one)?
# The window is row-based; it equals 7 calendar days only if there is one row
# per device per day - TODO confirm against the usage log.
usage = usage.sort_values(["DeviceID", "Date"])
usage["future_fail_7d"] = (
    usage.groupby("DeviceID")["FailureFlag"]
    .transform(
        # shift(-1) drops the current row's own flag so a failure recorded on
        # the observation day does not mark that same row as positive. The
        # series is then reversed so a trailing rolling sum acts as a
        # look-ahead sum over the following 7 rows, and reversed back.
        lambda s: s.shift(-1)[::-1].rolling(7, min_periods=1).sum()[::-1]
    )
    # The last row of each device has no future rows, so its sum is NaN.
    .fillna(0)
)
usage["label_fail_next7d"] = (usage["future_fail_7d"] > 0).astype(int)

# Columns exported for modelling; the label is the last column.
feature_columns = [
    "DeviceID",
    "Date",
    "HoursUsed",
    "Hours_7d_mean",
    "Temperature",
    "Vibration",
    "DaysSinceLastMaint",
    "label_fail_next7d",
]
feat = usage.loc[:, feature_columns]

# Write the feature table to the staging directory and report its location.
features_path = ST / "features_for_model.csv"
feat.to_csv(features_path, index=False)
print("✅ Features saved to:", features_path)


✅ Features saved to: C:\Users\lavan\OneDrive\Desktop\PredictiveMaintenanceProject\data\staging\features_for_model.csv
