In [1]:
import os
import joblib
import pandas as pd
import numpy as np

# ---------- CONFIG ----------
# Everything (dataset, trained models, output CSV) lives under one root folder.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Wearable Data Wellness Anomaly Detector"
DATA_PATH = os.path.join(BASE_DIR, "archive", "wearable_sports_health_dataset.csv")

# The three remaining paths sit directly in BASE_DIR.
WELLNESS_MODEL_PATH, ANOMALY_MODEL_PATH, OUT_PREDICTIONS = (
    os.path.join(BASE_DIR, leaf)
    for leaf in ("wellness_model.pkl", "anomaly_model.pkl", "predictions.csv")
)

# ---------- LOAD MODELS ----------
# NOTE: joblib.load unpickles the files, so they must come from a trusted source.
print("[INFO] Loading trained models...")
wellness_model = joblib.load(WELLNESS_MODEL_PATH)
anomaly_model = joblib.load(ANOMALY_MODEL_PATH)

# ---------- LOAD DATA ----------
print("[INFO] Loading dataset...")
df = pd.read_csv(DATA_PATH)

# Map the dataset's column names onto the names used by the rest of the script.
rename_map = {"Timestamp": "timestamp", "Heart_Rate": "heart_rate", "Step_Count": "steps"}
df = df.rename(columns=rename_map)

# ---------- TIMESTAMP ----------
# Parse the timestamp column when present; unparseable values become NaT.
has_timestamp = "timestamp" in df.columns
if has_timestamp:
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

# No column at all, or nothing parseable in it: substitute an hourly series
# starting 2024-01-01 so the time-derived features can still be computed.
if not has_timestamp or df["timestamp"].isna().all():
    df["timestamp"] = pd.date_range("2024-01-01", periods=len(df), freq="h")
# ---------- FEATURE ENGINEERING ----------
def feature_engineering(df, *, random_state=None):
    """Add the derived feature columns to a frame of wearable readings.

    The input frame is copied, so the caller's object is not modified. Rows
    are processed in their existing order; no sorting is performed here.

    Args:
        df: DataFrame with ``heart_rate`` and ``steps`` columns and a
            datetime-typed ``timestamp`` column.
        random_state: Seed (or any value accepted by
            ``np.random.default_rng``) for the randomly generated
            ``sleep_hours`` column. ``None`` (the default) keeps the
            previous behaviour of drawing from NumPy's global random state,
            so results then only repeat if the caller seeds ``np.random``.

    Returns:
        A new DataFrame holding the original columns plus, in this order:
        ``hr_roll_mean``, ``steps_roll_mean``, ``hour``, ``is_night``,
        ``hr_diff``, ``hrv``, ``sleep_hours`` and ``sleep_quality``.
        ``hr_diff`` is NaN on the first row and ``hrv`` is NaN until two
        differences are available.

    Raises:
        KeyError: If a required column is missing.
    """
    out = df.copy()

    # With no seed, fall back to the module-level generator so existing
    # callers (including ones relying on np.random.seed) see no change.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    # Trailing 10-row averages; min_periods=1 avoids NaN at the start.
    out["hr_roll_mean"] = out["heart_rate"].rolling(10, min_periods=1).mean()
    out["steps_roll_mean"] = out["steps"].rolling(10, min_periods=1).mean()

    hour = out["timestamp"].dt.hour
    out["hour"] = hour
    # Night is hours 0-5 and 23. A missing hour compares False, giving 0.
    out["is_night"] = ((hour < 6) | (hour > 22)).astype("int64")

    # Absolute beat-to-beat change, then its rolling spread over 5 rows.
    out["hr_diff"] = out["heart_rate"].diff().abs()
    out["hrv"] = out["hr_diff"].rolling(5, min_periods=1).std()

    # Sleep hours are randomly generated, not measured: 5-8 h for night rows,
    # 0-1 h otherwise. The night draw happens first, matching the original
    # draw order.
    n_rows = len(out)
    night_hours = rng.uniform(5, 8, n_rows)
    day_hours = rng.uniform(0, 1, n_rows)
    out["sleep_hours"] = np.where(out["is_night"] == 1, night_hours, day_hours)

    # Fraction of an 8-hour night, capped at 1.0.
    out["sleep_quality"] = (out["sleep_hours"] / 8.0).clip(upper=1.0)
    return out

# Build the feature frame, then discard incomplete rows.
# NOTE: dropna() is called without a subset, so a missing value in any
# column of the frame removes that row.
df_feat = feature_engineering(df)
df_feat = df_feat.dropna().reset_index(drop=True)

# ---------- SELECT FEATURES ----------
# Column order is kept exactly as written here when building the model input.
X = df_feat.loc[:, [
    "heart_rate",
    "steps",
    "sleep_quality",
    "hr_roll_mean",
    "steps_roll_mean",
    "hrv",
    "is_night",
]]

# ---------- PREDICT ----------
df_feat = df_feat.assign(
    predicted_wellness=wellness_model.predict(X),
    anomaly_score=anomaly_model.decision_function(X),
    anomaly_flag=anomaly_model.predict(X),  # -1 anomaly, 1 normal
)

# ---------- SAVE RESULTS ----------
results = df_feat.loc[:, [
    "timestamp",
    "heart_rate",
    "steps",
    "predicted_wellness",
    "anomaly_flag",
    "anomaly_score",
]]

results.to_csv(OUT_PREDICTIONS, index=False)
print(f"[DONE] Predictions saved to {OUT_PREDICTIONS}")

# ---------- SHOW SAMPLE ----------
print("\n[RESULT] Sample Predictions:")
print(results.head(10))

# ---------- SUMMARY ----------
avg_wellness = results["predicted_wellness"].mean()
# Share of rows flagged -1, expressed as a percentage.
anomaly_rate = results["anomaly_flag"].eq(-1).mean() * 100

print("\n[SUMMARY]")
print(f"Average Wellness Score: {avg_wellness:.2f}")
print(f"Anomaly Rate: {anomaly_rate:.2f}%")


[INFO] Loading trained models...
[INFO] Loading dataset...
[DONE] Predictions saved to C:\Users\NXTWAVE\Downloads\Wearable Data Wellness Anomaly Detector\predictions.csv

[RESULT] Sample Predictions:
            timestamp  heart_rate  steps  predicted_wellness  anomaly_flag  \
0 2025-04-10 09:10:00         131    858           38.847463             1   
1 2025-04-10 09:15:00         157    732           28.801681             1   
2 2025-04-10 09:20:00          73    667           45.155589             1   
3 2025-04-10 09:25:00         128    865           39.791707             1   
4 2025-04-10 09:30:00         133    977           40.974303             1   
5 2025-04-10 09:35:00         170    764           27.326482             1   
6 2025-04-10 09:40:00         149    923           35.696563             1   
7 2025-04-10 09:45:00         108    870           44.184741             1   
8 2025-04-10 09:50:00          89    710           45.337122             1   
9 2025-04-10 09:55:0