In [None]:
# ===================== INSTALL REQUIRED PACKAGES =====================
!pip install fastapi uvicorn pyngrok pandas numpy prophet scikit-learn tsfresh matplotlib plotly streamlit requests openai typing Optional reportlab --quiet

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: ngrok auth tokens were previously hard-coded in this cell. Anything
# that was committed should be treated as leaked and revoked in the ngrok dashboard.
# The token is now taken from the NGROK_AUTHTOKEN environment variable, falling
# back to an interactive prompt so it is never stored in the notebook.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

In [13]:
import os
os.environ["OPENAI_API_KEY"] = "api_key"
!uvicorn backend:app --host 0.0.0.0 --port 8003

[32mINFO[0m:     Started server process [[36m43470[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[31mERROR[0m:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8003): address already in use
[32mINFO[0m:     Waiting for application shutdown.
[32mINFO[0m:     Application shutdown complete.


In [None]:
%%writefile backend.py
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd
from prophet import Prophet
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import os
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
)
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from datetime import datetime
import glob
from openai import OpenAI


from fastapi import Body
from typing import Optional

# The OpenAI client needs a key, so refuse to import the module without one
# rather than failing later inside /llm-advice.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise RuntimeError("OPENAI_API_KEY not set")

client = OpenAI(api_key=api_key)


# ================= APP =================
app = FastAPI(title="FitPulse Backend")

# ================= CORS =================
# Any origin may call the API. Credentials are disabled because the CORS
# middleware does not permit credentialed requests together with wildcard
# origins/methods/headers, and nothing in this service uses cookies or
# browser-held credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ================= GLOBAL STATE =================
# Module-level caches shared between endpoints; every one starts unset.
#   CLEAN_DF              - assigned by /preprocess
#   FEATURE_DF, ANOMALY_DF, RULE_RECOMMENDATIONS - assigned by /module2
CLEAN_DF = FEATURE_DF = ANOMALY_DF = RULE_RECOMMENDATIONS = None

# ================= ROOT =================
def classify_severity(count: int):
    """Translate an anomaly count into a severity label.

    Returns "High" for 20 or more, "Medium" for 5 through 19, and "Low" for
    anything smaller.
    """
    # Thresholds are checked from the highest down; the first one met wins.
    for floor, label in ((20, "High"), (5, "Medium")):
        if count >= floor:
            return label
    return "Low"

def df_to_table(df, max_rows=10):
    """Build a reportlab Table from the column names and the first rows of a frame.

    Args:
        df: DataFrame to render.
        max_rows: number of leading data rows to include (default 10).

    Returns:
        A Table whose first row holds the column names and is repeated on
        every page the table spans.
    """
    header_row = df.columns.tolist()
    body_rows = df.head(max_rows).values.tolist()
    return Table([header_row, *body_rows], repeatRows=1)

@app.get("/")
def root():
    """Liveness endpoint: returns a fixed message showing the service is up."""
    payload = {"message": "FitPulse Backend is running"}
    return payload

# ================= PREPROCESSING (CSV + JSON) =================
@app.post("/preprocess")
async def preprocess(file: UploadFile = File(...)):
    """Load an uploaded CSV/JSON file, clean it, and cache one row per user per date.

    The upload must contain the columns user_id, date, TotalSteps,
    avg_heart_rate and total_sleep_minutes. The cleaned frame is stored in the
    module-level CLEAN_DF and written to clean_data.csv.

    Returns:
        On success, a dict with "status", an "overview" of counts and the
        first 20 cleaned rows as "preview". A 400 JSONResponse is returned for
        an unsupported extension, an unreadable file, or missing columns.
    """
    global CLEAN_DF

    # The client may omit the filename, and extensions are matched
    # case-insensitively so "DATA.CSV" is accepted as well.
    filename = (file.filename or "").lower()

    try:
        if filename.endswith(".csv"):
            df = pd.read_csv(file.file)
        elif filename.endswith(".json"):
            df = pd.read_json(file.file)
        else:
            return JSONResponse(status_code=400, content={"error": "Only CSV or JSON supported"})
    except Exception as e:
        return JSONResponse(status_code=400, content={"error": f"Invalid file: {e}"})

    required_cols = ["user_id", "date", "TotalSteps", "avg_heart_rate", "total_sleep_minutes"]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        return JSONResponse(status_code=400, content={"error": f"Missing columns: {missing}"})

    # ---------- CLEANING ----------
    # Unparseable dates/numbers become NaT/NaN instead of raising.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    for col in ["TotalSteps", "avg_heart_rate", "total_sleep_minutes"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Assign the filled column back: calling fillna(inplace=True) on a column
    # selection is chained assignment, which pandas deprecates and which does
    # not update the parent frame when Copy-on-Write is enabled.
    df["TotalSteps"] = df["TotalSteps"].fillna(0)
    df["avg_heart_rate"] = df["avg_heart_rate"].fillna(df["avg_heart_rate"].median())
    df["total_sleep_minutes"] = df["total_sleep_minutes"].fillna(df["total_sleep_minutes"].median())

    # ---------- AGGREGATE ----------
    # Collapse duplicate (user, date) rows: steps are summed, the rest averaged.
    df = df.groupby(["user_id", "date"], as_index=False).agg({
        "TotalSteps": "sum",
        "avg_heart_rate": "mean",
        "total_sleep_minutes": "mean"
    })

    # ---------- RENAME FOR UI ----------
    df = df.rename(columns={
        "avg_heart_rate": "heart_rate",
        "TotalSteps": "steps",
        "total_sleep_minutes": "sleep"
    })

    # Convert sleep from minutes to hours, rounded to two decimals.
    df["sleep"] = (df["sleep"] / 60).round(2)

    CLEAN_DF = df
    df.to_csv("clean_data.csv", index=False)

    return {
        "status": "success",
        "overview": {
            "rows_loaded": len(df),
            "users": df["user_id"].nunique(),
            "days": df["date"].nunique(),
            "avg_hr": round(df["heart_rate"].mean(), 1)
        },
        "preview": df.head(20).to_dict(orient="records")
    }



# ================= OVERVIEW =================
@app.get("/overview")
def overview():
    """Summarise the cached clean dataset.

    Returns row/user counts, the list of user ids as strings, the first and
    last date, and the mean heart rate, steps and sleep. If /preprocess has
    not run yet, a dict with an "error" key is returned instead.
    """
    if CLEAN_DF is None:
        return {"error": "Run /preprocess first"}

    data = CLEAN_DF
    user_ids = data["user_id"]
    first_day = data["date"].min().date()
    last_day = data["date"].max().date()

    return {
        "rows": len(data),
        "users_count": user_ids.nunique(),
        # Stringified so the frontend can compare ids without type mismatches.
        "users_list": user_ids.astype(str).unique().tolist(),
        "start_date": str(first_day),
        "end_date": str(last_day),
        "avg_heart_rate": round(data["heart_rate"].mean(), 2),
        "avg_steps": round(data["steps"].mean(), 2),
        "avg_sleep_hours": round(data["sleep"].mean(), 2),
    }

# ================= FEATURES + ANOMALIES =================
# metric name -> (issue shown to the user, recommendation text)
_RECOMMENDATION_TEXT = {
    "heart_rate_high": (
        "High heart rate",
        "Reduce high-intensity workouts and consult a physician if persistent.",
    ),
    "heart_rate_low": (
        "Low heart rate",
        "Ensure adequate nutrition and consult a healthcare professional.",
    ),
    "no_steps": (
        "No physical activity",
        "Increase daily movement with light walks or stretching.",
    ),
    "sleep_abnormal": (
        "Abnormal sleep duration",
        "Maintain a consistent sleep schedule and improve sleep hygiene.",
    ),
    "dbscan_outlier": (
        "Unusual health pattern",
        "Monitor trends closely; consider lifestyle adjustments.",
    ),
}


def _add_rolling_features(df, window=7):
    """Return a copy of df with rolling mean/std/skew/kurt columns per metric.

    The window covers `window` consecutive rows of the SAME user (in the row
    order of df), so one user's values never leak into another user's features.
    """
    features = df.copy()
    for col in ("steps", "heart_rate", "sleep"):
        for stat in ("mean", "std", "skew", "kurt"):
            features[f"{col}_{stat}_{window}"] = df.groupby("user_id")[col].transform(
                # stat is bound as a default so each lambda keeps its own value
                lambda s, stat=stat: getattr(s.rolling(window), stat)()
            )
    return features


def _rule_anomalies(df):
    """Apply the fixed threshold rules; return one (user_id, date, metric) row per hit.

    Expects df to carry a 0..n-1 index in row order, which is used to keep the
    hits in the same row-by-row order as the input.
    """
    rules = (
        ("heart_rate_high", df["heart_rate"] > 120),
        ("heart_rate_low", df["heart_rate"] < 40),
        ("no_steps", df["steps"] == 0),
        ("sleep_abnormal", (df["sleep"] < 4) | (df["sleep"] > 12)),
    )
    hits = [df.loc[mask, ["user_id", "date"]].assign(metric=name) for name, mask in rules]
    # A stable sort on the row index restores input order while keeping the
    # rule order above for rows that trigger more than one rule.
    return pd.concat(hits).sort_index(kind="mergesort").reset_index(drop=True)


def _build_recommendations(summary_df):
    """Turn the per-user/per-metric summary into a recommendations frame.

    The column list is always set, so the frame (and the CSV written from it)
    has a header even when there are no anomalies at all.
    """
    recs = []
    for row in summary_df.itertuples(index=False):
        if row.metric not in _RECOMMENDATION_TEXT:
            continue
        issue, advice = _RECOMMENDATION_TEXT[row.metric]
        recs.append({
            "user_id": row.user_id,
            "issue": issue,
            "severity": row.severity,
            "recommendation": advice,
        })
    return pd.DataFrame(recs, columns=["user_id", "issue", "severity", "recommendation"])


@app.post("/module2")
def module2():
    """Compute rolling features, detect anomalies and derive recommendations.

    Reads CLEAN_DF and sets FEATURE_DF, ANOMALY_DF and RULE_RECOMMENDATIONS.
    Also writes feature_data.csv, anomaly_raw.csv, anomaly_report.csv and
    recommendations.csv to the working directory.

    Returns:
        A dict with "status", "total_anomalies" and "summary_rows", or a 400
        JSONResponse when there is no preprocessed data to work on.
    """
    global FEATURE_DF, ANOMALY_DF, RULE_RECOMMENDATIONS
    if CLEAN_DF is None:
        return JSONResponse(status_code=400, content={"error": "Run /preprocess first"})
    if CLEAN_DF.empty:
        return JSONResponse(status_code=400, content={"error": "No rows to analyse"})

    # Chronological order with a fresh positional index (sort_values returns a
    # new frame, so CLEAN_DF itself is left untouched).
    df = CLEAN_DF.sort_values("date").reset_index(drop=True)

    FEATURE_DF = _add_rolling_features(df)
    FEATURE_DF.to_csv("feature_data.csv", index=False)

    # Rule-based anomalies
    rule_df = _rule_anomalies(df)

    # DBSCAN anomalies: points labelled -1 belong to no cluster
    X_scaled = StandardScaler().fit_transform(df[["heart_rate", "steps", "sleep"]])
    labels = DBSCAN(eps=1.2, min_samples=3).fit_predict(X_scaled)
    dbscan_df = df.loc[labels == -1, ["user_id", "date"]].copy()
    dbscan_df["metric"] = "dbscan_outlier"

    ANOMALY_DF = pd.concat([rule_df, dbscan_df], ignore_index=True)

    summary_df = (
        ANOMALY_DF
        .groupby(["user_id", "metric"])
        .size()
        .reset_index(name="count")
    )
    summary_df["severity"] = summary_df["count"].apply(classify_severity)

    # Save both versions: one row per anomaly, and the per-user summary
    ANOMALY_DF.to_csv("anomaly_raw.csv", index=False)
    summary_df.to_csv("anomaly_report.csv", index=False)

    RULE_RECOMMENDATIONS = _build_recommendations(summary_df)
    RULE_RECOMMENDATIONS.to_csv("recommendations.csv", index=False)

    return {
        "status": "success",
        "total_anomalies": len(ANOMALY_DF),
        "summary_rows": len(summary_df)
    }


# ================= ANOMALY SUMMARY =================

@app.get("/module3/summary")
def anomaly_summary(user_id: str = "All"):
    """Count detected anomalies per metric, optionally for a single user.

    Args:
        user_id: user to restrict to, compared as a string; "All" disables
            the filter.

    Returns:
        {"summary": {metric: count}}; the mapping is empty when no anomalies
        have been computed.
    """
    if ANOMALY_DF is None or ANOMALY_DF.empty:
        return {"summary": {}}

    # assign() returns a new frame, so the cached ANOMALY_DF is not modified.
    anomalies = ANOMALY_DF.assign(user_id=ANOMALY_DF["user_id"].astype(str))
    if user_id != "All":
        anomalies = anomalies.loc[anomalies["user_id"] == user_id]

    metric_counts = anomalies["metric"].value_counts()
    return {"summary": metric_counts.to_dict()}


# ================= PROPHET PLOTS =================
def plot_prophet(df, column, fname, ylabel):
    """Fit Prophet to one metric, flag large residuals and save the plot.

    Args:
        df: frame with a "date" column and the metric column.
        column: name of the metric column to model.
        fname: path the PNG is written to.
        ylabel: label used for the y axis, legend and title.

    Returns:
        fname once the figure is saved, or None when fewer than 10 usable
        rows are available.
    """
    df2 = (
        df[["date", column]]
        .rename(columns={"date": "ds", column: "y"})
        # Rows without a date or a value cannot be fitted or plotted.
        .dropna(subset=["ds", "y"])
        .sort_values("ds", kind="mergesort")
        .reset_index(drop=True)
    )
    if len(df2) < 10:
        return None

    model = Prophet()
    model.fit(df2)
    forecast = model.predict(df2)

    # Assign by position, not by index label: the incoming frame keeps the
    # labels of whatever it was filtered from, while the forecast comes back
    # with a fresh index (presumably ordered by ds - hence the stable sort
    # above). Label alignment would pair values with the wrong rows or NaN.
    df2["yhat"] = forecast["yhat"].to_numpy()
    df2["residual"] = df2["y"] - df2["yhat"]
    threshold = 2.5 * df2["residual"].std()
    outliers = df2[df2["residual"].abs() > threshold]

    # An explicit figure/axes pair avoids pyplot's shared "current figure".
    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        ax.plot(df2["ds"], df2["y"], label=f"Actual {ylabel}", color="blue")
        ax.plot(df2["ds"], df2["yhat"], label="Prophet Trend", color="red")
        ax.scatter(outliers["ds"], outliers["y"], color="red", label="Anomaly")
        ax.set_xlabel("Date")
        ax.set_ylabel(ylabel)
        ax.set_title(f"{ylabel} Prophet Trend Anomalies")
        ax.legend()
        ax.grid(True)
        fig.savefig(fname)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    return fname
@app.get("/module3/anomaly-table")
def anomaly_table():
    """Return the rows of anomaly_report.csv, or a 404 if the file does not exist."""
    report_path = "anomaly_report.csv"
    if os.path.exists(report_path):
        report = pd.read_csv(report_path)
        return {"rows": report.to_dict(orient="records")}

    return JSONResponse(status_code=404, content={"error": "Run anomaly detection first"})
@app.get("/module3/prophet/{metric}")
def prophet_metric(metric: str, user_id: str = "All"):
    """Render the Prophet trend plot for one metric and return it as a PNG.

    Args:
        metric: "heart_rate", "steps" or "sleep".
        user_id: user to restrict to, compared as a string; "All" disables
            the filter.

    Returns:
        A FileResponse with the image, or a 400 JSONResponse when /module2 has
        not run, the metric is unknown, the user has no rows, or there are
        too few rows to plot.
    """
    axis_labels = {
        "heart_rate": "Heart Rate (BPM)",
        "steps": "Steps",
        "sleep": "Sleep (Hours)"
    }

    def bad_request(message):
        return JSONResponse(status_code=400, content={"error": message})

    if FEATURE_DF is None:
        return bad_request("Run /module2 first")
    if metric not in axis_labels:
        return bad_request("Invalid metric")

    # Compare ids as strings on both sides so numeric ids match the query value.
    wanted_user = str(user_id)
    data = FEATURE_DF.copy()
    data["user_id"] = data["user_id"].astype(str)
    if wanted_user != "All":
        data = data[data["user_id"] == wanted_user]

    if data.empty:
        return bad_request("No data for selected user")

    image_path = f"prophet_{metric}.png"

    # Only the filtered rows are plotted; a falsy result means nothing was drawn.
    if not plot_prophet(data, metric, image_path, axis_labels[metric]):
        return bad_request("Not enough data")

    return FileResponse(image_path)



# ================= DBSCAN VISUALIZATION =================
@app.get("/module3/dbscan")
def dbscan_viz(user_id: str = "All"):
    """Cluster the selected rows with DBSCAN and return a steps/heart-rate scatter PNG.

    Args:
        user_id: user to restrict to, compared as a string; "All" disables
            the filter.

    Returns:
        A FileResponse for dbscan.png, or a 400 JSONResponse when /module2 has
        not run or the user has no rows.
    """
    if FEATURE_DF is None:
        return JSONResponse(status_code=400, content={"error": "Run /module2 first"})

    data = FEATURE_DF.copy()
    data["user_id"] = data["user_id"].astype(str)
    if user_id != "All":
        data = data[data["user_id"] == user_id]

    if data.empty:
        return JSONResponse(status_code=400, content={"error": "No data for selected user"})

    scaled = StandardScaler().fit_transform(data[["heart_rate", "steps", "sleep"]])
    data["cluster"] = DBSCAN(eps=1.2, min_samples=3).fit_predict(scaled)

    # DBSCAN labels points that belong to no cluster with -1.
    is_outlier = data["cluster"] == -1
    normal = data[~is_outlier]
    outliers = data[is_outlier]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(normal["steps"], normal["heart_rate"], label="Normal", alpha=0.6)
    ax.scatter(outliers["steps"], outliers["heart_rate"], label="Outlier", alpha=0.9)
    ax.set_xlabel("Steps")
    ax.set_ylabel("Heart Rate")
    ax.set_title("DBSCAN Clustering")
    ax.legend()
    ax.grid(True)

    fname = "dbscan.png"
    fig.savefig(fname)
    plt.close(fig)

    return FileResponse(fname)


# ================= DISTRIBUTION =================
@app.get("/module3/distribution/{metric}")
def distribution(metric: str, user_id: str = "All"):
    """Return a 20-bin histogram PNG of one metric, optionally for a single user.

    Args:
        metric: "heart_rate", "steps" or "sleep".
        user_id: user to restrict to, compared as a string; "All" disables
            the filter.

    Returns:
        A FileResponse for "<metric>_dist.png", or a 400 JSONResponse when
        /module2 has not run, the metric is unknown, or the user has no rows.
    """
    def bad_request(message):
        return JSONResponse(status_code=400, content={"error": message})

    # Bar colour per metric; the keys double as the list of valid metrics.
    colors_map = {
        "heart_rate": "crimson",
        "steps": "royalblue",
        "sleep": "green"
    }

    if FEATURE_DF is None:
        return bad_request("Run /module2 first")
    if metric not in colors_map:
        return bad_request("Invalid metric")

    data = FEATURE_DF.copy()
    data["user_id"] = data["user_id"].astype(str)
    if user_id != "All":
        data = data[data["user_id"] == user_id]

    if data.empty:
        return bad_request("No data for selected user")

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(data[metric], bins=20, color=colors_map[metric], edgecolor="black")
    ax.set_xlabel(metric)
    ax.set_ylabel("Frequency")
    ax.set_title(f"{metric} Distribution")
    ax.grid(True)

    fname = f"{metric}_dist.png"
    fig.savefig(fname)
    plt.close(fig)

    return FileResponse(fname)


@app.get("/recommendations")
def get_recommendations(user_id: str = "All"):
    """Return the rule-based recommendations as a list of records.

    Args:
        user_id: user to restrict to, compared as a string; "All" disables
            the filter.

    Returns:
        A list of recommendation dicts, or a dict with a "message" key when
        no recommendations have been generated.
    """
    if RULE_RECOMMENDATIONS is None or RULE_RECOMMENDATIONS.empty:
        return {"message": "No rule-based recommendations available"}

    # assign() returns a new frame, so the cached recommendations stay untouched.
    recs = RULE_RECOMMENDATIONS.assign(
        user_id=RULE_RECOMMENDATIONS["user_id"].astype(str)
    )
    if user_id != "All":
        recs = recs.loc[recs["user_id"] == user_id]

    return recs.to_dict(orient="records")


# ================= DOWNLOAD ANOMALIES =================
@app.get("/download-anomalies")
def download_anomalies():
    """Send anomaly_report.csv as a download, or a 404 if it has not been written."""
    report_name = "anomaly_report.csv"
    if os.path.exists(report_name):
        return FileResponse(report_name, media_type="text/csv", filename=report_name)

    return JSONResponse(status_code=404, content={"error": "No anomalies"})
def _load_report_csv(path):
    """Read a CSV produced by /module2; return None if it is missing or has no columns."""
    if not os.path.exists(path):
        return None
    try:
        return pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A completely empty file cannot be parsed; treat it as "nothing to show".
        return None


def _report_table(df, header_background):
    """Render the first rows of df as a gridded table with a shaded header row."""
    table = df_to_table(df)
    table.setStyle(TableStyle([
        ("GRID", (0,0), (-1,-1), 0.5, colors.grey),
        ("BACKGROUND", (0,0), (-1,0), header_background),
    ]))
    return table


@app.get("/download-report")
def download_report():
    """Build the PDF dashboard report and return it as a download.

    The report contains a dataset overview, sample records, the anomaly
    summary and recommendations (when their CSV files exist) and every chart
    image currently present in the working directory.

    Returns:
        A FileResponse for fitpulse_dashboard_report.pdf, or a 400
        JSONResponse when /preprocess has not run.
    """
    if CLEAN_DF is None:
        return JSONResponse(status_code=400, content={"error": "Run preprocess first"})

    filename = "fitpulse_dashboard_report.pdf"
    doc = SimpleDocTemplate(filename, pagesize=A4)
    styles = getSampleStyleSheet()
    story = []

    # ---------- TITLE ----------
    story.append(Paragraph("<b>FitPulse Health Analytics Report</b>", styles["Title"]))
    story.append(Spacer(1, 12))
    story.append(Paragraph(
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        styles["Normal"]
    ))
    story.append(Spacer(1, 20))

    # ---------- OVERVIEW ----------
    df = CLEAN_DF.copy()
    overview_data = [
        ["Metric", "Value"],
        ["Rows Loaded", len(df)],
        ["Users", df["user_id"].nunique()],
        ["Days", df["date"].nunique()],
        ["Avg Heart Rate", round(df["heart_rate"].mean(), 1)],
        ["Start Date", str(df["date"].min().date())],
        ["End Date", str(df["date"].max().date())],
    ]

    overview_table = Table(overview_data)
    overview_table.setStyle(TableStyle([
        ("BACKGROUND", (0,0), (-1,0), colors.lightgrey),
        ("GRID", (0,0), (-1,-1), 1, colors.black),
        ("FONT", (0,0), (-1,0), "Helvetica-Bold"),
    ]))

    story.append(Paragraph("<b>Dataset Overview</b>", styles["Heading2"]))
    story.append(Spacer(1, 10))
    story.append(overview_table)
    story.append(Spacer(1, 20))

    # ---------- SAMPLE DATA ----------
    story.append(Paragraph("<b>Sample Records</b>", styles["Heading2"]))
    story.append(_report_table(df, colors.whitesmoke))
    story.append(Spacer(1, 20))

    # ---------- ANOMALY SUMMARY ----------
    anom_df = _load_report_csv("anomaly_report.csv")
    if anom_df is not None:
        story.append(Paragraph("<b>Anomaly Summary</b>", styles["Heading2"]))
        story.append(_report_table(anom_df, colors.lightgrey))
        story.append(Spacer(1, 20))

    # ---------- IMAGES ----------
    story.append(Paragraph("<b>Visual Analytics</b>", styles["Heading2"]))
    story.append(Spacer(1, 10))

    # The trend endpoint saves "prophet_<metric>.png", so the pattern must also
    # match a name with no suffix; "prophet_<metric>_*.png" alone never did.
    # NOTE: images are whatever was last rendered on disk, possibly for a
    # single user rather than for the whole dataset.
    image_files = []
    for metric in ("heart_rate", "steps", "sleep"):
        image_files.extend(sorted(glob.glob(f"prophet_{metric}*.png")))
    image_files.extend(["dbscan.png", "heart_rate_dist.png", "steps_dist.png", "sleep_dist.png"])

    for img in image_files:
        if os.path.exists(img):
            story.append(Image(img, width=400, height=220))
            story.append(Spacer(1, 12))

    # ---------- RECOMMENDATIONS ----------
    rec_df = _load_report_csv("recommendations.csv")
    if rec_df is not None:
        story.append(Paragraph("<b>Health Recommendations</b>", styles["Heading2"]))
        story.append(_report_table(rec_df, colors.lightgrey))
        story.append(Spacer(1, 20))

    # ---------- BUILD ----------
    doc.build(story)

    return FileResponse(
        filename,
        media_type="application/pdf",
        filename=filename
    )



@app.post("/llm-advice")
def llm_advice(
    question: str = Body(...),
    user_id: Optional[str] = Body("All")
):
    """Answer a wellness question using the cached data as context for the LLM.

    Args:
        question: free-text question from the user.
        user_id: user whose data is used as context; "All" or a missing value
            uses every user.

    Returns:
        {"answer": ..., "user_id": ...} on success; a dict with an "error" key
        when there is no data; a 502 JSONResponse when the OpenAI call fails.
    """
    # Imported here so the block needs no change to the module's import list.
    from openai import OpenAIError

    if CLEAN_DF is None:
        return {"error": "No data available. Please preprocess data first."}

    # A null user_id means "no filter"; without this it would be compared
    # against every row as None and match nothing.
    user_id = "All" if user_id is None else str(user_id)

    def for_user(frame):
        """Return a copy of frame restricted to the requested user (ids as strings)."""
        scoped = frame.copy()
        scoped["user_id"] = scoped["user_id"].astype(str)
        if user_id != "All":
            scoped = scoped[scoped["user_id"] == user_id]
        return scoped

    df = for_user(CLEAN_DF)
    if df.empty:
        return {"error": "No data available for selected user."}

    summary_text = df.describe().to_string()

    anomaly_text = ""
    if ANOMALY_DF is not None and not ANOMALY_DF.empty:
        # Only the first 20 anomaly rows are included in the prompt.
        anomaly_text = for_user(ANOMALY_DF).head(20).to_string()

    rules_text = ""
    if RULE_RECOMMENDATIONS is not None and not RULE_RECOMMENDATIONS.empty:
        rules_text = for_user(RULE_RECOMMENDATIONS).to_string()

    prompt = f"""
You are a health analytics assistant.

User-specific health data summary:
{summary_text}

Detected anomalies:
{anomaly_text}

Rule-based recommendations:
{rules_text}

User question:
{question}

Give clear, practical, non-medical wellness advice.
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You provide general wellness advice, not medical diagnosis."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.4,
            max_tokens=300
        )
    except OpenAIError as exc:
        # Report upstream failures as JSON instead of an unhandled 500.
        return JSONResponse(status_code=502, content={"error": f"LLM request failed: {exc}"})

    # The message content can be None, so guard before stripping.
    answer = (response.choices[0].message.content or "").strip()

    return {
        "answer": answer,
        "user_id": user_id
    }

In [None]:
import threading
import uvicorn


def run_backend():
    """Serve backend:app on 0.0.0.0:8003 from whichever thread calls this."""
    uvicorn.run("backend:app", host="0.0.0.0", port=8003)


# Run the server on its own thread so the notebook cell returns immediately.
backend_thread = threading.Thread(target=run_backend)
backend_thread.start()

In [None]:
from pyngrok import ngrok
# Stop any ngrok process left over from an earlier run before opening a new tunnel.
ngrok.kill()

# Open a tunnel to the backend port (8003) and show its public address.
backend_url = ngrok.connect(8003)
print("BACKEND URL:", backend_url)

In [None]:
from pyngrok import ngrok
# Shut down the ngrok process (and with it any open tunnels) started from this notebook.
ngrok.kill()

In [None]:
# Create the directory that the next cell writes config.toml into; -p makes
# this a no-op when the directory already exists.
!mkdir -p .streamlit

In [None]:
%%writefile .streamlit/config.toml
# Theme for the FitPulse dashboard: light base with custom accent, background
# and text colours.
[theme]
base="light"
primaryColor="#E63946"
backgroundColor="#F1FAEE"
secondaryBackgroundColor="#A8DADC"
textColor="#1D3557"

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import requests
import io
from datetime import datetime
import plotly.express as px

# ================= CONFIG =================
BACKEND = "http://localhost:8003"
HEADERS = {"ngrok-skip-browser-warning": "true"}

st.set_page_config(page_title="FitPulse Health Analytics", layout="wide")

# ================= SESSION STATE =================
# Progress flags for the pipeline; each starts False the first time a session
# runs the script and is left alone on later reruns.
for _flag in ("preprocess_done", "module2_done"):
    if _flag not in st.session_state:
        st.session_state[_flag] = False

# ================= API HELPERS =================
# Upper bound (seconds) for one backend call. Without a timeout, requests
# waits indefinitely and the page hangs if the backend stops responding.
REQUEST_TIMEOUT = 120


def _json_or_error(response):
    """Decode a JSON body; on a non-JSON body return an {"error": ...} dict instead."""
    try:
        return response.json()
    except ValueError:
        # For example, an unhandled backend exception answers with plain text.
        return {"error": f"Backend returned HTTP {response.status_code} with a non-JSON body"}


def api_get(endpoint, raw=False):
    """GET an endpoint on the backend.

    Args:
        endpoint: path (including any query string) appended to BACKEND.
        raw: when True, return the requests Response itself (used for
            images and file downloads); otherwise return the decoded JSON.
    """
    r = requests.get(f"{BACKEND}{endpoint}", headers=HEADERS, timeout=REQUEST_TIMEOUT)
    return r if raw else _json_or_error(r)


def api_post(endpoint, files=None):
    """POST to the backend, optionally with multipart files; return the decoded JSON."""
    r = requests.post(
        f"{BACKEND}{endpoint}",
        files=files,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    return _json_or_error(r)


def api_post_json(endpoint, payload=None):
    """POST a JSON payload to the backend; return the decoded JSON."""
    r = requests.post(
        f"{BACKEND}{endpoint}",
        json=payload,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    return _json_or_error(r)


# ================= COMMON FILTERS =================
def sidebar_filters(metrics=True):
    """Render the shared sidebar filters and return the current selections.

    Args:
        metrics: when True, also show the metric multiselect.

    Returns:
        (user, dates, metric): the chosen user ("All" or an id as a string),
        the value of the date-range input, and the selected metric names
        (None when metrics is False).
    """
    overview = api_get("/overview")
    if not isinstance(overview, dict):
        overview = {}

    # The backend answers with an "error" dict before preprocessing has run;
    # in that case only "All" is offered.
    backend_users = overview.get("users_list")
    if not isinstance(backend_users, list):
        backend_users = []
    users = ["All"] + sorted(str(u) for u in backend_users)

    user = st.sidebar.selectbox("Select User", users)

    try:
        start = datetime.strptime(overview["start_date"], "%Y-%m-%d")
        end = datetime.strptime(overview["end_date"], "%Y-%m-%d")
    except (KeyError, TypeError, ValueError):
        # Dates missing or not in YYYY-MM-DD form: fall back to today.
        start = end = datetime.today()

    dates = st.sidebar.date_input("Date Range", [start, end])

    metric = None
    if metrics:
        metric = st.sidebar.multiselect(
            "Select Metrics",
            ["heart_rate", "steps", "sleep"],
            default=["heart_rate", "steps", "sleep"]
        )

    return user, dates, metric

# ================= SIDEBAR =================
st.sidebar.title("FitPulse Controls")

# Page labels, in display order; the router below compares `page` against
# these exact strings.
NAV_PAGES = [
    "1. Data Upload & Preprocessing",
    "2. Feature Engineering & Anomalies",
    "3. Trends",
    "4. Anomalies",
    "5. Distributions & DBSCAN",
    "6. Downloads",
]

page = st.sidebar.radio("Navigation", NAV_PAGES)

st.sidebar.divider()

# ================= PAGE ROUTER =================
if page == "1. Data Upload & Preprocessing":
    # Page 1: project description, file upload and the preprocessing trigger.
    st.title("FitPulse Health Analytics")
    with st.expander("Project Overview", expanded=True):
        st.markdown("""
**FitPulse** is an end-to-end health analytics system for wearable data.

**Input Columns**
- user_id
- date
- TotalSteps
- avg_heart_rate
- total_sleep_minutes

**Outputs**
- Cleaned dataset
- Rolling health features
- Anomalies & clustering
- Interactive trends
- CSV & PDF reports
        """)

    uploaded = st.file_uploader("Upload file", type=["csv", "json"])
    run_clicked = st.button("Run Preprocessing")

    if run_clicked and not uploaded:
        st.warning("Please upload a file first")
    elif run_clicked:
        res = api_post("/preprocess", files={"file": uploaded})
        if res.get("status") != "success":
            st.error(res)
        else:
            st.success("Preprocessing completed")
            # Unlocks page 2.
            st.session_state.preprocess_done = True

            ov = res["overview"]
            tiles = (
                ("Records", "rows_loaded"),
                ("Users", "users"),
                ("Days", "days"),
                ("Avg HR", "avg_hr"),
            )
            for column, (label, key) in zip(st.columns(4), tiles):
                column.metric(label, ov[key])

            st.dataframe(pd.DataFrame(res["preview"]), use_container_width=True)

elif page == "2. Feature Engineering & Anomalies":
    st.title("Feature Engineering & Anomaly Detection")
    if not st.session_state.preprocess_done:
        st.warning("Complete preprocessing first")
    else:
        if st.button("Run Feature Engineering & Anomaly Detection"):
            res = api_post("/module2")
            if res.get("status") == "success":
                st.success("Module completed")
                st.session_state.module2_done = True
                st.table(pd.DataFrame({
                    "Total Anomalies": [res["total_anomalies"]],
                    "Summary Rows": [res["summary_rows"]]
                }))
            else:
                st.error(res)

elif page == "3. Trends":
    if not st.session_state.module2_done:
        st.warning("Complete previous modules first")
    else:
        st.title("Health Trends")
        user, _, metrics = sidebar_filters(metrics=True)

        if not metrics:
            st.warning("Select at least one metric")
        else:
            for metric in metrics:
                r = api_get(f"/module3/prophet/{metric}?user_id={user}", raw=True)
                if r.status_code == 200:
                    st.image(io.BytesIO(r.content), use_column_width=True)
                else:
                    st.error(r.json().get("error", "No data"))

elif page == "4. Anomalies":
    if not st.session_state.module2_done:
        st.warning("Complete previous modules first")
    else:
        st.title("Anomaly Analysis")
        user, _, metrics = sidebar_filters(metrics=True)

        summary = api_get(f"/module3/summary?user_id={user}")
        df = pd.DataFrame(summary["summary"].items(), columns=["Metric", "Count"])

        # ðŸ”¹ MAP UI METRICS â†’ BACKEND METRICS
        metric_map = {
            "heart_rate": ["heart_rate_high", "heart_rate_low"],
            "steps": ["no_steps"],
            "sleep": ["sleep_abnormal"]
        }

        selected_metrics = []
        if metrics:
            for m in metrics:
                selected_metrics.extend(metric_map.get(m, []))

        # FILTER SUMMARY
        if selected_metrics:
            df = df[df["Metric"].isin(selected_metrics)]

        fig = px.bar(df, x="Metric", y="Count", color="Metric")
        st.plotly_chart(fig, use_container_width=True)

        # ================= TABLE =================
        table = api_get("/module3/anomaly-table")
        df_anom = pd.DataFrame(table["rows"])

        # ðŸ”¹ FIX USER TYPE MISMATCH
        df_anom["user_id"] = df_anom["user_id"].astype(str)

        if user != "All":
            df_anom = df_anom[df_anom["user_id"] == str(user)]

        # FILTER TABLE BY METRICS
        if selected_metrics:
            df_anom = df_anom[df_anom["metric"].isin(selected_metrics)]

        st.dataframe(df_anom, use_container_width=True)

elif page == "5. Distributions & DBSCAN":
    if not st.session_state.module2_done:
        st.warning("Complete previous modules first")
    else:
        st.title("Distributions & Clustering")
        user, _, metrics = sidebar_filters(metrics=True)

        for metric in metrics:
            r = api_get(f"/module3/distribution/{metric}?user_id={user}", raw=True)
            if r.status_code == 200:
                st.image(io.BytesIO(r.content), caption=f"{metric} distribution", use_column_width=True)

        if any(m in metrics for m in ["heart_rate", "steps", "sleep"]):
            r = api_get(f"/module3/dbscan?user_id={user}", raw=True)
            if r.status_code == 200:
                st.image(io.BytesIO(r.content), caption="DBSCAN clustering", use_column_width=True)

elif page == "6. Downloads":
    if not st.session_state.module2_done:
        st.warning("Complete previous modules first")
    else:
        st.title("Download Reports")
        user, _, _ = sidebar_filters(metrics=False)

        r = api_get("/download-anomalies", raw=True)
        if r.status_code == 200:
            st.download_button("Download Anomaly CSV", r.content, "anomaly_report.csv")

        r = api_get("/download-report", raw=True)
        if r.status_code == 200:
            st.download_button("Download Full PDF Report", r.content, "fitpulse_dashboard_report.pdf")

        st.subheader("Ask Health Advisor (AI)")
        question = st.text_input("Ask a health-related question")

        if st.button("Ask AI"):
            res = api_post_json("/llm-advice", {"question": question, "user_id": user})

            if "answer" in res:
                st.markdown(res["answer"])
            else:
                st.error(res)

# Footer caption; it sits after the page router, so it is rendered on every page.
st.caption("FitPulse | FastAPI Backend with Streamlit Frontend")

In [None]:
# Starts Streamlit on port 8501, listening on all interfaces. The command runs
# in the foreground, so this cell stays busy for as long as the server runs.
!streamlit run app.py --server.port 8501 --server.address 0.0.0.0

In [None]:
# ===============================
# Launch Streamlit via Ngrok
# ===============================
import subprocess
import threading
import time

# Imported here so the cell does not depend on an earlier cell having run.
from pyngrok import ngrok


def run_streamlit():
    """Run the Streamlit app; blocks the calling thread until the server exits."""
    # subprocess is used rather than the "!" shell magic, which is IPython
    # syntax and is not dependable inside a function run on another thread.
    subprocess.run(
        [
            "streamlit", "run", "app.py",
            "--server.port", "8501",
            "--server.address", "0.0.0.0",
        ],
        check=False,
    )


threading.Thread(target=run_streamlit).start()
# Give the server a moment to start listening before opening the tunnel.
time.sleep(5)
streamlit_url = ngrok.connect(8501)
print("Streamlit public URL:", streamlit_url)

In [None]:
# Kill any process running on port 8003 to resolve 'address already in use' error
!lsof -i :8003 -t | xargs kill -9

# Start uvicorn backend in the background
!uvicorn backend:app --host 0.0.0.0 --port 8003 &