# Frailty & Grip Strength — Three-Stage Workflow

**Ingest → Process → Analyze** (Chapter 3 case study). Data: 10 female participants.

In [1]:
import os
import pandas as pd
import numpy as np

# Project-relative locations used by the three workflow stages.
RAW_PATH = os.path.join("data", "raw", "frailty_raw.csv")
PROCESSED_PATH = os.path.join("data", "processed", "frailty_processed.csv")
REPORTS_DIR = "reports"

# Create every output folder up front so later cells can write without checks.
for _out_dir in (os.path.dirname(PROCESSED_PATH), REPORTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

## Stage 1: Ingest

Read raw CSV into a pandas DataFrame.

In [None]:
# skipinitialspace=True drops padding that follows each delimiter, so string
# values (e.g. " Y") come in clean, not only the header names stripped below.
df = pd.read_csv(RAW_PATH, skipinitialspace=True)
# Headers may still carry trailing whitespace; normalise them fully.
df.columns = [c.strip() for c in df.columns]

# Fail fast with a readable message if a column the later stages rely on is absent.
_required_cols = {"Height", "Weight", "Age", "Grip strength", "Frailty"}
_missing_cols = sorted(_required_cols - set(df.columns))
assert not _missing_cols, f"raw data is missing expected columns: {_missing_cols}"

print("Stage 1: Ingest — Raw data")
df.head(10)

## Stage 2: Process

**a. Unit standardization** — Height_m, Weight_kg  
**b. Feature engineering** — BMI, AgeGroup  
**c. Encoding** — Frailty_binary, one-hot AgeGroup

In [None]:
# a. Unit standardization
# The factors are the exact inch→metre and pound→kilogram conversions, so the
# raw Height/Weight columns are presumably in inches and pounds — TODO confirm
# against the data source.
METERS_PER_INCH = 0.0254
KG_PER_POUND = 0.45359237

height_m = df["Height"] * METERS_PER_INCH
weight_kg = df["Weight"] * KG_PER_POUND
df["Height_m"] = height_m
df["Weight_kg"] = weight_kg

# b. Feature engineering
# BMI = weight (kg) / height (m) squared, kept to two decimals.
df["BMI"] = (weight_kg / (height_m ** 2)).round(2)

def age_to_group(age):
    """Map an age to its AgeGroup label.

    Bins:
        age < 30        -> "<30"
        30 <= age <= 45 -> "30–45"
        45 < age <= 60  -> "46–60"
        age > 60        -> ">60"

    Missing ages (NaN / None / pd.NA) return NaN instead of a label. Without
    this guard every comparison against NaN is False and the value would fall
    through to ">60", silently mislabelling missing data as the oldest group.
    """
    if pd.isna(age):
        return np.nan
    if age < 30:
        return "<30"
    if age <= 45:
        return "30–45"
    if age <= 60:
        return "46–60"
    return ">60"
df["AgeGroup"] = df["Age"].map(age_to_group)

# c. Categorical → numeric encoding
# Normalise case AND surrounding whitespace before comparing: a padded value
# such as " Y" (left over from a CSV with spaces after the commas) would
# otherwise be encoded as 0. Missing Frailty values also encode as 0.
frailty_flag = df["Frailty"].str.strip().str.upper()
df["Frailty_binary"] = (frailty_flag == "Y").astype("int8")

# get_dummies already names the columns "AgeGroup_<label>", so no renaming is needed.
age_dummies = pd.get_dummies(df["AgeGroup"], prefix="AgeGroup", dtype="int8")

# Drop one-hot columns left over from a previous execution of this cell so that
# re-running it does not append duplicate AgeGroup_* columns.
stale_dummy_cols = [c for c in df.columns if str(c).startswith("AgeGroup_")]
df = df.drop(columns=stale_dummy_cols)
df = pd.concat([df, age_dummies], axis=1)
df = df.rename(columns={"Grip strength": "Grip_kg"})

df.to_csv(PROCESSED_PATH, index=False)
print("Processed data saved to", PROCESSED_PATH)
df.head(10)

## Stage 3: Analyze & Report

Compute a summary table (mean/median/std) and the correlation between Grip_kg and Frailty_binary, then write both to `reports/findings.md`.

In [None]:
# Only summarise the numeric columns that actually exist in the processed frame.
numeric_cols = [c for c in ["Height", "Weight", "Age", "Grip_kg", "Height_m", "Weight_kg", "BMI", "Frailty_binary"] if c in df.columns]
summary = df[numeric_cols].agg(["mean", "median", "std"]).round(4)
corr_strength_frailty = df["Grip_kg"].corr(df["Frailty_binary"])

# Pick the interpretation from the computed value rather than assuming its sign,
# so the report never claims a negative relation the data does not show.
if pd.isna(corr_strength_frailty):
    interpretation = "Correlation is undefined (NaN), e.g. because one of the variables has no variation in this sample, so no relation can be inferred."
elif corr_strength_frailty < 0:
    interpretation = "Negative correlation indicates that higher grip strength tends to associate with lower frailty (Frailty_binary 0), and lower grip strength with higher frailty (1)."
elif corr_strength_frailty > 0:
    interpretation = "Positive correlation indicates that higher grip strength tends to associate with higher frailty (Frailty_binary 1), and lower grip strength with lower frailty (0)."
else:
    interpretation = "Zero correlation indicates no linear association between grip strength and frailty in this sample."

# Build the table cells outside the f-strings: reusing the enclosing quote
# character inside an f-string expression is a SyntaxError before Python 3.12.
header_cells = " | ".join(numeric_cols)
divider_cells = "|".join(["---"] * len(numeric_cols))

findings = f"""# Frailty & Grip Strength — Findings

## Summary statistics (numeric columns)

| Statistic | {header_cells} |
|-----------|{divider_cells}|
"""
for stat in ["mean", "median", "std"]:
    row_cells = " | ".join(summary.loc[stat].astype(str))
    findings += f"| {stat} | {row_cells} |\n"
findings += f"""

## Relation: Grip strength vs Frailty

- **Correlation (Grip_kg, Frailty_binary):** {corr_strength_frailty:.4f}

Interpretation: {interpretation}
"""

report_path = os.path.join(REPORTS_DIR, "findings.md")
with open(report_path, "w", encoding="utf-8") as f:
    f.write(findings)

print(summary)
print("\nCorrelation(Grip_kg, Frailty_binary):", corr_strength_frailty)
print("\nReport written to", report_path)