# Frailty & Grip Strength — Three-Stage Workflow

**Ingest → Process → Analyze.** Data: 10 female participants.

In [1]:
import os
import pandas as pd
import numpy as np

# Project layout: raw input, processed output and generated reports all live
# under paths relative to the notebook's working directory.
DATA_DIR = "data"
RAW_PATH = os.path.join(DATA_DIR, "raw", "frailty_raw.csv")
PROCESSED_PATH = os.path.join(DATA_DIR, "processed", "frailty_processed.csv")
REPORTS_DIR = "reports"

# Create the output folders up front so later cells can write without checking.
for out_dir in (os.path.dirname(PROCESSED_PATH), REPORTS_DIR):
    os.makedirs(out_dir, exist_ok=True)

## Stage 1: Ingest

Read the raw CSV into a pandas DataFrame and strip stray whitespace from the column names.

In [2]:
# Load the raw measurements; header names are trimmed so later cells can
# refer to columns without worrying about padding in the CSV header row.
df = pd.read_csv(RAW_PATH).rename(columns=str.strip)
print("Stage 1: Ingest — Raw data")
df.head(10)

Stage 1: Ingest — Raw data


Unnamed: 0,Height,Weight,Age,Grip strength,Frailty
0,65.8,112,30,30,N
1,71.5,136,19,31,N
2,69.4,153,45,29,N
3,68.2,142,22,28,Y
4,67.8,144,29,24,Y
5,68.7,123,50,26,N
6,69.8,141,51,22,Y
7,70.1,136,23,20,Y
8,67.9,112,17,19,N
9,66.8,120,39,31,N


## Stage 2: Process

**a. Unit standardization** - Height_m, Weight_kg  
**b. Feature engineering** - BMI, AgeGroup  
**c. Encoding** - Frailty_binary, one-hot AgeGroup

In [3]:
# a. Unit standardization
# The factors are the inch→metre and pound→kilogram definitions, so the raw
# Height/Weight columns are presumably recorded in inches and pounds.
INCHES_TO_METRES = 0.0254
POUNDS_TO_KILOGRAMS = 0.45359237
df["Height_m"] = df["Height"].mul(INCHES_TO_METRES)
df["Weight_kg"] = df["Weight"].mul(POUNDS_TO_KILOGRAMS)

# b. Feature engineering
# BMI = weight (kg) / height (m) squared, kept to two decimals.
height_m_squared = df["Height_m"].pow(2)
df["BMI"] = df["Weight_kg"].div(height_m_squared).round(2)

def age_to_group(age):
    """Map an age in years to its age-band label.

    Bands: "<30" (age below 30), "30–45" (30 to 45 inclusive),
    "46–60" (above 45 up to 60 inclusive) and ">60" (above 60).

    Returns None when ``age`` is missing (None / NaN / pd.NA). Without this
    guard a NaN fails every comparison below and would be mislabelled ">60".
    A None label is treated as missing by ``pd.get_dummies``, so such a row
    gets 0 in every one-hot age column.
    """
    if pd.isna(age):
        return None
    if age < 30:
        return "<30"
    if age <= 45:
        return "30–45"
    if age <= 60:
        return "46–60"
    return ">60"
df["AgeGroup"] = df["Age"].map(age_to_group)

# c. Categorical → numeric encoding
# Trim padding before comparing so values such as " Y" or "y " still count as
# frail. Anything that is not "Y" (including a missing value) encodes as 0.
df["Frailty_binary"] = (df["Frailty"].str.strip().str.upper() == "Y").astype("int8")

# Ensure all four one-hot columns exist (assignment: AgeGroup_<30, 30–45, 46–60, >60),
# in this fixed order, even when a band has no participants.
required_age_cols = ["AgeGroup_<30", "AgeGroup_30–45", "AgeGroup_46–60", "AgeGroup_>60"]
age_dummies = (
    pd.get_dummies(df["AgeGroup"], prefix="AgeGroup", dtype="int8")
    .reindex(columns=required_age_cols, fill_value=0)
    .astype("int8")  # reindex may widen the dtype of the columns it adds
)

# Drop one-hot columns left over from a previous run of this cell before
# appending, so re-running the cell does not create duplicate columns.
df = pd.concat([df.drop(columns=required_age_cols, errors="ignore"), age_dummies], axis=1)
df = df.rename(columns={"Grip strength": "Grip_kg"})

df.to_csv(PROCESSED_PATH, index=False)
print("Processed data saved to", PROCESSED_PATH)
df.head(10)

Processed data saved to data\processed\frailty_processed.csv


Unnamed: 0,Height,Weight,Age,Grip_kg,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary,AgeGroup_<30,AgeGroup_30–45,AgeGroup_46–60,AgeGroup_>60
0,65.8,112,30,30,N,1.67132,50.802345,18.19,30–45,0,0,1,0,0
1,71.5,136,19,31,N,1.8161,61.688562,18.7,<30,0,1,0,0,0
2,69.4,153,45,29,N,1.76276,69.399633,22.33,30–45,0,0,1,0,0
3,68.2,142,22,28,Y,1.73228,64.410117,21.46,<30,1,1,0,0,0
4,67.8,144,29,24,Y,1.72212,65.317301,22.02,<30,1,1,0,0,0
5,68.7,123,50,26,N,1.74498,55.791862,18.32,46–60,0,0,0,1,0
6,69.8,141,51,22,Y,1.77292,63.956524,20.35,46–60,1,0,0,1,0
7,70.1,136,23,20,Y,1.78054,61.688562,19.46,<30,1,1,0,0,0
8,67.9,112,17,19,N,1.72466,50.802345,17.08,<30,0,1,0,0,0
9,66.8,120,39,31,N,1.69672,54.431084,18.91,30–45,0,0,1,0,0


## Stage 3: Analyze & Report

Compute a summary table (mean/median/std) of the numeric columns and the correlation between `Grip_kg` and `Frailty_binary`, then write both to `reports/findings.md`.

In [5]:
# Summarise only the numeric columns that are actually present in the frame.
numeric_cols = [c for c in ["Height", "Weight", "Age", "Grip_kg", "Height_m", "Weight_kg", "BMI", "Frailty_binary"] if c in df.columns]
summary = df[numeric_cols].agg(["mean", "median", "std"]).round(4)
corr_strength_frailty = df["Grip_kg"].corr(df["Frailty_binary"])

# Choose the interpretation from the coefficient's actual sign so the report
# never describes a negative relation when the data show otherwise. The
# coefficient can be NaN, e.g. when one of the two columns is constant.
if pd.isna(corr_strength_frailty):
    interpretation = "Correlation is undefined (for example, one of the variables has no variation), so no relation can be inferred."
elif corr_strength_frailty < 0:
    interpretation = "Negative correlation indicates that higher grip strength tends to associate with lower frailty (Frailty_binary 0), and lower grip strength with higher frailty (1)."
elif corr_strength_frailty > 0:
    interpretation = "Positive correlation indicates that higher grip strength tends to associate with higher frailty (Frailty_binary 1), and lower grip strength with lower frailty (0)."
else:
    interpretation = "Zero correlation indicates no linear association between grip strength and frailty."

# Markdown report: header row, separator row, then one row per statistic.
findings = f"""# Frailty & Grip Strength — Findings

## Summary statistics (numeric columns)

| Statistic | {" | ".join(numeric_cols)} |
|-----------|{"|".join(["---"] * len(numeric_cols))}|
"""
for stat in ["mean", "median", "std"]:
    row_vals = " | ".join(summary.loc[stat].astype(str))
    findings += f"| {stat} | {row_vals} |\n"
findings += f"""

## Relation: Grip strength vs Frailty

- **Correlation (Grip_kg, Frailty_binary):** {corr_strength_frailty:.4f}

Interpretation: {interpretation}
"""

# Build the report path once and reuse it for writing and for the log line.
report_path = os.path.join(REPORTS_DIR, "findings.md")
with open(report_path, "w", encoding="utf-8") as f:
    f.write(findings)

print(summary)
print("\nCorrelation(Grip_kg, Frailty_binary):", corr_strength_frailty)
print("\nReport written to", report_path)

         Height    Weight      Age  Grip_kg  Height_m  Weight_kg     BMI  \
mean    68.6000  131.9000  32.5000  26.0000    1.7424    59.8288  19.682   
median  68.4500  136.0000  29.5000  27.0000    1.7386    61.6886  19.185   
std      1.6707   14.2318  12.8604   4.5216    0.0424     6.4554   1.781   

        Frailty_binary  
mean            0.4000  
median          0.0000  
std             0.5164  

Correlation(Grip_kg, Frailty_binary): -0.4758668672668007

Report written to reports\findings.md
