# CS 5530 - Assignment 1, Question 1
## Frailty & Grip Strength Analysis
Three-stage workflow: Ingest → Process → Analyze

In [1]:
"""
CS 5530 - Assignment 1, Question 1
Frailty & Grip Strength Analysis
Three-stage workflow: Ingest -> Process -> Analyze
"""

import pandas as pd
import os

## Stage 1: Ingest

In [2]:
df = pd.read_csv("data.csv")
print("=== Stage 1: Ingestion ===")
print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
print(df.head())
print()

=== Stage 1: Ingestion ===
Loaded 10 rows, 5 columns
   Height  Weight  Age  Grip_strength Frailty
0    65.8     112   30             30       N
1    71.5     136   19             31       N
2    69.4     153   45             29       N
3    68.2     142   22             28       Y
4    67.8     144   29             24       Y



## Stage 2: Process

In [3]:
print("=== Stage 2: Processing ===")

# Unit standardization
df["Height_m"] = df["Height"] * 0.0254
df["Weight_kg"] = df["Weight"] * 0.45359237

# Feature engineering
df["BMI"] = round(df["Weight_kg"] / (df["Height_m"] ** 2), 2)

# Age group binning
bins = [0, 30, 45, 60, 200]
labels = ["<30", "30-45", "46-60", ">60"]
df["AgeGroup"] = pd.cut(df["Age"], bins=bins, labels=labels, right=True)

# Binary encoding for Frailty
df["Frailty_binary"] = df["Frailty"].map({"Y": 1, "N": 0}).astype("int8")

# One-hot encode AgeGroup
age_dummies = pd.get_dummies(df["AgeGroup"], prefix="AgeGroup", dtype=int)
df = pd.concat([df, age_dummies], axis=1)

print("Processed dataframe:")
print(df.to_string())
print()

=== Stage 2: Processing ===
Processed dataframe:
   Height  Weight  Age  Grip_strength Frailty  Height_m  Weight_kg    BMI AgeGroup  Frailty_binary  AgeGroup_<30  AgeGroup_30-45  AgeGroup_46-60  AgeGroup_>60
0    65.8     112   30             30       N   1.67132  50.802345  18.19      <30               0             1               0               0             0
1    71.5     136   19             31       N   1.81610  61.688562  18.70      <30               0             1               0               0             0
2    69.4     153   45             29       N   1.76276  69.399633  22.33    30-45               0             0               1               0             0
3    68.2     142   22             28       Y   1.73228  64.410117  21.46      <30               1             1               0               0             0
4    67.8     144   29             24       Y   1.72212  65.317301  22.02      <30               1             1               0               0            

## Stage 3: Analyze & Report

In [4]:
print("=== Stage 3: Analysis ===")

# Summary statistics for numeric columns
numeric_cols = ["Height_m", "Weight_kg", "BMI", "Grip_strength", "Frailty_binary"]
summary = df[numeric_cols].agg(["mean", "median", "std"]).round(4)
print("Summary statistics:")
print(summary)
print()

# Correlation between grip strength and frailty
corr = df["Grip_strength"].corr(df["Frailty_binary"])
print(f"Correlation (Grip_strength vs Frailty_binary): {corr:.4f}")

# Save report
os.makedirs("reports", exist_ok=True)
with open("reports/findings.md", "w") as f:
    f.write("# Frailty & Grip Strength Analysis - Findings\n\n")
    f.write("## Summary Statistics\n\n")
    f.write("| Metric | Height_m | Weight_kg | BMI | Grip_strength | Frailty_binary |\n")
    f.write("|--------|----------|-----------|-----|---------------|----------------|\n")
    for stat in ["mean", "median", "std"]:
        row = [f"{summary.loc[stat, c]:.4f}" for c in numeric_cols]
        f.write(f"| {stat} | " + " | ".join(row) + " |\n")
    f.write("\n## Correlation Analysis\n\n")
    f.write(f"Pearson correlation between Grip Strength (kg) and Frailty (binary): **{corr:.4f}**\n\n")
    if corr < 0:
        f.write("The negative correlation indicates that lower grip strength is associated "
                "with higher likelihood of frailty, which aligns with clinical expectations. "
                "Participants classified as frail tend to have weaker grip strength.\n")
    else:
        f.write("The positive correlation suggests grip strength increases with frailty, "
                "which is unexpected and may warrant further investigation.\n")

print("\nReport saved to reports/findings.md")

=== Stage 3: Analysis ===
Summary statistics:
        Height_m  Weight_kg     BMI  Grip_strength  Frailty_binary
mean      1.7424    59.8288  19.682        26.0000          0.4000
median    1.7386    61.6886  19.185        27.0000          0.0000
std       0.0424     6.4554   1.781         4.5216          0.5164

Correlation (Grip_strength vs Frailty_binary): -0.4759

Report saved to reports/findings.md
