In [None]:
import pandas as pd
import os
# Hand-entered raw measurements: one list per column, ten subjects each.
dt = {}
dt["Height_in"] = [65.8, 71.5, 69.4, 68.2, 67.8, 68.7, 69.8, 70.1, 67.9, 66.8]
dt["Weight_lb"] = [112, 136, 153, 142, 144, 123, 141, 136, 112, 120]
dt["Age_yr"] = [30, 19, 45, 22, 29, 50, 51, 23, 17, 39]
dt["Grip_kg"] = [30, 31, 29, 28, 24, 26, 22, 20, 19, 31]
dt["Frailty"] = ["N", "N", "N", "Y", "Y", "N", "Y", "Y", "N", "N"]

df = pd.DataFrame(data=dt)

# Persist the untouched table twice: once in the working directory and once
# under raw_data/ (created on demand). The row index is not written.
os.makedirs("raw_data", exist_ok=True)
df.to_csv("frailty_raw.csv", index=False)
df.to_csv("raw_data/raw_data.csv", index=False)

In [None]:
# Reload the raw table from the CSV in the working directory so the rest of
# the notebook works from the on-disk copy, then echo it as plain text.
df = pd.read_csv(filepath_or_buffer="frailty_raw.csv")
print(df)

   Height_in  Weight_lb  Age_yr  Grip_kg Frailty
0       65.8        112      30       30       N
1       71.5        136      19       31       N
2       69.4        153      45       29       N
3       68.2        142      22       28       Y
4       67.8        144      29       24       Y
5       68.7        123      50       26       N
6       69.8        141      51       22       Y
7       70.1        136      23       20       Y
8       67.9        112      17       19       N
9       66.8        120      39       31       N


In [None]:
# Convert the imperial inputs to SI columns:
#   0.0254     metres per inch
#   0.45359237 kilograms per pound
df["Height_m"] = 0.0254 * df["Height_in"]
df["Weight_kg"] = 0.45359237 * df["Weight_lb"]

In [None]:
# Body-mass index = weight (kg) / height (m) squared, kept to two decimals.
df["BMI"] = df["Weight_kg"].div(df["Height_m"] ** 2).round(2)

In [None]:
def age_group(age):
    """Return the age-bracket label for an age given in years.

    The brackets are contiguous half-open ranges, so a fractional age is
    binned by completed years (45.5 belongs to "30–45") instead of slipping
    through the gap between two integer bounds and being labelled ">60".
    Integer ages are classified exactly as before.

    Parameters
    ----------
    age : int or float
        Age in years.

    Returns
    -------
    str
        One of "<30", "30–45", "46–60" or ">60".

    Notes
    -----
    NaN compares False against every bound and therefore falls through
    to ">60".
    """
    if age < 30:
        return "<30"
    if age < 46:
        return "30–45"
    if age < 61:
        return "46–60"
    return ">60"

# Label every subject with an age bracket, then discard the raw age and the
# imperial height/weight columns in a single drop.
df["AgeGroup"] = df["Age_yr"].apply(func=age_group)
df = df.drop(columns=["Age_yr", "Height_in", "Weight_lb"])


In [None]:
# Display the transformed frame to check the derived columns now that the raw
# Age_yr / Height_in / Weight_lb columns have been dropped.
df

Unnamed: 0,Grip_kg,Frailty,Height_m,Weight_kg,BMI,AgeGroup
0,30,N,1.67132,50.802345,18.19,30–45
1,31,N,1.8161,61.688562,18.7,<30
2,29,N,1.76276,69.399633,22.33,30–45
3,28,Y,1.73228,64.410117,21.46,<30
4,24,Y,1.72212,65.317301,22.02,<30
5,26,N,1.74498,55.791862,18.32,46–60
6,22,Y,1.77292,63.956524,20.35,46–60
7,20,Y,1.78054,61.688562,19.46,<30
8,19,N,1.72466,50.802345,17.08,<30
9,31,N,1.69672,54.431084,18.91,30–45


In [None]:
# Encode the Y/N frailty flag as 1/0 in a compact integer column. Any value
# outside the mapping becomes NaN, which makes the int8 cast fail rather
# than being silently encoded.
df["Frailty_binary"] = (
    df["Frailty"]
    .map({"Y": 1, "N": 0})
    .astype("int8")
)

In [None]:
# One-hot encode the age bracket into AgeGroup_<label> indicator columns.
# The dtype is pinned to int8 (matching Frailty_binary) because the default
# indicator dtype is version-dependent (uint8 before pandas 2.0, bool from
# 2.0 on), which changes both what describe() summarises and how the
# indicators are written to CSV.
df = pd.get_dummies(df, columns=["AgeGroup"], dtype="int8")

In [None]:
import os
# Mean / median / standard deviation for each column that describe() covers,
# one row per column; describe() reports the median under the "50%" label.
summary = (
    df.describe()
    .T[["mean", "50%", "std"]]
    .rename(columns={"50%": "median"})
)

# Make sure both target folders exist before writing into them.
os.makedirs("output", exist_ok=True)
os.makedirs("clean_data", exist_ok=True)

# The summary keeps its index (the column names); the cleaned table is
# written without its row index.
summary.to_csv("output/frailty_summary.csv")
df.to_csv("clean_data/clean_data.csv", index=False)

In [None]:
# Correlation between grip strength and the 0/1 frailty flag, using the
# default method of Series.corr, reported to three decimals.
corr = df["Grip_kg"].corr(other=df["Frailty_binary"])
corr_rounded = round(corr, ndigits=3)
print("Correlation:", corr_rounded)

Correlation: -0.476
