**IMPORT** **LIBRARIES & LOAD DATA**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path


CHOOSE THE FRALITY.CSV FILE AND UPLOAD

In [3]:

df = pd.read_csv("frailty.csv")
display(df)




Unnamed: 0,Height,Weight,Age,Grip strength,Frailty
0,65.8,112,30,30,N
1,71.5,136,19,31,N
2,69.4,153,45,29,N
3,68.2,142,22,28,Y
4,67.8,144,29,24,Y
5,68.7,123,50,26,N
6,69.8,141,51,22,Y
7,70.1,136,23,20,Y
8,67.9,112,17,19,N
9,66.8,120,39,31,N


# **Unit standardization**

In [4]:
df.columns = df.columns.str.strip()
df["Height"] = df["Height"].astype(float)
df["Weight"] = df["Weight"].astype(float)
df["Height_m"] = df["Height"] * 0.0254
df["Weight_kg"] = df["Weight"] * 0.45359237
display(df)




Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg
0,65.8,112.0,30,30,N,1.67132,50.802345
1,71.5,136.0,19,31,N,1.8161,61.688562
2,69.4,153.0,45,29,N,1.76276,69.399633
3,68.2,142.0,22,28,Y,1.73228,64.410117
4,67.8,144.0,29,24,Y,1.72212,65.317301
5,68.7,123.0,50,26,N,1.74498,55.791862
6,69.8,141.0,51,22,Y,1.77292,63.956524
7,70.1,136.0,23,20,Y,1.78054,61.688562
8,67.9,112.0,17,19,N,1.72466,50.802345
9,66.8,120.0,39,31,N,1.69672,54.431084


# Feature engineering

In [5]:
df["BMI"] = (df["Weight_kg"] / (df["Height_m"]**2)).round(2)

In [6]:
df["Age"] = df["Age"].astype(int)
def age_group(age):
    if age < 30:
        return "<30"
    elif 30 <= age <= 45:
        return "30–45"
    elif 46 <= age <= 60:
        return "46–60"
    else:
        return ">60"

df["AgeGroup"] = df["Age"].apply(age_group)
display(df)



Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup
0,65.8,112.0,30,30,N,1.67132,50.802345,18.19,30–45
1,71.5,136.0,19,31,N,1.8161,61.688562,18.7,<30
2,69.4,153.0,45,29,N,1.76276,69.399633,22.33,30–45
3,68.2,142.0,22,28,Y,1.73228,64.410117,21.46,<30
4,67.8,144.0,29,24,Y,1.72212,65.317301,22.02,<30
5,68.7,123.0,50,26,N,1.74498,55.791862,18.32,46–60
6,69.8,141.0,51,22,Y,1.77292,63.956524,20.35,46–60
7,70.1,136.0,23,20,Y,1.78054,61.688562,19.46,<30
8,67.9,112.0,17,19,N,1.72466,50.802345,17.08,<30
9,66.8,120.0,39,31,N,1.69672,54.431084,18.91,30–45


## Categorical → numeric encoding

In [7]:
df["Frailty"] = df["Frailty"].astype(str).str.strip().str.upper()
df["Frailty_binary"] = df["Frailty"].map({"Y":1, "N":0}).astype("int8")
display(df)



Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary
0,65.8,112.0,30,30,N,1.67132,50.802345,18.19,30–45,0
1,71.5,136.0,19,31,N,1.8161,61.688562,18.7,<30,0
2,69.4,153.0,45,29,N,1.76276,69.399633,22.33,30–45,0
3,68.2,142.0,22,28,Y,1.73228,64.410117,21.46,<30,1
4,67.8,144.0,29,24,Y,1.72212,65.317301,22.02,<30,1
5,68.7,123.0,50,26,N,1.74498,55.791862,18.32,46–60,0
6,69.8,141.0,51,22,Y,1.77292,63.956524,20.35,46–60,1
7,70.1,136.0,23,20,Y,1.78054,61.688562,19.46,<30,1
8,67.9,112.0,17,19,N,1.72466,50.802345,17.08,<30,0
9,66.8,120.0,39,31,N,1.69672,54.431084,18.91,30–45,0


# One-hot encoding **AgeGroup**

In [8]:
import numpy as np

df["AgeGroup_<30"] = np.where(df["AgeGroup"] == '<30', 1, 0)
df["AgeGroup_30_45"] = np.where(df["AgeGroup"] == '30-45', 1, 0)
df["AgeGroup_40_60"] = np.where(df["AgeGroup"] == '45-60', 1, 0)
df["AgeGroup_>60"] = np.where(df["AgeGroup"] == '>60', 1, 0)

display(df)



Unnamed: 0,Height,Weight,Age,Grip strength,Frailty,Height_m,Weight_kg,BMI,AgeGroup,Frailty_binary,AgeGroup_<30,AgeGroup_30_45,AgeGroup_40_60,AgeGroup_>60
0,65.8,112.0,30,30,N,1.67132,50.802345,18.19,30–45,0,0,0,0,0
1,71.5,136.0,19,31,N,1.8161,61.688562,18.7,<30,0,1,0,0,0
2,69.4,153.0,45,29,N,1.76276,69.399633,22.33,30–45,0,0,0,0,0
3,68.2,142.0,22,28,Y,1.73228,64.410117,21.46,<30,1,1,0,0,0
4,67.8,144.0,29,24,Y,1.72212,65.317301,22.02,<30,1,1,0,0,0
5,68.7,123.0,50,26,N,1.74498,55.791862,18.32,46–60,0,0,0,0,0
6,69.8,141.0,51,22,Y,1.77292,63.956524,20.35,46–60,1,0,0,0,0
7,70.1,136.0,23,20,Y,1.78054,61.688562,19.46,<30,1,1,0,0,0
8,67.9,112.0,17,19,N,1.72466,50.802345,17.08,<30,0,1,0,0,0
9,66.8,120.0,39,31,N,1.69672,54.431084,18.91,30–45,0,0,0,0,0


# EDA & Reporting

In [9]:
df["Grip strength"] = df["Grip strength"].astype(int)
summary_stats = df.describe().T[["mean","std","min","max"]]
print("\nSummary Statistics:")
display(summary_stats)



Summary Statistics:


Unnamed: 0,mean,std,min,max
Height,68.6,1.670662,65.8,71.5
Weight,131.9,14.231811,112.0,153.0
Age,32.5,12.860361,17.0,51.0
Grip strength,26.0,4.521553,19.0,31.0
Height_m,1.74244,0.042435,1.67132,1.8161
Weight_kg,59.828834,6.455441,50.802345,69.399633
BMI,19.682,1.780972,17.08,22.33
Frailty_binary,0.4,0.516398,0.0,1.0
AgeGroup_<30,0.5,0.527046,0.0,1.0
AgeGroup_30_45,0.0,0.0,0.0,0.0


In [10]:
correlation = df["Grip strength"].corr(df["Frailty_binary"])
print("\nCorrelation (Grip strength vs Frailty_binary):", correlation)


Correlation (Grip strength vs Frailty_binary): -0.4758668672668007
