Importing all required modules

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import joblib

Reading the dataset

In [2]:
# Load the body-fat measurements from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="bodyfat.csv")
# Bare trailing expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Density,BodyFat,Age,Weight,Height,Neck,Chest,Abdomen,Hip,Thigh,Knee,Ankle,Biceps,Forearm,Wrist
0,1.0708,12.3,23,154.25,67.75,36.2,93.1,85.2,94.5,59.0,37.3,21.9,32.0,27.4,17.1
1,1.0853,6.1,22,173.25,72.25,38.5,93.6,83.0,98.7,58.7,37.3,23.4,30.5,28.9,18.2
2,1.0414,25.3,22,154.00,66.25,34.0,95.8,87.9,99.2,59.6,38.9,24.0,28.8,25.2,16.6
3,1.0751,10.4,26,184.75,72.25,37.4,101.8,86.4,101.2,60.1,37.3,22.8,32.4,29.4,18.2
4,1.0340,28.7,24,184.25,71.25,34.4,97.3,100.0,101.9,63.2,42.2,24.0,32.2,27.7,17.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,1.0736,11.0,70,134.25,67.00,34.9,89.2,83.6,88.8,49.6,34.8,21.5,25.6,25.7,18.5
248,1.0236,33.6,72,201.00,69.75,40.9,108.5,105.0,104.5,59.6,40.8,23.2,35.2,28.6,20.1
249,1.0328,29.3,72,186.75,66.00,38.9,111.1,111.5,101.7,60.3,37.3,21.5,31.3,27.2,18.0
250,1.0399,26.0,72,190.75,70.50,38.9,108.3,101.3,97.8,56.0,41.6,22.7,30.5,29.4,19.8


Data cleaning

In [3]:
# Remove the measurements that are not part of the feature list used for
# modelling further down (Density, Knee, Ankle, Wrist).
# NOTE: re-running this cell on an already-cleaned frame raises KeyError,
# because the labels no longer exist; re-run the loading cell first.
df = df.drop(["Density", "Knee", "Ankle", "Wrist"], axis="columns")
df

Unnamed: 0,BodyFat,Age,Weight,Height,Neck,Chest,Abdomen,Hip,Thigh,Biceps,Forearm
0,12.3,23,154.25,67.75,36.2,93.1,85.2,94.5,59.0,32.0,27.4
1,6.1,22,173.25,72.25,38.5,93.6,83.0,98.7,58.7,30.5,28.9
2,25.3,22,154.00,66.25,34.0,95.8,87.9,99.2,59.6,28.8,25.2
3,10.4,26,184.75,72.25,37.4,101.8,86.4,101.2,60.1,32.4,29.4
4,28.7,24,184.25,71.25,34.4,97.3,100.0,101.9,63.2,32.2,27.7
...,...,...,...,...,...,...,...,...,...,...,...
247,11.0,70,134.25,67.00,34.9,89.2,83.6,88.8,49.6,25.6,25.7
248,33.6,72,201.00,69.75,40.9,108.5,105.0,104.5,59.6,35.2,28.6
249,29.3,72,186.75,66.00,38.9,111.1,111.5,101.7,60.3,31.3,27.2
250,26.0,72,190.75,70.50,38.9,108.3,101.3,97.8,56.0,30.5,29.4


Normalizing data

In [4]:
# Express each circumference as a ratio to the chest circumference of the
# same row. 'Chest' is included in the list, so it is divided by itself and
# ends up as a constant 1 for every row.
chest_scaled = ['Neck', 'Abdomen', 'Hip', 'Thigh', 'Biceps', 'Forearm', 'Chest']

# A single row-aligned division: every listed column is divided by the
# original 'Chest' values before any of them is written back.
df[chest_scaled] = df[chest_scaled].div(df['Chest'], axis=0)


Creating Dependent and Independent variables

In [5]:
# Predictor columns, in the order the model will be trained on.
# NOTE: 'Chest' was divided by itself in the normalization cell, so it is a
# constant column here; it is kept so the model's input layout stays the same.
feature_columns = [
    'Age', 'Weight', 'Height',
    'Neck', 'Chest', 'Abdomen', 'Hip', 'Thigh', 'Biceps', 'Forearm',
]

x = df[feature_columns]   # independent variables (features)
y = df['BodyFat']         # dependent variable (target)

Splitting into training and testing data

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

Model creation

In [7]:
# Random forests are stochastic (bootstrap sampling of rows and randomized
# split search), so an unseeded forest gives a different model and a different
# R^2 score on every run. Seed it so Restart & Run All is reproducible; 42
# matches the seed already used for the train/test split.
model = RandomForestRegressor(random_state=42)

# Fit on the training portion only; the test portion is reserved for evaluation.
model.fit(x_train, y_train)

Evaluation

In [8]:
# Predict on the held-out rows and compare against the true values using the
# coefficient of determination (R^2).
y_pred = model.predict(x_test)
score = r2_score(y_true=y_test, y_pred=y_pred)
print(score)

0.5192727982742879


Saving the model

In [9]:
# Persist the fitted estimator to disk in the current working directory.
joblib.dump(value=model, filename="bodyfat_model.pkl")
print("✅ Model saved!")

✅ Model saved!
