In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import joblib

In [2]:
df = pd.read_csv('final_weight_goal_dataset.csv')

In [3]:
df.head()

Unnamed: 0,name,age,height,weight,goal,diet_type,exercise_level,disease,days_to_goal,goal_weight_change,target_weight,gender
0,Vikas Sharma,56,186.0,116.0,Maintain,Non-Vegetarian,Fast,BP,36,0.0,116.0,female
1,Anjali Yadav,46,161.0,67.0,Loss,Non-Vegetarian,Fast,Diabetes,86,18.5,48.5,female
2,Sneha Reddy,32,162.0,91.0,Loss,Vegetarian,Slow,BP,151,17.8,73.2,male
3,Rohan Patel,25,172.0,90.0,Maintain,Non-Vegetarian,Slow,Thyroid,46,0.0,90.0,female
4,Anjali Sharma,38,174.0,56.0,Gain,Vegetarian,Slow,Thyroid,133,12.2,68.2,female


In [4]:
df.isnull().sum()

name                    0
age                     0
height                 16
weight                 19
goal                    0
diet_type              13
exercise_level          0
disease               177
days_to_goal            0
goal_weight_change      0
target_weight          19
gender                  0
dtype: int64

In [5]:
df.shape

(700, 12)

In [7]:
df = df.dropna(subset=['days_to_goal'])

In [8]:
df.duplicated().sum()

195

In [9]:
df = df.drop_duplicates()

In [10]:
df.shape

(505, 12)

In [11]:
df.isnull().sum()

name                    0
age                     0
height                 10
weight                 11
goal                    0
diet_type              10
exercise_level          0
disease               136
days_to_goal            0
goal_weight_change      0
target_weight          11
gender                  0
dtype: int64

In [12]:
df['height'].mean()

169.36969696969697

In [13]:
df['height'] = df['height'].fillna(df['height'].mean())

In [14]:
df['weight'].mean()

81.68623481781377

In [15]:
df['weight'] = df['weight'].fillna(df['weight'].mean())

In [16]:
df['target_weight'].mean()

81.08987854251012

In [17]:
df['target_weight'] = df['target_weight'].fillna(df['target_weight'].mean())

In [18]:
df['diet_type'].mode()

0    Non-Vegetarian
Name: diet_type, dtype: object

In [19]:
df['diet_type'].mode()[0]

'Non-Vegetarian'

In [20]:
df['diet_type'] = df['diet_type'].fillna(df['diet_type'].mode()[0])

In [21]:
df = df.drop(columns=['name'])

In [22]:
df

Unnamed: 0,age,height,weight,goal,diet_type,exercise_level,disease,days_to_goal,goal_weight_change,target_weight,gender
0,56,186.0,116.000000,Maintain,Non-Vegetarian,Fast,BP,36,0.0,116.000000,female
1,46,161.0,67.000000,Loss,Non-Vegetarian,Fast,Diabetes,86,18.5,48.500000,female
2,32,162.0,91.000000,Loss,Vegetarian,Slow,BP,151,17.8,73.200000,male
3,25,172.0,90.000000,Maintain,Non-Vegetarian,Slow,Thyroid,46,0.0,90.000000,female
4,38,174.0,56.000000,Gain,Vegetarian,Slow,Thyroid,133,12.2,68.200000,female
...,...,...,...,...,...,...,...,...,...,...,...
500,49,159.0,48.000000,Gain,Vegetarian,Medium,,96,16.1,64.100000,male
501,21,179.0,90.000000,Gain,Vegetarian,Fast,BP,66,8.6,98.600000,male
502,35,168.0,81.686235,Gain,Non-Vegetarian,Medium,BP,66,17.4,81.089879,female
503,50,180.0,56.000000,Gain,Vegetarian,Fast,BP,91,19.7,75.700000,female


In [23]:
df = pd.get_dummies(df, columns=['goal', 'diet_type', 'exercise_level', 'disease', 'gender'])

In [24]:
df

Unnamed: 0,age,height,weight,days_to_goal,goal_weight_change,target_weight,goal_Gain,goal_Loss,goal_Maintain,diet_type_Non-Vegetarian,diet_type_Vegetarian,exercise_level_Fast,exercise_level_Medium,exercise_level_Slow,disease_BP,disease_Diabetes,disease_Thyroid,gender_female,gender_male,gender_other
0,56,186.0,116.000000,36,0.0,116.000000,False,False,True,True,False,True,False,False,True,False,False,True,False,False
1,46,161.0,67.000000,86,18.5,48.500000,False,True,False,True,False,True,False,False,False,True,False,True,False,False
2,32,162.0,91.000000,151,17.8,73.200000,False,True,False,False,True,False,False,True,True,False,False,False,True,False
3,25,172.0,90.000000,46,0.0,90.000000,False,False,True,True,False,False,False,True,False,False,True,True,False,False
4,38,174.0,56.000000,133,12.2,68.200000,True,False,False,False,True,False,False,True,False,False,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,49,159.0,48.000000,96,16.1,64.100000,True,False,False,False,True,False,True,False,False,False,False,False,True,False
501,21,179.0,90.000000,66,8.6,98.600000,True,False,False,False,True,True,False,False,True,False,False,False,True,False
502,35,168.0,81.686235,66,17.4,81.089879,True,False,False,True,False,False,True,False,True,False,False,True,False,False
503,50,180.0,56.000000,91,19.7,75.700000,True,False,False,False,True,True,False,False,True,False,False,True,False,False


In [25]:
df.isnull().sum()

age                         0
height                      0
weight                      0
days_to_goal                0
goal_weight_change          0
target_weight               0
goal_Gain                   0
goal_Loss                   0
goal_Maintain               0
diet_type_Non-Vegetarian    0
diet_type_Vegetarian        0
exercise_level_Fast         0
exercise_level_Medium       0
exercise_level_Slow         0
disease_BP                  0
disease_Diabetes            0
disease_Thyroid             0
gender_female               0
gender_male                 0
gender_other                0
dtype: int64

In [37]:
df.columns

Index(['age', 'height', 'weight', 'days_to_goal', 'goal_weight_change',
       'target_weight', 'goal_Gain', 'goal_Loss', 'goal_Maintain',
       'diet_type_Non-Vegetarian', 'diet_type_Vegetarian',
       'exercise_level_Fast', 'exercise_level_Medium', 'exercise_level_Slow',
       'disease_BP', 'disease_Diabetes', 'disease_Thyroid', 'gender_female',
       'gender_male', 'gender_other'],
      dtype='object')

In [26]:
X = df.drop(columns=['days_to_goal'])

In [27]:
X.columns

Index(['age', 'height', 'weight', 'goal_weight_change', 'target_weight',
       'goal_Gain', 'goal_Loss', 'goal_Maintain', 'diet_type_Non-Vegetarian',
       'diet_type_Vegetarian', 'exercise_level_Fast', 'exercise_level_Medium',
       'exercise_level_Slow', 'disease_BP', 'disease_Diabetes',
       'disease_Thyroid', 'gender_female', 'gender_male', 'gender_other'],
      dtype='object')

In [28]:
y = df['days_to_goal']

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
model = RandomForestRegressor()

In [31]:
model.fit(X_train, y_train)

In [32]:
y_pred = model.predict(X_test)

In [33]:
y_pred #predicted value

array([ 42.68, 100.54,  42.85,  63.85, 130.91,  40.15,  94.05,  28.32,
        39.54, 110.91, 121.05,  31.3 ,  69.15,  69.8 ,  28.4 ,  38.2 ,
        45.17,  89.15,  98.52,  26.92,  32.72,  39.17,  31.39, 106.56,
        30.52,  70.95,  38.84,  82.79,  24.8 ,  97.05,  89.23,  27.03,
       124.58, 122.32, 129.04,  77.26, 138.04,  31.28,  33.19,  90.62,
        88.08,  45.7 ,  75.58,  37.5 ,  95.28,  97.03,  93.04, 150.27,
        49.77,  31.96,  89.84, 160.69,  30.23, 105.28, 165.31,  41.5 ,
        33.66,  78.74, 112.08, 119.18,  92.7 ,  71.73,  44.78,  77.27,
       130.75,  99.76,  85.1 , 102.3 , 122.41, 102.77,  42.54, 130.04,
        76.11,  29.14,  37.02, 104.25,  87.89,  54.56,  98.13,  86.44,
       167.19, 116.12, 121.57,  71.49,  45.36, 113.69,  39.28,  69.2 ,
        75.65,  28.09,  97.72,  95.31,  45.72,  90.95,  85.03,  75.19,
        41.91,  31.39,  90.63,  69.22,  78.05])

In [34]:
y_test# actual value

173     36
274     94
490     39
72      65
305    133
      ... 
331     43
411     34
503     91
349     66
86      93
Name: days_to_goal, Length: 101, dtype: int64

In [36]:
joblib.dump(model, "random_forest_fitness_model.pkl")
#print("✅ Model saved as fitness_days_predictor.pkl")

['random_forest_fitness_model.pkl']