In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error as mae
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
import joblib
import warnings
warnings.filterwarnings('ignore')

# --- Load the raw data ----------------------------------------------------
df = pd.read_csv('calories.csv')

# Encode gender on its own column only. A frame-wide `replace(..., inplace=True)`
# would rewrite matching strings in every column and relies on pandas' deprecated
# implicit downcasting; an explicit map plus dtype cast is deterministic.
gender_codes = {'male': 0, 'female': 1}
unknown_gender = ~df['Gender'].isin(list(gender_codes))
if unknown_gender.any():
    # Fail early with a readable message instead of a conversion error
    # deep inside the scaler/model code further down.
    bad_values = sorted(df.loc[unknown_gender, 'Gender'].astype(str).unique())
    raise ValueError(f"Unexpected Gender values: {bad_values}")
df['Gender'] = df['Gender'].map(gender_codes).astype('int64')

# Body-mass index: weight / height^2, with Height divided by 100 first
# (presumably Height is in cm and Weight in kg -- TODO confirm against the dataset).
df['BMI'] = df['Weight'] / ((df['Height'] / 100) ** 2)

# --- Feature sets and target ----------------------------------------------
primary_features = ['Heart_Rate', 'Body_Temp', 'Duration']
extended_features = ['Heart_Rate', 'Body_Temp', 'Duration', 'BMI', 'Gender', 'Age']

target = df['Calories'].values

X_primary = df[primary_features]
X_extended = df[extended_features]

# --- Train/validation split -----------------------------------------------
# A single call splits every array on the same shuffled row indices, so both
# feature sets are guaranteed to share one 90/10 partition (seed 22) rather than
# relying on two separate calls happening to use the same random_state.
(X_train_p_raw, X_val_p_raw,
 X_train_e_raw, X_val_e_raw,
 Y_train_p, Y_val_p) = train_test_split(
    X_primary, X_extended, target, test_size=0.1, random_state=22
)
# Both feature sets are scored against the same targets.
Y_train_e, Y_val_e = Y_train_p, Y_val_p

# --- Scaling --------------------------------------------------------------
# Each scaler is fitted on the training rows only and then applied to the
# validation rows, so no validation statistics leak into training.
scaler_primary = StandardScaler()
X_train_p = scaler_primary.fit_transform(X_train_p_raw)
X_val_p = scaler_primary.transform(X_val_p_raw)

scaler_extended = StandardScaler()
X_train_e = scaler_extended.fit_transform(X_train_e_raw)
X_val_e = scaler_extended.transform(X_val_e_raw)

# Candidate regressors, all seeded with 22 for reproducibility.
# XGBoost, LightGBM and CatBoost are configured with an absolute-error objective,
# which matches the MAE metric used for evaluation; RandomForest and
# GradientBoosting keep their library defaults.
# LightGBM gets verbose=-1 so its per-fit [Info] lines do not bury the printed
# error table (CatBoost is already silenced with verbose=0).
models = {
    'RandomForest': RandomForestRegressor(random_state=22),
    'GradientBoosting': GradientBoostingRegressor(random_state=22),
    'XGBoost': xgb.XGBRegressor(random_state=22, objective='reg:absoluteerror'),
    'LightGBM': lgb.LGBMRegressor(random_state=22, objective='mae', verbose=-1),
    'CatBoost': cb.CatBoostRegressor(random_state=22, loss_function='MAE', verbose=0)
}

def _fit_and_report(model, X_train, Y_train, X_val, Y_val):
    """Fit ``model`` on the training split and print its train/validation MAE.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    X_train, Y_train : training features and targets
    X_val, Y_val : validation features and targets

    Returns
    -------
    tuple of (training MAE, validation MAE)
    """
    model.fit(X_train, Y_train)
    train_error = mae(Y_train, model.predict(X_train))
    val_error = mae(Y_val, model.predict(X_val))
    print(f"Training Error: {train_error:.2f}")
    print(f"Validation Error: {val_error:.2f}")
    return train_error, val_error


# (label, column names, (X_train, Y_train, X_val, Y_val)) per feature set.
# The printed column list is derived from the feature lists so the header
# cannot drift from what the model was actually trained on.
feature_sets = [
    ('Primary', primary_features, (X_train_p, Y_train_p, X_val_p, Y_val_p)),
    ('Extended', extended_features, (X_train_e, Y_train_e, X_val_e, Y_val_e)),
]

for model_name, model in models.items():
    print(f"\n=== {model_name} ===")
    # The same estimator instance is refitted for each feature set, so after the
    # loop every entry in `models` holds the fit from the last (extended) set.
    for set_label, set_columns, split in feature_sets:
        print(f"{set_label} Features ({', '.join(set_columns)}):")
        _fit_and_report(model, *split)




=== RandomForest ===
Primary Features (Heart_Rate, Body_Temp, Duration):
Training Error: 6.59
Validation Error: 9.06
Extended Features (Heart_Rate, Body_Temp, Duration, BMI, Gender, Age):
Training Error: 0.93
Validation Error: 2.37

=== GradientBoosting ===
Primary Features (Heart_Rate, Body_Temp, Duration):
Training Error: 8.17
Validation Error: 8.33
Extended Features (Heart_Rate, Body_Temp, Duration, BMI, Gender, Age):
Training Error: 3.06
Validation Error: 3.18

=== XGBoost ===
Primary Features (Heart_Rate, Body_Temp, Duration):
Training Error: 7.84
Validation Error: 8.17
Extended Features (Heart_Rate, Body_Temp, Duration, BMI, Gender, Age):
Training Error: 2.02
Validation Error: 2.50

=== LightGBM ===
Primary Features (Heart_Rate, Body_Temp, Duration):
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[Light

In [6]:
# Refit the CatBoost regressor on the scaled extended feature set, then persist
# it alongside the scaler that produced those features so both can be loaded
# together at inference time.
model_path, scaler_path = 'catboost_model.pkl', 'scaler.pkl'

best_model = models['CatBoost']
best_model.fit(X_train_e, Y_train_e)

joblib.dump(best_model, model_path)
joblib.dump(scaler_extended, scaler_path)

['scaler.pkl']

In [16]:
import requests

# Smoke-test the locally running prediction service with one sample record.
# NOTE(review): the training cells use Duration and a Weight-derived BMI as
# features, but this payload sends neither -- confirm how the service fills
# them in before trusting the returned prediction.
data = {"Gender": 1, "Age": 29.0, "Height": 189.0, "Heart_Rate": 100.0, "Body_Temp": 39.8}

# requests has no default timeout; without one a stalled server hangs the kernel.
response = requests.post("http://127.0.0.1:8000/", json=data, timeout=10)
# Surface HTTP errors (4xx/5xx) explicitly instead of failing later while
# decoding an error page as JSON.
response.raise_for_status()
print(response.json())

{'prediction': 63.52}
