In [31]:
# 1. Setup and Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os

In [32]:
dataset_folder = './datasets/'

In [33]:
# Load datasets
fndds_derivation = pd.read_csv(os.path.join(dataset_folder, 'fndds_derivation.csv'))
food = pd.read_csv(os.path.join(dataset_folder, 'food.csv'))
food_attribute = pd.read_csv(os.path.join(dataset_folder, 'food_attribute.csv'))
food_attribute_type = pd.read_csv(os.path.join(dataset_folder, 'food_attribute_type.csv'))
food_nutrient = pd.read_csv(os.path.join(dataset_folder, 'food_nutrient.csv'))
food_portion = pd.read_csv(os.path.join(dataset_folder, 'food_portion.csv'))
food_update_log_entry = pd.read_csv(os.path.join(dataset_folder, 'food_update_log_entry.csv'))
input_food = pd.read_csv(os.path.join(dataset_folder, 'Input_food.csv'))
measure_unit = pd.read_csv(os.path.join(dataset_folder, 'measure_unit.csv'))
nutrient = pd.read_csv(os.path.join(dataset_folder, 'nutrient.csv'))
survey_fndds_food = pd.read_csv(os.path.join(dataset_folder, 'survey_fndds_food.csv'))
wweia_food_category = pd.read_csv(os.path.join(dataset_folder, 'wweia_food_category.csv'))
exercise_dataset = pd.read_csv(os.path.join(dataset_folder, 'exercise_dataset.csv'))

In [34]:
# Display sample rows for verification
print('Food dataset sample:')
print(food.head())

Food dataset sample:
    fdc_id          data_type             description  food_category_id  \
0  2705383  survey_fndds_food             Milk, human              9602   
1  2705384  survey_fndds_food               Milk, NFS              1004   
2  2705385  survey_fndds_food             Milk, whole              1002   
3  2705386  survey_fndds_food  Milk, reduced fat (2%)              1004   
4  2705387  survey_fndds_food      Milk, low fat (1%)              1006   

  publication_date  
0       2022-10-28  
1       2022-10-28  
2       2022-10-28  
3       2022-10-28  
4       2022-10-28  


In [35]:
print('\nExercise dataset sample:')
print(exercise_dataset.head())


Exercise dataset sample:
   ID     Exercise  Calories Burn  Dream Weight  Actual Weight  Age  Gender  \
0   1   Exercise 2     286.959851     91.892531      96.301115   45    Male   
1   2   Exercise 7     343.453036     64.165097      61.104668   25    Male   
2   3   Exercise 4     261.223465     70.846224      71.766724   20    Male   
3   4   Exercise 5     127.183858     79.477008      82.984456   33    Male   
4   5  Exercise 10     416.318374     89.960226      85.643174   29  Female   

   Duration  Heart Rate        BMI Weather Conditions  Exercise Intensity  
0        37         170  29.426275              Rainy                   5  
1        43         142  21.286346              Rainy                   5  
2        20         148  27.899592             Cloudy                   4  
3        39         170  33.729552              Sunny                  10  
4        34         118  23.286113             Cloudy                   3  


In [36]:
# 5. Feature Engineering
features = exercise_dataset.drop(['Calories'], axis=1)  # Drop target column to get features
target = exercise_dataset['calories']                   # Target variable

KeyError: "['Calories'] not found in axis"

In [None]:
# 6. Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [None]:
# 7. Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# 8. Evaluation
y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))

In [None]:
# 9. Save the Model
joblib.dump(model, 'calorie_model.pkl')

In [None]:
# 10. (Optional) Prediction Example
sample = X_test.iloc[0:1]
print("Predicted:", model.predict(sample), "Actual:", y_test.iloc[0])