In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Input files, resolved relative to the notebook's working directory.
CALORIES_CSV = 'calories.csv'
EXERCISE_CSV = 'exercise.csv'

# Read both raw tables; they are joined on User_ID in the next cell.
calories = pd.read_csv(CALORIES_CSV)
exercise = pd.read_csv(EXERCISE_CSV)

In [3]:
# Join the exercise rows with the calories rows on the shared User_ID key.
# how='inner' is the pandas default, spelled out here: only User_IDs present
# in both tables are kept.
df = pd.merge(left=exercise, right=calories, how='inner', on='User_ID')

In [4]:
# Peek at the first three merged rows to sanity-check the join.
df.head(n=3)

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0


In [6]:
# Encode Gender as a binary feature: male -> 1, female -> 0.
GENDER_CODES = {'male': 1, 'female': 0}

# Guard on dtype so re-running this cell is a no-op: calling .map() a second
# time on the already-encoded 0/1 column would match no key and turn every
# value into NaN.
if not pd.api.types.is_numeric_dtype(df['Gender']):
    gender_encoded = df['Gender'].map(GENDER_CODES)

    # .map() yields NaN for any label missing from the mapping; fail loudly
    # here instead of handing NaN features to the models later.
    if gender_encoded.isna().any():
        unexpected = df.loc[gender_encoded.isna(), 'Gender'].unique()
        raise ValueError(f"Unexpected Gender values: {unexpected!r}")

    df['Gender'] = gender_encoded

In [7]:
# Target: the Calories column.
y = df['Calories']

# Features: every remaining column except the User_ID identifier and the target.
X = df.drop(columns=['User_ID', 'Calories'])

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Training Model

In [9]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [10]:
# Candidate regressors, keyed by the short label printed in the comparison loop.
# The tree-based estimators are randomised, so they are seeded here; without a
# random_state their scores change on every Restart & Run All.
models = {
    'lr': LinearRegression(),
    'rd': Ridge(),
    'ls': Lasso(),
    'dtr': DecisionTreeRegressor(random_state=42),
    'rfr': RandomForestRegressor(random_state=42),
}

In [12]:
# Fit every candidate on the training split and report its hold-out metrics.
for name, mod in models.items():
    # fit() returns the estimator itself, so fitting and predicting can be chained.
    y_pred = mod.fit(X_train, y_train).predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{name}  MSE: {mse}, Score: {r2}")



lr  MSE: 131.9957457508168, Score: 0.9672937151257296
rd  MSE: 131.99625903139352, Score: 0.9672935879435944
ls  MSE: 143.82689461175067, Score: 0.9643621590908397
dtr  MSE: 27.977666666666668, Score: 0.9930676134225961
rfr  MSE: 7.178825633333334, Score: 0.998221209972405


In [13]:
# Refit a random forest as the model that gets pickled below.
# random_state pins the forest's internal randomness so that re-running the
# notebook produces the same fitted model instead of a different one each time.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(X_train, y_train)
y_pred = rfr.predict(X_test)  # hold-out predictions; not scored in this cell


In [14]:
import pickle

In [15]:
# Persist the fitted model. The context manager flushes and closes the file
# handle even if pickling raises; the bare open() call left it open.
with open('rfr.pkl', 'wb') as model_file:
    pickle.dump(rfr, model_file)

# Save the training features next to the model. With pandas' default
# index=True, the DataFrame index is written as the first column.
X_train.to_csv('X_train.csv')