In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read both source tables from the working directory; they share a User_ID column.
calories, exercise = (
    pd.read_csv(csv_name) for csv_name in ('calories.csv', 'exercise.csv')
)

In [3]:
# Peek at the first two rows of the calorie table.
calories.head(n=2)

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0


In [4]:
# Peek at the first two rows of the exercise table.
exercise.head(n=2)

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3


In [5]:
# Join the exercise measurements with their calorie values on the shared User_ID key.
# validate='one_to_one' makes pandas raise MergeError if either table repeats a
# User_ID, instead of silently multiplying rows in the modelling frame.
df = exercise.merge(calories, on='User_ID', how='inner', validate='one_to_one')

In [6]:
# Inspect the first three rows of the merged frame.
df.head(n=3)

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0


In [7]:
# Encoding: turn Gender into a binary feature (male -> 1, female -> 0).
#
# Series.map converts every value missing from the mapping into NaN, so running
# this cell a second time on the already-encoded column would wipe it out.
# Encode only while the column still holds raw labels, and fail loudly on any
# label the mapping does not cover rather than passing NaN on to the models.
gender_codes = {'male': 1, 'female': 0}
if not df['Gender'].isin(list(gender_codes.values())).all():
    gender_encoded = df['Gender'].map(gender_codes)
    if gender_encoded.isna().any():
        unexpected = df.loc[gender_encoded.isna(), 'Gender'].unique().tolist()
        raise ValueError(f"Unexpected Gender values: {unexpected}")
    df['Gender'] = gender_encoded
df.head(3)

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,1,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,0,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,1,69,179.0,79.0,5.0,88.0,38.7,26.0


In [8]:
# Train/test split

In [9]:
# Features: every column except the identifier and the target.
X = df.drop(columns=['User_ID', 'Calories'])
# Target: the calorie value to be predicted.
y = df['Calories']

In [10]:
# Feature matrix dimensions as (rows, columns).
X.shape

(15000, 7)

In [11]:
# Target length; should match the number of rows in X.
y.shape

(15000,)

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Training partition dimensions as (rows, columns).
X_train.shape

(12000, 7)

In [15]:
# Held-out test partition dimensions as (rows, columns).
X_test.shape

(3000, 7)

In [16]:
# Model training

In [17]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [18]:
# Candidate regressors, keyed by the short label used when reporting scores.
# The tree-based estimators are randomized, so they are seeded here; without a
# fixed random_state their metrics change on every kernel restart.
models = {
    'lr': LinearRegression(),
    'rd': Ridge(),
    'ls': Lasso(),
    'dtr': DecisionTreeRegressor(random_state=42),
    'rfr': RandomForestRegressor(random_state=42),
}

In [19]:
# Fit each candidate on the training partition and report its test-set error
# (MSE) and goodness of fit (R^2).
for name, mod in models.items():
    y_pred = mod.fit(X_train, y_train).predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{name}  MSE: {mse}, Score: {r2}")

lr  MSE: 131.99574575081698, Score: 0.9672937151257295
rd  MSE: 131.99625903139344, Score: 0.9672935879435945
ls  MSE: 143.82689461175062, Score: 0.9643621590908397
dtr  MSE: 28.116333333333333, Score: 0.9930332541977834
rfr  MSE: 7.255435066666666, Score: 0.9982022274670492


In [20]:
# Fit a standalone random forest on the training partition.
# Bootstrapping and feature sampling are randomized, so the estimator is seeded
# to make every refit produce the same model.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(X_train, y_train)
# Test-set predictions from this model.
y_pred = rfr.predict(X_test)

In [21]:
import pickle

# Serialize the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises; a bare open() would
# leave that to the garbage collector.
with open('rfr.pkl', 'wb') as model_file:
    pickle.dump(rfr, model_file)

In [22]:
# Export the training features; the row index is written as the first CSV column.
X_train.to_csv('X_train.csv', index=True)