In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

%matplotlib inline

In [2]:
# Read the two input tables from the working directory: the per-user exercise
# records and the table that is later merged onto them via User_ID.
ex_df, cal_df = (
    pd.read_csv(csv_name) for csv_name in ('exercise.csv', 'calories.csv')
)

In [3]:
# Plain-text preview of the first five exercise records.
print(ex_df.head(n=5))

    User_ID  Gender  Age  Height  Weight  Duration  Heart_Rate  Body_Temp
0  14733363    male   68   190.0    94.0      29.0       105.0       40.8
1  14861698  female   20   166.0    60.0      14.0        94.0       40.3
2  11179863    male   69   179.0    79.0       5.0        88.0       38.7
3  16180408  female   34   179.0    71.0      13.0       100.0       40.5
4  17771927  female   27   154.0    58.0      10.0        81.0       39.8


In [4]:
# Join the calories table onto the exercise records by User_ID (inner join).
# validate="one_to_one" makes pandas raise MergeError if User_ID is duplicated
# in either table, instead of silently multiplying rows and pairing a record
# with the wrong Calories value.
final_df = ex_df.merge(cal_df, on="User_ID", validate="one_to_one")
print(final_df.head())

    User_ID  Gender  Age  Height  ...  Duration  Heart_Rate  Body_Temp  Calories
0  14733363    male   68   190.0  ...      29.0       105.0       40.8     231.0
1  14861698  female   20   166.0  ...      14.0        94.0       40.3      66.0
2  11179863    male   69   179.0  ...       5.0        88.0       38.7      26.0
3  16180408  female   34   179.0  ...      13.0       100.0       40.5      71.0
4  17771927  female   27   154.0  ...      10.0        81.0       39.8      35.0

[5 rows x 9 columns]


In [5]:
# Encode Gender numerically (male -> 1, female -> 0) so it can be used as a
# model feature.
gender_encoded = final_df['Gender'].map({'male': 1, 'female': 0})
# Series.map yields NaN for every value that is not a key of the dict. That
# happens for unexpected labels and also when this cell is re-run on the
# already-encoded column, so fail loudly before overwriting anything.
if gender_encoded.isna().any():
    raise ValueError(
        "Gender holds values other than 'male'/'female' "
        "(has this cell already been run on final_df?)"
    )
final_df['Gender'] = gender_encoded

In [6]:
# BMI = Weight / (Height / 100)^2, rounded to two decimals.
# NOTE(review): the /100 presumably converts Height from cm to m — confirm units.
final_df["BMI"] = (
    final_df["Weight"]
    .div(final_df["Height"].div(100).pow(2))
    .round(2)
)

In [7]:
# Height and Weight are already summarised by BMI; together with User_ID they
# are removed from the frame that is later used for training.
final_df = final_df.drop(['Height', 'Weight', 'User_ID'], axis=1)

In [8]:
# Show the first five rows after the BMI feature was added and the raw columns dropped.
final_df.head(n=5)

Unnamed: 0,Gender,Age,Duration,Heart_Rate,Body_Temp,Calories,BMI
0,1,68,29.0,105.0,40.8,231.0,26.04
1,0,20,14.0,94.0,40.3,66.0,21.77
2,1,69,5.0,88.0,38.7,26.0,24.66
3,0,34,13.0,100.0,40.5,71.0,22.16
4,0,27,10.0,81.0,39.8,35.0,24.46


In [9]:
# Regression target: the Calories column of the merged frame.
y = final_df.loc[:, "Calories"]

In [10]:
# Remove the target column so that final_df holds only the input features,
# then preview the result.
final_df = final_df.drop(labels='Calories', axis='columns')
final_df.head(n=5)

Unnamed: 0,Gender,Age,Duration,Heart_Rate,Body_Temp,BMI
0,1,68,29.0,105.0,40.8,26.04
1,0,20,14.0,94.0,40.3,21.77
2,1,69,5.0,88.0,38.7,24.66
3,0,34,13.0,100.0,40.5,22.16
4,0,27,10.0,81.0,39.8,24.46


In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    final_df,
    y,
    random_state=1,
    test_size=0.2,
)

In [12]:
# Fit two regressors on the training split: a random forest and a
# linear-regression baseline.
# random_state pins the forest's bootstrap sampling and per-split feature
# sub-sampling (max_features=3), so the scores and the pickled model are
# reproducible across runs; the seed matches the one used for the split.
model2 = RandomForestRegressor(
    n_estimators=1000,
    max_depth=6,
    max_features=3,
    random_state=1,
)
model1 = LinearRegression()
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)

In [13]:
# Report each model's .score() on the held-out split (R^2 for scikit-learn regressors).
linreg_r2 = model1.score(X_test, y_test)
forest_r2 = model2.score(X_test, y_test)
print("Linear Regression Score = ", linreg_r2)
print("RandomForest Regression Score = ", forest_r2)

Linear Regression Score =  0.9651112627454046
RandomForest Regression Score =  0.9828875131434964


In [14]:
# Error metrics for the linear model on the held-out split.
# `metrics` comes from the notebook's import cell rather than a mid-notebook import.
linreg_prediction = model1.predict(X_test)
linreg_mae = metrics.mean_absolute_error(y_test, linreg_prediction)
# MSE is computed once; RMSE is derived from it instead of re-scoring the predictions.
linreg_mse = metrics.mean_squared_error(y_test, linreg_prediction)
print("Linear Regression Mean Absolute Error(MAE) : " , round(linreg_mae, 2))
print("Linear Regression Mean Squared Error(MSE) : " , round(linreg_mse, 2))
print("Linear Regression Root Mean Squared Error(RMSE) : " , round(np.sqrt(linreg_mse), 2))

Linear Regression Mean Absolute Error(MAE) :  8.52
Linear Regression Mean Squared Error(MSE) :  140.08
Linear Regression Root Mean Squared Error(RMSE) :  11.84


In [16]:
# Error metrics for the random forest on the held-out split, reported in the
# same three forms as for the linear model (MAE, MSE, RMSE).
random_reg_prediction = model2.predict(X_test)
forest_mae = metrics.mean_absolute_error(y_test, random_reg_prediction)
forest_mse = metrics.mean_squared_error(y_test, random_reg_prediction)
forest_rmse = np.sqrt(forest_mse)
print("RandomForest Mean Absolute Error(MAE) : " , round(forest_mae, 2))
print("RandomForest Mean Squared Error(MSE) : " , round(forest_mse, 2))
print("RandomForest Root Mean Squared Error(RMSE) : " , round(forest_rmse, 2))

RandomForest Mean Absolute Error(MAE) :  5.33
RandomForest Mean Squared Error(MSE) :  68.71
RandomForest Root Mean Squared Error(RMSE) :  8.29


In [15]:
# Save the fitted random forest together with the training column names, so a
# consumer can rebuild its input frame with the same columns in the same order.
# pickle is already imported in the notebook's first cell; no re-import needed.
# NOTE: only unpickle this file from a trusted source — pickle.load can execute
# arbitrary code.
with open("own_model.pkl", "wb") as file:
    pickle.dump({"model": model2, "columns": X_train.columns.tolist()}, file)