Importing Dependencies

In [35]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

Loading Dataset

In [19]:
# Folder holding the two CSV exports. Change this single value to run the
# notebook outside the Colab sample_data directory.
DATA_DIR = "/content/sample_data"

# NOTE(review): variable names and file names are crossed here --
# exercise_data is read from calories.csv and calories_data from exercise.csv.
# Later cells read Gender/Age/Height/... from exercise_data and 'Calories'
# from calories_data, so the files on disk presumably have swapped contents.
# Confirm, then rename the files (or swap the two paths) so that names and
# contents agree. The mapping is intentionally left unchanged here.
exercise_data = pd.read_csv(f"{DATA_DIR}/calories.csv")
calories_data = pd.read_csv(f"{DATA_DIR}/exercise.csv")

In [20]:
# Preview the first rows to check which columns the loaded frame contains.
exercise_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


In [21]:
# Print each column's dtype and non-null count (a quick check for missing values).
exercise_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   User_ID     15000 non-null  int64  
 1   Gender      15000 non-null  object 
 2   Age         15000 non-null  int64  
 3   Height      15000 non-null  float64
 4   Weight      15000 non-null  float64
 5   Duration    15000 non-null  float64
 6   Heart_Rate  15000 non-null  float64
 7   Body_Temp   15000 non-null  float64
dtypes: float64(5), int64(2), object(1)
memory usage: 937.6+ KB


In [22]:
# Turn the string-valued Gender column into integer codes so the regressors
# can consume it. LabelEncoder numbers the classes in sorted order, which is
# why 'female' becomes 0 and 'male' becomes 1.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder;
# it works here for a single binary column, but consider an explicit mapping.
le = LabelEncoder()
le.fit(exercise_data['Gender'])
exercise_data['Gender'] = le.transform(exercise_data['Gender'])

In [23]:
# Re-display the first rows to confirm Gender now holds integer codes.
exercise_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,1,68,190.0,94.0,29.0,105.0,40.8
1,14861698,0,20,166.0,60.0,14.0,94.0,40.3
2,11179863,1,69,179.0,79.0,5.0,88.0,38.7
3,16180408,0,34,179.0,71.0,13.0,100.0,40.5
4,17771927,0,27,154.0,58.0,10.0,81.0,39.8


Concatenating datasets

In [24]:
# Append the Calories target as a new right-most column of the feature frame.
# NOTE(review): concat pairs rows by index label, not by User_ID -- this
# assumes both files list the same users in the same order; confirm.
# Re-running this cell appends a second 'Calories' column.
exercise_data = pd.concat(
    objs=[exercise_data, calories_data['Calories']],
    axis='columns',
)

In [25]:
# Check that the Calories column now appears alongside the features.
exercise_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,1,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,0,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,1,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,0,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,0,27,154.0,58.0,10.0,81.0,39.8,35.0


In [27]:
# Remove User_ID (presumably just a row identifier) so it is not fed to the
# models as a feature. Re-running this cell raises KeyError because the
# column is already gone.
exercise_data = exercise_data.drop(columns='User_ID')

In [28]:
# Separate the regression target (Calories) from the predictor columns.
Y = exercise_data['Calories']
X = exercise_data.drop(columns=['Calories'])

Splitting the data

In [29]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

Model Training and Evaluation

In [31]:
# Baseline model: ordinary least-squares regression fitted on the training
# split. fit() returns the estimator itself, so the call can be chained.
model = LinearRegression().fit(X_train, Y_train)
# Keep the fitted estimator as the cell's displayed result.
model

In [40]:
# Mean absolute error of the fitted model on the data it was trained on.
# The label previously said "logistic regression", but the estimator fitted
# in the preceding cell is LinearRegression; the message now matches it.
train_predictions = model.predict(X_train)
train_mae = mean_absolute_error(Y_train, train_predictions)
print("Training MAE using linear regression : ",train_mae)

Training MAE using logistic regression :  8.332985229896753


In [41]:
# Mean absolute error of the fitted model on the held-out test split.
# The label previously said "logistic regression", but the estimator fitted
# earlier is LinearRegression; the message now matches it.
test_predictions = model.predict(X_test)
test_mae = mean_absolute_error(Y_test, test_predictions)
print("Testing MAE using linear regression : ",test_mae)

Testing MAE using logistic regression :  8.385188053147193


In [42]:
# Second model: gradient-boosted trees with default hyperparameters.
# This rebinds `model`, so the linear model from the earlier cell is no
# longer reachable under that name once this cell has run.
model = XGBRegressor().fit(X_train, Y_train)
# Keep the fitted estimator as the cell's displayed result.
model

In [43]:
# Mean absolute error of the XGBoost model on the training split.
train_predictions = model.predict(X_train)
train_mae = mean_absolute_error(y_true=Y_train, y_pred=train_predictions)
print(
    "Training MAE using XGBRegressor : ",
    train_mae,
)

Training MAE using XGBRegressor :  0.9322033420062313


In [44]:
# Mean absolute error of the XGBoost model on the held-out test split.
test_predictions = model.predict(X_test)
test_mae = mean_absolute_error(y_true=Y_test, y_pred=test_predictions)
print(
    "Testing MAE using XGBRegressor : ",
    test_mae,
)

Testing MAE using XGBRegressor :  1.4833678883314132
