# Step 1: Read the data

In [15]:
import pandas as pd

# Load the calories dataset, using the first CSV column as the row index
df = pd.read_csv(filepath_or_buffer="calories.csv", index_col=0)

# Preview the first five rows
df.head(n=5)

Unnamed: 0_level_0,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


# Step 2: Define the input and target features

In [16]:
# Split the frame into predictors (everything except the target) and the
# regression target itself. `columns=` already names the axis, so no
# separate `axis=1` is needed.
X = df.drop(columns=["Calories"])
y = df["Calories"]

# Encode gender numerically so it can be scaled and fed to the linear model.
gender_codes = {"female": 0, "male": 1}
X["Gender"] = X["Gender"].map(gender_codes)

# `.map` silently converts any label that is not a key of the dict (or is
# missing) into NaN; stop here with a clear message instead of failing later
# inside the model fit.
assert X["Gender"].notna().all(), "Unexpected or missing Gender labels in calories.csv"

# Step 3: Run a 5-fold cross-validation

In [17]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.linear_model import ElasticNet

# Baseline estimator: standardise every feature, then fit ElasticNet with
# its default hyperparameters
elastic_md = make_pipeline(
    StandardScaler(),
    ElasticNet(),
)

# Shuffled 5-fold splitter with a fixed seed so the folds are reproducible
skf = KFold(n_splits=5, shuffle=True, random_state=42)

# The scorer returns RMSE negated, hence the sign flip on the mean when
# printing below
elastic_cv = cross_val_score(
    estimator=elastic_md,
    X=X,
    y=y,
    scoring="neg_root_mean_squared_error",
    cv=skf,
    n_jobs=-1,
)

print(f"ElasticNet CV RMSE: {-elastic_cv.mean():.2f} ± {elastic_cv.std():.2f}")


ElasticNet CV RMSE: 19.50 ± 0.33


# Engineering Interaction Features

In [18]:
from itertools import combinations

# Pair up only the base features. Columns produced by this cell carry the
# "_x_" marker, so excluding them keeps the cell idempotent: re-running it
# rebuilds the same pairwise products instead of stacking interactions of
# interactions onto X.
# NOTE: assumes no raw feature name contains "_x_" (true for the columns
# visible in the df.head() preview) -- revisit if the schema changes.
cols = [col for col in X.columns if "_x_" not in col]

# Build every pairwise product in one pass and attach them to a fresh frame
# (base columns first, then interactions in combination order) rather than
# inserting columns one at a time into the existing X.
X = X[cols].assign(
    **{f"{col1}_x_{col2}": X[col1] * X[col2] for col1, col2 in combinations(cols, 2)}
)

In [19]:
# Same scaler + default ElasticNet pipeline as before; X now also contains
# the pairwise interaction columns
elastic_md = make_pipeline(
    StandardScaler(),
    ElasticNet(),
)

# Re-create the seeded, shuffled 5-fold splitter
skf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# One negated-RMSE score per fold, evaluated in parallel on all cores
elastic_cv = cross_val_score(
    elastic_md,
    X,
    y,
    scoring="neg_root_mean_squared_error",
    cv=skf,
    n_jobs=-1,
)

print(f"ElasticNet CV RMSE: {-elastic_cv.mean():.2f} ± {elastic_cv.std():.2f}")

ElasticNet CV RMSE: 12.04 ± 0.19


# Estimating the optimal values for alpha and l1_ratio

In [21]:
import numpy as np
from sklearn.linear_model import ElasticNetCV

# Candidate penalty strengths (alpha) and L1/L2 mixing weights (l1_ratio):
# 100 evenly spaced values each, i.e. a 100 x 100 grid
alphas = np.linspace(0.001, 10, 100)
l1_ratios = np.linspace(0.001, 1, 100)

# Scale the features, then let ElasticNetCV choose alpha / l1_ratio by
# cross-validation on the folds defined by `skf`.
# n_jobs=-1 runs the search on all CPU cores, matching the other CV cells;
# it only changes wall-clock time, not the selected values.
elastic_cv_alpha_l1 = Pipeline([
    ("scaler", StandardScaler()),
    (
        "elastic_net_cv",
        ElasticNetCV(
            alphas=alphas,
            l1_ratio=l1_ratios,
            cv=skf,
            max_iter=100000,
            n_jobs=-1,
        ),
    ),
])

elastic_cv_alpha_l1.fit(X, y)

# Pull the fitted search step out once instead of repeating the lookup
tuned_net = elastic_cv_alpha_l1.named_steps["elastic_net_cv"]

# NOTE(review): the recorded run selected alpha = 0.001, the smallest value
# on the grid. If the optimum still lands on a grid edge, consider widening
# the alpha range (e.g. log-spaced towards smaller values) -- TODO confirm.
print(f"Optimal alpha: {tuned_net.alpha_}")
print(f"Optimal l1_ratio: {tuned_net.l1_ratio_}")

Optimal alpha: 0.001
Optimal l1_ratio: 1.0


# Re-training the ElasticNet model with the tuned hyperparameters

In [23]:
# Take alpha / l1_ratio straight from the fitted ElasticNetCV search rather
# than re-typing the printed values, so this cell cannot drift out of sync
# with the search if the data or the grid changes.
best_net = elastic_cv_alpha_l1.named_steps["elastic_net_cv"]
elastic_md = make_pipeline(
    StandardScaler(),
    ElasticNet(
        alpha=float(best_net.alpha_),
        l1_ratio=float(best_net.l1_ratio_),
        max_iter=100000,
    ),
)

# Cross-validation on the same seeded, shuffled 5-fold split as before
skf = KFold(n_splits=5, shuffle=True, random_state=42)
elastic_cv = cross_val_score(elastic_md, X, y, cv=skf, scoring="neg_root_mean_squared_error", n_jobs=-1)

# NOTE: the hyperparameters were selected on this same X / y with the same
# fold definition, so this score is not an independent estimate and is
# likely somewhat optimistic; a held-out set or nested CV would be needed
# for an unbiased figure.
print(f"ElasticNet CV RMSE: {-elastic_cv.mean():.2f} ± {elastic_cv.std():.2f}")

ElasticNet CV RMSE: 3.01 ± 0.02
