# Step 1: Read the data

In [1]:
import pandas as pd

# Load the calories dataset; the first CSV column is used as the row index
df = pd.read_csv(filepath_or_buffer="calories.csv", index_col=0)

# Preview the first rows to sanity-check the columns that were read
df.head()

User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


# Step 2: Define the input and target features

In [2]:
# Input features: every column except the target.
# `columns=` already selects the column axis, so no separate `axis` argument is needed.
X = df.drop(columns=["Calories"])

# Target: the column the model is trained to predict
y = df["Calories"]

# Encode Gender numerically (female -> 0, male -> 1) so it can be scaled and fed to the model.
# Series.map turns any label that is missing from the dict into NaN, so reject unexpected
# labels here instead of letting NaNs surface later as an opaque estimator error.
gender_codes = {"female": 0, "male": 1}
unexpected_labels = set(X["Gender"].dropna().unique()) - set(gender_codes)
if unexpected_labels:
    raise ValueError(f"Unexpected Gender labels: {sorted(unexpected_labels, key=str)}")

X["Gender"] = X["Gender"].map(gender_codes)

# Step 3: Run 5-fold cross-validation

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor

# Defining the model: standardize the features, then fit an MLP regressor with a
# single hidden layer of 16 units
mlp_md = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(16,),  # trailing comma: (16) is just the int 16, not a tuple
                 activation="relu",
                 max_iter=10000,
                 random_state=42))

# Defining the cross-validation method: plain shuffled K-fold (not stratified),
# seeded so the folds are the same on every run
skf = KFold(n_splits=5, shuffle=True, random_state=42)

# Running cross-validation; this scorer returns the NEGATED RMSE of each fold
mlp_cv = cross_val_score(mlp_md, X, y, cv=skf, scoring="neg_root_mean_squared_error", n_jobs=-1)

# Negate the mean to report a positive RMSE; the standard deviation is unaffected by the sign
print(f"5-Fold CV RMSE: {-mlp_cv.mean():.2f} ± {mlp_cv.std():.2f}")

5-Fold CV RMSE: 0.69 ± 0.03


# Step 4: Engineer interaction features

In [7]:
from itertools import combinations

# Build pairwise products from the original input features only.
# Restricting to columns that also exist in `df` keeps this cell idempotent: re-running
# it recomputes the same interaction columns instead of multiplying previously
# generated interaction columns with each other.
cols = [col for col in X.columns if col in df.columns]

# One new column per unordered pair of base features, named "<col1>_x_<col2>"
for col1, col2 in combinations(cols, 2):
    X[f"{col1}_x_{col2}"] = X[col1] * X[col2]

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor

# Defining the model: standardize the features, then fit an MLP regressor with a
# single hidden layer of 16 units. At this point X also holds the pairwise
# interaction columns added in the previous cell (assuming cells run in order).
mlp_md = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(16,),  # trailing comma: (16) is just the int 16, not a tuple
                 activation="relu",
                 max_iter=10000,
                 random_state=42))

# Defining the cross-validation method: plain shuffled K-fold (not stratified),
# seeded so the folds are the same on every run
skf = KFold(n_splits=5, shuffle=True, random_state=42)

# Running cross-validation; this scorer returns the NEGATED RMSE of each fold
mlp_cv = cross_val_score(mlp_md, X, y, cv=skf, scoring="neg_root_mean_squared_error", n_jobs=-1)

# Negate the mean to report a positive RMSE; the standard deviation is unaffected by the sign
print(f"5-Fold CV RMSE: {-mlp_cv.mean():.2f} ± {mlp_cv.std():.2f}")

5-Fold CV RMSE: 0.47 ± 0.02


In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor

# Defining the model: same scaler + single-hidden-layer MLP, but trained with the
# plain stochastic gradient descent solver instead of the library default
mlp_md = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(16,),  # trailing comma: (16) is just the int 16, not a tuple
                 activation="relu",
                 solver="sgd",
                 max_iter=10000,
                 random_state=42))

# Defining the cross-validation method: plain shuffled K-fold (not stratified),
# seeded so the folds are the same on every run
skf = KFold(n_splits=5, shuffle=True, random_state=42)

# Running cross-validation; this scorer returns the NEGATED RMSE of each fold
mlp_cv = cross_val_score(mlp_md, X, y, cv=skf, scoring="neg_root_mean_squared_error", n_jobs=-1)

# Negate the mean to report a positive RMSE; the standard deviation is unaffected by the sign
print(f"5-Fold CV RMSE: {-mlp_cv.mean():.2f} ± {mlp_cv.std():.2f}")

5-Fold CV RMSE: 0.47 ± 0.04


In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor

# Defining the model: scaler + single-hidden-layer MLP trained with SGD and early stopping.
# Per the scikit-learn docs, early_stopping=True sets aside part of the training data as a
# validation set, and n_iter_no_change is the number of epochs without sufficient
# validation improvement that is tolerated before training stops.
# NOTE(review): a patience of 1000 epochs is very large relative to max_iter=10000 —
# confirm this is intentional.
mlp_md = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(16,),  # trailing comma: (16) is just the int 16, not a tuple
                 activation="relu",
                 solver="sgd",
                 max_iter=10000,
                 early_stopping=True,
                 n_iter_no_change=1000,
                 random_state=42))

# Defining the cross-validation method: plain shuffled K-fold (not stratified),
# seeded so the folds are the same on every run
skf = KFold(n_splits=5, shuffle=True, random_state=42)

# Running cross-validation; this scorer returns the NEGATED RMSE of each fold
mlp_cv = cross_val_score(mlp_md, X, y, cv=skf, scoring="neg_root_mean_squared_error", n_jobs=-1)

# Negate the mean to report a positive RMSE; the standard deviation is unaffected by the sign
print(f"5-Fold CV RMSE: {-mlp_cv.mean():.2f} ± {mlp_cv.std():.2f}")

5-Fold CV RMSE: 0.41 ± 0.02
