In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the competition's train and test splits from the Kaggle input directory.
train_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/playground-series-s5e5/train.csv",
)
test_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/playground-series-s5e5/test.csv",
)

In [3]:
# Keep untouched snapshots of the raw frames. Plain assignment would only bind
# second names to the same objects, so later in-place edits to train_data /
# test_data would silently change the "copies" as well.
train_copy, test_copy = train_data.copy(), test_data.copy()

In [4]:
# Preview the first five training rows.
train_data.head(n=5)

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,male,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,female,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,female,51,161.0,64.0,7.0,84.0,39.8,29.0
3,3,male,20,192.0,90.0,25.0,105.0,40.7,140.0
4,4,female,38,166.0,61.0,25.0,102.0,40.6,146.0


In [5]:
# Preview the first five test rows.
test_data.head(n=5)

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,750000,male,45,177.0,81.0,7.0,87.0,39.8
1,750001,male,26,200.0,97.0,20.0,101.0,40.5
2,750002,female,29,188.0,85.0,16.0,102.0,40.4
3,750003,female,39,172.0,73.0,20.0,107.0,40.6
4,750004,female,30,173.0,67.0,16.0,94.0,40.5


In [6]:
# (rows, columns) of the training frame.
train_data.shape

(750000, 9)

In [7]:
# (rows, columns) of the test frame.
test_data.shape

(250000, 8)

In [8]:
# Count missing values per column in the training frame.
train_data.isna().sum()

id            0
Sex           0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
Calories      0
dtype: int64

In [9]:
# Count missing values per column in the test frame.
test_data.isna().sum()

id            0
Sex           0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
dtype: int64

In [10]:
# Integer codes for the categorical Sex column; reused later for the test frame.
gender_map = {"male": 0, "female": 1}

# Series.map is used instead of Series.replace: replacing strings with ints
# makes pandas emit a FutureWarning about silent downcasting. The dtype guard
# keeps the cell safe to re-run once the column is already numeric.
if not pd.api.types.is_numeric_dtype(train_data['Sex']):
    sex_encoded = train_data['Sex'].map(gender_map)
    # map() yields NaN for any label missing from gender_map; fail loudly here
    # rather than letting NaN reach the model.
    if sex_encoded.isna().any():
        raise ValueError("Unexpected value in 'Sex'; expected only 'male' or 'female'")
    train_data['Sex'] = sex_encoded.astype("int64")

# id is a row identifier, not a predictor. errors="ignore" lets the cell be
# re-run after the column has already been removed.
train_data = train_data.drop(columns="id", errors="ignore")

print(train_data.head())

   Sex  Age  Height  Weight  Duration  Heart_Rate  Body_Temp  Calories
0    0   36   189.0    82.0      26.0       101.0       41.0     150.0
1    1   64   163.0    60.0       8.0        85.0       39.7      34.0
2    1   51   161.0    64.0       7.0        84.0       39.8      29.0
3    0   20   192.0    90.0      25.0       105.0       40.7     140.0
4    1   38   166.0    61.0      25.0       102.0       40.6     146.0


  train_data['Sex'] = train_data['Sex'].replace(gender_map)


In [11]:
def feature_engineering(df):
    """Return a copy of ``df`` with derived ratio and interaction columns added.

    The input frame is left untouched, so the caller's view of the raw data
    stays valid and the call can be repeated safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the numeric columns ``Age``, ``Height``, ``Weight``,
        ``Duration``, ``Heart_Rate`` and ``Body_Temp``.

    Returns
    -------
    pandas.DataFrame
        A new frame with every original column plus ``bmi``,
        ``exercise_intensity``, ``heart_rate_duration``, ``temp_duration``,
        ``hr_to_temp``, ``hr_to_age``, ``age_bmi``, ``max_heart_rate`` and
        ``heart_rate_intensity``.

    Notes
    -----
    No zero checks are performed: a zero in ``Duration``, ``Body_Temp``,
    ``Age`` or ``Height`` produces ``inf``/``NaN`` in the matching ratio column.
    """
    # Work on a copy so the caller's DataFrame is not modified in place.
    df = df.copy()

    # Height is divided by 100 before squaring (presumably cm -> m; confirm units).
    df['bmi'] = df['Weight'] / ((df['Height'] / 100) ** 2)
    df['exercise_intensity'] = df['Heart_Rate'] / df['Duration']
    df['heart_rate_duration'] = df['Heart_Rate'] * df['Duration']
    df['temp_duration'] = df['Body_Temp'] * df['Duration']
    df['hr_to_temp'] = df['Heart_Rate'] / df['Body_Temp']
    df['hr_to_age'] = df['Heart_Rate'] / df['Age']
    df['age_bmi'] = df['Age'] * df['bmi']
    df['max_heart_rate'] = 220 - df['Age']
    # Heart rate as a fraction of the 220 - Age value computed above.
    df['heart_rate_intensity'] = df['Heart_Rate'] / df['max_heart_rate']
    return df

In [12]:
# Add the derived columns to both splits with the shared helper.
train_data = train_data.pipe(feature_engineering)
test_data = test_data.pipe(feature_engineering)

In [13]:
#train_data.drop(['Sex','Age','Height','Weight','Duration','Heart_Rate','Body_Temp'], axis=1)

In [14]:
# Separate the regression target from the predictor columns.
y = train_data['Calories']
X = train_data.drop(columns=['Calories'])

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.pipeline import Pipeline

In [16]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Fit an ordinary least-squares baseline on the training split.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Score the model on the hold-out split.
y_pred = lr_model.predict(X_test)

# RMSE is taken as the square root of the MSE rather than via
# mean_squared_error(..., squared=False): that keyword is deprecated in
# scikit-learn 1.4 and removed in 1.6, while this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"LinearRegression → R²: {r2_score(y_test, y_pred):.4f}, RMSE: {rmse:.2f}")

LinearRegression → R²: 0.9898, RMSE: 6.30


In [18]:
# Encode Sex with the same mapping used for the training frame. Series.map is
# used instead of Series.replace, which emits a FutureWarning about silent
# downcasting when strings are replaced by ints. The dtype guard keeps the
# cell safe to re-run once the column is already numeric.
if not pd.api.types.is_numeric_dtype(test_data['Sex']):
    sex_encoded = test_data['Sex'].map(gender_map)
    # map() yields NaN for any label missing from gender_map; stop here rather
    # than passing NaN to the model.
    if sex_encoded.isna().any():
        raise ValueError("Unexpected value in 'Sex'; expected only 'male' or 'female'")
    test_data['Sex'] = sex_encoded.astype("int64")

# Keep the ids for the submission file; they are not a model input.
test_ids = test_data['id']
X_test_final = test_data.drop(columns='id')

test_predictions = lr_model.predict(X_test_final)
# Clamp any negative predictions from the linear model to zero.
test_predictions = np.maximum(0, test_predictions)

submission = pd.DataFrame({
    'id': test_ids,
    'Calories': test_predictions,
})
submission.to_csv('submission.csv', index=False)

  test_data['Sex'] = test_data['Sex'].replace(gender_map)


In [19]:
# NOTE(review): y_pred holds the hold-out predictions from lr_model.predict(X_test),
# not the test-set predictions (test_predictions) written to submission.csv —
# confirm this is the array meant to be inspected here.
y_pred

array([203.06867921,  66.42237769,  42.50516994, ...,  33.74200102,
        32.85884037, 177.44853624])

In [20]:
# Display the submission frame (id, Calories) that was written to submission.csv.
submission

Unnamed: 0,id,Calories
0,750000,28.834309
1,750001,110.552432
2,750002,88.440191
3,750003,129.097957
4,750004,75.748380
...,...,...
249995,999995,23.829629
249996,999996,14.321549
249997,999997,70.628803
249998,999998,175.223137
