# Gradient Boosting

## XGBoost Model

In [5]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from utils.transform_scale import transform_scale_df, transform_v2_scale_df, TARGET_VARIABLE_COLUMN

import torch
import torch.nn as nn
import torch.optim as optim

DATA_PATH = Path("data")

In [2]:
# Load augmented data
train_augmented = pd.read_csv(DATA_PATH / "train-augmented.csv", parse_dates=["month"])
test_augmented = pd.read_csv(DATA_PATH / "test-augmented.csv", parse_dates=["month"])

train_augmented.head()

Unnamed: 0,month,town,flat_type,block,street_name,floor_area_sqm,flat_model,eco_category,lease_commence_date,latitude,...,mean_age_m,std_age_f,std_age_m,pri_sch_dist,pri_sch,sec_sch_dist,sec_sch,mall_dist,mrt_name,mrt_dist
0,2001-08-01,pasir ris,4 room,440,pasir ris drive 4,118.0,model a,uncategorized,1989,1.369008,...,36.16763,20.331631,19.999478,0.344087,Loyang Primary School,0.428301,Pasir Ris Crest Secondary School,1.033216,Pasir Ris,1.137522
1,2014-10-01,punggol,5 room,196B,punggol field,110.0,improved,uncategorized,2003,1.399007,...,31.967676,20.103889,19.793305,0.160852,Edgefield Primary School,0.312383,Meridian Secondary School,0.80604,Cove,0.118373
2,2020-09-01,sengkang,5 room,404A,fernvale lane,112.0,premium apartment,uncategorized,2004,1.388348,...,34.164736,20.311337,19.94782,0.184906,Fernvale Primary School,0.55838,Pei Hwa Secondary School,0.452556,Fernvale,0.481153
3,2000-10-01,clementi,3 room,375,clementi avenue 4,67.0,new generation,uncategorized,1980,1.318493,...,40.577282,21.625967,21.440329,0.304561,Pei Tong Primary School,0.619132,Clementi Town Secondary School,0.456499,Clementi,0.42332
4,2013-01-01,bukit batok,3 room,163,bukit batok street 11,73.0,model a,uncategorized,1985,1.348149,...,38.318241,20.497124,20.287059,0.233809,Princess Elizabeth Primary School,0.217911,Bukit Batok Secondary School,0.764172,Bukit Batok,0.77422


In [3]:
# See linear.ipynb for details - code copied from there
# Split the train data into train and test
X = train_augmented.drop(columns=TARGET_VARIABLE_COLUMN)
y = train_augmented[TARGET_VARIABLE_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Transform and scale the data
# See utils/transform_scale.py for details
X_train = transform_v2_scale_df(X_train)
X_test = transform_v2_scale_df(X_test)

In [22]:
# Convert the data to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

model = GradientBoostingRegressor(n_estimators= 300, learning_rate=0.2, max_depth=10)
model = model.fit(X_train_tensor, y_train_tensor)

# Evaluate the model
with torch.no_grad():
    y_pred = model.predict(X_test_tensor)
    print(f"Mean squared error: {mean_squared_error(y_test_tensor, y_pred)}")
    print(f"Mean absolute error: {mean_absolute_error(y_test_tensor, y_pred)}")
    print(f"R2 score: {r2_score(y_test_tensor, y_pred)}")

Mean squared error: 286303191.4108773
Mean absolute error: 11913.619120683172
R2 score: 0.9829078367030853


In [23]:
X = transform_v2_scale_df(test_augmented)
y_pred = model.predict(X)
df = pd.DataFrame(y_pred, columns=['Predicted'])
# df['Id'] = df.index

import os  
os.makedirs('data', exist_ok=True)  
df.to_csv('data/boosting_result.csv', index=True, header=True)



## Hyperparameter Tuning

In [14]:
from sklearn.model_selection import GridSearchCV

parameters = {
    'n_estimators':[100, 150, 200, 250],
    'max_features': ['sqrt', 'log2', 'auto'],
    'min_samples_split': [2, 3, 5],
    'learning_rate': [0.01, 0.05, 0.1, 0.5]
}

from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor()
clf = GridSearchCV(model, parameters, verbose=3)
model = clf.fit(X_train_tensor, y_train_tensor)

# Store the parameters of the best model
best_params = model.best_params_

# Predict class labels of test data on the model with the best found parameters
y_pred = model.predict(X_test_tensor)

print(f"Mean squared error: {mean_squared_error(y_test_tensor, y_pred)}")
print(f"Mean absolute error: {mean_absolute_error(y_test_tensor, y_pred)}")
print(f"R2 score: {r2_score(y_test_tensor, y_pred)}")

print('Best Gradient Boosting regressor: ', best_params)

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV 1/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.573 total time=  21.0s
[CV 2/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.551 total time=  22.1s
[CV 3/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.566 total time=  21.3s
[CV 4/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.578 total time=  22.1s
[CV 5/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.560 total time=  22.8s
[CV 1/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.702 total time=  30.4s
[CV 2/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.695 total time=  31.3s
[CV 3/5] END learning_rate=0.01, max_features=sqrt, min_samples_split=2, n_e



[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.669 total time= 1.4min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.667 total time= 1.3min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.668 total time= 1.4min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.669 total time= 1.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.670 total time= 1.4min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.765 total time= 2.0min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.762 total time= 2.1min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.764 total time= 2.2min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.764 total time= 2.0min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.766 total time= 2.0min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.819 total time= 2.6min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.817 total time= 2.7min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.818 total time= 2.6min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.818 total time= 2.6min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.819 total time= 2.6min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.855 total time= 3.3min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.853 total time= 3.3min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.854 total time= 3.3min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.854 total time= 3.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.855 total time= 3.3min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.669 total time= 1.3min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.667 total time= 1.3min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.668 total time= 1.3min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.669 total time= 1.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.670 total time= 1.3min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.765 total time= 2.0min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.762 total time= 2.0min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.764 total time= 2.0min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.764 total time= 2.0min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.766 total time= 2.0min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.819 total time= 2.6min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.817 total time= 2.7min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.818 total time= 2.7min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.818 total time= 2.6min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.819 total time= 2.6min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.855 total time= 3.3min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.853 total time= 3.3min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.854 total time= 3.3min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.854 total time= 3.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.855 total time= 3.3min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.669 total time= 1.3min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.667 total time= 1.4min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.668 total time= 1.3min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.669 total time= 1.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.670 total time= 1.4min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.765 total time= 2.0min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.762 total time= 2.0min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.764 total time= 2.0min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.764 total time= 2.0min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.766 total time= 2.0min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.819 total time= 2.6min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.817 total time= 2.6min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.818 total time= 2.6min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.818 total time= 2.6min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.819 total time= 2.6min




[CV 1/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.855 total time= 3.2min




[CV 2/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.853 total time= 3.3min




[CV 3/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.854 total time= 3.3min




[CV 4/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.854 total time= 3.3min




[CV 5/5] END learning_rate=0.01, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.855 total time= 3.3min
[CV 1/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.907 total time=  19.7s
[CV 2/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.903 total time=  19.6s
[CV 3/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.904 total time=  20.1s
[CV 4/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.904 total time=  19.6s
[CV 5/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.905 total time=  19.7s
[CV 1/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.922 total time=  29.0s
[CV 2/5] END learning_rate=0.05, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.926 total time=  29.3s
[CV 3/5] END lea



[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.918 total time= 1.4min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.917 total time= 1.3min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.918 total time= 1.4min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.917 total time= 1.4min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.917 total time= 1.3min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.936 total time= 1.9min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.935 total time= 1.9min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.936 total time= 1.9min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.935 total time= 1.9min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.935 total time= 1.9min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.944 total time= 2.5min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.943 total time= 2.5min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.944 total time= 2.5min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.943 total time= 2.6min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.942 total time= 2.6min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.948 total time= 3.2min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.947 total time= 3.2min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.948 total time= 3.2min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.947 total time= 3.2min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.947 total time= 3.2min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.918 total time= 1.3min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.917 total time= 1.3min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.918 total time= 1.3min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.917 total time= 1.3min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.917 total time= 1.3min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.936 total time= 1.9min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.935 total time= 1.9min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.936 total time= 1.9min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.935 total time= 1.9min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.935 total time= 1.9min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.944 total time= 2.5min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.943 total time= 2.6min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.944 total time= 2.6min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.943 total time= 2.5min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.942 total time= 2.5min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.948 total time= 3.2min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.947 total time= 3.2min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.948 total time= 3.3min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.947 total time= 3.6min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.947 total time= 3.2min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.918 total time= 1.3min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.917 total time= 1.3min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.918 total time= 1.4min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.917 total time= 1.7min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.917 total time= 1.5min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.936 total time= 2.1min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.935 total time= 2.1min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.936 total time= 2.1min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.935 total time= 2.2min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.935 total time= 1.9min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.944 total time= 2.6min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.943 total time= 2.8min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.944 total time= 2.8min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.943 total time= 2.7min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.942 total time= 2.5min




[CV 1/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.948 total time= 3.3min




[CV 2/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.947 total time= 3.1min




[CV 3/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.948 total time= 3.1min




[CV 4/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.947 total time= 3.1min




[CV 5/5] END learning_rate=0.05, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.947 total time= 3.1min
[CV 1/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.929 total time=  18.9s
[CV 2/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.929 total time=  18.7s
[CV 3/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.934 total time=  18.9s
[CV 4/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.936 total time=  18.7s
[CV 5/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.935 total time=  19.0s
[CV 1/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.943 total time=  28.4s
[CV 2/5] END learning_rate=0.1, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.943 total time=  27.9s
[CV 3/5] END learning_r



[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.943 total time= 1.3min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.942 total time= 1.3min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.943 total time= 1.3min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.943 total time= 1.3min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.942 total time= 1.3min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.951 total time= 1.9min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.950 total time= 1.9min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.951 total time= 1.9min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.951 total time= 1.9min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.950 total time= 1.9min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.955 total time= 2.5min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.955 total time= 2.5min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.955 total time= 2.5min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.955 total time= 2.6min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.954 total time= 2.6min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.958 total time= 3.1min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.958 total time= 3.2min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.959 total time= 3.2min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.959 total time= 3.2min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=2, n_estimators=250;, score=0.958 total time= 3.2min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.943 total time= 1.3min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.942 total time= 1.3min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.943 total time= 1.3min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.943 total time= 1.3min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=100;, score=0.942 total time= 1.3min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.951 total time= 1.9min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.950 total time= 1.9min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.951 total time= 1.9min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.951 total time= 1.9min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=150;, score=0.950 total time= 1.9min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.955 total time= 2.6min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.955 total time= 2.5min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.955 total time= 2.5min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.955 total time= 2.5min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=200;, score=0.954 total time= 2.5min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.958 total time= 3.1min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.958 total time= 3.2min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.959 total time= 3.2min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.959 total time= 3.2min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=3, n_estimators=250;, score=0.958 total time= 3.1min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.943 total time= 1.3min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.942 total time= 1.3min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.943 total time= 1.3min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.943 total time= 1.3min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=100;, score=0.942 total time= 1.3min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.951 total time= 1.9min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.950 total time= 1.9min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.951 total time= 2.0min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.951 total time= 1.9min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=150;, score=0.950 total time= 1.9min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.955 total time= 2.5min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.955 total time= 2.5min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.955 total time= 2.5min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.955 total time= 2.5min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=200;, score=0.954 total time= 2.5min




[CV 1/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.958 total time= 3.2min




[CV 2/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.958 total time= 3.2min




[CV 3/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.959 total time= 3.2min




[CV 4/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.959 total time= 3.2min




[CV 5/5] END learning_rate=0.1, max_features=auto, min_samples_split=5, n_estimators=250;, score=0.958 total time= 3.2min
[CV 1/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.950 total time=  19.7s
[CV 2/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.951 total time=  19.8s
[CV 3/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.951 total time=  19.3s
[CV 4/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.953 total time=  19.4s
[CV 5/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=100;, score=0.953 total time=  19.6s
[CV 1/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.959 total time=  29.0s
[CV 2/5] END learning_rate=0.5, max_features=sqrt, min_samples_split=2, n_estimators=150;, score=0.960 total time=  29.3s
[CV 3/5] END learning_ra



[CV 1/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.960 total time= 1.3min




[CV 2/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.961 total time= 1.2min




[CV 3/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.962 total time= 1.2min




[CV 4/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.961 total time= 1.2min




[CV 5/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=100;, score=0.960 total time= 1.2min




[CV 1/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.965 total time= 2.2min




[CV 2/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.966 total time= 2.3min




[CV 3/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.967 total time= 2.2min




[CV 4/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.966 total time= 2.7min




[CV 5/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=150;, score=0.966 total time= 2.4min




[CV 1/5] END learning_rate=0.5, max_features=auto, min_samples_split=2, n_estimators=200;, score=0.968 total time= 3.4min




In [None]:
X = transform_v2_scale_df(test_augmented)
y_pred = model.predict(X)
df = pd.DataFrame(y_pred, columns=['Predicted'])
# df['Id'] = df.index

import os  
os.makedirs('data', exist_ok=True)  
df.to_csv('data/boosting_result.csv', index=True, header=True) 

In [None]:
print(f"Mean squared error: {mean_squared_error(y_test_tensor, y_pred)}")
print(f"Mean absolute error: {mean_absolute_error(y_test_tensor, y_pred)}")
print(f"R2 score: {r2_score(y_test_tensor, y_pred)}")