# Setup

In [1]:
%%capture
!pip install --upgrade xgboost
!pip install --upgrade optuna

In [2]:
import os
import gc
import time
import warnings
import subprocess

gc.enable()
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 4)
np.set_printoptions(precision=4)

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import HyperbandPruner

import xgboost
from xgboost import XGBRegressor

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

SEED = 2311
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)

In [3]:
# Guard: the recorded outputs were produced with these exact library versions.
# Remove this cell to run the notebook with future versions.
assert optuna.__version__ == '3.0.3', (
    'Change in Optuna version. Original notebook version: 3.0.3'
)
assert xgboost.__version__ == '1.6.2', (
    'Change in XGBoost version. Original notebook version: 1.6.2'
)

In [4]:
# Probe for an NVIDIA GPU by invoking `nvidia-smi`; any failure to run it
# (missing binary, non-zero exit code, ...) is treated as "no GPU".
try:
    subprocess.run('nvidia-smi', stdout=subprocess.PIPE, check=True)
except Exception:
    HAVE_GPU = False
else:
    HAVE_GPU = True

print(f'GPU available: {HAVE_GPU}')

GPU available: True


In [5]:
DATA_URL = 'https://raw.githubusercontent.com/stiwari-ds/data-science-competitions/main/analytics_vidhya/jobathon_nov22/data'


def _read_processed(name):
    """Read ``{DATA_URL}/processed/{name}.csv`` into a DataFrame."""
    return pd.read_csv(f'{DATA_URL}/processed/{name}.csv')


# Processed datasets from notebook 00.
# NOTE(review): the train_* variants presumably differ in how missing values
# were handled (going by the file names) - confirm against notebook 00.
train = _read_processed('train')
train_drop = _read_processed('train_drop')
train_ffill = _read_processed('train_ffill')
train_bfill = _read_processed('train_bfill')
train_linear = _read_processed('train_linear')
train_poly3 = _read_processed('train_poly3')
train_poly5 = _read_processed('train_poly5')
train_iterimp = _read_processed('train_iterimp')

test = _read_processed('test')
sample_sub = pd.read_csv(f'{DATA_URL}/raw/sample_submission.csv')

In [6]:
# Name of the column used as the regression target (y) in every experiment below.
TARGET = 'energy'

In [7]:
# Model inputs are exactly the columns present in the test set.
features = test.columns.tolist()

# Three nested candidate sets of columns to treat as categorical,
# from the smallest to the largest.
cat_features_1 = ['quarter', 'dayofweek']
cat_features_2 = [*cat_features_1, 'month', 'hour']
cat_features_3 = [*cat_features_2, 'dayofmonth', 'weekofyear']

# Baseline

In [8]:
%%time
# Baseline: XGBoost with default hyperparameters, scored with a 10-split
# TimeSeriesSplit (each fold trains on the rows preceding its validation rows).
scores = []
cv = TimeSeriesSplit(n_splits=10)
# Cast the categorical columns on a new frame rather than assigning into a
# slice of train_drop (the original assignment relied on the suppressed
# SettingWithCopyWarning being harmless).
X = train_drop[features].astype({col: 'category' for col in cat_features_1})
y = train_drop[TARGET]
for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
    # TimeSeriesSplit yields *positional* indices, so both X and y must be
    # indexed with .iloc (.loc only worked because the index is 0..n-1).
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    model = XGBRegressor(
        objective='reg:squarederror',
        tree_method='gpu_hist' if HAVE_GPU else 'hist',
        # X holds category-dtype columns, which XGBoost rejects unless this
        # flag is set - on CPU ('hist') as well as on GPU.
        enable_categorical=True,
        eval_metric='rmse',
        early_stopping_rounds=100,
        seed=SEED
    )
    # The validation fold doubles as the early-stopping monitor.
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=0
    )
    val_preds = model.predict(X_val)
    # sqrt(MSE) instead of `squared=False`, which newer scikit-learn removed.
    score = np.sqrt(mean_squared_error(y_val, val_preds))
    scores.append(score)
    print(f'Fold #{fold}: '
          f'(Data size: train = {X_train.shape[0]:>5}, test = {X_val.shape[0]:4})'
          f' RMSE = {score:.5f} ({model.best_iteration} rounds)')
    _ = gc.collect()

print(f'Avg. RMSE = {np.mean(scores):.5f} +/- {np.std(scores):.5f}')

Fold #0: (Data size: train =  8472, test = 8462) RMSE = 162.71702 (19 rounds)
Fold #1: (Data size: train = 16934, test = 8462) RMSE = 149.14382 (10 rounds)
Fold #2: (Data size: train = 25396, test = 8462) RMSE = 165.26525 (17 rounds)
Fold #3: (Data size: train = 33858, test = 8462) RMSE = 176.48886 (27 rounds)
Fold #4: (Data size: train = 42320, test = 8462) RMSE = 168.28283 (16 rounds)
Fold #5: (Data size: train = 50782, test = 8462) RMSE = 332.18391 (35 rounds)
Fold #6: (Data size: train = 59244, test = 8462) RMSE = 227.13640 (14 rounds)
Fold #7: (Data size: train = 67706, test = 8462) RMSE = 185.63637 (16 rounds)
Fold #8: (Data size: train = 76168, test = 8462) RMSE = 202.32710 (30 rounds)
Fold #9: (Data size: train = 84630, test = 8462) RMSE = 203.59508 (23 rounds)
Avg. RMSE = 197.27766 +/- 50.10104
CPU times: user 11.5 s, sys: 507 ms, total: 12 s
Wall time: 12.8 s


# Hyperparameter tuning

In [9]:
def objective(trial, data, cat_features, base_params):
    """Optuna objective: mean validation RMSE over a 10-split time-series CV.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the XGBoost hyperparameters listed below.
    data : tuple
        ``(X, y)`` - feature DataFrame and target Series. Neither is modified.
    cat_features : list of str
        Columns of ``X`` that are cast to pandas ``category`` dtype.
    base_params : dict
        Fixed ``XGBRegressor`` keyword arguments. They are combined with the
        sampled hyperparameters, so the two must not share keys.

    Returns
    -------
    float
        Mean of the per-fold RMSE values (lower is better).
    """
    X, y = data
    # Cast on a copy: the caller's frame is reused across trials and must not
    # be mutated as a side effect of evaluating one trial.
    X = X.astype({col: 'category' for col in cat_features})

    # The order of the suggest_* calls is kept fixed so that a seeded sampler
    # reproduces the same sequence of trials.
    param_grid = {
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.5, step=0.05),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 256),
        'gamma': trial.suggest_float('gamma', 0, 20, step=0.1), #complexity-control
        'alpha': trial.suggest_float('alpha', 0, 5, step=0.05), #L1-reg
        'lambda': trial.suggest_float('lambda', 1e-2, 1e4, log=True), #L2-reg
        'subsample': trial.suggest_float('subsample', 0.75, 1.0, step=0.05),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.75, 1.0, step=0.05),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.75, 1.0, step=0.05),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.75, 1.0, step=0.05),
        'max_cat_to_onehot': trial.suggest_categorical('max_cat_to_onehot', [4, 7, 12])
    }

    scores = []
    cv = TimeSeriesSplit(n_splits=10)
    for train_idx, val_idx in cv.split(X, y):
        # The splitter yields positional indices -> use .iloc for X and y alike.
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        model = XGBRegressor(**base_params, **param_grid)
        # The validation fold is passed as eval_set so that any early stopping
        # configured in base_params can monitor it.
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=0
        )
        val_preds = model.predict(X_val)
        # sqrt(MSE) instead of `squared=False`, which newer scikit-learn removed.
        scores.append(np.sqrt(mean_squared_error(y_val, val_preds)))
    return np.mean(scores)

In [10]:
def tune_params(data, cat_features, base_params, n_trials, direction):
    """Create an Optuna study, optimise ``objective`` with it and return it.

    Parameters
    ----------
    data, cat_features, base_params
        Forwarded unchanged to ``objective`` for every trial.
    n_trials : int
        Number of trials to run.
    direction : str
        Optimisation direction handed to ``optuna.create_study``.

    Returns
    -------
    The study object after ``optimize`` has finished.

    NOTE(review): a pruner only acts on trials that report intermediate
    values; ``objective`` in this notebook returns just the final mean, so the
    HyperbandPruner appears to have no effect - confirm.
    """
    def _evaluate(trial):
        # Bind the fixed arguments so that Optuna only supplies the trial.
        return objective(trial, data, cat_features, base_params)

    study_config = {
        'sampler': TPESampler(seed=SEED),  # seeded for reproducible sampling
        'pruner': HyperbandPruner(),
        'direction': direction,
    }
    study = optuna.create_study(**study_config)
    # gc_after_trial: run garbage collection after each trial.
    study.optimize(func=_evaluate, n_trials=n_trials, gc_after_trial=True)
    return study

# Cross-validation

In [11]:
def cross_validate_predict(data, cat_features, model_params):
    """Run a 10-split time-series CV and predict the test set with every fold model.

    Parameters
    ----------
    data : tuple
        ``(X, y, X_test)`` - training features, training target and test
        features. None of them is modified.
    cat_features : list of str
        Columns cast to pandas ``category`` dtype in both ``X`` and ``X_test``.
    model_params : dict
        Keyword arguments for ``XGBRegressor``.

    Returns
    -------
    pandas.DataFrame
        One column ``fold<i>`` of test-set predictions per fold, plus a
        ``mean`` column with their row-wise average.

    Per-fold validation RMSE and the average RMSE are printed.
    """
    test_preds = {} #predictions on test set for each fold
    scores = [] #scores on validation set

    X, y, X_test = data
    # Cast on copies so the caller's frames are left untouched.
    X = X.astype({col: 'category' for col in cat_features})
    # Reuse the *training* categorical dtype for the test set. Inferring the
    # categories from X_test alone can assign different integer codes to the
    # same value whenever the test set lacks some training categories.
    # Test values unseen in training become NaN.
    X_test = X_test.astype({col: X[col].dtype for col in cat_features})

    cv = TimeSeriesSplit(n_splits=10)
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
        # The splitter yields positional indices -> use .iloc for X and y alike.
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        model = XGBRegressor(**model_params)
        # The validation fold is passed as eval_set so that any early stopping
        # configured in model_params can monitor it.
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=0
        )

        val_preds = model.predict(X_val)
        test_preds[f'fold{fold}'] = model.predict(X_test)

        # sqrt(MSE) instead of `squared=False`, which newer scikit-learn removed.
        score = np.sqrt(mean_squared_error(y_val, val_preds))
        scores.append(score)
        print(f'Fold #{fold}: '
              f'(Data size: train = {X_train.shape[0]:>5}, test = {X_val.shape[0]:4})'
              f' RMSE = {score:.5f} ({model.best_iteration} rounds)')
        _ = gc.collect()

    print(f'Avg. RMSE = {np.mean(scores):.5f} +/- {np.std(scores):.5f}')

    test_preds = pd.DataFrame.from_dict(test_preds)
    # Row-wise average over the fold columns.
    test_preds['mean'] = test_preds.mean(axis=1)

    return test_preds

In [12]:
def run_experiment(data, cat_features, n_trials=5):
    """Tune XGBoost hyperparameters, then cross-validate and predict the test set.

    Parameters
    ----------
    data : tuple
        ``(X, y, X_test)`` - training features, training target, test features.
    cat_features : list of str
        Columns to treat as categorical.
    n_trials : int, default 5
        Number of Optuna trials used for tuning.

    Returns
    -------
    The value returned by ``cross_validate_predict`` for the best
    hyperparameters found (per-fold test predictions and their mean).

    Progress, the best hyperparameters and timings are printed.
    """
    X, y, _ = data  # the test features are only needed for the final prediction

    base_params = {
        'objective': 'reg:squarederror',
        'n_estimators': 20000,
        'booster': 'gbtree',
        'eval_metric': 'rmse',
        'early_stopping_rounds': 100,
        'tree_method': 'gpu_hist' if HAVE_GPU else 'hist',
        'predictor': 'gpu_predictor' if HAVE_GPU else 'cpu_predictor',
        # The features are passed as category-dtype columns, which XGBoost
        # rejects unless this flag is set - on CPU ('hist') as well as on GPU.
        'enable_categorical': True,
        'verbosity': 1,
        'seed': SEED
    }

    print('----------Hyperparameter tuning----------')
    start = time.time()
    study = tune_params(
        data=(X, y),
        cat_features=cat_features,
        base_params=base_params,
        n_trials=n_trials,
        direction='minimize' #metric: RMSE -> lower is better
    )
    end = time.time()
    print(f'Best trial: {study.best_trial.number} -> Best value: {study.best_value:.5f}')
    print('Best hyperparameters:')
    for k, v in study.best_params.items():
        print(f'{k:15} - {v}')
    print(f'[Time taken: {end - start:.2f}s]\n')

    print('-----Cross-validation and prediction-----')
    start = time.time()
    # Tuned values are merged on top of the fixed base parameters.
    model_params = {**base_params, **study.best_params}
    test_preds = cross_validate_predict(data, cat_features, model_params)
    end = time.time()
    print(f'[Time taken: {end - start:.2f}s]\n')

    return test_preds

**Trial runs**

In [13]:
# Show Optuna's per-trial log lines for the short trial run below.
optuna.logging.set_verbosity(optuna.logging.INFO)

In [14]:
%%time
# Smoke test of the full pipeline: only 3 tuning trials on train_drop with the
# smallest categorical set. `tp` receives the per-fold and mean test predictions.
tp = run_experiment(
    data=(train_drop[features], train_drop[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=3
)

[32m[I 2022-11-18 16:21:09,324][0m A new study created in memory with name: no-name-db9d79af-73dc-4b8d-a9fa-3fc6cd0d19b4[0m


----------Hyperparameter tuning----------


[32m[I 2022-11-18 16:21:28,662][0m Trial 0 finished with value: 195.78783017865436 and parameters: {'learning_rate': 0.2, 'max_depth': 11, 'min_child_weight': 57, 'gamma': 3.0, 'alpha': 0.25, 'lambda': 177.1345546784436, 'subsample': 0.75, 'colsample_bytree': 0.75, 'colsample_bylevel': 0.95, 'colsample_bynode': 0.9, 'max_cat_to_onehot': 12}. Best is trial 0 with value: 195.78783017865436.[0m
[32m[I 2022-11-18 16:21:42,534][0m Trial 1 finished with value: 200.60882550235922 and parameters: {'learning_rate': 0.45000000000000007, 'max_depth': 12, 'min_child_weight': 148, 'gamma': 2.3000000000000003, 'alpha': 3.35, 'lambda': 1.3459766417243109, 'subsample': 0.9, 'colsample_bytree': 0.9, 'colsample_bylevel': 0.95, 'colsample_bynode': 0.85, 'max_cat_to_onehot': 12}. Best is trial 0 with value: 195.78783017865436.[0m
[32m[I 2022-11-18 16:22:05,711][0m Trial 2 finished with value: 191.05386319982154 and parameters: {'learning_rate': 0.15000000000000002, 'max_depth': 7, 'min_child_weigh

Best trial: 2 -> Best value: 191.05386
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 7
min_child_weight - 157
gamma           - 6.0
alpha           - 3.5500000000000003
lambda          - 3131.3882809486845
subsample       - 0.85
colsample_bytree - 0.85
colsample_bylevel - 1.0
colsample_bynode - 0.95
max_cat_to_onehot - 4
[Time taken: 56.51s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8472, test = 8462) RMSE = 155.16923 (348 rounds)
Fold #1: (Data size: train = 16934, test = 8462) RMSE = 147.59368 (246 rounds)
Fold #2: (Data size: train = 25396, test = 8462) RMSE = 158.18230 (85 rounds)
Fold #3: (Data size: train = 33858, test = 8462) RMSE = 179.35086 (250 rounds)
Fold #4: (Data size: train = 42320, test = 8462) RMSE = 157.20717 (253 rounds)
Fold #5: (Data size: train = 50782, test = 8462) RMSE = 309.73006 (391 rounds)
Fold #6: (Data size: train = 59244, test = 8462) RMSE = 218.70306 (98 rounds)
Fold #7: (Data size: train = 6

In [15]:
# Only log errors from here on, so the 100-trial experiments do not print one line per trial.
optuna.logging.set_verbosity(optuna.logging.ERROR)

### Dataset 1: train_drop

In [16]:
%%time
# train_drop x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_1_1 = run_experiment(
    data=(train_drop[features], train_drop[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 41 -> Best value: 180.82483
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 236
gamma           - 4.4
alpha           - 3.4000000000000004
lambda          - 0.010038563400505355
subsample       - 0.95
colsample_bytree - 0.85
colsample_bylevel - 0.8
colsample_bynode - 0.95
max_cat_to_onehot - 4
[Time taken: 901.98s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8472, test = 8462) RMSE = 145.37247 (50 rounds)
Fold #1: (Data size: train = 16934, test = 8462) RMSE = 151.36063 (16 rounds)
Fold #2: (Data size: train = 25396, test = 8462) RMSE = 152.96819 (30 rounds)
Fold #3: (Data size: train = 33858, test = 8462) RMSE = 164.82529 (313 rounds)
Fold #4: (Data size: train = 42320, test = 8462) RMSE = 151.70146 (79 rounds)
Fold #5: (Data size: train = 50782, test = 8462) RMSE = 276.26640 (159 rounds)
Fold #6: (Data size: train = 59244, test = 8462) RMSE = 184.61034 (31 rounds)
Fol

In [17]:
%%time
# train_drop x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_1_2 = run_experiment(
    data=(train_drop[features], train_drop[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 88 -> Best value: 181.48380
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 256
gamma           - 2.0
alpha           - 2.6
lambda          - 0.052211555913631455
subsample       - 0.8
colsample_bytree - 0.95
colsample_bylevel - 0.9
colsample_bynode - 0.9
max_cat_to_onehot - 4
[Time taken: 1068.25s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8472, test = 8462) RMSE = 147.03869 (54 rounds)
Fold #1: (Data size: train = 16934, test = 8462) RMSE = 155.57162 (16 rounds)
Fold #2: (Data size: train = 25396, test = 8462) RMSE = 149.71451 (41 rounds)
Fold #3: (Data size: train = 33858, test = 8462) RMSE = 162.72819 (158 rounds)
Fold #4: (Data size: train = 42320, test = 8462) RMSE = 146.75381 (90 rounds)
Fold #5: (Data size: train = 50782, test = 8462) RMSE = 263.00072 (235 rounds)
Fold #6: (Data size: train = 59244, test = 8462) RMSE = 206.48062 (24 rounds)
Fold #7: (Data size

In [18]:
%%time
# train_drop x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_1_3 = run_experiment(
    data=(train_drop[features], train_drop[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 83 -> Best value: 183.14384
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 255
gamma           - 19.700000000000003
alpha           - 1.9500000000000002
lambda          - 0.18167571986081854
subsample       - 0.75
colsample_bytree - 0.85
colsample_bylevel - 0.85
colsample_bynode - 0.85
max_cat_to_onehot - 7
[Time taken: 1306.25s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8472, test = 8462) RMSE = 136.53887 (50 rounds)
Fold #1: (Data size: train = 16934, test = 8462) RMSE = 149.35229 (25 rounds)
Fold #2: (Data size: train = 25396, test = 8462) RMSE = 150.10780 (34 rounds)
Fold #3: (Data size: train = 33858, test = 8462) RMSE = 171.56640 (119 rounds)
Fold #4: (Data size: train = 42320, test = 8462) RMSE = 151.15926 (97 rounds)
Fold #5: (Data size: train = 50782, test = 8462) RMSE = 279.97502 (335 rounds)
Fold #6: (Data size: train = 59244, test = 8462) RMSE = 199.00130

### Dataset 2: train_ffill

In [19]:
%%time
# train_ffill x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_2_1 = run_experiment(
    data=(train_ffill[features], train_ffill[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 99 -> Best value: 182.84446
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 209
gamma           - 5.2
alpha           - 3.0
lambda          - 0.04587935317552276
subsample       - 0.95
colsample_bytree - 0.9
colsample_bylevel - 1.0
colsample_bynode - 0.9
max_cat_to_onehot - 4
[Time taken: 852.02s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 149.49857 (44 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 153.73984 (21 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 153.81181 (35 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 157.98132 (255 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 152.67486 (60 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 280.94161 (164 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 181.91048 (30 rounds)
Fold #7: (Data size: 

In [20]:
%%time
# train_ffill x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_2_2 = run_experiment(
    data=(train_ffill[features], train_ffill[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 91 -> Best value: 181.37630
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 3
min_child_weight - 241
gamma           - 17.8
alpha           - 4.45
lambda          - 0.06395872229902426
subsample       - 0.9
colsample_bytree - 0.9
colsample_bylevel - 0.85
colsample_bynode - 0.8
max_cat_to_onehot - 4
[Time taken: 1120.71s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 151.37181 (48 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 156.20781 (25 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 148.28728 (45 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 159.05417 (284 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 147.95721 (115 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 276.91299 (239 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 191.74464 (29 rounds)
F

In [21]:
%%time
# train_ffill x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_2_3 = run_experiment(
    data=(train_ffill[features], train_ffill[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 87 -> Best value: 183.07864
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 239
gamma           - 9.0
alpha           - 5.0
lambda          - 0.9345649158392516
subsample       - 0.75
colsample_bytree - 0.85
colsample_bylevel - 0.75
colsample_bynode - 1.0
max_cat_to_onehot - 7
[Time taken: 1296.53s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 135.06189 (41 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 154.61332 (17 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 153.58767 (26 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 175.99744 (193 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 155.66210 (76 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 278.22915 (365 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 186.54346 (25 rounds)
Fold #7: (Data size

### Dataset 3: train_bfill

In [22]:
%%time
# train_bfill x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_3_1 = run_experiment(
    data=(train_bfill[features], train_bfill[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 94 -> Best value: 182.79927
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 15
gamma           - 1.3
alpha           - 4.8500000000000005
lambda          - 2.2381124974760804
subsample       - 0.85
colsample_bytree - 0.95
colsample_bylevel - 0.85
colsample_bynode - 1.0
max_cat_to_onehot - 4
[Time taken: 996.95s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 156.50045 (39 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 155.68745 (22 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 157.18357 (40 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 165.42386 (175 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 154.86009 (97 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 277.20765 (144 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 179.35359 (26 rounds)
Fold #

In [23]:
%%time
# train_bfill x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_3_2 = run_experiment(
    data=(train_bfill[features], train_bfill[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 80 -> Best value: 182.66172
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 4
min_child_weight - 214
gamma           - 19.0
alpha           - 3.85
lambda          - 0.1048582995497879
subsample       - 0.8
colsample_bytree - 0.8
colsample_bylevel - 0.75
colsample_bynode - 0.75
max_cat_to_onehot - 4
[Time taken: 1162.56s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 149.97273 (49 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 155.75569 (20 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 147.60262 (38 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 162.59706 (225 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 148.77962 (93 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 291.77645 (215 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 188.00260 (28 rounds)
Fo

In [24]:
%%time
# train_bfill x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_3_3 = run_experiment(
    data=(train_bfill[features], train_bfill[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 97 -> Best value: 183.56230
Best hyperparameters:
learning_rate   - 0.1
max_depth       - 3
min_child_weight - 205
gamma           - 9.200000000000001
alpha           - 1.55
lambda          - 0.20452161396044213
subsample       - 0.9
colsample_bytree - 0.85
colsample_bylevel - 0.85
colsample_bynode - 0.9
max_cat_to_onehot - 4
[Time taken: 1214.27s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 136.78337 (70 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 151.31620 (38 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 150.66626 (51 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 174.06109 (261 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 148.59353 (143 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 289.22180 (561 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 193.57316 (46 rounds)
Fol

### Dataset 4: train_linear

In [25]:
%%time
# train_linear x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_4_1 = run_experiment(
    data=(train_linear[features], train_linear[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 96 -> Best value: 183.28997
Best hyperparameters:
learning_rate   - 0.1
max_depth       - 3
min_child_weight - 12
gamma           - 18.7
alpha           - 2.85
lambda          - 0.7041541403292714
subsample       - 0.75
colsample_bytree - 0.8
colsample_bylevel - 0.8
colsample_bynode - 0.9
max_cat_to_onehot - 4
[Time taken: 1025.48s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 145.10162 (84 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 152.57850 (39 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 150.13628 (101 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 164.75381 (402 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 152.15575 (213 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 298.74851 (428 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 198.00055 (54 rounds)
Fold #7: (Data siz

In [26]:
%%time
# train_linear x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_4_2 = run_experiment(
    data=(train_linear[features], train_linear[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 36 -> Best value: 180.47819
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 3
min_child_weight - 239
gamma           - 14.4
alpha           - 3.5500000000000003
lambda          - 0.011325915331149764
subsample       - 0.9
colsample_bytree - 0.85
colsample_bylevel - 0.9
colsample_bynode - 0.95
max_cat_to_onehot - 4
[Time taken: 1094.79s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 146.17872 (47 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 154.37345 (22 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 150.69021 (45 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 158.10051 (371 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 150.99174 (102 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 278.30587 (257 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 185.754

In [27]:
%%time
# train_linear x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_4_3 = run_experiment(
    data=(train_linear[features], train_linear[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 86 -> Best value: 183.92572
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 237
gamma           - 9.3
alpha           - 4.15
lambda          - 0.49621855924589847
subsample       - 1.0
colsample_bytree - 0.85
colsample_bylevel - 0.85
colsample_bynode - 0.85
max_cat_to_onehot - 12
[Time taken: 1255.33s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 139.45133 (35 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 157.23101 (17 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 152.54543 (28 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 172.88700 (196 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 152.95206 (64 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 275.48049 (182 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 187.34701 (27 rounds)
Fold #7: (Data s

### Dataset 5: train_poly3

In [28]:
%%time
# train_poly3 x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_5_1 = run_experiment(
    data=(train_poly3[features], train_poly3[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 47 -> Best value: 183.58647
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 3
min_child_weight - 229
gamma           - 11.0
alpha           - 4.9
lambda          - 21.164423819825146
subsample       - 0.9
colsample_bytree - 0.95
colsample_bylevel - 0.75
colsample_bynode - 1.0
max_cat_to_onehot - 4
[Time taken: 912.60s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 153.31141 (260 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 153.47656 (36 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 157.98843 (59 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 160.31351 (349 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 151.73767 (110 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 283.76824 (283 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 188.84530 (32 rounds)
Fo

In [29]:
%%time
# train_poly3 x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_5_2 = run_experiment(
    data=(train_poly3[features], train_poly3[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 81 -> Best value: 181.14221
Best hyperparameters:
learning_rate   - 0.25
max_depth       - 3
min_child_weight - 247
gamma           - 13.3
alpha           - 4.1000000000000005
lambda          - 1.6441607746861542
subsample       - 0.9
colsample_bytree - 0.75
colsample_bylevel - 0.9
colsample_bynode - 0.85
max_cat_to_onehot - 7
[Time taken: 1058.59s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 146.28250 (37 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 153.55646 (13 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 145.53395 (34 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 163.85966 (164 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 153.57197 (108 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 284.14752 (149 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 183.40755 (28 rounds)
Fo

In [30]:
%%time
# train_poly3 x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_5_3 = run_experiment(
    data=(train_poly3[features], train_poly3[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 55 -> Best value: 184.58300
Best hyperparameters:
learning_rate   - 0.30000000000000004
max_depth       - 3
min_child_weight - 216
gamma           - 11.5
alpha           - 1.35
lambda          - 0.5971331530602634
subsample       - 0.95
colsample_bytree - 0.85
colsample_bylevel - 0.95
colsample_bynode - 0.85
max_cat_to_onehot - 7
[Time taken: 1224.09s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 140.44804 (28 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 156.54361 (11 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 153.88581 (20 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 168.19640 (78 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 154.45726 (63 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 279.73373 (132 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 196.91836 (22 rounds)
F

### Dataset 6: train_poly5

In [31]:
%%time
# train_poly5 x cat_features_1: 100 tuning trials, then CV + test predictions.
tp_6_1 = run_experiment(
    data=(train_poly5[features], train_poly5[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 76 -> Best value: 181.55543
Best hyperparameters:
learning_rate   - 0.25
max_depth       - 4
min_child_weight - 15
gamma           - 0.6000000000000001
alpha           - 3.75
lambda          - 0.14967373371356124
subsample       - 1.0
colsample_bytree - 0.9
colsample_bylevel - 0.8
colsample_bynode - 0.85
max_cat_to_onehot - 12
[Time taken: 729.54s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 152.94669 (36 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 147.01375 (13 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 153.98375 (26 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 166.96919 (91 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 158.71125 (58 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 265.07341 (205 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 177.50076 (16 rounds)
Fold 

In [32]:
%%time
# train_poly5 x cat_features_2: 100 tuning trials, then CV + test predictions.
tp_6_2 = run_experiment(
    data=(train_poly5[features], train_poly5[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 44 -> Best value: 180.38660
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 3
min_child_weight - 201
gamma           - 4.1000000000000005
alpha           - 3.75
lambda          - 0.07004965373110639
subsample       - 0.8
colsample_bytree - 0.8
colsample_bylevel - 0.85
colsample_bynode - 0.85
max_cat_to_onehot - 4
[Time taken: 1175.19s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 150.43260 (48 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 152.92657 (26 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 145.28758 (47 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 156.25517 (186 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 147.79236 (104 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 279.19718 (250 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 183.9937

In [33]:
%%time
# train_poly5 x cat_features_3: 100 tuning trials, then CV + test predictions.
tp_6_3 = run_experiment(
    data=(train_poly5[features], train_poly5[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 98 -> Best value: 184.96687
Best hyperparameters:
learning_rate   - 0.30000000000000004
max_depth       - 3
min_child_weight - 191
gamma           - 13.600000000000001
alpha           - 0.05
lambda          - 0.23987203403604967
subsample       - 0.75
colsample_bytree - 0.95
colsample_bylevel - 0.85
colsample_bynode - 0.8
max_cat_to_onehot - 4
[Time taken: 1104.17s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 136.93183 (45 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 153.52412 (12 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 153.72447 (17 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 173.66120 (107 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 152.31134 (30 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 277.03333 (270 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 207.6342

### Dataset 7: train_iterimp

In [34]:
%%time
# Experiment: train_iterimp features/target, the cat_features_1 categorical
# set, and n_trials=100. The result is stored in tp_7_1 and is later handed to
# create_submission_files with the config label '7_1'.
# NOTE(review): run_experiment is defined outside this section; going by the
# log it prints, it tunes hyperparameters and then runs cross-validation and
# prediction -- confirm against its definition.
tp_7_1 = run_experiment(
    data=(train_iterimp[features], train_iterimp[TARGET], test[features]),
    cat_features=cat_features_1,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 30 -> Best value: 196.82030
Best hyperparameters:
learning_rate   - 0.25
max_depth       - 3
min_child_weight - 22
gamma           - 3.6
alpha           - 0.0
lambda          - 0.12232938405694538
subsample       - 0.85
colsample_bytree - 0.95
colsample_bylevel - 0.85
colsample_bynode - 1.0
max_cat_to_onehot - 12
[Time taken: 757.49s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 169.54536 (30 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 172.36554 (14 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 172.06723 (30 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 176.17466 (170 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 173.03082 (62 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 293.82073 (122 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 183.86992 (20 rounds)
Fold #7: (Data siz

In [35]:
%%time
# Experiment: train_iterimp features/target, the cat_features_2 categorical
# set, and n_trials=100. The result is stored in tp_7_2 and is later handed to
# create_submission_files with the config label '7_2'.
# NOTE(review): run_experiment is defined outside this section; going by the
# log it prints, it tunes hyperparameters and then runs cross-validation and
# prediction -- confirm against its definition.
tp_7_2 = run_experiment(
    data=(train_iterimp[features], train_iterimp[TARGET], test[features]),
    cat_features=cat_features_2,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 74 -> Best value: 195.25769
Best hyperparameters:
learning_rate   - 0.2
max_depth       - 3
min_child_weight - 249
gamma           - 16.400000000000002
alpha           - 3.2
lambda          - 0.010468224998474059
subsample       - 0.75
colsample_bytree - 0.8
colsample_bylevel - 0.85
colsample_bynode - 0.9
max_cat_to_onehot - 12
[Time taken: 1170.43s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 166.89780 (130 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 177.31381 (19 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 161.29499 (48 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 172.27974 (337 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 169.35589 (97 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 274.88423 (257 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 200.81619 (27 rounds)
F

In [36]:
%%time
# Experiment: train_iterimp features/target, the cat_features_3 categorical
# set, and n_trials=100. The result is stored in tp_7_3 and is later handed to
# create_submission_files with the config label '7_3'.
# NOTE(review): run_experiment is defined outside this section; going by the
# log it prints, it tunes hyperparameters and then runs cross-validation and
# prediction -- confirm against its definition.
tp_7_3 = run_experiment(
    data=(train_iterimp[features], train_iterimp[TARGET], test[features]),
    cat_features=cat_features_3,
    n_trials=100
)

----------Hyperparameter tuning----------
Best trial: 91 -> Best value: 198.93763
Best hyperparameters:
learning_rate   - 0.15000000000000002
max_depth       - 3
min_child_weight - 249
gamma           - 17.0
alpha           - 1.05
lambda          - 1.1007237447909146
subsample       - 0.8
colsample_bytree - 0.85
colsample_bylevel - 0.85
colsample_bynode - 1.0
max_cat_to_onehot - 12
[Time taken: 1235.60s]

-----Cross-validation and prediction-----
Fold #0: (Data size: train =  8642, test = 8635) RMSE = 150.96151 (69 rounds)
Fold #1: (Data size: train = 17277, test = 8635) RMSE = 175.66440 (25 rounds)
Fold #2: (Data size: train = 25912, test = 8635) RMSE = 168.49465 (40 rounds)
Fold #3: (Data size: train = 34547, test = 8635) RMSE = 187.11395 (133 rounds)
Fold #4: (Data size: train = 43182, test = 8635) RMSE = 174.31470 (92 rounds)
Fold #5: (Data size: train = 51817, test = 8635) RMSE = 292.27393 (354 rounds)
Fold #6: (Data size: train = 60452, test = 8635) RMSE = 204.84516 (30 rounds)
F

# Submission files

In [37]:
# Optional (Google Colab only): uncomment to mount Google Drive.
# from google.colab import drive
# drive.mount('/content/drive')

In [38]:
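# Optional: uncomment to define (and create) a Google Drive folder for this notebook's submissions.
# Note: create_submission_files does not read SUBMISSION_PATH, so as written the CSVs land in the
# current working directory.
# NOTEBOOK = '01'
# SUBMISSION_PATH = f'/content/drive/MyDrive/data_science_competitions/analytics_vidhya/jobathon_nov22/submissions/nb_{NOTEBOOK}'
# if not os.path.isdir(SUBMISSION_PATH):
#     os.makedirs(SUBMISSION_PATH)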

In [39]:
def create_submission_files(test_preds: pd.DataFrame, config: str, output_dir: str = None) -> None:
    """Write one submission CSV per column of ``test_preds``.

    For every column, a copy of the module-level ``sample_sub`` frame gets its
    ``TARGET`` column replaced by that column's values and is saved as
    ``{config}_{col}.csv`` without the index.

    Parameters
    ----------
    test_preds : pd.DataFrame
        Frame whose columns each hold one set of predictions to export.
    config : str
        Prefix used in the output file names.
    output_dir : str, optional
        Directory to write the files into; it is created if it does not exist.
        When omitted (or empty) the files are written to the current working
        directory, exactly as before this parameter existed.

    Returns
    -------
    None
        The function is called for its side effect of writing CSV files.
    """
    if output_dir:
        # Create the destination up front so a missing folder does not abort
        # the export part-way through the columns.
        os.makedirs(output_dir, exist_ok=True)

    for col in test_preds.columns:
        sub = sample_sub.copy()  # never mutate the shared template frame
        # Assigning a Series to a column aligns on index labels, so this
        # assumes test_preds shares sample_sub's index -- TODO confirm.
        sub[TARGET] = test_preds[col]
        filename = f'{config}_{col}.csv'
        path = os.path.join(output_dir, filename) if output_dir else filename
        sub.to_csv(path, index=False)

In [40]:
# Export the tp_1_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_1_1, config='1_1')
create_submission_files(test_preds=tp_1_2, config='1_2')
create_submission_files(test_preds=tp_1_3, config='1_3')

In [41]:
# Export the tp_2_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_2_1, config='2_1')
create_submission_files(test_preds=tp_2_2, config='2_2')
create_submission_files(test_preds=tp_2_3, config='2_3')

In [42]:
# Export the tp_3_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_3_1, config='3_1')
create_submission_files(test_preds=tp_3_2, config='3_2')
create_submission_files(test_preds=tp_3_3, config='3_3')

In [43]:
# Export the tp_4_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_4_1, config='4_1')
create_submission_files(test_preds=tp_4_2, config='4_2')
create_submission_files(test_preds=tp_4_3, config='4_3')

In [44]:
# Export the tp_5_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_5_1, config='5_1')
create_submission_files(test_preds=tp_5_2, config='5_2')
create_submission_files(test_preds=tp_5_3, config='5_3')

In [45]:
# Export the tp_6_* prediction frames; the config label mirrors the variable suffix.
create_submission_files(test_preds=tp_6_1, config='6_1')
create_submission_files(test_preds=tp_6_2, config='6_2')
create_submission_files(test_preds=tp_6_3, config='6_3')

In [46]:
# Export the tp_7_* prediction frames (the train_iterimp experiments); the
# config label mirrors the variable suffix.
create_submission_files(test_preds=tp_7_1, config='7_1')
create_submission_files(test_preds=tp_7_2, config='7_2')
create_submission_files(test_preds=tp_7_3, config='7_3')