### Setup: data locations, dependencies and imports

In [None]:
# Raw-file URLs of the processed train/test CSVs and the raw submission CSV
# in the sidt-ai/MH-hackathons GitHub repository.
ptrain_url = 'https://raw.githubusercontent.com/sidt-ai/MH-hackathons/main/dare_in_reality_2021/data/processed/proc_train.csv'
ptest_url = 'https://raw.githubusercontent.com/sidt-ai/MH-hackathons/main/dare_in_reality_2021/data/processed/proc_test.csv'
submission_url = 'https://raw.githubusercontent.com/sidt-ai/MH-hackathons/main/dare_in_reality_2021/data/raw/submission.csv'

In [None]:
!pip install --quiet optuna

[K     |████████████████████████████████| 308 kB 5.4 MB/s 
[K     |████████████████████████████████| 80 kB 6.2 MB/s 
[K     |████████████████████████████████| 209 kB 28.4 MB/s 
[K     |████████████████████████████████| 75 kB 2.6 MB/s 
[K     |████████████████████████████████| 49 kB 4.3 MB/s 
[K     |████████████████████████████████| 112 kB 27.9 MB/s 
[K     |████████████████████████████████| 149 kB 29.0 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [None]:
!pip install --quiet catboost

[K     |████████████████████████████████| 76.3 MB 36 kB/s 
[?25h

In [None]:
import time
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

from catboost import CatBoostRegressor

import optuna
from optuna.samplers import TPESampler

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error

# Number of cross-validation folds (used by KFold and by the per-fold training loop).
N_SPLITS = 5
# Single seed shared by KFold, train_test_split, CatBoost and the Optuna sampler.
SEED = 2311

# Model 1 - CatBoost, no weather data [Public LB - ]

In [None]:
# Load the processed train and test tables straight from their URLs.
ptrain = pd.read_csv(filepath_or_buffer=ptrain_url)
ptest = pd.read_csv(filepath_or_buffer=ptest_url)

### Creating Folds for Cross-validation

In [None]:
# Assign every training row to one of N_SPLITS shuffled validation folds.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# KFold.split yields *positional* row indices, so the fold ids are collected in
# a positional array instead of being written through label-based `.loc`; this
# stays correct even if `ptrain` does not carry a default RangeIndex.
fold_ids = np.full(len(ptrain), -1, dtype='int64')
for fold, (_, val_idx) in enumerate(kf.split(X=ptrain)):
    fold_ids[val_idx] = fold

# Sanity check: no row may be left with the -1 placeholder.
assert (fold_ids >= 0).all(), 'every row must belong to exactly one validation fold'
ptrain['fold'] = fold_ids

In [None]:
# Number of rows in the train and test tables.
len(ptrain), len(ptest)

(10054, 420)

In [None]:
# Show the available columns; the feature lists are derived from ptrain.columns.
ptrain.columns

Index(['CAR_NUMBER', 'LAP_NUMBER', 'LAP_TIME', 'LAP_IMPROVEMENT',
       'CROSSING_FINISH_LINE_IN_PIT', 'S1_IMPROVEMENT', 'S2_IMPROVEMENT',
       'S3_IMPROVEMENT', 'KPH', 'S1_LARGE', 'S2_LARGE', 'S3_LARGE',
       'DRIVER_NAME', 'PIT_TIME', 'TEAM', 'LOCATION', 'EVENT', 'PITSTOP_TAKEN',
       'EVENT_TYPE', 'fold'],
      dtype='object')

In [None]:
# Everything except the target and the CV fold id is a model input.
non_feature_cols = {'LAP_TIME', 'fold'}
# Inputs kept numeric; every other feature is passed to CatBoost as categorical.
numeric_cols = {'KPH', 'PIT_TIME', 'S1_LARGE', 'S2_LARGE', 'S3_LARGE'}

features = [col for col in ptrain.columns if col not in non_feature_cols]
cat_features = [col for col in features if col not in numeric_cols]

### Hyperparameter tuning using Optuna

In [None]:
# Single 80/20 hold-out split, used by the Optuna search below.
xtrain, xval, ytrain, yval = train_test_split(
    ptrain[features],
    ptrain['LAP_TIME'],
    random_state=SEED,
    test_size=0.2,
)

In [None]:
# CatBoost settings that stay fixed across all Optuna trials and CV folds;
# the tuned hyperparameters are merged in on top of these.
base_params = dict(
    iterations=5000,          # upper bound; fits below also pass early_stopping_rounds
    loss_function='RMSE',
    eval_metric='RMSE',
    score_function='L2',
    task_type='GPU',          # requires a GPU runtime
    cat_features=cat_features,
    use_best_model=True,
    random_seed=SEED,
)

In [None]:
def objective(trial, xtrain, ytrain, xval, yval, base_params):
    """Optuna objective: fit one CatBoost candidate and score it on the hold-out set.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the tunable hyperparameters.
    xtrain, ytrain
        Features and target the candidate model is fitted on.
    xval, yval
        Hold-out features and target; used both as the early-stopping
        eval set and to compute the returned score.
    base_params : dict
        Fixed ``CatBoostRegressor`` keyword arguments shared by every trial.

    Returns
    -------
    float
        Root mean squared log error of the hold-out predictions
        (the study minimises this value).
    """
    param_grid = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'depth': trial.suggest_int('depth', 3, 10),
        # suggest_float is the supported replacement for the deprecated
        # suggest_uniform and samples the same uniform range.
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 0.1, 10),
        'bootstrap_type': trial.suggest_categorical('bootstrap_type',
                                                    ['Bayesian', 'Poisson', 'No']),
        'grow_policy': trial.suggest_categorical('grow_policy',
                                                 ['SymmetricTree', 'Lossguide'])
    }

    model = CatBoostRegressor(**base_params, **param_grid)

    model.fit(
        xtrain, ytrain,
        eval_set=(xval, yval),
        early_stopping_rounds=100,
        verbose_eval=100
    )

    predictions = model.predict(xval)
    # RMSLE = sqrt(MSLE). Taking the square root explicitly avoids the
    # `squared=False` argument, which newer scikit-learn releases no longer accept.
    return np.sqrt(mean_squared_log_error(yval, predictions))

In [None]:
%%time
study = optuna.create_study(sampler=TPESampler(seed=SEED), 
                            direction='minimize', 
                            study_name='dir2021_catboost')

study.optimize(
    lambda trial: objective(trial, xtrain, ytrain, xval, yval, base_params), 
    n_trials=50
)

[32m[I 2021-11-22 12:47:01,579][0m A new study created in memory with name: dir2021_catboost[0m


0:	learn: 23.3019442	test: 23.9740440	best: 23.9740440 (0)	total: 43.7ms	remaining: 3m 38s
100:	learn: 21.9519980	test: 23.2765798	best: 23.2260595 (64)	total: 3.25s	remaining: 2m 37s
bestTest = 23.22605947
bestIteration = 64
Shrink model to first 65 iterations.


[32m[I 2021-11-22 12:47:09,288][0m Trial 0 finished with value: 0.21793188570186026 and parameters: {'learning_rate': 0.03894082102082652, 'depth': 9, 'l2_leaf_reg': 2.2658391209721263, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 0 with value: 0.21793188570186026.[0m


0:	learn: 23.2346375	test: 23.9138535	best: 23.9138535 (0)	total: 69.8ms	remaining: 5m 48s
100:	learn: 21.4161533	test: 23.2762073	best: 23.2197133 (30)	total: 5.55s	remaining: 4m 29s
bestTest = 23.21971327
bestIteration = 30
Shrink model to first 31 iterations.


[32m[I 2021-11-22 12:47:18,544][0m Trial 1 finished with value: 0.2177702984992022 and parameters: {'learning_rate': 0.07699736160829906, 'depth': 7, 'l2_leaf_reg': 0.6319937645727293, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 1 with value: 0.2177702984992022.[0m


0:	learn: 23.3391562	test: 24.0057567	best: 24.0057567 (0)	total: 65.7ms	remaining: 5m 28s
100:	learn: 22.0957074	test: 23.2206234	best: 23.2191846 (97)	total: 6.19s	remaining: 5m
200:	learn: 21.7300916	test: 23.2514550	best: 23.2184604 (119)	total: 12.7s	remaining: 5m 2s
bestTest = 23.21846043
bestIteration = 119
Shrink model to first 120 iterations.


[32m[I 2021-11-22 12:47:34,306][0m Trial 2 finished with value: 0.21781781376612536 and parameters: {'learning_rate': 0.020661640053029007, 'depth': 8, 'l2_leaf_reg': 3.612911913186504, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 1 with value: 0.2177702984992022.[0m


0:	learn: 23.3472242	test: 24.0132037	best: 24.0132037 (0)	total: 37.4ms	remaining: 3m 7s
100:	learn: 22.2565804	test: 23.2382933	best: 23.2381475 (99)	total: 3.28s	remaining: 2m 39s
200:	learn: 22.0100036	test: 23.2378386	best: 23.2239050 (147)	total: 6.61s	remaining: 2m 37s
bestTest = 23.22390501
bestIteration = 147
Shrink model to first 148 iterations.


[32m[I 2021-11-22 12:47:44,120][0m Trial 3 finished with value: 0.21786635778316205 and parameters: {'learning_rate': 0.016226283586212614, 'depth': 9, 'l2_leaf_reg': 1.2127855867893356, 'bootstrap_type': 'Poisson', 'grow_policy': 'Lossguide'}. Best is trial 1 with value: 0.2177702984992022.[0m


0:	learn: 23.2998352	test: 23.9695854	best: 23.9695854 (0)	total: 33.2ms	remaining: 2m 45s
100:	learn: 22.0316169	test: 23.2508080	best: 23.2268208 (56)	total: 2.91s	remaining: 2m 21s
bestTest = 23.22682084
bestIteration = 56
Shrink model to first 57 iterations.


[32m[I 2021-11-22 12:47:50,272][0m Trial 4 finished with value: 0.21796099712264555 and parameters: {'learning_rate': 0.04295002146409108, 'depth': 6, 'l2_leaf_reg': 9.9980076017466, 'bootstrap_type': 'Poisson', 'grow_policy': 'Lossguide'}. Best is trial 1 with value: 0.2177702984992022.[0m


0:	learn: 23.2037931	test: 23.8950184	best: 23.8950184 (0)	total: 82.6ms	remaining: 6m 52s
100:	learn: 19.4371958	test: 23.4575857	best: 23.2337737 (18)	total: 7.95s	remaining: 6m 25s
bestTest = 23.23377374
bestIteration = 18
Shrink model to first 19 iterations.


[32m[I 2021-11-22 12:48:01,074][0m Trial 5 finished with value: 0.2180466505420513 and parameters: {'learning_rate': 0.09867261986874484, 'depth': 10, 'l2_leaf_reg': 8.584574407018797, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 1 with value: 0.2177702984992022.[0m


0:	learn: 23.3382519	test: 24.0049566	best: 24.0049566 (0)	total: 41.5ms	remaining: 3m 27s
100:	learn: 22.3028371	test: 23.2128231	best: 23.2128231 (100)	total: 4.14s	remaining: 3m 20s
200:	learn: 22.1257652	test: 23.2200599	best: 23.2065751 (125)	total: 8.63s	remaining: 3m 25s
bestTest = 23.20657508
bestIteration = 125
Shrink model to first 126 iterations.


[32m[I 2021-11-22 12:48:12,703][0m Trial 6 finished with value: 0.21767092383840167 and parameters: {'learning_rate': 0.02093662866307587, 'depth': 6, 'l2_leaf_reg': 4.363469594884588, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.2171741	test: 23.8911981	best: 23.8911981 (0)	total: 14.1ms	remaining: 1m 10s
100:	learn: 22.2133137	test: 23.2543299	best: 23.2108468 (26)	total: 1.23s	remaining: 59.5s
bestTest = 23.21084684
bestIteration = 26
Shrink model to first 27 iterations.


[32m[I 2021-11-22 12:48:15,793][0m Trial 7 finished with value: 0.21777720327173797 and parameters: {'learning_rate': 0.09240529928476354, 'depth': 3, 'l2_leaf_reg': 7.809753030348231, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.2311616	test: 23.9043461	best: 23.9043461 (0)	total: 21.8ms	remaining: 1m 49s
100:	learn: 22.0660298	test: 23.2499231	best: 23.2119609 (30)	total: 1.84s	remaining: 1m 29s
bestTest = 23.21196086
bestIteration = 30
Shrink model to first 31 iterations.


[32m[I 2021-11-22 12:48:19,702][0m Trial 8 finished with value: 0.2178525409243414 and parameters: {'learning_rate': 0.08452193895201393, 'depth': 4, 'l2_leaf_reg': 7.135166719428352, 'bootstrap_type': 'Poisson', 'grow_policy': 'Lossguide'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.3478459	test: 24.0118175	best: 24.0118175 (0)	total: 37.4ms	remaining: 3m 6s
100:	learn: 22.4078438	test: 23.2283301	best: 23.2283301 (100)	total: 3.5s	remaining: 2m 49s
200:	learn: 22.2916563	test: 23.2168126	best: 23.2147898 (156)	total: 7.08s	remaining: 2m 49s
bestTest = 23.21478983
bestIteration = 156
Shrink model to first 157 iterations.


[32m[I 2021-11-22 12:48:30,323][0m Trial 9 finished with value: 0.21777778023672945 and parameters: {'learning_rate': 0.016787558082004, 'depth': 5, 'l2_leaf_reg': 2.0441229515753725, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.2593679	test: 23.9397540	best: 23.9397540 (0)	total: 44.6ms	remaining: 3m 42s
100:	learn: 21.9749088	test: 23.2404758	best: 23.2149840 (37)	total: 4.19s	remaining: 3m 23s
bestTest = 23.21498395
bestIteration = 37
Shrink model to first 38 iterations.


[32m[I 2021-11-22 12:48:37,579][0m Trial 10 finished with value: 0.2178007104402648 and parameters: {'learning_rate': 0.06494604920974353, 'depth': 6, 'l2_leaf_reg': 5.310049384553956, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.2453774	test: 23.9242768	best: 23.9242768 (0)	total: 52.3ms	remaining: 4m 21s
100:	learn: 21.4679137	test: 23.2768375	best: 23.2154552 (32)	total: 4.92s	remaining: 3m 58s
bestTest = 23.21545519
bestIteration = 32
Shrink model to first 33 iterations.


[32m[I 2021-11-22 12:48:45,490][0m Trial 11 finished with value: 0.2177647906712828 and parameters: {'learning_rate': 0.07083218658442009, 'depth': 7, 'l2_leaf_reg': 0.3322915986919206, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 6 with value: 0.21767092383840167.[0m


0:	learn: 23.2656827	test: 23.9432836	best: 23.9432836 (0)	total: 50ms	remaining: 4m 9s
100:	learn: 21.8293890	test: 23.2565644	best: 23.2102711 (42)	total: 5s	remaining: 4m 2s
bestTest = 23.21027107
bestIteration = 42
Shrink model to first 43 iterations.


[32m[I 2021-11-22 12:48:54,009][0m Trial 12 finished with value: 0.21766951173431645 and parameters: {'learning_rate': 0.05941640059813206, 'depth': 7, 'l2_leaf_reg': 4.711128721088422, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 12 with value: 0.21766951173431645.[0m


0:	learn: 23.2796691	test: 23.9561151	best: 23.9561151 (0)	total: 34.7ms	remaining: 2m 53s
100:	learn: 22.1798966	test: 23.2298888	best: 23.2084539 (51)	total: 3.56s	remaining: 2m 52s
bestTest = 23.20845395
bestIteration = 51
Shrink model to first 52 iterations.


[32m[I 2021-11-22 12:49:01,121][0m Trial 13 finished with value: 0.21766704303394105 and parameters: {'learning_rate': 0.053741943479585975, 'depth': 5, 'l2_leaf_reg': 5.28886310743216, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 13 with value: 0.21766704303394105.[0m


0:	learn: 23.2862400	test: 23.9561164	best: 23.9561164 (0)	total: 29.7ms	remaining: 2m 28s
100:	learn: 22.3087323	test: 23.2146225	best: 23.2101345 (44)	total: 2.96s	remaining: 2m 23s
bestTest = 23.21013449
bestIteration = 44
Shrink model to first 45 iterations.


[32m[I 2021-11-22 12:49:06,951][0m Trial 14 finished with value: 0.21771759724876039 and parameters: {'learning_rate': 0.052052694909992654, 'depth': 4, 'l2_leaf_reg': 5.878921481768221, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 13 with value: 0.21766704303394105.[0m


0:	learn: 23.2726917	test: 23.9458536	best: 23.9458536 (0)	total: 44.3ms	remaining: 3m 41s
100:	learn: 22.1499319	test: 23.2349228	best: 23.2094784 (45)	total: 3.6s	remaining: 2m 54s
bestTest = 23.20947836
bestIteration = 45
Shrink model to first 46 iterations.


[32m[I 2021-11-22 12:49:13,623][0m Trial 15 finished with value: 0.21771873581637347 and parameters: {'learning_rate': 0.05787127244979044, 'depth': 5, 'l2_leaf_reg': 3.6152844078676454, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 13 with value: 0.21766704303394105.[0m


0:	learn: 23.3132851	test: 23.9856514	best: 23.9856514 (0)	total: 59ms	remaining: 4m 54s
100:	learn: 21.9146347	test: 23.2470677	best: 23.2276638 (76)	total: 5.63s	remaining: 4m 33s
bestTest = 23.22766381
bestIteration = 76
Shrink model to first 77 iterations.


[32m[I 2021-11-22 12:49:25,018][0m Trial 16 finished with value: 0.2179007184546818 and parameters: {'learning_rate': 0.03430823422831404, 'depth': 8, 'l2_leaf_reg': 6.266459440227877, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 13 with value: 0.21766704303394105.[0m


0:	learn: 23.2793914	test: 23.9529948	best: 23.9529948 (0)	total: 32.1ms	remaining: 2m 40s
100:	learn: 22.1717796	test: 23.2317498	best: 23.2011907 (45)	total: 3.49s	remaining: 2m 49s
bestTest = 23.20119073
bestIteration = 45
Shrink model to first 46 iterations.


[32m[I 2021-11-22 12:49:31,569][0m Trial 17 finished with value: 0.21765785882067173 and parameters: {'learning_rate': 0.05367848838257187, 'depth': 5, 'l2_leaf_reg': 4.300859520499788, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2897877	test: 23.9584009	best: 23.9584009 (0)	total: 19.4ms	remaining: 1m 37s
100:	learn: 22.3811114	test: 23.2186599	best: 23.2098774 (45)	total: 2.08s	remaining: 1m 40s
bestTest = 23.2098774
bestIteration = 45
Shrink model to first 46 iterations.


[32m[I 2021-11-22 12:49:36,105][0m Trial 18 finished with value: 0.21775521957314148 and parameters: {'learning_rate': 0.049655485040834044, 'depth': 3, 'l2_leaf_reg': 2.9194923524746628, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.3223882	test: 23.9893194	best: 23.9893194 (0)	total: 35.7ms	remaining: 2m 58s
100:	learn: 22.3130581	test: 23.2293925	best: 23.2254774 (74)	total: 3.55s	remaining: 2m 52s
bestTest = 23.22547738
bestIteration = 74
Shrink model to first 75 iterations.


[32m[I 2021-11-22 12:49:43,861][0m Trial 19 finished with value: 0.21790821866471724 and parameters: {'learning_rate': 0.03078187760731355, 'depth': 5, 'l2_leaf_reg': 6.665520046706236, 'bootstrap_type': 'Poisson', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2921232	test: 23.9648886	best: 23.9648886 (0)	total: 26.5ms	remaining: 2m 12s
100:	learn: 22.2888158	test: 23.2236467	best: 23.2065376 (51)	total: 2.82s	remaining: 2m 16s
bestTest = 23.20653758
bestIteration = 51
Shrink model to first 52 iterations.


[32m[I 2021-11-22 12:49:49,698][0m Trial 20 finished with value: 0.21773028882900616 and parameters: {'learning_rate': 0.047412297909271615, 'depth': 4, 'l2_leaf_reg': 5.3143425469434815, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2689931	test: 23.9438444	best: 23.9438444 (0)	total: 31.8ms	remaining: 2m 38s
100:	learn: 22.1389814	test: 23.2306956	best: 23.2107424 (48)	total: 3.54s	remaining: 2m 51s
bestTest = 23.2107424
bestIteration = 48
Shrink model to first 49 iterations.


[32m[I 2021-11-22 12:49:56,732][0m Trial 21 finished with value: 0.21769136470243694 and parameters: {'learning_rate': 0.059536661489935264, 'depth': 5, 'l2_leaf_reg': 4.501778974893421, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2526397	test: 23.9331711	best: 23.9331711 (0)	total: 54.9ms	remaining: 4m 34s
100:	learn: 21.7254601	test: 23.2934133	best: 23.2121416 (35)	total: 5.12s	remaining: 4m 8s
bestTest = 23.21214161
bestIteration = 35
Shrink model to first 36 iterations.


[32m[I 2021-11-22 12:50:05,301][0m Trial 22 finished with value: 0.21773358097663748 and parameters: {'learning_rate': 0.0663504516845238, 'depth': 7, 'l2_leaf_reg': 3.879667684761717, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2368527	test: 23.9166748	best: 23.9166748 (0)	total: 43.5ms	remaining: 3m 37s
100:	learn: 21.9009006	test: 23.2867238	best: 23.2090793 (28)	total: 4.2s	remaining: 3m 23s
bestTest = 23.20907931
bestIteration = 28
Shrink model to first 29 iterations.


[32m[I 2021-11-22 12:50:12,289][0m Trial 23 finished with value: 0.21774033364545095 and parameters: {'learning_rate': 0.07617403703020481, 'depth': 6, 'l2_leaf_reg': 4.9165213889806845, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2652726	test: 23.9440806	best: 23.9440806 (0)	total: 54.7ms	remaining: 4m 33s
100:	learn: 21.4287425	test: 23.2821483	best: 23.2258199 (44)	total: 5.53s	remaining: 4m 28s
bestTest = 23.22581994
bestIteration = 44
Shrink model to first 45 iterations.


[32m[I 2021-11-22 12:50:21,824][0m Trial 24 finished with value: 0.21783555145037387 and parameters: {'learning_rate': 0.059674332311839315, 'depth': 8, 'l2_leaf_reg': 2.765747619148833, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2975259	test: 23.9695322	best: 23.9695322 (0)	total: 31.1ms	remaining: 2m 35s
100:	learn: 22.3011786	test: 23.2205056	best: 23.2050563 (56)	total: 2.78s	remaining: 2m 14s
bestTest = 23.20505633
bestIteration = 56
Shrink model to first 57 iterations.


[32m[I 2021-11-22 12:50:27,694][0m Trial 25 finished with value: 0.2176914121109813 and parameters: {'learning_rate': 0.04442914776076404, 'depth': 4, 'l2_leaf_reg': 5.795819113862544, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2808935	test: 23.9532919	best: 23.9532919 (0)	total: 24.8ms	remaining: 2m 3s
100:	learn: 22.0569719	test: 23.2838515	best: 23.2287797 (52)	total: 2.57s	remaining: 2m 4s
bestTest = 23.22877969
bestIteration = 52
Shrink model to first 53 iterations.


[32m[I 2021-11-22 12:50:33,128][0m Trial 26 finished with value: 0.21800249332349705 and parameters: {'learning_rate': 0.05369730562116362, 'depth': 5, 'l2_leaf_reg': 7.386942307392442, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.3189375	test: 23.9889178	best: 23.9889178 (0)	total: 44.9ms	remaining: 3m 44s
100:	learn: 22.0918568	test: 23.2138607	best: 23.2049947 (82)	total: 4.76s	remaining: 3m 50s
bestTest = 23.20499473
bestIteration = 82
Shrink model to first 83 iterations.


[32m[I 2021-11-22 12:50:43,416][0m Trial 27 finished with value: 0.21770240654658726 and parameters: {'learning_rate': 0.030868619586531743, 'depth': 7, 'l2_leaf_reg': 4.590460334022424, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2555149	test: 23.9299232	best: 23.9299232 (0)	total: 41.4ms	remaining: 3m 26s
100:	learn: 21.9229128	test: 23.2621631	best: 23.2196624 (31)	total: 4.35s	remaining: 3m 31s
bestTest = 23.21966241
bestIteration = 31
Shrink model to first 32 iterations.


[32m[I 2021-11-22 12:50:50,955][0m Trial 28 finished with value: 0.2177793485637782 and parameters: {'learning_rate': 0.06800817837157139, 'depth': 6, 'l2_leaf_reg': 3.0437516624168035, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.3036622	test: 23.9743720	best: 23.9743720 (0)	total: 30.3ms	remaining: 2m 31s
100:	learn: 21.9056716	test: 23.2607375	best: 23.2282070 (52)	total: 3.13s	remaining: 2m 31s
bestTest = 23.22820704
bestIteration = 52
Shrink model to first 53 iterations.


[32m[I 2021-11-22 12:50:57,144][0m Trial 29 finished with value: 0.2179328976719683 and parameters: {'learning_rate': 0.03986640808508659, 'depth': 9, 'l2_leaf_reg': 1.68867846885753, 'bootstrap_type': 'Poisson', 'grow_policy': 'Lossguide'}. Best is trial 17 with value: 0.21765785882067173.[0m


0:	learn: 23.2675410	test: 23.9386985	best: 23.9386985 (0)	total: 20.5ms	remaining: 1m 42s
100:	learn: 22.3208856	test: 23.2303745	best: 23.2035187 (36)	total: 2.08s	remaining: 1m 41s
bestTest = 23.20351874
bestIteration = 36
Shrink model to first 37 iterations.


[32m[I 2021-11-22 12:51:01,489][0m Trial 30 finished with value: 0.21764491858964385 and parameters: {'learning_rate': 0.06197208812117224, 'depth': 3, 'l2_leaf_reg': 3.9458703752193056, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2661396	test: 23.9373976	best: 23.9373976 (0)	total: 20.2ms	remaining: 1m 41s
100:	learn: 22.3156044	test: 23.2274979	best: 23.2035978 (36)	total: 2.08s	remaining: 1m 41s
bestTest = 23.20359777
bestIteration = 36
Shrink model to first 37 iterations.


[32m[I 2021-11-22 12:51:05,871][0m Trial 31 finished with value: 0.21765534638064624 and parameters: {'learning_rate': 0.06280724746684102, 'depth': 3, 'l2_leaf_reg': 4.1305909789806226, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2464484	test: 23.9194685	best: 23.9194685 (0)	total: 19.7ms	remaining: 1m 38s
100:	learn: 22.2969494	test: 23.2315290	best: 23.2061291 (31)	total: 2.04s	remaining: 1m 39s
bestTest = 23.20612911
bestIteration = 31
Shrink model to first 32 iterations.


[32m[I 2021-11-22 12:51:10,063][0m Trial 32 finished with value: 0.21767915615020414 and parameters: {'learning_rate': 0.07421144570926687, 'depth': 3, 'l2_leaf_reg': 3.929030967148743, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2660568	test: 23.9375262	best: 23.9375262 (0)	total: 20.2ms	remaining: 1m 40s
100:	learn: 22.3194999	test: 23.2306461	best: 23.2035737 (36)	total: 2.07s	remaining: 1m 40s
bestTest = 23.20357366
bestIteration = 36
Shrink model to first 37 iterations.


[32m[I 2021-11-22 12:51:14,349][0m Trial 33 finished with value: 0.2176517194294895 and parameters: {'learning_rate': 0.06247896053774924, 'depth': 3, 'l2_leaf_reg': 3.2235472159616316, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2321437	test: 23.9071543	best: 23.9071543 (0)	total: 19.6ms	remaining: 1m 38s
100:	learn: 22.2862071	test: 23.2365546	best: 23.2027472 (27)	total: 2.11s	remaining: 1m 42s
bestTest = 23.20274723
bestIteration = 27
Shrink model to first 28 iterations.


[32m[I 2021-11-22 12:51:18,507][0m Trial 34 finished with value: 0.21764783762914097 and parameters: {'learning_rate': 0.08250786016491857, 'depth': 3, 'l2_leaf_reg': 2.3101846195753, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2304819	test: 23.9055423	best: 23.9055423 (0)	total: 17.6ms	remaining: 1m 27s
100:	learn: 22.2881527	test: 23.2410348	best: 23.2060019 (26)	total: 2.14s	remaining: 1m 43s
bestTest = 23.20600188
bestIteration = 26
Shrink model to first 27 iterations.


[32m[I 2021-11-22 12:51:22,673][0m Trial 35 finished with value: 0.21767608895738305 and parameters: {'learning_rate': 0.08334234780256894, 'depth': 3, 'l2_leaf_reg': 1.2027791364085685, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2324554	test: 23.9062534	best: 23.9062534 (0)	total: 11.2ms	remaining: 55.8s
100:	learn: 22.2333482	test: 23.2488791	best: 23.2110879 (29)	total: 1.16s	remaining: 56.3s
bestTest = 23.21108786
bestIteration = 29
Shrink model to first 30 iterations.


[32m[I 2021-11-22 12:51:25,646][0m Trial 36 finished with value: 0.21773324987366097 and parameters: {'learning_rate': 0.08193762091125148, 'depth': 3, 'l2_leaf_reg': 2.4642685053407063, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2186320	test: 23.8960875	best: 23.8960875 (0)	total: 27ms	remaining: 2m 14s
100:	learn: 22.1978756	test: 23.2495582	best: 23.2180575 (30)	total: 2.85s	remaining: 2m 18s
bestTest = 23.21805752
bestIteration = 30
Shrink model to first 31 iterations.


[32m[I 2021-11-22 12:51:30,882][0m Trial 37 finished with value: 0.21786268792758845 and parameters: {'learning_rate': 0.0905684558113482, 'depth': 4, 'l2_leaf_reg': 3.3184567459241845, 'bootstrap_type': 'Poisson', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2480689	test: 23.9215058	best: 23.9215058 (0)	total: 23.3ms	remaining: 1m 56s
100:	learn: 22.3149212	test: 23.2317685	best: 23.2055090 (31)	total: 2.08s	remaining: 1m 41s
bestTest = 23.20550902
bestIteration = 31
Shrink model to first 32 iterations.


[32m[I 2021-11-22 12:51:35,144][0m Trial 38 finished with value: 0.21766408159821232 and parameters: {'learning_rate': 0.07307135931289838, 'depth': 3, 'l2_leaf_reg': 1.319788645768217, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2361611	test: 23.9111358	best: 23.9111358 (0)	total: 16.8ms	remaining: 1m 23s
100:	learn: 22.0319766	test: 23.3044943	best: 23.2240081 (30)	total: 1.75s	remaining: 1m 24s
bestTest = 23.22400805
bestIteration = 30
Shrink model to first 31 iterations.


[32m[I 2021-11-22 12:51:39,386][0m Trial 39 finished with value: 0.21791480893235882 and parameters: {'learning_rate': 0.07962185229319035, 'depth': 4, 'l2_leaf_reg': 2.197230743389443, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2229262	test: 23.8969810	best: 23.8969810 (0)	total: 22.2ms	remaining: 1m 50s
100:	learn: 22.3151224	test: 23.2359901	best: 23.2118149 (25)	total: 2.36s	remaining: 1m 54s
bestTest = 23.21181491
bestIteration = 25
Shrink model to first 26 iterations.


[32m[I 2021-11-22 12:51:44,223][0m Trial 40 finished with value: 0.2178047549058908 and parameters: {'learning_rate': 0.0889262541248371, 'depth': 3, 'l2_leaf_reg': 3.329472593217115, 'bootstrap_type': 'Bayesian', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2624587	test: 23.9340399	best: 23.9340399 (0)	total: 21.2ms	remaining: 1m 45s
100:	learn: 22.3141112	test: 23.2283382	best: 23.2045661 (34)	total: 2.31s	remaining: 1m 52s
bestTest = 23.20456614
bestIteration = 34
Shrink model to first 35 iterations.


[32m[I 2021-11-22 12:51:49,026][0m Trial 41 finished with value: 0.2176723776898654 and parameters: {'learning_rate': 0.06493352334410707, 'depth': 3, 'l2_leaf_reg': 4.108903964327818, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2640795	test: 23.9369458	best: 23.9369458 (0)	total: 26.1ms	remaining: 2m 10s
100:	learn: 22.2437708	test: 23.2388202	best: 23.2045487 (37)	total: 2.99s	remaining: 2m 25s
bestTest = 23.20454873
bestIteration = 37
Shrink model to first 38 iterations.


[32m[I 2021-11-22 12:51:54,641][0m Trial 42 finished with value: 0.21769693347276237 and parameters: {'learning_rate': 0.06298364941186428, 'depth': 4, 'l2_leaf_reg': 2.4131927009967957, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 30 with value: 0.21764491858964385.[0m


0:	learn: 23.2077632	test: 23.8845146	best: 23.8845146 (0)	total: 21.4ms	remaining: 1m 46s
100:	learn: 22.2618428	test: 23.2401575	best: 23.2016596 (23)	total: 2.2s	remaining: 1m 46s
bestTest = 23.20165956
bestIteration = 23
Shrink model to first 24 iterations.


[32m[I 2021-11-22 12:51:58,796][0m Trial 43 finished with value: 0.21763347031045138 and parameters: {'learning_rate': 0.09654703295900584, 'depth': 3, 'l2_leaf_reg': 3.392212884308976, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2069020	test: 23.8842258	best: 23.8842258 (0)	total: 20.8ms	remaining: 1m 43s
100:	learn: 22.2663312	test: 23.2339343	best: 23.2053845 (23)	total: 2.15s	remaining: 1m 44s
bestTest = 23.20538447
bestIteration = 23
Shrink model to first 24 iterations.


[32m[I 2021-11-22 12:52:02,958][0m Trial 44 finished with value: 0.21765941594175342 and parameters: {'learning_rate': 0.09738154406330808, 'depth': 3, 'l2_leaf_reg': 1.7140005925186625, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2088199	test: 23.8854853	best: 23.8854853 (0)	total: 19.7ms	remaining: 1m 38s
100:	learn: 22.2677411	test: 23.2396895	best: 23.2044603 (23)	total: 2.07s	remaining: 1m 40s
bestTest = 23.20446033
bestIteration = 23
Shrink model to first 24 iterations.


[32m[I 2021-11-22 12:52:07,021][0m Trial 45 finished with value: 0.2176626503668315 and parameters: {'learning_rate': 0.09593405409825753, 'depth': 3, 'l2_leaf_reg': 3.493426317363899, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2219786	test: 23.9416091	best: 23.9416091 (0)	total: 78.6ms	remaining: 6m 33s
100:	learn: 18.7850745	test: 23.3491527	best: 23.2634751 (30)	total: 7.89s	remaining: 6m 22s
bestTest = 23.26347505
bestIteration = 30
Shrink model to first 31 iterations.


[32m[I 2021-11-22 12:52:18,902][0m Trial 46 finished with value: 0.2182344097168072 and parameters: {'learning_rate': 0.06932032649450769, 'depth': 10, 'l2_leaf_reg': 0.9943348161099679, 'bootstrap_type': 'Poisson', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2150079	test: 23.8919695	best: 23.8919695 (0)	total: 16.7ms	remaining: 1m 23s
100:	learn: 21.9959871	test: 23.3121053	best: 23.2258280 (26)	total: 1.67s	remaining: 1m 20s
bestTest = 23.22582797
bestIteration = 26
Shrink model to first 27 iterations.


[32m[I 2021-11-22 12:52:22,463][0m Trial 47 finished with value: 0.2179827407967738 and parameters: {'learning_rate': 0.09297510789235373, 'depth': 4, 'l2_leaf_reg': 9.125185784433878, 'bootstrap_type': 'No', 'grow_policy': 'Lossguide'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2245082	test: 23.9020877	best: 23.9020877 (0)	total: 26.4ms	remaining: 2m 12s
100:	learn: 22.1937878	test: 23.2517891	best: 23.2091597 (28)	total: 2.71s	remaining: 2m 11s
bestTest = 23.20915966
bestIteration = 28
Shrink model to first 29 iterations.


[32m[I 2021-11-22 12:52:27,418][0m Trial 48 finished with value: 0.21776376801234051 and parameters: {'learning_rate': 0.08545500178407597, 'depth': 4, 'l2_leaf_reg': 2.555316431667239, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


0:	learn: 23.2031715	test: 23.8808709	best: 23.8808709 (0)	total: 20.1ms	remaining: 1m 40s
100:	learn: 22.2660869	test: 23.2437399	best: 23.2040116 (22)	total: 2.14s	remaining: 1m 43s
bestTest = 23.20401164
bestIteration = 22
Shrink model to first 23 iterations.


[32m[I 2021-11-22 12:52:31,637][0m Trial 49 finished with value: 0.21764731152847647 and parameters: {'learning_rate': 0.09964303073248612, 'depth': 3, 'l2_leaf_reg': 1.920333567788793, 'bootstrap_type': 'No', 'grow_policy': 'SymmetricTree'}. Best is trial 43 with value: 0.21763347031045138.[0m


CPU times: user 4min 53s, sys: 2min 38s, total: 7min 31s
Wall time: 5min 30s


In [None]:
best_params = study.best_params
# Report the winning configuration: a header, then one tab-indented line per parameter.
print(
    'Best params:',
    *(f'\t{key}: {value}' for key, value in best_params.items()),
    sep='\n',
)

Best params:
	learning_rate: 0.09654703295900584
	depth: 3
	l2_leaf_reg: 3.392212884308976
	bootstrap_type: No
	grow_policy: SymmetricTree


In [None]:
# The hold-out split was only needed for the Optuna search; the cross-validation
# function below builds its own per-fold splits.
del xtrain, xval, ytrain, yval

In [None]:
def custom_cross_val_predict(train, test, features):
    """Fit one CatBoost model per fold; return out-of-fold and averaged test predictions.

    For each fold id in ``range(N_SPLITS)``, rows of ``train`` whose ``fold``
    value differs from the id are used for fitting, and rows whose ``fold``
    equals the id serve as the early-stopping eval set and are scored with
    RMSLE. Each fold's model also predicts ``test``; those predictions are
    averaged across folds.

    Relies on the module-level names ``N_SPLITS``, ``base_params`` and
    ``best_params``.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain the ``features`` columns plus ``LAP_TIME`` and ``fold``.
        Index labels are used as out-of-fold keys, so they must be unique.
    test : pandas.DataFrame
        Must contain the ``features`` columns.
    features : list
        Names of the columns passed to the model.

    Returns
    -------
    oof_preds : pandas.DataFrame
        Column ``index`` holds the row label in ``train`` and column ``0`` the
        out-of-fold prediction for that row; rows are sorted by ``index``.
    test_preds : numpy.ndarray
        Per-row mean of the fold models' predictions on ``test``.
    """
    oof_preds = {}
    test_preds = []
    scores = []

    cv_start = time.time()

    for fold in range(N_SPLITS):
        print('-' * 40)

        is_val = train.fold == fold
        # Capture the validation rows' labels in `train` BEFORE the index is
        # reset. Reading them from the reset frame yields 0..k-1 for every
        # fold, which made later folds overwrite earlier OOF predictions.
        val_idx = train.index[is_val].tolist()

        xtrain = train[train.fold != fold].reset_index(drop=True)
        ytrain = xtrain.LAP_TIME

        xval = train[is_val].reset_index(drop=True)
        yval = xval.LAP_TIME

        fold_start = time.time()

        model = CatBoostRegressor(**base_params, **best_params)

        model.fit(
            xtrain[features], ytrain,
            eval_set=(xval[features], yval),
            early_stopping_rounds=100,
            verbose_eval=100
        )
        val_preds = model.predict(xval[features])
        oof_preds.update(zip(val_idx, val_preds))

        # Take the square root explicitly instead of passing `squared=False`,
        # so the score does not depend on that version-specific keyword.
        rmsle = np.sqrt(mean_squared_log_error(yval, val_preds))
        scores.append(rmsle)

        fold_end = time.time()

        print(f'Fold #{fold}: RMSLE = {rmsle:.5f}\t[Time: {fold_end - fold_start:.2f} secs]')

        test_preds.append(model.predict(test[features]))

        del xtrain, ytrain, xval, yval

    cv_end = time.time()
    print(f'\nAverage RMSLE = {np.mean(scores):.5f} with std. dev. = {np.std(scores):.5f}')
    print(f'[Total time: {cv_end - cv_start:.2f} secs]\n')

    # Sort by the original row label so the OOF frame follows `train` order
    # rather than fold order.
    oof_preds = (
        pd.DataFrame.from_dict(oof_preds, orient='index')
        .sort_index()
        .reset_index()
    )
    # One column per fold -> average across folds for each test row.
    test_preds = np.mean(np.column_stack(test_preds), axis=1)

    return oof_preds, test_preds

In [None]:
# Run the fold-wise training on the processed train/test frames and keep both
# the out-of-fold predictions and the fold-averaged test predictions.
oof_preds, test_preds = custom_cross_val_predict(
    train=ptrain,
    test=ptest,
    features=features,
)

----------------------------------------
0:	learn: 23.2077632	test: 23.8845146	best: 23.8845146 (0)	total: 17.8ms	remaining: 1m 29s
100:	learn: 22.2769045	test: 23.2190266	best: 23.2029883 (23)	total: 2.05s	remaining: 1m 39s
bestTest = 23.20298833
bestIteration = 23
Shrink model to first 24 iterations.
Fold #0: AUC = 0.21771	[Time: 4.09 secs]
----------------------------------------
0:	learn: 23.2253661	test: 23.8331287	best: 23.8331287 (0)	total: 18.9ms	remaining: 1m 34s
100:	learn: 22.3635292	test: 23.1037619	best: 23.0869556 (33)	total: 2.01s	remaining: 1m 37s
bestTest = 23.08695565
bestIteration = 33
Shrink model to first 34 iterations.
Fold #1: AUC = 0.21466	[Time: 4.18 secs]
----------------------------------------
0:	learn: 23.3788007	test: 23.2006723	best: 23.2006723 (0)	total: 19.1ms	remaining: 1m 35s
100:	learn: 22.4799958	test: 22.5208975	best: 22.5187563 (98)	total: 2.09s	remaining: 1m 41s
bestTest = 22.51875634
bestIteration = 98
Shrink model to first 99 iterations.
Fold #

In [None]:
# Load the submission template, fill in the fold-averaged test predictions,
# and write the file to upload.
submission = pd.read_csv(submission_url)

# Fail with a clear message if the template and the predictions disagree on row count.
assert len(submission) == len(test_preds), (
    f'submission has {len(submission)} rows but got {len(test_preds)} predictions'
)

# Use bracket assignment: `submission.LAP_TIME = ...` only sets a Python
# attribute (not a column) when the column is absent, and this notebook
# silences the warning pandas would emit for that.
submission['LAP_TIME'] = test_preds
submission.to_csv('sub2_catboost_newproc_noweather.csv', index=False)

In [None]:
# Show the first lines of the submission file written above as a quick sanity check.
!head sub2_catboost_newproc_noweather.csv

LAP_TIME
98.14868657813058
97.68700761440559
97.06044196338917
97.80922575878795
97.54292190513225
96.89852526722242
97.9170745583775
97.74799216990941
95.94249607191304


# Model 2