In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
from catboost import Pool
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [33]:
df = pd.read_csv("final_2019.csv")

df[['month', 'day', 'hour', 'min']] = df[['month', 'day', 'hour', 'min']].astype(str)

In [34]:
#predict delay
y = df.pop("delay_minutes")

X = df
#print(X.dtypes)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

#List of categorical columns
categoricalcolumns = X.select_dtypes(include=["object"]).columns.tolist()
#print("Names of categorical columns : ", categoricalcolumns)

#Get location of categorical columns
cat_features = [X.columns.get_loc(col) for col in categoricalcolumns]
#print("Location of categorical columns : ",cat_features)


train_data = Pool(data=X_train,
                  label=y_train,
                  cat_features=cat_features
                 )


test_data = Pool(data=X_test,
                  label=y_test,
                  cat_features=cat_features
                 )

In [35]:
model = CatBoostRegressor(iterations=500,
                          learning_rate=0.1,
                          early_stopping_rounds=5,
                          depth=10)

In [36]:
grid = {'iterations': [500],
        'learning_rate': [0.17],
        'depth': [15],
        'l2_leaf_reg': [1],
        'early_stopping_rounds': [5]}

grid_search_result = model.grid_search(grid,
                                       X=train_data,
                                       plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 7.1799367	test: 7.1690698	best: 7.1690698 (0)	total: 114ms	remaining: 56.6s
1:	learn: 6.8658152	test: 6.8597431	best: 6.8597431 (1)	total: 607ms	remaining: 2m 31s
2:	learn: 6.5664574	test: 6.5661412	best: 6.5661412 (2)	total: 1.28s	remaining: 3m 32s
3:	learn: 6.3217863	test: 6.3355006	best: 6.3355006 (3)	total: 2.05s	remaining: 4m 14s
4:	learn: 6.1632875	test: 6.1792408	best: 6.1792408 (4)	total: 2.6s	remaining: 4m 17s
5:	learn: 6.0345375	test: 6.0626630	best: 6.0626630 (5)	total: 3.2s	remaining: 4m 23s
6:	learn: 5.9468526	test: 5.9824795	best: 5.9824795 (6)	total: 3.75s	remaining: 4m 24s
7:	learn: 5.7865842	test: 5.8366185	best: 5.8366185 (7)	total: 4.56s	remaining: 4m 40s
8:	learn: 5.6674613	test: 5.7292078	best: 5.7292078 (8)	total: 5.2s	remaining: 4m 43s
9:	learn: 5.5530695	test: 5.6300039	best: 5.6300039 (9)	total: 5.9s	remaining: 4m 49s
10:	learn: 5.4658580	test: 5.5549823	best: 5.5549823 (10)	total: 6.64s	remaining: 4m 55s
11:	learn: 5.3981338	test: 5.4966590	best: 5.4

93:	learn: 4.1361023	test: 4.8241156	best: 4.8241156 (93)	total: 1m 35s	remaining: 6m 52s
94:	learn: 4.1322003	test: 4.8236163	best: 4.8236163 (94)	total: 1m 36s	remaining: 6m 51s
95:	learn: 4.1222134	test: 4.8224926	best: 4.8224926 (95)	total: 1m 37s	remaining: 6m 50s
96:	learn: 4.1080577	test: 4.8148073	best: 4.8148073 (96)	total: 1m 39s	remaining: 6m 52s
97:	learn: 4.0913106	test: 4.8153999	best: 4.8148073 (96)	total: 1m 40s	remaining: 6m 51s
98:	learn: 4.0871320	test: 4.8148505	best: 4.8148073 (96)	total: 1m 41s	remaining: 6m 51s
99:	learn: 4.0721533	test: 4.8140905	best: 4.8140905 (99)	total: 1m 42s	remaining: 6m 50s
100:	learn: 4.0640162	test: 4.8107462	best: 4.8107462 (100)	total: 1m 45s	remaining: 6m 55s
101:	learn: 4.0621281	test: 4.8091810	best: 4.8091810 (101)	total: 1m 46s	remaining: 6m 53s
102:	learn: 4.0513893	test: 4.8087815	best: 4.8087815 (102)	total: 1m 47s	remaining: 6m 53s
103:	learn: 4.0431520	test: 4.8069094	best: 4.8069094 (103)	total: 1m 48s	remaining: 6m 52s
10

183:	learn: 3.5484552	test: 4.7241193	best: 4.7241193 (183)	total: 3m 25s	remaining: 5m 52s
184:	learn: 3.5432895	test: 4.7238516	best: 4.7238516 (184)	total: 3m 26s	remaining: 5m 51s
185:	learn: 3.5408859	test: 4.7237694	best: 4.7237694 (185)	total: 3m 27s	remaining: 5m 50s
186:	learn: 3.5396909	test: 4.7232864	best: 4.7232864 (186)	total: 3m 28s	remaining: 5m 48s
187:	learn: 3.5368796	test: 4.7231406	best: 4.7231406 (187)	total: 3m 29s	remaining: 5m 47s
188:	learn: 3.5244953	test: 4.7231465	best: 4.7231406 (187)	total: 3m 30s	remaining: 5m 46s
189:	learn: 3.5153271	test: 4.7229111	best: 4.7229111 (189)	total: 3m 31s	remaining: 5m 45s
190:	learn: 3.5135003	test: 4.7229276	best: 4.7229111 (189)	total: 3m 32s	remaining: 5m 44s
191:	learn: 3.5067366	test: 4.7234725	best: 4.7229111 (189)	total: 3m 34s	remaining: 5m 43s
192:	learn: 3.5020632	test: 4.7232890	best: 4.7229111 (189)	total: 3m 35s	remaining: 5m 42s
193:	learn: 3.4996938	test: 4.7227871	best: 4.7227871 (193)	total: 3m 36s	remain

73:	learn: 4.3027149	test: 4.8361834	best: 4.8361117 (72)	total: 1m 12s	remaining: 6m 55s
74:	learn: 4.2896720	test: 4.8328101	best: 4.8328101 (74)	total: 1m 13s	remaining: 6m 53s
75:	learn: 4.2788193	test: 4.8303255	best: 4.8303255 (75)	total: 1m 14s	remaining: 6m 53s
76:	learn: 4.2714812	test: 4.8290966	best: 4.8290966 (76)	total: 1m 14s	remaining: 6m 51s
77:	learn: 4.2561914	test: 4.8264283	best: 4.8264283 (77)	total: 1m 16s	remaining: 6m 51s
78:	learn: 4.2398886	test: 4.8242185	best: 4.8242185 (78)	total: 1m 17s	remaining: 6m 50s
79:	learn: 4.2355766	test: 4.8222443	best: 4.8222443 (79)	total: 1m 17s	remaining: 6m 47s
80:	learn: 4.2269217	test: 4.8183476	best: 4.8183476 (80)	total: 1m 18s	remaining: 6m 46s
81:	learn: 4.2194885	test: 4.8182577	best: 4.8182577 (81)	total: 1m 19s	remaining: 6m 45s
82:	learn: 4.2174593	test: 4.8175204	best: 4.8175204 (82)	total: 1m 20s	remaining: 6m 44s
83:	learn: 4.2140347	test: 4.8163983	best: 4.8163983 (83)	total: 1m 21s	remaining: 6m 44s
84:	learn:

163:	learn: 3.6462565	test: 4.7442985	best: 4.7442985 (163)	total: 2m 42s	remaining: 5m 32s
164:	learn: 3.6393735	test: 4.7436943	best: 4.7436943 (164)	total: 2m 43s	remaining: 5m 31s
165:	learn: 3.6220461	test: 4.7357905	best: 4.7357905 (165)	total: 2m 44s	remaining: 5m 30s
166:	learn: 3.6184456	test: 4.7357158	best: 4.7357158 (166)	total: 2m 45s	remaining: 5m 30s
167:	learn: 3.6126573	test: 4.7359462	best: 4.7357158 (166)	total: 2m 47s	remaining: 5m 30s
168:	learn: 3.6095318	test: 4.7356406	best: 4.7356406 (168)	total: 2m 48s	remaining: 5m 29s
169:	learn: 3.6086539	test: 4.7353545	best: 4.7353545 (169)	total: 2m 49s	remaining: 5m 28s
170:	learn: 3.6058565	test: 4.7346627	best: 4.7346627 (170)	total: 2m 50s	remaining: 5m 27s
171:	learn: 3.6040417	test: 4.7346775	best: 4.7346627 (170)	total: 2m 51s	remaining: 5m 26s
172:	learn: 3.6017898	test: 4.7336309	best: 4.7336309 (172)	total: 2m 52s	remaining: 5m 25s
173:	learn: 3.5993966	test: 4.7335609	best: 4.7335609 (173)	total: 2m 53s	remain

26:	learn: 4.9239755	test: 5.2498834	best: 5.2498834 (26)	total: 17.4s	remaining: 5m 5s
27:	learn: 4.9205884	test: 5.2465547	best: 5.2465547 (27)	total: 17.7s	remaining: 4m 58s
28:	learn: 4.9049661	test: 5.2417049	best: 5.2417049 (28)	total: 18.4s	remaining: 4m 58s
29:	learn: 4.8806002	test: 5.2327426	best: 5.2327426 (29)	total: 19.2s	remaining: 5m
30:	learn: 4.8421336	test: 5.2249730	best: 5.2249730 (30)	total: 20.1s	remaining: 5m 4s
31:	learn: 4.8287455	test: 5.2143753	best: 5.2143753 (31)	total: 20.8s	remaining: 5m 4s
32:	learn: 4.8128215	test: 5.2109984	best: 5.2109984 (32)	total: 21.8s	remaining: 5m 8s
33:	learn: 4.7870418	test: 5.2003112	best: 5.2003112 (33)	total: 22.8s	remaining: 5m 12s
34:	learn: 4.7668434	test: 5.1966386	best: 5.1966386 (34)	total: 23.8s	remaining: 5m 16s
35:	learn: 4.7435534	test: 5.1928014	best: 5.1928014 (35)	total: 24.8s	remaining: 5m 20s
36:	learn: 4.7272640	test: 5.1871121	best: 5.1871121 (36)	total: 25.8s	remaining: 5m 22s
37:	learn: 4.7181427	test: 5.

119:	learn: 3.8808714	test: 4.9874891	best: 4.9874891 (119)	total: 1m 53s	remaining: 5m 58s
120:	learn: 3.8781961	test: 4.9867695	best: 4.9867695 (120)	total: 1m 54s	remaining: 5m 58s
121:	learn: 3.8733691	test: 4.9864393	best: 4.9864393 (121)	total: 1m 55s	remaining: 5m 57s
122:	learn: 3.8702545	test: 4.9864545	best: 4.9864393 (121)	total: 1m 56s	remaining: 5m 57s
123:	learn: 3.8658024	test: 4.9847527	best: 4.9847527 (123)	total: 1m 58s	remaining: 5m 57s
124:	learn: 3.8620286	test: 4.9847417	best: 4.9847417 (124)	total: 1m 59s	remaining: 5m 57s
125:	learn: 3.8454258	test: 4.9807692	best: 4.9807692 (125)	total: 2m	remaining: 5m 56s
126:	learn: 3.8358458	test: 4.9803215	best: 4.9803215 (126)	total: 2m 1s	remaining: 5m 55s
127:	learn: 3.8336872	test: 4.9803243	best: 4.9803215 (126)	total: 2m 2s	remaining: 5m 55s
128:	learn: 3.8305056	test: 4.9799780	best: 4.9799780 (128)	total: 2m 3s	remaining: 5m 53s
129:	learn: 3.8226755	test: 4.9795150	best: 4.9795150 (129)	total: 2m 4s	remaining: 5m 

209:	learn: 3.3058990	test: 4.9269699	best: 4.9263507 (205)	total: 3m 25s	remaining: 4m 44s
210:	learn: 3.2961432	test: 4.9267838	best: 4.9263507 (205)	total: 3m 27s	remaining: 4m 43s

bestTest = 4.926350733
bestIteration = 205

Training on fold [2/3]
0:	learn: 7.1575187	test: 7.2176735	best: 7.2176735 (0)	total: 234ms	remaining: 1m 56s
1:	learn: 6.8837337	test: 6.9447631	best: 6.9447631 (1)	total: 256ms	remaining: 1m 3s
2:	learn: 6.5712801	test: 6.6347839	best: 6.6347839 (2)	total: 1.02s	remaining: 2m 49s
3:	learn: 6.3473987	test: 6.4111499	best: 6.4111499 (3)	total: 1.64s	remaining: 3m 23s
4:	learn: 6.1692529	test: 6.2411630	best: 6.2411630 (4)	total: 2.37s	remaining: 3m 54s
5:	learn: 6.0275369	test: 6.1186783	best: 6.1186783 (5)	total: 3.08s	remaining: 4m 13s
6:	learn: 5.9315582	test: 6.0326401	best: 6.0326401 (6)	total: 3.75s	remaining: 4m 24s
7:	learn: 5.8513602	test: 5.9632987	best: 5.9632987 (7)	total: 4.36s	remaining: 4m 28s
8:	learn: 5.7152717	test: 5.8338117	best: 5.8338117 (

90:	learn: 4.0843887	test: 4.9437546	best: 4.9437546 (90)	total: 1m 21s	remaining: 6m 5s
91:	learn: 4.0768799	test: 4.9443468	best: 4.9437546 (90)	total: 1m 22s	remaining: 6m 4s
92:	learn: 4.0745703	test: 4.9440508	best: 4.9437546 (90)	total: 1m 23s	remaining: 6m 3s
93:	learn: 4.0713875	test: 4.9427236	best: 4.9427236 (93)	total: 1m 23s	remaining: 6m 2s
94:	learn: 4.0586958	test: 4.9426958	best: 4.9426958 (94)	total: 1m 24s	remaining: 6m 1s
95:	learn: 4.0510016	test: 4.9376444	best: 4.9376444 (95)	total: 1m 25s	remaining: 6m 1s
96:	learn: 4.0452981	test: 4.9374038	best: 4.9374038 (96)	total: 1m 26s	remaining: 6m 1s
97:	learn: 4.0340909	test: 4.9361692	best: 4.9361692 (97)	total: 1m 27s	remaining: 6m
98:	learn: 4.0297038	test: 4.9362882	best: 4.9361692 (97)	total: 1m 29s	remaining: 6m
99:	learn: 4.0204460	test: 4.9340951	best: 4.9340951 (99)	total: 1m 30s	remaining: 6m
100:	learn: 3.9988367	test: 4.9307881	best: 4.9307881 (100)	total: 1m 31s	remaining: 6m
101:	learn: 3.9834083	test: 4.9

181:	learn: 3.4556699	test: 4.8737430	best: 4.8737430 (181)	total: 2m 48s	remaining: 4m 54s
182:	learn: 3.4463597	test: 4.8733179	best: 4.8733179 (182)	total: 2m 49s	remaining: 4m 53s
183:	learn: 3.4433606	test: 4.8732167	best: 4.8732167 (183)	total: 2m 50s	remaining: 4m 52s
184:	learn: 3.4411486	test: 4.8730302	best: 4.8730302 (184)	total: 2m 51s	remaining: 4m 51s
185:	learn: 3.4366949	test: 4.8731105	best: 4.8730302 (184)	total: 2m 52s	remaining: 4m 50s
186:	learn: 3.4354281	test: 4.8726467	best: 4.8726467 (186)	total: 2m 52s	remaining: 4m 49s
187:	learn: 3.4287411	test: 4.8723384	best: 4.8723384 (187)	total: 2m 53s	remaining: 4m 48s
188:	learn: 3.4251072	test: 4.8722492	best: 4.8722492 (188)	total: 2m 55s	remaining: 4m 48s
189:	learn: 3.4219233	test: 4.8719959	best: 4.8719959 (189)	total: 2m 56s	remaining: 4m 47s
190:	learn: 3.4154923	test: 4.8688880	best: 4.8688880 (190)	total: 2m 56s	remaining: 4m 46s
191:	learn: 3.4114639	test: 4.8689245	best: 4.8688880 (190)	total: 2m 58s	remain

CatBoostError: only one of the parameters od_wait, early_stopping_rounds should be initialized.