In [25]:
import os
import pandas as pd
from sklearn.model_selection import ShuffleSplit, KFold
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import datetime
import json
import os
import numpy as np
import xgboost
import catboost as cb

In [2]:
PATH_TO_DATA = 'dota2'
df_train_features = pd.read_csv(os.path.join(PATH_TO_DATA, 'train_features.csv'), index_col='match_id_hash')
df_train_targets = pd.read_csv(os.path.join(PATH_TO_DATA, 'train_targets.csv'), index_col='match_id_hash')

In [3]:
X = df_train_features.values
y = df_train_targets['radiant_win'].values

In [5]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=17)

In [7]:
X_train_scaled = StandardScaler().fit_transform(X_train)
X_test_scaled = StandardScaler().fit_transform(X_valid)

In [12]:
xgb_model = xgboost.XGBClassifier()
test_params = {
 "learning_rate"    : [0.05, 0.15,  0.30 ] ,
 "max_depth"        : [6, 8, 16],
 "min_child_weight" : [ 1, 5]
}
model = GridSearchCV(estimator = xgb_model,param_grid = test_params, n_jobs=-1, verbose=10)

In [14]:
model.fit(X_train_scaled,y_train)
print (model.best_params_)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 3 folds for each of 18 candidates, totalling 54 fits


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  8.7min
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 11.2min
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 16.0min
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 19.3min
[Parallel(n_jobs=-1)]: Done  54 out of  54 | elapsed: 25.2min finished


{'learning_rate': 0.05, 'max_depth': 8, 'min_child_weight': 1}


In [15]:
xgb_valid_pred = model.predict_proba(X_valid)[:, 1]
print('Validation ROC-AUC score:',roc_auc_score(y_valid, xgb_valid_pred))

Validation ROC-AUC score: 0.5936112712097714


In [16]:
df_test_features = pd.read_csv(os.path.join(PATH_TO_DATA, 'test_features.csv'), index_col='match_id_hash')
X_test_scaled = StandardScaler().fit_transform(df_test_features.values)
y_test_pred = model.predict_proba(X_test_scaled)[:, 1]
df_submission = pd.DataFrame({'radiant_win_prob': y_test_pred}, index=df_test_features.index)

In [17]:
submission_filename = 'xgb_{}.csv'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
df_submission.to_csv(submission_filename)
print('Submission saved to {}'.format(submission_filename))

Submission saved to xgb_2019-04-04_14-52-50.csv


In [34]:
y_train_scaled = y_train.astype('int')
y_valid_scaled = y_valid.astype('int')

In [33]:
params = {'depth': [4, 7, 10],
          'learning_rate' : [0.03, 0.1, 0.15],
         'l2_leaf_reg': [1,4,9],
         'iterations': [300]}
cbc = cb.CatBoostClassifier()
cb_model = GridSearchCV(cbc, params, scoring="roc_auc", cv = 3)
cbc.fit(X_train_scaled,y_train_scaled)

Learning rate set to 0.042921
0:	learn: 0.6864132	total: 358ms	remaining: 5m 58s
1:	learn: 0.6805733	total: 690ms	remaining: 5m 44s
2:	learn: 0.6749626	total: 1.03s	remaining: 5m 43s
3:	learn: 0.6692754	total: 1.36s	remaining: 5m 39s
4:	learn: 0.6645180	total: 1.68s	remaining: 5m 34s
5:	learn: 0.6600015	total: 2s	remaining: 5m 30s
6:	learn: 0.6555536	total: 2.32s	remaining: 5m 28s
7:	learn: 0.6514457	total: 2.63s	remaining: 5m 26s
8:	learn: 0.6479888	total: 2.96s	remaining: 5m 26s
9:	learn: 0.6444146	total: 3.29s	remaining: 5m 25s
10:	learn: 0.6405043	total: 3.64s	remaining: 5m 27s
11:	learn: 0.6370233	total: 3.97s	remaining: 5m 27s
12:	learn: 0.6339540	total: 4.3s	remaining: 5m 26s
13:	learn: 0.6314527	total: 4.62s	remaining: 5m 25s
14:	learn: 0.6287567	total: 4.96s	remaining: 5m 25s
15:	learn: 0.6267256	total: 5.28s	remaining: 5m 24s
16:	learn: 0.6244869	total: 5.61s	remaining: 5m 24s
17:	learn: 0.6222280	total: 5.93s	remaining: 5m 23s
18:	learn: 0.6200557	total: 6.25s	remaining: 5m 

157:	learn: 0.5385944	total: 49s	remaining: 4m 20s
158:	learn: 0.5384126	total: 49.2s	remaining: 4m 20s
159:	learn: 0.5381696	total: 49.5s	remaining: 4m 20s
160:	learn: 0.5379377	total: 49.8s	remaining: 4m 19s
161:	learn: 0.5376909	total: 50.2s	remaining: 4m 19s
162:	learn: 0.5374492	total: 50.5s	remaining: 4m 19s
163:	learn: 0.5373015	total: 50.8s	remaining: 4m 18s
164:	learn: 0.5370977	total: 51.1s	remaining: 4m 18s
165:	learn: 0.5369147	total: 51.5s	remaining: 4m 18s
166:	learn: 0.5366386	total: 51.8s	remaining: 4m 18s
167:	learn: 0.5364478	total: 52.1s	remaining: 4m 17s
168:	learn: 0.5362946	total: 52.3s	remaining: 4m 17s
169:	learn: 0.5360341	total: 52.6s	remaining: 4m 17s
170:	learn: 0.5358089	total: 52.9s	remaining: 4m 16s
171:	learn: 0.5356679	total: 53.2s	remaining: 4m 16s
172:	learn: 0.5354532	total: 53.5s	remaining: 4m 15s
173:	learn: 0.5353195	total: 53.8s	remaining: 4m 15s
174:	learn: 0.5351240	total: 54.1s	remaining: 4m 14s
175:	learn: 0.5350275	total: 54.4s	remaining: 4m

311:	learn: 0.5123944	total: 1m 34s	remaining: 3m 27s
312:	learn: 0.5122178	total: 1m 34s	remaining: 3m 27s
313:	learn: 0.5120930	total: 1m 34s	remaining: 3m 27s
314:	learn: 0.5119636	total: 1m 35s	remaining: 3m 26s
315:	learn: 0.5118303	total: 1m 35s	remaining: 3m 26s
316:	learn: 0.5116610	total: 1m 35s	remaining: 3m 26s
317:	learn: 0.5114568	total: 1m 36s	remaining: 3m 25s
318:	learn: 0.5112674	total: 1m 36s	remaining: 3m 25s
319:	learn: 0.5111424	total: 1m 36s	remaining: 3m 25s
320:	learn: 0.5110193	total: 1m 36s	remaining: 3m 24s
321:	learn: 0.5108415	total: 1m 37s	remaining: 3m 24s
322:	learn: 0.5107913	total: 1m 37s	remaining: 3m 24s
323:	learn: 0.5106710	total: 1m 37s	remaining: 3m 23s
324:	learn: 0.5105454	total: 1m 38s	remaining: 3m 23s
325:	learn: 0.5103542	total: 1m 38s	remaining: 3m 23s
326:	learn: 0.5101846	total: 1m 38s	remaining: 3m 22s
327:	learn: 0.5100282	total: 1m 38s	remaining: 3m 22s
328:	learn: 0.5098429	total: 1m 39s	remaining: 3m 22s
329:	learn: 0.5097518	total:

465:	learn: 0.4938300	total: 2m 16s	remaining: 2m 36s
466:	learn: 0.4936857	total: 2m 16s	remaining: 2m 36s
467:	learn: 0.4936042	total: 2m 17s	remaining: 2m 35s
468:	learn: 0.4935439	total: 2m 17s	remaining: 2m 35s
469:	learn: 0.4934590	total: 2m 17s	remaining: 2m 35s
470:	learn: 0.4933400	total: 2m 18s	remaining: 2m 35s
471:	learn: 0.4933216	total: 2m 18s	remaining: 2m 34s
472:	learn: 0.4932124	total: 2m 18s	remaining: 2m 34s
473:	learn: 0.4931027	total: 2m 18s	remaining: 2m 34s
474:	learn: 0.4929563	total: 2m 19s	remaining: 2m 33s
475:	learn: 0.4928235	total: 2m 19s	remaining: 2m 33s
476:	learn: 0.4927036	total: 2m 19s	remaining: 2m 33s
477:	learn: 0.4925463	total: 2m 19s	remaining: 2m 32s
478:	learn: 0.4924145	total: 2m 20s	remaining: 2m 32s
479:	learn: 0.4922838	total: 2m 20s	remaining: 2m 32s
480:	learn: 0.4922145	total: 2m 20s	remaining: 2m 31s
481:	learn: 0.4920711	total: 2m 21s	remaining: 2m 31s
482:	learn: 0.4919431	total: 2m 21s	remaining: 2m 31s
483:	learn: 0.4918530	total:

618:	learn: 0.4807769	total: 2m 59s	remaining: 1m 50s
619:	learn: 0.4807025	total: 2m 59s	remaining: 1m 49s
620:	learn: 0.4806286	total: 2m 59s	remaining: 1m 49s
621:	learn: 0.4806023	total: 3m	remaining: 1m 49s
622:	learn: 0.4805225	total: 3m	remaining: 1m 49s
623:	learn: 0.4804617	total: 3m	remaining: 1m 48s
624:	learn: 0.4804218	total: 3m	remaining: 1m 48s
625:	learn: 0.4803196	total: 3m 1s	remaining: 1m 48s
626:	learn: 0.4801844	total: 3m 1s	remaining: 1m 47s
627:	learn: 0.4800666	total: 3m 1s	remaining: 1m 47s
628:	learn: 0.4800000	total: 3m 2s	remaining: 1m 47s
629:	learn: 0.4799713	total: 3m 2s	remaining: 1m 47s
630:	learn: 0.4799292	total: 3m 2s	remaining: 1m 46s
631:	learn: 0.4797970	total: 3m 2s	remaining: 1m 46s
632:	learn: 0.4797500	total: 3m 3s	remaining: 1m 46s
633:	learn: 0.4797283	total: 3m 3s	remaining: 1m 45s
634:	learn: 0.4795878	total: 3m 3s	remaining: 1m 45s
635:	learn: 0.4794777	total: 3m 4s	remaining: 1m 45s
636:	learn: 0.4793771	total: 3m 4s	remaining: 1m 45s
63

771:	learn: 0.4699521	total: 3m 41s	remaining: 1m 5s
772:	learn: 0.4698793	total: 3m 41s	remaining: 1m 5s
773:	learn: 0.4698089	total: 3m 41s	remaining: 1m 4s
774:	learn: 0.4696775	total: 3m 42s	remaining: 1m 4s
775:	learn: 0.4695994	total: 3m 42s	remaining: 1m 4s
776:	learn: 0.4695497	total: 3m 42s	remaining: 1m 3s
777:	learn: 0.4695087	total: 3m 42s	remaining: 1m 3s
778:	learn: 0.4694798	total: 3m 43s	remaining: 1m 3s
779:	learn: 0.4693316	total: 3m 43s	remaining: 1m 3s
780:	learn: 0.4692983	total: 3m 43s	remaining: 1m 2s
781:	learn: 0.4692682	total: 3m 43s	remaining: 1m 2s
782:	learn: 0.4692148	total: 3m 44s	remaining: 1m 2s
783:	learn: 0.4691752	total: 3m 44s	remaining: 1m 1s
784:	learn: 0.4690923	total: 3m 44s	remaining: 1m 1s
785:	learn: 0.4689812	total: 3m 44s	remaining: 1m 1s
786:	learn: 0.4688639	total: 3m 45s	remaining: 1m
787:	learn: 0.4688032	total: 3m 45s	remaining: 1m
788:	learn: 0.4687839	total: 3m 45s	remaining: 1m
789:	learn: 0.4687019	total: 3m 46s	remaining: 1m
790:	

928:	learn: 0.4597451	total: 4m 25s	remaining: 20.3s
929:	learn: 0.4596361	total: 4m 25s	remaining: 20s
930:	learn: 0.4596103	total: 4m 25s	remaining: 19.7s
931:	learn: 0.4596037	total: 4m 26s	remaining: 19.4s
932:	learn: 0.4595584	total: 4m 26s	remaining: 19.1s
933:	learn: 0.4594806	total: 4m 26s	remaining: 18.8s
934:	learn: 0.4594704	total: 4m 26s	remaining: 18.6s
935:	learn: 0.4593749	total: 4m 27s	remaining: 18.3s
936:	learn: 0.4593579	total: 4m 27s	remaining: 18s
937:	learn: 0.4592770	total: 4m 27s	remaining: 17.7s
938:	learn: 0.4592404	total: 4m 28s	remaining: 17.4s
939:	learn: 0.4592164	total: 4m 28s	remaining: 17.1s
940:	learn: 0.4591923	total: 4m 28s	remaining: 16.8s
941:	learn: 0.4591424	total: 4m 29s	remaining: 16.6s
942:	learn: 0.4591346	total: 4m 29s	remaining: 16.3s
943:	learn: 0.4590728	total: 4m 29s	remaining: 16s
944:	learn: 0.4590026	total: 4m 30s	remaining: 15.7s
945:	learn: 0.4589662	total: 4m 30s	remaining: 15.4s
946:	learn: 0.4589430	total: 4m 30s	remaining: 15.2s

<catboost.core.CatBoostClassifier at 0x1e358240>

In [35]:
cb_valid_pred = cbc.predict_proba(X_valid)[:, 1]
print('Validation ROC-AUC score:',roc_auc_score(y_valid_scaled, cb_valid_pred))

Validation ROC-AUC score: 0.5714109157765983


In [36]:
y_test_pred_cb = cbc.predict_proba(X_test_scaled)[:, 1]
df_submission = pd.DataFrame({'radiant_win_prob': y_test_pred_cb}, index=df_test_features.index)
submission_filename = 'cb_{}.csv'.format(
    datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
df_submission.to_csv(submission_filename)
print('Submission saved to {}'.format(submission_filename))

Submission saved to cb_2019-04-04_16-33-02.csv
