In [1]:
import time
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.ensemble import GradientBoostingClassifier as GB
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score

In [2]:
dota_train = pd.read_csv('features.csv', index_col='match_id')
dota_test = pd.read_csv('features_test.csv', index_col='match_id')

In [28]:
dota_train.head()

Unnamed: 0_level_0,start_time,lobby_type,r1_hero,r1_level,r1_xp,r1_gold,r1_lh,r1_kills,r1_deaths,r1_items,...,dire_boots_count,dire_ward_observer_count,dire_ward_sentry_count,dire_first_ward_time,duration,radiant_win,tower_status_radiant,tower_status_dire,barracks_status_radiant,barracks_status_dire
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1430198770,7,11,5,2098,1489,20,0,0,7,...,4,2,2,-52.0,2874,1,1796,0,51,0
1,1430220345,0,42,4,1188,1033,9,0,1,12,...,4,3,1,-5.0,2463,1,1974,0,63,1
2,1430227081,7,33,4,1319,1270,22,0,0,12,...,4,3,1,13.0,2130,0,0,1830,0,63
3,1430263531,1,29,4,1779,1056,14,0,0,5,...,4,2,0,27.0,1459,0,1920,2047,50,63
4,1430282290,7,13,4,1431,1090,8,1,0,8,...,3,3,0,-16.0,2449,0,4,1974,3,63


In [49]:
dota_train['r1_hero'].dtype

dtype('int64')

### Removing columns, which can result in leakage

In [3]:
X = pd.DataFrame(dota_train.drop(['radiant_win','duration','tower_status_radiant',\
                 'tower_status_dire','barracks_status_radiant','barracks_status_dire'],\
                    axis=1))

### Finding cols with missing values

In [4]:
for i in dota_train.columns:
    print
    #print(f'{i}: {dota_train[i].isnull().sum()/dota_train.shape[0] :.2f}% data is lost')
    #if dota_train[i].isnull().sum()>0:
    #    print(f"\'{i}\'",end=',')

In [5]:
cols_with_missing = ['first_blood_time','first_blood_team','first_blood_player1',\
                     'first_blood_player2','radiant_bottle_time','radiant_courier_time',\
                     'radiant_flying_courier_time','radiant_first_ward_time',\
                     'dire_bottle_time','dire_courier_time','dire_flying_courier_time',\
                     'dire_first_ward_time']

Если событие "первая кровь" не успело произойти за первые 5 минут, то признаки принимают пропущенное значение.
Поэтому признаки 'first_blood_time','first_blood_team','first_blood_player1' имеют 0.20% 
значений. Признак 'first_blood_player2' имеет 0.45% пропущенных значений скорее всего из-за того, что "первую кровь" пустил только один игрок, без помощи других.

### Replace missing values

According to the task we will fill the missing values with zeros. Later we can do some hyperparameter optimization

In [6]:
X.fillna(0,inplace = True)

### Target column

We predict who will win, so we need to label 'radiant_win'

### Prediction with Gradient Boosting

In [7]:
y = dota_train['radiant_win']
kfold = KFold(n_splits=5, shuffle = True)

In [59]:
for i in [10,20,30]:

    clf = GB(n_estimators=i)
    start_time = datetime.datetime.now()
    mean_score = cross_val_score(clf, X, y, scoring="roc_auc", cv = kfold).mean()
    #results = cross_val_score(clf, X, y, cv=kfold)
    #print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
    print ('Time elapsed:', datetime.datetime.now() - start_time)
    print(mean_score)

Time elapsed: 0:01:09.984544
0.665306650864717
Time elapsed: 0:02:26.949287
0.6814140908787021
Time elapsed: 0:03:41.682899
0.6896463600745194


Now we need to use AUC ROC, and probably decrease processing time.
For a normal cross-validation:<br>
10 algos Time elapsed: 0:01:09.984544 Acc:0.665306650864717<br>
20 algos Time elapsed: 0:02:26.949287 Acc:0.6814140908787021<br>
30 algos Time elapsed: 0:03:41.682899 Acc:0.6896463600745194

# Logistic Regression prediction

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [55]:
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X))

In [56]:
X_test_scaled = pd.DataFrame(scaler.transform(dota_test))

### Finding the accuracy by ROC

In [11]:
from sklearn.linear_model import RidgeClassifier

In [12]:
for i in [1e-5, 1e-4, 5e-5,6e-5]:
    start_time = datetime.datetime.now()
    model = RidgeClassifier(alpha=i)
    mean_score = cross_val_score(model, X_scaled, y, scoring="roc_auc", cv = kfold).mean()
    print(i)
    print ('Time elapsed:', datetime.datetime.now() - start_time)
    print(mean_score)

1e-05
Time elapsed: 0:00:02.821758
0.7164126316007362
0.0001
Time elapsed: 0:00:01.603752
0.7161284359805912
5e-05
Time elapsed: 0:00:01.647683
0.7164183067582307
6e-05
Time elapsed: 0:00:01.585853
0.716576002325031


In [28]:
100*(0.7164579771021937 - 0.6896463600745194)/0.6896463600745194

3.8877341460596124

In [30]:
X_scaled.shape

(97230, 102)

### Dropping nominal features

In [13]:
drop_columns = ['lobby_type','r1_hero', 'r2_hero', 'r3_hero','r4_hero', 'r5_hero', 'd1_hero', 'd2_hero',\
                'd3_hero','d4_hero',\
                    'd5_hero']

In [57]:
X_drop = pd.DataFrame(X.drop(drop_columns,axis=1))
X_test_drop = pd.DataFrame(dota_test.drop(drop_columns,axis=1))

In [58]:
scaler2 = StandardScaler()
X_scaled_drop = pd.DataFrame(scaler2.fit_transform(X_drop),columns = X_drop.columns)  #saving the names of columns
X_test_scaled_drop = pd.DataFrame(scaler2.transform(X_test_drop),columns = X_test_drop.columns)

In [44]:
for i in [1e-5, 1e-4, 5e-5,6e-5]:
    start_time = datetime.datetime.now()
    model = RidgeClassifier(alpha=i)
    mean_score = cross_val_score(model, X_scaled_drop, y, scoring="roc_auc", cv = kfold).mean()
    print(i)
    print ('Time elapsed:', datetime.datetime.now() - start_time)
    print(mean_score)

1e-05
Time elapsed: 0:00:02.440072
0.716586382786688
0.0001
Time elapsed: 0:00:01.838245
0.7164035458744885
5e-05
Time elapsed: 0:00:02.605598
0.7164924852707244
6e-05
Time elapsed: 0:00:02.210787
0.7165119301661695


### How many different heroes do we have?

In [59]:
heroes = ['r1_hero', 'r2_hero', 'r3_hero','r4_hero', 'r5_hero', 'd1_hero', 'd2_hero',\
                'd3_hero','d4_hero',\
                    'd5_hero']
#for i in heroes:
np.unique(dota_test[heroes].values)

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  25,  26,  27,
        28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
        54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,
        67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
        80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,
        93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105,
       106, 109, 110, 112])

112 heroes

### Create bag of words

In [22]:
# 112 — количество различных героев в выборке
X_pick = np.zeros((X.shape[0], 112))

for i, match_id in enumerate(X.index):
    for p in range(5):
        X_pick[i, X.loc[match_id, 'r%d_hero' % (p+1)]-1] = 1
        X_pick[i, X.loc[match_id, 'd%d_hero' % (p+1)]-1] = -1

In [60]:
# 112 — количество различных героев в выборке
X_test_pick = np.zeros((dota_test.shape[0], 112))

for i, match_id in enumerate(dota_test.index):
    for p in range(5):
        X_test_pick[i, dota_test.loc[match_id, 'r%d_hero' % (p+1)]-1] = 1
        X_test_pick[i, dota_test.loc[match_id, 'd%d_hero' % (p+1)]-1] = -1

In [61]:
X_pick=pd.DataFrame(X_pick)
X_pick.rename(columns=lambda x: x+ 1, inplace=True)

X_test_pick=pd.DataFrame(X_test_pick)
X_test_pick.rename(columns=lambda x: x+ 1, inplace=True)
#X_pick.head()
#X_pick[109].value_counts()

In [36]:
X_for_bag = X_scaled_drop.copy()
X_for_bag.reset_index(drop=True, inplace=True)
X_pick.reset_index(drop=True, inplace=True)

In [62]:
X_test_for_bag = X_test_scaled_drop.copy()
X_test_for_bag.reset_index(drop=True, inplace=True)
X_test_pick.reset_index(drop=True, inplace=True)

In [63]:
X_test_for_bag = pd.concat([X_test_for_bag,X_test_pick],axis=1)

In [39]:
pd.set_option("display.max_columns", None)
X_for_bag.head()

Unnamed: 0,start_time,r1_level,r1_xp,r1_gold,r1_lh,r1_kills,r1_deaths,r1_items,r2_level,r2_xp,r2_gold,r2_lh,r2_kills,r2_deaths,r2_items,r3_level,r3_xp,r3_gold,r3_lh,r3_kills,r3_deaths,r3_items,r4_level,r4_xp,r4_gold,r4_lh,r4_kills,r4_deaths,r4_items,r5_level,r5_xp,r5_gold,r5_lh,r5_kills,r5_deaths,r5_items,d1_level,d1_xp,d1_gold,d1_lh,d1_kills,d1_deaths,d1_items,d2_level,d2_xp,d2_gold,d2_lh,d2_kills,d2_deaths,d2_items,d3_level,d3_xp,d3_gold,d3_lh,d3_kills,d3_deaths,d3_items,d4_level,d4_xp,d4_gold,d4_lh,d4_kills,d4_deaths,d4_items,d5_level,d5_xp,d5_gold,d5_lh,d5_kills,d5_deaths,d5_items,first_blood_time,first_blood_team,first_blood_player1,first_blood_player2,radiant_bottle_time,radiant_courier_time,radiant_flying_courier_time,radiant_tpscroll_count,radiant_boots_count,radiant_ward_observer_count,radiant_ward_sentry_count,radiant_first_ward_time,dire_bottle_time,dire_courier_time,dire_flying_courier_time,dire_tpscroll_count,dire_boots_count,dire_ward_observer_count,dire_ward_sentry_count,dire_first_ward_time,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112
0,-2.544364,1.400808,1.525972,0.734957,0.969743,-0.537757,-0.578083,-0.509023,-0.332256,-0.625222,-0.255162,-0.05258,-0.533149,-0.582396,-1.756715,1.50723,1.308568,0.095346,-0.037204,-0.529081,-0.578154,-0.112095,-0.282744,-0.72983,-0.757388,-0.446505,-0.525343,-0.579877,-0.525183,-0.27763,-0.771379,-0.935772,-0.669994,-0.524078,1.052453,1.115474,-0.417344,-0.322641,-0.33765,0.082842,-0.548801,-0.564562,-0.928921,0.561134,-0.208412,-0.272358,0.173184,-0.540261,-0.569909,-1.774695,1.488903,1.558823,0.961587,1.416964,-0.538253,-0.571065,-0.947053,-0.311205,-0.78558,-1.312874,-0.905651,-0.53886,-0.56754,-0.13169,-0.316231,-0.39826,-0.192406,-0.794164,0.976866,-0.565289,0.278256,-0.849902,1.182703,1.681514,-0.788219,0.378538,-0.029781,0.876751,-0.514555,-1.13697,-0.532046,-0.987486,1.066448,-0.041743,-0.262922,0.640648,0.018054,0.562864,-0.551154,1.846004,-1.121494,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-2.540452,0.501314,-0.080139,-0.24757,-0.246859,-0.537757,1.017574,1.49293,0.578881,0.732454,-0.250795,-0.05258,-0.533149,1.019572,-0.524101,0.59158,0.582307,0.884835,0.856569,1.014674,-0.578154,-0.52191,-0.282744,-0.889558,-1.001642,-0.334372,-0.525343,-0.579877,-0.525183,-1.190175,-1.344598,-1.198371,-1.005251,-0.524078,-0.573665,-1.352836,1.392776,1.286498,0.507465,0.526938,-0.548801,-0.564562,-0.11989,-0.357152,-1.015865,-1.20514,-1.064184,-0.540261,1.072422,-1.362973,-0.347705,-0.859912,0.548265,-0.942193,2.506742,-0.571065,1.517711,-1.232681,-1.337375,-1.127777,-1.131068,-0.53886,-0.56754,-0.543653,0.599652,0.531154,1.183531,1.570177,-0.540155,-0.565289,0.278256,-0.287631,1.182703,1.050584,-0.788219,0.912215,-0.029781,-1.525446,-0.514555,-2.86946,-0.532046,-0.987486,-0.338591,0.578946,-0.262922,0.379585,1.066668,0.562864,0.67817,0.437788,0.043947,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-2.539231,0.501314,0.15107,0.263085,1.190944,-0.537757,-0.578083,1.49293,-0.332256,0.224676,-0.726779,-0.49841,-0.533149,-0.582396,-0.934972,-0.32407,0.20566,-0.419251,-1.154419,1.014674,-0.578154,-0.931725,1.547927,2.179771,2.251377,1.796163,1.030097,1.043031,-0.937075,-0.27763,0.427498,1.202532,1.900308,-0.524078,-0.573665,0.292704,1.392776,1.901968,1.910183,0.86001,0.925434,1.076171,0.689141,-0.357152,-0.319096,-0.332322,0.9606,-0.540261,1.072422,0.695638,-0.347705,-0.796356,-1.073911,-0.829853,-0.538253,-0.571065,-0.125465,0.61027,0.268676,-0.940451,-0.680234,-0.53886,-0.56754,-0.543653,-0.316231,0.313323,0.939019,1.682765,-0.540155,-0.565289,-0.543829,1.746117,-0.845521,-0.211277,-0.788219,-0.593027,-0.146503,-1.525446,-0.514555,1.461765,-0.532046,0.391203,-0.823968,-0.824352,0.158654,0.640648,0.018054,0.562864,0.67817,0.437788,0.490286,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-2.532622,0.501314,0.96295,-0.198013,0.306142,-0.537757,-0.578083,-1.309804,-1.243393,-1.170813,-1.242065,-1.055698,-0.533149,-0.582396,-0.934972,1.50723,1.539241,0.08655,0.521404,-0.529081,-0.578154,-0.931725,-1.198079,-1.031136,-1.294746,-1.119305,-0.525343,-0.579877,-0.937075,-0.27763,-0.807544,-0.015573,0.224024,-0.524078,-0.573665,-0.941451,1.392776,1.140213,0.050057,0.637962,-0.548801,-0.564562,-0.928921,-0.357152,-0.848931,0.798121,1.298063,-0.540261,-0.569909,0.695638,0.570599,0.885136,-0.11616,0.068874,-0.538253,-0.571065,-0.536259,-1.232681,-0.909825,-1.230361,-1.018359,-0.53886,-0.56754,-0.543653,-1.232114,-1.211497,-1.312717,-1.131927,-0.540155,-0.565289,-0.543829,-0.933645,-0.845521,-1.157673,-0.788219,1.391156,0.262024,-1.525446,-1.54907,-0.270725,-0.532046,-0.987486,-0.594053,0.241615,-0.022021,0.269135,-1.554868,0.562864,-0.551154,-0.970428,0.837439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-2.529221,0.501314,0.348745,-0.124754,-0.357459,0.968527,-0.578083,-0.108632,-1.243393,-1.008757,-1.21368,-1.167155,-0.533149,1.019572,-0.524101,-0.32407,-0.538623,-0.379666,-1.154419,1.014674,-0.578154,-0.112095,-0.282744,-0.424893,0.792515,0.674829,1.030097,-0.579877,1.122386,0.634915,0.584817,-0.445882,-0.334737,-0.524078,-0.573665,-0.118681,-0.417344,-0.954166,-1.230685,-1.138424,-0.548801,2.716903,0.284625,-0.357152,-0.055993,1.23564,1.073087,0.976313,-0.569909,-0.539528,-0.347705,-0.251596,-1.033912,-1.054534,-0.538253,-0.571065,0.285329,1.531746,1.174935,0.415438,0.897683,-0.53886,-0.56754,-0.13169,0.599652,-0.106003,-0.412467,-0.456401,-0.540155,1.085005,-0.543829,-1.184873,1.182703,0.735119,-0.788219,0.816427,-0.088142,0.256512,-1.031813,0.59552,-0.532046,-0.987486,1.347455,1.024223,-0.022021,0.680811,1.590976,-0.302485,0.67817,-0.970428,-0.228816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Below I'm trying to define if the rows in X_for_bag mean the same as X. <br>

In [105]:
for i in heroes:
    print(X[i].loc[X.index==0])

match_id
0    11
Name: r1_hero, dtype: int64
match_id
0    67
Name: r2_hero, dtype: int64
match_id
0    29
Name: r3_hero, dtype: int64
match_id
0    20
Name: r4_hero, dtype: int64
match_id
0    105
Name: r5_hero, dtype: int64
match_id
0    4
Name: d1_hero, dtype: int64
match_id
0    42
Name: d2_hero, dtype: int64
match_id
0    21
Name: d3_hero, dtype: int64
match_id
0    37
Name: d4_hero, dtype: int64
match_id
0    84
Name: d5_hero, dtype: int64


### Cross-validation

In [48]:
z = [] #list of accuracies
k = [] #list of regularization alphas
for i in np.linspace(1e-4, 1, num=10):
    start_time = datetime.datetime.now()
    model = RidgeClassifier(alpha=i)
    mean_score = cross_val_score(model, X_for_bag, y, scoring="roc_auc", cv = kfold).mean()
    #print(i)
    #print ('Time elapsed:', datetime.datetime.now() - start_time)
    z.append(mean_score)
    k.append(i)
    #print(mean_score)
for h,g in enumerate(z):
    if g == max(z):
        print(g,k[h])

0.7520568290141736 0.11120000000000001


### Test prediction of the winner

In [53]:
dota_test.fillna(0,inplace=True)

In [78]:
model = LogisticRegression(C = 1/(2*0.1112))
model.fit(X_for_bag,y)
pred = model.predict_proba(X_test_for_bag)[:, 1]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Possibility of Radiant win: <br>
0.00846 - минимальное значение<br>
0.996434 - максимальное значение

In [84]:
dota_test.head()

Unnamed: 0_level_0,start_time,lobby_type,r1_hero,r1_level,r1_xp,r1_gold,r1_lh,r1_kills,r1_deaths,r1_items,r2_hero,r2_level,r2_xp,r2_gold,r2_lh,r2_kills,r2_deaths,r2_items,r3_hero,r3_level,r3_xp,r3_gold,r3_lh,r3_kills,r3_deaths,r3_items,r4_hero,r4_level,r4_xp,r4_gold,r4_lh,r4_kills,r4_deaths,r4_items,r5_hero,r5_level,r5_xp,r5_gold,r5_lh,r5_kills,r5_deaths,r5_items,d1_hero,d1_level,d1_xp,d1_gold,d1_lh,d1_kills,d1_deaths,d1_items,d2_hero,d2_level,d2_xp,d2_gold,d2_lh,d2_kills,d2_deaths,d2_items,d3_hero,d3_level,d3_xp,d3_gold,d3_lh,d3_kills,d3_deaths,d3_items,d4_hero,d4_level,d4_xp,d4_gold,d4_lh,d4_kills,d4_deaths,d4_items,d5_hero,d5_level,d5_xp,d5_gold,d5_lh,d5_kills,d5_deaths,d5_items,first_blood_time,first_blood_team,first_blood_player1,first_blood_player2,radiant_bottle_time,radiant_courier_time,radiant_flying_courier_time,radiant_tpscroll_count,radiant_boots_count,radiant_ward_observer_count,radiant_ward_sentry_count,radiant_first_ward_time,dire_bottle_time,dire_courier_time,dire_flying_courier_time,dire_tpscroll_count,dire_boots_count,dire_ward_observer_count,dire_ward_sentry_count,dire_first_ward_time
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1
6,1430287923,0,93,4,1103,1089,8,0,1,9,102,3,1183,963,9,0,2,9,52,5,2289,2043,20,2,0,6,53,3,917,1006,14,0,0,6,57,3,1344,933,3,0,0,9,26,3,1122,949,5,1,0,10,89,4,1847,1866,25,1,0,10,20,3,841,1015,2,1,0,9,29,3,1091,918,6,0,1,8,39,5,1674,1048,12,0,1,7,1.0,0.0,2.0,0.0,72.0,-82.0,0.0,2,2,3,0,12.0,247.0,-86.0,272.0,3,4,2,0,118.0
7,1430293357,1,20,2,556,570,1,0,0,9,6,4,1194,1386,21,0,0,7,65,4,1374,1064,13,0,1,8,92,2,774,737,0,1,0,8,43,5,2254,1787,24,0,0,6,54,3,909,970,16,0,0,5,34,4,1543,929,10,0,0,7,93,4,1485,1389,19,0,0,14,99,3,1075,718,3,0,1,7,5,4,1130,994,1,1,0,10,84.0,1.0,9.0,0.0,138.0,-82.0,233.0,2,6,2,2,-29.0,168.0,-54.0,0.0,3,2,2,1,16.0
10,1430301774,1,112,2,751,808,1,0,0,13,26,2,421,569,3,0,0,6,29,3,960,896,3,1,2,6,11,5,1877,1511,23,0,0,8,41,4,1581,1593,26,0,0,4,91,3,687,675,1,0,0,1,22,2,515,793,4,0,0,8,10,3,964,1022,12,0,0,8,49,5,2434,1917,22,0,0,9,54,4,1372,2005,24,2,1,11,168.0,1.0,9.0,0.0,33.0,-88.0,0.0,5,4,3,1,-22.0,46.0,-87.0,186.0,1,3,3,0,-34.0
13,1430323933,1,27,3,708,903,1,1,1,11,91,2,672,901,6,0,0,8,74,5,2141,1626,28,0,0,7,52,4,1579,2169,28,2,0,7,2,5,1960,1668,27,0,0,8,110,4,1255,704,5,0,1,6,25,2,490,648,1,0,1,9,77,4,1308,1355,22,0,0,7,62,2,536,692,0,1,1,6,49,4,1815,1424,18,0,0,5,55.0,0.0,3.0,0.0,208.0,-78.0,180.0,4,2,2,2,-49.0,30.0,-89.0,210.0,3,4,2,1,-26.0
16,1430331112,1,39,4,1259,661,4,0,0,9,93,5,1703,964,10,0,1,6,30,3,1099,942,2,1,0,6,85,3,1285,1013,10,0,0,8,102,2,484,609,4,0,0,8,14,4,2273,1680,16,1,0,6,64,2,511,541,1,0,0,4,70,4,1135,1399,19,0,0,10,3,3,623,520,1,0,0,10,96,5,1884,1409,15,0,1,11,285.0,1.0,5.0,0.0,225.0,-76.0,0.0,1,3,2,0,36.0,180.0,-86.0,180.0,1,3,2,1,-33.0


Can't upload to Kaggle

In [152]:
pred_pandas = pd.DataFrame(pred,dota_test.index)
#pred_pandas['match_id_hash'] = pred_pandas.index.tolist()
pred_pandas['radiant_win_prob'] = pred_pandas.loc[:,0]
pred_pandas.index.dtype

dtype('int64')

In [146]:
pred_pandas = pd.DataFrame(pred,dota_test.index)
pred_pandas['match_id'] = pred_pandas.index.tolist()
pred_pandas['radiant_win_prob'] = pred_pandas.loc[:,0]
pred_pandas = pred_pandas.reset_index(drop=True)

pred_pandas.drop(pred_pandas.columns[0],axis=1,inplace = True)
#pred_pandas.index = pred_pandas.index.map(str)
pred_pandas['match_id_hash'] = pred_pandas['match_id'].apply(lambda x:str(x))
pred_pandas.drop(['match_id'],axis=1,inplace =True)
pred_pandas.set_index('match_id_hash')
pred_pandas.index

RangeIndex(start=0, stop=17177, step=1)

In [137]:
import csv

In [141]:
#pred_pandas.dtypes.to_frame('types').to_csv('submission.csv')
#pred_pandas.to_csv('submission.csv',index=False)
pred_pandas.to_csv('submission.csv',index=False, quoting=csv.QUOTE_NONNUMERIC)
d = pd.read_csv('submission.csv')
d.dtypes

radiant_win_prob    float64
match_id_hash         int64
dtype: object