In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold

from sklearn.cross_validation import cross_val_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Lasso, Ridge

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
def normalize_data(X):
    """Return a standardised copy of the DataFrame ``X``.

    A fresh ``StandardScaler`` is fitted on ``X`` and applied to it; the
    resulting values are wrapped in a new DataFrame that reuses ``X``'s
    index and column labels.
    """
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(X)
    return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)

def logEstimation(X, y):
    """Grid-search the ``C`` parameter of a logistic regression.

    Candidate values are the six powers of ten from 1e-5 up to 1. Each
    candidate is scored by ROC AUC over a shuffled 5-fold split built from
    ``y.size`` samples; both the splitter and the classifier use seed 241.

    Returns the fitted ``GridSearchCV`` object.
    """
    folds = KFold(y.size, n_folds=5, shuffle=True, random_state=241)
    param_grid = {'C': 10.0 ** np.arange(-5, 1)}
    search = GridSearchCV(
        LogisticRegression(random_state=241),
        param_grid,
        scoring='roc_auc',
        cv=folds,
    )
    search.fit(X, y)
    return search

In [22]:
# Extra per-match table that the following cell joins onto both feature
# tables via the `mid` column.
events = pd.read_csv('processing_tables/dummy_events.csv')

# Pre-computed feature tables for the train and test splits.
test = pd.read_csv('processing_tables/test_goldScore_heroes_items_exp_creeps.csv')
train = pd.read_csv('processing_tables/train_goldScore_heroes_items_exp_creeps.csv')

In [23]:
# Attach the `events` columns to both feature tables. The left join on `mid`
# keeps every row of `train`/`test`; rows whose `mid` is missing from `events`
# end up with NaN in the added columns.
# NOTE(review): this cell rebinds `train` and `test`, so executing it twice
# joins `events` a second time (pandas then suffixes the clashing columns).
# Re-run the loading cell before re-running this one.
train = pd.merge(train, events, on='mid', how='left')
test = pd.merge(test, events, on='mid', how='left')

# Model inputs: every column except the target (`radiant_won`) and the `mid`
# join key. The axis is passed by keyword because giving it positionally to
# `DataFrame.drop` is deprecated in newer pandas releases.
X_train = train.drop(['radiant_won', 'mid'], axis=1)
y_train = train.radiant_won

X_test = test.drop('mid', axis=1)



In [24]:
# Fit an L1-regularised linear regression on `radiant_won`; its real-valued
# predictions are stored as the score for each test match.
# `selection='random'` updates coefficients in a random order, so the estimator
# is seeded (241, the seed used elsewhere in this notebook) to make the fitted
# coefficients and the resulting predictions reproducible across runs.
clf = Lasso(alpha=0.00015, max_iter=12000, selection='random', random_state=241)
clf.fit(X_train, y_train)

# NOTE(review): assumes the rows of data/test.csv are in the same order as
# the rows of X_test -- confirm, since predictions are assigned by position.
test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict(X_test)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.73865
1,7,0.5663
2,9,0.203888
3,10,0.408726
4,12,0.495797


In [27]:
# Preview the first rows of the training feature matrix.
X_train.head()

Unnamed: 0,gold_score_player_0,gold_score_player_1,gold_score_player_2,gold_score_player_3,gold_score_player_4,gold_score_player_5,gold_score_player_6,gold_score_player_7,gold_score_player_8,gold_score_player_9,...,radiant_destroy_barracks,dire_destroy_barracks,radiant_make_fb,dire_make_fb,radiant_kill_roshan,dire_kill_roshan,radiant_denay_tower,dire_denay_tower,radiant_destroy_tower,dire_destroy_tower
0,1.217054,1.415443,0.948802,0.788948,1.318744,0.846922,1.231741,1.162932,0.595855,0.990382,...,0,0,1,0,0,0,0,0,0,0
1,0.855613,1.220598,1.094664,0.881353,1.262385,1.188179,1.217203,0.968288,1.017415,0.961522,...,0,0,1,0,0,0,0,0,0,0
2,0.868434,0.697458,1.929615,0.944899,0.851335,0.851719,0.71371,1.054471,0.564103,0.867038,...,0,0,0,1,0,0,0,0,0,0
3,0.838467,0.731373,1.123121,0.65798,0.540907,1.073678,0.96121,0.760368,0.977203,0.828358,...,0,0,0,1,0,0,0,0,0,0
4,0.974335,0.743297,0.614883,0.97285,0.729776,1.03552,1.465198,1.105064,1.171903,1.045423,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# List every feature next to its fitted coefficient. The line is built as a
# single string so that `print(...)` emits the same "name value" text under
# both Python 2 (where the bare print statement was used) and Python 3.
for column, value in zip(X_train.columns, clf.coef_):
    print('%s %s' % (column, value))

gold_score_player_0 -0.0193374765336
gold_score_player_1 -0.0
gold_score_player_2 -0.00492556882044
gold_score_player_3 0.0
gold_score_player_4 -0.0365537318877
gold_score_player_5 0.125504733181
gold_score_player_6 -0.0
gold_score_player_7 0.00235440849061
gold_score_player_8 0.0
gold_score_player_9 -0.0129654579087
radiant_norm_gold 0.0286605736428
dire_norm_gold -0.125263697105
radiant_carry_norm_gold 0.0146389342716
dire_carry_norm_gold -0.0115741955482
radiant_best_gold_score -0.0
dire_best_gold_score -0.0499507057476
radiant_sum_gold_score -0.0296364177035
dire_sum_gold_score 0.102776897454
hero_0 -0.0261207419824
hero_1 -0.0145097319194
hero_2 -0.0368115733571
hero_3 0.0201734413522
hero_4 -0.0626194559475
hero_5 0.0817806849046
hero_6 0.10096438981
hero_7 0.0611317661811
hero_8 -0.0174288707455
hero_9 -0.0673613728344
hero_10 -0.0212399413663
hero_11 -0.0373738713366
hero_12 0.0531356164208
hero_13 -0.064697100252
hero_14 -0.0
hero_15 -0.0662767342454
hero_16 0.05526595557
hero

In [21]:
# Write the predictions without the row index. `index` is a boolean option,
# so pass False explicitly instead of relying on None being falsy.
test_matches.to_csv('submissions/all_data-8.csv', index=False)

In [69]:
# Columns 12..17 by position. `.ix` is deprecated (removed in pandas 1.0);
# with string column labels its integer slice was positional, which `.iloc`
# states explicitly.
train.head().iloc[:, 12:18]

Unnamed: 0,radiant_norm_gold,dire_norm_gold,radiant_carry_norm_gold,dire_carry_norm_gold,radiant_best_gold_score,dire_best_gold_score
0,0.524044,0.801772,0.007513,0.665735,1.415443,1.231741
1,0.796655,1.744066,0.012183,1.882619,1.262385,1.217203
2,0.500272,-1.725554,2.64668,-0.531107,1.929615,1.054471
3,-0.785714,-0.978204,-0.553753,-1.172477,1.123121,0.977203
4,-1.560604,0.780926,-1.384913,-0.283913,1.03552,1.465198


In [81]:
# Columns 371..398 by position. `.ix` is deprecated (removed in pandas 1.0);
# with string column labels its integer slice was positional, which `.iloc`
# states explicitly.
train.head().iloc[:, 371:399]

Unnamed: 0,exp_player_0,exp_player_1,exp_player_2,exp_player_3,exp_player_4,exp_player_5,exp_player_6,exp_player_7,exp_player_8,exp_player_9,...,xp_score_player_4,xp_score_player_5,xp_score_player_6,xp_score_player_7,xp_score_player_8,xp_score_player_9,radiant_best_xp_score,dire_best_xp_score,radiant_sum_xp_score,dire_sum_xp_score
0,-1.2745,1.162958,-1.302607,1.270762,0.733066,0.597751,0.555076,1.665819,-2.495594,-0.628572,...,1.252376,0.979085,1.483068,1.105635,0.165278,0.61088,1.426583,1.483068,0.644328,-0.929431
1,-1.414502,1.325397,-0.980861,-0.54998,1.638381,1.30684,0.439235,-0.733371,0.335357,0.593712,...,1.293011,1.174909,1.019756,1.065908,1.115372,0.957386,1.293011,1.293011,0.003595,0.179923
2,-0.224488,-0.964361,2.891867,-0.339754,1.139497,-0.640619,-1.385491,0.156859,-0.165589,-0.555142,...,1.46262,0.843992,0.599844,0.935352,0.812252,1.136832,1.934823,1.934823,1.708594,0.125583
3,-0.81904,0.448039,1.642045,0.18307,-1.375065,0.254733,-1.02036,-0.442476,0.22197,-1.021748,...,0.538298,1.273792,0.679902,0.765532,0.881989,0.922339,1.273792,1.273792,-0.331204,-1.379719
4,-0.432672,-0.923981,-1.226475,0.40335,-1.328381,1.382452,0.26501,0.666618,-0.460209,-0.512386,...,0.662305,1.133416,1.101094,1.126272,1.068821,0.905163,1.133416,1.133416,-1.211915,-0.01618


In [85]:
# Columns 399..414 by position. `.ix` is deprecated (removed in pandas 1.0);
# with string column labels its integer slice was positional, which `.iloc`
# states explicitly.
train.head().iloc[:, 399:415]

Unnamed: 0,creeps_player_0,creeps_player_1,creeps_player_2,creeps_player_3,creeps_player_4,creeps_player_5,creeps_player_6,creeps_player_7,creeps_player_8,creeps_player_9,radiant_creeps,dire_creeps,radiant_carry_creeps,dire_carry_creeps,creeps_diff,creeps_rel
0,-1.243959,0.839247,-1.237188,1.563027,0.759367,0.323709,0.435091,2.505723,-1.301075,1.028664,0.451955,1.926818,0.400137,1.788257,-1.183,0.770833
1,-1.191912,1.878427,-0.668118,0.060755,1.072314,1.106572,-0.191645,-1.244911,0.167004,0.660455,0.763204,0.314862,0.860007,-0.189175,0.338896,1.090278
2,0.317441,-1.083236,2.177236,0.78599,-0.179471,-1.137635,-0.766154,1.515972,-0.72433,-0.759781,1.35112,-1.196346,1.319876,0.34321,1.994292,1.757576
3,0.109254,0.735329,0.935627,-0.198257,-1.170468,-0.093818,-0.922838,-0.203068,0.534024,-1.075389,0.279038,-1.129181,-0.519603,-1.101837,1.113194,1.415842
4,0.733814,-1.135195,-0.56465,0.16436,-1.222626,0.741236,-0.139417,1.359696,-0.776761,-0.654578,-1.346376,0.348445,-0.749538,0.115045,-1.316499,0.662069


In [7]:
# Mean ROC AUC over 5 cross-validation folds for a ridge regression whose
# raw predictions are used as the ranking score.
clf = Ridge(alpha=0.0001)
cross_val_score(clf, X_train, y_train, scoring='roc_auc', cv=5).mean()

0.76480789888998779

In [8]:
# Mean ROC AUC over 5 cross-validation folds for a lasso regression whose
# raw predictions are used as the ranking score.
clf = Lasso(alpha=0.0001, max_iter=6000)
cross_val_score(clf, X_train, y_train, scoring='roc_auc', cv=5).mean()

0.76620202717209263

In [67]:
# Columns 2..6 by position, rows up to and including index label 5.
# `.ix` is deprecated (removed in pandas 1.0); it mixed positional column
# slicing with label-based row slicing here, which `.iloc` / `.loc` spell out.
train.iloc[:, 2:7].loc[:5]

Unnamed: 0,gold_score_player_0,gold_score_player_1,gold_score_player_2,gold_score_player_3,gold_score_player_4
0,1.217054,1.415443,0.948802,0.788948,1.318744
1,0.855613,1.220598,1.094664,0.881353,1.262385
2,0.868434,0.697458,1.929615,0.944899,0.851335
3,0.838467,0.731373,1.123121,0.65798,0.540907
4,0.974335,0.743297,0.614883,0.97285,0.729776
5,1.561157,1.068639,0.903588,0.926351,1.222453


In [60]:
# Row-wise sum of columns 2..6 (by position) for rows up to and including
# index label 5. `.ix` is deprecated (removed in pandas 1.0), so the column
# slice uses `.iloc` and the row slice uses `.loc`; the axis is passed by
# keyword for clarity.
train.iloc[:, 2:7].loc[:5].sum(axis=1)

0    5.688992
1    5.314613
2    5.291741
3    3.891847
4    4.035142
5    5.682188
dtype: float64