In [3]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold

from sklearn.cross_validation import cross_val_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Lasso, Ridge

%pylab inline

pd.set_option("display.max_rows", 15)
pd.set_option("display.max_columns", 150)
sns.set_style('whitegrid')
%config InlineBackend.figure_format = 'svg'


plt.rcParams['figure.figsize'] = (12.0, 5.0)

Populating the interactive namespace from numpy and matplotlib


In [4]:
def normalize_data(X):
    return pd.DataFrame(
        StandardScaler().fit_transform(X),
        index = X.index, 
        columns=X.columns
    )

def logEstimation(X, y):
    grid = {'C': np.power(10.0, np.arange(-5, 4))}
    kf=KFold(y.size, n_folds=5, shuffle=True, random_state=241)
    clf=LogisticRegression(random_state=241)
    gs = GridSearchCV(clf, grid, scoring='roc_auc', cv=kf)
    gs.fit(X, y)
    return gs

In [5]:
def logEstimation_linear(X, y):
    grid = {'C': [0.0001, 0.001, 0.01, 0.5, 0.1, 0.5, 1, 5, 10]}
    kf=KFold(y.size, n_folds=5, shuffle=True, random_state=241)
    clf=LogisticRegression(random_state=241)
    gs = GridSearchCV(clf, grid, scoring='roc_auc', cv=kf)
    gs.fit(X, y)
    return gs

In [6]:
def my_clf_cross_val(X_train, y_train, with_lasso = True): 
    clf=LogisticRegression(random_state=241, C=0.1)
    score = cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc')
    print 'logreg: ', np.mean(score), np.std(score)
    clf=Ridge(alpha=0.0001)
    score = cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc')
    print 'Ridge: ', np.mean(score), np.std(score)
    clf=Lasso(alpha=0.0001, max_iter=6000)
    if with_lasso:
        score = cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc')
        print 'Lasso: ', np.mean(score), np.std(score)

In [8]:
def easy_test():
    clf=Ridge(alpha=0.0001)
    print 'Ridge: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
    clf=Lasso(alpha=0.0001, max_iter=6000)
    print 'Lasso: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

In [7]:
def full_test():
    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=228)
    print 'RF: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
   
    gs = logEstimation(X_train, y_train)
    print gs.best_params_
    print('best score of log grid search: {}'.format(gs.best_score_))
    
    clf=LogisticRegression(random_state=241, C=gs.best_params_['C'])
    print 'logreg: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
    
    clf=Ridge(alpha=0.0001)
    print 'Ridge: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
    
    clf=Lasso(alpha=0.00015, max_iter=12000)
    print 'Lasso: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

In [9]:
def total_merge(gold, heroes, items, xp, creeps, events):
    train_matches = pd.read_csv('data/train.csv')
    test_matches = pd.read_csv('data/test.csv')
    
    train = pd.merge(train_matches, gold, on='mid', how='left')
    test = pd.merge(test_matches, gold, on='mid', how='left')
    train = pd.merge(train, heroes, on='mid', how='left')
    test = pd.merge(test, heroes, on='mid', how='left')
    train = pd.merge(train, items, on='mid', how='left')
    test = pd.merge(test, items, on='mid', how='left')
    train = pd.merge(train, xp, on='mid', how='left')
    test = pd.merge(test, xp, on='mid', how='left')
    train = pd.merge(train, creeps, on='mid', how='left')
    test = pd.merge(test, creeps, on='mid', how='left')
    train = pd.merge(train, events, on='mid', how='left')
    test = pd.merge(test, events, on='mid', how='left')

    X_train = train.drop(['radiant_won'], 1)
    y_train = train.radiant_won

    return X_train, y_train, test

# GOLD STANDART

In [51]:
gold = pd.read_csv('processing_tables/goldScoreWithCarry.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/only_items.csv')
xp = pd.read_csv('processing_tables/only_exp.csv')
xp_score = pd.read_csv('processing_tables/xp_score.csv')
xp = pd.merge(xp, xp_score, on='mid', how='left')
creeps = pd.read_csv('processing_tables/creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)

In [50]:
creeps.drop(['creeps_diff'], 1, inplace=True)
X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.765969822732
Ridge:  0.764805355026
Lasso:  0.766205494153


#  3) - cropped-1

In [163]:
gold = pd.read_csv('processing_tables/gold_stat.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/only_items.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.767068642918
Ridge:  0.765069226126
Lasso:  0.76680058736


In [166]:
clf = Lasso(alpha=0.00015, max_iter=12000, selection='random')
print 'Lasso: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict(X_test)
test_matches.head()

Lasso:  0.767138785895


Unnamed: 0,mid,radiant_won
0,3,0.685594
1,7,0.56896
2,9,0.207443
3,10,0.388243
4,12,0.503102


In [167]:
test_matches.to_csv('submissions/cropped-1.csv', index=None)

# With Cropped Events

In [41]:
gold = pd.read_csv('processing_tables/gold_stat.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/only_items.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events_cropped.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.76717484477
Ridge:  0.765241262579
Lasso:  0.766967025891


In [43]:
gs = logEstimation_linear(X_train, y_train)
print gs.grid_scores_
print gs.best_params_
print('best score of log grid search: {}'.format(gs.best_score_))
clf=LogisticRegression(random_state=241, C=gs.best_params_['C'])
print 'logreg: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

[mean: 0.72350, std: 0.00508, params: {'C': 0.0001}, mean: 0.74932, std: 0.00729, params: {'C': 0.001}, mean: 0.76646, std: 0.01083, params: {'C': 0.01}, mean: 0.76684, std: 0.01120, params: {'C': 0.5}, mean: 0.76752, std: 0.01138, params: {'C': 0.1}, mean: 0.76684, std: 0.01120, params: {'C': 0.5}, mean: 0.76664, std: 0.01114, params: {'C': 1}, mean: 0.76625, std: 0.01103, params: {'C': 5}, mean: 0.76610, std: 0.01102, params: {'C': 10}]
{'C': 0.1}
best score of log grid search: 0.767522676665
logreg:  0.76717484477


In [44]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.729537
1,7,0.600068
2,9,0.177503
3,10,0.364566
4,12,0.50387


In [45]:
test_matches.to_csv('submissions/with_cropped_events.csv', index=None)

# 1) With cropped items

In [13]:
gold = pd.read_csv('processing_tables/gold_stat.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.768729181764
Ridge:  0.767748195646
Lasso:  0.768365266477


In [14]:
gs = logEstimation_linear(X_train, y_train)
print gs.grid_scores_
print gs.best_params_
print('best score of log grid search: {}'.format(gs.best_score_))
clf=LogisticRegression(random_state=241, C=gs.best_params_['C'])
print 'logreg: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

[mean: 0.72454, std: 0.00516, params: {'C': 0.0001}, mean: 0.74984, std: 0.00754, params: {'C': 0.001}, mean: 0.76742, std: 0.01100, params: {'C': 0.01}, mean: 0.76890, std: 0.01153, params: {'C': 0.5}, mean: 0.76911, std: 0.01160, params: {'C': 0.1}, mean: 0.76890, std: 0.01153, params: {'C': 0.5}, mean: 0.76882, std: 0.01146, params: {'C': 1}, mean: 0.76862, std: 0.01142, params: {'C': 5}, mean: 0.76856, std: 0.01144, params: {'C': 10}]
{'C': 0.1}
best score of log grid search: 0.769109659471
logreg:  0.768729181764


In [15]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.704479
1,7,0.526255
2,9,0.153394
3,10,0.384878
4,12,0.550598


In [16]:
test_matches.to_csv('submissions/with_cropped_items.csv', index=None)

# 2) With cropped items and events

In [17]:
gold = pd.read_csv('processing_tables/gold_stat.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events_cropped.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.768891703732
Ridge:  0.767991692533
Lasso:  0.768600442808


In [18]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

Lasso2:  0.76867911356


In [19]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/with_cropped_items_events.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.704094
1,7,0.522105
2,9,0.153881
3,10,0.386449
4,12,0.55249


# 1) lasso 0.76269

In [20]:
gold = pd.read_csv('processing_tables/gold_stat_dif.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.768830201087
Ridge:  0.76787056204
Lasso:  0.768476686177


In [21]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/cropped_items_events_goldStatDif.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.703232
1,7,0.517801
2,9,0.151132
3,10,0.386902
4,12,0.546474


In [26]:
gold = pd.read_csv('processing_tables/gold_stats_usual_dif.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.768899382226 0.00567106969127
Ridge:  0.768306721781 0.00591528254533
Lasso:  0.768595774301 0.00594092866854


In [34]:
gold = pd.read_csv('processing_tables/gold_stats_adapt_dif.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events_cropped.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769254382848 0.00568661630208
Ridge:  0.768540778247 0.00565586238941
Lasso:  0.76900440766 0.00580143502894


In [35]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

Lasso2:  0.768967083336


Lasso(alpha=0.00015, copy_X=True, fit_intercept=True, max_iter=12000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [36]:
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

second_gold_dif -0.00445717201172
adapt_score_sum_dif -0.0196351851119
dire_best_creeps -0.0243520409374
dire_second_creeps -0.0103223300512
rad_creeps_sum -0.00505117098916
barracks_dif -0.0
make_fb -0.00429199717334
kill_roshan -0.0375580925728
pushing -0.0304878012461


# 0.76262

In [19]:
gold = pd.read_csv('processing_tables/gold_stats_adapt_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps2.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769006508164 0.00563920655664
Ridge:  0.768216625786 0.00566150042797
Lasso:  0.768693087269 0.00587526922375


In [11]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

Lasso2:  0.768700854721


Lasso(alpha=0.00015, copy_X=True, fit_intercept=True, max_iter=12000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [13]:
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

second_gold_dif -0.00734078564414
adapt_score_sum_dif -0.0
dire_best_creeps -0.0263821986337
dire_second_creeps -0.0111857902993
rad_creeps_sum -0.00851029209591
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0264005396576
dire_make_fb -0.0178608563296
radiant_kill_roshan -0.016792562585
radiant_denay_tower -0.0
dire_denay_tower -0.0320101242926
radiant_destroy_tower -0.0295518778494


In [37]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/adapthGold_croppedEvents.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.720901
1,7,0.488509
2,9,0.157156
3,10,0.435753
4,12,0.583108


# logreg 0.76252

In [51]:
gold = pd.read_csv('processing_tables/gold_stats_adapt_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769103987736
Ridge:  0.768222997104
Lasso:  0.76878372997


In [52]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

Lasso2:  0.768843672615


Lasso(alpha=0.00015, copy_X=True, fit_intercept=True, max_iter=12000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [53]:
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

adapt_score_sum_dif 0.0
sum_creeps_dif -0.0131034094218
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0259819385947
dire_make_fb -0.0179045465266
radiant_kill_roshan -0.0155819383747
radiant_denay_tower -0.0
dire_denay_tower -0.0313426377648
radiant_destroy_tower -0.0287696695177


In [54]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/logreg_very_crop_creeps.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.729692
1,7,0.468287
2,9,0.162953
3,10,0.390292
4,12,0.578631


In [None]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

In [None]:
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

In [None]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/logreg_very_crop_creeps.csv', index=None)
test_matches.head()

# threshold_gold_score 0.76297

In [67]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769069496006 0.00600877674853
Ridge:  0.768169310013 0.00578611917496
Lasso:  0.768734954971 0.00609786660687


In [68]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
# test_matches.to_csv('submissions/thresholdGold_easyCroppedEvents.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.698131
1,7,0.513709
2,9,0.158672
3,10,0.387372
4,12,0.561858


In [69]:
gold.head()

Unnamed: 0,mid,gold_dif,top_gold_dif,threshold_score_sum_dif,top_threshold_gold_score_dif,second_threshold_gold_score_dif,third_threshold_gold_score_dif
0,0,-0.191463,-0.463171,-1.251202,-1.140037,-1.157989,-1.121482
1,1,-0.657847,-1.316314,0.702919,0.197841,0.712788,1.299354
2,2,1.556804,2.277633,1.497239,2.588696,1.050663,0.180734
3,3,0.793066,0.05073,-0.309281,-0.292735,-1.380092,0.339919
4,4,0.130693,0.426918,0.436196,0.411904,-0.086841,0.92376


# 0.76299 hand events

In [107]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769098461087 0.00599224871226
Ridge:  0.768202455192 0.00574751213618


In [86]:
events.max()

mid                      49947
radiant_make_fb              1
dire_make_fb                 1
radiant_kill_roshan          1
dire_kill_roshan             2
radiant_denay_tower          2
dire_denay_tower             2
radiant_destroy_tower        6
dire_destroy_tower           7
dtype: int64

In [107]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.768802017774
threshold_score_sum_dif -0.0170285822233
sum_creeps_dif -0.00900064841111
radiant_make_fb -0.026631288495
dire_make_fb -0.0175996469743
radiant_kill_roshan -0.0178522155815
radiant_denay_tower -0.0
dire_denay_tower -0.0319353708447
radiant_destroy_tower -0.0298492140113


In [109]:
events = pd.read_csv('processing_tables/dummy_events.csv')
midRadWin = events.loc[events.radiant_destroy_barracks!=0].mid.values
midDireWin = events.loc[events.dire_destroy_barracks!=0].mid.values
print midRadWin, midDireWin

[14337 34802 40699] [ 7341  9348  9512 29462 36614 47375]


In [110]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
# test_matches.to_csv('submissions/thresholdGold_withHandEvents.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.6985
1,7,0.513692
2,9,0.158695
3,10,0.387303
4,12,0.561851


In [165]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)


In [166]:
X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.768799301857 0.00611035805161
Ridge:  0.767870951236 0.00584925694023


# 0.76281

In [102]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
events.drop('barracks_dif', 1, inplace=True)

X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769101148072 0.00599022833835
Ridge:  0.768327277155 0.00576934444735


In [105]:
events = pd.read_csv('processing_tables/dummy_events.csv')
midRadWin = events.loc[events.radiant_destroy_barracks!=0].mid.values
midDireWin = events.loc[events.dire_destroy_barracks!=0].mid.values
print midRadWin, midDireWin

X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
test_matches.to_csv('submissions/last_submit-1.csv', index=None)
# test_matches.head()

[14337 34802 40699] [ 7341  9348  9512 29462 36614 47375]


In [106]:
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.696987
1,7,0.5151
2,9,0.16041
3,10,0.387755
4,12,0.561959


# CURRENT TEST

In [8]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769069496006 0.00600877674853
Ridge:  0.768169310013 0.00578611917496
Lasso:  0.768734954971 0.00609786660687


In [10]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769113088551 0.00614139434181
Ridge:  0.768091318205 0.00585900100205
Lasso:  0.768906171935 0.00651028181203


In [11]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.768972181685
gold_rank_0 0.0
gold_rank_1 0.0
gold_rank_3 0.0
gold_square_rank_0 -0.0
gold_square_rank_1 0.0
gold_square_rank_2 0.0
gold_square_rank_3 -0.0051092946684
gold_square_rank_4 -0.0211581690939
gold_log_rank_4 0.0
gold_score_rank_rel_1 -0.00545617203299
gold_score_rank_rel_2 -0.00627447903678
gold_score_rank_rel_3 -0.0164697980575
gold_score_rank_dif_0 -0.0215791602994
gold_score_rank_dif_2 -0.0
sum_creeps_dif -0.0139034983352
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0271744213239
dire_make_fb -0.0173165580822
radiant_kill_roshan -0.0138837294478
radiant_denay_tower -0.0
dire_denay_tower -0.0305522079754
radiant_destroy_tower -0.0257207635304


In [16]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769120218401 0.00614428773512
Ridge:  0.768263233768 0.00589628319516
Lasso:  0.768923170767 0.00651480780025


In [17]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.768993006614
gold_rank_3 0.0
gold_square_rank_0 -0.0
gold_square_rank_1 0.0
gold_square_rank_2 0.0
gold_square_rank_3 -0.00459835947871
gold_square_rank_4 -0.0212225767509
gold_log_rank_4 0.0
gold_score_rank_rel_1 -0.00551709739187
gold_score_rank_rel_2 -0.00626214018667
gold_score_rank_rel_3 -0.0164919059924
gold_score_rank_dif_0 -0.0215548512568
gold_score_rank_dif_2 -0.0
sum_creeps_dif -0.013918214527
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0271823042789
dire_make_fb -0.0173341732333
radiant_kill_roshan -0.0138182806149
radiant_denay_tower -0.0
dire_denay_tower -0.0305307765036
radiant_destroy_tower -0.02565072251


In [18]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769257000209 0.00616973210926
Ridge:  0.768375800538 0.00588643296084
Lasso:  0.769001041624 0.00651032526406


In [19]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769237759188 0.00616545980241
Ridge:  0.768354958024 0.00588818803067
Lasso:  0.768978123652 0.00652714753152


In [20]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.769036965111
gold_rank_3 0.0
gold_square_rank_3 -0.00754489142023
gold_square_rank_4 -0.0213710738109
gold_log_rank_4 0.0
gold_score_rank_rel_1 -0.00504553274582
gold_score_rank_rel_2 -0.00646595286632
gold_score_rank_rel_3 -0.0164435508769
gold_score_rank_dif_0 -0.0220631678225
gold_score_rank_dif_2 -0.0
sum_creeps_dif -0.0139132858871
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0272143806973
dire_make_fb -0.0173390193486
radiant_kill_roshan -0.0141614628547
radiant_denay_tower -0.0
dire_denay_tower -0.0305860932254
radiant_destroy_tower -0.0259688257391


In [22]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
#              'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769173737341 0.00604865730518
Ridge:  0.768327622225 0.00588295897767
Lasso:  0.768985474391 0.00635414842302


In [24]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
#              'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769256320809 0.0061951279143
Ridge:  0.768384007855 0.0059335541087
Lasso:  0.769037780526 0.00654443737737


In [25]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
#              'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769253379483 0.00605184370149
Ridge:  0.768336436864 0.00589879470678
Lasso:  0.7690444808 0.00638949209895


In [27]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769275697437 0.00603190098226
Ridge:  0.768411518125 0.00586301483547
Lasso:  0.769059188603 0.00636542990019


In [28]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769280485431 0.00617310016806
Ridge:  0.768475221587 0.00588820239419
Lasso:  0.769052038661 0.00652103645519


In [29]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.769075980422
gold_rank_3 0.0
gold_square_rank_3 -0.00852771431931
gold_square_rank_4 -0.0216689302051
gold_log_rank_4 0.0
gold_score_rank_rel_3 -0.0179441309041
gold_score_rank_dif_0 -0.0193359830812
gold_score_rank_dif_1 0.0
gold_score_rank_dif_2 -0.00456203542917
sum_creeps_dif -0.0137743185979
radiant_make_fb -0.0270832315813
dire_make_fb -0.0172911530527
radiant_kill_roshan -0.0148290035123
radiant_denay_tower -0.0
dire_denay_tower -0.0305894945259
radiant_destroy_tower -0.0258919900324


In [30]:
events.head()

Unnamed: 0,mid,radiant_make_fb,dire_make_fb,radiant_kill_roshan,dire_kill_roshan,radiant_denay_tower,dire_denay_tower,radiant_destroy_tower,dire_destroy_tower
0,0,1,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0,0
2,2,0,1,0,0,0,0,0,0
3,3,1,0,0,0,0,0,0,0
4,4,0,1,0,0,0,0,0,0


In [35]:
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

Unnamed: 0,mid,barracks_dif,make_fb,kill_roshan,pushing
0,0,0.0,1,0,0
1,1,0.0,1,0,0
2,2,0.0,-1,0,0
3,3,0.0,1,0,0
4,4,0.0,-1,0,0


In [36]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769398848412 0.00612645752364
Ridge:  0.768585843161 0.00591882604328
Lasso:  0.769216079589 0.00634182081628


In [37]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.769270127905
gold_dif 0.0
gold_rank_3 0.0
gold_square_rank_3 -0.00681800666135
gold_square_rank_4 -0.0215412928528
gold_log_rank_4 0.0
gold_score_rank_rel_3 -0.0167130052879
sum_creeps_dif -0.013110812398
barracks_dif -0.0
make_fb -0.00457533424625
kill_roshan -0.0334749887699
pushing -0.0267675322705


In [41]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769357177313 0.00624942728067
Ridge:  0.768649901017 0.005944592082


In [46]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769352728017 0.00610679415887
Ridge:  0.768585843161 0.00591882604328


In [47]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2','gold_rank_3'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769353498501 0.00610286968601
Ridge:  0.768533236446 0.00595365481921


In [52]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2','gold_rank_3'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2',
           'gold_square_rank_3', 'gold_square_rank_4'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769376911877 0.00609895358603
Ridge:  0.768607284104 0.00606503551133


In [53]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.769275354313
gold_dif 0.0
gold_score_rank_rel_3 -0.0161553289891
sum_creeps_dif -0.0130834745732
barracks_dif -0.0
make_fb -0.00441764260803
kill_roshan -0.033585636588
pushing -0.123482142943


# 0.76217

In [55]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2','gold_rank_3'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2',
           'gold_square_rank_3', 'gold_square_rank_4'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
# new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.drop('barracks_dif', 1, inplace=True)
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769377682615 0.00610022185709
Ridge:  0.768614716341 0.00606502174282


In [56]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

Lasso2:  0.769275354313
gold_dif 0.0
gold_score_rank_rel_3 -0.0161553289891
sum_creeps_dif -0.0130834745732
make_fb -0.00441764260803
kill_roshan -0.033585636588
pushing -0.123482142943


In [58]:
events.head()

Unnamed: 0,mid,radiant_make_fb,dire_make_fb,radiant_kill_roshan,dire_kill_roshan,radiant_denay_tower,dire_denay_tower,radiant_destroy_tower,dire_destroy_tower
0,0,1,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0,0
2,2,0,1,0,0,0,0,0,0
3,3,1,0,0,0,0,0,0,0
4,4,0,1,0,0,0,0,0,0


In [59]:
events = pd.read_csv('processing_tables/dummy_events.csv')
midRadWin = events.loc[events.radiant_destroy_barracks!=0].mid.values
midDireWin = events.loc[events.dire_destroy_barracks!=0].mid.values
print midRadWin, midDireWin


[14337 34802 40699] [ 7341  9348  9512 29462 36614 47375]


In [60]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
test_matches.to_csv('submissions/logGold_normalTowers.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.696544
1,7,0.487055
2,9,0.165199
3,10,0.396882
4,12,0.568977


# 0.76206

In [81]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2','gold_rank_3'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2',
           'gold_square_rank_3', 'gold_square_rank_4'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
# new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.drop('barracks_dif', 1, inplace=True)
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769377682615 0.00610022185709
Ridge:  0.768614716341 0.00606502174282


In [82]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict(X_test)

Lasso2:  0.769275354313


In [83]:
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
test_matches.to_csv('submissions/logGold_normalTowers_lasso.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.65773
1,7,0.479715
2,9,0.199016
3,10,0.419774
4,12,0.555358


# 0.76195

In [61]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2',
           'gold_score_rank_dif_3','gold_score_rank_dif_4'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
# new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.drop('barracks_dif', 1, inplace=True)
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769008672724 0.00595554510124
Ridge:  0.768256688318 0.00570382573486


In [62]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/stupid_submition.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.712639
1,7,0.515127
2,9,0.173322
3,10,0.389474
4,12,0.558358


In [66]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2','gold_rank_3'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2'], 1, inplace=True)
gold.drop(['gold_log_rank_0','gold_log_rank_1','gold_log_rank_2'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
new_events = pd.read_csv('processing_tables/dummy_events_cropped.csv')
# new_events.barracks_dif = new_events.barracks_dif / new_events.barracks_dif.max()
new_events.drop('barracks_dif', 1, inplace=True)
new_events.pushing = new_events.pushing / new_events.pushing.max()
new_events.head()

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769048545963 0.00598025162835
Ridge:  0.768218215178 0.00582315447505


In [64]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')
gold.drop(['gold_rank_0','gold_rank_1','gold_rank_2'], 1, inplace=True)
gold.drop(['gold_square_rank_0','gold_square_rank_1','gold_square_rank_2'], 1, inplace=True)
gold.drop(['gold_score_rank_rel_0','gold_score_rank_rel_1','gold_score_rank_rel_2'], 1, inplace=True)
gold.drop(['gold_score_rank_dif_0','gold_score_rank_dif_1','gold_score_rank_dif_2',
           'gold_score_rank_dif_3','gold_score_rank_dif_4'], 1, inplace=True)

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, new_events)
X_train.index = X_train.mid
X_train.drop('mid', 1, inplace=True)
my_clf_cross_val(X_train, y_train, False)


logreg:  0.769008672724 0.00595554510124
Ridge:  0.768256688318 0.00570382573486


# 0.76242

In [67]:
gold = pd.read_csv('data/gold.csv', index_col='mid')
gold = gold[gold.times == 600]
gold.drop('times', 1, inplace=True)
gold_log_rank_dif = pd.DataFrame(data = np.sort(np.log(gold.values[:,0:5])) - np.sort(np.log(gold.values[:,5:10])),
                             index=gold.index,
                             columns = ['gold_log_rank_{}'.format(i) for i in range(5)]) 
gold_log_rank_dif = normalize_data(gold_log_rank_dif)
gold_log_rank_dif.reset_index(inplace=True)

gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
gold = pd.merge(gold, gold_log_rank_dif, on='mid', how='left')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769221207115 0.00606106106091
Ridge:  0.768440743658 0.00588337309906
Lasso:  0.769004031477 0.00627050185506


In [68]:
events = pd.read_csv('processing_tables/dummy_events.csv')
midRadWin = events.loc[events.radiant_destroy_barracks!=0].mid.values
midDireWin = events.loc[events.dire_destroy_barracks!=0].mid.values
print midRadWin, midDireWin

[14337 34802 40699] [ 7341  9348  9512 29462 36614 47375]


In [69]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
test_matches.to_csv('submissions/stupid_submition-2.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.703574
1,7,0.496558
2,9,0.163576
3,10,0.393074
4,12,0.566141


In [73]:
gold = pd.read_csv('data/gold.csv', index_col='mid')
gold = gold[gold.times == 600]
gold.drop('times', 1, inplace=True)
gold_log_rank_dif = pd.DataFrame(data = np.sort(np.log(gold.values[:,3:5])) - np.sort(np.log(gold.values[:,8:10])),
                             index=gold.index,
                             columns = ['gold_log_rank_{}'.format(i) for i in range(3,5)]) 
gold_log_rank_dif = normalize_data(gold_log_rank_dif)
gold_log_rank_dif.reset_index(inplace=True)


gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
gold = pd.merge(gold, gold_log_rank_dif, on='mid', how='left')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769087324325 0.00590591860863
Ridge:  0.768206753974 0.00565137392905
Lasso:  0.768757104435 0.00601452642012


# source with squares 0.76234

In [75]:
gold = pd.read_csv('data/gold.csv', index_col='mid')
gold = gold[gold.times == 600]
gold.drop('times', 1, inplace=True)
gold_log_rank_dif = pd.DataFrame(data = np.sort(np.square(gold.values[:,0:5])) - np.sort(np.square(gold.values[:,5:10])),
                             index=gold.index,
                             columns = ['gold_log_rank_{}'.format(i) for i in range(0,5)]) 
gold_log_rank_dif = normalize_data(gold_log_rank_dif)
gold_log_rank_dif.reset_index(inplace=True)


gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
gold = pd.merge(gold, gold_log_rank_dif, on='mid', how='left')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis',
             'dire_steal_aegis', 'radiant_destroy_barracks', 'dire_destroy_barracks'], 1, inplace=True)

X_train, y_train, test= total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train, False)

logreg:  0.769159107302 0.00597193282402
Ridge:  0.768401038199 0.00587127797461


In [77]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
for mid in midRadWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 1
for mid in midDireWin:
    if mid in test_matches.mid.values:
        test_matches.ix[test_matches[test_matches.mid == mid].index[0],1] = 0
test_matches.to_csv('submissions/stupid_submition-3.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.701458
1,7,0.501445
2,9,0.164383
3,10,0.392198
4,12,0.560544


In [None]:
gold = pd.read_csv('processing_tables/all_stats_gold.csv')

heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
# events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

# 0.76259

In [61]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
# gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')
events.drop(['radiant_take_aegis', 'dire_take_aegis', 'radiant_steal_aegis', 'dire_steal_aegis'], 1, inplace=True)

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769252392706 0.00607900331348
Ridge:  0.768381612071 0.00579973681582
Lasso:  0.768962313417 0.00617029273645


In [62]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/thresholdGold_easyCroppedEvents.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.702474
1,7,0.502219
2,9,0.159457
3,10,0.389295
4,12,0.564214


# 0.76237

In [55]:
gold = pd.read_csv('processing_tables/gold_stats_threshold_dif.csv')
# gold.drop('third_gold_dif', 1, inplace=True)
gold.drop('second_gold_dif', 1, inplace=True)
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps_dif.csv')
events = pd.read_csv('processing_tables/dummy_events_cropped.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.769369409336 0.00615589383453
Ridge:  0.76856257118 0.00582483116481
Lasso:  0.769149537669 0.00613274630592


In [56]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/thresholdGold_croppedEvents.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.702448
1,7,0.495897
2,9,0.160178
3,10,0.391034
4,12,0.566139


In [62]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

Lasso2:  0.768789333124


Lasso(alpha=0.00015, copy_X=True, fit_intercept=True, max_iter=12000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [63]:
for i, col in enumerate(X_train.columns):
    if clf.coef_[i] < 0.001 and 'hero' not in col and 'item' not in col:
        print col, clf.coef_[i]

threshold_score_sum_dif -0.0170285786099
sum_creeps_dif -0.0090006484224
radiant_take_aegis -0.0
dire_take_aegis 0.0
radiant_steal_aegis 0.0
dire_steal_aegis -0.0
radiant_destroy_barracks -0.0
dire_destroy_barracks -0.0
radiant_make_fb -0.0266312884868
dire_make_fb -0.0175996470007
radiant_kill_roshan -0.0178522156211
radiant_denay_tower -0.0
dire_denay_tower -0.0319353708381
radiant_destroy_tower -0.0298492139832


In [64]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)
clf=LogisticRegression(random_state=241, C=0.1)
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict_proba(X_test)[:, 1]
test_matches.to_csv('submissions/threshold_gold_score.csv', index=None)
test_matches.head()

Unnamed: 0,mid,radiant_won
0,3,0.698131
1,7,0.513709
2,9,0.158672
3,10,0.387372
4,12,0.561858


# Some cropped

In [22]:
gold = pd.read_csv('processing_tables/gold_stat_dif.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/only_items.csv')
xp = pd.read_csv('processing_tables/xp_cropped_stats.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)
my_clf_cross_val(X_train, y_train)

logreg:  0.767279671556
Ridge:  0.765272149983
Lasso:  0.767001884579


In [23]:
clf = Lasso(alpha=0.00015, max_iter=12000)
print 'Lasso2: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

Lasso2:  0.767310326374


In [32]:
X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)

clf = Lasso(alpha=0.00015, max_iter=12000)
clf.fit(X_train, y_train)
print 'Lasso: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict(X_test)
test_matches.head()

Lasso:  0.767310326374


Unnamed: 0,mid,radiant_won
0,3,0.686447
1,7,0.570634
2,9,0.204366
3,10,0.392924
4,12,0.504773


In [30]:
test_matches.to_csv('submissions/some_cropped.csv', index=None)

In [23]:
gold = pd.read_csv('processing_tables/goldRankDif.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/items_dif.csv')
xp = pd.read_csv('processing_tables/xpDif.csv')
creeps = pd.read_csv('processing_tables/cropped_creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

In [26]:
gold = pd.read_csv('processing_tables/goldScoreWithoutCarry.csv')
gold.head()

Unnamed: 0,mid,gold_score_player_0,gold_score_player_1,gold_score_player_2,gold_score_player_3,gold_score_player_4,gold_score_player_5,gold_score_player_6,gold_score_player_7,gold_score_player_8,gold_score_player_9,radiant_norm_gold,dire_norm_gold,radiant_carry_norm_gold,dire_carry_norm_gold,radiant_best_gold_score,dire_best_gold_score,radiant_sum_gold_score,dire_sum_gold_score
0,0,1.217054,1.415443,0.948802,0.788948,1.318744,0.846922,1.231741,1.162932,0.595855,0.990382,0.524044,0.801772,0.007513,0.665735,1.415443,1.231741,0.499268,0.44876
1,1,0.855613,1.220598,1.094664,0.881353,1.262385,1.188179,1.217203,0.968288,1.017415,0.961522,0.796655,1.744066,0.012183,1.882619,1.262385,1.217203,1.01561,0.846631
2,2,0.868434,0.697458,1.929615,0.944899,0.851335,0.851719,0.71371,1.054471,0.564103,0.867038,0.500272,-1.725554,2.64668,-0.531107,1.929615,1.054471,0.430333,-0.253739
3,3,1.165936,1.435251,0.988624,0.849398,1.09548,1.15544,1.041295,0.644137,1.078761,0.791755,0.20734,-0.926862,-0.264248,-0.342132,1.435251,1.078761,0.822192,-0.19818
4,4,0.838467,0.731373,1.123121,0.65798,0.540907,1.073678,0.96121,0.760368,0.977203,0.828358,-0.785714,-0.978204,-0.553753,-1.172477,1.123121,0.977203,-1.37506,-1.089529


In [28]:
gold = pd.read_csv('processing_tables/gold_stat_dif.csv')
gold.head()

Unnamed: 0,mid,sum_gold_dif,gold_top_dif,gold_second_dif,radiant_top_gold_score,radiant_second_gold_score,dire_top_gold_score,dire_second_gold_score
0,0,-0.191463,-0.463171,-0.665978,-1.337223,-0.95813,0.249037,0.689982
1,1,-0.657847,-1.316314,0.611509,0.628305,1.584879,0.354146,0.575621
2,2,1.556804,2.277633,1.158354,3.398393,0.49806,-0.202526,-0.999247
3,3,0.793066,0.05073,-0.345696,-0.508247,-1.072844,-0.101398,0.89174
4,4,0.130693,0.426918,0.147465,0.050132,-0.343672,-0.524219,-0.221502


# Submit

In [58]:
gold = pd.read_csv('processing_tables/goldScoreWithCarry.csv')
heroes = pd.read_csv('processing_tables/only_heroes.csv')
items = pd.read_csv('processing_tables/only_items.csv')
xp = pd.read_csv('processing_tables/only_exp.csv')
xp_score = pd.read_csv('processing_tables/xp_score.csv')
xp = pd.merge(xp, xp_score, on='mid', how='left')
creeps = pd.read_csv('processing_tables/creeps.csv')
events = pd.read_csv('processing_tables/dummy_events.csv')

X_train, y_train, test = total_merge(gold, heroes, items, xp, creeps, events)

In [59]:
X_train.index = X_train.mid
X_train.drop('mid', 1,inplace=True)

X_test = test.copy()
X_test.index = test.mid
X_test.drop('mid', 1, inplace=True)

clf = Lasso(alpha=0.00015, max_iter=12000, selection='random')
print 'Lasso: ', np.mean(cross_val_score(clf, X_train, y_train, cv=5, scoring='roc_auc'))
clf.fit(X_train, y_train)

test_matches = pd.read_csv('data/test.csv')
test_matches['radiant_won'] = clf.predict(X_test)
test_matches.head()

Lasso:  0.766235028716


Unnamed: 0,mid,radiant_won
0,3,0.738834
1,7,0.56583
2,9,0.203988
3,10,0.409177
4,12,0.496458


In [56]:
test_matches.to_csv('submissions/all_data-5_with_norm_mid.csv', index=None)