In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Lasso, Ridge

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
def normalize_data(X):
    """Return a scaled copy of the DataFrame ``X``.

    A fresh ``StandardScaler`` is fitted on ``X`` and applied to it in one
    step; the resulting array is wrapped in a new DataFrame that reuses the
    row index and column labels of ``X``.
    """
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(X)
    return pd.DataFrame(scaled_values, columns=X.columns, index=X.index)

def logEstimation(X, y):
    """Grid-search the ``C`` parameter of a logistic regression by ROC AUC.

    Each candidate is scored with 5-fold cross-validation over ``y.size``
    samples; the folds are shuffled with a fixed seed (241) and the
    classifier uses the same seed.

    Parameters
    ----------
    X : feature matrix passed straight to ``GridSearchCV.fit``.
    y : target vector; must expose ``.size`` (used to size the folds).

    Returns
    -------
    The fitted ``GridSearchCV`` object.
    """
    # Candidate values for C. The list used to contain 0.05 twice, which made
    # the search fit and score the very same model a second time. Each value
    # now appears once, in its order of first occurrence.
    grid = {'C': [0.05, 0.01, 0.1, 0.5, 0.75, 1]}
    kf = KFold(y.size, n_folds=5, shuffle=True, random_state=241)
    clf = LogisticRegression(random_state=241)
    gs = GridSearchCV(clf, grid, scoring='roc_auc', cv=kf)
    gs.fit(X, y)
    return gs

In [3]:
# Load the raw event log and preview its first five rows.
events = pd.read_csv('data/events.csv')
events.head(5)

Unnamed: 0,mid,event_type,from_team,time
0,0,3,radiant,1
1,1,3,radiant,222
2,2,3,dire,143
3,3,3,radiant,143
4,4,3,dire,53


In [4]:
# Load the train and test match tables (train first, then test).
train_matches, test_matches = (
    pd.read_csv(csv_path) for csv_path in ('data/train.csv', 'data/test.csv')
)

In [6]:
# Total number of matches across the train and test tables.
# `l` is otherwise only assigned in a later cell, so on a fresh kernel this
# cell used to fail with a NameError; compute it here from the frames loaded
# in the previous cell instead.
l = train_matches.shape[0] + test_matches.shape[0]
print(l)

49948


In [10]:
# Shape of the array of distinct match ids that occur in the event log.
events['mid'].unique().shape

(49057L,)

event_type — тип события

0 — Командой был забран Aegis

1 — Командой был украден Aegis

2 — Командой были разрушены бараки соперника

3 — Командой было сделано первое убийство героя соперника

4 — Командой был убит Roshan.

5 — Командой была разрушена своя башня

6 — Командой была разрушена башня соперника

In [5]:
# Build one row per match id with, for every event type, how many times the
# radiant team and the dire team triggered it.

# Total number of matches; the frame below is seeded with mids 0 .. l-1.
l = train_matches.shape[0] + test_matches.shape[0]

# Column-name stems: eventname[k] names the events whose event_type == k.
# NOTE: 'denay_tower' is misspelled, but the spelling is kept because it is
# part of the generated column names.
eventname = ['take_aegis', 'steal_aegis', 'destroy_barracks', 'make_fb', 'kill_roshan', 'denay_tower', 'destroy_tower']

dummy_events = pd.DataFrame(data=np.arange(l), columns=['mid'])

for event_index, event_label in enumerate(eventname):
    # Take an explicit copy of the selected rows so the column assignments
    # below act on an independent frame (no SettingWithCopyWarning).
    current_event = events.loc[events.event_type == event_index, ['mid', 'from_team']].copy()
    current_event['radiant_' + event_label] = (current_event.from_team == 'radiant').astype(int)
    current_event['dire_' + event_label] = (current_event.from_team == 'dire').astype(int)
    # Sum the 0/1 indicators per match to get per-team counts.
    current_event = current_event.drop('from_team', axis=1).groupby('mid').sum().reset_index()
    # Left merge keeps every mid; matches without this event get NaN here.
    dummy_events = pd.merge(dummy_events, current_event, on='mid', how='left')

# NaN means "event never happened in this match": turn it into 0 and restore
# integer dtype (the left merges promote the count columns to float).
dummy_events = dummy_events.fillna(0).astype(int)
dummy_events.head()

Unnamed: 0,mid,radiant_take_aegis,dire_take_aegis,radiant_steal_aegis,dire_steal_aegis,radiant_destroy_barracks,dire_destroy_barracks,radiant_make_fb,dire_make_fb,radiant_kill_roshan,dire_kill_roshan,radiant_denay_tower,dire_denay_tower,radiant_destroy_tower,dire_destroy_tower
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [11]:
# Condense the per-team event counts into radiant-minus-dire differences.

# Scaling applied to the tower differences.
# NOTE(review): the rationale for these two weights is not recorded in the
# notebook - confirm before changing them.
DESTROY_TOWER_WEIGHT = 0.2
DENAY_TOWER_WEIGHT = 0.5

# Start from the `mid` column of dummy_events so the new frame shares its row
# index; the Series assigned below then align with it by construction instead
# of relying on mid values happening to equal row labels.
dummy_events_cropped = dummy_events[['mid']].copy()
dummy_events_cropped['barracks_dif'] = dummy_events.radiant_destroy_barracks - dummy_events.dire_destroy_barracks
dummy_events_cropped['make_fb'] = dummy_events.radiant_make_fb - dummy_events.dire_make_fb
dummy_events_cropped['kill_roshan'] = dummy_events.radiant_kill_roshan - dummy_events.dire_kill_roshan
dummy_events_cropped['destroy_tower'] = (dummy_events.radiant_destroy_tower - dummy_events.dire_destroy_tower) * DESTROY_TOWER_WEIGHT
dummy_events_cropped['denay_tower'] = (dummy_events.radiant_denay_tower - dummy_events.dire_denay_tower) * DENAY_TOWER_WEIGHT
# Write the table without the row index.
dummy_events_cropped.to_csv('processing_tables/dummy_events_cropped.csv', index=False)
dummy_events_cropped.head()

Unnamed: 0,mid,barracks_dif,make_fb,kill_roshan,destroy_tower,denay_tower
0,0,0,1,0,0.0,0.0
1,1,0,1,0,0.0,0.0
2,2,0,-1,0,0.0,0.0
3,3,0,1,0,0.0,0.0
4,4,0,-1,0,0.0,0.0


In [15]:
# Column-wise maxima of the condensed table.
# NOTE(review): the saved output below lists a `pushing` column that the
# current code never creates - it looks stale; re-run to refresh it.
dummy_events_cropped.max(axis=0)

mid             49947
barracks_dif        3
make_fb             1
kill_roshan         1
pushing             5
dtype: int64

In [24]:
# Matches in which radiant destroyed at least one barracks.
dummy_events.loc[dummy_events['radiant_destroy_barracks'] != 0]

Unnamed: 0,mid,radiant_take_aegis,dire_take_aegis,radiant_steal_aegis,dire_steal_aegis,radiant_destroy_barracks,dire_destroy_barracks,radiant_make_fb,dire_make_fb,radiant_kill_roshan,dire_kill_roshan,radiant_denay_tower,dire_denay_tower,radiant_destroy_tower,dire_destroy_tower
14337,14337,0,0,0,0,3,0,1,0,0,0,0,0,6,1
34802,34802,0,0,0,0,1,0,1,0,0,0,0,0,4,1
40699,40699,0,0,0,0,2,0,1,0,0,0,0,0,4,1


In [25]:
# Look up match 14337 (the first row of the barracks filter output above) in the train table.
train_matches[train_matches['mid'] == 14337]

Unnamed: 0,mid,radiant_won
7116,14337,1


In [17]:
# Persist the per-match event counts (without the row index), then preview
# them. The preview has to be the last expression of the cell to be shown;
# placed before the write, its result was silently discarded.
dummy_events.to_csv('processing_tables/dummy_events.csv', index=False)
dummy_events.head()