In [1]:
# Load IPython's autoreload extension and use mode 2, which re-imports all
# modules before each cell execution, so edits to imported .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import numpy as np
import pandas as pd

%matplotlib inline
from matplotlib import pylab as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV



In [3]:
import dota_ml_utils

In [None]:
data_url = 'https://dl.dropboxusercontent.com/u/67618204/dota_ml/data.zip'
if not os.path.exists('data/'):
    !wget $data_url -q --show-progress
    !tar -xvf data.zip
else:
    print('Data alredy exists')

In [None]:
# Feature switches, all set to True. One key is generated per combination of
# resource ('gold', 'lh' -- presumably last hits), statistic (last / speed /
# max) and aggregation level (player / team), in that nesting order.
data_params = {
    '{}_by_{}'.format(stat.format(resource), level): True
    for resource in ('gold', 'lh')
    for stat in ('last_{}', '{}_speed', 'max_{}')
    for level in ('player', 'team')
}

# Build the train and test sets from the files under 'data/'. Every entry of
# data_params is passed as a keyword argument; each flag appears to switch on
# one feature group (see the "Adding '...'" lines the helper prints).
train_df, test_df = dota_ml_utils.transform_data('data/', **data_params)

Adding 'last_gold_by_player'...
Adding 'last_gold_by_team'...
Adding 'gold_speed_by_player'...
Adding 'gold_speed_by_team'...
Adding 'max_gold_by_player'...
Adding 'max_gold_by_team'...
Adding 'last_lh_by_player'...


In [None]:
# Separate the label from the inputs: 'radiant_won' is the target column and
# every remaining column of train_df is used as a feature.
y_train = train_df.loc[:, 'radiant_won']
X_train = train_df.drop(['radiant_won'], axis='columns')

In [None]:
# Hyper-parameter search space for the random forest. Each value is a list of
# candidate settings; single-element lists pin that setting for every
# candidate.
rfc_model_param_grid = dict(
    n_estimators=[100, 1000, 5000],
    max_depth=[None, 2, 4, 6, 10],
    max_features=[None, 'sqrt', 'log2'],
    min_samples_leaf=[1, 2, 5, 100],
    criterion=['gini', 'entropy'],

    # Pinned for all candidates.
    oob_score=[True],
    random_state=[0],
)

# Base estimator created with the library defaults; it is handed to the
# hyper-parameter search together with rfc_model_param_grid.
rfc_model = RandomForestClassifier()

In [None]:
# Randomized hyper-parameter search over rfc_model_param_grid, scored by
# ROC AUC. n_iter is the number of parameter settings sampled from the grid.
n_iter = 1
rfc_gs = RandomizedSearchCV(rfc_model, rfc_model_param_grid,
                            scoring='roc_auc', cv=None, n_iter=n_iter,
                            refit=True, n_jobs=3, verbose=2,
                            # Seed the parameter sampler so the same
                            # candidates are drawn on every fresh run.
                            random_state=0)

# With refit=True the best candidate is retrained on the full training data
# once the search is done.
rfc_gs.fit(X_train, y_train)

---

In [None]:
# Pull the winning configuration out of the fitted search and report it.
best_score, best_estimator, best_params = (
    rfc_gs.best_score_,
    rfc_gs.best_estimator_,
    rfc_gs.best_params_,
)

print('best_score: {}'.format(best_score),
      'best params: {}'.format(best_params),
      sep='\n')

In [None]:
# Hand the test set and the refitted best estimator to the submission helper.
# The feature flags and the selected hyper-parameters are merged into a single
# dict (hyper-parameters win on a key clash) and passed along with the score.
dota_ml_utils.make_submission(
    test_df,
    best_estimator,
    'submissions/',
    'rfc',
    dict(data_params, **best_params),
    best_score,
)