#### Подключение Google Drive

In [0]:
# Toggle: when True the notebook mounts Google Drive via google.colab and
# reads/writes project files under the Drive path; when False it uses
# paths relative to the working directory.
g_drive = True

In [0]:
# Mount Google Drive at /content/gdrive, only when g_drive is enabled.
if g_drive:
    # Imported inside the branch so google.colab is only required when g_drive is set.
    from google.colab import drive
    drive.mount('/content/gdrive')

In [None]:
if g_drive:
    main_path = '/content/gdrive/My Drive/Projects/ai_contest_2019/final/'
    !pip install catboost
    !pip install lightgbm
else:
    main_path = ''

### Импорт библиотек

In [0]:
# %pylab inline
import pandas as pd
import numpy as np
import seaborn as sns
import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures

from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from datetime import datetime

### Вспомогательные функции
#### Submission

In [0]:
def subm(model, X_train, y_train, X_test, name):
    """Fit ``model`` and write its predicted probabilities as a submission CSV.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict_proba(X)``.
    X_train, y_train : training features and labels passed to ``model.fit``.
    X_test : DataFrame to predict on; its index becomes the CSV index.
    name : str
        Suffix placed after the timestamp in the output file name.

    Returns
    -------
    None. The file ``{main_path}submissions/{ddmm_HHMMSS}_{name}.csv`` is
    written with a single ``skilled_prob`` column.
    """
    model.fit(X_train, y_train)
    # The second column (index 1) of predict_proba is what gets submitted.
    y_pred = model.predict_proba(X_test)[:, 1]
    df_submission_ = pd.DataFrame({'skilled_prob': y_pred}, index=X_test.index)
    # 24-hour clock (%H): the 12-hour %I without an AM/PM marker gave morning
    # and afternoon runs the same file name, so one overwrote the other.
    cur_time = datetime.now().strftime('%d%m_%H%M%S')
    df_submission_.to_csv(f'{main_path}submissions/{cur_time}_{name}.csv')

In [0]:
def to_ans(X_test, y_pred, name):
    """Save already-computed predictions as a submission CSV.

    Writes ``{main_path}submissions/submission_{name}.csv`` containing one
    ``skilled_prob`` column holding ``y_pred``, indexed by ``X_test.index``.
    Returns ``None``.
    """
    out_file = f'{main_path}submissions/submission_{name}.csv'
    pd.DataFrame({'skilled_prob': y_pred}, index=X_test.index).to_csv(out_file)

#### Accuracy

In [0]:
def acc(model, X_train_s, y_train_s, X_test_s, y_test_s):
    """Fit ``model`` on the training split and return its hold-out accuracy.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train_s, y_train_s : features and labels used for fitting.
    X_test_s, y_test_s : features and true labels used for scoring.

    Returns
    -------
    The value returned by ``accuracy_score`` for the hold-out predictions.
    """
    model.fit(X_train_s, y_train_s)
    y_pred = model.predict(X_test_s)
    # accuracy_score expects (y_true, y_pred); the result is returned directly
    # so no local variable shadows this function's own name.
    return accuracy_score(y_test_s, y_pred)

### Feature engineering functions

In [0]:
# features table upload
def features_up(X, data_type):
    """Join the pre-computed feature table for ``data_type`` onto ``X``.

    Reads ``{main_path}data/features_{data_type}.csv`` (first CSV column used
    as the index), joins it to ``X`` on the index, prints ``data_type`` and
    returns the joined frame. ``X`` itself is not modified.
    """
    csv_path = f'{main_path}data/features_{data_type}.csv'
    extra_features = pd.read_csv(csv_path, index_col=0)
    joined = X.join(extra_features)
    print(data_type)
    return joined

In [0]:
# KD calc
def kd(X, data_type):
    """Add kill/death ratio columns ``kd`` and ``kd_x16`` to ``X`` in place.

    For each (kills, deaths) column pair the ratio is kills / deaths; where
    that ratio is ``+inf`` or NaN (e.g. zero deaths) the kills value itself
    is used instead. ``data_type`` is accepted for a uniform signature and is
    not used. Returns the same ``X`` object.
    """
    column_specs = (
        ('kd', 'kills', 'deaths'),
        ('kd_x16', 'avg_kills_x16', 'avg_deaths_x16'),
    )
    for ratio_col, kills_col, deaths_col in column_specs:
        X[ratio_col] = X[kills_col] / X[deaths_col]
        # Positive infinity -> fall back to the raw kills value.
        is_infinite = X[ratio_col] == np.inf
        X[ratio_col] = X[kills_col].where(is_infinite, other=X[ratio_col])
        # NaN (0 / 0) -> fall back to the raw kills value as well.
        is_missing = X[ratio_col].isnull()
        X[ratio_col] = X[kills_col].where(is_missing, other=X[ratio_col])
    return X

In [0]:
# KDA calc
def kda(X, data_type):
    """Add kills+assists sums and (kills+assists)/deaths ratios to ``X`` in place.

    Creates ``ka`` / ``kda`` from the match columns and ``ka_x16`` /
    ``kda_x16`` from the ``avg_*_x16`` columns. Where a ratio is ``+inf`` or
    NaN (e.g. zero deaths) the kills+assists sum is used instead.
    ``data_type`` is not used. Returns the same ``X`` object.
    """
    column_specs = (
        ('ka', 'kda', 'kills', 'assists', 'deaths'),
        ('ka_x16', 'kda_x16', 'avg_kills_x16', 'avg_assists_x16', 'avg_deaths_x16'),
    )
    for sum_col, ratio_col, kills_col, assists_col, deaths_col in column_specs:
        X[sum_col] = X[kills_col] + X[assists_col]
        X[ratio_col] = X[sum_col] / X[deaths_col]
        # Positive infinity -> fall back to the kills+assists sum.
        is_infinite = X[ratio_col] == np.inf
        X[ratio_col] = X[sum_col].where(is_infinite, other=X[ratio_col])
        # NaN (0 / 0) -> fall back to the kills+assists sum as well.
        is_missing = X[ratio_col].isnull()
        X[ratio_col] = X[sum_col].where(is_missing, other=X[ratio_col])
    return X

In [0]:
# ratios between player's gold and other
def gold_ratio(X, data_type):
    """Add gold-share features to ``X`` in place and return it.

    New columns:
      * ``gold_ratio_radiant`` - ``gold / radiant_gold``
      * ``gold_ratio_dire``    - ``gold / dire_gold``
      * ``sum_gold``           - ``dire_gold + radiant_gold``
      * ``gold_ratio_sum``     - ``gold / sum_gold``

    ``data_type`` is accepted for a uniform signature and is not used.
    """
    X['gold_ratio_radiant'] = X['gold'] / X['radiant_gold']
    # The dire ratio gets its own column; it used to be written into
    # 'gold_ratio_radiant', silently replacing the radiant ratio.
    X['gold_ratio_dire'] = X['gold'] / X['dire_gold']
    X['sum_gold'] = X['dire_gold'] + X['radiant_gold']
    X['gold_ratio_sum'] = X['gold'] / X['sum_gold']
    return X

In [0]:
# combinations of features
def comb(X, data_type):
    """Add product features to ``X`` in place and return it.

    ``gxpm_per_min_1`` is ``gold_per_min * xp_per_min`` and
    ``gxpm_per_min_1_avg`` is ``avg_gpm_x16 * avg_xpm_x16``.
    ``data_type`` is not used.
    """
    products = {
        'gxpm_per_min_1': ('gold_per_min', 'xp_per_min'),
        'gxpm_per_min_1_avg': ('avg_gpm_x16', 'avg_xpm_x16'),
    }
    for new_col, (left_col, right_col) in products.items():
        X[new_col] = X[left_col] * X[right_col]
    return X

In [0]:
# features per minute
def pm(X, data_type):
    """Add ``<column>_per_min`` features (column / ``duration``) to ``X`` in place.

    Every column named in ``col_to_pm`` is divided by ``X['duration']`` and
    stored as ``f'{name}_per_min'``. A derived column is skipped when a
    column with that name already exists in ``X`` on entry, so input
    features are never overwritten. Previously the source ``'gold'`` replaced
    the input ``'gold_per_min'`` column, and ``'gold_per_min_per_min'`` was
    then computed from the replaced values.

    Prints ``data_type`` and returns the same ``X`` object.

    NOTE(review): the previewed ``duration`` values (e.g. 1761) look like
    seconds, which would make these per-second rates despite the suffix -
    confirm against the data description.
    """
    col_to_pm = [
        'kills', 'deaths', 'assists',
        'denies', 'level', 'net_worth', 'gold', 'gold_spent', 'last_hits',
        'gold_per_min', 'xp_per_min', 'hero_damage', 'tower_damage',
        'hero_healing', 'scaled_hero_damage', 'scaled_tower_damage',
        'scaled_hero_healing', 'stuns', 'team_fight_participation',
        'observer_wards_placed', 'sentry_wards_placed', 'creeps_stacked',
        'camps_stacked', 'rune_pickups', 'tower_kills', 'roshan_kills',
        'nearby_creep_death_count', 'fight_score',
        'farm_score', 'support_score', 'push_score', 'radiant_gold',
        'dire_gold', 'sum_gold', 'gxpm_per_min_1', 'ka', 'kda',
    ]
    # Snapshot of the input column names, taken before anything is added.
    input_columns = set(X.columns)
    for name in col_to_pm:
        target = f'{name}_per_min'
        if target in input_columns:
            # e.g. 'gold' -> 'gold_per_min': keep the input feature intact.
            continue
        X[target] = X[name] / X['duration']
    print(data_type)
    return X

In [0]:
# player's hero id one-hot coding
def hero_to_cat(X, data_type):
    """One-hot encode ``hero_id`` into boolean ``is_hero_<id>`` columns, in place.

    Prints ``data_type``, then adds one column per distinct value found in
    ``X.hero_id`` and returns the same ``X`` object.

    NOTE(review): the set of columns depends on the hero ids present in the
    frame passed in, so frames encoded separately may end up with different
    columns - verify that train and test cover the same heroes.
    """
    print(data_type)
    hero_ids = X.hero_id
    for hero in set(hero_ids):
        X[f'is_hero_{hero}'] = hero_ids == hero
    return X

In [0]:
# apply all features
def all_features(X, data_type):
    """Run the full feature pipeline on ``X`` and return the resulting frame.

    Steps, in order: ``features_up``, ``kd``, ``kda``, ``gold_ratio``,
    ``hero_to_cat``, ``comb``, ``pm``; each receives the frame returned by the
    previous step together with ``data_type``. The ``player_team`` and
    ``winner_team`` columns are dropped from the result.
    """
    # Order matters: pm divides columns created by kda, gold_ratio and comb.
    pipeline = (features_up, kd, kda, gold_ratio, hero_to_cat, comb, pm)
    for step in pipeline:
        X = step(X, data_type)
    return X.drop(['player_team', 'winner_team'], axis=1)

### Data upload

In [0]:
# Training set: indexed by 'id'; the 'skilled' column is the target.
# Local alternative:
# df_train = pd.read_csv(f'{main_path}data/academy2019_final_train.csv', index_col='id')
df_train = pd.read_csv(
    'https://s3.eu-central-1.amazonaws.com/ai-academy-2019/public/final/academy2019_final_train.csv',
    index_col='id',
)
X_train = df_train.drop(columns=['skilled'])
y_train = df_train['skilled']

# Test set: same layout, read without splitting off a target.
# Local alternative:
# X_test = pd.read_csv(f'{main_path}data/academy2019_final_test.csv', index_col='id')
X_test = pd.read_csv(
    'https://s3.eu-central-1.amazonaws.com/ai-academy-2019/public/final/academy2019_final_test.csv',
    index_col='id',
)

In [19]:
# Preview the first rows of the raw training features (before feature engineering).
X_train.head()

Unnamed: 0_level_0,player_team,winner_team,duration,pre_game_duration,first_blood_time,first_blood_claimed,hero_id,hero_pick_order,leaver_status,party_players,...,avg_deaths_x16,avg_assists_x16,avg_gpm_x16,avg_xpm_x16,best_kills_x16,best_assists_x16,best_gpm_x16,best_xpm_x16,win_streak,best_win_streak
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,radiant,radiant,1761,90,52,0,6,5,0,1,...,7,11,514,601,24,28,681,797,5,5
1,dire,dire,2609,90,108,0,7,4,0,3,...,9,14,412,515,23,40,624,796,1,5
2,radiant,radiant,1811,90,102,1,108,5,0,1,...,7,15,458,499,5,15,458,499,1,1
3,radiant,radiant,2668,90,0,0,9,6,0,3,...,4,18,372,483,14,27,462,650,5,5
7,dire,radiant,1716,90,52,0,31,2,0,1,...,11,15,290,387,16,37,513,744,0,5


In [23]:
# Apply the same feature-engineering pipeline to the train and test frames.
# NOTE: X_train / X_test are rebound to the processed frames, so re-running
# this cell would feed already-processed data back into the pipeline;
# re-run the data upload cell first.
X_train = all_features(X_train, 'train')
X_test = all_features(X_test, 'test')

train
train
train
test
test
test


In [24]:
# Preview the training frame after feature engineering.
X_train.head()

Unnamed: 0_level_0,duration,pre_game_duration,first_blood_time,first_blood_claimed,hero_id,hero_pick_order,leaver_status,party_players,kills,deaths,...,fight_score_per_min,farm_score_per_min,support_score_per_min,push_score_per_min,radiant_gold_per_min,dire_gold_per_min,sum_gold_per_min,gxpm_per_min_1_per_min,ka_per_min,kda_per_min
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1761,90,52,0,6,5,0,1,10,4,...,0.000324,0.147882,0.0,1.937353,46.368541,32.105054,78.473595,241.226576,0.010221,0.002555
1,2609,90,108,0,7,4,0,3,7,8,...,0.000268,0.080858,0.191644,0.124462,33.400153,39.795707,73.19586,121.889996,0.008432,0.001054
2,1811,90,102,1,108,5,0,1,5,7,...,0.000552,0.086742,0.765323,0.215142,46.069023,28.260629,74.329652,126.196576,0.011044,0.001578
3,2668,90,0,0,9,6,0,3,13,2,...,0.0003,0.053304,1.65967,0.22193,40.895052,27.282609,68.177661,104.76012,0.014993,0.007496
7,1716,90,52,0,31,2,0,1,4,9,...,0.0005,0.032077,2.027972,0.107359,42.691142,30.452797,73.143939,78.407343,0.012238,0.00136


### Models

#### CatBoost

In [0]:
# Columns handed to CatBoost through its cat_features argument.
cat_f = ['hero_id', 'party_players']

In [0]:
# CatBoost classifier: depth 9, 1000 iterations, categorical columns from cat_f.
# task_type='GPU' requests GPU training, so this cell needs a GPU runtime.
cbc = CatBoostClassifier(depth=9, iterations=1000, cat_features = cat_f, task_type='GPU')

In [None]:
# Fit CatBoost on the full training set and write a timestamped submission file.
subm(cbc, X_train, y_train, X_test, 'cbc1000')

#### LightGBM

In [0]:
import lightgbm as lgb

In [0]:
# Train a LightGBM booster for 3000 rounds on the engineered features.
d_train = lgb.Dataset(X_train, label=y_train)
params = {
    # Without an explicit objective LightGBM fits a regression model, whose
    # outputs are not bounded to [0, 1]; 'binary' makes clf.predict return
    # probabilities, matching the 'skilled_prob' column they are saved under.
    # Assumes `skilled` is a 0/1 label - confirm against the data.
    'objective': 'binary',
    'learning_rate': 0.03,
    'max_depth': 10,
}
clf = lgb.train(params, d_train, 3000)

In [0]:
# LightGBM predictions for the test set.
pred = clf.predict(X_test)

In [0]:
# Save the LightGBM predictions as a submission file.
to_ans(X_test, pred, 'lbg3000')

#### Mean of the LightGBM and CatBoost predictions

In [0]:
# Fit CatBoost on the full training set and take column 1 of predict_proba.
# NOTE(review): subm(cbc, ...) above already fits this same model on the same
# data, so this fit repeats that training when both cells are run.
cbc.fit(X_train, y_train)
cbc_pred = cbc.predict_proba(X_test)[:, 1]

In [0]:
# Blend: equal-weight average of the LightGBM and CatBoost predictions.
to_ans(X_test, 0.5*pred+0.5*cbc_pred, 'lgb_cbc')