In [1]:
import os
os.chdir('/pc_win_loss')

In [2]:
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
import xgboost as xgb
import catboost as catb


from functions.visualizer import *
from src.config import *

## データ取り込み

In [3]:
df_prep = pd.read_pickle('output/df_prep.pkl')
df_prep.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience,blueWins
0,0,5,8,6,0,0,14536,17256,0
1,1,10,1,5,0,0,14536,17863,0
2,0,3,10,2,0,0,17409,17256,0
3,1,7,10,8,0,0,19558,18201,0
4,0,4,9,4,0,0,17409,17256,0


# モデル構築

In [4]:
random_state = 0

In [5]:
# 20%の被験者を選ぶ
df_train, df_val = train_test_split(df_prep, test_size=0.2, random_state=random_state)

print(len(df_train))
print(len(df_val))

6400
1600


### LightGBM

In [6]:
feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]

In [7]:
train_x.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience
1001,1,11,3,8,1,1,18274,18472
7360,0,6,11,7,0,0,20619,17256
5234,1,6,3,5,1,0,16961,18201
7390,0,9,5,8,0,0,18117,18472
6841,1,9,4,9,0,0,14536,17256


In [8]:
start_time = datetime.now()

model = lgb.LGBMClassifier(random_state=random_state)

# パラメータ調整
params = {
    "max_depth": [10,25],
    "learning_rate" : [0.005,0.01],
    "num_leaves": [100,300],
    "n_estimators": [100, 200]
}
# グリッドサーチの設定
lg_grid_search = GridSearchCV(model, n_jobs=-1, param_grid=params, cv = 3, scoring='accuracy', verbose=5)
lg_grid_search.fit(train_x,train_y)
print(lg_grid_search.best_params_)
print(lg_grid_search.best_index_)
print(lg_grid_search.best_score_)
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

Fitting 3 folds for each of 16 candidates, totalling 48 fits
{'learning_rate': 0.01, 'max_depth': 25, 'n_estimators': 200, 'num_leaves': 300}
15
0.772968860967469
Duration: 0:00:53.160328


## XGBoost

In [9]:
feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]

In [10]:
train_x.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience
1001,1,11,3,8,1,1,18274,18472
7360,0,6,11,7,0,0,20619,17256
5234,1,6,3,5,1,0,16961,18201
7390,0,9,5,8,0,0,18117,18472
6841,1,9,4,9,0,0,14536,17256


In [11]:
start_time = datetime.now()

model = xgb.XGBClassifier(random_state=random_state)

# パラメータ調整
params = {
    "max_depth": [3, 6, 10],
    "min_child_weight" : [3, 6, 9],
    "n_estimators": [100, 200],
    "learning_rate": [0.01, 0.05]
         }

# グリッドサーチの設定
xgb_grid_search = GridSearchCV(model, param_grid=params, cv = 3, verbose=3, n_jobs=-1, scoring='accuracy')
xgb_grid_search.fit(train_x,train_y)
print(xgb_grid_search.best_params_)
print(xgb_grid_search.best_index_)
print(xgb_grid_search.best_score_)
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

Fitting 3 folds for each of 36 candidates, totalling 108 fits
{'learning_rate': 0.05, 'max_depth': 6, 'min_child_weight': 9, 'n_estimators': 200}
29
0.7804684951798789
Duration: 0:02:40.944629


## CatBoost実装

In [12]:
feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]

In [13]:
train_x.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience
1001,1,11,3,8,1,1,18274,18472
7360,0,6,11,7,0,0,20619,17256
5234,1,6,3,5,1,0,16961,18201
7390,0,9,5,8,0,0,18117,18472
6841,1,9,4,9,0,0,14536,17256


In [14]:
start_time = datetime.now()

model = catb.CatBoostClassifier(random_seed=random_state)

# パラメータ調整
params = {
    'depth': [3],
    'learning_rate' : [0.15, 0.3],
    'l2_leaf_reg': [3],
    'iterations': [100, 200]
         }

# グリッドサーチの設定
ctb_grid_search = GridSearchCV(model, params, scoring='accuracy', cv = 3, verbose=2)
ctb_grid_search.fit(train_x, train_y)
print(ctb_grid_search.best_params_)
print(ctb_grid_search.best_index_)
print(ctb_grid_search.best_score_)
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
0:	learn: 0.6506037	total: 105ms	remaining: 10.4s
1:	learn: 0.6200097	total: 121ms	remaining: 5.92s
2:	learn: 0.5975910	total: 126ms	remaining: 4.07s
3:	learn: 0.5797255	total: 133ms	remaining: 3.18s
4:	learn: 0.5628853	total: 137ms	remaining: 2.6s
5:	learn: 0.5528655	total: 142ms	remaining: 2.22s
6:	learn: 0.5420031	total: 146ms	remaining: 1.94s
7:	learn: 0.5348477	total: 150ms	remaining: 1.73s
8:	learn: 0.5259448	total: 155ms	remaining: 1.56s
9:	learn: 0.5167969	total: 159ms	remaining: 1.43s
10:	learn: 0.5117711	total: 163ms	remaining: 1.32s
11:	learn: 0.5090601	total: 167ms	remaining: 1.23s
12:	learn: 0.5050759	total: 172ms	remaining: 1.15s
13:	learn: 0.5023359	total: 176ms	remaining: 1.08s
14:	learn: 0.4999936	total: 180ms	remaining: 1.02s
15:	learn: 0.4984483	total: 184ms	remaining: 966ms
16:	learn: 0.4969013	total: 188ms	remaining: 920ms
17:	learn: 0.4949750	total: 193ms	remaining: 879ms
18:	learn: 0.4921932	total: 198ms