In [1]:
import os
# Run the rest of the notebook from the project root so that the relative
# paths used in later cells resolve against it.
# NOTE(review): this is a hard-coded absolute path; confirm it matches the
# environment the notebook is executed in.
project_root = '/pc_win_loss'
os.chdir(project_root)

In [2]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
import xgboost as xgb
import catboost as catb


from functions.visualizer import *
from src.config import *

## データ取り込み

In [3]:
# Load the pickled frame; the path is relative to the working directory set
# by os.chdir in the first cell.
prep_pickle_path = 'output/df_prep.pkl'
df_prep = pd.read_pickle(prep_pickle_path)

# Preview the first rows as the cell output.
df_prep.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience,blueWins
0,0,5,8,6,0,0,14536,17256,0
1,1,10,1,5,0,0,14536,17863,0
2,0,3,10,2,0,0,17409,17256,0
3,1,7,10,8,0,0,19558,18201,0
4,0,4,9,4,0,0,17409,17256,0


# モデル構築

In [4]:
# Compare LightGBM, XGBoost and CatBoost on repeated random hold-out splits.
# Each list collects one validation accuracy per trial.
lgb_acc = []
xgb_acc = []
catb_acc = []

# Number of random splits; the split seed is reused as each model's seed.
n_trials = 5

# The features are every column except the target, so the list is identical
# for every split and is computed once instead of once per iteration.
feature_cols = list(df_prep.drop(COL_BLUEWINS, axis=1).columns)

for trial, random_state in enumerate(range(n_trials), start=1):
    print('trial: {}'.format(trial))

    # Hold out 20% of the rows for validation
    df_train, df_val = train_test_split(df_prep, test_size=0.2, random_state=random_state)

    train_x = df_train[feature_cols]
    train_y = df_train[COL_BLUEWINS]

    val_x = df_val[feature_cols]
    val_y = df_val[COL_BLUEWINS]

    print('score: accuracy')
    # LightGBM (library defaults apart from the seed)
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(val_x)
    # accuracy_score takes (y_true, y_pred); score once and reuse the value
    lgb_score = accuracy_score(val_y, lgb_pred_y)
    print('LightGBM -> {}'.format(lgb_score))
    lgb_acc.append(lgb_score)

    # XGBoost (fixed hyperparameters)
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05,
        max_depth=6,
        min_child_weight=9,
        n_estimators=200,
        random_state=random_state,
    )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(val_x)
    xgb_score = accuracy_score(val_y, xgb_pred_y)
    print('XGBoost -> {}'.format(xgb_score))
    xgb_acc.append(xgb_score)

    # CatBoost (verbose=False silences the per-iteration training log)
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'],
        random_seed=random_state,
    )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(val_x)
    catb_score = accuracy_score(val_y, catb_pred_y)
    print('CatBoost -> {}'.format(catb_score))
    catb_acc.append(catb_score)

# One row per trial, one column per model, plus a final 'mean' row holding
# the column-wise average over the trials.
df_acc = pd.DataFrame({
    'LightGBM': lgb_acc,
    'XGBoost': xgb_acc,
    'CatBoost': catb_acc
})
df_acc.loc['mean'] = df_acc.mean()

trial: 1
score: accuracy
LightGBM -> 0.806875
XGBoost -> 0.81
CatBoost -> 0.80375
trial: 2
score: accuracy
LightGBM -> 0.77625
XGBoost -> 0.7775
CatBoost -> 0.768125
trial: 3
score: accuracy
LightGBM -> 0.785
XGBoost -> 0.7875
CatBoost -> 0.784375
trial: 4
score: accuracy
LightGBM -> 0.794375
XGBoost -> 0.79125
CatBoost -> 0.799375
trial: 5
score: accuracy
LightGBM -> 0.795625
XGBoost -> 0.79125
CatBoost -> 0.8


In [5]:
# Display the per-trial validation accuracies of the three models; the last
# row ('mean') is the column-wise average added when df_acc was built.
df_acc

Unnamed: 0,LightGBM,XGBoost,CatBoost
0,0.806875,0.81,0.80375
1,0.77625,0.7775,0.768125
2,0.785,0.7875,0.784375
3,0.794375,0.79125,0.799375
4,0.795625,0.79125,0.8
mean,0.791625,0.7915,0.791125
