In [11]:
import os
os.chdir('/pc_win_loss')

In [12]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
import xgboost as xgb
import catboost as catb


from functions.visualizer import *
from src.config import *

In [13]:
pd.set_option('display.max_rows', 400)

## データ取り込み

In [14]:
df_prep = pd.read_pickle('output/df_prep.pkl')
df_prep.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience,blueWins
0,0,5,8,6,0,0,14536,17256,0
1,1,10,1,5,0,0,14536,17863,0
2,0,3,10,2,0,0,17409,17256,0
3,1,7,10,8,0,0,19558,18201,0
4,0,4,9,4,0,0,17409,17256,0


In [15]:
df_prep.shape

(8000, 9)

# モデル構築

In [16]:
lgb_pred = pd.DataFrame()
xgb_pred = pd.DataFrame()
catb_pred = pd.DataFrame()

lgb_acc = []
xgb_acc = []
catb_acc = []

print('score: accuracy')
trial = 1

    
# 20%の被験者を選ぶ
df_train, df_val = train_test_split(df_prep, test_size=0.2)

feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]



for random_state in range(5):
    print('trial: {}'.format(trial))
    # LightGBM
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(val_x)
    print('LightGBM -> {}'.format(accuracy_score(lgb_pred_y, val_y)))
    
    lgb_pred['lgb_{}'.format(trial)] = lgb_pred_y
    lgb_acc.append(accuracy_score(lgb_pred_y, val_y))

    # XGBoost
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05, 
        max_depth=6, 
        min_child_weight=9, 
        n_estimators=200, 
        random_state=random_state
                             )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(val_x)
    print('XGBoost -> {}'.format(accuracy_score(xgb_pred_y, val_y)))

    xgb_pred['xgb_{}'.format(trial)] = xgb_pred_y
    xgb_acc.append(accuracy_score(xgb_pred_y, val_y))

    # CatBoost
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'], 
        random_seed=random_state
                                   )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(val_x)
    print('CatBoost -> {}'.format(accuracy_score(catb_pred_y, val_y)))

    catb_pred['catb_{}'.format(trial)] = catb_pred_y
    catb_acc.append(accuracy_score(catb_pred_y, val_y))
    
    trial += 1

print('__________training finished__________')
    
df_pred = pd.merge(lgb_pred, xgb_pred, left_index=True, right_index=True)
df_pred = pd.merge(df_pred, catb_pred, left_index=True, right_index=True)    
df_pred['count_0'] = (df_pred == 0).sum(axis=1)
df_pred['count_1'] = (df_pred == 1).sum(axis=1)
display(df_pred.head())

# 各モデルの正解率をデータフレームに格納
df_acc = pd.DataFrame({
    'LightGBM': lgb_acc, 
    'XGBoost': xgb_acc, 
    'CatBoost': catb_acc
})
df_acc.loc['mean'] = df_acc.mean()
print('__________result training__________')
display(df_acc)

# vote法でアンサンブル学習
df_pred['pred'] = (df_pred['count_0'] < df_pred['count_1']).astype(int)
df_pred[COL_BLUEWINS] = val_y.reset_index
acc_ensenble = accuracy_score(val_y.reset_index(drop=True), df_pred['pred'])
print('final accuracy: {}'.format(acc_ensenble))

score: accuracy
trial: 1
LightGBM -> 0.77375
XGBoost -> 0.774375
CatBoost -> 0.774375
trial: 2
LightGBM -> 0.77375
XGBoost -> 0.774375
CatBoost -> 0.7775
trial: 3
LightGBM -> 0.77375
XGBoost -> 0.774375
CatBoost -> 0.775625
trial: 4
LightGBM -> 0.77375
XGBoost -> 0.774375
CatBoost -> 0.775
trial: 5
LightGBM -> 0.77375
XGBoost -> 0.774375
CatBoost -> 0.779375
__________training finished__________


Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15
4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15


__________result training__________


Unnamed: 0,LightGBM,XGBoost,CatBoost
0,0.77375,0.774375,0.774375
1,0.77375,0.774375,0.7775
2,0.77375,0.774375,0.775625
3,0.77375,0.774375,0.775
4,0.77375,0.774375,0.779375
mean,0.77375,0.774375,0.776375


final accuracy: 0.77375


In [17]:
df_pred

Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1,pred,blueWins
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,<bound method Series.reset_index of 382 0\...
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,<bound method Series.reset_index of 382 0\...
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,<bound method Series.reset_index of 382 0\...
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1,<bound method Series.reset_index of 382 0\...
4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1,<bound method Series.reset_index of 382 0\...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,<bound method Series.reset_index of 382 0\...
1596,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1,<bound method Series.reset_index of 382 0\...
1597,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1,<bound method Series.reset_index of 382 0\...
1598,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1,<bound method Series.reset_index of 382 0\...


In [11]:
lgb_pred = pd.DataFrame()
xgb_pred = pd.DataFrame()
catb_pred = pd.DataFrame()

lgb_acc = []
xgb_acc = []
catb_acc = []

print('score: accuracy')
print()
trial = 1

    
# 20%の被験者を選ぶ
df_train, df_val = train_test_split(df_prep, test_size=0.2)

feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]



for random_state in range(5):
    print('trial: {}'.format(trial))
    # LightGBM
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(val_x)
    print('LightGBM -> {}'.format(accuracy_score(lgb_pred_y, val_y)))
    
    lgb_pred['lgb_{}'.format(trial)] = lgb_pred_y
    lgb_acc.append(accuracy_score(lgb_pred_y, val_y))

    # XGBoost
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05, 
        max_depth=6, 
        min_child_weight=9, 
        n_estimators=200, 
        random_state=random_state
                             )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(val_x)
    print('XGBoost -> {}'.format(accuracy_score(xgb_pred_y, val_y)))

    xgb_pred['xgb_{}'.format(trial)] = xgb_pred_y
    xgb_acc.append(accuracy_score(xgb_pred_y, val_y))

    # CatBoost
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'], 
        random_seed=random_state
                                   )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(val_x)
    print('CatBoost -> {}'.format(accuracy_score(catb_pred_y, val_y)))

    catb_pred['catb_{}'.format(trial)] = catb_pred_y
    catb_acc.append(accuracy_score(catb_pred_y, val_y))
    
    trial += 1

print('__________training finished__________')
    
df_pred = pd.merge(lgb_pred, xgb_pred, left_index=True, right_index=True)
df_pred = pd.merge(df_pred, catb_pred, left_index=True, right_index=True)    
df_pred['count_0'] = (df_pred == 0).sum(axis=1)
df_pred['count_1'] = (df_pred == 1).sum(axis=1)
display(df_pred.head())

# 各モデルの正解率をデータフレームに格納
df_acc = pd.DataFrame({
    'LightGBM': lgb_acc, 
    'XGBoost': xgb_acc, 
    'CatBoost': catb_acc
})
df_acc.loc['mean'] = df_acc.mean()
print('__________result training__________')
display(df_acc)

# votingでアンサンブル学習
df_pred['pred'] = (df_pred['count_0'] < df_pred['count_1']).astype(int)
acc_ensenble = accuracy_score(val_y.reset_index(drop=True), df_pred['pred'])
print('final accuracy: {}'.format(acc_ensenble))

score: accuracy
trial: 1
LightGBM -> 0.803125
XGBoost -> 0.796875
CatBoost -> 0.803125
trial: 2
LightGBM -> 0.803125
XGBoost -> 0.796875
CatBoost -> 0.8025
trial: 3
LightGBM -> 0.803125
XGBoost -> 0.796875
CatBoost -> 0.806875
trial: 4
LightGBM -> 0.803125
XGBoost -> 0.796875
CatBoost -> 0.801875
trial: 5
LightGBM -> 0.803125
XGBoost -> 0.796875
CatBoost -> 0.808125
__________training finished__________


Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15


__________result training__________


Unnamed: 0,LightGBM,XGBoost,CatBoost
0,0.803125,0.796875,0.803125
1,0.803125,0.796875,0.8025
2,0.803125,0.796875,0.806875
3,0.803125,0.796875,0.801875
4,0.803125,0.796875,0.808125
mean,0.803125,0.796875,0.8045


final accuracy: 0.80375


In [12]:
lgb_pred = pd.DataFrame()
xgb_pred = pd.DataFrame()
catb_pred = pd.DataFrame()

lgb_acc = []
xgb_acc = []
catb_acc = []

print('score: accuracy')
print()
trial = 1

    
# 20%の被験者を選ぶ
df_train, df_val = train_test_split(df_prep, test_size=0.2)

feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]



for random_state in range(5):
    print('trial: {}'.format(trial))
    # LightGBM
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(val_x)
    print('LightGBM -> {}'.format(accuracy_score(lgb_pred_y, val_y)))
    
    lgb_pred['lgb_{}'.format(trial)] = lgb_pred_y
    lgb_acc.append(accuracy_score(lgb_pred_y, val_y))

    # XGBoost
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05, 
        max_depth=6, 
        min_child_weight=9, 
        n_estimators=200, 
        random_state=random_state
                             )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(val_x)
    print('XGBoost -> {}'.format(accuracy_score(xgb_pred_y, val_y)))

    xgb_pred['xgb_{}'.format(trial)] = xgb_pred_y
    xgb_acc.append(accuracy_score(xgb_pred_y, val_y))

    # CatBoost
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'], 
        random_seed=random_state
                                   )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(val_x)
    print('CatBoost -> {}'.format(accuracy_score(catb_pred_y, val_y)))

    catb_pred['catb_{}'.format(trial)] = catb_pred_y
    catb_acc.append(accuracy_score(catb_pred_y, val_y))
    
    trial += 1

print('__________training finished__________')
    
df_pred = pd.merge(lgb_pred, xgb_pred, left_index=True, right_index=True)
df_pred = pd.merge(df_pred, catb_pred, left_index=True, right_index=True)    
df_pred['count_0'] = (df_pred == 0).sum(axis=1)
df_pred['count_1'] = (df_pred == 1).sum(axis=1)
display(df_pred.head())

# 各モデルの正解率をデータフレームに格納
df_acc = pd.DataFrame({
    'LightGBM': lgb_acc, 
    'XGBoost': xgb_acc, 
    'CatBoost': catb_acc
})
df_acc.loc['mean'] = df_acc.mean()
print('__________result training__________')
display(df_acc)

# votingでアンサンブル学習
df_pred['pred'] = (df_pred['count_0'] < df_pred['count_1']).astype(int)
acc_ensenble = accuracy_score(val_y.reset_index(drop=True), df_pred['pred'])
print('final accuracy: {}'.format(acc_ensenble))

score: accuracy

trial: 1
LightGBM -> 0.784375
XGBoost -> 0.78
CatBoost -> 0.78375
trial: 2
LightGBM -> 0.784375
XGBoost -> 0.78
CatBoost -> 0.785
trial: 3
LightGBM -> 0.784375
XGBoost -> 0.78
CatBoost -> 0.7825
trial: 4
LightGBM -> 0.784375
XGBoost -> 0.78
CatBoost -> 0.779375
trial: 5
LightGBM -> 0.784375
XGBoost -> 0.78
CatBoost -> 0.781875
__________training finished__________


Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0


__________result training__________


Unnamed: 0,LightGBM,XGBoost,CatBoost
0,0.784375,0.78,0.78375
1,0.784375,0.78,0.785
2,0.784375,0.78,0.7825
3,0.784375,0.78,0.779375
4,0.784375,0.78,0.781875
mean,0.784375,0.78,0.7825


final accuracy: 0.783125


In [None]:
lgb_pred = pd.DataFrame()
xgb_pred = pd.DataFrame()
catb_pred = pd.DataFrame()

lgb_acc = []
xgb_acc = []
catb_acc = []

print('score: accuracy')
print()
trial = 1

    
# 20%の被験者を選ぶ
df_train, df_val = train_test_split(df_prep, test_size=0.2)

feature_cols = list(df_train.drop(COL_BLUEWINS, axis=1).columns)

train_x = df_train.drop(COL_BLUEWINS, axis=1)
train_y = df_train[COL_BLUEWINS]

val_x = df_val.drop(COL_BLUEWINS, axis=1)
val_y = df_val[COL_BLUEWINS]



for random_state in range(5):
    print('trial: {}'.format(trial))
    # LightGBM
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(val_x)
    print('LightGBM -> {}'.format(accuracy_score(lgb_pred_y, val_y)))
    
    lgb_pred['lgb_{}'.format(trial)] = lgb_pred_y
    lgb_acc.append(accuracy_score(lgb_pred_y, val_y))

    # XGBoost
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05, 
        max_depth=6, 
        min_child_weight=9, 
        n_estimators=200, 
        random_state=random_state
                             )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(val_x)
    print('XGBoost -> {}'.format(accuracy_score(xgb_pred_y, val_y)))

    xgb_pred['xgb_{}'.format(trial)] = xgb_pred_y
    xgb_acc.append(accuracy_score(xgb_pred_y, val_y))

    # CatBoost
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'], 
        random_seed=random_state
                                   )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(val_x)
    print('CatBoost -> {}'.format(accuracy_score(catb_pred_y, val_y)))

    catb_pred['catb_{}'.format(trial)] = catb_pred_y
    catb_acc.append(accuracy_score(catb_pred_y, val_y))
    
    trial += 1

print('__________training finished__________')
    
df_pred = pd.merge(lgb_pred, xgb_pred, left_index=True, right_index=True)
df_pred = pd.merge(df_pred, catb_pred, left_index=True, right_index=True)    
df_pred['count_0'] = (df_pred == 0).sum(axis=1)
df_pred['count_1'] = (df_pred == 1).sum(axis=1)
display(df_pred.head())

# 各モデルの正解率をデータフレームに格納
df_acc = pd.DataFrame({
    'LightGBM': lgb_acc, 
    'XGBoost': xgb_acc, 
    'CatBoost': catb_acc
})
df_acc.loc['mean'] = df_acc.mean()
print('__________result training__________')
display(df_acc)

# votingでアンサンブル学習
df_pred['pred'] = (df_pred['count_0'] < df_pred['count_1']).astype(int)
acc_ensenble = accuracy_score(val_y.reset_index(drop=True), df_pred['pred'])
print('final accuracy: {}'.format(acc_ensenble))

score: accuracy

trial: 1
LightGBM -> 0.796875
XGBoost -> 0.79375
CatBoost -> 0.79
trial: 2
LightGBM -> 0.796875
XGBoost -> 0.79375
CatBoost -> 0.789375
trial: 3
LightGBM -> 0.796875
XGBoost -> 0.79375
CatBoost -> 0.79
trial: 4
LightGBM -> 0.796875
XGBoost -> 0.79375
