In [1]:
import os
os.chdir('/pc_win_loss')

In [2]:
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
import xgboost as xgb
import catboost as catb


from functions.visualizer import *
from src.config import *

In [3]:
pd.set_option('display.max_rows', 400)

## データ取り込み

In [4]:
df_prep = pd.read_pickle('output/df_prep.pkl')
df_test = pd.read_csv('input/test.csv')

In [5]:
print('df_prep')
display(df_prep.head())
print()
print('df_test')
display(df_test.head())

df_prep


Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience,blueWins
0,0,5,8,6,0,0,14536,17256,0
1,1,10,1,5,0,0,14536,17863,0
2,0,3,10,2,0,0,17409,17256,0
3,1,7,10,8,0,0,19558,18201,0
4,0,4,9,4,0,0,17409,17256,0



df_test


Unnamed: 0,gameId,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience
0,9,0,7,6,6,0,0,16961,18201
1,15,0,6,6,6,2,1,18513,18021
2,18,1,6,4,3,0,0,13475,17256
3,23,0,5,4,7,0,0,17409,17256
4,31,0,10,8,9,0,0,18117,18472


# 前処理

In [6]:
train_x = df_prep.drop(COL_BLUEWINS, axis=1)
train_y = df_prep[COL_BLUEWINS]

test_x = df_test.drop(COL_ID, axis=1)

# モデル構築

In [7]:
lgb_pred = pd.DataFrame()
xgb_pred = pd.DataFrame()
catb_pred = pd.DataFrame()

trial = 1


for random_state in range(5):
    print('trial: {}'.format(trial))
    # LightGBM
    lgb_clf = lgb.LGBMClassifier(random_state=random_state)
    lgb_clf.fit(train_x, train_y)
    lgb_pred_y = lgb_clf.predict(test_x)    
    lgb_pred['lgb_{}'.format(trial)] = lgb_pred_y

    # XGBoost
    xgb_clf = xgb.XGBClassifier(
        learning_rate=0.05, 
        max_depth=6, 
        min_child_weight=9, 
        n_estimators=200, 
        random_state=random_state
                             )
    xgb_clf.fit(train_x, train_y)
    xgb_pred_y = xgb_clf.predict(test_x)
    xgb_pred['xgb_{}'.format(trial)] = xgb_pred_y

    # CatBoost
    catb_clf = catb.CatBoostClassifier(
        custom_loss=['Accuracy'], 
        random_seed=random_state
                                   )
    catb_clf.fit(train_x, train_y, verbose=False)
    catb_pred_y = catb_clf.predict(test_x)
    catb_pred['catb_{}'.format(trial)] = catb_pred_y
    
    trial += 1

print('__________training finished__________')
    
df_pred = pd.merge(lgb_pred, xgb_pred, left_index=True, right_index=True)
df_pred = pd.merge(df_pred, catb_pred, left_index=True, right_index=True)    
df_pred['count_0'] = (df_pred == 0).sum(axis=1)
df_pred['count_1'] = (df_pred == 1).sum(axis=1)

# vote法でアンサンブル学習
df_pred['pred'] = (df_pred['count_0'] < df_pred['count_1']).astype(int)
display(df_pred.head())

print('_________________end_________________')

trial: 1
trial: 2
trial: 3
trial: 4
trial: 5
__________training finished__________


Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1,pred
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0
4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1


_________________end_________________


In [8]:
df_pred.head(100)

Unnamed: 0,lgb_1,lgb_2,lgb_3,lgb_4,lgb_5,xgb_1,xgb_2,xgb_3,xgb_4,xgb_5,catb_1,catb_2,catb_3,catb_4,catb_5,count_0,count_1,pred
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0
4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,15,1
9,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,13,2,0


In [9]:
# 提出用CSVを出力

df_submit = df_test.copy()
df_submit['pred'] = df_pred['pred']
df_submit = df_submit[[COL_ID, 'pred']]
df_submit.to_csv('output/submit.csv', index=None, header=None)

In [10]:
# 確認用
pd.read_csv('output/submit.csv').head(100)

Unnamed: 0,9,1
0,15,1
1,18,1
2,23,0
3,31,1
4,32,1
5,33,1
6,36,1
7,38,1
8,39,0
9,42,0
