In [1]:
import pandas as pd
import datetime
import logging
from sklearn.model_selection import KFold
import argparse
import json
import numpy as np

import os
import sys

# Resolve the project root (parent of the working directory on first run) to an
# absolute path only once. Re-running this cell then reuses the stored value
# instead of climbing one more directory level on every execution.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath('..')

# An absolute entry keeps pointing at the project root after the chdir below;
# a relative '..' entry would be resolved against the new working directory.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from utils import load_datasets, load_target
from logs.logger import log_best
from models.lgbm import train_and_predict

# Work from the project root so relative paths such as
# './configs/default.json' resolve from there.
os.chdir(PROJECT_ROOT)

%matplotlib inline

In [2]:
# Display the current working directory (changed by os.chdir in the setup cell).
os.getcwd()

'C:\\Users\\takuy\\work\\work_tokumoto\\splatoon_competition'

In [3]:
# Parse an empty argument list instead of sys.argv, so inside the notebook
# `--config` always takes its default value.
parser = argparse.ArgumentParser()
parser.add_argument('--config', default='./configs/default.json')
options = parser.parse_args(args=[])

# Read the JSON config inside a context manager so the file handle is closed
# as soon as parsing finishes rather than being left open.
with open(options.config) as config_file:
    config = json.load(config_file)

# Values stored under the 'features' and 'target_name' keys of the config.
feats = config['features']
logging.debug(feats)

target_name = config['target_name']

In [4]:
# Load the train/test feature frames and the training target through the
# project helpers, driven by the feature list and target name from the config.
# NOTE(review): return types are not visible here; later cells use the X_*
# objects as pandas DataFrames -- confirm against utils.load_datasets.
X_train_all, X_test = load_datasets(feats, target_name)
y_train_all = load_target(target_name)

In [5]:
# Preview the first rows of the training features.
X_train_all.head()

Unnamed: 0,A1-level,A2-level,A3-level,A4-level,B1-level,B2-level,B3-level,B4-level,enc_period,enc_game-ver,...,enc_A4-weapon,enc_A4-rank,enc_B1-weapon,enc_B1-rank,enc_B2-weapon,enc_B2-rank,enc_B3-weapon,enc_B3-rank,enc_B4-weapon,enc_B4-rank
0,139,118.0,13.0,10.0,28,26.0,68.0,31.0,0.515152,0.525161,...,0.530612,0.554801,0.501116,0.556006,0.570447,0.556006,0.598246,0.556006,0.498008,0.557492
1,198,77.0,198.0,123.0,83,118.0,168.0,151.0,0.466667,0.524499,...,0.542091,0.554401,0.503333,0.555556,0.585821,0.555556,0.592,0.555556,0.516249,0.556777
2,114,68.0,225.0,107.0,50,163.0,160.0,126.0,0.6,0.525747,...,0.504202,0.531184,0.493896,0.526357,0.505183,0.530684,0.512195,0.542117,0.529514,0.525678
3,336,131.0,189.0,41.0,273,189.0,194.0,391.0,0.571429,0.523781,...,0.529047,0.552484,0.584475,0.553457,0.579299,0.553457,0.488304,0.553457,0.552743,0.554913
4,299,97.0,96.0,136.0,101,45.0,246.0,160.0,0.58,0.524329,...,0.53627,0.51928,0.519149,0.518985,0.483444,0.518985,0.51358,0.518985,0.499267,0.518624


In [6]:
# List the column labels of the training and test data, one index per line,
# so the two can be compared by eye.
print(X_train_all.columns, X_test.columns, sep="\n")

Index(['A1-level', 'A2-level', 'A3-level', 'A4-level', 'B1-level', 'B2-level',
       'B3-level', 'B4-level', 'enc_period', 'enc_game-ver', 'enc_lobby-mode',
       'enc_lobby', 'enc_mode', 'enc_stage', 'enc_A1-weapon', 'enc_A1-rank',
       'enc_A2-weapon', 'enc_A2-rank', 'enc_A3-weapon', 'enc_A3-rank',
       'enc_A4-weapon', 'enc_A4-rank', 'enc_B1-weapon', 'enc_B1-rank',
       'enc_B2-weapon', 'enc_B2-rank', 'enc_B3-weapon', 'enc_B3-rank',
       'enc_B4-weapon', 'enc_B4-rank'],
      dtype='object')
Index(['A1-level', 'A2-level', 'A3-level', 'A4-level', 'B1-level', 'B2-level',
       'B3-level', 'B4-level', 'enc_period', 'enc_game-ver', 'enc_lobby-mode',
       'enc_lobby', 'enc_mode', 'enc_stage', 'enc_A1-weapon', 'enc_A1-rank',
       'enc_A2-weapon', 'enc_A2-rank', 'enc_A3-weapon', 'enc_A3-rank',
       'enc_A4-weapon', 'enc_A4-rank', 'enc_B1-weapon', 'enc_B1-rank',
       'enc_B2-weapon', 'enc_B2-rank', 'enc_B3-weapon', 'enc_B3-rank',
       'enc_B4-weapon', 'enc_B4-rank'],
  

In [7]:
# Show the (rows, columns) shape of the training and test data, one per line.
print(X_train_all.shape, X_test.shape, sep="\n")

(66125, 30)
(28340, 30)


In [8]:
# Total number of missing values across all training columns (expected to be 0).
X_train_all.isnull().sum().sum()

0

In [9]:
# Total number of missing values across all test columns (expected to be 0).
X_test.isnull().sum().sum()

0

In [10]:
# Per-column missing-value counts for the training data and then the test data,
# each preceded by a banner line.
print(
    "-------------------train-------------------",
    X_train_all.isnull().sum(),
    "-------------------test-------------------",
    X_test.isnull().sum(),
    sep="\n",
)

-------------------train-------------------
A1-level          0
A2-level          0
A3-level          0
A4-level          0
B1-level          0
B2-level          0
B3-level          0
B4-level          0
enc_period        0
enc_game-ver      0
enc_lobby-mode    0
enc_lobby         0
enc_mode          0
enc_stage         0
enc_A1-weapon     0
enc_A1-rank       0
enc_A2-weapon     0
enc_A2-rank       0
enc_A3-weapon     0
enc_A3-rank       0
enc_A4-weapon     0
enc_A4-rank       0
enc_B1-weapon     0
enc_B1-rank       0
enc_B2-weapon     0
enc_B2-rank       0
enc_B3-weapon     0
enc_B3-rank       0
enc_B4-weapon     0
enc_B4-rank       0
dtype: int64
-------------------test-------------------
A1-level          0
A2-level          0
A3-level          0
A4-level          0
B1-level          0
B2-level          0
B3-level          0
B4-level          0
enc_period        0
enc_game-ver      0
enc_lobby-mode    0
enc_lobby         0
enc_mode          0
enc_stage         0
enc_A1-weapon     0


In [14]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in display() only added a stray "None" to the output.
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28340 entries, 0 to 28339
Data columns (total 30 columns):
A1-level          28340 non-null int64
A2-level          28340 non-null float64
A3-level          28340 non-null float64
A4-level          28340 non-null float64
B1-level          28340 non-null int64
B2-level          28340 non-null float64
B3-level          28340 non-null float64
B4-level          28340 non-null float64
enc_period        28340 non-null float64
enc_game-ver      28340 non-null float64
enc_lobby-mode    28340 non-null float64
enc_lobby         28340 non-null float64
enc_mode          28340 non-null float64
enc_stage         28340 non-null float64
enc_A1-weapon     28340 non-null float64
enc_A1-rank       28340 non-null float64
enc_A2-weapon     28340 non-null float64
enc_A2-rank       28340 non-null float64
enc_A3-weapon     28340 non-null float64
enc_A3-rank       28340 non-null float64
enc_A4-weapon     28340 non-null float64
enc_A4-rank       28340 non-null f

None