#### ライブラリの読み込み

In [None]:
# ライブラリ
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

#### データの読み込み

In [None]:
# Load the competition CSV files from the Google Drive folder.
DIR = "/content/drive/MyDrive/コンペ/PCゲームの勝敗予測/"

train, test = (pd.read_csv(DIR + csv_name) for csv_name in ("train.csv", "test.csv"))

# The sample submission file has no header row.
sample_submit = pd.read_csv(DIR + "sample_submit.csv", header=None)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Display the (rows, columns) shape of the train and test frames.
train.shape, test.shape

((8000, 10), (2000, 9))

In [None]:
# Stack train and test row-wise into a single frame ...
df = pd.concat([train, test], axis=0)
# ... and drop the gameId column.
df = df.drop(columns="gameId")
df.head()

Unnamed: 0,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueTotalGold,blueTotalExperience,blueWins
0,0,5,8,6,0,0,14536,17256,0.0
1,1,10,1,5,0,0,14536,17863,0.0
2,0,3,10,2,0,0,17409,17256,0.0
3,1,7,10,8,0,0,19558,18201,0.0
4,0,4,9,4,0,0,17409,17256,0.0


In [None]:
# Recover the two sets from the combined frame: rows that have a blueWins
# value form the training set, rows where it is missing form the test set.
train = df.loc[df["blueWins"].notna()]
test = df.loc[df["blueWins"].isna()]

# Separate the features (X) from the target column (y).
X, y = train.drop(columns="blueWins"), train["blueWins"]
X_test = test.drop(columns="blueWins")

X.shape, y.shape, X_test.shape

((8000, 8), (8000,), (2000, 8))

#### データを訓練データと検証データに分割

In [None]:
# Hold-out split: 70% of the rows for training, 30% for validation.
# NOTE: the cross-validation loop in the next code cell reassigns these four
# names on every fold.
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,      # keep the class ratio of y in both parts
    random_state=0,  # fixed seed so the split is reproducible
)

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import KFold
from lightgbm import LGBMClassifier
from tqdm.notebook import tqdm

# 10-fold cross-validation. The shuffle is seeded so the fold assignment, and
# therefore the reported score, is the same on every run.
kf = KFold(n_splits=10, shuffle=True, random_state=0)

roc_mean = []   # validation ROC AUC of each fold
test_pred = []  # each fold's predicted positive-class probabilities for X_test

for train_index, valid_index in tqdm(kf.split(X), total=kf.get_n_splits()):
    # kf.split yields positional indices, so select rows by position (.iloc)
    # on both X and y instead of relying on y's index labels.
    X_train = X.iloc[train_index]
    X_valid = X.iloc[valid_index]
    y_train = y.iloc[train_index]
    y_valid = y.iloc[valid_index]

    # Fit a fresh LightGBM classifier on this fold's training part.
    lgb = LGBMClassifier(random_state=0)
    lgb.fit(X_train, y_train)

    # Score the fold with the probability of the positive class (column 1).
    lgb_valid_pred = lgb.predict_proba(X_valid)[:, 1]
    roc_mean.append(roc_auc_score(y_valid, lgb_valid_pred))

    # Keep this fold's test-set probabilities.
    lgb_test_pred = lgb.predict_proba(X_test)[:, 1]
    test_pred.append(lgb_test_pred)

  0%|          | 0/10 [00:00<?, ?it/s]

#### ROC AUC(交差検証の平均)

In [None]:
np.mean(roc_mean)

0.8699382199048857

#### 予測ラベルを作成

In [None]:
# Average the test-set probabilities over all cross-validation folds, so the
# submission uses every fold's model rather than only the last one.
mean_test_pred = np.mean(test_pred, axis=0)

# Threshold at 0.5: probabilities below 0.5 become 0, everything else 1.
sample_submit[1] = np.where(mean_test_pred < 0.5, 0, 1)

# Cast every column of the submission frame to int.
sample_submit = sample_submit.astype("int")
sample_submit

Unnamed: 0,0,1
0,9,1
1,15,1
2,18,1
3,23,0
4,31,1
...,...,...
1995,9971,0
1996,9980,0
1997,9983,1
1998,9996,1
