# Baseline

FEなし => 

**LightGBM**<br>
LightGBM Average accuracy across all folds ===> 0.7793749999999999<br>
LightGBM Average conf-matrix across all folds  => [[631.8 181.4] [171.6 615.2]]<br>

**XGboost**<br>
XGB Average accuracy across all folds =======> 0.7762500000000001<br>
XGB Average conf-matrix across all folds =====>  [[663.8 149.4] [208.6 578.2]]<br>

**CatBoost**<br>
CatBoost Average accuracy across all folds ======> 0.7747499999999998<br>
CatBoost Average conf-matrix across all folds ====> [[637.8 175.4] [185.  601.8]]<br>

# Library

In [1]:
import pandas as pd
import numpy as np

import optuna
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

from datetime import datetime
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import StandardScaler


# Data Loading

In [2]:
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')

# Add New Feature

In [3]:
# 基準
# lgb = 0.7793749999999999
# xgb = 0.7762500000000001
# Cat = 0.7747499999999998

In [4]:
def add_features(df):                                                                               # lgb=0.7793, xgb=0.7762, cb=0.7747
    # df['assistRate'] = df['blueAssists'] / (df['blueKills'] + 1)                                  # lgb=0.7825, xgb=0.7713, cb=0.7832
    # df['blueHerald'] = (df['blueEliteMonsters'] - df['blueDragons']).clip(lower=0)                # lgb=0.7843, xgb=0.7746, cb=0.7745
    # df['eliteMonsterDragonRatio'] = df['blueEliteMonsters'] / (df['blueDragons'] + 1)             # lgb=0.7857, xgb=0.7782, cb=0.7824
    df['logTotalGold'] = np.log1p(df['blueTotalGold'])                                            # lgb=0.7964, xgb=0.7826, cb=0.7827
    # df['logTotalExperience'] = np.log1p(df['blueTotalExperience'])                                # lgb=0.7876, xgb=0.7722, cb=0.7777
    df['firstBloodKillRatio'] = df['blueFirstBlood'] * df['blueKills']                            # lgb=0.7920, xgb=0.7751, cb=0.7893
    # df['totalCombatPoints'] = df['blueKills'] + df['blueDeaths'] + df['blueAssists']              # lgb=0.7869, xgb=0.7360, cb=0.7870

    total_combat_actions = df['blueKills'] + df['blueDeaths'] + df['blueAssists'] 
    df['killRatio'] = df['blueKills'] / total_combat_actions                                      # lgb=0.7913, xgb=0.7747, cb=0.7811
    # df['deathRatio'] = df['blueDeaths'] / total_combat_actions                                    # lgb=0.7864, xgb=0.7848, cb=0.7860
    # df['assistRatio'] = df['blueAssists'] / total_combat_actions                                  # lgb=0.7863, xgb=0.7746, cb=0.7815

    # df['teamStrength'] = df['blueKills'] + df['blueAssists'] + df['blueEliteMonsters']            # lgb=0.7782, xgb=0.7246, cb=0.7869
    # df['dragonKillImpact'] = df['blueDragons'] / (df['blueKills'] + 1)                            # lgb=0.7870, xgb=0.7694, cb=0.7857
    df['eliteMonsterUtilization'] = df['blueEliteMonsters']\
    / (df['blueKills'] + df['blueAssists'] + 1)                                                   # lgb=0.7900, xgb=0.7746, cb=0.7885
    # df['goldExperienceRatio'] = df['blueTotalGold'] / (df['blueTotalExperience'] + 1)             # lgb=0.7911, xgb=0.7324, cb=0.7886
    # df['teamEfficiency'] = \
        # (df['blueKills'] + df['blueAssists'] + df['blueEliteMonsters']) / (df['blueDeaths'] + 1)  # lgb=0.7900, xgb=0.7762, cb=0.7840
    # df['killToMonsterRatio'] = df['blueKills'] / (df['blueEliteMonsters'] + 1)                    # lgb=0.7859, xgb=0.7758, cb=0.7800
    df['avgGoldPerKill'] = df['blueTotalGold'] / (df['blueKills'] + 1)                            # lgb=0.7933, xgb=0.7714, cb=0.7831
    # df['expToKillRatio'] = df['blueTotalExperience'] / (df['blueKills'] + 1)                      # lgb=0.7875, xgb=0.7781, cb=0.7820
    # df['killsMinusDeaths'] = df['blueKills'] - df['blueDeaths']                                   # lgb=0.7791, xgb=0.7879, cb=0.7884
    # df['killDeathRatio'] = df['blueKills'] / (df['blueKills'] + df['blueDeaths'])                 # lgb=0.7879, xgb=0.7752, cb=0.7863
    # df['avgExperiencePerKill'] = df['blueTotalExperience'] / (df['blueKills'] + 1)                # lgb=0.7886, xgb=0.7191, cb=0.7821
    df['killDeathDiff'] = (df['blueKills'] - df['blueDeaths']).clip(lower=0)                      # lgb=0.7895, xgb=0.7664, cb=0.7883
    # df['eliteMonsterHeraldRatio'] = df['blueEliteMonsters'] / (df['blueHerald'] + 1)              # lgb=0.7885, xgb=0.7553, cb=0.7829
    # df['goldXexperience'] = np.log1p(df['blueTotalGold'] * df['blueTotalExperience'])             # lgb=0.7828, xgb=0.7730, cb=0.7765
    # df['goldPerKill'] = df['blueTotalGold'] / (df['blueKills'] + 1)                               # lgb=0.7805, xgb=0.7525, cb=0.7742
    # df['experiencePerKill'] = df['blueTotalExperience'] / (df['blueKills'] + 1)                   # lgb=0.7809, xgb=0.7795, cb=0.7833
    # df['eliteMonstersPerGold'] = df['blueEliteMonsters'] / df['blueTotalGold']                  # lgb=0., xgb=0., cb=0.
    # df['eliteMonstersPerExperience'] = df['blueEliteMonsters'] / df['blueTotalExperience']      # lgb=0., xgb=0., cb=0.

    
    return df

- apply

In [5]:
train = add_features(train)
test = add_features(test)

# Data

In [6]:
# データの準備
X = train.drop(['blueWins', 'gameId'], axis=1)
y = train['blueWins']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

test_drop_gameId = test.drop('gameId', axis=1)

# test_drop_gameId_temp = test.drop('gameId', axis=1)

# # 特徴量の標準化
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_val = scaler.transform(X_val)  # 検証データも同じスケーラーを使用して標準化

# # テストデータも同じスケーラーを使用して標準化
# test_drop_gameId_scaled = scaler.transform(test_drop_gameId_temp)

# # NumPy配列からPandasデータフレームに変換
# X_train = pd.DataFrame(X_train, columns=X.columns)
# X_val = pd.DataFrame(X_val, columns=X.columns)
# test_drop_gameId = pd.DataFrame(test_drop_gameId_scaled, columns=test_drop_gameId_temp.columns)

# Model

- variable

In [7]:
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True)

lgb_threshold = 0.5
xgb_threshold = 0.5
cb_threshold = 0.5

# 各フォールドでのスコアを保存するリスト
lgb_fold_scores = []
xgb_fold_scores = []
cb_fold_scores = []

# 各フォール度での混同行列を保存するリスト
lgb_conf_scores = []
xgb_conf_scores = []
cb_conf_scores = []

# testを保存するリスト
lgb_test_predictions = []
xgb_test_predictions = []
cb_test_predictions = []

## LightGBM

In [8]:
for train_index, val_index in kf.split(X):
    X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # LightGBMではDMatrixの代わりに普通のデータフレームを使用します
    train_data = lgb.Dataset(X_train_fold, label=y_train_fold)
    val_data = lgb.Dataset(X_val_fold, label=y_val_fold, reference=train_data)

    # Optunaでのハイパーパラメータチューニング
    def objective(trial):
        param = {
            'objective': 'binary',
            'metric': 'binary_logloss',
            'verbosity': -1,
            'boosting_type': 'gbdt',
            'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
            'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
            'num_leaves': trial.suggest_int('num_leaves', 2, 256),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
            'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
            'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'feature_pre_filter': False,  # 特徴量の事前フィルタリングを無効化
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 100),  # Optunaでこの値を調整する
        }

        callbacks = lgb.early_stopping(stopping_rounds=100, verbose=False)
        gbm = lgb.train(param, train_data, valid_sets=[val_data], num_boost_round=1000, callbacks=[callbacks])
        preds = gbm.predict(X_val_fold, num_iteration=gbm.best_iteration)
        pred_labels = np.rint(preds)
        accuracy = accuracy_score(y_val_fold, pred_labels)
        return accuracy

    lgb_study = optuna.create_study(direction='maximize')
    lgb_study.optimize(objective, n_trials=100)

    # 各フォールドの最適パラメータでモデルを再学習
    lgb_best_params = lgb_study.best_params
    lgb_best_params['objective'] = 'binary'
    lgb_best_params['metric'] = 'binary_logloss'
    lgb_best_params['verbosity'] = -1

    lgb_final = lgb.train(lgb_best_params, train_data, num_boost_round=lgb_study.best_trial.number)

    # 各フォールドのモデル性能を評価
    lgb_y_val_pred = lgb_final.predict(X_val_fold, num_iteration=lgb_final.best_iteration)
    lgb_y_val_pred_int = np.rint(lgb_y_val_pred)
    lgb_fold_accuracy = accuracy_score(y_val_fold, lgb_y_val_pred_int)
    lgb_fold_scores.append(lgb_fold_accuracy)

    # 混同行列
    lgb_conf_matrix = confusion_matrix(y_val_fold, lgb_y_val_pred_int)
    lgb_conf_scores.append(lgb_conf_matrix)


    # テストデータセットでの予測
    lgb_y_test_pred_fold = lgb_final.predict(test_drop_gameId, num_iteration=lgb_final.best_iteration)
    lgb_test_predictions.append(lgb_y_test_pred_fold)

[I 2024-01-30 14:29:23,165] A new study created in memory with name: no-name-17596895-aa53-46fb-ad94-18164af64a9e
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
[I 2024-01-30 14:29:23,983] Trial 0 finished with value: 0.785625 and parameters: {'lambda_l1': 8.090472408217527, 'lambda_l2': 1.8879087441638584e-06, 'num_leaves': 30, 'feature_fraction': 0.9464603876159896, 'bagging_fraction': 0.7377053045101711, 'bagging_freq': 4, 'min_child_samples': 30, 'min_data_in_leaf': 25}. Best is trial 0 with value: 0.785625.
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_unif

- lgb_平均スコア

In [9]:
# 全フォールドの平均スコアを計算
lgb_average_score = sum(lgb_fold_scores) / n_splits
print(f"LightGBM Average accuracy across all folds: {lgb_average_score:.4f}")


lgb_average_conf = sum(lgb_conf_scores) / n_splits
print(f"LightGBM Average conf-matrix across all folds: {lgb_average_conf}")

LightGBM Average accuracy across all folds: 0.7839
LightGBM Average conf-matrix across all folds: [[630.  183.2]
 [162.6 624.2]]


- lgb_予測平均

In [10]:
# 予測の平均を計算
lgb_y_test_pred_avg = np.mean(lgb_test_predictions, axis=0)

# 最終的な予測結果を整数に変換
lgb_y_test_pred_int = (lgb_y_test_pred_avg > lgb_threshold).astype(int)

- output

In [11]:
# 現在の日時を取得してフォーマットする
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output = f'../data/output/{current_time}_LightGBM_cv{lgb_average_score:.4f}.csv'

submit = pd.DataFrame({
    '0': test['gameId'],
    '1': lgb_y_test_pred_int
})

# カラムヘッダー消去して出力
# ヘッダーなしでCSVファイルとして保存
submit.to_csv(output, header=False, index=False)

In [12]:
submit

Unnamed: 0,0,1
0,9,1
1,15,1
2,18,0
3,23,0
4,31,1
...,...,...
1995,9971,0
1996,9980,0
1997,9983,1
1998,9996,1


## XGBoost

In [13]:
for train_index, val_index in kf.split(X):
    X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    dtrain = xgb.DMatrix(X_train_fold, label=y_train_fold)
    dval = xgb.DMatrix(X_val_fold, label=y_val_fold)
    dtest = xgb.DMatrix(test_drop_gameId)

    # Optunaでのハイパーパラメータチューニング
    def objective(trial):
        param = {
            'verbosity': 0,
            'objective': 'binary:logistic',
            'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
            'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
            'max_depth': trial.suggest_int('max_depth', 3, 9),
            'eta': trial.suggest_loguniform('eta', 1e-8, 1.0),
            'subsample': trial.suggest_float('subsample', 0.5, 1),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1)
        }

        bst = xgb.train(param, dtrain, num_boost_round=1000, evals=[(dval, "eval")], early_stopping_rounds=100, verbose_eval=False)
        preds = bst.predict(dval)
        pred_labels = np.rint(preds)
        accuracy = accuracy_score(y_val_fold, pred_labels)
        return accuracy

    xgb_study = optuna.create_study(direction='maximize')
    xgb_study.optimize(objective, n_trials=100)

    # 各フォールドの最適パラメータでモデルを再学習
    xgb_best_params = xgb_study.best_params
    xgb_best_params['objective'] = 'binary:logistic'
    xgb_best_params['verbosity'] = 0

    xgb_final_bst = xgb.train(xgb_best_params, dtrain, num_boost_round=xgb_study.best_trial.number)

    # 各フォールドのモデル性能を評価
    xgb_y_val_pred = xgb_final_bst.predict(dval)
    xgb_y_val_pred_int = np.rint(xgb_y_val_pred)
    fold_accuracy = accuracy_score(y_val_fold, xgb_y_val_pred_int)
    xgb_fold_scores.append(fold_accuracy)

    # 混同行列
    xgb_conf_matrix = confusion_matrix(y_val_fold, xgb_y_val_pred_int)
    xgb_conf_scores.append(xgb_conf_matrix)

    # テストデータセットでの予測
    xgb_y_test_pred_fold = xgb_final_bst.predict(dtest)
    xgb_test_predictions.append(xgb_y_test_pred_fold)

[I 2024-01-30 14:32:45,186] A new study created in memory with name: no-name-87e7d1a3-84a1-4a83-82d5-72294342982f
  'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
  'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
  'eta': trial.suggest_loguniform('eta', 1e-8, 1.0),


[I 2024-01-30 14:32:45,736] Trial 0 finished with value: 0.778125 and parameters: {'alpha': 3.856590361142648e-06, 'lambda': 0.05025625649752977, 'max_depth': 4, 'eta': 0.5683665687754869, 'subsample': 0.8128337250230036, 'colsample_bytree': 0.694063739106369}. Best is trial 0 with value: 0.778125.
  'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
  'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
  'eta': trial.suggest_loguniform('eta', 1e-8, 1.0),
[I 2024-01-30 14:32:46,228] Trial 1 finished with value: 0.778125 and parameters: {'alpha': 6.454167050774014e-08, 'lambda': 2.303326753608073e-05, 'max_depth': 8, 'eta': 0.1023606643554213, 'subsample': 0.778698825321743, 'colsample_bytree': 0.7221117240598953}. Best is trial 0 with value: 0.778125.
  'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
  'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
  'eta': trial.suggest_loguniform('eta', 1e-8, 1.0),
[I 2024-01-30 14:32:50,633] Trial 2 finished with value: 0.7

- xgb_平均スコア

In [14]:
# 全フォールドの平均スコアを計算
xgb_average_score = sum(xgb_fold_scores) / n_splits
print(f"XGB Average accuracy across all folds: {xgb_average_score:.4f}")

xgb_average_conf = sum(xgb_conf_scores) / n_splits
print(f"XGB Average conf-matrix across all folds: {xgb_average_conf}")

XGB Average accuracy across all folds: 0.7266
XGB Average conf-matrix across all folds: [[676.6 136.6]
 [300.8 486. ]]


- xgb_予測平均

In [15]:
# 予測の平均を計算
xgb_y_test_pred_avg = np.mean(xgb_test_predictions, axis=0)

# 最終的な予測結果を整数に変換
xgb_y_test_pred_int = (xgb_y_test_pred_avg > xgb_threshold).astype(int)

- output

In [16]:
# 現在の日時を取得してフォーマットする
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output = f'../data/output/{current_time}_XGBoost_cv{xgb_average_score:.4f}.csv'

submit = pd.DataFrame({
    '0': test['gameId'],
    '1': xgb_y_test_pred_int
})

# カラムヘッダー消去して出力
# ヘッダーなしでCSVファイルとして保存
submit.to_csv(output, header=False, index=False)

## CatBoost

In [17]:
for train_index, val_index in kf.split(X):
    X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # Optunaでのハイパーパラメータチューニング
    def objective(trial):
        param = {
            'iterations': 1000,
            'depth': trial.suggest_int('depth', 4, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'random_strength': trial.suggest_int('random_strength', 1, 20),
            'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 100),
            'scale_pos_weight': trial.suggest_float('scale_pos_weight', 0.1, 10.0),
            'loss_function': 'Logloss'
        }

        model = cb.CatBoostClassifier(**param, verbose=False)
        model.fit(X_train_fold, y_train_fold, eval_set=(X_val_fold, y_val_fold), early_stopping_rounds=100)
        preds = model.predict(X_val_fold)
        accuracy = accuracy_score(y_val_fold, preds)
        return accuracy

    cb_study = optuna.create_study(direction='maximize')
    cb_study.optimize(objective, n_trials=100)

    # 各フォールドの最適パラメータでモデルを再学習
    cb_best_params = cb_study.best_params
    cb_best_params['loss_function'] = 'Logloss'
    model_fold = cb.CatBoostClassifier(**cb_best_params, verbose=False)
    model_fold.fit(X_train_fold, y_train_fold)

    # 各フォールドのモデル性能を評価
    cb_y_val_pred = model_fold.predict(X_val_fold)
    cb_fold_accuracy = accuracy_score(y_val_fold, cb_y_val_pred)
    cb_fold_scores.append(cb_fold_accuracy)

    # 混同行列
    cb_conf_matrix = confusion_matrix(y_val_fold, cb_y_val_pred)
    cb_conf_scores.append(cb_conf_matrix)

    # テストデータセットでの予測
    cb_y_test_pred_fold = model_fold.predict(test_drop_gameId)
    cb_test_predictions.append(cb_y_test_pred_fold)



[I 2024-01-30 14:48:52,886] A new study created in memory with name: no-name-e9aacf98-38a1-4b1d-9b45-d5c0cd1a8b86
[I 2024-01-30 14:48:53,927] Trial 0 finished with value: 0.715 and parameters: {'depth': 10, 'learning_rate': 0.2921139170362465, 'random_strength': 9, 'bagging_temperature': 0.022974829855684997, 'l2_leaf_reg': 96.11952373539836, 'scale_pos_weight': 4.520178244990523}. Best is trial 0 with value: 0.715.
[I 2024-01-30 14:48:54,537] Trial 1 finished with value: 0.755625 and parameters: {'depth': 8, 'learning_rate': 0.1778888765246378, 'random_strength': 3, 'bagging_temperature': 0.6435419668818809, 'l2_leaf_reg': 50.56034011356818, 'scale_pos_weight': 1.9062284619457084}. Best is trial 1 with value: 0.755625.
[I 2024-01-30 14:48:55,416] Trial 2 finished with value: 0.7675 and parameters: {'depth': 5, 'learning_rate': 0.19661803150061513, 'random_strength': 4, 'bagging_temperature': 0.08186433294070683, 'l2_leaf_reg': 88.274081526673, 'scale_pos_weight': 1.2797015392368813}. 

- Catboost平均スコア

In [18]:
# 全フォールドの平均スコアを計算
cb_average_score = sum(cb_fold_scores) / n_splits
print(f"CatBoost Average accuracy across all folds: {cb_average_score:.4f}")

cb_average_conf = sum(cb_conf_scores) / n_splits
print(f"CatBoost Average conf-matrix across all folds: {cb_average_conf}")

CatBoost Average accuracy across all folds: 0.7722
CatBoost Average conf-matrix across all folds: [[629.4 183.8]
 [180.6 606.2]]


- CB_予測平均

In [19]:
# 予測の平均を計算
cb_y_test_pred_avg = np.mean(cb_test_predictions, axis=0)

# 最終的な予測結果を整数に変換
cb_y_test_pred_int = (cb_y_test_pred_avg > cb_threshold).astype(int)

- output

In [20]:
# 現在の日時を取得してフォーマットする
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output = f'../data/output/{current_time}_CatBoost_cv{cb_average_score:.4f}.csv'

submit = pd.DataFrame({
    '0': test['gameId'],
    '1': cb_y_test_pred_int
})

# カラムヘッダー消去して出力
# ヘッダーなしでCSVファイルとして保存
submit.to_csv(output, header=False, index=False)

In [21]:
submit

Unnamed: 0,0,1
0,9,1
1,15,1
2,18,0
3,23,0
4,31,1
...,...,...
1995,9971,0
1996,9980,0
1997,9983,1
1998,9996,1


# 合成

- 予測リストに追加

In [22]:
# 予測結果をNumPy配列に変換
cb_predictions = np.array(cb_y_test_pred_int)  # CatBoostの予測結果
lgb_predictions = np.array(lgb_y_test_pred_int)  # LightGBMの予測結果
xgb_predictions = np.array(xgb_y_test_pred_int)  # XGBoostの予測結果

# 3つのモデルの予測結果を組み合わせる
combined_predictions = np.vstack((cb_predictions, lgb_predictions, xgb_predictions))

# 多数決で最終的な予測を決定（行方向に合計し、1.5より大きい場合は1とする）
final_predictions = np.mean(combined_predictions, axis=0) > 0.5
final_predictions = final_predictions.astype(int)

# 最終的な予測をCSVファイルに保存
merge_submit = pd.DataFrame({
    '0': test['gameId'],  # 適切なID列を使用
    '1': final_predictions
})
merge_submit.to_csv(f'../data/output/{current_time}_final_predictions.csv', index=False)

In [23]:
merge_submit

Unnamed: 0,0,1
0,9,1
1,15,1
2,18,0
3,23,0
4,31,1
...,...,...
1995,9971,0
1996,9980,0
1997,9983,1
1998,9996,1


# 

# Logistic classifier