### Simple ensemble of CatBoost, XGBoost, LightGBM, and Gated Recurrent Unit (GRU) predictions

In [1]:
class Config:
    """Static settings for this notebook: experiment name, CV constants, and per-environment paths."""

    # Experiment identifier; joined onto the output directory to form the experiment folder.
    name = "Ensembles/CXLG"

    # Cross-validation / reproducibility constants and the label column name.
    n_splits = 5
    seed = 2022
    target = "target"

    # --- Colab environment ---
    upload_from_colab = True
    api_path = "/content/drive/MyDrive/workspace/kaggle.json"
    drive_path = "/content/drive/MyDrive/workspace/kaggle-amex"

    # --- Kaggle environment ---
    kaggle_dataset_path = None

    # --- Reka environment ---
    # Project root that the input/, output/ and submissions/ folders are resolved against.
    dir_path = "/home/abe/kaggle/kaggle-amex"

In [2]:
import os
import json
import warnings
import shutil
import logging
import joblib
import random
import datetime
import sys
import gc
import multiprocessing
import joblib
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*" and
# 3.8 removed the old names, so pick whichever spelling this installation provides
# instead of failing on newer versions.
for _style_name in ('seaborn-v0_8-pastel', 'seaborn-pastel'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
import seaborn as sns
sns.set_palette("winter_r")

from tqdm.auto import tqdm
# Register tqdm's progress_apply helpers on pandas objects.
tqdm.pandas()
# Silence all warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

In [3]:
# Top-level project folders, all rooted at Config.dir_path.
INPUT, OUTPUT, SUBMISSION = [
    os.path.join(Config.dir_path, folder) for folder in ('input', 'output', 'submissions')
]

# Per-experiment output tree: <output>/<Config.name>/{model,fig,preds}.
OUTPUT_EXP = os.path.join(OUTPUT, Config.name)
EXP_MODEL, EXP_FIG, EXP_PREDS = [
    os.path.join(OUTPUT_EXP, folder) for folder in ('model', 'fig', 'preds')
]

# Create whatever is missing; OUTPUT and OUTPUT_EXP are created implicitly as
# parents of the EXP_* folders.
for d in (INPUT, SUBMISSION, EXP_MODEL, EXP_FIG, EXP_PREDS):
    os.makedirs(d, exist_ok=True)

In [4]:
# Load each model's submission file from the submissions folder.
# Paths go through the SUBMISSION constant defined with the other directories,
# so the folder name lives in exactly one place.
gru = pd.read_csv(os.path.join(SUBMISSION, "gru.csv"))
catboost = pd.read_csv(os.path.join(SUBMISSION, "catboost.csv"))
lgbm = pd.read_csv(os.path.join(SUBMISSION, "lgbm.csv"))
xgb = pd.read_csv(os.path.join(SUBMISSION, "xgb.csv"))

In [5]:
# Put every submission in the same customer_ID order with a fresh 0..n-1 index,
# so the frames can later be combined row by row.
gru, catboost, lgbm, xgb = (
    frame.sort_values(by='customer_ID').reset_index(drop=True)
    for frame in (gru, catboost, lgbm, xgb)
)

In [6]:
# Display the GRU predictions after sorting by customer_ID.
gru

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.014276
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.000603
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.022027
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.351459
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.897665
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.014919
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.800350
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.486699
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.406399


In [7]:
# Display the CatBoost predictions after sorting by customer_ID.
catboost

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.025189
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.001823
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.039974
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.243433
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.861667
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.010708
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.797325
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.375842
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.273093


In [8]:
# Display the XGBoost predictions after sorting by customer_ID.
xgb

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.033261
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.001208
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.062023
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.273247
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.861655
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.020789
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.802945
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.578994
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.299865


In [9]:
# Display the LightGBM predictions after sorting by customer_ID.
lgbm

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.024753
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.000849
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.056213
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.203234
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.906757
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.019561
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.741062
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.410902
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.387772


In [10]:
# Equal-weight blend of the four model submissions.
#
# Series addition pairs rows by index label, so the average is only meaningful when
# every frame lists the same customers in the same order. Verify that up front and
# fail loudly, rather than silently averaging mismatched rows or writing NaN
# predictions when one file has a different row count.
model_frames = {'gru': gru, 'catboost': catboost, 'lgbm': lgbm, 'xgb': xgb}
for model_name, model_frame in model_frames.items():
    if not model_frame['customer_ID'].equals(gru['customer_ID']):
        raise ValueError(
            f"customer_ID rows of '{model_name}' do not match 'gru'; "
            "cannot average the submissions row by row"
        )

ensembles = pd.DataFrame()
ensembles['customer_ID'] = gru['customer_ID']
ensembles['prediction'] = (gru['prediction'] + catboost['prediction'] + lgbm['prediction'] + xgb['prediction']) / 4
ensembles.to_csv(Config.dir_path + "/submissions/ensemble_cxlg.csv", index=False)

In [11]:
! kaggle competitions submit -c amex-default-prediction -f /home/abe/kaggle/kaggle-amex/submissions/ensemble_cxlg.csv -m "Ensemble with CatBoost, XGBoost, LightGBM, GatedRecurrentUnit"

100%|██████████████████████████████████████| 74.9M/74.9M [00:16<00:00, 4.89MB/s]
Successfully submitted to American Express - Default Prediction