In [1]:
# ====================================================
# Library
# ====================================================
import os
import gc
import warnings
warnings.filterwarnings('ignore')
import random
import scipy as sp
import numpy as np
import pandas as pd
import joblib
import itertools
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from itertools import combinations
import itertools

In [2]:
# ====================================================
# Get the difference
# ====================================================
def get_difference(data, num_features):
    """Return, per customer, the last first-order difference of each numeric feature.

    For every ``customer_ID`` group the selected columns are differenced row to
    row (``diff(1)``) and only the final row of that result is kept, i.e.
    "last row minus the row before it". A customer with a single row gets NaN
    for every feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``customer_ID`` column plus every column in
        ``num_features``. Rows are used in the order they appear within each
        customer (presumably chronological — verify against the caller).
    num_features : sequence of str
        Names of the columns to difference.

    Returns
    -------
    pandas.DataFrame
        One row per customer, in the key order produced by ``groupby``, with
        float32 columns named ``<feature>_diff1`` followed by ``customer_ID``.
        An empty frame with those columns is returned when ``data`` has no rows.
    """
    feature_cols = list(num_features)
    # Derive the output names from the argument, not from a variable leaked
    # out of the loop below (which does not exist when there are no groups).
    diff_cols = [col + '_diff1' for col in feature_cols]
    last_diffs = []
    customer_ids = []
    # Group by the bare column name: grouping by a one-element list makes
    # pandas >= 2.0 yield 1-tuples as keys, which would store ('id',) instead
    # of 'id' in customer_ID and break later merges on that column.
    for customer_id, df in tqdm(data.groupby('customer_ID')):
        # Keep only the last row of the row-to-row differences
        last_diffs.append(df[feature_cols].diff(1).iloc[[-1]].values.astype(np.float32))
        customer_ids.append(customer_id)
    if last_diffs:
        values = np.concatenate(last_diffs, axis = 0)
    else:
        # np.concatenate rejects an empty list; build a 0-row block instead
        values = np.empty((0, len(diff_cols)), dtype = np.float32)
    # Transform to dataframe
    df1 = pd.DataFrame(values, columns = diff_cols)
    # Add customer id
    df1['customer_ID'] = customer_ids
    return df1


In [3]:
# Disabled step: the code below is wrapped in a string literal, so this cell
# only evaluates to that text and loads nothing. When enabled it would read
# the raw train/test parquet files from ../input/.
'''train = pd.read_parquet('../input/train.parquet')
test = pd.read_parquet('../input/test.parquet')'''

"train = pd.read_parquet('../input/train.parquet')\ntest = pd.read_parquet('../input/test.parquet')"

In [4]:
# Disabled step (inert string literal; nothing is defined by running this cell).
# The quoted helper returns the names of columns whose null fraction is
# strictly below `threshold`, i.e. it keeps columns rather than deleting them.
'''# Delete columns with missing values above threshold:
def feature_filter(df, threshold=0.95):
    filtered_features = [col for col in df.columns if df[col].isnull().sum()/len(df) < threshold]
    return filtered_features'''

'# Delete columns with missing values above threshold:\ndef feature_filter(df, threshold=0.95):\n    filtered_features = [col for col in df.columns if df[col].isnull().sum()/len(df) < threshold]\n    return filtered_features'

In [5]:
# Disabled step (inert string literal).
# NOTE(review): the quoted code filters train and test independently, so the
# two frames may end up with different column sets if their null rates differ;
# confirm before re-enabling.
'''train = train[feature_filter(train)]
test = test[feature_filter(test)]'''

'train = train[feature_filter(train)]\ntest = test[feature_filter(test)]'

In [6]:
# Disabled step (inert string literal).
# NOTE(review): the quoted code imputes P_2 and P_3 on `train` only; `test` is
# not filled here. The origin of the fill constants 1.05 / 2.45 is not shown
# in this notebook.
'''# Replace null values with appropriate entity:
train["P_2"].fillna(1.05, inplace = True)
train["P_3"].fillna(2.45, inplace = True)'''

'# Replace null values with appropriate entity:\ntrain["P_2"].fillna(1.05, inplace = True)\ntrain["P_3"].fillna(2.45, inplace = True)'

In [7]:
# Disabled step (inert string literal).
# The quoted helper adds three columns in place: payment_default (P_2 * D_39),
# payment (P_2 * P_3 * P_4) and D_39_counts (frequency of each D_39 value).
# NOTE(review): D_39_counts is computed from whichever frame is passed in, so
# train and test get counts on different scales.
'''# Handpicked feature Engineering:
def handpicked(df):
    df['payment_default'] = df['P_2'] * df['D_39']
    df['payment'] = df.P_2 * df.P_3 * df.P_4
    temp = df['D_39'].value_counts().to_dict()
    df['D_39_counts'] = df['D_39'].map(temp)
    return df
handpicked(train)
handpicked(test)
print(len(train.columns), len(test.columns))'''

"# Handpicked feature Engineering:\ndef handpicked(df):\n    df['payment_default'] = df['P_2'] * df['D_39']\n    df['payment'] = df.P_2 * df.P_3 * df.P_4\n    temp = df['D_39'].value_counts().to_dict()\n    df['D_39_counts'] = df['D_39'].map(temp)\n    return df\nhandpicked(train)\nhandpicked(test)\nprint(len(train.columns), len(test.columns))"

In [8]:
# Disabled step (inert string literal).
# The quoted helper adds '<col>-P_2' and '<col>-P_3' difference columns for
# B_11, B_14, B_17, D_39, D_131, S_16 and S_23.
# NOTE(review): the guard tests `bcol in train.columns` (the global train
# frame) rather than `df.columns`, so a call on `test` consults train's
# columns; this would raise KeyError if test lacks a column train has.
'''#compute after-pay feature:
def after_pay(df):
    for bcol in [f'B_{i}' for i in [11,14,17]]+['D_39','D_131']+[f'S_{i}' for i in [16,23]]:
        for pcol in ['P_2','P_3']:
            if bcol in train.columns:
                df[f'{bcol}-{pcol}'] = df[bcol] - df[pcol]
after_pay(train)
after_pay(test)
print(len(train.columns), len(test.columns))'''

"#compute after-pay feature:\ndef after_pay(df):\n    for bcol in [f'B_{i}' for i in [11,14,17]]+['D_39','D_131']+[f'S_{i}' for i in [16,23]]:\n        for pcol in ['P_2','P_3']:\n            if bcol in train.columns:\n                df[f'{bcol}-{pcol}'] = df[bcol] - df[pcol]\nafter_pay(train)\nafter_pay(test)\nprint(len(train.columns), len(test.columns))"

In [9]:
# Disabled step (inert string literal).
# When enabled it would write the row-level frames to the two
# '*_smn_2.parquet' paths that read_preprocess_data() reads.
'''train.to_parquet('../input/train_smn_2.parquet')
test.to_parquet('../input/test_smn_2.parquet')'''

"train.to_parquet('../input/train_smn_2.parquet')\ntest.to_parquet('../input/test_smn_2.parquet')"

In [10]:
# ====================================================
# Read & preprocess data and save it to disk
# ====================================================
def _customer_level_features(data, num_features, cat_features):
    """Collapse row-level data into one engineered feature row per customer.

    Builds three blocks keyed by ``customer_ID`` and inner-joins them:
    numeric aggregates (mean/std/min/max/last), categorical aggregates
    (count/last/nunique) and the last first-order difference of every numeric
    feature (via ``get_difference``).
    """
    num_agg = data.groupby("customer_ID")[num_features].agg(['mean', 'std', 'min', 'max', 'last'])
    num_agg.columns = ['_'.join(x) for x in num_agg.columns]
    num_agg.reset_index(inplace = True)
    cat_agg = data.groupby("customer_ID")[cat_features].agg(['count', 'last', 'nunique'])
    cat_agg.columns = ['_'.join(x) for x in cat_agg.columns]
    cat_agg.reset_index(inplace = True)
    # Transform float64 columns to float32 (numeric aggregates only)
    for col in tqdm(list(num_agg.dtypes[num_agg.dtypes == 'float64'].index)):
        num_agg[col] = num_agg[col].astype(np.float32)
    # Transform int64 columns to int32 (categorical aggregates only)
    for col in tqdm(list(cat_agg.dtypes[cat_agg.dtypes == 'int64'].index)):
        cat_agg[col] = cat_agg[col].astype(np.int32)
    # Get the difference
    diff = get_difference(data, num_features)
    return num_agg.merge(cat_agg, how = 'inner', on = 'customer_ID').merge(diff, how = 'inner', on = 'customer_ID')


def read_preprocess_data(input_dir = '../input/'):
    """Build the per-customer train/test feature tables and save them to disk.

    Reads ``train_smn_2.parquet``, ``test_smn_2.parquet`` and
    ``train_labels.csv`` from ``input_dir``, aggregates both frames with the
    same feature recipe, attaches the labels to train with an inner join on
    ``customer_ID``, and writes ``train_fe_smn_2.parquet`` and
    ``test_fe_smn_2.parquet`` back to ``input_dir``. Returns nothing.

    Parameters
    ----------
    input_dir : str
        Directory holding the inputs and receiving the outputs. The default
        matches the paths this notebook previously hard-coded.
    """
    cat_features = [
        "B_30",
        "B_38",
        "D_114",
        "D_116",
        "D_117",
        "D_120",
        "D_126",
        "D_63",
        "D_64",
        "D_66",
        "D_68",
    ]
    train_rows = pd.read_parquet(os.path.join(input_dir, 'train_smn_2.parquet'))
    # Every column except the customer key and S_2 is a feature; the feature
    # lists are derived from train and reused unchanged for test.
    features = train_rows.drop(['customer_ID', 'S_2'], axis = 1).columns.to_list()
    num_features = [col for col in features if col not in cat_features]
    print('Starting training feature engineer...')
    train_labels = pd.read_csv(os.path.join(input_dir, 'train_labels.csv'))
    train = _customer_level_features(train_rows, num_features, cat_features).merge(train_labels, how = 'inner', on = 'customer_ID')
    # Release the row-level frame before loading test
    del train_rows
    gc.collect()
    test_rows = pd.read_parquet(os.path.join(input_dir, 'test_smn_2.parquet'))
    print('Starting test feature engineer...')
    test = _customer_level_features(test_rows, num_features, cat_features)
    del test_rows
    gc.collect()
    # Save files to disk
    train.to_parquet(os.path.join(input_dir, 'train_fe_smn_2.parquet'))
    test.to_parquet(os.path.join(input_dir, 'test_fe_smn_2.parquet'))

# Read & Preprocess Data
#read_preprocess_data()

In [11]:
# ====================================================
# Directory settings
# ====================================================
import os

# Experiment tag; it names the directory that sits next to the notebook folder.
expt_name = "lgbm_smn_5_fold_6"
OUTPUT_DIR = f'../{expt_name}/'
# exist_ok=True keeps the cell re-runnable without a separate existence check
# (and without the gap between checking and creating).
os.makedirs(OUTPUT_DIR, exist_ok=True)



In [12]:

# ====================================================
# Configurations
# ====================================================
class CFG:
    """Namespace of experiment-wide constants (read as class attributes)."""
    # --- data ---
    # Directory prefix; concatenated directly with file names, so it keeps
    # its trailing slash.
    input_dir = '../input/'
    # Name of the label column.
    target = 'target'

    # --- reproducibility / validation ---
    seed = 42
    # Presumably the number of cross-validation folds; its use is not visible
    # in this part of the notebook.
    n_folds = 5

    # --- LightGBM ---
    boosting_type = 'dart'
    metric = 'binary_logloss'

# ====================================================
# Seed everything
# ====================================================
def seed_everything(seed):
    """Seed the global RNGs used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and stores
    the seed in the ``PYTHONHASHSEED`` environment variable.
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting
    # it here presumably only affects child processes — confirm if hash
    # ordering matters for this run.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)


In [13]:
# ====================================================
# Read data
# ====================================================
# Load the per-customer feature tables that read_preprocess_data() writes.
train = pd.read_parquet(f'{CFG.input_dir}train_fe_smn_2.parquet')
test = pd.read_parquet(f'{CFG.input_dir}test_fe_smn_2.parquet')

In [14]:
# Preview the first rows of the engineered training frame.
train.head()

Unnamed: 0,customer_ID,P_2_mean,P_2_std,P_2_min,P_2_max,P_2_last,D_39_mean,D_39_std,D_39_min,D_39_max,D_39_last,B_1_mean,B_1_std,B_1_min,B_1_max,B_1_last,B_2_mean,B_2_std,B_2_min,B_2_max,B_2_last,R_1_mean,R_1_std,R_1_min,R_1_max,R_1_last,S_3_mean,S_3_std,S_3_min,S_3_max,S_3_last,D_41_mean,D_41_std,D_41_min,D_41_max,D_41_last,B_3_mean,B_3_std,B_3_min,B_3_max,B_3_last,D_42_mean,D_42_std,D_42_min,D_42_max,D_42_last,D_43_mean,D_43_std,D_43_min,D_43_max,D_43_last,D_44_mean,D_44_std,D_44_min,D_44_max,D_44_last,B_4_mean,B_4_std,B_4_min,B_4_max,B_4_last,D_45_mean,D_45_std,D_45_min,D_45_max,D_45_last,B_5_mean,B_5_std,B_5_min,B_5_max,B_5_last,R_2_mean,R_2_std,R_2_min,R_2_max,R_2_last,D_46_mean,D_46_std,D_46_min,D_46_max,D_46_last,D_47_mean,D_47_std,D_47_min,D_47_max,D_47_last,D_48_mean,D_48_std,D_48_min,D_48_max,D_48_last,D_49_mean,D_49_std,D_49_min,D_49_max,D_49_last,B_6_mean,B_6_std,B_6_min,B_6_max,B_6_last,B_7_mean,B_7_std,B_7_min,B_7_max,B_7_last,B_8_mean,B_8_std,B_8_min,B_8_max,B_8_last,D_50_mean,D_50_std,D_50_min,D_50_max,D_50_last,D_51_mean,D_51_std,D_51_min,D_51_max,D_51_last,B_9_mean,B_9_std,B_9_min,B_9_max,B_9_last,R_3_mean,R_3_std,R_3_min,R_3_max,R_3_last,D_52_mean,D_52_std,D_52_min,D_52_max,D_52_last,P_3_mean,P_3_std,P_3_min,P_3_max,P_3_last,B_10_mean,B_10_std,B_10_min,B_10_max,B_10_last,D_53_mean,D_53_std,D_53_min,D_53_max,D_53_last,S_5_mean,S_5_std,S_5_min,S_5_max,S_5_last,B_11_mean,B_11_std,B_11_min,B_11_max,B_11_last,S_6_mean,S_6_std,S_6_min,S_6_max,S_6_last,D_54_mean,D_54_std,D_54_min,D_54_max,D_54_last,R_4_mean,R_4_std,R_4_min,R_4_max,R_4_last,S_7_mean,S_7_std,S_7_min,S_7_max,S_7_last,B_12_mean,B_12_std,B_12_min,B_12_max,B_12_last,S_8_mean,S_8_std,S_8_min,S_8_max,S_8_last,D_55_mean,D_55_std,D_55_min,D_55_max,D_55_last,D_56_mean,D_56_std,D_56_min,D_56_max,D_56_last,B_13_mean,B_13_std,B_13_min,B_13_max,B_13_last,R_5_mean,R_5_std,R_5_min,R_5_max,R_5_last,D_58_mean,D_58_std,D_58_min,D_58_max,D_58_last,S_9_mean,S_9_std,S_9_min,S_9_max,S_9_last,B_14_mean,B_14_std,
B_14_min,B_14_max,B_14_last,D_59_mean,D_59_std,D_59_min,D_59_max,D_59_last,D_60_mean,D_60_std,D_60_min,D_60_max,D_60_last,D_61_mean,D_61_std,D_61_min,D_61_max,D_61_last,B_15_mean,B_15_std,B_15_min,B_15_max,B_15_last,S_11_mean,S_11_std,S_11_min,S_11_max,...,D_131-P_2_min,D_131-P_2_max,D_131-P_2_last,D_131-P_3_mean,D_131-P_3_std,D_131-P_3_min,D_131-P_3_max,D_131-P_3_last,S_16-P_2_mean,S_16-P_2_std,S_16-P_2_min,S_16-P_2_max,S_16-P_2_last,S_16-P_3_mean,S_16-P_3_std,S_16-P_3_min,S_16-P_3_max,S_16-P_3_last,S_23-P_2_mean,S_23-P_2_std,S_23-P_2_min,S_23-P_2_max,S_23-P_2_last,S_23-P_3_mean,S_23-P_3_std,S_23-P_3_min,S_23-P_3_max,S_23-P_3_last,B_30_count,B_30_last,B_30_nunique,B_38_count,B_38_last,B_38_nunique,D_114_count,D_114_last,D_114_nunique,D_116_count,D_116_last,D_116_nunique,D_117_count,D_117_last,D_117_nunique,D_120_count,D_120_last,D_120_nunique,D_126_count,D_126_last,D_126_nunique,D_63_count,D_63_last,D_63_nunique,D_64_count,D_64_last,D_64_nunique,D_66_count,D_66_last,D_66_nunique,D_68_count,D_68_last,D_68_nunique,P_2_diff1,D_39_diff1,B_1_diff1,B_2_diff1,R_1_diff1,S_3_diff1,D_41_diff1,B_3_diff1,D_42_diff1,D_43_diff1,D_44_diff1,B_4_diff1,D_45_diff1,B_5_diff1,R_2_diff1,D_46_diff1,D_47_diff1,D_48_diff1,D_49_diff1,B_6_diff1,B_7_diff1,B_8_diff1,D_50_diff1,D_51_diff1,B_9_diff1,R_3_diff1,D_52_diff1,P_3_diff1,B_10_diff1,D_53_diff1,S_5_diff1,B_11_diff1,S_6_diff1,D_54_diff1,R_4_diff1,S_7_diff1,B_12_diff1,S_8_diff1,D_55_diff1,D_56_diff1,B_13_diff1,R_5_diff1,D_58_diff1,S_9_diff1,B_14_diff1,D_59_diff1,D_60_diff1,D_61_diff1,B_15_diff1,S_11_diff1,D_62_diff1,D_65_diff1,B_16_diff1,B_17_diff1,B_18_diff1,B_19_diff1,B_20_diff1,S_12_diff1,R_6_diff1,S_13_diff1,B_21_diff1,D_69_diff1,B_22_diff1,D_70_diff1,D_71_diff1,D_72_diff1,S_15_diff1,B_23_diff1,P_4_diff1,D_74_diff1,D_75_diff1,D_76_diff1,B_24_diff1,R_7_diff1,D_77_diff1,B_25_diff1,B_26_diff1,D_78_diff1,D_79_diff1,R_8_diff1,R_9_diff1,S_16_diff1,D_80_diff1,R_10_diff1,R_11_diff1,B_27_diff1,D_81_diff1,D_82_diff1,S_17_diff1,R_12_diff1,B_28_dif
f1,R_13_diff1,D_83_diff1,R_14_diff1,R_15_diff1,D_84_diff1,R_16_diff1,B_29_diff1,S_18_diff1,D_86_diff1,D_87_diff1,R_17_diff1,R_18_diff1,B_31_diff1,S_19_diff1,R_19_diff1,B_32_diff1,S_20_diff1,R_20_diff1,R_21_diff1,B_33_diff1,D_89_diff1,R_22_diff1,R_23_diff1,D_91_diff1,D_92_diff1,D_93_diff1,D_94_diff1,R_24_diff1,R_25_diff1,D_96_diff1,S_22_diff1,S_23_diff1,S_24_diff1,S_25_diff1,S_26_diff1,D_102_diff1,D_103_diff1,D_104_diff1,D_105_diff1,D_106_diff1,D_107_diff1,B_36_diff1,B_37_diff1,R_26_diff1,R_27_diff1,D_108_diff1,D_109_diff1,D_111_diff1,D_112_diff1,B_40_diff1,S_27_diff1,D_113_diff1,D_115_diff1,D_118_diff1,D_119_diff1,D_121_diff1,D_122_diff1,D_123_diff1,D_124_diff1,D_125_diff1,D_127_diff1,D_128_diff1,D_129_diff1,B_41_diff1,D_130_diff1,D_131_diff1,D_132_diff1,D_133_diff1,R_28_diff1,D_135_diff1,D_136_diff1,D_137_diff1,D_138_diff1,D_139_diff1,D_140_diff1,D_141_diff1,D_142_diff1,D_143_diff1,D_144_diff1,D_145_diff1,payment_default_diff1,payment_diff1,D_39_counts_diff1,B_11-P_2_diff1,B_11-P_3_diff1,B_14-P_2_diff1,B_14-P_3_diff1,B_17-P_2_diff1,B_17-P_3_diff1,D_39-P_2_diff1,D_39-P_3_diff1,D_131-P_2_diff1,D_131-P_3_diff1,S_16-P_2_diff1,S_16-P_3_diff1,S_23-P_2_diff1,S_23-P_3_diff1,target
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0.933824,0.024194,0.86858,0.960384,0.934745,0.230769,0.83205,0,3,0,0.012007,0.006547,0.00193,0.021655,0.009382,1.005086,0.003222,1.000242,1.009672,1.007647,0.004509,0.003081,0.000263,0.009228,0.006104,0.113215,0.01167,0.098882,0.135021,0.135021,0.0,0.0,0.0,0.0,0.0,0.006456,0.002942,0.000783,0.009866,0.007174,,,,,,,,,,,0.0,0.0,0,0,0,2.846154,2.44425,0,6,5,0.725369,0.009515,0.708906,0.740102,0.740102,0.14665,0.047205,0.060492,0.231717,0.231717,0.0,0.0,0,0,0,0.378074,0.085674,0.231009,0.519619,0.420521,0.532874,0.006578,0.521311,0.542119,0.539715,0.240978,0.076875,0.135586,0.403448,0.192376,-1.0,0.0,-1,-1,-1,0.11351,0.04736,0.063902,0.221899,0.149564,0.036624,0.023195,0.001681,0.060502,0.058425,0.0,0.0,0.0,0.0,0.0,0.150326,0.002922,0.145179,0.154326,0.153461,2.923077,0.954074,2,4,2,0.00622,0.00318,0.000519,0.009535,0.009535,0.0,0.0,0,0,0,0.204972,0.0024,0.200782,0.208214,0.203524,0.680138,0.050671,0.581678,0.741813,0.629392,0.27028,0.181875,0.096219,0.741934,0.326101,,,,,,0.029112,0.014758,0.007165,0.054221,0.034643,0.00723,0.003031,0.002749,0.01026,0.01026,0.0,0.0,0,0,0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0,0,0,0.098374,0.026775,0.074646,0.161345,0.105671,0.125683,0.011772,0.11106,0.148266,0.112294,2510.0,429.583527,1544,3166,1544,0.224432,0.068116,0.148284,0.354596,0.187285,0.158571,0.004747,0.152025,0.166636,0.166636,0.100432,0.013723,0.074886,0.12074,0.100107,0.0,0.0,0,0,0,0.064803,0.069456,0.000267,0.158612,0.007174,0.039818,0.026706,0.007397,0.093935,0.007397,0.023142,0.013715,0.009725,0.056653,0.010239,7.769231,0.438529,7,8,8,0.534817,0.39213,0.141639,1.009424,0.258461,0.225847,0.071863,0.121276,0.383477,0.227637,0.026247,0.016911,0.007219,0.063955,0.014553,16.615385,1.660244,15,19,...,-0.960384,-0.86858,-0.934745,-0.680138,0.050671,-0.741813,-0.581678,-0.629392,-0.92885,0.024609,-0.955508,-0.860152,-0.934719,-0.675165,0.049278,-0.736937,-0.577378,-0.629366,-0.798788,0.0233,-0.820325,-0.737125,-0.802944,-0.545102,0.050
167,-0.603106,-0.447213,-0.497591,13,0,1,13,2,1,13,1,1,13,0,1,13,5,1,13,0,1,13,2,1,13,0,1,13,0,1,13,-1,1,13,6,1,-0.002604,0.0,-0.010455,-0.00066,0.005497,0.032036,0.0,-0.00028,,,0.0,-1.0,4e-05,0.099092,0.0,-0.005328,0.00144,-0.044856,0.0,-0.00059,-0.002076,0.0,0.004123,0.0,0.008471,0.0,-0.004036,-0.01796,0.000637,,-0.015503,0.003001,0.0,0.0,0.0,0.02832,0.001234,-836.0,-0.009234,0.000188,0.007926,0.0,0.001454,-0.00622,-0.019715,0.0,0.116822,-0.000491,-0.027712,-4.0,-0.007366,0.0,0.0,,0.0,0.0,0.0,-0.003377,-0.000863,-170.0,0.002029,-0.001965,0.0,0.0,0.065053,0.0,1.0,-0.003948,0.0,0.0,0.0,,-0.006484,0.0,0.004078,-0.003098,0.001055,0.0,0.0,0.0,0.0,-0.005,0.0,0.0,0.0,0.004499,0.0,0.0,-0.00054,0.0,-0.014099,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.002642,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.015536,-0.004521,-0.014052,-0.007142,-0.001815,0.020808,0.0,-0.000908,0.006881,0.0,0.0,-0.001942,-0.010028,0.0,0.002271,0.0,0.0,0.0,0.0,-0.000464,-0.050461,0.0,-0.000946,0.006405,0.009222,0.000738,0.0,0.0,0.0,0.0,0.0,-8.5e-05,0.0,0.0,0.0,0.0,,0.006088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-0.003376,0.0,0.0,0.0,0.0,0.005605,0.020961,-0.01711,-0.001755,,,0.002604,0.01796,0.002604,0.01796,-0.002396,0.012959,-0.001917,0.013439,0
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0.89982,0.022119,0.861109,0.929122,0.880519,7.153846,6.743468,0,19,6,0.025654,0.027756,0.006711,0.109644,0.034684,0.991083,0.051531,0.819772,1.008534,1.004028,0.006246,0.002129,0.001023,0.008996,0.006911,0.120578,0.023824,0.089799,0.165509,0.165509,0.0,0.0,0.0,0.0,0.0,0.005663,0.003354,0.000861,0.012861,0.005068,,,,,,0.144571,0.169598,0.060646,0.5256,0.060646,0.0,0.0,0,0,0,0.846154,0.800641,0,3,1,0.256461,0.009261,0.239459,0.267228,0.266275,0.035462,0.043899,0.004075,0.165146,0.027,0.0,0.0,0,0,0,0.452041,0.013177,0.432424,0.471737,0.438828,0.392433,0.006671,0.382562,0.402878,0.402195,0.048203,0.031312,0.010117,0.105999,0.014696,-1.0,0.0,-1,-1,-1,0.20227,0.015915,0.167634,0.226641,0.167634,0.028049,0.013631,0.015836,0.068204,0.028411,0.0,0.0,0.0,0.0,0.0,,,,,,1.153846,0.375534,1,2,1,0.010298,0.011024,0.001722,0.045093,0.012926,0.538462,0.518875,0,1,1,0.158313,0.06703,0.103495,0.242366,0.242366,0.566665,0.03688,0.510142,0.619012,0.570898,0.298815,0.003047,0.294,0.302757,0.29713,,,,,,0.016785,0.017104,0.002045,0.052949,0.043929,0.013792,0.021041,0.000416,0.081246,0.01457,0.0,0.0,0,0,0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0,0,0,0.103002,0.035143,0.072583,0.208516,0.208516,0.025823,0.004665,0.01905,0.032917,0.01905,1286.461548,772.374573,0,2402,1284,0.048069,0.007596,0.036112,0.06077,0.036112,0.705671,0.01854,0.684371,0.748383,0.748383,0.046753,0.024456,0.008499,0.073904,0.017684,0.0,0.0,0,0,0,0.005146,0.002801,4e-06,0.009756,0.009756,0.033809,0.052705,0.006782,0.127805,0.127805,0.014848,0.014395,0.001797,0.057174,0.018667,15.923077,0.27735,15,16,15,0.32653,0.221335,0.059118,0.857541,0.411989,0.053319,0.030845,0.015966,0.103947,0.048978,0.00556,0.00292,9.5e-05,0.009642,0.009538,14.230769,3.244324,10,23,...,-0.929122,-0.861109,-0.880519,-0.566665,0.03688,-0.619012,-0.510142,-0.570898,-0.895608,0.022676,-0.927894,-0.852205,-0.873588,-0.562453,0.036707,-0.617784,-0.507154,-0.563967,-0.764205,0.022943,-0.793682,-0.723884,-0.747654,-0
.431051,0.038022,-0.483571,-0.376399,-0.438033,13,0,1,13,2,1,13,1,1,13,0,1,13,0,1,13,0,2,13,2,1,13,3,1,13,0,1,13,-1,1,13,6,1,0.001663,-12.0,0.000126,-0.001391,-0.000337,0.006024,0.0,-0.003665,,-0.001382,0.0,0.0,0.002539,0.016845,0.0,-0.031395,0.003588,0.004579,0.0,-0.044039,-0.004767,0.0,,0.0,0.001192,0.0,0.003206,0.053055,-0.004486,,0.037568,-0.000448,0.0,0.0,0.0,0.072336,-0.003547,288.0,-0.011404,0.05266,0.009185,0.0,0.001132,,-0.004625,-1.0,-0.112783,-0.054044,0.005553,-2.0,-0.004542,0.0,0.0,,0.0,0.0,0.0,0.00395,0.005626,0.0,-0.002191,0.001692,0.0,0.0,-0.002879,0.0,-1.0,-0.000241,0.0,0.0,0.0,,-0.006784,0.0,0.003503,-0.011997,-0.007419,0.0,0.0,0.0,0.0,0.006864,0.0,0.0,0.0,0.005359,0.0,0.0,0.001794,0.0,0.004686,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,-0.004191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.073286,-0.007379,-0.080668,0.001961,0.001206,0.000189,0.0,0.0,,0.0,0.0,0.002813,0.002615,0.0,-0.001804,0.0,0.0,0.0,0.0,-6e-06,0.288733,0.0,0.005435,0.007249,-0.007188,0.011845,0.0,0.0,0.0,0.0,0.0,-0.007516,0.0,0.0,0.0,0.0,,-0.006597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000641,0.0,-10.536292,0.0,33523.0,-0.002111,-0.053503,-0.006288,-0.057681,,,-12.001662,-12.053055,-0.001663,-0.053055,0.005201,-0.046191,-0.009042,-0.060434,0
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0.878454,0.028911,0.79767,0.904482,0.880875,0.0,0.0,0,0,0,0.004386,0.002786,0.001472,0.009997,0.004284,0.815677,0.003545,0.810796,0.819987,0.812649,0.006621,0.001919,0.00354,0.009443,0.00645,,,,,,0.0,0.0,0.0,0.0,0.0,0.005493,0.002834,0.000626,0.009383,0.007196,,,,,,,,,,,0.076923,0.27735,0,1,0,2.230769,1.69085,1,7,2,0.236871,0.008896,0.222406,0.251598,0.251598,0.004618,0.003043,0.000215,0.008656,0.001557,0.0,0.0,0,0,0,0.464475,0.060166,0.413028,0.647064,0.433713,0.328617,0.007183,0.31829,0.339566,0.339125,0.092284,0.060616,0.030227,0.255134,0.08037,-1.0,0.0,-1,-1,-1,0.176674,0.024615,0.129857,0.213943,0.183628,0.034433,0.015459,0.021261,0.079764,0.026981,0.0,0.0,0.0,0.0,0.0,,,,,,0.615385,0.50637,0,1,1,0.00473,0.003302,0.000422,0.009521,0.009392,0.0,0.0,0,0,0,0.199863,0.00299,0.195188,0.203649,0.202159,0.618191,0.075604,0.381123,0.678706,0.628938,0.273711,0.052875,0.162125,0.302619,0.296313,,,,,,0.005948,0.002943,0.001054,0.00873,0.001824,0.004683,0.002312,0.000111,0.007619,0.005092,1.0,0.0,1,1,1,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0,0,0,,,,,,0.011541,0.002969,0.0061,0.015486,0.007158,0.0,0.0,0,0,0,0.077362,0.016318,0.057529,0.09923,0.098963,0.208154,0.003188,0.20153,0.211538,0.209386,0.003778,0.002688,0.000427,0.008332,0.001749,0.0,0.0,0,0,0,0.023569,0.037544,0.000726,0.093983,0.002847,,,,,,0.004729,0.003074,0.000684,0.008507,0.006699,15.923077,0.27735,15,16,15,0.004735,0.002602,0.000553,0.00855,0.00282,0.109526,0.061762,0.040357,0.249231,0.137834,0.004716,0.002986,1.9e-05,0.009969,0.006031,12.0,0.0,12,12,...,-0.904482,-0.79767,-0.880875,-0.618191,0.075604,-0.678706,-0.381123,-0.628938,-0.874368,0.0295,-0.903706,-0.793167,-0.871569,-0.614105,0.075772,-0.671846,-0.376619,-0.619633,-0.743706,0.030058,-0.772993,-0.657581,-0.748183,-0.483443,0.077091,-0.544554,-0.241034,-0.496247,13,0,1,13,1,1,13,1,2,13,0,1,13,0,1,13,0,1,13,2,1,13,3,1,13,2,1,13,-1,1,13,6,1,0.014532,0.0,-0.004034,-0.006303,-0.000306,,0.0,0.002823,,,0.0,-2.0,0.00
6082,0.001342,0.0,-0.053204,-0.000441,-0.059945,0.0,0.00556,-0.015999,0.0,,0.0,0.00897,0.0,0.006024,0.041822,-0.005711,,-0.002897,-0.002436,0.0,0.0,0.0,,0.001058,0.0,-0.000267,-0.002152,-0.000395,0.0,-0.006358,,0.004024,-1.0,-0.005039,0.060685,0.002519,0.0,-0.002247,0.0,0.0,,0.0,0.0,0.0,0.003069,0.007775,0.0,-0.006896,0.008785,0.0,0.0,0.001042,0.0,0.0,-0.007777,0.0,0.0,0.0,,-0.002833,0.0,0.0054,0.006263,0.006181,0.0,0.0,0.0,0.0,0.007835,0.0,0.0,0.0,-0.000455,0.0,0.0,-0.006686,0.0,-0.020807,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,-0.002523,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000909,-0.00167,0.000263,-0.001102,-0.002282,0.005371,0.0,0.0,,0.0,0.0,-0.000963,-0.005231,0.0,-0.001479,0.0,0.0,0.0,0.0,-0.014236,,0.0,-0.000573,0.007793,0.009068,0.001451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.007902,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-0.006491,0.0,0.0,0.0,0.0,-0.016968,-0.044258,-0.010508,-0.037797,,,-0.014532,-0.041822,-0.014532,-0.041822,-0.006697,-0.033986,-0.016202,-0.043491,0
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0.598969,0.020107,0.567442,0.623392,0.621776,1.538462,3.017045,0,9,0,0.059876,0.080531,0.00591,0.279991,0.012564,0.955264,0.080981,0.812053,1.009999,1.006183,0.005665,0.003473,0.000199,0.009915,0.007829,0.24775,0.095122,0.149216,0.40742,0.287766,0.0,0.0,0.0,0.0,0.0,0.006423,0.00336,5.3e-05,0.010927,0.009937,,,,,,0.061026,0.041993,0.006633,0.149891,0.046104,0.0,0.0,0,0,0,2.230769,2.832956,0,8,0,0.069334,0.008501,0.056394,0.085103,0.085103,0.088374,0.074462,0.000228,0.283781,0.118818,0.0,0.0,0,0,0,0.431905,0.030525,0.384254,0.471676,0.410723,0.403269,0.006355,0.39223,0.414224,0.414224,0.076686,0.063902,0.005276,0.177252,0.013057,-1.0,0.0,-1,-1,-1,0.160625,0.031266,0.079987,0.196887,0.174331,0.06213,0.07359,0.004301,0.252338,0.011969,1.004676,0.001928,1.002021,1.008767,1.005561,0.439581,0.044539,0.341256,0.482535,0.430318,0.076923,0.27735,0,1,1,0.052241,0.053342,0.001702,0.176352,0.020526,0.615385,0.650444,0,2,2,0.199698,0.00213,0.1953,0.203203,0.198356,0.610934,0.09009,0.3451,0.704214,0.67208,0.306553,0.079528,0.192981,0.431901,0.411625,0.004336,0.003589,0.000346,0.00999,0.001379,0.056297,0.044583,0.002999,0.150845,0.02297,0.044294,0.071076,0.000672,0.241378,0.005491,0.0,0.0,0,0,0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0,0,0,0.261497,0.078128,0.152622,0.370595,0.279464,0.048949,0.02528,0.009411,0.077831,0.074835,961.307678,405.585052,528,1511,528,0.061726,0.018374,0.0214,0.094076,0.0214,0.564632,0.018147,0.533675,0.580167,0.554483,0.081928,0.041875,0.013755,0.124311,0.055897,0.0,0.0,0,0,0,0.023349,0.034747,5.3e-05,0.088388,0.009294,0.016887,0.008305,0.005059,0.031257,0.011429,0.03335,0.029768,0.006169,0.103393,0.017101,26.538462,2.025479,24,29,29,0.673302,0.331873,0.081805,1.00851,0.394758,0.066872,0.050442,0.026844,0.171638,0.026844,0.004382,0.003003,0.000218,0.009221,0.002199,12.461538,1.664101,10,14,...,-0.623392,-0.567442,-0.621776,-0.610934,0.09009,-0.704214,-0.3451,-0.67208,-0.59347,0.020696,-0.622428,-0.561958,-0.615063,
-0.605434,0.088259,-0.694389,-0.344773,-0.665367,-0.461909,0.021377,-0.488943,-0.427884,-0.488943,-0.473874,0.090423,-0.569697,-0.208708,-0.539247,13,0,1,13,2,1,13,1,1,13,0,1,13,7,2,13,0,1,13,2,1,13,3,1,13,0,1,13,-1,1,13,3,3,-0.001615,0.0,-0.002025,-0.003816,0.00406,-0.027694,0.0,0.005133,,-0.008052,0.0,0.0,0.010168,0.002863,0.0,-0.015666,0.004188,,0.0,-0.021213,-0.000396,1.9e-05,-0.016143,1.0,-0.013831,2.0,0.000415,-0.010977,0.009746,-0.005053,-0.007352,0.000565,0.0,0.0,0.0,-0.026107,-0.000784,0.0,-0.021855,0.017433,-0.004979,0.0,0.002537,0.00637,0.001067,0.0,-0.490735,-0.062369,-0.000796,0.0,-0.000617,0.0,-1.0,0.318869,0.0,0.0,0.0,-0.295977,-0.003092,0.0,-0.004697,-0.00014,0.0,0.0,-0.001836,0.0,0.0,0.001609,0.0,0.0,0.0,,0.002884,0.0,-0.003236,-0.0221,0.001808,0.0,0.0,0.0,0.0,0.003583,0.0,0.0,1.0,0.007059,0.0,0.0,0.003391,0.0,-0.006715,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,-0.004253,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.007152,-0.006135,0.007993,-0.00394,0.040299,-0.001329,0.0,0.0,,0.0,0.0,-0.003921,-0.00515,0.0,-0.003183,0.0,0.0,0.0,0.0,0.025346,0.058123,0.0,0.006993,-0.001055,0.005564,0.000747,0.0,0.0,0.0,0.0,0.0,-0.001377,0.0,0.0,0.0,0.0,,0.000632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000741,0.0,0.0,0.0,0.0,0.00218,0.011542,0.002683,0.012044,0.320485,0.329846,0.001615,0.010977,0.001615,0.010977,0.005198,0.01456,-0.00452,0.004842,0
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0.891679,0.042325,0.805045,0.940382,0.8719,0.0,0.0,0,0,0,0.005941,0.002475,0.000776,0.009806,0.007679,0.814543,0.003143,0.81067,0.819947,0.815746,0.00418,0.002581,0.000336,0.009076,0.001247,0.173102,0.004669,0.16619,0.176403,0.176403,0.0,0.0,0.0,0.0,0.0,0.005088,0.00291,4.9e-05,0.009686,0.005528,,,,,,0.048778,0.006847,0.037001,0.061963,0.044671,0.0,0.0,0,0,0,11.692307,9.384248,3,25,21,0.20915,0.117203,0.06315,0.305305,0.069952,0.004572,0.002297,0.001201,0.00783,0.004855,0.0,0.0,0,0,0,0.474523,0.076167,0.366783,0.694332,0.465525,0.471961,0.007588,0.461473,0.484715,0.480303,0.253697,0.093176,0.13784,0.491528,0.325121,-1.0,0.0,-1,-1,-1,0.075672,0.046857,0.030852,0.195757,0.048857,0.11529,0.070823,0.035662,0.216773,0.159818,0.386868,0.509339,0.0,1.008826,1.005185,0.093218,0.020103,0.073834,0.136212,0.095238,0.153846,0.375534,0,1,0,0.006685,0.002242,0.002925,0.009847,0.004027,0.153846,0.375534,0,1,0,0.23347,0.028414,0.191802,0.25644,0.253811,0.527254,0.088509,0.254276,0.584359,0.570419,0.100315,0.074579,0.044728,0.260673,0.125195,,,,,,0.005051,0.002665,0.002389,0.00935,0.00935,0.005017,0.003694,0.000714,0.009807,0.001001,0.846154,0.375534,0,1,1,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0,0,0,0.12029,0.008589,0.108082,0.128201,0.122915,0.04964,0.060154,0.005756,0.151135,0.013041,157.07692,383.420013,0,1021,0,0.203298,0.041725,0.125503,0.254067,0.254067,0.178482,0.009615,0.163719,0.190924,0.183075,0.004422,0.002974,0.000626,0.008859,0.006051,0.0,0.0,0,0,0,0.318151,0.102317,0.094102,0.392473,0.382744,,,,,,0.004924,0.003445,2.5e-05,0.009628,0.009469,23.153847,3.71587,18,28,28,0.003476,0.002267,0.000846,0.009551,0.00267,0.356445,0.255848,0.082395,0.715081,0.600739,0.006005,0.002529,0.001513,0.00989,0.005842,12.538462,1.391365,12,17,...,-0.940382,-0.805045,-0.8719,-0.527254,0.088509,-0.584359,-0.254276,-0.570419,-0.887315,0.04365,-0.93768,-0.79533,-0.864329,-0.52289,0.089991,-0.581657,-0.244561,-0.562848,-0.756159,0.042603,-0.80517,-0.6655
72,-0.739801,-0.391734,0.089403,-0.444325,-0.114802,-0.43832,13,0,1,13,1,2,13,1,1,13,0,1,13,5,1,13,0,1,13,2,1,13,3,1,13,0,1,13,1,1,13,6,1,-0.007338,0.0,-0.000843,0.002465,-0.004237,,0.0,0.001567,,0.00767,0.0,-1.0,-0.001607,-0.000876,0.0,0.009716,-0.004412,0.081822,0.0,-0.00552,-0.004206,0.004426,-0.001273,0.0,-0.005526,0.0,-0.002511,0.000418,0.000639,,0.000884,-0.008344,0.0,0.0,0.0,,0.003644,0.0,0.0221,-0.005202,-0.001676,0.0,0.002218,,0.002199,1.0,-0.001635,-0.026399,0.001009,1.0,0.005869,0.0,0.0,,0.001988,0.0,0.0,-0.00673,0.00295,0.0,0.002785,0.004478,0.0,0.0,0.002538,0.0,0.0,-0.008005,0.0,0.0,0.0,,0.006578,0.0,-0.002954,0.006692,0.003931,0.0,0.0,0.0,0.0,0.00471,1.0,0.0,0.0,-0.003384,0.0,0.0,0.007219,0.0,-0.010899,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.005786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-06,0.000128,-0.000984,-0.005093,-0.007105,0.010343,0.0,-0.000868,0.005986,0.0,0.0,-0.007248,0.002984,0.0,0.001717,0.0,0.0,0.0,0.0,-0.014405,,0.0,0.003443,0.005265,0.004413,0.006381,0.0,0.0,0.0,0.0,0.0,0.001335,0.0,0.0,-0.002185,0.0,,0.002409,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000618,0.0,0.0,0.0,0.0,-0.001006,-0.008761,0.009537,0.001781,,,0.007338,-0.000418,0.007338,-0.000418,0.012047,0.004292,0.007465,-0.00029,0


In [15]:
# Number of columns in the engineered training frame.
train.shape[1]

1163

In [16]:


# ====================================================
# Amex metric
# ====================================================
def amex_metric(y_true, y_pred):
    """Score predictions as the mean of a normalized weighted Gini and a top-4% capture rate.

    Rows with label 0 carry weight 20 and all other rows weight 1.

    * Capture rate: after sorting by prediction (descending), the share of all
      positives found among the rows whose cumulative weight stays within 4%
      of the total weight.
    * Normalized Gini: the weighted Gini obtained when sorting by prediction,
      divided by the one obtained when sorting by the true label.

    Parameters
    ----------
    y_true, y_pred : 1-D sequences of equal length (labels and scores).

    Returns
    -------
    float
        ``0.5 * (normalized_gini + capture_rate)``.
    """
    def _sorted_desc(sort_col):
        # Column 0 holds the label and column 1 the prediction
        pairs = np.array([y_true, y_pred]).T
        return pairs[pairs[:, sort_col].argsort()[::-1]]

    def _weighted_gini(sort_col):
        ranked = _sorted_desc(sort_col)
        target = ranked[:, 0]
        w = np.where(target == 0, 20, 1)
        cum_weight_share = np.cumsum(w / np.sum(w))
        weighted_pos = target * w
        lorentz = np.cumsum(weighted_pos) / np.sum(weighted_pos)
        return np.sum((lorentz - cum_weight_share) * w)

    # Capture rate within the top 4% of cumulative weight
    by_pred = _sorted_desc(1)
    w_pred = np.where(by_pred[:, 0] == 0, 20, 1)
    inside_cutoff = np.cumsum(w_pred) <= int(0.04 * np.sum(w_pred))
    top_four = np.sum(by_pred[inside_cutoff][:, 0]) / np.sum(by_pred[:, 0])

    # Gini under the model ordering, normalized by the label ordering
    gini_by_pred = _weighted_gini(1)
    gini_by_true = _weighted_gini(0)
    return 0.5 * (gini_by_pred / gini_by_true + top_four)

# ====================================================
# LGBM amex metric
# ====================================================
def lgb_amex_metric(y_pred, y_true):
    """Adapter that exposes ``amex_metric`` as a custom evaluation callback.

    ``y_true`` is an object exposing ``get_label()`` (presumably a LightGBM
    ``Dataset``). Returns the tuple ``('amex_metric', score, True)``; the
    final flag presumably marks the metric as higher-is-better.
    """
    labels = y_true.get_label()
    score = amex_metric(labels, y_pred)
    return 'amex_metric', score, True

# ====================================================
# Train & Evaluate
# ====================================================
def train_and_evaluate(train, test):
    """Engineer final features, train LightGBM with stratified K-fold CV and
    write models, out-of-fold predictions and test predictions to disk.

    Relies on names defined elsewhere in the notebook: ``CFG`` (target,
    metric, boosting_type, seed, n_folds), ``OUTPUT_DIR`` and ``expt_name``.

    Args:
        train: DataFrame with ``customer_ID``, the target column and the
            aggregated features. Modified in place (new columns are added and
            float columns are downcast).
        test: DataFrame with ``customer_ID`` and the same feature columns.
            Modified in place in the same way.

    Returns:
        None. Side effects: one pickled model per fold plus two CSV files
        (out-of-fold and test predictions) under ``OUTPUT_DIR``.
    """
    # Label encode categorical features
    cat_features = [
        "B_30",
        "B_38",
        "D_114",
        "D_116",
        "D_117",
        "D_120",
        "D_126",
        "D_63",
        "D_64",
        "D_66",
        "D_68"
    ]
    cat_features = [f"{cf}_last" for cf in cat_features]
    for cat_col in cat_features:
        # Encoder is fitted on train only; transform raises on labels that
        # appear in test but not in train.
        encoder = LabelEncoder()
        train[cat_col] = encoder.fit_transform(train[cat_col])
        test[cat_col] = encoder.transform(test[cat_col])
    # Round last float features to 2 decimal places
    num_cols = list(train.dtypes[(train.dtypes == 'float32') | (train.dtypes == 'float64')].index)
    num_cols = [col for col in num_cols if 'last' in col]
    for col in num_cols:
        train[col + '_round2'] = train[col].round(2)
        test[col + '_round2'] = test[col].round(2)
    # Get the difference between last and mean
    num_cols = [col for col in train.columns if 'last' in col]
    # Strip the trailing '_last' to recover the base feature name
    num_cols = [col[:-5] for col in num_cols if 'round' not in col]
    # Snapshot the column sets once; the columns added below never end in
    # '_last' or '_mean', so the snapshot stays valid for the whole loop.
    train_cols, test_cols = set(train.columns), set(test.columns)
    for col in num_cols:
        last_col, mean_col = f'{col}_last', f'{col}_mean'
        # Only build the feature when both inputs exist in BOTH frames.
        # This replaces a bare `except: pass`, which hid every error and
        # could leave train with a column that test does not have.
        if not all(c in train_cols and c in test_cols for c in (last_col, mean_col)):
            continue
        train[f'{col}_last_mean_diff'] = train[last_col] - train[mean_col]
        test[f'{col}_last_mean_diff'] = test[last_col] - test[mean_col]
    # Transform float64 and float32 to float16
    num_cols = list(train.dtypes[(train.dtypes == 'float32') | (train.dtypes == 'float64')].index)
    for col in tqdm(num_cols):
        train[col] = train[col].astype(np.float16)
        test[col] = test[col].astype(np.float16)
    # Get feature list
    features = [col for col in train.columns if col not in ['customer_ID', CFG.target]]
    params = {
        'objective': 'binary',
        'metric': CFG.metric,
        'boosting': CFG.boosting_type,
        'seed': CFG.seed,
        'num_leaves': 100,
        'learning_rate': 0.01,
        'feature_fraction': 0.20,
        'bagging_freq': 10,
        'bagging_fraction': 0.50,
        'n_jobs': -1,
        'lambda_l2': 2,
        #'path_smooth': 20, #added new
        'min_data_in_leaf': 40,
        'device': 'gpu',
        'gpu_platform_id': 1,
        'gpu_device_id': 0
        }
    # Create a numpy array to store test predictions
    test_predictions = np.zeros(len(test))
    # Create a numpy array to store out of folds predictions
    oof_predictions = np.zeros(len(train))
    kfold = StratifiedKFold(n_splits = CFG.n_folds, shuffle = True, random_state = CFG.seed)
    for fold, (trn_ind, val_ind) in enumerate(kfold.split(train, train[CFG.target])):
        print(' ')
        print('-'*50)
        print(f'Training fold {fold} with {len(features)} features...')
        x_train, x_val = train[features].iloc[trn_ind], train[features].iloc[val_ind]
        y_train, y_val = train[CFG.target].iloc[trn_ind], train[CFG.target].iloc[val_ind]
        lgb_train = lgb.Dataset(x_train, y_train, categorical_feature = cat_features)
        lgb_valid = lgb.Dataset(x_val, y_val, categorical_feature = cat_features)
        model = lgb.train(
            params = params,
            train_set = lgb_train,
            num_boost_round = 15000,
            valid_sets = [lgb_train, lgb_valid],
            early_stopping_rounds = 1500,
            verbose_eval = 500,
            feval = lgb_amex_metric
            )
        # Save best model
        joblib.dump(model, OUTPUT_DIR+f'lgbm_fold{fold}_seed{CFG.seed}.pkl')
        # Predict validation
        val_pred = model.predict(x_val)
        # Add to out of folds array
        oof_predictions[val_ind] = val_pred
        # Predict the test set; each fold contributes an equal share
        test_pred = model.predict(test[features])
        test_predictions += test_pred / CFG.n_folds
        # Compute fold metric
        score = amex_metric(y_val, val_pred)
        print(f'Our fold {fold} CV score is {score}')
        # Release the per-fold copies before the next fold
        del x_train, x_val, y_train, y_val, lgb_train, lgb_valid
        gc.collect()
    # Compute out of folds metric
    score = amex_metric(train[CFG.target], oof_predictions)
    print(f'Our out of folds CV score is {score}')
    # Create a dataframe to store out of folds predictions
    oof_df = pd.DataFrame({'customer_ID': train['customer_ID'], 'target': train[CFG.target], 'prediction': oof_predictions})
    oof_df.to_csv(OUTPUT_DIR+f'oof_{expt_name}_seed{CFG.seed}.csv', index = False)
    # Create a dataframe to store test prediction
    test_df = pd.DataFrame({'customer_ID': test['customer_ID'], 'prediction': test_predictions})
    test_df.to_csv(OUTPUT_DIR+f'test_{expt_name}_seed{CFG.seed}.csv', index = False)


# seed_everything is defined outside this chunk; presumably seeds the RNGs — TODO confirm
seed_everything(CFG.seed)
#train, test = read_data()
# Trains on the train/test frames already present in the kernel (the read_data() call above is disabled)
train_and_evaluate(train, test)

  0%|          | 0/1164 [00:00<?, ?it/s]

 
--------------------------------------------------
Training fold 0 with 1452 features...
[LightGBM] [Info] Number of positive: 95062, number of negative: 272068
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 246946
[LightGBM] [Info] Number of data points in the train set: 367130, number of used features: 1444
[LightGBM] [Info] Using requested OpenCL platform 1 device 0
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 3090, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 847 dense feature groups (296.90 MB) transferred to GPU in 0.133886 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.258933 -> initscore=-1.051523
[LightGBM] [Info] Start training from score -1.051523
[500]	training's binary_logloss: 0.334721	training's amex_metric: 0.779609	valid_1's binary_logloss: 0.3366