# MLP Keras Models for Tabular Data

## For Kaggle AMEX default prediction competition data

https://www.kaggle.com/competitions/amex-default-prediction/data

This competition had some random looking data where no information on columns was given. The notebook is mainly to allow me to copy it for use in MLP experiments in other tabular data cases.

This assumes TF/Keras and various other Python libraries are installed. And GPU configured. I used a Docker image and Pipfile with all these installed.

In [1]:
import pandas as pd
import numpy as np
import multiprocessing
from multiprocessing import Process
import glob
import os

import optuna  # pip install optuna
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from optuna.integration import LightGBMPruningCallback
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
import xgboost as xgb
from tqdm import tqdm

tqdm.pandas()

import time

N_FOLDS = 5


In [2]:
# https://www.kaggle.com/kyakovlev
# https://www.kaggle.com/competitions/amex-default-prediction/discussion/327534
def amex_metric_mod(y_true, y_pred):

    labels     = np.transpose(np.array([y_true, y_pred]))
    labels     = labels[labels[:, 1].argsort()[::-1]]
    weights    = np.where(labels[:,0]==0, 20, 1)
    cut_vals   = labels[np.cumsum(weights) <= int(0.04 * np.sum(weights))]
    top_four   = np.sum(cut_vals[:,0]) / np.sum(labels[:,0])

    gini = [0,0]
    for i in [1,0]:
        labels         = np.transpose(np.array([y_true, y_pred]))
        labels         = labels[labels[:, i].argsort()[::-1]]
        weight         = np.where(labels[:,0]==0, 20, 1)
        weight_random  = np.cumsum(weight / np.sum(weight))
        total_pos      = np.sum(labels[:, 0] *  weight)
        cum_pos_found  = np.cumsum(labels[:, 0] * weight)
        lorentz        = cum_pos_found / total_pos
        gini[i]        = np.sum((lorentz - weight_random) * weight)

    return 0.5 * (gini[1]/gini[0] + top_four)



In [3]:
def amex_metric(y_true, y_pred, return_components=False) -> float:
    """Amex metric for ndarrays"""
    def top_four_percent_captured(df) -> float:
        """Corresponds to the recall for a threshold of 4 %"""
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        four_pct_cutoff = int(0.04 * df['weight'].sum())
        df['weight_cumsum'] = df['weight'].cumsum()
        df_cutoff = df.loc[df['weight_cumsum'] <= four_pct_cutoff]
        return (df_cutoff['target'] == 1).sum() / (df['target'] == 1).sum()
        
    def weighted_gini(df) -> float:
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        df['random'] = (df['weight'] / df['weight'].sum()).cumsum()
        total_pos = (df['target'] * df['weight']).sum()
        df['cum_pos_found'] = (df['target'] * df['weight']).cumsum()
        df['lorentz'] = df['cum_pos_found'] / total_pos
        df['gini'] = (df['lorentz'] - df['random']) * df['weight']
        return df['gini'].sum()

    def normalized_weighted_gini(df) -> float:
        """Corresponds to 2 * AUC - 1"""
        df2 = pd.DataFrame({'target': df.target, 'prediction': df.target})
        df2.sort_values('prediction', ascending=False, inplace=True)
        return weighted_gini(df) / weighted_gini(df2)

    df = pd.DataFrame({'target': y_true.ravel(), 'prediction': y_pred.ravel()})
    df.sort_values('prediction', ascending=False, inplace=True)
    g = normalized_weighted_gini(df)
    d = top_four_percent_captured(df)

    if return_components: return g, d, 0.5 * (g + d)
    return 0.5 * (g + d)

In [10]:
def format_time(seconds):
    seconds = int(seconds)
    minutes = seconds // 60
    hours = minutes // 60
    minutes = minutes % 60
    seconds = seconds % 60
    if hours > 0:
        return f"{hours}h, {minutes}m, {seconds}s"
    if minutes > 0:
        return f"{minutes}m, {seconds}s"
    return f"{seconds}s"
        

# Load Data

Should actually put the paths into variables and use those across this notebook, like in the CNN example..

In [4]:
#df_train = pd.read_parquet("large_train.parquet", engine="pyarrow")
df_train = pd.read_parquet("large_train_v2.parquet", engine="pyarrow")
df_train.replace([np.inf, -np.inf], np.nan,inplace=True)
df_train.fillna(-1, inplace=True)
df_train.head()


Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13,customer_ID,target
0,0.008214,0.037988,0.090349,0.106776,0.140657,0.160164,0.206365,0.226899,0.271047,0.285421,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0
1,0.0,0.047228,0.068788,0.119097,0.155031,0.175565,0.203285,0.23922,0.275154,0.311088,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0
2,0.010267,0.042094,0.073922,0.103696,0.13655,0.168378,0.199179,0.231006,0.261807,0.293635,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0
3,0.030801,0.052361,0.091376,0.11807,0.140657,0.162218,0.206365,0.2423,0.276181,0.311088,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0
4,0.029774,0.053388,0.092402,0.116016,0.146817,0.163244,0.210472,0.24846,0.280287,0.312115,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0


In [5]:
df_train["target"] = df_train["target"] > 0.6
#df_train["target"] > 0.6
strat = df_train["fake_splitter"]
df_train.drop("fake_splitter", axis=1)

Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13,customer_ID,target
0,0.008214,0.037988,0.090349,0.106776,0.140657,0.160164,0.206365,0.226899,0.271047,0.285421,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,False
1,0.000000,0.047228,0.068788,0.119097,0.155031,0.175565,0.203285,0.239220,0.275154,0.311088,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,False
2,0.010267,0.042094,0.073922,0.103696,0.136550,0.168378,0.199179,0.231006,0.261807,0.293635,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,False
3,0.030801,0.052361,0.091376,0.118070,0.140657,0.162218,0.206365,0.242300,0.276181,0.311088,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,False
4,0.029774,0.053388,0.092402,0.116016,0.146817,0.163244,0.210472,0.248460,0.280287,0.312115,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458908,0.019507,0.056468,0.064682,0.109856,0.144764,0.167351,0.197125,0.223819,0.256674,0.284394,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ffff41c8a52833b56430603969b9ca48d208e7c192c6a4...,False
458909,0.021561,0.036961,0.084189,0.114990,0.146817,0.178645,0.209446,0.240246,0.272074,0.303901,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,ffff518bb2075e4816ee3fe9f3b152c57fc0e6f01bf7fd...,False
458910,0.015400,0.054415,0.083162,0.103696,0.125257,0.174538,0.196099,0.219713,0.252567,0.295688,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ffff9984b999fccb2b6127635ed0736dda94e544e67e02...,False
458911,0.024641,0.055441,0.087269,0.113963,0.144764,0.181725,0.190965,0.241273,0.267967,0.292608,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,ffffa5c46bc8de74f5a4554e74e239c8dee6b9baf38814...,True


In [6]:
#df_train = pd.read_parquet("large_train.parquet", engine="pyarrow")
df_test = pd.read_parquet("large_test_v2.parquet", engine="pyarrow")
df_test.replace([np.inf, -np.inf], np.nan,inplace=True)
df_test.fillna(-1, inplace=True)
df_test.head()


Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_5,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13,customer_ID
0,0.73922,0.774127,0.805955,0.831622,0.858316,0.887064,0.921971,0.967146,0.980493,-1.0,...,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,00000469ba478561f23a92a868bd366de6f6527a684c9a...
1,0.428131,0.465092,0.495893,0.513347,0.546201,0.586242,0.622177,0.629363,0.662218,0.711499,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...
2,0.596509,0.641684,0.663244,0.704312,0.743326,0.749487,0.811088,0.812115,0.858316,0.894251,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...
3,0.410678,0.455852,0.49692,0.502053,0.534908,0.588296,0.62423,0.637577,0.684805,0.705339,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...
4,0.610883,0.652977,0.684805,0.705339,0.728953,0.772074,0.800821,0.837782,0.843943,0.87885,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...


In [7]:
cat_cols = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68'] 
new_cat_cols = []
for cat_col in cat_cols:
    new_cat_cols.extend([col for col in df_train.columns if col.startswith(cat_col)])
cat_cols = new_cat_cols
#cat_cols

In [8]:
#df_train.reset_index(inplace=True)

In [9]:
df_train.shape

(458913, 3214)

# Split Data

In [11]:
X = df_train.drop(["fake_splitter", "customer_ID", "target"], axis=1)
y = df_train["target"]


In [31]:
stratzip = zip(strat, y)
#stratified k-fold only supports single binary value, tuple stratification breaks it
#(so stratifying by a single variable that is a combination of two works to avoid that)
stratzip = [f"{a}-{b}" for (a,b) in stratzip]
#stratzip = list(stratzip)

# Keras Models and Training Code

In [13]:
def reset_keras():
    sess = tf.compat.v1.keras.backend.get_session()
    tf.compat.v1.keras.backend.clear_session()
    sess.close()
    sess = tf.compat.v1.keras.backend.get_session()

    try:
        del classifier # this is from global space - change this as you need
    except:
        pass

    # use the same config as you used to create the session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    config.gpu_options.visible_device_list = "0"
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))

In [33]:
def delete_keras_models():
    for fl in glob.glob("keras/*"):
        #Do what you want with the file
        os.remove(fl)

def create_model(X_input):
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Conv1D, BatchNormalization, Activation, MaxPooling1D, Flatten, Dropout, Dense, Input
    from tensorflow.keras import backend as K 
    import tensorflow as tf

    print(tf.config.experimental.get_memory_info('GPU:0'))
    print("creating classifier")
    #https://stackoverflow.com/questions/59567226/how-to-programmatically-determine-available-gpu-memory-with-tensorflow?noredirect=1&lq=1
    print(tf.config.experimental.get_memory_info('GPU:0'))

    input_shape = X_input.shape[1]
    print(f"input shape: {input_shape}")
    input1 = Input(shape=(input_shape))
    dense1 = Dense(128, activation='relu')(input1)
    drop1 = Dropout(0.5)(dense1)
    hidden1 = Dense(64, activation='relu')(drop1)
    drop2 = Dropout(0.3)(hidden1)
    hidden2 = Dense(16, activation='relu')(drop2)
    output = Dense(1, activation='sigmoid')(hidden2)

    print("layers created")

    model = Model(inputs=input1, outputs=output)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'], run_eagerly=True)
    
    print("model compiled")
    
    return model
    
def train_and_predict(df_from, strat):
    delete_keras_models()

    manager = multiprocessing.Manager()
    process_dict = manager.dict()
    #note: "bool" type in X caused OOM by 100GB
    X = df_from.drop(["fake_splitter", "customer_ID", "target"], axis=1)
#    X = X.values.reshape(-1, 13, 233)
#    X = X.values.reshape(-1, X.shape[1], 1)
    y = df_from["target"]

    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=69)

    cv_scores = np.empty(N_FOLDS)
    cv_accuracies = np.empty(N_FOLDS)
    time_start_all_folds = time.time()
    acc_score_total = 0
    
    n_classes = 1
    train_preds = np.zeros((X.shape[0], n_classes))
    N_FOLDS_TO_TRAIN = N_FOLDS

    for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):
        print(f"on index {idx} vs {N_FOLDS_TO_TRAIN}")
        if idx > N_FOLDS_TO_TRAIN:
            break
        
        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS} ===")
        #K.clear_session()
        #reset_keras()
        axis = 0
        X_train, X_test = np.take(X, train_idx, axis), np.take(X, test_idx, axis)
        y_train, y_test = np.take(y, train_idx, axis), np.take(y, test_idx, axis)
        
        p = Process(target=train_and_predict_2, args=(idx, process_dict, X_train, y_train, X_test, y_test))
        p.start()
        flag = p.join()
        print(f"Subprocess exited with code {flag}")
        
        preds = process_dict[f"preds-{idx}"]
        np.add.at(train_preds, test_idx, preds)
        print(f"count zerO: {np.count_nonzero(train_preds)}")

        preds_this_round = (preds >= 0.5)
        acc_score = accuracy_score(y_test, preds_this_round)
        acc_score_total += acc_score

        log_loss_fold = log_loss(y_test, preds)
        cv_scores[idx] = log_loss_fold
        cv_accuracies[idx] = acc_score
        
        amex_score = amex_metric(y_test, preds)
        
        time_total_this_fold = time.time() - time_start_this_fold
        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS} log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        print(f"time to run this fold: {format_time(time_total_this_fold)}")
    return process_dict

def train_and_predict_2(idx, process_dict, X_train, y_train, X_test, y_test):
    #have to import tensorflow here to not reserve GPU in main process (this func runs in its own process)
    #-> in order to avoid oom errors, as keras seems to leak memory in some of these cases..
    #for protoing it is much simpler to run it all inside the main process as done at the end of this notebook
    import tensorflow as tf

    try:
        model = create_model(X_train)

        print("fitting")

        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=f"keras/mlp-model-fold{idx}",
    #        filepath=f"keras/cnn-model-fold{idx}-"+"{epoch:02d}",
            save_weights_only=True,
            monitor='val_loss',
            mode='min',
            save_best_only=True)

        print(f"fit: {model}")
        model.fit(X_train, y_train, batch_size=16, epochs=10, validation_data=(X_test, y_test), callbacks=[model_checkpoint_callback])
        #have to store history right after fit(), as predict() seems to clear it
        history = model.history.history

        print(f"predicting")
        preds = model.predict(X_test)
        print(preds.shape)

        process_dict[f"history-{idx}"] = history
        process_dict[f"preds-{idx}"] = preds
    except:# Error as e:
        import traceback
        traceback.print_exc()
        


In [15]:
process_dict = train_and_predict(df_train, stratzip)


on index 0 vs 5
=== STARTING FOLD 1/5 ===


2022-09-07 23:36:09.368817: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:09.397445: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:09.398106: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:09.400458: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211


2022-09-07 23:36:10.096681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:10.097220: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:10.097720: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-07 23:36:10.098211: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


layers created
model created
model compiled
fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d388610>
Epoch 1/10
   16/22946 [..............................] - ETA: 2:46 - loss: 0.8091 - accuracy: 0.4883 

2022-09-07 23:36:17.198869: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
Subprocess exited with code None
count zerO: 91783
=== FINISHED FOLD 1/5 log loss=0.24286326943451353, accuracy=0.8944793698179402, amex=0.7652423149560644 ===
time to run this fold: 31m, 0s
on index 1 vs 5
=== STARTING FOLD 2/5 ===


2022-09-08 00:07:08.690284: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.695256: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.695789: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.696539: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created
model compiled
fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d3a0610>


2022-09-08 00:07:08.975665: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.976208: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.976711: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:07:08.977193: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/10
   22/22946 [..............................] - ETA: 2:48 - loss: 0.6189 - accuracy: 0.6761

2022-09-08 00:07:15.381130: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
Subprocess exited with code None
count zerO: 183566
=== FINISHED FOLD 2/5 log loss=0.24589536677643997, accuracy=0.8917010775415927, amex=0.7535809524516308 ===
time to run this fold: 31m, 19s
on index 2 vs 5
=== STARTING FOLD 3/5 ===


2022-09-08 00:38:28.238731: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.243149: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.243690: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.244471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created
model compiled
fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d3a0730>

2022-09-08 00:38:28.561380: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.561891: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.562374: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 00:38:28.562827: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6



Epoch 1/10
   23/22946 [..............................] - ETA: 2:42 - loss: 0.6257 - accuracy: 0.7147

2022-09-08 00:38:34.950178: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
Subprocess exited with code None
count zerO: 275349
=== FINISHED FOLD 3/5 log loss=0.2456329506996844, accuracy=0.8924310602181231, amex=0.7629473860022347 ===
time to run this fold: 30m, 47s
on index 3 vs 5
=== STARTING FOLD 4/5 ===


2022-09-08 01:09:15.984696: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:15.988069: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:15.988591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:15.989294: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created
model compiled
fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d3a0730>


2022-09-08 01:09:16.261569: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:16.262094: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:16.262580: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:09:16.263047: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22258 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/10
   24/22946 [..............................] - ETA: 2:42 - loss: 0.5508 - accuracy: 0.7578

2022-09-08 01:09:22.243438: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91782, 1)
Subprocess exited with code None
count zerO: 367131
=== FINISHED FOLD 4/5 log loss=0.2444246388906094, accuracy=0.8957093983569763, amex=0.7684236273640473 ===
time to run this fold: 31m, 32s
on index 4 vs 5
=== STARTING FOLD 5/5 ===


2022-09-08 01:40:48.526606: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.530495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.531022: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.531783: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211


2022-09-08 01:40:48.828439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.828970: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.829469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 01:40:48.829936: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


layers created
model created
model compiled
fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d3a0730>
Epoch 1/10
   23/22946 [..............................] - ETA: 2:49 - loss: 0.7392 - accuracy: 0.5788

2022-09-08 01:40:55.200659: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91782, 1)
Subprocess exited with code None
count zerO: 458913
=== FINISHED FOLD 5/5 log loss=0.24378958830734634, accuracy=0.8951319430825216, amex=0.7662574222610683 ===
time to run this fold: 30m, 0s


# Predict Test Set

In [16]:
X_test = df_test.drop(["customer_ID"], axis=1)
X_test_np = X_test.values.reshape (-1, X_test.shape[1], 1)
#X_test_np = X_test.values.reshape (-1, 13, 233)
X_test_np.shape

(924621, 3211, 1)

In [17]:
def predict_keras(idx, process_dict, X_test_np, weights_filepath):
    model = create_model(X_test_np)
    model.load_weights(filepath)
    preds = model.predict(X_test_np)
    process_dict[f"preds-{idx}"] = preds

In [18]:
N_FOLDS

5

In [19]:
manager = multiprocessing.Manager()
process_dict = manager.dict()
for idx in range(N_FOLDS):
    #todo: remove inner loop
    searchpath = f"keras/mlp-model-fold{idx}*.data-*"
    print(searchpath)
    for fl in glob.glob(searchpath):

        ending = fl.index(".")
        filepath = fl[0:ending]
        print(f"=== STARTING TO PREDICT {idx+1}/{N_FOLDS}: {filepath} === ")

        p = Process(target=predict_keras, args=(idx, process_dict, X_test_np, filepath))
        p.start()
        flag = p.join()
        print(f"=== FINISHED PREDICTION {idx+1}/{N_FOLDS}: {sum(process_dict[f'preds-{idx}'])} === ")
        print(f"Subprocess exited with code {flag}")


keras/mlp-model-fold0*.data-*
=== STARTING TO PREDICT 1/5: keras/mlp-model-fold0 === 


2022-09-08 02:10:50.595757: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.600008: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.600552: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.601337: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created

2022-09-08 02:10:50.905336: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.905858: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.906348: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:10:50.906808: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22258 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6



model compiled


2022-09-08 02:10:58.270859: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.
2022-09-08 02:11:03.004934: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.


  108/28895 [..............................] - ETA: 41s  

2022-09-08 02:11:07.184320: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 1/5: [224653.56] === 
Subprocess exited with code None
keras/mlp-model-fold1*.data-*
=== STARTING TO PREDICT 2/5: keras/mlp-model-fold1 === 


2022-09-08 02:11:53.651694: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.656913: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.657652: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.658329: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created
model compiled


2022-09-08 02:11:53.967433: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.967971: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.968475: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:11:53.968949: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-09-08 02:12:01.339646: W tensorflow/core/framework/cpu_allocator_impl.cc

  104/28895 [..............................] - ETA: 42s  

2022-09-08 02:12:10.286916: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 2/5: [231129.44] === 
Subprocess exited with code None
keras/mlp-model-fold2*.data-*
=== STARTING TO PREDICT 3/5: keras/mlp-model-fold2 === 


2022-09-08 02:12:57.002093: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.006500: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.007036: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.007784: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211


2022-09-08 02:12:57.311105: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.311720: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.312231: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:12:57.312717: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


layers created
model created
model compiled


2022-09-08 02:13:04.684397: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.
2022-09-08 02:13:09.465322: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.


   98/28895 [..............................] - ETA: 44s  

2022-09-08 02:13:13.550391: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 3/5: [234494.58] === 
Subprocess exited with code None
keras/mlp-model-fold3*.data-*
=== STARTING TO PREDICT 4/5: keras/mlp-model-fold3 === 


2022-09-08 02:13:59.506184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.509722: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.510220: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.510883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211


2022-09-08 02:13:59.808612: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.809173: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.809657: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:13:59.810121: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


layers created
model created
model compiled


2022-09-08 02:14:06.640013: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.
2022-09-08 02:14:11.081200: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 11875832124 exceeds 10% of free system memory.


  109/28895 [..............................] - ETA: 40s  

2022-09-08 02:14:14.943316: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 4/5: [235686.39] === 
Subprocess exited with code None
keras/mlp-model-fold4*.data-*
=== STARTING TO PREDICT 5/5: keras/mlp-model-fold4 === 


2022-09-08 02:15:00.058577: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.062271: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.062821: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.063479: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier
{'current': 0, 'peak': 0}
input shape: 3211
layers created
model created
model compiled


2022-09-08 02:15:00.377004: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.377573: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.378085: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-08 02:15:00.378583: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-09-08 02:15:07.255554: W tensorflow/core/framework/cpu_allocator_impl.cc

  116/28895 [..............................] - ETA: 38s  

2022-09-08 02:15:15.560761: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 5/5: [233698.48] === 
Subprocess exited with code None


In [20]:
process_dict["preds-0"]

array([[0.09172646],
       [0.00251949],
       [0.03374162],
       ...,
       [0.488423  ],
       [0.39359516],
       [0.13368696]], dtype=float32)

In [21]:
sum(process_dict["preds-1"])

array([231129.44], dtype=float32)

In [22]:
sum(process_dict["preds-2"])

array([234494.58], dtype=float32)

In [23]:
preds = []
for x in range(N_FOLDS):
    preds.append(process_dict[f"preds-{x}"])

In [24]:
preds = sum(preds)/N_FOLDS
preds

array([[0.09867042],
       [0.00171823],
       [0.02655552],
       ...,
       [0.54257154],
       [0.40819913],
       [0.1374466 ]], dtype=float32)

In [25]:
submission = pd.DataFrame()
submission["customer_ID"] = df_test["customer_ID"]
submission["prediction"] = preds
submission

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.098670
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.001718
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.026556
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.442982
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.791041
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.024941
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.659131
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.542572
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.408199


In [26]:
submission.to_csv("mlp_submission.csv", index=False)

# Some Experiments on Keras Training (in main process / notebook)

In [27]:
X.dtypes

S_2_1        float32
S_2_2        float32
S_2_3        float32
S_2_4        float32
S_2_5        float32
              ...   
D_68_7_9     float32
D_68_7_10    float32
D_68_7_11    float32
D_68_7_12    float32
D_68_7_13    float32
Length: 3211, dtype: object

In [28]:
df_from = df_train
X = df_from.drop(["customer_ID", "target"], axis=1)
y = df_from["target"]
X.shape[1]


3212

In [29]:
X[:16]

Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_4,D_68_7_5,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13
0,0.008214,0.037988,0.090349,0.106776,0.140657,0.160164,0.206365,0.226899,0.271047,0.285421,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.0,0.047228,0.068788,0.119097,0.155031,0.175565,0.203285,0.23922,0.275154,0.311088,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.010267,0.042094,0.073922,0.103696,0.13655,0.168378,0.199179,0.231006,0.261807,0.293635,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.030801,0.052361,0.091376,0.11807,0.140657,0.162218,0.206365,0.2423,0.276181,0.311088,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.029774,0.053388,0.092402,0.116016,0.146817,0.163244,0.210472,0.24846,0.280287,0.312115,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,0.01848,0.059548,0.081109,0.119097,0.13655,0.175565,0.209446,0.24538,0.258727,0.293635,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.010267,0.053388,0.073922,0.103696,0.143737,0.165298,0.194045,0.234086,0.26078,0.300821,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.024641,0.054415,0.085216,0.095483,0.147844,0.182752,0.214579,0.234086,0.274127,0.304928,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.014374,0.045175,0.077002,0.107803,0.13963,0.171458,0.202259,0.23306,0.264887,0.298768,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,0.016427,0.046201,0.079055,0.109856,0.141684,0.173511,0.204312,0.23614,0.26694,0.297741,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
import tensorflow as tf

df_from = df_train
strat = strat
X = df_from.drop(["fake_splitter", "customer_ID", "target"], axis=1)
y = df_from["target"]

cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1121218)

cv_scores = np.empty(N_FOLDS)
cv_accuracies = np.empty(N_FOLDS)
time_start_all_folds = time.time()
acc_score_total = 0

n_classes = 1
train_preds = np.zeros((X.shape[0], n_classes))
N_FOLDS_TO_TRAIN = N_FOLDS

for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):
    print(f"on index {idx} vs {N_FOLDS_TO_TRAIN}")
    if idx > N_FOLDS_TO_TRAIN:
        break

    time_start_this_fold = time.time()
    print(f"=== STARTING FOLD {idx+1}/{N_FOLDS} ===")
    #K.clear_session()
    #reset_keras()
    axis = 0
    X_train, X_test = np.take(X, train_idx, axis), np.take(X, test_idx, axis)
    y_train, y_test = np.take(y, train_idx, axis), np.take(y, test_idx, axis)

    model = create_model(X_train)
    #break

    print("model compiled, fitting")

    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f"keras/mlp-model-fold{idx}",
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    print(f"fit: {model}")
#    model.fit(X_train, y_train, batch_size=16, epochs=10, validation_data=(X_test, y_test), callbacks=[model_checkpoint_callback])
    model.fit(X_train, y_train, batch_size=16, epochs=10)
    #have to store history right after fit(), as predict() seems to clear it
    history = model.history.history

    print(f"predicting")
    preds = model.predict(X_test)
    print(preds.shape)

#    preds = process_dict[f"preds-{idx}"]
    np.add.at(train_preds, test_idx, preds)
    print(f"count zerO: {np.count_nonzero(train_preds)}")

    preds_this_round = (preds >= 0.5)
    acc_score = accuracy_score(y_test, preds_this_round)
    acc_score_total += acc_score

    log_loss_fold = log_loss(y_test, preds)
    cv_scores[idx] = log_loss_fold
    cv_accuracies[idx] = acc_score

    amex_score = amex_metric(y_test, preds)

    time_total_this_fold = time.time() - time_start_this_fold
    print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS} log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
    print(f"time to run this fold: {format_time(time_total_this_fold)}")


on index 0 vs 5
=== STARTING FOLD 1/5 ===
{'current': 5060864, 'peak': 5901622016}
creating classifier
{'current': 5060864, 'peak': 5901622016}
input shape: 3211
layers created
model created
model compiled
model compiled, fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d2576a0>


2022-09-08 23:16:59.533263: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 4715417720 exceeds 10% of free system memory.
2022-09-08 23:17:01.036757: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 4715417720 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
count zerO: 91783
=== FINISHED FOLD 1/5 log loss=0.2448184033478684, accuracy=0.8958412777965419, amex=0.7673515878737743 ===
time to run this fold: 26m, 27s
on index 1 vs 5
=== STARTING FOLD 2/5 ===
{'current': 5904870400, 'peak': 5906785280}
creating classifier
{'current': 5904870400, 'peak': 5906785280}
input shape: 3211
layers created
model created
model compiled
model compiled, fitting
fit: <keras.engine.functional.Functional object at 0x7f0f7d27b6d0>


2022-09-08 23:43:26.894367: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 4715417720 exceeds 10% of free system memory.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
count zerO: 183566
=== FINISHED FOLD 2/5 log loss=0.23851308438911942, accuracy=0.8966148415283877, amex=0.7672578943092362 ===
time to run this fold: 26m, 1s
on index 2 vs 5
=== STARTING FOLD 3/5 ===
{'current': 1184885760, 'peak': 11806554624}
creating classifier
{'current': 1184885760, 'peak': 11806554624}
input shape: 3211
layers created
model created
model compiled
model compiled, fitting
fit: <keras.engine.functional.Functional object at 0x7f0de8159be0>
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
predicting
(91783, 1)
count zerO: 275349
=== FINISHED FOLD 3/5 log loss=0.24222644856354858, accuracy=0.8953183051327588, amex=0.7677705446673134 ===
time to run this fold: 26m, 38s
on index 3 vs 5
=== STARTING FOLD 4/5 ===
{'current': 7086221824, 'peak': 11806554624}
creating classifier
{'c

In [40]:
np.add.at(train_preds, test_idx, preds)

In [41]:
train_preds.shape

(458913, 1)

In [42]:
preds.shape

(91782, 1)

In [43]:
test_idx.shape

(91782,)

In [44]:
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 3211)]            0         
                                                                 
 dense_20 (Dense)            (None, 128)               411136    
                                                                 
 dropout_10 (Dropout)        (None, 128)               0         
                                                                 
 dense_21 (Dense)            (None, 64)                8256      
                                                                 
 dropout_11 (Dropout)        (None, 64)                0         
                                                                 
 dense_22 (Dense)            (None, 16)                1040      
                                                                 
 dense_23 (Dense)            (None, 1)                 17  

In [45]:
X_train.shape

(367131, 3211)

In [46]:
X_test.shape

(91782, 3211)

In [47]:
y_train.shape

(367131,)

In [48]:
X_train.dtypes.value_counts()

float32    3156
int8         55
dtype: int64

In [49]:
X_train.describe()

Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_4,D_68_7_5,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13
count,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,...,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0,367131.0
mean,0.04426,0.059258,0.071902,0.084733,0.10084,0.116781,0.129181,0.142236,0.152075,0.160167,...,0.448892,0.432663,0.420068,0.405782,0.390555,0.37269,0.353574,0.333963,0.316871,0.290161
std,0.08202,0.133742,0.180797,0.219717,0.249807,0.278633,0.310322,0.339955,0.372396,0.404858,...,0.567186,0.583207,0.598746,0.616486,0.632286,0.649945,0.667362,0.684465,0.698892,0.724027
min,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,0.010267,0.041068,0.071869,0.102669,0.13347,0.165298,0.196099,0.226899,0.2577,0.288501,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.01848,0.049281,0.080082,0.110883,0.14271,0.173511,0.204312,0.23614,0.26694,0.297741,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.027721,0.057495,0.089322,0.120123,0.150924,0.182752,0.213552,0.244353,0.275154,0.304928,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,0.405544,0.405544,0.405544,0.405544,0.405544,0.405544,0.405544,0.405544,0.405544,0.405544,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [50]:
X_train.select_dtypes(include=['bool'])

0
2
3
4
6
...
458906
458908
458909
458911
458912
