# 1D CNN-Based Keras Models for Tabular Data

## For Kaggle AMEX default prediction competition data

https://www.kaggle.com/competitions/amex-default-prediction/data

The data in this competition is anonymized: the columns look random and no description of them was given. This notebook mainly serves as a template that I can copy for 1D CNN experiments on other tabular data sets.

This assumes TF/Keras and the other Python libraries imported below are installed, and that a GPU is configured. I used a Docker image and a Pipfile with all of these installed.

In [1]:
import pandas as pd
import numpy as np
import multiprocessing
from multiprocessing import Process
import glob
import os

import optuna  # pip install optuna
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from optuna.integration import LightGBMPruningCallback
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
import xgboost as xgb
from tqdm import tqdm

tqdm.pandas()

import time

N_FOLDS = 5


In [2]:
# https://www.kaggle.com/kyakovlev
# https://www.kaggle.com/competitions/amex-default-prediction/discussion/327534
def amex_metric_mod(y_true, y_pred):
    """Compute the AMEX competition metric using plain NumPy.

    The score is the mean of two components:

    * the weighted Gini of the predictions divided by the weighted Gini of a
      perfect ordering (rows sorted by the true label), and
    * the share of positives found among the highest-scored rows that make up
      4% of the total weight.

    Rows whose target is 0 carry weight 20; every other row carries weight 1.

    Args:
        y_true: 1-D sequence of ground-truth labels.
        y_pred: 1-D sequence of predicted scores, same length as ``y_true``.

    Returns:
        ``0.5 * (gini_pred / gini_ideal + top_four)`` as a float.
    """
    # Column 0 holds the target, column 1 the prediction.
    pairs = np.array([y_true, y_pred]).T

    def _ranked(sort_col):
        # Rows ordered by the chosen column, largest value first.
        order = pairs[:, sort_col].argsort()[::-1]
        return pairs[order]

    def _weighted_gini(rows):
        row_weight = np.where(rows[:, 0] == 0, 20, 1)
        weight_random = np.cumsum(row_weight / np.sum(row_weight))
        total_pos = np.sum(rows[:, 0] * row_weight)
        cum_pos_found = np.cumsum(rows[:, 0] * row_weight)
        lorentz = cum_pos_found / total_pos
        return np.sum((lorentz - weight_random) * row_weight)

    by_prediction = _ranked(1)

    # Top-4% capture: positives among the rows whose cumulative weight stays
    # within 4% of the total weight.
    pred_weights = np.where(by_prediction[:, 0] == 0, 20, 1)
    within_cutoff = np.cumsum(pred_weights) <= int(0.04 * np.sum(pred_weights))
    captured = by_prediction[within_cutoff]
    top_four = np.sum(captured[:, 0]) / np.sum(by_prediction[:, 0])

    gini_pred = _weighted_gini(by_prediction)
    gini_ideal = _weighted_gini(_ranked(0))

    return 0.5 * (gini_pred / gini_ideal + top_four)



In [3]:
def amex_metric(y_true, y_pred, return_components=False) -> float:
    """Amex metric for array-like inputs.

    The score is the mean of the normalized weighted Gini coefficient and the
    fraction of positives captured among the highest-scored rows that make up
    4% of the total weight. Rows with target 0 are weighted 20, all others 1.

    Args:
        y_true: Ground-truth labels (ndarray, pandas Series or list). Any shape
            is accepted; the values are flattened.
        y_pred: Predicted scores with the same number of elements as ``y_true``.
        return_components: When true, return ``(gini, top_four, score)``
            instead of only the score.

    Returns:
        The score ``0.5 * (gini + top_four)``, or the 3-tuple described above.
    """
    def row_weights(target):
        # Vectorized equivalent of "20 if target == 0 else 1".
        return np.where(target == 0, 20, 1)

    def top_four_percent_captured(df) -> float:
        """Corresponds to the recall for a threshold of 4 %"""
        weight = row_weights(df['target'].to_numpy())
        four_pct_cutoff = int(0.04 * weight.sum())
        within_cutoff = weight.cumsum() <= four_pct_cutoff
        is_positive = (df['target'] == 1).to_numpy()
        return is_positive[within_cutoff].sum() / is_positive.sum()

    def weighted_gini(df) -> float:
        # `df` must already be sorted in the order being evaluated.
        target = df['target'].to_numpy()
        weight = row_weights(target)
        random = (weight / weight.sum()).cumsum()
        weighted_pos = target * weight
        lorentz = weighted_pos.cumsum() / weighted_pos.sum()
        return ((lorentz - random) * weight).sum()

    def normalized_weighted_gini(df) -> float:
        """Corresponds to 2 * AUC - 1"""
        # Perfect ordering: rank the rows by the true label itself.
        ideal = pd.DataFrame({'target': df['target'], 'prediction': df['target']})
        ideal = ideal.sort_values('prediction', ascending=False)
        return weighted_gini(df) / weighted_gini(ideal)

    # np.asarray lets lists and pandas Series through without relying on the
    # deprecated Series.ravel().
    df = pd.DataFrame({
        'target': np.asarray(y_true).ravel(),
        'prediction': np.asarray(y_pred).ravel(),
    }).sort_values('prediction', ascending=False)

    g = normalized_weighted_gini(df)
    d = top_four_percent_captured(df)
    score = 0.5 * (g + d)

    if return_components:
        return g, d, score
    return score

In [4]:
def format_time(seconds):
    """Format a duration given in seconds as a short human-readable string.

    The value is truncated to whole seconds. Hours are shown only when they
    are positive, and minutes only when hours or minutes are positive.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        A string such as ``"1h, 2m, 3s"``, ``"2m, 3s"`` or ``"3s"``.
    """
    minutes, seconds = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return f"{hours}h, {minutes}m, {seconds}s"
    if minutes > 0:
        return f"{minutes}m, {seconds}s"
    return f"{seconds}s"
        

# Path Configurations

In [5]:
# Tag embedded in every output file name below.
MODEL_IDENTIFIER = "large_v2"
# Output CSV file names (predictions, submission, optimizer results).
# NOTE(review): confirm these constants are actually used by the cells that write files.
PREDICTIONS_FILE = f"predictions_cnn_{MODEL_IDENTIFIER}.csv"
SUBMISSIONS_FILE = f"submission_cnn_{MODEL_IDENTIFIER}.csv"
OPTIMIZER_PARAMS_FILE = f"optimizer_results_cnn_{MODEL_IDENTIFIER}.csv"
# Input parquet files with the train and test sets.
TRAIN_DATA = "large_train_v2.parquet"
TEST_DATA = "large_test_v2.parquet"

# Data Reads

In [6]:
df_train = pd.read_parquet(TRAIN_DATA, engine="pyarrow")
# The stored target was accidentally scaled at one point (False -> 0.5, True -> 1.0);
# thresholding at 0.6 turns it back into a boolean column.
df_train["target"] = df_train["target"] > 0.6
df_train.head()


Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13,customer_ID,target
0,0.008214,0.037988,0.090349,0.106776,0.140657,0.160164,0.206365,0.226899,0.271047,0.285421,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,False
1,0.0,0.047228,0.068788,0.119097,0.155031,0.175565,0.203285,0.23922,0.275154,0.311088,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,False
2,0.010267,0.042094,0.073922,0.103696,0.13655,0.168378,0.199179,0.231006,0.261807,0.293635,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,False
3,0.030801,0.052361,0.091376,0.11807,0.140657,0.162218,0.206365,0.2423,0.276181,0.311088,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,False
4,0.029774,0.053388,0.092402,0.116016,0.146817,0.163244,0.210472,0.24846,0.280287,0.312115,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,False


In [7]:
df_train.shape

(458913, 3214)

In [8]:
df_test = pd.read_parquet(TEST_DATA, engine="pyarrow")
# Alternative test set (kept for reference, currently disabled):
#df_test = pd.read_parquet("deloitte-data/keras_test.parquet", engine="pyarrow")
df_test.head()


Unnamed: 0,S_2_1,S_2_2,S_2_3,S_2_4,S_2_5,S_2_6,S_2_7,S_2_8,S_2_9,S_2_10,...,D_68_7_5,D_68_7_6,D_68_7_7,D_68_7_8,D_68_7_9,D_68_7_10,D_68_7_11,D_68_7_12,D_68_7_13,customer_ID
0,0.73922,0.774127,0.805955,0.831622,0.858316,0.887064,0.921971,0.967146,0.980493,,...,1.0,1.0,1.0,1.0,1.0,,,,,00000469ba478561f23a92a868bd366de6f6527a684c9a...
1,0.428131,0.465092,0.495893,0.513347,0.546201,0.586242,0.622177,0.629363,0.662218,0.711499,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...
2,0.596509,0.641684,0.663244,0.704312,0.743326,0.749487,0.811088,0.812115,0.858316,0.894251,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...
3,0.410678,0.455852,0.49692,0.502053,0.534908,0.588296,0.62423,0.637577,0.684805,0.705339,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...
4,0.610883,0.652977,0.684805,0.705339,0.728953,0.772074,0.800821,0.837782,0.843943,0.87885,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...


In [9]:
# Columns that exist in the training frame but not in the test frame.
[col for col in df_train.columns if col not in df_test.columns]

['fake_splitter', 'target']

In [10]:
# categorical values are expected to have been handled in preprocessing (one-hot encoded in this case)

In [11]:
df_train.shape

(458913, 3214)

# Preprocessing: Replace inf values

In [12]:
# Replace +/-inf with NaN, then fill every NaN with the sentinel value -1 (both in place).
# NOTE(review): the earlier comment said "the above groupby creates them"; no groupby is
# visible in this notebook, so that presumably refers to upstream pre-processing — confirm.
df_train.replace([np.inf, -np.inf], np.nan,inplace=True)
df_train.fillna(-1, inplace=True)

In [13]:
# Same cleaning as for the training frame: +/-inf -> NaN, then NaN -> -1 (both in place).
# NOTE(review): the earlier comment said "the above groupby creates them"; no groupby is
# visible in this notebook, so that presumably refers to upstream pre-processing — confirm.
df_test.replace([np.inf, -np.inf], np.nan,inplace=True)
df_test.fillna(-1, inplace=True)

# Preprocessing: Create Stratification Column

In [14]:
# fake_splitter is a combination of two columns to stratify on; it is created in pre-processing.
# Keep it aside for the CV split and drop it from the frame so it does not end up in X.
strat = df_train["fake_splitter"]
df_train = df_train.drop("fake_splitter", axis=1)

In [15]:
# Feature matrix: every column except the customer identifier and the label.
X = df_train.drop(["customer_ID", "target"], axis=1)
y = df_train["target"]
X.shape

(458913, 3211)

In [16]:
# Pair each row's stratification key with its target.
stratzip = zip(strat, y)
# Join each pair into a single "key-target" string so StratifiedKFold receives one label
# per sample (the original note says tuple stratification breaks it).
stratzip = [f"{a}-{b}" for (a,b) in stratzip]
strat = stratzip
# NOTE: this cell overwrites `strat`; re-running it without first re-running the cell that
# defines `strat` appends the target to the labels a second time.

In [17]:
3029*13*233

9174841

# Model Creation and Training

In [18]:
def reset_keras():
    """Tear down the current Keras/TF1-compat session and install a fresh one.

    Closes the active session, clears the Keras backend state and registers a
    new session configured to use GPU "0" with a memory fraction of 1.

    TensorFlow is imported inside the function, as the other helpers in this
    notebook do, because nothing imports it at module level. Note that calling
    this from the main notebook process therefore loads TensorFlow there.
    """
    import tensorflow as tf

    backend = tf.compat.v1.keras.backend

    stale_session = backend.get_session()
    backend.clear_session()
    stale_session.close()
    # Kept from the original sequence.
    # NOTE(review): presumably forces creation of a new default session;
    # confirm whether it is still needed given set_session() below.
    backend.get_session()

    # The earlier version also ran `del classifier` inside a bare try/except.
    # Inside a function that statement targets a local name that never exists,
    # so it always failed silently; it has been removed.

    # use the same config as you used to create the session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    config.gpu_options.visible_device_list = "0"
    backend.set_session(tf.compat.v1.Session(config=config))

In [19]:
def delete_keras_models():
    """Delete every path matched by ``keras/*`` relative to the working directory."""
    stale_paths = glob.glob("keras/*")
    for stale_path in stale_paths:
        os.remove(stale_path)

def create_model_cnn(X_input):
    """Build and compile a small 1D CNN binary classifier.

    Architecture: Conv1D(64, kernel 13, stride 13) -> Conv1D(32, kernel 3) ->
    Flatten -> Dense(64) -> Dropout(0.3) -> Dense(16) -> Dense(1, sigmoid).
    Compiled with Adam (learning rate 1e-4), binary cross-entropy loss and
    accuracy as metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled Keras ``Model``.
    """
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Conv1D, Flatten, Dropout, Dense, Input
    import tensorflow as tf

    gpu_device = 'GPU:0'
    print(tf.config.experimental.get_memory_info(gpu_device))
    print("creating classifier, cnn")
    #https://stackoverflow.com/questions/59567226/how-to-programmatically-determine-available-gpu-memory-with-tensorflow?noredirect=1&lq=1
    print(tf.config.experimental.get_memory_info(gpu_device))

    inputs = Input(shape=(X_input.shape[1:]))
    # NOTE(review): kernel_size == strides == 13 gives non-overlapping windows;
    # presumably each feature occupies 13 consecutive columns — confirm.
    layer_stack = [
        Conv1D(filters=64, kernel_size=13, strides=13, activation='relu'),
        Conv1D(filters=32, kernel_size=3, strides=1, activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    print("layers created")

    model = Model(inputs=inputs, outputs=tensor)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])

    return model


def create_model_cnn_batchnorm(X_input):
    """Build and compile the 1D CNN classifier variant with batch normalization.

    Architecture: Conv1D(64, kernel 13, stride 13) -> BN -> Conv1D(32, kernel 6)
    -> BN -> Flatten -> Dropout(0.3) -> Dense(64) -> BN -> Dropout(0.3) ->
    Dense(16) -> BN -> Dense(1, sigmoid). Compiled with Adam (learning rate
    1e-4), binary cross-entropy loss and accuracy as metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled Keras ``Model``.
    """
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Conv1D, BatchNormalization, Flatten, Dropout, Dense, Input
    import tensorflow as tf

    gpu_device = 'GPU:0'
    print(tf.config.experimental.get_memory_info(gpu_device))
    print("creating classifier, cnn")
    #https://stackoverflow.com/questions/59567226/how-to-programmatically-determine-available-gpu-memory-with-tensorflow?noredirect=1&lq=1
    print(tf.config.experimental.get_memory_info(gpu_device))

    inputs = Input(shape=(X_input.shape[1:]))
    # Layers are instantiated and applied in exactly this order.
    layer_stack = [
        Conv1D(filters=64, kernel_size=13, strides=13, activation='relu'),
        BatchNormalization(),
        Conv1D(filters=32, kernel_size=6, strides=1, activation='relu'),
        BatchNormalization(),
        Flatten(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(16, activation='relu'),
        BatchNormalization(),
        Dense(1, activation='sigmoid'),
    ]
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    print("layers created")

    model = Model(inputs=inputs, outputs=tensor)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])

    return model


def create_model_wavenet(X_input):
    """Build and compile a WaveNet-style stack of dilated causal convolutions.

    Architecture: eight causal Conv1D(32, kernel 2) layers with dilation rates
    1, 2, 4, 8, 1, 2, 4, 8 -> Conv1D(14, kernel 1) -> Flatten -> Dense(6) ->
    Dense(1, sigmoid). Compiled with Adam (learning rate 1e-4), binary
    cross-entropy loss and accuracy as metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Conv1D, Flatten, Dense
    import tensorflow as tf

    print("creating classifier, wavenet")

    dilation_rates = (1, 2, 4, 8) * 2
    layer_stack = [tf.keras.layers.InputLayer(input_shape=(X_input.shape[1:]))]
    layer_stack += [
        Conv1D(filters=32, kernel_size=2, padding="causal", activation="relu",
               dilation_rate=rate)
        for rate in dilation_rates
    ]
    layer_stack += [
        Conv1D(filters=14, kernel_size=1),
        Flatten(),
        Dense(6, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    wavenet_model = tf.keras.Sequential(layer_stack)
    wavenet_model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])
    return wavenet_model


def create_model_bilstm(X_input):
    """Build and compile a two-layer bidirectional LSTM binary classifier.

    Architecture: Bidirectional(LSTM(64, sequences)) -> Bidirectional(LSTM(32))
    -> Dense(32) -> Dropout(0.3) -> Dense(16) -> Dense(1, sigmoid). Compiled
    with Adam (learning rate 1e-4), binary cross-entropy loss and accuracy as
    metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled Keras ``Model``.
    """
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Dropout, Dense, Input, Bidirectional, LSTM
    import tensorflow as tf

    print("creating classifier, bilstm")
    #https://stackoverflow.com/questions/59567226/how-to-programmatically-determine-available-gpu-memory-with-tensorflow?noredirect=1&lq=1
    print(tf.config.experimental.get_memory_info('GPU:0'))

    inputs = Input(shape=(X_input.shape[1:]))
    layer_stack = [
        # First recurrent layer returns the full sequence for the second one.
        Bidirectional(LSTM(64, return_sequences=True)),
        Bidirectional(LSTM(32, return_sequences=False)),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    print("layers created")

    model = Model(inputs=inputs, outputs=tensor)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])

    return model

def create_model_gru(X_input):
    """Build and compile a two-layer GRU binary classifier.

    Architecture: GRU(64, sequences) -> GRU(32) -> Dense(32) -> Dropout(0.3)
    -> Dense(16) -> Dense(1, sigmoid). Compiled with Adam (learning rate 1e-4),
    binary cross-entropy loss and accuracy as metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled Keras ``Model``.
    """
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Dropout, Dense, Input, GRU
    import tensorflow as tf

    print("creating classifier, gru")
    #https://stackoverflow.com/questions/59567226/how-to-programmatically-determine-available-gpu-memory-with-tensorflow?noredirect=1&lq=1
    print(tf.config.experimental.get_memory_info('GPU:0'))

    inputs = Input(shape=(X_input.shape[1:]))
    layer_stack = [
        # First recurrent layer returns the full sequence for the second one.
        GRU(64, return_sequences=True),
        GRU(32, return_sequences=False),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    print("layers created")

    model = Model(inputs=inputs, outputs=tensor)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])

    return model

def create_model_cnn_gru(X_input):
    """Build and compile a Conv1D front end followed by a bidirectional GRU.

    Architecture: Conv1D(32, kernel 4, stride 2) -> Bidirectional(GRU(64)) ->
    Dense(32) -> Dropout(0.3) -> Dense(16) -> Dense(1, sigmoid). Compiled with
    Adam (learning rate 1e-4), binary cross-entropy loss and accuracy as metric.

    Args:
        X_input: Array whose ``shape[1:]`` defines the model input shape; only
            the shape is read.

    Returns:
        The compiled Keras ``Model``.
    """
    from keras.models import Model
    from keras.optimizers import Adam
    from tensorflow.keras.layers import Conv1D, Dropout, Dense, Input, Bidirectional, GRU

    print("creating classifier, cnn-gru")

    inputs = Input(shape=(X_input.shape[1:]))
    layer_stack = [
        Conv1D(filters=32, kernel_size=4, strides=2, activation="relu"),
        Bidirectional(GRU(64, return_sequences=False)),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    tensor = inputs
    for layer in layer_stack:
        tensor = layer(tensor)

    print("layers created")

    model = Model(inputs=inputs, outputs=tensor)

    print("model created")

    model.compile(Adam(learning_rate=.0001), loss='binary_crossentropy', metrics=['accuracy'])

    return model

    
def train_and_predict(df_from, strat, X, y):
    """Run stratified K-fold training, one subprocess per fold.

    Each fold is trained and scored in a separate process via
    ``train_and_predict_2``, which stores its results in a shared manager dict
    under ``"preds-<fold>"`` and ``"history-<fold>"``. The validation
    predictions of every fold are collected into one out-of-fold array.

    Args:
        df_from: Source data frame. Not used by the computation; kept so
            existing calls keep working.
        strat: One stratification label per row, passed to ``StratifiedKFold``.
        X: Feature DataFrame; reshaped to ``(rows, columns, 1)`` for the models.
        y: Target values aligned with ``X``.

    Returns:
        ``(process_dict, train_preds)``: the shared results dict and an array
        of shape ``(rows, 1)`` with the out-of-fold predictions.

    Raises:
        RuntimeError: If a fold's subprocess exits with a non-zero code or
            does not deliver its predictions.
    """
    delete_keras_models()

    manager = multiprocessing.Manager()
    process_dict = manager.dict()
    # Add a trailing axis of size 1 so every column value becomes one step.
    X = X.values.reshape(-1, X.shape[1], 1)

    cv = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=1911)

    cv_scores = np.empty(N_FOLDS)
    cv_accuracies = np.empty(N_FOLDS)
    cv_amex = np.empty(N_FOLDS)
    time_start_all_folds = time.time()

    n_classes = 1
    train_preds = np.zeros((X.shape[0], n_classes))

    for idx, (train_idx, test_idx) in enumerate(cv.split(X, strat)):

        time_start_this_fold = time.time()
        print(f"=== STARTING FOLD {idx+1}/{N_FOLDS} ===")
        axis = 0
        X_train, X_test = np.take(X, train_idx, axis), np.take(X, test_idx, axis)
        y_train, y_test = np.take(y, train_idx, axis), np.take(y, test_idx, axis)

        p = Process(target=train_and_predict_2, args=(idx, process_dict, X_train, y_train, X_test, y_test))
        p.start()
        p.join()
        # join() always returns None; the exit status lives in `exitcode` and
        # has to be read before close() releases the process object.
        exit_code = p.exitcode
        print(f"Subprocess exited with code {exit_code}")
        p.close()
        print("Subprocess closed")

        preds_key = f"preds-{idx}"
        if exit_code != 0 or preds_key not in process_dict:
            raise RuntimeError(
                f"Training subprocess for fold {idx+1}/{N_FOLDS} failed "
                f"(exit code {exit_code}); no predictions were returned"
            )

        preds = process_dict[preds_key]
        np.add.at(train_preds, test_idx, preds)
        print(f"non-zero out-of-fold predictions so far: {np.count_nonzero(train_preds)}")

        preds_this_round = (preds >= 0.5)
        acc_score = accuracy_score(y_test, preds_this_round)
        log_loss_fold = log_loss(y_test, preds)
        amex_score = amex_metric(y_test, preds)

        cv_scores[idx] = log_loss_fold
        cv_accuracies[idx] = acc_score
        cv_amex[idx] = amex_score

        time_total_this_fold = time.time() - time_start_this_fold
        print(f"=== FINISHED FOLD {idx+1}/{N_FOLDS} log loss={log_loss_fold}, accuracy={acc_score}, amex={amex_score} ===")
        print(f"time to run this fold: {format_time(time_total_this_fold)}")

    # The per-fold scores used to be collected but never reported.
    print(f"=== ALL FOLDS DONE mean log loss={cv_scores.mean()}, mean accuracy={cv_accuracies.mean()}, mean amex={cv_amex.mean()} ===")
    print(f"total time: {format_time(time.time() - time_start_all_folds)}")

    return process_dict, train_preds

def train_and_predict_2(idx, process_dict, X_train, y_train, X_test, y_test):
    """Train one fold, reload its best weights and publish the results.

    Fits the batch-norm CNN for 5 epochs with batch size 64, checkpointing
    weights only when ``val_loss`` improves. Afterwards the best checkpoint is
    loaded and used to predict the validation set.

    Args:
        idx: Zero-based fold index; used in the checkpoint path and dict keys.
        process_dict: Shared dict receiving ``"history-<idx>"`` (training
            history) and ``"preds-<idx>"`` (validation predictions).
        X_train, y_train: Training features and targets for this fold.
        X_test, y_test: Validation features and targets for this fold.
    """
    #tensorflow must be imported inside a process if processes are used. otherwise the main process reserves GPU
    import tensorflow as tf

    model = create_model_cnn_batchnorm(X_train)

    print("model compiled, fitting")

    # One path serves both as checkpoint target and as the file to reload.
    filepath = f"keras/cnn-model-fold{idx}"
    best_weights_saver = tf.keras.callbacks.ModelCheckpoint(
        filepath=filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
    )

    #have to store history right after fit(), as predict() seems to clear it
    fit_result = model.fit(
        X_train,
        y_train,
        batch_size=64,
        epochs=5,
        validation_data=(X_test, y_test),
        callbacks=[best_weights_saver],
    )
    history = fit_result.history

    print(f"loading best weights: {filepath}")
    model.load_weights(filepath)
    print("predicting")
    preds = model.predict(X_test)
    print(preds.shape)

    process_dict[f"history-{idx}"] = history
    process_dict[f"preds-{idx}"] = preds
        


In [20]:
# Run the K-fold training; returns the shared results dict and the out-of-fold predictions.
process_dict, train_preds = train_and_predict(df_train, strat, X, y)


=== STARTING FOLD 1/5 ===


2022-08-28 00:04:12.562758: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:12.592565: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:12.593432: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:12.596049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}


2022-08-28 00:04:13.256919: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:13.257787: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:13.258349: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:04:13.258815: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22258 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


layers created
model created
model compiled, fitting
Epoch 1/5


2022-08-28 00:04:17.434332: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-08-28 00:04:19.256741: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loading best weights: keras/cnn-model-fold0
predicting
(91783, 1)
Subprocess exited with code None
Subprocess closed
count zerO: 91783
=== FINISHED FOLD 1/5 log loss=0.22840913584116562, accuracy=0.8989464279877537, amex=0.7772265659263757 ===
time to run this fold: 1m, 55s
=== STARTING FOLD 2/5 ===


2022-08-28 00:06:06.666926: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.671680: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.672212: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.672901: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created
model compiled, fitting


2022-08-28 00:06:06.937501: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.938010: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.938480: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:06:06.938930: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/5


2022-08-28 00:06:10.569489: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100


   1/5737 [..............................] - ETA: 2:49:58 - loss: 0.8800 - accuracy: 0.4219

2022-08-28 00:06:11.318193: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loading best weights: keras/cnn-model-fold1
predicting
(91783, 1)
Subprocess exited with code None
Subprocess closed
count zerO: 183566
=== FINISHED FOLD 2/5 log loss=0.2287704374929481, accuracy=0.8993059716941045, amex=0.7740029384025113 ===
time to run this fold: 1m, 51s
=== STARTING FOLD 3/5 ===


2022-08-28 00:07:57.726333: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.729548: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.730073: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.730753: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created
model compiled, fitting


2022-08-28 00:07:57.997981: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.998494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.999015: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:07:57.999491: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/5


2022-08-28 00:08:01.693520: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100


   1/5737 [..............................] - ETA: 2:52:10 - loss: 1.1746 - accuracy: 0.2344

2022-08-28 00:08:02.445502: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loading best weights: keras/cnn-model-fold2
predicting
(91783, 1)
Subprocess exited with code None
Subprocess closed
count zerO: 275349
=== FINISHED FOLD 3/5 log loss=0.22596239363569523, accuracy=0.9006460891450486, amex=0.78236262311369 ===
time to run this fold: 1m, 52s
=== STARTING FOLD 4/5 ===


2022-08-28 00:09:49.814584: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:49.819341: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:49.819885: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:49.820593: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created
model compiled, fitting


2022-08-28 00:09:50.082408: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:50.082918: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:50.083430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:09:50.083905: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/5


2022-08-28 00:09:53.734458: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100


   1/5737 [..............................] - ETA: 2:51:45 - loss: 1.0727 - accuracy: 0.5625

2022-08-28 00:09:54.490258: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loading best weights: keras/cnn-model-fold3
predicting
(91782, 1)
Subprocess exited with code None
Subprocess closed
count zerO: 367131
=== FINISHED FOLD 4/5 log loss=0.22891536152259445, accuracy=0.8978013118040574, amex=0.7739837860098228 ===
time to run this fold: 1m, 52s
=== STARTING FOLD 5/5 ===


2022-08-28 00:11:41.917262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:41.920487: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:41.921025: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:41.921712: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created
model compiled, fitting


2022-08-28 00:11:42.184686: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:42.185251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:42.185764: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:11:42.186242: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22258 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6


Epoch 1/5


2022-08-28 00:11:45.852854: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100


   1/5737 [..............................] - ETA: 2:51:30 - loss: 0.6357 - accuracy: 0.7188

2022-08-28 00:11:46.612299: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
loading best weights: keras/cnn-model-fold4
predicting
(91782, 1)
Subprocess exited with code None
Subprocess closed
count zerO: 458913
=== FINISHED FOLD 5/5 log loss=0.22678537793670261, accuracy=0.8990869669434094, amex=0.7770146034903859 ===
time to run this fold: 1m, 52s


In [21]:
458913/64*0.8

5736.4125

In [22]:
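# Scores observed per architecture in earlier experiments.
# NOTE(review): presumably the AMEX metric -- confirm.

# --- large data set ---

# 13 rows
# wavenet 0.76-0.77
# gru 0.75-0.76 (3 iter)
# bi-lstm 0.75-0.77

# 1 row
# bi-lstm 0.75-0.77
# wavenet 0.77-0.79
# gru-cnn 0.70
# basic ("perus") cnn 0.77-0.78

# --- deloitte data set ---
# wavenet 0.774-0.784
# cnn 0.766-0.785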


# Save Out-of-Fold Predictions from Training

In [23]:
# Peek at the array stored under "preds-1" in the shared process dict.
# NOTE(review): presumably the predictions written for fold index 1 by the
# training loop above -- confirm.
process_dict["preds-1"]

array([[0.99111253],
       [0.28556508],
       [0.12409421],
       ...,
       [0.20284677],
       [0.00877489],
       [0.00362701]], dtype=float32)

In [24]:
# Shape check: a single prediction column, hence the [:, 0] used when scoring.
train_preds.shape

(458913, 1)

In [25]:
# Pair each training customer_ID with its prediction from train_preds.
# Selecting the ID column as a one-column frame keeps df_train's index, and
# assign() adds the prediction column positionally, exactly like item assignment.
df_preds = df_train[["customer_ID"]].assign(prediction=train_preds)

In [26]:
# Score the predictions in train_preds against the labels y with the metric
# helper defined at the top of the notebook; [:, 0] flattens the single column.
amex_metric_mod(y, train_preds[:, 0])    

0.7768854485848968

In [27]:
# Persist the predictions. No index=False here, so the DataFrame index is
# written as an unnamed first column (unlike the submission file written later).
df_preds.to_csv("predictions_cnn.csv")

# Reload Models, Predict Entire Training Set

In [28]:
#test.replace([np.inf, -np.inf], np.nan,inplace=True)
# Feature frame: every df_train column except the ID and the label.
X_train = df_train.drop(columns=["customer_ID", "target"])
# Add a trailing axis of length 1 -> (n_rows, n_features, 1).
X = X_train.values[:, :, np.newaxis]


In [29]:
# List the feature column names that make up X (ID and target already dropped).
X_train.columns

Index(['S_2_1', 'S_2_2', 'S_2_3', 'S_2_4', 'S_2_5', 'S_2_6', 'S_2_7', 'S_2_8',
       'S_2_9', 'S_2_10',
       ...
       'D_68_7_4', 'D_68_7_5', 'D_68_7_6', 'D_68_7_7', 'D_68_7_8', 'D_68_7_9',
       'D_68_7_10', 'D_68_7_11', 'D_68_7_12', 'D_68_7_13'],
      dtype='object', length=3211)

In [30]:
def predict_keras(idx, process_dict, X, weights_filepath):
    """Rebuild the CNN, load one set of saved weights and predict on X.

    Used as the target of a multiprocessing.Process by the prediction cells;
    the result is handed back through the shared dict rather than returned.

    Args:
        idx: Fold index; only used to build the result key.
        process_dict: Mapping shared with the parent process. The predictions
            are stored under the key f"preds-{idx}".
        X: Model input; passed to the model factory and to model.predict.
        weights_filepath: Checkpoint path/prefix passed to model.load_weights.
    """
    model = create_model_cnn_batchnorm(X)
    # Load from the argument. The previous code read the global `filepath`,
    # which only worked because the caller's loop variable happened to carry
    # that name and was inherited by the child process.
    model.load_weights(weights_filepath)
    preds = model.predict(X)
    process_dict[f"preds-{idx}"] = preds

In [31]:

# Predict the full training set with every fold's saved model. Each prediction
# runs in its own subprocess and returns its result through a Manager dict.
# NOTE(review): presumably a subprocess is used so TensorFlow's GPU memory is
# released when the child exits -- confirm.
manager = multiprocessing.Manager()
process_dict = manager.dict()
for idx in range(N_FOLDS):
    searchpath = f"keras/cnn-model-fold{idx}*.data-*"
    print(searchpath)
    # Cut each matched ".data-*" file name at its first "." to get the
    # checkpoint prefix expected by predict_keras. dict.fromkeys removes
    # repeated prefixes (several matched files sharing one prefix) while
    # keeping glob order, so every checkpoint is predicted only once.
    prefixes = dict.fromkeys(fl[0:fl.index(".")] for fl in glob.glob(searchpath))
    for filepath in prefixes:
        print(f"=== STARTING TO PREDICT {idx+1}/{N_FOLDS}: {filepath} === ")

        p = Process(target=predict_keras, args=(idx, process_dict, X, filepath))
        p.start()
        p.join()
        # join() always returns None; the real status is p.exitcode, and it
        # must be read before close() invalidates the Process object.
        flag = p.exitcode
        p.close()
        if flag != 0:
            # Fail with the actual cause instead of a KeyError on the missing
            # "preds-{idx}" entry further down.
            raise RuntimeError(
                f"Prediction subprocess for fold {idx} ({filepath}) exited with code {flag}"
            )
        print(f"=== FINISHED PREDICTION {idx+1}/{N_FOLDS}: {sum(process_dict[f'preds-{idx}'])} === ")
        print(f"Subprocess exited with code {flag}")



keras/cnn-model-fold0*.data-*
=== STARTING TO PREDICT 1/5: keras/cnn-model-fold0 === 


2022-08-28 00:13:31.322204: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.325625: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.326132: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.326804: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:13:31.606477: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.606981: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.607453: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:31.607897: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22258 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:13:38.064818: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384

  204/14342 [..............................] - ETA: 10s  

2022-08-28 00:13:38.806804: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 1/5: [116012.125] === 
Subprocess exited with code None
keras/cnn-model-fold1*.data-*
=== STARTING TO PREDICT 2/5: keras/cnn-model-fold1 === 


2022-08-28 00:13:52.679345: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.682847: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.683350: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.684012: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:13:52.948977: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.949506: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.949994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:13:52.950454: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:13:59.469252: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384

  205/14342 [..............................] - ETA: 10s  

2022-08-28 00:14:00.217461: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 2/5: [117119.58] === 
Subprocess exited with code None
keras/cnn-model-fold2*.data-*
=== STARTING TO PREDICT 3/5: keras/cnn-model-fold2 === 


2022-08-28 00:14:12.862967: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:12.866430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:12.866935: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:12.867596: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:14:13.121214: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:13.121725: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:13.122238: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:13.122706: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:14:19.735873: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384

  198/14342 [..............................] - ETA: 10s  

2022-08-28 00:14:20.487721: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 3/5: [119885.86] === 
Subprocess exited with code None
keras/cnn-model-fold3*.data-*
=== STARTING TO PREDICT 4/5: keras/cnn-model-fold3 === 


2022-08-28 00:14:33.828208: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:33.831679: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:33.832184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:33.832847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:14:34.092043: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:34.092561: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:34.093086: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:34.093556: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:14:40.639532: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384

  211/14342 [..............................] - ETA: 10s  

2022-08-28 00:14:41.390599: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 4/5: [115811.31] === 
Subprocess exited with code None
keras/cnn-model-fold4*.data-*
=== STARTING TO PREDICT 5/5: keras/cnn-model-fold4 === 


2022-08-28 00:14:54.095170: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.098604: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.099091: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.099758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:14:54.353601: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.354116: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.354627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:14:54.355094: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:15:00.957510: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384

  215/14342 [..............................] - ETA: 9s   

2022-08-28 00:15:01.701497: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 5/5: [117759.39] === 
Subprocess exited with code None


In [32]:
# Gather the per-fold prediction arrays from the shared dict, in fold order.
preds = [process_dict[f"preds-{x}"] for x in range(N_FOLDS)]

In [33]:
# Element-wise mean over folds: built-in sum() adds the arrays in the list,
# then the total is divided by the fold count.
# NOTE: this rebinds `preds` from the list of arrays to the averaged array, so
# the cell is not idempotent -- re-run the previous cell before running it again.
preds = sum(preds)/N_FOLDS
preds

array([[0.0023371 ],
       [0.00368023],
       [0.00388113],
       ...,
       [0.004959  ],
       [0.08748712],
       [0.00424258]], dtype=float32)

In [34]:
# Score the fold-averaged predictions on the full training set against y.
# Every fold model saw most of these rows during training, so this is not an
# out-of-fold score.
amex_metric_mod(y, preds[:, 0])   


0.7880889773206304

In [35]:
# Pair each training customer_ID with its fold-averaged prediction; [:, 0]
# takes the single prediction column as a 1-D array.
df_preds = df_train[["customer_ID"]].assign(prediction=preds[:, 0])

In [36]:
# Display the customer_ID / prediction frame built in the previous cell.
df_preds

Unnamed: 0,customer_ID,prediction
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0.002337
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0.003680
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0.003881
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0.008267
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0.003272
...,...,...
458908,ffff41c8a52833b56430603969b9ca48d208e7c192c6a4...,0.004575
458909,ffff518bb2075e4816ee3fe9f3b152c57fc0e6f01bf7fd...,0.031651
458910,ffff9984b999fccb2b6127635ed0736dda94e544e67e02...,0.004959
458911,ffffa5c46bc8de74f5a4554e74e239c8dee6b9baf38814...,0.087487


In [37]:
# Persist the fold-averaged training predictions. No index=False here, so the
# DataFrame index is written as an unnamed first column.
df_preds.to_csv("predictions_cnn_2.csv")

# Create Kaggle Submission

Use the models saved for each fold to predict the test set, average their predictions, and write `submission_cnn.csv`.

In [38]:
# Test features: every df_test column except the ID.
X_test = df_test.drop(columns="customer_ID")
# Add a trailing axis of length 1 -> (n_rows, n_features, 1).
X_test_np = X_test.values[:, :, np.newaxis]
#X_test_np = X_test.values.reshape (-1, 13, 233)
X_test_np.shape

(924621, 3211, 1)

In [39]:
# Confirm the fold count that the prediction loop below iterates over.
N_FOLDS

5

In [40]:
# Predict the test set with every fold's saved model. Each prediction runs in
# its own subprocess and returns its result through a fresh Manager dict.
# NOTE(review): presumably a subprocess is used so TensorFlow's GPU memory is
# released when the child exits -- confirm.
manager = multiprocessing.Manager()
process_dict = manager.dict()
for idx in range(N_FOLDS):
    searchpath = f"keras/cnn-model-fold{idx}*.data-*"
    print(searchpath)
    # Cut each matched ".data-*" file name at its first "." to get the
    # checkpoint prefix expected by predict_keras. dict.fromkeys removes
    # repeated prefixes (several matched files sharing one prefix) while
    # keeping glob order, so every checkpoint is predicted only once.
    prefixes = dict.fromkeys(fl[0:fl.index(".")] for fl in glob.glob(searchpath))
    for filepath in prefixes:
        print(f"=== STARTING TO PREDICT {idx+1}/{N_FOLDS}: {filepath} === ")

        p = Process(target=predict_keras, args=(idx, process_dict, X_test_np, filepath))
        p.start()
        p.join()
        # join() always returns None; the real status is p.exitcode, and it
        # must be read before close() invalidates the Process object.
        flag = p.exitcode
        p.close()
        if flag != 0:
            # Fail with the actual cause instead of a KeyError on the missing
            # "preds-{idx}" entry further down.
            raise RuntimeError(
                f"Prediction subprocess for fold {idx} ({filepath}) exited with code {flag}"
            )
        print(f"=== FINISHED PREDICTION {idx+1}/{N_FOLDS}: {sum(process_dict[f'preds-{idx}'])} === ")
        print(f"Subprocess exited with code {flag}")


keras/cnn-model-fold0*.data-*
=== STARTING TO PREDICT 1/5: keras/cnn-model-fold0 === 


2022-08-28 00:15:17.418589: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.422084: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.422604: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.423281: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:15:17.680889: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.681428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.681909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:17.682366: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:15:23.513886: W tensorflow/core/framework/cpu_allocator_impl.cc

  211/28895 [..............................] - ETA: 20s  

2022-08-28 00:15:31.141745: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 1/5: [223097.23] === 
Subprocess exited with code None
keras/cnn-model-fold1*.data-*
=== STARTING TO PREDICT 2/5: keras/cnn-model-fold1 === 


2022-08-28 00:15:55.481310: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.485990: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.486492: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.487156: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:15:55.745472: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.745987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.746469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:15:55.746919: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:16:01.539087: W tensorflow/core/framework/cpu_allocator_impl.cc

  202/28895 [..............................] - ETA: 21s  

2022-08-28 00:16:09.223256: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 2/5: [225171.44] === 
Subprocess exited with code None
keras/cnn-model-fold2*.data-*
=== STARTING TO PREDICT 3/5: keras/cnn-model-fold2 === 


2022-08-28 00:16:33.945314: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:33.948856: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:33.949469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:33.950136: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:16:34.218774: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:34.219296: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:34.219783: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:16:34.220241: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:16:40.090616: W tensorflow/core/framework/cpu_allocator_impl.cc

  215/28895 [..............................] - ETA: 20s  

2022-08-28 00:16:47.774612: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 3/5: [232699.84] === 
Subprocess exited with code None
keras/cnn-model-fold3*.data-*
=== STARTING TO PREDICT 4/5: keras/cnn-model-fold3 === 


2022-08-28 00:17:13.263268: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.266821: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.267327: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.268016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:17:13.523546: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.524054: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.524567: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:13.525044: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:17:19.418043: W tensorflow/core/framework/cpu_allocator_impl.cc

  197/28895 [..............................] - ETA: 22s  

2022-08-28 00:17:27.116564: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 4/5: [225868.64] === 
Subprocess exited with code None
keras/cnn-model-fold4*.data-*
=== STARTING TO PREDICT 5/5: keras/cnn-model-fold4 === 


2022-08-28 00:17:51.692870: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.697750: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.698272: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.698940: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

{'current': 0, 'peak': 0}
creating classifier, cnn
{'current': 0, 'peak': 0}
layers created
model created


2022-08-28 00:17:51.952192: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.952709: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.953217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-28 00:17:51.953681: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22256 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:2d:00.0, compute capability: 8.6
2022-08-28 00:17:57.821454: W tensorflow/core/framework/cpu_allocator_impl.cc

  217/28895 [..............................] - ETA: 20s  

2022-08-28 00:18:05.490846: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


=== FINISHED PREDICTION 5/5: [229921.48] === 
Subprocess exited with code None


In [41]:
# Spot check: total of the predictions stored under "preds-2"; matches the
# value printed for prediction 3/5 in the loop output above.
sum(process_dict["preds-2"])

array([232699.84], dtype=float32)

In [42]:
# Gather the per-fold test prediction arrays from the shared dict, in fold order.
preds = [process_dict[f"preds-{x}"] for x in range(N_FOLDS)]

In [43]:
# Element-wise mean over folds: built-in sum() adds the arrays in the list,
# then the total is divided by the fold count.
# NOTE: this rebinds `preds` from the list of arrays to the averaged array, so
# the cell is not idempotent -- re-run the previous cell before running it again.
preds = sum(preds)/N_FOLDS
preds

array([[0.02569789],
       [0.00239859],
       [0.03533811],
       ...,
       [0.5338541 ],
       [0.41173655],
       [0.06334434]], dtype=float32)

In [45]:
# Submission frame: each test customer_ID next to its fold-averaged prediction.
# assign() adds the column positionally, exactly like item assignment.
submission = df_test[["customer_ID"]].assign(prediction=preds)
submission

Unnamed: 0,customer_ID,prediction
0,00000469ba478561f23a92a868bd366de6f6527a684c9a...,0.025698
1,00001bf2e77ff879fab36aa4fac689b9ba411dae63ae39...,0.002399
2,0000210045da4f81e5f122c6bde5c2a617d03eef67f82c...,0.035338
3,00003b41e58ede33b8daf61ab56d9952f17c9ad1c3976c...,0.224146
4,00004b22eaeeeb0ec976890c1d9bfc14fd9427e98c4ee9...,0.828007
...,...,...
924616,ffff952c631f2c911b8a2a8ca56ea6e656309a83d2f64c...,0.007329
924617,ffffcf5df59e5e0bba2a5ac4578a34e2b5aa64a1546cd3...,0.744417
924618,ffffd61f098cc056dbd7d2a21380c4804bbfe60856f475...,0.533854
924619,ffffddef1fc3643ea179c93245b68dca0f36941cd83977...,0.411737


In [46]:
# Write the submission file; index=False keeps only the customer_ID and
# prediction columns.
submission.to_csv("submission_cnn.csv", index=False)