In [1]:
# Random seed reused by the stochastic steps of this notebook
# (IterativeImputer, CatBoostEncoder and the StratifiedKFold splitter).
SEED = 666

# Install and import packages

In [2]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import MissingIndicator, SimpleImputer, IterativeImputer, KNNImputer
from category_encoders.cat_boost import CatBoostEncoder
from deepctr.inputs import  SparseFeat, DenseFeat, get_feature_names
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam,RMSprop
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import backend as K
from tensorflow.keras import callbacks
from tensorflow.keras import utils
from tensorflow.keras.callbacks import ReduceLROnPlateau
from deepctr.models import DeepFM
import tensorflow.keras as keras
import tensorflow as tf
import pandas as pd
import numpy as np
import sklearn
import re
import warnings
warnings.simplefilter('ignore')
from tensorflow.keras import layers
from tensorflow.keras import utils
from keras_tqdm import TQDMNotebookCallback

Using TensorFlow backend.


# Load data

In [11]:
# Read both competition files. The test frame gets a sentinel target of -1 so that
# its rows can be recognised again after train and test are stacked together.
train = pd.read_csv('cat_in_dat/train_cat_kaggle.csv')
test = pd.read_csv('cat_in_dat/test_cat_kaggle.csv').assign(target=-1)

In [13]:
# Raw feature columns: everything except the label and the row id.
features = [name for name in train.columns if name not in ('target', 'id')]

# Number of missing feature values per row, computed separately for each frame.
train['nan_features'] = train[features].isna().sum(axis=1)
test['nan_features'] = test[features].isna().sum(axis=1)

# Stack train on top of test with a fresh 0..n-1 index.
data = pd.concat([train, test], ignore_index=True)

# Label encode and fillna

In [14]:
def convert_data_to_numeric(df):
    """Replace the string categories of ``df`` with integer codes, in place.

    Columns bin_3, bin_4, nom_0..nom_4 and ord_1..ord_4 use fixed lookup
    tables. nom_5..nom_9 are numbered in order of first appearance in ``df``,
    and ord_5 is numbered by the sorted order of its values. Missing entries
    stay missing, and a value absent from its lookup table becomes NaN.

    Returns the same ``df`` object that was passed in.
    """
    def codes_in_order(values):
        # Number the given values 0..n-1 in the order they are supplied.
        return {value: code for code, value in enumerate(values)}

    fixed_codes = {
        'bin_3': {'T': 1, 'F': 0},
        'bin_4': {'Y': 1, 'N': 0},
        'nom_0': codes_in_order(['Red', 'Blue', 'Green']),
        'nom_1': codes_in_order(['Trapezoid', 'Star', 'Circle', 'Triangle', 'Polygon']),
        'nom_2': codes_in_order(['Hamster', 'Axolotl', 'Lion', 'Dog', 'Cat', 'Snake']),
        'nom_3': codes_in_order(['Russia', 'Canada', 'Finland', 'Costa Rica', 'China', 'India']),
        'nom_4': codes_in_order(['Bassoon', 'Theremin', 'Oboe', 'Piano']),
        'ord_1': codes_in_order(['Novice', 'Contributor', 'Expert', 'Master', 'Grandmaster']),
        'ord_2': codes_in_order(['Freezing', 'Cold', 'Warm', 'Hot', 'Boiling Hot', 'Lava Hot']),
        'ord_3': codes_in_order('abcdefghijklmno'),
        'ord_4': codes_in_order('ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
    }

    # Lookup tables derived from the data; all are built before any column is
    # overwritten.
    learned_codes = {
        column: codes_in_order(df[column].dropna().unique())
        for column in ('nom_5', 'nom_6', 'nom_7', 'nom_8', 'nom_9')
    }
    learned_codes['ord_5'] = codes_in_order(sorted(df['ord_5'].dropna().unique()))

    for column, codes in {**fixed_codes, **learned_codes}.items():
        present = df[column].notnull()
        # Only the non-null rows are mapped; index alignment on assignment
        # leaves the other rows as NaN.
        df[column] = df.loc[present, column].map(codes)

    return df

## Define sparse features

In [15]:
# Create NaN count
# data['nan_count'] = data.isnull().sum(axis=1) Problem with nan count

# Every train column other than the row id and the label.
sparse_features = [column for column in train.columns if column not in ('id', 'target')]

## Keep categories present in train AND test

In [16]:
# Categories that occur in only one of train / test are replaced with NaN in the
# stacked frame, so only values present in both splits are kept.
for col in sparse_features:
    seen_in_train = set(train[col].dropna().unique())
    seen_in_test = set(test[col].dropna().unique())

    symmetric_difference_values = seen_in_train ^ seen_in_test
    if not symmetric_difference_values:
        continue

    print(f'{len(symmetric_difference_values)} values in {col}, {symmetric_difference_values} Replaced with nan')
    unshared_rows = data[col].isin(symmetric_difference_values)
    data.loc[unshared_rows, col] = np.nan

5 values in nom_5, {'7331b57f0', '0385d0739', 'd6bb2181a', 'b3ad70fcb', 'd1d7d8352'} Replaced with nan
7 values in nom_6, {'b4b8de4b9', 'a18f02793', 'f2c0f1d10', '3a121fefb', '27fadf6ea', 'ee6983c6d', 'd6ea07c05'} Replaced with nan
8 values in nom_9, {'d1e6704ed', '68a201317', '5f565a682', '47a0cd9da', 'b5f21647b', '432e3fc6a', '1538d82e9', '2394a46de'} Replaced with nan
1 values in nan_features, {7} Replaced with nan


## Fillna

In [None]:
%%time
# Columns to impute: every column of the stacked frame except the label and the row id.
features = [feat for feat in data.columns if feat not in ['target','id']]
# String categories -> integer codes (missing values stay NaN), then categorical dtype.
data[features] = convert_data_to_numeric(data[features])
data[features] = data[features].astype('category')
# Iterative imputation started from the most frequent value; add_indicator=True appends
# missing-value indicator columns to the transformed output.
imp = IterativeImputer(max_iter=500, initial_strategy='most_frequent', random_state=SEED, add_indicator=True)
# One indicator column name per feature, pre-created so the block assignment below has targets.
indicator_cols = [feat + '_ind' for feat in features]
for col in indicator_cols:
    data[col] = 0
# NOTE(review): this assignment assumes fit_transform returns exactly one indicator column
# per feature. By default the indicator only covers features that had missing values
# during fit, so the widths may not match - confirm against the actual output shape.
data[features+indicator_cols] = imp.fit_transform(data[features])
# Imputed values are rounded back to whole codes and stored compactly.
data[features] = data[features].round(0).astype(np.int16)
data[indicator_cols] = data[indicator_cols].astype(np.uint8)

In [18]:
# NOTE(review): convert_data_to_numeric is also applied to data[features] in the imputation
# cell; if both cells run, this second call sends already-numeric codes through the
# string lookup tables - confirm that only one of the two is meant to run.
data[features] = convert_data_to_numeric(data[features])
# Every remaining NaN in the frame becomes -1 ...
data = data.fillna(-1)
# ... and the feature columns are then shifted by one, so a missing feature value ends up as 0.
data[features] = data[features] + 1

In [19]:
# Undo the stacking: test rows are the ones that carry the -1 sentinel target.
is_test_row = data.target == -1
train = data[~is_test_row].reset_index(drop=True)
test = data[is_test_row].reset_index(drop=True)

# Create dense features with catboostencoder

In [61]:
%%time
# One target-encoded '<feature>_enc' column per raw feature. A fresh encoder is fitted on
# train for each column and then applied to test.
features_enc = [f'{col}_enc' for col in features]
for col, enc_col in zip(features, features_enc):
    catenc = CatBoostEncoder(return_df=False, random_state=SEED)
    train[enc_col] = catenc.fit_transform(train[col].astype('str'), train.target.values)
    test[enc_col] = catenc.transform(test[col].astype('str'))

CPU times: user 16 s, sys: 513 ms, total: 16.5 s
Wall time: 17.2 s


## Quick nan count study

In [None]:
# tab = pd.crosstab(train.nan_count, train.target)
# tab.T/tab.sum(axis=1)

# Define Keras model

## Define functions

In [20]:
def auc(y_true, y_pred):
    """ROC AUC as a Keras metric, computed with scikit-learn.

    The score is evaluated by ``roc_auc_score`` inside ``tf.py_function`` and
    returned as a ``tf.double`` tensor. When scikit-learn cannot compute a
    score for the values it is given, the neutral value 0.5 is reported instead
    of aborting training.
    """
    def fallback_auc(y_true, y_pred):
        try:
            return roc_auc_score(y_true, y_pred)
        except Exception:
            # Catch ordinary failures only: a bare ``except`` would also
            # swallow KeyboardInterrupt / SystemExit and make a running fit
            # impossible to interrupt from inside the metric.
            return 0.5
    return tf.py_function(fallback_auc, (y_true, y_pred), tf.double)

In [21]:
class CyclicLR(keras.callbacks.Callback):
    """Cyclical learning-rate schedule, updated after every training batch.

    The learning rate moves linearly between ``base_lr`` and ``max_lr`` over a
    cycle of ``2 * step_size`` batches. The amplitude of each cycle is
    multiplied by ``scale_fn``, evaluated on the cycle number
    (``scale_mode='cycle'``) or on the batch counter (any other scale mode).

    Parameters
    ----------
    base_lr, max_lr : float
        Lower and upper bound of the cycle.
    step_size : float
        Number of batches in half a cycle.
    mode : {'triangular', 'triangular2', 'exp_range'}
        Built-in scaling policy, used only when ``scale_fn`` is None.
    gamma : float
        Base of the exponential decay used by ``mode='exp_range'``.
    scale_fn : callable or None
        Custom scaling function; when given, ``mode`` is ignored.
    scale_mode : str
        What ``scale_fn`` is evaluated on; only consulted together with a
        custom ``scale_fn``.

    Raises
    ------
    ValueError
        If ``scale_fn`` is None and ``mode`` is not one of the built-in
        policies. Previously this left ``scale_fn`` unset and failed later
        with an AttributeError on the first batch.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** (x)
                self.scale_mode = 'iterations'
            else:
                raise ValueError(
                    "mode must be 'triangular', 'triangular2' or 'exp_range' "
                    "when scale_fn is not given, got {!r}".format(mode)
                )
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Restart the cycle counter, optionally changing bounds or step size."""
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current batch counter."""
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        # x runs 1 -> 0 -> 1 across a cycle, so (1 - x) is the triangular wave.
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        amplitude = (self.max_lr - self.base_lr) * np.maximum(0, (1 - x))
        if self.scale_mode == 'cycle':
            return self.base_lr + amplitude * self.scale_fn(cycle)
        return self.base_lr + amplitude * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs=None):
        """Set the optimizer's rate to the cycle start, or resume mid-cycle."""
        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Advance both counters and push the new rate to the optimizer.

        The first positional argument is named ``epoch`` but this hook runs
        once per batch.
        """
        self.trn_iterations += 1
        self.clr_iterations += 1

        K.set_value(self.model.optimizer.lr, self.clr())


## Define Hyperparams

In [55]:
target = ['target']  # label column, kept as a list so train[target] is a DataFrame
N_Splits = 5  # number of stratified CV folds
Verbose = 1  # verbosity handed to the EarlyStopping callback
Epochs = 50  # maximum number of epochs per fold
BATCH_SIZE = 512  # NOTE(review): the run_cv definitions pass batch_size=1024 to fit() instead - confirm which is intended

DROPOUT = 0.3  # dropout rate read by create_model
NNLAYERS = (300, 300)  # NOTE(review): the run_cv definitions pass a literal (300, 300) rather than this constant
PATIENCE = 5  # EarlyStopping patience

MAX_EMB_DIM = 50  # upper bound on each embedding width in create_model

In [56]:
import math

In [14]:
# Dense inputs are the '<feature>_enc' columns; the modelling frame additionally carries
# the raw features and the missing-value indicator columns.
# NOTE(review): the saved output of this cell is a NameError for features_enc, so it was
# executed before the cell that defines that list - confirm the intended cell order.
dense_features = features_enc
all_features = features + indicator_cols + features_enc

NameError: name 'features_enc' is not defined

In [None]:
# Extends the existing list in place, so re-running this cell appends the indicator
# columns a second time.
sparse_features += indicator_cols

In [15]:
# Use only the raw feature columns; this replaces any earlier value of all_features.
all_features = features

In [57]:
def create_model(data, catcols, densecols, dnn_layers, last_dense):
    """Build an embedding + MLP classifier over categorical columns.

    Each column in ``catcols`` gets its own ``(1,)`` input and an embedding
    whose width is ``ceil(ln(n_unique))`` capped by ``MAX_EMB_DIM``. The
    embeddings are concatenated, passed through one Dense(relu) + Dropout pair
    per entry of ``dnn_layers``, and finished with a 2-unit softmax.

    Parameters
    ----------
    data : DataFrame
        Frame used only to count the distinct values of each categorical column.
    catcols : sequence of str
        Categorical column names; the model's inputs follow this order.
    densecols, last_dense
        Not used by this variant; kept so the signature stays unchanged.
    dnn_layers : iterable of int
        Width of each hidden layer.

    Returns
    -------
    Model
        An uncompiled Keras model.

    Raises
    ------
    ValueError
        If ``catcols`` is empty.
    """
    if len(catcols) == 0:
        raise ValueError("catcols must name at least one categorical column")

    inputs = []
    embedded = []

    for c in catcols:
        num_unique_values = int(data[c].nunique())
        # ln(1) == 0 would give a zero-width embedding and ln(0) raises, so the
        # cardinality and the resulting width are both clamped to at least 1.
        # Columns with two or more distinct values are unaffected.
        embed_dim = int(min(np.ceil(math.log(max(num_unique_values, 1))), MAX_EMB_DIM))
        embed_dim = max(embed_dim, 1)
        inp = layers.Input(shape=(1,))
        out = layers.Embedding(num_unique_values + 1, embed_dim, name=c)(inp)
        out = layers.SpatialDropout1D(DROPOUT)(out)
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        inputs.append(inp)
        embedded.append(out)

    # A single tensor is used as-is rather than handed to Concatenate alone.
    x = layers.Concatenate()(embedded) if len(embedded) > 1 else embedded[0]

    for size in dnn_layers:
        x = layers.Dense(size, activation="relu")(x)
        x = layers.Dropout(DROPOUT)(x)

    y = layers.Dense(2, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=y)
    return model


In [58]:
def run_cv(return_test_pred=False):
    """Train ``create_model`` with stratified K-fold CV on the sparse features.

    Reads the notebook-level ``train``, ``test``, ``data``, ``target``,
    ``all_features``, ``sparse_features``, ``N_Splits``, ``SEED``, ``Epochs``,
    ``PATIENCE`` and ``Verbose``. Each fold trains a fresh model with early
    stopping and learning-rate reduction, both monitoring ``val_auc``.

    Parameters
    ----------
    return_test_pred : bool, default False
        When True, also return the test predictions averaged over the folds.
        They were previously computed in every fold and then discarded.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        Out-of-fold class-1 probabilities for every train row, or
        ``(out_of_fold, test_prediction)`` when ``return_test_pred`` is True.
    """
    def _column_inputs(frame):
        # One 1-D array per sparse feature, in create_model's input order.
        # The value matrix is materialised once instead of once per column.
        values = frame.loc[:, sparse_features].values
        return [values[:, k] for k in range(values.shape[1])]

    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)

    # The test inputs do not depend on the fold, so they are built once.
    test_model_input = _column_inputs(test)

    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):

        # Split
        X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = _column_inputs(X_train)
        val_model_input = _column_inputs(X_val)

        # Define model
        model = create_model(data, sparse_features, ['id'], (300, 300), 256)
        opt = keras.optimizers.Adam(learning_rate=1e-3)
        model.compile(opt, "binary_crossentropy", metrics=[auc])

        # Define callbacks
        es = callbacks.EarlyStopping(
            monitor='val_auc',
            min_delta=0.001,
            patience=PATIENCE,
            verbose=Verbose,
            mode='max',
            baseline=None,
            restore_best_weights=True
        )
        reduce_lr = ReduceLROnPlateau(
            monitor='val_auc',
            mode='max',
            factor=0.5,
            patience=3,
            min_lr=1e-6,
            verbose=True,
        )

        # Train model: labels are one-hot encoded to match the 2-unit softmax.
        model.fit(
            train_model_input, utils.to_categorical(y_train),
            validation_data=(val_model_input, utils.to_categorical(y_val)),
            batch_size=1024,
            epochs=Epochs,
            verbose=1,
            callbacks=[reduce_lr, es]
        )

        # Predict: column 1 of the softmax output is the class-1 probability.
        val_pred = model.predict(val_model_input, batch_size=512)[:, 1]
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        y_pred_deepfm += model.predict(test_model_input, batch_size=512)[:, 1].ravel() / (N_Splits)
        K.clear_session()

    if return_test_pred:
        return oof_pred_deepfm, y_pred_deepfm
    return oof_pred_deepfm

In [59]:
oof_pred_deepfm = run_cv()

Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 1 : 0.78518
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 2 : 0.78533
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 3 : 0.78591
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50

In [60]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 50 no BN

OOF AUC : 0.78466


In [54]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 20 no BN

OOF AUC : 0.78431


In [46]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 200 no BN

OOF AUC : 0.78399


In [39]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 100 no BN

OOF AUC : 0.78487


In [33]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
# No BN

OOF AUC : 0.78306


In [29]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

OOF AUC : 0.78296


In [23]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

OOF AUC : 0.78375


In [None]:
0.7838

In [None]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

In [None]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

In [114]:
del oof_pred_deepfm

In [115]:
import gc
gc.collect()

7595

* test with nan_count
* add dense features OK
* test with `ln` instead of `*0.5` OK
* increase MAX_EMB_DIM OK
* architecture

In [66]:
target = ['target']  # label column, kept as a list so train[target] is a DataFrame
N_Splits = 5  # number of stratified CV folds
Verbose = 1  # verbosity handed to the EarlyStopping callback
Epochs = 50  # maximum number of epochs per fold
BATCH_SIZE = 512  # NOTE(review): the run_cv definitions pass batch_size=1024 to fit() instead - confirm which is intended

DROPOUT = 0.3  # dropout rate read by create_model
NNLAYERS = (300, 300)  # NOTE(review): the run_cv definitions pass a literal (300, 300) rather than this constant
PATIENCE = 5  # EarlyStopping patience

MAX_EMB_DIM = 100  # upper bound on each embedding width in create_model

In [67]:
# Dense inputs are the '<feature>_enc' columns; the modelling frame carries the raw
# features followed by those encoded columns.
dense_features = features_enc
all_features = features + features_enc

In [118]:
def create_model(data, catcols, densecols, dnn_layers, last_dense):
    """Build an embedding + MLP classifier with an optional dense side branch.

    Each column in ``catcols`` gets its own ``(1,)`` input and an embedding
    whose width is ``ceil(ln(n_unique))`` capped by ``MAX_EMB_DIM``. The
    concatenated embeddings go through one Dense(relu) + Dropout pair per entry
    of ``dnn_layers``. When ``densecols`` is non-empty, a single extra input of
    width ``len(densecols)`` is passed through an L2-regularised Dense layer of
    ``last_dense`` units plus Dropout(0.5) and joined to the MLP output just
    before the final 2-unit softmax.

    Parameters
    ----------
    data : DataFrame
        Frame used only to count the distinct values of each categorical column.
    catcols : sequence of str
        Categorical column names; the model's inputs follow this order.
    densecols : sequence
        Dense columns; only its length is used. The dense input is appended
        after the categorical inputs. May be empty.
    dnn_layers : iterable of int
        Width of each hidden layer.
    last_dense : int
        Units of the dense side branch.

    Returns
    -------
    Model
        An uncompiled Keras model.

    Raises
    ------
    ValueError
        If ``catcols`` is empty.
    """
    if len(catcols) == 0:
        raise ValueError("catcols must name at least one categorical column")

    inputs = []
    embedded = []

    for c in catcols:
        num_unique_values = int(data[c].nunique())
        # ln(1) == 0 would give a zero-width embedding and ln(0) raises, so the
        # cardinality and the resulting width are both clamped to at least 1.
        # Columns with two or more distinct values are unaffected.
        embed_dim = int(min(np.ceil(math.log(max(num_unique_values, 1))), MAX_EMB_DIM))
        embed_dim = max(embed_dim, 1)
        inp = layers.Input(shape=(1,))
        out = layers.Embedding(num_unique_values + 1, embed_dim, name=c)(inp)
        out = layers.SpatialDropout1D(DROPOUT)(out)
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        inputs.append(inp)
        embedded.append(out)

    # Dense side branch. It is built only when dense columns were requested;
    # previously the layers below ran unconditionally and referenced a
    # dense input that did not exist when densecols was empty.
    dense_branch = None
    if densecols:
        dense_inp = layers.Input(shape=(len(densecols),))
        inputs.append(dense_inp)
        dense_branch = layers.Dense(
            last_dense,
            activation="relu",
            kernel_regularizer=keras.regularizers.l2(0.01)
        )(dense_inp)
        dense_branch = layers.Dropout(0.5)(dense_branch)

    # A single tensor is used as-is rather than handed to Concatenate alone.
    x = layers.Concatenate()(embedded) if len(embedded) > 1 else embedded[0]

    for size in dnn_layers:
        x = layers.Dense(size, activation="relu")(x)
        x = layers.Dropout(DROPOUT)(x)

    if dense_branch is not None:
        x = layers.Concatenate()([x, dense_branch])

    y = layers.Dense(2, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=y)
    return model


In [119]:
def run_cv(return_test_pred=False):
    """Train ``create_model`` with stratified K-fold CV on sparse + dense features.

    Reads the notebook-level ``train``, ``test``, ``data``, ``target``,
    ``all_features``, ``sparse_features``, ``dense_features``, ``N_Splits``,
    ``SEED``, ``Epochs``, ``PATIENCE`` and ``Verbose``. Each fold trains a
    fresh model with early stopping and learning-rate reduction, both
    monitoring ``val_auc``.

    Parameters
    ----------
    return_test_pred : bool, default False
        When True, also return the test predictions averaged over the folds.
        They were previously computed in every fold and then discarded.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        Out-of-fold class-1 probabilities for every train row, or
        ``(out_of_fold, test_prediction)`` when ``return_test_pred`` is True.
    """
    def _model_inputs(frame):
        # One 1-D array per sparse feature followed by one 2-D block holding
        # all dense features, matching create_model's input order.
        sparse_values = frame.loc[:, sparse_features].values
        arrays = [sparse_values[:, k] for k in range(sparse_values.shape[1])]
        arrays += [frame.loc[:, dense_features].values]
        return arrays

    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)

    # The test inputs do not depend on the fold, so they are built once.
    test_model_input = _model_inputs(test)

    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):

        # Split
        X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = _model_inputs(X_train)
        val_model_input = _model_inputs(X_val)

        # Define model
        model = create_model(data, sparse_features, dense_features, (300, 300), 32)
        opt = keras.optimizers.Adam(learning_rate=1e-3)
        model.compile(opt, "binary_crossentropy", metrics=[auc])

        # Define callbacks
        es = callbacks.EarlyStopping(
            monitor='val_auc',
            min_delta=0.001,
            patience=PATIENCE,
            verbose=Verbose,
            mode='max',
            baseline=None,
            restore_best_weights=True
        )
        reduce_lr = ReduceLROnPlateau(
            monitor='val_auc',
            mode='max',
            factor=0.5,
            patience=3,
            min_lr=1e-6,
            verbose=True,
        )

        # Train model: labels are one-hot encoded to match the 2-unit softmax.
        model.fit(
            train_model_input, utils.to_categorical(y_train),
            validation_data=(val_model_input, utils.to_categorical(y_val)),
            batch_size=1024,
            epochs=Epochs,
            verbose=1,
            callbacks=[reduce_lr, es]
        )

        # Predict: column 1 of the softmax output is the class-1 probability.
        val_pred = model.predict(val_model_input, batch_size=512)[:, 1]
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        y_pred_deepfm += model.predict(test_model_input, batch_size=512)[:, 1].ravel() / (N_Splits)
        K.clear_session()

    if return_test_pred:
        return oof_pred_deepfm, y_pred_deepfm
    return oof_pred_deepfm

In [120]:
oof_pred_deepfm = run_cv()

Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 1 : 0.78585
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 2 : 0.78582
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 3 : 0.78591
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50

In [121]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 100 no BN (300, 300), 32 reg 0.01

OOF AUC : 0.78444


In [110]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 100 no BN (300, 300), 128 reg 0.001

OOF AUC : 0.78476


In [103]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 100 no BN (300, 300), 128 no reg

OOF AUC : 0.78412


* add nan_features OK
* architecture

In [28]:
target = ['target']  # label column, kept as a list so train[target] is a DataFrame
N_Splits = 5  # number of stratified CV folds
Verbose = 1  # verbosity handed to the EarlyStopping callback
Epochs = 50  # maximum number of epochs per fold
BATCH_SIZE = 512  # NOTE(review): the run_cv definitions pass batch_size=1024 to fit() instead - confirm which is intended

DROPOUT = 0.3  # dropout rate read by create_model
NNLAYERS = (300, 300)  # NOTE(review): the run_cv definitions pass a literal (300, 300) rather than this constant
PATIENCE = 5  # EarlyStopping patience

MAX_EMB_DIM = 100  # upper bound on each embedding width in create_model

In [23]:
import math

In [24]:
# Dense inputs are the '<feature>_enc' columns; the modelling frame additionally carries
# the raw features and the missing-value indicator columns.
# NOTE(review): the saved output of this cell is a NameError for features_enc, so it was
# executed in a kernel where that list did not exist - confirm the intended cell order.
dense_features = features_enc
all_features = features + indicator_cols + features_enc

NameError: name 'features_enc' is not defined

In [None]:
# Extends the existing list in place, so re-running this cell appends the indicator
# columns a second time.
sparse_features += indicator_cols

In [26]:
# Raw feature columns plus the per-row missing-value count; this replaces any earlier
# value of all_features.
all_features = features + ['nan_features']

In [29]:
def create_model(data, catcols, densecols, dnn_layers, last_dense):
    """Build an embedding + MLP classifier over categorical columns.

    Each column in ``catcols`` gets its own ``(1,)`` input and an embedding
    whose width is ``ceil(ln(n_unique))`` capped by ``MAX_EMB_DIM``. The
    embeddings are concatenated, passed through one Dense(relu) + Dropout pair
    per entry of ``dnn_layers``, and finished with a 2-unit softmax.

    Parameters
    ----------
    data : DataFrame
        Frame used only to count the distinct values of each categorical column.
    catcols : sequence of str
        Categorical column names; the model's inputs follow this order.
    densecols, last_dense
        Not used by this variant; kept so the signature stays unchanged.
    dnn_layers : iterable of int
        Width of each hidden layer.

    Returns
    -------
    Model
        An uncompiled Keras model.

    Raises
    ------
    ValueError
        If ``catcols`` is empty.
    """
    if len(catcols) == 0:
        raise ValueError("catcols must name at least one categorical column")

    inputs = []
    embedded = []

    for c in catcols:
        num_unique_values = int(data[c].nunique())
        # ln(1) == 0 would give a zero-width embedding and ln(0) raises, so the
        # cardinality and the resulting width are both clamped to at least 1.
        # Columns with two or more distinct values are unaffected.
        embed_dim = int(min(np.ceil(math.log(max(num_unique_values, 1))), MAX_EMB_DIM))
        embed_dim = max(embed_dim, 1)
        inp = layers.Input(shape=(1,))
        out = layers.Embedding(num_unique_values + 1, embed_dim, name=c)(inp)
        out = layers.SpatialDropout1D(DROPOUT)(out)
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        inputs.append(inp)
        embedded.append(out)

    # A single tensor is used as-is rather than handed to Concatenate alone.
    x = layers.Concatenate()(embedded) if len(embedded) > 1 else embedded[0]

    for size in dnn_layers:
        x = layers.Dense(size, activation="relu")(x)
        x = layers.Dropout(DROPOUT)(x)

    y = layers.Dense(2, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=y)
    return model


In [39]:
# Map the value -1 in nan_features to 0 in both frames.
# NOTE(review): presumably the -1 entries come from the earlier fillna(-1) applied to the
# stacked frame - confirm that 0 is the intended replacement.
train.nan_features = train.nan_features.replace({-1: 0})
test.nan_features = test.nan_features.replace({-1: 0})

In [45]:
# Drop the last entry of sparse_features.
# NOTE(review): not idempotent - every re-run removes one more column, and which column
# is last depends on the cells executed before this one.
sparse_features = sparse_features[:-1]

In [46]:
def run_cv(return_test_pred=False):
    """Train ``create_model`` with stratified K-fold CV on the sparse features.

    Reads the notebook-level ``train``, ``test``, ``data``, ``target``,
    ``all_features``, ``sparse_features``, ``N_Splits``, ``SEED``, ``Epochs``,
    ``PATIENCE`` and ``Verbose``. Each fold trains a fresh model with early
    stopping and learning-rate reduction, both monitoring ``val_auc``.

    Parameters
    ----------
    return_test_pred : bool, default False
        When True, also return the test predictions averaged over the folds.
        They were previously computed in every fold and then discarded.

    Returns
    -------
    numpy.ndarray or tuple of numpy.ndarray
        Out-of-fold class-1 probabilities for every train row, or
        ``(out_of_fold, test_prediction)`` when ``return_test_pred`` is True.
    """
    def _column_inputs(frame):
        # One 1-D array per sparse feature, in create_model's input order.
        # The value matrix is materialised once instead of once per column.
        values = frame.loc[:, sparse_features].values
        return [values[:, k] for k in range(values.shape[1])]

    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)

    # The test inputs do not depend on the fold, so they are built once.
    test_model_input = _column_inputs(test)

    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):

        # Split
        X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = _column_inputs(X_train)
        val_model_input = _column_inputs(X_val)

        # Define model
        model = create_model(data, sparse_features, ['id'], (300, 300), 256)
        opt = keras.optimizers.Adam(learning_rate=1e-3)
        model.compile(opt, "binary_crossentropy", metrics=[auc])

        # Define callbacks
        es = callbacks.EarlyStopping(
            monitor='val_auc',
            min_delta=0.001,
            patience=PATIENCE,
            verbose=Verbose,
            mode='max',
            baseline=None,
            restore_best_weights=True
        )
        reduce_lr = ReduceLROnPlateau(
            monitor='val_auc',
            mode='max',
            factor=0.5,
            patience=3,
            min_lr=1e-6,
            verbose=True,
        )

        # Train model: labels are one-hot encoded to match the 2-unit softmax.
        model.fit(
            train_model_input, utils.to_categorical(y_train),
            validation_data=(val_model_input, utils.to_categorical(y_val)),
            batch_size=1024,
            epochs=Epochs,
            verbose=1,
            callbacks=[reduce_lr, es]
        )

        # Predict: column 1 of the softmax output is the class-1 probability.
        val_pred = model.predict(val_model_input, batch_size=512)[:, 1]
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        y_pred_deepfm += model.predict(test_model_input, batch_size=512)[:, 1].ravel() / (N_Splits)
        K.clear_session()

    if return_test_pred:
        return oof_pred_deepfm, y_pred_deepfm
    return oof_pred_deepfm

In [47]:
oof_pred_deepfm = run_cv()

Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Restoring model weights from the end of the best epoch.
Epoch 00006: early stopping
validation AUC fold 1 : 0.78483
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 2 : 0.78542
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/50
Epoch 7/50
Epoch 00007: early stopping
validation AUC fold 3 : 0.78611
Train on 383639 samples, validate on 95910 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 00005: ReduceLROnPlateau reducing learn

In [48]:
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")
#ln dim maxembdim 100 no BN (300, 300) nan features

OOF AUC : 0.78435


In [None]:
target = ['target']  # label column, kept as a list so train[target] is a DataFrame
N_Splits = 5  # number of stratified CV folds
Verbose = 1  # verbosity for EarlyStopping and model.fit in the training cell below
Epochs = 50  # maximum number of epochs per fold
BATCH_SIZE = 512  # batch size passed to model.fit in the training cell below

DROPOUT = 0.2  # passed to DeepFM as dnn_dropout
NNLAYERS = (256, 256, 256)  # passed to DeepFM as dnn_hidden_units
PATIENCE = 5  # EarlyStopping patience

In [None]:
from tqdm import tqdm_notebook as tqdm

In [None]:
# Stratified K-fold cross-validation of DeepFM.
# Collects out-of-fold (OOF) predictions for the training rows in
# `oof_pred_deepfm` and the fold-averaged test prediction in `y_pred_deepfm`.
oof_pred_deepfm = np.zeros((len(train), ))
y_pred_deepfm = np.zeros((len(test),))

skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)

# The test inputs are identical for every fold, so build them once up front
# instead of once per fold.
test_model_input = {name: test[name] for name in feature_names}

for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):
    
    # Split
    X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
    y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
    train_model_input = {name: X_train[name] for name in feature_names}
    val_model_input = {name: X_val[name] for name in feature_names}
    
    # Define model (a fresh one for every fold)
    model = DeepFM(sparse_feature_columns, sparse_feature_columns + dense_feature_columns,
                   dnn_hidden_units=NNLAYERS, dnn_dropout=DROPOUT, dnn_use_bn=False, task='binary')
    opt = keras.optimizers.Adam(learning_rate=1e-3)
    model.compile(opt, "binary_crossentropy", metrics=[auc])
    
    # Define callbacks
    # Stop when validation AUC has not improved for PATIENCE epochs and roll
    # back to the best weights seen.
    es = callbacks.EarlyStopping(
        monitor='val_auc', 
        min_delta=0.0, 
        patience=PATIENCE, 
        verbose=Verbose, 
        mode='max', 
        baseline=None, 
        restore_best_weights=True
    )
    #sb = callbacks.ModelCheckpoint(
     #   './nn_model.w8', save_weights_only=True, save_best_only=True, verbose=Verbose
    #)
#     clr = CyclicLR(base_lr=0.00001 / 100, max_lr = 0.0001, 
#                        step_size= int(1.0*(test.shape[0])/1024) , mode='exp_range',
#                        gamma=1., scale_fn=None, scale_mode='cycle')
    # Halve the learning rate after 3 epochs without a validation-AUC gain.
    reduce_lr = ReduceLROnPlateau(
        monitor='val_auc', 
        mode='max',
        factor=0.5,
        patience=3, 
        min_lr=1e-7,
        verbose=True,
    )
    
    # Train model
    history = model.fit(
        train_model_input, y_train,
        validation_data=(val_model_input, y_val),
        batch_size=BATCH_SIZE, 
        epochs=Epochs, 
        verbose=Verbose,
        callbacks=[reduce_lr, es]
    )
    
    # Predict
    val_pred = model.predict(val_model_input, batch_size=512)
    print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
    oof_pred_deepfm[val_ind] = val_pred.ravel()
    # Each fold contributes 1/N_Splits of the final test prediction.
    y_pred_deepfm += model.predict(test_model_input, batch_size=512).ravel() / (N_Splits)
    # Clear the Keras session before the next fold builds its model.
    K.clear_session()

In [None]:
# Overall out-of-fold ROC AUC against the training targets, rounded to 5 decimals.
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

* Deeper network
* Decrease the learning rate
* Go back to cyclic LR
* Decrease the patience of ReduceLROnPlateau
* Add regularization for the DNN
* Dropout levels
* Treat embedding dim
* nunique + 1 ?
* Pseudo-labeling

# Grid

In [None]:
from sklearn.model_selection import ParameterSampler
# Search space for the random hyper-parameter search; each key maps to the
# list of candidate values that ParameterSampler draws from.
grid_params = dict(
    dnn_dropout=[0.0, 0.1, 0.2, 0.3, 0.5],
    dnn_hidden_units=[(256, 256, 256), (256, 256), (128, 128, 128), (256, 256, 256, 256)],
    emb_dim=[2, 3, 4, 8],
    lr=[1e-3, 1e-4],
    cyclic=[True, False],
    bn=[True, False],
    l2_reg_linear=[1e-05, 1e-04],
    l2_reg_embedding=[1e-05, 1e-04],
    l2_reg_dnn=[0.0, 1e-05, 1e-04],
)

# Draw 50 configurations with a fixed random_state.
list_params = list(ParameterSampler(grid_params, n_iter=50, random_state=0))

In [None]:
# Display the sampled hyper-parameter configurations.
list_params

## 1

model = DeepFM(sparse_feature_columns, sparse_feature_columns + dense_feature_columns,
               dnn_hidden_units=param['dnn_hidden_units'], dnn_dropout=param['dnn_dropout'], 
               dnn_use_bn=param['bn'], task='binary',
               l2_reg_linear=param['l2_reg_linear'], l2_reg_embedding=param['l2_reg_embedding'], 
               l2_reg_dnn=param['l2_reg_dnn'], init_std=0.0001,
               seed=SEED)

In [None]:
# Single hand-picked configuration evaluated by the cross-validation loop
# that iterates over `params`.
params = [
    dict(
        lr=1e-4,
        l2_reg_linear=1e-4,
        l2_reg_embedding=1e-5,
        l2_reg_dnn=1e-4,
        emb_dim=4,
        dnn_hidden_units=(256,) * 3,
        dnn_dropout=0.0,
        cyclic=False,
        bn=False,
    )
]

In [None]:
from tqdm import tqdm_notebook as tqdm

In [None]:
# Cross-validate every configuration in `params`.
# cv_perf[i] is the overall OOF AUC (rounded to 5 decimals) for params[i].
# oof_pred_deepfm / y_pred_deepfm hold the predictions of the LAST
# configuration only, because they are re-initialised for each one.
cv_perf = []

for param in tqdm(params):
    
    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)
    
    # Feature columns are rebuilt per configuration because the embedding
    # dimension is one of the tuned parameters.
    sparse_feature_columns = [SparseFeat(feat, train[feat].nunique() + 1, embedding_dim=param['emb_dim']) 
                              for feat in features]
    dense_feature_columns = [DenseFeat(feat, 1) for feat in features_enc + indicator_cols]

    feature_names = get_feature_names(sparse_feature_columns + dense_feature_columns)

    all_features = features + features_enc + indicator_cols

    # The test inputs do not depend on the fold: build them once per
    # configuration instead of once per fold.
    test_model_input = {name: test[name] for name in feature_names}

    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):

        # Split
        X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = {name: X_train[name] for name in feature_names}
        val_model_input = {name: X_val[name] for name in feature_names}

        # Define model (a fresh one for every fold)
        model = DeepFM(sparse_feature_columns, sparse_feature_columns + dense_feature_columns,
                       dnn_hidden_units=param['dnn_hidden_units'], dnn_dropout=param['dnn_dropout'], 
                       dnn_use_bn=param['bn'], task='binary',
                       l2_reg_linear=param['l2_reg_linear'], l2_reg_embedding=param['l2_reg_embedding'], 
                       l2_reg_dnn=param['l2_reg_dnn'], init_std=0.0001,
                       seed=SEED)
        opt = keras.optimizers.Adam(learning_rate=param['lr'])
        model.compile(opt, "binary_crossentropy", metrics=[auc])

        # Define callbacks
        # Stop when validation AUC has not improved for PATIENCE epochs and
        # roll back to the best weights seen.
        es = callbacks.EarlyStopping(
            monitor='val_auc', 
            min_delta=0.0, 
            patience=PATIENCE, 
            verbose=Verbose, 
            mode='max', 
            baseline=None, 
            restore_best_weights=True
        )
        #sb = callbacks.ModelCheckpoint(
         #   './nn_model.w8', save_weights_only=True, save_best_only=True, verbose=Verbose
        #)
        if param['cyclic']:
            # NOTE(review): this branch uses fixed base/max learning rates, so
            # param['lr'] presumably has no lasting effect here. step_size is
            # derived from the test-set size and a constant 1024 rather than
            # from the training split and BATCH_SIZE. Confirm both are
            # intended. CyclicLR must be defined elsewhere in the notebook.
            reduce_lr = CyclicLR(base_lr=0.00001 / 100, max_lr = 0.0001, 
                                 step_size= int(1.0*(test.shape[0])/1024) , mode='exp_range',
                                 gamma=1., scale_fn=None, scale_mode='cycle')
        else:
            # Halve the learning rate after 3 epochs without a validation-AUC gain.
            reduce_lr = ReduceLROnPlateau(
                monitor='val_auc', 
                mode='max',
                factor=0.5,
                patience=3, 
                min_lr=1e-7,
                verbose=True,
            )

        # Train model
        history = model.fit(
            train_model_input, y_train,
            validation_data=(val_model_input, y_val),
            batch_size=BATCH_SIZE, 
            epochs=Epochs, 
            verbose=Verbose,
            callbacks=[reduce_lr, es]
        )

        # Predict
        val_pred = model.predict(val_model_input, batch_size=512)
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        # Each fold contributes 1/N_Splits of the final test prediction.
        y_pred_deepfm += model.predict(test_model_input, batch_size=512).ravel() / (N_Splits)
        # Clear the Keras session before the next fold builds its model.
        K.clear_session()
    cv_perf.append(round(roc_auc_score(train.target.values, oof_pred_deepfm), 5))        

In [None]:
# Display the OOF AUC collected for each evaluated configuration.
cv_perf

## 2



In [None]:
# Display the list of input feature names.
features

In [None]:
# Show how many training rows fall into each target value.
train.target.value_counts()

In [None]:
# Names of the features whose column name contains the substring 'ord'.
features_ord = [feat for feat in features if 'ord' in feat]

In [None]:
# Variant of the feature-column setup in which the 'ord' features are fed to
# the model as dense inputs instead of being embedded.
sparse_feature_columns = [
    SparseFeat(name, train[name].nunique() + 1, embedding_dim=4)
    for name in features
    if not ('ord' in name)
]
dense_feature_columns = list(
    map(lambda name: DenseFeat(name, 1), features_enc + indicator_cols + features_ord)
)

# Columns selected from the data frames when building model inputs.
all_features = features + features_enc + indicator_cols

# Input names expected by the model, derived from the columns defined above.
feature_names = get_feature_names(sparse_feature_columns + dense_feature_columns)

In [None]:
# Cross-validate the first 10 sampled configurations of `list_params`.
# cv_perf[i] is the overall OOF AUC (rounded to 5 decimals) for list_params[i].
# oof_pred_deepfm / y_pred_deepfm hold the predictions of the LAST
# configuration only, because they are re-initialised for each one.
cv_perf = []

for param in tqdm(list_params[:10]):
    
    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)
    
    # Feature columns are rebuilt per configuration because the embedding
    # dimension is one of the tuned parameters.
    # NOTE(review): this rebuild treats every feature as sparse and therefore
    # overrides the columns from the preceding cell, where the 'ord' features
    # are dense. Confirm which setup this experiment is meant to evaluate.
    sparse_feature_columns = [SparseFeat(feat, train[feat].nunique() + 1, embedding_dim=param['emb_dim']) 
                              for feat in features]
    dense_feature_columns = [DenseFeat(feat, 1) for feat in features_enc + indicator_cols]

    feature_names = get_feature_names(sparse_feature_columns + dense_feature_columns)

    all_features = features + features_enc + indicator_cols

    # The test inputs do not depend on the fold: build them once per
    # configuration instead of once per fold.
    test_model_input = {name: test[name] for name in feature_names}

    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):

        # Split
        X_train, X_val = train[all_features].iloc[tr_ind], train[all_features].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = {name: X_train[name] for name in feature_names}
        val_model_input = {name: X_val[name] for name in feature_names}

        # Define model (a fresh one for every fold)
        model = DeepFM(sparse_feature_columns, sparse_feature_columns + dense_feature_columns,
                       dnn_hidden_units=param['dnn_hidden_units'], dnn_dropout=param['dnn_dropout'], 
                       dnn_use_bn=param['bn'], task='binary',
                       l2_reg_linear=param['l2_reg_linear'], l2_reg_embedding=param['l2_reg_embedding'], 
                       l2_reg_dnn=param['l2_reg_dnn'], init_std=0.0001,
                       seed=SEED)
        opt = keras.optimizers.Adam(learning_rate=param['lr'])
        model.compile(opt, "binary_crossentropy", metrics=[auc])

        # Define callbacks
        # Stop when validation AUC has not improved for PATIENCE epochs and
        # roll back to the best weights seen.
        es = callbacks.EarlyStopping(
            monitor='val_auc', 
            min_delta=0.0, 
            patience=PATIENCE, 
            verbose=Verbose, 
            mode='max', 
            baseline=None, 
            restore_best_weights=True
        )
        #sb = callbacks.ModelCheckpoint(
         #   './nn_model.w8', save_weights_only=True, save_best_only=True, verbose=Verbose
        #)
        if param['cyclic']:
            # NOTE(review): this branch uses fixed base/max learning rates, so
            # param['lr'] presumably has no lasting effect here. step_size is
            # derived from the test-set size and a constant 1024 rather than
            # from the training split and BATCH_SIZE. Confirm both are
            # intended. CyclicLR must be defined elsewhere in the notebook.
            reduce_lr = CyclicLR(base_lr=0.00001 / 100, max_lr = 0.0001, 
                                 step_size= int(1.0*(test.shape[0])/1024) , mode='exp_range',
                                 gamma=1., scale_fn=None, scale_mode='cycle')
        else:
            # Halve the learning rate after 3 epochs without a validation-AUC gain.
            reduce_lr = ReduceLROnPlateau(
                monitor='val_auc', 
                mode='max',
                factor=0.5,
                patience=3, 
                min_lr=1e-7,
                verbose=True,
            )

        # Train model
        history = model.fit(
            train_model_input, y_train,
            validation_data=(val_model_input, y_val),
            batch_size=BATCH_SIZE, 
            epochs=Epochs, 
            verbose=Verbose,
            callbacks=[reduce_lr, es]
        )

        # Predict
        val_pred = model.predict(val_model_input, batch_size=512)
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        # Each fold contributes 1/N_Splits of the final test prediction.
        y_pred_deepfm += model.predict(test_model_input, batch_size=512).ravel() / (N_Splits)
        # Clear the Keras session before the next fold builds its model.
        K.clear_session()
    cv_perf.append(round(roc_auc_score(train.target.values, oof_pred_deepfm), 5))     

## Grid 20

In [None]:
# Settings for the "Grid 20" random search (10-fold CV).
target = ['target']
N_Splits, Epochs, Verbose = 10, 50, 1
# Re-assigns the notebook-level SEED; cells executed after this one see 0.
SEED = 0

In [None]:
from sklearn.model_selection import ParameterSampler
# Search space for the "Grid 20" random search. 'linear' and 'sparse' hold
# candidate feature-column lists that are passed as the first and second
# positional arguments of DeepFM.
grid_params = dict(
    dnn_dropout=[0.0, 0.1, 0.2],
    dnn_hidden_units=[(256,), (512,), (256, 256), (512, 512)],
    linear=[linear_feature_columns, linear_feature_columns + dnn_feature_columns],
    sparse=[dnn_feature_columns, linear_feature_columns + dnn_feature_columns],
    batch_size=[128, 256, 512],
)

# Draw 20 configurations with a fixed random_state.
list_params = list(ParameterSampler(grid_params, n_iter=20, random_state=0))

In [None]:
# Random search: run stratified K-fold CV for every configuration in
# `list_params`; cv_perf[i] is the overall OOF AUC (rounded to 5 decimals)
# for list_params[i].
cv_perf = []

# Columns selected from `train` for every configuration and fold.
input_cols = features + features_enc + indicator_cols

# The test inputs depend on neither the configuration nor the fold here
# (feature_names is not rebuilt in this cell), so build them once.
# NOTE(review): confirm feature_names matches whichever column lists
# param['linear'] / param['sparse'] select.
test_model_input = {name: test[name] for name in feature_names}

for param in list_params:

    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test),))

    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)
    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):
        X_train, X_val = train[input_cols].iloc[tr_ind], train[input_cols].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = {name: X_train[name] for name in feature_names}
        val_model_input = {name: X_val[name] for name in feature_names}
        model = DeepFM(param['linear'], param['sparse'],
                       dnn_hidden_units=param['dnn_hidden_units'], dnn_dropout=param['dnn_dropout'],
                       dnn_use_bn=False, task='binary')
        model.compile("adam", "binary_crossentropy", metrics=[auc], )
        es = callbacks.EarlyStopping(monitor='val_auc', min_delta=0.001, patience=4, verbose=Verbose,
                                     mode='max', baseline=None, restore_best_weights=True)
        # Checkpoint on the same quantity that early stopping and the fold
        # score use. With the default monitor ('val_loss') the weights
        # reloaded below would be the lowest-loss epoch, not the best-AUC one.
        # mode='max' is explicit because 'auto' only infers "maximise" for
        # names containing 'acc' or starting with 'fmeasure'.
        sb = callbacks.ModelCheckpoint('./nn_model.w8', monitor='val_auc', mode='max',
                                       save_weights_only=True, save_best_only=True, verbose=Verbose)
        # NOTE(review): step_size is derived from the test-set size and a
        # constant 1024 rather than from the training split and
        # param['batch_size']. Confirm this is intended.
        clr = CyclicLR(base_lr=0.00001 / 100, max_lr = 0.0001, 
                       step_size= int(1.0*(test.shape[0])/1024) , mode='exp_range',
                       gamma=1., scale_fn=None, scale_mode='cycle')
        history = model.fit(train_model_input, y_train,
                            validation_data=(val_model_input, y_val),
                            batch_size=param['batch_size'], epochs=Epochs, verbose=Verbose,
                            callbacks=[es, sb, clr],)
        # Reload the best checkpoint written during this fold's training.
        model.load_weights('./nn_model.w8')
        val_pred = model.predict(val_model_input, batch_size=param['batch_size'])
        print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        # Each fold contributes 1/N_Splits of the final test prediction.
        y_pred_deepfm += model.predict(test_model_input, batch_size=param['batch_size']).ravel() / (N_Splits)
        # Clear the Keras session before the next fold builds its model.
        K.clear_session()
    cv_perf.append(round(roc_auc_score(train.target.values, oof_pred_deepfm), 5))

In [None]:
# Display the OOF AUC collected for each sampled configuration.
cv_perf

In [None]:
# Inspect the third sampled configuration (presumably the best-scoring one; verify against cv_perf).
list_params[2]

## CV 50 best model

In [None]:
# Settings for the final 50-fold cross-validation run.
target = ['target']
N_Splits, Epochs = 50, 50
BATCH_SIZE, Verbose = 32, 1

In [None]:
# 50-fold (N_Splits) stratified cross-validation of the selected DeepFM setup.
# Collects out-of-fold (OOF) predictions for the training rows in
# `oof_pred_deepfm` and the fold-averaged test prediction in `y_pred_deepfm`.
oof_pred_deepfm = np.zeros((len(train), ))
y_pred_deepfm = np.zeros((len(test),))

skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)

# Columns selected from `train` for every fold.
input_cols = features + features_enc + indicator_cols

# The test inputs are identical for every fold, so build them once up front.
test_model_input = {name: test[name] for name in feature_names}

for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):
    
    # Split
    X_train, X_val = train[input_cols].iloc[tr_ind], train[input_cols].iloc[val_ind]
    y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
    train_model_input = {name: X_train[name] for name in feature_names}
    val_model_input = {name: X_val[name] for name in feature_names}
    
    # Define model (a fresh one for every fold)
    model = DeepFM(linear_feature_columns, linear_feature_columns + dnn_feature_columns,
                   dnn_hidden_units=(512, 512), dnn_dropout=0.0, dnn_use_bn=False, task='binary')
    model.compile("adam", "binary_crossentropy", metrics=[auc])
    
    # Define callbacks
    es = callbacks.EarlyStopping(monitor='val_auc', min_delta=0.001, patience=4, verbose=Verbose,
                                 mode='max', baseline=None, restore_best_weights=True)
    # Checkpoint on the same quantity that early stopping and the fold score
    # use. With the default monitor ('val_loss') the weights reloaded below
    # would be the lowest-loss epoch, not the best-AUC one.
    sb = callbacks.ModelCheckpoint('./nn_model.w8', monitor='val_auc', mode='max',
                                   save_weights_only=True, save_best_only=True, verbose=Verbose)
#     clr = CyclicLR(base_lr=0.00001 / 100, max_lr = 0.0001, 
#                        step_size= int(1.0*(test.shape[0])/1024) , mode='exp_range',
#                        gamma=1., scale_fn=None, scale_mode='cycle')
    # mode='max' is required: in 'auto' mode Keras only treats monitors whose
    # name contains 'acc' as "higher is better", so 'val_auc' would be
    # minimised and the learning rate cut while AUC is still improving.
    # NOTE(review): patience here (5) exceeds the early-stopping patience (4),
    # so this callback may rarely get to act. Confirm the intended values.
    reduce_lr = ReduceLROnPlateau(monitor='val_auc', mode='max', factor=0.2,
                                  patience=5, min_lr=0.00001)
    
    # Train model
    history = model.fit(train_model_input, y_train,
                        validation_data=(val_model_input, y_val),
                        batch_size=BATCH_SIZE, epochs=Epochs, verbose=Verbose,
                        callbacks=[es, sb, reduce_lr],)
    # Reload the best checkpoint written during this fold's training.
    model.load_weights('./nn_model.w8')
    
    # Predict
    val_pred = model.predict(val_model_input, batch_size=512)
    print(f"validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}")
    oof_pred_deepfm[val_ind] = val_pred.ravel()
    # Each fold contributes 1/N_Splits of the final test prediction.
    y_pred_deepfm += model.predict(test_model_input, batch_size=512).ravel() / (N_Splits)
    # Clear the Keras session before the next fold builds its model.
    K.clear_session()

In [None]:
# Overall out-of-fold ROC AUC against the training targets, rounded to 5 decimals.
print(f"OOF AUC : {round(roc_auc_score(train.target.values, oof_pred_deepfm), 5)}")

In [None]:
# Pair each test id with its fold-averaged prediction and write the result
# to a CSV file without the index column.
test_idx = test['id'].values
submission = pd.DataFrame({'id': test_idx, 'target': y_pred_deepfm})
submission.to_csv("submission_deepfm_cv_50.csv", index=False)
print("Submission file saved!")

In [None]:
# Persist the out-of-fold and test prediction arrays as .npy files.
np.save('oof_pred_deepfm_cv_50.npy',oof_pred_deepfm)
np.save('y_pred_deepfm_cv_50.npy',    y_pred_deepfm)