# Part3: Model definition

Data is taken from https://www.kaggle.com/c/santander-customer-transaction-prediction

In this challenge, Santander invites Kagglers to help them identify which customers will make a specific transaction in the future, irrespective of the amount of money transacted. The data provided for this competition has the same structure as the real data they have available to solve this problem.

The data is anonymized: each row contains 200 numerical values, each identified only by a number.

In this notebook we define the ML models whose performance will be tested on the training and validation data.

## Load packages

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential
from keras import regularizers
from keras.constraints import max_norm

from sklearn.metrics import roc_auc_score, roc_curve


Using TensorFlow backend.


## Build models

### DNN

In [2]:
import tensorflow as tf
from keras import backend as K
# Add ROC AUC metric to monitor the NN
def auc(y_true, y_pred):
    """AUC metric callable, passed to ``model.compile(metrics=[...])`` below.

    y_true: tensor of ground-truth labels.
    y_pred: tensor of predicted scores.

    Returns the second element of the pair produced by ``tf.metrics.auc``
    (the update op according to the TF 1.x API reference). TensorFlow local
    variables are initialised in the Keras backend session before returning.
    """
    metric_pair = tf.metrics.auc(y_true, y_pred)
    update_op = metric_pair[1]

    # tf.metrics.* keep their accumulators in local variables, which have to
    # be initialised in the session Keras is using.
    session = K.get_session()
    session.run(tf.local_variables_initializer())
    return update_op

In [3]:
#from keras.constraints import max_norm
def build_NN_128(X_train_NN):
    """Build, compile and print a 128-32-16-8-4-1 dense binary classifier.

    X_train_NN: training matrix; only ``X_train_NN.shape[1]`` is read, and it
        is used as the input dimension of the first layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    Adam optimizer, accuracy and ``auc`` metrics). The model summary is
    printed as a side effect.
    """
    net = Sequential()
    n_inputs = X_train_NN.shape[1]

    # Input block: the only layer with an explicit initializer and an
    # activity regularizer.
    net.add(Dense(
        units=128,
        activation="relu",
        input_dim=n_inputs,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(0.001),
        kernel_constraint=max_norm(5.),
        activity_regularizer=regularizers.l2(1e-7),
    ))
    net.add(Dropout(rate=0.2))

    # Hidden blocks: (units, dropout rate applied after the layer).
    hidden_spec = ((32, 0.2), (16, 0.2), (8, 0.15), (4, 0.15))
    for width, drop_rate in hidden_spec:
        net.add(Dense(
            units=width,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
            kernel_constraint=max_norm(5),
        ))
        net.add(Dropout(rate=drop_rate))

    # Single sigmoid unit for the binary target.
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', auc],
    )
    net.summary()
    return net

In [4]:
#from keras.constraints import max_norm
def build_NN_64(X_train_NN):
    """Build, compile and print a 64-32-16-8-4-1 dense binary classifier.

    X_train_NN: training matrix; only ``X_train_NN.shape[1]`` is read, and it
        is used as the input dimension of the first layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    Adam optimizer, accuracy and ``auc`` metrics). The model summary is
    printed as a side effect.
    """
    net = Sequential()
    n_inputs = X_train_NN.shape[1]

    # Input block: weaker kernel L2 (1e-4) than the hidden layers (1e-3),
    # plus an activity regularizer.
    net.add(Dense(
        units=64,
        activation="relu",
        input_dim=n_inputs,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(0.0001),
        kernel_constraint=max_norm(5.),
        activity_regularizer=regularizers.l2(1e-6),
    ))
    net.add(Dropout(rate=0.2))

    # Hidden blocks: (units, dropout rate applied after the layer).
    hidden_spec = ((32, 0.2), (16, 0.2), (8, 0.15), (4, 0.15))
    for width, drop_rate in hidden_spec:
        net.add(Dense(
            units=width,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.001),
            kernel_constraint=max_norm(5),
        ))
        net.add(Dropout(rate=drop_rate))

    # Single sigmoid unit for the binary target.
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', auc],
    )
    net.summary()
    return net

In [5]:
#kernel_regularizer: Regularizer function applied to the kernel weights matrix
#activity_regularizer: Regularizer function applied to the output of the layer (its "activation")
def build_NN_32(X_train_NN):
    """Build, compile and print a 32-16-1 dense binary classifier.

    X_train_NN: training matrix; only ``X_train_NN.shape[1]`` is read, and it
        is used as the input dimension of the first layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    Adam optimizer, accuracy and ``auc`` metrics). The model summary is
    printed as a side effect.
    """
    net = Sequential()
    n_inputs = X_train_NN.shape[1]

    net.add(Dense(
        units=32,
        activation="relu",
        input_dim=n_inputs,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(3e-3),
        kernel_constraint=max_norm(5.),
        activity_regularizer=regularizers.l2(1e-6),
    ))
    net.add(Dropout(rate=0.1))
    net.add(Dense(
        units=16,
        activation='relu',
        kernel_regularizer=regularizers.l2(3e-3),
        kernel_constraint=max_norm(5),
        activity_regularizer=regularizers.l2(1e-6),
    ))

    # Disabled in this version: a deeper tail of Dropout(0.1) + Dense layers
    # with 8, 8 and 4 relu units (same l2(3e-3) kernel regularizer,
    # max_norm(5) constraint and l2(1e-6) activity regularizer), each followed
    # by Dropout(0.1). The 16-unit layer feeds the output directly.

    net.add(Dense(units=1, activation='sigmoid'))

    # Disabled alternatives for compile: loss='mean_squared_error' with
    # optimizer='adam', and loss='binary_crossentropy' with optimizer='sgd'.
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', auc],
    )
    net.summary()
    return net

In [6]:
def build_NN_16(X_train_NN):
    """Build, compile and print a 16-8-8-4-1 dense binary classifier.

    X_train_NN: training matrix; only ``X_train_NN.shape[1]`` is read, and it
        is used as the input dimension of the first layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    Adam optimizer, accuracy and ``auc`` metrics). The model summary is
    printed as a side effect.
    """
    net = Sequential()
    n_inputs = X_train_NN.shape[1]

    # Input block: kernel L2 of 0.005 (hidden layers below use 0.01).
    net.add(Dense(
        units=16,
        activation="relu",
        input_dim=n_inputs,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(0.005),
        kernel_constraint=max_norm(3.),
    ))
    net.add(Dropout(rate=0.1))

    # Hidden blocks: (units, activation). The last hidden layer is the only
    # one using tanh instead of relu.
    hidden_spec = ((8, 'relu'), (8, 'relu'), (4, 'tanh'))
    for width, act in hidden_spec:
        net.add(Dense(
            units=width,
            activation=act,
            kernel_regularizer=regularizers.l2(0.01),
            kernel_constraint=max_norm(3),
        ))
        net.add(Dropout(rate=0.1))

    # Single sigmoid unit for the binary target.
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', auc],
    )
    net.summary()
    return net

In [7]:
def build_NN_8(X_train_NN):
    """Build, compile and print a 128-8-8-4-1 dense binary classifier.

    X_train_NN: training matrix; only ``X_train_NN.shape[1]`` is read, and it
        is used as the input dimension of the first layer.

    Returns the compiled ``Sequential`` model (binary cross-entropy loss,
    Adam optimizer, accuracy and ``auc`` metrics). The model summary is
    printed as a side effect.
    """
    net = Sequential()
    n_inputs = X_train_NN.shape[1]

    # NOTE(review): the first layer has 128 units although the function is
    # named "_8" and the sibling builders start at the width in their name.
    # Confirm whether 128 is intended before changing it.
    net.add(Dense(
        units=128,
        activation="relu",
        input_dim=n_inputs,
        kernel_initializer="normal",
        kernel_regularizer=regularizers.l2(0.0001),
        kernel_constraint=max_norm(3.),
    ))
    net.add(Dropout(rate=0.1))

    # Hidden blocks, each followed by the same 0.1 dropout.
    for width in (8, 8, 4):
        net.add(Dense(
            units=width,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.0001),
            kernel_constraint=max_norm(3),
        ))
        net.add(Dropout(rate=0.1))

    # Single sigmoid unit for the binary target.
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', auc],
    )
    net.summary()
    return net

In [8]:
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from imblearn.ensemble import RUSBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
def build_BalancedBaggingClassifier():
    """Return an unfitted ensemble classifier.

    Despite the function name, the object returned is a
    ``RUSBoostClassifier`` with 200 estimators whose base estimator is an
    ``AdaBoostClassifier`` with 200 estimators.
    """
    # Disabled alternatives kept for reference:
    #   * RUSBoostClassifier(n_estimators=200) over
    #     AdaBoostClassifier(base_estimator=GradientBoostingClassifier(
    #     max_depth=5, learning_rate=0.9), n_estimators=20)
    #   * BalancedBaggingClassifier(n_estimators=10, random_state=42,
    #     max_samples=0.5) over DecisionTreeClassifier(max_depth=100)
    return RUSBoostClassifier(
        n_estimators=200,
        base_estimator=AdaBoostClassifier(n_estimators=200),
    )