In [1]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.datasets import make_classification
from keras.models import Sequential
from keras.utils import np_utils
from keras.callbacks import Callback, EarlyStopping
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras import backend as K
from keras.optimizers import SGD
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# define roc_callback, inspired by https://github.com/keras-team/keras/issues/6050#issuecomment-329996505
def auc_roc(y_true, y_pred):
    # any tensorflow metric
    value, update_op = tf.contrib.metrics.streaming_auc(y_pred, y_true)

    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'auc_roc' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        value = tf.identity(value)
        return value

In [3]:
from sklearn.cross_validation import train_test_split

X = pd.read_csv('./preprocessed_data/all_data_v4_mix_v5.csv')

features = X[X['TARGET'].notnull()]
test_features = X[X['TARGET'].isnull()]

# Extract the ids
train_ids = features['SK_ID_CURR']
test_ids = test_features['SK_ID_CURR']

# Extract the labels for training
labels = features['TARGET']

# Remove the ids and target
features = features.drop(columns = ['SK_ID_CURR', 'TARGET'])
test_features = test_features.drop(columns = ['SK_ID_CURR'])

features = pd.get_dummies(features)
test_features = pd.get_dummies(test_features)

# Align the dataframes by the columns
features, test_features = features.align(test_features, join = 'inner', axis = 1)




In [4]:
print(features.shape, test_features.shape)

(307511, 1155) (48744, 1155)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=0)

In [6]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(215257, 1155) (92254, 1155) (215257,) (92254,)


In [7]:
import os
from datetime import datetime
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

def train_nn(X_train,X_test,y_train,y_test,metric=auc_roc):
    input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(256, input_dim=input_dim, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy',metric])
    
    log_name = '/EP{epoch:02d}-LOSS{val_auc_roc:.4f}.h5'
    log_dir = datetime.now().strftime('./nn_log/Final_model_%Y%m%d_%H%M')
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    es = EarlyStopping(monitor='auc_roc', patience=50)
    mc = ModelCheckpoint(log_dir + log_name, monitor='auc_roc', save_best_only=True)
    
    model.fit(X_train, y_train, epochs=1500, batch_size=128,
          validation_data= (X_test, y_test),callbacks = [es, mc])
    print("The model save in dir {}".format(log_dir))

In [8]:
train_nn(X_train,X_test,y_train,y_test)

Instructions for updating:
Please switch to tf.metrics.auc. Note that the order of the labels and predictions arguments has been switched.
Train on 215257 samples, validate on 92254 samples
Epoch 1/150


InternalError: Failed to create session.