In [22]:
import numpy as np
import pandas as pd

from keras.models import Sequential, load_model
from keras.layers import Dense, BatchNormalization, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import RMSprop
from keras import backend as K
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args

import os

In [23]:
print(os.listdir("../input"))

['train.csv', 'sample_submission.csv', 'test.csv']


In [24]:
train_data = pd.read_csv('../input/train.csv')
test_data = pd.read_csv('../input/test.csv')

In [25]:
x_train = train_data.drop(['id', 'target'], axis = 1)
y_train = train_data['target']

In [26]:
del train_data

In [27]:
test_data = test_data.drop(['id'], axis = 1)

In [28]:
y_train.value_counts()

1.0    160
0.0     90
Name: target, dtype: int64

In [29]:
x_train.shape

(250, 300)

In [30]:
x_tr, x_val, y_tr, y_val = train_test_split(x_train, y_train, test_size = 0.2, stratify = y_train, random_state = 42)

In [31]:
dim_num_dense_layers = Integer(low = 1, high = 5, name='num_dense_layers')
dim_num_dense_nodes = Integer(low = 5, high = 512, name='num_dense_nodes')
dim_activation = Categorical(categories = ['relu', 'tanh'], name = 'activation')
dim_optimizer = Categorical(categories = ['rmsprop', 'adam', 'nadam'], name = 'optimizer')

In [32]:
dimensions = [dim_num_dense_layers,
              dim_num_dense_nodes,
              dim_activation,
              dim_optimizer]

In [33]:
default_parameters = [3, 200, 'relu', 'rmsprop']

In [34]:
def create_model(num_dense_layers, num_dense_nodes, activation, optimizer):
    model = Sequential()
    
    model.add(Dense(300, input_shape = (300,)))
    
    
    for i in range(num_dense_layers):
        name = 'layer_dense_{0}'.format(i + 1)
        
        model.add(Dense(num_dense_nodes, activation = activation, name = name))
    
    model.add(Dropout(0.5))
    
    model.add(Dense(1, activation = 'sigmoid'))
    
    model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    
    return model

In [35]:
path_best_model = 'best_model.keras'

best_accuracy = 0.0

best_roc_auc = 0.0

In [36]:
@use_named_args(dimensions = dimensions)
def fitness(num_dense_layers, num_dense_nodes, activation, optimizer):
    
    reduce_learning_rate = ReduceLROnPlateau(monitor = 'val_acc', patience = 3, verbose = 0, factor = 0.75, min_lr = 0.00001)
    early_stopping = EarlyStopping(monitor = 'val_loss', min_delta = 1e-10, patience = 10, verbose = 1, restore_best_weights = True)

    callbacks = [reduce_learning_rate, early_stopping]
    
    batch_size = 8

    train_step_size = x_train.shape[0] // batch_size

    print('Num dense layers: ', num_dense_layers)
    print('Num dense nodes: ', num_dense_nodes)
    print('Activation: ', activation)
    print('Optimizer: ', optimizer)
    print()
    
    model = create_model(num_dense_layers = num_dense_layers,
                         num_dense_nodes = num_dense_nodes,
                         activation = activation,
                         optimizer = optimizer)
    
    history = model.fit(x_tr, 
                        y_tr,
                        epochs = 50,
                        validation_data = (x_val, y_val),
                        verbose = 0,
                        callbacks = callbacks)
    
    roc_auc = roc_auc_score(y_val, model.predict(x_val))
    
    print()
    print("ROC AUC: {0:.2%}".format(roc_auc))
    print()
    
    global best_roc_auc
    
    if roc_auc > best_roc_auc:
        model.save(path_best_model)
        
        best_roc_auc = roc_auc
    
    #accuracy = history.history['val_acc'][-1]
    
    #print()
    #print("Accuracy: {0:.2%}".format(accuracy))
    #print()
    
    #global best_accuracy
    
    #if accuracy > best_accuracy:
    #    model.save(path_best_model)
    #    
    #    best_accuracy = accuracy
    
    del model
    
    K.clear_session()
    
    return -roc_auc
        

In [37]:
search_result = gp_minimize(func = fitness, dimensions = dimensions, acq_func = 'EI', n_calls = 100, x0 = default_parameters)

Num dense layers:  3
Num dense nodes:  200
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 60.42%

Num dense layers:  4
Num dense nodes:  160
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 58.33%

Num dense layers:  3
Num dense nodes:  282
Activation:  relu
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 71.35%

Num dense layers:  3
Num dense nodes:  315
Activation:  relu
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 70.49%

Num dense layers:  1
Num dense nodes:  327
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 68.58%

Num dense layers:  4
Num dense nodes:  499
Activation:  tanh
Optimizer:  adam

Restoring model 



Num dense layers:  1
Num dense nodes:  6
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 71.88%

Num dense layers:  1
Num dense nodes:  6
Activation:  relu
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00023: early stopping

ROC AUC: 53.65%





Num dense layers:  5
Num dense nodes:  6
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00028: early stopping

ROC AUC: 75.69%





Num dense layers:  1
Num dense nodes:  5
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 61.63%

Num dense layers:  5
Num dense nodes:  5
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00021: early stopping

ROC AUC: 70.66%

Num dense layers:  5
Num dense nodes:  5
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00026: early stopping

ROC AUC: 67.71%

Num dense layers:  5
Num dense nodes:  512
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 72.05%

Num dense layers:  3
Num dense nodes:  512
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 60.24%

Num dense layers:  5
Num dense nodes:  512
Activation:  relu
Optimizer:  nadam

Restoring model



Num dense layers:  5
Num dense nodes:  5
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00016: early stopping

ROC AUC: 76.56%





Num dense layers:  5
Num dense nodes:  5
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 52.78%

Num dense layers:  2
Num dense nodes:  512
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 72.40%

Num dense layers:  1
Num dense nodes:  511
Activation:  tanh
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 50.35%

Num dense layers:  5
Num dense nodes:  5
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00023: early stopping

ROC AUC: 71.70%





Num dense layers:  1
Num dense nodes:  5
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 61.98%

Num dense layers:  2
Num dense nodes:  512
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 50.87%





Num dense layers:  5
Num dense nodes:  5
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00021: early stopping

ROC AUC: 60.76%





Num dense layers:  1
Num dense nodes:  5
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 69.62%





Num dense layers:  1
Num dense nodes:  5
Activation:  tanh
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 53.82%

Num dense layers:  5
Num dense nodes:  503
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 63.02%

Num dense layers:  5
Num dense nodes:  5
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 62.76%

Num dense layers:  3
Num dense nodes:  512
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 65.62%

Num dense layers:  1
Num dense nodes:  512
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 71.35%





Num dense layers:  1
Num dense nodes:  512
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 59.55%

Num dense layers:  1
Num dense nodes:  5
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 72.22%





Num dense layers:  5
Num dense nodes:  5
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00026: early stopping

ROC AUC: 62.76%

Num dense layers:  5
Num dense nodes:  5
Activation:  tanh
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 51.74%

Num dense layers:  5
Num dense nodes:  506
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 63.37%

Num dense layers:  5
Num dense nodes:  7
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 70.31%

Num dense layers:  5
Num dense nodes:  8
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00016: early stopping

ROC AUC: 67.36%





Num dense layers:  5
Num dense nodes:  512
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 54.51%

Num dense layers:  1
Num dense nodes:  6
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00016: early stopping

ROC AUC: 63.37%

Num dense layers:  1
Num dense nodes:  8
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 54.86%

Num dense layers:  5
Num dense nodes:  10
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 62.67%

Num dense layers:  1
Num dense nodes:  512
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 82.99%

Num dense layers:  1
Num dense nodes:  510
Activation:  tanh
Optimizer:  nadam

Restoring model weig



Num dense layers:  5
Num dense nodes:  512
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 71.70%

Num dense layers:  1
Num dense nodes:  511
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 65.62%

Num dense layers:  1
Num dense nodes:  510
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 52.43%

Num dense layers:  5
Num dense nodes:  7
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00018: early stopping

ROC AUC: 76.39%

Num dense layers:  5
Num dense nodes:  511
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 55.03%





Num dense layers:  1
Num dense nodes:  510
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 60.07%

Num dense layers:  5
Num dense nodes:  10
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 72.74%

Num dense layers:  1
Num dense nodes:  9
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 61.98%

Num dense layers:  5
Num dense nodes:  11
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 52.60%





Num dense layers:  5
Num dense nodes:  512
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 79.34%





Num dense layers:  5
Num dense nodes:  511
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00013: early stopping

ROC AUC: 69.79%

Num dense layers:  5
Num dense nodes:  510
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 42.88%

Num dense layers:  1
Num dense nodes:  5
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 70.66%

Num dense layers:  5
Num dense nodes:  512
Activation:  relu
Optimizer:  adam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 62.67%

Num dense layers:  2
Num dense nodes:  5
Activation:  tanh
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping

ROC AUC: 57.12%





Num dense layers:  1
Num dense nodes:  512
Activation:  relu
Optimizer:  rmsprop

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 73.26%





Num dense layers:  1
Num dense nodes:  512
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 65.97%

Num dense layers:  5
Num dense nodes:  511
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00011: early stopping

ROC AUC: 57.47%

Num dense layers:  1
Num dense nodes:  7
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 64.06%





Num dense layers:  1
Num dense nodes:  5
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00012: early stopping

ROC AUC: 69.10%

Num dense layers:  1
Num dense nodes:  9
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00014: early stopping

ROC AUC: 69.79%





Num dense layers:  1
Num dense nodes:  5
Activation:  tanh
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00016: early stopping

ROC AUC: 58.85%





Num dense layers:  1
Num dense nodes:  5
Activation:  relu
Optimizer:  nadam

Restoring model weights from the end of the best epoch
Epoch 00018: early stopping

ROC AUC: 60.59%



In [38]:
search_result.x

[1, 512, 'tanh', 'nadam']

In [39]:
search_result.fun

-0.8298611111111112

In [40]:
predict_model = load_model(path_best_model)

predictions = predict_model.predict(test_data)

In [41]:
ps = []
for i, value in enumerate(predictions):
    ps.append(value[0])

#predictions = np.argmax(predictions, axis = 1)

predictions = pd.Series(ps, name = "target")

submission = pd.concat([pd.Series(range(250, 20000), name = "id"), predictions], axis = 1)

submission.to_csv("dont-overfit-submission.csv", index = False)