## Earning predictions

The aim of project is to predict if a person will earn more than 50 000 USD using neural network. I've compared results from random forest and xgboost model with deep neural network.

In [2]:
import pandas as pd
import numpy as np
np.random.seed(2019)
from sklearn.model_selection import KFold, StratifiedKFold,GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from functools import partial
import os
import warnings
warnings.filterwarnings('ignore')

In [7]:
df = pd.read_hdf('train.adult.h5')
df['target'] = df['Target'].factorize()[0]
df=df.fillna(-1)
df=df.drop_duplicates()
df=df.reset_index()

In [8]:
def feat_eng(df):
    cat_feats = df.select_dtypes(include=[np.object]).columns
    for cat_feat in cat_feats:
         df['{0}_cat'.format(cat_feat)] = pd.factorize( df[cat_feat] )[0]              
    df['White']=df['Race'].apply(lambda x: 1 if x=='White' else 0)
    df['Black']=df['Race'].apply(lambda x: 1 if x=='Black' else 0)
    df['Other_race']=df['Race'].apply(lambda x: 1 if  (x!='White') & (x!='Black') else 0)
    df['Extra_hours']=df['Hours per week'].map(lambda x: 1 if x>40 else 0)
    df['Extra_hours_num']=df['Hours per week'].map(lambda x: x-40 if x>40 else 0)
    df['Husband']=df['Relationship'].apply(lambda x: 1 if x=='Husband' else 0)
    df['Married-civ-spouse']=df['Martial Status'].apply(lambda x: 1 if x=='Married-civ-spouse' else 0)
    df['Never-married']=df['Martial Status'].apply(lambda x: 1 if x=='Never-married' else 0)
    df['Country_us']=df['Country'].apply(lambda x: 1 if x =='United-States' else 0)
    df['Country_other']=df['Country'].apply(lambda x: 1 if (x!='United-States') else 0)
    df['Occ_white']=pd.factorize(df[['Occupation_cat','White']].apply(lambda x: '{0}-{1}'.format(x['Occupation_cat'], x['White']),axis=1))[0]
    df['Occ_other']=pd.factorize(df[['Occupation_cat','Other_race']].apply(lambda x: '{0}-{1}'.format(x['Occupation_cat'], x['Other_race']),axis=1))[0]   
    df['Productive_age']=df['Age'].apply(lambda x: 1 if (x >=24) & (x<=70) else 0)
    df['Master_bachelor']=df['Education'].apply(lambda x: 1 if (x =='Bachelors') | (x=='Masters') else 0)
    df['Doctor_prof']=df['Education'].apply(lambda x: 1 if (x =='Prof-school') | (x=='Doctorate') else 0)
    df['White_husband']=df[['Relationship','Race']].apply(lambda x: 1 if (x['Relationship']=='Husband') & (x['Race']=='White' ) else 0,axis=1)
    df['Black_husband']=df[['Relationship','Race']].apply(lambda x: 1 if (x['Relationship']=='Husband') & (x['Race']=='Black' ) else 0,axis=1)
    df['Occ_sex']=pd.factorize(df[['Occupation_cat','Sex']].apply(lambda x: '{0}-{1}'.format(x['Occupation_cat'], x['Sex']),axis=1))[0]
    df['Occ_rel_sex']=pd.factorize(df[['Occupation_cat','Relationship_cat','Sex']].apply(lambda x: '{0}-{1}-{2}'.format(x['Occupation_cat'], x['Relationship_cat'],x['Sex']),axis=1))[0]
    df['Married_productive']=pd.factorize(df[['Married-civ-spouse','Productive_age']].apply(lambda x: '{0}-{1}'.format(x['Married-civ-spouse'], x['Productive_age']),axis=1))[0]
    df['Occ_martial']=pd.factorize(df[['Occupation_cat','Martial Status']].apply(lambda x: '{0}-{1}'.format(x['Occupation_cat'], x['Martial Status']),axis=1))[0]
    df['Educ_martial']=df[['Education','Martial Status']].apply(lambda x: 1 if (((x['Education'] =='Assoc-voc') | (x['Education'] =='Bachelors')) & (x['Martial Status']=='Married-AF-spouse') ) else 0,axis=1)   
    df['fnlwgt_log']=np.log2(df['fnlwgt']+1)    
    return df

In [9]:
df_full=feat_eng(df)

### Neural network

Neural network has been build in order to compare result with previous models. Data has been transformed and normalized to fulfill requirements of neural network.

In [10]:
feats = ['Age', 'Education-Num', 'Extra_hours',
        'Husband','Married-civ-spouse','Never-married',
        'Occupation_cat','Occ_white','Occ_other',
        'Master_bachelor','Doctor_prof','Productive_age',
        'Capital Gain', 'Capital Loss','Relationship_cat',
        'White_husband','Black_husband',
        'Sex','White','Black','Other_race',
        'Country_us','Country_other','fnlwgt_log',
        'Occ_sex','Occ_rel_sex','Married_productive',
        'Occ_martial','Martial Status_cat', 'Educ_martial']
df_all = df_full[feats]
y_all = df_full['target']

Categorical features for one-hot encoding:

In [11]:
features=[ 'Education-Num', 'Husband', 'Married-civ-spouse',
       'Never-married', 'Occupation_cat', 'Occ_white', 'Occ_other',
       'Master_bachelor', 'Doctor_prof', 'Productive_age', 'Relationship_cat', 'White_husband', 'Black_husband',
       'Sex', 'White', 'Black', 'Other_race', 'Country_us', 'Country_other',
        'Occ_sex', 'Occ_rel_sex', 'Married_productive',
       'Occ_martial', 'Martial Status_cat', 'Educ_martial']

In [12]:
for feat in features:
    df_all[feat]=df_all[feat].astype('object')
cat=df_all.select_dtypes(include=['object']).columns
num=df_all.select_dtypes(exclude=['object']).columns

In [13]:
df_all[num]=StandardScaler().fit_transform(df_all[num])
for feat in cat:
    df_all=pd.concat([df_all, pd.get_dummies(df_all[feat].astype('category'))], axis=1)

In [14]:
for f in cat:
    del df_all[f]

In [15]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32537 entries, 0 to 32536
Columns: 413 entries, Age to 1
dtypes: float64(5), uint8(408)
memory usage: 13.9 MB


Dataframe has been split into train, test and valid set:

In [16]:
X_train, X_t, y_train, y_t = train_test_split(df_all, y_all, test_size=0.3, random_state=2018) 

X_train=X_train.reset_index()
y_train=y_train.reset_index()
X_t=X_t.reset_index()
y_t=y_t.reset_index()

del X_train['index']
del X_t['index']
del y_train['index']
del y_t['index']

X_test, X_valid, y_test, y_valid = train_test_split(X_t, y_t, test_size=0.5, random_state=2018) 

X_test=X_test.reset_index()
y_test=y_test.reset_index()
X_valid=X_valid.reset_index()
y_valid=y_valid.reset_index()

del X_valid['index']
del X_test['index']
del y_test['index']
del y_valid['index']

print(X_train.shape, ' ', y_train.shape)
print(X_valid.shape, ' ', y_valid.shape)
print(X_test.shape, ' ', y_test.shape)

(22775, 413)   (22775, 1)
(4881, 413)   (4881, 1)
(4881, 413)   (4881, 1)


### Deep neural network models

In [22]:
tf.reset_default_graph()

In [23]:
n_inputs = X_train.shape[1]
n_hidden1 = 300
n_hidden2 = 200
n_hidden3 = 100
n_outputs = 2

In [24]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y") 

In [25]:
with tf.name_scope("gsn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    hidden1 = tf.layers.dense(X, n_hidden1, name="h1",
                              activation=tf.nn.elu, kernel_initializer=he_init)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="h2",
                              activation=tf.nn.elu, kernel_initializer=he_init)
    hidden3 = tf.layers.dense(hidden2, n_hidden3, name="h3",
                              activation=tf.nn.elu, kernel_initializer=he_init)
    logits = tf.layers.dense(hidden3, n_outputs, name="out")

In [26]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

In [27]:
learning_rate = 0.01

with tf.name_scope("learn"):
    #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(loss)

In [28]:
with tf.name_scope("estimation"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [29]:
from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_board"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

In [30]:
logdir = log_dir("earning_gsn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [31]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [32]:
def next_batch(x_= X_train.values, y_= y_train.values.ravel(),batch_size=300, batch_index=2, epocha=1):
    tf.set_random_seed(epoch*len(x_)//batch_size + batch_index)
    np.random.seed(epocha*len(x_)//batch_size + batch_index)
    indices = np.random.randint(len(x_), size=batch_size) 
    np.random.shuffle(indices)
    return x_[indices],y_[indices]

In [35]:
n_epochs = 50
n_batches = 100
batch_s=X_train.shape[0] // n_batches

checkpoint_path = "/tmp/my_gsn_earning.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_gsn_earning"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 10

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Learn interrupted. Back to epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)
        
    for epoch in range(start_epoch, n_epochs):
        for iteration in range(n_batches ):
            X_batch, y_batch = next_batch(batch_index=iteration,epocha=epoch,batch_size=batch_s)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary],
                                                                                  feed_dict={X: X_valid.values, y: y_valid.values.ravel()})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 2 == 0:
            print("Epoch:", epoch,
                  "\tValidation: {:.3f}%".format(accuracy_val * 100),"\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 2
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stop")
                    break

Epoch: 0 	Validation: 84.696% 	Loss: 0.33120
Epoch: 2 	Validation: 84.860% 	Loss: 0.32741
Epoch: 4 	Validation: 84.204% 	Loss: 0.32549
Epoch: 6 	Validation: 85.085% 	Loss: 0.31937
Epoch: 8 	Validation: 85.187% 	Loss: 0.31897
Epoch: 10 	Validation: 85.515% 	Loss: 0.31955
Epoch: 12 	Validation: 85.208% 	Loss: 0.31484
Epoch: 14 	Validation: 85.065% 	Loss: 0.31884
Epoch: 16 	Validation: 84.942% 	Loss: 0.31810
Epoch: 18 	Validation: 85.597% 	Loss: 0.31434
Epoch: 20 	Validation: 85.392% 	Loss: 0.31575
Epoch: 22 	Validation: 85.413% 	Loss: 0.31795
Epoch: 24 	Validation: 85.392% 	Loss: 0.31588
Early stop


In [36]:
os.remove(checkpoint_epoch_path)
with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    accuracy_val = accuracy.eval(feed_dict={X: X_test.values, y: y_test.values.ravel()})
print(accuracy_val)

INFO:tensorflow:Restoring parameters from ./my_gsn_earning
0.85576725


Results are comparable, however a little bit worse than using RandomForestClassifier (about 86% of accuracy) and XGBClassifier (about 87% of accuracy). 

### Dropout

In [38]:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y") 

logdir = log_dir("earning_gsn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

training = tf.placeholder_with_default(False, shape=(), name='learn')

dropout_rate = 0.6 
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

with tf.name_scope("gsn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    hidden1 = tf.layers.dense(X, n_hidden1, name="h1",
                              activation=tf.nn.elu,kernel_initializer=he_init)
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)

    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, name="h2",
                              activation=tf.nn.elu,kernel_initializer=he_init)
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)

    hidden3 = tf.layers.dense(hidden2_drop, n_hidden3, name="h3",
                              activation=tf.nn.elu,kernel_initializer=he_init)
    logits = tf.layers.dense(hidden3, n_outputs, name="output")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
    
learning_rate = 0.01

with tf.name_scope("learn"):
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("validation"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [39]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 50
n_batches = 100
batch_s=X_train.shape[0] // n_batches

checkpoint_path = "/tmp/my_gsn_earning.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_gsn_earning"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 10

with tf.Session() as sess:  
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Learn interrupted. Back to epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)        
    for epoch in range(start_epoch, n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = next_batch(batch_index=iteration,epocha=epoch,batch_size=batch_s)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary],
                                                                                  feed_dict={X: X_valid.values, y: y_valid.values.ravel()})

        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 2 == 0:
            print("Epoch:", epoch,
                  "\tValidation: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 2
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stop")
                    break

Epoch: 0 	Validation: 84.532% 	Loss: 0.33293
Epoch: 2 	Validation: 84.778% 	Loss: 0.32966
Epoch: 4 	Validation: 84.163% 	Loss: 0.32873
Epoch: 6 	Validation: 85.003% 	Loss: 0.32100
Epoch: 8 	Validation: 85.208% 	Loss: 0.31999
Epoch: 10 	Validation: 85.290% 	Loss: 0.32120
Epoch: 12 	Validation: 85.474% 	Loss: 0.31580
Epoch: 14 	Validation: 85.310% 	Loss: 0.32002
Epoch: 16 	Validation: 84.860% 	Loss: 0.31989
Epoch: 18 	Validation: 85.720% 	Loss: 0.31539
Epoch: 20 	Validation: 85.351% 	Loss: 0.31734
Epoch: 22 	Validation: 85.146% 	Loss: 0.31961
Epoch: 24 	Validation: 85.433% 	Loss: 0.31667
Early stop


In [40]:
os.remove(checkpoint_epoch_path)
with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    accuracy_val = accuracy.eval(feed_dict={X: X_test.values, y: y_test.values.ravel()})
accuracy_val

INFO:tensorflow:Restoring parameters from ./my_gsn_earning


0.8543331

Results with dropout are not better than using initial model.

### Batch normalization

In [42]:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y") 
batch_norm_momentum = 0.9

logdir = log_dir("earning_gsn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

training = tf.placeholder_with_default(False, shape=(), name='learn')

with tf.name_scope("gsn"):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
            training=training,momentum=batch_norm_momentum)
    my_dense_layer = partial(tf.layers.dense,kernel_initializer=he_init)

    hidden1 = my_dense_layer(X, n_hidden1, name="h1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="h2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    hidden3 = my_dense_layer(bn2, n_hidden3, name="h3")
    bn3 = tf.nn.elu(my_batch_norm_layer(hidden3))
    logits_before_bn = my_dense_layer(bn3, n_outputs, name="output")
    logits = my_batch_norm_layer(logits_before_bn)    
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
    
learning_rate = 0.01

with tf.name_scope("learn"):
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("validation"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

In [43]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 50
n_batches = 100
batch_s=X_train.shape[0] // n_batches

checkpoint_path = "/tmp/my_gsn_earning.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_gsn_earning"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 10

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:    
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Learn interrupted. Back to epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)
    for epoch in range(start_epoch, n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = next_batch(batch_index=iteration,epocha=epoch,batch_size=batch_s)
            sess.run([training_op, extra_update_ops], feed_dict={training: True,X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], 
                                                                                  feed_dict={X: X_valid.values, y: y_valid.values.ravel()})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 2 == 0:
            print("Epoch:", epoch,
                  "\tValidation: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 2
                if epochs_without_progress > max_epochs_without_progress:
                    print("Early stop")
                    break

Epoch: 0 	Validation: 84.798% 	Loss: 0.33731
Epoch: 2 	Validation: 84.737% 	Loss: 0.33159
Epoch: 4 	Validation: 84.634% 	Loss: 0.32991
Epoch: 6 	Validation: 84.675% 	Loss: 0.32647
Epoch: 8 	Validation: 84.942% 	Loss: 0.32500
Epoch: 10 	Validation: 85.065% 	Loss: 0.32630
Epoch: 12 	Validation: 85.187% 	Loss: 0.32355
Epoch: 14 	Validation: 84.962% 	Loss: 0.32935
Epoch: 16 	Validation: 85.310% 	Loss: 0.32451
Epoch: 18 	Validation: 84.839% 	Loss: 0.32873
Epoch: 20 	Validation: 84.614% 	Loss: 0.33626
Epoch: 22 	Validation: 84.614% 	Loss: 0.33357
Early stop


In [44]:
os.remove(checkpoint_epoch_path)
with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    accuracy_val = accuracy.eval(feed_dict={X: X_test.values, y: y_test.values.ravel()})
accuracy_val

INFO:tensorflow:Restoring parameters from ./my_gsn_earning


0.8531039

Batch normalization has no significant impact on result in this case. 

### Hyperparameters optimalization

In [46]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=3, n_neurons=[300,200,100], momentum=0.9,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None
        self.momentum=momentum
        
    def _dnn(self, inputs):
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons[layer],
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int64, shape=(None), name="y")

        if self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='learn')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_prop")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate,
                                       momentum=self.momentum, use_nesterov=True) 
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="acc")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """get params for early stopp"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        self.close_session()
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        self.class_to_index_ = {label: index for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label] for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)

        max_checks_without_progress = 10
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)

                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid, self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tLoss function for validation set: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stop!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLoss function of last mini batch: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("Record %s not fitted" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [51]:
#dnn_clf = DNNClassifier(random_state=2019)
#dnn_clf.fit(X_train.values, y_train.values.ravel(), n_epochs=1000, X_valid=X_valid.values, y_valid=y_valid.values.ravel())

In [52]:
#from sklearn.metrics import accuracy_score
#y_pred = dnn_clf.predict(X_test.values)
#accuracy_score(y_test, y_pred)

In [47]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "batch_size": [200, 250, 227],
    "learning_rate": [0.01, 0.02],
    "momentum": [0.85,0.9, 0.95]}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=2019), param_distribs, n_iter=20, random_state=2019, verbose=2,
                                fit_params={"X_valid": X_valid.values, "y_valid": y_valid.values.ravel(), "n_epochs": 1000})
rnd_search.fit(X_train.values, y_train.values.ravel())

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] momentum=0.85, learning_rate=0.01, batch_size=200 ...............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0	Loss function for validation set: 0.335371	Best loss: 0.335371	Accuracy: 84.39%
1	Loss function for validation set: 0.330413	Best loss: 0.330413	Accuracy: 84.45%
2	Loss function for validation set: 0.328130	Best loss: 0.328130	Accuracy: 84.76%
3	Loss function for validation set: 0.328469	Best loss: 0.328130	Accuracy: 84.51%
4	Loss function for validation set: 0.324100	Best loss: 0.324100	Accuracy: 84.63%
5	Loss function for validation set: 0.328080	Best loss: 0.324100	Accuracy: 84.55%
6	Loss function for validation set: 0.322653	Best loss: 0.322653	Accuracy: 84.68%
7	Loss function for validation set: 0.323508	Best loss: 0.322653	Accuracy: 84.78%
8	Loss function for validation set: 0.327235	Best loss: 0.322653	Accuracy: 84.53%
9	Loss function for validation set: 0.323507	Best loss: 0.322653	Accuracy: 85.00%
10	Loss function for validation set: 0.324731	Best loss: 0.322653	Accuracy: 84.43%
11	Loss function for validation set: 0.321610	Best loss: 0.321610	Accuracy: 85.06%
12	Loss functi

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   46.8s remaining:    0.0s


0	Loss function for validation set: 0.333067	Best loss: 0.333067	Accuracy: 84.31%
1	Loss function for validation set: 0.330940	Best loss: 0.330940	Accuracy: 84.65%
2	Loss function for validation set: 0.331170	Best loss: 0.330940	Accuracy: 84.51%
3	Loss function for validation set: 0.326162	Best loss: 0.326162	Accuracy: 84.98%
4	Loss function for validation set: 0.327089	Best loss: 0.326162	Accuracy: 84.14%
5	Loss function for validation set: 0.324087	Best loss: 0.324087	Accuracy: 84.76%
6	Loss function for validation set: 0.324094	Best loss: 0.324087	Accuracy: 84.70%
7	Loss function for validation set: 0.322281	Best loss: 0.322281	Accuracy: 84.80%
8	Loss function for validation set: 0.322366	Best loss: 0.322281	Accuracy: 84.98%
9	Loss function for validation set: 0.323491	Best loss: 0.322281	Accuracy: 84.84%
10	Loss function for validation set: 0.321545	Best loss: 0.321545	Accuracy: 84.98%
11	Loss function for validation set: 0.321669	Best loss: 0.321545	Accuracy: 85.13%
12	Loss functi

1	Loss function for validation set: 0.329623	Best loss: 0.329623	Accuracy: 84.57%
2	Loss function for validation set: 0.327610	Best loss: 0.327610	Accuracy: 84.63%
3	Loss function for validation set: 0.326555	Best loss: 0.326555	Accuracy: 84.39%
4	Loss function for validation set: 0.323763	Best loss: 0.323763	Accuracy: 84.61%
5	Loss function for validation set: 0.326908	Best loss: 0.323763	Accuracy: 84.68%
6	Loss function for validation set: 0.321309	Best loss: 0.321309	Accuracy: 84.68%
7	Loss function for validation set: 0.323913	Best loss: 0.321309	Accuracy: 84.96%
8	Loss function for validation set: 0.325640	Best loss: 0.321309	Accuracy: 84.53%
9	Loss function for validation set: 0.322102	Best loss: 0.321309	Accuracy: 85.06%
10	Loss function for validation set: 0.321683	Best loss: 0.321309	Accuracy: 84.80%
11	Loss function for validation set: 0.320084	Best loss: 0.320084	Accuracy: 85.29%
12	Loss function for validation set: 0.326185	Best loss: 0.320084	Accuracy: 84.92%
13	Loss funct

18	Loss function for validation set: 0.318939	Best loss: 0.314938	Accuracy: 84.78%
19	Loss function for validation set: 0.317040	Best loss: 0.314938	Accuracy: 85.25%
20	Loss function for validation set: 0.318404	Best loss: 0.314938	Accuracy: 85.11%
21	Loss function for validation set: 0.320852	Best loss: 0.314938	Accuracy: 85.09%
22	Loss function for validation set: 0.315479	Best loss: 0.314938	Accuracy: 85.52%
23	Loss function for validation set: 0.316621	Best loss: 0.314938	Accuracy: 85.64%
24	Loss function for validation set: 0.317616	Best loss: 0.314938	Accuracy: 85.21%
25	Loss function for validation set: 0.315084	Best loss: 0.314938	Accuracy: 85.62%
Early stop!
[CV] . momentum=0.9, learning_rate=0.01, batch_size=200, total=  31.9s
[CV] momentum=0.95, learning_rate=0.01, batch_size=200 ...............
0	Loss function for validation set: 0.333545	Best loss: 0.333545	Accuracy: 84.35%
1	Loss function for validation set: 0.329116	Best loss: 0.329116	Accuracy: 84.55%
2	Loss function fo

8	Loss function for validation set: 0.324812	Best loss: 0.319405	Accuracy: 84.92%
9	Loss function for validation set: 0.318849	Best loss: 0.318849	Accuracy: 85.17%
10	Loss function for validation set: 0.320216	Best loss: 0.318849	Accuracy: 84.96%
11	Loss function for validation set: 0.317719	Best loss: 0.317719	Accuracy: 85.41%
12	Loss function for validation set: 0.325368	Best loss: 0.317719	Accuracy: 84.82%
13	Loss function for validation set: 0.318005	Best loss: 0.317719	Accuracy: 84.92%
14	Loss function for validation set: 0.316486	Best loss: 0.316486	Accuracy: 85.33%
15	Loss function for validation set: 0.317469	Best loss: 0.316486	Accuracy: 85.29%
16	Loss function for validation set: 0.316501	Best loss: 0.316486	Accuracy: 85.35%
17	Loss function for validation set: 0.318259	Best loss: 0.316486	Accuracy: 85.35%
18	Loss function for validation set: 0.333414	Best loss: 0.316486	Accuracy: 84.39%
19	Loss function for validation set: 0.318211	Best loss: 0.316486	Accuracy: 85.54%
20	Los

10	Loss function for validation set: 0.319206	Best loss: 0.316387	Accuracy: 85.04%
11	Loss function for validation set: 0.318370	Best loss: 0.316387	Accuracy: 85.47%
12	Loss function for validation set: 0.325024	Best loss: 0.316387	Accuracy: 84.65%
13	Loss function for validation set: 0.318188	Best loss: 0.316387	Accuracy: 85.23%
14	Loss function for validation set: 0.316593	Best loss: 0.316387	Accuracy: 85.41%
15	Loss function for validation set: 0.316760	Best loss: 0.316387	Accuracy: 85.04%
16	Loss function for validation set: 0.317603	Best loss: 0.316387	Accuracy: 85.52%
17	Loss function for validation set: 0.320112	Best loss: 0.316387	Accuracy: 85.31%
Early stop!
[CV] . momentum=0.9, learning_rate=0.02, batch_size=200, total=  24.0s
[CV] momentum=0.9, learning_rate=0.02, batch_size=200 ................
0	Loss function for validation set: 0.328803	Best loss: 0.328803	Accuracy: 84.61%
1	Loss function for validation set: 0.329885	Best loss: 0.328803	Accuracy: 84.55%
2	Loss function fo

17	Loss function for validation set: 0.326458	Best loss: 0.315420	Accuracy: 85.45%
18	Loss function for validation set: 0.329908	Best loss: 0.315420	Accuracy: 85.60%
19	Loss function for validation set: 0.327394	Best loss: 0.315420	Accuracy: 85.11%
Early stop!
[CV]  momentum=0.95, learning_rate=0.02, batch_size=200, total=  23.9s
[CV] momentum=0.95, learning_rate=0.02, batch_size=200 ...............
0	Loss function for validation set: 0.329850	Best loss: 0.329850	Accuracy: 84.74%
1	Loss function for validation set: 0.323843	Best loss: 0.323843	Accuracy: 84.88%
2	Loss function for validation set: 0.324767	Best loss: 0.323843	Accuracy: 84.76%
3	Loss function for validation set: 0.323266	Best loss: 0.323266	Accuracy: 85.19%
4	Loss function for validation set: 0.320416	Best loss: 0.320416	Accuracy: 85.02%
5	Loss function for validation set: 0.318019	Best loss: 0.318019	Accuracy: 85.09%
6	Loss function for validation set: 0.315413	Best loss: 0.315413	Accuracy: 85.39%
7	Loss function for val

33	Loss function for validation set: 0.318130	Best loss: 0.317155	Accuracy: 85.56%
34	Loss function for validation set: 0.320171	Best loss: 0.317155	Accuracy: 85.15%
35	Loss function for validation set: 0.318097	Best loss: 0.317155	Accuracy: 85.39%
36	Loss function for validation set: 0.320102	Best loss: 0.317155	Accuracy: 85.37%
37	Loss function for validation set: 0.317002	Best loss: 0.317002	Accuracy: 85.41%
38	Loss function for validation set: 0.318398	Best loss: 0.317002	Accuracy: 85.37%
39	Loss function for validation set: 0.322637	Best loss: 0.317002	Accuracy: 85.47%
40	Loss function for validation set: 0.320122	Best loss: 0.317002	Accuracy: 85.33%
41	Loss function for validation set: 0.318401	Best loss: 0.317002	Accuracy: 85.58%
42	Loss function for validation set: 0.322800	Best loss: 0.317002	Accuracy: 84.70%
43	Loss function for validation set: 0.317484	Best loss: 0.317002	Accuracy: 85.43%
44	Loss function for validation set: 0.319925	Best loss: 0.317002	Accuracy: 85.23%
45	L

34	Loss function for validation set: 0.319423	Best loss: 0.315216	Accuracy: 85.37%
35	Loss function for validation set: 0.320190	Best loss: 0.315216	Accuracy: 85.27%
36	Loss function for validation set: 0.321201	Best loss: 0.315216	Accuracy: 85.09%
37	Loss function for validation set: 0.320361	Best loss: 0.315216	Accuracy: 85.39%
38	Loss function for validation set: 0.318764	Best loss: 0.315216	Accuracy: 85.23%
Early stop!
[CV] . momentum=0.9, learning_rate=0.01, batch_size=250, total=  41.6s
[CV] momentum=0.9, learning_rate=0.01, batch_size=250 ................
0	Loss function for validation set: 0.334036	Best loss: 0.334036	Accuracy: 84.25%
1	Loss function for validation set: 0.329933	Best loss: 0.329933	Accuracy: 84.61%
2	Loss function for validation set: 0.331244	Best loss: 0.329933	Accuracy: 84.43%
3	Loss function for validation set: 0.327235	Best loss: 0.327235	Accuracy: 85.00%
4	Loss function for validation set: 0.329191	Best loss: 0.327235	Accuracy: 84.68%
5	Loss function for v

12	Loss function for validation set: 0.324914	Best loss: 0.316620	Accuracy: 84.59%
13	Loss function for validation set: 0.317679	Best loss: 0.316620	Accuracy: 85.33%
14	Loss function for validation set: 0.321247	Best loss: 0.316620	Accuracy: 85.19%
15	Loss function for validation set: 0.318271	Best loss: 0.316620	Accuracy: 85.31%
16	Loss function for validation set: 0.316833	Best loss: 0.316620	Accuracy: 85.33%
17	Loss function for validation set: 0.318598	Best loss: 0.316620	Accuracy: 85.29%
18	Loss function for validation set: 0.319256	Best loss: 0.316620	Accuracy: 85.13%
19	Loss function for validation set: 0.317514	Best loss: 0.316620	Accuracy: 85.25%
20	Loss function for validation set: 0.325734	Best loss: 0.316620	Accuracy: 84.98%
21	Loss function for validation set: 0.326665	Best loss: 0.316620	Accuracy: 85.13%
22	Loss function for validation set: 0.318325	Best loss: 0.316620	Accuracy: 85.37%
Early stop!
[CV]  momentum=0.95, learning_rate=0.01, batch_size=250, total=  25.1s
[CV]

34	Loss function for validation set: 0.317710	Best loss: 0.314739	Accuracy: 85.47%
35	Loss function for validation set: 0.319531	Best loss: 0.314739	Accuracy: 85.17%
36	Loss function for validation set: 0.324054	Best loss: 0.314739	Accuracy: 84.82%
37	Loss function for validation set: 0.319487	Best loss: 0.314739	Accuracy: 85.72%
38	Loss function for validation set: 0.321828	Best loss: 0.314739	Accuracy: 85.19%
Early stop!
[CV]  momentum=0.85, learning_rate=0.02, batch_size=250, total=  46.2s
[CV] momentum=0.85, learning_rate=0.02, batch_size=250 ...............
0	Loss function for validation set: 0.330642	Best loss: 0.330642	Accuracy: 84.51%
1	Loss function for validation set: 0.329642	Best loss: 0.329642	Accuracy: 84.59%
2	Loss function for validation set: 0.332982	Best loss: 0.329642	Accuracy: 84.45%
3	Loss function for validation set: 0.331278	Best loss: 0.329642	Accuracy: 84.63%
4	Loss function for validation set: 0.327767	Best loss: 0.327767	Accuracy: 84.63%
5	Loss function for v

31	Loss function for validation set: 0.321882	Best loss: 0.315942	Accuracy: 85.17%
32	Loss function for validation set: 0.320281	Best loss: 0.315942	Accuracy: 85.72%
33	Loss function for validation set: 0.325978	Best loss: 0.315942	Accuracy: 85.29%
34	Loss function for validation set: 0.322382	Best loss: 0.315942	Accuracy: 85.15%
35	Loss function for validation set: 0.322779	Best loss: 0.315942	Accuracy: 85.17%
36	Loss function for validation set: 0.328030	Best loss: 0.315942	Accuracy: 84.70%
37	Loss function for validation set: 0.323674	Best loss: 0.315942	Accuracy: 85.45%
38	Loss function for validation set: 0.322888	Best loss: 0.315942	Accuracy: 85.21%
Early stop!
[CV] . momentum=0.9, learning_rate=0.02, batch_size=250, total=  42.3s
[CV] momentum=0.9, learning_rate=0.02, batch_size=250 ................
0	Loss function for validation set: 0.329874	Best loss: 0.329874	Accuracy: 84.61%
1	Loss function for validation set: 0.330039	Best loss: 0.329874	Accuracy: 84.68%
2	Loss function fo

5	Loss function for validation set: 0.317337	Best loss: 0.317337	Accuracy: 85.21%
6	Loss function for validation set: 0.320828	Best loss: 0.317337	Accuracy: 85.33%
7	Loss function for validation set: 0.317666	Best loss: 0.317337	Accuracy: 85.29%
8	Loss function for validation set: 0.315980	Best loss: 0.315980	Accuracy: 85.39%
9	Loss function for validation set: 0.320360	Best loss: 0.315980	Accuracy: 85.39%
10	Loss function for validation set: 0.319472	Best loss: 0.315980	Accuracy: 85.19%
11	Loss function for validation set: 0.319000	Best loss: 0.315980	Accuracy: 85.49%
12	Loss function for validation set: 0.319808	Best loss: 0.315980	Accuracy: 85.21%
13	Loss function for validation set: 0.319959	Best loss: 0.315980	Accuracy: 85.74%
14	Loss function for validation set: 0.317885	Best loss: 0.315980	Accuracy: 85.25%
15	Loss function for validation set: 0.321353	Best loss: 0.315980	Accuracy: 85.70%
16	Loss function for validation set: 0.323422	Best loss: 0.315980	Accuracy: 85.02%
17	Loss f

14	Loss function for validation set: 0.320100	Best loss: 0.320100	Accuracy: 84.94%
15	Loss function for validation set: 0.320760	Best loss: 0.320100	Accuracy: 85.04%
16	Loss function for validation set: 0.321908	Best loss: 0.320100	Accuracy: 85.19%
17	Loss function for validation set: 0.320819	Best loss: 0.320100	Accuracy: 85.17%
18	Loss function for validation set: 0.319567	Best loss: 0.319567	Accuracy: 85.13%
19	Loss function for validation set: 0.320647	Best loss: 0.319567	Accuracy: 85.33%
20	Loss function for validation set: 0.318123	Best loss: 0.318123	Accuracy: 85.27%
21	Loss function for validation set: 0.325669	Best loss: 0.318123	Accuracy: 85.06%
22	Loss function for validation set: 0.318395	Best loss: 0.318123	Accuracy: 85.25%
23	Loss function for validation set: 0.318219	Best loss: 0.318123	Accuracy: 85.47%
24	Loss function for validation set: 0.319153	Best loss: 0.318123	Accuracy: 85.37%
25	Loss function for validation set: 0.319508	Best loss: 0.318123	Accuracy: 85.09%
26	L

15	Loss function for validation set: 0.318467	Best loss: 0.317544	Accuracy: 85.33%
16	Loss function for validation set: 0.316900	Best loss: 0.316900	Accuracy: 85.43%
17	Loss function for validation set: 0.319947	Best loss: 0.316900	Accuracy: 85.33%
18	Loss function for validation set: 0.330414	Best loss: 0.316900	Accuracy: 84.39%
19	Loss function for validation set: 0.319612	Best loss: 0.316900	Accuracy: 85.31%
20	Loss function for validation set: 0.319338	Best loss: 0.316900	Accuracy: 85.35%
21	Loss function for validation set: 0.325059	Best loss: 0.316900	Accuracy: 85.15%
22	Loss function for validation set: 0.316517	Best loss: 0.316517	Accuracy: 85.47%
23	Loss function for validation set: 0.316521	Best loss: 0.316517	Accuracy: 85.66%
24	Loss function for validation set: 0.317869	Best loss: 0.316517	Accuracy: 85.41%
25	Loss function for validation set: 0.317176	Best loss: 0.316517	Accuracy: 85.37%
26	Loss function for validation set: 0.316931	Best loss: 0.316517	Accuracy: 85.52%
27	L

32	Loss function for validation set: 0.315976	Best loss: 0.314955	Accuracy: 85.47%
33	Loss function for validation set: 0.318371	Best loss: 0.314955	Accuracy: 85.21%
34	Loss function for validation set: 0.315270	Best loss: 0.314955	Accuracy: 85.49%
35	Loss function for validation set: 0.319219	Best loss: 0.314955	Accuracy: 85.21%
36	Loss function for validation set: 0.316947	Best loss: 0.314955	Accuracy: 85.60%
Early stop!
[CV] . momentum=0.9, learning_rate=0.01, batch_size=227, total=  41.9s
[CV] momentum=0.95, learning_rate=0.01, batch_size=227 ...............
0	Loss function for validation set: 0.334938	Best loss: 0.334938	Accuracy: 84.39%
1	Loss function for validation set: 0.329861	Best loss: 0.329861	Accuracy: 84.59%
2	Loss function for validation set: 0.325456	Best loss: 0.325456	Accuracy: 84.80%
3	Loss function for validation set: 0.325771	Best loss: 0.325456	Accuracy: 84.74%
4	Loss function for validation set: 0.321797	Best loss: 0.321797	Accuracy: 85.09%
5	Loss function for v

15	Loss function for validation set: 0.318102	Best loss: 0.316957	Accuracy: 85.25%
16	Loss function for validation set: 0.316481	Best loss: 0.316481	Accuracy: 85.52%
17	Loss function for validation set: 0.319634	Best loss: 0.316481	Accuracy: 85.33%
18	Loss function for validation set: 0.333708	Best loss: 0.316481	Accuracy: 84.29%
19	Loss function for validation set: 0.318515	Best loss: 0.316481	Accuracy: 85.37%
20	Loss function for validation set: 0.316859	Best loss: 0.316481	Accuracy: 85.58%
21	Loss function for validation set: 0.323616	Best loss: 0.316481	Accuracy: 85.37%
22	Loss function for validation set: 0.315796	Best loss: 0.315796	Accuracy: 85.60%
23	Loss function for validation set: 0.318835	Best loss: 0.315796	Accuracy: 85.33%
24	Loss function for validation set: 0.317692	Best loss: 0.315796	Accuracy: 85.37%
25	Loss function for validation set: 0.320645	Best loss: 0.315796	Accuracy: 85.31%
26	Loss function for validation set: 0.316467	Best loss: 0.315796	Accuracy: 85.45%
27	L

10	Loss function for validation set: 0.320048	Best loss: 0.317567	Accuracy: 84.96%
11	Loss function for validation set: 0.318259	Best loss: 0.317567	Accuracy: 85.47%
12	Loss function for validation set: 0.325417	Best loss: 0.317567	Accuracy: 84.72%
13	Loss function for validation set: 0.317842	Best loss: 0.317567	Accuracy: 85.02%
14	Loss function for validation set: 0.316634	Best loss: 0.316634	Accuracy: 85.58%
15	Loss function for validation set: 0.317018	Best loss: 0.316634	Accuracy: 85.25%
16	Loss function for validation set: 0.316731	Best loss: 0.316634	Accuracy: 85.41%
17	Loss function for validation set: 0.320548	Best loss: 0.316634	Accuracy: 85.23%
18	Loss function for validation set: 0.333976	Best loss: 0.316634	Accuracy: 84.55%
19	Loss function for validation set: 0.319659	Best loss: 0.316634	Accuracy: 85.33%
20	Loss function for validation set: 0.316924	Best loss: 0.316634	Accuracy: 85.60%
21	Loss function for validation set: 0.326840	Best loss: 0.316634	Accuracy: 85.31%
22	L

21	Loss function for validation set: 0.335685	Best loss: 0.317769	Accuracy: 85.15%
22	Loss function for validation set: 0.323369	Best loss: 0.317769	Accuracy: 85.04%
23	Loss function for validation set: 0.330692	Best loss: 0.317769	Accuracy: 85.00%
24	Loss function for validation set: 0.325587	Best loss: 0.317769	Accuracy: 85.23%
25	Loss function for validation set: 0.326703	Best loss: 0.317769	Accuracy: 85.52%
26	Loss function for validation set: 0.326140	Best loss: 0.317769	Accuracy: 85.02%
Early stop!
[CV]  momentum=0.95, learning_rate=0.02, batch_size=227, total=  31.7s
[CV] momentum=0.95, learning_rate=0.02, batch_size=227 ...............
0	Loss function for validation set: 0.329578	Best loss: 0.329578	Accuracy: 84.65%
1	Loss function for validation set: 0.326659	Best loss: 0.326659	Accuracy: 84.82%
2	Loss function for validation set: 0.328159	Best loss: 0.326659	Accuracy: 84.90%
3	Loss function for validation set: 0.321368	Best loss: 0.321368	Accuracy: 84.94%
4	Loss function for 

[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed: 32.8min finished


0	Loss function for validation set: 0.332806	Best loss: 0.332806	Accuracy: 84.51%
1	Loss function for validation set: 0.329042	Best loss: 0.329042	Accuracy: 84.78%
2	Loss function for validation set: 0.326357	Best loss: 0.326357	Accuracy: 84.84%
3	Loss function for validation set: 0.324649	Best loss: 0.324649	Accuracy: 84.92%
4	Loss function for validation set: 0.323582	Best loss: 0.323582	Accuracy: 84.96%
5	Loss function for validation set: 0.320667	Best loss: 0.320667	Accuracy: 85.23%
6	Loss function for validation set: 0.321729	Best loss: 0.320667	Accuracy: 85.09%
7	Loss function for validation set: 0.319255	Best loss: 0.319255	Accuracy: 85.09%
8	Loss function for validation set: 0.319884	Best loss: 0.319255	Accuracy: 85.11%
9	Loss function for validation set: 0.321671	Best loss: 0.319255	Accuracy: 84.92%
10	Loss function for validation set: 0.315515	Best loss: 0.315515	Accuracy: 85.64%
11	Loss function for validation set: 0.316437	Best loss: 0.315515	Accuracy: 85.27%
12	Loss functi

RandomizedSearchCV(cv='warn', error_score='raise-deprecating',
          estimator=DNNClassifier(activation=<function elu at 0x000001EE92EB5048>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x000001EE993F2950>,
       learning_rate=0.01, momentum=0.9, n_hidden_layers=3,
       n_neurons=[300, 200, 100], random_state=2019),
          fit_params={'X_valid': array([[-0.40956, -0.6458 , ...,  1.     ,  0.     ],
       [ 1.71688, -0.6458 , ...,  1.     ,  0.     ],
       ...,
       [-1.3628 , -0.6458 , ...,  1.     ,  0.     ],
       [-1.50945, -0.6458 , ...,  1.     ,  0.     ]]), 'y_valid': array([0, 0, ..., 0, 0], dtype=int64), 'n_epochs': 1000},
          iid='warn', n_iter=20, n_jobs=None,
          param_distributions={'batch_size': [200, 250, 227], 'learning_rate': [0.01, 0.02], 'momentum': [0.85, 0.9, 0.95]},
          pre_dispatch='2*n_jobs', random_state=2019, refit=True,


In [48]:
rnd_search.best_params_

{'batch_size': 227, 'learning_rate': 0.01, 'momentum': 0.9}

In [49]:
y_pred = rnd_search.predict(X_test.values)
accuracy_score(y_test.values, y_pred)

0.8528989961073551

In [50]:
rnd_search.best_estimator_.save("./best_model_rs_earning")