In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import sys
sys.path.append('../py_model')
from utils import init_logging
import logging 

  return f(*args, **kwds)


# auto-encoder network architecture

In [2]:
def autoencoder(dimensions=[278, 1000, 100, 278]):
    """
    Assemble the TensorFlow graph of a stacked autoencoder with tied weights:
    each decoder layer multiplies by the transpose of the matching encoder
    weight matrix, and only the biases are separate variables.

    Parameters
    ----------
    dimensions : list, optional
        Layer widths. ``dimensions[0]`` is the width of the input placeholder;
        every following entry is the output width of one encoder layer, so the
        last entry is the width of the latent code. The decoder walks
        ``dimensions[:-1]`` backwards and therefore ends at ``dimensions[0]``.

    Returns
    -------
    dict
        'x'    : float32 input placeholder of shape [None, dimensions[0]]
        'z'    : output of the last encoder layer (latent representation)
        'y'    : output of the last decoder layer (reconstruction of x)
        'loss' : mean over all elements of (x - y) ** 2
    """
    logging.info('activation : {}'.format('relu'))

    # Network input; the first axis is left open.
    x = tf.placeholder(tf.float32, [None, dimensions[0]], name='x')
    hidden = x
    logging.info('current_input : {}'.format(hidden.shape))

    # ---------------------------
    # Encoder: one dense ReLU layer per entry of dimensions[1:]
    # ---------------------------
    tied_weights = []  # encoder matrices in forward order (w1, w2, ...)
    for depth, width in enumerate(dimensions[1:]):
        logging.info('layer_i-encoder : {}'.format(depth))
        logging.info('n_output-encoder: {}'.format(width))
        fan_in = int(hidden.get_shape()[1])  # axis 1 is the feature axis
        logging.info('n_input : {}'.format(fan_in))
        # Uniform initialisation in [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        bound = 1.0 / math.sqrt(fan_in)
        weights = tf.Variable(
            tf.random_uniform([fan_in, width], minval=-bound, maxval=bound))
        bias = tf.Variable(tf.zeros([width]))
        tied_weights.append(weights)
        hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)

    # ---------------------------
    # Latent representation (output of the encoder)
    # ---------------------------
    z = hidden

    # ---------------------------
    # Decoder: reuse the encoder matrices, last one first, transposed
    # ---------------------------
    decoder_widths = dimensions[:-1][::-1]
    for depth, (width, weights) in enumerate(zip(decoder_widths, reversed(tied_weights))):
        logging.info('layer_i-decoder : {}'.format(depth))
        logging.info('n_output-decoder : {}'.format(width))
        weights_t = tf.transpose(weights)
        bias = tf.Variable(tf.zeros([width]))
        # NOTE(review): ReLU is applied on every decoder layer, including the
        # last one, so y is never negative. If the inputs contain negative
        # values (e.g. after mean-centering) they cannot be reconstructed
        # exactly - confirm whether a linear output layer is intended.
        hidden = tf.nn.relu(tf.matmul(hidden, weights_t) + bias)

    # Reconstruction and its mean squared error against the input.
    y = hidden
    loss = tf.reduce_mean(tf.pow(x - y, 2))
    return {'x': x, 'z': z, 'y': y, 'loss': loss}


# input

In [7]:
# Output locations: log files go to log_dir, model checkpoints to checkpoint_dir.
log_dir, checkpoint_dir = '../log/auto_encoder', '../log/auto_encoder/checkpoints'

# Configure logging through the project helper, pointing it at log_dir.
init_logging(log_dir)

In [4]:
# When True, missing values are replaced by 0 before standardisation.
pad_zero = True
# Reload the base feature table from disk so this cell starts from raw data.
df = pd.read_hdf('../features/base_featurs.h5','base_featurs')
print (df.shape)
# Keep the id/target columns aside; they are not fed to the autoencoder.
copy_for_the_following_merge = df[['SK_ID_CURR','TARGET']].copy()
no_need_to_comoress = ['index','TARGET', 'SK_ID_CURR']
df.drop(no_need_to_comoress, axis = 1, inplace = True)
# Turn +/-infinity into NaN so it is handled like any other missing value.
df.replace([np.inf, -np.inf], np.nan, inplace = True)
print (df.shape)
if pad_zero == True:
    # Fill missing values with 0 before feature scaling.
    df.replace(np.nan, 0, inplace = True)
    logging.info('pad_zero : {}'.format('True'))
else:
    logging.info('pad_zero : {}'.format('False'))
# Standardise every column to zero mean / unit variance.
# pandas mean() and std() skip NaN by default.
for f in df.columns.tolist():
    mean = df[f].mean()
    std = df[f].std()
    if std == 0:
        # Constant column: dividing by 0 would turn the whole column into NaN
        # and poison the network input. Use 1 so the column just becomes 0.
        std = 1.0
    df[f] = (df[f] - mean) / std
logging.info('input of ae : {}'.format(df.shape))

(356251, 281)
(356251, 278)


pad_zero : True
input of ae : (356251, 278)


# raw_feature_generator
### `raw_feature_generator` is a Python generator that yields mini-batches of rows from the feature DataFrame as NumPy arrays, optionally shuffling the row order first. It is used below to feed the autoencoder during training.

In [5]:
# Number of rows in the feature frame.
length = df.shape[0]
print ('length', length)
# Positional row indices 0 .. length-1 as a 1-D array; the batch generator
# shuffles and slices this array.
idx = np.arange(0, length)
print ('idx', idx.shape, idx)


length 356251
idx (356251,) [     0      1      2 ... 356248 356249 356250]


In [6]:
# def raw_feature_generator(batch_size = 128, shuffle = True, num_epochs = 10000, allow_smaller_final_batch = False):
#     epoch_num = 0
#     while epoch_num < num_epochs:
#         if shuffle:
#             np.random.shuffle(idx)
#         for i in range(0, length, batch_size):
#             batch_idx = idx[i: i + batch_size]
#             if not allow_smaller_final_batch and len(batch_idx) != batch_size:
#                 break # terminate the loop
#             yield df.values[batch_idx]
#         epoch_num += 1
    

In [6]:
def raw_feature_generator(batch_size = 128, shuffle = True, allow_smaller_final_batch = False):
    """
    Yield one pass over the module-level ``df`` in mini-batches.

    Reads the module-level names ``df``, ``idx`` and ``length``.

    Parameters
    ----------
    batch_size : int, optional
        Number of rows per yielded batch.
    shuffle : bool, optional
        If True, the module-level ``idx`` array is shuffled IN PLACE before
        batching, so the new order stays visible to other code using ``idx``.
    allow_smaller_final_batch : bool, optional
        If False, a trailing batch with fewer than ``batch_size`` rows is
        dropped; if True it is yielded as-is.

    Yields
    ------
    numpy.ndarray
        The rows of ``df.values`` selected by the current slice of ``idx``.
    """
    if shuffle:
        np.random.shuffle(idx)
    # Materialise the array once per pass instead of once per batch.
    values = df.values
    for start in range(0, length, batch_size):
        batch_idx = idx[start: start + batch_size]
        if not allow_smaller_final_batch and len(batch_idx) != batch_size:
            break # incomplete trailing batch: stop the pass
        yield values[batch_idx]
def save(step, averaged = False):
    '''
    
    '''
    #--------
    # create saver object
    #--------
    if averaged:
        saver = tf.train.Saver(self.ema.variables_to_restore(), max_to_keep=1)
        checkpoint_dir_averaged = checkpoint_dir + '_avg'
        checkpoint_dir =  checkpoint_dir_averaged
    else:
        saver = tf.train.Saver(max_to_keep=1)
        checkpoint_dir = checkpoint_dir
    
    if not os.path.isdir(checkpoint_dir):
        logging.info('creating checkpoint directory {}'.format(checkpoint_dir))
        os.mkdir(checkpoint_dir)

    model_path = os.path.join(checkpoint_dir, 'model')
    logging.info('saving model to {}'.format(model_path))
    saver.save(sess, model_path, global_step=step)

# training 

In [8]:
# display_step
log_interval = 100
# learning_rate
learning_rate = 0.001
# define auto-encoder network architecture
ae = autoencoder(dimensions=[278, 140, 70, 30])
# optimizer
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(ae['loss'])

# Initialize the variables (i.e. assign their default value)
# We create a session to use the graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training
batch_size = 256
n_epochs = 10
step = 0
min_steps_to_checkpoint = 100
enable_parameter_averaging = False
logging.info('learning_rate : {}'.format(learning_rate))
logging.info('batch_size : {}'.format(batch_size))
logging.info('n_epochs : {}'.format(n_epochs))
logging.info('log_interval : {}'.format(log_interval))
logging.info('min_steps_to_checkpoint : {}'.format(min_steps_to_checkpoint))
logging.info('enable_parameter_averaging : {}'.format(enable_parameter_averaging))

#-------------------------
# training - fit
#-------------------------
for epoch in range(n_epochs):
    epoch_cost = 0.0
    for batch_xs in raw_feature_generator(batch_size = batch_size):
        _, batch_loss = sess.run([optimizer, ae['loss']], feed_dict={ae['x']: batch_xs})
        epoch_cost += batch_loss / batch_size
    metric_log = (
                "[[epoch {:>8}]]     "
                "[[loss at this epoch]]     loss: {:<12}"
            ).format(epoch, round(epoch_cost, 8))

    logging.info(metric_log)
    if step > min_steps_to_checkpoint:
        save(step)
        if enable_parameter_averaging:
            save(step, averaged=True)


activation : relu
current_input : (?, 278)
layer_i-encoder : 0
n_output-encoder: 140
n_input : 278
layer_i-encoder : 1
n_output-encoder: 70
n_input : 140
layer_i-encoder : 2
n_output-encoder: 30
n_input : 70
layer_i-decoder : 0
n_output-decoder : 70
layer_i-decoder : 1
n_output-decoder : 140
layer_i-decoder : 2
n_output-decoder : 278
learning_rate : 0.001
batch_size : 256
n_epochs : 1000
log_interval : 100
[[epoch        0]]     [[loss at this epoch]]     loss: 4.01145299  
[[epoch        1]]     [[loss at this epoch]]     loss: 3.21221754  
[[epoch        2]]     [[loss at this epoch]]     loss: 2.95331249  
[[epoch        3]]     [[loss at this epoch]]     loss: 2.82267365  
[[epoch        4]]     [[loss at this epoch]]     loss: 2.73162832  
[[epoch        5]]     [[loss at this epoch]]     loss: 2.67879417  
[[epoch        6]]     [[loss at this epoch]]     loss: 2.63703678  
[[epoch        7]]     [[loss at this epoch]]     loss: 2.60075413  
[[epoch        8]]     [[loss at this 

[[epoch      113]]     [[loss at this epoch]]     loss: 2.19308126  
[[epoch      114]]     [[loss at this epoch]]     loss: 2.19315659  
[[epoch      115]]     [[loss at this epoch]]     loss: 2.19849456  
[[epoch      116]]     [[loss at this epoch]]     loss: 2.19442061  
[[epoch      117]]     [[loss at this epoch]]     loss: 2.19153523  
[[epoch      118]]     [[loss at this epoch]]     loss: 2.19284991  
[[epoch      119]]     [[loss at this epoch]]     loss: 2.19029327  
[[epoch      120]]     [[loss at this epoch]]     loss: 2.19027032  
[[epoch      121]]     [[loss at this epoch]]     loss: 2.19089144  
[[epoch      122]]     [[loss at this epoch]]     loss: 2.18939261  
[[epoch      123]]     [[loss at this epoch]]     loss: 2.19119215  
[[epoch      124]]     [[loss at this epoch]]     loss: 2.19026759  
[[epoch      125]]     [[loss at this epoch]]     loss: 2.19525368  
[[epoch      126]]     [[loss at this epoch]]     loss: 2.1875733   
[[epoch      127]]     [[loss at t

[[epoch      232]]     [[loss at this epoch]]     loss: 2.17709087  
[[epoch      233]]     [[loss at this epoch]]     loss: 2.17904708  
[[epoch      234]]     [[loss at this epoch]]     loss: 2.18206759  
[[epoch      235]]     [[loss at this epoch]]     loss: 2.17708546  
[[epoch      236]]     [[loss at this epoch]]     loss: 2.18257644  
[[epoch      237]]     [[loss at this epoch]]     loss: 2.17837302  
[[epoch      238]]     [[loss at this epoch]]     loss: 2.18083998  
[[epoch      239]]     [[loss at this epoch]]     loss: 2.17771769  
[[epoch      240]]     [[loss at this epoch]]     loss: 2.17878121  
[[epoch      241]]     [[loss at this epoch]]     loss: 2.17919793  
[[epoch      242]]     [[loss at this epoch]]     loss: 2.17453858  
[[epoch      243]]     [[loss at this epoch]]     loss: 2.18017379  
[[epoch      244]]     [[loss at this epoch]]     loss: 2.17746148  
[[epoch      245]]     [[loss at this epoch]]     loss: 2.19387091  
[[epoch      246]]     [[loss at t

[[epoch      351]]     [[loss at this epoch]]     loss: 2.17463637  
[[epoch      352]]     [[loss at this epoch]]     loss: 2.17167081  
[[epoch      353]]     [[loss at this epoch]]     loss: 2.18575687  
[[epoch      354]]     [[loss at this epoch]]     loss: 2.17833275  
[[epoch      355]]     [[loss at this epoch]]     loss: 2.17390456  
[[epoch      356]]     [[loss at this epoch]]     loss: 2.17589149  
[[epoch      357]]     [[loss at this epoch]]     loss: 2.18027171  
[[epoch      358]]     [[loss at this epoch]]     loss: 2.17604421  
[[epoch      359]]     [[loss at this epoch]]     loss: 2.17373018  
[[epoch      360]]     [[loss at this epoch]]     loss: 2.1760234   
[[epoch      361]]     [[loss at this epoch]]     loss: 2.17580766  
[[epoch      362]]     [[loss at this epoch]]     loss: 2.17435522  
[[epoch      363]]     [[loss at this epoch]]     loss: 2.17437845  
[[epoch      364]]     [[loss at this epoch]]     loss: 2.17420072  
[[epoch      365]]     [[loss at t

[[epoch      470]]     [[loss at this epoch]]     loss: 2.16781328  
[[epoch      471]]     [[loss at this epoch]]     loss: 2.17823437  
[[epoch      472]]     [[loss at this epoch]]     loss: 2.17463402  
[[epoch      473]]     [[loss at this epoch]]     loss: 2.16960617  
[[epoch      474]]     [[loss at this epoch]]     loss: 2.17356381  
[[epoch      475]]     [[loss at this epoch]]     loss: 2.17286434  
[[epoch      476]]     [[loss at this epoch]]     loss: 2.17685036  
[[epoch      477]]     [[loss at this epoch]]     loss: 2.16869671  
[[epoch      478]]     [[loss at this epoch]]     loss: 2.17105615  
[[epoch      479]]     [[loss at this epoch]]     loss: 2.17072783  
[[epoch      480]]     [[loss at this epoch]]     loss: 2.17010348  
[[epoch      481]]     [[loss at this epoch]]     loss: 2.16838062  
[[epoch      482]]     [[loss at this epoch]]     loss: 2.16962099  
[[epoch      483]]     [[loss at this epoch]]     loss: 2.17224354  
[[epoch      484]]     [[loss at t

[[epoch      589]]     [[loss at this epoch]]     loss: 2.17656042  
[[epoch      590]]     [[loss at this epoch]]     loss: 2.17999265  
[[epoch      591]]     [[loss at this epoch]]     loss: 2.1743945   
[[epoch      592]]     [[loss at this epoch]]     loss: 2.17186172  
[[epoch      593]]     [[loss at this epoch]]     loss: 2.17525246  
[[epoch      594]]     [[loss at this epoch]]     loss: 2.17419183  
[[epoch      595]]     [[loss at this epoch]]     loss: 2.18514063  
[[epoch      596]]     [[loss at this epoch]]     loss: 2.17361228  
[[epoch      597]]     [[loss at this epoch]]     loss: 2.1759076   
[[epoch      598]]     [[loss at this epoch]]     loss: 2.18375139  
[[epoch      599]]     [[loss at this epoch]]     loss: 2.17428079  
[[epoch      600]]     [[loss at this epoch]]     loss: 2.17642672  
[[epoch      601]]     [[loss at this epoch]]     loss: 2.17585171  
[[epoch      602]]     [[loss at this epoch]]     loss: 2.17273115  
[[epoch      603]]     [[loss at t

[[epoch      708]]     [[loss at this epoch]]     loss: 2.1666305   
[[epoch      709]]     [[loss at this epoch]]     loss: 2.17425621  
[[epoch      710]]     [[loss at this epoch]]     loss: 2.16493247  
[[epoch      711]]     [[loss at this epoch]]     loss: 2.16867286  
[[epoch      712]]     [[loss at this epoch]]     loss: 2.16531413  
[[epoch      713]]     [[loss at this epoch]]     loss: 2.16792789  
[[epoch      714]]     [[loss at this epoch]]     loss: 2.17820312  
[[epoch      715]]     [[loss at this epoch]]     loss: 2.17071876  
[[epoch      716]]     [[loss at this epoch]]     loss: 2.18234065  
[[epoch      717]]     [[loss at this epoch]]     loss: 2.16621404  
[[epoch      718]]     [[loss at this epoch]]     loss: 2.16883938  
[[epoch      719]]     [[loss at this epoch]]     loss: 2.16610012  
[[epoch      720]]     [[loss at this epoch]]     loss: 2.165311    
[[epoch      721]]     [[loss at this epoch]]     loss: 2.17742491  
[[epoch      722]]     [[loss at t

[[epoch      827]]     [[loss at this epoch]]     loss: 2.18415643  
[[epoch      828]]     [[loss at this epoch]]     loss: 2.17560517  
[[epoch      829]]     [[loss at this epoch]]     loss: 2.18023867  
[[epoch      830]]     [[loss at this epoch]]     loss: 2.18330648  
[[epoch      831]]     [[loss at this epoch]]     loss: 2.18095476  
[[epoch      832]]     [[loss at this epoch]]     loss: 2.16541528  
[[epoch      833]]     [[loss at this epoch]]     loss: 2.17695363  
[[epoch      834]]     [[loss at this epoch]]     loss: 2.1831426   
[[epoch      835]]     [[loss at this epoch]]     loss: 2.18193662  
[[epoch      836]]     [[loss at this epoch]]     loss: 2.18858589  
[[epoch      837]]     [[loss at this epoch]]     loss: 2.18602701  
[[epoch      838]]     [[loss at this epoch]]     loss: 2.18126771  
[[epoch      839]]     [[loss at this epoch]]     loss: 2.1844276   
[[epoch      840]]     [[loss at this epoch]]     loss: 2.18278778  
[[epoch      841]]     [[loss at t

[[epoch      946]]     [[loss at this epoch]]     loss: 2.18741653  
[[epoch      947]]     [[loss at this epoch]]     loss: 2.18226489  
[[epoch      948]]     [[loss at this epoch]]     loss: 2.17905997  
[[epoch      949]]     [[loss at this epoch]]     loss: 2.18707792  
[[epoch      950]]     [[loss at this epoch]]     loss: 2.17906212  
[[epoch      951]]     [[loss at this epoch]]     loss: 2.19148916  
[[epoch      952]]     [[loss at this epoch]]     loss: 2.18631561  
[[epoch      953]]     [[loss at this epoch]]     loss: 2.18072447  
[[epoch      954]]     [[loss at this epoch]]     loss: 2.18271791  
[[epoch      955]]     [[loss at this epoch]]     loss: 2.18842459  
[[epoch      956]]     [[loss at this epoch]]     loss: 2.18112125  
[[epoch      957]]     [[loss at this epoch]]     loss: 2.18661168  
[[epoch      958]]     [[loss at this epoch]]     loss: 2.18437008  
[[epoch      959]]     [[loss at this epoch]]     loss: 2.1908459   
[[epoch      960]]     [[loss at t