In [None]:
from utils.utils import *
from utils.utils_nn import *

import numpy as np
np.random.seed(seed())

import tensorflow as tf

import itertools
import os

from tensorflow.contrib import rnn
from tensorflow.contrib.tensorboard.plugins import projector  # embeddings visualizer

import random
random.seed(seed())

# import matplotlib.pyplot as plt

# import re

In [None]:
# Options forwarded verbatim to data_load_preprocess for the neural model.
kwargs_neural_data_init = dict(
    mk_chars=True,
    model='neural',
    char_filter=100, allowed_chars=None,
    mk_ngrams=False, ngram_width=5,
    ngram_filter=10, allowed_ngrams=None,
    keep_infreq_labels=False, label_count_thresh=10,
    valid_ratio=0.25,
    scale_func=unscale, to_permute=True,
)

# Load the data once; the call returns the train / validation feeds,
# the character and label index maps (plus their inverses) and a
# dictionary of statistics that is merged into the model kwargs later on.
(x_feed_train, y_feed_train, x_feed_val, y_feed_val,
 char_int, char_int_inv, label_int, label_int_inv,
 statistics_dict) = data_load_preprocess(**kwargs_neural_data_init)

In [None]:
# Hyper-parameters and bookkeeping options for the LSTM model.
kwargs_simple_lstm = nice_dict(dict(
    # log
    log_dir='logdir/',
    del_log=True,
    # preprocessing and data
    top_k=5,
    seed=seed(),
    # learning hyper-params
    learn_rate=1E-1,  # 1E-4
    dynamic_learn_rate=False,
    rnn_type='LSTM',
    bidirection=False,
    char_embed_dim=4,
    one_hot=False,
    hidden_state_size=32,
    keep_prob=0.7,
    l2_wieght_reg=1E-4,
    target_rep=True,
    target_rep_weight=0.1,
    epochs=1000,
    summary_step=10,
    save_step=np.inf,
    verbose_summary=False,
))

# An infinite save_step is replaced by the number of epochs, so the only
# checkpoint is written after the last training step.
if kwargs_simple_lstm.save_step == np.inf:
    kwargs_simple_lstm.save_step = kwargs_simple_lstm.epochs

# Add the data statistics and the two data-loading options that the model
# kwargs are expected to carry; later entries win on key clashes.
kwargs_simple_lstm = nice_dict({
    **kwargs_simple_lstm,
    **statistics_dict,
    'scale_func': kwargs_neural_data_init['scale_func'],
    'keep_infreq_labels': kwargs_neural_data_init['keep_infreq_labels'],
})

# Optionally start from an empty log directory.
if kwargs_simple_lstm.del_log:
    remove_dir_content(kwargs_simple_lstm.log_dir)

In [None]:
look_at_some_examples = False

# Debugging aid: collect examples from the training and validation sets,
# group them by label and print up to two examples of each
# (i.e. for each label, up to 2 training and up to 2 validation examples).
# This was added because of a suspicion raised by similar evaluation
# metrics on the training and test data.


def collect_examples_by_label(observations, labels):
    """
    Group observations by their label.

    Parameters
    ----------
    observations : iterable
        The examples.
    labels : iterable of hashable
        One label per observation, in the same order.

    Returns
    -------
    dict
        Maps each label to the list of its observations, in input order.
    """
    grouped = {}
    for obs, label in zip(observations, labels):
        grouped.setdefault(label, []).append(obs)
    return grouped


def join_without_padding(observation, pad_token='<pad-char>'):
    """Concatenate the characters of one observation, skipping padding tokens."""
    return ''.join(char for char in observation if char != pad_token)


if look_at_some_examples:
    label_to_text_val = collect_examples_by_label(x_feed_val, y_feed_val)
    label_to_text_train = collect_examples_by_label(x_feed_train, y_feed_train)

    unique_keys = sorted(label_to_text_train)

    # A label may be missing from the validation split; fall back to an
    # empty list instead of failing with a KeyError.
    label_to_text_merge = {
        key: {'training': label_to_text_train[key],
              'validation': label_to_text_val.get(key, [])}
        for key in unique_keys}

    for key in unique_keys:
        cur_dict = label_to_text_merge[key]
        print('Key:{}, training:'.format(key))
        # slicing (rather than indexing [0] and [1]) tolerates labels
        # that have fewer than two examples
        for obs in cur_dict['training'][:2]:
            print(join_without_padding(obs))
        print('validation:')
        for obs in cur_dict['validation'][:2]:
            print(join_without_padding(obs))

In [None]:
# Convert the raw feeds into the arrays that are fed to the tf model.
# Both splits share the same index maps and model kwargs.
index_kwargs = dict(char_int=char_int,
                    label_int=label_int,
                    **kwargs_simple_lstm)

# training data (the middle return value is not needed here)
X_train, _, Y_train = index_transorm_xy(x=x_feed_train, y=y_feed_train, **index_kwargs)

# validation data
X_val, _, Y_val = index_transorm_xy(x=x_feed_val, y=y_feed_val, **index_kwargs)

# Write the metadata file used by the embeddings visualizer; the returned
# path is handed to the model later on.
embed_vis_path = write_embeddings_metadata(
    log_dir=kwargs_simple_lstm.log_dir,
    dictionary=char_int,
    file_name='metadata.tsv')

In [None]:
class Lstm_model(object):
    """
    Character-level recurrent classifier (LSTM or GRU, optionally
    bidirectional) written against the TensorFlow 1.x graph API.

    Constructing an instance resets the default graph, builds the whole model
    (embedding -> RNN -> logits per time step -> cost, metrics, optimizer),
    opens a session, initialises the variables and creates a TensorBoard
    writer. `train` then runs optimisation steps on the training feed that was
    passed to the constructor, periodically writing summaries and checkpoints.
    """

    def __init__(self, 
                 *args, 
                 hparam_str, 
                 seq_len, 
                 n_class, 
                 n_char, 
                 char_embed_dim, 
                 one_hot, 
                 hidden_state_size, 
                 keep_prob, 
                 learn_rate, 
                 dynamic_learn_rate, 
                 rnn_type, 
                 bidirection, 
                 top_k, 
                 epochs, 
                 log_dir, 
                 embed_vis_path, 
                 summary_step, 
                 save_step, 
                 seed, 
                 l2_wieght_reg, 
                 target_rep, 
                 target_rep_weight, 
                 verbose_summary, 
                 feed_dict_train, 
                 feed_dict_test, 
                 **kwargs):
        """
        Build the graph, start a session and set up logging.

        All named parameters are keyword-only; positional `*args` and any
        additional `**kwargs` are accepted and ignored.

        Parameters
        ----------
        hparam_str : str
            Run name; used as the sub-directory of `log_dir` for summaries
            and checkpoints.
        seq_len : int
            Number of time steps (second dimension of the input placeholder).
        n_class : int
            Number of classes (width of the label placeholder and the logits).
        n_char : int
            Number of rows of the embedding matrix.
        char_embed_dim : int
            Number of columns of the trainable embedding matrix. Replaced by
            `n_char` when `one_hot` is True.
        one_hot : bool
            Use a constant identity matrix instead of a trainable embedding.
        hidden_state_size : int
            `num_units` of each RNN cell.
        keep_prob : float
            Dropout keep probability fed on training steps (evaluation
            always feeds 1.0).
        learn_rate : float
            Learning rate of the optimizer.
        dynamic_learn_rate : bool
            If True, train with plain gradient descent and a rate of
            1/sqrt(epoch) (see `update_lr`); otherwise train with Adam at the
            fixed `learn_rate`.
        rnn_type : str
            'LSTM' or 'GRU'.
        bidirection : bool
            Use a bidirectional RNN.
        top_k : int
            k of the "target is within the top k logits" metric.
        epochs : int
            Number of training steps run by `train`.
        log_dir : str
            Parent directory for summaries and checkpoints.
        embed_vis_path : str
            Path of the metadata file for the embeddings projector.
        summary_step, save_step : int
            Write summaries / a checkpoint every that many steps.
        seed : int
            Seed for tensorflow, numpy and `random`.
        l2_wieght_reg : float
            Weight of the L2 regularisation term in the total cost.
        target_rep : bool
            Use target replication (a loss on the logits of every time
            step). When False only the last RNN output is kept and the
            target-replication weight is forced to 0.
        target_rep_weight : float
            Weight of the target-replication loss in the total cost.
        verbose_summary : bool
            Additionally write histogram summaries.
        feed_dict_train, feed_dict_test : dict
            Each has the keys 'x' and 'y' holding the data fed to the input
            and label placeholders.
        """
        
        self.hparam_str = hparam_str
        self.seq_len = seq_len 
        self.n_class = n_class 
        self.n_char = n_char
        self.char_embed_dim = char_embed_dim
        self.one_hot = one_hot
        self.hidden_state_size = hidden_state_size
        self.learn_rate = learn_rate
        self.dynamic_learn_rate = dynamic_learn_rate
        self.rnn_type = rnn_type
        self.bidirection = bidirection
        self.top_k = top_k
        self.epochs = epochs
        self.log_dir = log_dir
        self.embed_vis_path = embed_vis_path
        self.summary_step = summary_step 
        self.save_step = save_step
        self.seed = seed
        self.l2_wieght_reg = l2_wieght_reg
        self.target_rep = target_rep
        self.verbose_summary = verbose_summary
        # without target replication the replication loss must not contribute
        self.target_rep_weight = target_rep_weight if self.target_rep else 0.0
        self.embedding_matrix = None

        # clear tf graph and set seeds
        tf.reset_default_graph()
        tf.set_random_seed(self.seed)
        np.random.seed(self.seed)
        random.seed(self.seed)
        
        # Setup placeholders
        self.x_ = tf.placeholder(tf.int32, [None, self.seq_len], 
                            name='Examples')
        self.y_ = tf.placeholder(tf.int32, [None, self.n_class], 
                            name='Lables')
        # note: self.keep_prob is the placeholder; the numeric value passed
        # to the constructor only lives in the training feed dict
        self.keep_prob = tf.placeholder(tf.float32, [], 
                            name='Keep_probability')

        self.feed_dict_train = {self.x_: feed_dict_train['x'], 
                                self.y_: feed_dict_train['y'], 
                                self.keep_prob: keep_prob}

        # same data as the training feed, but with dropout switched off
        self.feed_dict_train_eval = {**self.feed_dict_train, 
                                     **{self.keep_prob: 1.0}}

        self.feed_dict_test = {self.x_: feed_dict_test['x'], 
                               self.y_: feed_dict_test['y'], 
                               self.keep_prob: 1.0}

        self.embedding_matrix = self.embed_matrix()

        # list of RNN outputs (one per time step, or only the last one)
        self.outputs = self.lstm_unit(input=self.x_)
        with tf.name_scope('logits_seq'):
            # a bidirectional RNN concatenates forward and backward outputs
            if self.bidirection: logit_in_size = 2 * self.hidden_state_size
            else: logit_in_size = self.hidden_state_size
            # note: every call to self.logit creates its own W and B
            self.logits = [self.logit(input=out, 
                                      size_in=logit_in_size, 
                                      size_out=self.n_class) 
                           for out in self.outputs]

        with tf.name_scope('Cost_function'):
            # cross entropy loss with target replication and
            # regularization terms based on the weights' L2 norm
            with tf.name_scope('target_replication_loss'):
                self.cost_targetrep = tf.reduce_mean(
                    [tf.nn.softmax_cross_entropy_with_logits(
                        logits=log, labels=self.y_) 
                     for log in self.logits])
            with tf.name_scope('cross_entropy'):
                self.cost_crossent = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=self.logits[-1], labels=self.y_))
            with tf.name_scope('L2_norm_reg'):
                self.cost_l2reg = tf.reduce_mean([tf.nn.l2_loss(weight) 
                                                  for weight in tf.trainable_variables()])
            with tf.name_scope('total_cost'):
                self.cost = self.target_rep_weight * self.cost_targetrep + \
                    (1 - self.target_rep_weight) * self.cost_crossent + \
                    self.l2_wieght_reg * self.cost_l2reg
            # add summaries
            tf.summary.scalar('Total_cost_train', 
                              self.cost, collections=['train'])
            tf.summary.scalar('Total_cost_test', 
                              self.cost, collections=['test'])
            
        with tf.name_scope('Cost_function_additional_metrics'):
            tf.summary.scalar('Target_rep_cost_train', 
                              self.cost_targetrep, collections=['train'])
            tf.summary.scalar('Target_rep_cost_test', 
                              self.cost_targetrep, collections=['test'])
            tf.summary.scalar('Cross_entropy_train', 
                              self.cost_crossent, collections=['train'])
            tf.summary.scalar('Cross_entropy_test', 
                              self.cost_crossent, collections=['test'])
            tf.summary.scalar('L2_norm_train', 
                              self.cost_l2reg, collections=['train'])
            tf.summary.scalar('L2_norm_test', 
                              self.cost_l2reg, collections=['test'])            
            
        with tf.name_scope('Train'):
            if self.dynamic_learn_rate:
                # The rate has to be a graph input: an optimizer built from a
                # Python float keeps that value, so later changes made by
                # update_lr() would never reach the graph. The default keeps
                # the train op runnable without feeding the rate.
                self.learn_rate_ph = tf.placeholder_with_default(
                    tf.constant(self.learn_rate, dtype=tf.float32), 
                    shape=[], name='Learning_rate')
                self.optimizer = tf.train.GradientDescentOptimizer(self.learn_rate_ph)
            else:
                self.learn_rate_ph = None
                self.optimizer = tf.train.AdamOptimizer(self.learn_rate)
            self.train_step = self.optimizer.minimize(self.cost)

        with tf.name_scope('Accuracy'):  # takes the last element of logits
            self.correct_prediction = tf.equal(tf.argmax(self.logits[-1], 1), tf.argmax(self.y_, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
            tf.summary.scalar('Accuracy_train', self.accuracy, collections=['train'])
            tf.summary.scalar('Accuracy_test', self.accuracy, collections=['test'])
        
        with tf.name_scope('Mean_Reciprocal_Rank'):  # takes the last element of logits
            self.recip_rank = tf.reduce_mean(
                self.get_reciprocal_rank(self.logits[-1], 
                                         self.y_, 
                                         True))
            tf.summary.scalar('Mean_Reciprocal_Rank_train', 
                              self.recip_rank, collections=['train'])
            tf.summary.scalar('Mean_Reciprocal_Rank_test', 
                              self.recip_rank, collections=['test'])        
        
        with tf.name_scope('In_top_{}'.format(self.top_k)):  # takes the last element of logits
            self.y_targets = tf.argmax(self.y_, 1)
            self.top_k_res = tf.reduce_mean(tf.cast(
                tf.nn.in_top_k(self.logits[-1], self.y_targets, self.top_k), 
                tf.float32))
            tf.summary.scalar('In_top_{}_train'.format(self.top_k), self.top_k_res, collections=['train'])
            tf.summary.scalar('In_top_{}_test'.format(self.top_k), self.top_k_res, collections=['test'])

        # summaries per collection and saver object
        self.summ_train = tf.summary.merge_all('train')
        self.summ_test = tf.summary.merge_all('test')
        self.saver = tf.train.Saver()
        self.init_op = tf.global_variables_initializer()
        
        # init vars and setup writer
        self.sess = tf.Session()
        self.sess.run(self.init_op)
        # same directory that train() uses for the checkpoints
        self.writer = tf.summary.FileWriter(os.path.join(self.log_dir, self.hparam_str))
        self.writer.add_graph(self.sess.graph)
        
        # Add embedding tensorboard visualization
        self.config = projector.ProjectorConfig()
        self.embed = self.config.embeddings.add()
        self.embed.tensor_name = self.embedding_matrix.name
        self.embed.metadata_path = self.embed_vis_path
        projector.visualize_embeddings(self.writer, self.config)
        
        
    def embed_matrix(self, stddev=0.1, name='embeddings'):
        """
        Create the character embedding matrix and store it on the instance.

        With `one_hot` False this is a trainable [n_char, char_embed_dim]
        variable initialised from a truncated normal with the given `stddev`.
        With `one_hot` True it is a constant [n_char, n_char] identity
        matrix, and `self.char_embed_dim` is set to `n_char` accordingly.

        Returns the created tensor (also available as
        `self.embedding_matrix`).
        """
        # index_size would be the size of the character set
        with tf.name_scope(name):
            if not self.one_hot:
                embedding_matrix = tf.get_variable(
                    'embedding_matrix', 
                    initializer=tf.truncated_normal([self.n_char, self.char_embed_dim], 
                                                    stddev=stddev, 
                                                    seed=self.seed), 
                    trainable=True)
            else:
                # creating a one-hot for each character corresponds to the identity matrix
                embedding_matrix = tf.constant(value=np.identity(self.n_char), 
                                               name='embedding_matrix', 
                                               dtype=tf.float32)
                self.char_embed_dim = self.n_char
            if self.verbose_summary:
                tf.summary.histogram('embedding_matrix', embedding_matrix, collections=['train'])
            self.embedding_matrix = embedding_matrix
            return self.embedding_matrix
        
        
    def lstm_unit(self, 
                  input, 
                  name='LSTM'):
        """
        Embed the input indices and run them through the recurrent layer.

        Parameters
        ----------
        input : tensor
            Integer indices into the embedding matrix, one row per example
            and one column per time step.
        name : str
            Kept for interface compatibility; the scope name is always
            derived from `rnn_type` and `bidirection`.

        Returns
        -------
        list of tensors
            The RNN outputs of all time steps when `target_rep` is True,
            otherwise a one-element list holding the last output.
        """
        # check, then set the right name
        assert self.rnn_type in ['LSTM', 'GRU'], \
            'rnn_type has to be either LSTM or GRU'
        is_lstm = self.rnn_type == 'LSTM'
        name = 'LSTM' if is_lstm else 'GRU'
        if self.bidirection: name += '_bidir'
        with tf.name_scope(name):
            input = tf.nn.embedding_lookup(self.embedding_matrix, input)
            # reshaping
            # Permuting batch_size and n_steps
            input = tf.transpose(input, [1, 0, 2])
            # Reshaping to (n_steps*batch_size, n_input)
            input = tf.reshape(input, [-1, self.char_embed_dim])
            # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
            rnn_inputs = tf.split(input, self.seq_len, 0)
            
            # setting the correct RNN cell type (LSTM or GRU)
            rnn_cell = rnn.BasicLSTMCell if is_lstm else rnn.GRUCell
            # setting the args (forget_bias applies only to LSTM); decided
            # from rnn_type rather than from the repr of the cell class
            rnn_cell_args = {'num_units': self.hidden_state_size}
            if is_lstm:
                rnn_cell_args['forget_bias'] = 1.0
            
            cell_fw = rnn_cell(**rnn_cell_args)
            cell_fw = rnn.DropoutWrapper(cell_fw, 
                                         output_keep_prob=self.keep_prob, 
                                         seed=self.seed)
            
            if self.bidirection:
                # add another cell for backwards direction and a dropout wrapper
                cell_bw = rnn_cell(**rnn_cell_args)
                cell_bw = rnn.DropoutWrapper(cell_bw, 
                                             output_keep_prob=self.keep_prob, 
                                             seed=self.seed)
                outputs, _, _ = rnn.static_bidirectional_rnn(
                    cell_fw, cell_bw, rnn_inputs, dtype=tf.float32, scope=name)
            else:
                outputs, _ = rnn.static_rnn(cell_fw, rnn_inputs, dtype=tf.float32, scope=name)
            
            if not self.target_rep:  # take only last output (list for structure consistency)
                outputs = [outputs[-1]]
            if self.verbose_summary:
                tf.summary.histogram('outputs', outputs, collections=['train'])
            return outputs


    def logit(self, 
              input, 
              size_in, 
              size_out, 
              stddev=0.1, 
              name='logit'):
        """
        Affine layer `input @ W + B` with freshly created variables.

        W is [size_in, size_out], initialised from a truncated normal with
        the given `stddev`; B is [size_out], initialised to 0.1.
        Returns the logits tensor.
        """

        with tf.name_scope(name):
            w = tf.Variable(tf.truncated_normal([size_in, size_out], 
                                                stddev=stddev, 
                                                seed=self.seed), 
                            name='W')
            b = tf.Variable(tf.constant(0.1, 
                                        shape=[size_out]), 
                            name='B')
            logits = tf.matmul(input, w) + b
            if self.verbose_summary:
                tf.summary.histogram('weights', w, collections=['train'])
                tf.summary.histogram('biases', b, collections=['train'])
                tf.summary.histogram('logits', logits, collections=['train'])
            return logits
    

    def _train_feed_dict(self):
        """
        Feed dict for one optimisation step.

        With a dynamic learning rate this is the training feed plus the
        current value of `self.learn_rate`; the stored training feed itself
        is left unmodified. Otherwise the training feed is returned as is.
        """
        if self.dynamic_learn_rate:
            return {**self.feed_dict_train, 
                    self.learn_rate_ph: self.learn_rate}
        return self.feed_dict_train
    
        
    def train(self):
        """
        Run `self.epochs` optimisation steps on the training feed.

        Every `summary_step` steps the metrics are evaluated on the training
        and the test feed (both without dropout), written to TensorBoard and
        printed. Every `save_step` steps a checkpoint is written below
        `log_dir/hparam_str`.
        """
        print('Starting to train model {:s}'.format(self.hparam_str))
        # metrics fetched for reporting; the summary op is appended per split
        metric_ops = [self.accuracy, 
                      self.cost, self.cost_targetrep, self.cost_crossent, self.cost_l2reg, 
                      self.recip_rank, 
                      self.top_k_res]
        for i in range(1, self.epochs+1):
            # update learning rate, if it is dynamic
            if self.dynamic_learn_rate: self.update_lr(epoch=i)
            # train step
            self.sess.run(self.train_step, feed_dict=self._train_feed_dict())
            if i % self.summary_step == 0:
                # train summary
                # use self.feed_dict_train_eval for evaluation (keep probability set to 1.0)
                [train_accuracy, train_cost,_ , _, _, _, train_top_k, s] = \
                    self.sess.run(metric_ops + [self.summ_train],
                                  feed_dict=self.feed_dict_train_eval)
                self.writer.add_summary(s, i)
                print('{:.3f} of observations in the top is {}'.format(train_top_k, self.top_k))
                # test summary
                [test_accuracy, test_cost,_ , _, _, _, test_top_k, s] = \
                    self.sess.run(metric_ops + [self.summ_test],
                                  feed_dict=self.feed_dict_test)
                self.writer.add_summary(s, i)
                
                print('Epoch number {}, '.format(i) +
                      'training accuracy is {:.5f} and '.format(train_accuracy) + 
                      'test accuracy is {:.5f}, '.format(test_accuracy))
                print('training cost is {:.5f} and '.format(train_cost) + 
                      'test cost is {:.5f} and '.format(test_cost))
                
            if i % self.save_step == 0:
                print('Saving step {}'.format(i))
                self.saver.save(self.sess, os.path.join(self.log_dir, 
                                                        self.hparam_str, 
                                                        'model.ckpt'), i)
            
        print('Training the model is done! ({:s})'.format(self.hparam_str))
    
    
    def tf_get_rank_order(self, input, reciprocal):
        """
        Returns a tensor of the rank of the input tensor's elements.
        rank(highest element) = 1.

        With `reciprocal` True the result is float32 and holds 1/rank,
        otherwise it holds the integer ranks.
        """
        assert isinstance(reciprocal, bool), 'reciprocal has to be bool'
        size = tf.size(input)
        # indices of the elements, ordered from the smallest to the largest
        indices_of_ranks = tf.nn.top_k(-input, k=size)[1]
        # inverting that permutation gives every element its 0-based position
        # in ascending order; subtracting from size makes the largest rank 1
        indices_of_ranks = size - tf.nn.top_k(-indices_of_ranks, k=size)[1]
        if reciprocal:
            # tf.reciprocal is element-wise, no per-element mapping is needed
            return tf.reciprocal(tf.cast(indices_of_ranks, tf.float32))
        return indices_of_ranks
    
    
    def get_reciprocal_rank(self, logits, targets, reciprocal=True):
        """
        Returns a tensor containing the (reciprocal) ranks
        of the logits tensor (wrt the targets tensor), one value per row.
        The targets tensor should be a 'one hot' vector 
        (otherwise apply one_hot on targets, such that index_mask is a one_hot).

        With `reciprocal` False the integer rank of the target is returned
        instead of its reciprocal.
        """
        function_to_map = lambda x: self.tf_get_rank_order(x, reciprocal=reciprocal)
        # must match what tf_get_rank_order returns: float reciprocals or
        # integer ranks (`reciprocal` is a bool, so test its truth value)
        ordered_array_dtype = tf.float32 if reciprocal else tf.int32
        ordered_array = tf.map_fn(function_to_map, logits, 
                                  dtype=ordered_array_dtype)

        size = int(logits.shape[1])
        index_mask = tf.reshape(
                targets, [-1,size])
        if reciprocal:
            index_mask = tf.cast(index_mask, tf.float32)

        # the one-hot mask keeps only the entry that belongs to the target
        return tf.reduce_sum(ordered_array * index_mask,1)
    
    
    def restore(self, cp_path, feed_dict = None):
        """
        Load the variables of the latest checkpoint found in `cp_path` into
        the session.

        `feed_dict` is accepted for interface compatibility and is not used.
        Raises `Exception` when no checkpoint is found.
        """
        
        print('Loading variables from {:s}'.format(cp_path))

        ckpt = tf.train.get_checkpoint_state(cp_path)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            raise Exception("no checkpoint found")

        print('Loading successful')
    
    def close_session(self):
        """Flush and close the summary writer, then close the session."""
        self.writer.close()
        self.sess.close()
    
    def update_lr(self, epoch):
        """Set the learning rate used for the next step to 1/sqrt(epoch)."""
        self.learn_rate = 1.0 / np.sqrt(epoch)

In [None]:
# kwargs_feed_dict_train = {'x': X_train, 'y': Y_train}
# kwargs_feed_dict_test = {'x': X_val, 'y': Y_val}

# hparam_str = make_hparam_string(**kwargs_simple_lstm)

# lstm = Lstm_model(hparam_str=hparam_str, 
#                   embed_vis_path=embed_vis_path, 
#                   feed_dict_train=kwargs_feed_dict_train, 
#                   feed_dict_test=kwargs_feed_dict_test, 
# #                   **{**kwargs_simple_lstm, 
# #                      **{'epochs': 40}}
#                   **kwargs_simple_lstm
#                  )

# lstm.train()
# lstm.close_session()

In [None]:
# lstm.restore(cp_path=os.path.join(lstm.log_dir, hparam_str))

In [None]:
# lstm.sess.run(lstm.outputs, feed_dict=lstm.feed_dict_train_eval)

In [None]:
# Grid search: build, train and close one model per hyper-parameter
# combination. Every model is stored in lstm_models under 'lstm_<hparam_str>'.
kwargs_feed_dict_train = {'x': X_train, 'y': Y_train}
kwargs_feed_dict_test = {'x': X_val, 'y': Y_val}

# itertools.product varies the right-most factor fastest, i.e. it visits the
# combinations in the same order as the equivalent nested for-loops
# (outermost factor listed first).
hyper_param_grid = itertools.product(
    list(np.logspace(-2, -3, 2)),  # learn_rate
    [0.8],                         # keep_prob
    [(True, 4), (False, 4)],       # (one_hot, char_embed_dim)
    [64, 128],                     # hidden_state_size
    list(np.logspace(-3, -4, 2)),  # l2_wieght_reg
    [0.1, 0.3],                    # target_rep_weight
)

lstm_models = {}
for (learn_rate, keep_prob, (one_hot, char_embed_dim),
     hidden_state_size, l2_wieght_reg, target_rep_weight) in hyper_param_grid:
    # base kwargs overridden by the current combination
    current_kw_simple_lstm = {
        **kwargs_simple_lstm, 
        'learn_rate': learn_rate, 
        'keep_prob': keep_prob, 
        'one_hot': one_hot, 
        'char_embed_dim': char_embed_dim, 
        'hidden_state_size': hidden_state_size, 
        'l2_wieght_reg': l2_wieght_reg, 
        'target_rep_weight': target_rep_weight, 
    }
    hparam_str = make_hparam_string(**current_kw_simple_lstm)
    var = 'lstm_{}'.format(hparam_str)
    lstm_models[var] = Lstm_model(feed_dict_train=kwargs_feed_dict_train, 
                                  feed_dict_test=kwargs_feed_dict_test, 
                                  hparam_str=hparam_str, 
                                  embed_vis_path=embed_vis_path, 
                                  **current_kw_simple_lstm)
    lstm_models[var].train()
    lstm_models[var].close_session()

In [None]:
# getting data directly from a tensorboard log dir
from tensorflow.python.summary import event_multiplexer
# specify path (for parent log dir)
log_parent_dir = './logdir_exper_4_3/'
ea = event_multiplexer.EventMultiplexer().AddRunsFromDirectory(log_parent_dir)
ea.Reload()  # load

# Immediate sub-directories of the parent log dir, used as run names.
# os.walk yields them in arbitrary order, so sort to always look at the same
# run; the default passed to next() covers a missing parent directory.
child_dir = sorted(next(os.walk(log_parent_dir), (None, [], None))[1])
if not child_dir:
    raise FileNotFoundError(
        'no run directories found in {}'.format(log_parent_dir))
# NOTE(review): Lstm_model in this notebook creates its accuracy summary as
# 'Accuracy_test' inside the 'Accuracy' name scope - confirm that the logs in
# this directory really use the lowercase tag below.
print(ea.Scalars(child_dir[0], 'accuracy/accuracy_test'))  # specify run, scalar_name