In [1]:
import numpy as np

from utils.utils import *
from utils.utils_nn import *

np.random.seed(seed())

import tensorflow as tf
tf.reset_default_graph()
tf.set_random_seed(seed())

import pandas as pd

import os

from tensorflow.contrib import rnn
from tensorflow.contrib.tensorboard.plugins import projector  # embeddings visualizer

# from collections import Counter
# from math import ceil

import random
random.seed(seed())

# import matplotlib.pyplot as plt

# import re

In [2]:
# Load everything that comes from the main (original) CSV file.
x, y, n, main_data = init_data()
# Frequencies: the first n entries of the 'CNT' column as a plain list.
freq = list(main_data['CNT'][:n])
# Load the matching pieces from the suggestions CSV file.
x_suggest, y_suggest, freq_suggest = init_data_suggest()

In [3]:
# Settings for the simple LSTM run, grouped by purpose.
kwargs_simple_lstm = nice_dict(dict(
    # logging
    log_dir='logdir/',
    del_log=True,
    # preprocessing and data
    char_filter=100,
    n=n,
    scale_func=log_scale,
    to_permute=True,
    seed=seed(),
    # learning hyper-parameters
    learn_rate=1E-1,  # 1E-4 was the earlier value
    char_embed_dim=4,
    one_hot=False,
    hidden_state_size=8,
    keep_prob=0.7,
    epochs=500,
    summary_step=5,
    save_step=10,
))

# Empty the log directory first when the settings ask for it.
if kwargs_simple_lstm.del_log:
    remove_dir_content(kwargs_simple_lstm.log_dir)

Log directory was not found.


In [4]:
# Filter characters according to 'char_filter' and pad every sequence to the
# same (max) length with the 'unknown' character; statistics come back too.
x_char_filtered_pad, statistics_dict = text_filter_pad_to_index(
    text=x, y=y, **kwargs_simple_lstm)
# Fold the newly calculated figures into the main settings dict.
kwargs_simple_lstm = nice_dict({**kwargs_simple_lstm, **statistics_dict})

# Look-up dictionaries (and their inverses) for the index representation.
(char_int, char_int_inv,
 label_int, label_int_inv) = lookup_dicts_chars_labels(**kwargs_simple_lstm)

# Transform x_suggest the same way, against the character set found above.
x_suggest_char_filtered_pad, statistics_dict = text_filter_pad_to_index(
    text=x_suggest, y=y_suggest, **kwargs_simple_lstm)

# The suggestions must not introduce any "new" statistics.
assert_no_stats_change(new_dict=statistics_dict, kwargs=kwargs_simple_lstm)

# Merge original and suggested data.
x_merge = x_char_filtered_pad + x_suggest_char_filtered_pad
y_merge = y + y_suggest
freq_merge = freq + freq_suggest

In [5]:
# Hold out a validation set; the split also reports which indices it picked.
(x_val, x_train,
 y_val, y_train,
 freq_val, freq_train,
 valid_index) = train_validation_split(
    x=x_merge, y=y_merge, freq=freq_merge,
    label_count_thresh=10,
    valid_ratio=0.25)

number of potential labels to draw from: 82
number of potential observation to draw from: 1587
365 observations sampled for validation
2919 total number of observations
2554 observations for training
The ratio of validation to total observations is about 0.125


In [6]:
# Print the length of every piece returned by the split, one per line.
for split_part in (x_val, x_train, y_val, y_train, freq_val, freq_train):
    print(len(split_part))

365
2554
365
2554
365
2554


In [7]:
# Display the first ten validation indices returned by the split.
valid_index[:10]
# Index lists noted by hand, presumably from earlier runs (kept for comparison):
# [1, 9, 11, 20, 21, 22, 24, 26, 27, 35]
# [2, 3, 9, 10, 15, 16, 18, 20, 22, 23]

[4, 6, 7, 8, 18, 22, 36, 37, 43, 50]

In [None]:
# Scale the data (proportional to frequency) and permute it when requested.
# The third returned value overwrites 'n' in the settings dict.
# NOTE(review): this consumes the full merged data, not the x_train / y_train /
# freq_train split created above; confirm that is intended.
x_scaled, y_scaled, kwargs_simple_lstm['n'] = scale_permute_data(
    x=x_merge, y=y_merge, freq=freq_merge,
    scale_func=kwargs_simple_lstm.scale_func,
    to_permute=kwargs_simple_lstm.to_permute)

In [None]:
# Choose what is fed to the model: currently the scaled data, unchanged.
x_feed = x_scaled
y_feed = y_scaled

In [None]:
# Turn the selected data into np.arrays to feed into the tf model; the middle
# return value is not needed here.
X, _, Y_dense = index_transorm_xy(
    x=x_feed, y=y_feed,
    char_int=char_int, label_int=label_int,
    **kwargs_simple_lstm)

# Write the metadata file for the embeddings visualizer and keep its path.
embed_vis_path = write_embeddings_metadata(
    log_dir=kwargs_simple_lstm.log_dir,
    dictionary=char_int,
    file_name='metadata.tsv')

In [None]:
class Lstm_model(object):
    """LSTM sequence classifier over index-encoded characters (TensorFlow 1.x).

    Pipeline: embedding lookup (trainable matrix or constant one-hot identity)
    -> one LSTM cell with output dropout -> linear layer on the last time step
    -> softmax cross-entropy minimised with Adam.

    Every instance builds its ops in a private ``tf.Graph`` with its own
    ``tf.Session``. This lets several models be created in one process (for
    example in a hyper-parameter sweep) without variable-name collisions, keeps
    each model's summaries and checkpoints limited to its own variables, and
    makes the graph-level seed take effect because it is set before any op is
    created.
    """

    def __init__(self, 
                 hparam_str, 
                 n, 
                 seq_len, 
                 n_class, 
                 n_char, 
                 char_embed_dim, 
                 one_hot, 
                 hidden_state_size, 
                 keep_prob, 
                 learn_rate, 
                 epochs, 
                 log_dir, 
                 embed_vis_path, 
                 summary_step, 
                 save_step, 
                 seed, 
                 *args, feed_dict=None, **kwargs):
        """Build the graph, initialise variables and set up TensorBoard logging.

        Args:
            hparam_str: run name; used as the sub-directory of ``log_dir``.
            n: fixed first dimension of both placeholders (rows fed per step).
            seq_len: length of every input sequence.
            n_class: width of the label matrix.
            n_char: number of rows of the embedding matrix.
            char_embed_dim: embedding width when ``one_hot`` is false.
            one_hot: if true, use a constant identity matrix instead of a
                trainable embedding.
            hidden_state_size: number of LSTM units.
            keep_prob: keep probability of the dropout on the LSTM outputs.
            learn_rate: learning rate handed to the Adam optimizer.
            epochs: number of training steps executed by ``train``.
            log_dir: root directory for summaries and checkpoints.
            embed_vis_path: metadata path handed to the embedding projector.
            summary_step: write summaries every this many steps.
            save_step: save a checkpoint every this many steps.
            seed: seed for the graph and for the explicitly seeded ops.
            *args, **kwargs: ignored; they let callers splat a larger
                settings dict into the constructor.
            feed_dict: optional ``{'x': ..., 'y': ...}``; when given it is
                passed to ``feed`` so the model is ready to train.
        """
        self.feed_dict = {}
        self.hparam_str = hparam_str
        self.n = n
        self.seq_len = seq_len
        self.n_class = n_class
        self.n_char = n_char
        self.char_embed_dim = char_embed_dim
        self.one_hot = one_hot
        self.hidden_state_size = hidden_state_size
        self.keep_prob = keep_prob
        self.learn_rate = learn_rate
        self.epochs = epochs
        self.log_dir = log_dir
        self.embed_vis_path = embed_vis_path
        self.summary_step = summary_step
        self.save_step = save_step
        self.seed = seed
        # filled in by embed_matrix() below
        self.embedding_matrix = None

        # Private graph: nothing from other models leaks in, and the seed is
        # set before any op exists (a graph-level seed only affects ops that
        # are created after it is set).
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.seed)

            # Setup placeholders
            self.x_ = tf.placeholder(tf.int32, [self.n, self.seq_len], 
                                     name='Examples')
            self.y_ = tf.placeholder(tf.int32, [self.n, self.n_class], 
                                     name='Lables')

            self.embedding_matrix = self.embed_matrix()

            self.outputs = self.lstm_unit(input=self.x_)

            self.logits = self.logit(input=self.outputs, 
                                     size_in=self.hidden_state_size, 
                                     size_out=self.n_class)

            with tf.name_scope('cross_entropy'):
                self.cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=self.logits, labels=self.y_), 
                    name='cross_entropy')
                tf.summary.scalar('cross_entropy', self.cost)

            with tf.name_scope('train'):
                self.train_step = tf.train.AdamOptimizer(
                    self.learn_rate).minimize(self.cost)

            with tf.name_scope('accuracy'):
                self.correct_prediction = tf.equal(tf.argmax(self.logits, 1), 
                                                   tf.argmax(self.y_, 1))
                self.accuracy = tf.reduce_mean(
                    tf.cast(self.correct_prediction, tf.float32))
                tf.summary.scalar('accuracy', self.accuracy)

            # Non-trainable copy of the embedding matrix; refreshed by
            # `self.assignment` right before each checkpoint so the projector
            # always has a variable to read (also in the one-hot case, where
            # the matrix itself is a constant).
            self.embedding_vis = tf.Variable(
                tf.zeros(self.embedding_matrix.get_shape().as_list()), 
                trainable=False, 
                name='embedding_vis')
            self.assignment = self.embedding_vis.assign(self.embedding_matrix)

            # summaries, saver and initializer all belong to this graph only
            self.summ = tf.summary.merge_all()
            self.saver = tf.train.Saver()
            init_op = tf.global_variables_initializer()

        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init_op)

        # Summaries go to the same directory train() saves checkpoints to.
        self.writer = tf.summary.FileWriter(
            os.path.join(self.log_dir, self.hparam_str))
        self.writer.add_graph(self.sess.graph)

        # Embedding projector configuration for TensorBoard
        self.config = projector.ProjectorConfig()
        self.embed = self.config.embeddings.add()
        self.embed.tensor_name = self.embedding_vis.name
        self.embed.metadata_path = self.embed_vis_path
        projector.visualize_embeddings(self.writer, self.config)

        if feed_dict is not None:
            self.feed(feed_dict=feed_dict)

        
    def embed_matrix(self, stddev=0.1, name='embeddings'):
        """Create the embedding matrix and store it on the instance.

        With ``one_hot`` false this is a trainable ``[n_char, char_embed_dim]``
        variable initialised from a truncated normal; otherwise it is the
        constant ``n_char`` x ``n_char`` identity matrix. Ops are added to the
        graph that is the default one at call time.

        Args:
            stddev: standard deviation of the truncated-normal initialiser.
            name: name scope for the created ops.

        Returns:
            The embedding matrix tensor (also kept as ``self.embedding_matrix``).
        """
        with tf.name_scope(name):
            if not self.one_hot:
                embedding_matrix = tf.get_variable(
                    'embedding_matrix', 
                    initializer=tf.truncated_normal(
                        [self.n_char, self.char_embed_dim], 
                        stddev=stddev, 
                        seed=self.seed), 
                    trainable=True)
            else:
                # a one-hot vector per character is a row of the identity matrix
                embedding_matrix = tf.constant(value=np.identity(self.n_char), 
                                               name='embedding_matrix', 
                                               dtype=tf.float32)

            tf.summary.histogram('embedding_matrix', embedding_matrix)
            self.embedding_matrix = embedding_matrix
            return self.embedding_matrix
        
        
    def lstm_unit(self, 
                  input, 
                  name='LSTM'):
        """Run the embedded sequence through one LSTM cell with output dropout.

        The keep probability is a graph constant, so the dropout is active for
        every evaluation of the outputs, including accuracy and cost.

        Args:
            input: integer index tensor, one row per sequence.
            name: name scope for the created ops.

        Returns:
            The LSTM output of the last time step.
        """
        with tf.name_scope(name):
            embedded = tf.nn.embedding_lookup(self.embedding_matrix, input)
            # One tensor per time step. unstack removes only the time axis,
            # whereas an axis-less squeeze would also drop a batch or embedding
            # axis of size 1.
            rnn_inputs = tf.unstack(embedded, num=self.seq_len, axis=1)

            cell = rnn.BasicLSTMCell(num_units=self.hidden_state_size)
            keep_prob = tf.constant(self.keep_prob)
            cell = rnn.DropoutWrapper(cell, 
                                      output_keep_prob=keep_prob, 
                                      seed=self.seed)

            outputs, states = rnn.static_rnn(cell, rnn_inputs, dtype=tf.float32)
            outputs = outputs[-1]
            tf.summary.histogram('outputs', outputs)
            return outputs


    def logit(self, 
              input, 
              size_in, 
              size_out, 
              stddev=0.1, 
              name='logit'):
        """Linear layer ``input @ W + B`` producing unnormalised class scores.

        Args:
            input: 2-D tensor whose second dimension is ``size_in``.
            size_in: number of input features.
            size_out: number of output scores.
            stddev: standard deviation of the truncated-normal weight init.
            name: name scope for the created ops.

        Returns:
            The logits tensor.
        """
        with tf.name_scope(name):
            w = tf.Variable(tf.truncated_normal([size_in, size_out], 
                                                stddev=stddev, 
                                                seed=self.seed), 
                            name='W')
            b = tf.Variable(tf.constant(0.1, 
                                        shape=[size_out]), 
                            name='B')
            logits = tf.matmul(input, w) + b
            tf.summary.histogram('weights', w)
            tf.summary.histogram('biases', b)
            tf.summary.histogram('logits', logits)
            return logits
    
    
    def feed(self, feed_dict):
        """Bind data to the placeholders for all subsequent runs.

        Args:
            feed_dict: mapping with keys ``'x'`` (examples) and ``'y'`` (labels).
        """
        self.feed_dict = {self.x_: feed_dict['x'], 
                          self.y_: feed_dict['y']}
        
        
    def train(self):
        """Run ``epochs`` optimisation steps on the bound data.

        Every ``summary_step`` steps accuracy, cost and summaries are fetched
        in a single session run (so all three come from the same forward pass
        and dropout mask) and written to TensorBoard. Every ``save_step``
        steps the projector copy of the embedding is refreshed and a
        checkpoint is saved under ``log_dir/hparam_str``.

        Raises:
            ValueError: if no data has been bound with ``feed``.
        """
        if not self.feed_dict:
            raise ValueError('No data to train on: call feed() first or pass '
                             'feed_dict to the constructor.')

        print('Starting to train model {:s}'.format(self.hparam_str))
        
        for i in range(self.epochs):
            step = i + 1
            if step % self.summary_step == 0:
                train_accuracy, train_cost, s = self.sess.run(
                    [self.accuracy, self.cost, self.summ], 
                    feed_dict=self.feed_dict)
                self.writer.add_summary(s, step)
                print('Epoch number {}, '.format(step) +
                      'accuracy is {:.5f} and '.format(train_accuracy) + 
                      'cost is {:.5f}'.format(train_cost))
            if step % self.save_step == 0:
                print('Saving step {}'.format(step))
                self.sess.run(self.assignment, feed_dict=self.feed_dict)
                self.saver.save(self.sess, os.path.join(self.log_dir, 
                                                        self.hparam_str, 
                                                        'model.ckpt'), step)
            self.sess.run(self.train_step, feed_dict=self.feed_dict)

        # push buffered summaries to disk; the session stays open for later use
        self.writer.flush()
        print('Training model {:s} is done!'.format(self.hparam_str))
    
    
    def restore(self, cp_path, feed_dict = None):
        """Load variables from the latest checkpoint found in a directory.

        Args:
            cp_path: directory holding the checkpoint state.
            feed_dict: optional ``{'x': ..., 'y': ...}`` to bind after loading.

        Raises:
            Exception: if no checkpoint is found under ``cp_path``.
        """
        print('Loading variables from {:s}'.format(cp_path))

        ckpt = tf.train.get_checkpoint_state(cp_path)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            raise Exception("no checkpoint found")

        if feed_dict:
            self.feed(feed_dict=feed_dict)
        print('Loading successful')

        
    def run_accuracy(self):
        """Evaluate accuracy on the currently bound data."""
        train_accuracy = self.sess.run(self.accuracy, 
                                       feed_dict=self.feed_dict)
        return train_accuracy
        
    def run_cost(self):
        """Evaluate the cross-entropy cost on the currently bound data."""
        train_cost = self.sess.run(self.cost, 
                                   feed_dict=self.feed_dict)
        return train_cost
    
    def run_summary(self):
        """Evaluate the merged summaries on the currently bound data."""
        train_summary = self.sess.run(self.summ, 
                                      feed_dict=self.feed_dict)
        return train_summary

In [None]:
# Data for the placeholders, keyed the way Lstm_model.feed expects.
kwargs_feed_dict = dict(x=X, y=Y_dense)

# Run name derived from the settings dict.
hparam_str = make_hparam_string(**kwargs_simple_lstm)

# Same settings as the main dict, with 'epochs' overridden to 30 for this run.
lstm = Lstm_model(hparam_str=hparam_str,
                  embed_vis_path=embed_vis_path,
                  **{**kwargs_simple_lstm, 'epochs': 30})

In [None]:
# Bind the prepared arrays to the model's placeholders, then run the training loop.
lstm.feed(feed_dict=kwargs_feed_dict)
lstm.train()

In [None]:
# lstm.restore(cp_path=os.path.join(lstm.log_dir, hparam_str))

In [None]:
# Print the current values of the embedding matrix.
# Status notes:
#   - this works after training;
#   - it also seems to work after calling lstm.restore(), yet the numbers are
#     not identical;
#   - TODO: solve the seed() problem.
print(lstm.embedding_matrix.eval(session=lstm.sess))

In [None]:
# lstm.train()

In [None]:
# Hyper-parameter sweep: build, feed and train one model per configuration and
# keep every trained model in `lstm_models`, keyed by its run name.
kwargs_feed_dict = {'x': X, 'y': Y_dense}
lstm_models = {}
for learn_rate in np.logspace(-1, -2, 2):
    for keep_prob in [0.7, 1.0]:
        # (one_hot, char_embed_dim) pairs to try
        for one_hot, char_embed_dim in [(True, 4), (False, 4)]:
            for hidden_state_size in [4, 32]:
                current_kw_simple_lstm = {
                    **kwargs_simple_lstm,
                    'learn_rate': learn_rate,
                    'one_hot': one_hot,
                    'keep_prob': keep_prob,
                    'char_embed_dim': char_embed_dim,
                    'hidden_state_size': hidden_state_size}
                # Keyword call, matching how the run name is built for the
                # single-model run earlier in the notebook.
                hparam_str = make_hparam_string(**current_kw_simple_lstm)
                var = 'lstm_{}'.format(hparam_str)

                # Start every model from a clean default graph so that
                # variable names from previously built models cannot collide.
                tf.reset_default_graph()
                lstm_models[var] = Lstm_model(hparam_str=hparam_str,
                                              embed_vis_path=embed_vis_path,
                                              **current_kw_simple_lstm)
                # The data must be bound before training; train() runs the
                # graph with whatever feed() stored on the model.
                lstm_models[var].feed(feed_dict=kwargs_feed_dict)
                lstm_models[var].train()


In [None]:
### MNIST EMBEDDINGS ###
# urllib.request is not imported by the notebook's import cell; import it here
# so this cell does not depend on a name leaking in from a star import.
import urllib.request

# NOTE(review): kwargs_tf_simple and pcp1 are not defined in the visible part
# of the notebook; presumably they come from the utils star imports. Confirm.
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(
    train_dir=kwargs_tf_simple.log_dir + 'data', one_hot=True)
### Get a sprite and labels file for the embedding projector ###
for projector_file in ('labels_1024.tsv', 'sprite_1024.png'):
    urllib.request.urlretrieve(kwargs_tf_simple.GIST_URL + projector_file,
                               kwargs_tf_simple.log_dir + projector_file)

pcp1()

def conv_layer(input, size_in, size_out, name="conv"):
    """5x5 'SAME' convolution with bias and ReLU, followed by 2x2 max-pooling.

    Args:
        input: 4-D tensor handed to ``tf.nn.conv2d``.
        size_in: number of input channels.
        size_out: number of output channels.
        name: name scope for the created ops.

    Returns:
        The max-pooled activations (stride 2 along the two middle axes).
    """
    kernel_shape = [5, 5, size_in, size_out]
    pool_window = [1, 2, 2, 1]
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        # histograms for TensorBoard, in the same order as the tensors were built
        for tag, tensor in (("weights", w), ("biases", b), ("activations", act)):
            tf.summary.histogram(tag, tensor)
        return tf.nn.max_pool(act, ksize=pool_window, strides=pool_window,
                              padding="SAME")


def fc_layer(input, size_in, size_out, name="fc", use_relu=True):
    """Fully connected layer ``input @ W + B`` with an optional ReLU.

    Args:
        input: 2-D tensor whose second dimension is ``size_in``.
        size_in: number of input features.
        size_out: number of output features.
        name: name scope for the created ops.
        use_relu: apply ReLU to the output (default, the original behaviour).
            Pass False for a layer that produces logits: clipping logits at
            zero before softmax cross-entropy discards every negative score.

    Returns:
        The layer output (post-ReLU when ``use_relu`` is true).
    """
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        pre_activation = tf.matmul(input, w) + b
        act = tf.nn.relu(pre_activation) if use_relu else pre_activation
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
    """Build and train one MNIST convnet, logging to TensorBoard.

    The default graph is reset first. Summaries, the projector configuration
    and the checkpoints are all written to the run directory
    ``kwargs_tf_simple.log_dir + hparam``, so the embedding projector finds
    the checkpoint next to its configuration. The session and the summary
    writer are closed when training ends or fails.

    Args:
        learning_rate: learning rate handed to the Adam optimizer.
        use_two_conv: two conv layers (1->32->64) if true, otherwise one conv
            layer (1->64) followed by an extra max-pool.
        use_two_fc: if true, insert a 1024-unit hidden layer before the
            10-way output layer and use it as the projector embedding;
            otherwise the flattened conv output is the embedding.
        hparam: run name appended to the log directory.
    """
    tf.reset_default_graph()
    tf.set_random_seed(seed())

    # Setup placeholders, and reshape the data
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 3)
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    if use_two_conv:
        conv1 = conv_layer(x_image, 1, 32, "conv1")
        conv_out = conv_layer(conv1, 32, 64, "conv2")
    else:
        conv1 = conv_layer(x_image, 1, 64, "conv")
        conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # both branches pool twice with stride 2 (28 -> 14 -> 7) and end with 64 channels
    flat_size = 7 * 7 * 64
    flattened = tf.reshape(conv_out, [-1, flat_size])

    if use_two_fc:
        fc1 = fc_layer(flattened, flat_size, 1024, "fc1")
        embedding_input = fc1
        embedding_size = 1024
        logits = fc_layer(fc1, 1024, 10, "fc2", use_relu=False)
    else:
        embedding_input = flattened
        embedding_size = flat_size
        logits = fc_layer(flattened, flat_size, 10, "fc", use_relu=False)

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    # Variable holding the embedding of 1024 test images for the projector.
    embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
    assignment = embedding.assign(embedding_input)
    saver = tf.train.Saver()

    run_dir = kwargs_tf_simple.log_dir + hparam

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(run_dir)
        try:
            writer.add_graph(sess.graph)

            config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            embedding_config = config.embeddings.add()
            embedding_config.tensor_name = embedding.name
            embedding_config.sprite.image_path = kwargs_tf_simple.log_dir + 'sprite_1024.png'
            embedding_config.metadata_path = kwargs_tf_simple.log_dir + 'labels_1024.tsv'
            # Specify the width and height of a single thumbnail.
            embedding_config.sprite.single_image_dim.extend([28, 28])
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)

            pcp3()

            for i in range(1000 + 1):
                batch = mnist.train.next_batch(100)
                if i % 5 == 0:
                    [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
                    writer.add_summary(s, i)
                    print('Iteration number {}, Accuracy is currently {}'.format(i, train_accuracy))
                if i % 500 == 0:
                    sess.run(assignment, feed_dict={x: mnist.test.images[:1024], y: mnist.test.labels[:1024]})
                    # save next to the projector config written by the writer
                    saver.save(sess, os.path.join(run_dir, "model.ckpt"), i)
                sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})
        finally:
            writer.close()

def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
    """Build the run name for one MNIST configuration.

    NOTE(review): a helper with this same name is called with keyword
    arguments earlier in the notebook; once this cell runs, this definition
    replaces it in the notebook namespace.

    Args:
        learning_rate: formatted in scientific notation without decimals.
        use_two_fc: truthy -> "fc=2", otherwise "fc=1".
        use_two_conv: truthy -> "conv=2", otherwise "conv=1".

    Returns:
        A string such as "lr_1E-04,conv=2,fc=1".
    """
    conv_param = "conv=1"
    if use_two_conv:
        conv_param = "conv=2"

    fc_param = "fc=1"
    if use_two_fc:
        fc_param = "fc=2"

    return "lr_%.0E,%s,%s" % (learning_rate, conv_param, fc_param)

def main():
    """Train one MNIST model per hyper-parameter combination.

    The architecture flags are passed to ``mnist_model`` by keyword:
    ``mnist_model`` takes ``use_two_conv`` before ``use_two_fc`` while
    ``make_hparam_string`` takes them in the opposite order, so a positional
    call builds a model that does not match its run name.
    """
    # You can try adding some more learning rates
    for learning_rate in [1E-4]:

        # Include "False" as a value to try different model architectures
        for use_two_fc in [False]:
            for use_two_conv in [True]:
                pcp2()

                # Construct a hyperparameter string for each one (example: "lr_1E-3,fc=2,conv=2)
                hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
                print('Starting run for %s' % hparam)

                # Actually run with the new settings
                mnist_model(learning_rate=learning_rate,
                            use_two_conv=use_two_conv,
                            use_two_fc=use_two_fc,
                            hparam=hparam)


# Run the MNIST sweep when this code is executed as the top-level program
# (the guard keeps an import of this code from starting a training run).
if __name__ == '__main__':
    main()