In [12]:
import tensorflow as tf
import numpy as np
import pandas as pd

import os

from tensorflow.contrib import rnn

# from sklearn.feature_extraction import DictVectorizer
# from sklearn import svm
# from sklearn.metrics import accuracy_score  # gt, pred

from utils.utils import user_opt_gen, nice_dict, seed, init_data, pcp1, pcp2, pcp3, pcp4
from utils.utils_baseline_svm import filter_dict_by_val_atleast, char_freq_map

# from collections import Counter
# from math import isnan

# import matplotlib.pyplot as plt

# import re

In [13]:
def remove_dir_content(path):
    if tf.gfile.Exists(path):
        tf.gfile.DeleteRecursively(path)
        print('Log directory was deleted.')
    else:
        print('Log directory was not found.')
#         print(path)


# pad a list to max_length with the pad_symbol
def pad_list(*, input_list, max_length, pad_symbol):
    output_list = input_list + [pad_symbol] * (max_length - len(input_list))
    return output_list


def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()
    

def index_to_dense(index, length):
    output_list = [0.0] * length
    output_list[index] = 1.0
    return output_list

In [14]:
x, y, n, _ = init_data()

np.random.seed(seed())

In [15]:
kwargs_simple_lstm = nice_dict({
    # log
    'log_dir': 'logdir/', 
    'del_log': True, 
    # preprocessing and data
    'char_filter': 100, 
    'n': n,
    'batch_size': n, 
    # learning hyper-params
    'learn_rate': 1,  # 1E-4
    'char_embed_dim': 4, 
    'one_hot': False,
    'hidden_state_size': 4, 
    'activate_bool': True, 
    'keep_prob': 1.0, 
    'epochs': 10,
    'save_step': 5, 
    'print_step': 100
})

if kwargs_simple_lstm.del_log: remove_dir_content(kwargs_simple_lstm.log_dir)

Log directory was not found.


In [6]:
# filter by character, appear at least 'char_filter' times in the input
filter_keys_chars = list(
    filter_dict_by_val_atleast(
        input_dict=char_freq_map(input_data=x), 
        value=kwargs_simple_lstm.char_filter)
    .keys())

# create a list of character lists
x_char = [list(line) for line in x]
x_char_filtered = []
unknown = '<unk-char>'
# replace chars not in 'filter_keys_chars' with 'unknown'
for line in x_char:
    x_char_filtered.append([char if (char in filter_keys_chars) else unknown for char in line])
    
# pad lines, so that all lines are same length
max_line_len = int(np.max([len(line) for line in x]))
pad = '<pad-char>'
x_char_filtered_pad = []
for i, line in enumerate(x_char_filtered):
    x_char_filtered_pad.append(pad_list(input_list=line, 
                                    max_length=max_line_len, 
                                    pad_symbol=pad))

In [7]:
# statistics based on filtered features
label_set = y.unique()
n_label = len(label_set)

# number of unique characters iin input ('x_char_filtered')
char_set = set([char for line in x_char_filtered_pad for char in line])
n_char = len(char_set)

In [8]:
kwargs_simple_lstm = nice_dict({**kwargs_simple_lstm, **{
    'seq_len': max_line_len,
    'n_class': n_label,
    'n_char': n_char
}
                               })

In [None]:
# create lookup dict for characters (and inv)
char_int = {}
char_int_inv = {}
for i, char in enumerate(char_set):
    char_int[char] = i
    char_int_inv[i] = char

# transform x from a list of symbols into a list of ints
X = []
for line in x_char_filtered_pad:
    X.append([char_int[char] for char in line])

# same for labels
label_int = {}
label_int_inv = {}
for i, label in enumerate(label_set):
    label_int[label] = i
    label_int_inv[i] = label
# create Y as a list of list(int)
Y = [[label_int[label]] for label in y]

# transform into format acceptable by tf
X = np.array(X)
Y_dense = np.array(
    [index_to_dense(label[0], 
                    kwargs_simple_lstm.n_class) for label in Y])

In [None]:
def embed_matrix(index_size, 
                 embedding_dim, 
                 one_hot, 
                 stddev=0.1, 
                 seed=seed(), 
                 name="embedding_matrix"):
    # index_size would be the size of the character set
        
    with tf.name_scope(name):
        if not one_hot:
            embedding_matrix = tf.get_variable(
                'embedding_matrix', 
                initializer=tf.truncated_normal([index_size, embedding_dim], 
                                                stddev=stddev, 
                                                seed=seed), 
                trainable=True)
        else:
            # creating a one-hot for each character corresponds to the identity matrix
            embedding_matrix = tf.constant(value=np.identity(index_size), 
                                           name='embedding_matrix', 
                                           dtype=tf.float32)
            
        tf.summary.histogram('embedding_matrix', embedding_matrix)
        return embedding_matrix


def lstm_unit(input, 
              embeddings, 
              seq_length, 
              hidden_state_size, 
              seed=seed(), 
              keep_prob=kwargs_simple_lstm.keep_prob, 
              name='LSTM'):
    with tf.name_scope(name):
        
        rnn_inputs = [tf.squeeze(i) for i in 
                      tf.split(tf.nn.embedding_lookup(embeddings, input),
                               seq_length, 
                               1)]

        cell = rnn.BasicLSTMCell(num_units=hidden_state_size)
        keep_prob = tf.constant(keep_prob)
        cell = rnn.DropoutWrapper(cell, 
                                  output_keep_prob=keep_prob, 
                                  seed=seed)

        outputs, states = rnn.static_rnn(cell, rnn_inputs, dtype=tf.float32)
        outputs = outputs[-1]
#         outputs = tf.constant(value=outputs, 
#                               name='outputs')
        tf.summary.histogram('outputs', outputs)
        return outputs
        

def logit(input, 
          size_in, 
          size_out, 
          stddev=0.1, 
          seed=seed(), 
          name='logit'):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], 
                                            stddev=stddev, 
                                            seed=seed), 
                       name='W')
        b = tf.Variable(tf.constant(0.1, 
                                    shape=[size_out]), 
                        name='B')
        logits = tf.matmul(input, w) + b
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)
        tf.summary.histogram('logits', logits)
        return logits
                        

def lstm_simple_model(feed_dict, 
                      hparam_str, 
                      n, 
                      seq_len, 
                      n_class, 
                      n_char, 
                      char_embed_dim, 
                      one_hot, 
                      hidden_state_size, 
                      log_dir, *args, **kwargs):
    
    tf.reset_default_graph()
    sess = tf.Session()
    
#     tf.set_random_seed(seed())

    # Setup placeholders, and reshape the data
    x_ = tf.placeholder(tf.int32, [n, 
                                   seq_len])
    y_ = tf.placeholder(tf.int32, [n, 
                                   n_class])

    embedding_matrix = embed_matrix(index_size=n_char, 
                                    embedding_dim=char_embed_dim, 
                                    one_hot=one_hot)
    outputs = lstm_unit(input=x_, 
                        embeddings=embedding_matrix, 
                        hidden_state_size=hidden_state_size, 
                        seq_length=seq_len)
    
    logits = logit(input=outputs, 
               size_in=hidden_state_size, 
               size_out=kwargs_simple_lstm.n_class)
    
    with tf.name_scope('cross_entropy'):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y_), name='cross_entropy')
    tf.summary.scalar('cross_entropy', cost)
    
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(
            kwargs_simple_lstm.learn_rate).minimize(cost)
# train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)
          
    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    summ = tf.summary.merge_all()
    
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(log_dir + hparam_str)
    writer.add_graph(sess.graph)
    
    
    
    feed_dict = {x_: kwargs_feed_dict['x'], 
                 y_: kwargs_feed_dict['y']}
    
    for i in range(kwargs_simple_lstm.epochs + 1):
#         batch = mnist.train.next_batch(100)
        if i % print_step == 0:
            
#             pcp4()
            # minimizing cost, but tracking accuracy
            [train_accuracy, train_cost, s] = sess.run([accuracy, cost, summ], feed_dict=feed_dict)
            writer.add_summary(s, i)
            print('Iteration number {}, '.format(i) +
                  'accuracy is {:.5f} and '.format(train_accuracy) + 
                  'cost is {:.5f}'.format(train_cost))
        if i % save_step == 0:
#             sess.run(assignment, feed_dict=feed_dict)
            saver.save(sess, os.path.join(log_dir, "model.ckpt"), i)
        sess.run(train_step, feed_dict=feed_dict)
        
    print('Training is done!')

In [None]:
kwargs_feed_dict = {'x': X, 'y': Y_dense}
lstm_simple_model(feed_dict=kwargs_feed_dict, 
                  hparam_str='testrun', 
                  **kwargs_simple_lstm)

In [None]:
reset_graph()

g = tf.Graph()

tf.set_random_seed(seed())

x_ = tf.placeholder(tf.int32, [kwargs_simple_lstm.n, 
                               kwargs_simple_lstm.seq_len])
y_ = tf.placeholder(tf.int32, [kwargs_simple_lstm.n, 
                               kwargs_simple_lstm.n_class])

embedding_matrix = embed_matrix(index_size=kwargs_simple_lstm.n_char, 
                                embedding_dim=kwargs_simple_lstm.char_embed_dim, 
                                one_hot=kwargs_simple_lstm.one_hot)

outputs = lstm_unit(input=x_, 
                    embeddings=embedding_matrix, 
                    seq_length=kwargs_simple_lstm.seq_len)
# rnn_inputs = [tf.squeeze(i) for i in tf.split(tf.nn.embedding_lookup(embedding_matrix, x_),
#                                               kwargs_simple_lstm.seq_len, 
#                                               1)]
# # gives: list index out of range
# # rnn_inputs = [tf.nn.embedding_lookup(embedding_matrix, x_)]

# cell = rnn.BasicLSTMCell(num_units=kwargs_simple_lstm.hidden_state_size)
# keep_prob = tf.constant(kwargs_simple_lstm.keep_prob)
# cell = rnn.DropoutWrapper(cell, 
#                           output_keep_prob=keep_prob, 
#                           seed=seed())

# outputs, states = rnn.static_rnn(cell, rnn_inputs, dtype=tf.float32)
# print('outputs are of length {}.'.format(len(outputs)))
# outputs = outputs[-1]

# W = tf.Variable(tf.truncated_normal([kwargs_simple_lstm.hidden_state_size, 
#                                      kwargs_simple_lstm.n_class], 
#                                     stddev=0.1, 
#                                     seed=seed()))
# b = tf.Variable(tf.truncated_normal([kwargs_simple_lstm.n_class], 
#                                     stddev=0.1, 
#                                     seed=seed()))

# logits = tf.matmul(outputs, W) + b

# logits = logit(input=outputs, 
#                size_in=kwargs_simple_lstm.hidden_state_size, 
#                size_out=kwargs_simple_lstm.n_class)

# cost = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(
#         logits=logits, labels=y_), name="xent")

# # train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)
# train_op = tf.train.AdamOptimizer(kwargs_simple_lstm.learn_rate).minimize(cost)


# init = tf.initialize_all_variables().run()

In [None]:
# feed dict into the network
kwargs_feed_dict = {x_: X, y_: Y_dense}

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(kwargs_simple_lstm.epochs):
        sess.run(train_op, feed_dict=kwargs_feed_dict)
        if i % kwargs_simple_lstm.print_step == 0:
            c = sess.run(cost, feed_dict=kwargs_feed_dict)
            print('training cost:', c)

    response = sess.run(tf.nn.softmax(logits), feed_dict=kwargs_feed_dict)
    print(response)

In [None]:
with tf.Session() as sess:
    print(tf.trainable_variables())
    print(embedding_matrix)

In [None]:
# one_hot True
# learning rate 1
# epochs 1000
# training cost: 5.69061
# training cost: 5.69047

In [None]:
# tf.trainable_variables()

In [None]:
# hold_eighty = kwargs_simple_lstm.seq_len
# tf.split(tf.nn.embedding_lookup(embedding_matrix, x_), hold_eighty, 1)
# tf.split(tf.nn.embedding_lookup(embedding_matrix, x_), 80, 1)
# tf.split(tf.nn.embedding_lookup(embedding_matrix, x_), tf.constant(kwargs_simple_lstm.seq_len), 1)

In [None]:
# https://github.com/wpm/tfrnnlm
# https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb
# http://r2rt.com/recurrent-neural-networks-in-tensorflow-iii-variable-length-sequences.html


In [None]:
# Define weights
with tf.variable_scope('softmax'):
    weights = {
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

In [None]:
reset_graph()

EPOCHS = 10000
PRINT_STEP = 1000

data = np.array([[1, 2, 3, 4, 5], 
                 [ 2, 3, 4, 5, 6], 
                 [3, 4, 5, 6, 7], 
                [1, 2, 2, 4, 5]])

# making sure that embedding index starts from 0 (to match embeddings index range)
new_data = []
for line in data:
    new_data.append([el - 1 for el in line])
data = np.array(new_data)
data

target = np.array([[0], [1], [2], [0]])

embed_dim = 4
seq_len = data.shape[1]
n = data.shape[0]
n_labels = 3

target_dense = np.array(
    [index_to_dense(label[0], n_labels) for label in target])

x_ = tf.placeholder(tf.int32, [n, seq_len])
y_ = tf.placeholder(tf.int32, [n, n_labels])

embedding_matrix = tf.Variable(
    tf.truncated_normal([7, embed_dim], 
                        stddev=0.1, 
                        seed=seed()))

rnn_inputs = [tf.squeeze(i) for i in tf.split(tf.nn.embedding_lookup(embedding_matrix, x_),
                                              seq_len, 
                                              1)]
cell = rnn.BasicLSTMCell(num_units=embed_dim)
cell = rnn.DropoutWrapper(cell, 
                          output_keep_prob=kwargs_simple_lstm.keep_prob)

outputs, states = rnn.static_rnn(cell, rnn_inputs, dtype=tf.float32)
# print(len(outputs))
outputs = outputs[-1]
# print(outputs.shape)
W = tf.Variable(tf.random_normal([embed_dim, n_labels]))     
b = tf.Variable(tf.random_normal([n_labels]))

y = tf.matmul(outputs, W) + b
# print(y.shape)
# print(y)
# cost = tf.reduce_mean(tf.square(y - y_))
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        logits=y, labels=y_), name="xent")

# train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)
train_op = tf.train.AdamOptimizer(1e-4).minimize(cost)


# init = tf.initialize_all_variables().run()

with tf.Session() as sess:
#     sess.run(init)
    tf.global_variables_initializer().run()
    for i in range(EPOCHS):
        sess.run(train_op, feed_dict={x_:data, y_:target_dense})
        if i % PRINT_STEP == 0:
            c = sess.run(cost, feed_dict={x_:data, y_:target_dense})
            print('training cost:', c)

    response = sess.run(tf.nn.softmax(y), feed_dict={x_:data})
    print(response)

In [None]:
# training cost: 1.57332
# training cost: 0.873486
# training cost: 0.530809
# training cost: 0.346015
# training cost: 0.205075
# training cost: 0.110332
# training cost: 0.0573604
# training cost: 0.0307796
# training cost: 0.0176844
# training cost: 0.0107775
# [[  9.95861769e-01   4.09586774e-03   4.23801393e-05]
#  [  5.38118649e-03   9.88556504e-01   6.06230181e-03]
#  [  1.61634802e-04   8.70981626e-03   9.91128564e-01]
#  [  9.97194529e-01   2.77369726e-03   3.17150661e-05]]

In [None]:
# a simple LSTM architecture

# sess = tf.Session()

# tf.set_random_seed(seed())

def build_graph(*, 
                vocab_size,
                state_size,
                batch_size,
                num_classes,
                keep_prob, 
                embedding_matrix):

    reset_graph()

    # Placeholders
    x = tf.placeholder(tf.int32, [batch_size, None]) # [batch_size, num_steps]
    seqlen = tf.placeholder(tf.int32, [batch_size])
    y = tf.placeholder(tf.int32, [batch_size])
    keep_prob = tf.constant(keep_prob)

    # Embedding layer
    embeddings = tf.get_variable('embedding_matrix', [vocab_size, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    # RNN
#     cell = tf.nn.rnn_cell.GRUCell(state_size)
    cell = tf.contrib.rnn.BasicLSTMCell(state_size, forget_bias=1.0)
    
#     init_state = tf.get_variable('init_state', [1, state_size],
#                                  initializer=tf.constant_initializer(0.0))
#     init_state = tf.tile(init_state, [batch_size, 1])
#     rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, sequence_length=seqlen,
#                                                  initial_state=init_state)
    outputs, states = rnn.static_rnn(cell, x, dtype=tf.float32)

    # Add dropout, as the model otherwise quickly overfits
    cell_outputs = tf.nn.dropout(outputs, keep_prob)

    """
    Obtain the last relevant output. The best approach in the future will be to use:

        last_rnn_output = tf.gather_nd(rnn_outputs, tf.pack([tf.range(batch_size), seqlen-1], axis=1))

    which is the Tensorflow equivalent of numpy's rnn_outputs[range(30), seqlen-1, :], but the
    gradient for this op has not been implemented as of this writing.

    The below solution works, but throws a UserWarning re: the gradient.
    """
#     idx = tf.range(batch_size)*tf.shape(rnn_outputs)[1] + (seqlen - 1)
#     last_rnn_output = tf.gather(tf.reshape(rnn_outputs, [-1, state_size]), idx)

    # Softmax layer
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    logits = tf.matmul(cell_outputs[-1], W) + b
    preds = tf.nn.softmax(logits)
    
    correct = tf.equal(tf.cast(tf.argmax(preds,1),tf.int32), y)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits, y))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

    return {
        'x': x,
        'seqlen': seqlen,
        'y': y,
        'dropout': keep_prob,
        'loss': loss,
        'ts': train_step,
        'preds': preds,
        'accuracy': accuracy
    }

In [None]:
# activate_bool = True
# if activate_bool:
#     # vectorizer transforms dict into sparse matrix
#     v = DictVectorizer(sparse=True)

#     # create a sparse X matrix with character and n-grams features
#     X = v.fit_transform(x)

In [None]:
def lstm_unit(*, 
              input, 
              size_in, 
              size_out, 
              name="LSTM", 
              activate = tf.nn.relu, 
              activate_bool = True,
              n_input, 
#               n_steps, 
              n_hidden):
    # activate can be (commonly):
    # tf.sigmoid
    # tf.nn.relu
    # tf.tanh
    # tf.nn.relu6
    assert activate in [tf.nn.relu, 
                        tf.nn.relu6, 
                        tf.sigmoid, 
                        tf.tanh], 'Please choose activation function from the given set'
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        
#         # Permuting batch_size and n_steps
#         x = tf.transpose(input, [1, 0, 2])
#         # Reshape to (n_steps*batch_size, n_input)
#         x = tf.reshape(x, [-1, n_input])
#         # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
#         x = tf.split(0, n_steps, x)
        
#         lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        lstm_fw_cell = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        
        output, state = lstm_fw_cell(input, state, dtype=tf.float32)        
        
        lin_activations = tf.matmul(output[-1], w) + b
        
        # apply activation if 'activate_bool'
        if activate_bool:
            act = activate(lin_activations)
        else:
            act = lin_activations
        
#         act = activate(tf.matmul(input, w) + b)
        
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

    
def make_hparam_string(learning_rate, fc, conv):
    conv_param = 'conv={:d}'.format(conv)
    fc_param = 'conv={:d}'.format(fc)
#     return "lr_%.0E,%s,%s" % (learning_rate, conv_param, fc_param)
    return 'lr_{:.0E},{},{}'.format(learning_rate, conv_param, fc_param)


def simple_lstm_model(*, 
                      input_x, 
                      input_y, 
                      learn_rate, 
                      n, 
                      class_dim, 
                      char_embed_dim, 
                      n_char,
                      keep_prob,
                      iterations, 
                      hparam_str):
    
    reset_graph()
    
    sess = tf.Session()
    
    tf.set_random_seed(seed())
    
#     x = tf.placeholder(tf.float32, shape=[None, char_embed_dim], name="x")
#     x_image = tf.reshape(x, [-1, 28, 28, 1])
#     tf.summary.image('input', x_image, 3)
#     y = tf.placeholder(tf.float32, shape=[None, class_dim], name="labels")
    
    # Placeholders
    x = tf.placeholder(tf.int32, [batch_size, None], name='x') # [batch_size, num_steps]
    seq_len = tf.placeholder(tf.int32, [batch_size])
    y = tf.placeholder(tf.int32, [batch_size], name='labels')
    keep_prob = tf.constant(keep_prob)
    
    # Embedding layer
    embeddings = tf.get_variable('embedding_matrix', [n_char, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)    
    
    
    logits = lstm_unit(x)

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
    tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()
    
    saver = tf.train.Saver()
    
    for i in range(iterations + 1):
        if i % 5 == 0:  
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: input_x, y: input_y})
            writer.add_summary(s, i)
            print('Iteration number {}, Accuracy is currently {}'.format(i, train_accuracy))
        if i % 500 == 0:
            saver.save(sess, os.path.join(kwargs_tf_simple.log_dir, "model.ckpt"), i)
        
        sess.run(train_step, feed_dict={x: input_x, y: input_y})
        
    
# https://www.tensorflow.org/tutorials/recurrent
# https://www.tensorflow.org/programmers_guide/reading_data#preloaded_data
# https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup
# https://www.tensorflow.org/api_guides/python/nn#Embeddings
# https://github.com/dhwajraj/deep-siamese-text-similarity/blob/master/siamese_network.py

# source code, from lines 125, 141
# https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py

In [None]:
simple_lstm_model(input_x, 
                  input_y, 
                  learn_rate, 
                  n, 
                  class_dim, 
                  char_embed_dim, 
                  iterations, 
                  hparam_str)

In [None]:
# kwargs_tf_simple.log_dir
# os.path.join(os.path.curdir + '/logdir/')

In [None]:
### MNIST EMBEDDINGS ###
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(train_dir=kwargs_tf_simple.log_dir + 'data', one_hot=True)
### Get a sprite and labels file for the embedding projector ###
urllib.request.urlretrieve(kwargs_tf_simple.GIST_URL + 'labels_1024.tsv', kwargs_tf_simple.log_dir + 'labels_1024.tsv')
urllib.request.urlretrieve(kwargs_tf_simple.GIST_URL + 'sprite_1024.png', kwargs_tf_simple.log_dir + 'sprite_1024.png')

pcp1()

def conv_layer(input, size_in, size_out, name="conv"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([5, 5, size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
    tf.reset_default_graph()
    sess = tf.Session()
    
    tf.set_random_seed(seed())

    # Setup placeholders, and reshape the data
    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 3)
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    if use_two_conv:
        conv1 = conv_layer(x_image, 1, 32, "conv1")
        conv_out = conv_layer(conv1, 32, 64, "conv2")
    else:
        conv1 = conv_layer(x_image, 1, 64, "conv")
        conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    flattened = tf.reshape(conv_out, [-1, 7 * 7 * 64])


    if use_two_fc:
        fc1 = fc_layer(flattened, 7 * 7 * 64, 1024, "fc1")
        embedding_input = fc1
        embedding_size = 1024
        logits = fc_layer(fc1, 1024, 10, "fc2")
    else:
        embedding_input = flattened
        embedding_size = 7*7*64
        logits = fc_layer(flattened, 7*7*64, 10, "fc")

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
    tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()


    embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
    assignment = embedding.assign(embedding_input)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(kwargs_tf_simple.log_dir + hparam)
    writer.add_graph(sess.graph)

    config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
    embedding_config = config.embeddings.add()
    embedding_config.tensor_name = embedding.name
    embedding_config.sprite.image_path = kwargs_tf_simple.log_dir + 'sprite_1024.png'
    embedding_config.metadata_path = kwargs_tf_simple.log_dir + 'labels_1024.tsv'
    # Specify the width and height of a single thumbnail.
    embedding_config.sprite.single_image_dim.extend([28, 28])
    tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)

    pcp3()
    
    for i in range(1000 + 1):
        batch = mnist.train.next_batch(100)
        if i % 5 == 0:
            
#             pcp4()
            
            [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: batch[0], y: batch[1]})
            writer.add_summary(s, i)
            print('Iteration number {}, Accuracy is currently {}'.format(i, train_accuracy))
        if i % 500 == 0:
            sess.run(assignment, feed_dict={x: mnist.test.images[:1024], y: mnist.test.labels[:1024]})
            saver.save(sess, os.path.join(kwargs_tf_simple.log_dir, "model.ckpt"), i)
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
    conv_param = "conv=2" if use_two_conv else "conv=1"
    fc_param = "fc=2" if use_two_fc else "fc=1"
    return "lr_%.0E,%s,%s" % (learning_rate, conv_param, fc_param)

def main():
    
#     tf.set_random_seed(seed())
    
    # You can try adding some more learning rates
    for learning_rate in [1E-4]:

        # Include "False" as a value to try different model architectures
        for use_two_fc in [False]:
            for use_two_conv in [True]:
                # Construct a hyperparameter string for each one (example: "lr_1E-3,fc=2,conv=2)
                
                pcp2()
                
                hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
                print('Starting run for %s' % hparam)

                # Actually run with the new settings
                mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)


if __name__ == '__main__':
    main()

In [None]:
print('Done!')

In [None]:
# Iteration number 925, Accuracy is currently 0.5
# Iteration number 930, Accuracy is currently 0.6000000238418579
# Iteration number 935, Accuracy is currently 0.5699999928474426
# Iteration number 940, Accuracy is currently 0.550000011920929
# Iteration number 945, Accuracy is currently 0.5400000214576721
# Iteration number 950, Accuracy is currently 0.5799999833106995
# Iteration number 955, Accuracy is currently 0.6200000047683716
# Iteration number 960, Accuracy is currently 0.5199999809265137
# Iteration number 965, Accuracy is currently 0.5400000214576721
# Iteration number 970, Accuracy is currently 0.6700000166893005
# Iteration number 975, Accuracy is currently 0.5899999737739563
# Iteration number 980, Accuracy is currently 0.46000000834465027
# Iteration number 985, Accuracy is currently 0.5699999928474426
# Iteration number 990, Accuracy is currently 0.5899999737739563
# Iteration number 995, Accuracy is currently 0.550000011920929
# Iteration number 1000, Accuracy is currently 0.5799999833106995

In [None]:
reset_graph()
target_dense

In [None]:
target_dense.shape

In [None]:
reset_graph()

EPOCHS = 10
PRINT_STEP = 1

data = np.array([[1, 2, 3, 4, 5], [ 2, 3, 4, 5, 6], [3, 4, 5, 6, 7]])
target = np.array([[6], [7], [8]])

x_ = tf.placeholder(tf.float32, [None, data.shape[1]])
y_ = tf.placeholder(tf.float32, [None, 1])



cell = rnn.BasicLSTMCell(num_units=data.shape[1])

outputs, states = rnn.static_rnn(cell, [x_], dtype=tf.float32)
outputs = outputs[-1]

W = tf.Variable(tf.random_normal([data.shape[1], 1]))     
b = tf.Variable(tf.random_normal([1]))

y = tf.matmul(outputs, W) + b

cost = tf.reduce_mean(tf.square(y - y_))
train_op = tf.train.RMSPropOptimizer(0.005, 0.2).minimize(cost)

# init = tf.initialize_all_variables().run()

with tf.Session() as sess:
#     sess.run(init)
    tf.global_variables_initializer().run()
    for i in range(EPOCHS):
        sess.run(train_op, feed_dict={x_:data, y_:target})
        if i % PRINT_STEP == 0:
            c = sess.run(cost, feed_dict={x_:data, y_:target})
            print('training cost:', c)

    response = sess.run(y, feed_dict={x_:data})
    print(response)