In [14]:
# predict the next char
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import re
%matplotlib inline

In [2]:
def make_mini_batch(train_data, size_of_mini_batch, length_of_sequences):
    inputs  = np.empty(0)
    outputs = np.empty(0)
    for _ in range(size_of_mini_batch):
        index   = random.randint(0, len(train_data) - length_of_sequences - 1)
        inputs  = np.append(inputs, train_data[index:index + length_of_sequences])
        outputs = np.append(outputs, train_data[index + length_of_sequences])
    inputs  = inputs.reshape(-1, length_of_sequences, 128)
    outputs = outputs.reshape(-1, 128)
    return (inputs, outputs)

In [3]:
def make_prediction_initial(train_data, index, length_of_sequences):
    return train_data[index:index + length_of_sequences], train_data[index + length_of_sequences + 1]
#inputs, _  = make_prediction_initial(train_data, 0, length_of_initial_sequences)
#inputs.shape

In [4]:
train_data_path             = "./blog.txt"
num_of_char                 = 128
num_of_input_nodes          = 128
num_of_hidden_nodes         = 512
num_of_output_nodes         = 128
length_of_sequences         = 30
num_of_training_epochs      = 1000
length_of_initial_sequences = 30
num_of_prediction_epochs    = 100
size_of_mini_batch          = 4
learning_rate               = 0.001
forget_bias                 = 1.0

In [5]:
#train_data = np.load(train_data_path)
f = open(train_data_path)
text = f.read()
f.close()
train_data = np.eye(num_of_char)[[ord(x) for x in text if ord(x) < 128]]

In [6]:
def inference(input_ph, size_of_mini_batch_ph):
    with tf.name_scope("inference") as scope:
        weight1_var = tf.Variable(tf.truncated_normal([num_of_input_nodes, num_of_hidden_nodes], stddev=0.1), name="weight1")
        weight2_var = tf.Variable(tf.truncated_normal([num_of_hidden_nodes, num_of_output_nodes], stddev=0.1), name="weight2")
        bias1_var   = tf.Variable(tf.truncated_normal([num_of_hidden_nodes], stddev=0.1), name="bias1")
        bias2_var   = tf.Variable(tf.truncated_normal([num_of_output_nodes], stddev=0.1), name="bias2")

        weight1_hist = tf.histogram_summary("layer1/weights", weight1_var)
        weight2_hist = tf.histogram_summary("layer2/weights", weight2_var)
        bias1_hist = tf.histogram_summary("layer1/biases", bias1_var)
        bias2_hist = tf.histogram_summary("layer2/biases", bias2_var)
        
        # pre rnn
        in1 = tf.transpose(input_ph, [1, 0, 2])         # (batch, sequence, data) -> (sequence, batch, data)
        in2 = tf.reshape(in1, [-1, num_of_input_nodes]) # (sequence, batch, data) -> (sequence * batch, data)
        in3 = tf.matmul(in2, weight1_var) + bias1_var
        in4 = tf.split(0, length_of_sequences, in3)     # sequence * (batch, data)

        cell = tf.nn.rnn_cell.BasicLSTMCell(num_of_hidden_nodes, forget_bias=forget_bias, state_is_tuple=True)
        #cell = tf.nn.rnn_cell.MultiRNNCell([_cell] * 3, state_is_tuple=True)
        initial_state = cell.zero_state(size_of_mini_batch_ph, tf.float32)
        rnn_output, states_op = tf.nn.rnn(cell, in4, initial_state=initial_state, dtype=tf.float32)
        
        # post rnn
        #output_op = tf.matmul(rnn_output[-1], weight2_var) + bias2_var
        out1 = tf.matmul(rnn_output[-1], weight2_var) + bias2_var
        output_op = out1
        #output_argmax = tf.argmax(out1, 0)

    return output_op, rnn_output

In [7]:
def loss(output_op):
    with tf.name_scope("loss") as scope:
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output_op, supervisor_ph))
        loss_op = cross_entropy
        tf.scalar_summary("loss", loss_op)
    return loss_op

In [8]:
def train(loss_op):
    with tf.name_scope("training") as scope:
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        training_op = optimizer.minimize(loss_op)
    return training_op

In [18]:
with tf.Graph().as_default():
    input_ph      = tf.placeholder(tf.float32, [None, length_of_sequences, num_of_input_nodes], name="input")
    supervisor_ph = tf.placeholder(tf.float32, [None, num_of_output_nodes], name="supervisor")
    size_of_mini_batch_ph = tf.placeholder(tf.int32, name="size_of_mini_batch")

    output_op, rnn_output = inference(input_ph, size_of_mini_batch_ph)
    loss_op = loss(output_op)
    training_op = train(loss_op)

    summary_op = tf.merge_all_summaries()
    #istate_ph = cell.zero_state(size_of_mini_batch, tf.float32)

    with tf.Session() as sess:
        summary_writer = tf.train.SummaryWriter("data", graph=sess.graph)
        """
        # random seed fix
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)
        """
        epoch_start = 0
        ckpt = tf.train.get_checkpoint_state("./train/")
        saver = tf.train.Saver()
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            epoch_start = int(re.search(r"(\d+)$", ckpt.model_checkpoint_path).group())
        else:
            print("Created model with fresh parameters.")
            sess.run(tf.initialize_all_variables())
        # init variables

        for epoch in range(epoch_start, num_of_training_epochs):
            inputs, supervisors = make_mini_batch(train_data, size_of_mini_batch, length_of_sequences)
            #state = sess.run(istate_ph)

            train_dict = {
                input_ph:      inputs,
                supervisor_ph: supervisors,
                size_of_mini_batch_ph: size_of_mini_batch,
            }
            rnnout, _ = sess.run([rnn_output, training_op], feed_dict=train_dict)
#            print("rnnout: {}".format(rnnout[-1]))

            if epoch % 10 == 0:
                summary_str, train_loss = sess.run([summary_op, loss_op], feed_dict=train_dict)
                summary_writer.add_summary(summary_str, epoch)
                print("train#%d, train loss: %e" % (epoch, train_loss))

            if epoch % 10 == 0:
                saver.save(sess, "train/translate.ckpt", global_step=epoch)

        inputs, _  = make_prediction_initial(train_data, 0, length_of_initial_sequences)
#        outputs = np.empty(0)
        outputs = np.array([[0 for _ in range(128)]])

#        istate_ph = cell.zero_state(1, tf.float32)
#        sess.run(istate_ph)

        print("prediction: ")
        for epoch in range(num_of_prediction_epochs):
            pred_dict = {
                input_ph:  [inputs],
                size_of_mini_batch_ph: 1
            }
#            output, states = sess.run([output_op, states_op], feed_dict=pred_dict)
            output = sess.run(output_op, feed_dict=pred_dict)
#            print(output)
            output_onehotvec = np.eye(num_of_char)[[np.argmax(output)]]
            inputs  = np.delete(inputs, 0, 0)
            #inputs  = np.append(inputs, output_onehotvec, 0)
            inputs  = np.append(inputs, output_onehotvec, 0)
            outputs = np.append(outputs, output_onehotvec, 0)
        
        outputs  = np.delete(outputs, 0, 0)
        output_ascii = np.argmax(outputs.reshape(num_of_prediction_epochs, num_of_output_nodes), axis=1)
        print("asciis: ", output_ascii)
        print("outputs:", [chr(x) for x in output_ascii])

Reading model parameters from ./train/translate.ckpt-550
train#550, train loss: 2.791191e+00
train#560, train loss: 2.811883e+00
train#570, train loss: 2.522734e+00
train#580, train loss: 1.612566e+00
train#590, train loss: 1.247543e+00
train#600, train loss: 1.844284e+00
train#610, train loss: 2.224573e+00
train#620, train loss: 1.902623e+00
train#630, train loss: 2.860112e+00
train#640, train loss: 1.478112e+00


KeyboardInterrupt: 

In [137]:
ins = inputs
outs = outputs
out = output
out_ohv = output_onehotvec

print("inputs: {}".format(ins.shape))
print("outputs: {}".format(outs.shape))
print("output: {}".format(out.shape))
print("onehotvec: {}".format(out_ohv.shape))

inputs: (30, 128)
outputs: (100, 128)
output: (1, 128)
onehotvec: (1, 128)
