In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random

## 1. Load the data

In [6]:
text = "I am a boy"
print(set(text))

{'b', 'a', 'y', 'o', 'I', ' ', 'm'}


In [2]:
# Read the data
corpus = ""
with open('shakespeare.txt', 'r') as f:
    corpus += f.read()
corpus = corpus.lower()

In [3]:
# Construct character vocabulary
vocab = list(set(corpus))
print(vocab)

['w', 'v', '3', "'", 'p', 'f', 'u', ' ', 'r', 't', 'k', ':', '&', 'e', '!', 'x', 'g', 'l', 'b', '-', 'a', 'q', ';', '.', 'c', '\n', 'y', 'i', 'd', 'h', 's', 'o', 'z', 'n', ',', '$', 'j', '?', 'm']


In [20]:
def vocab_encoding(corpus, vocab):
    output = np.zeros((len(corpus), len(vocab)))
    
    cnt = 0
    for char in corpus:
        v = [0.0] * len(vocab)
        v[vocab.index(char)] = 1.0
        output[cnt, :] = v
        cnt += 1
    
    return output
    

In [22]:
data = vocab_encoding(corpus=corpus, vocab=vocab)

# 2. Define parameters

In [6]:
## Model structure parameters
in_size = len(vocab) # Size of input vectors at each time step
hidden_size = 64 # Size of hidden state vector
num_layers = 1 # Number of hidden layers
out_size = len(vocab) # Size of output vectors at each time step

learning_rate = 0.001 # Learning rate

# Data and train parameters
batch_size = 64 # Training batch size
time_steps = 50 # (Maximum) number of time steps in each batch
num_epochs = 10000
display_interval = 20

# 3. Make the graph

## 3.1. Placeholders

In [7]:
# Placeholder for inputs: shape [batch_size, timesteps, in_size]
X = tf.placeholder(tf.float32, shape=[None, None, in_size], name='input_X')

# Placeholder for outputs: shape [batch_size, timesteps, in_size]
Y = tf.placeholder(tf.float32, shape=[None, None, out_size], name='target_Y')

# Placeholder for initial state
state_size = num_layers * 2 * hidden_size
hidden_init = tf.placeholder(tf.float32, shape=[None, state_size])

## 3.2. RNN

In [8]:
hidden_cells = [rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=False) for i in range(num_layers)]
hidden = rnn.MultiRNNCell(hidden_cells, state_is_tuple=False)

outputs, hidden_new_state = tf.nn.dynamic_rnn(cell=hidden, 
                                              inputs=X, 
                                              initial_state=hidden_init, 
                                              dtype=tf.float32)

W = tf.get_variable(name='weights', 
                    shape=[hidden_size, out_size], 
                    initializer=tf.contrib.layers.xavier_initializer())
b = tf.get_variable(name='biases', 
                    shape=[out_size], 
                    initializer=tf.constant_initializer(0.0))

outputs_reshaped = tf.reshape(outputs, [-1, hidden_size])
logits = tf.nn.xw_plus_b(x=outputs_reshaped, weights=W, biases=b, name='logits')

batch_time_shape = tf.shape(outputs)
outputs_activated = tf.reshape(tensor=tf.nn.softmax(logits), 
                               shape=[batch_time_shape[0], batch_time_shape[1], out_size])



In [9]:
print(X)
print(Y)

print(outputs)
print(outputs_reshaped)

print(logits)

Tensor("input_X:0", shape=(?, ?, 39), dtype=float32)
Tensor("target_Y:0", shape=(?, ?, 39), dtype=float32)
Tensor("rnn/transpose:0", shape=(?, ?, 64), dtype=float32)
Tensor("Reshape:0", shape=(?, 64), dtype=float32)
Tensor("logits:0", shape=(?, 39), dtype=float32)


### 3.3. Cost and training operator

In [10]:
Y_batch_flatten = tf.reshape(Y, [-1, out_size])

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_batch_flatten))
train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# 4. Run the graph

In [11]:
# Declare the session
sess = tf.Session()

In [12]:
# Initialize all variables
sess.run(tf.global_variables_initializer())

## 4.1. Make batches and train them

In [13]:
batch_x = np.zeros([batch_size, time_steps, in_size])
batch_y = np.zeros([batch_size, time_steps, in_size])

In [14]:
possible_batch_idx = range(data.shape[0] - time_steps - 1)

for i in range(num_epochs):
    batch_id = random.sample(population=possible_batch_idx, 
                             k=batch_size)
    
    for j in range(time_steps):
        idx_X = [k + j for k in batch_id]
        idx_Y = [k + j + 1 for k in batch_id]
        
        batch_x[:, j, :] = data[idx_X, :]
        batch_y[:, j, :] = data[idx_Y, :]
    
    init_value = np.zeros((batch_x.shape[0], state_size))
    training_cost, _ = sess.run([cost, train_op], feed_dict={X:batch_x, Y:batch_y, hidden_init:init_value})
    
    if i % display_interval == 0:
        print("epoch: ", i, "\tcost: ", training_cost)

epoch:  0 	cost:  3.66473
epoch:  20 	cost:  3.66031
epoch:  40 	cost:  3.6536
epoch:  60 	cost:  3.63497
epoch:  80 	cost:  3.59763
epoch:  100 	cost:  3.49742
epoch:  120 	cost:  3.2099
epoch:  140 	cost:  3.1362
epoch:  160 	cost:  3.08786
epoch:  180 	cost:  3.0691
epoch:  200 	cost:  3.05458
epoch:  220 	cost:  3.02659
epoch:  240 	cost:  2.97358
epoch:  260 	cost:  2.93546
epoch:  280 	cost:  2.90337
epoch:  300 	cost:  2.88383
epoch:  320 	cost:  2.8073
epoch:  340 	cost:  2.73717
epoch:  360 	cost:  2.71905
epoch:  380 	cost:  2.63051
epoch:  400 	cost:  2.63928
epoch:  420 	cost:  2.59724
epoch:  440 	cost:  2.56152
epoch:  460 	cost:  2.57654
epoch:  480 	cost:  2.531
epoch:  500 	cost:  2.47867
epoch:  520 	cost:  2.47099
epoch:  540 	cost:  2.49559
epoch:  560 	cost:  2.43446
epoch:  580 	cost:  2.4354
epoch:  600 	cost:  2.38725
epoch:  620 	cost:  2.40215
epoch:  640 	cost:  2.38854
epoch:  660 	cost:  2.40684
epoch:  680 	cost:  2.36739
epoch:  700 	cost:  2.33774
epoch:

In [15]:
saver = tf.train.Saver(tf.global_variables())

In [16]:
saver.save(sess, "saved/model.ckpt")

Type is unsupported, or the types of the items don't match field type in CollectionDef.
'dict' object has no attribute 'name'


'saved/model.ckpt'

## 4.2. Test

In [17]:
# Pre-allocate 'hidden_last_state'
hidden_last_state = np.zeros(state_size)

In [19]:
def test_op(x, init_zero_state=True):
    ## Reset the initial state of the network
    if init_zero_state:
        init_value = np.zeros(state_size)
    else:
        init_value = hidden_last_state
    out, hidden_next_state = sess.run([outputs_activated, hidden_new_state], feed_dict={X:[x], hidden_init:[init_value]})
    
    return out[0][0], hidden_next_state[0]

In [23]:
TEST_PREFIX = 'The '
TEST_PREFIX = TEST_PREFIX.lower()

for i in range(len(TEST_PREFIX)):
    test_data = vocab_encoding(corpus=TEST_PREFIX[i], vocab=vocab)
    out, hidden_last_state = test_op(test_data)

In [26]:
print("SENTENCE: ")
gen_str = TEST_PREFIX
for i in range(500):
    element = np.random.choice(range(len(vocab)), p=out)
    gen_str += vocab[element]
    
    out, _ = test_op(vocab_encoding(vocab[element], vocab), init_zero_state=False)
print(gen_str)

SENTENCE: 
the ma iniloloreara ini myofofofofenelonofashamofamofomevenedofeachanoupenofrolalinovinenedreyono i conextheanoupewithenonousisha ofounouneno henousthevithisofo counokifouno isofofofleyoflofofleneyofofunofoflenopasoupofoupunoupali o?
grenofofoflionenousofonenounofenofrisonofevewhofrofofedispreasekitomanofofoupi evinonothi thenononino ounouthonexenofononorono.
ealonofo, heli imyenofanofropasa
erisi inistofexi ca i inoofo ewofofanonafofino; isprofrewexunofi isevelinofi itinofasofinacofo ofaneanofokine
