## Language model for Wilhelm Meister

This model handles character input of variable length.

In [2]:
import numpy as np
import tensorflow as tf
np.random.seed(10)

import warnings
warnings.filterwarnings('ignore')

### Loading Data

In [3]:
# Load the pre-built arrays and pin them to the expected layout:
# X is (examples, 30, 67) and y is (examples, 67).
X_data = np.load("../../../data/goethe/X_arr.npy").reshape(601216,30,67)
y_data = np.load("../../../data/goethe/y_arr.npy").reshape(601216,67)
for loaded in (X_data, y_data):
    print(loaded.shape)

(601216, 30, 67)
(601216, 67)


In [4]:
# Pair every input with its target so the examples can be shuffled together.
# Each element keeps a leading batch axis of 1: x is (1,30,67), y is (1,67).
all_data = [
    (x_row.reshape(1,30,67), y_row.reshape(1,67))
    for x_row, y_row in zip(X_data, y_data)
]

### Model Functions

In [5]:
def get_placeholders():
    """Create the graph inputs.

    Returns:
        (X, y): float32 placeholders of shape [None,30,67] (named 'X') and
        [None,67] (named 'Y').
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=[None,30,67], name='X')
    targets = tf.placeholder(dtype=tf.float32, shape=[None,67], name='Y')
    return inputs,targets

In [6]:
# Softmax cross entropy between the predicted logits and the true next character
def cost_function(logits,y):
    """Return the mean softmax cross-entropy of `logits` against the labels `y`."""
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,logits=logits)
    return tf.reduce_mean(per_example)

In [7]:
# counts the time steps that carry any non-zero value (i.e. non-padding input)
def get_len(seq):
    """Return, per batch row, how many time steps of `seq` are not all-zero.

    A step counts as used when the largest absolute value along axis 2 is
    non-zero; the per-row counts are returned as an int32 tensor.
    """
    step_is_used = tf.sign(tf.reduce_max(tf.abs(seq),2)) # 1.0 for used steps, 0.0 for all-zero steps
    return tf.cast(tf.reduce_sum(step_is_used, 1), tf.int32) # sum over the time axis

In [8]:
# Unidirectional rnn
def rnn_cell(the_input):
    """Run a 256-unit LSTM over `the_input` and project the last valid step to 67 logits.

    The previous version picked the output with `outputs[-1][num_chars-1]`
    after reshaping the length tensor to a scalar, which only works for a
    batch of exactly one example. `dynamic_rnn` copies the state through once
    a row's `sequence_length` is reached, so the final hidden state is already
    the output of the last non-padding step for every row. Using it gives the
    same result for a single example and also supports larger batches.

    Args:
        the_input: float tensor whose per-row lengths are derived by `get_len`.

    Returns:
        Logits tensor with 67 values per batch row (no activation applied).
    """
    seq_len = get_len(the_input) # computed once and reused
    forward_cell = tf.nn.rnn_cell.LSTMCell(num_units=256,activation=tf.nn.tanh,reuse=tf.AUTO_REUSE,name="lstm0")
    _,final_state = tf.nn.dynamic_rnn(forward_cell,inputs=the_input,dtype=tf.float32,sequence_length=seq_len)
    last_output = final_state.h # hidden state at each row's last valid step
    out = tf.layers.dense(last_output,67,activation=None,kernel_initializer=tf.contrib.layers.xavier_initializer(seed=0))
    return out

In [9]:
# Pull a couple of examples out of the data set for the smoke test below
ax, ay = all_data[0]
ax2 = all_data[1][0]
print(ax.shape)
print(ay.shape)

(1, 30, 67)
(1, 67)


In [10]:
# Smoke test: build the graph once and evaluate prediction + cost on one example
X,y = get_placeholders()
pred = rnn_cell(X)
cost = cost_function(pred,y)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # fetch both tensors from the same forward pass
    out,acost = sess.run([pred,cost],feed_dict={X:ax,y:ay})

print(acost)
print(out.shape)

4.169738
(1, 67)


In [11]:
def model(all_data,lr=0.001,num_epochs=1,retrain=True,print_cost=False,
          checkpoint_path="../../../data/goethe/model/language_model.ckpt"):
    """Train the character model one example at a time, checkpointing after every epoch.

    Args:
        all_data: list of (x, y) example tuples; it is shuffled IN PLACE at the
            start of every epoch.
        lr: learning rate passed to the Adam optimizer.
        num_epochs: number of passes over `all_data`.
        retrain: when True, restore all variables from `checkpoint_path` and
            continue training; when False, start from freshly initialised variables.
        print_cost: when True, print the cost contributed by every training step.
        checkpoint_path: where the model is restored from and saved to.

    Returns:
        List holding the summed cost of each epoch.
    """
    tf.reset_default_graph() # resetting graph
    tf.set_random_seed(1)
    costs = []

    X,y = get_placeholders()
    pred = rnn_cell(X)
    cost = cost_function(pred,y)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads, variables = zip(*optimizer.compute_gradients(cost))
    grads, _ = tf.clip_by_global_norm(grads, 5.0) # gradient clipping
    train_op = optimizer.apply_gradients(zip(grads, variables))

    init = tf.global_variables_initializer()
    # Built after the optimizer so that its variables are checkpointed as well.
    saver = tf.train.Saver() # to save/load model
    with tf.Session() as sess:
        if retrain:
            # The graph above already defines every variable, so restore straight
            # into it. Importing the .meta file here would add a second copy of
            # the whole graph to the default graph.
            saver.restore(sess, checkpoint_path)
        else:
            sess.run(init) # only run init for new model

        for epoch in range(num_epochs):
            start = time.time()
            running_cost = 0
            old_running_cost = 0 # used to show total cost change per stochastic step
            np.random.shuffle(all_data) # shuffling the data each epoch
            for (ax,ay) in all_data:
                _,temp_cost = sess.run([train_op,cost], feed_dict={X:ax,y:ay})
                running_cost += temp_cost

                if print_cost:
                    cost_change = running_cost - old_running_cost
                    old_running_cost = running_cost
                    print("Cost change:",cost_change)

            costs.append(running_cost)
            print("Cost at epoch {}: {}, took: {}".format(epoch+1,running_cost,time.time()-start))
            saver.save(sess, checkpoint_path) # save model on each epoch

    return costs

In [12]:
import time

In [13]:
# Train a fresh model (no checkpoint restore) and report the total wall-clock time
start = time.time()
acosts = model(
    all_data,
    lr=0.001,
    num_epochs=5,
    retrain=False,
    print_cost=False,
)
print(time.time()-start)

Cost at epoch 1: 965676.767146199
Cost at epoch 2: 849474.8872904718
Cost at epoch 3: 824538.1005650768
Cost at epoch 4: 814300.9861731377
Cost at epoch 5: 808328.5675513552
65225.13166999817


In [None]:
# Continue training from the saved checkpoint at the same learning rate
acosts = model(
    all_data,
    lr=0.001,
    num_epochs=10,
    retrain=True,
    print_cost=False,
)

INFO:tensorflow:Restoring parameters from ../../../data/goethe/model/language_model.ckpt
Cost at epoch 1: 807290.2152265992, took:12127.424995183945
Cost at epoch 2: 807875.3710603345, took:13286.836048841476
Cost at epoch 3: 817729.0371951577, took:11121.283535003662
Cost at epoch 4: 833756.3013035274, took:14971.63272690773
Cost at epoch 5: 861691.0771627927, took:11139.098929166794
Cost at epoch 6: 897471.2254729965, took:9835.154791116714
Cost at epoch 7: 978145.6041760879, took:9888.452490568161


In [13]:
# the first portion of the trained model was reloaded and trained here,
# this time with a learning rate of 0.0001
acosts = model(
    all_data,
    lr=0.0001,
    num_epochs=10,
    retrain=True,
    print_cost=False,
)

INFO:tensorflow:Restoring parameters from ../../../data/goethe/model/language_model.ckpt
Cost at epoch 1: 740841.3659302207, took: 17452.68856716156
Cost at epoch 2: 720019.2352663989, took: 15542.097027778625
Cost at epoch 3: 711996.1797979918, took: 18953.772819042206
Cost at epoch 4: 706761.4022440105, took: 15239.236209869385
Cost at epoch 5: 702348.8793506197, took: 12850.624649047852
Cost at epoch 6: 700080.791130583, took: 13272.373820781708
Cost at epoch 7: 697224.9187926188, took: 19427.749516010284
Cost at epoch 8: 695899.0947302906, took: 19249.488911151886
Cost at epoch 9: 694935.2619768975, took: 17169.080928087234
Cost at epoch 10: 694799.0752039538, took: 12794.184622049332


### Sampling using the novel

In [15]:
# builds the one hot model input for a single character
def get_one_hot_encoding(char):
    """Return a (1,1,67) array that is 1 at the index `encoder[char]` and 0 elsewhere.

    Raises KeyError when `char` is not a key of the global `encoder`.
    """
    char_index = encoder[char]
    hot_vec = np.zeros((1,1,67)) # 66 characters indexed from 1, so 67 slots
    hot_vec[0,0,char_index] = 1
    return hot_vec

In [16]:
# Character <-> integer lookup tables for every character in the novel
keys = ['B', 'o', 'k', ' ', 'I', 'C', 'h', 'a', 'p', 't', 'e', 'r', '\n', 
        'T', 'H', 'E', 'P', 'L', 'A', 'Y', 'w', 's', 'l', 'i', 'n', 'b', 
        'g', 'u', ':', 'd', 'm', 'c', ',', 'f', '.', 'S', 'M', 'y', '’', 
        'N', 'v', ';', '-', 'x', 'O', 'q', '!', '“', 'W', '?', '”', 'j', 
        'z', 'V', 'J', 'G', 'D', 'F', '‘', 'K', '—', 'U', 'Q', 'R', 'X', 'Z']

# ids are assigned in list order starting at 1, so id 0 is never used
decoder = dict(enumerate(keys, start=1)) # int -> character
encoder = {char: key_no for key_no, char in enumerate(keys, start=1)} # character -> int

In [17]:
# Sampling step: runs the lstm from a supplied previous state and predicts one character
def sample_cell(the_input,a,c):
    """Run the LSTM from the state (a, c) and return the predicted character id.

    Args:
        the_input: input tensor for this step.
        a, c: the two halves of the previous LSTM state, wrapped as
            LSTMStateTuple(a, c).
            NOTE(review): TF documents LSTMStateTuple's fields as (c, h), so `a`
            appears to fill the cell-state slot; confirm the naming is intended.

    Returns:
        (achar, curr_state): scalar argmax of the softmax output, and the LSTM
        state after this step.
    """
    seq_len = get_len(the_input)
    lstm = tf.nn.rnn_cell.LSTMCell(num_units=256,activation=tf.nn.tanh,reuse=tf.AUTO_REUSE,name="lstm0")
    outputs,curr_state = tf.nn.dynamic_rnn(
        lstm,
        inputs=the_input,
        initial_state=tf.contrib.rnn.LSTMStateTuple(a,c),
        dtype=tf.float32,
        sequence_length=seq_len,
    )
    last_step = tf.reshape(seq_len,()) - 1 # index of the last non-padding step
    flat = tf.reshape(outputs[-1][last_step], [1,256]) # output of that step

    logits = tf.layers.dense(flat,67,activation=None,kernel_initializer=tf.contrib.layers.xavier_initializer(seed=0))
    probs = tf.nn.softmax(logits) # softmax on the logits
    achar = tf.reshape(tf.argmax(probs,axis=-1),()) # character in int representation

    return achar,curr_state # returns the curr state as well as pred

In [21]:
# Greedy sampling: feed each predicted character back in as the next input.
tf.reset_default_graph()
x = tf.placeholder(tf.float32, shape=[1, 1, 67])
a = tf.placeholder(tf.float32, shape=[1, 256])
c = tf.placeholder(tf.float32, shape=[1, 256])
out = sample_cell(x,a,c)

start_char = "M"
all_chars = start_char
next_input = start_char # character fed to the model on the next step
past_state = (np.zeros((1,256)), np.zeros((1,256))) # the very first step starts from an all-zero state

# sample_cell has already created the variables in this graph, so a plain Saver
# can restore them; importing the .meta file would only add a second graph copy.
saver = tf.train.Saver()
with tf.Session() as sess: # the session is closed when sampling finishes
    saver.restore(sess, "../../../data/goethe/model/language_model.ckpt") # loading model

    for i in range(0,1000): # sampling for n iterations
        ax = get_one_hot_encoding(next_input)
        # the two halves of the returned state go back into the same slots they came from
        (achar,past_state) = sess.run(out,feed_dict={x:ax,a:past_state[0],c:past_state[1]})
        next_input = decoder[achar]
        all_chars += next_input # adding to the string
        

INFO:tensorflow:Restoring parameters from ../../../data/goethe/model/language_model.ckpt


In [22]:
all_chars

'Mignon had been seen the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the stage, and the stranger was to the contrary, I was not to be a part of the