## Language model for Wilhelm Meister

In [1]:
import numpy as np
import tensorflow as tf
np.random.seed(10)

import warnings
warnings.filterwarnings('ignore')

### Loading Data

In [2]:
# Load the pre-built input windows (X) and their target rows (y) from disk.
X_data = np.load("../../../data/goethe/X_arr.npy")
y_data = np.load("../../../data/goethe/y_arr.npy")
# reshape(-1, ...) infers the number of examples from the file instead of
# hard-coding it, so the cell keeps working if the arrays are regenerated.
# It also avoids assigning to `.shape` in place, which NumPy discourages.
X_data = X_data.reshape(-1, 30, 67)
y_data = y_data.reshape(-1, 67)
assert X_data.shape[0] == y_data.shape[0], "X and y must contain the same number of examples"
print(X_data.shape)
print(y_data.shape)

(601213, 30, 67)
(601213, 67)


In [3]:
# creating a data set of (X, y) tuples for easier shuffling
# Slicing with i:i+1 (rather than indexing with i) keeps a leading axis of
# size 1, so every element is already shaped (1, 30, 67) / (1, 67) and can be
# fed to the placeholders directly. No in-place `.shape` assignment is needed
# and no loop variables are left behind in the notebook namespace.
all_data = [(X_data[i:i+1], y_data[i:i+1]) for i in range(X_data.shape[0])]

### Model Functions

In [4]:
def get_placeholders():
    """Create the two graph inputs used by the model.

    Returns:
        A pair ``(X, y)`` of float32 placeholders:
        ``X`` is named 'X' with shape [None, 30, 67] and
        ``y`` is named 'Y' with shape [None, 67].
        The leading ``None`` dimension leaves the number of rows unspecified.
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=[None, 30, 67], name='X')
    targets = tf.placeholder(dtype=tf.float32, shape=[None, 67], name='Y')
    return inputs, targets

In [5]:
# Cross entropy loss to compare predicted char with actual char from novel
def cost_function(logits,y):
    """Return the mean softmax cross-entropy between `logits` and labels `y`.

    Args:
        logits: unnormalised scores produced by the network.
        y: target distribution with the same shape as `logits`.

    Returns:
        Scalar tensor: the per-row cross-entropy averaged over all rows.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y)
    return tf.reduce_mean(per_example_loss)

In [6]:
# returns, per sequence, the number of time steps holding any non-zero value
def get_len(seq):
    """Count the non-empty time steps of each sequence in a rank-3 tensor.

    A time step counts as used when at least one entry along axis 2 is
    non-zero. This equals the sequence length when padding consists of
    all-zero steps (presumably trailing; confirm against how the data
    was built).

    Args:
        seq: tensor indexed as [row, time step, feature].

    Returns:
        int32 tensor with one count per row.
    """
    step_peak = tf.reduce_max(tf.abs(seq), axis=2)  # largest |value| within each time step
    step_used = tf.sign(step_peak)  # 1 where the step has a non-zero entry, 0 otherwise
    return tf.cast(tf.reduce_sum(step_used, axis=1), tf.int32)

In [7]:
# Unidirectional rnn
def rnn_cell(the_input):
    """Run a one-layer LSTM over the input and map its last valid output to logits.

    For every row the output at time step ``length - 1`` is selected, where
    ``length`` comes from ``get_len``. The selection uses ``tf.gather_nd`` so
    that any number of rows is supported, not only a single one.

    Args:
        the_input: float tensor indexed as [row, time step, feature].

    Returns:
        Tensor of shape [rows, 67] holding unnormalised scores (no activation
        is applied by the final dense layer).
    """
    # Build the length op once and share it between the RNN and the gather.
    seq_lens = get_len(the_input)
    forward_cell = tf.nn.rnn_cell.LSTMCell(num_units=256,activation=tf.nn.tanh,reuse=tf.AUTO_REUSE,name="lstm0")
    outputs, _ = tf.nn.dynamic_rnn(forward_cell,inputs=the_input,dtype=tf.float32,sequence_length=seq_lens)

    # Index pairs (row, last valid step). Clamping at 0 keeps the index in
    # range for a row with no non-zero step at all.
    num_rows = tf.shape(outputs)[0]
    last_step = tf.maximum(seq_lens - 1, 0)
    gather_idx = tf.stack([tf.range(num_rows), last_step], axis=1)
    # The reshape pins the static feature size (256) that the dense layer needs.
    last_out = tf.reshape(tf.gather_nd(outputs, gather_idx), [-1, 256])

    out = tf.layers.dense(last_out,67,activation=None,kernel_initializer=tf.contrib.layers.xavier_initializer(seed=0))
    return out

In [8]:
# Pull out the first (X, y) pair to use as a smoke-test example
ax, ay = all_data[0]
for sample in (ax, ay):
    print(sample.shape)

(1, 30, 67)
(1, 67)


In [9]:
# Testing the forward prop and cost function
# Start from an empty graph so re-running this cell does not keep adding
# duplicate ops and variables to the default graph.
tf.reset_default_graph()
X,y = get_placeholders()
pred = rnn_cell(X)
cost = cost_function(pred,y)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Fetch the prediction and the cost in a single run so the forward pass
    # is evaluated once instead of twice.
    out, acost = sess.run([pred, cost], feed_dict={X:ax,y:ay})

print(acost)
print(out.shape)

4.209082
(1, 67)


In [18]:
def model(all_data,lr=0.001,num_epochs=1,retrain=True,print_cost=False,
          ckpt_path="../../../data/goethe/model/language_model.ckpt"):
    """Build the network, train it one example at a time, and checkpoint each epoch.

    Args:
        all_data: sequence of ``(ax, ay)`` pairs; each pair is fed as one
            training step. The sequence itself is not modified.
        lr: learning rate passed to the Adam optimizer.
        num_epochs: number of passes over ``all_data``.
        retrain: if True, restore all variables from ``ckpt_path`` before
            training; if False, initialise a new model.
        print_cost: if True, print the cost contributed by every step.
        ckpt_path: checkpoint prefix used for both restoring and saving.

    Returns:
        List with the summed cost of each epoch.
    """
    tf.reset_default_graph() # resetting graph
    tf.set_random_seed(1)
    costs = []

    X,y = get_placeholders()
    pred = rnn_cell(X)
    cost = cost_function(pred,y)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads, variables = zip(*optimizer.compute_gradients(cost))
    grads, _ = tf.clip_by_global_norm(grads, 5.0) # gradient clipping
    train_op = optimizer.apply_gradients(zip(grads, variables))

    init = tf.global_variables_initializer()
    # Built after the optimizer ops, so it covers every variable that exists
    # in this graph at this point.
    saver = tf.train.Saver() # to save/load model

    # Shuffle a private copy so the caller's list keeps its order.
    samples = list(all_data)

    with tf.Session() as sess:
        if retrain:
            # Restore straight into the graph built above. Importing the
            # .meta file here would add a second copy of the saved graph
            # next to the one that is actually trained.
            saver.restore(sess, ckpt_path)
        else:
            sess.run(init) # only run init for new model

        for epoch in range(num_epochs):
            running_cost = 0
            old_running_cost = 0 # used to show total cost change per stochastic step
            np.random.shuffle(samples) # shuffling the data each epoch
            for ax, ay in samples:
                _,temp_cost = sess.run([train_op,cost], feed_dict={X:ax,y:ay})
                running_cost += temp_cost

                if print_cost:
                    cost_change = running_cost - old_running_cost
                    old_running_cost = running_cost
                    print("Cost change:",cost_change)

            costs.append(running_cost)
            print("Cost at epoch {}: {}".format(epoch+1,running_cost))
            saver.save(sess, ckpt_path) # save model on each epoch

    return costs

In [23]:
# Keep only the first 1000 (X, y) pairs for a quick trial run; slicing makes a
# new list, so all_data itself is left untouched.
small_data = all_data[:1000]

In [24]:
import time

In [25]:
# Time one epoch of training from scratch on the small subset (seconds)
start = time.time()
acosts = model(
    small_data,
    num_epochs=1,
    lr=0.001,
    print_cost=False,
    retrain=False,
)
print(time.time()-start)

Cost at epoch 1: 2442.7007448524237
16.230627059936523
