## Language model for Wilhelm Meister

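This model takes fixed-length input: every example is a sequence of 15 steps with 67 features each. It is trained with mini-batch gradient descent (batches of 15,000 examples) rather than one-example-at-a-time stochastic updates.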

In [1]:
import numpy as np
import tensorflow as tf
import time
np.random.seed(10)

import warnings
warnings.filterwarnings('ignore')

### Loading Data

In [2]:
# Load the pre-built training arrays. reshape(-1, ...) infers the number of
# examples from the file instead of hard-coding it, and avoids assigning to
# `.shape` in place (discouraged by NumPy in favour of reshape).
X_data = np.load("../../../data/goethe/X_arr_traditional.npy").reshape(-1, 15, 67)
y_data = np.load("../../../data/goethe/y_arr_traditional.npy").reshape(-1, 67)
# Inputs and targets are paired row by row further down, so they must align.
if X_data.shape[0] != y_data.shape[0]:
    raise ValueError(
        "X and y hold a different number of examples: {} vs {}".format(
            X_data.shape[0], y_data.shape[0]
        )
    )
print(X_data.shape)
print(y_data.shape)

(1202437, 15, 67)
(1202437, 67)


In [3]:
# Pair every input row with its target row so both can be shuffled together.
# Indexing with np.newaxis gives each element a leading axis of length 1
# (shapes (1, 15, 67) and (1, 67)) so that pairs can later be concatenated
# into batches, without mutating `.shape` on array views.
all_data = [
    (x_row[np.newaxis], y_row[np.newaxis])
    for x_row, y_row in zip(X_data, y_data)
]

### Model Functions

In [4]:
def get_placeholders():
    """Create the float32 graph inputs for the model.

    Returns:
        A pair (X, y): placeholder 'X' with shape [None, 15, 67] for the
        input sequences and placeholder 'Y' with shape [None, 67] for the
        targets. The leading None leaves the batch size open.
    """
    features = tf.placeholder(tf.float32, shape=[None, 15, 67], name='X')
    targets = tf.placeholder(tf.float32, shape=[None, 67], name='Y')
    return features, targets

In [5]:
def cost_function(logits, y):
    """Return the mean softmax cross-entropy between `logits` and labels `y`.

    Used to compare the predicted char with the actual char from the novel.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y,
        logits=logits,
    )
    return tf.reduce_mean(per_example_loss)

In [6]:
def rnn_cell(the_input):
    """Unidirectional LSTM followed by a linear projection to 67 logits.

    Args:
        the_input: float32 tensor fed to `tf.nn.dynamic_rnn` as `inputs`.

    Returns:
        The output of a 67-unit dense layer (no activation) applied to the
        LSTM output at the last position of axis 1, for every input row.
    """
    lstm = tf.nn.rnn_cell.LSTMCell(
        num_units=256,
        activation=tf.nn.tanh,
        reuse=tf.AUTO_REUSE,
        name="lstm0",
    )
    step_outputs, _ = tf.nn.dynamic_rnn(lstm, inputs=the_input, dtype=tf.float32)
    # keep only the last rnn output of every sequence in the batch
    last_step = step_outputs[:, -1, :]
    return tf.layers.dense(
        last_step,
        67,
        activation=None,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=0),
    )

In [7]:
# Build a tiny two-example batch from the first two (x, y) pairs for testing
first_two = all_data[:2]
ax = np.concatenate([pair[0] for pair in first_two])
print(ax.shape)
ay = np.concatenate([pair[1] for pair in first_two])
print(ay.shape)

(2, 15, 67)
(2, 67)


In [8]:
# Smoke test: push the two-example batch through the forward pass and the
# cost function, then report the cost and the shape of the predictions
X, y = get_placeholders()
pred = rnn_cell(X)
cost = cost_function(pred, y)

init = tf.global_variables_initializer()
feed = {X: ax, y: ay}
with tf.Session() as sess:
    sess.run(init)
    # fetch predictions and cost in a single run over the same feed
    out, acost = sess.run([pred, cost], feed_dict=feed)

print(acost)
print(out.shape)

4.188203
(2, 67)


In [9]:
def get_input_data(all_data, cut_off=15000):
    """Group per-example (x, y) pairs into fixed-size batches.

    Args:
        all_data: sequence of (x, y) tuples of numpy arrays. The x arrays of
            one batch are joined with np.concatenate, and likewise the y
            arrays.
        cut_off: number of consecutive pairs merged into each batch
            (defaults to 15000).

    Returns:
        A list of (x_batch, y_batch) tuples, each built from exactly
        `cut_off` consecutive pairs. Trailing pairs that do not fill a
        complete batch are dropped; a final batch that is exactly full is
        kept.

    Raises:
        ValueError: if `cut_off` is not positive.
    """
    if cut_off <= 0:
        raise ValueError("cut_off must be a positive integer")

    ret_data = []  # holds the list of return tuple pairs
    # The stop value admits a start index only if a full batch fits after it.
    for start in range(0, len(all_data) - cut_off + 1, cut_off):
        chunk = all_data[start:start + cut_off]
        x_batch = np.concatenate([pair[0] for pair in chunk])
        y_batch = np.concatenate([pair[1] for pair in chunk])
        ret_data.append((x_batch, y_batch))

    return ret_data

In [10]:
# Sanity check: batch the whole data set and report how many batches result
test_out = get_input_data(all_data=all_data)
print(len(test_out))

80


In [11]:
def model(all_data, lr=0.001, num_epochs=1, retrain=True, print_cost=False,
          checkpoint_path="../../../data/goethe/model2/language_model.ckpt"):
    """Build the LSTM language model and train it on mini-batches.

    The default graph is reset and rebuilt on every call. Training uses Adam
    with gradients clipped to a global norm of 5.0, and a checkpoint is
    written after every epoch.

    Args:
        all_data: list of (x, y) numpy pairs. NOTE: it is shuffled in place
            at the start of every epoch, so the caller's list is reordered.
        lr: learning rate passed to the Adam optimizer.
        num_epochs: number of passes over the data.
        retrain: if True, restore the weights from `checkpoint_path` before
            training; if False, start from freshly initialised variables.
        print_cost: if True, print the cost of every mini-batch.
        checkpoint_path: checkpoint location used for both restoring and
            saving.

    Returns:
        A list with one entry per epoch: the sum of the mini-batch costs
        seen during that epoch.
    """
    tf.reset_default_graph()  # resetting graph
    tf.set_random_seed(1)
    costs = []

    X, y = get_placeholders()
    pred = rnn_cell(X)
    cost = cost_function(pred, y)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads, variables = zip(*optimizer.compute_gradients(cost))
    grads, _ = tf.clip_by_global_norm(grads, 5.0)  # gradient clipping
    train_op = optimizer.apply_gradients(zip(grads, variables))

    init = tf.global_variables_initializer()
    # Created after the optimizer so its slot variables are checkpointed too.
    saver = tf.train.Saver()  # to save/load model
    with tf.Session() as sess:
        if retrain:
            # Restore into the graph built above with its own Saver rather
            # than importing a second copy of the graph from the .meta file.
            saver.restore(sess, checkpoint_path)
        else:
            sess.run(init)  # only run init for new model

        for epoch in range(num_epochs):
            start = time.time()
            running_cost = 0
            np.random.shuffle(all_data)  # shuffling the data each epoch (in place)
            # get list of numpy vector inputs to the model
            for ax, ay in get_input_data(all_data):
                _, temp_cost = sess.run([train_op, cost], feed_dict={X: ax, y: ay})
                running_cost += temp_cost

                if print_cost:
                    # The change in running cost per step is this batch's cost.
                    print("Cost change:", temp_cost)

            costs.append(running_cost)
            print("Cost at epoch {}: {}, took: {}".format(epoch+1, running_cost, time.time()-start))
            saver.save(sess, checkpoint_path)  # save model on each epoch

    return costs