In [None]:
import logging
from os import listdir
from os.path import join
import pickle
import tensorflow as tf
import math
import timeit
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
from sklearn.metrics import f1_score, confusion_matrix

from train import read_data

import arpabet_preprocess

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
# Maximum sequence length, forwarded to load_data as max_length and used as
# the second dimension of the X placeholder.
max_len = 200

# Value that marks padding positions in the X arrays (see the masks below).
# NOTE(review): taken as the length of the first element returned by
# get_arpabet_list(); presumably that is the phoneme vocabulary -- confirm.
pad = len(arpabet_preprocess.get_arpabet_list()[0])

# Load the "train" and "dev" splits with their label files.
X_train, y_train, lang_dict, lang_rev_dict = arpabet_preprocess.load_data(
    "train",
    label_file='data/labels/train/labels.train.csv',
    line=False,
    max_length=max_len)
X_dev, y_dev, _, _ = arpabet_preprocess.load_data(
    "dev",
    label_file='data/labels/dev/labels.dev.csv',
    line=False,
    max_length=max_len)

# Carve a validation set out of the training data.
X_train, y_train, X_val, y_val = arpabet_preprocess.split_data((X_train, y_train), lang_dict)

# Per-row count of non-pad entries; these are later fed to the model as
# sequence lengths.
X_mask_train, X_mask_val, X_mask_dev = (
    (features != pad).sum(1) for features in (X_train, X_val, X_dev)
)

# Report the shape of every array, then the label mapping and pad value.
for _label, _array in (
        ("X_train", X_train), ("X_mask_train", X_mask_train), ("y_train", y_train),
        ("X_val", X_val), ("X_mask_val", X_mask_val), ("y_val", y_val),
        ("X_dev", X_dev), ("X_mask_dev", X_mask_dev), ("y_dev", y_dev)):
    print(_label, _array.shape)
del _label, _array  # keep the notebook namespace free of loop temporaries
print("lang_dict", lang_dict)
print("pad", pad)

In [None]:
# Discard any previously built default graph before defining a new one.
tf.reset_default_graph()

# Width of each embedding vector (second dimension of the embedding matrix).
emb_size = 100

# Placeholders for the data that changes every batch. The leading None
# dimension is resolved from the batch that is actually fed.
X = tf.placeholder(dtype=tf.int64, shape=[None, max_len])
y = tf.placeholder(dtype=tf.int64, shape=[None])
mask = tf.placeholder(dtype=tf.int64, shape=[None])
# Fed by run_model on every step; simple_model does not read it.
is_training = tf.placeholder(dtype=tf.bool)

def simple_model(X,y,mask):
    """Build an embedding -> GRU -> dense classifier and return its logits.

    Reads the module-level names ``pad``, ``emb_size`` and ``lang_dict``.

    Args:
        X: integer tensor of token ids; each id is looked up in the
            embedding matrix. In this notebook it is the [None, max_len]
            placeholder.
        y: unused; kept in the signature so existing callers keep working.
        mask: integer tensor passed to ``tf.nn.dynamic_rnn`` as
            ``sequence_length`` (one value per example).

    Returns:
        Tensor with ``len(lang_dict)`` units per example holding
        unnormalised class scores, suitable as ``logits`` for a softmax
        cross-entropy loss.
    """
    num_hidden = 100

    print("X", X)
    with tf.variable_scope("embeddings"):
        # One row per id in [0, pad], so the pad id itself has an embedding.
        embeddings = tf.get_variable(initializer=tf.contrib.layers.xavier_initializer(),
                                     name="L",
                                     shape=(pad+1, emb_size))

    X_embed = tf.nn.embedding_lookup(embeddings, X)

    print("X_embed", X_embed)

    gru_cell = tf.contrib.rnn.GRUCell(num_hidden)
    # sequence_length tells dynamic_rnn where each example ends, so `state`
    # is the GRU state at the last valid step rather than at a padded step.
    outputs, state = tf.nn.dynamic_rnn(cell=gru_cell,
                                       inputs=X_embed,
                                       sequence_length=mask,
                                       dtype=tf.float32)

    print("outputs", outputs)
    print("state", state)

    # The output layer is linear on purpose: these values are consumed as
    # logits by the softmax cross-entropy loss. A ReLU here would clamp every
    # class score to >= 0 and can leave a class stuck at 0 with no gradient.
    yout = tf.layers.dense(inputs=state, units=len(lang_dict), activation=None,
                           kernel_initializer=tf.contrib.layers.xavier_initializer())
    print("yout", yout)
    return yout


# Build the classifier on the placeholders; y_out is used as the logits below.
y_out = simple_model(X, y, mask)


# Loss: per-example softmax cross-entropy against the integer labels,
# then its mean over the batch.
total_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_out)
mean_loss = tf.reduce_mean(total_loss)

# Optimizer: Adam with a learning rate of 5e-4; train_step minimizes mean_loss.
optimizer = tf.train.AdamOptimizer(learning_rate=5e-4)
train_step = optimizer.minimize(mean_loss)

In [None]:
def run_model(session, predict, loss_val, Xd, maskd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    """Run mini-batches of (Xd, maskd, yd) through the graph, optionally training.

    Batches are fed through the module-level placeholders ``X``, ``y``,
    ``mask`` and ``is_training``. The example order is shuffled once per call
    and reused for every epoch.

    Note: the accuracy ops are created on every call, so repeated calls add
    nodes to the default graph.

    Args:
        session: ``tf.Session`` used to evaluate the graph.
        predict: tensor of per-class scores; its argmax over axis 1 is the
            predicted label.
        loss_val: scalar loss tensor to evaluate (and report) for each batch.
        Xd: array of inputs, indexed as ``Xd[idx, :]``.
        maskd: array fed to the ``mask`` placeholder, one entry per example.
        yd: array of integer labels, one entry per example.
        epochs: number of passes over the data.
        batch_size: maximum number of examples per batch.
        print_every: when training, print batch stats every this many steps.
        training: optional op run alongside the loss on every batch (e.g. an
            optimizer step). ``None`` means evaluation only.
        plot_losses: if true, plot the per-batch losses after each epoch.

    Returns:
        Tuple ``(loss, accuracy)`` for the last epoch: the example-weighted
        mean of ``loss_val`` and the fraction of correct predictions.
        ``(nan, nan)`` when ``epochs`` is 0.
    """
    # have tensorflow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_examples = Xd.shape[0]

    # shuffle indices
    shuffled_indices = np.arange(num_examples)
    np.random.shuffle(shuffled_indices)

    training_now = training is not None

    # Tensors/ops fetched on every step. When training, the accuracy slot is
    # replaced by the training op; its fetched value is discarded either way.
    variables = [loss_val, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training

    # float() keeps this a true division regardless of Python version.
    num_batches = int(math.ceil(num_examples / float(batch_size)))

    # Defined up front so the return below is valid even when epochs == 0.
    epoch_loss = epoch_accuracy = float("nan")

    # counter
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(num_batches):
            # indices for this batch; the last batch may be shorter
            start_idx = i * batch_size
            idx = shuffled_indices[start_idx:start_idx + batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         mask: maskd[idx],
                         is_training: training_now}
            # number of examples actually in this batch
            actual_batch_size = idx.shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)

            # aggregate performance stats; the batch-mean loss is re-weighted
            # by batch size so the epoch average is per example
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                      .format(iter_cnt, loss, np.sum(corr) / float(actual_batch_size)))
            iter_cnt += 1
        epoch_accuracy = correct / float(num_examples)
        epoch_loss = np.sum(losses) / num_examples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(epoch_loss, epoch_accuracy, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return epoch_loss, epoch_accuracy

# Train for 5 epochs on the training split, then evaluate once on the
# held-out validation split, all inside a single session.
# NOTE(review): tf.device only applies to ops created inside this context;
# the model graph was built in an earlier cell, so this likely does not pin
# the model itself to the CPU -- confirm the intended placement.
with tf.Session() as sess, tf.device("/cpu:0"):  # "/cpu:0" or "/gpu:0"
    sess.run(tf.global_variables_initializer())
    print('Training')
    run_model(sess, y_out, mean_loss, X_train, X_mask_train, y_train,
              epochs=5, batch_size=64, print_every=100,
              training=train_step, plot_losses=True)
    print('Validation')
    run_model(sess, y_out, mean_loss, X_val, X_mask_val, y_val,
              epochs=1, batch_size=64)