# Goal

We will attemp to convert English into Pig Latin.  We will use the Text8 data as a corpus of text.  The modeling will be done using a sequence of characters and the input sequence will be the sequence of characters for one word.  We will use the method described [here](https://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf) 

# Import functions

In [1]:
import pickle
import tensorflow as tf
import numpy as np
import string
from tqdm import tqdm
import os
from matplotlib import pyplot as plt
%matplotlib inline

# Import parameters

[e1_var,e2_var,e3_var,d1_var,d2_var,d3_var,W_softmax,b_softmax]

In [2]:
params = pickle.load(open('params.p','r'))

In [3]:
len(params)

8

# Padding inputs and outputs

There is no good way to handle sequences of multiple lengths (see [here](https://www.tensorflow.org/tutorials/seq2seq)).  So we pad inputs and outputs to fixed lengths.

In [8]:
# find maximum length of input
max_in = 10

In [9]:
# find maximum length of input
max_out = 13

In [10]:
pad_char = '~'
def pad_in(w):
    while(len(w)<max_in):
        w = pad_char+w
    return w

def pad_out(w):
    while(len(w)<max_out):
        w = w+pad_char
    return w

def un_pad(w):
    return w.replace(pad_char,'')

# Vectorize data

In [12]:
alphabet = string.ascii_lowercase+'~'
alphabet_size = len(alphabet)+1 #need to add one for the end of sequence key

#returns a unique integer for the letter
def char2id(x):
    return alphabet.find(x)

#return a one hot encoded vector of the letter
def one_hot(l):
    r = np.zeros(alphabet_size)
    r[char2id(l)] = 1.0
    return r

#return the letter of the one-hot encoded letter
def un_one_hot(v):
    ind = np.argmax(v)
#     if ind >= alphabet_size-2:
    if ind == alphabet_size-1:
        return ''
    else:
        return alphabet[ind]

#returns the the End of Sequence vector
def getEOSvec():
    r = np.zeros(alphabet_size)
    r[alphabet_size-1] = 1.0
    return r

#returns the word a matrix of one hot encoded vectors
def vectorizeWord(w):
    r = np.ndarray((len(w)+1,alphabet_size))
    for i,l in enumerate(w):
        r[i] = one_hot(l)
    r[len(w)] = getEOSvec()
    return r

#returns the string of the vectorized word
def unvectorizeWord(M):
    r = ''
    for i in xrange(M.shape[0]):
        r += un_one_hot(M[i])
    return r

# Deep Traditional

https://arxiv.org/pdf/1409.2329.pdf

<img src="Notes/OtherLSTM.png">

In [15]:
#NOTE: the +1 in max_in+1 and max_out+1 happens because the end token is needed

num_nodes = 500
g = tf.Graph()
with g.as_default():
    #the previous state that gets fed into the cell
    state_0v = tf.constant(0.0,dtype=tf.float32,shape=[1,num_nodes])
    hidden_0v = tf.constant(0.0,dtype=tf.float32,shape=[1,num_nodes])
    
    def create_LSTM_Variables(num_nodes,iv,Name,is_input=False):
        if is_input:
            fx = tf.Variable(iv[0][0],name=Name+'fx')
            ix = tf.Variable(iv[0][1],name=Name+'ix')
            cx = tf.Variable(iv[0][2],name=Name+'cx')
            ox = tf.Variable(iv[0][3],name=Name+'ox')
        else:
            fx = tf.Variable(iv[0][0],name=Name+'fx')
            ix = tf.Variable(iv[0][1],name=Name+'ix')
            cx = tf.Variable(iv[0][2],name=Name+'cx')
            ox = tf.Variable(iv[0][3],name=Name+'ox')
        fb = tf.Variable(iv[1][0],name=Name+'fb')
        ib = tf.Variable(iv[1][1],name=Name+'ib')
        cb = tf.Variable(iv[1][2],name=Name+'cb')
        ob = tf.Variable(iv[1][3],name=Name+'ob')
        return[[fx,ix,cx,ox],[fb,ib,cb,ob]]
    
    e1_var = create_LSTM_Variables(num_nodes,params[0],'e1',is_input = True)
    e2_var = create_LSTM_Variables(num_nodes,params[1],'e2')
    e3_var = create_LSTM_Variables(num_nodes,params[2],'e3')
    
    d1_var = create_LSTM_Variables(num_nodes,params[3],'d1',is_input = True)
    d2_var = create_LSTM_Variables(num_nodes,params[4],'d2')
    d3_var = create_LSTM_Variables(num_nodes,params[5],'d3')
    
    #softmax
    W_softmax = tf.Variable(params[6])
    b_softmax = tf.Variable(params[7])
    
    #model
    #hl is the previous hiddent layer from current time step but previous  layer
    #ht is the previous hidden layer from the current layer but previous timestep
    #state is the previous state from the same layer but previous timestep
    def LSTM(hl,ht,state,varrs):
        #get variables out
        x,b=varrs[0],varrs[1]
        fx,ix,cx,ox = x[0],x[1],x[2],x[3]
        fb,ib,cb,ob = b[0],b[1],b[1],b[3]
        
        #computations
        input_chan = tf.concat(1,[hl,ht])
        forget_gate = tf.sigmoid(tf.matmul(input_chan,fx)+fb)
        insert_gate = tf.sigmoid(tf.matmul(input_chan,ix)+ib)
        output_gate = tf.sigmoid(tf.matmul(input_chan,ox)+ob)
        candidate = tf.tanh(tf.matmul(input_chan,cx)+cb)
        state = forget_gate * state + insert_gate * candidate
        h = output_gate * tf.tanh(state)
        return h, state
    
    
    def model(input_sequence,train = True):
        #Encode sequence
        for i in range(max_in+1):
            if i == 0:
                if train:
                    state1,h1 = state_0,hidden_0
                    state2,h2 = state_0,hidden_0
                    state3,h3 = state_0,hidden_0
                else:
                    state1,h1 = state_0v,hidden_0v
                    state2,h2 = state_0v,hidden_0v
                    state3,h3 = state_0v,hidden_0v

            h1,state1 = LSTM(h1,input_sequence[i],state1,e1_var) #layer 1
            h2,state2 = LSTM(h2,h1,state2,e2_var) #layer 2
            h3,state3 = LSTM(h3,h2,state3,e3_var) #layer 3

        #Decode sequence
        logits_list = list()
        for i in range(max_out+1):
            if i == 0:
                h1,state1 = LSTM(h1,input_sequence[-1],state1,d1_var) #layer 1

            else:
                h1,state1 = LSTM(h1,tf.nn.softmax(logit),state1,d1_var) #layer 1
            h2,state2 = LSTM(h2,h1,state2,d2_var) #layer 2
            h3,state3 = LSTM(h3,h2,state3,d3_var) #layer 3

            logit =tf.matmul(h3,W_softmax)+b_softmax
            logits_list.append(logit)

        logits = tf.concat(0,logits_list)
        return logits
    
    #inference train
#     pred = tf.nn.softmax(logits_train)
        
    init = tf.initialize_all_variables()

In [16]:
sess = tf.Session(graph = g)
sess.run(init)

In [17]:
with g.as_default():
    val_sequence = list()
    for i in range(max_in+1):
        val_sequence.append(tf.placeholder(tf.float32,shape=(1,alphabet_size)))
    logits_val = model(val_sequence,train=False)
    pred_val = tf.nn.softmax(logits_val)

In [18]:
v = ['piglatin','translator','is','working']
for s in range(len(v)):
    inputs = vectorizeWord(pad_in(v[s][::-1]))
    
    fd = {}
    for i in range(len(inputs)):
        fd[val_sequence[i]] = np.expand_dims(inputs[i],0)
    
    translated = sess.run([pred_val],feed_dict=fd)[0]
    print 'Input:          ',v[s]
    print 'Output:         ',unvectorizeWord(translated)
    print ' '

Input:           piglatin
Output:          iglatinpay~~~
 
Input:           translator
Output:          anslaturtray~
 
Input:           is
Output:          isway~~~~~~~~
 
Input:           working
Output:          orkingway~~~~
 
