In [None]:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
from tensorflow.contrib.seq2seq.python.ops import attention_decoder_fn
from tensorflow.contrib.seq2seq.python.ops import decoder_fn as decoder_fn_lib
from tensorflow.contrib.seq2seq.python.ops import seq2seq
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import rnn
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test




In [None]:
def get_decoders_train_inference():
    """Build the attention decoder functions used for training and inference.

    Takes no arguments: ``output_fn``, ``encoder_state``, the attention
    tensors/functions, ``decoder_embeddings`` and the vocabulary constants are
    all read from module-level names, so they must be bound before this is
    called.

    Returns:
        A ``(decoder_fn_train, decoder_fn_inference)`` tuple.
    """
    # Arguments that both decoder functions receive unchanged.
    shared_attention_args = dict(
        encoder_state=encoder_state,
        attention_keys=attention_keys,
        attention_values=attention_values,
        attention_score_fn=attention_score_fn,
        attention_construct_fn=attention_construct_fn,
    )

    # The inference function is built first, then the training one.
    inference_fn = attention_decoder_fn.attention_decoder_fn_inference(
        output_fn=output_fn,
        embeddings=decoder_embeddings,
        start_of_sequence_id=start_of_sequence_id,
        end_of_sequence_id=end_of_sequence_id,
        # One step fewer than the configured decoder length.
        maximum_length=decoder_sequence_length - 1,
        num_decoder_symbols=num_decoder_symbols,
        dtype=dtypes.int32,
        **shared_attention_args)

    train_fn = attention_decoder_fn.attention_decoder_fn_train(
        **shared_attention_args)

    return train_fn, inference_fn

In [None]:
# --- Batch and layer sizes ---
batch_size = 32
encoder_embedding_size = decoder_embedding_size = 64
encoder_hidden_size = 1024
# The decoder hidden width is tied to the encoder's.
decoder_hidden_size = encoder_hidden_size

# --- Fixed sequence lengths (in tokens) and vocabulary size ---
input_sequence_length = decoder_sequence_length = 10
num_decoder_symbols = 10000

# --- Reserved token ids: PAD=0, GO=1, EOS=2, UNK=3 ---
PAD_ID, GO_ID, EOS_ID, UNK_ID = range(4)
start_of_sequence_id, end_of_sequence_id = GO_ID, EOS_ID

def process_lines(line,frline):
    """Parse two whitespace-separated id strings into lists of ints.

    Args:
        line: text whose whitespace-separated tokens are integer literals.
        frline: second text, parsed the same way.

    Returns:
        ``(numbers_in, numbers_out)``: the parsed ints of ``line`` and
        ``frline`` respectively.

    Raises:
        ValueError: if a token is not a valid integer literal.
    """
    def to_ids(text):
        # str.split() with no argument also discards the trailing newline.
        return list(map(int, text.split()))

    return to_ids(line), to_ids(frline)
    
def add_padding(lines,maxlen):
    """Terminate every sequence with EOS_ID and right-pad it with PAD_ID.

    Args:
        lines: iterable of lists of token ids.
        maxlen: target length of each returned sequence.

    Returns:
        A new list of new lists; the input lists are not modified. A sequence
        that already has ``maxlen`` or more items once EOS_ID is appended
        receives no padding and is NOT truncated.
    """
    def pad_one(ids):
        terminated = ids + [EOS_ID]
        # A non-positive multiplier yields an empty list, i.e. no padding.
        return terminated + [PAD_ID] * (maxlen - len(terminated))

    return [pad_one(ids) for ids in lines]
def get_some_data(max_lines=10000,
                  en_path='giga-fren.release2.fixed.en.ids10000',
                  fr_path='giga-fren.release2.fixed.fr.ids10000'):
    """Read aligned English/French id files and keep the short sentence pairs.

    Both files are read in lockstep, one line each per pair. A pair is kept
    only if each side, plus one extra token, is strictly shorter than
    ``input_sequence_length`` / ``decoder_sequence_length`` (module-level
    constants).

    Args:
        max_lines: maximum number of line pairs to read. Exactly this many
            are examined (the earlier ``i > maxlen`` test read one extra).
        en_path: path of the English id file.
        fr_path: path of the French id file.

    Returns:
        ``(en_sentences, fr_sentences)``: two equally long lists of lists of
        ints, aligned by index.

    Raises:
        IOError/OSError: if either file cannot be opened.
        ValueError: if a token in either file is not an integer.
    """
    en_sentences = []
    fr_sentences = []
    # A single `with` closes both handles even if parsing raises.
    with open(en_path) as en_file, open(fr_path) as fr_file:
        for i, enline in enumerate(en_file):
            if i >= max_lines:
                break
            frline = fr_file.readline()
            if not frline:
                # readline() returns '' only at end of file: the French file
                # is shorter than the English one. Stop instead of pairing
                # the remaining English lines with empty targets.
                break
            numbers_in, numbers_out = process_lines(enline, frline)
            fits_encoder = len(numbers_in) + 1 < input_sequence_length
            fits_decoder = len(numbers_out) + 1 < decoder_sequence_length
            if fits_encoder and fits_decoder:
                en_sentences.append(numbers_in)
                fr_sentences.append(numbers_out)
    return en_sentences, fr_sentences
# Load the length-filtered sentence pairs (unpadded lists of ids).
raw_en_sentences, raw_fr_sentences = get_some_data()

# Target side: EOS-terminated, then padded to the fixed decoder length.
fr_sentences = add_padding(raw_fr_sentences, decoder_sequence_length)

# Source side: terminated and padded the same way, then each row is reversed,
# so the padding ends up at the front of the row.
en_sentences = [
    padded_row[::-1]
    for padded_row in add_padding(raw_en_sentences, input_sequence_length)
]

In [None]:
# Define inputs/outputs to model
tf.reset_default_graph()

# Batch-major integer token ids, fed at run time.
inputs_placeholder = tf.placeholder(
    tf.int32, [batch_size, input_sequence_length])
# Target-side ids. Sized with decoder_sequence_length (it was previously sized
# with input_sequence_length, which only worked because both constants are 10).
decoder_inputs_placeholder = tf.placeholder(
    tf.int32, [batch_size, decoder_sequence_length])

# NOTE(review): the encoder table is also sized with num_decoder_symbols; no
# separate encoder vocabulary size is defined in this notebook.
encoder_embeddings = tf.Variable(
    tf.random_uniform([num_decoder_symbols, encoder_embedding_size], -1.0, 1.0))

decoder_embeddings = tf.Variable(
    tf.random_uniform([num_decoder_symbols, decoder_embedding_size], -1.0, 1.0))

# The RNNs below run with time_major=True, so transpose ids to [time, batch]
# before the embedding lookup.
inputs = tf.nn.embedding_lookup(
    encoder_embeddings, tf.transpose(inputs_placeholder))

# Teacher forcing: the decoder input at step t must be the target at step
# t - 1, with GO_ID at step 0 (the same token inference starts from).
# Feeding the unshifted targets would hand the decoder the answer it is
# being asked to predict.
go_column = tf.fill([batch_size, 1], GO_ID)
shifted_decoder_ids = tf.concat(
    values=[go_column, decoder_inputs_placeholder[:, :-1]], axis=1)
decoder_inputs = tf.nn.embedding_lookup(
    decoder_embeddings, tf.transpose(shifted_decoder_ids))

# Every sequence in the batch is decoded for the full fixed length.
decoder_length = constant_op.constant(
    decoder_sequence_length, dtype=dtypes.int32, shape=[batch_size,])

# Attention flavour handed to prepare_attention further down.
attention_option = "luong"  # can be "bahdanau"

with variable_scope.variable_scope("rnn") as scope:
    # Encoder: a single GRU layer run over the time-major embedded inputs.
    # NOTE(review): the cell is sized with decoder_hidden_size, which is
    # defined as equal to encoder_hidden_size.
    cell = core_rnn_cell_impl.GRUCell(decoder_hidden_size)
    encoder_outputs, encoder_state = rnn.dynamic_rnn(
        cell,
        inputs,
        dtype=dtypes.float32,
        time_major=True,
        scope=scope)

# Swap the first two axes of the encoder outputs so that
# attention_states has size [batch_size, max_time, num_units].
attention_states = array_ops.transpose(encoder_outputs, perm=[1, 0, 2])


with variable_scope.variable_scope("decoder") as scope:
    # Prepare attention: keys/values plus the score and construct functions.
    attention_parts = attention_decoder_fn.prepare_attention(
        attention_states, attention_option, decoder_hidden_size)
    (attention_keys, attention_values,
     attention_score_fn, attention_construct_fn) = attention_parts

    def create_output_fn():
        """Return the projection from decoder outputs to vocabulary logits.

        The returned callable looks up ``scope`` when it is called, so every
        call places its linear layer under the decoder scope.
        """
        return lambda x: layers.linear(x, num_decoder_symbols, scope=scope)

    output_fn = create_output_fn()

    # Reads output_fn and the attention pieces bound just above.
    decoder_fn_train, decoder_fn_inference = get_decoders_train_inference()

    # Train decoder
    decoder_cell = core_rnn_cell_impl.GRUCell(decoder_hidden_size)
    train_result = seq2seq.dynamic_rnn_decoder(
        cell=decoder_cell,
        decoder_fn=decoder_fn_train,
        inputs=decoder_inputs,
        sequence_length=decoder_length,
        time_major=True,
        scope=scope)
    decoder_outputs_train, decoder_state_train, _ = train_result
    # Project the decoder outputs to num_decoder_symbols logits.
    decoder_outputs_train = output_fn(decoder_outputs_train)

    # Everything built after this point reuses the variables created above.
    scope.reuse_variables()

    # Inference decoder: same cell, no inputs.
    inference_result = seq2seq.dynamic_rnn_decoder(
        cell=decoder_cell,
        decoder_fn=decoder_fn_inference,
        time_major=True,
        scope=scope)
    decoder_outputs_inference, decoder_state_inference, _ = inference_result


# Training loss: per-token softmax cross-entropy between the training logits
# and the target ids, averaged over the non-padding positions.
#
# decoder_outputs_train comes from a time_major=True decoder, so it is
# expected to be [time, batch, num_decoder_symbols]; the targets are fed
# batch-major and are transposed to [time, batch] to line up with it.
targets_time_major = tf.transpose(decoder_inputs_placeholder)

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=targets_time_major, logits=decoder_outputs_train)

# 1.0 where the target is a real token, 0.0 where it is padding, so padded
# positions contribute nothing to the loss.
weights = tf.cast(tf.not_equal(targets_time_major, PAD_ID), tf.float32)

my_loss = tf.reduce_sum(cross_entropy * weights) / tf.reduce_sum(weights)

optimizer = tf.train.AdamOptimizer().minimize(my_loss)


In [None]:


# InteractiveSession installs itself as the default session, which is what
# lets Operation.run() / Tensor.eval() work without passing a session.
sess = tf.InteractiveSession()
# Initialise all variables exactly once (the deprecated
# tf.initialize_all_variables() call that duplicated this was dropped).
variables.global_variables_initializer().run()

In [None]:


# NOTE(review): every step trains on the same first `batch_size` pairs, so
# this only demonstrates that the model can fit a single batch.
in_x = en_sentences[:batch_size]
in_y = fr_sentences[:batch_size]
# The batch never changes, so the feed dict is built once, outside the loop.
train_feed = {inputs_placeholder: in_x, decoder_inputs_placeholder: in_y}

for _ in range(10000):
    (not_important, the_loss, decoder_outputs_train_res,
     decoder_state_train_res) = sess.run(
         [optimizer, my_loss, decoder_outputs_train, decoder_state_train],
         feed_dict=train_feed)
    # Print the loss fetched by the training step itself rather than paying
    # for a second forward pass just to display it.
    print(the_loss)


In [None]:
import numpy as np
def _load_vocabulary(path):
    """Return the stripped lines of ``path`` as a list, closing the file."""
    with open(path) as vocab_file:
        return [token.strip() for token in vocab_file]


def _ids_to_text(token_ids, vocabulary):
    """Render ids as words, each followed by a space, up to the first EOS_ID."""
    words = []
    for token_id in token_ids:
        if token_id == EOS_ID:
            break
        words.append(vocabulary[token_id] + " ")
    return "".join(words)


vocabulary_in = _load_vocabulary('vocab10000.from')
vocabulary_out = _load_vocabulary('vocab10000.to')

print(decoder_outputs_train_res.shape)
# Swap the first two axes so that iterating yields one sentence at a time
# (the decoder ran with time_major=True).
outputs_batch_major = decoder_outputs_train_res.transpose(1, 0, 2)
print(outputs_batch_major.shape)

for index_sentences, sentence in enumerate(outputs_batch_major):
    print(sentence.shape)

    # Source sentence. The encoder rows were reversed after padding, so
    # un-reverse them here; read as fed, a row begins with padding followed
    # by EOS and the actual words would never be printed.
    print(_ids_to_text(in_x[index_sentences][::-1], vocabulary_in))

    # Greedy prediction: arg-max word per step, up to the first predicted EOS.
    predicted_ids = []
    activations = []               # score of the predicted word at each step
    activations_of_inspected = []  # score of the reference word at that step
    for whoo_i, word in enumerate(sentence):
        index_now = np.argmax(word)
        if index_now == EOS_ID:
            break
        predicted_ids.append(index_now)
        activations.append(word[index_now])
        activations_of_inspected.append(word[in_y[index_sentences][whoo_i]])
    print(_ids_to_text(predicted_ids, vocabulary_out))

    # Reference translation.
    print(_ids_to_text(in_y[index_sentences], vocabulary_out))
    print(activations)
    print(activations_of_inspected)

In [None]:
def lstm_cell(prevc,prevh,xt,hidden_size):
    """Compute one LSTM step and return the new cell and hidden states.

    Args:
        prevc: previous cell state; assumed [batch, hidden_size] -- TODO confirm.
        prevh: previous hidden state; assumed [batch, hidden_size] -- TODO confirm.
        xt: input for this step; assumed [batch, input_dim] with a statically
            known ``input_dim`` -- TODO confirm.
        hidden_size: number of LSTM units.

    Returns:
        ``(new_c, new_h)``, the updated cell state and hidden state.

    Raises:
        ValueError: if the width of ``[prevh, xt]`` is not statically known.

    Note:
        Fresh ``tf.Variable`` weights are created on every call, so calling
        this once per time step does NOT share weights across steps.
    """
    # Each gate sees the previous hidden state and the current input side by
    # side along the feature axis.
    concatenated = tf.concat(values=[prevh, xt], axis=1)
    concat_size = concatenated.get_shape().as_list()[1]
    if concat_size is None:
        raise ValueError(
            "lstm_cell needs a statically known feature dimension for "
            "prevh and xt")

    def gate_preactivation():
        # One affine map [concat_size -> hidden_size] with its own parameters.
        gate_weights = tf.Variable(
            tf.truncated_normal([concat_size, hidden_size], stddev=0.1))
        gate_bias = tf.Variable(tf.zeros([hidden_size]))
        return tf.matmul(concatenated, gate_weights) + gate_bias

    forget_gate = tf.sigmoid(gate_preactivation())
    input_gate = tf.sigmoid(gate_preactivation())
    candidate = tf.tanh(gate_preactivation())
    output_gate = tf.sigmoid(gate_preactivation())

    # Keep part of the old memory, add part of the new candidate.
    new_c = forget_gate * prevc + input_gate * candidate
    new_h = output_gate * tf.tanh(new_c)
    return new_c,new_h



# NOTE(review): `vocabulary_size` and `num_nodes` are not defined anywhere in
# the visible notebook, so this statement raises NameError on a fresh kernel.
# NOTE(review): the positional -0.1, 0.1 bind to truncated_normal's `mean` and
# `stddev` parameters -- confirm that is intended rather than a value range.
ix = tf.Variable(tf.truncated_normal([vocabulary_size, num_nodes], -0.1, 0.1))
# Second name for the same Variable object.
forget_weights = ix
