In [1]:
import numpy as np
from dataset import Dataset

In [2]:
# One-time preprocessing, kept commented out for reference: builds the dataset
# from the '../../small-en' / '../../small-fr' inputs (presumably the English
# and French sides of a parallel corpus -- confirm against dataset.py) and
# caches the result under 'small-dataset'.
# dataset = Dataset('../../small-en', '../../small-fr', num_words=40000, buckets=[10, 20, 40, 80, 200], nthreads=20)
# dataset.save('small-dataset')
# Load the previously cached dataset instead of rebuilding it.
dataset = Dataset.load('small-dataset')

In [3]:
# Set CUDA_VISIBLE_DEVICES=0 for the kernel process; this cell runs before
# TensorFlow is imported further down.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [4]:
import os
# Sanity check: print the value the kernel process actually sees.
print(os.environ["CUDA_VISIBLE_DEVICES"])

0


### Define the network

In [5]:
import tensorflow as tf

In [6]:
def _sentence_placeholders():
    """Create the int32 placeholder pair describing one batch of sentences.

    Returns:
        (token_ids, lengths): a rank-2 placeholder and a rank-1 placeholder,
        both with every dimension left dynamic (None). The id matrix is
        presumably laid out as (batch, time) since the RNN below is run with
        the default batch-major layout.
    """
    token_ids = tf.placeholder(tf.int32, shape=[None, None])
    lengths = tf.placeholder(tf.int32, shape=[None])
    return token_ids, lengths


# English sentences and their lengths.
english_input, english_length = _sentence_placeholders()

# French sentences paired with the English ones.
french_input, french_length = _sentence_placeholders()

# French sentences used as negatives in the triplet loss.
french_input_neg, french_length_neg = _sentence_placeholders()

In [7]:
# Initializer shared by the embedding tables and the LSTM cells: U(-0.1, 0.1).
uniform_initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)

# One trainable 512-wide embedding table per language, one row per vocabulary entry.
with tf.variable_scope('english_word_embedding'):
    english_word_emb = tf.get_variable(
        name="word_embedding",
        shape=[len(dataset.vocab['en']), 512],
        initializer=uniform_initializer)

# Backward-compatible alias: the table was originally bound to the misspelled
# name `enlish_word_emb`; keep it so any cell still using that name works.
enlish_word_emb = english_word_emb

with tf.variable_scope('french_word_embedding'):
    french_word_emb = tf.get_variable(
        name="word_embedding",
        shape=[len(dataset.vocab['fr']), 512],
        initializer=uniform_initializer)

# Map token ids to their embedding vectors (adds a trailing dimension of 512).
english_embedding = tf.nn.embedding_lookup(english_word_emb, english_input)
french_embedding = tf.nn.embedding_lookup(french_word_emb, french_input)


In [8]:
def encoder(words, lengths, embedding, size):
    """Encode a batch of embedded sentences into one fixed-size vector each.

    Runs a bidirectional LSTM over `embedding` and concatenates the final
    hidden states of the forward and backward cells.

    Args:
        words: Unused. The encoder reads only `embedding`; the parameter is
            kept so existing call sites keep working. Callers must therefore
            pass the embedding that corresponds to the sentences they mean
            to encode.
        lengths: Int tensor forwarded as `sequence_length` to the RNN.
        embedding: Float32 tensor used as the RNN input (batch-major, since
            `time_major` is left at its default).
        size: Number of units in each of the two LSTM cells.

    Returns:
        Tensor formed by concatenating, along axis 1, the final hidden state
        of the forward cell followed by that of the backward cell.
    """
    cell_fw = tf.contrib.rnn.LSTMCell(size, initializer=uniform_initializer)
    cell_bw = tf.contrib.rnn.LSTMCell(size, initializer=uniform_initializer)
    # bidirectional_dynamic_rnn returns (outputs, (state_fw, state_bw)):
    # the forward state comes first. The previous code unpacked the pair with
    # the names swapped; the concatenation order (forward, backward) is
    # unchanged by this rename.
    _outputs, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        embedding,
        sequence_length=lengths,
        dtype=tf.float32)
    outputs = tf.concat((state_fw.h, state_bw.h), 1)
    return outputs

with tf.variable_scope('english_encoder'):
    english_encoder = encoder(english_input, english_length, english_embedding, 512)

# The negative sentences need their own embedding lookup. `encoder` reads only
# its `embedding` argument, so passing `french_embedding` here (as the code
# previously did) encoded the *positive* French batch a second time, merely
# truncated to the negatives' lengths.
french_embedding_neg = tf.nn.embedding_lookup(french_word_emb, french_input_neg)

with tf.variable_scope('french_encoder') as scope:
    french_encoder = encoder(french_input, french_length, french_embedding, 512)
    # Share the French LSTM weights between the positive and negative branches.
    scope.reuse_variables()
    french_encoder_neg = encoder(french_input_neg, french_length_neg, french_embedding_neg, 512)
    

In [9]:
with tf.name_scope('pairwise_distance'):
    # Squared Euclidean distance between every English/French encoding pair,
    # expanded as ||e||^2 + ||f||^2 - 2 * e.f so it is one matmul plus broadcasts.
    # Entry [i, j] compares English row i with French row j.
    english_sq_norm = tf.reduce_sum(tf.square(english_encoder), 1, keep_dims=True)  # column vector
    french_sq_norm = tf.transpose(
        tf.reduce_sum(tf.square(french_encoder), 1, keep_dims=True))  # row vector
    cross_term = tf.matmul(english_encoder, tf.transpose(french_encoder))
    pairwise_distance = english_sq_norm + french_sq_norm - 2. * cross_term

In [10]:
def _squared_distance(left, right):
    """Row-wise squared Euclidean distance between two equally shaped tensors."""
    return tf.reduce_sum(tf.square(left - right), 1)


with tf.name_scope('loss'):
    with tf.name_scope('triplet_loss'):
        # Hinge on (positive distance + margin 0.2 - negative distance).
        positive_distance = _squared_distance(english_encoder, french_encoder)
        negative_distance = _squared_distance(english_encoder, french_encoder_neg)
        triplet_loss = tf.nn.relu(positive_distance + 0.2 - negative_distance)
    with tf.name_scope('distance_loss'):
        # Additional term pulling each English encoding towards its paired French one.
        dist_loss = _squared_distance(english_encoder, french_encoder)
    # Mean over the batch of the per-example sum of both terms.
    loss = tf.reduce_mean(triplet_loss + dist_loss)

In [11]:
with tf.name_scope('optimizer'):
    # Adam with gradients clipped to a global norm of 20 before being applied.
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-2)
    grads_and_params = optimizer.compute_gradients(loss)
    grads = tuple(grad for grad, _param in grads_and_params)
    params = tuple(param for _grad, param in grads_and_params)
    # The second return value (the pre-clipping global norm) is not used.
    clipped_grads, _ = tf.clip_by_global_norm(grads, 20.)
    train_op = optimizer.apply_gradients(list(zip(clipped_grads, params)))

In [12]:
# Register the training loss as a scalar summary under the tag 'loss'.
tf.summary.scalar('loss', loss)
# Single op that evaluates every summary registered so far; fetched in the training loop.
merged = tf.summary.merge_all()

In [13]:
# Build the op that initializes all global variables, open a session and run
# the initializer once so the graph is ready for training.
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)

In [None]:
# Writer for summary events under ./logs; passing sess.graph also records the graph.
train_writer = tf.summary.FileWriter('./logs', sess.graph)

In [None]:
# Train for a fixed number of epochs with in-batch hard-negative mining.
epochs = 10
# Step index for summaries. It must keep increasing across epochs: the
# per-epoch counter `iters` restarts at 0, which previously made every epoch
# write its summaries over the same step range.
summary_step = 0
for epoch in range(epochs):
    print('Epoch %d' % (epoch + 1))
    total_loss = 0.
    iters = 0
    for e_x, e_length, f_x, f_length in dataset.iterate_minibatches(128):

        feed_dict = {
            english_input: e_x,
            english_length: e_length,
            french_input: f_x,
            french_length: f_length
        }

        # Hard-negative mining: for each English sentence pick the closest
        # French sentence in the batch other than its own translation. The
        # diagonal holds the true pairs, so it is masked with +inf first.
        distances = sess.run(pairwise_distance, feed_dict=feed_dict)
        np.fill_diagonal(distances, np.inf)
        perm = np.argmin(distances, axis=1)

        # Reuse the same feeds for the training step and add the mined negatives.
        feed_dict[french_input_neg] = f_x[perm]
        feed_dict[french_length_neg] = f_length[perm]

        _, c, summary = sess.run([train_op, loss, merged], feed_dict=feed_dict)
        train_writer.add_summary(summary, summary_step)
        total_loss += c
        iters += 1
        summary_step += 1

    # Guard against an iterator that yielded no batches (avoids ZeroDivisionError).
    if iters:
        print('Average loss on train: %.3f' % (total_loss / iters))
    else:
        print('No minibatches were produced; skipping average loss.')
    # Push this epoch's summaries to disk so they are visible right away.
    train_writer.flush()

N/A% (0 of 38919420) |                   | Elapsed Time: 0:00:00 ETA:  --:--:--

Epoch 1


 33% (12936960 of 38919420) |####          | Elapsed Time: 0:25:12 ETA: 0:50:38