In [1]:
import tensorflow as tf

In [2]:

def triplet_loss(y_pred, alpha=0.2):
    """
    Triplet loss, summed over all examples in the batch.

    Per example the value is max(d(anchor, positive) - d(anchor, negative) + alpha, 0),
    where d is the squared Euclidean distance taken over the last axis.

    Arguments:
    y_pred -- indexable holding three encoding tensors, read as
            y_pred[0] -- anchor encodings
            y_pred[1] -- positive encodings
            y_pred[2] -- negative encodings
    alpha -- margin added to the distance gap before clamping at zero

    Returns:
    loss -- scalar tensor, the clamped per-example values summed together
    """

    def squared_distance(left, right):
        # Squared differences reduced over the encoding (last) axis.
        return tf.reduce_sum(tf.square(tf.subtract(left, right)), axis=-1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    # Distance gap plus margin; positive values mean the negative is not yet
    # far enough from the anchor relative to the positive.
    margin_gap = tf.add(
        tf.subtract(squared_distance(anchor, positive), squared_distance(anchor, negative)),
        alpha)

    # relu == max(., 0); the outer reduce_sum collapses the batch to one number.
    return tf.reduce_sum(tf.nn.relu(margin_gap))

In [3]:
def my_fully_connected_layer(X, output_count, layer_name, encoding_vector_name, reuse_flag):
    """
    Flatten X and apply an affine map (matmul + bias) with shareable parameters.

    Arguments:
    X -- input tensor; everything after the first axis is flattened
    output_count -- number of output units
    layer_name -- name scope for the flatten op and variable scope holding
                  'weight' and 'biases'
    encoding_vector_name -- suffix appended to layer_name for the name scope of
                            the matmul/add ops, so each call gets its own op scope
    reuse_flag -- forwarded as `reuse` to tf.variable_scope; pass True to share
                  the variables created by an earlier call with the same layer_name

    Returns:
    output -- tensor X_flatten @ weight + biases
    """
    with tf.name_scope(layer_name):
        flat_input = tf.layers.flatten(X, name='flatten')

    # Second static dimension of the flattened tensor = number of input features.
    input_width = flat_input.get_shape()[1]

    with tf.variable_scope(layer_name, reuse=reuse_flag):
        weights = tf.get_variable('weight', [input_width, output_count],
                                  initializer=tf.contrib.layers.xavier_initializer())
        # The bias vector also uses the Xavier initializer (not zeros).
        biases = tf.get_variable('biases', output_count,
                                 initializer=tf.contrib.layers.xavier_initializer())

    with tf.name_scope(layer_name + encoding_vector_name):
        return tf.add(tf.matmul(flat_input, weights), biases)

In [4]:
def conv_layer_full(X, dropout_prob, is_training_flag, F, F_stride, M, M_stride, layer_name,
               is_batch_normalization_flag=False, is_pooling_flag=None,
               nonlinear_act=tf.nn.relu, pooling_act=tf.nn.max_pool,
               use_cudnn_on_gpu=False, reuse_convlayer_flag = False):
    """
    Build one convolutional layer: conv2d + bias, optional batch normalization,
    nonlinearity, dropout, and optional pooling, all under the name scope
    `layer_name`.

    Arguments:
    X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dropout_prob -- passed as the second positional argument of tf.nn.dropout.
                    NOTE(review): in TF 1.x that argument is `keep_prob` (the
                    probability of KEEPING a unit), despite this parameter's
                    name -- confirm against the installed TF version.
    is_training_flag -- forwarded to batch_norm only; dropout is applied
                        regardless of this flag
    F -- filter shape [height, width, channel_prev, channel_curr]; F[3] also
         sizes the bias vector
    F_stride -- strides passed to tf.nn.conv2d
    M -- `ksize` passed to pooling_act
    M_stride -- `strides` passed to pooling_act
    layer_name -- name scope, variable scope ('weight', 'bias') and batch-norm scope
    is_batch_normalization_flag -- when true, apply tf.contrib.layers.batch_norm
                                   (center=False, scale=False) before the nonlinearity
    is_pooling_flag -- when true, apply pooling_act after dropout
    nonlinear_act -- activation, called as nonlinear_act(tensor, name=...)
    pooling_act -- pooling op, called with ksize/strides/padding/name keywords
    use_cudnn_on_gpu -- forwarded to tf.nn.conv2d
    reuse_convlayer_flag -- forwarded as `reuse` to the variable scope and to
                            batch_norm; pass True to share the variables created
                            by an earlier call with the same layer_name

    Returns:
    Y -- output of this layer, tensor of shape (m, n_H, n_W, n_C)

    """
    with tf.name_scope(layer_name):

        # convolution parameters (Xavier weights, zero bias)
        # The scope object is discarded when this `with` exits, so flipping its
        # reuse flag after the variables exist would have no effect; sharing is
        # controlled solely by reuse_convlayer_flag.
        with tf.variable_scope(layer_name, reuse = reuse_convlayer_flag):
            W = tf.get_variable('weight', F, initializer=tf.contrib.layers.xavier_initializer())
            B = tf.get_variable('bias', F[3], initializer=tf.zeros_initializer())

        Z = tf.add(tf.nn.conv2d(X, W, strides=F_stride, padding='SAME', use_cudnn_on_gpu=use_cudnn_on_gpu), B,
                   name='preactivation')

        # batch normalization (optional)
        if is_batch_normalization_flag:
            Z_batch = tf.contrib.layers.batch_norm(Z, center=False, scale=False, is_training=is_training_flag, reuse = reuse_convlayer_flag,scope = layer_name)
        else:
            Z_batch = Z

        # nonlinearity
        A = nonlinear_act(Z_batch, name='activation')

        # dropout (always applied; see dropout_prob note in the docstring)
        hidden_dropout = tf.nn.dropout(A, dropout_prob, name='hidden_dropout')

        # pooling (optional); `maxpool` holds the layer output either way
        if is_pooling_flag:
            maxpool = pooling_act(hidden_dropout, ksize=M, strides=M_stride, padding='SAME', name='max_pooling')
        else:
            maxpool = hidden_dropout

    return maxpool


In [5]:
tf.reset_default_graph()
# Seed the fresh graph before any op is created: random ops (the Xavier
# initializers, dropout) derive their seed from the graph-level seed at
# creation time, so a seed set only after the graph is built (as In [13] does)
# cannot make the initial weights reproducible.
tf.set_random_seed(0)

# One image placeholder per triplet branch. X_1 feeds the anchor branch
# (encoding1 is passed first to triplet_loss) and also the classifier head.
X_1 = tf.placeholder(dtype=tf.float32, shape=[None, 5, 5, 32])
X_2 = tf.placeholder(dtype=tf.float32, shape=[None, 5, 5, 32])
X_3 = tf.placeholder(dtype=tf.float32, shape=[None, 5, 5, 32])

dropout_prob = tf.placeholder(dtype=tf.float32)
is_training_flag = tf.placeholder(dtype=tf.bool)

# Convolution / pooling geometry shared by all three branches.
F = [5, 5, 32, 64]
F_stride = [1, 1, 1, 1]
M = [1, 2, 2, 1]
M_stride = [1, 2, 2, 1]
layer_name = 'L1'

# Options that are identical for every branch; only the input placeholder and
# the reuse flag differ between the three calls below.
shared_conv_kwargs = dict(
    is_batch_normalization_flag=False, is_pooling_flag=False,
    nonlinear_act=tf.nn.relu, pooling_act=tf.nn.max_pool,
    use_cudnn_on_gpu=False)

# The first call creates the 'L1' variables; the other two reuse them.
output_layer_1 = conv_layer_full(X_1, dropout_prob, is_training_flag, F, F_stride, M, M_stride, layer_name,
                                 reuse_convlayer_flag=False, **shared_conv_kwargs)
output_layer_2 = conv_layer_full(X_2, dropout_prob, is_training_flag, F, F_stride, M, M_stride, layer_name,
                                 reuse_convlayer_flag=True, **shared_conv_kwargs)
output_layer_3 = conv_layer_full(X_3, dropout_prob, is_training_flag, F, F_stride, M, M_stride, layer_name,
                                 reuse_convlayer_flag=True, **shared_conv_kwargs)

# Shared encoding layer: variables live in scope 'a'; 'b'/'c'/'d' only name
# the per-branch op scopes.
encoding1 = my_fully_connected_layer(output_layer_1, 10, 'a', 'b', False)
encoding2 = my_fully_connected_layer(output_layer_2, 10, 'a', 'c', True)
encoding3 = my_fully_connected_layer(output_layer_3, 10, 'a', 'd', True)

loss_triplet = triplet_loss([encoding1, encoding2, encoding3], alpha=0.2)

# Classification head on the first branch only: 7-way logits trained with
# sparse softmax cross-entropy.
last_layer = tf.contrib.layers.flatten(output_layer_1)
logits = tf.contrib.layers.fully_connected(last_layer, 7, activation_fn=None)
ground_truth_labels = tf.placeholder(dtype=tf.int64, shape=[None])
cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=ground_truth_labels)

# Handle to the graph just built; later cells query its collections.
curr = tf.get_default_graph()


In [19]:
# Scratch check: sequence unpacking binds one name per list element.
vec = list(range(1, 4))
v1, v2, v3 = vec

In [22]:
# Display the third value unpacked from `vec`.
v3

3

In [6]:
# Trainable variables split by variable-name prefix:
#   b -> scope 'a'  (the shared fully connected encoding layer)
#   a -> scope 'L1' (the shared convolution layer)
# These lists are later passed as `var_list` to restrict what each optimizer updates.
b, a = (
    curr.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_prefix)
    for scope_prefix in ('a', 'L1')
)

In [7]:
# Display the trainable variables collected under scope 'a'.
b

[<tf.Variable 'a/weight:0' shape=(1600, 10) dtype=float32_ref>,
 <tf.Variable 'a/biases:0' shape=(10,) dtype=float32_ref>]

In [8]:
# Display the trainable variables collected under scope 'L1'.
a

[<tf.Variable 'L1/weight:0' shape=(5, 5, 32, 64) dtype=float32_ref>,
 <tf.Variable 'L1/bias:0' shape=(64,) dtype=float32_ref>]

In [11]:
# Every trainable variable in the graph, in collection order. The inspection
# cells below index this list positionally ([0] .. [5]), so the labels they
# print depend on that order.
trainable_variable = curr.get_collection(name=tf.GraphKeys.TRAINABLE_VARIABLES)

In [10]:
# Three alternative training ops, each grouped under its own name scope.

# Classification loss, updating only the 'L1' convolution variables (list `a`).
with tf.name_scope('train_conv'):
    train_step_conv = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(
        loss=cross_entropy_mean, var_list=a)

# Triplet loss, updating only the encoding-layer variables (list `b`).
with tf.name_scope('train_encoding'):
    train_step_encoding = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss=loss_triplet, var_list=b)

# Triplet loss with no var_list restriction.
with tf.name_scope('train_everything'):
    train_step_encoding_full = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss=loss_triplet)

In [12]:
## prepare some training data.
import numpy as np

In [13]:
np.random.seed(0)
# NOTE(review): tf.set_random_seed sets the graph-level seed, which only
# influences random ops created after this call; anything already in the graph
# when this line runs is not affected by it.
tf.set_random_seed(0)
batch_size = 7
input_shape = [5, 5, 32]

# Random stand-in images, drawn in X_1, X_2, X_3 order so the NumPy seed above
# fixes each array.
X_1_value = np.random.randn(batch_size, *input_shape)
X_2_value = np.random.randn(batch_size, *input_shape)
X_3_value = np.random.randn(batch_size, *input_shape)

dropout_prob_value = 0.5
# is_training_flag is a tf.bool placeholder, so feed a real boolean rather than
# the integer 1.
is_training_flag_value = True
# One integer label per example: 0..6, matching the 7-way logits.
ground_truth_labels_value = np.arange(7)


In [14]:
# Open the session used by every later cell. It is never closed in this notebook.
sess = tf.InteractiveSession()
# Build the init op only now: it covers the variables that exist in the graph
# at the moment it is created, so it must come after every cell that adds
# variables (layers and optimizers).
initializer = tf.global_variables_initializer()
sess.run(initializer)


In [15]:
# Snapshot all trainable variables and print one element of each as a baseline.
# Labels assume the collection order: conv weight/bias, encoding weight/bias,
# then the classifier head's weight/bias.
trainable_variable_value = sess.run(trainable_variable)

# (format string, index into the snapshot, element position inside that array)
probes = (
    ('W1 %f', 0, (0, 0, 0, 0)),
    ('B1 %f', 1, (0,)),
    ('Wa %f', 2, (0, 0)),
    ('Ba %f', 3, (0,)),
    ('W_fully_connected %f', 4, (0, 0)),
    ('B_fully_connected %f', 5, (0,)),
)
for probe_format, variable_index, element_position in probes:
    print(probe_format % trainable_variable_value[variable_index][element_position])

W1 0.031802
B1 0.000000
Wa 0.022093
Ba 0.061073
W_fully_connected -0.021150
B_fully_connected 0.000000


In [16]:
# Feed for the classification step. Only X_1 is supplied: the logits (and so
# the cross-entropy loss) are built from output_layer_1, which reads X_1 alone.
# X_2 / X_3 would have to be added before running anything that depends on the
# triplet loss.
X_input_feeding_dict = {X_1: X_1_value, 
                        dropout_prob: dropout_prob_value, 
                        is_training_flag: is_training_flag_value,
                        ground_truth_labels: ground_truth_labels_value}

In [17]:
def _print_probe_values():
    """Fetch every trainable variable, print one element of each, return the fetched list."""
    values = sess.run(trainable_variable)
    print('W1 %f' % values[0][0,0,0,0])
    print('B1 %f' % values[1][0])
    print('Wa %f' %  values[2][0,0])
    print('Ba %f' %  values[3][0])
    print('W_fully_connected %f' %  values[4][0,0])
    print('B_fully_connected %f' %  values[5][0])
    return values


# Before / after one classification step restricted to the 'L1' variables:
# only the W1 / B1 lines are expected to differ between the two printouts.
trainable_variable_value = _print_probe_values()
sess.run(train_step_conv, feed_dict=X_input_feeding_dict)
trainable_variable_value = _print_probe_values()

W1 0.031802
B1 0.000000
Wa 0.022093
Ba 0.061073
W_fully_connected -0.021150
B_fully_connected 0.000000
W1 0.031840
B1 0.001077
Wa 0.022093
Ba 0.061073
W_fully_connected -0.021150
B_fully_connected 0.000000


In [18]:
# Feed for the triplet step: all three image placeholders are required because
# the triplet loss reads the encodings of X_1, X_2 and X_3.
X_input_feeding_dict = {X_1: X_1_value,
                        X_2: X_2_value,
                        X_3: X_3_value,
                        dropout_prob: dropout_prob_value,
                        is_training_flag: is_training_flag_value,
                        ground_truth_labels: ground_truth_labels_value}


def _print_probe_values():
    """Fetch every trainable variable, print one element of each, return the fetched list."""
    values = sess.run(trainable_variable)
    print('W1 %f' % values[0][0,0,0,0])
    print('B1 %f' % values[1][0])
    print('Wa %f' %  values[2][0,0])
    print('Ba %f' %  values[3][0])
    print('W_fully_connected %f' %  values[4][0,0])
    print('B_fully_connected %f' %  values[5][0])
    return values


# Before / after one triplet-loss step restricted to the scope-'a' variables:
# only the Wa / Ba lines are expected to differ between the two printouts.
trainable_variable_value = _print_probe_values()
sess.run(train_step_encoding, feed_dict=X_input_feeding_dict)
trainable_variable_value = _print_probe_values()

W1 0.031840
B1 0.001077
Wa 0.022093
Ba 0.061073
W_fully_connected -0.021150
B_fully_connected 0.000000
W1 0.031840
B1 0.001077
Wa 0.032093
Ba 0.058335
W_fully_connected -0.021150
B_fully_connected 0.000000


In [28]:
# Inspect the op bound to train_step_encoding.
# NOTE(review): the recorded output names the op 'train_everything/Adam', while
# In [10] builds train_step_encoding under the 'train_encoding' name scope.
# This suggests the name was rebound by an out-of-order or edited cell run --
# confirm with Restart Kernel -> Run All.
train_step_encoding

<tf.Operation 'train_everything/Adam' type=NoOp>