In [None]:
import numpy as np
import time
import tensorflow as tf

# Initialization

In [None]:
# Hyper-parameters and file paths shared by the cells below.
config = {
    'x_width': 256,                     # last dimension of each input sample
    'x_height': 40,                     # third dimension of each input sample
    'num_channels': 1,                  # second dimension (batches are channels-first)
    'dropout': [0.1, 0.1],              # values fed to the 'dropout_rate' placeholder while training
    'embedding_size': 256,              # units of the fc7 / fc8 layers
    'batch_size': 256,                  # samples per stored batch: 1 anchor + positives + negatives
    'positives_per_batch_count': 40,    # positives stored right after the anchor
    'triplets_count': 1024,             # triplets drawn per batch; also the mask length
    'masked_count': 4,                  # mask entries set to 1; lowered by cool_down_mask
    'alpha_margin': 0.19,               # margin added in the triplet loss
    'learning_rate': 2e-5,              # passed to AdamOptimizer

    'tfrecords_train': './TF_Records_File_train.tfrecords',
    'tfrecords_cv': './TF_Records_File_cv.tfrecords',

    'model_path': './trained_model.ckpt',   # checkpoint written on improvement / restored on resume
    'log_path': './train_log.csv'           # per-epoch loss log
}

# Prepare data

In [None]:
def batch_generator(sample_loader = None, iters = int(1e5), train_ratio = .7):
    """Yield (serialized_batch, batch_type) pairs for write_to_tfrecord.

    Each batch is a float32 array of shape
    [batch_size, num_channels, x_height, x_width] (all taken from `config`)
    holding the anchor at index 0, then config['positives_per_batch_count']
    positives, then negatives in the remaining slots.

    Args:
        sample_loader: callable taking the iteration number and returning
            (anchor_data, positive_data, negative_data); each value must be
            assignable (broadcastable) to its slice of the batch.
            ### ==> TODO: You need to supply this for your case to use this code
        iters: total number of batches to generate.
        train_ratio: fraction of the batches (the first ones) tagged 'train';
            the rest are tagged 'cv'.

    Yields:
        (bytes, str): raw bytes of the batch array and 'train' or 'cv'.

    Raises:
        NotImplementedError: if no sample_loader is supplied.
    """
    if sample_loader is None:
        raise NotImplementedError(
            'batch_generator needs a sample_loader(iteration) callable that '
            'returns (anchor_data, positive_data, negative_data)')

    iters = int(iters)  # range() rejects floats such as 1e5
    train_cutoff = int(train_ratio * iters)
    first_negative = config['positives_per_batch_count'] + 1
    batch_shape = [config['batch_size'],
                   config['num_channels'],
                   config['x_height'],
                   config['x_width']]

    for _iter in range(iters):
        anchor_data, positive_data, negative_data = sample_loader(_iter)

        batch_data = np.zeros(batch_shape, dtype = np.float32)
        batch_data[0, :, :, :] = anchor_data
        batch_data[1:first_negative, :, :, :] = positive_data
        batch_data[first_negative:, :, :, :] = negative_data

        # strict '<' so exactly train_cutoff batches land in the train split
        batch_type = 'train' if _iter < train_cutoff else 'cv'
        yield batch_data.tobytes(), batch_type

In [None]:
 def write_to_tfrecord():
    """Serialize every batch from batch_generator() into the TFRecord files.

    Batches tagged 'train' go to config['tfrecords_train'], all others to
    config['tfrecords_cv']. Each record is a tf.train.Example with a single
    bytes feature named 'batch'. Record totals are printed and both writers
    are closed even when generation fails; the failure itself is re-raised
    instead of being silently swallowed.
    """
    writer_train = tf.python_io.TFRecordWriter(config['tfrecords_train'])
    try:
        writer_cv = tf.python_io.TFRecordWriter(config['tfrecords_cv'])
    except Exception:
        # do not leak the first writer when the second cannot be opened
        writer_train.close()
        raise

    train_counter = 0
    cv_counter = 0
    try:
        # the for-loop ends cleanly when the generator is exhausted
        for batch, batch_type in batch_generator():
            feature_dict = {'batch':
                tf.train.Feature(bytes_list = tf.train.BytesList(value = [batch]))}
            example = tf.train.Example(features = tf.train.Features(feature = feature_dict))
            if batch_type == 'train':
                train_counter += 1
                writer_train.write(example.SerializeToString())
                print('>>>>>> train: ', train_counter, end = '\r')
            else:
                cv_counter += 1
                writer_cv.write(example.SerializeToString())
                print('>>>>>> cv: ', cv_counter, end = '\r')
    finally:
        print('')  # move past the '\r' progress line
        print('total_number of trained records:', train_counter)
        print('total_number of cv records:', cv_counter)
        writer_train.close()
        writer_cv.close()

In [None]:
def read_from_tfrecord(tfrecord_file, queue_size, num_threads, min_capacity):
    """Build the queue-based input pipeline that yields one stored batch.

    Args:
        tfrecord_file: file name(s) handed to tf.train.string_input_producer
            (callers in this notebook pass a one-element list).
        queue_size: capacity of the shuffle queue.
        num_threads: number of threads filling the shuffle queue.
        min_capacity: min_after_dequeue of the shuffle queue.

    Returns:
        A float32 tensor of shape
        [batch_size, num_channels, x_height, x_width] (from `config`); each
        evaluation dequeues one randomly chosen stored batch.
    """
    stored_batch_shape = [config['batch_size'],
                          config['num_channels'],
                          config['x_height'],
                          config['x_width']]
    feature_spec = {'batch': tf.FixedLenFeature([], tf.string)}

    with tf.variable_scope('Queue_Batch_Shuffle', reuse = False):
        filename_queue = tf.train.string_input_producer(tfrecord_file, name = 'queue')
        _, serialized_example = tf.TFRecordReader().read(filename_queue)
        parsed = tf.parse_single_example(serialized_example,
                                         features = feature_spec,
                                         name = 'features')

        # records hold the raw float32 bytes of a whole batch
        decoded = tf.reshape(tf.decode_raw(parsed['batch'], tf.float32),
                             stored_batch_shape)

        # shuffle at the granularity of stored batches, one per dequeue
        shuffled = tf.train.shuffle_batch([decoded],
                                          batch_size = 1,
                                          capacity = queue_size,
                                          num_threads = num_threads,
                                          min_after_dequeue = min_capacity)
        return shuffled[0]

In [None]:
def triplet_list_maker(batch_size = None, positives_per_batch_count = None,
                       triplets_count = None):
    """Draw random <anchor_index, positive_index, negative_index> triplets.

    Index layout inside a batch: 0 is the anchor, 1..P are the positives and
    P+1..batch_size-1 are the negatives (P = positives_per_batch_count).

    Positives are visited in random order and, for each positive, negatives
    in random order, until `triplets_count` triplets are collected; the rows
    are then shuffled. Note that this only reaches as many positives as are
    needed to fill the requested count.

    Args:
        batch_size: samples per batch; defaults to config['batch_size'].
        positives_per_batch_count: defaults to
            config['positives_per_batch_count'].
        triplets_count: number of rows to return; defaults to
            config['triplets_count'].

    Returns:
        Integer array of shape [triplets_count, 3].

    Raises:
        ValueError: if the batch layout cannot provide `triplets_count`
            distinct (positive, negative) pairs.
    """
    if batch_size is None:
        batch_size = config['batch_size']
    if positives_per_batch_count is None:
        positives_per_batch_count = config['positives_per_batch_count']
    if triplets_count is None:
        triplets_count = config['triplets_count']

    # the anchor occupies index 0, so it must not be counted as a negative;
    # without the "- 1" the largest negative index equals batch_size
    negatives_per_batch_count = batch_size - positives_per_batch_count - 1
    available = max(positives_per_batch_count, 0) * max(negatives_per_batch_count, 0)
    if triplets_count < 1 or triplets_count > available:
        raise ValueError('cannot draw %d triplets from %d positives and %d negatives'
                         % (triplets_count, positives_per_batch_count,
                            negatives_per_batch_count))

    first_negative = positives_per_batch_count + 1
    triplets = []
    positives_index = list(range(positives_per_batch_count))
    np.random.shuffle(positives_index)
    for positive_idx in positives_index:
        negatives_index = list(range(negatives_per_batch_count))
        np.random.shuffle(negatives_index)
        for negative_idx in negatives_index:
            triplets.append([0, positive_idx + 1, negative_idx + first_negative])
            if len(triplets) == triplets_count:
                break
        if len(triplets) == triplets_count:
            break

    result = np.array(triplets)
    np.random.shuffle(result)
    return result

# Build Model

In [None]:
def _conv_block(inputs, filters, kernel_size, index, is_training):
    # One conv -> batch-norm -> ReLU -> 2x2 max-pool stage (channels-first).
    # Returns (conv_output, pooled_output); layers are named
    # 'conv<index>', 'batch_norm<index>', 'relu<index>', 'max_pool<index>'.
    conv = tf.layers.conv2d(inputs, filters, kernel_size,
                            strides = [1, 1],
                            data_format = 'channels_first',
                            padding = 'same',
                            name = 'conv%d' % index)
    batch_norm = tf.layers.batch_normalization(conv, training = is_training,
                                               axis = 1, name = 'batch_norm%d' % index)
    relu = tf.nn.relu(batch_norm, name = 'relu%d' % index)
    max_pool = tf.layers.max_pooling2d(relu, [2, 2],
                                       strides = [2, 2],
                                       data_format = 'channels_first',
                                       padding = 'valid',
                                       name = 'max_pool%d' % index)
    return conv, max_pool


def _dense_block(inputs, units, index, is_training):
    # One dense -> batch-norm -> ReLU stage. Returns (dense_output, relu_output);
    # layers are named 'fc<index>', 'batch_norm<index>', 'relu<index>'.
    fc = tf.layers.dense(inputs, units, name = 'fc%d' % index)
    batch_norm = tf.layers.batch_normalization(fc, training = is_training,
                                               name = 'batch_norm%d' % index)
    relu = tf.nn.relu(batch_norm, name = 'relu%d' % index)
    return fc, relu


def build_model(x, dropout_rate = [0.1, 0.05], is_training = True, print_summary = False):
    """Build the CNN that maps inputs to two L2-normalized embeddings.

    Architecture (all inside variable scope 'Model'): four conv blocks with
    32/64/128/256 filters, dropout, two 1024-unit dense blocks, then fc7 and
    fc8 with config['embedding_size'] units each.

    Args:
        x: channels-first input tensor.
        dropout_rate: two rates (indexable, may be a tensor); element 0 is
            applied after the conv stack, element 1 before fc8.
        is_training: bool or bool tensor. Switches batch-norm to batch
            statistics and enables dropout.
        print_summary: when True, print the static shape of the main layers.

    Returns:
        (l27, l28): L2-normalized fc7 output (taken before its batch-norm)
        and L2-normalized fc8 output.
    """
    print('Build model...')
    with tf.variable_scope('Model', reuse = False):
        # blocks 1-4
        conv1, max_pool1 = _conv_block(x, 32, [3, 5], 1, is_training)
        conv2, max_pool2 = _conv_block(max_pool1, 64, [2, 5], 2, is_training)
        conv3, max_pool3 = _conv_block(max_pool2, 128, [3, 3], 3, is_training)
        conv4, max_pool4 = _conv_block(max_pool3, 256, [2, 2], 4, is_training)
        # tf.layers.dropout defaults to training=False (a no-op); tie it to
        # is_training so the fed dropout rates actually take effect
        dropout4 = tf.layers.dropout(max_pool4, dropout_rate[0],
                                     training = is_training, name = 'dropout4')

        # block 5
        flatten_length = int(np.prod(dropout4.get_shape().as_list()[1:]))
        flatten5 = tf.reshape(dropout4, (-1, flatten_length), name = 'flatten5')
        fc5, relu5 = _dense_block(flatten5, 1024, 5, is_training)

        # block 6
        fc6, relu6 = _dense_block(relu5, 1024, 6, is_training)

        # block 7
        fc7, relu7 = _dense_block(relu6, config['embedding_size'], 7, is_training)
        dropout7 = tf.layers.dropout(relu7, dropout_rate[1],
                                     training = is_training, name = 'dropout7')
        l27 = tf.nn.l2_normalize(fc7, 1, name ='l27')

        # block 8
        fc8 = tf.layers.dense(dropout7, config['embedding_size'], name = 'fc8')
        l28 = tf.nn.l2_normalize(fc8, 1, name ='l28')

        assert fc8.get_shape()[1] == config['embedding_size']
        if print_summary:
            print('Model summary:\n x: %s\n' \
                  ' conv1: %s\n max_pool1: %s\n' \
                  ' conv2: %s\n max_pool2: %s\n' \
                  ' conv3: %s\n max_pool3: %s\n' \
                  ' conv4: %s\n max_pool4 %s\n' \
                  ' flatten5 %s\n fc5 %s\n' \
                  ' fc6 %s\n fc7 %s\n' \
                  ' fc8 %s\n' %(x.get_shape(),
                                conv1.get_shape(), max_pool1.get_shape(),
                                conv2.get_shape(), max_pool2.get_shape(),
                                conv3.get_shape(), max_pool3.get_shape(),
                                conv4.get_shape(), max_pool4.get_shape(),
                                flatten5.get_shape(), fc5.get_shape(),
                                fc6.get_shape(), fc7.get_shape(), fc8.get_shape()))
        return l27, l28

In [None]:
def optimizer(loss):
    """Create the Adam training op for the variables of the 'Model' scope.

    The op is built under a control dependency on every op registered in
    tf.GraphKeys.UPDATE_OPS, so those run whenever a training step runs.
    The learning rate is read from config['learning_rate'] when the graph
    is built.

    Args:
        loss: scalar tensor to minimize.

    Returns:
        The op returned by AdamOptimizer.minimize.
    """
    with tf.variable_scope('Optimizer', reuse = False):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # only variables whose name starts with 'Model' are trained
            model_vars = []
            for var in tf.trainable_variables():
                if var.name.startswith('Model'):
                    model_vars.append(var)

            adam = tf.train.AdamOptimizer(learning_rate = config['learning_rate'])
            train_op = adam.minimize(loss, var_list = model_vars)
    return train_op

In [None]:
def prepare_triplet_embeddings(triplets, embeddings):
    """Look up the embeddings referenced by each triplet.

    Args:
        triplets: 2-D index tensor whose columns are
            (anchor_index, positive_index, negative_index).
        embeddings: tensor whose rows are indexed by those values.

    Returns:
        Tuple (anchor_embeddings, positive_embeddings, negative_embeddings),
        one gathered tensor per triplet column.
    """
    with tf.variable_scope('Embeddings', reuse = False):
        gathered = []
        for column in range(3):
            gathered.append(tf.gather(embeddings, triplets[:, column]))
    return tuple(gathered)

In [None]:
def triplet_loss_acc(triplet_embeddings1, triplet_embeddings2, mask, threshold):
    """Triplet hinge loss for two embedding sets plus accuracy metrics.

    For each embedding set the per-triplet term is
        max((|a - p|^2 - |a - n|^2 + config['alpha_margin']) * flag, 0)
    and the terms are summed over the triplets. `flag` is 1 where, in the
    second embedding set, the anchor-negative distance exceeds the
    anchor-positive distance, or where `mask` is 1; the same flags are used
    for both embedding sets.

    Args:
        triplet_embeddings1: (anchor, positive, negative) tensors of the
            first embedding set.
        triplet_embeddings2: same for the second embedding set.
        mask: float tensor, one entry per triplet; a 1 forces that triplet
            into the loss (used for exploration).
        threshold: distance threshold for the accuracy metrics.

    Returns:
        (loss, loss2, ap_acc, an_acc, triplets_used):
        loss is loss1 + loss2; loss2 is the second set's loss alone;
        ap_acc is the fraction of anchor-positive distances below
        `threshold`; an_acc is the fraction of anchor-negative distances
        above it (both on the second set); triplets_used is the sum of flags.
    """
    anchor1, positive1, negative1 = triplet_embeddings1
    anchor2, positive2, negative2 = triplet_embeddings2

    with tf.variable_scope('Loss', reuse = False):
        # squared distances of the second embedding set
        pos_sq_dist2 = tf.reduce_sum(tf.square(anchor2 - positive2), axis = -1)
        neg_sq_dist2 = tf.reduce_sum(tf.square(anchor2 - negative2), axis = -1)

        negative_is_farther = tf.greater(neg_sq_dist2, pos_sq_dist2)
        # masked triplets are kept even when the positive is the farther one
        flags = tf.maximum(tf.cast(negative_is_farther, tf.float32), mask)

        hinge_input2 = (pos_sq_dist2 - neg_sq_dist2 + config['alpha_margin']) * flags
        loss2 = tf.reduce_sum(tf.maximum(hinge_input2, 0))

        # same loss on the first embedding set, reusing the flags
        pos_sq_dist1 = tf.reduce_sum(tf.square(anchor1 - positive1), axis = -1)
        neg_sq_dist1 = tf.reduce_sum(tf.square(anchor1 - negative1), axis = -1)
        hinge_input1 = (pos_sq_dist1 - neg_sq_dist1 + config['alpha_margin']) * flags
        loss1 = tf.reduce_sum(tf.maximum(hinge_input1, 0))

        loss = loss1 + loss2

        positives_close = tf.greater(threshold, tf.sqrt(pos_sq_dist2))
        negatives_far = tf.greater(tf.sqrt(neg_sq_dist2), threshold)
        ap_acc = tf.reduce_mean(tf.cast(positives_close, tf.float32))
        an_acc = tf.reduce_mean(tf.cast(negatives_far, tf.float32))
        triplets_used = tf.reduce_sum(flags)

    return loss, loss2, ap_acc, an_acc, triplets_used

# Training

In [None]:
# Number of batches consumed per epoch by train_per_batch and cv_per_batch.
# NOTE(review): presumably these should equal the record counts printed by
# write_to_tfrecord — confirm before training.
train_batches_count = 1234567 # put your number here
cv_batches_count = 12345 # put your number here

In [None]:
def save_model_on_imporvemnet(file_path, sess, cv_loss, cv_losses):
    """Checkpoint the model when `cv_loss` beats every earlier cv loss.

    Args:
        file_path: checkpoint path handed to Saver.save.
        sess: session holding the variables; the saver is cached on it.
        cv_loss: cross-validation loss of the epoch just finished.
        cv_losses: cv losses of the previous epochs (not yet including
            `cv_loss`); an empty sequence always triggers a save.
    """
    is_improvement = len(cv_losses) == 0 or cv_loss < np.min(cv_losses)
    if is_improvement:
        # Building a Saver adds save/restore ops to the graph, so create it
        # once per session instead of once per saved epoch.
        saver = getattr(sess, '_improvement_saver', None)
        if saver is None:
            saver = tf.train.Saver(max_to_keep = 1)
            sess._improvement_saver = saver
        # save the entire model
        saver.save(sess, file_path)
        print('Model saved')

    print('')

In [None]:
def log_loss(file_path, epoch, train_loss, cv_loss, log_mode = 'a'):
    """Append one 'epoch, train_loss, cv_loss' row to the CSV log.

    Args:
        file_path: path of the log file.
        epoch: epoch number written in the first column.
        train_loss: training loss of the epoch.
        cv_loss: cross-validation loss of the epoch.
        log_mode: file mode used when `epoch` is 0 ('w' starts a new log);
            every other epoch is always appended.

    The header row is written when the file is opened with mode 'w' and also
    when the target file is missing or empty, so a run that starts at a
    non-zero epoch still yields a CSV with column names.
    """
    import os  # local import: the notebook's import cell does not provide os

    mode = log_mode if epoch == 0 else 'a'
    starts_empty = (not os.path.exists(file_path)) or os.path.getsize(file_path) == 0
    needs_header = mode == 'w' or starts_empty

    with open(file_path, mode) as f:
        if needs_header:
            header = 'epoch, train_loss, cv_loss\n'
            f.write(header)

        line = '%d, %f, %f\n' %(epoch, train_loss, cv_loss)
        f.write(line)

In [None]:
def cool_down_mask(epoch):
    """Lower config['masked_count'] by one on every 20th epoch.

    Epoch 0 is skipped and the count is only decremented while it is at
    least 1, so it never drops below zero through this function.
    """
    is_cooldown_epoch = epoch > 0 and epoch % 20 == 0
    if is_cooldown_epoch and config['masked_count'] >= 1:
        config['masked_count'] -= 1

In [None]:
def decrease_learning_rate(epoch):
    """Hook for a learning-rate schedule; currently does nothing.

    The schedule that used to live here is disabled: on every 10th epoch
    (except epoch 0), while config['learning_rate'] was positive, it
    multiplied config['learning_rate'] by (1 - 1e-5).
    """
    return None

In [None]:
def build_mask(size, num_ones):
    """Return a float32 vector of length `size` with random entries set to 1.

    The positions are the first `num_ones` entries of a random shuffle of
    range(size); all other entries are 0.
    """
    shuffled_positions = list(range(size))
    np.random.shuffle(shuffled_positions)

    mask = np.zeros(size, dtype = np.float32)
    chosen = np.asarray(shuffled_positions[:num_ones], dtype = np.intp)
    mask[chosen] = 1.0
    return mask

In [None]:
def get_placeholders_tensors():
    """Build the whole training graph and return its handles in a dict.

    Creates the input placeholders, the model, the triplet look-ups, the
    loss/metric tensors and the optimizer op.

    Returns:
        dict with the placeholders 'x', 'dropout_rate', 'is_training',
        'triplets', 'same_threshold' and 'mask', the tensors 'embeddings1',
        'embeddings2', 'loss', 'ap_acc', 'an_acc' and 'triplets_used', and
        the training op 'optimizer'. Note that 'loss' is the loss of the
        second embedding set only, while the optimizer minimizes the sum of
        both losses.
    """
    input_shape = [None,
                   config['num_channels'],
                   config['x_height'],
                   config['x_width']]

    # placeholders
    x = tf.placeholder(tf.float32, name = 'x', shape = input_shape)
    dropout_rate = tf.placeholder(tf.float32, name = 'dropout_rate', shape = [2])
    is_training = tf.placeholder(tf.bool, name = 'is_training')
    triplets = tf.placeholder(tf.int32, name = 'triplets', shape = [None, 3])
    same_threshold = tf.placeholder(tf.float32, name = 'same_threshold')
    mask = tf.placeholder(tf.float32, name='mask')

    # model and per-triplet embeddings
    embeddings1, embeddings2 = build_model(x, dropout_rate, is_training, print_summary = True)
    triplets_of_embeddings1 = prepare_triplet_embeddings(triplets, embeddings1)
    triplets_of_embeddings2 = prepare_triplet_embeddings(triplets, embeddings2)

    # loss, metrics and training op
    metrics = triplet_loss_acc(triplets_of_embeddings1, triplets_of_embeddings2,
                               mask, same_threshold)
    total_loss, loss2, ap_acc, an_acc, triplets_used = metrics
    train_op = optimizer(total_loss)

    return dict(x = x,
                dropout_rate = dropout_rate,
                is_training = is_training,
                triplets = triplets,
                embeddings1 = embeddings1,
                embeddings2 = embeddings2,
                same_threshold = same_threshold,
                mask = mask,
                optimizer = train_op,
                loss = loss2,
                ap_acc = ap_acc,
                an_acc = an_acc,
                triplets_used = triplets_used)

In [None]:
def train_per_batch(sess, batch, placeholders_tensors, epoch, threshold):
    """Run one training epoch and return its mean training loss.

    Processes `train_batches_count` batches. For each batch a fresh triplet
    list and mask are drawn and a single sess.run performs the optimizer
    step and fetches the metrics, so the reported values come from the same
    forward pass the update was computed from.

    Args:
        sess: active session.
        batch: tensor that yields one input batch per evaluation.
        placeholders_tensors: dict returned by get_placeholders_tensors().
        epoch: current epoch number (drives the mask cool-down and the
            progress output).
        threshold: value fed to the 'same_threshold' placeholder.

    Returns:
        Mean of the per-batch 'loss' values.
    """
    tmp_loss, tmp_ap_acc, tmp_an_acc, tmp_triplets_used = [], [], [], []
    t_total = 0
    bad_batch_counter = 0
    progress_format = 'epoch: %d, time: %f | train_loss: %f |' \
                      ' ap_acc: %f | an_acc: %f | triplets_used: %d | bads: %d'
    # fetched together so each batch costs one forward/backward pass
    fetches = [placeholders_tensors['optimizer'],
               placeholders_tensors['loss'],
               placeholders_tensors['ap_acc'],
               placeholders_tensors['an_acc'],
               placeholders_tensors['triplets_used']]

    cool_down_mask(epoch)
    decrease_learning_rate(epoch)
    for _ in range(train_batches_count):
        t_start = time.time()
        batch_x = sess.run(batch)
        batch_triplets = triplet_list_maker()
        train_mask = build_mask(config['triplets_count'], config['masked_count'])
        feed_dictionary = {placeholders_tensors['x']: batch_x,
                           placeholders_tensors['dropout_rate']: config['dropout'],
                           placeholders_tensors['is_training']: True,
                           placeholders_tensors['triplets']: batch_triplets,
                           placeholders_tensors['same_threshold']: threshold,
                           placeholders_tensors['mask']: train_mask}

        _, train_loss, train_ap_acc, train_an_acc, train_triplets_used = \
            sess.run(fetches, feed_dict = feed_dictionary)

        tmp_loss.append(train_loss)
        tmp_ap_acc.append(train_ap_acc)
        tmp_an_acc.append(train_an_acc)
        tmp_triplets_used.append(train_triplets_used)

        # a batch is "bad" when not every triplet contributed to the loss
        if int(train_triplets_used) != config['triplets_count']:
            bad_batch_counter += 1
        t_total += (time.time() - t_start)

        print(' '*200, end = '\r')
        print(progress_format %(epoch, t_total, train_loss,
                                train_ap_acc, train_an_acc,
                                int(train_triplets_used), bad_batch_counter),
              end = '\r')

    train_loss = np.mean(tmp_loss)
    train_ap_acc = np.mean(tmp_ap_acc)
    train_an_acc = np.mean(tmp_an_acc)
    train_triplets_used = np.mean(tmp_triplets_used)
    print(' '*200, end = '\r')
    print((progress_format + '\n') %(epoch, t_total, train_loss,
                                     train_ap_acc, train_an_acc,
                                     int(train_triplets_used), bad_batch_counter),
          end = '\r')
    return train_loss

In [None]:
def cv_per_batch(sess, batch, placeholders_tensors, epoch, threshold):
    """Evaluate the model on the cross-validation set and return the mean loss.

    Processes `cv_batches_count` batches with is_training = False and both
    dropout rates set to 0. All metrics of a batch are fetched with a single
    sess.run, so the forward pass is computed once per batch.

    Args:
        sess: active session.
        batch: tensor that yields one input batch per evaluation.
        placeholders_tensors: dict returned by get_placeholders_tensors().
        epoch: current epoch number (unused here; kept for a signature
            parallel to train_per_batch).
        threshold: value fed to the 'same_threshold' placeholder.

    Returns:
        Mean of the per-batch 'loss' values.
    """
    tmp_loss, tmp_ap_acc, tmp_an_acc, tmp_triplets_used = [], [], [], []
    t_total = 0
    bad_batch_counter = 0
    fetches = [placeholders_tensors['loss'],
               placeholders_tensors['ap_acc'],
               placeholders_tensors['an_acc'],
               placeholders_tensors['triplets_used']]

    for _ in range(cv_batches_count):
        t_start = time.time()
        batch_x = sess.run(batch)
        batch_triplets = triplet_list_maker()
        test_mask = build_mask(config['triplets_count'], config['masked_count'])
        cv_feed_dictionary = {placeholders_tensors['x']: batch_x,
                              placeholders_tensors['dropout_rate']: [0.0, 0.0],
                              placeholders_tensors['is_training']: False,
                              placeholders_tensors['triplets']: batch_triplets,
                              placeholders_tensors['same_threshold']: threshold,
                              placeholders_tensors['mask']: test_mask}

        cv_loss, cv_ap_acc, cv_an_acc, cv_triplets_used = \
            sess.run(fetches, feed_dict = cv_feed_dictionary)

        tmp_loss.append(cv_loss)
        tmp_ap_acc.append(cv_ap_acc)
        tmp_an_acc.append(cv_an_acc)
        tmp_triplets_used.append(cv_triplets_used)

        # a batch is "bad" when not every triplet contributed to the loss
        if int(cv_triplets_used) != config['triplets_count']:
            bad_batch_counter += 1
        t_total += (time.time() - t_start)

        print(' '*200, end = '\r')
        print('time: %f | cv_loss: %f |' \
              ' cv_ap_acc: %f | cv_an_acc: %f | cv_triplets_used: %d | cv_bads: %d' %(t_total, cv_loss,
                                                                                      cv_ap_acc, cv_an_acc,
                                                                                      int(cv_triplets_used),
                                                                                      bad_batch_counter), end = '\r')
    cv_loss = np.mean(tmp_loss)
    cv_ap_acc = np.mean(tmp_ap_acc)
    cv_an_acc = np.mean(tmp_an_acc)
    cv_triplets_used = np.mean(tmp_triplets_used)
    print(' '*200, end = '\r')
    print('time: %f | cv_loss: %f |' \
          ' cv_ap_acc: %f | cv_an_acc: %f | cV_triplets_used: %d | cV_bads: %d' %(t_total, cv_loss,
                                                                                  cv_ap_acc, cv_an_acc,
                                                                                  int(cv_triplets_used),
                                                                                  bad_batch_counter))
    return cv_loss

In [None]:
def train_model(epochs, init_epoch, queue_size, num_threads, resume, threshold):
    """Build the graph and run the train / cross-validate / checkpoint loop.

    Args:
        epochs: epoch number at which training stops (exclusive).
        init_epoch: first epoch number of this run.
        queue_size: capacity of the training shuffle queue; half of it is
            used as min_after_dequeue.
        num_threads: threads feeding the training shuffle queue.
        resume: when True, restore weights from config['model_path'] and
            preload the loss history from config['log_path']; otherwise
            initialize all variables.
        threshold: distance threshold passed to the accuracy metrics.

    Returns:
        (train_losses, cv_losses): per-epoch loss lists, including the
        history loaded from the log when resuming.
    """
    train_losses, cv_losses = [], []

    tf.reset_default_graph()
    placeholders_tensors = get_placeholders_tensors()
    # int() instead of np.int: that alias was removed from NumPy (1.24+)
    train_batch  = read_from_tfrecord([config['tfrecords_train']], queue_size, num_threads,
                                      int(0.5 * queue_size))
    cv_batch  = read_from_tfrecord([config['tfrecords_cv']], 50, 2, 10)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess = sess, coord = coord)

        try:
            if resume:
                print('loading weights....')
                saver = tf.train.Saver()
                # restores the checkpoint written by save_model_on_imporvemnet
                saver.restore(sess, (config['model_path']))

                # load loss and accuracy so that less accurate model
                # won't be saved after resume
                tmp = np.genfromtxt(config['log_path'], delimiter = ',', names = True)
                # atleast_1d: a log with a single row is read as a 0-d array
                train_losses = list(np.atleast_1d(tmp['train_loss']))
                cv_losses = list(np.atleast_1d(tmp['cv_loss']))
                del tmp
            else:
                print('initializing weights....')
                init_op =tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
                sess.run(init_op)

            print('training....')
            for epoch in range(init_epoch, epochs):
                # training
                train_loss = train_per_batch(sess,train_batch,
                                             placeholders_tensors, epoch, threshold)
                train_losses.append(train_loss)

                #cross-validation
                cv_loss = cv_per_batch(sess, cv_batch,
                                       placeholders_tensors, epoch, threshold)

                #save model
                save_model_on_imporvemnet(config['model_path'], sess, cv_loss, cv_losses)
                cv_losses.append(cv_loss)

                # log results
                log_loss(config['log_path'], epoch, train_loss, cv_loss,
                         log_mode = ('a' if resume else 'w'))
        finally:
            # stop the queue-runner threads even when training fails
            coord.request_stop()
            coord.join(threads)

    return train_losses, cv_losses

In [None]:
# Launch training: epochs run from init_epoch up to (but excluding) epochs.
# NOTE(review): init_epoch = 8 together with resume = False starts the epoch
# numbering at 8 with freshly initialized weights, and log_loss only honors
# log_mode 'w' when the epoch is 0 — confirm this combination is intended.
train_model(epochs = 1000, init_epoch = 8,
            queue_size = 500, num_threads = 10,
            resume = False, threshold = 0.84)