# DMS with QL

https://github.com/AdrianUng/keras-triplet-loss-mnist/blob/master/Triplet_loss_KERAS_semi_hard_from_TF.ipynb

In [3]:
import keras
# from __future__ import print_function, division
import re
import numpy as np
import pandas as pd

import os
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import sys
from annoy import AnnoyIndex
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    
# %matplotlib inline

from keras.layers import Conv1D, Input, Add, Activation, Dropout, Embedding, MaxPooling1D, \
    GlobalMaxPool1D, Flatten, Dense, Concatenate, BatchNormalization
from keras.models import Sequential, Model
from keras.regularizers import l2
from keras.initializers import TruncatedNormal
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import optimizers

from methods.baseline import Baseline
from methods.experiments import Experiment
from methods.evaluation import Evaluation
from methods.retrieval import Retrieval

import os
from keras_bert import load_vocabulary
import random

from keras.constraints import MaxNorm
from keras.initializers import TruncatedNormal, RandomUniform
from keras.layers import GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.layers import Dense, Input, LSTM, GRU, Dropout, Bidirectional, GlobalAveragePooling1D, TimeDistributed

import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import dtypes

from keras.layers import concatenate, Add, Lambda, merge, Average, Maximum
from keras.optimizers import Adam, Nadam

from keras.layers import Layer
from keras import backend as K
import _pickle as pickle

## Configurações Globais

In [6]:
MAX_SEQUENCE_LENGTH_T = 20 # 100
MAX_SEQUENCE_LENGTH_D = 20 # 500
EMBEDDING_DIM = 300
MAX_NB_WORDS = 20000
'''
    Configuration
'''
epochs = int(os.environ['epochs'])
freeze_train = .1 # 10% with freeze weights
best_loss = 1
best_epoch = 0
verbose = 0
loss = 1

### Parse bugs preproprecessed

In [7]:
# Domain to use
DOMAIN = os.environ['base']
METHOD = 'DMS_QL_{}'.format(epochs)
PREPROCESSING = 'bert'
TOKEN = 'bert'
# Dataset paths
DIR = 'data/processed/{}/{}'.format(DOMAIN, PREPROCESSING)
DIR_PAIRS = 'data/normalized/{}'.format(DOMAIN)
DATASET = os.path.join('data/normalized/{}'.format(DOMAIN), '{}.csv'.format(DOMAIN))
# Glove embeddings
GLOVE_DIR='data/embed'
# Save model
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

# Extract CORPUs
EXTRACT_CORPUS = False

In [None]:
print("*********")
print("{} for {} epochs in {}".format(METHOD, epochs, DOMAIN))
print("*********")

In [8]:
pretrained_path = 'uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
model_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')

In [9]:
token_dict = load_vocabulary(vocab_path)

In [10]:
baseline = Baseline(DOMAIN, DIR, DATASET, MAX_SEQUENCE_LENGTH_T, MAX_SEQUENCE_LENGTH_D,
                   token_dict['[CLS]'], token_dict['[SEP]'])
evaluation = Evaluation(verbose=0)
retrieval = Retrieval()
experiment = Experiment(baseline, evaluation)

In [11]:
experiment.set_retrieval(retrieval, baseline, DOMAIN)

#### Loading bug ids in memory

In [12]:
experiment.load_ids()
len(baseline.bug_ids)

Reading bug ids


361006

#### Dicionário de títulos e descrições

In [13]:
%%time

experiment.load_bugs(TOKEN)
len(baseline.sentence_dict)

HBox(children=(IntProgress(value=0, max=361006), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


CPU times: user 34.9 s, sys: 3.58 s, total: 38.5 s
Wall time: 37.8 s


#### Hashing bugs by buckets

In [14]:
issues_by_buckets = experiment.get_buckets_for_bugs()

HBox(children=(IntProgress(value=0, max=361006), HTML(value='')))




#### Prepare the train and test

In [15]:
# path_train='train_chronological', path_test='test_chronological'
experiment.prepare_dataset(issues_by_buckets, path_train='train_chronological', path_test='test_chronological')
# Read and create the test queries duplicates
retrieval.create_queries()

In [16]:
baseline.train_data[:10]

[[275492, 218812],
 [288296, 264093],
 [273286, 293887],
 [57162, 62059],
 [82146, 67997],
 [56777, 61857],
 [169445, 165179],
 [250521, 273893],
 [247266, 241461],
 [36781, 38338]]

#### Recovery bug ids from train

In [17]:
bug_train_ids = experiment.get_train_ids(baseline.train_data)

### Export the corpus train

In [18]:
if EXTRACT_CORPUS:
    corpus = []
    export_file = open(os.path.join(DIR, 'corpus_train.txt'), 'w')
    for bug_id in tqdm(baseline.bug_set):
        bug = baseline.bug_set[bug_id]
        title = bug['title']
        desc = bug['description']
        export_file.write("{}\n{}\n".format(title, desc))
    export_file.close()

# Generating tiple of batches

In [19]:
idx = np.random.choice(baseline.bug_ids, 1)[0]
baseline.bug_set[idx]

{'bug_severity': '2\n',
 'bug_status': '2\n',
 'component': '176\n',
 'creation_ts': '2011-08-10 04:47:00 -0400',
 'delta_ts': '2011-09-05 06:47:02 -0400',
 'description': '[CLS] hello we have a bug ##zi ##lla " bug 351 ##70 ##9 " for creating a snaps ##hot repository for the w ##tp - inc ##uba ##tor service interface editor project with the project \' s hudson build . we have updated the project \' s repository po ##ms to extract the update site at " / home / data / http ##d / download . eclipse . org / web ##to ##ols / inc ##uba ##tor / repository / si ##ed ##itor / snaps ##hot ##s " , but as expected such directory does not exists . could you help us create it , and apply any additional steps if needed to create the repository ? best regards , dim ##ita ##r ff ##or more information : original bug ##iz ##lla : https : / / bugs . eclipse . org / bugs / show _ bug . c ##gi ? id = 351 ##70 ##9 hudson build job : https : / / hudson . eclipse . org / hudson / view / w ##tp / job / cb ##i 

### Generating the batch test

In [20]:
"Train ", len(baseline.dup_sets_train)

('Train ', 39339)

In [21]:
# data - path
# batch_size - 128
# n_neg - 1
def batch_iterator(self, retrieval, model, data, dup_sets, bug_ids, 
                   batch_size, n_neg, issues_by_buckets, TRIPLET_HARD=False, FLOATING_PADDING=False):
    # global train_data
    # global self.dup_sets
    # global self.bug_ids
    # global self.bug_set

    random.shuffle(data)

    batch_features = {'title' : [], 'desc' : [], 'info' : []}

    n_train = len(data)

    batch_triplets, batch_bugs_anchor, batch_bugs_pos, batch_bugs_neg, batch_bugs = [], [], [], [], []

    all_bugs = list(issues_by_buckets.keys())
    buckets = retrieval.buckets

    for offset in range(batch_size):
        anchor, pos = data[offset][0], data[offset][1]
        batch_bugs_anchor.append(anchor)
        batch_bugs_pos.append(pos)
        batch_bugs.append(anchor)
        batch_bugs.append(pos)
        #batch_bugs += dup_sets[anchor]

    for anchor, pos in zip(batch_bugs_anchor, batch_bugs_pos):
        while True:
            neg = self.get_neg_bug(anchor, buckets[issues_by_buckets[anchor]], issues_by_buckets, all_bugs)
            bug_anchor = self.bug_set[anchor]
            bug_pos = self.bug_set[pos]
            if neg not in self.bug_set:
                continue
            batch_bugs.append(neg)
            batch_bugs_neg.append(neg)
            bug_neg = self.bug_set[neg]
            break
        
        # triplet bug and master
        batch_triplets.append([anchor, pos, neg])
    
    random.shuffle(batch_bugs)
    
    for bug_id in batch_bugs:
        bug = self.bug_set[bug_id]
        self.read_batch_bugs(batch_features, bug)

    batch_features['title'] = np.array(batch_features['title'])
    batch_features['desc'] = np.array(batch_features['desc'])
    batch_features['info'] = np.array(batch_features['info'])
    
    sim = np.asarray([issues_by_buckets[bug_id] for bug_id in batch_bugs])

    input_sample = {}

    input_sample = { 'title' : batch_features['title'], 
                        'description' : batch_features['desc'], 
                            'info' : batch_features['info'] }

    return batch_triplets, input_sample, sim #sim

In [22]:
%%time

batch_size = 64
batch_size_test = 128

# we want a constant validation group to have a frame of reference for model performance
batch_triplets_valid, valid_input_sample, valid_sim = batch_iterator(baseline, retrieval, None, 
                                                                                      baseline.train_data, 
                                                                                      baseline.dup_sets_train,
                                                                                      bug_train_ids,
                                                                                      batch_size_test, 1,
                                                                                      issues_by_buckets)

validation_sample = [valid_input_sample['title'], 
             valid_input_sample['description'],
            valid_input_sample['info'], valid_sim]

# Categorical columns
number_of_columns_info = valid_input_sample['info'].shape[1]
# Max sequence title
MAX_SEQUENCE_LENGTH_T = valid_input_sample['title'].shape[1]
MAX_SEQUENCE_LENGTH_D = valid_input_sample['description'].shape[1]

CPU times: user 482 ms, sys: 3.99 ms, total: 486 ms
Wall time: 486 ms


In [23]:
valid_input_sample['title'].shape, valid_input_sample['description'].shape, valid_input_sample['info'].shape, valid_sim.shape

((384, 20), (384, 20), (384, 1682), (384,))

### Validar entrada

In [24]:
# %%time 

#baseline.display_batch(baseline.train_data, baseline.dup_sets_train, bug_train_ids, 5)

In [25]:
"Test ", len(baseline.test_data)

('Test ', 16995)

## Pre-trained embeddings

Loading pretrained word vectors

### Glove

In [26]:
vocab = baseline.load_vocabulary(os.path.join(DIR, 'vocab_embed.pkl'))
#print(np.random.choice(vocab, 10))
# for token in vocab:
#     print(token)

vocabulary loaded


In [27]:
"Total vocabulary: {}".format(len(vocab))

'Total vocabulary: 21175'

In [28]:
def generating_embed(baseline, GLOVE_DIR, EMBEDDING_DIM):
    embeddings_index = {}
    embed_path = os.path.join(GLOVE_DIR, 'glove.42B.300d.txt')
    
    f2 = open(embed_path, 'rb')
    num_lines = sum(1 for line in f2)
    f2.close()
    
    f = open(embed_path, 'rb')
    vocab = baseline.load_vocabulary(os.path.join(baseline.DIR, 'vocab_embed.pkl'))
    vocab_size = len(vocab) 

    # Initialize uniform the vector considering the Tanh activation
    embedding_matrix = np.random.uniform(-1.0, 1.0, (num_lines + vocab_size, EMBEDDING_DIM))
    embedding_matrix[0, :] = np.zeros(EMBEDDING_DIM)

    loop = tqdm(f)
    loop.set_description("Loading Glove")
    
    i = 0
    for line in loop:
        tokens = line.split()
        word = tokens[0]
        embed = np.asarray(tokens[1:], dtype='float32')
        embeddings_index[word] = embed
        embedding_matrix[i] = embed
        loop.update(1)
        i+=1
    f.close()
    loop.close()

    print('Total %s word vectors in Glove 42B 300d.' % len(embeddings_index))

    loop = tqdm(total=vocab_size)
    loop.set_description('Loading embedding from dataset pretrained')
    
    for word, embed in vocab.items():
        if word in embeddings_index:
            embedding_matrix[i] = embeddings_index[word]
        else:
            embedding_matrix[i] = np.asarray(embed, dtype='float32')
        loop.update(1)
        i+=1
    loop.close()
    baseline.embedding_matrix = embedding_matrix

In [29]:
%%time

generating_embed(baseline, GLOVE_DIR=GLOVE_DIR, EMBEDDING_DIM=EMBEDDING_DIM) # MAX_NB_WORDS=MAX_NB_WORDS

vocabulary loaded


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Total 1917494 word vectors in Glove 42B 300d.


HBox(children=(IntProgress(value=0, max=21175), HTML(value='')))


CPU times: user 1min 37s, sys: 3.97 s, total: 1min 41s
Wall time: 1min 39s


In [30]:
len(baseline.embedding_matrix)

1938669

## Experiment

## Towards Accurate Duplicate Bug Retrieval Using Deep Learning Techniques

https://github.com/tqtg/DuplicateBugFinder

### Embedding layer

In [31]:
# Is missing the padding_idx used in pytorch
# https://pytorch.org/docs/stable/_modules/torch/nn/modules/sparse.html
# https://stackoverflow.com/questions/54824768/rnn-model-gru-of-word2vec-to-regression-not-learning
def embedding_layer(embeddings, num_words, embedding_dim, max_sequence_length, trainable):
    embedding_layer = Embedding(num_words,
                                  embedding_dim,
                                  name='embedding_layer',
                                  weights=[embeddings],
                                  embeddings_constraint=MaxNorm(max_value=1, axis=0),
#                                   input_length=max_sequence_length,
                                  input_length=None,
                                  trainable=trainable)
    return embedding_layer

### CNN with filter 3,4,5

In [32]:
def cnn_model(embedding_layer, max_sequence_length):

    sequence_input = Input(shape=(max_sequence_length,), name='Feature_BugInput')
    #sequence_input = Input(shape=(None,), name='Feature_BugInput')
    embedded_sequences = embedding_layer(sequence_input)

    # Yoon Kim model (https://arxiv.org/abs/1408.5882)
    # best combination filter (3, 4, 5) e 128 e 256
    convs = []
    filter_sizes = [3, 4, 5]
    n_filters = 64

    for index, filter_size in enumerate(filter_sizes):
        l_conv = Conv1D(filters=n_filters, kernel_size=filter_size)(embedded_sequences)
        l_pool = MaxPooling1D(pool_size=filter_size)(l_conv) # index+1
        convs.append(l_pool)

    l_merge = Concatenate(axis=1)(convs)

    # add a 1D convnet with global maxpooling, instead of Yoon Kim model
    #conv = Conv1D(filters=n_filters * 3, kernel_size=3)(l_merge)
    layer = GlobalAveragePooling1D()(l_merge)
    #layer = Flatten()(l_merge)
    layer = Dense(300, activation='tanh')(layer)
    #layer = LeakyReLU()(layer)

    cnn_feature_model = Model(inputs=[sequence_input], outputs=[layer], name = 'FeatureCNNGenerationModel') # inputs=visible

    return cnn_feature_model

### Bi-LSTM

In [33]:
def lstm_model(embedding_layer, max_sequence_length):
    number_lstm_units = 75
    rate_drop_lstm = 0
    recurrent_dropout = 0

    sequence_input = Input(shape=(max_sequence_length, ), name='Feature_BugInput')
    #sequence_input = Input(shape=(None, ), name='Feature_BugInput')
    embedded_sequences = embedding_layer(sequence_input)

    left_layer = LSTM(number_lstm_units, return_sequences=True)(embedded_sequences)
    right_layer = LSTM(number_lstm_units, return_sequences=True, go_backwards=True)(left_layer)
    
    lstm_layer = Concatenate()([left_layer, right_layer])
    
    #lstm_layer = TimeDistributed(Dense(50))(lstm_layer)
    #layer = Flatten()(lstm_layer)
    layer = GlobalAveragePooling1D()(lstm_layer)
    layer = Dense(300, activation='tanh')(layer)

    lstm_feature_model = Model(inputs=[sequence_input], outputs=[layer], name = 'FeatureLstmGenerationModel') # inputs=visible

    return lstm_feature_model

### MLP

In [34]:
def mlp_model(input_size):
    info_input = Input(shape=(input_size, ), name='Feature_BugInput')
    input_size = 300
    
    for units in [64, 32]:
        layer = Dense(units, activation='tanh', kernel_initializer='random_uniform')(info_input)
    
    layer = Dense(input_size, activation='tanh')(info_input)
    
    mlp_feature_model = Model(inputs=[info_input], outputs=[layer], name = 'FeatureMlpGenerationModel')
    
    return mlp_feature_model

### Siamese model

In [35]:
def pairwise_distance(feature, squared=False):
    """Computes the pairwise distance matrix with numerical stability.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2

    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.

    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    pairwise_distances_squared = math_ops.add(
        math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
        math_ops.reduce_sum(
            math_ops.square(array_ops.transpose(feature)),
            axis=[0],
            keepdims=True)) - 2.0 * math_ops.matmul(feature,
                                                    array_ops.transpose(feature))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
    # Get the mask where the zero distances are at.
    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        pairwise_distances = math_ops.sqrt(
            pairwise_distances_squared + math_ops.to_float(error_mask) * 1e-16)

    # Undo conditionally adding 1e-16.
    pairwise_distances = math_ops.multiply(
        pairwise_distances, math_ops.to_float(math_ops.logical_not(error_mask)))

    num_data = array_ops.shape(feature)[0]
    # Explicitly set diagonals to zero.
    mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
        array_ops.ones([num_data]))
    pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
    return pairwise_distances

def masked_maximum(data, mask, dim=1):
    """Computes the axis wise maximum over chosen elements.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D Boolean `Tensor` of size [n, m].
      dim: The dimension over which to compute the maximum.

    Returns:
      masked_maximums: N-D `Tensor`.
        The maximized dimension is of size 1 after the operation.
    """
    axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
    masked_maximums = math_ops.reduce_max(
        math_ops.multiply(data - axis_minimums, mask), dim,
        keepdims=True) + axis_minimums
    return masked_maximums

def masked_minimum(data, mask, dim=1):
    """Computes the axis wise minimum over chosen elements.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D Boolean `Tensor` of size [n, m].
      dim: The dimension over which to compute the minimum.

    Returns:
      masked_minimums: N-D `Tensor`.
        The minimized dimension is of size 1 after the operation.
    """
    axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
    masked_minimums = math_ops.reduce_min(
        math_ops.multiply(data - axis_maximums, mask), dim,
        keepdims=True) + axis_maximums
    return masked_minimums

In [36]:
## required for semi-hard triplet loss:

def triplet_loss(vects):
    margin = 1.
    labels = vects[:, :1]
 
    labels = tf.cast(labels, dtype='int32')

    embeddings = tf.cast(vects[:, 1:], dtype='float32')

    ### Code from Tensorflow function [tf.contrib.losses.metric_learning.triplet_semihard_loss] starts here:
    
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    # lshape=array_ops.shape(labels)
    # assert lshape.shape == 1
    # labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    # global batch_size  
    batch_size = array_ops.size(labels) # was 'array_ops.size(labels)'

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile, array_ops.reshape(
                array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(
                math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
            0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(
        mask_final, negatives_outside, negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(
        adjacency, dtype=dtypes.float32) - array_ops.diag(
        array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(
                math_ops.multiply(loss_mat, mask_positives), 0.0)),
        num_positives,
        name='triplet_semihard_loss')
    
    ### Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance

def quintet_loss(inputs):
    margin = 1.
    labels = inputs[:, :1]
 
    labels = tf.cast(labels, dtype='int32')

    embeddings = inputs[:, 1:]

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    # global batch_size  
    batch_size = array_ops.size(labels) # was 'array_ops.size(labels)'

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile, array_ops.reshape(
                array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(
                math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
            0.0), [batch_size, batch_size])
    
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])

    semi_hard_negatives = array_ops.where(
        mask_final, negatives_outside, negatives_inside)
    
    semi_hard_negatives_mask = math_ops.equal(pdist_matrix, masked_maximum(pdist_matrix, adjacency_not))
    
    # Remove false negatives with similarity equal to the true negatives
    semi_hard_negatives_mask = tf.reshape(tf.cast(semi_hard_negatives_mask, 'int32'), (-1, 1)) * tf.reshape(tf.cast(adjacency_not, 'int32'), (-1, 1))
    semi_hard_negatives_mask = tf.cast(tf.reshape(semi_hard_negatives_mask, (batch_size, batch_size)), 'bool')
    
    # Recovery the bug label from semi-hard-negatives
    label_matrix = tf.repeat(tf.reshape(labels, (1, -1)), repeats=[batch_size], axis=0)
    semi_hard_negatives_ids = tf.reshape(label_matrix, (-1, 1)) * tf.cast(tf.reshape(semi_hard_negatives_mask, (-1, 1)), 'int32')
    semi_hard_negatives_ids = tf.reshape(semi_hard_negatives_ids, (batch_size, batch_size))

    i = tf.constant(0)
    most_freq_matrix = tf.Variable([])
    def most_frequent(i, most_freq_matrix):
        batch = tf.gather(semi_hard_negatives_ids, i)
        neg_label_default = [tf.unique(batch)[0][0]]
        batch = tf.boolean_mask(batch, tf.greater(batch, 0))
        unique, _, count = tf.unique_with_counts(batch)
        max_occurrences = tf.reduce_max(count)
        max_cond = tf.equal(count, max_occurrences)
        max_numbers = tf.squeeze(tf.gather(unique, tf.where(max_cond)))
        max_numbers = tf.cond(tf.cast(tf.size(unique) > 1, tf.bool), lambda: unique[0], lambda: max_numbers)
        max_numbers = tf.cond(tf.cast(tf.shape(unique) == 0, tf.bool), 
                              lambda: neg_label_default, 
                              lambda: max_numbers)
        most_freq_matrix = tf.concat([most_freq_matrix, [max_numbers]], axis=0)
        return [tf.add(i, 1), most_freq_matrix]
    _, negatives_ids = tf.while_loop(lambda i, _: i<batch_size, 
                                        most_frequent, 
                                        [i, most_freq_matrix],
                                       shape_invariants=[i.get_shape(),
                                                   tf.TensorShape([None])])
    negatives_ids = tf.cast(negatives_ids, 'int32')
    labels_neg = tf.reshape(negatives_ids, (-1, 1))
    mask_negatives = math_ops.equal(labels_neg, semi_hard_negatives_ids)
    mask_negatives = tf.cast(mask_negatives, 'float32')
    labels_neg = tf.cast(labels_neg, 'float32')
    
    mask_positives = math_ops.cast(
        adjacency, dtype=dtypes.float32) - array_ops.diag(
        array_ops.ones([batch_size]))
    
    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    # Include the anchor to positives
    mask_positives_centroids = math_ops.cast(adjacency, dtype=dtypes.float32)

    # centroid pos 
    embed_pos = tf.matmul(mask_positives, embeddings)
    num_of_pos = tf.reduce_sum(mask_positives_centroids, axis=1, keepdims=True)
    centroid_embed_pos = tf.math.xdivy(embed_pos, num_of_pos)
    labels_pos = tf.cast(labels, dtype=dtypes.float32)

    # centroid negs
    embed_neg = tf.matmul(mask_negatives, embeddings)
    num_of_neg = tf.reduce_sum(mask_negatives, axis=1, keepdims=True)
    centroid_embed_neg = tf.math.xdivy(embed_neg, num_of_neg)
    
    i = tf.constant(0)
    batch_centroid_matrix = tf.Variable([])
    def iter_centroids(i, batch_centroid_matrix):
        # anchor
        anchor = [tf.gather(embeddings, i)]
        label_pos = [tf.gather(labels_pos, i)]
        # centroid pos
        centroid_pos = [tf.gather(centroid_embed_pos, i)]
        # centroid neg
        centroid_neg = [tf.gather(centroid_embed_neg, i)]
        label_neg = [tf.gather(labels_neg, i)]
        # new batch
        new_batch = tf.concat([anchor, centroid_pos, centroid_neg], axis=0)
        new_labels = tf.concat([label_pos, label_pos, label_neg], axis=0)
        # Batch anchor + centroid_positive + centroid_negative
        batch_anchor_centroids = tf.concat([new_labels, new_batch], axis=1)
        TL_single_triplet = triplet_loss(batch_anchor_centroids)
        batch_centroid_matrix = tf.concat([batch_centroid_matrix, [TL_single_triplet]], axis=0)
        
        return [tf.add(i, 1), batch_centroid_matrix]
    _, batch_centroid_matrix = tf.while_loop(lambda i, a: i<batch_size, 
                                        iter_centroids, 
                                        [i, batch_centroid_matrix],
                                       shape_invariants=[i.get_shape(),
                                                   tf.TensorShape([None])])
    
    TL_centroid = tf.reduce_mean(batch_centroid_matrix)
    TL = triplet_loss(inputs)
    TL_pos = tf.constant(0.0)
    TL_neg = tf.constant(0.0) #tf.reduce_mean(batch_centroid_matrix_neg)
   
    return K.stack([TL, TL_pos, TL_neg, TL_centroid], axis=0)

def quintet_trainable(inputs):
    TL = inputs[0]
    TL_pos = inputs[1]
    TL_neg = inputs[2]
    TL_centroid = inputs[3]
    TL_anchor_w = inputs[4]
    TL_pos_w = inputs[5]
    TL_neg_w = inputs[6]
    TL_centroid_w = inputs[7]
                                                         
    TL_anchor_w = tf.math.maximum(0.0, TL_anchor_w)
    TL_pos_w = 0.0 # tf.math.maximum(0.0, TL_pos_w)
    TL_neg_w = 0.0 #tf.math.maximum(0.0, TL_neg_w)
    TL_centroid_w = tf.math.maximum(0.0, TL_centroid_w)

    sum_of_median = tf.reduce_sum([TL * TL_anchor_w, TL_centroid * TL_centroid_w]) # 
    sum_of_weigths = tf.reduce_sum([TL_anchor_w, TL_centroid_w])
    weigthed_median = tf.truediv(sum_of_median, sum_of_weigths)    
    return K.stack([weigthed_median, TL_anchor_w, TL_pos_w, TL_neg_w, TL_centroid_w, TL, TL_pos, TL_neg, TL_centroid], axis=0)

def custom_loss(y_true, y_pred):
    return tf.reduce_mean(y_pred[0])

def TL_w_anchor(y_true, y_pred):
    return tf.reduce_mean(y_pred[1])
def TL_w_pos(y_true, y_pred):
    return tf.reduce_mean(y_pred[2])
def TL_w_neg(y_true, y_pred):
    return tf.reduce_mean(y_pred[3])
def TL_w_centroid(y_true, y_pred):
    return tf.reduce_mean(y_pred[4])
def TL(y_true, y_pred):
    return tf.reduce_mean(y_pred[5])
def TL_pos(y_true, y_pred):
    return tf.reduce_mean(y_pred[6])
def TL_neg(y_true, y_pred):
    return tf.reduce_mean(y_pred[7])
def TL_centroid(y_true, y_pred):
    return tf.reduce_mean(y_pred[8])

In [37]:
def siamese_model(title_feature_model, desc_feature_model, categorical_feature_model, sequence_length_info, 
                  sequence_length_t, sequence_length_d, name):
  
    bug_t = Input(shape = (sequence_length_t, ), name = 'title_{}'.format(name))
    bug_d = Input(shape = (sequence_length_d, ), name = 'desc_{}'.format(name))
    bug_i = Input(shape = (sequence_length_info, ), name = 'info_{}'.format(name))
    
    bug_t_feat = title_feature_model(bug_t)
    bug_d_feat = desc_feature_model(bug_d)
    bug_i_feat = categorical_feature_model(bug_i)
    
    #bug_feature_output = Add(name = 'merge_features_{}'.format(name))([bug_i_feat, bug_t_feat, bug_d_feat])
    bug_feature_output = concatenate([bug_i_feat, bug_t_feat, bug_d_feat], name = 'merge_features_{}'.format(name))
    
    #     bug_feature_output = Activation('tanh')(bug_feature_output)
    
    # Bug representation layer
    # bug_feature_output = Dense(300, activation='tanh')(bug_feature_output)
    
    bug_feature_model = Model(inputs=[bug_t, bug_d, bug_i], outputs=[bug_feature_output], name = 'merge_features_{}'.format(name))
    
    return bug_feature_model

In [38]:
class QuintetWeights(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(QuintetWeights, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = tf.reshape(self.add_weight(name='quintet_kernel_weight', 
                                      shape=(input_shape[0], self.output_dim),
                                      initializer=keras.initializers.Ones(),
#                                       initializer=keras.initializers.RandomUniform(minval=0.0, maxval=1.0, seed=None),
                                      trainable=False), (1, 1))
        super(QuintetWeights, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        x = tf.reshape(x, (1, 1))
        return [K.dot(x, self.kernel), self.kernel]

    def compute_output_shape(self, input_shape):
        return [input_shape, input_shape]
    
def max_margin_objective(encoded_anchor, decay_lr=1):
    
    input_labels = Input(shape=(1,), name='input_label')    # input layer for labels
    inputs = np.concatenate([encoded_anchor.input, [input_labels]], -1).tolist()
    
    encoded_anchor = encoded_anchor.output
    
    feature = concatenate([input_labels, encoded_anchor])  # concatenating the labels + embeddings
    
    TL_loss = Lambda(quintet_loss, name='quintet_loss')(feature)
    
    tl_l = Lambda(lambda x:tf.reshape(x[0], (1,)), name='TL')(TL_loss)
    tl_l_p = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_pos')(TL_loss)
    tl_l_n = Lambda(lambda x:tf.reshape(x[2], (1,)), name='TL_neg')(TL_loss)
    tl_l_c = Lambda(lambda x:tf.reshape(x[3], (1, )), name='TL_centroid')(TL_loss)
    
    TL_w = QuintetWeights(output_dim=1)(tl_l)
    TL_pos_w = QuintetWeights(output_dim=1)(tl_l_p)
    TL_neg_w = QuintetWeights(output_dim=1)(tl_l_n)
    TL_centroid_w = QuintetWeights(output_dim=1)(tl_l_c)
    
    TL_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_weight')(TL_w)
    TL_pos_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_pos_weight')(TL_pos_w)
    TL_neg_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_neg_weight')(TL_neg_w)
    TL_centroid_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_centroid_weight')(TL_centroid_w)
    
    output = concatenate([tl_l, tl_l_p, tl_l_n, tl_l_c, TL_weight, TL_pos_weight, TL_neg_weight, TL_centroid_weight])
    output = Lambda(quintet_trainable, name='quintet_trainable')(output)
    
    similarity_model = Model(inputs = inputs, outputs = output, name = 'Similarity_Model')

    # setup the optimization process 
    similarity_model.compile(optimizer='adam', loss=custom_loss, metrics=[TL_w_anchor, TL_w_pos, TL_w_neg, TL_w_centroid,
                                                                         TL, TL_pos, TL_neg, TL_centroid]) 
    # metrics=[pos_distance, neg_distance, custom_margin_loss]

    return similarity_model

In [39]:
# Domain to use
limit_train = int(epochs * freeze_train) # 10% de 1000 , 100 epocas
METHOD = 'DMS_QL_{}'.format(limit_train)
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

In [40]:
def save_loss(result):
    with open(os.path.join(DIR,'{}_log.pkl'.format(METHOD)), 'wb') as f:
        pickle.dump(result, f)
    print("=> result saved!")

In [41]:
%%time

# Inspired on https://'pastebin.com/TaGFdcBA
# TODO: https://stackoverflow.com/questions/49941903/keras-compute-cosine-distance-between-two-flattened-outputs
keras.backend.clear_session()

# Clear GPU memory
# from numba import cuda
# cuda.select_device(0)
# cuda.close()

# Embeddings
desc_embedding_layer = embedding_layer(embeddings=baseline.embedding_matrix, 
                              num_words=len(baseline.embedding_matrix), 
                              embedding_dim=EMBEDDING_DIM, 
                              max_sequence_length=MAX_SEQUENCE_LENGTH_D, 
                              trainable=False)
title_embedding_layer = embedding_layer(embeddings=baseline.embedding_matrix, 
                              num_words=len(baseline.embedding_matrix), 
                              embedding_dim=EMBEDDING_DIM, 
                              max_sequence_length=MAX_SEQUENCE_LENGTH_T, 
                              trainable=False)

# Feature models
'''
    cnn_model
    lstm_model
    mlp_model
'''
desc_feature_model = cnn_model(desc_embedding_layer, MAX_SEQUENCE_LENGTH_D)
title_feature_model = lstm_model(title_embedding_layer, MAX_SEQUENCE_LENGTH_T)
categorical_feature_model = mlp_model(number_of_columns_info)

# Similarity model
encoded_anchor = siamese_model(title_feature_model, desc_feature_model, categorical_feature_model, 
                                     number_of_columns_info, MAX_SEQUENCE_LENGTH_T, MAX_SEQUENCE_LENGTH_D, 'in')

similarity_model = max_margin_objective(encoded_anchor, decay_lr=1)

# cnn_feature_model.summary()
# lstm_feature_model.summary()
similarity_model.summary()

'''
    Experiment
'''
result = { 'train' : [], 'test' : [] }
print("Total of ", limit_train)
for epoch in range(limit_train):
    batch_triplet_train, \
        train_input_sample, train_sim = batch_iterator(baseline, retrieval, encoded_anchor, baseline.train_data, 
                                                       baseline.dup_sets_train, bug_train_ids, 
                                                           batch_size, 1, issues_by_buckets, TRIPLET_HARD=False)
    train_batch = [train_input_sample['title'], train_input_sample['description'], train_input_sample['info'], train_sim]
    
    h = similarity_model.train_on_batch(x=train_batch, y=train_sim)
    h_validation = similarity_model.test_on_batch(x=validation_sample, y=valid_sim)
    
    # save results
    result['train'].append(h)
    result['test'].append(h_validation)
    
    if( (epoch+1) % 10 == 0 or (epoch+1 == limit_train) ):
        save_loss(result)
    
    if (epoch+1 == limit_train): #(epoch > 1 and epoch % 10 == 0) or (epoch+1 == epochs):
        recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, verbose, encoded_anchor, issues_by_buckets, bug_train_ids)
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
                "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
              "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], 
                                          h[4], h[5], h[6], h[7], h[8], recall))
    else:
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
              "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}").format(
            epoch+1, h[0], h_validation[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], h[8]))
    loss = h[0]
    
    if loss < best_loss:
        best_loss = loss
        best_epoch = epoch+1

#experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
#experiment.save_model(encoded_anchor, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
print('Best_epoch={}, Best_loss={:.2f}, Recall@25={:.2f}'.format(best_epoch, best_loss, recall))













Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
info_in (InputLayer)            (None, 1682)         0                                            
__________________________________________________________________________________________________
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
FeatureMlpGenerationModel (Mode (None, 300)        

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




Epoch: 1 Loss: 0.62, Loss_test: 0.70
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.80, TL_pos: 0.43, TL_neg: 0.27, TL_centroid: 0.29
Epoch: 2 Loss: 0.69, Loss_test: 0.69
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.77, TL_pos: 0.61, TL_neg: 0.08, TL_centroid: 0.16
Epoch: 3 Loss: 0.72, Loss_test: 0.69
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.74, TL_pos: 0.69, TL_neg: 0.04, TL_centroid: 0.13
Epoch: 4 Loss: 0.63, Loss_test: 0.69
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.66, TL_pos: 0.60, TL_neg: 0.06, TL_centroid: 0.16
Epoch: 5 Loss: 0.61, Loss_test: 0.69
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.64, TL_pos: 0.58, TL_neg: 0.04, TL_centroid: 0.16
Epoch: 6 Loss: 0.70, Loss_test: 0.68
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.72, TL_pos: 0.68, TL_neg: 0.03, TL_centroid: 0.12
Epoch: 7 Loss: 0.75, Loss_test: 0.68
TL_w: 1.00, TL_pos_

Epoch: 53 Loss: 0.22, Loss_test: 0.43
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.22, TL_pos: 0.21, TL_neg: 0.01, TL_centroid: 0.09
Epoch: 54 Loss: 0.30, Loss_test: 0.43
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.31, TL_pos: 0.29, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 55 Loss: 0.30, Loss_test: 0.43
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.31, TL_pos: 0.29, TL_neg: 0.02, TL_centroid: 0.18
Epoch: 56 Loss: 0.34, Loss_test: 0.42
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.36, TL_pos: 0.33, TL_neg: 0.02, TL_centroid: 0.13
Epoch: 57 Loss: 0.30, Loss_test: 0.41
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.31, TL_pos: 0.29, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 58 Loss: 0.15, Loss_test: 0.41
TL_w: 0.97, TL_pos_w: 1.03, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.15, TL_pos: 0.15, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 59 Loss: 0.46, Loss_test: 0.41
TL_w: 0.97, TL

In [42]:
EXPORT_RANK_PATH = os.path.join(DIR, 'exported_rank_{}.txt'.format(METHOD))
EXPORT_RANK_PATH

'data/processed/eclipse/bert/exported_rank_DMS_QL_100.txt'

In [43]:
with open(EXPORT_RANK_PATH, 'w') as file_out:
    for row in exported_rank:
        file_out.write(row + "\n")

In [44]:
experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(limit_train)))
experiment.save_model(encoded_anchor, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(limit_train)), verbose=1)

Saved model 'modelos/model_bert_preprocessing_DMS_QL_100_feature_100epochs_64batch(eclipse).h5' to disk


In [45]:
len(result['train']), len(result['test'])

(100, 100)

In [46]:
model = similarity_model.get_layer('concatenate_4')
output = Lambda(quintet_trainable, name='quintet_trainable')(model.output)
inputs = similarity_model.inputs
model = Model(inputs = inputs, outputs = output, name = 'Similarity_Model')

# setup the optimization process 
model.compile(optimizer='adam', loss=custom_loss, metrics=[TL_w_anchor, TL_w_pos, TL_w_neg, TL_w_centroid,
                                                                         TL, TL_pos, TL_neg, TL_centroid])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
info_in (InputLayer)            (None, 1682)         0                                            
__________________________________________________________________________________________________
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
FeatureMlpGenerationModel (Mode (None, 300)          504900      info_in[0][0]                    
__________________________________________________________________________________________________
FeatureLst

In [47]:
# Domain to use
METHOD = 'DMS_QL_{}'.format(epochs)
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

In [48]:
end_train = epochs - limit_train
for epoch in range(limit_train, end_train):
    batch_triplet_train, \
        train_input_sample, train_sim = batch_iterator(baseline, retrieval, encoded_anchor, baseline.train_data, 
                                                       baseline.dup_sets_train, bug_train_ids, 
                                                           batch_size, 1, issues_by_buckets, TRIPLET_HARD=False)
    train_batch = [train_input_sample['title'], train_input_sample['description'], train_input_sample['info'], train_sim]
    
    h = similarity_model.train_on_batch(x=train_batch, y=train_sim)
    h_validation = similarity_model.test_on_batch(x=validation_sample, y=valid_sim)
    
    # save results
    result['train'].append(h)
    result['test'].append(h_validation)
    
    if( (epoch+1) % 10 == 0 or (epoch+1 == limit_train) ):
        save_loss(result)
    
    if (epoch+1 == limit_train): #(epoch > 1 and epoch % 10 == 0) or (epoch+1 == epochs):
        recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, verbose, encoded_anchor, issues_by_buckets, bug_train_ids)
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
                "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
              "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], 
                                          h[4], h[5], h[6], h[7], h[8], recall))
    else:
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
              "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}").format(
            epoch+1, h[0], h_validation[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], h[8]))
    loss = h[0]
    
    if loss < best_loss:
        best_loss = loss
        best_epoch = epoch+1

#experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
#experiment.save_model(encoded_anchor, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
print('Best_epoch={}, Best_loss={:.2f}, Recall@25={:.2f}'.format(best_epoch, best_loss, recall))

Epoch: 101 Loss: 0.23, Loss_test: 0.32
TL_w: 0.96, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.25, TL_pos: 0.21, TL_neg: 0.01, TL_centroid: 0.15
Epoch: 102 Loss: 0.20, Loss_test: 0.32
TL_w: 0.96, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.21, TL_pos: 0.20, TL_neg: 0.01, TL_centroid: 0.15
Epoch: 103 Loss: 0.25, Loss_test: 0.32
TL_w: 0.96, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.25, TL_pos: 0.24, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 104 Loss: 0.30, Loss_test: 0.31
TL_w: 0.95, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.31, TL_pos: 0.30, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 105 Loss: 0.17, Loss_test: 0.31
TL_w: 0.95, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.18, TL_pos: 0.16, TL_neg: 0.01, TL_centroid: 0.21
Epoch: 106 Loss: 0.22, Loss_test: 0.31
TL_w: 0.95, TL_pos_w: 1.04, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.24, TL_pos: 0.20, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 107 Loss: 0.16, Loss_test: 0.31
TL_w: 0

Epoch: 152 Loss: 0.19, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.20, TL_pos: 0.19, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 153 Loss: 0.19, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.20, TL_pos: 0.19, TL_neg: 0.01, TL_centroid: 0.15
Epoch: 154 Loss: 0.27, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.27, TL_pos: 0.26, TL_neg: 0.01, TL_centroid: 0.11
Epoch: 155 Loss: 0.19, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.20, TL_pos: 0.19, TL_neg: 0.01, TL_centroid: 0.09
Epoch: 156 Loss: 0.28, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.28, TL_pos: 0.27, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 157 Loss: 0.17, Loss_test: 0.23
TL_w: 0.94, TL_pos_w: 1.05, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.18, TL_pos: 0.17, TL_neg: 0.01, TL_centroid: 0.09
Epoch: 158 Loss: 0.17, Loss_test: 0.24
TL_w: 0

Epoch: 203 Loss: 0.15, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.15, TL_pos: 0.14, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 204 Loss: 0.16, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.17, TL_pos: 0.16, TL_neg: 0.01, TL_centroid: 0.09
Epoch: 205 Loss: 0.19, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.19, TL_pos: 0.18, TL_neg: 0.01, TL_centroid: 0.11
Epoch: 206 Loss: 0.15, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.15, TL_pos: 0.15, TL_neg: 0.00, TL_centroid: 0.09
Epoch: 207 Loss: 0.13, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.13, TL_pos: 0.12, TL_neg: 0.01, TL_centroid: 0.10
Epoch: 208 Loss: 0.16, Loss_test: 0.20
TL_w: 0.94, TL_pos_w: 1.06, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.17, TL_pos: 0.16, TL_neg: 0.01, TL_centroid: 0.08
Epoch: 209 Loss: 0.10, Loss_test: 0.20
TL_w: 0

Epoch: 254 Loss: 0.20, Loss_test: 0.18
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.20, TL_pos: 0.20, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 255 Loss: 0.13, Loss_test: 0.18
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.13, TL_pos: 0.13, TL_neg: 0.00, TL_centroid: 0.10
Epoch: 256 Loss: 0.15, Loss_test: 0.18
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.15, TL_pos: 0.15, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 257 Loss: 0.18, Loss_test: 0.18
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.18, TL_pos: 0.18, TL_neg: 0.01, TL_centroid: 0.11
Epoch: 258 Loss: 0.12, Loss_test: 0.18
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.12, TL_pos: 0.11, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 259 Loss: 0.19, Loss_test: 0.17
TL_w: 0.93, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.19, TL_pos: 0.19, TL_neg: 0.01, TL_centroid: 0.09
=> result saved!
Epoch: 260 Loss: 0.11, Loss_t

Epoch: 305 Loss: 0.06, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.09
Epoch: 306 Loss: 0.07, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.01, TL_centroid: 0.10
Epoch: 307 Loss: 0.11, Loss_test: 0.15
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.11, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 308 Loss: 0.05, Loss_test: 0.15
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 309 Loss: 0.10, Loss_test: 0.15
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.09, TL_neg: 0.00, TL_centroid: 0.15
=> result saved!
Epoch: 310 Loss: 0.03, Loss_test: 0.15
TL_w: 0.92, TL_pos_w: 1.07, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.03, TL_pos: 0.03, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 311 Loss: 0.18, Loss_t

Epoch: 356 Loss: 0.09, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 357 Loss: 0.12, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.12, TL_pos: 0.12, TL_neg: 0.01, TL_centroid: 0.17
Epoch: 358 Loss: 0.13, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.14, TL_pos: 0.13, TL_neg: 0.00, TL_centroid: 0.12
Epoch: 359 Loss: 0.13, Loss_test: 0.14
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.13, TL_pos: 0.13, TL_neg: 0.01, TL_centroid: 0.11
=> result saved!
Epoch: 360 Loss: 0.08, Loss_test: 0.13
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.08, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.08
Epoch: 361 Loss: 0.08, Loss_test: 0.13
TL_w: 0.92, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.08, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 362 Loss: 0.05, Loss_t

Epoch: 407 Loss: 0.16, Loss_test: 0.15
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.16, TL_pos: 0.16, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 408 Loss: 0.09, Loss_test: 0.15
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.09, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 409 Loss: 0.06, Loss_test: 0.14
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.16
=> result saved!
Epoch: 410 Loss: 0.10, Loss_test: 0.14
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.10, TL_pos: 0.10, TL_neg: 0.00, TL_centroid: 0.14
Epoch: 411 Loss: 0.07, Loss_test: 0.14
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 412 Loss: 0.06, Loss_test: 0.14
TL_w: 0.91, TL_pos_w: 1.08, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.14
Epoch: 413 Loss: 0.09, Loss_t

Epoch: 458 Loss: 0.11, Loss_test: 0.12
TL_w: 0.91, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.11, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 459 Loss: 0.09, Loss_test: 0.12
TL_w: 0.91, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.10
=> result saved!
Epoch: 460 Loss: 0.07, Loss_test: 0.12
TL_w: 0.91, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.12
Epoch: 461 Loss: 0.11, Loss_test: 0.11
TL_w: 0.91, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.11, TL_neg: 0.01, TL_centroid: 0.10
Epoch: 462 Loss: 0.04, Loss_test: 0.11
TL_w: 0.91, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.14
Epoch: 463 Loss: 0.07, Loss_test: 0.11
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.01, TL_centroid: 0.09
Epoch: 464 Loss: 0.09, Loss_t

Epoch: 509 Loss: 0.06, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.06, TL_neg: 0.01, TL_centroid: 0.11
=> result saved!
Epoch: 510 Loss: 0.13, Loss_test: 0.11
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.13, TL_pos: 0.12, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 511 Loss: 0.07, Loss_test: 0.11
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.14
Epoch: 512 Loss: 0.06, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.22
Epoch: 513 Loss: 0.07, Loss_test: 0.11
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 514 Loss: 0.07, Loss_test: 0.11
TL_w: 0.90, TL_pos_w: 1.09, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.10
Epoch: 515 Loss: 0.09, Loss_t

=> result saved!
Epoch: 560 Loss: 0.06, Loss_test: 0.13
TL_w: 0.90, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 561 Loss: 0.05, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 562 Loss: 0.09, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.09, TL_neg: 0.00, TL_centroid: 0.17
Epoch: 563 Loss: 0.11, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.11, TL_neg: 0.01, TL_centroid: 0.13
Epoch: 564 Loss: 0.09, Loss_test: 0.12
TL_w: 0.90, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.10, TL_pos: 0.09, TL_neg: 0.01, TL_centroid: 0.12
Epoch: 565 Loss: 0.05, Loss_test: 0.12
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.08
Epoch: 566 Loss: 0.11, Loss_t

Epoch: 611 Loss: 0.06, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 612 Loss: 0.08, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.08, TL_pos: 0.08, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 613 Loss: 0.03, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.03, TL_pos: 0.03, TL_neg: 0.00, TL_centroid: 0.14
Epoch: 614 Loss: 0.09, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.08, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 615 Loss: 0.07, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 616 Loss: 0.05, Loss_test: 0.10
TL_w: 0.89, TL_pos_w: 1.10, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 617 Loss: 0.05, Loss_test: 0.10
TL_w: 0

Epoch: 662 Loss: 0.05, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.19
Epoch: 663 Loss: 0.07, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 664 Loss: 0.08, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 665 Loss: 0.07, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.18
Epoch: 666 Loss: 0.04, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.13
Epoch: 667 Loss: 0.06, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 668 Loss: 0.15, Loss_test: 0.10
TL_w: 0

Epoch: 713 Loss: 0.07, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 714 Loss: 0.09, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.09, TL_pos: 0.09, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 715 Loss: 0.05, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.20
Epoch: 716 Loss: 0.05, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.17
Epoch: 717 Loss: 0.05, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 718 Loss: 0.06, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 719 Loss: 0.05, Loss_test: 0.09
TL_w: 0

Epoch: 764 Loss: 0.11, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.11, TL_pos: 0.11, TL_neg: 0.01, TL_centroid: 0.14
Epoch: 765 Loss: 0.06, Loss_test: 0.10
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.22
Epoch: 766 Loss: 0.05, Loss_test: 0.10
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.16
Epoch: 767 Loss: 0.03, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.03, TL_pos: 0.03, TL_neg: 0.00, TL_centroid: 0.27
Epoch: 768 Loss: 0.07, Loss_test: 0.09
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.15
Epoch: 769 Loss: 0.04, Loss_test: 0.10
TL_w: 0.88, TL_pos_w: 1.11, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.19
=> result saved!
Epoch: 770 Loss: 0.06, Loss_t

Epoch: 815 Loss: 0.07, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.18
Epoch: 816 Loss: 0.04, Loss_test: 0.10
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.19
Epoch: 817 Loss: 0.04, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.21
Epoch: 818 Loss: 0.05, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.05, TL_pos: 0.05, TL_neg: 0.00, TL_centroid: 0.20
Epoch: 819 Loss: 0.12, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.12, TL_pos: 0.12, TL_neg: 0.01, TL_centroid: 0.16
=> result saved!
Epoch: 820 Loss: 0.03, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.03, TL_pos: 0.03, TL_neg: 0.01, TL_centroid: 0.21
Epoch: 821 Loss: 0.03, Loss_t

Epoch: 866 Loss: 0.04, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.24
Epoch: 867 Loss: 0.04, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.01, TL_centroid: 0.19
Epoch: 868 Loss: 0.08, Loss_test: 0.08
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.08, TL_pos: 0.08, TL_neg: 0.01, TL_centroid: 0.18
Epoch: 869 Loss: 0.06, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.06, TL_pos: 0.06, TL_neg: 0.00, TL_centroid: 0.19
=> result saved!
Epoch: 870 Loss: 0.07, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.07, TL_pos: 0.07, TL_neg: 0.00, TL_centroid: 0.21
Epoch: 871 Loss: 0.03, Loss_test: 0.09
TL_w: 0.87, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.03, TL_pos: 0.03, TL_neg: 0.00, TL_centroid: 0.24
Epoch: 872 Loss: 0.06, Loss_t

In [49]:
len(result['train']), len(result['test'])

(900, 900)

In [50]:
encoded = model.get_layer('merge_features_in')
output = encoded.output
inputs = similarity_model.inputs[:-1]
encoded_anchor = Model(inputs = inputs, outputs = output, name = 'Similarity_Model')

In [51]:
SAVE_PATH.replace('@number_of_epochs@', str(epochs))

'bert_preprocessing_DMS_QL_1000_feature1000epochs_64batch(eclipse)'

In [52]:
experiment.save_model(model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
experiment.save_model(encoded_anchor, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
"Model saved"

Saved model 'modelos/model_bert_preprocessing_DMS_QL_1000_feature_1000epochs_64batch(eclipse).h5' to disk


'Model saved'

In [53]:
recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, 1, encoded_anchor, issues_by_buckets, bug_train_ids)
print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
       "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
        "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
      "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], h[8], recall))

HBox(children=(IntProgress(value=0, max=16995), HTML(value='')))




HBox(children=(IntProgress(value=0, max=27321), HTML(value='')))




HBox(children=(IntProgress(value=0, max=30481), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=27321), HTML(value='')))


Epoch: 900 Loss: 0.04, Loss_test: 0.09
TL_w: 0.86, TL_pos_w: 1.12, TL_neg_w: 0.00, TL_centroid_w: 0.00
TL: 0.04, TL_pos: 0.04, TL_neg: 0.00, TL_centroid: 0.16, recall@25: 0.57


In [54]:
recall

0.57

In [55]:
exported_rank[:20]

['2:15392,9779,94|14674:0.5633346736431122,10095:0.5581936836242676,14673:0.5481487810611725,8461:0.5406591296195984,9437:0.529998391866684,17942:0.5292403995990753,252:0.5259393751621246,7563:0.5223670899868011,9908:0.5223032534122467,15392:0.5179720520973206,7522:0.517794281244278,4895:0.5174091756343842,9779:0.5111019909381866,11859:0.5092878043651581,7282:0.508865624666214,24133:0.508631020784378,64581:0.5066477954387665,13134:0.5065221786499023,170:0.5022992491722107,7288:0.5000718533992767,178:0.4991546869277954,20729:0.498002290725708,16778:0.4974820613861084,5937:0.49738675355911255,71:0.4942132234573364,43395:0.49379175901412964,12670:0.4934414029121399,7247:0.4912916421890259,12411:0.4890766143798828',
 '393232:393282,390667,383388|397285:0.5118381679058075,383309:0.5113343000411987,402073:0.506451427936554,412309:0.4995068907737732,387113:0.4977801442146301,355404:0.4947406053543091,421343:0.48736315965652466,408095:0.4869709610939026,410468:0.4862065315246582,405563:0.47781

### Retrieval evaluation

In [56]:
print("Total of queries:", len(retrieval.test))

Total of queries: 16995


#### Getting the model trained

In [57]:
print(SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)))

'bert_preprocessing_DMS_QL_1000_feature_1000epochs_64batch(eclipse)'

In [58]:
encoded_anchor.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
info_in (InputLayer)            (None, 1682)         0                                            
__________________________________________________________________________________________________
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
FeatureMlpGenerationModel (Mode (None, 300)          504900      info_in[0][0]                    
__________________________________________________________________________________________________
FeatureLst

In [59]:
print(len(exported_rank))

27321

In [60]:
EXPORT_RANK_PATH = os.path.join(DIR, 'exported_rank_{}.txt'.format(METHOD))
print(EXPORT_RANK_PATH)

'data/processed/eclipse/bert/exported_rank_DMS_QL_1000.txt'

In [61]:
with open(EXPORT_RANK_PATH, 'w') as file_out:
    for row in exported_rank:
        file_out.write(row + "\n")

In [62]:
report = experiment.evaluation.evaluate(EXPORT_RANK_PATH)
print(report)

{'0 - recall_at_1': 0.24,
 '1 - recall_at_5': 0.4,
 '2 - recall_at_10': 0.47,
 '3 - recall_at_15': 0.52,
 '4 - recall_at_20': 0.55,
 '5 - recall_at_25': 0.57}

#### Some ideas to visualizate

- https://towardsdatascience.com/building-a-recommendation-system-using-neural-network-embeddings-1ef92e5c80c9