# DWEN with QL

In [1]:
import keras

Using TensorFlow backend.


In [2]:
from __future__ import print_function, division

In [3]:
import re
import numpy as np
import pandas as pd

import os
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import sys
from annoy import AnnoyIndex
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    
%matplotlib inline

In [4]:
from keras.layers import Conv1D, Input, Add, Activation, Dropout, Embedding, MaxPooling1D, \
    GlobalMaxPool1D, Flatten, Dense, Concatenate, BatchNormalization
from keras.models import Sequential, Model
from keras.regularizers import l2
from keras.initializers import TruncatedNormal
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras import optimizers

In [5]:
from methods.baseline import Baseline
from methods.experiments import Experiment
from methods.evaluation import Evaluation
from methods.retrieval import Retrieval

## Auxiliary methods

## Configurações Globais

In [6]:
MAX_SEQUENCE_LENGTH_T = 20 # 100
MAX_SEQUENCE_LENGTH_D = 20 # 500
EMBEDDING_DIM = 300
MAX_NB_WORDS = 20000
'''
    Configuration
'''
epochs = 1000
freeze_train = .1 # 10% with freeze weights
best_loss = float('inf')
best_epoch = 0
verbose = 0
loss = 0

### Parse bugs preproprecessed

In [7]:
# Domain to use
DOMAIN = 'eclipse'
METHOD = 'DWEN_QL_{}'.format(epochs)
PREPROCESSING = 'bert'
TOKEN = 'bert'
# Dataset paths
DIR = 'data/processed/{}/{}'.format(DOMAIN, PREPROCESSING)
DIR_PAIRS = 'data/normalized/{}'.format(DOMAIN)
DATASET = os.path.join('data/normalized/{}'.format(DOMAIN), '{}.csv'.format(DOMAIN))
# Path embeddings
GLOVE_DIR='data/embed'
# Save model
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

# Extract CORPUs
EXTRACT_CORPUS = False

In [8]:
import os

pretrained_path = 'uncased_L-12_H-768_A-12'
config_path = os.path.join(pretrained_path, 'bert_config.json')
model_path = os.path.join(pretrained_path, 'bert_model.ckpt')
vocab_path = os.path.join(pretrained_path, 'vocab.txt')

In [9]:
from keras_bert import load_vocabulary

token_dict = load_vocabulary(vocab_path)

In [10]:
baseline = Baseline(DOMAIN, DIR, DATASET, MAX_SEQUENCE_LENGTH_T, MAX_SEQUENCE_LENGTH_D,
                   token_dict['[CLS]'], token_dict['[SEP]'])
evaluation = Evaluation(verbose=0)
retrieval = Retrieval()
experiment = Experiment(baseline, evaluation)

In [11]:
experiment.set_retrieval(retrieval, baseline, DOMAIN)

#### Loading bug ids in memory

In [12]:
experiment.load_ids()
len(baseline.bug_ids)

Reading bug ids


361006

#### Dicionário de títulos e descrições

In [13]:
%%time

experiment.load_bugs(TOKEN)
len(baseline.sentence_dict)

HBox(children=(IntProgress(value=0, max=361006), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


CPU times: user 47.9 s, sys: 10.9 s, total: 58.8 s
Wall time: 1min 34s


#### Hashing bugs by buckets

In [14]:
issues_by_buckets = experiment.get_buckets_for_bugs()

HBox(children=(IntProgress(value=0, max=361006), HTML(value='')))




#### Prepare the train and test

In [15]:
experiment.prepare_dataset(issues_by_buckets, path_train='train_chronological', path_test='test_chronological')
# Read and create the test queries duplicates
retrieval.create_queries()

In [16]:
baseline.train_data[:10]

[[275492, 218812],
 [288296, 264093],
 [273286, 293887],
 [57162, 62059],
 [82146, 67997],
 [56777, 61857],
 [169445, 165179],
 [250521, 273893],
 [247266, 241461],
 [36781, 38338]]

In [17]:
bug_train_ids = experiment.get_train_ids(baseline.train_data)

In [18]:
idx = np.random.choice(baseline.bug_ids, 1)[0]
baseline.bug_set[idx]

{'bug_severity': '4\n',
 'bug_status': '1\n',
 'component': '658\n',
 'creation_ts': '2009-06-01 23:17:00 -0400',
 'delta_ts': '2009-06-03 03:02:50 -0400',
 'description': '[CLS] cd ##t head : to reproduce turn on ind ##entation of public / protected / private as well as of declaration ##s relative to public / protected / private . position the cu ##rso ##r after the opening brace of the class declaration and hit enter . class a { } ; the cu ##rso ##r get ind ##ented by one level . it is expected to be ind ##ented by two levels since the declaration ##s before the first public / protected / private should be ind ##ented the same way as the ones after an access spec ##ifier . [SEP]',
 'description_segment': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,


### Generating the batch test

In [19]:
"Train ", len(baseline.dup_sets_train)

('Train ', 39339)

In [20]:
import random
random.choice(list(issues_by_buckets))

77789

In [21]:
import random

# data - path
# batch_size - 128
# n_neg - 1
def batch_iterator(self, retrieval, model, data, dup_sets, bug_ids, 
                   batch_size, n_neg, issues_by_buckets, TRIPLET_HARD=False, FLOATING_PADDING=False):
    # global train_data
    # global self.dup_sets
    # global self.bug_ids
    # global self.bug_set

    random.shuffle(data)

    batch_features = {'title' : [], 'desc' : [], 'info' : [] }

    n_train = len(data)

    batch_triplets, batch_bugs_anchor, batch_bugs_pos, batch_bugs_neg, batch_bugs = [], [], [], [], []

    all_bugs = list(issues_by_buckets.keys())
    buckets = retrieval.buckets

    for offset in range(batch_size):
        anchor, pos = data[offset][0], data[offset][1]
        batch_bugs_anchor.append(anchor)
        batch_bugs_pos.append(pos)
        batch_bugs.append(anchor)
        batch_bugs.append(pos)
        #batch_bugs += dup_sets[anchor]

    for anchor, pos in zip(batch_bugs_anchor, batch_bugs_pos):
        while True:
            neg = self.get_neg_bug(anchor, buckets[issues_by_buckets[anchor]], issues_by_buckets, all_bugs)
            bug_anchor = self.bug_set[anchor]
            bug_pos = self.bug_set[pos]
            if neg not in self.bug_set:
                continue
            batch_bugs.append(neg)
            batch_bugs_neg.append(neg)
            bug_neg = self.bug_set[neg]
            break
        
        # triplet bug and master
        batch_triplets.append([anchor, pos, neg])
    
    random.shuffle(batch_bugs)
    
    for bug_id in batch_bugs:
        bug = self.bug_set[bug_id]
        self.read_batch_bugs(batch_features, bug)

    batch_features['title'] = np.array(batch_features['title'])
    batch_features['desc'] = np.array(batch_features['desc'])
    
    sim = np.asarray([issues_by_buckets[bug_id] for bug_id in batch_bugs])

    input_sample = {}

    input_sample = { 'title' : batch_features['title'], 
                        'description' : batch_features['desc'] }

    return batch_triplets, input_sample, sim #sim

In [22]:
%%time

batch_size = 64
batch_size_test = 128

# we want a constant validation group to have a frame of reference for model performance
batch_triplets_valid, valid_input_sample, valid_sim = batch_iterator(baseline, retrieval, None, 
                                                                                      baseline.train_data, 
                                                                                      baseline.dup_sets_train,
                                                                                      bug_train_ids,
                                                                                      batch_size_test, 1,
                                                                                      issues_by_buckets)

validation_sample = [valid_input_sample['title'], 
             valid_input_sample['description'], valid_sim]

# Max sequence title
MAX_SEQUENCE_LENGTH_T = valid_input_sample['title'].shape[1]
MAX_SEQUENCE_LENGTH_D = valid_input_sample['description'].shape[1]

CPU times: user 457 ms, sys: 0 ns, total: 457 ms
Wall time: 457 ms


In [23]:
valid_input_sample['title'].shape, valid_input_sample['description'].shape, valid_sim.shape

((384, 20), (384, 20), (384,))

## Pre-trained embeddings

Loading pretrained word vectors

### Vocab

In [24]:
vocab = baseline.load_vocabulary(os.path.join(DIR, 'vocab_embed.pkl'))
#print(np.random.choice(vocab, 10))
# for token in vocab:
#     print(token)

vocabulary loaded


In [25]:
"Total vocabulary: {}".format(len(vocab))

'Total vocabulary: 21175'

In [26]:
def generating_embed(baseline, GLOVE_DIR, EMBEDDING_DIM):
    embeddings_index = {}
    embed_path = os.path.join(GLOVE_DIR, 'glove.42B.300d.txt')
    
    f2 = open(embed_path, 'rb')
    num_lines = sum(1 for line in f2)
    f2.close()
    
    f = open(embed_path, 'rb')
    vocab = baseline.load_vocabulary(os.path.join(baseline.DIR, 'vocab_embed.pkl'))
    vocab_size = len(vocab) 

    # Initialize uniform the vector considering the Tanh activation
    embedding_matrix = np.random.uniform(-1.0, 1.0, (num_lines + vocab_size, EMBEDDING_DIM))
    embedding_matrix[0, :] = np.zeros(EMBEDDING_DIM)

    loop = tqdm(f)
    loop.set_description("Loading Glove")
    i = 0
    for line in loop:
        tokens = line.split()
        word = tokens[0]
        embed = np.asarray(tokens[1:], dtype='float32')
        embeddings_index[word] = embed
        embedding_matrix[i] = embed
        i+=1
        loop.update(1)
    f.close()
    loop.close()

    print('Total %s word vectors in Glove 42B 300d.' % len(embeddings_index))

    loop = tqdm(total=vocab_size)
    loop.set_description('Loading embedding from dataset pretrained')
    
    for word, embed in vocab.items():
        if word in embeddings_index:
            embedding_matrix[i] = embeddings_index[word]
        else:
            embedding_matrix[i] = np.asarray(embed, dtype='float32')
        loop.update(1)
        i+=1
    loop.close()
    baseline.embedding_matrix = embedding_matrix

In [27]:
%%time

generating_embed(baseline, GLOVE_DIR=GLOVE_DIR, EMBEDDING_DIM=EMBEDDING_DIM) # MAX_NB_WORDS=MAX_NB_WORDS

vocabulary loaded


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Total 1917494 word vectors in Glove 42B 300d.


HBox(children=(IntProgress(value=0, max=21175), HTML(value='')))


CPU times: user 1min 36s, sys: 3.44 s, total: 1min 39s
Wall time: 1min 39s


## Experiment

## Propose

https://github.com/tqtg/DuplicateBugFinder

In [28]:
from keras.initializers import RandomUniform, RandomNormal, Ones

### Embedding layer

In [29]:
from keras.constraints import MaxNorm
from keras.initializers import TruncatedNormal, RandomUniform

# Is missing the padding_idx used in pytorch
# https://pytorch.org/docs/stable/_modules/torch/nn/modules/sparse.html
# https://stackoverflow.com/questions/54824768/rnn-model-gru-of-word2vec-to-regression-not-learning
def embedding_layer(embeddings, num_words, embedding_dim, max_sequence_length, trainable, name):
    embedding_layer = Embedding(num_words,
                                  embedding_dim,
                                  name='embedding_layer_{}'.format(name),
                                  weights=[embeddings],
                                  #input_length=max_sequence_length,
                                  input_length=None,
                                  trainable=trainable)
    return embedding_layer

### DWEN model

In [30]:
def pairwise_distance(feature, squared=False):
    """Computes the pairwise distance matrix with numerical stability.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2

    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.

    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    pairwise_distances_squared = math_ops.add(
        math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
        math_ops.reduce_sum(
            math_ops.square(array_ops.transpose(feature)),
            axis=[0],
            keepdims=True)) - 2.0 * math_ops.matmul(feature,
                                                    array_ops.transpose(feature))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
    # Get the mask where the zero distances are at.
    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        pairwise_distances = math_ops.sqrt(
            pairwise_distances_squared + math_ops.to_float(error_mask) * 1e-16)

    # Undo conditionally adding 1e-16.
    pairwise_distances = math_ops.multiply(
        pairwise_distances, math_ops.to_float(math_ops.logical_not(error_mask)))

    num_data = array_ops.shape(feature)[0]
    # Explicitly set diagonals to zero.
    mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
        array_ops.ones([num_data]))
    pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
    return pairwise_distances

def masked_maximum(data, mask, dim=1):
    """Computes the axis wise maximum over chosen elements.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D Boolean `Tensor` of size [n, m].
      dim: The dimension over which to compute the maximum.

    Returns:
      masked_maximums: N-D `Tensor`.
        The maximized dimension is of size 1 after the operation.
    """
    axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
    masked_maximums = math_ops.reduce_max(
        math_ops.multiply(data - axis_minimums, mask), dim,
        keepdims=True) + axis_minimums
    return masked_maximums

def masked_minimum(data, mask, dim=1):
    """Computes the axis wise minimum over chosen elements.

    Args:
      data: 2-D float `Tensor` of size [n, m].
      mask: 2-D Boolean `Tensor` of size [n, m].
      dim: The dimension over which to compute the minimum.

    Returns:
      masked_minimums: N-D `Tensor`.
        The minimized dimension is of size 1 after the operation.
    """
    axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
    masked_minimums = math_ops.reduce_min(
        math_ops.multiply(data - axis_maximums, mask), dim,
        keepdims=True) + axis_maximums
    return masked_minimums

In [31]:
## required for semi-hard triplet loss:
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import dtypes

def triplet_loss(vects):
    margin = 1.
    labels = vects[:, :1]
 
    labels = tf.cast(labels, dtype='int32')

    embeddings = tf.cast(vects[:, 1:], dtype='float32')

    ### Code from Tensorflow function [tf.contrib.losses.metric_learning.triplet_semihard_loss] starts here:
    
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    # lshape=array_ops.shape(labels)
    # assert lshape.shape == 1
    # labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    # global batch_size  
    batch_size = array_ops.size(labels) # was 'array_ops.size(labels)'

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile, array_ops.reshape(
                array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(
                math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
            0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(
        mask_final, negatives_outside, negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(
        adjacency, dtype=dtypes.float32) - array_ops.diag(
        array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(
                math_ops.multiply(loss_mat, mask_positives), 0.0)),
        num_positives,
        name='triplet_semihard_loss')
    
    ### Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance

def quintet_loss(inputs):
    margin = 1.
    labels = inputs[:, :1]
 
    labels = tf.cast(labels, dtype='int32')

    embeddings = inputs[:, 1:]

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    # global batch_size  
    batch_size = array_ops.size(labels) # was 'array_ops.size(labels)'

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)

    mask_positives = math_ops.cast(
        adjacency, dtype=dtypes.float32) - array_ops.diag(array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)
    
    mask_negatives = adjacency_not
    
    # Include the anchor to positives
    mask_positives_centroids = math_ops.cast(adjacency, dtype=dtypes.float32)

#     return mask_positives

    # pos 
    embed_pos = tf.matmul(mask_positives_centroids, embeddings)
    num_of_pos = tf.reduce_sum(mask_positives_centroids, axis=1, keepdims=True)
    centroid_embed_pos = tf.math.xdivy(embed_pos, num_of_pos)
    labels_pos = tf.cast(labels, dtype=dtypes.float32)
    # negs
    embed_neg = tf.matmul(mask_negatives, embeddings)
    num_of_neg = tf.reduce_sum(mask_negatives, axis=1, keepdims=True)
    centroid_embed_neg = tf.math.xdivy(embed_neg, num_of_neg)

#     return mask_positives_centroids
    i = tf.constant(0)
    batch_centroid_matrix = tf.Variable([])
    batch_centroid_matrix_neg = tf.Variable([])
    batch_centroid_matrix_all = tf.Variable([])
    def iter_centroids(i, batch_centroid_matrix, batch_centroid_matrix_neg, batch_centroid_matrix_all):
        # centroid pos
        mask_positives_batch = tf.reshape(tf.gather(mask_positives, i), (-1, 1))
        centroid_pos = tf.gather(centroid_embed_pos, i)
        
        centroid_embed = tf.repeat([centroid_pos], repeats=[batch_size], axis=0)
        new_batch_centroid_pos = mask_positives_batch * centroid_embed
        new_batch_embeddings = tf.cast(tf.logical_not(tf.cast(mask_positives_batch, 'bool')), 'float32') * embeddings 
        new_batch = tf.reduce_sum([new_batch_centroid_pos, new_batch_embeddings], axis=0, keepdims=True)[0]
        
        vects_new_batch = tf.concat([labels_pos, new_batch], axis=1)
        TL_new_batch = triplet_loss(vects_new_batch)
        batch_centroid_matrix = tf.concat([batch_centroid_matrix, [TL_new_batch]], axis=0) 
        
        # centroid neg
        centroid_neg = tf.gather(centroid_embed_neg, i)
        mask_negatives_batch = tf.reshape(tf.gather(mask_negatives, i), (-1, 1))
        
        centroid_embed = tf.repeat([centroid_neg], repeats=[batch_size], axis=0)
        new_batch_centroid_neg = mask_negatives_batch * centroid_embed
        new_batch_embeddings = tf.cast(tf.logical_not(tf.cast(mask_negatives_batch, 'bool')), 'float32') * embeddings 
        new_batch = tf.reduce_sum([new_batch_centroid_neg, new_batch_embeddings], axis=0, keepdims=True)[0]
        
        vects_new_batch = tf.concat([labels_pos, new_batch], axis=1)
        TL_new_batch = triplet_loss(vects_new_batch)
        batch_centroid_matrix_neg = tf.concat([batch_centroid_matrix_neg, [TL_new_batch]], axis=0) 
        
        # centroid pos and neg
        new_batch_centroids = tf.reduce_sum([new_batch_centroid_pos, new_batch_centroid_neg], axis=0, keepdims=True)[0]
        vects_new_batch_centroids = tf.concat([labels_pos, new_batch_centroids], axis=1)
        TL_new_batch = triplet_loss(vects_new_batch_centroids)
        batch_centroid_matrix_all = tf.concat([batch_centroid_matrix_all, [TL_new_batch]], axis=0) 
        
        return [tf.add(i, 1), batch_centroid_matrix, batch_centroid_matrix_neg, batch_centroid_matrix_all]
    _, batch_centroid_matrix, batch_centroid_matrix_neg, batch_centroid_matrix_all = tf.while_loop(lambda i, a, b, c: i<batch_size // 3, 
                                        iter_centroids, 
                                        [i, batch_centroid_matrix, batch_centroid_matrix_neg, batch_centroid_matrix_all],
                                       shape_invariants=[i.get_shape(),
                                                   tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])])


    TL = triplet_loss(inputs)
    TL_pos = tf.reduce_mean(batch_centroid_matrix)
    TL_neg = tf.reduce_mean(batch_centroid_matrix_neg) #triplet_loss(vects_neg)
    TL_centroid = tf.reduce_mean(batch_centroid_matrix_all) # triplet_loss(vects_centroids)
   
    return K.stack([TL, TL_pos, TL_neg, TL_centroid], axis=0)

def quintet_trainable(inputs):
    TL = inputs[0]
    TL_pos = inputs[1]
    TL_neg = inputs[2]
    TL_centroid = inputs[3]
    TL_anchor_w = inputs[4]
    TL_pos_w = inputs[5]
    TL_neg_w = inputs[6]
    TL_centroid_w = inputs[7]

    sum_of_median = tf.reduce_sum([TL * TL_anchor_w, TL_pos * TL_pos_w, TL_neg * TL_neg_w, TL_centroid * TL_centroid_w]) # 
    sum_of_weigths = tf.reduce_sum([TL_anchor_w, TL_pos_w, TL_neg_w, TL_centroid_w])
    weigthed_median = tf.truediv(sum_of_median, sum_of_weigths)    
    return K.stack([weigthed_median, TL_anchor_w, TL_pos_w, TL_neg_w, TL_centroid_w, TL, TL_pos, TL_neg, TL_centroid], axis=0)

def custom_loss(y_true, y_pred):
    return tf.reduce_mean(y_pred[0])

def TL_w_anchor(y_true, y_pred):
    return tf.reduce_mean(y_pred[1])
def TL_w_pos(y_true, y_pred):
    return tf.reduce_mean(y_pred[2])
def TL_w_neg(y_true, y_pred):
    return tf.reduce_mean(y_pred[3])
def TL_w_centroid(y_true, y_pred):
    return tf.reduce_mean(y_pred[4])
def TL(y_true, y_pred):
    return tf.reduce_mean(y_pred[5])
def TL_pos(y_true, y_pred):
    return tf.reduce_mean(y_pred[6])
def TL_neg(y_true, y_pred):
    return tf.reduce_mean(y_pred[7])
def TL_centroid(y_true, y_pred):
    return tf.reduce_mean(y_pred[8])

In [32]:
from keras.layers import concatenate, Add, Lambda, merge, Average, Maximum, Subtract, \
    Average, GlobalAveragePooling1D, BatchNormalization, Activation
from keras.optimizers import Adam, Nadam
import keras.backend as K

from keras.layers import Layer
from keras import backend as K

class QuintetWeights(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(QuintetWeights, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = tf.reshape(self.add_weight(name='quintet_kernel_weight', 
                                      shape=(input_shape[0], self.output_dim),
                                      initializer=keras.initializers.Ones(),
#                                       initializer=keras.initializers.RandomUniform(minval=0.0, maxval=1.0, seed=None),
                                      trainable=True), (1, 1))
        super(QuintetWeights, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        x = tf.reshape(x, (1, 1))
        return [K.dot(x, self.kernel), self.kernel]

    def compute_output_shape(self, input_shape):
        return [input_shape, input_shape]

def dwen_feature(title_feature_model, desc_feature_model, \
                  sequence_length_t, sequence_length_d, name):
    bug_t = Input(shape = (sequence_length_t, ), name = 'title_{}'.format(name))
    bug_d = Input(shape = (sequence_length_d, ), name = 'desc_{}'.format(name))
    
    # Embedding feature
    bug_t_feat = title_feature_model(bug_t)
    bug_d_feat = desc_feature_model(bug_d)
    
    bug_t_feat = GlobalAveragePooling1D()(bug_t_feat)
    bug_d_feat = GlobalAveragePooling1D()(bug_d_feat)
    
    #bug_feature_output = Add(name = 'merge_features_{}'.format(name))([bug_i_feat, bug_t_feat, bug_d_feat])
    bug_feature_output = Average(name = 'merge_features_{}'.format(name))([bug_t_feat, bug_d_feat])
    
    bug_feature_model = Model(inputs=[bug_t, bug_d], outputs=[bug_feature_output], name = 'merge_features_{}'.format(name))
    
    return bug_feature_model

def dwen_model(encoded_anchor, name):
    
    input_labels = Input(shape=(1,), name='input_label')    # input layer for labels
    inputs = np.concatenate([encoded_anchor.input, [input_labels]], -1).tolist()
    
    encoded_anchor = encoded_anchor.output
    
    hidden_layers = 2
    
    # Deep Hidden MLPs
    for _ in range(hidden_layers):
        number_of_units = K.int_shape(encoded_anchor)[1]
        encoded_anchor = Dense(number_of_units // 2)(encoded_anchor)
#         bug_feature_output = BatchNormalization()(bug_feature_output)
        encoded_anchor = Activation('relu')(encoded_anchor)
        #bug_feature_output = Dropout(.5)(bug_feature_output)
    
     # Concatenation
    output = concatenate([input_labels, encoded_anchor])  # concatenating the labels + embeddings
    
    TL_loss = Lambda(quintet_loss, name='quintet_loss')(output)
    
    tl_l = Lambda(lambda x:tf.reshape(x[0], (1,)), name='TL')(TL_loss)
    tl_l_p = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_pos')(TL_loss)
    tl_l_n = Lambda(lambda x:tf.reshape(x[2], (1,)), name='TL_neg')(TL_loss)
    tl_l_c = Lambda(lambda x:tf.reshape(x[3], (1, )), name='TL_centroid')(TL_loss)
    
    TL_w = QuintetWeights(output_dim=1)(tl_l)
    TL_pos_w = QuintetWeights(output_dim=1)(tl_l_p)
    TL_neg_w = QuintetWeights(output_dim=1)(tl_l_n)
    TL_centroid_w = QuintetWeights(output_dim=1)(tl_l_c)
    
    TL_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_weight')(TL_w)
    TL_pos_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_pos_weight')(TL_pos_w)
    TL_neg_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_neg_weight')(TL_neg_w)
    TL_centroid_weight = Lambda(lambda x:tf.reshape(x[1], (1,)), name='TL_centroid_weight')(TL_centroid_w)
    
    output = concatenate([tl_l, tl_l_p, tl_l_n, tl_l_c, TL_weight, TL_pos_weight, TL_neg_weight, TL_centroid_weight])
    output = Lambda(quintet_trainable, name='quintet_trainable')(output)

    similarity_model = Model(inputs=inputs, outputs=[output], name = 'dwen_output')

    #optimizer = Nadam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=K.epsilon(), schedule_decay=0.01)
    optimizer = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)

    # setup the optimization process 
    similarity_model.compile(optimizer='adam', loss=custom_loss, metrics=[TL_w_anchor, TL_w_pos, TL_w_neg, TL_w_centroid,
                                                                         TL, TL_pos, TL_neg, TL_centroid])
    
    return similarity_model

In [33]:
import _pickle as pickle
def save_loss(result):
    with open(os.path.join(DIR,'{}_log.pkl'.format(METHOD)), 'wb') as f:
        pickle.dump(result, f)
    print("=> result saved!")

In [34]:
# Domain to use
limit_train = int(epochs * freeze_train) # 10% de 1000 , 100 epocas
METHOD = 'DWEN_QL_{}'.format(limit_train)
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

In [35]:
%%time

# Inspired on https://'pastebin.com/TaGFdcBA
# TODO: https://stackoverflow.com/questions/49941903/keras-compute-cosine-distance-between-two-flattened-outputs
keras.backend.clear_session()

# Clear GPU memory
# from numba import cuda
# cuda.select_device(0)
# cuda.close()

# Embeddings
desc_embedding_layer = embedding_layer(embeddings=baseline.embedding_matrix, 
                              num_words=len(baseline.embedding_matrix), 
                              embedding_dim=EMBEDDING_DIM, 
                              max_sequence_length=MAX_SEQUENCE_LENGTH_D, 
                              trainable=False, name='desc')
title_embedding_layer = embedding_layer(embeddings=baseline.embedding_matrix, 
                              num_words=len(baseline.embedding_matrix), 
                              embedding_dim=EMBEDDING_DIM, 
                              max_sequence_length=MAX_SEQUENCE_LENGTH_T, 
                              trainable=False, name='title')

# Similarity model
encoded_anchor = dwen_feature(title_embedding_layer, desc_embedding_layer, 
                                    MAX_SEQUENCE_LENGTH_T, MAX_SEQUENCE_LENGTH_D, 'in')

similarity_model = dwen_model(encoded_anchor, 'dwen')

# cnn_feature_model.summary()
# lstm_feature_model.summary()
similarity_model.summary()


'''
    Experiment
'''
result = { 'train' : [], 'test' : [] }
print("Total of ", limit_train)
for epoch in range(limit_train):
    batch_triplet_train, \
        train_input_sample, train_sim = batch_iterator(baseline, retrieval, encoded_anchor, baseline.train_data, 
                                                       baseline.dup_sets_train, bug_train_ids, 
                                                           batch_size, 1, issues_by_buckets, TRIPLET_HARD=False)
    train_batch = [train_input_sample['title'], train_input_sample['description'], train_sim]
    
    
    h = similarity_model.train_on_batch(x=train_batch, y=train_sim)
    h_validation = similarity_model.test_on_batch(x=validation_sample, y=valid_sim)
    
    # save results
    result['train'].append(h)
    result['test'].append(h_validation)
    
    if( (epoch+1) % 10 == 0 or (epoch+1 == limit_train) ):
        save_loss(result)
    
    if (epoch+1 == limit_train): #(epoch > 1 and epoch % 10 == 0) or (epoch+1 == epochs):
        recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, verbose, encoded_anchor, issues_by_buckets, bug_train_ids, method='dwen')
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
                "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
              "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], 
                                          h[4], h[5], h[6], h[7], h[8], recall))
    else:
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
              "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}").format(
            epoch+1, h[0], h_validation[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], h[8]))
    
    loss = h[0]
    
    if loss < best_loss:
        best_loss = loss
        best_epoch = epoch+1

#experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
#experiment.save_model(bug_feature_output_a, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
print('Best_epoch={}, Best_loss={:.2f}, Recall@25={:.2f}'.format(best_epoch, best_loss, recall))












Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
embedding_layer_title (Embeddin (None, 20, 300)      581600700   title_in[0][0]                   
__________________________________________________________________________________________________
embedding_layer_desc (Embedding (None, 20, 300)     





  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch: 1 Loss: 0.86, Loss_test: 0.86
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.98, TL_pos: 0.98, TL_neg: 0.91, TL_centroid: 0.56
Epoch: 2 Loss: 0.86, Loss_test: 0.85
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.97, TL_pos: 0.97, TL_neg: 0.89, TL_centroid: 0.60
Epoch: 3 Loss: 0.86, Loss_test: 0.84
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.98, TL_pos: 0.98, TL_neg: 0.87, TL_centroid: 0.61
Epoch: 4 Loss: 0.85, Loss_test: 0.83
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.97, TL_pos: 0.96, TL_neg: 0.84, TL_centroid: 0.63
Epoch: 5 Loss: 0.84, Loss_test: 0.81
TL_w: 1.00, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.98, TL_pos: 0.98, TL_neg: 0.81, TL_centroid: 0.58
Epoch: 6 Loss: 0.79, Loss_test: 0.80
TL_w: 0.99, TL_pos_w: 1.00, TL_neg_w: 1.00, TL_centroid_w: 1.00
TL: 0.96, TL_pos: 0.96, TL_neg: 0.74, TL_centroid: 0.50
Epoch: 7 Loss: 0.78, Loss_test: 0.78
TL_w: 0.99, TL_pos_w:

Epoch: 53 Loss: 0.52, Loss_test: 0.44
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.05
TL: 0.89, TL_pos: 0.88, TL_neg: 0.21, TL_centroid: 0.19
Epoch: 54 Loss: 0.53, Loss_test: 0.44
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.05
TL: 0.85, TL_pos: 0.84, TL_neg: 0.22, TL_centroid: 0.27
Epoch: 55 Loss: 0.46, Loss_test: 0.43
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.05
TL: 0.80, TL_pos: 0.79, TL_neg: 0.16, TL_centroid: 0.16
Epoch: 56 Loss: 0.49, Loss_test: 0.43
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.05
TL: 0.82, TL_pos: 0.81, TL_neg: 0.16, TL_centroid: 0.22
Epoch: 57 Loss: 0.52, Loss_test: 0.43
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.06
TL: 0.80, TL_pos: 0.80, TL_neg: 0.21, TL_centroid: 0.31
Epoch: 58 Loss: 0.51, Loss_test: 0.42
TL_w: 0.94, TL_pos_w: 0.94, TL_neg_w: 1.05, TL_centroid_w: 1.06
TL: 0.92, TL_pos: 0.90, TL_neg: 0.17, TL_centroid: 0.15
Epoch: 59 Loss: 0.49, Loss_test: 0.42
TL_w: 0.94, TL

In [36]:
EXPORT_RANK_PATH = os.path.join(DIR, 'exported_rank_{}.txt'.format(METHOD))
EXPORT_RANK_PATH

'data/processed/eclipse/bert/exported_rank_DWEN_QL_100.txt'

In [37]:
with open(EXPORT_RANK_PATH, 'w') as file_out:
    for row in exported_rank:
        file_out.write(row + "\n")

In [38]:
experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(limit_train)))
experiment.save_model(encoded_anchor, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(limit_train)), verbose=1)

Saved model 'modelos/model_bert_preprocessing_DWEN_QL_100_feature_100epochs_64batch(eclipse).h5' to disk


In [39]:
len(result['train']), len(result['test'])

(100, 100)

In [40]:
model = similarity_model.get_layer('concatenate_2')
output = Lambda(quintet_trainable, name='quintet_trainable')(model.output)
inputs = similarity_model.inputs
model = Model(inputs = inputs, outputs = output, name = 'Similarity_Model')

# setup the optimization process 
model.compile(optimizer='adam', loss=custom_loss, metrics=[TL_w_anchor, TL_w_pos, TL_w_neg, TL_w_centroid,
                                                                         TL, TL_pos, TL_neg, TL_centroid])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
embedding_layer_title (Embeddin (None, 20, 300)      581600700   title_in[0][0]                   
__________________________________________________________________________________________________
embedding_layer_desc (Embedding (None, 20, 300)      581600700   desc_in[0][0]                    
__________________________________________________________________________________________________
global_ave

In [41]:
# Domain to use
METHOD = 'DWEN_QL_{}'.format(epochs)
SAVE_PATH = '{}_preprocessing_{}_feature@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)
SAVE_PATH_FEATURE = '{}_preprocessing_{}_feature_@number_of_epochs@epochs_64batch({})'.format(PREPROCESSING, METHOD, DOMAIN)

In [42]:
end_train = epochs - limit_train
for epoch in range(limit_train, end_train):
    batch_triplet_train, \
        train_input_sample, train_sim = batch_iterator(baseline, retrieval, encoded_anchor, baseline.train_data, 
                                                       baseline.dup_sets_train, bug_train_ids, 
                                                           batch_size, 1, issues_by_buckets, TRIPLET_HARD=False)
    train_batch = [train_input_sample['title'], train_input_sample['description'], train_sim]
    
    
    h = similarity_model.train_on_batch(x=train_batch, y=train_sim)
    h_validation = similarity_model.test_on_batch(x=validation_sample, y=valid_sim)
    
    # save results
    result['train'].append(h)
    result['test'].append(h_validation)
    
    if( (epoch+1) % 10 == 0 or (epoch+1 == limit_train) ):
        save_loss(result)
    
    if (epoch+1 == epochs): #(epoch > 1 and epoch % 10 == 0) or (epoch+1 == epochs):
        recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, verbose, encoded_anchor, issues_by_buckets, bug_train_ids, method='dwen')
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
                "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
              "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], 
                                          h[4], h[5], h[6], h[7], h[8], recall))
    else:
        print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
               "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
              "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}").format(
            epoch+1, h[0], h_validation[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], h[8]))
    
    loss = h[0]
    
    if loss < best_loss:
        best_loss = loss
        best_epoch = epoch+1

#experiment.save_model(similarity_model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
#experiment.save_model(bug_feature_output_a, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
print('Best_epoch={}, Best_loss={:.2f}, Recall@25={:.2f}'.format(best_epoch, best_loss, recall))

Epoch: 101 Loss: 0.29, Loss_test: 0.33
TL_w: 0.89, TL_pos_w: 0.89, TL_neg_w: 1.10, TL_centroid_w: 1.10
TL: 0.58, TL_pos: 0.58, TL_neg: 0.03, TL_centroid: 0.10
Epoch: 102 Loss: 0.44, Loss_test: 0.33
TL_w: 0.89, TL_pos_w: 0.89, TL_neg_w: 1.11, TL_centroid_w: 1.10
TL: 0.85, TL_pos: 0.84, TL_neg: 0.09, TL_centroid: 0.16
Epoch: 103 Loss: 0.38, Loss_test: 0.33
TL_w: 0.88, TL_pos_w: 0.89, TL_neg_w: 1.11, TL_centroid_w: 1.10
TL: 0.80, TL_pos: 0.80, TL_neg: 0.03, TL_centroid: 0.08
Epoch: 104 Loss: 0.43, Loss_test: 0.33
TL_w: 0.88, TL_pos_w: 0.88, TL_neg_w: 1.11, TL_centroid_w: 1.10
TL: 0.89, TL_pos: 0.88, TL_neg: 0.04, TL_centroid: 0.10
Epoch: 105 Loss: 0.34, Loss_test: 0.33
TL_w: 0.88, TL_pos_w: 0.88, TL_neg_w: 1.11, TL_centroid_w: 1.10
TL: 0.68, TL_pos: 0.68, TL_neg: 0.04, TL_centroid: 0.10
Epoch: 106 Loss: 0.38, Loss_test: 0.33
TL_w: 0.88, TL_pos_w: 0.88, TL_neg_w: 1.11, TL_centroid_w: 1.10
TL: 0.72, TL_pos: 0.71, TL_neg: 0.06, TL_centroid: 0.16
Epoch: 107 Loss: 0.40, Loss_test: 0.33
TL_w: 0

Epoch: 152 Loss: 0.34, Loss_test: 0.26
TL_w: 0.83, TL_pos_w: 0.83, TL_neg_w: 1.16, TL_centroid_w: 1.15
TL: 0.74, TL_pos: 0.73, TL_neg: 0.04, TL_centroid: 0.07
Epoch: 153 Loss: 0.39, Loss_test: 0.26
TL_w: 0.83, TL_pos_w: 0.83, TL_neg_w: 1.16, TL_centroid_w: 1.15
TL: 0.86, TL_pos: 0.85, TL_neg: 0.02, TL_centroid: 0.10
Epoch: 154 Loss: 0.31, Loss_test: 0.27
TL_w: 0.83, TL_pos_w: 0.83, TL_neg_w: 1.16, TL_centroid_w: 1.15
TL: 0.66, TL_pos: 0.66, TL_neg: 0.02, TL_centroid: 0.09
Epoch: 155 Loss: 0.34, Loss_test: 0.27
TL_w: 0.83, TL_pos_w: 0.83, TL_neg_w: 1.16, TL_centroid_w: 1.15
TL: 0.72, TL_pos: 0.70, TL_neg: 0.05, TL_centroid: 0.09
Epoch: 156 Loss: 0.30, Loss_test: 0.27
TL_w: 0.82, TL_pos_w: 0.82, TL_neg_w: 1.16, TL_centroid_w: 1.15
TL: 0.64, TL_pos: 0.61, TL_neg: 0.04, TL_centroid: 0.09
Epoch: 157 Loss: 0.32, Loss_test: 0.26
TL_w: 0.82, TL_pos_w: 0.82, TL_neg_w: 1.17, TL_centroid_w: 1.15
TL: 0.69, TL_pos: 0.68, TL_neg: 0.03, TL_centroid: 0.10
Epoch: 158 Loss: 0.32, Loss_test: 0.25
TL_w: 0

Epoch: 203 Loss: 0.34, Loss_test: 0.18
TL_w: 0.77, TL_pos_w: 0.78, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.76, TL_pos: 0.75, TL_neg: 0.03, TL_centroid: 0.10
Epoch: 204 Loss: 0.29, Loss_test: 0.17
TL_w: 0.77, TL_pos_w: 0.77, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.65, TL_pos: 0.64, TL_neg: 0.03, TL_centroid: 0.10
Epoch: 205 Loss: 0.33, Loss_test: 0.17
TL_w: 0.77, TL_pos_w: 0.77, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.78, TL_pos: 0.77, TL_neg: 0.03, TL_centroid: 0.07
Epoch: 206 Loss: 0.27, Loss_test: 0.17
TL_w: 0.77, TL_pos_w: 0.77, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.64, TL_pos: 0.63, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 207 Loss: 0.32, Loss_test: 0.17
TL_w: 0.77, TL_pos_w: 0.77, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.76, TL_pos: 0.75, TL_neg: 0.02, TL_centroid: 0.06
Epoch: 208 Loss: 0.26, Loss_test: 0.16
TL_w: 0.77, TL_pos_w: 0.77, TL_neg_w: 1.21, TL_centroid_w: 1.19
TL: 0.60, TL_pos: 0.58, TL_neg: 0.02, TL_centroid: 0.09
Epoch: 209 Loss: 0.24, Loss_test: 0.16
TL_w: 0

Epoch: 254 Loss: 0.24, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.25, TL_centroid_w: 1.23
TL: 0.59, TL_pos: 0.58, TL_neg: 0.03, TL_centroid: 0.05
Epoch: 255 Loss: 0.28, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.25, TL_centroid_w: 1.23
TL: 0.69, TL_pos: 0.68, TL_neg: 0.02, TL_centroid: 0.08
Epoch: 256 Loss: 0.22, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.25, TL_centroid_w: 1.23
TL: 0.54, TL_pos: 0.54, TL_neg: 0.02, TL_centroid: 0.07
Epoch: 257 Loss: 0.30, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.26, TL_centroid_w: 1.23
TL: 0.75, TL_pos: 0.72, TL_neg: 0.03, TL_centroid: 0.07
Epoch: 258 Loss: 0.20, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.26, TL_centroid_w: 1.23
TL: 0.49, TL_pos: 0.48, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 259 Loss: 0.18, Loss_test: 0.12
TL_w: 0.72, TL_pos_w: 0.72, TL_neg_w: 1.26, TL_centroid_w: 1.23
TL: 0.45, TL_pos: 0.44, TL_neg: 0.01, TL_centroid: 0.04
=> result saved!
Epoch: 260 Loss: 0.26, Loss_t

Epoch: 305 Loss: 0.17, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.29, TL_centroid_w: 1.27
TL: 0.46, TL_pos: 0.45, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 306 Loss: 0.17, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.29, TL_centroid_w: 1.27
TL: 0.44, TL_pos: 0.43, TL_neg: 0.02, TL_centroid: 0.06
Epoch: 307 Loss: 0.16, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.29, TL_centroid_w: 1.27
TL: 0.42, TL_pos: 0.42, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 308 Loss: 0.20, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.30, TL_centroid_w: 1.27
TL: 0.53, TL_pos: 0.53, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 309 Loss: 0.23, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.30, TL_centroid_w: 1.27
TL: 0.60, TL_pos: 0.58, TL_neg: 0.02, TL_centroid: 0.06
=> result saved!
Epoch: 310 Loss: 0.16, Loss_test: 0.11
TL_w: 0.67, TL_pos_w: 0.67, TL_neg_w: 1.30, TL_centroid_w: 1.27
TL: 0.43, TL_pos: 0.43, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 311 Loss: 0.13, Loss_t

Epoch: 356 Loss: 0.21, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.55, TL_pos: 0.54, TL_neg: 0.02, TL_centroid: 0.07
Epoch: 357 Loss: 0.19, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.55, TL_pos: 0.53, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 358 Loss: 0.16, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.41, TL_pos: 0.41, TL_neg: 0.01, TL_centroid: 0.07
Epoch: 359 Loss: 0.23, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.60, TL_pos: 0.58, TL_neg: 0.04, TL_centroid: 0.09
=> result saved!
Epoch: 360 Loss: 0.23, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.66, TL_pos: 0.65, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 361 Loss: 0.15, Loss_test: 0.10
TL_w: 0.62, TL_pos_w: 0.62, TL_neg_w: 1.33, TL_centroid_w: 1.30
TL: 0.41, TL_pos: 0.40, TL_neg: 0.01, TL_centroid: 0.06
Epoch: 362 Loss: 0.20, Loss_t

Epoch: 407 Loss: 0.21, Loss_test: 0.08
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.33
TL: 0.63, TL_pos: 0.61, TL_neg: 0.02, TL_centroid: 0.07
Epoch: 408 Loss: 0.11, Loss_test: 0.08
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.33
TL: 0.31, TL_pos: 0.30, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 409 Loss: 0.18, Loss_test: 0.08
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.33
TL: 0.54, TL_pos: 0.54, TL_neg: 0.01, TL_centroid: 0.04
=> result saved!
Epoch: 410 Loss: 0.17, Loss_test: 0.07
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.34
TL: 0.50, TL_pos: 0.48, TL_neg: 0.03, TL_centroid: 0.05
Epoch: 411 Loss: 0.17, Loss_test: 0.07
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.34
TL: 0.50, TL_pos: 0.49, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 412 Loss: 0.18, Loss_test: 0.07
TL_w: 0.57, TL_pos_w: 0.57, TL_neg_w: 1.37, TL_centroid_w: 1.34
TL: 0.52, TL_pos: 0.51, TL_neg: 0.01, TL_centroid: 0.06
Epoch: 413 Loss: 0.15, Loss_t

Epoch: 458 Loss: 0.17, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.36
TL: 0.49, TL_pos: 0.48, TL_neg: 0.04, TL_centroid: 0.06
Epoch: 459 Loss: 0.18, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.36
TL: 0.55, TL_pos: 0.54, TL_neg: 0.02, TL_centroid: 0.06
=> result saved!
Epoch: 460 Loss: 0.14, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.36
TL: 0.46, TL_pos: 0.45, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 461 Loss: 0.12, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.36
TL: 0.36, TL_pos: 0.36, TL_neg: 0.01, TL_centroid: 0.06
Epoch: 462 Loss: 0.14, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.36
TL: 0.42, TL_pos: 0.40, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 463 Loss: 0.17, Loss_test: 0.06
TL_w: 0.52, TL_pos_w: 0.52, TL_neg_w: 1.40, TL_centroid_w: 1.37
TL: 0.54, TL_pos: 0.54, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 464 Loss: 0.20, Loss_t

Epoch: 509 Loss: 0.18, Loss_test: 0.06
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.63, TL_pos: 0.61, TL_neg: 0.03, TL_centroid: 0.04
=> result saved!
Epoch: 510 Loss: 0.11, Loss_test: 0.05
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.35, TL_pos: 0.34, TL_neg: 0.01, TL_centroid: 0.06
Epoch: 511 Loss: 0.15, Loss_test: 0.05
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.49, TL_pos: 0.49, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 512 Loss: 0.13, Loss_test: 0.05
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.42, TL_pos: 0.41, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 513 Loss: 0.14, Loss_test: 0.05
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.48, TL_pos: 0.47, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 514 Loss: 0.09, Loss_test: 0.05
TL_w: 0.47, TL_pos_w: 0.47, TL_neg_w: 1.43, TL_centroid_w: 1.39
TL: 0.30, TL_pos: 0.29, TL_neg: 0.01, TL_centroid: 0.05
Epoch: 515 Loss: 0.11, Loss_t

=> result saved!
Epoch: 560 Loss: 0.11, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.43, TL_neg_w: 1.46, TL_centroid_w: 1.41
TL: 0.43, TL_pos: 0.42, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 561 Loss: 0.20, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.42, TL_neg_w: 1.46, TL_centroid_w: 1.41
TL: 0.70, TL_pos: 0.68, TL_neg: 0.04, TL_centroid: 0.07
Epoch: 562 Loss: 0.08, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.42, TL_neg_w: 1.46, TL_centroid_w: 1.41
TL: 0.32, TL_pos: 0.31, TL_neg: 0.01, TL_centroid: 0.02
Epoch: 563 Loss: 0.10, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.42, TL_neg_w: 1.46, TL_centroid_w: 1.41
TL: 0.35, TL_pos: 0.34, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 564 Loss: 0.15, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.42, TL_neg_w: 1.46, TL_centroid_w: 1.42
TL: 0.55, TL_pos: 0.54, TL_neg: 0.01, TL_centroid: 0.06
Epoch: 565 Loss: 0.10, Loss_test: 0.05
TL_w: 0.42, TL_pos_w: 0.42, TL_neg_w: 1.46, TL_centroid_w: 1.42
TL: 0.38, TL_pos: 0.37, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 566 Loss: 0.15, Loss_t

Epoch: 611 Loss: 0.11, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.48, TL_centroid_w: 1.44
TL: 0.40, TL_pos: 0.39, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 612 Loss: 0.12, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.49, TL_centroid_w: 1.44
TL: 0.48, TL_pos: 0.48, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 613 Loss: 0.04, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.49, TL_centroid_w: 1.44
TL: 0.12, TL_pos: 0.12, TL_neg: 0.00, TL_centroid: 0.03
Epoch: 614 Loss: 0.10, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.49, TL_centroid_w: 1.44
TL: 0.37, TL_pos: 0.36, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 615 Loss: 0.09, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.49, TL_centroid_w: 1.44
TL: 0.37, TL_pos: 0.36, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 616 Loss: 0.09, Loss_test: 0.05
TL_w: 0.37, TL_pos_w: 0.37, TL_neg_w: 1.49, TL_centroid_w: 1.44
TL: 0.37, TL_pos: 0.36, TL_neg: 0.00, TL_centroid: 0.04
Epoch: 617 Loss: 0.10, Loss_test: 0.04
TL_w: 0

Epoch: 662 Loss: 0.09, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.36, TL_pos: 0.35, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 663 Loss: 0.06, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.27, TL_pos: 0.26, TL_neg: 0.00, TL_centroid: 0.03
Epoch: 664 Loss: 0.10, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.45, TL_pos: 0.44, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 665 Loss: 0.08, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.36, TL_pos: 0.35, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 666 Loss: 0.12, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.53, TL_pos: 0.53, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 667 Loss: 0.12, Loss_test: 0.04
TL_w: 0.32, TL_pos_w: 0.32, TL_neg_w: 1.51, TL_centroid_w: 1.46
TL: 0.51, TL_pos: 0.50, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 668 Loss: 0.08, Loss_test: 0.04
TL_w: 0

Epoch: 713 Loss: 0.05, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.47
TL: 0.28, TL_pos: 0.28, TL_neg: 0.01, TL_centroid: 0.02
Epoch: 714 Loss: 0.11, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.47
TL: 0.51, TL_pos: 0.50, TL_neg: 0.02, TL_centroid: 0.06
Epoch: 715 Loss: 0.06, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.47
TL: 0.25, TL_pos: 0.25, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 716 Loss: 0.08, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.48
TL: 0.37, TL_pos: 0.37, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 717 Loss: 0.13, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.48
TL: 0.69, TL_pos: 0.68, TL_neg: 0.03, TL_centroid: 0.04
Epoch: 718 Loss: 0.08, Loss_test: 0.03
TL_w: 0.27, TL_pos_w: 0.27, TL_neg_w: 1.53, TL_centroid_w: 1.48
TL: 0.42, TL_pos: 0.41, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 719 Loss: 0.10, Loss_test: 0.03
TL_w: 0

Epoch: 764 Loss: 0.07, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.41, TL_pos: 0.40, TL_neg: 0.01, TL_centroid: 0.04
Epoch: 765 Loss: 0.05, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.33, TL_pos: 0.32, TL_neg: 0.01, TL_centroid: 0.02
Epoch: 766 Loss: 0.06, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.31, TL_pos: 0.30, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 767 Loss: 0.07, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.41, TL_pos: 0.40, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 768 Loss: 0.07, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.32, TL_pos: 0.31, TL_neg: 0.02, TL_centroid: 0.05
Epoch: 769 Loss: 0.09, Loss_test: 0.03
TL_w: 0.22, TL_pos_w: 0.22, TL_neg_w: 1.55, TL_centroid_w: 1.49
TL: 0.52, TL_pos: 0.51, TL_neg: 0.01, TL_centroid: 0.04
=> result saved!
Epoch: 770 Loss: 0.06, Loss_t

Epoch: 817 Loss: 0.05, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.38, TL_pos: 0.37, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 818 Loss: 0.05, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.31, TL_pos: 0.30, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 819 Loss: 0.05, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.36, TL_pos: 0.36, TL_neg: 0.01, TL_centroid: 0.02
=> result saved!
Epoch: 820 Loss: 0.06, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.46, TL_pos: 0.45, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 821 Loss: 0.04, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.33, TL_pos: 0.32, TL_neg: 0.00, TL_centroid: 0.02
Epoch: 822 Loss: 0.06, Loss_test: 0.02
TL_w: 0.16, TL_pos_w: 0.16, TL_neg_w: 1.57, TL_centroid_w: 1.50
TL: 0.44, TL_pos: 0.42, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 823 Loss: 0.03, Loss_t

Epoch: 868 Loss: 0.04, Loss_test: 0.02
TL_w: 0.11, TL_pos_w: 0.11, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.30, TL_pos: 0.29, TL_neg: 0.00, TL_centroid: 0.03
Epoch: 869 Loss: 0.05, Loss_test: 0.02
TL_w: 0.11, TL_pos_w: 0.11, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.42, TL_pos: 0.41, TL_neg: 0.01, TL_centroid: 0.04
=> result saved!
Epoch: 870 Loss: 0.05, Loss_test: 0.02
TL_w: 0.11, TL_pos_w: 0.10, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.51, TL_pos: 0.50, TL_neg: 0.02, TL_centroid: 0.03
Epoch: 871 Loss: 0.04, Loss_test: 0.02
TL_w: 0.10, TL_pos_w: 0.10, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.37, TL_pos: 0.37, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 872 Loss: 0.04, Loss_test: 0.02
TL_w: 0.10, TL_pos_w: 0.10, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.30, TL_pos: 0.29, TL_neg: 0.01, TL_centroid: 0.03
Epoch: 873 Loss: 0.04, Loss_test: 0.02
TL_w: 0.10, TL_pos_w: 0.10, TL_neg_w: 1.59, TL_centroid_w: 1.51
TL: 0.48, TL_pos: 0.47, TL_neg: 0.01, TL_centroid: 0.02
Epoch: 874 Loss: 0.03, Loss_t

In [43]:
len(result['train']), len(result['test'])

(900, 900)

In [44]:
encoded = model.get_layer('activation_2')
output = encoded.output
inputs = similarity_model.inputs[:-1]
bug_feature_output_a = Model(inputs = inputs, outputs = output, name = 'Similarity_Model')

In [45]:
SAVE_PATH.replace('@number_of_epochs@', str(epochs))

'bert_preprocessing_DWEN_QL_1000_feature1000epochs_64batch(eclipse)'

In [46]:
experiment.save_model(model, SAVE_PATH.replace('@number_of_epochs@', str(epochs)))
experiment.save_model(bug_feature_output_a, SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs)), verbose=1)
"Model saved"

Saved model 'modelos/model_bert_preprocessing_DWEN_QL_1000_feature_1000epochs_64batch(eclipse).h5' to disk


'Model saved'

In [47]:
recall, exported_rank, debug = experiment.evaluate_validation_test(retrieval, verbose, encoded_anchor, issues_by_buckets, bug_train_ids, method='dwen')
print(("Epoch: {} Loss: {:.2f}, Loss_test: {:.2f}\n" +
       "TL_w: {:.2f}, TL_pos_w: {:.2f}, TL_neg_w: {:.2f}, TL_centroid_w: {:.2f}\n" + 
        "TL: {:.2f}, TL_pos: {:.2f}, TL_neg: {:.2f}, TL_centroid: {:.2f}, " +
      "recall@25: {:.2f}").format(epoch+1, h[0], h_validation[0], h[1], h[2], h[3], 
                                  h[4], h[5], h[6], h[7], h[8], recall))

Epoch: 900 Loss: 0.04, Loss_test: 0.01
TL_w: 0.07, TL_pos_w: 0.07, TL_neg_w: 1.59, TL_centroid_w: 1.52
TL: 0.37, TL_pos: 0.37, TL_neg: 0.01, TL_centroid: 0.04, recall@25: 0.13


In [48]:
exported_rank[:20]

['2:15392,9779,94|43732:0.719656765460968,78340:0.7187950015068054,333429:0.7107122242450714,33440:0.7089104354381561,26120:0.7078737318515778,301829:0.707709014415741,301830:0.707709014415741,301831:0.707709014415741,158250:0.7062311172485352,59311:0.7031238079071045,79004:0.7030397355556488,94701:0.7029220759868622,163079:0.7018101215362549,162411:0.6995002329349518,42078:0.6971099376678467,66587:0.6963174045085907,116875:0.6959206163883209,208295:0.695848286151886,352380:0.6939741671085358,58797:0.6937785148620605,375762:0.6936006546020508,376605:0.6934903562068939,255684:0.6931604444980621,2370:0.692665308713913,139097:0.6920564472675323,141028:0.6915636360645294,80388:0.6909751296043396,133693:0.6901670694351196,227010:0.6896843910217285',
 '393232:393282,390667,383388|194945:0.6596272885799408,105557:0.6550727784633636,105558:0.6550727784633636,320153:0.632684051990509,29302:0.6326440572738647,72698:0.6321587562561035,152050:0.6311775147914886,104673:0.6299512982368469,356782:0.6

### Retrieval evaluation

In [49]:
print("Total of queries:", len(retrieval.test))

Total of queries: 16995


#### Getting the model trained

In [50]:
SAVE_PATH_FEATURE.replace('@number_of_epochs@', str(epochs))

'bert_preprocessing_DWEN_QL_1000_feature_1000epochs_64batch(eclipse)'

In [51]:
bug_feature_output_a.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
title_in (InputLayer)           (None, 20)           0                                            
__________________________________________________________________________________________________
desc_in (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
embedding_layer_title (Embeddin (None, 20, 300)      581600700   title_in[0][0]                   
__________________________________________________________________________________________________
embedding_layer_desc (Embedding (None, 20, 300)      581600700   desc_in[0][0]                    
__________________________________________________________________________________________________
global_ave

In [52]:
EXPORT_RANK_PATH = os.path.join(DIR, 'exported_rank_{}.txt'.format(METHOD))
EXPORT_RANK_PATH

'data/processed/eclipse/bert/exported_rank_DWEN_QL_1000.txt'

In [53]:
with open(EXPORT_RANK_PATH, 'w') as file_out:
    for row in exported_rank:
        file_out.write(row + "\n")

In [54]:
report = experiment.evaluation.evaluate(EXPORT_RANK_PATH)
report

{'1 - recall_at_5': 0.11,
 '2 - recall_at_10': 0.12,
 '3 - recall_at_15': 0.12,
 '4 - recall_at_20': 0.13,
 '5 - recall_at_25': 0.13}