In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import os
import math
import csv

import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import tensorflow_datasets as tfds
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
#from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
import tqdm
from ipywidgets import IntProgress

In [None]:
class Arguments(object):
    dataset = # possible data sets to try: 'mnist', 'svhn_cropped', 'cifar10', 'cifar100',
    lossChoice = # 'all_triplet', 'hard_triplet', 'semihard_triplet', 'contrastive'
    save_path = # enter a path to save
    excel_path = ''
    runBreg = # True      
    trial_index = # enter an id for the experiment
    
    def add_args(self, args_dict):
        [setattr(self, key, args_dict[key]) for key in args_dict]
        
    def get_args(self):
        arg_list = {var: getattr(self, var) for var in dir(self)
                    if not callable(getattr(self, var))
                    and not var.startswith("__")}
        return arg_list
    def print_args(self):
        arg_dict = self.get_args()
        print_str = ''
        for arg in arg_dict:
            print_str += arg + ' = ' + str(arg_dict[arg]) + '\n'
        return print_str
    
    @classmethod
    def init_csv(self, name):
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        path = os.path.join(self.save_path, self.dataset + '_' + name + '.csv')
        print(path)
        headers = ['model_number', 'lossChoice',  
                   'test_acc_breg', 'test_auc_breg',
                  'test_avp_breg', 'test_acc_euc',
                  'test_auc_euc', 'test_avp_euc'] + list(self.get_args(self).keys()) 
        with open(path, 'w') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',')
            csv_writer.writerow(headers)
        self.excel_path = path
        return path

    def init_excel(self, name):
        if not os.path.isfile(self.save_path + name):
            wb = Workbook()
            sheet1 = wb.add_sheet('Sheet 1')
            for i, h in enumerate(self.headers):
                sheet1.write(i, 0, self.headers)
            wb.save(self.save_path + name)
            self.excel_path = self.save_path + name
            return self.excel_path
            
    def addrow_to_excel(self, val_list):
        #with open(excel_path) as f:
        #    reader = csv.reader(f)
        data = [self.trial_index, self.lossChoice] + val_list + list(self.get_args().values())  #, '%.3f'%(knn_train), 
        with open(self.excel_path, 'a') as f:
            writer = csv.writer(f)
            writer.writerow(data)

# Module-level settings instance; it is also the default `args` of train_test().
arguments = Arguments()
# Create the results CSV (header row only). objective() reads this global and
# copies it onto each per-trial Arguments instance.
excel_path = Arguments.init_csv('results')
print(excel_path)

In [None]:
def _pairwise_divergences(embed):
    """Pairwise divergence matrix for the rows of ``embed``.

    Entry ``[i, j]`` equals ``embed[i, argmax(embed[i])] - embed[i, argmax(embed[j])]``,
    clipped below at zero, i.e. how far row ``i``'s own maximum lies above the
    value row ``i`` takes at the arg-max position of row ``j``.
    """
    n = tf.shape(embed)[0]

    # Arg-max column of every row and the matching (row, column) index pairs.
    argmax_cols = tf.math.argmax(embed, 1, output_type=tf.dtypes.int32)
    row_ids = tf.range(n, dtype=tf.dtypes.int32)
    own_max_idx = tf.transpose(tf.stack([row_ids, argmax_cols]))
    own_max = tf.gather_nd(embed, own_max_idx)
    # After tile -> reshape -> transpose, entry [i, j] holds the maximum of row i.
    own_max_grid = tf.transpose(tf.reshape(tf.tile(own_max, [n]), [n, n]))

    # Flattened index pairs (i, argmax_j) enumerated in row-major order over (i, j).
    tiled_argmax = tf.tile(argmax_cols, [n])
    tiled_rows = tf.tile(row_ids, [n])
    row_major_rows = tf.reshape(tf.transpose(tf.reshape(tiled_rows, [n, n])), [-1])
    cross_idx = tf.transpose(tf.stack([row_major_rows, tiled_argmax]))
    cross_vals = tf.reshape(tf.gather_nd(embed, cross_idx), [n, n])

#    #for differentiability, this version uses softmax instead of argmax
#    sftmx = tf.nn.softmax(tf.multiply(1.0, embed))
#    ES = tf.linalg.matmul(embed, sftmx, transpose_b=True)
#    one_vec = tf.reshape(tf.ones([tf.shape(embed)[0]]), [1, tf.shape(embed)[0]])
#    diag_ES = tf.reshape(tf.linalg.diag_part(ES), [1, tf.shape(embed)[0]])
#    max_outputs = tf.linalg.matmul(diag_ES, one_vec, transpose_a=True)
#    div_matrix = tf.maximum(tf.subtract(max_outputs, ES), 0.0)

    return tf.maximum(tf.subtract(own_max_grid, cross_vals), 0.0)

In [None]:
def _pairwise_distances(embeddings, squared=False):
    """Pairwise Euclidean distances between the rows of ``embeddings``.

    Args:
        embeddings: 2-D float tensor, one embedding per row.
        squared: when true, return squared distances; otherwise take the
            square root.

    Returns:
        A square tensor whose entry ``[i, j]`` is the (squared) distance
        between rows ``i`` and ``j``.
    """
    gram = tf.matmul(embeddings, tf.transpose(embeddings))
    sq_norms = tf.diag_part(gram)
    # ||a - b||^2 = ||a||^2 - 2<a, b> + ||b||^2; clip tiny negatives from rounding.
    dist = tf.maximum(
        tf.expand_dims(sq_norms, 1) - 2.0 * gram + tf.expand_dims(sq_norms, 0),
        0.0)
    if squared:
        return dist

    # sqrt has an infinite gradient at 0: nudge exact zeros before the root
    # and reset them to zero afterwards.
    zero_mask = tf.to_float(tf.equal(dist, 0.0))
    dist = tf.sqrt(dist + zero_mask * 1e-16)
    return dist * (1.0 - zero_mask)

In [None]:
def _get_anchor_positive_triplet_mask(labels):
    """Boolean matrix that is True at ``[a, p]`` when ``a != p`` and both share a label."""
    same_label = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    off_diagonal = tf.logical_not(tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool))
    return tf.logical_and(off_diagonal, same_label)

In [None]:
def _get_anchor_negative_triplet_mask(labels):
    """Boolean matrix that is True at ``[a, n]`` when the two labels differ."""
    same_label = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    return tf.logical_not(same_label)


def _get_triplet_mask(labels):
    """Boolean 3-D mask of valid (anchor, positive, negative) triplets.

    Entry ``[i, j, k]`` is True when ``i``, ``j`` and ``k`` are pairwise distinct
    indices, ``labels[i] == labels[j]`` and ``labels[i] != labels[k]``.
    """
    # Index constraint: all three positions must differ.
    differs = tf.logical_not(tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool))
    ij_differ = tf.expand_dims(differs, 2)
    ik_differ = tf.expand_dims(differs, 1)
    jk_differ = tf.expand_dims(differs, 0)
    all_distinct = tf.logical_and(tf.logical_and(ij_differ, ik_differ), jk_differ)

    # Label constraint: positive shares the anchor's label, negative does not.
    same_label = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    anchor_pos_same = tf.expand_dims(same_label, 2)
    anchor_neg_same = tf.expand_dims(same_label, 1)
    label_ok = tf.logical_and(anchor_pos_same, tf.logical_not(anchor_neg_same))

    return tf.logical_and(all_distinct, label_ok)


def batch_all_triplet_loss(labels, embeddings, margin, squared=False, breg=False):
    """Triplet loss averaged over all valid triplets with a positive loss.

    Args:
        labels: 1-D tensor of class labels for the batch.
        embeddings: 2-D tensor, one embedding per row.
        margin: triplet margin added to ``d(a, p) - d(a, n)``.
        squared: accepted but not used -- the Euclidean branch always works on
            squared distances.
        breg: use ``_pairwise_divergences`` in place of Euclidean distances.

    Returns:
        ``(loss, fraction)`` where ``fraction`` is the share of valid triplets
        whose loss is greater than 1e-16.
    """
    if breg:
        dist = _pairwise_divergences(embeddings)
    else:
        dist = _pairwise_distances(embeddings, squared=True)

    # Broadcast to [anchor, positive, negative].
    d_anchor_pos = tf.expand_dims(dist, 2)
    assert d_anchor_pos.shape[2] == 1, "{}".format(d_anchor_pos.shape)
    d_anchor_neg = tf.expand_dims(dist, 1)
    assert d_anchor_neg.shape[1] == 1, "{}".format(d_anchor_neg.shape)

    valid = tf.to_float(_get_triplet_mask(labels))
    # Zero out invalid triplets, then apply the hinge.
    per_triplet = tf.maximum(
        tf.multiply(valid, d_anchor_pos - d_anchor_neg + margin), 0.0)

    active = tf.to_float(tf.greater(per_triplet, 1e-16))
    n_active = tf.reduce_sum(active)
    n_valid = tf.reduce_sum(valid)
    fraction_active = n_active / (n_valid + 1e-16)

    # Average only over the triplets that still violate the margin.
    loss = tf.reduce_sum(per_triplet) / (n_active + 1e-16)
    return loss, fraction_active


def batch_hard_triplet_loss(labels, embeddings, margin, squared=False, breg=False):
    """Triplet loss using the hardest positive and hardest negative per anchor.

    Args:
        labels: 1-D tensor of class labels for the batch.
        embeddings: 2-D tensor, one embedding per row.
        margin: triplet margin.
        squared: accepted but not used -- the Euclidean branch always works on
            squared distances.
        breg: use ``_pairwise_divergences`` in place of Euclidean distances.

    Returns:
        Scalar mean of ``max(hardest_pos - hardest_neg + margin, 0)`` over anchors.
        Two scalar summaries (mean hardest positive/negative distance) are
        registered as a side effect.
    """
    if breg:
        dist = _pairwise_divergences(embeddings)
    else:
        dist = _pairwise_distances(embeddings, squared=True)

    # Hardest positive: largest distance among same-label, non-identical pairs.
    pos_mask = tf.to_float(_get_anchor_positive_triplet_mask(labels))
    hardest_pos = tf.reduce_max(tf.multiply(pos_mask, dist), axis=1, keepdims=True)
    tf.summary.scalar("hardest_positive_dist", tf.reduce_mean(hardest_pos))

    # Hardest negative: smallest distance among different-label pairs. Entries
    # that are not negatives get the row maximum added so they cannot win the min.
    neg_mask = tf.to_float(_get_anchor_negative_triplet_mask(labels))
    row_max = tf.reduce_max(dist, axis=1, keepdims=True)
    shifted = dist + row_max * (1.0 - neg_mask)
    hardest_neg = tf.reduce_min(shifted, axis=1, keepdims=True)
    tf.summary.scalar("hardest_negative_dist", tf.reduce_mean(hardest_neg))

    hinge = tf.maximum(hardest_pos - hardest_neg + margin, 0.0)
    return tf.reduce_mean(hinge)
  
def contrastive_loss(labels, embeddings, margin, breg=False):
    """Contrastive loss averaged over every ordered pair in the batch.

    Same-label pairs contribute their distance; different-label pairs
    contribute ``max(margin - distance, 0)``. Distances are divergences when
    ``breg`` is true and squared Euclidean distances otherwise.
    """
    n = tf.shape(embeddings)[0]

    # 1.0 where the two labels agree, 0.0 elsewhere, shaped [n, n].
    same = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    same = tf.to_float(tf.expand_dims(same, 2))
    same = tf.reshape(same, [n, n])

    if breg:
        dist = _pairwise_divergences(embeddings)
    else:
        dist = _pairwise_distances(embeddings, squared=True)

    pull = tf.math.multiply(same, dist)
    push = tf.multiply(tf.subtract(1.0, same),
                       tf.maximum(tf.subtract(margin, dist), 0))
    #dissim_term = tf.multiply(tf.subtract(1.0, i_equal_j),  tf.pow(tf.maximum(tf.subtract(margin, sqrt_distances), 0), 2))
    return tf.reduce_mean(tf.add(pull, push))

def _masked_maximum(data, mask, dim=1):
    """Maximum of ``data`` along ``dim`` restricted to entries where ``mask`` is 1.

    The data is shifted by its minimum along ``dim`` so that masked-out entries
    (multiplied by 0) cannot exceed any kept entry.
    """
    floor = tf.math.reduce_min(data, dim, keepdims=True)
    shifted = tf.math.multiply(data - floor, mask)
    return tf.math.reduce_max(shifted, dim, keepdims=True) + floor
  
def _masked_minimum(data, mask, dim=1):
    """Minimum of ``data`` along ``dim`` restricted to entries where ``mask`` is 1.

    The data is shifted by its maximum along ``dim`` so that masked-out entries
    (multiplied by 0) cannot fall below any kept entry.
    """
    ceiling = tf.math.reduce_max(data, dim, keepdims=True)
    shifted = tf.math.multiply(data - ceiling, mask)
    return tf.math.reduce_min(shifted, dim, keepdims=True) + ceiling
  
def triplet_semihard_loss(y_true, y_pred, margin=1.0, breg=False):
    """Triplet loss with semi-hard negative mining.

    For every (anchor, positive) pair the negative is the closest one that is
    still farther from the anchor than the positive; when no such negative
    exists, the farthest negative of the anchor is used instead.

    Args:
        y_true: class labels of the batch (reshaped to a column internally).
        y_pred: 2-D tensor of embeddings, one row per example.
        margin: triplet margin.
        breg: use ``_pairwise_divergences`` instead of squared Euclidean distances.

    Returns:
        Scalar loss: sum of hinged per-pair losses divided by the number of
        (anchor, positive) pairs.
    """

    labels, embeddings = y_true, y_pred
    lshape = tf.shape(labels)
    # Column vector of labels so it can be compared against its transpose.
    labels = tf.reshape(labels, [lshape[0], 1])

    if breg:
        pdist_matrix = _pairwise_divergences(embeddings)
    else:
        pdist_matrix = _pairwise_distances(embeddings, squared=True) 
    # adjacency[i, j] is True when examples i and j share a label.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Stack batch_size copies of the distance matrix on top of each other, so
    # row (p * batch_size + a) holds the distances of anchor a and is compared
    # against the single value d(a, p) taken from the flattened transpose.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    # mask[row, k]: k is a negative of the anchor and lies farther away than the positive.
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(pdist_matrix_tile,
                        tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
    # mask_final[a, p]: at least one such negative exists for the pair (a, p).
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True),
            0.0), [batch_size, batch_size])
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # Closest negative beyond the positive, per (anchor, positive) pair.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = tf.transpose(negatives_outside)
    # Fallback: farthest negative of each anchor, repeated across all positives.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = tf.where(mask_final, negatives_outside,
                                   negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    # Positive pairs: same label, excluding each example paired with itself.
    mask_positives = tf.cast(
        adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
            tf.ones([batch_size]))

    num_positives = tf.math.reduce_sum(mask_positives)

    # NOTE(review): no epsilon in the denominator -- a batch without any
    # positive pair would divide by zero here.
    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)),
        num_positives)

    return triplet_loss

In [None]:
def get_loss(lossChoice, runBreg, margin, pretrain, y, k, out):
    """Build the training loss as a blend of cross-entropy and a metric loss.

    Args:
        lossChoice: one of 'contrastive', 'all_triplet', 'hard_triplet',
            'semihard_triplet'.
        runBreg: passed as ``breg`` to the metric loss (divergence vs. Euclidean).
        margin: margin of the metric loss.
        pretrain: blending weight; the result is
            ``pretrain * cross_entropy + (1 - pretrain) * metric_loss``.
        y: integer class labels.
        k: number of classes (depth of the one-hot encoding).
        out: network output, used both as logits and as embeddings.

    Returns:
        ``(loss1, loss2, loss)``: cross-entropy, metric loss and their blend.

    Raises:
        ValueError: if ``lossChoice`` is not a supported name. Previously this
            case fell through and failed with an unbound-local error.
    """
    supported = ('contrastive', 'all_triplet', 'hard_triplet', 'semihard_triplet')
    # Validate before touching the graph so a typo fails fast and clearly.
    if lossChoice not in supported:
        raise ValueError("Unknown lossChoice %r; expected one of: %s"
                         % (lossChoice, ', '.join(supported)))

    # The classification term is the same for every metric loss.
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(y, k, dtype=tf.float32), logits=out))

    if lossChoice == 'contrastive':
        loss2 = contrastive_loss(y, out, margin, breg=runBreg)
    elif lossChoice == 'all_triplet':
        # The fraction of active triplets is not needed here.
        loss2, _ = batch_all_triplet_loss(y, out, margin, squared=True, breg=runBreg)
    elif lossChoice == 'hard_triplet':
        loss2 = batch_hard_triplet_loss(y, out, margin, squared=True, breg=runBreg)
    else:  # 'semihard_triplet'
        loss2 = triplet_semihard_loss(y, out, margin, breg=runBreg)

    loss = pretrain*loss1 + (1-pretrain)*loss2
    return loss1, loss2, loss


In [None]:
def knn_euc_test_np(embed_test, test_y, K):
    """Evaluate embeddings with a Euclidean k-NN classifier.

    The given embeddings are split 67/33 (fixed random_state=10); a k-NN
    classifier is fitted on the larger part and scored on the smaller one.

    Args:
        embed_test: 2-D array of embeddings, one row per example.
        test_y: integer class labels matching ``embed_test``.
        K: number of neighbours.

    Returns:
        ``(accuracy, average_precision, roc_auc_ovo)``.
    """
    knn_x_train, knn_x_test, knn_y_train, knn_y_test = train_test_split(
        embed_test, test_y, test_size=0.33, random_state=10)

    knn = KNeighborsClassifier(n_neighbors=K, metric='euclidean')
    knn.fit(knn_x_train, knn_y_train)
    pred_prob = knn.predict_proba(knn_x_test)
    y_pred = knn.predict(knn_x_test)

    # One-hot targets aligned with the columns of pred_prob, which follow
    # knn.classes_. This replaces a hard-coded np.eye(10) that only worked for
    # 10-class data sets and failed on e.g. cifar100.
    knn_y_test = np.asarray(knn_y_test)
    class_ids = np.asarray(knn.classes_)
    knn_y_test_onehot = (knn_y_test[:, None] == class_ids[None, :]).astype(np.float64)

    acc = accuracy_score(knn_y_test, y_pred)
    avp = average_precision_score(knn_y_test_onehot, pred_prob)
    auc_ovo = roc_auc_score(knn_y_test, pred_prob, multi_class='ovo')

    return acc, avp, auc_ovo

In [None]:
def knn_breg_test_np(test_X, test_Y, K):
    """Evaluate embeddings with a k-NN classifier under the divergence metric.

    The given embeddings are split 67/33 (fixed random_state=10); a k-NN
    classifier using ``max(x) - x[argmax(y)]`` as its metric is fitted on the
    larger part and scored on the smaller one.

    Args:
        test_X: 2-D array of embeddings, one row per example.
        test_Y: integer class labels matching ``test_X``.
        K: number of neighbours.

    Returns:
        ``(accuracy, average_precision, roc_auc_ovo)``.
    """

    def bregman_twopoints(x, y):
        # Gap between x's own maximum and x evaluated at y's arg-max position.
        return np.max(x) - x[np.argmax(y)]

    knn_x_train, knn_x_test, knn_y_train, knn_y_test = train_test_split(
        test_X, test_Y, test_size=0.33, random_state=10)
    knn = KNeighborsClassifier(n_neighbors=K, metric=bregman_twopoints)

    knn.fit(knn_x_train, knn_y_train)
    pred_prob = knn.predict_proba(knn_x_test)
    y_pred = knn.predict(knn_x_test)

    # One-hot targets aligned with the columns of pred_prob, which follow
    # knn.classes_. This replaces a hard-coded np.eye(10) that only worked for
    # 10-class data sets and failed on e.g. cifar100.
    knn_y_test = np.asarray(knn_y_test)
    class_ids = np.asarray(knn.classes_)
    knn_y_test_onehot = (knn_y_test[:, None] == class_ids[None, :]).astype(np.float64)

    acc = accuracy_score(knn_y_test, y_pred)
    avp = average_precision_score(knn_y_test_onehot, pred_prob)
    auc_ovo = roc_auc_score(knn_y_test, pred_prob, multi_class='ovo')

    return acc, avp, auc_ovo

In [None]:
def euc_model(conv_filters, height, width,input_channels,batch_size, k, training_ph, x, reuse=None):
    """Fixed four-conv-layer network followed by two dense layers.

    Args:
        conv_filters: sequence with the output channel counts of the four conv layers.
        height, width: spatial size of the input images.
        input_channels: number of input image channels.
        batch_size: not used by this function.
        k: width of the final dense layer (one output per class).
        training_ph: boolean placeholder forwarded to batch normalisation.
        x: input image tensor.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The un-activated output of the last dense layer, shape ``[batch, k]``.
    """
    with tf.variable_scope("euc_model", reuse=reuse):
        # act_func is a required argument of create_new_conv_layer; it was
        # missing from these calls (TypeError). 'relu' is the only activation
        # that helper implements -- confirm this matches the intended model.
        layer1 = create_new_conv_layer(x, input_channels, conv_filters[0], 
                                       [3, 3], [2, 2], training_bool=training_ph, 
                                       name='layer1', act_func='relu', doPool=False)
        layer2 = create_new_conv_layer(layer1, conv_filters[0], conv_filters[1], 
                                       [3, 3], [2, 2], training_bool=training_ph, 
                                       name='layer2', act_func='relu', doPool=True)
        # Stride-2 pooling with 'SAME' padding yields ceil(size / 2).
        new_height = (height + 1) // 2
        new_width = (width + 1) // 2
        layer3 = create_new_conv_layer(layer2, conv_filters[1], conv_filters[2], 
                                       [3, 3], [2, 2], training_bool=training_ph,
                                       name='layer3', act_func='relu', doPool=False)
        layer4 = create_new_conv_layer(layer3, conv_filters[2], conv_filters[3],
                                       [3, 3], [2, 2], training_bool=training_ph, 
                                       name='layer4', act_func='relu', doPool=True)
        new_height = (new_height + 1) // 2
        new_width = (new_width + 1) // 2

        flat_dim = new_height * new_width * conv_filters[3]
        flattened = tf.reshape(layer4, [-1, flat_dim])

        # Hidden dense layer: 1000 units, batch norm, ReLU.
        wd1 = tf.get_variable('wd1', [flat_dim, 1000], initializer = tf.initializers.glorot_uniform())
        bd1 = tf.Variable(tf.truncated_normal([1000], stddev=0.01), name='bd1')
        dense_layer1 = tf.matmul(flattened, wd1) + bd1
        dense_layer1 = tf.layers.batch_normalization(dense_layer1, training=training_ph)
        dense_layer1 = tf.nn.relu(dense_layer1)

        # Output layer: k linear units.
        wd2 = tf.get_variable('wd2', [1000, k], initializer = tf.initializers.glorot_uniform())
        bd2 = tf.Variable(tf.truncated_normal([k], stddev=0.01), name='bd2')
        out = tf.matmul(dense_layer1, wd2) + bd2

        return out

In [None]:
def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, training_bool, name, act_func, useBias=True, doPool=True, batchNorm=True):
    """Build one conv block: conv -> [bias] -> [batch norm] -> [ReLU] -> [max pool].

    Args:
        input_data: input tensor fed to the convolution.
        num_input_channels: channel count of ``input_data``.
        num_filters: number of output channels.
        filter_shape: ``[height, width]`` of the convolution kernel.
        pool_shape: ``[height, width]`` of the pooling window.
        training_bool: training flag forwarded to batch normalisation.
        name: prefix for the variable names (``<name>_W`` and ``<name>_b``).
        act_func: ReLU is applied only when this equals ``'relu'``; any other
            value leaves the layer without an activation.
        useBias: add a learned per-filter bias.
        doPool: apply max pooling; the stride is always 2 regardless of ``pool_shape``.
        batchNorm: apply batch normalisation.

    Returns:
        The output tensor of the block.
    """
    kernel_dims = [filter_shape[0], filter_shape[1], num_input_channels, num_filters]
    kernel = tf.Variable(tf.truncated_normal(kernel_dims, stddev=0.03),
                         name=name+'_W')

    # Stride-1 convolution; 'SAME' padding keeps the spatial size.
    result = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding='SAME')

    if useBias:
        offset = tf.Variable(tf.truncated_normal([num_filters], stddev=0.01), name=name+'_b')
        result = result + offset

    if batchNorm:
        result = tf.layers.batch_normalization(result, training=training_bool)

    if act_func == 'relu':
        result = tf.nn.relu(result)

    if not doPool:
        return result

    window = [1, pool_shape[0], pool_shape[1], 1]
    return tf.nn.max_pool(result, ksize=window, strides=[1, 2, 2, 1],
                          padding='SAME')

In [None]:
def breg_model(model_args, height, width,input_channels,batch_size, k, training_ph, x, reuse=None):   
    """Configurable conv stack followed by ``k`` independent two-layer heads.

    Each head maps the flattened conv features through a hidden dense layer
    (optional batch norm, ReLU) to a single scalar; the ``k`` scalars are
    concatenated into the output.

    Args:
        model_args: dict with keys 'conv_layers' (itself a dict holding the
            per-layer lists 'conv_filter_nums', 'conv_doPools_list',
            'conv_batchNorms_list', 'conv_kernels_list', 'conv_pools_list',
            'useBiases'), 'activation', 'dense_layers' and 'breg_batchNorm'.
        height, width: spatial size of the input images.
        input_channels: number of input image channels.
        batch_size: not used by this function.
        k: number of heads / output columns.
        training_ph: boolean placeholder forwarded to batch normalisation.
        x: input image tensor.
        reuse: forwarded to ``tf.variable_scope``. Most weights are created
            with ``tf.Variable`` and are therefore not shared by it.

    Returns:
        Tensor of shape ``[batch, k]``.
    """
    conv_cfg = model_args['conv_layers']
    conv_filters = [2 ** (int(n)) for n in conv_cfg['conv_filter_nums']]  # filter count = 2**n
    conv_doPools = list(conv_cfg['conv_doPools_list'])
    conv_batchNorms = list(conv_cfg['conv_batchNorms_list'])
    conv_kernels = [int(2*n - 1) for n in conv_cfg['conv_kernels_list']]  # kernel size = 2n - 1
    conv_pools = [int(n) for n in conv_cfg['conv_pools_list']]
    useBiases = conv_cfg['useBiases']
    act_func = model_args['activation']
    dense_hiddens = int(model_args['dense_layers'])
    breg_batchNorm = model_args['breg_batchNorm']

    with tf.variable_scope("breg_model", reuse=reuse):
        new_height = height
        new_width = width
        conv_out = x
        in_channels = input_channels
        # Every layer uses its OWN entry of each hyper-parameter list and a
        # unique name. Previously layer i+1 reused layer i's kernel, bias,
        # pooling and batch-norm settings, the first pool size was applied to
        # all later layers, and two layers were both named 'layer1'.
        for idx, out_channels in enumerate(conv_filters):
            conv_out = create_new_conv_layer(conv_out, in_channels, out_channels,
                                             [conv_kernels[idx], conv_kernels[idx]],
                                             [conv_pools[idx], conv_pools[idx]],
                                             training_bool=training_ph,
                                             name='layer%d' % (idx + 1),
                                             act_func=act_func,
                                             useBias=useBiases[idx],
                                             doPool=conv_doPools[idx],
                                             batchNorm=conv_batchNorms[idx])
            if conv_doPools[idx]:
                # Stride-2 pooling with 'SAME' padding yields ceil(size / 2);
                # flooring under-counts odd sizes (e.g. 7 -> 4, not 3) and
                # makes the reshape below inconsistent.
                new_height = (new_height + 1) // 2
                new_width = (new_width + 1) // 2
            in_channels = out_channels

        flat_dim = new_height * new_width * conv_filters[-1]
        flattened = tf.reshape(conv_out, [-1, flat_dim])

        head_outputs = []
        for head in range(k):
            # Fresh random tensors per head: sharing one initial-value tensor
            # between variables gives them identical starting weights, because
            # the tensor is evaluated once when the initialiser op runs.
            wd = tf.Variable(tf.truncated_normal([flat_dim, dense_hiddens], stddev=0.03),
                             name='wd1_%d' % head)
            bd = tf.Variable(tf.truncated_normal([dense_hiddens], stddev=0.01),
                             name='bd1_%d' % head)
            hidden = tf.matmul(flattened, wd) + bd

            if breg_batchNorm:
                hidden = tf.layers.batch_normalization(hidden, training=training_ph)

            hidden = tf.nn.relu(hidden)

            wd2 = tf.Variable(tf.truncated_normal([dense_hiddens, 1], stddev=0.03),
                              name='wd2_%d' % head)
            bd2 = tf.Variable(tf.truncated_normal([1], stddev=0.01),
                              name='bd2_%d' % head)
            head_outputs.append(tf.matmul(hidden, wd2) + bd2)

        # One column per head.
        out = tf.concat(head_outputs, axis=1)

        return out

def train(args, train_x, train_y, data_test, k, n_train, n_test):
    """Train one model and evaluate its test embeddings with k-NN.

    Args:
        args: settings object; reads ``model_params``, ``optimizer``,
            ``save_path``, ``trial_index``, ``runBreg``, ``K``, ``batch_size``,
            ``is_normalize``, ``lossChoice``, ``margin``, ``learning_rate``,
            ``epochs`` and ``pretrain_epoch``.
        train_x: training images as a numpy array indexed [example, height, width, channel].
        train_y: training labels as a numpy array.
        data_test: test dataset yielding dicts with "image" and "label".
        k: number of classes.
        n_train: number of training examples.
        n_test: number of test examples (used as the test batch size).

    Returns:
        ``(1 - accuracy, [accuracy, auc, average_precision])`` of the k-NN evaluation.

    Side effects:
        Writes ``<prefix>arguments.txt`` and ``<prefix>loss.txt`` into
        ``<save_path>/<trial_index>/`` where the prefix is 'breg_' or 'euc_'.
    """
    model_param_dict = args.model_params
    optimizer_name = args.optimizer
    print(model_param_dict['conv_layers']['conv_filter_nums'])

    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # os.path.join keeps the trial directory inside save_path even when
    # save_path has no trailing separator (plain concatenation produced a
    # sibling directory such as "results3/" in that case).
    cur_path = os.path.join(save_path, str(args.trial_index))
    if not os.path.exists(cur_path):
        os.makedirs(cur_path)

    name = 'breg_' if args.runBreg else 'euc_'

    with open(os.path.join(cur_path, name + "arguments.txt"), "w") as f:
        f.write(args.print_args())

    K = int(args.K)
    height = train_x.shape[1]
    width = train_x.shape[2]
    input_channels = train_x.shape[3]
    batch_size = int(args.batch_size)

    x = tf.placeholder(tf.float32, [None, height, width, input_channels])
    y = tf.placeholder(tf.int32, [None,])
    pretrain = tf.placeholder(tf.float32)
    training_ph = tf.placeholder(tf.bool)

    # The same architecture is built for both settings (the original if/else
    # called breg_model in both branches); runBreg only changes the loss and
    # the k-NN metric.
    out = breg_model(model_param_dict, height, width, input_channels,
                     batch_size, k, training_ph, x)

    if args.is_normalize == True:
        out = tf.nn.l2_normalize(out, axis=1)

    _, _, loss = get_loss(args.lossChoice, args.runBreg, args.margin, pretrain, y, k, out)

    # Batch-norm moving averages are updated through UPDATE_OPS.
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        if optimizer_name == 'adam':
            optimiser = tf.train.AdamOptimizer(learning_rate=args.learning_rate).minimize(loss)
        elif optimizer_name == 'sgd':
            optimiser = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate).minimize(loss)
        else:
            optimiser = tf.train.RMSPropOptimizer(learning_rate=args.learning_rate).minimize(loss)

    init_op = tf.global_variables_initializer()

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(init_op)
        total_batch = int(n_train / batch_size)
        loss_list = []

        saver = tf.train.Saver()
        print(args.epochs, 'epoch number chosen')
        for epoch in range(int(args.epochs)):
            epoch_stime = time.time()
            avg_cost = 0
            # Reshuffle the training set at the start of every epoch.
            rp = np.random.permutation(train_y.shape[0])
            train_x = train_x[rp]
            train_y = train_y[rp]
            # Epochs 0..pretrain_epoch train on cross-entropy only
            # (pretrain = 1); later epochs use the metric loss only.
            pretrain_weight = 0.0 if epoch > int(args.pretrain_epoch) else 1.0
            for i in range(total_batch):
                start = i * batch_size
                stop = start + batch_size
                # Full batch: the previous upper bound (stop - 1) silently
                # dropped the last example of every batch.
                batch_x = train_x[start:stop]
                batch_y = train_y[start:stop]
                _, c = sess.run([optimiser, loss],
                                feed_dict={x: batch_x, y: batch_y,
                                           training_ph: True,
                                           pretrain: pretrain_weight})
                avg_cost += c / total_batch
            loss_list.append(avg_cost)
            epoch_etime = time.time()
            if epoch == 0:
                print('time per epoch: %.2f' %(epoch_etime - epoch_stime))

        #saver.save(sess, os.path.join(cur_path, name + 'trained_model'), global_step=int(args.epochs))
        print("\nTraining complete!")

        # Pull the entire test split as a single batch and scale it to [-1, 1].
        data_test = data_test.batch(n_test)
        iterator_test = tf.data.make_one_shot_iterator(data_test)
        next_element = iterator_test.get_next()
        test_x = next_element["image"]
        test_y = next_element["label"]
        test_x = tfds.as_numpy(test_x) / 255
        test_x = 2*(test_x - 0.5)
        test_y = tfds.as_numpy(test_y)

        np.savetxt(os.path.join(cur_path, name + 'loss.txt'), loss_list, delimiter='\n')
        print('start testing')
        embed_test = out.eval(feed_dict={x: test_x, training_ph: False, pretrain: 0.0})

        if args.runBreg:
            test_acc, test_avp, test_auc = knn_breg_test_np(embed_test, test_y, K)
        else:
            test_acc, test_avp, test_auc = knn_euc_test_np(embed_test, test_y, K)

        val_list = [test_acc, test_auc, test_avp]

        return (1 - test_acc), val_list
    

In [None]:
def train_test(args=arguments):
    """Load the data set named by ``args.dataset``, then train and evaluate once.

    The whole training split is materialised as numpy arrays with pixel values
    rescaled from [0, 255] to [-1, 1]; the test split is handed to ``train``
    as a dataset.

    Returns:
        The pair returned by ``train``: ``(1 - accuracy, metrics_list)``.
    """
    data, info = tfds.load(name=args.dataset, with_info=True)

    k = info.features['label'].num_classes
    n_train = info.splits['train'].num_examples
    n_test = info.splits['test'].num_examples

    # One batch containing the full training split.
    whole_train = data['train'].batch(n_train)
    element = tf.data.make_one_shot_iterator(whole_train).get_next()
    images, labels = element["image"], element["label"]

    train_x = tfds.as_numpy(images).astype(np.float32) / 255.0
    train_x = 2.0*(train_x - 0.5)
    train_y = tfds.as_numpy(labels)
    print(np.max(train_y))

    error, metrics = train(args, train_x, train_y, data['test'], k, n_train, n_test)
    return error, metrics

def objective(params):
    """Hyperopt objective: run one trial and log its metrics to the CSV file.

    The sampled ``params`` are attached to a fresh ``Arguments`` instance. The
    model is trained and evaluated twice -- first with ``runBreg`` as configured,
    then with ``runBreg`` forced to False -- and both metric lists are appended
    as one CSV row.

    Returns:
        ``1 - accuracy`` of the first run (the value hyperopt minimises).
    """
    global trial_idx
    global excel_path
    # No visible cell initialises trial_idx; start counting at 0 on first use
    # instead of failing with NameError. A value defined elsewhere is kept.
    globals().setdefault('trial_idx', 0)

    tf.reset_default_graph()
    args = Arguments()
    
    args.add_args(params)
    args.trial_index = trial_idx
    args.excel_path = excel_path
    res, eval_list_breg = train_test(args)
    
    # Second run on a clean graph with the Euclidean setting.
    tf.reset_default_graph()
    args.runBreg = False
    _, eval_list_euc = train_test(args)

    args.addrow_to_excel(eval_list_breg + eval_list_euc)
    trial_idx += 1
    return res #{'test_acc': res, 'status': STATUS_OK }

In [None]:
import pickle

def conv_filter_nums(num_layers):
    """Return one ``hp.quniform(label, 3, 7, 2)`` node per layer.

    Labels have the form ``filter_<num_layers><layer>`` with layers counted from 1.
    """
    return [hp.quniform('filter_%d%d' % (num_layers, layer), 3, 7, 2)
            for layer in range(1, num_layers + 1)]

def conv_doPools(num_layers):
    """Return the per-layer pooling flags for the search space.

    All layers pool (``True``) except the last ``min(num_layers, 3)``, which
    are ``hp.choice`` nodes over ``[True, False]``. Label
    ``doPools_<num_layers><j>`` belongs to the j-th layer counted from the end.
    """
    n_tunable = min(num_layers, 3)
    always_pool = [True] * (num_layers - n_tunable)
    tunable = [hp.choice('doPools_%d%d' % (num_layers, j + 1), [True, False])
               for j in range(n_tunable)]
    # Label 1 goes to the last layer, so the tunable part is appended reversed.
    return always_pool + tunable[::-1]

def conv_batchNorms(num_layers):
    """Return one ``hp.choice(label, [True, False])`` node per layer.

    Labels have the form ``batchNorm_<num_layers><layer>`` with layers counted from 1.
    """
    return [hp.choice('batchNorm_%d%d' % (num_layers, layer), [True, False])
            for layer in range(1, num_layers + 1)]

def conv_kernels(num_layers):
    """Return one ``hp.quniform(label, 1, 4, 2)`` node per layer.

    Labels have the form ``kernel_<num_layers><layer>`` with layers counted from 1.
    """
    return [hp.quniform('kernel_%d%d' % (num_layers, layer), 1, 4, 2)
            for layer in range(1, num_layers + 1)]

def conv_pools(num_layers):
    """Return one ``hp.quniform(label, 1, 2, 2)`` node per layer.

    Labels have the form ``pool_<num_layers><layer>`` with layers counted from 1.
    """
    return [hp.quniform('pool_%d%d' % (num_layers, layer), 1, 2, 2)
            for layer in range(1, num_layers + 1)]

def useBias(num_layers):
    """Return one ``hp.choice(label, [True, False])`` node per layer.

    Labels have the form ``bias_<num_layers><layer>`` with layers counted from 1.
    """
    return [hp.choice('bias_%d%d' % (num_layers, layer), [True, False])
            for layer in range(1, num_layers + 1)]

#provide f space
# NOTE: `fspace` (the hyperopt search space) must be defined before this cell
# is run; it is not defined in this notebook.
trials = Trials()

ss = time.time()

best = fmin(objective,
    space=fspace,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials)

# Persist the trial history. `arguments` is the module-level settings object;
# the name `args` used here before is not defined at module level, so the
# results of the whole search were lost to a NameError. The context manager
# also makes sure the file is flushed and closed.
with open(os.path.join(arguments.save_path, "hyperopt_results.p"), "wb") as results_file:
    pickle.dump(trials, results_file)