In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES']='2'
import time
import numpy as np
import tensorflow as tf

In [None]:
import os
import time
import numpy as np
import tensorflow as tf

class VGG16_GAP:
    """VGG16-style network: 13 conv+BN layers, global average pooling, one FC layer.

    Written for TensorFlow 1.x graph mode. Typical use:

    1. ``build`` creates the placeholders and all variables under ``scope_name``.
    2. ``add_centers`` creates the class-center variable used by the center loss.
    3. ``sparsity_train`` or ``set_idp_operation`` create the forward, loss and
       accuracy ops and publish them in ``prob_dict`` / ``loss_dict`` / ``accu_dict``.
    """

    # Convolution layers in network order. This is also their sorted order,
    # i.e. the order in which the variables have always been created.
    _CONV_LAYERS = (
        'conv1_1', 'conv1_2',
        'conv2_1', 'conv2_2',
        'conv3_1', 'conv3_2', 'conv3_3',
        'conv4_1', 'conv4_2', 'conv4_3',
        'conv5_1', 'conv5_2', 'conv5_3',
    )

    def __init__(self, scope_name="VGG16"):
        """
        Create an empty model; no graph ops are built until ``build`` is called.
        :param scope_name: variable scope under which every model variable is created
        """

        self.scope_name = scope_name

        # BN scale (gamma) variables, one per conv layer, in layer order
        self.gamma_var = []
        # static shapes recorded by set_idp_operation for the dp == 1.0 pass
        self.net_shape = []

        # operation dictionary
        self.prob_dict = {}
        self.loss_dict = {}
        self.accu_dict = {}

        # parameter dictionary
        self.para_dict = {}

    def build(self, vgg16_npy_path,
              classes=10,
              shape=(32,32,3),
              prof_type=None,
              conv_pre_training=True,
              fc_pre_training=True,
              new_bn=True):
        """
        Create the input placeholders and all model variables.
        :param vgg16_npy_path: dict of parameters, or path of a .npy file holding such a dict
        :param classes: number of output classes
        :param shape: input image shape as (height, width, channels)
        :param prof_type: gamma initialisation profile (see get_profile); None means "all-one"
        :param conv_pre_training: currently unused; conv weights are always taken from the loaded dict
        :param fc_pre_training: if True load fc_2 from the dict, otherwise create it randomly initialised
        :param new_bn: if True ignore BN parameters stored in the dict and initialise them afresh
        """

        # input information
        self.H, self.W, self.C = shape
        self.classes = classes

        start_time = time.time()
        print("build model started")

        self.prof_type = "all-one" if prof_type is None else prof_type

        # load pre-trained weights
        if isinstance(vgg16_npy_path, dict):
            self.data_dict = vgg16_npy_path
            print("parameters loaded")
        else:
            # The file stores a pickled dict, which numpy refuses to load unless
            # allow_pickle=True is passed explicitly.
            # NOTE(review): unpickling executes code from the file; only load trusted files.
            self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
            print("npy file loaded")

        # input placeholders (created once; the original code created x and
        # is_train twice and silently dropped the first pair)
        self.x = tf.placeholder(tf.float32, [None, self.H, self.W, self.C])
        self.y = tf.placeholder(tf.float32, [None, self.classes])
        self.is_train = tf.placeholder(tf.bool)

        # NOTE(review): self.x is rebound to the *output* of the division, and this
        # is the tensor callers feed through feed_dict. Feeding a non-placeholder
        # tensor overrides its value, so fed images presumably reach conv1_1 without
        # the /255 rescale. Left as-is because changing it would shift the input
        # statistics that existing saved parameters were trained with; confirm intent.
        self.x = self.x/255.0
        assert self.x.get_shape().as_list()[1:] == [self.H, self.W, self.C]

        # declare and initialize the weights of VGG16
        with tf.variable_scope(self.scope_name):
            # weight decay: accumulated L2 norm of every kernel and bias
            self._weight_decay = 0.0
            for k in self._CONV_LAYERS:
                conv_filter, gamma, beta, bn_mean, bn_variance = self.get_conv_filter(name=k, new_bn=new_bn)
                conv_bias = self.get_bias(name=k)
                self.para_dict[k] = [conv_filter, conv_bias]
                self.para_dict[k+"_gamma"] = gamma
                self.para_dict[k+"_beta"] = beta
                self.para_dict[k+"_bn_mean"] = bn_mean
                self.para_dict[k+"_bn_variance"] = bn_variance
                self.gamma_var.append(gamma)

                self._weight_decay += tf.nn.l2_loss(conv_filter)+tf.nn.l2_loss(conv_bias)

            if fc_pre_training:
                fc_W, fc_b = self.get_fc_layer(name='fc_2'), self.get_bias(name='fc_2')
            else:
                # The FC input width equals the number of conv5_3 output channels
                # (512 for the standard VGG16 filters).
                feature_dim = self.para_dict['conv5_3'][0].get_shape().as_list()[-1]
                fc_W = self.get_fc_layer(name='fc_2', shape=(feature_dim, self.classes))
                fc_b = self.get_bias(name='fc_2', shape=(self.classes,))
            self.para_dict['fc_2'] = [fc_W, fc_b]
            self._weight_decay += tf.nn.l2_loss(fc_W) + tf.nn.l2_loss(fc_b)

        print(("build model finished: %ds" % (time.time() - start_time)))

    def add_centers(self, centers):
        """
        Create the non-trainable class-center variable used by the center loss.
        :param centers: float32 array of shape (classes, fc_2 input width); must be called after build
        """
        with tf.variable_scope(self.scope_name):
            # classes vs. feature
            assert centers.shape == (self.classes, self.para_dict['fc_2'][0].get_shape().as_list()[0])
            self.centers = tf.get_variable(initializer=centers, name="centers", dtype=tf.float32, trainable=False)

    def _conv_stack(self, dp=1.0, record_shapes=False):
        """Build the five conv blocks at dot-product percentage ``dp``.

        Returns the globally average-pooled conv5_3 output. When ``record_shapes``
        is True the same static shapes as before are appended to ``net_shape``.
        """
        conv1_1 = self.idp_conv_bn_layer( self.x, "conv1_1", dp)
        conv1_2 = self.idp_conv_bn_layer(conv1_1, "conv1_2", dp)
        pool1 = self.max_pool(conv1_2, 'pool1')

        conv2_1 = self.idp_conv_bn_layer(  pool1, "conv2_1", dp)
        conv2_2 = self.idp_conv_bn_layer(conv2_1, "conv2_2", dp)
        pool2 = self.max_pool(conv2_2, 'pool2')

        conv3_1 = self.idp_conv_bn_layer(  pool2, "conv3_1", dp)
        conv3_2 = self.idp_conv_bn_layer(conv3_1, "conv3_2", dp)
        conv3_3 = self.idp_conv_bn_layer(conv3_2, "conv3_3", dp)
        pool3 = self.max_pool(conv3_3, 'pool3')

        conv4_1 = self.idp_conv_bn_layer(  pool3, "conv4_1", dp)
        conv4_2 = self.idp_conv_bn_layer(conv4_1, "conv4_2", dp)
        conv4_3 = self.idp_conv_bn_layer(conv4_2, "conv4_3", dp)
        pool4 = self.max_pool(conv4_3, 'pool4')

        conv5_1 = self.idp_conv_bn_layer(  pool4, "conv5_1", dp)
        conv5_2 = self.idp_conv_bn_layer(conv5_1, "conv5_2", dp)
        conv5_3 = self.idp_conv_bn_layer(conv5_2, "conv5_3", dp)
        pool5 = self.global_avg_pool(conv5_3, 'pool5')

        if record_shapes:
            # Same tensors, same order as the original inline bookkeeping.
            recorded = (conv1_1, pool1,
                        conv2_1, pool2,
                        conv3_1, conv3_2, pool3,
                        conv4_1, conv4_2, pool4,
                        conv5_1, conv5_2, pool5)
            self.net_shape.extend(t.get_shape() for t in recorded)

        return pool5

    def _classifier_ops(self, pooled):
        """Dropout + fc_2 head. Returns (softmax prob, mean cross-entropy loss, accuracy)."""
        pooled = self.dropout_layer(pooled, self._keep_prob)

        logits = self.fc_layer(pooled, 'fc_2')
        prob = tf.nn.softmax(logits, name="prob")

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.y)
        loss = tf.reduce_mean(cross_entropy)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(x=tf.argmax(logits, 1), y=tf.argmax(self.y, 1)), tf.float32))
        return prob, loss, accuracy

    def _center_loss_ops(self):
        """Create ``self.center_loss`` and ``self.centers_update_op``.

        Uses ``self.features`` and ``self.centers``, so ``add_centers`` must have
        been called and ``self.features`` must already be set.
        """
        labels = tf.argmax(self.y, 1)
        batch_centers = tf.gather(self.centers, labels, axis=0)
        self.center_loss = tf.nn.l2_loss(self.features - batch_centers)

        # Center update: each sample pulls its class center towards its feature,
        # divided by (1 + number of samples of that class in the batch) and
        # scaled by a fixed factor of 0.5.
        diff = batch_centers - self.features
        unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
        appear_times = tf.gather(unique_count, unique_idx)
        appear_times = tf.reshape(appear_times, [-1, 1])

        diff = diff / tf.cast((1 + appear_times), tf.float32)
        diff = 0.5 * diff
        self.centers_update_op = tf.scatter_sub(self.centers, labels, diff)

    def sparsity_train(self, l1_gamma=0.001, l1_gamma_diff=0.001, decay=0.0005, keep_prob=0.0, lambda_c = 1e-4):
        """
        Create the full-width training ops and store them under the key "var_dp".
        :param l1_gamma: scale of the L1 penalty on every BN gamma
        :param l1_gamma_diff: scale of the L1 penalty on increases between consecutive gammas
        :param decay: multiplier of the accumulated L2 weight decay
        :param keep_prob: dropout keep probability; values outside (0, 1) disable dropout
        :param lambda_c: multiplier of the center loss
        """

        self._keep_prob = keep_prob
        start_time = time.time()
        with tf.name_scope("var_dp"):
            pool5 = self._conv_stack()

            # features (taken before dropout)
            self.features = pool5

            prob, loss, accuracy = self._classifier_ops(pool5)
            self._center_loss_ops()

            # gamma l1 regularization
            l1_gamma_regularizer = tf.contrib.layers.l1_regularizer(scale=l1_gamma)
            gamma_l1 = tf.contrib.layers.apply_regularization(l1_gamma_regularizer, self.gamma_var)

            # gamma_diff l1 regularization: only positive steps gamma[i+1] - gamma[i]
            # are penalised, which pushes each gamma vector towards being non-increasing
            def non_increasing_constraint_axis_0(a):
                return tf.nn.relu(a[1:]-a[:-1])
            gamma_diff_var = [non_increasing_constraint_axis_0(x) for x in self.gamma_var]
            l1_gamma_diff_regularizer = tf.contrib.layers.l1_regularizer(scale=l1_gamma_diff)
            gamma_diff_l1 = tf.contrib.layers.apply_regularization(l1_gamma_diff_regularizer, gamma_diff_var)

            self.prob_dict["var_dp"] = prob
            self.loss_dict["var_dp"] = loss + gamma_l1 + gamma_diff_l1 + self._weight_decay * decay + self.center_loss*lambda_c
            self.accu_dict["var_dp"] = accuracy

        print(("sparsity train operation setup: %ds" % (time.time() - start_time)))

    def set_idp_operation(self, dp, decay=0.0002, keep_prob=1.0, lambda_c = 1e-4):
        """
        Create one set of forward/loss/accuracy ops per dot-product percentage.
        Ops are stored under the key str(int(dp_i*100)).
        :param dp: list of fractions of channels to keep
        :param decay: multiplier of the accumulated L2 weight decay
        :param keep_prob: dropout keep probability; values outside (0, 1) disable dropout
        :param lambda_c: multiplier of the center loss
        :raises ValueError: if dp is not a list
        """
        self._keep_prob = keep_prob
        if not isinstance(dp, list):
            raise ValueError("when block_variational is False, dp must be a list.")
        self.dp = dp
        print("DP under test:", np.round(self.dp,2))
        start_time = time.time()
        # create operations at every dot product percentages
        for dp_i in dp:
            key = str(int(dp_i*100))
            with tf.name_scope(key):
                is_full = dp_i == 1.0
                pool5 = self._conv_stack(dp_i, record_shapes=is_full)

                if is_full:
                    # features
                    self.features = pool5

                prob, loss, accuracy = self._classifier_ops(pool5)

                # NOTE(review): the center loss always uses self.features, which is
                # only (re)assigned on the dp == 1.0 pass. Every other pass reuses
                # it, and self.center_loss / self.centers_update_op keep only the
                # ops of the last pass. Confirm this is intended.
                self._center_loss_ops()

                self.prob_dict[key] = prob
                self.loss_dict[key] = loss + self._weight_decay * decay + self.center_loss*lambda_c
                self.accu_dict[key] = accuracy

        print(("Set dp operations finished: %ds" % (time.time() - start_time)))

    def spareness(self, thresh=0.05):
        """
        Return a tensor with the fraction of BN gammas whose magnitude is below ``thresh``.
        :param thresh: absolute-value threshold
        """
        n_below, n_total = 0, 0
        for gamma in self.gamma_var:
            below = tf.cast(tf.less(tf.abs(gamma), thresh), tf.float32)
            n_below += tf.reduce_sum(below)
            n_total += tf.cast(tf.reduce_prod(tf.shape(below)), tf.float32)
        return n_below/n_total

    def global_avg_pool(self, bottom, name):
        """Average ``bottom`` over axes 1 and 2."""
        return tf.reduce_mean(bottom, axis=[1,2], name=name)

    def avg_pool(self, bottom, name):
        """2x2 average pooling with stride 2 and SAME padding."""
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        """2x2 max pooling with stride 2 and SAME padding."""
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def dropout_layer(self, bottom, keep_prob):
        """
        Apply dropout to ``bottom`` while ``self.is_train`` is fed True.
        :param bottom: input tensor
        :param keep_prob: keep probability; numeric values outside (0, 1) return
                          ``bottom`` unchanged (this covers 1.0 and the 0.0 default of
                          sparsity_train, both of which were no-ops before)
        """
        if isinstance(keep_prob, (int, float)) and not 0.0 < keep_prob < 1.0:
            return bottom
        # self.is_train is a placeholder, so the decision has to be made inside the
        # graph. The previous Python-level `self.is_train == True` compared a Tensor
        # with a bool, was always False, and therefore never enabled dropout.
        return tf.cond(self.is_train,
                       lambda: tf.nn.dropout(bottom, keep_prob=keep_prob),
                       lambda: bottom)

    def idp_conv_bn_layer(self, bottom, name, dp=1.0):
        """
        Convolution + bias + batch normalisation + ReLU using only the first
        ``dp`` fraction of the layer's channels.
        :param bottom: input tensor
        :param name: layer name; its variables must already exist under scope_name
        :param dp: fraction of input/output channels to use
        """
        with tf.name_scope(name+str(int(dp*100))):
            with tf.variable_scope(self.scope_name,reuse=True):
                conv_filter = tf.get_variable(name=name+"_W")
                conv_biases = tf.get_variable(name=name+"_b")
                conv_gamma  = tf.get_variable(name=name+"_gamma")
                moving_mean = tf.get_variable(name=name+'_bn_mean')
                moving_variance = tf.get_variable(name=name+'_bn_variance')
                beta = tf.get_variable(name=name+'_beta')
            H,W,C,O = conv_filter.get_shape().as_list()
            print(bottom.get_shape().as_list())

            # ignore input images: the first layer always sees every input channel
            if name != 'conv1_1':
                bottom = bottom[:,:,:,:int(C*dp)]
                print("AFTER",bottom.get_shape().as_list())
                conv_filter = conv_filter[:,:,:int(C*dp),:]

            # create a mask determined by the dot product percentage: the first
            # n1 output channels keep their gamma/beta, the remaining ones are zeroed
            n1 = int(O * dp)
            n0 = O - n1
            mask = tf.constant(value=np.append(np.ones(n1, dtype='float32'), np.zeros(n0, dtype='float32')), dtype=tf.float32)
            conv_gamma = tf.multiply(conv_gamma, mask)
            beta = tf.multiply(beta, mask)

            conv = tf.nn.conv2d(bottom, conv_filter, [1, 1, 1, 1], padding='SAME')
            conv = tf.nn.bias_add(conv, conv_biases)

            from tensorflow.python.training.moving_averages import assign_moving_average
            def mean_var_with_update():
                # batch statistics; the moving averages are updated as a side effect
                mean, variance = tf.nn.moments(conv, [0,1,2], name='moments')
                with tf.control_dependencies([assign_moving_average(moving_mean, mean, 0.99),
                                              assign_moving_average(moving_variance, variance, 0.99)]):
                    return tf.identity(mean), tf.identity(variance)

            # training: batch statistics; inference: stored moving averages
            mean, variance = tf.cond(self.is_train, mean_var_with_update, lambda:(moving_mean, moving_variance))

            conv = tf.nn.batch_normalization(conv, mean, variance, beta, conv_gamma, 1e-05)
            relu = tf.nn.relu(conv)

            return relu

    def fc_layer(self, bottom, name):
        """
        Flatten ``bottom`` and apply the fully connected layer ``name``.
        :param bottom: input tensor
        :param name: layer name; its variables must already exist under scope_name
        """
        with tf.name_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            with tf.variable_scope(self.scope_name,reuse=True):
                weights = tf.get_variable(name=name+"_W")
                biases = tf.get_variable(name=name+"_b")

            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name, new_bn=False, shape=None):
        """
        Create the kernel and the four BN variables of conv layer ``name``.
        :param name: layer name, used as key into the loaded dict and as variable prefix
        :param new_bn: if True ignore BN values stored in the dict
        :param shape: if given, create a randomly initialised kernel of this shape
        :return: (conv_filter, gamma, beta, bn_mean, bn_variance)
        :raises ValueError: if ``shape`` is None and ``name`` is not in the loaded dict
        """
        if shape is not None:
            conv_filter = tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_W", dtype=tf.float32)
        elif name in self.data_dict:
            conv_filter = tf.get_variable(initializer=self.data_dict[name][0], name=name+"_W")
        else:
            # Previously this only printed a hint and then failed with an
            # UnboundLocalError on the next line.
            raise ValueError("please specify a name in data_dict or specify a shape in use")

        H,W,C,O = conv_filter.get_shape().as_list()

        use_stored_bn = not new_bn

        if use_stored_bn and name+"_gamma" in self.data_dict:
            gamma = tf.get_variable(initializer=self.data_dict[name+"_gamma"], name=name+"_gamma")
        else:
            gamma = tf.get_variable(initializer=self.get_profile(O, self.prof_type), name=name+"_gamma")

        if use_stored_bn and name+"_beta" in self.data_dict:
            beta = tf.get_variable(initializer=self.data_dict[name+"_beta"], name=name+"_beta")
        else:
            beta = tf.get_variable(shape=(O,), initializer=tf.zeros_initializer(), name=name+'_beta')

        if use_stored_bn and name+"_bn_mean" in self.data_dict:
            bn_mean = tf.get_variable(initializer=self.data_dict[name+"_bn_mean"], name=name+"_bn_mean")
        else:
            bn_mean = tf.get_variable(shape=(O,), initializer=tf.zeros_initializer(), name=name+'_bn_mean')

        if use_stored_bn and name+"_bn_variance" in self.data_dict:
            bn_variance = tf.get_variable(initializer=self.data_dict[name+"_bn_variance"], name=name+"_bn_variance")
        else:
            bn_variance = tf.get_variable(shape=(O,),initializer=tf.ones_initializer(), name=name+'_bn_variance')

        return conv_filter, gamma, beta, bn_mean, bn_variance

    def get_fc_layer(self, name, shape=None):
        """
        Create the weight variable of fully connected layer ``name``.
        :param name: layer name, used as key into the loaded dict and as variable prefix
        :param shape: if given, create a randomly initialised weight of this shape
        :return: the variable, or None (after printing a hint) when neither source is available
        """
        if shape is not None:
            return tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_W", dtype=tf.float32)
        elif name in self.data_dict:
            return tf.get_variable(initializer=self.data_dict[name][0], name=name+"_W", dtype=tf.float32)
        else:
            print("please specify a name in data_dict or specify a shape in use")
            return None

    def get_bias(self, name, shape=None):
        """
        Create the bias variable of layer ``name``.
        :param name: layer name, used as key into the loaded dict and as variable prefix
        :param shape: if given, create a randomly initialised bias of this shape
        :return: the variable, or None (after printing a hint) when neither source is available
        """
        if shape is not None:
            return tf.get_variable(shape=shape, initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name=name+"_b", dtype=tf.float32)
        elif name in self.data_dict:
            return tf.get_variable(initializer=self.data_dict[name][1], name=name+"_b", dtype=tf.float32)
        else:
            print("please specify a name in data_dict or specify a shape in use")
            return None

    def get_profile(self, C, prof_type):
        """
        Return the float32 initial gamma vector of length ``C``.
        :param C: number of channels
        :param prof_type: "all-one", "linear", "half-exp" or "harmonic"
        :raises ValueError: for any other prof_type
        """
        def half_exp(n, k=1, dtype='float32'):
            # first half ones, second half exp((1-k)*i) for i = 0, 1, ...
            n_ones = int(n/2)
            n_other = n - n_ones
            return np.append(np.ones(n_ones, dtype=dtype), np.exp((1-k)*np.arange(n_other), dtype=dtype))
        if prof_type == "linear":
            profile = np.linspace(2.0,0.0, num=C, endpoint=False, dtype='float32')
        elif prof_type == "all-one":
            profile = np.ones(C, dtype='float32')
        elif prof_type == "half-exp":
            profile = half_exp(C, 2.0)
        elif prof_type == "harmonic":
            profile = np.array(1.0/(np.arange(C)+1), dtype='float32')
        else:
            raise ValueError("prof_type must be \"all-one\", \"half-exp\", \"harmonic\" or \"linear\".")
        return profile
                


In [None]:
import numpy as np
import pandas as pd
import skimage.io as imageio
import pickle

In [None]:
from progress.bar import Bar
from ipywidgets import IntProgress
from IPython.display import display

In [None]:
# Load the pickled label dictionary; transformLabel later indexes it by id.
# NOTE(review): pickle.load can execute code from the file - only load trusted files.
with open('save/label_dict.pkl', 'rb') as f:
    y_dict = pickle.load(f)

In [None]:
# Machine-specific absolute locations of the project and of the train/val image folders.
# Every path keeps its trailing slash because later code joins by string concatenation.
HOME_DIR = "/home/cmchang/DLCV2018SPRING/final/"
TRAIN_DIR = HOME_DIR+"dlcv_final_2_dataset/train/"
VALID_DIR = HOME_DIR+"dlcv_final_2_dataset/val/"

In [None]:
# Space-separated, header-less index files: one "<image file> <id>" pair per line.
dtrain = pd.read_csv(HOME_DIR+"dlcv_final_2_dataset/train_id.txt", header=None,sep=" ", names=["img", "id"])
dvalid = pd.read_csv(HOME_DIR+"dlcv_final_2_dataset/val_id.txt", header=None,sep=" ", names=["img", "id"])

In [None]:
# Full image paths: directory prefix prepended to every file name in the index.
train_list = (TRAIN_DIR + dtrain.img).tolist()
valid_list = (VALID_DIR + dvalid.img).tolist()

In [None]:
def readImgList(file_list):
    """Read every image in ``file_list`` and return them stacked in one array.

    Each image is read with ``imageio.imread`` and cast to ``int`` before being
    collected. The running index is printed on a single console line as a
    progress indicator.
    """
    loaded = []
    for idx, path in enumerate(file_list):
        # "\r" rewinds to the start of the line so the counter overwrites itself.
        print(idx, end="\r")
        loaded.append(imageio.imread(path).astype(int))
    return np.array(loaded)

In [None]:
def transformLabel(id_list, y_dict):
    """Map every id in ``id_list`` through ``y_dict`` and return the labels as an array.

    Raises ``KeyError`` if an id is missing from ``y_dict``.
    """
    return np.array([y_dict[uid] for uid in id_list])

In [None]:
def one_hot_encoding(class_numbers, num_classes):
    """Return one float one-hot row of width ``num_classes`` per entry of ``class_numbers``."""
    # Row i of the identity matrix is the one-hot vector of class i.
    identity = np.identity(num_classes, dtype=float)
    return identity[class_numbers]

In [None]:
def initialize_uninitialized(sess):
    """Run the initializer of every global variable that is not yet initialized in ``sess``.

    Variables that already hold a value are left untouched.
    """
    candidates = tf.global_variables()
    init_flags = sess.run([tf.is_variable_initialized(v) for v in candidates])
    pending = [v for v, ready in zip(candidates, init_flags) if not ready]
    if pending:
        sess.run(tf.variables_initializer(pending))

In [None]:
# Load every training image into memory as one array.
Xtrain = readImgList(train_list)

In [None]:
# Load every validation image into memory as one array.
Xvalid = readImgList(valid_list)

In [None]:
# Notebook display only: shape of the loaded training array.
Xtrain.shape

In [None]:
# Training labels: the id column mapped through y_dict, in the same row order as dtrain.
ytrain = transformLabel(list(dtrain.id), y_dict)

In [None]:
# Validation labels: the id column mapped through y_dict, in the same row order as dvalid.
yvalid = transformLabel(list(dvalid.id), y_dict)

In [None]:
# One-hot label matrices; len(y_dict) is used as the number of classes.
Ytrain = one_hot_encoding(ytrain, len(y_dict))
Yvalid = one_hot_encoding(yvalid, len(y_dict))

In [None]:
# Variable scope under which the model creates all of its variables.
scope_name = "Model5"

In [None]:
# Instantiate the model object; no graph ops exist until model.build is called.
model = VGG16_GAP(scope_name=scope_name)

In [None]:
# Run configuration.
SELF_DIR = "newCL_v3_lambda-1e-3_gap_rescale0-1_baseline/"  # directory whose para_dict.npy seeds this run
FLAG_init_from = HOME_DIR+SELF_DIR+"para_dict.npy"  # passed to model.build as vgg16_npy_path
FLAG_prof_type = "all-one"  # passed to model.build as prof_type
FLAG_lambda_s = 0.0  # passed to sparsity_train as l1_gamma
FLAG_lambda_m = 0.0  # passed to sparsity_train as l1_gamma_diff
FLAG_decay = 1e-5  # passed to sparsity_train as decay
FLAG_lr = 1e-5  # learning rate of the Adam optimizer
FLAG_keep_prob = 1.0  # passed to sparsity_train as keep_prob
FLAG_lambda_c = 1e-3  # passed to sparsity_train as lambda_c
FLAG_save_dir = HOME_DIR+"newCL_v4_lambda-1e-3_gap_rescale0-1_baseline/"  # output directory of this run

#### First round of training

In [None]:
# First-round setup: parameters come from FLAG_init_from, while the FC layer
# (fc_pre_training=False) and the BN parameters (new_bn=True) are created afresh.
model.build(
    vgg16_npy_path=FLAG_init_from,
    shape=Xtrain.shape[1:],
    classes=len(y_dict),
    prof_type=FLAG_prof_type,
    conv_pre_training=True,
    fc_pre_training=False,
    new_bn=True,
)
# Class centers start at zero: one float32 row of width 512 per class.
centers = np.zeros((len(y_dict), 512), dtype=np.float32)
model.add_centers(centers)

#### Further training

In [None]:
# model.build(vgg16_npy_path=FLAG_init_from,
#             shape=Xtrain.shape[1:],
#             classes=len(y_dict),
#             prof_type=FLAG_prof_type,
#             conv_pre_training=True,
#             fc_pre_training=True,
#             new_bn=False)
# centers = np.load(HOME_DIR+SELF_DIR+"centers.npy")
# model.add_centers(centers.astype(np.float32))

In [None]:
# Create the training ops; they are stored in the model's dictionaries under "var_dp".
model.sparsity_train(decay=FLAG_decay, 
                     l1_gamma=FLAG_lambda_s, 
                     l1_gamma_diff=FLAG_lambda_m, 
                     keep_prob=FLAG_keep_prob,
                     lambda_c = FLAG_lambda_c)

In [None]:
# Training objective: the sum of the loss tensors of every task.
# NOTE: `cur_task` stays defined after this loop and is read again by the training cell.
tasks = ['var_dp']
obj = 0.0
for cur_task in tasks:
    print(cur_task)
    obj += model.loss_dict[cur_task]

In [None]:
# Accuracy tensors, one per task, fetched together during validation.
tracking = [model.accu_dict[task_name] for task_name in tasks]

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa


def sometimes(aug):
    """Wrap ``aug`` in ``iaa.Sometimes`` with probability 0.5."""
    return iaa.Sometimes(0.5, aug)


# Augmentation pipeline: translation, scaling, rotation and horizontal flip,
# each wrapped in `sometimes`.
# NOTE(review): Fliplr(0.5) inside Sometimes(0.5) presumably flips only about a
# quarter of the images - confirm that this is intended.
transform = iaa.Sequential([
    sometimes(iaa.Affine(translate_percent={"x": (-0.15, 0.15), "y": (-0.15, 0.15)})),
    sometimes(iaa.Affine(scale={"x": (0.85, 1.15), "y":(0.85, 1.15)})),
    sometimes(iaa.Affine(rotate=(-45, 45))),
    sometimes(iaa.Fliplr(0.5)),
])

In [None]:
# Training cell: optimise `obj` with Adam and early stopping, save the best
# parameters to FLAG_save_dir, then recompute and save the class centers.
import matplotlib.pyplot as plt

print("===== create directory =====")
if not os.path.exists(FLAG_save_dir):
    os.makedirs(FLAG_save_dir)

arr_spareness = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    augment = True

    # hyper parameters
    batch_size = 32
    epoch = 100
    early_stop_patience = 10
    min_delta = 0.0001
    opt_type = 'adam'

    # Only full batches are used; a trailing partial batch is dropped.
    n_train_batches = Xtrain.shape[0] // batch_size
    n_valid_batches = Xvalid.shape[0] // batch_size

    # recorder
    epoch_counter = 0
    history = list()

    # optimizer
    global_step = tf.Variable(0, trainable=False)

    # Passing global_step to minimize() will increment it at each step.
    learning_rate = FLAG_lr
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5)

    checkpoint_path = os.path.join(FLAG_save_dir, 'model.ckpt')

    # Trainable variables: everything in the model scope except the BN moving
    # statistics, which are updated by moving averages rather than by gradients.
    # Built in one pass; the previous code removed items from the list while
    # iterating over it, which can skip elements.
    train_vars = list()
    for var in tf.trainable_variables():
        if model.scope_name not in var.name:
            continue
        if '_mean' in var.name or '_variance' in var.name:
            print('%s is not trainable.'% var)
            continue
        train_vars.append(var)

    print(train_vars)

    train_op = opt.minimize(obj, global_step=global_step, var_list=train_vars)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=len(tasks))

    # progress bar
    ptrain = IntProgress()
    pval = IntProgress()
    display(ptrain)
    display(pval)
    ptrain.max = n_train_batches
    pval.max = n_valid_batches

    spareness = model.spareness(thresh=0.05)
    print("initial spareness: %s" % sess.run(spareness))

    # re-initialize: global_step and the Adam slots were created after the
    # global initializer ran above
    initialize_uninitialized(sess)

    # reset due to adding a new task
    patience_counter = 0
    current_best_val_accu = 0

    # The model defines only the placeholders x, y and is_train; is_train also
    # selects batch vs. moving statistics in batch norm. The `model.bn_train`
    # key fed before does not exist on the model and raised AttributeError.
    train_fetches = [obj, model.accu_dict[tasks[0]], train_op, model.centers_update_op]
    val_fetches = [obj, model.accu_dict[tasks[0]], model.accu_dict[tasks[-1]], tracking]

    # optimize when the aggregated obj
    while(patience_counter < early_stop_patience and epoch_counter < epoch):

        # start training
        stime = time.time()
        bar_train = Bar('Training', max=n_train_batches, suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
        bar_val =  Bar('Validation', max=n_valid_batches, suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
        train_loss, train_accu = 0.0, 0.0
        train_steps = 0

        if augment:
            def load_batches():
                for i in range(n_train_batches):
                    st = i*batch_size
                    ed = (i+1)*batch_size
                    batch = ia.Batch(images=Xtrain[st:ed,:,:,:], data=Ytrain[st:ed,:])
                    yield batch

            batch_loader = ia.BatchLoader(load_batches)
            bg_augmenter = ia.BackgroundAugmenter(batch_loader=batch_loader, augseq=transform, nb_workers=1)

            try:
                while True:
                    batch = bg_augmenter.get_batch()
                    if batch is None:
                        print("Finished epoch.")
                        break
                    x_images_aug = batch.images_aug
                    y_images = batch.data
                    loss, accu, _, _ = sess.run(train_fetches,
                                                feed_dict={model.x: x_images_aug,
                                                           model.y: y_images,
                                                           model.is_train: True})
                    bar_train.next()
                    train_loss += loss
                    train_accu += accu
                    train_steps += 1
                    ptrain.value +=1
                    ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)
            finally:
                # Stop the background workers even if a training step raised.
                batch_loader.terminate()
                bg_augmenter.terminate()
        else:
            for i in range(n_train_batches):
                st = i*batch_size
                ed = (i+1)*batch_size
                loss, accu, _, _ = sess.run(train_fetches,
                                            feed_dict={model.x: Xtrain[st:ed,:],
                                                       model.y: Ytrain[st:ed,:],
                                                       model.is_train: True})
                train_loss += loss
                train_accu += accu
                train_steps += 1
                ptrain.value +=1
                ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)

        # Average over the batches actually run, not over the widget value.
        train_loss = train_loss/max(train_steps, 1)
        train_accu = train_accu/max(train_steps, 1)

        # validation
        val_loss = 0
        val_accu1 = 0
        val_accu2 = 0
        val_accu_dp = list()
        val_steps = 0
        for i in range(n_valid_batches):
            st = i*batch_size
            ed = (i+1)*batch_size
            loss, accu1, accu2, accu_dp = sess.run(val_fetches,
                                                   feed_dict={model.x: Xvalid[st:ed,:],
                                                              model.y: Yvalid[st:ed,:],
                                                              model.is_train: False})
            val_loss += loss
            val_accu1 += accu1
            val_accu2 += accu2
            val_accu_dp.append(accu_dp)
            val_steps += 1
            pval.value += 1
            pval.description = "Testing %s/%s" % (pval.value, pval.max)

        val_accu_dp = np.mean(val_accu_dp, axis=0).tolist()
        dp_str = "".join("{0}%:{1}, ".format(task, np.round(task_accu, 4))
                         for task, task_accu in zip(tasks, val_accu_dp))

        print(dp_str)
        val_loss = val_loss/max(val_steps, 1)
        val_accu1 = val_accu1/max(val_steps, 1)
        val_accu2 = val_accu2/max(val_steps, 1)
        val_accu = val_accu1 #(val_accu1+val_accu2)/2

        #print("\nspareness: %s" % sess.run(spareness))
        # early stopping check: keep the parameters of the best epoch so far
        if (val_accu - current_best_val_accu) > min_delta:
            current_best_val_accu = val_accu
            patience_counter = 0

            para_dict = sess.run(model.para_dict)
            np.save(os.path.join(FLAG_save_dir, "para_dict.npy"), para_dict)
            print("save in %s" % os.path.join(FLAG_save_dir, "para_dict.npy"))
        else:
            patience_counter += 1

        # shuffle Xtrain and Ytrain in the next epoch
        # (the integer label array `ytrain` is NOT shuffled, so after this point
        # it no longer lines up with Xtrain)
        idx = np.random.permutation(Xtrain.shape[0])
        Xtrain, Ytrain = Xtrain[idx,:,:,:], Ytrain[idx,:]

        # epoch end
        # writer.add_summary(epoch_summary, epoch_counter)
        epoch_counter += 1

        ptrain.value = 0
        pval.value = 0
        bar_train.finish()
        bar_val.finish()

        print("Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu at %s: %.4f, val accu at %s: %.4f" % (epoch_counter, patience_counter, round(time.time()-stime,2), train_loss, train_accu, val_loss, tasks[0], val_accu1, tasks[-1], val_accu2))
        history.append([train_loss, train_accu, val_loss, val_accu ])

        if epoch_counter % 10 == 0:
            df = pd.DataFrame(history)
            df.columns = ['train_loss', 'train_accu', 'val_loss', 'val_accu']
            df[['train_loss', 'val_loss']].plot()
            plt.savefig(os.path.join(FLAG_save_dir, 'loss.png'))
            plt.close()
            df[['train_accu', 'val_accu']].plot()
            plt.savefig(os.path.join(FLAG_save_dir, 'accu.png'))
            plt.close()

    saver.save(sess, checkpoint_path, global_step=epoch_counter)

    # extract features and calculate center

    def extract_features(images, chunk=200):
        """Run model.features over `images` in chunks and return the per-chunk outputs."""
        chunks = []
        # Stepping by `chunk` avoids an empty extra batch when the number of
        # images is an exact multiple of the chunk size.
        for start in range(0, images.shape[0], chunk):
            print(start // chunk, end="\r")
            chunks.append(sess.run(model.features,
                                   feed_dict={model.x: images[start:start+chunk,:],
                                              model.is_train: False}))
        return chunks

    output = extract_features(Xtrain) + extract_features(Xvalid)

    EX = np.concatenate(output)
    print(EX.shape)
    # Xtrain was shuffled every epoch together with Ytrain, so the training labels
    # must be recovered from the one-hot Ytrain. Using the unshuffled `ytrain`
    # would pair features with the wrong classes.
    EY = np.concatenate([np.argmax(Ytrain, axis=1), yvalid])
    print(EY.shape)
    centers = np.zeros((len(y_dict), EX.shape[1]))
    for i in range(len(y_dict)):
        members = EX[EY==i,:]
        # A class without samples keeps a zero center instead of a NaN mean.
        if members.shape[0]:
            centers[i,:] = np.mean(members, axis=0)
    # Save once, after every center has been computed.
    np.save(arr=centers, file=os.path.join(FLAG_save_dir, "centers.npy"))