### Version History
+ Ver 1.0 : pretrained된 model의 network와 동일하게 구성하고, Optimizer를 Momentum으로 구성함
+ Ver 1.1 : train된 weights를 loading하는 부분을 cp mothold를 이용하지 않고 get collection으로 refactoring함
+ Ver 1.2 : augmentation code 추가, code 정리
+ Ver 1.3 : restore후에 weight, bias를 초기화 해주는 방법 변경(Low->High API 변경)

In [1]:
%matplotlib inline
!pip install tensorboardcolab

import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import tensorboardcolab
from tensorflow.python.training import checkpoint_utils as cp



Using TensorFlow backend.


## Load Image Data set

In [0]:
def load_cifar10() :
    # load cifar10 dataset 
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # reshape (None, 1) -> (None)
    y_train, y_test = [np.reshape(y_train, [-1]), np.reshape(y_test, [-1])]

    # normalization 
    x_train, x_test = [(x_train-x_train.max()) / (x_train.max()-x_train.min()),
                         (x_test-x_test.max()) / (x_test.max()-x_test.min())]

    temp = x_train
    ratio = int(len(x_train) * 0.7)
    ratio_end = int(len(x_train) * 1.0)  
    
    train_image = temp[0:ratio, :, :, :]
    valid_image = temp[ratio:ratio_end , :, :, :]
    
    train_label = y_train[0:ratio]
    valid_label = y_train[ratio:ratio_end ]
    
    return train_image, train_label, valid_image, valid_label, x_test, y_test

## Data Provider

In [0]:
class DataProvider(object):
    def __init__(self, x, y):
        self.epoch_count = 0
        
        self.data = x
        self.label = y
        
        npr.seed(42)
        
        self.indices = self.generate_indices()
        
    def generate_indices(self):
        indices = list(range(len(self.data)))
        npr.shuffle(indices)
        
        return indices
    
    def next_batch(self, batch_size):
        idx = batch_size
        if len(self.indices) < batch_size:
            print("all data consumed, epoch + 1")
            self.epoch_count += 1
            self.indices = self.generate_indices()
    
        target_indices = self.indices[:batch_size]
        del self.indices[:batch_size]
        
        return self.data[target_indices] , self.label[target_indices]

In [0]:
def cifar_generator(data, labels, batch_size=32):
    start_idx = 0
    num_step = len(data) // batch_size
    indexes = np.arange(0, len(data))
    while True:
        if start_idx >= num_step-1:
            np.random.shuffle(indexes)
            start_idx = 0
        else:
            start_idx += 1            
        batch_index = indexes[start_idx*batch_size: (start_idx+1)*batch_size]

        batch_data = data[batch_index]
        batch_label = labels[batch_index]

        yield batch_data, batch_label

## Load pretrained variables

In [0]:
def get_trained_weights() :
    
    # loading pretrained files
    from google.colab import drive
    drive.mount('/content/gdrive')
    
    !mkdir ./model
    !cp gdrive/My\ Drive/vgg/* model/ # from, to 임
    
    # temperary graph and session
    graph = tf.Graph()
    with graph.as_default() :
        sess = tf.Session(graph=graph)
        
        lode_dir = "./model/vgg_net_model_a"
        saver = tf.train.import_meta_graph(lode_dir + '.meta')
    
        saver.restore(sess, save_path = lode_dir)    
        
        reuse_vars = graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        
        
        # trained variable name : values
        reuse_vars_dict = dict([(var.name.replace(':0',''), sess.run(var.name)) 
                                for var in reuse_vars])
        print("get_trained_weights -> ", reuse_vars_dict.keys() )
        sess.close()
        
        return reuse_vars_dict       

## model functions

In [0]:
def conv2d(input, units, trained_dict, trainable, floor, model, name) :              
    kernel_init   = trained_dict[str(name+'_kernel'+str(floor))] 
    bias_init     = trained_dict[str(name+'_bias'+str(floor))]
    initializer_k = tf.constant_initializer(kernel_init)        
    initializer_b = tf.constant_initializer(bias_init)   

    if model == "BN" :
        use_bias = False

    layer = tf.layers.Conv2D(filters = units, # number of kernels 
                             kernel_size = [2,2], 
                             strides = [1,1],
                             padding = 'SAME', 
                             activation = tf.nn.relu, 
                             kernel_initializer = initializer_k, 
                             use_bias = True,
                             bias_initializer = initializer_b, 
                             name = name )(input)
    
    if model == "BN" :
        layer = tf.layers.BatchNormalization()(layer, training=is_train)
        
    return layer

In [0]:
def max_pooling2d(input) :
    pool  = tf.layers.MaxPooling2D(pool_size=[2,2], strides=[2,2])(input) 
    
    return pool

In [0]:
def fc(input, units, trained_dict, dr, is_train, model, name) :
    kernel_init = trained_dict[str(name+'_kernel')]
    bias_init   = trained_dict[str(name+'_bias')]    
    initializer_k = tf.constant_initializer(kernel_init)
    initializer_b = tf.constant_initializer(bias_init)
        
    dense = tf.layers.Dense(units = units, 
                            activation = tf.nn.relu,
                            kernel_initializer = initializer_k, 
                            use_bias = True,
                            bias_initializer = initializer_b, 
                            name = 'fc')(input) # TODO

    if model == "BN" :
        dense = tf.layers.BatchNormalization()(dense, training=is_train)
        
    dropout = tf.layers.Dropout(dr)(dense, training = is_train)
    
    # print(trained_dict['VGGBlock-1/conv1_bias1']) 
    return dropout

In [0]:
def softmax_l2_with_loss(ys_true, ys_pred, weight_decay) :  
    sce_loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(labels=ys_true, logits=ys_pred))
    l2_loss  = tf.add_n([tf.nn.l2_loss(var) for var in tf.global_variables()])
    loss     = sce_loss + weight_decay * l2_loss
    
    return loss

In [0]:
def accuracy(y_true, y_pred) :
    pred     = tf.cast(tf.arg_max(y_pred, 1), tf.int32)
    correct  = tf.equal(pred, y_true)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
    # add tensor to tensorboard
    acc_tb   = tf.summary.scalar(name='accuracy', tensor=accuracy)
    
    return accuracy

## main model

In [11]:
graph = tf.Graph()
trained_dict = get_trained_weights()
 # print(trained_dict['VGGBlock-1/conv1_kernel1']) 

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
mkdir: cannot create directory ‘./model’: File exists


W0621 09:07:05.579914 139887982245760 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


get_trained_weights ->  dict_keys(['VGGBlock-1/conv1_kernel1', 'VGGBlock-1/conv1_bias1', 'VGGBlock-2/conv2_kernel1', 'VGGBlock-2/conv2_bias1', 'VGGBlock-3/conv3_kernel1', 'VGGBlock-3/conv3_bias1', 'VGGBlock-3/conv3_kernel2', 'VGGBlock-3/conv3_bias2', 'VGGBlock-4/conv4_kernel1', 'VGGBlock-4/conv4_bias1', 'VGGBlock-4/conv4_kernel2', 'VGGBlock-4/conv4_bias2', 'fc/fc1_kernel', 'fc/fc1_bias', 'fc/fc2_kernel', 'fc/fc2_bias', 'fc/fc3_kernel', 'fc/fc3_bias', 'y_pred/kernel', 'y_pred/bias'])


In [12]:
with graph.as_default() :
    xs       = tf.placeholder(tf.float32, (None, 32, 32, 3), name='xs') 
    ys       = tf.placeholder(tf.int32, (None), name='ys')
    lr       = tf.placeholder_with_default(0.001, (), name='lr')
    wd       = tf.placeholder_with_default(0.9, (), name='wd')
    is_train = tf.placeholder_with_default(False, (), name='is_train')
    m        = tf.placeholder_with_default(0.9, (), name='momentum')
    dr       = tf.placeholder_with_default(0.9, (), name='dropout_ratio')   
    model    = tf.placeholder_with_default("VGG", (), name='model')  
    
    with tf.name_scope('VGGBlock-1') :    
        layer = conv2d(xs, 32, trained_dict, is_train, 1, model, 
                       'VGGBlock-1/conv1')
        pool  = max_pooling2d(layer) 
        
    with tf.name_scope('VGGBlock-2') :
        layer = conv2d(pool, 64, trained_dict, is_train, 1, model, 
                       'VGGBlock-2/conv2')
        pool  = max_pooling2d(layer)
        
    with tf.name_scope('VGGBlock-3') :
        layer = conv2d(pool, 128, trained_dict, is_train, 1, model, 
                       'VGGBlock-3/conv3')
        layer = conv2d(layer, 128, trained_dict, is_train, 2, model, 
                       'VGGBlock-3/conv3')
        pool = max_pooling2d(layer)
        
    with tf.name_scope('VGGBlock-4') :
        layer = conv2d(pool, 256, trained_dict, is_train, 1, model,
                       'VGGBlock-4/conv4')
        layer = conv2d(layer, 256,  trained_dict, is_train, 2, model,
                       'VGGBlock-4/conv4')
        pool  = max_pooling2d(layer)
        
    with tf.name_scope('fc') : 
        flatten = tf.layers.flatten(pool)
        layer = fc(flatten, 1024, trained_dict, dr, is_train, model, 'fc/fc1')  
        layer = fc(layer, 1024, trained_dict, dr, is_train, model, 'fc/fc2')                                
        layer = fc(layer, 512, trained_dict, dr, is_train, model, 'fc/fc3')
        
    with tf.name_scope('output') : 
        y_pred  = tf.layers.Dense(10, 
                                  activation=None, 
                                  name='y_pred')(layer)     
    
    with tf.name_scope('Loss') :
        loss = softmax_l2_with_loss(ys, y_pred, wd)
    loss = tf.identity(loss, name='loss')
        
    with tf.name_scope('metric') :
        rmse = tf.sqrt(loss)
    rmse = tf.identity(rmse, name='rmse')
    
    with tf.name_scope('accuracy') :
        acc = accuracy(ys, y_pred)
    acc = tf.identity(acc, name='acc')

    with tf.name_scope('train') :
        # global_step = tf.train.get_or_create_global_step()
        optimizer   = tf.train.MomentumOptimizer(lr, 
                                                 momentum = m, 
                                                 use_nesterov = False)
        train_op    = optimizer.minimize(loss)

W0621 09:07:06.834163 139887982245760 deprecation.py:323] From <ipython-input-12-953cfd55700b>:30: flatten (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.flatten instead.
W0621 09:07:07.451270 139887982245760 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0621 09:07:07.486424 139887982245760 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has th

## reconstruct model

In [0]:
def model_b(config, graph_) :
    # load data    
    train_image, train_label, \
    valid_image, valid_label, \
    test_image, test_label = load_cifar10()
    
    # load hyper parameters
    model_type    = config['model_type']
    load_model    = config['load_model']
    save_model    = config['save_model']
    learning_rate = config['learning_rate']  
    batch_size    = config['batch_size']
    n_epoch       = config['epoch']
    n_step        = int(len(train_image) // batch_size)
    weight_decay  = config['weight_decay']
    dropout_ratio = config['dropout_ratio']
    
    # save directory 
    if load_model == None :
         lode_dir = None
    else :
         lode_dir = "./model/vgg_net_model_" + load_model  
    
    if save_model == None :
        save_dir = None
        log_dir = None
    else :
        save_dir = "./model/vgg_net_model_" + save_model  
        log_dir = "./log/vgg_net_model_" + save_model
        
    with graph_.as_default() :        
        
        saver           = tf.train.Saver()    
        sess            = tf.Session(graph=graph_)
        sess.run(tf.global_variables_initializer())
        
        # create Instance
        train_generator = cifar_generator(train_image, train_label, batch_size)
        
                     
        loss_, acc_ = sess.run([rmse, acc], feed_dict = { xs: valid_image, 
                                                          ys: valid_label,
                                                          is_train : False})
        print("check acc! valid loss = {:.4f}, valid acc = {:.2f}%".format(loss_, acc_*100))
                        
        train_loss = []
        train_acc = []
        valid_loss = []
        valid_acc = []
        cnt = 0
        maximum_acc = 0.5
        for i in tqdm(range(n_epoch)) :
            for step in range(n_step) :
                batch_xs, batch_ys = next(train_generator)
                _, train_loss_, train_acc_ = sess.run([train_op, rmse, acc], 
                                              feed_dict = { xs: batch_xs, 
                                                            ys: batch_ys, 
                                                            lr: learning_rate,
                                                            wd : weight_decay,
                                                            dr : dropout_ratio, 
                                                            model : model_type,
                                                            is_train : True})
                train_loss.append(train_loss_)
                train_acc.append(train_acc_)
                
                # check validation set
                if step % 100 == 0 :
                    loss_, acc_ = sess.run([rmse, acc], 
                                          feed_dict = { xs: valid_image, 
                                                        ys: valid_label,
                                                        wd : weight_decay,
                                                        is_train : False})
                    valid_loss.append(loss_)
                    valid_acc.append(acc_)
                
                    # Save the model
                    if acc_ > maximum_acc :
                        print("log current model! valid loss = {:.4f}, \
                               valid acc = {:.2f}%".format(loss_, acc_*100))
                        maximum_acc = acc_
                        saver.save(sess, save_path = save_dir)

            print(" valid loss = {:.4f}, valid acc = {:.2f}%". \
                  format(loss_, acc_*100))
        
        train_writer.flush() # file을 disk에 쓴다
        
    return valid_loss, valid_acc

In [0]:
def show_result(vgg_loss, vgg_acc, BN_loss, BN_acc) :
    plt.plot(vgg_acc, linestyle = "--", color = "red", label = "vgg_acc")
    plt.plot(BN_acc, linestyle = ":", color = "blue", label = "BN_acc")
    plt.legend()
    plt.show()
    
    plt.plot(vgg_loss, linestyle = "--", color = "red", label = "vgg_loss")
    plt.plot(BN_loss, linestyle = ":", color = "blue", label = "BN_loss")
    plt.legend()
    plt.show()

## main function

In [14]:
def main() :
    
    # config of hyper parameters
    config = {
        "model_type"      : "VGG",  # VGG or BN
        "load_model"      : "a",
        "save_model"      : "b",
        "learning_rate"   : 0.001,
        "batch_size"      : 1000,
        "epoch"           : 100,
        "weight_decay"    : 0.0005,
        "dropout_ratio"   : 0.5
    }
    
    # call reconstruct model
    vgg_loss, vgg_acc = model_b(config, graph)
    
    
    # # config of hyper parameters
    # config = {
    #     "model_type"      : "BN",  # VGG or BN
    #     "load_model"      : "a",
    #     "save_model"      : None,
    #     "learning_rate"   : 0.001,
    #     "batch_size"      : 1000,
    #     "epoch"           : 100,
    #     "weight_decay"    : 0.0005,
    #     # Cifa10 Dataset은 overfitting이 심하기 때문에 dropout을 제거하는 대신 비율을 줄임
    #     "dropout_ratio"   : 0.4 
    # }
    # 
    # # call reconstruct model
    # BN_loss, BN_acc = model_b(config)
# 
    # show_result(vgg_loss, vgg_acc, BN_loss, BN_acc)
    
    
if __name__ == '__main__':
    main()        

check acc! valid loss = 32.7248, valid acc = 10.77%


## save model

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')
# 
# !mkdir gdrive/My\ Drive/vgg
# !mv ./model/vgg* gdrive/My\ Drive/vgg
# !mv ./model/checkpoint gdrive/My\ Drive/vgg