In [1]:
%matplotlib inline
!pip install tensorboardcolab

import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import tensorboardcolab
from tensorflow.python.training import checkpoint_utils as cp



Using TensorFlow backend.


In [2]:
# Load the CIFAR-10 dataset as (images, labels) pairs for the train and test splits.
# The Keras helper downloads the archive when needed (see the cell output below).
from keras.datasets import cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
# Flatten the labels from shape (N, 1) to (N,).
y_train, y_test = [np.reshape(y_train, [-1]), np.reshape(y_test, [-1])]

# Min-max normalisation to [0, 1].
# The images are cast to float *before* any arithmetic: the loader returns
# unsigned 8-bit pixels, and subtracting on that dtype wraps around instead of
# going negative.  The previous formula, (x - max) / (max - min), therefore
# mapped pixel 255 to 0.0 and every other pixel x to (x + 1) / 255 -- the
# printed min/max still looked like 0.0 / 1.0, hiding the problem.
# Because the cast happens first, re-running this cell is harmless.
# NOTE(review): if an existing checkpoint was trained on data produced by the
# old formula, re-check its accuracy after this change.
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)
x_train, x_test = [(x_train - x_train.min()) / (x_train.max() - x_train.min()),
                   (x_test - x_test.min()) / (x_test.max() - x_test.min())]

# Number of classes
n_classes = 10
print('image shape : {}, label shape : {} '.format(x_train.shape, y_train.shape))
print('image shape : {}, label shape : {} '.format(x_test.shape, y_test.shape))
print('train minimun : {}, train_maximum : {} '.format(x_train.min(), x_train.max()))
print('tests minimun : {}, test_maximum : {} '.format(x_test.min(), x_test.max()))

image shape : (50000, 32, 32, 3), label shape : (50000,) 
image shape : (10000, 32, 32, 3), label shape : (10000,) 
train minimun : 0.0, train_maximum : 1.0 
tests minimun : 0.0, test_maximum : 1.0 


In [10]:
# Split the loaded training set: the first 70% is used for training and the
# next 15% for validation.  The remaining 15% (from ratio_end onward) is not
# used by this cell (see the TODO below).
#
# x_train is rebound to the training slice further down, so a second run of
# this cell used to slice the already-shrunk array (70% of 70%).  To make the
# cell idempotent, the full image array is remembered in x_train_full while
# x_train still has one image per label, and every slice is taken from it with
# boundaries derived from the (never rebound) label array.
n_total = len(y_train)
if len(x_train) == n_total:
    x_train_full = x_train

temp = x_train_full
ratio = int(n_total * 0.7)
ratio_end = int(n_total * 0.85)  ### TODO

x_train = temp[0:ratio, :, :, :]
x_validation = temp[ratio:ratio_end , :, :, :]

y_train_label = y_train[0:ratio]
y_validation_label = y_train[ratio:ratio_end ]

print(y_train_label[:10]) # the labels are class indices, not one-hot encoded

print(x_train.shape, y_train_label.shape)
print(x_validation.shape, y_validation_label.shape)

[6 9 9 4 1 1 2 7 8 3]
(24500, 32, 32, 3) (24500,)
(5250, 32, 32, 3) (5250,)


## Data Provider

In [0]:
class DataProvider(object):
    """Serve shuffled mini-batches of (data, label) pairs.

    Indices are shuffled once per epoch and consumed from the front.  When
    fewer than ``batch_size`` indices are left, the leftovers are dropped, the
    epoch counter is incremented and a fresh shuffled index list is generated.

    ``x`` and ``y`` must support indexing with a list of integers (as NumPy
    arrays do) and are expected to have the same length along the first axis.
    """

    def __init__(self, x, y):
        self.epoch_count = 0

        self.data = x
        self.label = y

        # Seed NumPy's global RNG so the batch order is reproducible.
        # (The original referenced an undefined alias `npr`, which raised
        # NameError as soon as a DataProvider was constructed.)
        np.random.seed(42)

        self.indices = self.generate_indices()

    def generate_indices(self):
        """Return a freshly shuffled list of all sample indices."""
        indices = list(range(len(self.data)))
        np.random.shuffle(indices)

        return indices

    def next_batch(self, batch_size):
        """Return the next ``(data, label)`` batch of up to ``batch_size`` samples.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Not enough indices left for a full batch: start a new epoch.
        if len(self.indices) < batch_size:
            print("all data consumed, epoch + 1")
            self.epoch_count += 1
            self.indices = self.generate_indices()

        # Take the batch from the front and remove it from the pending list.
        target_indices = self.indices[:batch_size]
        del self.indices[:batch_size]

        return self.data[target_indices] , self.label[target_indices]

In [0]:
def cifar_generator(data, labels, batch_size=32):
    """Yield ``(batch_data, batch_label)`` pairs forever.

    Every epoch the sample order is reshuffled and then walked in
    ``len(data) // batch_size`` consecutive batches, so each full epoch covers
    that many distinct batches; the ``len(data) % batch_size`` samples at the
    tail of the current permutation are skipped for that epoch.  When the data
    holds fewer than ``batch_size`` samples, a single shorter batch containing
    everything is yielded per epoch.

    The original implementation advanced the batch index *before* the first
    yield, so the first pass skipped batch 0 and ran on unshuffled data.

    Args:
        data: array of samples, indexable with an integer index array.
        labels: array of labels aligned with ``data``.
        batch_size: number of samples per batch (must be >= 1).

    Raises:
        ValueError: on the first ``next()`` call if ``batch_size`` is < 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # At least one (possibly short) batch per epoch, even for tiny inputs.
    num_step = max(1, len(data) // batch_size)
    indexes = np.arange(0, len(data))

    while True:
        # New epoch: new sample order (uses NumPy's global RNG).
        np.random.shuffle(indexes)
        for step in range(num_step):
            batch_index = indexes[step * batch_size:(step + 1) * batch_size]
            yield data[batch_index], labels[batch_index]

## Model B : Build  --> Model A와 동일한 Structure이고, Optimizer만 Momentum으로 변경함

In [13]:
from google.colab import drive
drive.mount('/content/gdrive')

!mkdir ./model
!cp gdrive/My\ Drive/vgg/* model/ # from, to 임

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
mkdir: cannot create directory ‘./model’: File exists


In [0]:
# Build model B: a stack of conv blocks followed by fully connected layers whose
# kernels and biases are initialised from the "vgg_net_model_a" checkpoint, and
# which is trained with a Momentum optimizer.
graph = tf.Graph()
with graph.as_default() :
    # ---- Inputs and hyper-parameter placeholders ----
    xs = tf.placeholder(tf.float32, (None, 32, 32, 3), name='xs')
    # `(None)` is simply None (fully unknown shape); `(None,)` declares the
    # intended 1-D vector of integer class indices.
    ys = tf.placeholder(tf.int32, (None,), name='ys')
    lr = tf.placeholder_with_default(0.001, (), name='lr')
    # NOTE(review): a default of 0.9 for the weight-decay factor looks like it
    # was copied from the momentum default; with it the L2 term dominates the
    # loss unless `wd` is fed explicitly.  Left unchanged.
    wd = tf.placeholder_with_default(0.9, (), name='wd')
    is_train = tf.placeholder_with_default(False, (), name='is_train')
    m = tf.placeholder_with_default(0.9, (), name='momentum')

    # ---- Pretrained weights and biases, read from the checkpoint as arrays ----
    ckpt_path = './model/vgg_net_model_a'

    conv1_w1  = cp.load_variable(ckpt_path, 'conv1/kernel1')
    conv1_b1  = cp.load_variable(ckpt_path, 'conv1/bias1')

    conv2_w1  = cp.load_variable(ckpt_path, 'conv2/kernel1')
    conv2_b1  = cp.load_variable(ckpt_path, 'conv2/bias1')

    conv3_w1  = cp.load_variable(ckpt_path, 'conv3/kernel1')
    conv3_b1  = cp.load_variable(ckpt_path, 'conv3/bias1')
    conv3_w2  = cp.load_variable(ckpt_path, 'conv3/kernel2')
    conv3_b2  = cp.load_variable(ckpt_path, 'conv3/bias2')

    conv4_w1  = cp.load_variable(ckpt_path, 'conv4/kernel1')
    conv4_b1  = cp.load_variable(ckpt_path, 'conv4/bias1')
    conv4_w2  = cp.load_variable(ckpt_path, 'conv4/kernel2')
    conv4_b2  = cp.load_variable(ckpt_path, 'conv4/bias2')

    f1_w1    = cp.load_variable(ckpt_path, 'fc1/kernel1')
    f1_b1    = cp.load_variable(ckpt_path, 'fc1/bias1')
    f2_w1    = cp.load_variable(ckpt_path, 'fc2/kernel1')
    f2_b1    = cp.load_variable(ckpt_path, 'fc2/bias1')
    f3_w1    = cp.load_variable(ckpt_path, 'fc3/kernel1')
    f3_b1    = cp.load_variable(ckpt_path, 'fc3/bias1')

    # ---- Convolutional blocks ----
    # Each block is conv (stride 1, SAME padding) + bias + ReLU, repeated once
    # (conv1, conv2) or twice (conv3, conv4), followed by a 2x2 / stride-2 max-pool.
    with tf.name_scope('conv1') :
        kernel = tf.Variable(conv1_w1, name='kernel1')
        bias   = tf.Variable(conv1_b1, name='bias1')
        layer  = tf.nn.conv2d(xs, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)
        pool   = tf.layers.MaxPooling2D(pool_size=[2,2], strides=[2,2])(layer)


    with tf.name_scope('conv2') :
        kernel = tf.Variable(conv2_w1, name='kernel1')
        bias   = tf.Variable(conv2_b1, name='bias1')
        layer  = tf.nn.conv2d(pool, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)
        pool   = tf.layers.MaxPooling2D(pool_size=[2,2], strides=[2,2])(layer)

    with tf.name_scope('conv3') :
        kernel = tf.Variable(conv3_w1, name='kernel1')
        bias   = tf.Variable(conv3_b1, name='bias1')
        layer  = tf.nn.conv2d(pool, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)

        kernel = tf.Variable(conv3_w2, name='kernel2')
        bias   = tf.Variable(conv3_b2, name='bias2')
        layer  = tf.nn.conv2d(layer, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)
        pool   = tf.layers.MaxPooling2D(pool_size=[2,2], strides=[2,2])(layer)

    with tf.name_scope('conv4') :
        kernel = tf.Variable(conv4_w1, name='kernel1')
        bias   = tf.Variable(conv4_b1, name='bias1')
        layer  = tf.nn.conv2d(pool, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)

        kernel = tf.Variable(conv4_w2, name='kernel2')
        bias   = tf.Variable(conv4_b2, name='bias2')
        layer  = tf.nn.conv2d(layer, kernel, strides=[1,1,1,1], padding='SAME')
        layer  = layer + bias
        layer  = tf.nn.relu(layer)
        pool   = tf.layers.MaxPooling2D(pool_size=[2,2], strides=[2,2])(layer)


    # ---- Fully connected layers ----
    # Each is matmul + bias + ReLU followed by dropout (rate 0.5) that is only
    # active when `is_train` is fed as True.
    with tf.name_scope('fc1') :
        flatten = tf.layers.flatten(pool)

        kernel  = tf.Variable(f1_w1, name = "kernel1")
        bias    = tf.Variable(f1_b1, name = "bias1")
        # The flattened feature size must match the first dimension of the
        # kernel stored in the checkpoint.
        z       = tf.matmul(flatten, kernel) + bias
        logits  = tf.nn.relu(z)
        dropout = tf.layers.Dropout(0.5)(logits, training = is_train)

    with tf.name_scope('fc2') :
        kernel  = tf.Variable(f2_w1, name = "kernel1")
        bias    = tf.Variable(f2_b1, name = "bias1")
        z       = tf.matmul(dropout, kernel) + bias
        logits  = tf.nn.relu(z)
        dropout = tf.layers.Dropout(0.5)(logits, training = is_train)

    with tf.name_scope('fc3') :
        kernel  = tf.Variable(f3_w1, name = "kernel1")
        bias    = tf.Variable(f3_b1, name = "bias1")
        z       = tf.matmul(dropout, kernel) + bias
        logits  = tf.nn.relu(z)
        dropout = tf.layers.Dropout(0.5)(logits, training = is_train)

        # Output layer producing the 10 class scores.  It is a new Dense layer
        # with its own initialiser; none of the load_variable calls above
        # provide weights for it.
        # NOTE(review): if model A's output layer is stored in the checkpoint,
        # it should be loaded as well -- otherwise predictions made right after
        # loading come from an untrained output layer.
        y_pred  = tf.layers.Dense(10, activation=None, name='y_pred')(dropout)

    with tf.name_scope('Loss') :
        # tf.losses.sparse_softmax_cross_entropy already returns a reduced
        # scalar by default, so the outer reduce_mean leaves it unchanged.
        sce_loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=ys, logits=y_pred))
        # L2 penalty over every variable created so far (kernels and biases,
        # including those of y_pred); the global step is created later and is
        # therefore not part of it.
        l2_loss  = tf.add_n([tf.nn.l2_loss(var) for var in tf.global_variables()])
        loss     = sce_loss + wd * l2_loss
    loss = tf.identity(loss, name='loss')

    with tf.name_scope('metric') :
        # Square root of the total loss.  Despite the name this is not a
        # root-mean-square error.
        rmse = tf.sqrt(loss)

    with tf.name_scope('accuracy') :
        # tf.argmax replaces the deprecated tf.arg_max alias.
        pred     = tf.cast(tf.argmax(y_pred, 1), tf.int32)
        correct  = tf.equal(pred, ys)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        # add tensor to tensorboard
        acc_tb   = tf.summary.scalar(name='accuracy', tensor=accuracy)

    with tf.name_scope('train') :
        global_step = tf.train.get_or_create_global_step()
        #train_op   = tf.train.AdamOptimizer(lr).minimize(loss, global_step = global_step)

        # Plain (non-Nesterov) momentum SGD; learning rate and momentum are fed
        # through the `lr` and `m` placeholders.
        optimizer   = tf.train.MomentumOptimizer(lr, momentum = m, use_nesterov = False)
        train_op    = optimizer.minimize(loss, global_step = global_step)
        

## Model B : Train

In [15]:
# Evaluate the freshly initialised graph on the validation split and, when
# n_epoch > 0, train it while tracking training / validation metrics.
with graph.as_default() :

    log_dir = "./log/vgg_net_model_b"
    lode_dir = "./model/vgg_net_model_a"    # (sic) not used in this cell
    save_dir = "./model/vgg_net_model_b"    # directory + file-name prefix of the checkpoint

    train_writer = tf.summary.FileWriter(logdir = log_dir)
    train_writer.add_graph(tf.get_default_graph())
    merged_all = tf.summary.merge_all()
    # Needed by the checkpointing step of the training loop; the loop used to
    # reference `saver` without ever creating it.
    saver = tf.train.Saver()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())


    # Training configuration
    batch_size = 1000
    n_epoch = 0 # 30 ; with 0 only the sanity check below is executed
    n_step = int(len(x_train) // batch_size)  # `//` is integer division: full batches per epoch
    learing_rate = 0.001
    weight_decay = 0.0005

    # Endless generator of shuffled training batches
    train_generator = cifar_generator(x_train, y_train_label, batch_size)

    train_loss = []
    valid_loss = []
    valid_acc = []
    cnt = 0
    minimum_loss = 1.1   # a checkpoint is written only when the validation metric drops below this
    momentum = 0.9


    # Sanity check right after initialisation: metric and accuracy on the
    # whole validation split, with the weight-decay term switched off.
    loss_, acc_= sess.run([rmse, accuracy], feed_dict = { xs : x_validation,
                                                          ys : y_validation_label,
                                                          wd : 0,
                                                          m : 0,
                                                          is_train : False })
    print("Loading결과 확인 -> loss = {:.4f}, acc = {:.2f}%".format(loss_, acc_*100))


    # The training loop was previously kept as commented-out code.  It is now
    # live but guarded, so with n_epoch = 0 this cell behaves exactly as before.
    if n_epoch > 0 :
        for i in tqdm(range(n_epoch)) :
            for step in range(n_step) :
                batch_xs, batch_ys = next(train_generator)
                _, train_loss_, tbs_train_ = sess.run([train_op, rmse, merged_all], feed_dict = { xs: batch_xs,
                                                                                                  ys: batch_ys,
                                                                                                  lr: learing_rate,
                                                                                                  wd : weight_decay,
                                                                                                  m : momentum,
                                                                                                  is_train : True})
                # NOTE(review): the original author remarked that this summary
                # write "should work but does not" -- verify in TensorBoard.
                train_writer.add_summary(tbs_train_, global_step=cnt)
                cnt += 1
                train_loss.append(train_loss_)

                # check validation set
                if step % 100 == 0 :
                    loss_, acc_ = sess.run([rmse, accuracy], feed_dict = { xs: x_validation,
                                                                           ys: y_validation_label,
                                                                           wd : weight_decay,
                                                                           m : momentum,
                                                                           is_train : False})
                    valid_loss.append(loss_)
                    valid_acc.append(acc_)

                    # Save the model whenever the validation metric improves
                    if loss_ < minimum_loss :
                        print("log current model!")
                        minimum_loss = loss_
                        saver.save(sess, save_path = save_dir)
            print("loss = {:.4f}, acc = {:.2f}%".format(loss_, acc_*100))
        print("loss = {:.4f}, acc = {:.2f}%".format(loss_, acc_*100))

    train_writer.flush() # write pending summary events to disk

Loading결과 확인 -> loss = 1.5514, acc = 6.78%


In [0]:
# Plot the recorded training and validation curves; both need to be inspected
# together.  The recorded values are the square root of the total loss.
fig, ax = plt.subplots()
ax.plot(np.arange(0, len(train_loss), 1), train_loss)
ax.set(title='Training', xlabel='training step', ylabel='sqrt(loss)')
plt.show()

fig, ax = plt.subplots()
ax.plot(np.arange(0, len(valid_loss), 1), valid_loss)
ax.set(title='Validation', xlabel='validation check', ylabel='sqrt(loss)')
plt.show()

In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

!mkdir gdrive/My\ Drive/vgg
!mv ./model/vgg* gdrive/My\ Drive/vgg
!mv ./model/checkpoint gdrive/My\ Drive/vgg