# Model.variable

layer의 `trainable`을 `False`로 설정했을 때 weight가 어떻게 변하는지, 그리고 `model.variables`와 `model.trainable_variables`의 차이를 살펴보자.

In [1]:
#Set up
import tensorflow as tf
tf.enable_eager_execution()
import tensorflow.contrib.eager as tfe

from tensorflow import keras

import matplotlib.pyplot as plt
import numpy as np
import time

## Load the dataset

In [2]:
# Load MNIST through keras.datasets as (images, labels) pairs for the train and test splits.
# Only the training pair is used by the cells visible in this notebook.
(train_image, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

In [3]:
# Add a trailing channel axis -> (N, 28, 28, 1) and divide by 255.
# NOTE(review): presumably the raw pixels are 0-255 integers, making this a [0, 1] scaling -- confirm.
# The name is rebound in place, so re-running this cell divides by 255 a second time.
train_image = train_image.reshape(-1,28,28,1) / 255

# One-hot encode the labels into 10 classes (also rebinds its input name; run once per kernel).
train_labels = keras.utils.to_categorical(train_labels, num_classes=10)

In [4]:
# Confirm the channel axis was added: the expected layout is (N, 28, 28, 1).
train_image.shape

(60000, 28, 28, 1)

In [5]:
# Keep only the first 5000 image/label pairs
train_image, train_labels = train_image[:5000], train_labels[:5000]

## Define simple CNN model

In [6]:
# Case 1: keras.Sequential, assembled one layer at a time with .add()
model = keras.Sequential()
for conv_filters in (16, 16):
    # Two identical 3x3 convolutions with ReLU
    model.add(keras.layers.Conv2D(conv_filters, [3,3], activation=tf.nn.relu))
model.add(keras.layers.GlobalAvgPool2D())
# Softmax head: this model outputs class probabilities
model.add(keras.layers.Dense(10, activation='softmax'))

In [7]:
#Case 2 : def model
def create_model():
    """Build the functional-API version of the small CNN.

    The network takes (28, 28, 1) inputs and stacks two 3x3 ReLU
    convolutions with 16 filters, global average pooling and a 10-unit
    Dense layer. The Dense layer has no activation, so the model returns
    raw logits suitable for `tf.losses.softmax_cross_entropy`.

    Returns:
        A `keras.models.Model` mapping the input tensor to the logits.
    """
    inputs = keras.layers.Input(shape=(28,28,1))

    # Layers are constructed and applied in the same order as they are listed.
    stack = [
        keras.layers.Conv2D(16, [3,3], activation=tf.nn.relu),
        keras.layers.Conv2D(16, [3,3], activation=tf.nn.relu),
        keras.layers.GlobalAvgPool2D(),
        keras.layers.Dense(10, activation=None),  # logits, no softmax
    ]

    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)

    return keras.models.Model(inputs=inputs, outputs=outputs)

In [8]:
# Functional-API counterpart of `model`; unlike `model`, its last Dense layer has no softmax.
model_2 = create_model()

In [9]:
# `model` ends in a softmax layer, so the probability-based 'categorical_crossentropy' is appropriate.
model.compile(optimizer=tf.train.AdamOptimizer(), loss='categorical_crossentropy', metrics=['accuracy'])


def logits_categorical_crossentropy(y_true, y_pred):
    """Categorical cross-entropy for a model whose outputs are raw logits."""
    return keras.backend.categorical_crossentropy(y_true, y_pred, from_logits=True)


# `model_2` comes from create_model(), whose final Dense layer has activation=None (logits).
# The string loss 'categorical_crossentropy' treats predictions as probabilities, which does not
# match logits, so a from_logits=True loss is used instead.
model_2.compile(optimizer=tf.train.AdamOptimizer(), loss=logits_categorical_crossentropy, metrics=['accuracy'])

In [10]:
# Print the per-layer output shapes and parameter counts of the functional model.
model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 16)        2320      
_________________________________________________________________
global_average_pooling2d_1 ( (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                170       
Total params: 2,650
Trainable params: 2,650
Non-trainable params: 0
_________________________________________________________________


## Apply trainable False in dense layer

Dense layer에만 `trainable = False`를 설정해서 학습 후에도 해당 layer의 weight가 업데이트되지 않는지 확인한다.

In [12]:
# Look up the Dense layer in each model. The index differs (3 vs 4) because the functional
# model's summary lists an InputLayer as an extra first layer.
print(model.layers[3])
print(model_2.layers[4])

<tensorflow.python.keras.layers.core.Dense object at 0x000001E9481239E8>
<tensorflow.python.keras.layers.core.Dense object at 0x000001E94812F550>


In [13]:
# Freeze only the final Dense layer of each model (index 3 in `model`, index 4 in `model_2`).
# NOTE(review): both models were already compiled in an earlier cell. Keras documents that a change
# to `trainable` should be followed by another compile() to be sure it takes effect -- confirm
# that this holds for the TF version in use, especially for `model_2`.
model.layers[3].trainable = False
model_2.layers[4].trainable = False

## Training model

In [17]:
# Train the Sequential model for a single epoch in batches of 32.
model.fit(train_image, train_labels, batch_size=32, epochs=1)

Epoch 1/1


<tensorflow.python.keras.callbacks.History at 0x1e948ab2080>

In [15]:
# Train the functional model for a single epoch in batches of 32.
model_2.fit(train_image, train_labels, batch_size=32, epochs=1)

Epoch 1/1


<tensorflow.python.keras.callbacks.History at 0x1e948c29cf8>

## Check the layer weight values

마지막 dense의 bias가 초기값인 0인 것으로 보아, 해당 layer가 업데이트되지 않았음을 확인할 수 있다.

In [16]:
# Skip the first four variables and show the rest; the output starts at the Dense kernel.
# NOTE: by its execution count this cell (In [16]) ran BEFORE model.fit (In [17]), i.e. it is the
# "before training" snapshot. Run top to bottom it executes after the fit cell, so it would then
# show post-training values instead.
model.variables[4:]

[<tf.Variable 'dense/kernel:0' shape=(16, 10) dtype=float32, numpy=
 array([[-0.06524268, -0.06301078,  0.20012039, -0.33915892, -0.15785557,
         -0.3247599 , -0.02898121,  0.40953153, -0.44148073,  0.2055642 ],
        [-0.45496157, -0.47648293,  0.16975194, -0.4567982 , -0.41078016,
          0.19211262,  0.07545751,  0.3764953 , -0.42544845,  0.39349824],
        [ 0.23093444, -0.4273506 ,  0.23896319, -0.3221113 ,  0.2929638 ,
          0.21329874, -0.10462677, -0.3884179 ,  0.27275258,  0.2980827 ],
        [ 0.18233633, -0.17863235,  0.13755113, -0.18339542,  0.3298977 ,
          0.05414987,  0.3123647 ,  0.4149763 ,  0.39305544, -0.09890416],
        [ 0.35792953, -0.32222092, -0.36607796,  0.42394054,  0.3184749 ,
          0.19352591,  0.45282483,  0.2298907 , -0.31858498, -0.02650949],
        [ 0.16544688,  0.07125747, -0.1324763 , -0.07750696, -0.26832098,
          0.3827796 , -0.43935832,  0.40890825, -0.2539935 ,  0.11252004],
        [-0.11604178,  0.21509767, -0.

In [18]:
# Same slice again after model.fit: the Dense kernel printed here matches the earlier dump,
# which is the evidence that the frozen layer was not updated.
model.variables[4:]

[<tf.Variable 'dense/kernel:0' shape=(16, 10) dtype=float32, numpy=
 array([[-0.06524268, -0.06301078,  0.20012039, -0.33915892, -0.15785557,
         -0.3247599 , -0.02898121,  0.40953153, -0.44148073,  0.2055642 ],
        [-0.45496157, -0.47648293,  0.16975194, -0.4567982 , -0.41078016,
          0.19211262,  0.07545751,  0.3764953 , -0.42544845,  0.39349824],
        [ 0.23093444, -0.4273506 ,  0.23896319, -0.3221113 ,  0.2929638 ,
          0.21329874, -0.10462677, -0.3884179 ,  0.27275258,  0.2980827 ],
        [ 0.18233633, -0.17863235,  0.13755113, -0.18339542,  0.3298977 ,
          0.05414987,  0.3123647 ,  0.4149763 ,  0.39305544, -0.09890416],
        [ 0.35792953, -0.32222092, -0.36607796,  0.42394054,  0.3184749 ,
          0.19352591,  0.45282483,  0.2298907 , -0.31858498, -0.02650949],
        [ 0.16544688,  0.07125747, -0.1324763 , -0.07750696, -0.26832098,
          0.3827796 , -0.43935832,  0.40890825, -0.2539935 ,  0.11252004],
        [-0.11604178,  0.21509767, -0.

## Apply in grad function

Tensorflow low-level API(`tf.GradientTape`)로 모델을 직접 학습시키는 과정에 `trainable`을 적용해보자.

In [21]:
# Convert the images to a float32 tensor for the hand-written training loop.
# From here on `train_image` is a TensorFlow tensor, not the array produced by the earlier cells.
train_image = tf.cast(train_image, tf.float32)

In [22]:
### Wrap the (image, label) pairs in a tf.data pipeline that yields batches of 32
train_data = tf.data.Dataset.from_tensor_slices((train_image, train_labels)).batch(32)

In [23]:
# Pull the first batch to inspect its shapes.
# `x` and `y` remain defined at notebook level after this cell, so any later code that refers to
# `y` by name sees this first batch, not the batch it is currently iterating over.
x,y = tfe.Iterator(train_data).next()
print(x.shape)
print(y.shape)

(32, 28, 28, 1)
(32, 10)


## Define loss and optimizer

In [24]:
def loss_f(y_pred, labels):
    """Softmax cross-entropy between one-hot `labels` and the logits `y_pred`.

    Note the argument order: predictions first, labels second, which is the
    reverse of `tf.losses.softmax_cross_entropy` itself.
    """
    return tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=y_pred)

In [25]:
# Sanity-check the loss on one batch using a freshly initialised logits model.
# The original call used `model_3`, which is only created further down the notebook and
# therefore raises NameError when the cells are run top to bottom on a fresh kernel.
loss_f(create_model()(x), y)

<tf.Tensor: id=90802, shape=(), dtype=float32, numpy=2.2911422>

In [72]:
# Adam optimizer and global step counter used by the hand-written training loops.
# Both are notebook-level objects shared by every training call, so the step counter keeps
# increasing across calls (the logged step numbers below do not start at 0).
optimizer = tf.train.AdamOptimizer(learning_rate=.01)
global_step = tf.train.get_or_create_global_step()

## Define the model and training

In [45]:
# Fresh logits model for the first hand-written training loop.
model_3 = create_model()

In [47]:
# Mark the final Dense layer (index 4 in the functional model) as non-trainable.
model_3.layers[4].trainable = False

In [48]:
# Running loss / accuracy values, appended once per batch by the training functions.
# These lists are notebook-level, so every training call appends to the same two lists.
train_loss_hist = []
train_acc_hist = []

In [70]:
def training(model, train_data):
    """Run one pass over `train_data`, updating *every* variable of `model`.

    Gradients are taken with respect to `model.variables`, so a layer's
    `trainable = False` flag is ignored and that layer is updated as well.
    This is deliberate: the function is the baseline that shows why a
    hand-written loop has to use `model.trainable_variables` instead.

    Relies on the notebook-level names `optimizer`, `global_step`, `loss_f`,
    `train_loss_hist` and `train_acc_hist`.

    Args:
        model: Callable model whose output is passed to `loss_f` as logits.
        train_data: Dataset yielding `(images, one_hot_labels)` batches.

    Returns:
        None. Running mean loss and accuracy are appended to
        `train_loss_hist` / `train_acc_hist` after every batch.
    """
    start = time.time()
    epoch_loss = tfe.metrics.Mean()
    epoch_acc = tfe.metrics.Accuracy()

    for img, label in tfe.Iterator(train_data):
        with tf.GradientTape() as tape:
            pred = model(img, training=True)
            loss = loss_f(pred, label)

        # All variables, whether flagged trainable or not (see docstring).
        variable = model.variables
        grad = tape.gradient(loss, variable)
        optimizer.apply_gradients(zip(grad, variable), global_step)

        epoch_loss(loss)
        # Class index per sample (axis=1), compared against THIS batch's labels.
        # The previous code reduced over the batch axis and used the notebook-level `y`
        # (always the first batch), so the recorded accuracy was meaningless.
        epoch_acc(tf.argmax(label, axis=1), tf.argmax(pred, axis=1))

        train_loss_hist.append(epoch_loss.result())
        train_acc_hist.append(epoch_acc.result())

        # The value logged as "Epoch" is the global optimizer step count.
        if global_step.numpy() % 10 ==  0:
             print("Epoch : {}, Loss : {:.3f}".format(global_step.numpy(), loss))

    print("End training : {:.2f}s".format((time.time()-start)))

In [71]:
# One pass over the data with the loop that differentiates w.r.t. model.variables.
training(model_3, train_data)

Epoch : 800, Loss : 2.352
Epoch : 810, Loss : 2.279
Epoch : 820, Loss : 2.339
Epoch : 830, Loss : 2.294
Epoch : 840, Loss : 2.322
Epoch : 850, Loss : 2.329
Epoch : 860, Loss : 2.304
Epoch : 870, Loss : 2.330
Epoch : 880, Loss : 2.295
Epoch : 890, Loss : 2.336
Epoch : 900, Loss : 2.264
Epoch : 910, Loss : 2.270
Epoch : 920, Loss : 2.304
Epoch : 930, Loss : 2.305
Epoch : 940, Loss : 2.297
End training :9.698448896408081s


In [52]:
# Skip the first four variables and show the rest; the output starts at the Dense kernel
# of the layer that was flagged non-trainable.
model_3.variables[4:]

[<tf.Variable 'dense_3/kernel:0' shape=(16, 10) dtype=float32, numpy=
 array([[-0.45772293, -0.07714962, -0.09146898,  0.32559663,  0.14121312,
         -0.31564295, -0.34674475,  0.31994423,  0.34491792, -0.2642664 ],
        [-0.00925233,  0.25148568,  0.5816645 , -0.15537213,  0.55075365,
          0.14675498,  0.3779239 ,  0.24825016, -0.2113397 ,  0.13481595],
        [-0.12234296,  0.4343408 ,  0.11925983,  0.06176313,  0.47407475,
          0.18234634,  0.15771739,  0.27368525,  0.23689087,  0.06166781],
        [-0.30882064, -0.24325848, -0.30091697, -0.12990883, -0.29063466,
          0.09312929, -0.22254248, -0.20085086,  0.16743022, -0.47521797],
        [ 0.54994845,  0.6725851 , -0.2798149 , -0.45389056,  0.34819686,
         -0.8055184 ,  0.48391423, -0.441198  ,  0.37295783, -0.38631412],
        [ 0.23501493, -1.7059631 ,  0.42837575,  0.1037536 , -0.1712815 ,
         -0.0441816 ,  0.15631196, -0.19648983,  0.00533028, -0.1714201 ],
        [ 0.15900092, -0.2533721 ,  

Even though we set `layer.trainable = False`, the layer's weights were still updated, because the gradients were computed and applied for all of `model.variables`.

So we change the gradient computation to use only the trainable variables: `grad = tape.gradient(loss, model.trainable_variables)`

In [53]:
def training2(model, train_data):
    """Run one pass over `train_data`, updating only the trainable variables.

    Gradients are taken with respect to `model.trainable_variables`, so
    layers flagged `trainable = False` are left untouched.

    Relies on the notebook-level names `optimizer`, `global_step`, `loss_f`,
    `train_loss_hist` and `train_acc_hist`.

    Args:
        model: Callable model whose output is passed to `loss_f` as logits.
        train_data: Dataset yielding `(images, one_hot_labels)` batches.

    Returns:
        None. Running mean loss and accuracy are appended to
        `train_loss_hist` / `train_acc_hist` after every batch.
    """
    start = time.time()
    epoch_loss = tfe.metrics.Mean()
    epoch_acc = tfe.metrics.Accuracy()

    for img, label in tfe.Iterator(train_data):
        with tf.GradientTape() as tape:
            pred = model(img, training=True)
            loss = loss_f(pred, label)

        # Only variables of layers that are still trainable receive gradients.
        variable = model.trainable_variables
        grad = tape.gradient(loss, variable)
        optimizer.apply_gradients(zip(grad, variable), global_step)

        epoch_loss(loss)
        # Class index per sample (axis=1), compared against THIS batch's labels.
        # The previous code reduced over the batch axis and used the notebook-level `y`
        # (always the first batch), so the recorded accuracy was meaningless.
        epoch_acc(tf.argmax(label, axis=1), tf.argmax(pred, axis=1))

        train_loss_hist.append(epoch_loss.result())
        train_acc_hist.append(epoch_acc.result())

        # The value logged as "Epoch" is the global optimizer step count.
        if global_step.numpy() % 10 ==  0:
             print("Epoch : {}, Loss : {:.3f}".format(global_step.numpy(), loss))

    print("End training :{}s".format((time.time()-start)))

In [56]:
# Fresh logits model with its final Dense layer (index 4) flagged non-trainable,
# to be trained with the loop that uses model.trainable_variables.
model_4 = create_model()
model_4.layers[4].trainable = False

In [57]:
# One pass over the data with the loop that differentiates w.r.t. model.trainable_variables.
training2(model_4, train_data)

Epoch : 320, Loss : 2.334
Epoch : 330, Loss : 2.297
Epoch : 340, Loss : 2.271
Epoch : 350, Loss : 2.228
Epoch : 360, Loss : 2.262
Epoch : 370, Loss : 2.237
Epoch : 380, Loss : 2.222
Epoch : 390, Loss : 2.243
Epoch : 400, Loss : 2.177
Epoch : 410, Loss : 2.225
Epoch : 420, Loss : 2.204
Epoch : 430, Loss : 2.217
Epoch : 440, Loss : 2.062
Epoch : 450, Loss : 2.229
Epoch : 460, Loss : 2.187
Epoch : 470, Loss : 2.109
End training :9.189740896224976s


In [58]:
# Skip the first four variables and show the rest; the output starts at the Dense kernel
# of the layer that was flagged non-trainable.
model_4.variables[4:]

[<tf.Variable 'dense_6/kernel:0' shape=(16, 10) dtype=float32, numpy=
 array([[-0.35022792, -0.38278586, -0.44521713,  0.2181468 , -0.2912386 ,
          0.4096471 ,  0.47828656,  0.17735028, -0.10145628,  0.41595602],
        [ 0.47846866, -0.44271883,  0.47204155, -0.07375911, -0.34827596,
          0.3929022 ,  0.39748555, -0.11276248,  0.38856405, -0.22698146],
        [-0.13984519,  0.35900992,  0.1375565 , -0.3162738 , -0.12143007,
          0.44674718,  0.35331345,  0.07957345,  0.22580683, -0.38694674],
        [-0.10961318, -0.3276602 , -0.20238057,  0.07191193, -0.05005911,
          0.01051021,  0.45584345,  0.11884838,  0.41268438, -0.23318066],
        [-0.17419913, -0.13065568, -0.11781806, -0.12008375,  0.02953851,
         -0.17681289, -0.03113386, -0.07556254,  0.17338902,  0.43686748],
        [-0.19578063, -0.46347257, -0.24879786,  0.13706928, -0.12228966,
         -0.36498475, -0.25187021,  0.44119292,  0.47809303,  0.26554102],
        [ 0.03156841,  0.44536394, -