# 모델 훈련 조기 종료(Early Stopping)

In [None]:
# MNIST 데이터 로드 후 정규화(Normalization)

import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt

# Step 1: load MNIST as (train, test) pairs of images and integer labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: convert every label to a one-hot encoded vector.
y_train = tf.keras.utils.to_categorical(y_train)  # (60000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)

# Step 4: build a multi-layer perceptron from fully connected (Dense) layers.
# x_train.shape = (60000, 28, 28)
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])
# model.summary() can be called here to print a summary of the built model.

# The loss is categorical cross-entropy; accuracy is the reported metric.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Step 5: callback that ends training early under a given condition.
#   mode='min': stop once the monitored value has stopped decreasing.
#   mode='max': stop once the monitored value has stopped increasing.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=1,
    verbose=2,
    mode='auto',  # one of 'min', 'max', 'auto'
)

# Step 6 (disabled): training run that would use the early-stopping callback.
# ret = model.fit(x_train, y_train, epochs=100, batch_size=200,
#                 validation_split=0.2, verbose=2, callbacks=[callback])

# # 모델 학습 스케쥴러 정의하기

# pdf page 4
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The rate is multiplied by 0.1 on every even epoch except epoch 0;
    on all other epochs the incoming rate is returned unchanged.

    Args:
        epoch: zero-based epoch index.
        lr: learning rate currently in effect.

    Returns:
        ``0.1 * lr`` on non-zero even epochs, otherwise ``lr``.
    """
    decay_now = epoch != 0 and epoch % 2 == 0
    return 0.1 * lr if decay_now else lr
# Wrap `scheduler` in a Keras callback; verbose=1 asks the callback to
# report the learning rate it applies.
callback = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

# Step 6: train the model with the learning-rate schedule defined above.
ret = model.fit(
    x_train,
    y_train,
    batch_size=200,
    epochs=10,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)



# 텐서보드 사용하기

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import datetime

# Step 1: load MNIST.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (60000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)

# Step 4: Flatten -> Dense(5, sigmoid) -> Dense(10, softmax).
# x_train.shape = (60000, 28, 28)
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=5, activation='sigmoid'),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

# RMSprop optimizer, categorical cross-entropy loss, accuracy metric.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

In [None]:
# Attach a TensorBoard callback and train so the run can be inspected in
# TensorBoard.

import os

# Write the logs to <path>/3101. Joining with os.path.join matters here:
# plain string concatenation of "/content" and "3101" produces
# "/content3101", which lies outside the directory created below.
path = "/content"
os.makedirs(path, exist_ok=True)
logdir = os.path.join(path, "3101")

callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, update_freq='epoch',
                                          histogram_freq=10, write_images=True)

# Step 6: train with the TensorBoard callback attached.
ret = model.fit(x_train, y_train, epochs=10, batch_size=200,
                validation_split=0.2, verbose=2, callbacks=[callback])



In [None]:

# IPython magics: (re)load the tensorboard extension, then open TensorBoard
# on the directory held in the `logdir` variable.
%reload_ext tensorboard
%tensorboard --logdir {logdir}

# Chapter 08 그래디언트 소실과 가중치 초기화

In [None]:
# MNIST 데이터 로드후 He 초기화, ReLU 또는 LeakyReLU 함수 불러오기
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import datetime

# Step 1: load MNIST.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (60000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)

# Step 4: build a deep MLP. Initializer / activation pairs to try:
#   4-1: init = 'he_normal',  act = 'relu'
#   4-2: init = 'he_normal',  act = tf.keras.layers.LeakyReLU(alpha=0.3)
#   4-3: init = 'he_uniform', act = tf.keras.layers.LeakyReLU(alpha=0.3)  (active)
#   other: init = tf.keras.initializers.RandomUniform(0.0, 1.0), act = 'sigmoid'
#
# The initializer is passed by name so Keras creates a separate instance for
# every layer. Sharing one unseeded initializer object between layers makes
# recent Keras versions return the same values on every call, so all layers
# with the same kernel shape would start from identical weights.
init = 'he_uniform'
act = tf.keras.layers.LeakyReLU(alpha=0.3)

n = 100
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # six identical hidden layers of n units each
    *[tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init)
      for _ in range(6)],
    tf.keras.layers.Dense(units=10, activation='softmax',
                          kernel_initializer=init),
])
model.summary()
opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Step 5: create a summary file writer for the log directory.
import os

# Write the logs to <path>/3203. Joining with os.path.join matters here:
# plain string concatenation of "/content" and "3203" produces
# "/content3203", which lies outside the directory created below.
# A timestamped run name is an alternative:
#   os.path.join(path, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
path = "/content"
os.makedirs(path, exist_ok=True)
logdir = os.path.join(path, "3203")

# tf.summary calls made without an explicit writer go to this writer.
file_writer = tf.summary.create_file_writer(os.path.join(logdir, "gradient"))
file_writer.set_as_default()

#6:  calculate averages and histograms of gradients in layers
class GradientCallback(tf.keras.callbacks.Callback):
    """Log per-layer gradient magnitudes and histograms with tf.summary.

    On every epoch where ``epoch % freq == 0`` the loss is computed on the
    module-level ``x_train`` / ``y_train`` arrays, differentiated with
    respect to the trainable weights of the model being trained, and the
    results are written through ``tf.summary`` (i.e. to whichever summary
    writer is the default at that time).

    The per-layer indexing assumes that layer 0 has no trainable weights
    and that every later layer contributes exactly two trainable tensors,
    ordered (weights, biases).

    Args:
        freq: logging period in epochs.
    """

    def __init__(self, freq=10):
        # Run the Keras base-class initialisation so the attributes it
        # sets up exist on this instance.
        super().__init__()
        self.freq = freq

    def on_epoch_end(self, epoch, logs=None):
        """Write gradient summaries for ``epoch`` if it is a logging epoch."""
        if epoch % self.freq != 0:
            return

        # Use the model Keras attached to this callback, not a global.
        net = self.model
        with tf.GradientTape() as tape:
            # Model output; the models in this file end in softmax, so these
            # are class probabilities rather than logits.
            y_pred = net(x_train)
            # Same loss the models are compiled with, so the logged
            # gradients are those of the training objective.
            loss = tf.keras.losses.categorical_crossentropy(y_train, y_pred)
        grads = tape.gradient(loss, net.trainable_weights)

        for n in range(1, len(net.layers)):
            i2 = (n - 1) * 2  # index of the layer's weights gradient
            i1 = i2 + 1       # index of the layer's biases gradient

            # Mean absolute gradient as a scalar per tensor.
            bias_avg = tf.reduce_mean(tf.abs(grads[i1]))
            weight_avg = tf.reduce_mean(tf.abs(grads[i2]))

            tf.summary.scalar("layer_%d/avg/bias" % n, data=bias_avg, step=epoch)
            tf.summary.scalar("layer_%d/avg/weight" % n, data=weight_avg, step=epoch)

            # Full gradient distributions.
            tf.summary.histogram("layer_%d/hist/bias" % n, data=grads[i1], step=epoch)
            tf.summary.histogram("layer_%d/hist/weight" % n, data=grads[i2], step=epoch)

    def on_train_end(self, logs=None):
        """Flush pending summaries when training finishes."""
        tf.summary.flush()

# Gradient logging with the default period (freq=10), plus the standard
# TensorBoard callback writing to `logdir` (profile_batch=0 is an option).
callback1 = GradientCallback()
callback2 = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=10)

# Step 7: train the model, then evaluate it on the train and test sets.
ret = model.fit(
    x_train,
    y_train,
    batch_size=200,
    epochs=101,
    validation_split=0.2,
    callbacks=[callback1, callback2],
    verbose=2,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

In [None]:
# Disabled shell command that kills process 34838 (presumably an earlier
# TensorBoard instance; the PID is specific to one session).
# !kill 34838
# IPython magics: (re)load the tensorboard extension, then open TensorBoard
# on the directory held in the `logdir` variable.
%reload_ext tensorboard
%tensorboard --logdir {logdir}

# 배치 정규화


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import datetime

# Step 1: load MNIST.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (60000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)


# Step 4: build a model with BatchNormalization layers.
def _dense(units, activation):
    """Return a Dense layer with its own RandomUniform(0.0, 1.0) initializer.

    A new initializer object is created per layer: recent Keras versions
    return identical values when one unseeded initializer instance is called
    more than once, which would give same-shaped layers identical kernels.
    """
    return tf.keras.layers.Dense(
        units=units,
        activation=activation,
        kernel_initializer=tf.keras.initializers.RandomUniform(0.0, 1.0),
    )


# `init` stays defined for any code that still refers to it; the layers
# below no longer share this instance.
init = tf.keras.initializers.RandomUniform(0.0, 1.0)
act = tf.keras.layers.LeakyReLU(alpha=0.3)  # alternative: act = 'relu'

n = 100
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    _dense(n, act),
    _dense(n, act),
    _dense(n, act),
    tf.keras.layers.BatchNormalization(),

    _dense(n, act),
    _dense(n, act),
    tf.keras.layers.BatchNormalization(),

    _dense(n, act),
    _dense(10, 'softmax'),
])
model.summary()

opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])



In [None]:
#6:  calculate averages and histograms of gradients in layers
class GradientCallback(tf.keras.callbacks.Callback):
    """Log per-layer gradient magnitudes and histograms with tf.summary.

    On every epoch where ``epoch % freq == 0`` the loss is computed on the
    module-level ``x_train`` / ``y_train`` arrays, differentiated with
    respect to the trainable weights of the model being trained, and the
    results are written through ``tf.summary`` (i.e. to whichever summary
    writer is the default at that time).

    The per-layer indexing assumes that layer 0 has no trainable weights
    and that every later layer contributes exactly two trainable tensors.
    The tags call them "weight" and "bias"; for a layer whose two trainable
    tensors are something else, the tags still use those two names.

    Args:
        freq: logging period in epochs.
    """

    def __init__(self, freq=10):
        # Run the Keras base-class initialisation so the attributes it
        # sets up exist on this instance.
        super().__init__()
        self.freq = freq

    def on_epoch_end(self, epoch, logs=None):
        """Write gradient summaries for ``epoch`` if it is a logging epoch."""
        if epoch % self.freq != 0:
            return

        # Use the model Keras attached to this callback, not a global.
        net = self.model
        with tf.GradientTape() as tape:
            # Model output; the models in this file end in softmax, so these
            # are class probabilities rather than logits.
            y_pred = net(x_train)
            # Same loss the models are compiled with, so the logged
            # gradients are those of the training objective.
            loss = tf.keras.losses.categorical_crossentropy(y_train, y_pred)
        grads = tape.gradient(loss, net.trainable_weights)

        for n in range(1, len(net.layers)):
            i2 = (n - 1) * 2  # index of the layer's first trainable tensor
            i1 = i2 + 1       # index of the layer's second trainable tensor

            # Mean absolute gradient as a scalar per tensor.
            bias_avg = tf.reduce_mean(tf.abs(grads[i1]))
            weight_avg = tf.reduce_mean(tf.abs(grads[i2]))

            tf.summary.scalar("layer_%d/avg/bias" % n, data=bias_avg, step=epoch)
            tf.summary.scalar("layer_%d/avg/weight" % n, data=weight_avg, step=epoch)

            # Full gradient distributions.
            tf.summary.histogram("layer_%d/hist/bias" % n, data=grads[i1], step=epoch)
            tf.summary.histogram("layer_%d/hist/weight" % n, data=grads[i2], step=epoch)

    def on_train_end(self, logs=None):
        """Flush pending summaries when training finishes."""
        tf.summary.flush()

# Gradient logging with the default period (freq=10), plus the standard
# TensorBoard callback (profile_batch=0 is an option).
# NOTE(review): `logdir` is not assigned in this cell, so it must already be
# defined by an earlier cell; confirm that writing this run into that same
# directory is intended.
callback1 = GradientCallback()
callback2 = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=10)

# Step 7: train the model, then evaluate it on the train and test sets.
ret = model.fit(
    x_train,
    y_train,
    batch_size=200,
    epochs=101,
    validation_split=0.2,
    callbacks=[callback1, callback2],
    verbose=2,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# 과적합, 가중치 규제

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
# Step 1: load MNIST.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Keep only the first n_sample training examples so that the model overfits.
n_sample = 6000
x_train, y_train = x_train[:n_sample], y_train[:n_sample]

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (n_sample, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000,    10)

# Step 4: build a model without regularization.
act = "relu"
init = "he_uniform"
n = 100
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

# Step 4-1: configure the model for training.
opt = 'rmsprop'  # alternative: tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Step 4-2: train with the test set as validation data, then evaluate.
ret = model.fit(
    x_train,
    y_train,
    batch_size=400,
    epochs=101,
    validation_data=(x_test, y_test),
    verbose=0,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# Step 4-3: plot train vs. validation accuracy per epoch.
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.title("Without regularization by %s traing data in mnist" % n_sample)
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(loc="best")
plt.show()

In [None]:
# Step 5: build a model with L2 weight regularization on the hidden layers.
# Reuses n, act and init from the unregularized model.
reg = tf.keras.regularizers.l2(0.01)  # L2 factors to try: 0.01, 0.1, 0.5
model2 = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init,
                          kernel_regularizer=reg),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init,
                          kernel_regularizer=reg),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])
# model2.summary() can be called here to print the architecture.

# Step 5-1: configure the model for training.
model2.compile(loss='categorical_crossentropy', optimizer='rmsprop',
               metrics=['accuracy'])

# Step 5-2: train with the test set as validation data, then evaluate.
ret2 = model2.fit(
    x_train,
    y_train,
    batch_size=400,
    epochs=201,
    validation_data=(x_test, y_test),
    verbose=0,
)
train_loss2, train_acc2 = model2.evaluate(x_train, y_train, verbose=2)
test_loss2, test_acc2 = model2.evaluate(x_test, y_test, verbose=2)

# Step 5-3: plot train vs. validation accuracy per epoch.
plt.plot(ret2.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret2.history['val_accuracy'], "r-", label="val accuracy")
plt.title("With regularization by %s traing data in mnist" % n_sample)
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(loc="best")
plt.show()


# 드롭아웃

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
# Step 1: load MNIST.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Keep only the first n_sample training examples so that the model overfits.
n_sample = 6000
x_train, y_train = x_train[:n_sample], y_train[:n_sample]

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (n_sample, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000,    10)

# Step 4: build a model with a Dropout layer after each hidden Dense layer.
act = "relu"
init = "he_uniform"

n = 100
dropout_rate = 0.2

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

# Step 4-1: configure the model for training.
# Alternative optimizer: tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

# Step 4-2: train with the test set as validation data, then evaluate.
ret = model.fit(
    x_train,
    y_train,
    batch_size=400,
    epochs=201,
    validation_data=(x_test, y_test),
    verbose=0,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# Step 4-3: plot train vs. validation accuracy per epoch.
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.title("Dropout rate = %s, %s traing data in mnist" % (dropout_rate, n_sample))
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(loc="best")
plt.show()

In [None]:
# Same architecture as the dropout model above, with a higher dropout rate.
# Reuses n, act, init, n_sample and the prepared data from that cell.
dropout_rate = 0.5

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

# Step 4-1: configure the model for training.
# Alternative optimizer: tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
              metrics=['accuracy'])

# Step 4-2: train with the test set as validation data, then evaluate.
ret = model.fit(
    x_train,
    y_train,
    batch_size=400,
    epochs=201,
    validation_data=(x_test, y_test),
    verbose=0,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# Step 4-3: plot train vs. validation accuracy per epoch.
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.title("Dropout rate = %s, %s traing data in mnist" % (dropout_rate, n_sample))
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(loc="best")
plt.show()

In [None]:
# 드롭아웃: CIFAR-10

import tensorflow as tf
from tensorflow.keras.datasets import cifar10
import numpy as np
import matplotlib.pyplot as plt
# Step 1: load CIFAR-10.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255.0
x_test /= 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (50000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)

# Step 4: build a model with a Dropout layer after each hidden Dense layer.
act = tf.keras.layers.LeakyReLU(alpha=0.3)
init = "he_uniform"
n = 100
dropout_rate = 0.2
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(32, 32, 3)))
model.add(tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init))
model.add(tf.keras.layers.Dropout(rate=dropout_rate))

model.add(tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init))
model.add(tf.keras.layers.Dropout(rate=dropout_rate))

model.add(tf.keras.layers.Dense(units=10, activation="softmax"))


# Step 4-1: configure the model for training.
# The optimizer must be a valid Keras identifier (or an optimizer object):
# the string 'opt' is not one and makes compile() fail, so use 'rmsprop'.
# Alternative: tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Step 4-2: train with the test set as validation data, then evaluate.
ret = model.fit(x_train, y_train, epochs=201, batch_size=400,
                validation_data=(x_test, y_test), verbose=2)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# Step 4-3: plot train vs. validation accuracy per epoch.
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend(loc="best")
plt.show()


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
import numpy as np
import matplotlib.pyplot as plt

# Step 1: load CIFAR-10.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Step 2: cast the images to float32 and divide by 255 -> values in [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Step 3: one-hot encode the labels.
y_train = tf.keras.utils.to_categorical(y_train)  # (50000, 10)
y_test = tf.keras.utils.to_categorical(y_test)    # (10000, 10)

# Step 4: build a model with a Dropout layer after each hidden Dense layer.
act = tf.keras.layers.LeakyReLU(alpha=0.3)  # alternatives: 'relu', 'sigmoid'
init = 'he_uniform'
n = 100
dropout_rate = 0.2  # alternative: 0.5
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(32, 32, 3)),
    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=n, activation=act, kernel_initializer=init),
    tf.keras.layers.Dropout(rate=dropout_rate),

    tf.keras.layers.Dense(units=10, activation='softmax'),
])
model.summary()

# Step 4-1: configure the model for training.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])

# Step 4-2: train with the test set as validation data, then evaluate.
ret = model.fit(
    x_train,
    y_train,
    batch_size=400,
    epochs=201,
    validation_data=(x_test, y_test),
    verbose=0,
)

train_loss, train_acc = model.evaluate(x_train, y_train, verbose=2)
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)

# Step 4-3: plot train vs. validation accuracy per epoch.
plt.plot(ret.history['accuracy'], "b-", label="train accuracy")
plt.plot(ret.history['val_accuracy'], "r-", label="val accuracy")
plt.ylabel('accuracy')
plt.xlabel('epochs')
plt.legend(loc="best")
plt.show()
