## Variational Convolution Auto Encoder

일자 : 2017. 07. 27

In [None]:
%matplotlib inline

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from functools import partial
import os

### [Optional.  Tensorflow Graph Visualization ]

---

> _Jupyter 노트북 안에서 Tensorflow로 구성한 Graph를 시각적으로 보여주기 위한 helper 함수입니다._<br>

In [None]:
from IPython.display import clear_output, Image, display, HTML
import numpy as np    

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of ``graph_def`` with large constant payloads removed.

    Args:
        graph_def: GraphDef-like message whose ``node`` entries are copied.
        max_const_size: Largest ``tensor_content`` length (in bytes) that is
            kept unchanged.

    Returns:
        A new ``tf.GraphDef``. ``Const`` nodes whose ``tensor_content`` is
        longer than ``max_const_size`` carry a short placeholder instead of
        the raw bytes; every other node is copied as-is.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf ``bytes`` field: assigning a
                # ``str`` raises TypeError on Python 3, so encode the marker.
                marker = "<stripped %d bytes>" % size
                tensor.tensor_content = marker.encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline in the notebook.

    Args:
        graph_def: Either a GraphDef or any object exposing
            ``as_graph_def()`` (in which case that method is called first).
        max_const_size: Forwarded to ``strip_consts``; constants with more
            content bytes than this are replaced by a placeholder.

    The stripped graph is embedded as text into an HTML page that loads the
    ``tf-graph-basic`` component from tensorboard.appspot.com, and that page
    is shown inside an iframe.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # Text form of the graph, quoted so it can be pasted into the script.
    pbtxt_literal = repr(str(stripped))
    # Random suffix so that several rendered graphs get distinct element ids.
    element_id = 'graph' + str(np.random.rand())

    page_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    page = page_template.format(data=pbtxt_literal, id=element_id)

    frame_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # The page sits inside a double-quoted srcdoc attribute, so its own
    # double quotes have to be escaped.
    frame = frame_template.format(page.replace('"', '&quot;'))

    display(HTML(frame))

<br>

# \[ Fashion MNIST \]

---
---

<br>

## 1. 데이터 가져오기

* Fashion MNIST 데이터를 가져오도록 하겠습니다.

In [None]:
# 데이터 다운로드
path = "https://s3.ap-northeast-2.amazonaws.com/pai-datasets/all-about-mnist/fashionmnist/train.csv"
if not os.path.exists("fashion_mnist.csv"):
    !wget path -o ./fashion_mnist.csv
    
    

In [None]:
# Load the downloaded fashionmnist/train.csv
df = pd.read_csv("fashion_mnist.csv")

# The first column is used as the label; the remaining columns are the
# pixel values, reshaped into one 28x28 array per row.
labels = df.iloc[:, 0].values
images = df.iloc[:, 1:].values.reshape(-1, 28, 28)

## 2. 데이터 읽어보기

* MNIST와 같이 총 10가지의 라벨이 존재합니다.

In [None]:
# Class names, looked up by the integer label of each image.
label_names = [
    "T-shirt", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

n_row = 4
n_col = 4
fig = plt.figure(figsize=(7, 7))

# Draw the first n_row * n_col samples on a grid, each titled with its class.
for idx, (image, label) in enumerate(
        zip(images[:n_row * n_col], labels[:n_row * n_col]), 1):
    ax = fig.add_subplot(n_row, n_col, idx)
    ax.imshow(image, cmap='gray')
    ax.set_title(label_names[label])
    # Hide the axis ticks.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

<br>

# \[ Neural Network Modeling \]

---
---

이번 시간에는 `Variational Convolution Auto Encoder`를 모델링하는 시간을 가지도록 하겠습니다. Tensorflow를 이용해보도록 하겠습니다.

### (1) 인코더 구성하기

In [None]:
# Layer factories shared by the encoder blocks: 'same'-padded convolutions
# with leaky-ReLU activation, batch normalization, and 2x2 / stride-2 pooling.
Conv2D = partial(tf.layers.Conv2D,
                 padding='same',
                 activation=tf.nn.leaky_relu)
BatchNorm = tf.layers.BatchNormalization
MaxPooling2D = partial(tf.layers.MaxPooling2D,
                       pool_size=(2,2),
                       strides=(2,2))

latent_size = 16  # number of units in the code_mean / code_sigma layers
graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, shape=(None,28,28,1), name='images')
    # Passed to every BatchNormalization call; defaults to inference mode.
    is_train = tf.placeholder_with_default(False, None, name='is_train')
    lr = tf.placeholder_with_default(0.01, None, name='learning_rate')
    
    # Encoder Block 1: two 8-filter convolutions, then pooling (28x28 -> 14x14)
    with tf.variable_scope('encoder_block1'):
        conv1_1 = Conv2D(8,(3,3),name='conv1_1')(x)
        norm1_1 = BatchNorm(name='norm1_1')(conv1_1, training=is_train)
        conv1_2 = Conv2D(8,(3,3),name='conv1_2')(norm1_1)
        norm1_2 = BatchNorm(name='norm1_2')(conv1_2, training=is_train)
        pool1 = MaxPooling2D(name='pool1')(norm1_2)
    
    # Encoder Block 2: two 16-filter convolutions, then pooling (14x14 -> 7x7)
    with tf.variable_scope('encoder_block2'):    
        conv2_1 = Conv2D(16,(3,3),name='conv2_1')(pool1)
        norm2_1 = BatchNorm(name='norm2_1')(conv2_1,training=is_train)
        conv2_2 = Conv2D(16,(3,3),name='conv2_2')(norm2_1)
        norm2_2 = BatchNorm(name='norm2_2')(conv2_2,training=is_train)
        # Layer name spelled 'pool2' (was 'poo12', with a digit one) so it
        # matches 'pool1' in the graph.
        pool2 = MaxPooling2D(name='pool2')(norm2_2)

    # Encoder Block 3: two 32-filter convolutions, then pooling
    # (7x7 -> 3x3, assuming the layer's default 'valid' pooling padding)
    with tf.variable_scope('encoder_block3'):    
        conv3_1 = Conv2D(32,(3,3),name='conv3_1')(pool2)
        norm3_1 = BatchNorm(name='norm3_1')(conv3_1,training=is_train)
        conv3_2 = Conv2D(32,(3,3),name='conv3_2')(norm3_1)
        norm3_2 = BatchNorm(name='norm3_2')(conv3_2,training=is_train)
        # Layer name spelled 'pool3' (was 'poo13', with a digit one).
        pool3 = MaxPooling2D(name='pool3')(norm3_2)

In [None]:
# Render the graph built so far (the encoder blocks) inline in the notebook.
show_graph(graph)

### (2) Reparameterization Trick 구현하기

우리는 인코더에서 만들어낸 code의 평균과 표준편차에서, 하나의 Code를 랜덤하게 추출하게 됩니다.<br>
이렇게 랜덤하게 추출하게 되더라도, 잠재 공간 내 이미지들은 유사해야 한다는 가정이 있어,<br>
Decoder는 원래 이미지로 복원하는 방향으로 학습되게 됩니다.

$\mathrm{softplus}(x) = \log(1 + e^{x})$ 

In [None]:
with graph.as_default():
    with tf.variable_scope('reparameterization_trick'):
        # Flatten the last encoder feature map and map it to the two
        # latent_size-wide parameter vectors of the code.
        flatten = tf.layers.Flatten()(pool3)
        code_mean = tf.layers.Dense(units=latent_size,
                                    name='code_mean')(flatten)
        # softplus keeps every entry of code_sigma positive.
        code_sigma = tf.layers.Dense(units=latent_size,
                                     name='code_var',
                                     activation=tf.nn.softplus)(flatten)

        # Reparameterization trick: the randomness lives in `noise`, and the
        # sample is a deterministic function of code_mean and code_sigma.
        noise = tf.random.normal(shape=tf.shape(code_sigma))
        sampled_code = code_mean + code_sigma * noise

    # Give the sampled code a fixed name outside the variable scope.
    sampled_code = tf.identity(sampled_code, name="sampled_code")

In [None]:
# Render the graph again, now including the reparameterization ops.
show_graph(graph)

### (3) Decoder 구성하기

여느 Stacked Auto Encoder와 같이 복원하는 방향으로 학습하게 됩니다.

In [None]:
from tensorflow.keras.layers import UpSampling2D, ZeroPadding2D

In [None]:
# Transposed-convolution factory for the decoder: 'same' padding and
# leaky-ReLU activation unless a call overrides them.
Conv2DTranspose = partial(tf.layers.Conv2DTranspose,
                          padding='same',
                          activation=tf.nn.leaky_relu)

with graph.as_default():
    # Expand the sampled code to as many units as the last encoder feature
    # map has, then restore that map's (height, width, channels) layout.
    _, h_encoded, w_encoded, n_encoded = pool3.shape.as_list()
    decoded = tf.layers.Dense(units=h_encoded*w_encoded*n_encoded,
                              activation=tf.nn.leaky_relu,
                              name='dense_decoded')(sampled_code)
    decoded = tf.reshape(decoded, (-1, h_encoded, w_encoded, n_encoded))

    # Decoder Block 3
    with tf.variable_scope('decoder_block3'):
        upsample3 = UpSampling2D(size=(2, 2), name='upsample3')(decoded)
        # One extra row at the bottom and one extra column on the right.
        padding = ZeroPadding2D(padding=((0, 1), (0, 1)),
                                name='padding')(upsample3)
        deconv3_1 = Conv2DTranspose(filters=32, kernel_size=(3, 3),
                                    name='deconv3_1')(padding)
        denorm3_1 = BatchNorm(name='denorm3_1')(deconv3_1, training=is_train)
        deconv3_2 = Conv2DTranspose(filters=32, kernel_size=(3, 3),
                                    name='deconv3_2')(denorm3_1)
        denorm3_2 = BatchNorm(name='denorm3_2')(deconv3_2, training=is_train)

    # Decoder Block 2
    with tf.variable_scope('decoder_block2'):
        upsample2 = UpSampling2D(size=(2, 2), name='upsample2')(denorm3_2)
        deconv2_1 = Conv2DTranspose(filters=16, kernel_size=(3, 3),
                                    name='deconv2_1')(upsample2)
        denorm2_1 = BatchNorm(name='denorm2_1')(deconv2_1, training=is_train)
        deconv2_2 = Conv2DTranspose(filters=16, kernel_size=(3, 3),
                                    name='deconv2_2')(denorm2_1)
        denorm2_2 = BatchNorm(name='denorm2_2')(deconv2_2, training=is_train)

    # Decoder Block 1: ends in a single-channel sigmoid layer, so every
    # output value lies in (0, 1).
    with tf.variable_scope('decoder_block1'):
        upsample1 = UpSampling2D(size=(2, 2), name='upsample1')(denorm2_2)
        deconv1_1 = Conv2DTranspose(filters=8, kernel_size=(3, 3),
                                    name='deconv1_1')(upsample1)
        outputs = Conv2DTranspose(filters=1, kernel_size=(3, 3),
                                  activation='sigmoid',
                                  name='output')(deconv1_1)


In [None]:
# Render the graph again, now including the decoder blocks.
show_graph(graph)

### (4) Loss 구성하기

Sparse Autoencoder와 같이, 우리는 랜덤하게 뽑아진 Code의 분포가 정규분포의 형태를 지켰는지에 대해 Regularization 손실을 걸어주게 됩니다.<br>

In [None]:
epsilon = 1e-8  # added inside every tf.log so that log(0) is never taken
with graph.as_default():        
    with tf.variable_scope("losses"):
        # Per-pixel binary cross-entropy between the input and the
        # reconstruction, summed over ALL pixels of an image (height, width,
        # channel). Summing over axis 1 alone would leave a (batch, 28, 1)
        # tensor whose mean is only 1/28 of the per-image sum, which
        # under-weights reconstruction relative to the KL term.
        reconstruction_loss = -tf.reduce_sum(
            x * tf.log(epsilon+outputs) +
            (1-x) * tf.log(epsilon+1-outputs),
            axis=[1, 2, 3])
        # Average the per-image sums over the batch.
        reconstruction_loss = tf.reduce_mean(reconstruction_loss)
        with tf.variable_scope("kl_divergence"):
            # KL divergence between N(code_mean, code_sigma^2) and N(0, 1),
            # summed over the latent dimensions and averaged over the batch.
            latent_loss = 0.5 * tf.reduce_sum(
                tf.square(code_mean) + tf.square(code_sigma) - 
                tf.log(epsilon + tf.square(code_sigma)) - 1,1)
            latent_loss = tf.reduce_mean(latent_loss)
        loss = reconstruction_loss + latent_loss

In [None]:
# Render the graph again, now including the loss ops.
show_graph(graph)

### (5) 학습을 위한 operation 구성하기

In [None]:
with graph.as_default():
    with tf.variable_scope('metric'):
        # Root-mean-squared error between input and reconstruction; used for
        # reporting only, it is not part of the optimized loss.
        mse = tf.losses.mean_squared_error(x, outputs)
        rmse = tf.sqrt(mse, name='rmse')
        
    with tf.variable_scope('train'):
        # The BatchNormalization layers register their moving mean/variance
        # updates in the UPDATE_OPS collection. Nothing runs them unless the
        # train step depends on them, so attach them here.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = (tf.train
                        .AdamOptimizer(lr)
                        .minimize(loss, name='train_op'))

### (6) 모델 학습시키기

In [None]:
from tqdm import tqdm

In [None]:
num_epoch = 100 # number of epochs
num_batch = 128 # batch size
num_data = len(images) # number of samples
num_step = num_data // num_batch # training steps per epoch

with graph.as_default():
    sess = tf.Session(graph=graph)
    
    sess.run(tf.global_variables_initializer())

    # Moving mean/variance updates of the BatchNormalization layers. They
    # are fetched together with train_op so the statistics used in inference
    # mode are actually maintained while training.
    bn_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    for i in range(num_epoch):        
        for j in tqdm(range(num_step)):
            # Add the channel axis; dividing by 255 already yields a new
            # float array, so no explicit copy is needed.
            batch_images = images[j*num_batch:(j+1)*num_batch,:,:,None] / 255.
            # is_train must be fed True here: it defaults to False, which
            # would leave batch normalization in inference mode throughout.
            sess.run([train_op, bn_update_ops],
                     feed_dict={x:batch_images,
                                lr:0.001,
                                is_train:True})

        # Report once per epoch (the message is labelled per epoch), on the
        # last training batch and in inference mode (is_train defaults to
        # False). Skipped when there was no full batch to train on.
        if num_step > 0:
            rec, lat, tot, rmse_value = sess.run(
                [reconstruction_loss,latent_loss, loss, rmse], 
                feed_dict={x:batch_images})
            print("{:2d}th epoch 전체 손실 : {:.3f}, 재구성 손실 : {:.3f} 잠재 손실 : {:.3f} RMSE : {:.3f}"
                  .format(i,tot,rec,lat, rmse_value))

In [None]:
        rec, lat, tot, rmse_value = sess.run(
            [reconstruction_loss,latent_loss, loss, rmse], 
            feed_dict={x:train_images[:,:,:,None]})
        print("{:2d}th epoch 전체 손실 : {:.3f}, 재구성 손실 : {:.3f} 잠재 손실 : {:.3f} RMSE : {:.3f}"
              .format(i,tot,rec,lat, rmse_value))