# 변이형 오토인코더 훈련 - 얼굴 데이터셋

## 라이브러리 임포트

In [14]:
import os
from glob import glob
import numpy as np

from models.VAE import VariationalAutoencoder
from keras.preprocessing.image import ImageDataGenerator

In [44]:
# Debug probe for the path that appeared in an
# "[Errno 2] No such file or directory" error.
# Note: os.path.isdir() is True only for directories, so for a path ending in
# a file name such as model.png it returns False even when the file exists;
# os.path.exists() / os.path.isfile() are the checks for a file.
os.path.isdir("run/vae/0001_faces\\viz/model.png")
# RUN_FOLDER

False

In [34]:
# Debug probe: does the run folder already exist? (recorded output: True)
# RUN_FOLDER is assigned further down in this file, so this cell only works
# once that assignment has been executed in the current session.
os.path.exists(RUN_FOLDER)

True

In [45]:
# Run parameters: together they determine the folder that holds this
# experiment's outputs (RUN_FOLDER == 'run/vae/0001_faces').
section = 'vae'
run_id = '0001'
data_name = 'faces'
RUN_FOLDER = 'run/{}/{}'.format(section, '_'.join([run_id, data_name]))

# Make sure the run folder AND each output subfolder exist.
#
# os.makedirs(..., exist_ok=True) is used instead of a single
# `if not os.path.exists(RUN_FOLDER): os.mkdir(...)` guard because:
#   * it creates missing parents (e.g. 'run/vae'), where os.mkdir would raise;
#   * it is checked per subfolder, so a run folder that already exists but
#     lacks 'viz', 'images' or 'weights' still gets them created;
#   * it is a no-op for folders that are already present.
for subfolder in ('viz', 'images', 'weights'):
    os.makedirs(os.path.join(RUN_FOLDER, subfolder), exist_ok=True)

# NOTE (author's original remark, translated):
# (1) First puzzle: os.path.exists(RUN_FOLDER) returned True, so the old
#     `if not os.path.exists(RUN_FOLDER):` guard would not run and none of the
#     os.mkdir calls would execute -- which means viz, images and weights
#     would not be created either. Still, the error
#     [Errno 2] No such file or directory: 'run/vae/0001_faces\\viz/model.png'
#     was raised, which seemed like it should not happen.
# (2) Workaround tried: the guard was flipped to
#     `if os.path.exists(RUN_FOLDER):` so that the 'viz', 'images' and
#     'weights' folders get created under RUN_FOLDER one way or the other.
#     The error then disappeared; the folders were really created and files
#     appeared inside them, to the author's surprise.
# Explanation: the error is what one would expect when RUN_FOLDER exists but
# its subfolders do not, because the old guard skipped subfolder creation in
# exactly that case. The per-subfolder makedirs above covers it.

# Folder holding the face images; the path is relative to the GDL_code
# checkout (on the author's machine: C:/Windows/System32/GDL_code/data/celeb).
DATA_FOLDER = './data/celeb/'

# Either 'build' or 'load' (the alternative left in the original comment).
mode = 'build'

## 데이터 적재

*CelebA 사이트에서 `img_align_celeba.zip` 파일을 다운로드 받은 후 `data/celeb/` 폴더 안에서 압축을 해제하세요. `data/celeb/img_align_celeba` 폴더에 이미지가 저장되어야 합니다.*

*`list_attr_celeba.csv` 파일은 깃허브에 포함되어 있으므로 다운로드 받을 필요가 없습니다.*

In [46]:
# Batch size and image shape used by the data generator and the model below.
BATCH_SIZE = 32
INPUT_DIM = (128, 128, 3)

# Every .jpg exactly one directory level below DATA_FOLDER.
filenames = np.asarray(glob(os.path.join(DATA_FOLDER, '*/*.jpg')))

# Number of images found; used later to derive the steps per epoch.
NUM_IMAGES = filenames.shape[0]

In [47]:
# Generator that multiplies every pixel value by 1/255.
data_gen = ImageDataGenerator(rescale=1. / 255)

# Stream shuffled batches straight from disk.
# class_mode='input' makes each batch's target identical to its input images.
# No validation_split is configured on data_gen; the recorded output shows
# that subset="training" still yielded all 202599 images.
data_flow = data_gen.flow_from_directory(
    DATA_FOLDER,
    target_size=INPUT_DIM[:2],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='input',
    subset="training",
)

Found 202599 images belonging to 1 classes.


## 모델 만들기

In [48]:
# Build the VAE: four stride-2 convolutions in the encoder, four stride-2
# transposed convolutions in the decoder, and a 200-dimensional latent space.
vae = VariationalAutoencoder(
    input_dim=INPUT_DIM,
    encoder_conv_filters=[32, 64, 64, 64],
    encoder_conv_kernel_size=[3, 3, 3, 3],
    encoder_conv_strides=[2, 2, 2, 2],
    decoder_conv_t_filters=[64, 64, 32, 3],
    decoder_conv_t_kernel_size=[3, 3, 3, 3],
    decoder_conv_t_strides=[2, 2, 2, 2],
    z_dim=200,
    use_batch_norm=True,
    use_dropout=True,
)

# A fresh build is saved into the run folder; any other mode restores
# previously stored weights from RUN_FOLDER/weights/weights.h5.
# NOTE(review): what vae.save() writes is defined in models/VAE.py, which is
# not visible here -- confirm there.
if mode == 'build':
    vae.save(RUN_FOLDER)
else:
    vae.load_weights(os.path.join(RUN_FOLDER, 'weights/weights.h5'))

In [49]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the encoder.
vae.encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
encoder_conv_0 (Conv2D)         (None, 64, 64, 32)   896         encoder_input[0][0]              
__________________________________________________________________________________________________
batch_normalization_36 (BatchNo (None, 64, 64, 32)   128         encoder_conv_0[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_36 (LeakyReLU)      (None, 64, 64, 32)   0           batch_normalization_36[0][0]     
__________________________________________________________________________________________________
dropout_36

In [50]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the decoder.
vae.decoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decoder_input (InputLayer)   (None, 200)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 4096)              823296    
_________________________________________________________________
reshape_6 (Reshape)          (None, 8, 8, 64)          0         
_________________________________________________________________
decoder_conv_t_0 (Conv2DTran (None, 16, 16, 64)        36928     
_________________________________________________________________
batch_normalization_40 (Batc (None, 16, 16, 64)        256       
_________________________________________________________________
leaky_re_lu_40 (LeakyReLU)   (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_40 (Dropout)         (None, 16, 16, 64)        0         
__________

## 모델 훈련

*주의: 이 훈련은 시간이 오래 걸립니다. 깃허브에 훈련된 모델이 포함되어 있으므로 아래 셀에서 VAE를 직접 훈련하지 않아도 `03_06_vae_faces_analysis.ipynb` 노트북을 실행할 수 있습니다.*

In [51]:
# Training schedule, passed to vae.train_with_generator().
EPOCHS = 200
INITIAL_EPOCH = 0
PRINT_EVERY_N_BATCHES = 100

# Values passed to vae.compile().
# NOTE(review): R_LOSS_FACTOR presumably weights the reconstruction loss
# against the KL loss -- confirm in models/VAE.py.
LEARNING_RATE = 5e-4
R_LOSS_FACTOR = 10000

In [52]:
# Compile the model with the learning rate and reconstruction-loss factor set above.
# NOTE(review): arguments are positional; their parameter names live in
# models/VAE.py, which is not visible here.
vae.compile(LEARNING_RATE, R_LOSS_FACTOR)

In [54]:
# Takes roughly 2 hours.
#
# steps_per_epoch is rounded up to a whole number of batches. Plain true
# division (NUM_IMAGES / BATCH_SIZE) yields a float such as 6331.22, whereas
# Keras documents steps_per_epoch as an integer. Rounding up (rather than
# down) keeps the final partial batch and never produces 0 steps for a
# dataset smaller than one batch.
# NOTE(review): train_with_generator is defined in models/VAE.py (not visible
# here); presumably it forwards steps_per_epoch to Keras -- confirm.
vae.train_with_generator(
    data_flow,
    epochs=EPOCHS,
    steps_per_epoch=int(np.ceil(NUM_IMAGES / BATCH_SIZE)),
    run_folder=RUN_FOLDER,
    print_every_n_batches=PRINT_EVERY_N_BATCHES,
    initial_epoch=INITIAL_EPOCH,
)

Epoch 1/200
 490/6331 [=>............................] - ETA: 2:46:46 - loss: 377.0558 - vae_r_loss: 296.9861 - vae_kl_loss: 80.0698

KeyboardInterrupt: 