# Load the feature data, train the VAE model, and save the mean, std, and model weights

In [0]:
%matplotlib inline

In [0]:
import os
import keras
import pickle

import numpy as np
import matplotlib.pyplot as plt

from keras.layers import Dense, Input
from keras.layers import Conv2D, Flatten, Lambda
from keras.layers import Reshape, Conv2DTranspose
from keras.models import Model
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras.layers.advanced_activations import LeakyReLU
from keras import backend as K

In [0]:
# Expose only the CUDA device with index 1 to this process by setting the
# CUDA_VISIBLE_DEVICES environment variable.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [0]:
# Hyperparameters for the convolutional VAE.
# Each setting is assigned exactly once; values that were assigned or tried
# earlier are kept in the trailing comments for reference only.
input_shape = (128, 48, 1)  # earlier variants: (129, 48, 1), (40, 48, 1)
intermediate_dim = 128      # earlier variant: 512
latent_dim = 20             # earlier value: 40
batch_size = 3              # earlier variant: 16
kernel_size = 3             # earlier value: 6
stride_size = 1             # earlier value: 3
filters = 4                 # earlier value: 16; doubled before each encoder conv layer
epochs = 100

In [0]:
# This cell defines the model architecture -- the important part.
# It compiles for now, but the graphviz error will need fixing later if this model is going to be used.
def sampling(args):
    """Draw a latent vector with the reparameterization trick.

    # Arguments
        args (tensor): pair (z_mean, z_log_var), the mean and the log of the
            variance of Q(z|X)
    # Returns
        z (tensor): z_mean + exp(0.5 * z_log_var) * epsilon, where epsilon is
            noise drawn from K.random_normal
    """
    mean, log_var = args
    # Dynamic batch size combined with the static second dimension of the mean.
    noise_shape = (K.shape(mean)[0], K.int_shape(mean)[1])
    # K.random_normal defaults to mean=0 and std=1.0.
    noise = K.random_normal(shape=noise_shape)
    std_dev = K.exp(0.5 * log_var)
    return mean + std_dev * noise

# VAE model = encoder + decoder
# This script builds three Keras models (`encoder`, `decoder`, `vae`), attaches
# the VAE loss with `add_loss`, and compiles `vae` with the rmsprop optimizer.
# NOTE: it mutates the module-level `filters` (doubled twice in the encoder
# loop, halved twice in the decoder loop), so the value is back at its starting
# point once the whole cell has run.

# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
# Two Conv2D layers; the number of filters doubles before each layer.
for i in range(2):
    filters *= 2
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               activation='tanh',
               strides=stride_size,
               padding='valid')(x)

# shape info needed to build decoder model
# (static shape of the last conv output; the decoder's Dense/Reshape use it)
shape = K.int_shape(x)

# generate latent vector Q(z|X)
x = Flatten()(x)
x = Dense(intermediate_dim, activation='tanh')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
# The encoder returns [z_mean, z_log_var, z]; index 2 is the sampled latent vector.
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
# plot_model(encoder, to_file='../data/vae_cnn_encoder.png', show_shapes=True)

# build decoder model
# Project the latent vector back to the flattened size of the last encoder
# feature map, then restore its spatial layout.
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(shape[1] * shape[2] * shape[3], activation='tanh')(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# Two Conv2DTranspose layers; the number of filters halves after each layer.
for i in range(2):
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        activation='tanh',
                        strides=stride_size,
                        padding='valid')(x)
    filters //= 2

# Final single-filter layer producing the reconstruction.
# NOTE(review): the sigmoid bounds the output to (0, 1), while the training
# cells feed inputs standardised as (data - x_mean) / x_std, which can be
# negative or greater than 1 -- confirm this activation is intended.
outputs = Conv2DTranspose(filters=1,
                          kernel_size=kernel_size,
                          activation='sigmoid',
                          padding='same',
                          name='decoder_output')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
# plot_model(decoder, to_file='../data/vae_cnn_decoder.png', show_shapes=True)

# instantiate VAE model
# Feed the encoder's sampled z (output index 2) through the decoder.
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

# Reconstruction term: MSE over the flattened input and output tensors ...
reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs))

# ... scaled by input_shape[0] * input_shape[1].
reconstruction_loss *= input_shape[0] * input_shape[1]
# KL term: summed over the last axis, then scaled by -5e-4.
# NOTE(review): the usual VAE factor is -0.5; -5e-4 weights the KL term far
# less -- presumably intentional, confirm.
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -5e-4
vae_loss = K.mean(reconstruction_loss + kl_loss)
# The loss is attached with add_loss, so compile() is given no loss argument.
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
# The next line only saves a model-visualisation file, so deal with its error
# if and when the plot is actually needed.
# plot_model(vae, to_file='../data/vae_cnn.png', show_shapes=True)
vae.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 128, 48, 1)   0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 126, 46, 8)   80          encoder_input[0][0]              
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 124, 44, 16)  1168        conv2d_1[0][0]                   
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 87296)        0           conv2d_2[0][0]                   
___________

  'be expecting any data to be passed to {0}.'.format(name))


# Finding the approximate mean and std of the data

In [0]:
%%time

# Estimate the dataset-level mean and std from one randomly chosen frame per
# training feature file, and count the total number of training frames.

# Workaround for the numpy pickle-load error: route np.load through a wrapper
# that defaults allow_pickle=True.  The wrapper remembers the function it
# replaced, so re-running this cell unwraps it first instead of stacking
# wrappers (stacked wrappers pass allow_pickle twice and raise TypeError).
# SECURITY: allow_pickle=True can execute arbitrary code while loading; only
# load feature files from a trusted source.
np_load_old = getattr(np.load, '_np_load_original', np.load)


def _np_load_allow_pickle(*args, **kwargs):
    """Call the original np.load, defaulting allow_pickle to True."""
    kwargs.setdefault('allow_pickle', True)
    return np_load_old(*args, **kwargs)


_np_load_allow_pickle._np_load_original = np_load_old
np.load = _np_load_allow_pickle

x_train = []

# Directory holding the training feature files; list it once so that n_files
# and the loop below see the same set of files.
train_features_path = '../son/feature_train/'
train_filenames = sorted(os.listdir(train_features_path))
n_files = len(train_filenames)
n_train = 0
for filename in train_filenames:
    full_filename = os.path.join(train_features_path, filename)
    print(full_filename)
    data = np.load(full_filename)
    n_train += data.shape[0]
    if data.shape[0] == 0:
        # Nothing to sample from; np.random.randint(0, size=1) would raise.
        continue
    # Keep a single random frame per file: the statistics are only an
    # approximation, which avoids holding every frame in memory.
    # (Original author's note here: "23 data items".)
    x_train.append(data[np.random.randint(data.shape[0], size=1)])

print(len(x_train))
if not x_train:
    raise ValueError(
        'no non-empty feature files found in {!r}'.format(train_features_path))
x_train = np.vstack(x_train)
x_mean = np.mean(x_train)
x_std = np.std(x_train)

print(x_train.shape, n_train)
print('mean', x_mean, 'std', x_std)

../son/feature_train/163-122947-0000
../son/feature_train/163-122947-0001
../son/feature_train/163-122947-0002
../son/feature_train/163-122947-0003
../son/feature_train/163-122947-0004
../son/feature_train/163-122947-0005
../son/feature_train/163-122947-0006
../son/feature_train/163-122947-0007
../son/feature_train/163-122947-0008
../son/feature_train/163-122947-0009
../son/feature_train/163-122947-0010
../son/feature_train/163-122947-0011
../son/feature_train/163-122947-0012
../son/feature_train/163-122947-0013
../son/feature_train/163-122947-0014
../son/feature_train/163-122947-0015
../son/feature_train/163-122947-0016
../son/feature_train/163-122947-0017
../son/feature_train/163-122947-0018
../son/feature_train/163-122947-0019
../son/feature_train/163-122947-0020
../son/feature_train/163-122947-0021
../son/feature_train/163-122947-0022
../son/feature_train/19-198-0000
../son/feature_train/19-198-0001
../son/feature_train/19-198-0002
../son/feature_train/19-198-0003
../son/feature_tr

In [0]:
# Count the feature frames in the test split.  This only counts frames, so it
# has no effect on training.
# np.load is called without allow_pickle here, so this relies on the np.load
# wrapper installed earlier in the notebook.
# test_features_path = '/home/ds/DataScience/Datasets/LibriSpeech/VAELibriSpeech/test-clean-wav/'
test_features_path = '../son/feature_test/'
# List the directory once so n_files always matches the files iterated below.
test_filenames = sorted(os.listdir(test_features_path))
n_files = len(test_filenames)
n_test = 0
for filename in test_filenames:
    full_filename = os.path.join(test_features_path, filename)
    print(full_filename)
    data = np.load(full_filename)
    n_test += data.shape[0]

# Counts noted on earlier runs: (1047736, 30548) and (1456, 413).
(n_train, n_test)

../son/feature_test/118-47824-0079
../son/feature_test/118-47824-0080
../son/feature_test/118-47824-0081
../son/feature_test/118-47824-0082
../son/feature_test/118-47824-0083
../son/feature_test/118-47824-0084
../son/feature_test/118-47824-0085
../son/feature_test/118-47824-0086
../son/feature_test/19-198-0031
../son/feature_test/19-198-0032
../son/feature_test/19-198-0033
../son/feature_test/19-198-0034
../son/feature_test/19-198-0035
../son/feature_test/19-198-0036
../son/feature_test/19-198-0037
../son/feature_test/32-21631-0014
../son/feature_test/32-21631-0015
../son/feature_test/32-21631-0016
../son/feature_test/32-21631-0017
../son/feature_test/32-21631-0018
../son/feature_test/32-21631-0019
../son/feature_test/NB10627990
../son/feature_test/NB10629447
../son/feature_test/NB10631198
../son/feature_test/NB10633045
../son/feature_test/NB10637509
../son/feature_test/NB10639096
../son/feature_test/NB10640807


(885, 254)

In [0]:
# Save the normalisation statistics.  The `with` blocks close each file handle
# as soon as the dump finishes, so the data is flushed to disk.
with open('../data/x_mean.pkl', 'wb') as mean_file:
    pickle.dump(x_mean, mean_file)
with open('../data/x_std.pkl', 'wb') as std_file:
    pickle.dump(x_std, std_file)

In [0]:
# Load the saved normalisation statistics; they are used below when training
# the VAE to produce its weights.  The `with` blocks close the file handles.
with open('../data/x_mean.pkl', 'rb') as mean_file:
    x_mean = pickle.load(mean_file)
with open('../data/x_std.pkl', 'rb') as std_file:
    x_std = pickle.load(std_file)

In [0]:
%%time
# Train the VAE for `epochs` passes over the training feature files and save
# the model weights after every epoch.
weights_dir = '../son/model/'
# Create the output directory up front so a missing folder cannot abort the
# run after a full epoch of training.
os.makedirs(weights_dir, exist_ok=True)

# List and sort the directory once instead of once per epoch.
train_filenames = sorted(os.listdir(train_features_path))
n_files = len(train_filenames)
for epoch in range(epochs):
    print(epoch)
    for filename in train_filenames:
        full_filename = os.path.join(train_features_path, filename)
        data = np.load(full_filename)
        # Standardise the training data with the dataset-level statistics.
        x_train = (data - x_mean) / x_std
        # Append one trailing axis so the array matches the model input format.
        x_train = x_train.reshape(x_train.shape + (1,))

        # Only full batches are trained on; trailing frames that do not fill
        # a whole batch are dropped.
        n_batches = data.shape[0] // batch_size
        for ind in range(n_batches):
            batch = x_train[ind * batch_size:(ind + 1) * batch_size]
            # y=None because the loss was attached to the model via add_loss.
            batch_loss = vae.train_on_batch(batch, y=None)
    # Save the weights for this epoch.
    vae.save_weights(os.path.join(weights_dir, '{}.h5'.format(epoch)))

0

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
CPU times: user 5h 40min 43s, sys: 45min 19s, total: 6h 26min 2s
Wall time: 1h 2min 9s


# ==== The cell below trains without splitting each file into batches! It does not need to be run!

In [0]:
%%time
# This appears to be a checking variant of the training loop; it differs from
# the batched loop only in how each batch is formed: batch_size frame indices
# are drawn with np.random.randint from every file.
x_train = []
train_features_path = '/home/ds/DataScience/Datasets/LibriSpeech/VAELibriSpeech/train-clean-wav/'
n_files = len(os.listdir(train_features_path))
for epoch in range(epochs):
    print(epoch)
    for filename in sorted(os.listdir(train_features_path)):
        print(filename)
        full_filename = os.path.join(train_features_path, filename)
        data = np.load(full_filename)
        # Pick batch_size random frame indices along the first axis.
        frame_idx = np.random.randint(data.shape[0], size=batch_size)
        data = data[frame_idx, :, :]
        # Standardise, then append a trailing axis of length 1.
        x_train = np.expand_dims((data - x_mean) / x_std, axis=-1)
        step_loss = vae.train_on_batch(x_train, y=None)
        print(step_loss)
    # Save the weights for this epoch.
    vae.save_weights('/home/ds/DataScience/Models/audio_vae/10/vae_cnn_audio_{}.h5'.format(epoch))