In [1]:
# Mounting Google Drive inside the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Default directory of this notebook; it is added to sys.path so that the
# local helper modules stored there (e.g. configs.py) can be imported.
import sys

base_path = "/content/drive/MyDrive/VAE_TCC"
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if base_path not in sys.path:
    sys.path.append(base_path)

In [None]:
# Installing auxiliary libraries (all lines are currently commented out;
# uncomment a line if the runtime is missing that package)
#!pip install tensorflow
#!pip install soundfile
#!pip install auraloss
#!pip install librosa

In [None]:
# Importando bibliotecas auxiliares
import os
import time

import numpy as np
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.backend import clear_session

In [None]:
# Importando arquivos auxiliares
import utils
import configs
from model import VAE_GAN
from audio_info import AudioInfo

In [None]:
# Reload the local project modules so that edits saved to their files are
# picked up without restarting the kernel.
import importlib
import sys

# configs is reloaded first (assumed to be read by the other modules — TODO
# confirm), then utils.
importlib.reload(configs)
importlib.reload(utils)

# VAE_GAN and AudioInfo were bound with `from ... import ...`, so reloading
# utils/configs alone never refreshes them. The modules are looked up in
# sys.modules because the bare names `model` and `audio_info` are reused for
# instances later in the notebook.
for module_name in ("audio_info", "model"):
    importlib.reload(sys.modules[module_name])
from model import VAE_GAN
from audio_info import AudioInfo

<module 'configs' from '/content/drive/MyDrive/VAE_TCC/configs.py'>

In [None]:
# Start the wall-clock timer (read again after training to compute the
# execution time) and draw the identifier used to tag this experiment's outputs.
start_time = time.time()
current_id = utils.generate_random_id()
print(f"[ID do Experimento] - {current_id}")

[ID do Experimento] - ZF8046


In [None]:
# Build the AudioInfo helper from the path, duration and rate settings in
# configs (no optimizer is created in this cell).
audio_info = AudioInfo(
    audio_path=configs.AUDIO_PATH,
    duration=configs.AUDIO_DURATION,
    sr=configs.AUDIO_RATE,
)

In [None]:
# Extracting the training data, then running the multiband decomposition on
# every extracted sample.
raw_audio = audio_info.get_audio_data(num_audio_samples=configs.NUM_AUDIO_SAMPLES)
multiband_dataset = [
    audio_info.multiband_decomposition(raw_audio[idx], num_bands=configs.NUM_BANDS)
    for idx in range(raw_audio.shape[0])
]

# Stack the per-sample results into one array; `data` is what the model
# cells below consume.
data = np.array(multiband_dataset)
print(f"[Dados de Treinamento] - Formato: {data.shape}")

[Extraindo formas de onda]
 - pop.00058: OK
 - pop.00055: OK
 - pop.00049: OK
 - pop.00059: OK
 - pop.00061: OK
 - pop.00033: OK
 - pop.00032: OK
 - pop.00019: OK
 - pop.00011: OK
 - pop.00022: OK
 - pop.00004: OK
 - pop.00082: OK
 - pop.00026: OK
 - pop.00053: OK
 - pop.00009: OK
 - pop.00016: OK
 - pop.00025: OK
 - pop.00000: OK
 - pop.00052: OK
 - pop.00035: OK
 - pop.00068: OK
 - pop.00038: OK
 - pop.00095: OK
 - pop.00067: OK
 - pop.00012: OK
 - pop.00065: OK
 - pop.00084: OK
 - pop.00087: OK
 - pop.00037: OK
 - pop.00018: OK
 - pop.00093: OK
 - pop.00078: OK
 - pop.00064: OK
 - pop.00096: OK
 - pop.00051: OK
 - pop.00034: OK
 - pop.00003: OK
 - pop.00080: OK
 - pop.00005: OK
 - pop.00054: OK
 - pop.00085: OK
 - pop.00073: OK
 - pop.00081: OK
 - pop.00028: OK
 - pop.00047: OK
 - pop.00062: OK
 - pop.00060: OK
 - pop.00066: OK
 - pop.00077: OK
 - pop.00090: OK
 - pop.00048: OK
 - pop.00092: OK
 - pop.00006: OK
 - pop.00002: OK
 - pop.00071: OK
 - pop.00014: OK
 - pop.00070: OK
 - p

In [None]:
# Initialising the optimizer and the VAE-GAN model from the values in configs
optimizer = tf.keras.optimizers.Adam(
    learning_rate=configs.LEARNING_RATE,
    beta_1=configs.BETA_1,
    beta_2=configs.BETA_2,
    epsilon=configs.EPSILON,
)
model = VAE_GAN(
    input_shape=data.shape,
    latent_dim=configs.LATENT_DIM,
    hidden_dims=configs.VAE_HIDDEN_DIMS,
    id=current_id,
    duration=configs.AUDIO_DURATION,
    rate=configs.AUDIO_RATE,
    kernel_sizes=configs.VAE_KERNELS,
    strides=configs.VAE_STRIDES,
    loud_stride=configs.LOUD_STRIDE,
    kl_beta=configs.KL_BETA,
    batch_size=configs.BATCH_SIZE,
    num_bands=configs.NUM_BANDS,
    residual_depth=configs.RESIDUAL_DEPTH,
    use_noise=configs.USE_NOISE,
)


[Incializando VAE-GAN]
	[CONSTRUINDO ENCODER]




		 - Forma de saída MU:  (None, 1, 128)
		 - Forma de saída LOG_VAR:  (None, 1, 128)

	[CONSTRUINDO DECODER]
	[INFO] Padding necessário para equivalência de saídas. (27392 != 27500)
		 - Forma de saída DECODER:  (None, 16, 27500)
[VAE-GAN Inicializado]


In [None]:
# Displaying the summary of the full model
model.summary()

In [None]:
# Displaying the summary of the model's encoder
model.encoder.summary()

In [None]:
# Displaying the summary of the model's decoder
model.decoder.summary()

In [None]:
# Training the model's representation; the two returned values are kept as
# signal_losses and kl_losses and passed to utils.save_graphs further down.
# NOTE(review): clear_session() runs after `model` and `optimizer` were already
# built in an earlier cell — confirm that resetting Keras' global state at this
# point is intended, rather than before model construction.
clear_session()
signal_losses, kl_losses = model.train(data, configs.EPOCHS, optimizer)

[Iniciando Treinamento de Representação]
[INFO] Dividindo dataset em batches para o treinamento.
[INFO] Dataset dividido em batches de tamanho 8 para o treinamento.

# [ Época 1 | Loss: 673.0504684 |  Recon. Loss: 672.8544998 | KL Loss: 1.9596469]
# [ Época 16 | Loss: 586.882225 |  Recon. Loss: 586.7473946 | KL Loss: 1.3483536]
# [ Época 31 | Loss: 579.3168526 |  Recon. Loss: 579.1071472 | KL Loss: 2.0970709]
# [ Época 46 | Loss: 575.0065308 |  Recon. Loss: 574.8296967 | KL Loss: 1.7683347]
# [ Época 61 | Loss: 570.9011192 |  Recon. Loss: 570.6909485 | KL Loss: 2.1017462]
# [ Época 76 | Loss: 566.4673424 |  Recon. Loss: 566.2346954 | KL Loss: 2.3265136]
# [ Época 91 | Loss: 563.8622932 |  Recon. Loss: 563.6293907 | KL Loss: 2.3290488]
# [ Época 106 | Loss: 562.3856201 |  Recon. Loss: 562.1644096 | KL Loss: 2.212087]
# [ Época 121 | Loss: 560.8890915 |  Recon. Loss: 560.6207886 | KL Loss: 2.6830403]
# [ Época 136 | Loss: 559.4028702 |  Recon. Loss: 559.1079712 | KL Loss: 2.9488833]
# [ 

In [None]:
# Training the model with adversarial fine-tuning
clear_session()

# Generator and discriminator each get their own Adam instance, configured
# with the same hyper-parameters from configs.
adam_settings = dict(
    learning_rate=configs.LEARNING_RATE,
    beta_1=configs.BETA_1,
    beta_2=configs.BETA_2,
    epsilon=configs.EPSILON,
)
gen_optimizer = tf.keras.optimizers.Adam(**adam_settings)
discr_optimizer = tf.keras.optimizers.Adam(**adam_settings)

generator_losses, discriminator_losses = model.train_gan(
    data,
    configs.DISCR_EPOCHS,
    gen_optimizer,
    discr_optimizer,
    configs.DISCR_HIDDEN_DIMS,
    configs.DISCR_KERNELS,
    configs.DISCR_STRIDES,
)

In [None]:
# Stop the wall-clock timer and keep the elapsed seconds, rounded to two
# decimals, as a string.
end_time = time.time()
execution_time = str(round(end_time - start_time, 2))

In [None]:
# Evaluating the latent space (currently disabled)
# reduced_latent = model.compact_latent_representation(data)

In [None]:
# Getting the MU means for latent-space visualisation: the first element of
# the encode() result, converted with .numpy().
mu = model.encode(data)[0].numpy()

In [None]:
# Show the results for this run and save the graphs tagged with its ID.
utils.show_results(execution_time)
#utils.save_metadata(current_id, execution_time)
utils.save_graphs(
    current_id,
    signal_losses,
    kl_losses,
    generator_losses,
    discriminator_losses,
    mu,
    configs.COMPACT_LATENT_SPACE,
)

In [None]:
# Generating N waveforms from the latent space
generated = model.sample(
    configs.NUM_SAMPLES_GENERATE,
    data,
    configs.COMPACT_LATENT_SPACE,
)

In [None]:
# For each generated sample: pass it through multiband_synthesis and write the
# result to an .ogg file under <base_path>/results (no txt file is written).
results_dir = os.path.join(base_path, 'results')
# Create the output folder up front so that a missing directory cannot make
# sf.write fail after the whole training run has finished.
os.makedirs(results_dir, exist_ok=True)

for i in range(configs.NUM_SAMPLES_GENERATE):
    curr_gen = audio_info.multiband_synthesis(generated[i], configs.NUM_BANDS)
    # File names are numbered from 1 and tagged with the experiment ID.
    file_result = os.path.join(results_dir, f'generated_audio_{current_id}_{i + 1}.ogg')
    sf.write(file_result, curr_gen, configs.AUDIO_RATE)
    print(f"[ Salvando resultado gerado {i + 1} ] - {current_id}")