<a href="https://colab.research.google.com/github/julioCROS/VAE-TCC/blob/main/VAE_TCC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive inside the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

ValueError: mount failed

In [None]:
# Default directory for this notebook: the project folder on the mounted Drive.
# It is added to sys.path so the project's .py files stored there can be imported.
import sys

base_path = "/content/drive/MyDrive/VAE_TCC"

# Guard the append so that re-running this cell does not pile up duplicate entries.
if base_path not in sys.path:
    sys.path.append(base_path)

In [None]:
# Instalando bibliotecas auxiliares
!pip install tensorflow
!pip install soundfile
!pip install auraloss
!pip install librosa

In [None]:
# Importando bibliotecas auxiliares
import time
import soundfile as sf
import tensorflow as tf
from tensorflow.keras.backend import clear_session

In [None]:
# Importando arquivos auxiliares
import utils
import config
from VAE import VAE
from AudioData import AudioData

In [None]:
# Reload the project modules so that edits made to the .py files take effect
# without restarting the kernel.
import importlib

import AudioData as _audio_data_module
import VAE as _vae_module

# config is reloaded first because the other modules may read it at import time
# (assumption — confirm against the module sources).
for _module in (config, utils, _audio_data_module, _vae_module):
    importlib.reload(_module)

# Names bound with `from X import Y` still point at the pre-reload classes,
# so rebind them to the freshly loaded definitions.
from AudioData import AudioData
from VAE import VAE

<module 'utils' from '/content/drive/MyDrive/VAE_TCC/utils.py'>

In [None]:
# Record the start timestamp (used later to compute the execution time) and
# draw the ID that labels this experiment's saved outputs.
start_time = time.time()
current_id = utils.generate_random_id()
print(f"[ID do Experimento] - {current_id}")

[ID do Experimento] - IU2675


In [None]:
# Build the audio loader from the project configuration and create the Adam optimizer
audio_data = AudioData(
    audio_path=config.audio_path,
    duration=config.audio_duration,
    sr=config.audio_rate,
    n_mels=config.num_mels,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=config.learning_rate)

In [None]:
# Fetch the training data and append a trailing axis of size 1
raw_data = audio_data.get_mel_spectrograms(num_audio_samples=config.num_audio_samples)
num_items, item_length = raw_data.shape[0], raw_data.shape[1]
data = raw_data.reshape(num_items, item_length, 1)
print(f"[Dados de Treinamento] - Formato: {data.shape}")

[Extraindo formas de onda]
 - fancy: OK
 - ethereal: OK

[Dados de Treinamento] - Formato: (2, 132000, 1)


In [None]:
# Instantiate the VAE, taking every hyperparameter from the project configuration
model = VAE(
    input_shape=data.shape,
    latent_dim=config.latent_dim,
    hidden_dims=config.hidden_dims,
    id=current_id,
    duration=config.audio_duration,
    rate=config.audio_rate,
    kernel_sizes=config.kernel_sizes,
    strides=config.strides,
    kl_annealing_rate=config.kl_annealing_rate,
    max_kl_weight=config.max_kl_weight,
    batch_size=config.batch_size,
    residual_depth=config.residual_depth,
)


[Incializando VAE]
[INFO] Cropping necessário para equivalência de saídas. (132096 != 132000)


In [None]:
# Show the model summary (delegates to the project's VAE.summary)
model.summary()

In [None]:
# Train the model's representation.
# NOTE(review): clear_session() runs here after `model` and `optimizer` were already
# created in earlier cells — confirm that resetting Keras state at this point is intended.
clear_session()
# train() returns two values, bound as signal_losses and kl_losses; both are
# passed to utils.save_graphs further down.
signal_losses, kl_losses = model.train(data, config.epochs, optimizer)


[Iniciando treinamento de representação]
[INFO] Dataset não dividido em Batches para o treinamento.

[ Epoca 1 | Loss: 0.0476901 |  Recon. Loss: 0.0476901 | KL Loss: 158.8240051]
[ Epoca 2 | Loss: 1.0834384 |  Recon. Loss: 0.0444257 | KL Loss: 155.8518982]
[ Epoca 3 | Loss: 2.0487051 |  Recon. Loss: 0.0385855 | KL Loss: 150.7589722]
[ Epoca 4 | Loss: 2.8962727 |  Recon. Loss: 0.033476 | KL Loss: 143.1398315]
[ Epoca 5 | Loss: 3.5454385 |  Recon. Loss: 0.0404125 | KL Loss: 131.4384766]
[ Epoca 6 | Loss: 3.8521869 |  Recon. Loss: 0.0370048 | KL Loss: 114.4554596]
[ Epoca 7 | Loss: 3.7126098 |  Recon. Loss: 0.0314037 | KL Loss: 92.0301514]
[ Epoca 8 | Loss: 3.1026711 |  Recon. Loss: 0.0325505 | KL Loss: 65.7882996]
[ Epoca 9 | Loss: 2.1426539 |  Recon. Loss: 0.0332872 | KL Loss: 39.5506248]
[ Epoca 10 | Loss: 1.1488733 |  Recon. Loss: 0.0355904 | KL Loss: 18.5547161]
[ Epoca 11 | Loss: 0.473848 |  Recon. Loss: 0.0292149 | KL Loss: 6.6694965]
[ Epoca 12 | Loss: 0.181277 |  Recon. Loss: 0.

In [None]:
# Train the model with adversarial fine-tuning.
# Currently disabled: every statement below is commented out.
#clear_session()
#discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=config.discr_learning_rate)
#generator_losses, discriminator_losses = model.adversarial_fine_tuning_train(data, config.epochs, optimizer, discriminator_optimizer)

In [None]:
# Elapsed time since start_time, stored as a string rounded to two decimals
end_time = time.time()
elapsed_seconds = end_time - start_time
execution_time = str(round(elapsed_seconds, 2))

In [None]:
# Evaluate the latent space.
# Currently disabled: the call below is commented out.
# reduced_latent = model.compact_latent_representation(data)

In [None]:
# Take the first element returned by the encoder (the means, mu) and convert it
# with .numpy() so it can be used for the latent-space visualisation
mu = model.encode(data)[0].numpy()

In [None]:
# Report the execution time, then save this experiment's metadata and graphs
# under its ID.
utils.show_results(execution_time)
utils.save_metadata(current_id, execution_time)
# NOTE(review): the meaning of the trailing positional None is not visible from
# this notebook — check the signature of utils.save_graphs.
utils.save_graphs(current_id, signal_losses, kl_losses, mu, config.compact_latent_space, None)

################################################################################################################
[TREINAMENTOS CONCLUIDOS]
Tempo de execução: 197.14 segundos

################################################################################################################

[Salvando metadados de IU2675]
[Metadados salvos em /content/drive/MyDrive/VAE_TCC/results/results_spectrogram_metadata.txt]

[Graficos salvos em /content/drive/MyDrive/VAE_TCC/graphs/IU2675.png]



In [None]:
# Generate config.num_samples_generate samples from the latent space.
# NOTE(review): the original comment calls these spectrograms, yet the saving cell
# writes them straight to audio files — confirm what model.sample returns.
generated = model.sample(config.num_samples_generate, data, config.compact_latent_space)

In [None]:
# Write each generated sample to <base_path>/results as an .ogg file named
# after the experiment ID and the 1-based sample index.
import os

results_dir = os.path.join(base_path, 'results')
# Create the output folder if it is missing, so the first write cannot fail
# on a non-existent directory.
os.makedirs(results_dir, exist_ok=True)

for i in range(config.num_samples_generate):
    curr_gen = generated[i]
    file_result = os.path.join(results_dir, f'generated_audio_{current_id}_{i + 1}.ogg')
    # NOTE(review): the mel-spectrogram inversion below is disabled, so the sample is
    # written exactly as returned by model.sample — confirm it is already a waveform.
    #generated_audio = audio_data.mel_spectrogram_to_audio(curr_gen.numpy().reshape(curr_gen.shape[0], curr_gen.shape[1]))
    print(f"[ Salvando resultado gerado {i + 1} ] - {current_id}")
    sf.write(file_result, curr_gen, config.audio_rate)

[ Salvando resultado gerado 1 ] - IU2675
[ Salvando resultado gerado 2 ] - IU2675
[ Salvando resultado gerado 3 ] - IU2675
[ Salvando resultado gerado 4 ] - IU2675
[ Salvando resultado gerado 5 ] - IU2675
