In [1]:
import os
import pickle

import numpy as np
import soundfile as sf

from sound_generator import SoundGenerator
from vae import VAE
# from train import SPECTROGRAMS_PATH
%config Completer.use_jedi = False

# Hop length handed to SoundGenerator when it is constructed further down.
# NOTE(review): presumably has to equal the hop length used when the
# spectrograms were extracted -- confirm against the preprocessing step.
HOP_LENGTH = 256
# Output folders for the .wav files written by save_signals; relative paths,
# so they resolve against the notebook's working directory.
SAVE_DIR_ORIGINAL = "../output/samples/original/"
SAVE_DIR_GENERATED = "../output/samples/generated/"
# Machine-specific absolute locations of the dataset artefacts.
# select_spectrograms looks up min/max values using the exact path strings
# obtained by walking SPECTROGRAMS_PATH, so this prefix must reproduce the keys
# stored in the pickle at MIN_MAX_VALUES_PATH.
# TODO(review): move to a configurable data directory once the keys no longer
# depend on absolute paths.
SPECTROGRAMS_PATH = "C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\"
MIN_MAX_VALUES_PATH = "C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\min_max_values.pkl"


def load_fsdd(spectrograms_path):
    """Load every ``.npy`` spectrogram stored under ``spectrograms_path``.

    The directory tree is walked recursively in sorted order, so the result
    is reproducible regardless of the order the filesystem lists entries in.
    Files without a ``.npy`` extension are skipped instead of being passed to
    ``np.load``.

    Args:
        spectrograms_path: Root directory containing the saved spectrograms.

    Returns:
        A ``(x_train, file_paths)`` tuple. ``x_train`` is the stacked
        spectrograms with one trailing axis of length 1 appended;
        ``file_paths`` holds the path each entry was loaded from, in the
        same order as the first axis of ``x_train``.
    """
    x_train = []
    file_paths = []
    for root, dir_names, file_names in os.walk(spectrograms_path):
        # Sorting in place makes os.walk descend into sub-folders in a
        # stable order.
        dir_names.sort()
        for file_name in sorted(file_names):
            if not file_name.lower().endswith(".npy"):
                # Stray files (notes, thumbnails, ...) are not spectrograms.
                continue
            file_path = os.path.join(root, file_name)
            # The channel axis is added after stacking, so each file is
            # presumably a 2-D (n_bins, n_frames) array -- TODO confirm.
            spectrogram = np.load(file_path)
            x_train.append(spectrogram)
            file_paths.append(file_path)
    x_train = np.array(x_train)
    # Append the channel axis; the original author's note gives
    # (3000, 256, 64, 1) for the FSDD spectrograms.
    x_train = x_train[..., np.newaxis]
    return x_train, file_paths


def select_spectrograms(spectrograms,
                        file_paths,
                        min_max_values,
                        num_spectrograms=2):
    """Randomly pick spectrograms together with their min/max entries.

    Indexes are drawn with ``np.random.choice``. They are drawn without
    replacement whenever enough spectrograms are available, so one call does
    not return the same item twice. Sampling with replacement is used only
    when ``num_spectrograms`` exceeds the number of spectrograms, which keeps
    such calls working.

    The chosen file paths and their min/max entries are printed.

    Args:
        spectrograms: NumPy array whose first axis indexes the spectrograms.
        file_paths: Sequence of paths aligned with ``spectrograms``.
        min_max_values: Mapping from file path to that file's min/max entry.
        num_spectrograms: How many items to draw.

    Returns:
        A ``(sampled_spectrograms, sampled_min_max_values)`` tuple; the
        second element is a list aligned with the first axis of the first.

    Raises:
        KeyError: If a sampled path has no entry in ``min_max_values``.
        ValueError: If items are requested from an empty collection.
    """
    num_available = len(spectrograms)
    # Only fall back to duplicates when the request cannot be met otherwise.
    allow_repeats = num_spectrograms > num_available
    sampled_indexes = np.random.choice(num_available,
                                       num_spectrograms,
                                       replace=allow_repeats)
    sampled_spectrograms = spectrograms[sampled_indexes]
    file_paths = [file_paths[index] for index in sampled_indexes]
    sampled_min_max_values = [min_max_values[file_path] for file_path in
                              file_paths]
    print(file_paths)
    print(sampled_min_max_values)
    return sampled_spectrograms, sampled_min_max_values


def save_signals(signals, save_dir, sample_rate=22050):
    """Write each signal to ``save_dir`` as ``<index>.wav``.

    The target directory is created if it does not exist yet. File names
    come from the position of the signal in ``signals``, so a later call
    with the same directory overwrites files from an earlier one.

    Args:
        signals: Iterable of audio signals accepted by ``soundfile.write``.
        save_dir: Directory that receives the ``.wav`` files.
        sample_rate: Sample rate passed to ``soundfile.write``.
    """
    if save_dir:
        # An empty string means "current directory"; os.makedirs("") would
        # raise, so only create real directory names.
        os.makedirs(save_dir, exist_ok=True)
    for i, signal in enumerate(signals):
        save_path = os.path.join(save_dir, str(i) + ".wav")
        sf.write(save_path, signal, sample_rate)

In [2]:
# Driver cell: runs top to bottom every time the cell is executed.

# Restore the trained VAE and wrap it in the audio generator.
vae = VAE.load("../trained_models/vae_model_fsdd")
sound_generator = SoundGenerator(vae, HOP_LENGTH)

# Read the per-file min/max entries, then the spectrograms themselves.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
with open(MIN_MAX_VALUES_PATH, "rb") as f:
    min_max_values = pickle.load(f)
specs, file_paths = load_fsdd(SPECTROGRAMS_PATH)

# Draw five random spectrograms together with their min/max entries.
sampled_specs, sampled_min_max_values = select_spectrograms(
    specs, file_paths, min_max_values, 5
)

# Run the samples through the generator; generate() returns a pair and only
# its first element (the signals) is used here.
signals, _ = sound_generator.generate(sampled_specs, sampled_min_max_values)

# Convert the unmodified sampled spectrograms to audio as well, so there is
# an "original" clip for every generated one.
original_signals = sound_generator.convert_spectrograms_to_audio(
    sampled_specs, sampled_min_max_values
)

# Write both sets of clips to disk.
save_signals(signals, SAVE_DIR_GENERATED)
save_signals(original_signals, SAVE_DIR_ORIGINAL)

['C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\3_lucas_27.wav.npy', 'C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\8_yweweler_42.wav.npy', 'C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\1_nicolas_22.wav.npy', 'C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\0_theo_49.wav.npy', 'C:\\Users\\pbeata\\Desktop\\Data_Science\\Audio\\sound-generation\\datasets\\fsdd\\spectrograms\\5_theo_3.wav.npy']
[{'min': -56.098103, 'max': 23.901897}, {'min': -71.430916, 'max': 8.569086}, {'min': -63.202766, 'max': 16.797235}, {'min': -77.15126, 'max': 2.848741}, {'min': -80.739426, 'max': -0.73942804}]


