In [None]:
#Preprocessing pipeline
#Load the file
#Pad the signal if necessary
#Extract the log spectrogram using librosa
#Normalise the spectrogram
#Save the normalised spectrogram


In [None]:
!pip install librosa
import librosa




In [None]:
import numpy as np
import os
import pickle


In [None]:
"""
1- load a file
2- pad the signal (if necessary)
3- extracting log spectrogram from signal
4- normalise spectrogram
5- save the normalised spectrogram

PreprocessingPipeline
"""
import os
import pickle

import librosa
import numpy as np


class Loader:
    """Reads an audio file from disk as a time-series signal.

    A thin wrapper around ``librosa.load`` that remembers the loading
    options so every file is read with the same settings.
    """

    def __init__(self, sample_rate, duration, mono):
        # sample_rate -> forwarded to librosa.load as ``sr``
        # duration    -> forwarded to librosa.load as ``duration`` (seconds)
        # mono        -> forwarded to librosa.load as ``mono`` (boolean)
        self.mono = mono
        self.duration = duration
        self.sample_rate = sample_rate

    def load(self, file_path):
        """Return the samples of ``file_path``.

        ``librosa.load`` returns a pair; only its first element (the signal)
        is handed back to the caller.
        """
        load_options = {
            "sr": self.sample_rate,
            "duration": self.duration,
            "mono": self.mono,
        }
        samples, _ = librosa.load(file_path, **load_options)
        return samples

#Pad the signal if necessary
class Padder:
    """Pads a 1-D array on one side using a configurable ``np.pad`` mode."""

    def __init__(self, mode="constant"):
        # ``mode`` is forwarded verbatim to np.pad on every call.
        self.mode = mode

    def left_pad(self, array, num_missing_items):
        """Prepend ``num_missing_items`` padded entries.

        Example with the default mode: [1, 2, 3], 2 -> [0, 0, 1, 2, 3]
        """
        pad_width = (num_missing_items, 0)
        return np.pad(array, pad_width, mode=self.mode)

    def right_pad(self, array, num_missing_items):
        """Append ``num_missing_items`` padded entries.

        Example with the default mode: [1, 2, 3], 2 -> [1, 2, 3, 0, 0]
        """
        pad_width = (0, num_missing_items)
        return np.pad(array, pad_width, mode=self.mode)

#Extracting the log spectrogram using librosa
class LogSpectrogramExtractor:
    """LogSpectrogramExtractor extracts log spectrograms (in dB) from a
    time-series signal.
    """

    def __init__(self, frame_size, hop_length):
        # frame_size is passed to librosa.stft as ``n_fft``;
        # hop_length is passed as ``hop_length``.
        self.frame_size = frame_size
        self.hop_length = hop_length

    def extract(self, signal):
        """Return the log-amplitude (dB) spectrogram of ``signal``.

        The STFT has shape (1 + frame_size / 2, num_frames); the last
        frequency row is dropped so frame_size / 2 rows remain
        (e.g. 1024 -> 513 -> 512).
        """
        stft = librosa.stft(signal,
                            n_fft=self.frame_size,
                            hop_length=self.hop_length)[:-1]
        # Take the magnitude BEFORE converting to dB. The previous version
        # also called amplitude_to_db on the complex STFT and threw the
        # result away, which only produced a librosa warning.
        spectrogram = np.abs(stft)
        log_spectrogram = librosa.amplitude_to_db(spectrogram)
        return log_spectrogram


class MinMaxNormaliser:
    """MinMaxNormaliser applies min max normalisation to an array.

    The smallest value of the array is mapped to ``min_val`` and the largest
    to ``max_val``.
    """

    def __init__(self, min_val, max_val):
        self.min = min_val
        self.max = max_val

    def normalise(self, array):
        """Rescale ``array`` linearly into [self.min, self.max].

        A constant array has no range to stretch; it is mapped to
        ``self.min`` everywhere instead of dividing by zero (which used to
        yield an all-NaN result).
        """
        array_min = array.min()
        value_range = array.max() - array_min
        if value_range == 0:
            # Degenerate input: every element is identical.
            unit_array = np.zeros(
                array.shape, dtype=np.result_type(array.dtype, np.float32))
        else:
            # Step 1: squash into [0, 1].
            unit_array = (array - array_min) / value_range
        # Step 2: stretch/shift into [self.min, self.max].
        norm_array = unit_array * (self.max - self.min) + self.min
        return norm_array

    def denormalise(self, norm_array, original_min, original_max):
        """Invert ``normalise`` given the original minimum and maximum."""
        # Step 1: undo the [self.min, self.max] scaling -> back to [0, 1].
        array = (norm_array - self.min) / (self.max - self.min)
        # Step 2: restore the original value range.
        array = array * (original_max - original_min) + original_min
        return array

#Save the normalised spectrogram
class Saver:
    """Saver is responsible for saving features and the min max values."""

    def __init__(self, feature_save_dir, min_max_values_save_dir):
        # feature_save_dir: folder that receives one .npy file per feature
        # min_max_values_save_dir: folder that receives min_max_values.pkl
        self.feature_save_dir = feature_save_dir
        self.min_max_values_save_dir = min_max_values_save_dir

    def save_feature(self, feature, file_path):
        """Save ``feature`` as .npy and return the path it was written to.

        The file name is derived from ``file_path``; the target folder is
        created if it does not exist yet.
        """
        save_path = self._generate_save_path(file_path)
        os.makedirs(self.feature_save_dir, exist_ok=True)
        np.save(save_path, feature)
        return save_path

    def save_min_max_values(self, min_max_values):
        """Pickle ``min_max_values`` to min_max_values.pkl.

        The target folder is created if it does not exist yet.
        """
        os.makedirs(self.min_max_values_save_dir, exist_ok=True)
        save_path = os.path.join(self.min_max_values_save_dir,
                                 "min_max_values.pkl")
        self._save(min_max_values, save_path)

    @staticmethod
    def _save(data, save_path):
        """Pickle ``data`` to ``save_path``."""
        with open(save_path, "wb") as f:
            pickle.dump(data, f)

    def _generate_save_path(self, file_path):
        """Build <feature_save_dir>/<basename of file_path>.npy."""
        file_name = os.path.split(file_path)[1]
        save_path = os.path.join(self.feature_save_dir, file_name + ".npy")
        return save_path


class PreprocessingPipeline:
    """Runs every audio file found under a directory through these stages:

        1- load the file
        2- right-pad the signal when it is shorter than expected
        3- extract the log spectrogram
        4- normalise the spectrogram
        5- save the normalised spectrogram

    The min and max of each un-normalised spectrogram are collected and
    saved once all files have been processed.
    """

    def __init__(self):
        # Collaborators are attached by the caller after construction.
        self._loader = None
        self._num_expected_samples = None
        self.padder = None
        self.extractor = None
        self.normaliser = None
        self.saver = None
        # Maps a saved feature path to {"min": ..., "max": ...}.
        self.min_max_values = {}

    @property
    def loader(self):
        return self._loader

    @loader.setter
    def loader(self, loader):
        # The expected signal length follows from the loader's settings.
        expected = loader.sample_rate * loader.duration
        self._num_expected_samples = int(expected)
        self._loader = loader

    def process(self, audio_files_dir):
        """Process every file below ``audio_files_dir``, then save the
        collected min/max values.
        """
        for folder, _, names in os.walk(audio_files_dir):
            for name in names:
                file_path = os.path.join(folder, name)
                self._process_file(file_path)
                print(f"Processed file {file_path}")
        self.saver.save_min_max_values(self.min_max_values)

    def _process_file(self, file_path):
        """Run one file through load -> pad -> extract -> normalise -> save."""
        samples = self.loader.load(file_path)
        if self._is_padding_necessary(samples):
            samples = self._apply_padding(samples)
        feature = self.extractor.extract(samples)
        normalised = self.normaliser.normalise(feature)
        saved_to = self.saver.save_feature(normalised, file_path)
        # Record the range of the un-normalised feature under its save path.
        self._store_min_max_value(saved_to, feature.min(), feature.max())

    def _is_padding_necessary(self, signal):
        """True when the signal has fewer samples than expected."""
        return len(signal) < self._num_expected_samples

    def _apply_padding(self, signal):
        """Right-pad ``signal`` up to the expected number of samples."""
        shortfall = self._num_expected_samples - len(signal)
        return self.padder.right_pad(signal, shortfall)

    def _store_min_max_value(self, save_path, min_val, max_val):
        """Remember the min/max pair for the feature saved at ``save_path``."""
        self.min_max_values[save_path] = {"min": min_val, "max": max_val}





In [None]:
# Entry point of the preprocessing cell: configure every stage, wire the
# stages into a PreprocessingPipeline and process all files under FILES_DIR.
if __name__ == "__main__":
    # STFT settings handed to LogSpectrogramExtractor.
    FRAME_SIZE = 512
    HOP_LENGTH = 256
    # Loading settings handed to Loader; the pipeline derives the expected
    # number of samples from int(SAMPLE_RATE * DURATION).
    DURATION = 0.74  # in seconds
    SAMPLE_RATE = 22050
    MONO = True

    # Output folders for the features / min-max pickle, and the input folder.
    SPECTROGRAMS_SAVE_DIR = "/content/drive/MyDrive/Spectogram/"
    MIN_MAX_VALUES_SAVE_DIR = "/content/drive/MyDrive/Min_max_value/"
    FILES_DIR = "/content/drive/MyDrive/recordings/"

    # instantiate all objects
    loader = Loader(SAMPLE_RATE, DURATION, MONO)
    padder = Padder()
    log_spectrogram_extractor = LogSpectrogramExtractor(FRAME_SIZE, HOP_LENGTH)
    # Spectrograms are normalised into the range [0, 1].
    min_max_normaliser = MinMaxNormaliser(0, 1)
    saver = Saver(SPECTROGRAMS_SAVE_DIR, MIN_MAX_VALUES_SAVE_DIR)

    # Attach each stage to the pipeline.
    preprocessing_pipeline = PreprocessingPipeline()
    preprocessing_pipeline.loader = loader
    preprocessing_pipeline.padder = padder
    preprocessing_pipeline.extractor = log_spectrogram_extractor
    preprocessing_pipeline.normaliser = min_max_normaliser
    preprocessing_pipeline.saver = saver

    preprocessing_pipeline.process(FILES_DIR)

  spectrogram = librosa.amplitude_to_db(stft)


Processed file /content/drive/MyDrive/recordings/5_george_24.wav
Processed file /content/drive/MyDrive/recordings/8_nicolas_5.wav
Processed file /content/drive/MyDrive/recordings/6_theo_17.wav
Processed file /content/drive/MyDrive/recordings/4_jackson_39.wav
Processed file /content/drive/MyDrive/recordings/3_lucas_44.wav
Processed file /content/drive/MyDrive/recordings/8_nicolas_41.wav
Processed file /content/drive/MyDrive/recordings/6_george_46.wav
Processed file /content/drive/MyDrive/recordings/7_theo_39.wav
Processed file /content/drive/MyDrive/recordings/4_lucas_37.wav
Processed file /content/drive/MyDrive/recordings/2_theo_24.wav
Processed file /content/drive/MyDrive/recordings/3_jackson_49.wav
Processed file /content/drive/MyDrive/recordings/3_yweweler_38.wav
Processed file /content/drive/MyDrive/recordings/3_jackson_1.wav
Processed file /content/drive/MyDrive/recordings/7_theo_38.wav
Processed file /content/drive/MyDrive/recordings/5_george_26.wav
Processed file /content/drive/

In [None]:
import pickle
import os
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
    Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
from tensorflow.keras.datasets import mnist

# Turn off TensorFlow eager execution for everything defined below.
# NOTE(review): presumably needed because the KL loss reads the encoder's
# mu / log_variance tensors from the model object rather than from the
# (y_target, y_predicted) loss arguments - confirm before removing.
disable_eager_execution()


def _calculate_reconstruction_loss(y_target, y_predicted):
    """Mean squared error between target and prediction, averaged over
    axes 1, 2 and 3 so one value per sample remains.
    """
    squared_error = K.square(y_target - y_predicted)
    return K.mean(squared_error, axis=[1, 2, 3])


def calculate_kl_loss(model):
    """Build a KL-loss function bound to ``model``.

    The returned function ignores whatever positional arguments it receives
    (so it can be used both as a Keras metric and inside the combined loss)
    and computes the KL term from ``model.mu`` and ``model.log_variance``.
    """
    def _calculate_kl_loss(*args):
        log_var = model.log_variance
        per_dimension = 1 + log_var - K.square(model.mu) - K.exp(log_var)
        # Sum over the latent axis -> one KL value per sample.
        return -0.5 * K.sum(per_dimension, axis=1)
    return _calculate_kl_loss


class VAE:
    """
    VAE represents a Deep Convolutional variational autoencoder architecture
    with mirrored encoder and decoder components.
    """

    def __init__(self,
                 input_shape,
                 conv_filters,
                 conv_kernels,
                 conv_strides,
                 latent_space_dim):
        """Store the architecture description and build the three models.

        ``conv_filters``, ``conv_kernels`` and ``conv_strides`` are indexed
        per convolutional layer, so they must have the same length.
        """
        self.input_shape = input_shape # [28, 28, 1]
        self.conv_filters = conv_filters # [2, 4, 8]
        self.conv_kernels = conv_kernels # [3, 5, 3]
        self.conv_strides = conv_strides # [1, 2, 2]
        self.latent_space_dim = latent_space_dim # 2
        # Weight of the reconstruction term relative to the KL term.
        self.reconstruction_loss_weight = 1000

        self.encoder = None
        self.decoder = None
        self.model = None
        # Bottleneck tensors; filled in by _add_bottleneck and read by the
        # KL loss.
        self.mu = None
        self.log_variance = None

        self._num_conv_layers = len(conv_filters)
        self._shape_before_bottleneck = None
        self._model_input = None

        self._build()

    def summary(self):
        """Print the Keras summaries of encoder, decoder and full model."""
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.001):
        """Compile the full model with Adam and the combined VAE loss.

        The reconstruction and KL terms are also reported as metrics.
        """
        optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[_calculate_reconstruction_loss,
                                    calculate_kl_loss(self)])

    def train(self, x_train, batch_size, num_epochs):
        """Fit the autoencoder to reproduce ``x_train`` from itself."""
        self.model.fit(x_train,
                       x_train,
                       batch_size=batch_size,
                       epochs=num_epochs,
                       shuffle=True)

    def save(self, save_folder="."):
        """Write parameters.pkl and weights.h5 into ``save_folder``."""
        self._create_folder_if_it_doesnt_exist(save_folder)
        self._save_parameters(save_folder)
        self._save_weights(save_folder)

    def load_weights(self, weights_path):
        """Load model weights from ``weights_path``."""
        self.model.load_weights(weights_path)

    def reconstruct(self, images):
        """Encode then decode ``images``.

        Returns (reconstructed_images, latent_representations).
        """
        latent_representations = self.encoder.predict(images)
        reconstructed_images = self.decoder.predict(latent_representations)
        return reconstructed_images, latent_representations

    @classmethod
    def load(cls, save_folder="."):
        """Rebuild a model from the files written by ``save``.

        NOTE: parameters.pkl is read with pickle, which can execute
        arbitrary code - only load folders you trust.
        """
        parameters_path = os.path.join(save_folder, "parameters.pkl")
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        # Use ``cls`` so that subclasses are reconstructed as themselves.
        autoencoder = cls(*parameters)
        weights_path = os.path.join(save_folder, "weights.h5")
        autoencoder.load_weights(weights_path)
        return autoencoder

    def _calculate_combined_loss(self, y_target, y_predicted):
        """Weighted reconstruction loss plus KL loss."""
        reconstruction_loss = _calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = calculate_kl_loss(self)()
        combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
                                                         + kl_loss
        return combined_loss

    def _create_folder_if_it_doesnt_exist(self, folder):
        """Create ``folder`` (and parents) unless it already exists."""
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(folder, exist_ok=True)

    def _save_parameters(self, save_folder):
        """Pickle the constructor arguments, in constructor order."""
        parameters = [
            self.input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim
        ]
        save_path = os.path.join(save_folder, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)

    def _save_weights(self, save_folder):
        """Save the full model's weights as weights.h5."""
        save_path = os.path.join(save_folder, "weights.h5")
        self.model.save_weights(save_path)

    def _build(self):
        """Build encoder, decoder and the combined autoencoder.

        The encoder must come first: it records the shape the decoder has to
        reshape to and the input tensor the combined model starts from.
        """
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_autoencoder(self):
        """Chain encoder and decoder into the trainable model."""
        model_input = self._model_input
        model_output = self.decoder(self.encoder(model_input))
        self.model = Model(model_input, model_output, name="autoencoder")

    def _build_decoder(self):
        """Assemble the decoder: dense -> reshape -> conv transposes -> output."""
        decoder_input = self._add_decoder_input()
        dense_layer = self._add_dense_layer(decoder_input)
        reshape_layer = self._add_reshape_layer(dense_layer)
        conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
        decoder_output = self._add_decoder_output(conv_transpose_layers)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _add_decoder_input(self):
        # Pass the shape as a 1-tuple; a bare int is not a shape.
        return Input(shape=(self.latent_space_dim,), name="decoder_input")

    def _add_dense_layer(self, decoder_input):
        """Dense layer with as many units as the pre-bottleneck volume."""
        num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
        dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
        return dense_layer

    def _add_reshape_layer(self, dense_layer):
        """Reshape the dense output back to the pre-bottleneck shape."""
        return Reshape(self._shape_before_bottleneck)(dense_layer)

    def _add_conv_transpose_layers(self, x):
        """Add conv transpose blocks."""
        # loop through all the conv layers in reverse order and stop at the
        # first layer
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = self._add_conv_transpose_layer(layer_index, x)
        return x

    def _add_conv_transpose_layer(self, layer_index, x):
        """Add one block: conv 2d transpose + ReLU + batch normalization."""
        layer_num = self._num_conv_layers - layer_index
        conv_transpose_layer = Conv2DTranspose(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"decoder_conv_transpose_layer_{layer_num}"
        )
        x = conv_transpose_layer(x)
        x = ReLU(name=f"decoder_relu_{layer_num}")(x)
        x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
        return x

    def _add_decoder_output(self, x):
        """Final single-filter conv transpose followed by a sigmoid."""
        conv_transpose_layer = Conv2DTranspose(
            filters=1,
            kernel_size=self.conv_kernels[0],
            strides=self.conv_strides[0],
            padding="same",
            name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
        )
        x = conv_transpose_layer(x)
        output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
        return output_layer

    def _build_encoder(self):
        """Assemble the encoder: input -> conv blocks -> bottleneck."""
        encoder_input = self._add_encoder_input()
        conv_layers = self._add_conv_layers(encoder_input)
        bottleneck = self._add_bottleneck(conv_layers)
        self._model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _add_encoder_input(self):
        return Input(shape=self.input_shape, name="encoder_input")

    def _add_conv_layers(self, encoder_input):
        """Create all convolutional blocks in encoder."""
        x = encoder_input
        for layer_index in range(self._num_conv_layers):
            x = self._add_conv_layer(layer_index, x)
        return x

    def _add_conv_layer(self, layer_index, x):
        """Add a convolutional block to a graph of layers, consisting of
        conv 2d + ReLU + batch normalization.
        """
        layer_number = layer_index + 1
        conv_layer = Conv2D(
            filters=self.conv_filters[layer_index],
            kernel_size=self.conv_kernels[layer_index],
            strides=self.conv_strides[layer_index],
            padding="same",
            name=f"encoder_conv_layer_{layer_number}"
        )
        x = conv_layer(x)
        x = ReLU(name=f"encoder_relu_{layer_number}")(x)
        x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
        return x

    def _add_bottleneck(self, x):
        """Flatten data and add bottleneck with Gaussian sampling (Dense
        layer).
        """
        # Remember the per-sample shape so the decoder can mirror it.
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = Flatten()(x)
        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        self.log_variance = Dense(self.latent_space_dim,
                                  name="log_variance")(x)

        def sample_point_from_normal_distribution(args):
            mu, log_variance = args
            # Use the layer's own input for the shape instead of reaching
            # out to self.mu; it is the same tensor.
            epsilon = K.random_normal(shape=K.shape(mu), mean=0.,
                                      stddev=1.)
            # exp(log_variance / 2) is the standard deviation.
            sampled_point = mu + K.exp(log_variance / 2) * epsilon
            return sampled_point

        x = Lambda(sample_point_from_normal_distribution,
                   name="encoder_output")([self.mu, self.log_variance])
        return x






In [None]:
import os

import numpy as np


# Training hyper-parameters; the script cell passes them to train().
LEARNING_RATE = 0.0005
BATCH_SIZE = 64
EPOCHS = 150

# Folder the script cell passes to load_fsdd() to read the spectrograms.
SPECTROGRAMS_PATH = "/content/drive/MyDrive/Spectogram/"


def load_fsdd(spectrograms_path):
    """Load every file found under ``spectrograms_path`` with ``np.load``.

    The loaded arrays are stacked and given one trailing axis so they can be
    treated as grey scale images, e.g.
    (num_samples, n_bins, n_frames) -> (num_samples, n_bins, n_frames, 1).
    """
    spectrograms = [
        np.load(os.path.join(folder, name))
        for folder, _, names in os.walk(spectrograms_path)
        for name in names
    ]
    stacked = np.array(spectrograms)
    return stacked[..., np.newaxis]


def train(x_train, learning_rate, batch_size, epochs):
    """Build a VAE with a fixed architecture, train it on ``x_train`` and
    return the trained model.
    """
    architecture = {
        "input_shape": (256, 64, 1),
        "conv_filters": (512, 256, 128, 64, 32),
        "conv_kernels": (3, 3, 3, 3, 3),
        "conv_strides": (2, 2, 2, 2, (2, 1)),
        "latent_space_dim": 128,
    }
    model = VAE(**architecture)
    model.summary()
    model.compile(learning_rate)
    model.train(x_train, batch_size, epochs)
    return model


In [None]:


if __name__ == "__main__":
    # Load the saved spectrograms, train the VAE on them, then write the
    # model's parameters and weights into the "model" folder.
    x_train = load_fsdd(SPECTROGRAMS_PATH)
    autoencoder = train(x_train, LEARNING_RATE, BATCH_SIZE, EPOCHS)
    autoencoder.save("model")

Instructions for updating:
Colocations handled automatically by placer.


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 encoder_input (InputLayer)  [(None, 256, 64, 1)]         0         []                            
                                                                                                  
 encoder_conv_layer_1 (Conv  (None, 128, 32, 512)         5120      ['encoder_input[0][0]']       
 2D)                                                                                              
                                                                                                  
 encoder_relu_1 (ReLU)       (None, 128, 32, 512)         0         ['encoder_conv_layer_1[0][0]']
                                                                                                  
 encoder_bn_1 (BatchNormali  (None, 128, 32, 512)         2048      ['encoder_relu_1[0][0]']

In [None]:
import librosa



class SoundGenerator:
    """Turns spectrograms into audio signals, optionally passing them
    through a VAE first.
    """

    def __init__(self, vae, hop_length):
        self.hop_length = hop_length
        self.vae = vae
        # Used only for denormalising; configured for the [0, 1] range.
        self._min_max_normaliser = MinMaxNormaliser(0, 1)

    def generate(self, spectrograms, min_max_values):
        """Reconstruct ``spectrograms`` with the VAE and convert the result
        to audio.

        Returns (signals, latent_representations).
        """
        reconstructions, latent_representations = self.vae.reconstruct(
            spectrograms)
        audio = self.convert_spectrograms_to_audio(reconstructions,
                                                   min_max_values)
        return audio, latent_representations

    def convert_spectrograms_to_audio(self, spectrograms, min_max_values):
        """Convert each spectrogram, paired with its min/max entry, into a
        signal. Returns a list of signals.
        """
        return [
            self._to_signal(spectrogram, bounds)
            for spectrogram, bounds in zip(spectrograms, min_max_values)
        ]

    def _to_signal(self, spectrogram, bounds):
        """Convert one normalised log spectrogram into a signal."""
        # Drop the trailing channel axis.
        log_spectrogram = spectrogram[:, :, 0]
        # Undo the min-max normalisation with this item's original range.
        denormalised = self._min_max_normaliser.denormalise(
            log_spectrogram, bounds["min"], bounds["max"])
        # dB -> linear amplitude.
        amplitude = librosa.db_to_amplitude(denormalised)
        # NOTE(review): this is a plain inverse STFT on the amplitudes, not
        # Griffin-Lim as an earlier comment stated.
        return librosa.istft(amplitude, hop_length=self.hop_length)

In [None]:
import os
import pickle

import numpy as np
import soundfile as sf



import os
import pickle
import numpy as np
import soundfile as sf

# Hop length handed to SoundGenerator by the script cell.
HOP_LENGTH = 256
# Output folders for the converted original and VAE-generated signals.
SAVE_DIR_ORIGINAL = "/content/drive/MyDrive/original"
SAVE_DIR_GENERATED = "/content/drive/MyDrive/generated"
# Pickle holding the per-file min/max values, and the spectrogram folder.
MIN_MAX_VALUES_PATH = "/content/drive/MyDrive/Min_max_value/min_max_values.pkl"
SPECTROGRAMS_PATH = "/content/drive/MyDrive/Spectogram"  # Replace with your actual path

# The rest of your code...


import os
import numpy as np

def load_fsdd(spectrograms_path):
    """Load every readable spectrogram under ``spectrograms_path``.

    Returns:
        (x_train, file_paths): the stacked spectrograms with one trailing
        axis added, e.g. (3000, 256, 64, 1), and the path each one was
        loaded from, in the same order. ``(None, None)`` when no file could
        be loaded.
    """
    x_train = []
    file_paths = []
    for root, _, file_names in os.walk(spectrograms_path):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)
            # Best effort: a file that cannot be loaded is reported and
            # skipped so one stray file does not abort the whole run.
            try:
                spectrogram = np.load(file_path) # (n_bins, n_frames)
            except Exception as e:
                print(f"Error loading file '{file_path}': {str(e)}")
            else:
                x_train.append(spectrogram)
                file_paths.append(file_path)
    if not x_train:
        print("No valid spectrograms found in the directory.")
        return None, None
    x_train = np.array(x_train)
    x_train = x_train[..., np.newaxis] # -> (3000, 256, 64, 1)
    return x_train, file_paths


def select_spectrograms(spectrograms,
                        file_paths,
                        min_max_values,
                        num_spectrograms=2):
    """Randomly pick ``num_spectrograms`` spectrograms and look up their
    min/max entries by file path.

    The chosen paths and min/max entries are printed. Returns
    (sampled_spectrograms, sampled_min_max_values).
    """
    candidate_indexes = range(len(spectrograms))
    chosen = np.random.choice(candidate_indexes, num_spectrograms)
    chosen_paths = []
    chosen_min_max = []
    for index in chosen:
        path = file_paths[index]
        chosen_paths.append(path)
        chosen_min_max.append(min_max_values[path])
    print(chosen_paths)
    print(chosen_min_max)
    return spectrograms[chosen], chosen_min_max


def save_signals(signals, save_dir, sample_rate=22050):
    """Write each signal to ``save_dir`` as <index>.wav.

    ``save_dir`` is created first if it does not exist, so a fresh output
    folder does not make the write fail.
    """
    os.makedirs(save_dir, exist_ok=True)
    for i, signal in enumerate(signals):
        save_path = os.path.join(save_dir, str(i) + ".wav")
        sf.write(save_path, signal, sample_rate)


# Entry point of the generation cell: load the trained VAE, sample a few
# saved spectrograms, and write both their VAE reconstructions and the
# originals out as audio files.
if __name__ == "__main__":
    # initialise sound generator
    vae = VAE.load("model")
    sound_generator = SoundGenerator(vae, HOP_LENGTH)

    # load spectrograms + min max values
    # NOTE: pickle can execute arbitrary code; only load a file you produced.
    with open(MIN_MAX_VALUES_PATH, "rb") as f:
        min_max_values = pickle.load(f)

    # NOTE(review): load_fsdd returns (None, None) when nothing could be
    # loaded; that case is not handled before the sampling below.
    specs, file_paths = load_fsdd(SPECTROGRAMS_PATH)

    # sample spectrograms + min max values
    sampled_specs, sampled_min_max_values = select_spectrograms(specs,
                                                                file_paths,
                                                                min_max_values,
                                                                5)

    # generate audio for sampled spectrograms
    signals, _ = sound_generator.generate(sampled_specs,
                                          sampled_min_max_values)

    # convert spectrogram samples to audio
    original_signals = sound_generator.convert_spectrograms_to_audio(
        sampled_specs, sampled_min_max_values)

    # save audio signals
    save_signals(signals, SAVE_DIR_GENERATED)
    save_signals(original_signals, SAVE_DIR_ORIGINAL)


['/content/drive/MyDrive/Spectogram/2_theo_1.wav.npy', '/content/drive/MyDrive/Spectogram/3_theo_6.wav.npy', '/content/drive/MyDrive/Spectogram/9_lucas_1.wav.npy', '/content/drive/MyDrive/Spectogram/4_theo_25.wav.npy', '/content/drive/MyDrive/Spectogram/6_lucas_14.wav.npy']
[{'min': -73.64099, 'max': 6.3590093}, {'min': -74.73115, 'max': 5.2688527}, {'min': -55.75312, 'max': 24.24688}, {'min': -54.043983, 'max': 25.956018}, {'min': -55.34587, 'max': 24.654129}]


  updates=self.state_updates,
