# Armo las stft

Tengo las señales, tanto limpias como con ruido, separadas en clips de 65280 muestras. Ahora me toca generar las STFT.

Para esto voy a tener que:

* Cargar el archivo
* Extraer el espectrograma logaritmo (en dB) de la señal
* Normalizar el espectrograma
* Guardar el espectrograma normalizado



In [1]:
#Paquetes necesarios:
import os
import pickle
import librosa
import numpy as np
import glob

Lo que busco es crear espectrogramas de 256x256. Para mi caso particular, una forma de conseguir esto es eligiendo como parámetros de la stft:

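frequency bins = (FRAME_SIZE/2) + 1 (257 para FRAME_SIZE = 512; el código descarta la última fila con `[:-1]` para quedarse con 256)

frames = floor(samples / HOP_LENGTH) + 1 (con el `center=True` que `librosa.stft` usa por defecto; la fórmula `((samples - FRAME_SIZE)/HOP_LENGTH) + 1` sólo vale con `center=False`)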


* FRAME_SIZE = 512
* HOP_LENGTH = 128

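Con una Fs de 16 kHz y una duración de audios de 32640 muestras (2.04 segundos), esto da 32640/128 + 1 = 256 frames. Nota: el código de abajo trabaja con clips de 65280 muestras y HOP_LENGTH = 256, que dan la misma cantidad de frames: 65280/256 + 1 = 256.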

Vamos a comprobar si esto es cierto:

In [2]:
# Load one arbitrary noisy clip from the test set.
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\6_2_audios_ruidosos_test')
data = np.load('common_voice_es_18309707_SNR-0.npy')

# Build the log-magnitude (dB) spectrogram; [:-1] drops the last STFT row.
stft = librosa.stft(data, n_fft=512, hop_length=256)[:-1]
spectrogram = np.abs(stft)
log_spectrogram = librosa.amplitude_to_db(spectrogram)

# Report the clip length, then the spectrogram's element count and shape.
print(len(data))
print(log_spectrogram.size)
print(log_spectrogram.shape)

65280
65536
(256, 256)


Para manejar esto voy a hacerlo con clases porque soy un capo

In [7]:
class Loader:
    """Read a stored signal array from disk.

    The sample rate, duration and mono flag are only kept as attributes;
    ``load`` itself does not use them.
    """

    def __init__(self, sample_rate, duration, mono):
        self.sample_rate, self.duration, self.mono = sample_rate, duration, mono

    def load(self, file_path):
        """Return whatever ``np.load`` reads from ``file_path``."""
        return np.load(file_path)


class Padder:
    """Pad an array on one side through ``np.pad``.

    ``mode`` is forwarded unchanged to ``np.pad`` (default ``"constant"``).
    """

    def __init__(self, mode="constant"):
        self.mode = mode

    def left_pad(self, array, num_missing_items):
        """Return ``array`` with ``num_missing_items`` entries added in front."""
        widths = (num_missing_items, 0)
        return np.pad(array, widths, mode=self.mode)

    def right_pad(self, array, num_missing_items):
        """Return ``array`` with ``num_missing_items`` entries added at the end."""
        widths = (0, num_missing_items)
        return np.pad(array, widths, mode=self.mode)


class LogSpectrogramExtractor:
    """Turn a time-domain signal into a log-magnitude (dB) spectrogram.

    ``frame_size`` is passed to ``librosa.stft`` as ``n_fft`` and
    ``hop_length`` as ``hop_length``.
    """

    def __init__(self, frame_size, hop_length):
        self.frame_size = frame_size
        self.hop_length = hop_length

    def extract(self, signal):
        """Return the dB-scaled magnitude of the STFT of ``signal``.

        The last row of the STFT is discarded before taking the magnitude,
        so the result has one row fewer than ``librosa.stft`` returns.
        """
        complex_stft = librosa.stft(
            signal, n_fft=self.frame_size, hop_length=self.hop_length
        )
        magnitude = np.abs(complex_stft[:-1])
        return librosa.amplitude_to_db(magnitude)


class MinMaxNormaliser:
    """Rescale arrays linearly into the range ``[min_val, max_val]``."""

    def __init__(self, min_val, max_val):
        # Bounds of the target range used by normalise/denormalise.
        self.min = min_val
        self.max = max_val

    def normalise(self, array):
        """Map ``array`` so its minimum becomes ``self.min`` and its maximum
        ``self.max``.

        A constant array has no spread to rescale; dividing by its zero range
        would fill the result with NaN. In that case every element is mapped
        to ``self.min`` instead.
        """
        lowest = array.min()
        span = array.max() - lowest
        if span == 0:
            # Keep the dtype the regular path would produce (float for ints,
            # unchanged for float inputs).
            out_dtype = np.result_type(array.dtype, np.float32)
            return np.full_like(array, self.min, dtype=out_dtype)
        norm_array = (array - lowest) / span
        return norm_array * (self.max - self.min) + self.min

    def denormalise(self, norm_array, original_min, original_max):
        """Invert ``normalise`` given the original minimum and maximum.

        ``original_min`` / ``original_max`` are the values the array had
        before normalisation (they are not stored by this class).
        """
        array = (norm_array - self.min) / (self.max - self.min)
        array = array * (original_max - original_min) + original_min
        return array


class Saver:
    """Save feature arrays and the dictionary of min/max values to disk."""

    def __init__(self, feature_save_dir, min_max_values_save_dir):
        self.feature_save_dir = feature_save_dir
        self.min_max_values_save_dir = min_max_values_save_dir

    def save_feature(self, feature, file_path):
        """Save ``feature`` under the feature directory and return its path.

        The file keeps the base name of ``file_path``. The feature directory
        is created if it does not exist yet.
        """
        save_path = self._generate_save_path(file_path)
        self._ensure_dir(self.feature_save_dir)
        np.save(save_path, feature)
        return save_path

    def save_min_max_values(self, min_max_values,
                            file_name="min_max_values_ruido_train.pkl"):
        """Pickle ``min_max_values`` into the min/max directory.

        ``file_name`` defaults to the name that was previously hard-coded, so
        existing calls write to the same file; pass another name to keep the
        statistics of different datasets apart.
        """
        # Create the directory first: this runs after every file has been
        # processed, so failing here would lose the collected values.
        self._ensure_dir(self.min_max_values_save_dir)
        save_path = os.path.join(self.min_max_values_save_dir, file_name)
        self._save(min_max_values, save_path)

    @staticmethod
    def _save(data, save_path):
        """Pickle ``data`` to ``save_path``."""
        with open(save_path, "wb") as f:
            pickle.dump(data, f)

    @staticmethod
    def _ensure_dir(directory):
        """Create ``directory`` (and parents) unless it is empty or exists."""
        if directory:
            os.makedirs(directory, exist_ok=True)

    def _generate_save_path(self, file_path):
        """Return the destination path for the feature built from ``file_path``.

        ``np.save`` appends ``.npy`` when the name lacks it, so the extension
        is added here too; otherwise the returned path would not point at the
        file that actually gets written.
        """
        file_name = os.path.split(file_path)[1]
        if not file_name.endswith(".npy"):
            file_name += ".npy"
        return os.path.join(self.feature_save_dir, file_name)


class PreprocessingPipeline:
    """PreprocessingPipeline processes the files in a directory, applying
    the following steps to each file:
        1- load a file
        2- pad the signal (if necessary)
        3- extract the log spectrogram from the signal
        4- normalise the spectrogram
        5- save the normalised spectrogram

    The min and max of every (un-normalised) log spectrogram are collected
    and saved once all files have been processed.

    The collaborators (``loader``, ``padder``, ``extractor``, ``normaliser``,
    ``saver``) must be assigned before calling ``process``.
    """

    def __init__(self):
        self.padder = None
        self.extractor = None
        self.normaliser = None
        self.saver = None
        # Maps the saved feature path to {"min": ..., "max": ...}.
        self.min_max_values = {}
        self._loader = None
        self._num_expected_samples = None

    @property
    def loader(self):
        return self._loader

    @loader.setter
    def loader(self, loader):
        """Store the loader and derive the expected signal length from it."""
        self._loader = loader
        # Round instead of truncating: the product is a float and can land
        # just below the intended integer (e.g. 100 * 0.29 ->
        # 28.999999999999996), which int() alone would turn into 28.
        self._num_expected_samples = int(
            round(loader.sample_rate * loader.duration)
        )

    def process(self, audio_files_dir):
        """Process every file found under ``audio_files_dir`` (recursively),
        then save the collected min/max values.
        """
        for root, _, files in os.walk(audio_files_dir):
            for file in files:
                file_path = os.path.join(root, file)
                self._process_file(file_path)
                print(f"Processed file {file_path}")
        self.saver.save_min_max_values(self.min_max_values)

    def _process_file(self, file_path):
        """Run load -> pad -> extract -> normalise -> save for one file."""
        signal = self.loader.load(file_path)
        if self._is_padding_necessary(signal):
            signal = self._apply_padding(signal)
        feature = self.extractor.extract(signal)
        norm_feature = self.normaliser.normalise(feature)
        save_path = self.saver.save_feature(norm_feature, file_path)
        # Keep the pre-normalisation range so the feature can be denormalised.
        self._store_min_max_value(save_path, feature.min(), feature.max())

    def _is_padding_necessary(self, signal):
        """Return True when ``signal`` is shorter than the expected length."""
        return len(signal) < self._num_expected_samples

    def _apply_padding(self, signal):
        """Right-pad ``signal`` up to the expected number of samples."""
        num_missing_samples = self._num_expected_samples - len(signal)
        padded_signal = self.padder.right_pad(signal, num_missing_samples)
        return padded_signal

    def _store_min_max_value(self, save_path, min_val, max_val):
        """Record the min/max pair of the feature saved at ``save_path``."""
        self.min_max_values[save_path] = {
            "min": min_val,
            "max": max_val
        }

In [8]:
#os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica')

# STFT parameters handed to LogSpectrogramExtractor.
FRAME_SIZE = 512
HOP_LENGTH = 256
# NOTE(review): the notes earlier in this notebook mention a 16 kHz sampling
# rate. Here the value only feeds the expected-sample count
# (SAMPLE_RATE * DURATION, i.e. 65280 samples), which the pipeline uses to
# decide on padding — confirm which rate is the real one.
SAMPLE_RATE = 22050
DURATION = 65280 / SAMPLE_RATE  # in seconds
MONO = True

# Input/output locations — machine-specific, change them before running.
SPECTROGRAMS_SAVE_DIR = "/Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/8_stft_ruido_train/" # obviously, change this
MIN_MAX_VALUES_SAVE_DIR = "/Users/maxia/Desktop/Facultad/Labo de acústica/Codigo" # obviously, change this
FILES_DIR = "/Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/" # obviously, change this

# instantiate all objects
loader = Loader(SAMPLE_RATE, DURATION, MONO)
padder = Padder()
log_spectrogram_extractor = LogSpectrogramExtractor(FRAME_SIZE, HOP_LENGTH)
min_max_normaliser = MinMaxNormaliser(0, 1)
saver = Saver(SPECTROGRAMS_SAVE_DIR, MIN_MAX_VALUES_SAVE_DIR)

# Wire the collaborators into the pipeline; assigning `loader` also sets the
# expected number of samples through the property setter.
preprocessing_pipeline = PreprocessingPipeline()
preprocessing_pipeline.loader = loader
preprocessing_pipeline.padder = padder
preprocessing_pipeline.extractor = log_spectrogram_extractor
preprocessing_pipeline.normaliser = min_max_normaliser
preprocessing_pipeline.saver = saver

# Process every file under FILES_DIR and save the min/max values at the end.
preprocessing_pipeline.process(FILES_DIR)

sos_train/3-common_voice_es_19962188_1_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962190_1_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962190_2_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962191_1_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962192_1_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962192_2_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962193_1_SNR-15.npy
Processed file /Users/maxia/Desktop/Facultad/Labo de acústica/Codigo/6_audios_ruidosos_train/3-common_voice_es_19962193_2_SNR-15.npy
Processed file /Use

In [7]:
# Quick check: load one saved spectrogram and print its shape.
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\10_stft_ruido_test')

prueba = np.load('common_voice_es_18309707_SNR-0.npy')

print(np.shape(prueba))

(256, 256)


# Guardo los espectros en carpetas separadas

La idea ahora es mover los espectrogramas a la nueva carpeta de Drive y separarlos en subcarpetas de a 200 archivos (train) o de a 100 archivos (test).

In [10]:
import shutil
import glob

In [11]:
# Collect the names of the spectrogram files to relocate, one list per
# dataset. The names are relative to each source folder. glob.glob returns
# the list directly, so no loop variable is needed (the previous loops bound
# the name `dir`, hiding the builtin for the rest of the session).
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\7_stft_train')
stft_train = glob.glob('*common*')

os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\8_stft_ruido_train')
stft_ruido_train = glob.glob('*common*')

# Test sets (disabled):
#os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\9_stft_test')
#stft_test = glob.glob('common*')

#os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\10_stft_ruido_test')
#stft_ruido_test = glob.glob('common*')


In [12]:
# Both lists must have the same length for the clean/noisy pairing to work.
print(f'stft_train tiene {len(stft_train)}')
print(f'stft_ruido_train tiene {len(stft_ruido_train)}') # NOTE: there is an error here, review
#print(f'stft_test tiene {len(stft_test)}')
#print(f'stft_ruido_test tiene {len(stft_ruido_test)}')

stft_train tiene 22000
stft_ruido_train tiene 22000


In [13]:
# Create the numbered subfolders (0, 1, 2, ...) that will receive the
# spectrograms, one set per dataset.
DRIVE_ROOT = 'C:\\Users\\maxia\\Desktop\\Facultad\\Labo de acústica\\Codigo\\Drive'

# One entry per dataset: (folder under Drive, file list, files per subfolder).
datasets = [
    ('stft_train', stft_train, 200),
    ('stft_ruido_train', stft_ruido_train, 200),
    #('stft_test', stft_test, 100),
    #('stft_ruido_test', stft_ruido_test, 100),
]

for name, files, batch_size in datasets:
    os.chdir(os.path.join(DRIVE_ROOT, name))
    # Same count as before: one more folder than the number of full batches.
    n_folders = len(files) // batch_size + 1
    for i in range(n_folders):
        # exist_ok lets the cell be re-run without failing on folders that
        # were already created.
        os.makedirs(str(i), exist_ok=True)
    print(f'{n_folders} carpetas creadas en {name}')


111 carpetas creadas en stft_train
111 carpetas creadas en stft_ruido_train


In [14]:
def move_in_batches(file_names, source_dir, dest_root, batch_size, label):
    """Move ``file_names`` from ``source_dir`` into numbered subfolders of
    ``dest_root``, ``batch_size`` files per subfolder.

    File number k (counting from 0) goes to subfolder ``k // batch_size``, so
    every subfolder except possibly the last holds exactly ``batch_size``
    files. ``label`` is only used in the progress messages.
    """
    os.chdir(source_dir)
    for index, name in enumerate(file_names):
        dest = os.path.join(dest_root, str(index // batch_size))
        if index % batch_size == 0:
            # Make sure the target is a real folder: shutil.move to a path
            # that does not exist would rename the file instead.
            os.makedirs(dest, exist_ok=True)
        shutil.move(name, dest)
        moved = index + 1
        if moved % batch_size == 0:
            print(f'Se movieron {moved} audios a {label}')
    print(f'{len(file_names)} audios movidos a {label}')


CODE_ROOT = 'C:\\Users\\maxia\\Desktop\\Facultad\\Labo de acústica\\Codigo'

move_in_batches(stft_train,
                CODE_ROOT + '\\7_stft_train',
                CODE_ROOT + '\\Drive\\stft_train',
                200, 'stft_train')

# Test set (disabled):
#move_in_batches(stft_test,
#                CODE_ROOT + '\\9_stft_test',
#                CODE_ROOT + '\\Drive\\stft_test',
#                100, 'stft_test')

move_in_batches(stft_ruido_train,
                CODE_ROOT + '\\8_stft_ruido_train',
                CODE_ROOT + '\\Drive\\stft_ruido_train',
                200, 'stft_ruido_train')

# Noisy test set (disabled):
#move_in_batches(stft_ruido_test,
#                CODE_ROOT + '\\10_stft_ruido_test',
#                CODE_ROOT + '\\Drive\\stft_ruido_test',
#                100, 'stft_ruido_test')


Se movieron 200 audios a stft_train
Se movieron 400 audios a stft_train
Se movieron 600 audios a stft_train
Se movieron 800 audios a stft_train
Se movieron 1000 audios a stft_train
Se movieron 1200 audios a stft_train
Se movieron 1400 audios a stft_train
Se movieron 1600 audios a stft_train
Se movieron 1800 audios a stft_train
Se movieron 2000 audios a stft_train
Se movieron 2200 audios a stft_train
Se movieron 2400 audios a stft_train
Se movieron 2600 audios a stft_train
Se movieron 2800 audios a stft_train
Se movieron 3000 audios a stft_train
Se movieron 3200 audios a stft_train
Se movieron 3400 audios a stft_train
Se movieron 3600 audios a stft_train
Se movieron 3800 audios a stft_train
Se movieron 4000 audios a stft_train
Se movieron 4200 audios a stft_train
Se movieron 4400 audios a stft_train
Se movieron 4600 audios a stft_train
Se movieron 4800 audios a stft_train
Se movieron 5000 audios a stft_train
Se movieron 5200 audios a stft_train
Se movieron 5400 audios a stft_train
Se mo

In [15]:
# List the noisy STFT files used for training (paths relative to the
# stft_ruido_train folder, including the numbered subfolder).
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\Drive\stft_ruido_train')

# glob.glob already returns a list; no loop (and no `dir` variable hiding the
# builtin) is needed.
audios_noise = glob.glob('**/*common*', recursive=True)

# Summary: how many files, plus the first and last entries.
print(len(audios_noise))
print(audios_noise[0])
print(audios_noise[-1])

22000
0\0-common_voice_es_18307761_1_SNR-0.npy
99\3-common_voice_es_19690029_1_SNR-15.npy


In [16]:
# List the clean STFT files used for training (paths relative to the
# stft_train folder, including the numbered subfolder).
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\Drive\stft_train')

# glob.glob already returns a list; no loop (and no `dir` variable hiding the
# builtin) is needed.
audios_clean = glob.glob('**/*common*', recursive=True)

# Summary: how many files, plus the first and last entries.
print(len(audios_clean))
print(audios_clean[0])
print(audios_clean[-1])

22000
0\0-common_voice_es_18307761_1.npy
99\3-common_voice_es_19690029_1.npy


In [17]:
import re

import pandas as pd

# The noisy and clean lists come from two independent directory listings and
# are paired purely by position, so check that every pair really belongs
# together before using it for training.
# Assumes a noisy file is named like its clean counterpart plus an
# "_SNR-<value>" tag right before ".npy" (as in the listings printed by this
# notebook) — adjust the pattern if the naming differs.
snr_tag = re.compile(r'_SNR-[-+]?\d+(?:\.\d+)?(?=\.npy$)')

if len(audios_noise) != len(audios_clean):
    raise ValueError(
        f'Cantidad distinta de archivos: {len(audios_noise)} con ruido '
        f'vs {len(audios_clean)} limpios'
    )

mismatched = [
    (noise, clean)
    for noise, clean in zip(audios_noise, audios_clean)
    if snr_tag.sub('', os.path.basename(noise)) != os.path.basename(clean)
]
if mismatched:
    raise ValueError(
        f'{len(mismatched)} pares ruido/limpio no coinciden, '
        f'por ejemplo: {mismatched[:5]}'
    )

# Shuffle whole rows so each noisy file stays next to its clean counterpart.
df = pd.DataFrame({
    'audios_noise' : audios_noise,
    'audios_clean' : audios_clean
})

df_shuffled = df.sample(frac=1).reset_index(drop=True)

df.head()

Unnamed: 0,audios_noise,audios_clean
0,0\0-common_voice_es_18307761_1_SNR-0.npy,0\0-common_voice_es_18307761_1.npy
1,0\0-common_voice_es_18307761_2_SNR-0.npy,0\0-common_voice_es_18307761_2.npy
2,0\0-common_voice_es_18307940_1_SNR-0.npy,0\0-common_voice_es_18307940_1.npy
3,0\0-common_voice_es_18307941_1_SNR-0.npy,0\0-common_voice_es_18307941_1.npy
4,0\0-common_voice_es_18307941_2_SNR-0.npy,0\0-common_voice_es_18307941_2.npy


In [18]:
# Preview the shuffled pairs: each row should still hold matching files.
df_shuffled.head()

Unnamed: 0,audios_noise,audios_clean
0,28\1-common_voice_es_18384549_2_SNR-5.npy,28\1-common_voice_es_18384549_2.npy
1,17\0-common_voice_es_19691497_1_SNR-0.npy,17\0-common_voice_es_19691497_1.npy
2,30\1-common_voice_es_18890607_2_SNR-5.npy,30\1-common_voice_es_18890607_2.npy
3,16\0-common_voice_es_19670454_2_SNR-0.npy,16\0-common_voice_es_19670454_2.npy
4,88\3-common_voice_es_19205892_2_SNR-15.npy,88\3-common_voice_es_19205892_2.npy


In [19]:
# Save the shuffled path lists so the training notebook (on Drive) can load
# the noisy and clean files in the same order.
# NOTE(review): the stored paths use Windows separators (e.g. "28\1-common_...");
# if they are read on a non-Windows machine they may need converting —
# confirm in the training notebook.
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo')

audios_noise_train_Drive = df_shuffled['audios_noise'].tolist()
audios_clean_train_Drive = df_shuffled['audios_clean'].tolist()

# np.save adds the ".npy" extension to these names.
np.save('audios_noise_train_Drive', audios_noise_train_Drive)
np.save('audios_clean_train_Drive', audios_clean_train_Drive)

In [19]:
# Same procedure for the test set.
import re

os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\Drive\stft_test')

# glob.glob already returns a list; no loop (and no `dir` variable hiding the
# builtin) is needed.
audios_clean = glob.glob('**/common*', recursive=True)

os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo\Drive\stft_ruido_test')

audios_noise = glob.glob('**/common*', recursive=True)

# The two lists are paired purely by position, so check that every pair
# really belongs together before using it.
# Assumes a noisy file is named like its clean counterpart plus an
# "_SNR-<value>" tag right before ".npy" (as in the listings printed by this
# notebook) — adjust the pattern if the naming differs.
snr_tag = re.compile(r'_SNR-[-+]?\d+(?:\.\d+)?(?=\.npy$)')

if len(audios_noise) != len(audios_clean):
    raise ValueError(
        f'Cantidad distinta de archivos: {len(audios_noise)} con ruido '
        f'vs {len(audios_clean)} limpios'
    )

mismatched = [
    (noise, clean)
    for noise, clean in zip(audios_noise, audios_clean)
    if snr_tag.sub('', os.path.basename(noise)) != os.path.basename(clean)
]
if mismatched:
    raise ValueError(
        f'{len(mismatched)} pares ruido/limpio no coinciden, '
        f'por ejemplo: {mismatched[:5]}'
    )

# Shuffle whole rows so each noisy file stays next to its clean counterpart.
df2 = pd.DataFrame({
    'audios_noise' : audios_noise,
    'audios_clean' : audios_clean
})

df_shuffled = df2.sample(frac=1).reset_index(drop=True)

df2.head()

Unnamed: 0,audios_noise,audios_clean
0,0\common_voice_es_18309707_SNR-0.npy,0\common_voice_es_18309707.npy
1,0\common_voice_es_18309709_SNR-0.npy,0\common_voice_es_18309709.npy
2,0\common_voice_es_18309711_SNR-0.npy,0\common_voice_es_18309711.npy
3,0\common_voice_es_18330891_SNR-0.npy,0\common_voice_es_18330891.npy
4,0\common_voice_es_18369864_SNR-0.npy,0\common_voice_es_18369864.npy


In [20]:
# Preview the shuffled test pairs: each row should still hold matching files.
df_shuffled.head()

Unnamed: 0,audios_noise,audios_clean
0,9\common_voice_es_22028641_SNR-15.npy,9\common_voice_es_22028641.npy
1,8\common_voice_es_22025865_SNR-15.npy,8\common_voice_es_22025865.npy
2,4\common_voice_es_21978545_SNR-5.npy,4\common_voice_es_21978545.npy
3,10\common_voice_es_22043244_SNR-15.npy,10\common_voice_es_22043244.npy
4,6\common_voice_es_21996040_SNR-10.npy,6\common_voice_es_21996040.npy


In [21]:
# Save the shuffled test path lists so the notebook on Drive can load the
# noisy and clean files in the same order.
# NOTE(review): the stored paths use Windows separators (e.g. "9\common_...");
# if they are read on a non-Windows machine they may need converting —
# confirm in the notebook that consumes them.
os.chdir(r'C:\Users\maxia\Desktop\Facultad\Labo de acústica\Codigo')

audios_noise_test_Drive = df_shuffled['audios_noise'].tolist()
audios_clean_test_Drive = df_shuffled['audios_clean'].tolist()

# np.save adds the ".npy" extension to these names.
np.save('audios_noise_test_Drive', audios_noise_test_Drive)
np.save('audios_clean_test_Drive', audios_clean_test_Drive)