Audio to STFT (using PCEN Normalisation)

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile
import scipy.signal as signal
import os
import librosa

def wav_to_stft(input_folder, output_folder, n_fft=1024, hop_length=256, window='hann', sr=16000):
    """Render a PCEN-normalised STFT spectrogram PNG for every WAV file in a folder.

    Each ``*.wav`` file directly inside ``input_folder`` is read, mixed down to
    mono, resampled to ``sr`` when its sample rate differs, transformed with
    ``scipy.signal.stft``, normalised with ``librosa.pcen`` and saved as
    ``<stem>.png`` in ``output_folder``.

    Args:
        input_folder: Directory scanned (non-recursively) for WAV files.
        output_folder: Directory receiving the PNG images; created if missing.
        n_fft: STFT segment length in samples (``nperseg``).
        hop_length: Step between consecutive STFT frames, in samples.
        window: Window specification forwarded to ``scipy.signal.stft``.
        sr: Target sample rate in Hz.

    Raises:
        ValueError: If ``hop_length`` is not in the range ``1..n_fft``.
    """
    if not 0 < hop_length <= n_fft:
        raise ValueError(
            f"hop_length must satisfy 0 < hop_length <= n_fft "
            f"(got hop_length={hop_length}, n_fft={n_fft})"
        )

    # Creating an output folder
    os.makedirs(output_folder, exist_ok=True)

    # SciPy takes the number of *overlapping* samples, not the hop size.
    noverlap = n_fft - hop_length

    # Iterating all files in the input folder (sorted for a reproducible order)
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith('.wav'):
            continue

        wav_file = os.path.join(input_folder, filename)
        output_image = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}.png")

        # Loading the audio file
        sample_rate, audio = wavfile.read(wav_file)

        # wavfile.read yields integer PCM for most files, while
        # librosa.resample only accepts floating-point input. The sample
        # scale is kept as-is (no normalisation to [-1, 1]).
        audio = np.asarray(audio, dtype=np.float64)

        # Multi-channel data arrives as (n_samples, n_channels); the STFT and
        # resampler work along the last axis, so mix down to mono first.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        if audio.size == 0:
            print(f"Skipped {wav_file} (no audio samples)")
            continue

        # Resampling audio (if and when needed)
        if sample_rate != sr:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=sr)
            sample_rate = sr

        # Zero-pad very short clips so that one full segment fits; otherwise
        # SciPy shrinks nperseg and the requested overlap becomes invalid.
        if audio.shape[0] < n_fft:
            audio = np.pad(audio, (0, n_fft - audio.shape[0]))

        # Computing the STFT
        f, t, Zxx = signal.stft(audio, fs=sample_rate, nperseg=n_fft, noverlap=noverlap, window=window)
        stft_magnitude = np.abs(Zxx)

        # Apply PCEN normalization using librosa; hop_length now matches the
        # actual frame step used by the STFT above.
        # NOTE(review): librosa's PCEN defaults assume a particular input
        # scale; confirm the magnitude scaling from scipy.signal.stft suits them.
        pcen = librosa.pcen(stft_magnitude, sr=sample_rate, hop_length=hop_length)

        # Plotting STFT as an image/spectrogram
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.pcolormesh(t, f, pcen, shading='gouraud')
            plt.title(f'STFT: {filename}')
            plt.ylabel('Frequency [Hz]')
            plt.xlabel('Time [sec]')
            plt.colorbar(label='Magnitude')
            plt.tight_layout()
            plt.savefig(output_image)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)
        print(f"Saved {output_image}")
