https://www.analyticsvidhya.com/blog/2022/03/audio-denoiser-a-speech-enhancement-deep-learning-model/

https://towardsdatascience.com/40-open-source-audio-datasets-for-ml-59dc39d48f06

# Importing Libraries

In [None]:
import numpy as np
import scipy.signal as sps
import librosa
import soundfile as sf

# Spectral Gating Architecture

In [None]:
def spectral_gating(input_file, output_file, threshold_db: float = -60,
                    hop_length: int = 512, win_length: int = 1024) -> None:
    """Suppress low-level spectral content of an audio file and save the result.

    The audio is transformed with an STFT, every time-frequency bin whose
    level is at or below ``threshold_db`` is zeroed, and the remaining bins
    are transformed back to a waveform which is peak-normalized and written
    to ``output_file``.

    Args:
        input_file: Audio file to read; loaded at its native sample rate.
        output_file: Destination for the processed audio.
        threshold_db: Gate threshold in dB. Levels are measured relative to
            the loudest bin of the spectrogram (which sits at 0 dB), so
            useful values are negative.
        hop_length: Hop between successive STFT frames, in samples.
        win_length: Analysis window length in samples; also used as the
            FFT size.
    """
    # Load the audio file at its native sample rate
    audio, sr = librosa.load(input_file, sr=None)

    # Apply short-time Fourier transform (STFT)
    stft = librosa.stft(audio, n_fft=win_length, hop_length=hop_length, win_length=win_length)

    # Convert magnitude spectrogram to dB scale (0 dB == loudest bin)
    magnitude = np.abs(stft)
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Build the gate: keep only bins strictly above the threshold
    mask = magnitude_db > threshold_db

    # Apply the gate to the COMPLEX spectrogram. Masking only the magnitude
    # and inverting that would throw away the phase and resynthesize a
    # zero-phase signal that sounds nothing like the input.
    stft_filtered = stft * mask

    # Invert the STFT; `length` pins the output to the input's sample count
    audio_filtered = librosa.istft(
        stft_filtered,
        hop_length=hop_length,
        win_length=win_length,
        length=len(audio),
    )

    # Peak-normalize, skipping the division when the result is empty or
    # entirely silent (everything gated) to avoid writing NaNs.
    peak = np.max(np.abs(audio_filtered)) if audio_filtered.size else 0.0
    if peak > 0:
        audio_filtered = audio_filtered / peak

    # Save the processed audio to a file
    sf.write(output_file, audio_filtered, sr)

    print(f"Processed audio saved to: {output_file}")

# Run spectral gating on an input file and write the output file

In [2]:
# Source recording to process and the destination for the gated result.
input_file = 'mixkit-small-group-cheer-and-applause-518.wav'
output_file = 'output.wav'

# Run the gate with an explicit threshold and STFT framing.
spectral_gating(
    input_file,
    output_file,
    threshold_db=-60,
    hop_length=512,
    win_length=1024,
)


Processed audio saved to: output.wav
