**This notebook is based on [Theo Viel](https://www.kaggle.com/theoviel)'s [notebook](https://www.kaggle.com/theoviel/spectrogram-generation) and [discussion](https://www.kaggle.com/c/rfcx-species-audio-detection/discussion/198048).**
Thanks.


I try to remove noise using the sound-envelope approach from the [previous competition discussion](https://www.kaggle.com/c/birdsong-recognition/discussion/169582#946072).  
This notebook generates denoised spectrograms.  
The calculated dataset is [here](https://www.kaggle.com/takamichitoda/rfcx-denoise-melspec).

# Install denoise library

I use the [noisereduce library](https://pypi.org/project/noisereduce/).

In [None]:
!pip install noisereduce

# Import Python Libraries

In [None]:
import os
import shutil
import  joblib
import numpy as np
import pandas as pd
import librosa as lb
import librosa.display
import matplotlib.pyplot as plt

from pathlib import Path
from tqdm.notebook import tqdm
import IPython.display

import noisereduce as nr
from matplotlib import pyplot as plt
from scipy.ndimage import maximum_filter1d

# Data

In [None]:
# Competition input folder; the train and test audio sit in sub-folders of it.
DATA_ROOT = Path("../input/rfcx-species-audio-detection")
TRAIN_AUDIO_ROOT = DATA_ROOT / "train"
TEST_AUDIO_ROOT = DATA_ROOT / "test"

In [None]:
# One row per .flac file found in each audio folder; the id is the file name
# without its extension. Row order follows whatever order glob() yields.
df_train = pd.DataFrame({
    "recording_id": [flac.stem for flac in TRAIN_AUDIO_ROOT.glob("*.flac")],
})

df_test = pd.DataFrame({
    "recording_id": [flac.stem for flac in TEST_AUDIO_ROOT.glob("*.flac")],
})

# Tools

In [None]:
class params:
    """
    Audio settings shared by the loading and mel-spectrogram code
    """
    # Sampling rate handed to the audio loader and to the spectrogram
    sr = 32_000

    # Melspectrogram: number of mel bands and lower frequency bound
    n_mels, fmin = 128, 20
    # Upper frequency bound is half the sampling rate (Shannon theorem)
    fmax = sr // 2

In [None]:
def load_audio(record, sr=16000, root=""):
    """
    Loads one .flac recording as a waveform
    Arguments:
        record {str} -- recording id (file name without extension; an existing extension is replaced by .flac)
    Keyword Arguments:
        sr {int} -- sampling rate passed to the loader (default: {16000})
        root {str or Path} -- folder containing the recording (default: {""}, i.e. the current folder)
    Returns:
        np array -- signal returned by librosa
    """
    # Coerce to Path so that a plain string (including the "" default) works;
    # str has no joinpath, which made the default unusable.
    audio_path = Path(root).joinpath(record).with_suffix(".flac")
    y, _ = lb.load(audio_path.as_posix(), sr=sr)
    return y

In [None]:
def compute_melspec(y, params):
    """
    Computes a mel-spectrogram and puts it at decibel scale
    Arguments:
        y {np array} -- signal
        params {AudioParams} -- Parameters to use for the spectrogram. Expected to have the attributes sr, n_mels, fmin, fmax
    Returns:
        np array -- Mel-spectrogram in dB, cast to float32
    """
    # The signal is passed by keyword: recent librosa releases reject it as a
    # positional argument, while older releases accept both forms.
    melspec = lb.feature.melspectrogram(
        y=y, sr=params.sr, n_mels=params.n_mels, fmin=params.fmin, fmax=params.fmax,
    )

    return lb.power_to_db(melspec).astype(np.float32)

# Example

Original sound

In [None]:
# Load the first listed training recording at the shared sampling rate.
y = load_audio(df_train["recording_id"][0], params.sr, TRAIN_AUDIO_ROOT)
# Last expression of the cell: build an audio widget for the loaded signal.
IPython.display.Audio(y, rate=params.sr)

## Denoise

I detect the segments that contain no birdcall by using the sound envelope.

In [None]:
def envelope(y, rate, threshold):
    """
    Builds the sound envelope of a signal and thresholds it
    Arguments:
        y {np array} -- signal
        rate {int} -- sampling rate; the sliding window spans rate // 20 samples
        threshold {float} -- envelope level above which a sample is flagged
    Returns:
        list of bool -- True where the envelope is strictly above threshold
        np array -- the envelope (sliding maximum of the absolute signal)
    """
    # Sliding maximum of the rectified signal; with mode="constant" the
    # positions beyond both ends are treated as the filter's fill value.
    y_env = maximum_filter1d(np.abs(y), mode="constant", size=rate // 20)
    # One vectorised comparison instead of a Python loop over every sample;
    # tolist() keeps the historical list-of-bool return type.
    mask = (y_env > threshold).tolist()
    return mask, y_env

In [None]:
# Envelope threshold; load_and_save_train also reads this as a global.
thr = 0.25
mask, env = envelope(y, params.sr, thr)

# Samples whose envelope exceeds thr are drawn as "birdcall", the remaining ones as "noise".
plt.plot(y[mask], label="birdcall")
plt.plot(y[np.logical_not(mask)], label="noise")
plt.legend(bbox_to_anchor=(1, 1), loc='upper right')

Denoise and check the denoised sound.

In [None]:
# Use the below-threshold samples as the noise profile for the whole clip.
# NOTE(review): audio_clip / noise_clip / verbose look like the noisereduce 1.x
# keyword names -- confirm the installed release still accepts them.
y_denoise = nr.reduce_noise(audio_clip=y, noise_clip=y[np.logical_not(mask)], verbose=True)

Compare the spectrogram of the original sound with that of the denoised sound.

In [None]:
def tmp_audio_to_spec(audio, sr):
    """
    Computes a dB-scaled mel-spectrogram for quick visual comparison
    Arguments:
        audio {np array} -- signal
        sr {int} -- sampling rate of the signal
    Returns:
        np array -- Mel-spectrogram (128 mel bands, 20 to 16000 frequency range) as float32
    """
    # The signal is passed by keyword: recent librosa releases reject it as a
    # positional argument, while older releases accept both forms.
    melspec = librosa.feature.melspectrogram(
        y=audio, sr=sr, fmin=20, fmax=16000, n_mels=128
    )
    return librosa.power_to_db(melspec).astype(np.float32)

# Draw the original clip first, then the denoised one, each in its own figure.
for _clip in (y, y_denoise):
    plt.figure(figsize=(16, 8))
    plt.imshow(tmp_audio_to_spec(_clip, params.sr))
    plt.show()

It seems that noise has been removed.

# Main

## Train

In [None]:
def load_and_save_train(record):
    """
    Loads a training recording, denoises it and saves its mel-spectrogram
    Arguments:
        record {str} -- recording id; the output is written to OUT_TRAIN + record + ".npy"
    """
    y = load_audio(record, params.sr, TRAIN_AUDIO_ROOT)

    # denoise
    # The envelope window is derived from the same rate the audio was loaded
    # at, instead of a separate hard-coded 32000.
    mask, _ = envelope(y, params.sr, thr)
    noise_clip = y[np.logical_not(mask)]
    # Without any below-threshold sample there is no noise profile, so the
    # signal is kept as loaded.
    if len(noise_clip):
        y = nr.reduce_noise(audio_clip=y, noise_clip=noise_clip, verbose=False)
    melspec = compute_melspec(y, params)

    np.save(OUT_TRAIN + record + ".npy", melspec)

In [None]:
# Output folder for the training spectrograms (trailing slash is relied on by
# the string concatenation in load_and_save_train).
OUT_TRAIN = 'train/'
# exist_ok lets the cell be re-run without failing on an already created folder.
os.makedirs(OUT_TRAIN, exist_ok=True)

In [None]:
# Process every training recording with 8 parallel jobs; tqdm reports how many
# recordings have been handed out so far.
train_records = tqdm(df_train['recording_id'].values)
save_train = joblib.delayed(load_and_save_train)
_ = joblib.Parallel(n_jobs=8)(save_train(record) for record in train_records)

In [None]:
# make_archive appends ".zip" to the base name as given. With the trailing
# slash of OUT_TRAIN the archive can end up as "train/.zip" inside the folder
# that is deleted right after, so the base name is normalised to "train".
shutil.make_archive(os.path.normpath(OUT_TRAIN), 'zip', OUT_TRAIN)
shutil.rmtree(OUT_TRAIN)

## Test

In [None]:
def load_and_save_test(record):
    """
    Loads a test recording and saves its mel-spectrogram
    Arguments:
        record {str} -- recording id; the output is written to OUT_TEST + record + ".npy"
    """
    # NOTE(review): unlike load_and_save_train, no denoising step is applied
    # here -- confirm this train/test difference is intended.
    signal = load_audio(record, params.sr, TEST_AUDIO_ROOT)
    spec = compute_melspec(signal, params)

    target = "".join((OUT_TEST, record, ".npy"))
    np.save(target, spec)

In [None]:
# Output folder for the test spectrograms (trailing slash is relied on by the
# string concatenation in load_and_save_test).
OUT_TEST = 'test/'
# exist_ok lets the cell be re-run without failing on an already created folder.
os.makedirs(OUT_TEST, exist_ok=True)

In [None]:
# Process every test recording with 8 parallel jobs; tqdm reports how many
# recordings have been handed out so far.
test_records = tqdm(df_test['recording_id'].values)
save_test = joblib.delayed(load_and_save_test)
_ = joblib.Parallel(n_jobs=8)(save_test(record) for record in test_records)

In [None]:
# make_archive appends ".zip" to the base name as given. With the trailing
# slash of OUT_TEST the archive can end up as "test/.zip" inside the folder
# that is deleted right after, so the base name is normalised to "test".
shutil.make_archive(os.path.normpath(OUT_TEST), 'zip', OUT_TEST)
shutil.rmtree(OUT_TEST)