In [None]:
# Install nb-black with pip's stdout discarded; the next cell loads the
# `lab_black` extension, which presumably ships with this package.
!pip install nb-black > /dev/null

In [None]:
import os
import warnings

from multiprocessing import Pool

warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf

import librosa
import librosa.display
import IPython.display as ipd

from tqdm.notebook import tqdm

plt.style.use("default")

%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [None]:
# Show the installed librosa version for reference.
librosa.__version__

In [None]:
def create_mel_spectrogram(audio_data, **spec_params):
    """Compute a mel spectrogram of a waveform with ``power=1``.

    Args:
        audio_data: Audio time series, forwarded to librosa as ``y``.
        **spec_params: Must contain ``sr``, ``hop_length``, ``n_fft``,
            ``n_mels``, ``fmin`` and ``fmax``. Any other keys are ignored.

    Returns:
        The value returned by ``librosa.feature.melspectrogram``.

    Raises:
        KeyError: If one of the required keys is missing from ``spec_params``.
    """
    required = ("sr", "hop_length", "n_fft", "n_mels", "fmin", "fmax")
    mel_kwargs = {key: spec_params[key] for key in required}
    # The waveform must be passed as ``y=``: librosa >= 0.10 rejects positional
    # audio, while the keyword form is also accepted by older releases.
    # ``power=1`` requests a magnitude (not squared) spectrogram.
    return librosa.feature.melspectrogram(y=audio_data, power=1, **mel_kwargs)


def pcen_bird(melspec, **spec_params):
    """
    Apply per-channel energy normalization (PCEN) to a mel spectrogram.

    The fixed PCEN settings below are taken from [1]:
        - [1] Lostanlen, et. al. Per-Channel Energy Normalization: Why and How. IEEE Signal Processing Letters, 26(1), 39-43.

    Only ``sr`` and ``hop_length`` are read from ``spec_params``; a missing
    key raises ``KeyError``.
    """
    sample_rate = spec_params["sr"]
    hop = spec_params["hop_length"]
    pcen_settings = dict(
        time_constant=0.06,
        eps=1e-6,
        gain=0.8,
        power=0.25,
        bias=10,
    )
    # The input is multiplied by 2**31 before normalization
    # (NOTE(review): presumably to mimic a 32-bit integer sample scale - confirm).
    scaled = melspec * (2 ** 31)
    return librosa.pcen(scaled, sr=sample_rate, hop_length=hop, **pcen_settings)


def get_fullpath(filename, audio_path="../input/birdclef-2022/train_audio"):
    """Return ``audio_path`` and ``filename`` joined by a "/" (no normalization)."""
    return "{}/{}".format(audio_path, filename)


def play_audio(audio_file):
    """Print the path of ``audio_file`` and show an inline audio player for it."""
    print("audio_file: {}".format(audio_file))
    player = ipd.Audio(audio_file)
    display(player)

In [None]:
# Load the training metadata. Later cells rely on its `filename` and `length`
# columns.
train = pd.read_csv(
    "../input/birdclef-2022-train-metadata-with-audio-metadata/train_ext.csv"
)

# Config Params

In [None]:
# Shared spectrogram settings, passed as **spec_params to the helpers above.
# At sr=32_000, hop_length=320 is a 10 ms hop (100 frames per second),
# n_fft=1280 is a 40 ms window, and fmax=16_000 equals sr / 2.
spec_params = dict(
    sr=32_000, hop_length=320, n_fft=1280, n_mels=128, fmin=0, fmax=16_000
)

# Broken file

In [None]:
# List recordings whose `length` is below 0.3 (presumably seconds - confirm).
train.query("length < 0.3")

## Play broken file
The second file is really noisy. **Watch out for the volume!**

In [None]:
# Listen to two specific recordings by path.
play_audio("../input/birdclef-2022/train_audio/blkfra/XC649198.ogg")
play_audio("../input/birdclef-2022/train_audio/normoc/XC150238.ogg")

## Remove broken file

In [None]:
# Keep only rows with `length` >= 0.3. This rebinds `train`, so re-run the
# loading cell to get the unfiltered frame back.
train = train.query("length >= 0.3")

In [None]:
# Number of rows remaining after the length filter.
len(train)

In [None]:
# Add the absolute path of each audio file. `.assign` builds a new frame
# instead of writing a column into the result of the earlier `.query(...)`,
# which is the chained-assignment pattern pandas warns about.
train = train.assign(
    fullpath="/kaggle/input/birdclef-2022/train_audio/" + train["filename"]
)

In [None]:
def print_spec(df, display_length_sec=30, is_debug=True, **spec_params):
    """Plot PCEN mel spectrograms and audio players for up to three recordings.

    Args:
        df: DataFrame with ``fullpath`` and ``filename`` columns.
        display_length_sec: Only the first ``sr * display_length_sec - 1``
            audio samples of each file are converted and shown.
        is_debug: When True, a fixed-seed random sample of at most 100 rows is
            drawn first, so the three rows shown are a reproducible random
            pick rather than the first three rows of ``df``.
        **spec_params: Forwarded to ``create_mel_spectrogram`` and
            ``pcen_bird``; ``sr`` is also used to load the audio.

    Raises:
        KeyError: If ``sr`` is missing from ``spec_params``.
    """
    sr = spec_params["sr"]
    # int() keeps the slice bound valid when display_length_sec is a float.
    max_samples = int(sr * display_length_sec) - 1
    if is_debug:
        # Cap the sample size so frames with fewer than 100 rows do not raise.
        df = df.sample(min(100, len(df)), random_state=123)
    df = df.head(3)
    for item in tqdm(df.itertuples(), total=len(df)):
        _, ax = plt.subplots(figsize=(960 / 72, 640 / 72), dpi=72)
        audio_data, _ = librosa.load(item.fullpath, sr=sr, mono=True)
        spec = create_mel_spectrogram(audio_data[:max_samples], **spec_params)
        spec = pcen_bird(spec, **spec_params)
        print(f"shape: {spec.shape}")
        ax.imshow(spec, cmap="magma")
        ax.set(title=f"filename: {item.filename}")
        play_audio(item.fullpath)

In [None]:
# Preview spectrograms and audio for three recordings (is_debug defaults to
# True, so they are drawn from a fixed-seed random sample).
print_spec(train, **spec_params)

# Convert audio to spectrogram

In [None]:
def create_directory_if_not_exist(dir_path):
    """Create ``dir_path`` (and any missing parents) unless it already exists.

    Raises:
        OSError: If the path cannot be created, e.g. it exists as a regular file.
    """
    # exist_ok=True replaces the separate isdir() check, which could race with
    # another process creating the directory between the check and makedirs().
    os.makedirs(dir_path, exist_ok=True)


def audio2spec_one(data, params=None):
    """Convert one audio file to a float16 PCEN mel spectrogram saved as ``.npy``.

    Args:
        data: ``(index, fullpath, filename)`` tuple, as yielded by
            ``DataFrame.itertuples(name=None)``; the index is ignored.
        params: Spectrogram settings. When omitted, the module-level
            ``spec_params`` is used, so ``audio2spec_one(data)`` keeps working.

    The array is written to ``filename`` with its extension replaced by
    ``.npy``, relative to the current working directory. The parent directory
    must already exist.
    """
    _, fullpath, filename = data
    if params is None:
        params = spec_params
    audio_data, _ = librosa.load(fullpath, sr=params["sr"], mono=True)
    spec = create_mel_spectrogram(audio_data, **params)
    spec = pcen_bird(spec, **params)
    spec = spec.astype(np.float16)

    # splitext handles extensions of any length; slicing off the last four
    # characters is only right for three-letter ones.
    out_filename = os.path.splitext(filename)[0] + ".npy"
    with open(out_filename, "wb") as f:
        np.save(f, spec)


def audio2spec(df, is_debug=False, **spec_params):
    """Convert every file listed in ``df`` to a spectrogram using 4 processes.

    Args:
        df: DataFrame with ``fullpath`` and ``filename`` columns.
        is_debug: When True, only a fixed-seed random sample of at most 100
            rows is converted.
        **spec_params: Spectrogram settings handed to each worker call.

    Output directories are derived from the directory part of each
    ``filename`` and created under the current working directory.
    """
    # Function-scope import so this block does not depend on the import cell.
    from functools import partial

    out_dirs = {os.path.dirname(name) for name in df["filename"]}
    for dir_path in out_dirs:
        if dir_path:  # files without a directory part need no folder
            create_directory_if_not_exist(dir_path)

    if is_debug:
        # Cap the sample size so frames with fewer than 100 rows do not raise.
        df = df.sample(min(100, len(df)), random_state=123)

    # Bind the settings explicitly so workers use the values passed to this
    # function rather than whatever the module-level `spec_params` holds.
    worker = partial(audio2spec_one, params=spec_params)
    rows = df[["fullpath", "filename"]].itertuples(name=None)
    with Pool(processes=4) as pool:
        # Drain the iterator to drive the progress bar; results are all None.
        for _ in tqdm(pool.imap(worker, rows), total=len(df)):
            pass

In [None]:
%%time
# Convert all rows of `train` (no debug sampling) and time the run.
audio2spec(train, is_debug=False, **spec_params)

In [None]:
def plot_npy(npy_file, max_frames=1920):
    """Display the leading columns of a spectrogram stored in a ``.npy`` file.

    Args:
        npy_file: Path to a ``.npy`` file holding a 2-D array.
        max_frames: Number of leading columns to show. The default of 1920
            keeps the crop this function used before it was a parameter.
    """
    _, ax = plt.subplots(figsize=(960 / 72, 640 / 72), dpi=72)
    img = np.load(npy_file)
    ax.imshow(img[:, :max_frames], cmap="magma")
    # Title each figure so several plots in one cell can be told apart.
    ax.set(title=f"filename: {npy_file}")

In [None]:
# Spot-check three saved spectrograms; the paths are relative to the working
# directory, which is where audio2spec_one writes its output.
plot_npy("skylar/XC630808.npy")
plot_npy("spodov/XC381365.npy")
plot_npy("normoc/XC196164.npy")

In [None]:
# Gather everything currently in /kaggle/working into a train_audio/ subfolder,
# staging through /tmp because a directory cannot be moved into itself.
# Not idempotent: running this cell again nests train_audio/ inside itself.
!mkdir /tmp/train_audio
!mv /kaggle/working/* /tmp/train_audio
!mv /tmp/train_audio /kaggle/working

# Output metadata

In [None]:
# Metadata for the converted dataset: the same rows as `train`, with `filename`
# pointing at the saved spectrogram and the source-audio path column removed.
# splitext swaps the extension whatever its length (slicing off four
# characters is only right for three-letter extensions).
metadata = train.drop(columns="fullpath").assign(
    filename=lambda frame: frame["filename"].map(
        lambda name: os.path.splitext(name)[0] + ".npy"
    )
)
metadata.to_csv("spec_metadata.csv", index=False)