In [None]:
import pandas as pd
import numpy as np
import librosa
import torch
import matplotlib.pyplot as plt
from IPython.display import Audio

import random
import glob
import os
import csv
import io
import ast

import sys
sys.path.append("..")
import utils

In [None]:
RANDOM_SEED = 21

# Set seed for experiment reproducibility: seed every RNG this notebook may
# draw from (Python's `random`, NumPy's legacy global RNG, and PyTorch on
# CPU and CUDA).
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
# Ask cuDNN to use deterministic algorithms only.
torch.backends.cudnn.deterministic = True
# Benchmark mode auto-tunes convolution algorithms at runtime and may choose
# different kernels from one run to the next, which defeats the determinism
# requested above; it therefore has to be disabled for reproducible results.
torch.backends.cudnn.benchmark = False

In [None]:
# Environment flag consumed by the following cells to choose the data path,
# the compute device, and whether the dataset has to be downloaded.
# Its exact semantics live in utils.get_is_in_kaggle_env (not shown here) —
# presumably truthy when the notebook runs inside a Kaggle kernel; confirm there.
is_in_kaggle_env = utils.get_is_in_kaggle_env()

In [None]:
# Root directory of the competition data. Both variants end with a trailing
# slash, so callers can append relative paths directly.
if is_in_kaggle_env:
    data_path = '/kaggle/input/birdclef-2023/'
else:
    data_path = '../data/'

In [None]:
# Compute device: fixed to the CPU when the Kaggle flag is set, otherwise
# delegated to the project helper utils.determine_device().
if is_in_kaggle_env:
    device = 'cpu'
else:
    device = utils.determine_device()

In [None]:
# One-time local setup: when not on Kaggle and ../data does not exist yet,
# download the competition archive with the Kaggle CLI, unpack it into
# ../data and delete the zip afterwards.
# NOTE(review): each `!` line runs independently, so ../data is created even
# if the download fails; the existence check above would then skip the
# download on the next run — confirm whether that needs guarding.
if not is_in_kaggle_env and not os.path.exists('../data'):
    !kaggle competitions download -c 'birdclef-2023'
    !mkdir ../data
    !unzip -q birdclef-2023.zip -d ../data
    !rm birdclef-2023.zip

## Data Exploration

In [None]:
# Load one sample recording as a waveform array plus its sample rate,
# resampled to 32 kHz via `sr=32000`.
# The path is built with os.path.join because data_path already ends with a
# slash; plain string interpolation produced a doubled separator ("//").
audio_numpy, audio_sr = librosa.load(
    os.path.join(data_path, "train_audio", "abethr1", "XC128013.ogg"),
    sr=32000,
)

In [None]:
# Sanity check of what librosa.load returned: the Python types of both
# values, the sample rate itself, and the shape of the waveform array.
type(audio_numpy), type(audio_sr), audio_sr, audio_numpy.shape

In [None]:
# Display an inline audio player for the loaded clip, played back at the
# sample rate it was loaded with.
Audio(data=audio_numpy, rate=audio_sr)

In [None]:
# Mel power spectrogram of the sample clip. The upper frequency bound is set
# to 8 kHz here so that the data matches the `fmax=8000` the plot is drawn
# with; computing up to the default (sr / 2) while plotting with fmax=8000
# would mislabel the frequency axis.
mel_spectrogram = librosa.feature.melspectrogram(y=audio_numpy, sr=audio_sr, n_mels=128, fmax=8000)

# specshow assumes sr=22050 unless told otherwise, so the real sample rate is
# passed explicitly to get a correct time axis. The trailing semicolon
# suppresses the QuadMesh repr in the cell output.
librosa.display.specshow(
    librosa.power_to_db(mel_spectrogram, ref=np.max),
    sr=audio_sr,
    y_axis='mel',
    fmax=8000,
    x_axis='time',
);

In [None]:
def normalize_spectrogram(spectrogram):
    """Min-max scale a spectrogram so its values span [0, 1].

    Args:
        spectrogram: Array-like of numeric values (any shape).

    Returns:
        A new array ``(spectrogram - min) / (max - min)``. When every value
        is identical (zero range) the input object itself is returned
        unchanged, avoiding a division by zero.
    """
    lowest = np.min(spectrogram)
    value_range = np.max(spectrogram) - lowest

    # Constant input: nothing to scale, hand the original back as-is.
    if value_range == 0:
        return spectrogram

    return (spectrogram - lowest) / value_range

In [None]:
# Mel spectrogram limited to 8 kHz. With librosa's default `power=2.0` this
# is a *power* spectrogram, so the dB conversion must use power_to_db;
# amplitude_to_db would square the values again, doubling every dB value and
# halving the effective dynamic range before its top_db clipping.
mel_spectrogram = librosa.feature.melspectrogram(y=audio_numpy, sr=audio_sr, n_mels=128, fmax=8000)
log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
# Rescale the dB values with the notebook's min-max helper.
norm_log_mel_spectrogram = normalize_spectrogram(log_mel_spectrogram)

print(f"Duration of the audio: {len(audio_numpy) / audio_sr} seconds")

print(f"norm_log_mel_spectrogram shape: {norm_log_mel_spectrogram.shape}")

# Pass the real sample rate: specshow otherwise assumes 22050 Hz and draws a
# wrong time axis. The trailing semicolon hides the QuadMesh repr.
librosa.display.specshow(norm_log_mel_spectrogram, sr=audio_sr, y_axis='mel', fmax=8000, x_axis='time');