In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import librosa
import librosa.display
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from ipywidgets import interact
from ipywidgets import widgets
from IPython.display import display

In [None]:
# Paths of all training recordings (not referenced by the other visible cells).
train_files = glob.glob( '../input/rfcx-species-audio-detection/train/*.flac' )
# Annotation table; the cells below read its recording_id, species_id,
# t_min, t_max, f_min and f_max columns.
df = pd.read_csv('/kaggle/input/rfcx-species-audio-detection/train_tp.csv')

SR = 48000 # sample rate (Hz) the audio is loaded at
CLIP_LENGTH = 7 # length in seconds of the clip cut around each sound
START_TIME = 1 # seconds of lead-in: the clip starts 1 second before t_min
N_CLASS = 24 # number of species ids offered in the species dropdown (0..N_CLASS-1)

# fft params
N_FFT = 1024 # FFT size, also used as the STFT window length
HOP_LEN = 512 # hop between successive STFT frames, in samples

## Helper Functions

In [None]:
def get_full_path(recording_id):
    """Build the path of the training ``.flac`` file for ``recording_id``."""
    train_dir = '../input/rfcx-species-audio-detection/train'
    return f'{train_dir}/{recording_id}.flac'

def pad_truncate_sequence(x, max_len):
    """Force ``x`` to exactly ``max_len`` samples.

    Sequences shorter than ``max_len`` get zeros appended at the back;
    anything else is cut down to its first ``max_len`` elements.
    """
    n_missing = max_len - len(x)
    if n_missing <= 0:
        # Already long enough: keep only the leading max_len samples.
        return x[:max_len]
    tail = np.zeros(n_missing)
    return np.concatenate((x, tail))

def cut_audio(audio, t_min, start_time=START_TIME):
    """Cut a fixed-length clip out of ``audio`` around an annotated sound.

    The clip starts ``start_time`` seconds before ``t_min`` (clamped to the
    beginning of the recording) and is always ``CLIP_LENGTH * SR`` samples
    long; a clip that runs past the end of ``audio`` is zero-padded at the
    back by ``pad_truncate_sequence``.

    Args:
        audio: Sample sequence at ``SR`` samples per second (e.g. the array
            returned by ``load_audio``).
        t_min: Onset of the sound, in seconds from the start of ``audio``.
        start_time: Seconds of lead-in to keep before ``t_min``.

    Returns:
        Tuple ``(offset, clip)`` where ``offset`` is the onset of the sound
        in seconds measured from the start of ``clip``.
    """
    clip_start = t_min - start_time
    if clip_start < 0:
        # Not enough lead-in available: the clip starts at 0, so the sound
        # sits t_min seconds into it.
        clip_start, offset = 0, t_min
    else:
        # Report the lead-in that was actually applied. The previous code
        # returned the module-level START_TIME here, which is wrong whenever
        # the caller passes a different start_time.
        offset = start_time

    first = int(clip_start * SR)
    last = int((clip_start + CLIP_LENGTH) * SR)
    return offset, pad_truncate_sequence(audio[first:last], CLIP_LENGTH * SR)

def load_audio(recording_id):
    """Load the training recording ``recording_id`` as mono audio at ``SR`` Hz.

    Only the sample array is returned; the sample rate reported by librosa
    is discarded.
    """
    flac_path = get_full_path(recording_id)
    samples = librosa.core.load(flac_path, sr=SR, mono=True)[0]
    return samples

def create_spectrogram_from_row(row):
    """Plot the spectrogram of the clip around one annotated sound.

    Loads the recording named by ``row.recording_id``, cuts the clip around
    ``row.t_min`` with ``cut_audio``, and draws its dB-scaled STFT magnitude
    on a new figure. A green rectangle marks the annotated time/frequency
    box; its left edge is the onset offset returned by ``cut_audio``, so it
    is positioned in clip time rather than recording time.

    Args:
        row: Record exposing ``recording_id``, ``t_min``, ``t_max``,
            ``f_min`` and ``f_max`` attributes (e.g. one row of ``df``).

    Returns:
        None. The figure is left for the notebook backend to render.
    """
    recording_id = row.recording_id
    t_min, t_max = row.t_min, row.t_max
    f_min, f_max = row.f_min, row.f_max

    audio = load_audio(recording_id)
    start, audio = cut_audio(audio, t_min)
    stft = librosa.stft(audio, n_fft=N_FFT, hop_length=HOP_LEN, win_length=N_FFT)
    # dB relative to the loudest bin of this clip.
    S_db = librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    fig, ax = plt.subplots(figsize=(10,10))
    ax.set(title=f'spectrogram of {recording_id} from time {t_min} to {t_max}')
    # Draw on the axes created above instead of relying on pyplot's implicit
    # "current axes" state.
    img = librosa.display.specshow(
        S_db, hop_length=HOP_LEN, sr=SR, x_axis='time', y_axis='linear', ax=ax
    )
    ax.add_patch(
        patches.Rectangle(xy=(start, f_min), width=t_max-t_min, height=f_max-f_min, ec='#00ff00', fill=False)
    )
    fig.colorbar(img, ax=ax)

In [None]:
def _max_image_idx(species_id):
    """Return the largest valid row position for ``species_id`` (0 if it has no rows)."""
    n_rows = df[df.species_id == int(species_id)].shape[0]
    # Positions are 0-based, so the last valid one is n_rows - 1; never go
    # below the slider's min of 0.
    return max(n_rows - 1, 0)

image_slider = widgets.IntSlider(min=0, max=_max_image_idx(0), value=0)
species_dropdown = widgets.Dropdown(options=list(range(N_CLASS)), value=0)

def update_x_range(*args):
    """Resize the slider range to fit the species selected in the dropdown."""
    image_slider.max = _max_image_idx(species_dropdown.value)
# The range depends on the selected species, so watch the dropdown (the
# previous code watched the slider's own value).
species_dropdown.observe(update_x_range, 'value')

def view_image(image_idx, species_id):
    """Show the spectrogram of the ``image_idx``-th annotation of ``species_id``."""
    sub_df = df[df.species_id == int(species_id)]
    if image_idx >= len(sub_df):
        # Can happen briefly while the slider range catches up with a species
        # change, or for a species with no annotations.
        print(f'No annotation {image_idx} for species {species_id}')
        return
    create_spectrogram_from_row(sub_df.iloc[image_idx])

# Trailing semicolon keeps the notebook from echoing the function repr.
interact(view_image,image_idx=image_slider, species_id=species_dropdown);