In [23]:
import os
import librosa
import soundfile as sf
import numpy as np
from datasets import load_dataset


In [22]:

# Step 1: Preprocessing and Saving Audio Files as Call Snippets
#
# For every detected event in every training recording, cut a fixed-length
# window centred on the event and write it to SNIPPET_DIR as
# "<ebird_code>_<sample idx>_<event idx>.wav".

# Directory that receives the snippet files
SNIPPET_DIR = 'bird_snippets'

# Desired snippet length in seconds
desired_length = 5.0


def _extract_snippet(y, sr, start_time, end_time, desired_length):
    """Cut a window of ``desired_length`` seconds centred on an event.

    Parameters
    ----------
    y : np.ndarray
        Audio samples. Assumed 1-D (``librosa.load`` returns mono by default).
    sr : int or float
        Sampling rate of ``y`` in Hz.
    start_time, end_time : float
        Event boundaries in seconds.
    desired_length : float
        Length of the returned snippet in seconds.

    Returns
    -------
    np.ndarray
        Exactly ``int(desired_length * sr)`` samples. If the recording is
        shorter than that, the whole recording is returned, zero-padded at
        the end.
    """
    snippet_length = int(desired_length * sr)

    # Centre the window on the event. This one formula covers events both
    # longer and shorter than desired_length.
    center_time = (start_time + end_time) / 2
    start_sample = max(0, int((center_time - desired_length / 2) * sr))

    # If the window runs past the end of the recording, shift it back.
    # Clamping at 0 keeps a negative index from wrapping around the array.
    if start_sample + snippet_length > len(y):
        start_sample = max(0, len(y) - snippet_length)

    snippet = y[start_sample:start_sample + snippet_length]

    # Only recordings shorter than the window still need padding here.
    missing = snippet_length - len(snippet)
    if missing > 0:
        snippet = np.pad(snippet, (0, missing), 'constant')
    return snippet


# Create a directory to save snippets
os.makedirs(SNIPPET_DIR, exist_ok=True)

# Load dataset
dataset = load_dataset('DBD-research-group/BirdSet', 'HSN', trust_remote_code=True)
dataset_train = dataset['train']
n_samples_total = len(dataset_train)

# Process each sample in the training dataset
for idx, sample in enumerate(dataset_train):
    # Access the audio path
    if 'filepath' in sample:
        audio_path = sample['filepath']
    elif isinstance(sample['audio'], dict) and 'path' in sample['audio']:
        audio_path = sample['audio']['path']
    else:
        print(f"Audio path not found in sample {idx}")
        continue  # Skip this sample if audio path is not found

    # Check for events before decoding the audio, so recordings with
    # nothing to extract are never read from disk.
    detected_events = sample['detected_events']
    if not detected_events:
        continue  # Skip if there are no detected events

    # Load the audio file at its native sampling rate
    y, sr = librosa.load(audio_path, sr=None)

    ebird_code = sample['ebird_code']

    # Process each detected event
    for event_idx, event in enumerate(detected_events):
        # Each event is a list of [start_time, end_time]
        if len(event) != 2:
            print(f"Invalid event format in sample {idx}, event {event_idx}")
            continue
        start_time, end_time = event

        snippet = _extract_snippet(y, sr, start_time, end_time, desired_length)

        # Save the snippet
        snippet_filename = f"{ebird_code}_{idx}_{event_idx}.wav"
        snippet_path = os.path.join(SNIPPET_DIR, snippet_filename)
        sf.write(snippet_path, snippet, sr)

    print(f"Processed sample {idx+1}/{n_samples_total}")

print('* Call Snippets are splitted!')

Processed sample 1/5460
Processed sample 3/5460
Processed sample 4/5460
Processed sample 5/5460
Processed sample 6/5460
Processed sample 7/5460
Processed sample 8/5460
Processed sample 9/5460
Processed sample 10/5460
Processed sample 11/5460
Processed sample 12/5460
Processed sample 13/5460
Processed sample 14/5460
Processed sample 15/5460
Processed sample 16/5460
Processed sample 17/5460
Processed sample 19/5460
Processed sample 20/5460
Processed sample 21/5460
Processed sample 22/5460
Processed sample 23/5460
Processed sample 24/5460
Processed sample 25/5460
Processed sample 26/5460
Processed sample 27/5460
Processed sample 28/5460
Processed sample 29/5460
Processed sample 30/5460
Processed sample 31/5460
Processed sample 32/5460
Processed sample 33/5460
Processed sample 34/5460
Processed sample 35/5460
Processed sample 36/5460
Processed sample 37/5460
Processed sample 38/5460
Processed sample 39/5460
Processed sample 40/5460
Processed sample 41/5460
Processed sample 42/5460
Processe