# In [None]:
import os
import random
import numpy as np
import soundfile as sf
import librosa
import librosa.display
import matplotlib.pyplot as plt
from scipy.io import wavfile
from datasets import load_dataset
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Fetch the 'HSN' configuration of the BirdSet dataset.
# NOTE(review): trust_remote_code=True permits the dataset repository's own
# loading script to run on this machine -- confirm the source is trusted.
dataset = load_dataset(
    path='DBD-research-group/BirdSet',
    name='HSN',
    trust_remote_code=True,
)

def preprocess_audio(file_path):
    """Load an audio file and convert it to a log-scaled mel spectrogram.

    Args:
        file_path: Path (or file-like object) accepted by ``soundfile.read``.

    Returns:
        tuple: ``(log_S, samplerate)`` where ``log_S`` is the mel power
        spectrogram in decibels relative to its own peak and ``samplerate``
        is the sample rate reported by the file.
    """
    data, samplerate = sf.read(file_path)
    # soundfile returns multi-channel audio as (frames, channels), whereas
    # librosa treats the LAST axis as time. Downmix to mono so stereo files
    # are not transformed along the channel axis.
    if data.ndim > 1:
        data = data.mean(axis=1)
    # Power spectrogram -> mel filter bank -> dB scale.
    D = np.abs(librosa.stft(data)) ** 2
    S = librosa.feature.melspectrogram(S=D, sr=samplerate)
    log_S = librosa.power_to_db(S, ref=np.max)
    return log_S, samplerate

def augment_audio(data, rate):
    """Create three randomly augmented variants of an audio signal.

    Args:
        data: Audio samples as a NumPy array with time on the last axis
            (the layout librosa's effects operate on).
        rate: Sample rate of ``data`` in Hz; used by the pitch shifter.

    Returns:
        tuple: ``(data_stretch, data_shift, data_noise)`` --
        a time-stretched copy (random factor in [0.8, 1.2], so its length
        differs from the input), a pitch-shifted copy (random whole number
        of semitones in [-5, 5]), and a copy with additive Gaussian noise
        scaled by 0.005.
    """
    # Time-stretching
    data_stretch = librosa.effects.time_stretch(data, rate=random.uniform(0.8, 1.2))
    # Pitch-shifting
    data_shift = librosa.effects.pitch_shift(data, sr=rate, n_steps=random.randint(-5, 5))
    # Adding noise: size it from the full shape rather than len(data), which
    # only matches 1-D input and fails to broadcast for multi-channel arrays.
    noise = np.random.standard_normal(size=np.shape(data))
    data_noise = data + 0.005 * noise
    return data_stretch, data_shift, data_noise

# Preprocess and augment data
def _waveform_to_log_mel(waveform, sr):
    """Return the log-scaled (dB) mel power spectrogram of a mono waveform.

    Mirrors the transform in ``preprocess_audio`` so that the clean clip and
    its augmented variants are all represented the same way.
    """
    power = np.abs(librosa.stft(waveform)) ** 2
    mel = librosa.feature.melspectrogram(S=power, sr=sr)
    return librosa.power_to_db(mel, ref=np.max)


preprocessed_data = []
# Iterate the split directly instead of re-indexing dataset['train'][i]
# on every pass.
for example in dataset['train']:
    file_path = example['filepath']
    waveform, samplerate = sf.read(file_path)
    # soundfile yields (frames, channels) for multi-channel files; librosa
    # expects time on the last axis, so downmix to mono first.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    log_S = _waveform_to_log_mel(waveform, samplerate)
    preprocessed_data.append(log_S)

    # Time-stretch, pitch-shift and additive noise are waveform-domain
    # operations: augment the raw samples, THEN convert each variant to a
    # spectrogram (feeding them the dB spectrogram is not meaningful and the
    # noise step cannot broadcast against a 2-D array).
    for augmented in augment_audio(waveform, samplerate):
        preprocessed_data.append(_waveform_to_log_mel(augmented, samplerate))

# Plot the most recently computed spectrogram (log_S) on a mel/time grid.
fig, ax = plt.subplots(figsize=(10, 4))
mel_image = librosa.display.specshow(
    log_S,
    sr=samplerate,
    x_axis='time',
    y_axis='mel',
    ax=ax,
)
ax.set_title('Mel Spectrogram')
# Colour scale is labelled in signed decibels.
fig.colorbar(mel_image, ax=ax, format='%+2.0f dB')
plt.show()

# Example data shape and stats
# Clips differ in duration (and time-stretching changes length), so the
# collected arrays usually do not share one shape. np.array() on such a
# ragged list raises ValueError on current NumPy and would copy everything
# just to read a shape, so inspect the per-item shapes instead.
item_shapes = {np.shape(item) for item in preprocessed_data}
if len(item_shapes) <= 1:
    # Uniform (or empty) collection: report the shape stacking would give.
    common_shape = item_shapes.pop() if item_shapes else ()
    processed_shape = (len(preprocessed_data), *common_shape)
    print(f'Processed Data Shape: {processed_shape}')
else:
    print(
        f'Processed Data Shape: {len(preprocessed_data)} items with '
        f'{len(item_shapes)} distinct shapes '
        f'(first item: {np.shape(preprocessed_data[0])})'
    )
print(f'Sample Rate: {samplerate}')
