In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Input data files are available in the read-only "../input/" directory
# For example, os.listdir() or os.walk() on that directory (run with the Run button or Shift+Enter) will list the files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Collect the names (not full paths) of every entry in the train_soundscapes input directory.
files = os.listdir("/kaggle/input/birdclef-2025/train_soundscapes")

In [3]:
# Display how many entries the soundscapes directory listing returned.
len(files)

9726

In [4]:
# import os
# from pydub import AudioSegment
# from tqdm import tqdm
# from IPython.display import display, clear_output

# folder_path = "/kaggle/input/birdclef-2025/train_soundscapes"  # Change this to your folder path

# def get_audio_duration(file_path):
#     audio = AudioSegment.from_file(file_path)
#     return len(audio) / 1000  # Convert from ms to seconds

# ogg_files = [f for f in os.listdir(folder_path) if f.endswith(".ogg")]

# total_duration = 0

# for file in tqdm(ogg_files, desc="Processing Audio Files", unit="file"):
#     file_path = os.path.join(folder_path, file)
#     duration = get_audio_duration(file_path)
#     total_duration += duration
#     clear_output(wait=True)

#     print(f"\rTotal Duration: {total_duration:.2f} seconds ({total_duration/60:.2f} minutes)", end="", flush=True)

# print()  # Ensure the final print moves to a new line


In [5]:
import os
import librosa
import numpy as np
import soundfile as sf
import zipfile
from tqdm import tqdm
from PIL import Image

# Define parameters
# Mel-spectrogram settings; create_mel_spectrogram() forwards them to
# librosa.feature.melspectrogram() as n_fft / hop_length / n_mels / fmin / fmax.
N_FFT = 1024  # FFT window length, in samples
HOP_LENGTH = 512  # Samples between successive frames
N_MELS = 128  # Number of mel bands
FMIN = 50  # Lowest frequency of the mel filter bank (Hz, per librosa's fmin)
FMAX = 14000  # Highest frequency of the mel filter bank (Hz, per librosa's fmax)

# Windowing / image settings used by process_audio() and create_mel_spectrogram().
TARGET_DURATION = 5.0  # Length of each audio window, in seconds
TARGET_SHAPE = (256, 256)  # Size passed to PIL's Image.resize() for every saved PNG

# Input directory scanned for .ogg recordings.
folder_path = "/kaggle/input/birdclef-2025/train_soundscapes"  # Change this to your folder path
# Relative path, so the folder is created under the current working directory.
output_folder = "spectrograms"  # Folder to save images
# exist_ok=True: re-running the cell does not fail if the folder is already there.
os.makedirs(output_folder, exist_ok=True)

def create_mel_spectrogram(y, sr, output_path):
    """Render one audio clip as a grayscale mel-spectrogram PNG.

    The clip is turned into a mel power spectrogram using the module-level
    N_FFT / HOP_LENGTH / N_MELS / FMIN / FMAX settings, converted to dB
    relative to its own peak, min-max scaled to 0-255, resized to
    TARGET_SHAPE and written to ``output_path``.

    Args:
        y: Audio samples of the clip (process_audio passes a slice of the
            array returned by ``librosa.load``).
        sr: Sample rate of ``y``.
        output_path: Destination path of the PNG file.

    Returns:
        None. The only result is the file written to ``output_path``.
    """
    # Generate Mel spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH,
                                              n_mels=N_MELS, fmin=FMIN, fmax=FMAX)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)  # Convert to dB scale

    # Normalize to 0-255 (convert float -> uint8).
    db_min, db_max = mel_spec_db.min(), mel_spec_db.max()
    if db_max > db_min:
        pixels = np.interp(mel_spec_db, (db_min, db_max), (0, 255)).astype(np.uint8)
    else:
        # Flat spectrogram (e.g. a digitally silent clip). np.interp requires
        # increasing x-coordinates, so (v, v) is outside its contract and in
        # practice yields a fully saturated image. Emit an all-black image
        # instead, which is the conventional rendering of "no energy".
        pixels = np.zeros(mel_spec_db.shape, dtype=np.uint8)

    # Resize to TARGET_SHAPE. NOTE: no vertical flip is applied, so the top
    # row of the image corresponds to the lowest mel band.
    mel_spec_resized = Image.fromarray(pixels).resize(TARGET_SHAPE)

    # Save as PNG
    mel_spec_resized.convert("L").save(output_path)  # Convert to grayscale before saving

def process_audio(file_path, output_folder):
    """Cut one recording into back-to-back fixed-length clips and save a PNG per clip.

    The file is loaded at its native sample rate and split into consecutive
    windows of TARGET_DURATION seconds. Samples left over after the last full
    window are not rendered. Each window is handed to create_mel_spectrogram()
    and saved as ``<input file stem>_seg<i>.png`` inside ``output_folder``.

    Args:
        file_path: Path of the audio file to process.
        output_folder: Directory that receives the PNG files.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)  # sr=None: keep the file's own rate
    window_len = int(TARGET_DURATION * sample_rate)  # samples in one window
    n_windows = len(samples) // window_len  # only complete windows are counted

    # The file stem is the same for every segment, so derive it once.
    stem = os.path.splitext(os.path.basename(file_path))[0]

    for seg_idx in range(n_windows):
        offset = seg_idx * window_len
        png_path = os.path.join(output_folder, f"{stem}_seg{seg_idx}.png")
        create_mel_spectrogram(samples[offset:offset + window_len], sample_rate, png_path)

def zip_and_cleanup(output_folder, batch_index):
    """Move every PNG under ``output_folder`` into one zip archive.

    The archive is written to ``output_folder`` as
    ``spectrograms_batch_<batch_index>.zip``. PNGs are stored under their bare
    file name (no directory component) and are deleted from disk once the
    archive has been written and closed.

    If there are no PNG files, nothing is written: no empty archive is
    created.

    Args:
        output_folder: Directory that is searched (recursively) for ``.png``
            files and that receives the archive.
        batch_index: Number used in the archive's file name.
    """
    # Snapshot the PNG list first so the walk is not affected by the archive
    # that is about to be created in the same folder.
    png_paths = [
        os.path.join(root, name)
        for root, _, names in os.walk(output_folder)
        for name in names
        if name.endswith(".png")
    ]
    if not png_paths:
        return

    zip_filename = f"spectrograms_batch_{batch_index}.zip"
    zip_filepath = os.path.join(output_folder, zip_filename)

    with zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for png_path in png_paths:
            zipf.write(png_path, os.path.basename(png_path))

    # Delete the images only after the archive has been closed successfully;
    # if writing fails part-way, the PNGs are still on disk.
    for png_path in png_paths:
        os.remove(png_path)

# Sort the listing: os.listdir() returns entries in arbitrary order, so an
# unsorted list would make the contents of each zipped batch differ between runs.
ogg_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".ogg"))
batch_size = 500  # Recordings processed between two zip_and_cleanup() calls
batch_index = 0  # Number used in the name of the next batch archive

for idx, file in enumerate(tqdm(ogg_files, desc="Generating Spectrograms", unit="file")):
    file_path = os.path.join(folder_path, file)
    process_audio(file_path, output_folder)

    # Every `batch_size` files, create a zip and delete the images so the
    # loose PNGs on disk never cover more than one batch of recordings.
    if (idx + 1) % batch_size == 0:
        zip_and_cleanup(output_folder, batch_index)
        batch_index += 1

# Final zip for any remaining images (the last, possibly partial, batch)
zip_and_cleanup(output_folder, batch_index)

print("\nSpectrogram generation complete! Images saved in zipped batches in:", output_folder)

Generating Spectrograms: 100%|██████████| 9726/9726 [48:10<00:00,  3.36file/s]  



Spectrogram generation complete! Images saved in zipped batches in: spectrograms


In [7]:
import shutil

# Directory whose contents are packed into a single archive.
folder_path = "/kaggle/working/spectrograms"  # Change this to your folder path
# NOTE: shutil.make_archive() treats this value as a *base name* and appends
# ".zip" itself, so the file on disk is "spectograms_all.zip.zip". The value is
# left unchanged because the download-link cell points at that exact file name.
output_zip = "spectograms_all.zip"

# make_archive() returns the path of the archive it actually wrote; report that
# path instead of re-deriving the file name by hand.
archive_path = shutil.make_archive(output_zip, 'zip', folder_path)

print(f"Zipped folder saved at {archive_path}")


Zipped folder saved at spectograms_all.zip.zip
Zipped folder saved as spectograms_all.zip


In [11]:
import os
from IPython.display import FileLink

# Switch to the Kaggle working directory so the relative path below resolves there.
os.chdir("/kaggle/working")

# Last expression of the cell: displays a link to the combined archive.
FileLink("spectograms_all.zip.zip")

