# In [1]:
from pydub import AudioSegment, silence
import os

def split_audio(input_file, min_silence_length_ms, min_segment_length_ms, max_segment_length_ms, silence_thresh=-40):
    """Split an audio file into MP3 segments cut at detected silences.

    The silent ranges reported by pydub are walked in order. A segment runs
    from the current start point to the end of a silent range once that span
    is at least ``min_segment_length_ms`` long; a span that is still too
    short is carried over and extended to the next silent range. A span
    longer than ``max_segment_length_ms`` is truncated to that length and the
    audio between the truncation point and the end of the silent range is
    discarded. The audio left after the last cut is exported under the same
    minimum/maximum rules.

    Segments are written next to the input file as
    ``<base>_segment<N>.mp3``, with ``N`` counting up from 1.

    Args:
        input_file: Path of the audio file to split.
        min_silence_length_ms: Passed to ``detect_silence`` as
            ``min_silence_len`` (milliseconds).
        min_segment_length_ms: Shortest segment, in milliseconds, that is
            exported.
        max_segment_length_ms: Longest segment, in milliseconds, that is
            exported; longer spans are truncated.
        silence_thresh: Passed to ``detect_silence`` as ``silence_thresh``
            (dBFS according to the pydub documentation).

    Returns:
        None. The segments are written to disk.
    """
    audio = AudioSegment.from_file(input_file)

    # Detect the silent parts of the audio as [start_ms, end_ms] pairs
    silent_ranges = silence.detect_silence(
        audio,
        min_silence_len=min_silence_length_ms,
        silence_thresh=silence_thresh,
    )

    # Output files live beside the input and reuse its base name
    input_directory = os.path.dirname(input_file)
    base_filename = os.path.splitext(os.path.basename(input_file))[0]

    # Plan the cuts first as (start_ms, length_ms) pairs, then export them
    cuts = []
    start_ms = 0
    for start, end in silent_ranges:
        span_ms = end - start_ms
        # Skip a silence that begins at (or before) the current start point,
        # and keep accumulating while the span is still too short.
        if start <= start_ms or span_ms < min_segment_length_ms:
            continue

        segment_length = min(span_ms, max_segment_length_ms)
        # Only fails when max_segment_length_ms < min_segment_length_ms
        if segment_length >= min_segment_length_ms:
            cuts.append((start_ms, segment_length))

        # The next segment starts where this silent range ends
        start_ms = end

    # Remaining audio after the last cut: apply the same maximum as above so
    # a long tail (or a file with no silence at all) cannot produce a
    # segment longer than max_segment_length_ms.
    total_ms = len(audio)
    if start_ms < total_ms:
        tail_length = min(total_ms - start_ms, max_segment_length_ms)
        if tail_length >= min_segment_length_ms:
            cuts.append((start_ms, tail_length))

    for segment_count, (begin_ms, length_ms) in enumerate(cuts, start=1):
        output_file = os.path.join(input_directory, f"{base_filename}_segment{segment_count}.mp3")
        audio[begin_ms:begin_ms + length_ms].export(output_file, format="mp3")

# Directory containing the input audio files (change this to your directory)
input_audio_directory = "data/blackstork"

# Minimum silence length to consider as a gap (in milliseconds)
min_silence_length_ms = 10

# Minimum and maximum segment lengths (in milliseconds)
min_segment_length_ms = 2000  # 2 seconds
max_segment_length_ms = 5000  # 5 seconds

# Loop through all MP3 files in the directory and split them.
# sorted() gives a reproducible processing order; os.listdir() returns the
# entries in arbitrary order.
for input_file in sorted(os.listdir(input_audio_directory)):
    # Match the extension case-insensitively so "clip.MP3" is not missed
    if not input_file.lower().endswith('.mp3'):
        continue

    # split_audio() writes "<base>_segment<N>.mp3" into this same directory.
    # Skip those outputs, otherwise re-running the script would split the
    # previously generated segments again.
    stem = input_file[:-len('.mp3')]
    _, marker, suffix = stem.rpartition("_segment")
    if marker and suffix.isdigit():
        continue

    input_path = os.path.join(input_audio_directory, input_file)
    split_audio(input_path, min_silence_length_ms, min_segment_length_ms, max_segment_length_ms)
