<a href="https://colab.research.google.com/github/noahdanieldsouza/PAM-classification/blob/main/audio_cut.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install Libraries { vertical-output: true }
# Installs the third-party packages used by this notebook.
# NOTE(review): the bare `pip install` line is notebook command syntax rather
# than Python, so this cell is only expected to work inside a notebook runtime
# (e.g. Colab); `%pip install ...` is the explicit form.
# NOTE(review): `soundfile` is imported in the Imports cell but is not listed
# here; presumably it is pulled in as a dependency of librosa. TODO confirm.
pip install pydub librosa numpy scipy


In [None]:
#@title Imports { vertical-output: true }
import os
import xml.etree.ElementTree as ET
from pydub import AudioSegment
import numpy as np
import librosa
import soundfile as sf
from datetime import datetime, timedelta

In [None]:
#@title Configuration { vertical-output: true }
# Notebook-wide settings. Each `#@param` directive exposes the value as a
# Colab form field; descriptions are kept on their own lines so the directive
# stays the last thing on the assignment line.

# Presumably the directory holding the input WAV recordings; verify against
# the cells that read it.
WAV_DIR = '' #@param {type:'string'}
# Output directory; created below once a non-empty path has been entered.
OUTPUT_DIR = '' #@param {type:'string'}
# Threshold in dB; adjust based on your noise level.
EVENT_THRESHOLD_DB = -30  #@param {type:'integer'}
# Duration in seconds that detect_events compares each detected span against.
MIN_SILENCE_DURATION = 0.3 #@param {type:'number'}
# Clip length in seconds.
CLIP_LENGTH = 5  #@param {type:'integer'}

# os.makedirs('') raises FileNotFoundError, so the directory is only created
# after OUTPUT_DIR has actually been filled in; the untouched default no
# longer crashes this cell.
if OUTPUT_DIR:
    os.makedirs(OUTPUT_DIR, exist_ok=True)



In [None]:
#@title Functions { vertical-output: true }
#parse xml timestamps
def parse_xml_time(xml_path):
    """Read the start timestamp stored in an XML metadata file.

    Args:
        xml_path: Path or file object of the XML document to parse.

    Returns:
        The ``datetime`` parsed from the text of the ``StartTime`` element
        that is a direct child of the document root.

    Raises:
        ValueError: If the root has no ``StartTime`` child, the element is
            blank, or its text is not a valid ISO 8601 timestamp.
    """
    root = ET.parse(xml_path).getroot()
    start_str = root.findtext('StartTime')  # modify if tag name differs
    # findtext() returns None for a missing tag and '' for an empty one;
    # report both explicitly instead of letting fromisoformat() fail on None.
    if start_str is None or not start_str.strip():
        raise ValueError(f"no <StartTime> value found in {xml_path!r}")
    # Pretty-printed XML can pad the value with spaces/newlines, which
    # datetime.fromisoformat() does not accept.
    return datetime.fromisoformat(start_str.strip())

#decibel level -> linear amplitude ratio
def db_to_amplitude_ratio(db):
    """Return the linear amplitude ratio for a level expressed in decibels.

    The value is ``10`` raised to ``db / 20``, so 0 dB maps to 1 and every
    20 dB step scales the result by a factor of ten.
    """
    exponent = db / 20
    return 10 ** exponent

#detect audio events
def detect_events(audio, sr, threshold_db=-30):
    """Find the time spans whose short-time RMS level exceeds a threshold.

    Args:
        audio: Sample array. An array with more than one dimension is
            averaged over axis 1 to obtain a mono signal.
            NOTE(review): this assumes a (samples, channels) layout; confirm
            against the loader used by the caller.
        sr: Sample rate of ``audio`` in Hz.
        threshold_db: Level in dB, converted with ``db_to_amplitude_ratio``
            and compared with each frame's RMS value.
            NOTE(review): presumably relative to full-scale amplitude 1.0,
            i.e. float samples in [-1, 1]; confirm.

    Returns:
        A list of ``(start, end)`` tuples in seconds. A span that ends before
        the last frame is kept only when it is longer than the module-level
        ``MIN_SILENCE_DURATION``; a span still open at the last frame is
        always kept and ends at the last frame time.
    """
    # Convert to mono
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    frame_length = int(sr * 0.1)  # 100 ms analysis window
    hop_length = int(sr * 0.05)  # 50 ms step between windows
    # The signal must be passed as ``y=``: librosa >= 0.10 made the arguments
    # of feature.rms keyword-only, so a positional call raises TypeError.
    rms = librosa.feature.rms(
        y=audio, frame_length=frame_length, hop_length=hop_length
    )[0]
    times = librosa.frames_to_time(np.arange(len(rms)), sr=sr, hop_length=hop_length)

    threshold = db_to_amplitude_ratio(threshold_db)
    is_loud = rms > threshold

    events = []
    start = None  # start time of the span currently open, if any
    for i, loud in enumerate(is_loud):
        if loud and start is None:
            start = times[i]
        elif not loud and start is not None:
            end = times[i]
            # Despite its name, MIN_SILENCE_DURATION acts here as the minimum
            # length a span needs in order to be reported.
            if end - start > MIN_SILENCE_DURATION:
                events.append((start, end))
            start = None
    if start is not None:
        # Audio ended while still above the threshold: close the span at the
        # last frame time (no duration check is applied here).
        events.append((start, times[-1]))
    return events

#write the xml metadata that records a clip's start time
def write_xml_for_clip(base_datetime, clip_start, output_path):
    """Write a ``ClipMetadata`` XML document holding the clip's start time.

    Args:
        base_datetime: ``datetime`` that the clip offset is added to.
        clip_start: Offset of the clip from ``base_datetime``, in seconds.
        output_path: Destination handed to ``ElementTree.write``.

    The document has a single ``StartTime`` child whose text is the ISO
    format of ``base_datetime`` plus ``clip_start`` seconds.
    """
    offset = timedelta(seconds=clip_start)
    metadata = ET.Element("ClipMetadata")
    ET.SubElement(metadata, "StartTime").text = (base_datetime + offset).isoformat()
    ET.ElementTree(metadata).write(output_path)

