<a href="https://colab.research.google.com/github/noahdanieldsouza/PAM-classification/blob/main/audio_cut_frequency.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install Libraries { vertical-output: true }
!pip install pydub librosa numpy scipy



In [4]:
#@title Imports { vertical-output: true }
import os
import xml.etree.ElementTree as ET
from pydub import AudioSegment
import numpy as np
import librosa
import soundfile as sf
from datetime import datetime, timedelta

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# ==== Configuration ====
WAV_PATH = '/content/drive/MyDrive/lake-sounds/8567.240831110000.wav'
OUTPUT_CLIP_DIR = '/content/drive/MyDrive/lake-sounds/clips'
CLIP_LENGTH_SECONDS = 5.0  # length of each exported clip, in seconds

# Spectral thresholds in Hz (tune as needed). A chunk is treated as boat noise
# when its mean spectral centroid AND mean spectral bandwidth both fall inside
# the ranges below.
# Colab form fields: "#@param" follows the value directly; the numeric field
# types are "integer" and "number".
# minimum frequency of boat noise
BOAT_CENTROID_MIN = 9000  #@param {type:"integer"}
# maximum frequency of boat noise
BOAT_CENTROID_MAX = 12000  #@param {type:"integer"}
# minimum bandwidth of boat noise
BOAT_BW_MIN = 11000  #@param {type:"integer"}
# maximum bandwidth of boat noise
BOAT_BW_MAX = 12500  #@param {type:"integer"}
# length of audio analyzed per window, in seconds
CHUNK_DURATION = 1.0  #@param {type:"number"}
MIN_EVENT_DURATION = 0.75  # seconds
OVERLAP = 0.5  # seconds shared by consecutive analysis windows
SAMPLE_RATE = None  # use original

# The detection loop advances by (CHUNK_DURATION - OVERLAP) seconds per step;
# fail here, next to the settings, rather than deep inside the loop.
if not 0 <= OVERLAP < CHUNK_DURATION:
    raise ValueError("OVERLAP must satisfy 0 <= OVERLAP < CHUNK_DURATION")

# NOTE(review): later cells also read CENTROID_THRESHOLD_HZ,
# BANDWIDTH_THRESHOLD_HZ, MIN_SILENCE_DURATION, CLIP_LENGTH, OUTPUT_DIR and
# WAV_DIR, none of which are assigned in any visible cell. Define them here so
# the notebook survives Restart & Run All.

Mounted at /content/drive


In [2]:
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta

# Load base time from XML
def parse_xml_time(xml_path):
    """Return the recording start time stored in a log XML file.

    Scans every ``PROC_EVENT`` element below the document root and returns the
    first non-empty ``SamplingStartTimeLocal`` attribute found on a direct
    ``WavFileHandler`` child, parsed with ``datetime.fromisoformat``.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        The parsed ``datetime``.

    Raises:
        ValueError: If no ``PROC_EVENT`` carries a usable start time.
    """
    document_root = ET.parse(xml_path).getroot()

    for event_node in document_root.findall(".//PROC_EVENT"):
        handler_node = event_node.find("WavFileHandler")
        if handler_node is None:
            continue
        raw_stamp = handler_node.attrib.get("SamplingStartTimeLocal")
        if not raw_stamp:
            # Attribute missing or empty: keep looking in later events.
            continue
        return datetime.fromisoformat(raw_stamp)

    raise ValueError("SamplingStartTimeLocal not found")

# Write XML file for clip
def write_xml_for_clip(base_datetime, clip_start, output_path):
    """Write a minimal log XML describing when a clip starts.

    The file has the shape ``<ST><PROC_EVENT ID="4"><WavFileHandler .../>``
    where ``SamplingStartTimeLocal`` is ``base_datetime`` shifted by
    ``clip_start`` seconds, in ISO format.

    Args:
        base_datetime: ``datetime`` the offset is added to.
        clip_start: Offset of the clip in seconds.
        output_path: Destination path of the XML file.
    """
    stamp = (base_datetime + timedelta(seconds=clip_start)).isoformat()

    st_root = ET.Element("ST")
    event_node = ET.SubElement(st_root, "PROC_EVENT", {"ID": "4"})
    ET.SubElement(event_node, "WavFileHandler", {"SamplingStartTimeLocal": stamp})

    ET.ElementTree(st_root).write(output_path)


In [5]:
os.makedirs(OUTPUT_CLIP_DIR, exist_ok=True)

# ==== Load audio ====
# SAMPLE_RATE is passed straight through as librosa's `sr` argument.
y, sr = librosa.load(WAV_PATH, sr=SAMPLE_RATE)
duration_sec = librosa.get_duration(y=y, sr=sr)
samples_per_chunk = int(CHUNK_DURATION * sr)

print(f"Loaded audio: {WAV_PATH}")
print(f"Duration: {duration_sec/60:.2f} minutes, Sample rate: {sr}, Total samples: {len(y)}")

# ==== Event Detection ====
# Windows of CHUNK_DURATION seconds slide over the signal in steps of
# (CHUNK_DURATION - OVERLAP) seconds.
hop = int((CHUNK_DURATION - OVERLAP) * sr)
if hop <= 0:
    # range() rejects a zero step and silently yields nothing for a negative one.
    raise ValueError("OVERLAP must be smaller than CHUNK_DURATION so the analysis window advances")

event_times = []   # list of (start_sec, end_sec) tuples
in_event = False
event_start = None

# "+ 1" so the last window that still fits completely inside the signal is
# analysed as well; every slice below is therefore exactly samples_per_chunk long.
for i in range(0, len(y) - samples_per_chunk + 1, hop):
    chunk = y[i:i + samples_per_chunk]

    # Mean over the frames librosa computes inside this window.
    centroid = librosa.feature.spectral_centroid(y=chunk, sr=sr).mean()
    bandwidth = librosa.feature.spectral_bandwidth(y=chunk, sr=sr).mean()

    t_start = i / sr
    t_end = (i + samples_per_chunk) / sr

    is_boat = (BOAT_CENTROID_MIN <= centroid <= BOAT_CENTROID_MAX) \
           and (BOAT_BW_MIN <= bandwidth <= BOAT_BW_MAX)

    if is_boat:
        print(f"[DETECTED] {t_start:.2f}s → Centroid: {centroid:.2f} Hz, Bandwidth: {bandwidth:.2f} Hz")

    # NOTE(review): an "event" here is a run of windows that are NOT boat noise;
    # it opens on the first non-boat window and closes on the next boat window.
    # Confirm this is the intended polarity.
    if not is_boat:
        if not in_event:
            event_start = t_start
            in_event = True
    else:
        if in_event:
            # NOTE(review): the end of the closing (boat) window is used as the
            # event end, so the recorded span includes that window — verify.
            if t_end - event_start >= MIN_EVENT_DURATION:
                event_times.append((event_start, t_end))
            in_event = False

# An event still open at the end of the file runs to the end of the recording.
if in_event:
    if duration_sec - event_start >= MIN_EVENT_DURATION:
        event_times.append((event_start, duration_sec))

print(f"Detected {len(event_times)} events")

# ==== Clip Saving Function ====
def save_clip(y, sr, start_sec, clip_index):
    """Cut one clip out of ``y`` and write it as a WAV file.

    The clip starts at ``start_sec`` and is ``CLIP_LENGTH_SECONDS`` long,
    shortened if it would run past the end of ``y``. The file is written into
    ``OUTPUT_CLIP_DIR`` as ``clip_<index>_<h-mm-ss>.wav``.

    Args:
        y: Audio samples to slice.
        sr: Sample rate used for the sample offsets and for the written file.
        start_sec: Clip start in seconds.
        clip_index: Running number used in the file name (zero-padded to 3).

    Returns:
        Path of the written WAV file.
    """
    first_sample = int(start_sec * sr)
    # Clamp the end so a clip near the end of the recording is simply shorter.
    last_sample = min(first_sample + int(CLIP_LENGTH_SECONDS * sr), len(y))
    segment = y[first_sample:last_sample]

    # Whole-second offset rendered as h-mm-ss (":" replaced for the file name).
    offset_label = str(timedelta(seconds=int(start_sec))).replace(":", "-")
    out_path = os.path.join(OUTPUT_CLIP_DIR, f"clip_{clip_index:03}_{offset_label}.wav")

    sf.write(out_path, segment, sr)
    return out_path

# ==== Extract & Save Clips ====
saved_clips = []   # (clip_path, xml_path, clip_start_sec) per exported clip

# The log is looked up next to the WAV as "<name>.log.xml". splitext only
# touches the final extension, unlike str.replace, which would rewrite every
# ".wav" occurring anywhere in the path.
base_xml_path = os.path.splitext(WAV_PATH)[0] + ".log.xml"
base_datetime = parse_xml_time(base_xml_path)

for idx, (start_sec, end_sec) in enumerate(event_times):
    # Centre a fixed-length clip on the event, clamped to the start of the file.
    center_time = (start_sec + end_sec) / 2.0
    clip_start = max(0, center_time - CLIP_LENGTH_SECONDS / 2.0)
    clip_path = save_clip(y, sr, clip_start, idx)

    # XML output path: same directory and base name as the clip
    xml_path = os.path.splitext(clip_path)[0] + ".xml"

    # Write XML
    write_xml_for_clip(base_datetime, clip_start, xml_path)

    saved_clips.append((clip_path, xml_path, clip_start))

# Report the configured clip length instead of a hard-coded "5".
print(f"Saved {len(saved_clips)} {CLIP_LENGTH_SECONDS:g}-second clips and XMLs to {OUTPUT_CLIP_DIR}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[DETECTED] 1189.50s → Centroid: 11841.11 Hz, Bandwidth: 11527.79 Hz
[DETECTED] 1190.00s → Centroid: 11366.92 Hz, Bandwidth: 11425.74 Hz
[DETECTED] 1190.50s → Centroid: 11406.97 Hz, Bandwidth: 11409.60 Hz
[DETECTED] 1191.00s → Centroid: 11855.14 Hz, Bandwidth: 11409.21 Hz
[DETECTED] 1192.00s → Centroid: 11490.66 Hz, Bandwidth: 11458.96 Hz
[DETECTED] 1192.50s → Centroid: 11518.23 Hz, Bandwidth: 11451.66 Hz
[DETECTED] 1193.50s → Centroid: 11796.61 Hz, Bandwidth: 11457.00 Hz
[DETECTED] 1194.00s → Centroid: 11599.15 Hz, Bandwidth: 11464.42 Hz
[DETECTED] 1195.00s → Centroid: 11201.59 Hz, Bandwidth: 11319.58 Hz
[DETECTED] 1195.50s → Centroid: 11107.11 Hz, Bandwidth: 11552.62 Hz
[DETECTED] 1196.00s → Centroid: 11930.75 Hz, Bandwidth: 11677.16 Hz
[DETECTED] 1197.50s → Centroid: 11936.58 Hz, Bandwidth: 11745.76 Hz
[DETECTED] 1199.00s → Centroid: 11849.34 Hz, Bandwidth: 11716.76 Hz
[DETECTED] 1200.00s → Centroid: 11953.40 Hz, Bandwi

In [4]:
# --- XML time extraction ---
def parse_xml_time(xml_path):
    """Read the ``SamplingStartTimeLocal`` timestamp from a log XML file.

    Every ``PROC_EVENT`` element below the root is inspected in document
    order; the first one whose direct ``WavFileHandler`` child has a non-empty
    ``SamplingStartTimeLocal`` attribute wins.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        The attribute value parsed with ``datetime.fromisoformat``.

    Raises:
        ValueError: If no such attribute exists; the message names the file.
    """
    xml_root = ET.parse(xml_path).getroot()
    handlers = (event.find("WavFileHandler") for event in xml_root.findall(".//PROC_EVENT"))
    for handler in handlers:
        stamp = None if handler is None else handler.attrib.get("SamplingStartTimeLocal")
        if stamp:
            return datetime.fromisoformat(stamp)
    raise ValueError(f"Time not found in {xml_path}")

def write_xml_for_clip(base_datetime, clip_start, output_path):
    """Create the companion XML for a clip.

    Builds ``ST > PROC_EVENT(ID="4") > WavFileHandler`` and stores the clip's
    start time (``base_datetime`` plus ``clip_start`` seconds, ISO formatted)
    in the ``SamplingStartTimeLocal`` attribute.

    Args:
        base_datetime: ``datetime`` the clip offset is relative to.
        clip_start: Clip offset in seconds.
        output_path: Where the XML file is written.
    """
    start_of_clip = base_datetime + timedelta(seconds=clip_start)

    doc = ET.Element("ST")
    event = ET.SubElement(doc, "PROC_EVENT", ID="4")
    handler_attrs = {"SamplingStartTimeLocal": start_of_clip.isoformat()}
    ET.SubElement(event, "WavFileHandler", handler_attrs)

    ET.ElementTree(doc).write(output_path)

# --- Spectral-based detector ---
def detect_events_by_spectral_features(audio, sr):
    """Find time spans whose spectral centroid and bandwidth exceed thresholds.

    Frames of 0.1 s (hop 0.05 s) are flagged when centroid is above
    ``CENTROID_THRESHOLD_HZ`` and bandwidth is above ``BANDWIDTH_THRESHOLD_HZ``.
    A run of flagged frames that ends inside the signal is kept only if it is
    longer than ``MIN_SILENCE_DURATION``; a run still open at the last frame is
    always kept and ends at the last frame time.

    Args:
        audio: Sample array; arrays with more than one dimension are averaged
            over axis 1 first.
        sr: Sample rate of ``audio``.

    Returns:
        List of ``(start, end)`` tuples in seconds relative to ``audio``.
    """
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    window = int(sr * 0.1)
    step = int(sr * 0.05)
    stft_args = dict(y=audio, sr=sr, n_fft=window, hop_length=step)

    centroid_hz = librosa.feature.spectral_centroid(**stft_args)[0]
    bandwidth_hz = librosa.feature.spectral_bandwidth(**stft_args)[0]
    frame_times = librosa.frames_to_time(np.arange(len(centroid_hz)), sr=sr, hop_length=step)

    active = (centroid_hz > CENTROID_THRESHOLD_HZ) & (bandwidth_hz > BANDWIDTH_THRESHOLD_HZ)

    spans = []
    opened_at = None
    for frame_time, flagged in zip(frame_times, active):
        if flagged:
            # Remember only the first frame of a run.
            if opened_at is None:
                opened_at = frame_time
            continue
        if opened_at is None:
            continue
        # First unflagged frame after a run closes it.
        if frame_time - opened_at > MIN_SILENCE_DURATION:
            spans.append((opened_at, frame_time))
        opened_at = None

    if opened_at is not None:
        spans.append((opened_at, frame_times[-1]))

    return spans

In [5]:
def process_file(wav_path, xml_path, chunk_duration=60, overlap=5):
    """Detect spectral events in one WAV file and export them as clips.

    The file is read in chunks of ``chunk_duration`` seconds that overlap by
    ``overlap`` seconds. Each chunk goes through
    ``detect_events_by_spectral_features``; events from successive chunks that
    touch or overlap are merged. Every merged event is then cut into
    consecutive pieces of ``CLIP_LENGTH`` seconds, each written to
    ``OUTPUT_DIR`` as a WAV plus an XML holding the piece's start time.

    Args:
        wav_path: Path of the WAV file to analyse.
        xml_path: Path of the log XML providing the recording start time.
        chunk_duration: Analysis chunk length in seconds.
        overlap: Overlap between consecutive chunks in seconds.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``chunk_duration``, or
            if ``parse_xml_time`` finds no start time in ``xml_path``.
    """
    print(f"\n▶️ Processing {os.path.basename(wav_path)}")
    base_time = parse_xml_time(xml_path)

    events_all = []   # merged [abs_start, abs_end] pairs in seconds, in file order

    # One handle serves both the metadata lookup and the chunked read.
    with sf.SoundFile(wav_path) as f:
        sr = f.samplerate
        total_samples = len(f)
        chunk_samples = int(chunk_duration * sr)
        hop_samples = chunk_samples - int(overlap * sr)
        if hop_samples <= 0:
            # range() rejects a zero step and yields nothing for a negative one.
            raise ValueError("overlap must be smaller than chunk_duration")

        for start_sample in range(0, total_samples, hop_samples):
            f.seek(start_sample)
            samples_to_read = min(chunk_samples, total_samples - start_sample)
            chunk = f.read(samples_to_read)
            if chunk.ndim > 1:
                chunk = np.mean(chunk, axis=1)

            events = detect_events_by_spectral_features(chunk, sr)
            chunk_offset = start_sample / sr

            for start, end in events:
                # Chunk-relative times -> positions in the whole file.
                abs_start = chunk_offset + start
                abs_end = chunk_offset + end

                if not events_all or abs_start > events_all[-1][1]:
                    events_all.append([abs_start, abs_end])
                else:
                    # Touches/overlaps the previous event (e.g. seen again in
                    # the overlapping part of the next chunk): extend it.
                    events_all[-1][1] = max(events_all[-1][1], abs_end)

    print(f"✅ {len(events_all)} spectral events found")

    if not events_all:
        # Nothing to export, so skip decoding the whole file with pydub.
        return

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    audio_seg = AudioSegment.from_file(wav_path)
    base_name = os.path.splitext(os.path.basename(wav_path))[0]
    clip_len_ms = int(CLIP_LENGTH * 1000)   # AudioSegment slices are indexed in ms

    for i, (start, end) in enumerate(events_all):
        duration = end - start
        # The last piece may extend past the event end (ceil).
        n_clips = int(np.ceil(duration / CLIP_LENGTH))
        for j in range(n_clips):
            clip_start = start + j * CLIP_LENGTH
            clip_ms = int(clip_start * 1000)
            clip = audio_seg[clip_ms: clip_ms + clip_len_ms]

            suffix = f"{base_name}_spec{i+1}_{j+1}of{n_clips}"
            wav_out = os.path.join(OUTPUT_DIR, f"{suffix}.wav")
            xml_out = os.path.join(OUTPUT_DIR, f"{suffix}.xml")

            clip.export(wav_out, format="wav")
            write_xml_for_clip(base_time, clip_start, xml_out)

# --- Main ---
# sorted(): os.listdir returns entries in arbitrary order; a fixed order makes
# the printed log reproducible between runs.
for fname in sorted(os.listdir(WAV_DIR)):
    if not fname.endswith(".wav"):
        continue
    wav_file = os.path.join(WAV_DIR, fname)
    # "<name>.wav" -> "<name>.log.xml"; splitext only touches the final
    # extension, whereas str.replace would rewrite every ".wav" in the name.
    xml_file = os.path.splitext(wav_file)[0] + ".log.xml"
    if os.path.exists(xml_file):
        process_file(wav_file, xml_file)
    else:
        # Say so rather than skipping silently; the start time is required.
        print(f"Skipping {fname}: no matching .log.xml found")


▶️ Processing 8567.240831110000.wav
✅ 1 spectral events found
