In [5]:
import xml.etree.ElementTree as ET
from pydub import AudioSegment
from IPython.display import Audio, display
from io import BytesIO


def parse_trs(filepath):
    """Extract the non-empty speaker turns from an XML transcript (``.trs``) file.

    Every ``Turn`` element found anywhere in the document is converted into a
    ``(speaker, start, end, text)`` tuple:

    * ``speaker`` -- the ``speaker`` attribute, or ``"unknown"`` when absent.
    * ``start`` / ``end`` -- the ``startTime`` / ``endTime`` attributes as floats.
    * ``text`` -- all text nested inside the turn, concatenated, stripped of
      surrounding whitespace, with each remaining newline replaced by a space.

    Turns whose text is empty after stripping are dropped.

    Args:
        filepath: Path (or file object) accepted by ``ET.parse``.

    Returns:
        list[tuple[str, float, float, str]]: One tuple per non-empty turn, in
        document order.

    Raises:
        KeyError: If a turn lacks ``startTime`` or ``endTime``.
        ValueError: If either time attribute is not a valid float.
    """
    document_root = ET.parse(filepath).getroot()

    def _as_record(turn_el):
        # Attributes are read before the text so errors surface in a fixed order.
        attrs = turn_el.attrib
        who = attrs.get("speaker", "unknown")
        begins = float(attrs["startTime"])
        finishes = float(attrs["endTime"])
        # itertext() walks the turn's own text plus the text/tails of all
        # nested elements, so inline child tags do not cut the transcript.
        spoken = "".join(turn_el.itertext()).strip().replace("\n", " ")
        return who, begins, finishes, spoken

    records = (_as_record(turn_el) for turn_el in document_root.iter("Turn"))
    return [record for record in records if record[3]]


def play_segments(wav_path, segments, max_play=5):
    """Print and render an audio player for the first ``max_play`` segments.

    For each segment a header line, the transcript text, an inline audio
    widget for that time span, and a separator line are emitted.

    Args:
        wav_path: Path of the WAV recording, loaded with
            ``AudioSegment.from_wav``.
        segments: Sequence of ``(speaker, start, end, text)`` tuples with
            ``start`` / ``end`` in seconds (the shape returned by
            ``parse_trs``).
        max_play: Maximum number of segments to play, counted from the
            beginning of ``segments``.

    Returns:
        None. The function only prints and displays output.
    """
    audio = AudioSegment.from_wav(wav_path)
    for number, (speaker, start, end, text) in enumerate(segments[:max_play], 1):
        print(f"[{number}] Speaker: {speaker} | {start:.2f}s - {end:.2f}s")
        print(f"Transcript: {text}")

        # pydub slices are indexed in milliseconds; segment times are seconds.
        clip = audio[start * 1000 : end * 1000]

        # Encode the clip as a complete in-memory WAV file.
        buffer = BytesIO()
        clip.export(buffer, format="wav")

        # The WAV header already carries the clip's real sample rate. Audio's
        # `rate` argument only applies to raw sample arrays and is ignored for
        # encoded bytes, so no hard-coded rate is passed here.
        display(Audio(data=buffer.getvalue()))
        print("-" * 60)


# Forward slashes are used on purpose: a backslash before "A" in a normal
# string literal is an invalid escape sequence (Python warns about it), and
# forward slashes resolve correctly on both Windows and POSIX systems.
trs_path = "ZCU_CZ_ATC/ACCU-0CUxlz.trs"
wav_path = "ZCU_CZ_ATC/ACCU-0CUxlz.wav"

# Parse the transcript, then print and play its first 10 non-empty turns.
segments = parse_trs(trs_path)
play_segments(wav_path, segments, max_play=10)

[1] Speaker: unknown | 0.00s - 22.53s
Transcript: ..  [ground]Lufthansa 7 3 9 Praha  ..  [air]Lufthansa 7 3 9 [unintelligible] radio descending down level 3 5 0  ..  [ground]good afternoon Lufthansa 7 3 9 Praha radar contact descend FL 3 0 0 level by RAPET  ..  [air]Lufthansa 7 3 9 descending FL 3 0 0 level [unintelligible]  ..


  trs_path = "ZCU_CZ_ATC\ACCU-0CUxlz.trs"
  wav_path = "ZCU_CZ_ATC\ACCU-0CUxlz.wav"


------------------------------------------------------------


In [6]:
import os
from pydub import AudioSegment

def get_total_duration_in_hours(folder_path):
    """Return the combined length, in hours, of the ``.wav`` files in a folder.

    Only direct entries of ``folder_path`` whose names end with the lowercase
    suffix ``.wav`` are considered; sub-directories are not descended into.
    Each matching file is loaded with ``AudioSegment.from_wav`` and its
    length (``len()`` of the segment, in milliseconds) is added to the total.

    Args:
        folder_path: Directory to scan.

    Returns:
        float: Total duration in hours (``0.0`` when no file matches).
    """
    ms_per_hour = 1000 * 60 * 60

    wav_names = (
        entry for entry in os.listdir(folder_path) if entry.endswith(".wav")
    )
    combined_ms = sum(
        len(AudioSegment.from_wav(os.path.join(folder_path, entry)))
        for entry in wav_names
    )
    return combined_ms / ms_per_hour


# Relative path of the dataset directory whose .wav files are measured.
folder = "ZCU_CZ_ATC"
hours = get_total_duration_in_hours(folder)
# The message is Vietnamese for "Total duration: <hours> hours".
print(f"Tổng thời lượng: {hours:.2f} giờ")


Tổng thời lượng: 20.58 giờ
