In [1]:
import torch
from datetime import datetime, timezone
from utils.transcript_audio import WhisperTranscriber, transcribe_catalog
from utils.process_audio import build_audio_catalog, vad_split_segments, pyannote_vad_segments

# Recording root directory and the UTC time window handed to the catalogue builder.
base_dir = "/store/kruu/atc_muac/audio_sdrplay"
start = datetime(2025, 8, 25, hour=14, tzinfo=timezone.utc)
stop = datetime(2025, 8, 25, hour=17, tzinfo=timezone.utc)

# Options forwarded verbatim to `build_audio_catalog`.
# NOTE(review): 128 workers for a handful of files looks oversized — confirm it is intended.
catalog_options = {
    "sectors": ["delta_low", "delta_mid", "delta_high"],
    "start": start,
    "stop": stop,
    "max_workers": 128,
    "show_progress": True,
}
catalog = build_audio_catalog(base_dir, **catalog_options)

# Quick sanity check on how many catalogue entries came back.
print(f"{len(catalog)} files downloaded")

Loading audio: 100%|██████████| 10/10 [00:38<00:00,  3.84s/file]

10 files downloaded





In [None]:
# Disabled scratch cell: calls each of the two imported VAD segmenters
# (`vad_split_segments` and `pyannote_vad_segments`) on `catalog[0][1]` with
# explicit parameters. Uncomment to experiment with the settings.
# The pyannote call reads its Hugging Face token from the HUGGINGFACE_TOKEN
# environment variable, which is why `import os` is needed when re-enabling it.
# import os

# test_vad_webrtc = vad_split_segments(catalog[0][1],
#                               target_sr=16000, 
#                               frame_ms=20,               
#                               aggressiveness=0,          
#                               min_speech_ms=300,         # if very short segments are noise: 400/600
#                               min_silence_ms=400,        # if segments are choppy: 300/400
#                               pre_pad_ms=200,            # if segments are choppy: 200
#                               post_pad_ms=150,           
#                               normalize_dbfs=-20.0,      
#                               use_compressor=True) 

# test_vad_pyannote =  pyannote_vad_segments(catalog[0][1],
#                                            model_id="pyannote/voice-activity-detection",
#                                            hf_token=os.getenv("HUGGINGFACE_TOKEN"),
#                                            target_sr=16000,                    
#                                            min_speech_ms=300,        
#                                            min_silence_ms=400,       
#                                            pre_pad_ms=200,            
#                                            post_pad_ms=150,           
#                                            normalize_dbfs=-20.0,      
#                                            use_compressor=True)   

In [2]:
# Half precision is only a safe default on a CUDA device: float16 inference on CPU
# is slow and, depending on the PyTorch build, some ops are not implemented for Half.
# Fall back to float32 when no GPU is available.
# NOTE(review): assumes `device=None` lets WhisperTranscriber pick the device itself — confirm.
whisper_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# ATC fine-tuned Whisper checkpoint wrapped by the project's transcriber class.
transcriber = WhisperTranscriber(
    model_name="jacktol/whisper-medium.en-fine-tuned-for-ATC",
    device=None,
    dtype=whisper_dtype,
)

# Transcribe every catalogue entry, using `vad_split_segments` as the segmentation
# method; `verbose=False` is passed to keep the call quiet.
transcripts = transcribe_catalog(
    catalog,
    segmentation_method=vad_split_segments,
    transcriber=transcriber,
    verbose=False,
)

Transcribing:   0%|          | 0/10 [00:00<?, ?audio chunk/s]

In [3]:
from shapely.geometry import Polygon as ShapelyPolygon
from utils.transcript_audio import extract_adsb

# Outline vertices written as (latitude, longitude) pairs
# (presumably the MUAC delta sector group — verify against the source of the coordinates).
delta_coords = [
    (53.454167, 3.606111),
    (52.733333, 5.583333),
    (52.663333, 7.168889),
    (51.193611, 5.521389),
    (51.607778, 3.171944),
    (51.480556, 3.171944),
    (51.636944, 2.500000),
    (51.455556, 2.500000),
    (51.500000, 2.000000),
    (51.950556, 2.356389),
]

# The polygon is built from (lon, lat) pairs, so each (lat, lon) vertex is reversed first.
delta_geom = ShapelyPolygon([vertex[::-1] for vertex in delta_coords])

# Axis-aligned bounding box of the polygon: (minx, miny, maxx, maxy) = (W, S, E, N).
bbox = delta_geom.bounds

# ADS-B query restricted to the same time window as the audio catalogue and to the
# bounding box above.
adsb_query = dict(
    start=start,
    stop=stop,
    bbox=bbox,
    chunk_minutes=60,      # 1-hour chunks
    min_baroalt_m=10000,   # ~FL330 (10 000 m is about 32 800 ft)
)
adsb_traf = extract_adsb(**adsb_query)

RUNNING: : 91.4% [00:05, 15.6%/s]
DOWNLOAD: 117klines [00:06, 16.8klines/s]
RUNNING: : 80.6% [00:18, 4.31%/s]
DOWNLOAD: 123klines [00:11, 10.4klines/s]
RUNNING: : 75.0% [00:07, 9.77%/s]
DOWNLOAD: 108klines [00:06, 16.6klines/s]


In [4]:
from utils.cs_matching import build_timestamp_range, closest_callsign_at_time, merge_callsign_entities
from utils.transcript_audio import extract_callsign_communications
# Timestamp ranges built from the ADS-B traffic; they are passed to the matcher below.
adsb_ranges = build_timestamp_range(adsb_traf)

# Everything the callsign extractor needs besides the transcripts themselves.
matching_options = {
    "adsb_traf": adsb_traf,
    "adsb_ranges": adsb_ranges,
    "closest_callsign_at_time": closest_callsign_at_time,
    "merge_callsign_entities": merge_callsign_entities,  # or None
    "batch_size": 64,
    "match_threshold": 0.7,
    "time_tolerance_s": 60,
    "include_unmatched": True,
    "progress": True,
    "return_df": True,
}

# With `return_df=True` the call yields two values; the second is kept as `df`
# and inspected in the following cells.
callsign_communications, df = extract_callsign_communications(
    transcripts, **matching_options
)

Device set to use cpu


NER (callsigns):   0%|          | 0/19 [00:00<?, ?batch/s]

In [10]:
# Keep only the rows whose `matched` flag is True.
df.loc[df["matched"].eq(True)]

Unnamed: 0,callsign,timestamp,sentence,detected,score,matched
0,EZY86AD,2025-08-25 14:01:04+00:00,hotel kilo easy eight six alfa delta flight le...,hotel kilo easy eight six alfa delta,0.78,True
1,EZY86AD,2025-08-25 14:01:09+00:00,easy eight six alfa delta good day identified,easy eight six alfa delta,1.00,True
2,UAE73J,2025-08-25 14:03:35+00:00,emirates seven three juliett contact munich on...,emirates seven three juliett,1.00,True
4,UAE73J,2025-08-25 14:03:46+00:00,three three two one five emirates seven three ...,three three two one five emirates seven three ...,0.90,True
7,RYR2901,2025-08-25 14:04:21+00:00,radar very good afternoon ryan air two nine ze...,ryan air two nine zero one,1.00,True
...,...,...,...,...,...,...
913,NSZ5372,2025-08-25 17:00:04+00:00,right north five three seven two hello radar c...,five three seven two,0.75,True
914,DLH1KC,2025-08-25 17:00:09+00:00,praha radar channex five uniform whiskey good ...,lufthansa,0.75,True
916,EXS741K,2025-08-25 17:00:21+00:00,continue present heading channex nine eight fo...,channex nine eight four,0.74,True
917,RYR35ER,2025-08-25 17:00:35+00:00,ryan air three five echo romeo descend flight ...,ryan air three five echo romeo,1.00,True


In [15]:
# Rows with a score of exactly 1, counted per callsign (non-null count for each column).
df.loc[df["score"].eq(1)].groupby("callsign").count()

Unnamed: 0_level_0,timestamp,sentence,detected,score,matched
callsign,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACA843,1,1,1,1,1
AFR15HA,1,1,1,1,1
AFR86RK,1,1,1,1,1
CCA631,4,4,4,4,4
CSN673,1,1,1,1,1
...,...,...,...,...,...
UAL75,1,1,1,1,1
UAL927,1,1,1,1,1
UAL933,1,1,1,1,1
WZZ23TX,1,1,1,1,1


In [17]:
# All rows attributed to callsign CCA631, whatever their score.
df.loc[df["callsign"].eq("CCA631")]

Unnamed: 0,callsign,timestamp,sentence,detected,score,matched
783,CCA631,2025-08-25 16:46:32+00:00,martinair con turn right turn six three one ma...,air china six three one,1.0,True
809,CCA631,2025-08-25 16:54:12+00:00,air china six three one turn left heading thre...,air china six three one,1.0,True
812,CCA631,2025-08-25 16:54:25+00:00,departure air china six three one,china six three one,0.8,True
832,CCA631,2025-08-25 16:58:44+00:00,air china six three one descend flight level t...,air china six three one,1.0,True
837,CCA631,2025-08-25 16:59:40+00:00,air china six three one resume own navigation ...,air china six three one,1.0,True
838,CCA631,2025-08-25 16:59:45+00:00,direct to golf zone two nine zero by golf air ...,china six two one,0.78,True
