In [1]:
import os
from datetime import datetime, timedelta
from pathlib import Path
from utils.atc_pipeline import RecordingSession, download_archive_audio, download_adsb, split_audio_by_vad, transcribe_audio 
from utils.liveatc_utils import get_stations

In [2]:
# Look up the stations registered for Warsaw airport (ICAO code EPWA).
stations = get_stations("epwa")

# Print one header line per station, followed by its frequencies,
# each on its own tab-indented line.
for station in stations:
    print(f"[{station['identifier']}] - {station['title']}")
    for freq in station['frequencies']:
        print(f"\t{freq['title']} - {freq['frequency']}")

[epwa_app] - EPWA Approach #1
	Warsaw Approach - 128.805
[epwa_app2] - EPWA Approach #2
	Warsaw Approach - 125.055
[epwa_gnd] - EPWA Ground
	Warsaw Ground - 121.900
[epwa_twr2] - EPWA Tower
	Warsaw Tower - 118.300


In [3]:
# Disabled example: build one RecordingSession per station at Warsaw airport that is currently up

# icao = "EPWA"
# start = datetime(2025, 6, 15, 10, 0)
# duration = timedelta(minutes=30)

# recordings = []
# for station in get_stations(icao):
#     if station["up"]:
#         session = RecordingSession(
#             airport=icao,
#             channel=station["identifier"],
#             start_time=start,
#             end_time=start + duration,
#             audio_path=Path()
#         )
#         recordings.append(session)

In [None]:
# Describe the recording to process: a 30-minute window on the
# EPWA approach channel, starting 2025-06-23 at 10:00.
# NOTE(review): the datetimes are naive; the cached audio file name ends in
# "1000Z", so these are presumably interpreted as UTC — confirm.
session = RecordingSession(
    airport="EPWA",
    # The channel identifier is what ends up in the `m=` param.
    channel="epwa_app",
    start_time=datetime(year=2025, month=6, day=23, hour=10, minute=0),
    end_time=datetime(year=2025, month=6, day=23, hour=10, minute=30),
    # Placeholder only (an empty Path); presumably replaced by a later
    # pipeline step — verify against the pipeline helpers.
    audio_path=Path(),
)

In [5]:
# Obtain the archived audio for this session and rebind `session` to the
# object the helper returns. In the recorded run the helper reported reusing
# a cached MP3 under audio/ rather than downloading.
session = download_archive_audio(session)

[audio] Using cached file: audio/EPWA-App-Jun-23-2025-1000Z.mp3


In [6]:
# Fetch ADS-B data for the session and rebind `session` to the returned
# object. The cells below read it through `session.adsb_data`; the recorded
# run reported saving 86 records to a parquet file under adsb/.
session = download_adsb(session)

RUNNING: : 43.3% [00:10, 3.97%/s]
DOWNLOAD: 122klines [00:15, 7.81klines/s]


[adsb] Saved 86 records to adsb/EPWA-epwa_app-Jun-23-2025-1000Z.parquet


In [9]:
# Show the distinct callsigns present in the session's ADS-B data.
session.adsb_data.data.callsign.unique()

<ArrowExtensionArray>
[ 'UAE98X',  'WZZ495', 'TVP7304',  'LOT6KG',  'LOT171', 'LOT6529',  'QTR51V',
  'LOT8WZ',  'LOT7LK', 'WZZ8241', 'RYR106B',   'LOT5N',  'LOT7VJ',  'LOT266',
 'RYR38HV',  'LOT27M',  'LOT3YM',  'LOT346', 'WZZ27MF', 'CAI63DE',  'MGH871',
  'LOT2LP', 'ENT7551',  'LOT437', 'ENT76WP',  'THY4KJ',  'LOT4CA', 'ENT17GM',
 'ENT4088', 'FIN1143',  'LOT252', 'ENT76DJ',  'LOT425',  'PLF101', 'ENT4095',
  'LOT3KY',   'LOT6E',  'LOT2KT',  'JDI85H',  'LOT5TA',  'LOT2XZ',  'LOT3FG',
    'LOT4', 'WZZ37DB', 'WZZ38KV', 'SWR462F',  'LOT1AN', 'WZZ6442',  'LOT4LV',
  'LOT454', 'WZZ17JZ', 'ENT1343',  'LOT431',  'LOT6AT',  'LOT318', 'LOT6358',
 'TVP7465',  'LOT2CH',  'LOT7HL',  'LOT3MH', 'ENT71PC', 'LOT3945',  'LOT48F',
 'TAP120Y',  'FHY752',  'FDB1WA', 'WZZ68NP',  'LOT3EJ',  'LOT7CR',  'LOT192',
  'LOT282',  'LOT6MJ', 'BAW84NT', 'ENT4215',   'GCK40', 'LOT6126', 'WZZ1919',
  'DLH4LK',  'WZZ1LK',  'LOT3VC',  'LOT3LN',  'SEH4MP',   'MOCNY',  'LOT4NJ',
 'LOT3908', 'TVP7405']
Length: 86, dtype: 

In [11]:
# Show the distinct `icao24` identifiers present in the session's ADS-B data.
session.adsb_data.data.icao24.unique()

<ArrowExtensionArray>
['896309', '4d200e', '49d287', '48ad0e', '48af19', '48af07', '06a0b1',
 '489789', '48ad08', '471f32', '4cac53', '495278', '489787', '48ada9',
 '4cac7f', '48ada1', '48ad19', '48ad0d', '4d2425', '4bb562', '4bb4e7',
 '48af0c', '4892c1', '48af02', '4891b2', '4baa87', '48ae83', '4891ad',
 '489227', '461e21', '48ad0b', '4892c0', '48ae82', '48d981', '4892c7',
 '48adac', '48ae22', '48adb9', '48d48d', '48ad0f', '48ad82', '48ad85',
 '48ae23', '4d2448', '4d24d6', '4b028a', '48ac80', '4d2529', '48ac82',
 '48ac81', '471fae', '4892c2', '48ae80', '48af10', '48ae81', '48af08',
 '48cb15', '48adae', '48ada3', '48ada5', '4892c8', '489788', '48af05',
 '4952cf', '4b9910', '896555', '4d242b', '48ada6', '48ad80', '48af12',
 '48af01', '48ad00', '40624f', '505cd8', '4408d8', '48af0b', '471db4',
 '3c6598', '471fa1', '48ad03', '48ad0c', '468e45', '43ea72', '48ad01',
 '48ada7', '49d3d3']
Length: 86, dtype: string[pyarrow]

In [12]:
# Transcribe the session audio and rebind `session` to the returned object.
# In the recorded run the output shows speech chunks being saved first
# ("[vad]" lines under chunks/), followed by one timestamped transcript line
# per chunk ("[transcribe]" lines).
session = transcribe_audio(session)

[vad] Saved chunk 1: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_000.wav
[vad] Saved chunk 2: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_001.wav
[vad] Saved chunk 3: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_002.wav
[vad] Saved chunk 4: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_003.wav
[vad] Saved chunk 5: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_004.wav
[vad] Saved chunk 6: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_005.wav
[vad] Saved chunk 7: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_006.wav
[vad] Saved chunk 8: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_007.wav
[vad] Saved chunk 9: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_008.wav
[vad] Saved chunk 10: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_009.wav
[vad] Saved chunk 11: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_010.wav
[vad] Saved chunk 12: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_011.wav
[vad] Saved chunk 13: chunks/EPWA-epwa_app-Jun-23-2025-1000Z/speech_012.wav
[vad] Saved chunk 14:

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


[transcribe] Chunk 1/219: [10:00:02] thousand looking for traffic lot five tango alfa
[transcribe] Chunk 2/219: [10:00:13] ruzyne three seven delta bravo contact radar one two five zero five five
[transcribe] Chunk 3/219: [10:00:19] one two five zero five five wizz air three seven delta bravo
[transcribe] Chunk 4/219: [10:00:24] approach czech air force zero eight eight descending seven thousand feet
[transcribe] Chunk 5/219: [10:00:30] tango four zero eight eight qality descend seven thousand qnh one zero one zero traffic below
[transcribe] Chunk 6/219: [10:00:34] descending seven thousand qnh one zero one zero copy that turkish four zero eight eight
[transcribe] Chunk 7/219: [10:01:22] and air force zero eight eight descend altitude five thousand feet
[transcribe] Chunk 8/219: [10:01:24] descending five thousand and turn four zero eight eight
[transcribe] Chunk 9/219: [10:01:47] six five tango alfa for contact approach one two five decimal zero five
[transcribe] Chunk 10/219: [10:01: