From 72ee1f11fcaba68da7a8fd381d196bad0793c0e3 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Mon, 17 Nov 2025 15:05:46 -0800 Subject: [PATCH 01/15] initial refactor from KelseyCreekSoftware --- pyproject.toml | 1 + sigmf/apps/convert_blue.py | 707 +++++++++++++++++++++++++++++++++++++ sigmf/apps/convert_wav.py | 8 +- sigmf/error.py | 4 + tests/test_convert.py | 37 ++ 5 files changed, 751 insertions(+), 6 deletions(-) create mode 100644 sigmf/apps/convert_blue.py create mode 100644 tests/test_convert.py diff --git a/pyproject.toml b/pyproject.toml index cac61a3..768e518 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ [project.scripts] sigmf_validate = "sigmf.validate:main" sigmf_convert_wav = "sigmf.apps.convert_wav:main [apps]" + sigmf_convert_blue = "sigmf.apps.convert_blue:main [apps]" [project.optional-dependencies] test = [ "pylint", diff --git a/sigmf/apps/convert_blue.py b/sigmf/apps/convert_blue.py new file mode 100644 index 0000000..7d77ab7 --- /dev/null +++ b/sigmf/apps/convert_blue.py @@ -0,0 +1,707 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +""" +Blue File converter with HCB and Extended Header Parsing +This script reads and parses the HCB (Header Control Block) and extended header keywords from a Blue file format. +It supports different file types and extracts metadata accordingly. +Converts the extracted metadata into SigMF format. +""" + +import argparse +import hashlib +import json +import logging +import os +import struct +from datetime import datetime, timezone + +import numpy as np + +from .. import __version__ as toolversion +from ..error import SigMFConversionError + +log = logging.getLogger() + +# fmt: off +HCB_LAYOUT = [ + # HCB field definitions: (name, offset, size, fmt, description) up to adjunct + ("version", 0, 4, "4s", "Header version"), + ("head_rep", 4, 4, "4s", "Header representation"), + ("data_rep", 8, 4, "4s", "Data representation"), + ("detached", 12, 4, "i", "Detached header"), + ("protected", 16, 4, "i", "Protected from overwrite"), + ("pipe", 20, 4, "i", "Pipe mode (N/A)"), + ("ext_start", 24, 4, "i", "Extended header start (512-byte blocks)"), + ("ext_size", 28, 4, "i", "Extended header size in bytes"), + ("data_start",32, 8, "d", "Data start in bytes"), + ("data_size", 40, 8, "d", "Data size in bytes"), + ("type", 48, 4, "i", "File type code"), + ("format", 52, 2, "2s", "Data format code"), + ("flagmask", 54, 2, "h", "16-bit flagmask"), + ("timecode", 56, 8, "d", "Time code field"), + ("inlet", 64, 2, "h", "Inlet owner"), + ("outlets", 66, 2, "h", "Number of outlets"), + ("outmask", 68, 4, "i", "Outlet async mask"), + ("pipeloc", 72, 4, "i", "Pipe location"), + ("pipesize", 76, 4, "i", "Pipe size in bytes"), + ("in_byte", 80, 8, "d", "Next input byte"), + ("out_byte", 88, 8, "d", "Next out byte (cumulative)"), + ("outbytes", 96, 64, "8d", "Next out byte (each outlet)"), + ("keylength", 160, 4, "i", "Length of keyword string"), + ("keywords", 164, 92, "92s", "User defined keyword string"), + # Adjunct starts at 256 +] +# fmt: on + +TYPE_MAP = { + # Extended header type map + "B": (np.int8, 1), + "I": (np.int16, 2), + "L": (np.int32, 4), + "X": (np.int64, 8), + "F": (np.float32, 4), + "D": (np.float64, 8), + "A": (np.dtype("S1"), 1), +} + + +HEADER_SIZE_BYTES = 512 +BLOCK_SIZE_BYTES = 512 + + +def detect_endian(data, layout, probe_fields=("data_size", "version")): + """ + Detect endianness of a 
Bluefile header.

    TODO: Look at this code and see if it can be improved and possibly simplified.

    Parameters
    ----------
    data : bytes
        Raw header data.
    layout : list of tuples
        HCB layout definition (name, offset, size, fmt, desc).
    probe_fields : tuple of str, optional
        Field names to test for sanity checks.

    Returns
    -------
    str
        "<" for little-endian or ">" for big-endian.
    """
    # TODO: handle both endianness markers ('EEEI' or 'IEEE') for the header, data, and signal representations
    endianness = data[8:12].decode("utf-8")
    log.debug(f"endianness: {endianness}")
    if endianness not in ("EEEI", "IEEE"):
        raise SigMFConversionError(f"Unexpected endianness: {endianness}")

    for endian in ("<", ">"):
        ok = True
        for name, offset, size, fmt, desc in layout:
            if name not in probe_fields:
                continue
            raw = data[offset : offset + size]
            try:
                val = struct.unpack(endian + fmt, raw)[0]
                # sanity checks
                MAX_DATA_SIZE_FACTOR = 100

                if name == "data_size":
                    if val <= 0 or val > len(data) * MAX_DATA_SIZE_FACTOR:
                        ok = False
                        break
                elif name == "version":
                    if not (0 < val < 10):  # expect small version number
                        ok = False
                        break
            except Exception:
                ok = False
                break
        if ok:
            return endian
    # fallback
    return "<"


def read_hcb(file_path):
    """
    Read HCB fields and adjunct block from a Blue file.

    Parameters
    ----------
    file_path : str
        Path to the Blue file.

    Returns
    -------
    dict
        Parsed HCB fields and adjunct metadata.
    """

    hcb = {}
    with open(file_path, "rb") as handle:
        data = handle.read(HEADER_SIZE_BYTES)
        endian = detect_endian(data, HCB_LAYOUT)

        # fixed fields
        for name, offset, size, fmt, desc in HCB_LAYOUT:
            raw = data[offset : offset + size]
            try:
                val = struct.unpack(endian + fmt, raw)[0]
            except struct.error:
                raise ValueError(f"Failed to unpack field {name} with endian {endian}")
            if isinstance(val, bytes):
                val = val.decode("ascii", errors="replace").strip("\x00 ")
            hcb[name] = val

        # adjunct parsing
        adjunct_offset_bytes = 256
        handle.seek(adjunct_offset_bytes)
        if hcb["type"] in (1000, 1001):
            hcb["adjunct"] = {
                "xstart": struct.unpack(f"{endian}d", handle.read(8))[0],
                "xdelta": struct.unpack(f"{endian}d", handle.read(8))[0],
                "xunits": struct.unpack(f"{endian}i", handle.read(4))[0],
            }
        elif hcb["type"] == 2000:
            hcb["adjunct"] = {
                "xstart": struct.unpack(f"{endian}d", handle.read(8))[0],
                "xdelta": struct.unpack(f"{endian}d", handle.read(8))[0],
                "xunits": struct.unpack(f"{endian}i", handle.read(4))[0],
                "subsize": struct.unpack(f"{endian}i", handle.read(4))[0],
                "ystart": struct.unpack(f"{endian}d", handle.read(8))[0],
                "ydelta": struct.unpack(f"{endian}d", handle.read(8))[0],
                "yunits": struct.unpack(f"{endian}i", handle.read(4))[0],
            }
        else:
            hcb["adjunct_raw"] = handle.read(adjunct_offset_bytes)

    return hcb


def parse_extended_header(file_path, hcb, endian="<"):
    """
    Parse extended header keyword records.

    Parameters
    ----------
    file_path : str
        Path to the Bluefile.
    hcb : dict
        Header Control Block containing 'ext_size' and 'ext_start'.
    endian : str, optional
        Endianness ('<' for little-endian, '>' for big-endian).

    Returns
    -------
    list of dict
        List of dictionaries containing parsed records.
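
    Notes
    -----
    As interpreted by this parser, each record is packed as: int32 lkey
    (total record length, including padding), int16 lext, int8 ltag (tag
    length), one ASCII type character, lkey - lext bytes of value, ltag
    bytes of tag, and zero padding out to the next 8-byte boundary.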
+ """ + if hcb["ext_size"] <= 0: + return [] + entries = [] + with open(file_path, "rb") as handle: + handle.seek(int(hcb["ext_start"]) * BLOCK_SIZE_BYTES) + bytes_remaining = int(hcb["ext_size"]) + while bytes_remaining > 0: + lkey = struct.unpack(f"{endian}i", handle.read(4))[0] + lext = struct.unpack(f"{endian}h", handle.read(2))[0] + ltag = struct.unpack(f"{endian}b", handle.read(1))[0] + type_char = handle.read(1).decode("ascii", errors="replace") + + dtype, bytes_per_element = TYPE_MAP.get(type_char, (np.dtype("S1"), 1)) + val_len = lkey - lext + val_count = val_len // bytes_per_element if bytes_per_element else 0 + + if type_char == "A": + raw = handle.read(val_len) + if len(raw) < val_len: + raise ValueError("Unexpected end of extended header") + value = raw.rstrip(b"\x00").decode("ascii", errors="replace") + else: + value = np.frombuffer(handle.read(val_len), dtype=dtype, count=val_count) + if value.size == 1: + value = value[0] + else: + value = value.tolist() + + tag = handle.read(ltag).decode("ascii", errors="replace") if ltag > 0 else "" + + total = 4 + 2 + 1 + 1 + val_len + ltag + pad = (8 - (total % 8)) % 8 + if pad: + handle.read(pad) + + entries.append({"tag": tag, "type": type_char, "value": value, "lkey": lkey, "lext": lext, "ltag": ltag}) + bytes_remaining -= lkey + + return entries + + +def parse_data_values(file_path, hcb, endianness): + """ + Parse key HCB values used for further processing. + + Parameters + ---------- + file_path : str + Path to the Blue file. + hcb : dict + Header Control Block dictionary. + endianness : str + Endianness ('<' for little-endian, '>' for big-endian). + + Returns + ------- + numpy.ndarray + Parsed samples. + """ + log.info("parsing blue file data values") + with open(file_path, "rb") as handle: + data = handle.read(HEADER_SIZE_BYTES) + if len(data) < HEADER_SIZE_BYTES: + raise ValueError("Incomplete header") + dtype = data[52:54].decode("utf-8") # eg 'CI', 'CF', 'SD' + log.debug(f"data type: {dtype}") + + + time_interval = np.frombuffer(data[264:272], dtype=np.float64)[0] + if time_interval <= 0: + raise ValueError(f"Invalid time interval: {time_interval}") + sample_rate_hz = 1 / time_interval + log.info(f"sample rate: {sample_rate_hz/1e6:.3f} MHz") + extended_header_data_size = int.from_bytes(data[28:32], byteorder="little") + file_size_bytes = os.path.getsize(file_path) + log.debug(f"file size: {file_size_bytes} bytes") + + # Determine destination path for SigMF data file + dest_path = file_path.rsplit(".", 1)[0] + + ### complex data parsing + + # complex 16-bit integer IQ data > ci16_le in SigMF + if dtype == "CI": + elem_size = np.dtype(np.int16).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + raw_samples = np.fromfile(file_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) + # reassemble interleaved IQ samples + samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... + # normalize samples to -1.0 to +1.0 range + samples = samples.astype(np.float32) / 32767.0 + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # complex 32-bit integer IQ data > ci32_le in SigMF + elif dtype == "CL": + elem_size = np.dtype(np.int32).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + raw_samples = np.fromfile(file_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) + # reassemble interleaved IQ samples + samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... 
+ # normalize samples to -1.0 to +1.0 range + samples = samples.astype(np.float32) / 2147483647.0 + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # complex 32-bit float IQ data > cf32_le in SigMF + elif dtype == "CF": + # each complex sample is 8 bytes total (2 × float32), so np.complex64 is appropriate + # no need to reassemble IQ — already complex + elem_size = np.dtype(np.complex64).itemsize # will be 8 bytes + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.complex64, offset=HEADER_SIZE_BYTES, count=elem_count) + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + ### scalar data parsing + + # scalar data parsing > ri8_le in SigMF + elif dtype == "SB": + elem_size = np.dtype(np.int8).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.int8, offset=HEADER_SIZE_BYTES, count=elem_count) + # normalize samples to -1.0 to +1.0 range + samples = samples.astype(np.float32) / 127.0 + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # scalar data parsing > ri16_le in SigMF + elif dtype == "SI": + elem_size = np.dtype(np.int16).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) + # normalize samples to -1.0 to +1.0 range + samples = samples / 32767.0 + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # scalar data parsing > ri32_le in SigMF + elif dtype == "SL": + elem_size = np.dtype(np.int32).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) + # normalize samples to -1.0 to +1.0 range + samples = samples / 2147483647.0 + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # scalar data parsing > ri64_le in SigMF + elif dtype == "SX": + elem_size = np.dtype(np.int64).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.int64, offset=HEADER_SIZE_BYTES, count=elem_count) + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # scalar data parsing > rf32_le in SigMF + elif dtype == "SF": + elem_size = np.dtype(np.float32).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.float32, offset=HEADER_SIZE_BYTES, count=elem_count) + # save out as SigMF IQ data file + samples.tofile(f"{dest_path}.sigmf-data") + + # scalar data parsing > rf64_le in SigMF + elif dtype == "SD": + elem_size = np.dtype(np.float64).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size + samples = np.fromfile(file_path, dtype=np.float64, offset=HEADER_SIZE_BYTES, count=elem_count) + # save out as SigMF IQ data file + samples.astype(np.complex64).tofile(f"{dest_path}.sigmf-data") + else: + raise ValueError(f"Unsupported data type: {dtype}") + + # TODO: validate handling of scalar types - reshape per mathlab port shown here? + # return the IQ data if needed for further processing if needed + return samples + + +def blue_to_sigmf(hcb, ext_entries, file_path): + """ + Build a SigMF metadata dict from parsed Bluefile HCB and extended header. 
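    The resulting metadata is also written next to the converted data as a
    .sigmf-meta file.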
+ + Parameters + ---------- + hcb : dict + Header Control Block from read_hcb(). + ext_entries : list of dict + Parsed extended header entries from parse_extended_header(). + file_path : str + Path to the original blue file. + + Returns + ------- + dict + SigMF metadata structure. + + Raises + ------ + ValueError + If required fields are missing or invalid. + """ + # helper to look up extended header values by tag + def get_tag(tag): + for entry in ext_entries: + if entry["tag"] == tag: + return entry["value"] + return None + + # s - scalar + # c - complex + # v - vector + # q - quad - TODO: pri 2 - add support for other types if they are commonly used. + # + # b: 8-bit integer + # i: 16-bit integer + # l: 32-bit integer + # x: 64-bit integer + # f: 32-bit float + # d: 64-bit float + + # global datatype object - little endian + datatype_map_le = { + "SB": "ri8_le", + "SI": "ri16_le", + "SL": "ri32_le", + "SX": "ri64_le", + "SF": "rf32_le", + "SD": "rf64_le", + "CB": "ci8_le", + "CI": "ci16_le", + "CL": "ci32_le", + "CX": "ci64_le", + "CF": "cf32_le", + "CD": "cf32_le", + } + + # global datatype object - big endian + datatype_map_be = { + "SB": "ri8_be", + "SI": "ri16_be", + "SL": "ri32_be", + "SX": "ri64_be", + "SF": "rf32_be", + "SD": "rf64_be", + "CB": "ci8_be", + "CI": "ci16_be", + "CL": "ci32_be", + "CX": "ci64_be", + "CF": "cf32_be", + "CD": "cf32_be", + } + + # header data representation: 'EEEI' or 'IEEE' (little or big data endianess representation) + header_rep = hcb.get("head_rep") + + # data_rep: 'EEEI' or 'IEEE' (little or big data endianess representation) + data_rep = hcb.get("data_rep") + + # data_format: for example 'CI' or 'SD' (data format code - real or complex, int or float) + data_format = hcb.get("format") + + if data_rep == "EEEI": # little endian + data_map = datatype_map_le.get(data_format) + elif data_rep == "IEEE": # big endian + data_map = datatype_map_be.get(data_format) + + datatype = data_map if data_map is not None else "unknown" + + log.info(f"determined SigMF datatype: {datatype} and data representation: {data_rep}") + + # sample rate: prefer adjunct.xdelta, else extended header SAMPLE_RATE + if "adjunct" in hcb and "xdelta" in hcb["adjunct"]: + sample_rate_hz = 1.0 / hcb["adjunct"]["xdelta"] + else: + sample_rate_tag = get_tag("SAMPLE_RATE") + sample_rate_hz = float(sample_rate_tag) if sample_rate_tag is not None else None + + # for now define static values. 
perhaps take as JSON input + hardware_description = "Blue File Conversion - Unknown Hardware" + blue_author = "Blue File Conversion - Unknown Author" + blue_license = "Blue File Conversion - Unknown License" + + if "outlets" in hcb and hcb["outlets"] > 0: + num_channels = int(hcb["outlets"]) + else: + num_channels = 1 + + # base global metadata + global_md = { + "core:author": blue_author, + "core:datatype": datatype, + "core:description": hcb.get("keywords", ""), + "core:hw": hardware_description, + "core:license": blue_license, + "core:num_channels": num_channels, + "core:sample_rate": sample_rate_hz, + "core:version": "1.0.0", + } + + for name, _, _, _, desc in HCB_LAYOUT: + value = hcb.get(name) # safe access + if value is None: + continue # or set a default + global_md[f"core:blue_hcb_{name}"] = value + + # merge adjunct fields + adjunct = hcb.get("adjunct", {}) + for key, value in adjunct.items(): + global_md[f"core:blue_adjunct_header_{key}"] = value + + # merge extended header fields + for entry in ext_entries: + name = entry.get("tag") + if name is None: + continue + key = f"core:blue_extended_header_{name}" + value = entry.get("value") + if hasattr(value, "item"): + value = value.item() + global_md[key] = value + + # convert the datetime object to an ISO 8601 formatted string + epoch_time_raw = int(hcb.get("timecode", 0)) + + # adjust for Bluefile POSIX epoch (1950 vs 1970) + bluefile_epoch_offset = 631152000 # seconds between 1950 and 1970 + epoch_time = epoch_time_raw - bluefile_epoch_offset + + dt_object_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc) + # format with milliseconds and Zulu suffix + iso_8601_string = dt_object_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + log.debug(f"epoch time: {epoch_time}") + log.info(f"ISO 8601 time: {iso_8601_string}") + + # captures array + captures = [ + { + "core:datetime": iso_8601_string, + "core:frequency": float(get_tag("RF_FREQ") or 0.0), + "core:sample_start": 0, + } + ] + + # compute SHA‑512 hash of data file + def compute_sha512(path, bufsize=1024 * 1024): + """Compute SHA-512 hash of a file in chunks.""" + hash_obj = hashlib.sha512() + with open(path, "rb") as handle: + chunk = handle.read(bufsize) + while chunk: + hash_obj.update(chunk) + chunk = handle.read(bufsize) + return hash_obj.hexdigest() + + # strip the extension from the original file path + base_file_name = os.path.splitext(file_path)[0] + + # build the .sigmf-data path + data_file_path = base_file_name + ".sigmf-data" + + # compute SHA-512 of the data file + data_sha512 = compute_sha512(data_file_path) # path to the .sigmf-data file + global_md["core:sha512"] = data_sha512 + + # annotations array + datatype_sizes_bytes = { + "ri8_le": 1, + "ri16_le": 2, + "ri32_le": 4, + "ci16_le": 4, + "ci32_le": 8, + "cf32_le": 8, + "rf32_le": 4, + "rf64_le": 8, + "ri64_le": 8, + "ri8_be": 1, + "ri16_be": 2, + "ri32_be": 4, + "ci16_be": 4, + "ci32_be": 8, + "cf32_be": 8, + "rf32_be": 4, + "rf64_be": 8, + "ri64_be": 8, + } + + # calculate sample count + data_size_bytes = int(hcb.get("data_size", 0)) + if datatype not in datatype_sizes_bytes: + raise ValueError(f"Unsupported datatype {datatype}") + bytes_per_sample = datatype_sizes_bytes[datatype] + sample_count = int(data_size_bytes // bytes_per_sample) + + rf_freq_hz = float(get_tag("RF_FREQ") or 0.0) + bandwidth_hz = float(get_tag("SBT_BANDWIDTH") or 0.0) + + annotations = [ + { + "core:sample_start": 0, + "core:sample_count": sample_count, + "core:freq_upper_edge": rf_freq_hz + bandwidth_hz, + 
"core:freq_lower_edge": rf_freq_hz, + "core:label": "Sceptere", + } + ] + + # final SigMF object + sigmf_metadata = { + "global": global_md, + "captures": captures, + "annotations": annotations, + } + + # write .sigmf-meta file + base_file_name = os.path.splitext(file_path)[0] + meta_path = base_file_name + ".sigmf-meta" + + with open(meta_path, "w") as meta_handle: + json.dump(sigmf_metadata, meta_handle, indent=2) + log.info(f"wrote SigMF metadata to {meta_path}") + + return sigmf_metadata + + +def blue_file_to_sigmf(file_path): + """ + Convert a MIDIS Bluefile to SigMF metadata and data. + + Parameters + ---------- + file_path : str + Path to the Blue file. + + Returns + ------- + numpy.ndarray + IQ Data. + """ + log.info("starting blue file processing") + + # read Header control block (HCB) from blue file to determine how to process the rest of the file + hcb = read_hcb(file_path) + + log.debug("Header Control Block (HCB) Fields") + for name, _, _, _, desc in HCB_LAYOUT: + log.debug(f"{name:10s}: {hcb[name]!r} # {desc}") + + log.debug("Adjunct Header") + log.debug(hcb.get("adjunct", hcb.get("adjunct_raw"))) + + # data_rep: 'EEEI' or 'IEEE' (little or big extended header endianness representation) + extended_header_endianness = hcb.get("head_rep") + + if extended_header_endianness == "EEEI": + ext_endianness = "<" # little-endian + elif extended_header_endianness == "IEEE": + ext_endianness = ">" # big-endian + else: + raise ValueError(f"Unknown head_rep value: {extended_header_endianness}") + + # parse extended header entries + ext_entries = parse_extended_header(file_path, hcb, ext_endianness) + log.debug("Extended Header Keywords") + for entry in ext_entries: + log.debug(f"{entry['tag']:20s}:{entry['value']}") + log.info(f"total extended header entries: {len(ext_entries)}") + + # data_rep: 'EEEI' or 'IEEE' (little or big data endianness representation) + data_rep_endianness = hcb.get("data_rep") + data_endianness = "<" if data_rep_endianness == "EEEI" else ">" + + # parse key data values + # iq_data will be available if needed for further processing. 
+ try: + iq_data = parse_data_values(file_path, hcb, data_endianness) + except Exception as error: + raise RuntimeError(f"Failed to parse data values: {error}") from error + + # call the SigMF conversion for metadata generation + blue_to_sigmf(hcb, ext_entries, file_path) + + # return the IQ data if needed for further processing if needed + return iq_data + + +def main() -> None: + """ + Entry-point for sigmf_convert_blue + """ + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("input", type=str, help="Blue (cdif) file path") + parser.add_argument("-v", "--verbose", action="count", default=0) + parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") + args = parser.parse_args() + + level_lut = { + 0: logging.WARNING, + 1: logging.INFO, + 2: logging.DEBUG, + } + logging.basicConfig(level=level_lut[min(args.verbose, 2)]) + + blue_file_to_sigmf(args.input) diff --git a/sigmf/apps/convert_wav.py b/sigmf/apps/convert_wav.py index c2f1f2e..9a7ddeb 100755 --- a/sigmf/apps/convert_wav.py +++ b/sigmf/apps/convert_wav.py @@ -77,9 +77,9 @@ def convert_wav( def main() -> None: """ - entry-point for sigmf_convert_wav + Entry-point for sigmf_convert_wav """ - parser = argparse.ArgumentParser(description="Convert wav to sigmf archive.") + parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("input", type=str, help="wav path") parser.add_argument("--author", type=str, default=None, help=f"set {SigMFFile.AUTHOR_KEY} metadata") parser.add_argument("-v", "--verbose", action="count", default=0) @@ -97,7 +97,3 @@ def main() -> None: wav_path=args.input, author=args.author, ) - - -if __name__ == "__main__": - main() diff --git a/sigmf/error.py b/sigmf/error.py index 9f2564c..f4364bc 100644 --- a/sigmf/error.py +++ b/sigmf/error.py @@ -22,3 +22,7 @@ class SigMFAccessError(SigMFError): class SigMFFileError(SigMFError): """Exceptions related to reading or writing SigMF files or archives.""" + + +class SigMFConversionError(SigMFError): + """Exceptions related to converting to SigMF format.""" \ No newline at end of file diff --git a/tests/test_convert.py b/tests/test_convert.py new file mode 100644 index 0000000..64041ac --- /dev/null +++ b/tests/test_convert.py @@ -0,0 +1,37 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +"""Tests for Converters""" + +import unittest +import os + +def _has_apps_dependencies(): + """Check if optional [apps] dependencies are available.""" + try: + import scipy.io.wavfile # noqa: F401 + return True + except ImportError: + return False + + +@unittest.skipUnless(_has_apps_dependencies(), "Optional [apps] dependencies not available") +class TestWAVConverter(unittest.TestCase): + def test_wav_to_sigmffile(self): + # Placeholder for actual test implementation + self.assertTrue(True) + + +@unittest.skipUnless(_has_apps_dependencies(), "Optional [apps] dependencies not available") +class TestBlueConverter(unittest.TestCase): + def setUp(self) -> None: + # skip test if environment variable not set + if not os.getenv("NONSIGMF_RECORDINGS_PATH"): + self.skipTest("NONSIGMF_RECORDINGS_PATH environment variable needed for Bluefile tests.") + + def test_blue_to_sigmffile(self): + # Placeholder for actual test implementation + self.assertTrue(True) \ No newline at end of file From 1139675cb47f1a65b6438a8eedecf2f3d5950f68 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Mon, 17 Nov 2025 15:21:52 -0800 Subject: [PATCH 02/15] add test for wavfile converter --- .gitignore | 1 + tests/test_convert.py | 51 ++++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 85bad5a..1aea56d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__/ *.swp *.py[cod] .cache +.vscode # packaging related dist/ diff --git a/tests/test_convert.py b/tests/test_convert.py index 64041ac..d284c4e 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -8,24 +8,49 @@ import unittest import os +import tempfile +from pathlib import Path +import numpy as np -def _has_apps_dependencies(): - """Check if optional [apps] dependencies are available.""" - try: - import scipy.io.wavfile # noqa: F401 - return True - except ImportError: - return False +try: + from scipy.io import wavfile + SCIPY_AVAILABLE = True +except ImportError: + SCIPY_AVAILABLE = False -@unittest.skipUnless(_has_apps_dependencies(), "Optional [apps] dependencies not available") +import sigmf +from sigmf.apps.convert_wav import convert_wav + + +@unittest.skipUnless(SCIPY_AVAILABLE, "scipy is required for WAV file tests") class TestWAVConverter(unittest.TestCase): - def test_wav_to_sigmffile(self): - # Placeholder for actual test implementation - self.assertTrue(True) + def setUp(self) -> None: + """create temp wav file for testing""" + if not SCIPY_AVAILABLE: + self.skipTest("scipy is required for WAV file tests") + self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.tmp_dir.name) + self.wav_path = self.tmp_path / "foo.wav" + samp_rate = 48000 + duration_s = 0.1 + ttt = np.linspace(0, duration_s, int(samp_rate * duration_s), endpoint=False) + freq = 440 # A4 note + self.audio_data = 0.5 * np.sin(2 * np.pi * freq * ttt) + wavfile.write(self.wav_path, samp_rate, self.audio_data.astype(np.float32)) + + def tearDown(self) -> None: + """clean up temporary directory""" + self.tmp_dir.cleanup() + + def test_wav_to_sigmf(self): + sigmf_path = convert_wav(wav_path=self.wav_path, out_path=str(self.tmp_path / "bar")) + meta = sigmf.fromfile(sigmf_path) + data = meta.read_samples() + # allow small numerical differences due to data type conversions + self.assertTrue(np.allclose(self.audio_data, data, atol=1e-8)) -@unittest.skipUnless(_has_apps_dependencies(), "Optional 
[apps] dependencies not available") class TestBlueConverter(unittest.TestCase): def setUp(self) -> None: # skip test if environment variable not set @@ -34,4 +59,4 @@ def setUp(self) -> None: def test_blue_to_sigmffile(self): # Placeholder for actual test implementation - self.assertTrue(True) \ No newline at end of file + self.assertTrue(True) From 1aab574d6291783de9a71b7779fca65116762606 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Tue, 18 Nov 2025 13:33:10 -0800 Subject: [PATCH 03/15] refactor second pass & add tests --- sigmf/apps/convert_blue.py | 130 ++++++++++++++++++++++++------------- tests/test_convert.py | 57 +++++++++++++--- 2 files changed, 134 insertions(+), 53 deletions(-) diff --git a/sigmf/apps/convert_blue.py b/sigmf/apps/convert_blue.py index 7d77ab7..b1fc235 100644 --- a/sigmf/apps/convert_blue.py +++ b/sigmf/apps/convert_blue.py @@ -18,6 +18,8 @@ import os import struct from datetime import datetime, timezone +from pathlib import Path +from typing import Optional import numpy as np @@ -242,14 +244,16 @@ def parse_extended_header(file_path, hcb, endian="<"): return entries -def parse_data_values(file_path, hcb, endianness): +def parse_data_values(blue_path: Path, out_path: Path, hcb: dict, endianness: str): """ Parse key HCB values used for further processing. Parameters ---------- - file_path : str + blue_path : Path Path to the Blue file. + out_path : Path + Path to output SigMF metadata file. hcb : dict Header Control Block dictionary. endianness : str @@ -261,25 +265,24 @@ def parse_data_values(file_path, hcb, endianness): Parsed samples. """ log.info("parsing blue file data values") - with open(file_path, "rb") as handle: + with open(blue_path, "rb") as handle: data = handle.read(HEADER_SIZE_BYTES) if len(data) < HEADER_SIZE_BYTES: raise ValueError("Incomplete header") dtype = data[52:54].decode("utf-8") # eg 'CI', 'CF', 'SD' log.debug(f"data type: {dtype}") - time_interval = np.frombuffer(data[264:272], dtype=np.float64)[0] if time_interval <= 0: raise ValueError(f"Invalid time interval: {time_interval}") sample_rate_hz = 1 / time_interval log.info(f"sample rate: {sample_rate_hz/1e6:.3f} MHz") extended_header_data_size = int.from_bytes(data[28:32], byteorder="little") - file_size_bytes = os.path.getsize(file_path) + file_size_bytes = os.path.getsize(blue_path) log.debug(f"file size: {file_size_bytes} bytes") # Determine destination path for SigMF data file - dest_path = file_path.rsplit(".", 1)[0] + dest_path = out_path.with_suffix(".sigmf-data") ### complex data parsing @@ -287,25 +290,25 @@ def parse_data_values(file_path, hcb, endianness): if dtype == "CI": elem_size = np.dtype(np.int16).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - raw_samples = np.fromfile(file_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) + raw_samples = np.fromfile(blue_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) # reassemble interleaved IQ samples samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... 
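        # the divisor below, 32767 = 2**15 - 1, maps int16 full scale to roughly ±1.0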
# normalize samples to -1.0 to +1.0 range samples = samples.astype(np.float32) / 32767.0 # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # complex 32-bit integer IQ data > ci32_le in SigMF elif dtype == "CL": elem_size = np.dtype(np.int32).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - raw_samples = np.fromfile(file_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) + raw_samples = np.fromfile(blue_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) # reassemble interleaved IQ samples samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... # normalize samples to -1.0 to +1.0 range samples = samples.astype(np.float32) / 2147483647.0 # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # complex 32-bit float IQ data > cf32_le in SigMF elif dtype == "CF": @@ -313,9 +316,9 @@ def parse_data_values(file_path, hcb, endianness): # no need to reassemble IQ — already complex elem_size = np.dtype(np.complex64).itemsize # will be 8 bytes elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.complex64, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.complex64, offset=HEADER_SIZE_BYTES, count=elem_count) # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) ### scalar data parsing @@ -323,64 +326,94 @@ def parse_data_values(file_path, hcb, endianness): elif dtype == "SB": elem_size = np.dtype(np.int8).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.int8, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.int8, offset=HEADER_SIZE_BYTES, count=elem_count) # normalize samples to -1.0 to +1.0 range samples = samples.astype(np.float32) / 127.0 # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # scalar data parsing > ri16_le in SigMF elif dtype == "SI": elem_size = np.dtype(np.int16).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) # normalize samples to -1.0 to +1.0 range samples = samples / 32767.0 # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # scalar data parsing > ri32_le in SigMF elif dtype == "SL": elem_size = np.dtype(np.int32).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) # normalize samples to -1.0 to +1.0 range samples = samples / 2147483647.0 # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # scalar data parsing > ri64_le in SigMF elif dtype == "SX": elem_size = np.dtype(np.int64).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.int64, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.int64, offset=HEADER_SIZE_BYTES, 
count=elem_count) # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # scalar data parsing > rf32_le in SigMF elif dtype == "SF": elem_size = np.dtype(np.float32).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.float32, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.float32, offset=HEADER_SIZE_BYTES, count=elem_count) # save out as SigMF IQ data file - samples.tofile(f"{dest_path}.sigmf-data") + samples.tofile(dest_path) # scalar data parsing > rf64_le in SigMF elif dtype == "SD": elem_size = np.dtype(np.float64).itemsize elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(file_path, dtype=np.float64, offset=HEADER_SIZE_BYTES, count=elem_count) + samples = np.fromfile(blue_path, dtype=np.float64, offset=HEADER_SIZE_BYTES, count=elem_count) # save out as SigMF IQ data file - samples.astype(np.complex64).tofile(f"{dest_path}.sigmf-data") + samples.astype(np.complex64).tofile(dest_path) else: raise ValueError(f"Unsupported data type: {dtype}") - # TODO: validate handling of scalar types - reshape per mathlab port shown here? + # TODO: validate handling of scalar types - Reshape per mathlab port shown here? + + """ + # Save out as SigMF IQ data file + if dtype in ("CI", "CL", "CF"): + # Complex IQ data → save as cf32_le + samples.astype(np.complex64).tofile(dest_path) + else: + # Scalar data → save in native dtype + samples.tofile(dest_path) + """ + + # TODO: validate handling of scalar types - Reshape per mathlab port shown here? + + """ + fmt_size_char = self.hcb["format"][0] + fmt_type_char = self.hcb["format"][1] + + elementsPerSample = self.FormatSize(fmt_size_char) + print('Elements Per Sample', elementsPerSample/1e6) + elem_count_per_sample = ( + np.prod(elementsPerSample) if isinstance(elementsPerSample, (tuple, list)) else elementsPerSample + ) + + dtype_str, elem_bytes = self.FormatType(fmt_type_char) + bytesPerSample = int(elem_bytes) * int(elem_count_per_sample) + + bytesRead = int(self.dataOffset - self.hcb["data_start"]) + bytes_remaining = int(self.hcb["data_size"] - bytesRead) + """ + # return the IQ data if needed for further processing if needed return samples -def blue_to_sigmf(hcb, ext_entries, file_path): +def construct_sigmf(hcb: dict, ext_entries: list, blue_path: Path, out_path: Path): """ Build a SigMF metadata dict from parsed Bluefile HCB and extended header. @@ -390,8 +423,10 @@ def blue_to_sigmf(hcb, ext_entries, file_path): Header Control Block from read_hcb(). ext_entries : list of dict Parsed extended header entries from parse_extended_header(). - file_path : str + blue_path : Path Path to the original blue file. + out_path : Path + Path to output SigMF metadata file. 
Returns ------- @@ -556,14 +591,12 @@ def compute_sha512(path, bufsize=1024 * 1024): chunk = handle.read(bufsize) return hash_obj.hexdigest() - # strip the extension from the original file path - base_file_name = os.path.splitext(file_path)[0] - # build the .sigmf-data path - data_file_path = base_file_name + ".sigmf-data" + data_path = out_path.with_suffix(".sigmf-data") + meta_path = out_path.with_suffix(".sigmf-meta") # compute SHA-512 of the data file - data_sha512 = compute_sha512(data_file_path) # path to the .sigmf-data file + data_sha512 = compute_sha512(data_path) # path to the .sigmf-data file global_md["core:sha512"] = data_sha512 # annotations array @@ -615,10 +648,6 @@ def compute_sha512(path, bufsize=1024 * 1024): "annotations": annotations, } - # write .sigmf-meta file - base_file_name = os.path.splitext(file_path)[0] - meta_path = base_file_name + ".sigmf-meta" - with open(meta_path, "w") as meta_handle: json.dump(sigmf_metadata, meta_handle, indent=2) log.info(f"wrote SigMF metadata to {meta_path}") @@ -626,25 +655,36 @@ def compute_sha512(path, bufsize=1024 * 1024): return sigmf_metadata -def blue_file_to_sigmf(file_path): +def convert_blue( + blue_path: str, + out_path: Optional[str] = None, +) -> np.ndarray: """ Convert a MIDIS Bluefile to SigMF metadata and data. Parameters ---------- - file_path : str + blue_path : str Path to the Blue file. + out_path : str + Path to the output SigMF metadata file. Returns ------- numpy.ndarray IQ Data. """ - log.info("starting blue file processing") + log.debug("starting blue file processing") - # read Header control block (HCB) from blue file to determine how to process the rest of the file - hcb = read_hcb(file_path) + blue_path = Path(blue_path) + if out_path is None: + # extension will be changed later + out_path = Path(blue_path) + else: + out_path = Path(out_path) + # read Header control block (HCB) from blue file to determine how to process the rest of the file + hcb = read_hcb(blue_path) log.debug("Header Control Block (HCB) Fields") for name, _, _, _, desc in HCB_LAYOUT: log.debug(f"{name:10s}: {hcb[name]!r} # {desc}") @@ -663,7 +703,7 @@ def blue_file_to_sigmf(file_path): raise ValueError(f"Unknown head_rep value: {extended_header_endianness}") # parse extended header entries - ext_entries = parse_extended_header(file_path, hcb, ext_endianness) + ext_entries = parse_extended_header(blue_path, hcb, ext_endianness) log.debug("Extended Header Keywords") for entry in ext_entries: log.debug(f"{entry['tag']:20s}:{entry['value']}") @@ -676,12 +716,12 @@ def blue_file_to_sigmf(file_path): # parse key data values # iq_data will be available if needed for further processing. 
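    # note: out_path acts as a stem; parse_data_values and construct_sigmf swap
    # its suffix for .sigmf-data and .sigmf-meta via Path.with_suffix()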
try: - iq_data = parse_data_values(file_path, hcb, data_endianness) + iq_data = parse_data_values(blue_path, out_path, hcb, data_endianness) except Exception as error: raise RuntimeError(f"Failed to parse data values: {error}") from error # call the SigMF conversion for metadata generation - blue_to_sigmf(hcb, ext_entries, file_path) + construct_sigmf(hcb, ext_entries, blue_path, out_path) # return the IQ data if needed for further processing if needed return iq_data @@ -704,4 +744,4 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - blue_file_to_sigmf(args.input) + convert_blue(args.input) diff --git a/tests/test_convert.py b/tests/test_convert.py index d284c4e..26af615 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -6,10 +6,11 @@ """Tests for Converters""" -import unittest import os import tempfile +import unittest from pathlib import Path + import numpy as np try: @@ -20,11 +21,15 @@ SCIPY_AVAILABLE = False import sigmf +from sigmf.apps.convert_blue import convert_blue from sigmf.apps.convert_wav import convert_wav +BLUE_ENV_VAR = "NONSIGMF_RECORDINGS_PATH" + -@unittest.skipUnless(SCIPY_AVAILABLE, "scipy is required for WAV file tests") class TestWAVConverter(unittest.TestCase): + """wav loopback test""" + def setUp(self) -> None: """create temp wav file for testing""" if not SCIPY_AVAILABLE: @@ -52,11 +57,47 @@ def test_wav_to_sigmf(self): class TestBlueConverter(unittest.TestCase): + """As we have no blue files in the repository, test only when env path specified.""" + def setUp(self) -> None: - # skip test if environment variable not set - if not os.getenv("NONSIGMF_RECORDINGS_PATH"): - self.skipTest("NONSIGMF_RECORDINGS_PATH environment variable needed for Bluefile tests.") + blue_path = Path(os.getenv(BLUE_ENV_VAR, "nopath")) + if not blue_path or blue_path == Path("nopath"): + # skip test if environment variable not set + self.skipTest(f"Set {BLUE_ENV_VAR} environment variable to location with .cdif files to run test.") + if not blue_path.is_dir(): + self.fail(f"{blue_path} is not a valid directory.") + self.bluefiles = list(blue_path.glob("*.cdif")) + if not self.bluefiles: + self.fail(f"No .cdif files found in {BLUE_ENV_VAR}.") + self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.tmp_dir.name) + + def tearDown(self) -> None: + """clean up temporary directory""" + self.tmp_dir.cleanup() + + def test_blue_to_sigmf(self): + for bdx, bluefile in enumerate(self.bluefiles): + sigmf_path = self.tmp_path / f"converted_{bdx}" + _ = convert_blue(blue_path=bluefile, out_path=sigmf_path) + meta = sigmf.fromfile(sigmf_path) + + ### EVERYTHING BELOW HERE IS FOR DEBUGGING ONLY _ REMOVE LATER ### + # plot stft of RF data for visual inspection + from scipy.signal import spectrogram + + samples = meta.read_samples() + freqs, times, spec = spectrogram(samples, fs=meta.get_global_field("core:sample_rate"), nperseg=1024) + # use imshow to plot spectrogram + import matplotlib.pyplot as plt - def test_blue_to_sigmffile(self): - # Placeholder for actual test implementation - self.assertTrue(True) + plt.figure() + plt.imshow( + 10 * np.log10(spec), aspect="auto", extent=[times[0], times[-1], freqs[0], freqs[-1]], origin="lower" + ) + plt.colorbar(label="Intensity [dB]") + plt.ylabel("Frequency [Hz]") + plt.xlabel("Time [s]") + plt.title(f"Spectrogram of {bluefile.name}") + plt.show() + self.assertIsInstance(meta, sigmf.SigMFFile) From 8e759870ac3f9c1933096089cfaa17095982401a Mon Sep 17 00:00:00 2001 From: Teque5 Date: Tue, 18 
Nov 2025 16:07:22 -0800 Subject: [PATCH 04/15] refactor third pass --- sigmf/apps/convert_blue.py | 582 ++++++++++++++----------------------- sigmf/apps/convert_wav.py | 6 +- tests/test_convert.py | 3 +- 3 files changed, 231 insertions(+), 360 deletions(-) diff --git a/sigmf/apps/convert_blue.py b/sigmf/apps/convert_blue.py index b1fc235..c2becc1 100644 --- a/sigmf/apps/convert_blue.py +++ b/sigmf/apps/convert_blue.py @@ -5,14 +5,14 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """ -Blue File converter with HCB and Extended Header Parsing -This script reads and parses the HCB (Header Control Block) and extended header keywords from a Blue file format. +X-Midas BLUE File converter. +This script reads and parses the HCB (Header Control Block) and Extended Headers. It supports different file types and extracts metadata accordingly. Converts the extracted metadata into SigMF format. """ import argparse -import hashlib +import getpass import json import logging import os @@ -23,8 +23,11 @@ import numpy as np +from .. import SigMFFile from .. import __version__ as toolversion from ..error import SigMFConversionError +from ..sigmffile import get_sigmf_filenames +from ..utils import SIGMF_DATETIME_ISO8601_FMT log = logging.getLogger() @@ -42,7 +45,7 @@ ("data_start",32, 8, "d", "Data start in bytes"), ("data_size", 40, 8, "d", "Data size in bytes"), ("type", 48, 4, "i", "File type code"), - ("format", 52, 2, "2s", "Data format code"), + ("format", 52, 2, "2s", "2 Letter data format code"), ("flagmask", 54, 2, "h", "16-bit flagmask"), ("timecode", 56, 8, "d", "Time code field"), ("inlet", 64, 2, "h", "Inlet owner"), @@ -55,7 +58,7 @@ ("outbytes", 96, 64, "8d", "Next out byte (each outlet)"), ("keylength", 160, 4, "i", "Length of keyword string"), ("keywords", 164, 92, "92s", "User defined keyword string"), - # Adjunct starts at 256 + # Adjunct starts at 256 after this ] # fmt: on @@ -70,10 +73,52 @@ "A": (np.dtype("S1"), 1), } - HEADER_SIZE_BYTES = 512 BLOCK_SIZE_BYTES = 512 +NORMALIZATION_FACTORS = { + # format : normalization factor + "SB": 2**7 - 1, # scalar 8-bit integer + "SI": 2**15 - 1, # scalar 16-bit integer + "SL": 2**31 - 1, # scalar 32-bit integer + "CB": 2**7 - 1, # complex 8-bit integer + "CI": 2**15 - 1, # complex 16-bit integer + "CL": 2**31 - 1, # complex 32-bit integer +} + +# Data type configurations +DATA_TYPE_CONFIGS = { + "CB": {"dtype": np.int8, "complex": True, "normalize": True}, + "CI": {"dtype": np.int16, "complex": True, "normalize": True}, + "CL": {"dtype": np.int32, "complex": True, "normalize": True}, + "CF": {"dtype": np.complex64, "complex": True, "normalize": False}, + "SB": {"dtype": np.int8, "complex": False, "normalize": True}, + "SI": {"dtype": np.int16, "complex": False, "normalize": True}, + "SL": {"dtype": np.int32, "complex": False, "normalize": True}, + "SX": {"dtype": np.int64, "complex": False, "normalize": False}, + "SF": {"dtype": np.float32, "complex": False, "normalize": False}, + "SD": {"dtype": np.float64, "complex": False, "normalize": False}, +} + +DATATYPE_MAP_BASE = { + # S = Scalar + "SB": "ri8", + "SI": "ri16", + "SL": "ri32", + "SX": "ri64", + "SF": "rf32", + "SD": "rf64", + # C = Complex + "CB": "ci8", + "CI": "ci16", + "CL": "ci32", + "CX": "ci64", + "CF": "cf32", + "CD": "cf32", # FIXME: should be cf64? D should be double. 
+ # V = Vector (not supported) + # Q = Quad (not supported) +} + def detect_endian(data, layout, probe_fields=("data_size", "version")): """ @@ -131,7 +176,7 @@ def detect_endian(data, layout, probe_fields=("data_size", "version")): def read_hcb(file_path): """ - Read HCB fields and adjunct block from a Blue file. + Read Header Control Block (HCB) and adjunct block from a Blue file. Parameters ---------- @@ -147,6 +192,8 @@ def read_hcb(file_path): hcb = {} with open(file_path, "rb") as handle: data = handle.read(HEADER_SIZE_BYTES) + if len(data) < HEADER_SIZE_BYTES: + raise SigMFConversionError("Incomplete header") endian = detect_endian(data, HCB_LAYOUT) # fixed fields @@ -155,7 +202,7 @@ def read_hcb(file_path): try: val = struct.unpack(endian + fmt, raw)[0] except struct.error: - raise ValueError(f"Failed to unpack field {name} with endian {endian}") + raise SigMFConversionError(f"Failed to unpack field {name} with endian {endian}") if isinstance(val, bytes): val = val.decode("ascii", errors="replace").strip("\x00 ") hcb[name] = val @@ -182,17 +229,19 @@ def read_hcb(file_path): else: hcb["adjunct_raw"] = handle.read(adjunct_offset_bytes) + validate_hcb(hcb) + return hcb -def parse_extended_header(file_path, hcb, endian="<"): +def read_extended_header(file_path, hcb, endian="<"): """ - Parse extended header keyword records. + Read Extended Header from a BLUE file. Parameters ---------- file_path : str - Path to the Bluefile. + Path to the BLUE file. hcb : dict Header Control Block containing 'ext_size' and 'ext_start'. endian : str, optional @@ -222,7 +271,7 @@ def parse_extended_header(file_path, hcb, endian="<"): if type_char == "A": raw = handle.read(val_len) if len(raw) < val_len: - raise ValueError("Unexpected end of extended header") + raise SigMFConversionError("Unexpected end of extended header") value = raw.rstrip(b"\x00").decode("ascii", errors="replace") else: value = np.frombuffer(handle.read(val_len), dtype=dtype, count=val_count) @@ -241,188 +290,84 @@ def parse_extended_header(file_path, hcb, endian="<"): entries.append({"tag": tag, "type": type_char, "value": value, "lkey": lkey, "lext": lext, "ltag": ltag}) bytes_remaining -= lkey + validate_extended_header(entries) + return entries -def parse_data_values(blue_path: Path, out_path: Path, hcb: dict, endianness: str): +def parse_data_values(blue_path: Path, out_path: Path, hcb: dict) -> np.ndarray: """ Parse key HCB values used for further processing. Parameters ---------- blue_path : Path - Path to the Blue file. + Path to the BLUE file. out_path : Path Path to output SigMF metadata file. hcb : dict Header Control Block dictionary. - endianness : str - Endianness ('<' for little-endian, '>' for big-endian). Returns ------- numpy.ndarray Parsed samples. 
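
    Notes
    -----
    The 8-, 16-, and 32-bit integer formats are normalized into roughly
    [-1.0, +1.0] as float32 using NORMALIZATION_FACTORS; 64-bit integer and
    float formats are written through unmodified.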
""" - log.info("parsing blue file data values") - with open(blue_path, "rb") as handle: - data = handle.read(HEADER_SIZE_BYTES) - if len(data) < HEADER_SIZE_BYTES: - raise ValueError("Incomplete header") - dtype = data[52:54].decode("utf-8") # eg 'CI', 'CF', 'SD' - log.debug(f"data type: {dtype}") - - time_interval = np.frombuffer(data[264:272], dtype=np.float64)[0] - if time_interval <= 0: - raise ValueError(f"Invalid time interval: {time_interval}") - sample_rate_hz = 1 / time_interval - log.info(f"sample rate: {sample_rate_hz/1e6:.3f} MHz") - extended_header_data_size = int.from_bytes(data[28:32], byteorder="little") - file_size_bytes = os.path.getsize(blue_path) - log.debug(f"file size: {file_size_bytes} bytes") + log.info("parsing BLUE file data values") + + file_size_bytes = os.path.getsize(blue_path) + extended_header_data_size = hcb.get("ext_size") + format = hcb.get("format") # Determine destination path for SigMF data file dest_path = out_path.with_suffix(".sigmf-data") - ### complex data parsing - - # complex 16-bit integer IQ data > ci16_le in SigMF - if dtype == "CI": - elem_size = np.dtype(np.int16).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - raw_samples = np.fromfile(blue_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) - # reassemble interleaved IQ samples - samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... - # normalize samples to -1.0 to +1.0 range - samples = samples.astype(np.float32) / 32767.0 - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # complex 32-bit integer IQ data > ci32_le in SigMF - elif dtype == "CL": - elem_size = np.dtype(np.int32).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - raw_samples = np.fromfile(blue_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) - # reassemble interleaved IQ samples - samples = raw_samples[::2] + 1j * raw_samples[1::2] # convert to IQIQIQ... 
- # normalize samples to -1.0 to +1.0 range - samples = samples.astype(np.float32) / 2147483647.0 - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # complex 32-bit float IQ data > cf32_le in SigMF - elif dtype == "CF": - # each complex sample is 8 bytes total (2 × float32), so np.complex64 is appropriate - # no need to reassemble IQ — already complex - elem_size = np.dtype(np.complex64).itemsize # will be 8 bytes - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.complex64, offset=HEADER_SIZE_BYTES, count=elem_count) - # save out as SigMF IQ data file - samples.tofile(dest_path) - - ### scalar data parsing - - # scalar data parsing > ri8_le in SigMF - elif dtype == "SB": - elem_size = np.dtype(np.int8).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.int8, offset=HEADER_SIZE_BYTES, count=elem_count) - # normalize samples to -1.0 to +1.0 range - samples = samples.astype(np.float32) / 127.0 - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # scalar data parsing > ri16_le in SigMF - elif dtype == "SI": - elem_size = np.dtype(np.int16).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.int16, offset=HEADER_SIZE_BYTES, count=elem_count) - # normalize samples to -1.0 to +1.0 range - samples = samples / 32767.0 - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # scalar data parsing > ri32_le in SigMF - elif dtype == "SL": - elem_size = np.dtype(np.int32).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.int32, offset=HEADER_SIZE_BYTES, count=elem_count) - # normalize samples to -1.0 to +1.0 range - samples = samples / 2147483647.0 - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # scalar data parsing > ri64_le in SigMF - elif dtype == "SX": - elem_size = np.dtype(np.int64).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.int64, offset=HEADER_SIZE_BYTES, count=elem_count) - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # scalar data parsing > rf32_le in SigMF - elif dtype == "SF": - elem_size = np.dtype(np.float32).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.float32, offset=HEADER_SIZE_BYTES, count=elem_count) - # save out as SigMF IQ data file - samples.tofile(dest_path) - - # scalar data parsing > rf64_le in SigMF - elif dtype == "SD": - elem_size = np.dtype(np.float64).itemsize - elem_count = (file_size_bytes - extended_header_data_size) // elem_size - samples = np.fromfile(blue_path, dtype=np.float64, offset=HEADER_SIZE_BYTES, count=elem_count) - # save out as SigMF IQ data file - samples.astype(np.complex64).tofile(dest_path) - else: - raise ValueError(f"Unsupported data type: {dtype}") - - # TODO: validate handling of scalar types - Reshape per mathlab port shown here? 
- - """ - # Save out as SigMF IQ data file - if dtype in ("CI", "CL", "CF"): - # Complex IQ data → save as cf32_le - samples.astype(np.complex64).tofile(dest_path) - else: - # Scalar data → save in native dtype - samples.tofile(dest_path) - """ + config = DATA_TYPE_CONFIGS[format] + np_dtype = config["dtype"] + is_complex = config["complex"] + should_normalize = config["normalize"] - # TODO: validate handling of scalar types - Reshape per mathlab port shown here? + # calculate element size and count + elem_size = np.dtype(np_dtype).itemsize + elem_count = (file_size_bytes - extended_header_data_size) // elem_size - """ - fmt_size_char = self.hcb["format"][0] - fmt_type_char = self.hcb["format"][1] + # read raw samples + raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count) - elementsPerSample = self.FormatSize(fmt_size_char) - print('Elements Per Sample', elementsPerSample/1e6) - elem_count_per_sample = ( - np.prod(elementsPerSample) if isinstance(elementsPerSample, (tuple, list)) else elementsPerSample - ) - - dtype_str, elem_bytes = self.FormatType(fmt_type_char) - bytesPerSample = int(elem_bytes) * int(elem_count_per_sample) + if is_complex: + # complex data: already in IQIQIQ... format or native complex + if np_dtype == np.complex64: + # already complex, no reassembly needed + samples = raw_samples + else: + # reassemble interleaved IQ samples + samples = raw_samples[::2] + 1j * raw_samples[1::2] + # normalize if needed + if should_normalize: + samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[format] + else: + # scalar data + samples = raw_samples + if should_normalize: + samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[format] - bytesRead = int(self.dataOffset - self.hcb["data_start"]) - bytes_remaining = int(self.hcb["data_size"] - bytesRead) - """ + # save out as SigMF IQ data file + samples.tofile(dest_path) # return the IQ data if needed for further processing if needed return samples -def construct_sigmf(hcb: dict, ext_entries: list, blue_path: Path, out_path: Path): +def construct_sigmf(hcb: dict, ext_entries: list, blue_path: Path, out_path: Path) -> SigMFFile: """ - Build a SigMF metadata dict from parsed Bluefile HCB and extended header. + Built & write a SigMF object from BLUE metadata. Parameters ---------- hcb : dict Header Control Block from read_hcb(). ext_entries : list of dict - Parsed extended header entries from parse_extended_header(). + Parsed extended header entries from read_extended_header(). blue_path : Path Path to the original blue file. out_path : Path @@ -435,7 +380,7 @@ def construct_sigmf(hcb: dict, ext_entries: list, blue_path: Path, out_path: Pat Raises ------ - ValueError + SigMFConversionError If required fields are missing or invalid. """ # helper to look up extended header values by tag @@ -445,79 +390,24 @@ def get_tag(tag): return entry["value"] return None - # s - scalar - # c - complex - # v - vector - # q - quad - TODO: pri 2 - add support for other types if they are commonly used. 
- # - # b: 8-bit integer - # i: 16-bit integer - # l: 32-bit integer - # x: 64-bit integer - # f: 32-bit float - # d: 64-bit float - - # global datatype object - little endian - datatype_map_le = { - "SB": "ri8_le", - "SI": "ri16_le", - "SL": "ri32_le", - "SX": "ri64_le", - "SF": "rf32_le", - "SD": "rf64_le", - "CB": "ci8_le", - "CI": "ci16_le", - "CL": "ci32_le", - "CX": "ci64_le", - "CF": "cf32_le", - "CD": "cf32_le", - } - - # global datatype object - big endian - datatype_map_be = { - "SB": "ri8_be", - "SI": "ri16_be", - "SL": "ri32_be", - "SX": "ri64_be", - "SF": "rf32_be", - "SD": "rf64_be", - "CB": "ci8_be", - "CI": "ci16_be", - "CL": "ci32_be", - "CX": "ci64_be", - "CF": "cf32_be", - "CD": "cf32_be", - } - - # header data representation: 'EEEI' or 'IEEE' (little or big data endianess representation) - header_rep = hcb.get("head_rep") - # data_rep: 'EEEI' or 'IEEE' (little or big data endianess representation) data_rep = hcb.get("data_rep") # data_format: for example 'CI' or 'SD' (data format code - real or complex, int or float) data_format = hcb.get("format") + endian_suffix = "_le" if data_rep == "EEEI" else "_be" - if data_rep == "EEEI": # little endian - data_map = datatype_map_le.get(data_format) - elif data_rep == "IEEE": # big endian - data_map = datatype_map_be.get(data_format) - - datatype = data_map if data_map is not None else "unknown" + # get base datatype and add endianness + base_datatype = DATATYPE_MAP_BASE.get(data_format) + datatype = base_datatype + endian_suffix log.info(f"determined SigMF datatype: {datatype} and data representation: {data_rep}") # sample rate: prefer adjunct.xdelta, else extended header SAMPLE_RATE if "adjunct" in hcb and "xdelta" in hcb["adjunct"]: - sample_rate_hz = 1.0 / hcb["adjunct"]["xdelta"] + sample_rate_hz = 1 / hcb["adjunct"]["xdelta"] else: - sample_rate_tag = get_tag("SAMPLE_RATE") - sample_rate_hz = float(sample_rate_tag) if sample_rate_tag is not None else None - - # for now define static values. perhaps take as JSON input - hardware_description = "Blue File Conversion - Unknown Hardware" - blue_author = "Blue File Conversion - Unknown Author" - blue_license = "Blue File Conversion - Unknown License" + sample_rate_hz = float(get_tag("SAMPLE_RATE")) if "outlets" in hcb and hcb["outlets"] > 0: num_channels = int(hcb["outlets"]) @@ -525,134 +415,122 @@ def get_tag(tag): num_channels = 1 # base global metadata - global_md = { - "core:author": blue_author, - "core:datatype": datatype, - "core:description": hcb.get("keywords", ""), - "core:hw": hardware_description, - "core:license": blue_license, - "core:num_channels": num_channels, - "core:sample_rate": sample_rate_hz, - "core:version": "1.0.0", + global_info = { + # FIXME: what common fields are in hcb? + "core:author": getpass.getuser(), + SigMFFile.DATATYPE_KEY: datatype, + # FIXME: is this the most apt description? 
+ SigMFFile.DESCRIPTION_KEY: hcb.get("keywords", ""), + SigMFFile.RECORDER_KEY: "Official SigMF BLUE converter", + SigMFFile.NUM_CHANNELS_KEY: num_channels, + SigMFFile.SAMPLE_RATE_KEY: sample_rate_hz, + SigMFFile.EXTENSIONS_KEY: [{"name": "blue", "version": "0.0.1", "optional": True}], } - for name, _, _, _, desc in HCB_LAYOUT: - value = hcb.get(name) # safe access - if value is None: - continue # or set a default - global_md[f"core:blue_hcb_{name}"] = value + global_info["blue:hcb"] = hcb # merge adjunct fields - adjunct = hcb.get("adjunct", {}) - for key, value in adjunct.items(): - global_md[f"core:blue_adjunct_header_{key}"] = value + if "adjunct" in hcb: + global_info["blue:adjunct"] = hcb["adjunct"] # merge extended header fields - for entry in ext_entries: - name = entry.get("tag") - if name is None: - continue - key = f"core:blue_extended_header_{name}" - value = entry.get("value") - if hasattr(value, "item"): - value = value.item() - global_md[key] = value - - # convert the datetime object to an ISO 8601 formatted string - epoch_time_raw = int(hcb.get("timecode", 0)) - - # adjust for Bluefile POSIX epoch (1950 vs 1970) - bluefile_epoch_offset = 631152000 # seconds between 1950 and 1970 - epoch_time = epoch_time_raw - bluefile_epoch_offset - - dt_object_utc = datetime.fromtimestamp(epoch_time, tz=timezone.utc) - # format with milliseconds and Zulu suffix - iso_8601_string = dt_object_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - log.debug(f"epoch time: {epoch_time}") - log.info(f"ISO 8601 time: {iso_8601_string}") - - # captures array - captures = [ - { - "core:datetime": iso_8601_string, - "core:frequency": float(get_tag("RF_FREQ") or 0.0), - "core:sample_start": 0, - } - ] - - # compute SHA‑512 hash of data file - def compute_sha512(path, bufsize=1024 * 1024): - """Compute SHA-512 hash of a file in chunks.""" - hash_obj = hashlib.sha512() - with open(path, "rb") as handle: - chunk = handle.read(bufsize) - while chunk: - hash_obj.update(chunk) - chunk = handle.read(bufsize) - return hash_obj.hexdigest() - - # build the .sigmf-data path - data_path = out_path.with_suffix(".sigmf-data") - meta_path = out_path.with_suffix(".sigmf-meta") - - # compute SHA-512 of the data file - data_sha512 = compute_sha512(data_path) # path to the .sigmf-data file - global_md["core:sha512"] = data_sha512 - - # annotations array - datatype_sizes_bytes = { - "ri8_le": 1, - "ri16_le": 2, - "ri32_le": 4, - "ci16_le": 4, - "ci32_le": 8, - "cf32_le": 8, - "rf32_le": 4, - "rf64_le": 8, - "ri64_le": 8, - "ri8_be": 1, - "ri16_be": 2, - "ri32_be": 4, - "ci16_be": 4, - "ci32_be": 8, - "cf32_be": 8, - "rf32_be": 4, - "rf64_be": 8, - "ri64_be": 8, + if ext_entries: + extended = {} + for entry in ext_entries: + key = entry.get("tag") + value = entry.get("value") + if hasattr(value, "item"): + value = value.item() + extended[key] = value + global_info["blue:extended"] = extended + + # BLUE uses 1950-01-01 as epoch, UNIX uses 1970-01-01 + blue_timecode = int(hcb.get("timecode", 0)) + blue_epoch = blue_timecode - 631152000 # seconds between 1950 and 1970 + blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc) + + capture_info = { + SigMFFile.DATETIME_KEY: blue_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), } - # calculate sample count - data_size_bytes = int(hcb.get("data_size", 0)) - if datatype not in datatype_sizes_bytes: - raise ValueError(f"Unsupported datatype {datatype}") - bytes_per_sample = datatype_sizes_bytes[datatype] - sample_count = int(data_size_bytes // bytes_per_sample) - - 
rf_freq_hz = float(get_tag("RF_FREQ") or 0.0)
-    bandwidth_hz = float(get_tag("SBT_BANDWIDTH") or 0.0)
-
-    annotations = [
-        {
-            "core:sample_start": 0,
-            "core:sample_count": sample_count,
-            "core:freq_upper_edge": rf_freq_hz + bandwidth_hz,
-            "core:freq_lower_edge": rf_freq_hz,
-            "core:label": "Sceptere",
-        }
-    ]
-
-    # final SigMF object
-    sigmf_metadata = {
-        "global": global_md,
-        "captures": captures,
-        "annotations": annotations,
-    }
+    if get_tag("RF_FREQ") is not None:
+        # FIXME: I believe there are many possible keys related to tune frequency
+        capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ"))
+
+    # actually write to SigMF
+    filenames = get_sigmf_filenames(out_path)
+
+    meta = SigMFFile(
+        data_file=filenames["data_fn"],
+        global_info=global_info,
+    )
+    meta.add_capture(0, metadata=capture_info)
+    log.debug("created %r", meta)
+
+    meta.tofile(filenames["meta_fn"], toarchive=False)
+    log.info("wrote %s", filenames["meta_fn"])
+
+    return meta
+
+
+def validate_hcb(hcb: dict) -> None:
+    """
+    Check that BLUE Header Control Block (HCB) contains minimum required fields.
+
+    Parameters
+    ----------
+    hcb : dict
+        Header Control Block dictionary.
+
+    Raises
+    ------
+    SigMFConversionError
+        If required fields are missing or invalid.
+    """
+    required = ["version", "data_start", "data_size", "data_rep", "head_rep", "detached", "format", "type"]
+    for field in required:
+        if field not in hcb:
+            raise SigMFConversionError(f"Missing required HCB field: {field}")
+        if hcb[field] is None:
+            raise SigMFConversionError(f"Required HCB field {field} is None")
+        log.debug(f"HCB field {field}: {hcb[field]!r}")
+
+    for rep_field in ["data_rep", "head_rep"]:
+        if hcb[rep_field] not in ("EEEI", "IEEE"):
+            raise SigMFConversionError(f"Invalid value for {rep_field}: {hcb[rep_field]}")
+
+    if hcb["format"] not in DATATYPE_MAP_BASE:
+        raise SigMFConversionError(f"Unsupported data format: {hcb['format']}")
+    if hcb["format"] not in DATA_TYPE_CONFIGS:
+        raise SigMFConversionError(f"Unsupported data format: {hcb['format']}")
+
+    # validate xdelta (1 / samp_rate) if present
+    if "adjunct" in hcb and "xdelta" in hcb["adjunct"]:
+        xdelta = hcb["adjunct"]["xdelta"]
+        if xdelta <= 0:
+            raise SigMFConversionError(f"Invalid adjunct xdelta time interval: {xdelta}")
 
-    with open(meta_path, "w") as meta_handle:
-        json.dump(sigmf_metadata, meta_handle, indent=2)
-    log.info(f"wrote SigMF metadata to {meta_path}")
 
-    return sigmf_metadata
+def validate_extended_header(entries: list) -> None:
+    """
+    Check that BLUE Extended Header contains minimum required fields.
+
+    Parameters
+    ----------
+    entries : list of dict
+        List of extended header entries.
+
+    Raises
+    ------
+    SigMFConversionError
+        If required fields are missing or invalid.
+    """
+    # check for SAMPLE_RATE if present
+    for entry in entries:
+        if entry["tag"] == "SAMPLE_RATE":
+            sample_rate = float(entry["value"])
+            if sample_rate <= 0:
+                raise SigMFConversionError(f"Invalid SAMPLE_RATE in extended header: {sample_rate}")
 
 
 def convert_blue(
@@ -673,6 +551,12 @@ def convert_blue(
     -------
-    numpy.ndarray
-        IQ Data.
+    SigMFFile
+        SigMF object.
+
+    Notes
+    -----
+    This function currently reads BLUE then writes a SigMF pair. We could also
+    implement a function that instead writes metadata only for a non-conforming
+    dataset using the HEADER_BYTES_KEY and TRAILING_BYTES_KEY in most cases.
""" log.debug("starting blue file processing") @@ -685,6 +569,7 @@ def convert_blue( # read Header control block (HCB) from blue file to determine how to process the rest of the file hcb = read_hcb(blue_path) + log.debug("Header Control Block (HCB) Fields") for name, _, _, _, desc in HCB_LAYOUT: log.debug(f"{name:10s}: {hcb[name]!r} # {desc}") @@ -694,37 +579,22 @@ def convert_blue( # data_rep: 'EEEI' or 'IEEE' (little or big extended header endianness representation) extended_header_endianness = hcb.get("head_rep") + ext_endianness = "<" if extended_header_endianness == "EEEI" else ">" - if extended_header_endianness == "EEEI": - ext_endianness = "<" # little-endian - elif extended_header_endianness == "IEEE": - ext_endianness = ">" # big-endian - else: - raise ValueError(f"Unknown head_rep value: {extended_header_endianness}") - - # parse extended header entries - ext_entries = parse_extended_header(blue_path, hcb, ext_endianness) + # read extended header entries + ext_entries = read_extended_header(blue_path, hcb, ext_endianness) log.debug("Extended Header Keywords") for entry in ext_entries: log.debug(f"{entry['tag']:20s}:{entry['value']}") log.info(f"total extended header entries: {len(ext_entries)}") - # data_rep: 'EEEI' or 'IEEE' (little or big data endianness representation) - data_rep_endianness = hcb.get("data_rep") - data_endianness = "<" if data_rep_endianness == "EEEI" else ">" - # parse key data values - # iq_data will be available if needed for further processing. - try: - iq_data = parse_data_values(blue_path, out_path, hcb, data_endianness) - except Exception as error: - raise RuntimeError(f"Failed to parse data values: {error}") from error + _ = parse_data_values(blue_path, out_path, hcb) # call the SigMF conversion for metadata generation - construct_sigmf(hcb, ext_entries, blue_path, out_path) + meta = construct_sigmf(hcb, ext_entries, blue_path, out_path) - # return the IQ data if needed for further processing if needed - return iq_data + return meta def main() -> None: diff --git a/sigmf/apps/convert_wav.py b/sigmf/apps/convert_wav.py index 9a7ddeb..3a9b025 100755 --- a/sigmf/apps/convert_wav.py +++ b/sigmf/apps/convert_wav.py @@ -28,6 +28,7 @@ def convert_wav( wav_path: str, out_path: Optional[str] = None, + to_archive: bool = True, author: Optional[str] = None, ) -> PathLike: """ @@ -42,7 +43,7 @@ def convert_wav( SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", SigMFFile.NUM_CHANNELS_KEY: 1 if len(wav_data.shape) < 2 else wav_data.shape[1], - SigMFFile.RECORDER_KEY: "Official SigMF wav converter", + SigMFFile.RECORDER_KEY: "Official SigMF WAV converter", SigMFFile.SAMPLE_RATE_KEY: samp_rate, } @@ -50,7 +51,6 @@ def convert_wav( wav_datetime = datetime.fromtimestamp(modify_time, tz=timezone.utc) capture_info = { - SigMFFile.START_INDEX_KEY: 0, SigMFFile.DATETIME_KEY: wav_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), } @@ -72,7 +72,7 @@ def convert_wav( arc_path = filenames["archive_fn"] meta.tofile(arc_path, toarchive=True) log.info("wrote %s", arc_path) - return arc_path + return meta def main() -> None: diff --git a/tests/test_convert.py b/tests/test_convert.py index 26af615..64a16e8 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -49,7 +49,8 @@ def tearDown(self) -> None: self.tmp_dir.cleanup() def test_wav_to_sigmf(self): - sigmf_path = convert_wav(wav_path=self.wav_path, out_path=str(self.tmp_path / "bar")) + sigmf_path = self.tmp_path / "bar" + _ = 
convert_wav(wav_path=self.wav_path, out_path=sigmf_path) meta = sigmf.fromfile(sigmf_path) data = meta.read_samples() # allow small numerical differences due to data type conversions From 59467dfec35c427f947082e283b739ac3b166951 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Wed, 19 Nov 2025 10:34:32 -0800 Subject: [PATCH 05/15] refactor fourth pass --- sigmf/apps/convert_blue.py | 304 +++++++++++++++++++++---------------- 1 file changed, 177 insertions(+), 127 deletions(-) diff --git a/sigmf/apps/convert_blue.py b/sigmf/apps/convert_blue.py index c2becc1..43f3bb1 100644 --- a/sigmf/apps/convert_blue.py +++ b/sigmf/apps/convert_blue.py @@ -13,7 +13,6 @@ import argparse import getpass -import json import logging import os import struct @@ -32,8 +31,8 @@ log = logging.getLogger() # fmt: off -HCB_LAYOUT = [ - # HCB field definitions: (name, offset, size, fmt, description) up to adjunct +FIXED_LAYOUT = [ + # Fixed Header definitions: (key, offset, size, fmt, description) up to adjunct ("version", 0, 4, "4s", "Header version"), ("head_rep", 4, 4, "4s", "Header representation"), ("data_rep", 8, 4, "4s", "Data representation"), @@ -58,12 +57,12 @@ ("outbytes", 96, 64, "8d", "Next out byte (each outlet)"), ("keylength", 160, 4, "i", "Length of keyword string"), ("keywords", 164, 92, "92s", "User defined keyword string"), - # Adjunct starts at 256 after this + # Adjunct starts at byte 256 after this ] # fmt: on TYPE_MAP = { - # Extended header type map + # Extended Header type map "B": (np.int8, 1), "I": (np.int16, 2), "L": (np.int32, 4), @@ -120,7 +119,7 @@ } -def detect_endian(data, layout, probe_fields=("data_size", "version")): +def detect_endian(data, probe_fields=("data_size", "version")): """ Detect endianness of a Bluefile header. @@ -130,8 +129,6 @@ def detect_endian(data, layout, probe_fields=("data_size", "version")): ---------- data : bytes Raw header data. - layout : list of tuples - HCB layout definition (name, offset, size, fmt, desc). probe_fields : tuple of str, optional Field names to test for sanity checks. @@ -139,17 +136,21 @@ def detect_endian(data, layout, probe_fields=("data_size", "version")): ------- str "<" for little-endian or ">" for big-endian. + + Raises + ------ + SigMFConversionError + If the endianness is unexpected. """ # TODO: handle both types of endianess 'EEEI' or IEEE and data rep and signal rep - endianness = data[8:12].decode("utf-8") - log.debug(f"endianness: {endianness}") + endianness = data[8:12].decode("ascii") if endianness not in ("EEEI", "IEEE"): raise SigMFConversionError(f"Unexpected endianness: {endianness}") for endian in ("<", ">"): ok = True - for name, offset, size, fmt, desc in layout: - if name not in probe_fields: + for key, offset, size, fmt, _ in FIXED_LAYOUT: + if key not in probe_fields: continue raw = data[offset : offset + size] try: @@ -157,12 +158,12 @@ def detect_endian(data, layout, probe_fields=("data_size", "version")): # sanity checks MAX_DATA_SIZE_FACTOR = 100 - if name == "data_size": + if key == "data_size": if val <= 0 or val > len(data) * MAX_DATA_SIZE_FACTOR: ok = False break - elif name == "version": - if not (0 < val < 10): # expect small version number + elif key == "version": + if not 0 < val < 10: # expect small version number ok = False break except Exception: @@ -176,7 +177,9 @@ def detect_endian(data, layout, probe_fields=("data_size", "version")): def read_hcb(file_path): """ - Read Header Control Block (HCB) and adjunct block from a Blue file. + Read Header Control Block (HCB) from BLUE file. 
+
+    First 256 bytes contain fixed header, followed by 256 bytes of adjunct header.
 
     Parameters
     ----------
@@ -185,39 +188,52 @@
 
     Returns
     -------
-    dict
-        Parsed HCB fields and adjunct metadata.
-    """
+    h_fixed : dict
+        Fixed Header
+    h_keywords : dict
+        Custom User Keywords
+    h_adjunct : dict
+        Adjunct Header
 
-    hcb = {}
+    Raises
+    ------
+    SigMFConversionError
+        If header cannot be parsed.
+    """
     with open(file_path, "rb") as handle:
-        data = handle.read(HEADER_SIZE_BYTES)
-        if len(data) < HEADER_SIZE_BYTES:
-            raise SigMFConversionError("Incomplete header")
-        endian = detect_endian(data, HCB_LAYOUT)
+        header_bytes = handle.read(256)
 
-        # fixed fields
-        for name, offset, size, fmt, desc in HCB_LAYOUT:
-            raw = data[offset : offset + size]
+        endian = detect_endian(header_bytes)
+
+        # fixed header fields
+        h_fixed = {}
+        for key, offset, size, fmt, _ in FIXED_LAYOUT:
+            raw = header_bytes[offset : offset + size]
             try:
                 val = struct.unpack(endian + fmt, raw)[0]
             except struct.error:
-                raise SigMFConversionError(f"Failed to unpack field {name} with endian {endian}")
+                raise SigMFConversionError(f"Failed to unpack field {key} with endian {endian}")
             if isinstance(val, bytes):
-                val = val.decode("ascii", errors="replace").strip("\x00 ")
-            hcb[name] = val
-
-        # adjunct parsing
-        adjunct_offset_bytes = 256
-        handle.seek(adjunct_offset_bytes)
-        if hcb["type"] in (1000, 1001):
-            hcb["adjunct"] = {
+                val = val.decode("ascii", errors="replace")
+            h_fixed[key] = val
+
+        # parse user keywords & decode standard keywords
+        h_keywords = {}
+
+        for field in h_fixed["keywords"].split("\x00"):
+            if "=" in field:
+                key, value = field.split("=", 1)
+                h_keywords[key] = value
+
+        # variable (adjunct) header parsing
+        if h_fixed["type"] in (1000, 1001):
+            h_adjunct = {
                 "xstart": struct.unpack(f"{endian}d", handle.read(8))[0],
                 "xdelta": struct.unpack(f"{endian}d", handle.read(8))[0],
                 "xunits": struct.unpack(f"{endian}i", handle.read(4))[0],
             }
-        elif hcb["type"] == 2000:
-            hcb["adjunct"] = {
+        elif h_fixed["type"] == 2000:
+            h_adjunct = {
                 "xstart": struct.unpack(f"{endian}d", handle.read(8))[0],
                 "xdelta": struct.unpack(f"{endian}d", handle.read(8))[0],
                 "xunits": struct.unpack(f"{endian}i", handle.read(4))[0],
@@ -227,14 +243,19 @@
                 "yunits": struct.unpack(f"{endian}i", handle.read(4))[0],
             }
         else:
-            hcb["adjunct_raw"] = handle.read(adjunct_offset_bytes)
+            h_adjunct = handle.read(256)
+
+    ver_lut = {"1.0": "BLUE 1.0", "1.1": "BLUE 1.1", "2.0": "Platinum"}
+    spec_str = ver_lut.get(h_keywords.get("VER", "1.0"))
+    log.info(f"Read {h_fixed['version']} type {h_fixed['type']} using {spec_str} specification.")
 
-    validate_hcb(hcb)
+    validate_fixed(h_fixed)
+    validate_adjunct(h_adjunct)
 
-    return hcb
+    return h_fixed, h_keywords, h_adjunct
 
 
-def read_extended_header(file_path, hcb, endian="<"):
+def read_extended_header(file_path, h_fixed):
     """
     Read Extended Header from a BLUE file.
 
@@ -242,8 +263,8 @@
     ----------
     file_path : str
         Path to the BLUE file.
-    hcb : dict
-        Header Control Block containing 'ext_size' and 'ext_start'.
+    h_fixed : dict
+        Fixed Header containing 'ext_size' and 'ext_start'.
-    endian : str, optional
-        Endianness ('<' for little-endian, '>' for big-endian).
 
     Returns
     -------
     list of dict
         List of dictionaries containing parsed records.
+
+    Raises
+    ------
+    SigMFConversionError
+        If the extended header cannot be parsed.
""" - if hcb["ext_size"] <= 0: - return [] entries = [] + if h_fixed["ext_size"] <= 0: + return entries + endian = "<" if h_fixed.get("head_rep") == "EEEI" else ">" with open(file_path, "rb") as handle: - handle.seek(int(hcb["ext_start"]) * BLOCK_SIZE_BYTES) - bytes_remaining = int(hcb["ext_size"]) + handle.seek(int(h_fixed["ext_start"]) * BLOCK_SIZE_BYTES) + bytes_remaining = int(h_fixed["ext_size"]) while bytes_remaining > 0: lkey = struct.unpack(f"{endian}i", handle.read(4))[0] lext = struct.unpack(f"{endian}h", handle.read(2))[0] @@ -295,9 +322,9 @@ def read_extended_header(file_path, hcb, endian="<"): return entries -def parse_data_values(blue_path: Path, out_path: Path, hcb: dict) -> np.ndarray: +def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: """ - Parse key HCB values used for further processing. + Write SigMF data file from BLUE file samples. Parameters ---------- @@ -305,7 +332,7 @@ def parse_data_values(blue_path: Path, out_path: Path, hcb: dict) -> np.ndarray: Path to the BLUE file. out_path : Path Path to output SigMF metadata file. - hcb : dict + h_fixed : dict Header Control Block dictionary. Returns @@ -313,16 +340,16 @@ def parse_data_values(blue_path: Path, out_path: Path, hcb: dict) -> np.ndarray: numpy.ndarray Parsed samples. """ - log.info("parsing BLUE file data values") + log.debug("parsing BLUE file data values") file_size_bytes = os.path.getsize(blue_path) - extended_header_data_size = hcb.get("ext_size") - format = hcb.get("format") + extended_header_data_size = h_fixed.get("ext_size") + fmt = h_fixed.get("format") # Determine destination path for SigMF data file dest_path = out_path.with_suffix(".sigmf-data") - config = DATA_TYPE_CONFIGS[format] + config = DATA_TYPE_CONFIGS[fmt] np_dtype = config["dtype"] is_complex = config["complex"] should_normalize = config["normalize"] @@ -344,99 +371,96 @@ def parse_data_values(blue_path: Path, out_path: Path, hcb: dict) -> np.ndarray: samples = raw_samples[::2] + 1j * raw_samples[1::2] # normalize if needed if should_normalize: - samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[format] + samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[fmt] else: # scalar data samples = raw_samples if should_normalize: - samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[format] + samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[fmt] # save out as SigMF IQ data file samples.tofile(dest_path) + log.info("wrote %s", dest_path) # return the IQ data if needed for further processing if needed return samples -def construct_sigmf(hcb: dict, ext_entries: list, blue_path: Path, out_path: Path) -> SigMFFile: +def construct_sigmf(out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list) -> SigMFFile: """ Built & write a SigMF object from BLUE metadata. Parameters ---------- - hcb : dict - Header Control Block from read_hcb(). - ext_entries : list of dict - Parsed extended header entries from read_extended_header(). - blue_path : Path - Path to the original blue file. out_path : Path Path to output SigMF metadata file. + h_fixed : dict + Fixed Header + h_keywords : dict + Custom User Keywords + h_adjunct : dict + Adjunct Header + h_extended : list of dict + Parsed extended header entries from read_extended_header(). Returns ------- dict SigMF metadata structure. - - Raises - ------ - SigMFConversionError - If required fields are missing or invalid. 
""" # helper to look up extended header values by tag def get_tag(tag): - for entry in ext_entries: + for entry in h_extended: if entry["tag"] == tag: return entry["value"] return None # data_rep: 'EEEI' or 'IEEE' (little or big data endianess representation) - data_rep = hcb.get("data_rep") + data_rep = h_fixed.get("data_rep") # data_format: for example 'CI' or 'SD' (data format code - real or complex, int or float) - data_format = hcb.get("format") + data_format = h_fixed.get("format") endian_suffix = "_le" if data_rep == "EEEI" else "_be" # get base datatype and add endianness base_datatype = DATATYPE_MAP_BASE.get(data_format) datatype = base_datatype + endian_suffix - log.info(f"determined SigMF datatype: {datatype} and data representation: {data_rep}") + log.info(f"Using SigMF datatype: {datatype} for BLUE format {h_fixed['format']} and endianness {data_rep}.") # sample rate: prefer adjunct.xdelta, else extended header SAMPLE_RATE - if "adjunct" in hcb and "xdelta" in hcb["adjunct"]: - sample_rate_hz = 1 / hcb["adjunct"]["xdelta"] + if "xdelta" in h_adjunct: + sample_rate_hz = 1 / h_adjunct["xdelta"] else: sample_rate_hz = float(get_tag("SAMPLE_RATE")) - if "outlets" in hcb and hcb["outlets"] > 0: - num_channels = int(hcb["outlets"]) + if "outlets" in h_fixed and h_fixed["outlets"] > 0: + num_channels = int(h_fixed["outlets"]) else: num_channels = 1 # base global metadata global_info = { - # FIXME: what common fields are in hcb? + # FIXME: what common fields are in h_fixed? "core:author": getpass.getuser(), SigMFFile.DATATYPE_KEY: datatype, - # FIXME: is this the most apt description? - SigMFFile.DESCRIPTION_KEY: hcb.get("keywords", ""), + # FIXME: what is the most apt description? + # SigMFFile.DESCRIPTION_KEY: ???, SigMFFile.RECORDER_KEY: "Official SigMF BLUE converter", SigMFFile.NUM_CHANNELS_KEY: num_channels, SigMFFile.SAMPLE_RATE_KEY: sample_rate_hz, SigMFFile.EXTENSIONS_KEY: [{"name": "blue", "version": "0.0.1", "optional": True}], } - global_info["blue:hcb"] = hcb - - # merge adjunct fields - if "adjunct" in hcb: - global_info["blue:adjunct"] = hcb["adjunct"] + # merge HCB values into metadata + global_info["blue:fixed"] = h_fixed + global_info["blue:keywords"] = h_keywords + global_info["blue:adjunct"] = h_adjunct # merge extended header fields - if ext_entries: + if h_extended: extended = {} - for entry in ext_entries: + for entry in h_extended: key = entry.get("tag") value = entry.get("value") if hasattr(value, "item"): @@ -445,8 +469,12 @@ def get_tag(tag): global_info["blue:extended"] = extended # BLUE uses 1950-01-01 as epoch, UNIX uses 1970-01-01 - blue_timecode = int(hcb.get("timecode", 0)) - blue_epoch = blue_timecode - 631152000 # seconds between 1950 and 1970 + blue_start_time = float(h_fixed.get("timecode", 0)) + blue_start_time += h_adjunct.get("xstart", 0) + blue_start_time += float(h_keywords.get("TC_PREC", 0)) + + blue_epoch = blue_start_time - 631152000 # seconds between 1950 and 1970 + # FIXME: I am unsure if the timezone is always UTC in these files blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc) capture_info = { @@ -473,14 +501,32 @@ def get_tag(tag): return meta -def validate_hcb(hcb: dict) -> None: +def validate_file(blue_path: Path) -> None: """ - Check that BLUE Header Control Block (HCB) contains minimum required fields. + Basic validation of the BLUE file. Parameters ---------- - hcb : dict - Header Control Block dictionary. + blue_path : Path + Path to the BLUE file. 
+ + Raises + ------ + SigMFConversionError + If the file is abnormal. + """ + if blue_path.stat().st_size < 512: + raise SigMFConversionError("BLUE file is too small to contain required headers.") + + +def validate_fixed(h_fixed: dict) -> None: + """ + Check that Fixed Header contains minimum required fields. + + Parameters + ---------- + h_fixed : dict + Fixed Header dictionary. Raises ------ @@ -489,24 +535,29 @@ def validate_hcb(hcb: dict) -> None: """ required = ["version", "data_start", "data_size", "data_rep", "head_rep", "detached", "format", "type"] for field in required: - if field not in hcb: - raise SigMFConversionError(f"Missing required HCB field: {field}") - if hcb[field] is None: - raise SigMFConversionError(f"Required HCB field {field} is None") - log.debug(f"HCB field {field}: {hcb[field]!r}") + if field not in h_fixed: + raise SigMFConversionError(f"Missing required Fixed Header field: {field}") + # FIXME: when could this possibly occur? + if h_fixed[field] is None: + raise SigMFConversionError(f"Required Fixed Header field {field} is None") for rep_field in ["data_rep", "head_rep"]: - if hcb[rep_field] not in ("EEEI", "IEEE"): - raise SigMFConversionError(f"Invalid value for {rep_field}: {hcb[rep_field]}") + if h_fixed[rep_field] not in ("EEEI", "IEEE"): + raise SigMFConversionError(f"Invalid value for {rep_field}: {h_fixed[rep_field]}") + # FIXME: merge these lookup tables into one + if h_fixed["format"] not in DATATYPE_MAP_BASE: + raise SigMFConversionError(f"Unsupported data format: {h_fixed['format']}") + if h_fixed["format"] not in DATA_TYPE_CONFIGS: + raise SigMFConversionError(f"Unsupported data format: {h_fixed['format']}") - if hcb["format"] not in DATATYPE_MAP_BASE: - raise SigMFConversionError(f"Unsupported data format: {hcb['format']}") - if hcb["format"] not in DATA_TYPE_CONFIGS: - raise SigMFConversionError(f"Unsupported data format: {hcb['format']}") +def validate_adjunct(adjunct: dict) -> None: + """ + Check that the Adjunct header contains minimum required fields. + """ # validate xdelta (1 / samp_rate) if present - if "adjunct" in hcb and "xdelta" in hcb["adjunct"]: - xdelta = hcb["adjunct"]["xdelta"] + if "xdelta" in adjunct: + xdelta = adjunct["xdelta"] if xdelta <= 0: raise SigMFConversionError(f"Invalid adjunct xdelta time interval: {xdelta}") @@ -558,8 +609,6 @@ def convert_blue( implement a function that instead writes metadata only for a non-conforming dataset using the HEADER_BYTES_KEY and TRAILING_BYTES_KEY in most cases. 
""" - log.debug("starting blue file processing") - blue_path = Path(blue_path) if out_path is None: # extension will be changed later @@ -567,32 +616,33 @@ def convert_blue( else: out_path = Path(out_path) - # read Header control block (HCB) from blue file to determine how to process the rest of the file - hcb = read_hcb(blue_path) + validate_file(blue_path) - log.debug("Header Control Block (HCB) Fields") - for name, _, _, _, desc in HCB_LAYOUT: - log.debug(f"{name:10s}: {hcb[name]!r} # {desc}") + # read Header control block (HCB) to determine how to process the rest of the file + h_fixed, h_keywords, h_adjunct = read_hcb(blue_path) - log.debug("Adjunct Header") - log.debug(hcb.get("adjunct", hcb.get("adjunct_raw"))) + # read extended header + h_extended = read_extended_header(blue_path, h_fixed) - # data_rep: 'EEEI' or 'IEEE' (little or big extended header endianness representation) - extended_header_endianness = hcb.get("head_rep") - ext_endianness = "<" if extended_header_endianness == "EEEI" else ">" + # write to SigMF data file + _ = write_data(blue_path, out_path, h_fixed) - # read extended header entries - ext_entries = read_extended_header(blue_path, hcb, ext_endianness) - log.debug("Extended Header Keywords") - for entry in ext_entries: - log.debug(f"{entry['tag']:20s}:{entry['value']}") - log.info(f"total extended header entries: {len(ext_entries)}") + log.debug(">>>>>>>>> Fixed Header") + for key, _, _, _, desc in FIXED_LAYOUT: + log.debug(f"{key:10s}: {h_fixed[key]!r} # {desc}") - # parse key data values - _ = parse_data_values(blue_path, out_path, hcb) + log.debug(">>>>>>>>> User Keywords") + log.debug(h_keywords) + + log.debug(">>>>>>>>> Adjunct Header") + log.debug(h_adjunct) + + log.debug(">>>>>>>>> Extended Header") + for entry in h_extended: + log.debug(f"{entry['tag']:20s}:{entry['value']}") # call the SigMF conversion for metadata generation - meta = construct_sigmf(hcb, ext_entries, blue_path, out_path) + meta = construct_sigmf(out_path, h_fixed, h_keywords, h_adjunct, h_extended) return meta From 1358927b57354974d94d69b3f81b89a301dcd916 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Mon, 15 Dec 2025 15:20:04 -0800 Subject: [PATCH 06/15] refactor normalization pass --- sigmf/apps/convert_blue.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sigmf/apps/convert_blue.py b/sigmf/apps/convert_blue.py index 43f3bb1..0e23c83 100644 --- a/sigmf/apps/convert_blue.py +++ b/sigmf/apps/convert_blue.py @@ -369,14 +369,13 @@ def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: else: # reassemble interleaved IQ samples samples = raw_samples[::2] + 1j * raw_samples[1::2] - # normalize if needed - if should_normalize: - samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[fmt] else: # scalar data samples = raw_samples - if should_normalize: - samples = samples.astype(np.float32) / NORMALIZATION_FACTORS[fmt] + + # normalize if needed + if should_normalize: + samples /= NORMALIZATION_FACTORS[fmt] # save out as SigMF IQ data file samples.tofile(dest_path) @@ -468,11 +467,10 @@ def get_tag(tag): extended[key] = value global_info["blue:extended"] = extended - # BLUE uses 1950-01-01 as epoch, UNIX uses 1970-01-01 blue_start_time = float(h_fixed.get("timecode", 0)) blue_start_time += h_adjunct.get("xstart", 0) blue_start_time += float(h_keywords.get("TC_PREC", 0)) - + # timecode uses 1950-01-01 as epoch, datetime uses 1970-01-01 blue_epoch = blue_start_time - 631152000 # seconds between 1950 and 1970 # FIXME: I am unsure if the 
timezone is always UTC in these files blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc) @@ -609,6 +607,8 @@ def convert_blue( implement a function that instead writes metadata only for a non-conforming dataset using the HEADER_BYTES_KEY and TRAILING_BYTES_KEY in most cases. """ + log.debug(f"convert {blue_path}") + blue_path = Path(blue_path) if out_path is None: # extension will be changed later From 407c11e468d98e5ae342760cfb5ba5e90f1e5f7b Mon Sep 17 00:00:00 2001 From: Teque5 Date: Tue, 16 Dec 2025 11:10:24 -0800 Subject: [PATCH 07/15] Documentation & Simplified Install * drop scipy optional dependency and [apps] entirely * add documentation for converters * update converter entry points --- .github/workflows/main.yml | 2 +- .readthedocs.yaml | 1 - docs/source/api.rst | 3 +- docs/source/converters.rst | 84 +++++++++++++++++++ docs/source/index.rst | 1 + pyproject.toml | 9 +- sigmf/{apps => convert}/__init__.py | 0 .../{apps/convert_blue.py => convert/blue.py} | 0 sigmf/{apps/convert_wav.py => convert/wav.py} | 28 +++++-- tests/test_convert.py | 47 ++++++----- 10 files changed, 138 insertions(+), 37 deletions(-) create mode 100644 docs/source/converters.rst rename sigmf/{apps => convert}/__init__.py (100%) rename sigmf/{apps/convert_blue.py => convert/blue.py} (100%) rename sigmf/{apps/convert_wav.py => convert/wav.py} (80%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e688093..8aaf0e5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install .[test,apps] + pip install .[test] - name: Test with pytest run: | coverage run diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9ee2227..e0fcec7 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,7 +18,6 @@ python: path: . extra_requirements: - test - - apps - requirements: docs/requirements.txt # Build documentation in the "docs/" directory with Sphinx diff --git a/docs/source/api.rst b/docs/source/api.rst index 2c3bddb..62d166a 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -7,9 +7,10 @@ SigMF API :template: custom-module-template.rst :recursive: - sigmf.apps.convert_wav sigmf.archive sigmf.archivereader + sigmf.convert.blue + sigmf.convert.wav sigmf.error sigmf.schema sigmf.sigmf_hash diff --git a/docs/source/converters.rst b/docs/source/converters.rst new file mode 100644 index 0000000..6a7dec9 --- /dev/null +++ b/docs/source/converters.rst @@ -0,0 +1,84 @@ +================= +Format Converters +================= + +The SigMF Python library includes converters to import data from various file formats into SigMF format. +These converters make it easy to migrate existing RF recordings to the standardized SigMF format while preserving metadata when possible. + +Overview +-------- + +Converters are available for: + +* **BLUE files** - MIDAS Blue and Platinum BLUE RF recordings (``.cdif``) +* **WAV files** - Audio recordings (``.wav``) + +All converters return a :class:`~sigmf.SigMFFile` object that can be used immediately or saved to disk. +Converters preserve datatypes and metadata where possible. + + +Command Line Usage +~~~~~~~~~~~~~~~~~~ + +Converters can be used from the command line after ``pip install sigmf``: + +.. code-block:: bash + + sigmf_convert_blue input.cdif + sigmf_convert_wav input.wav + +or by using module syntax: + +.. 
code-block:: bash + + python3 -m sigmf.convert.blue input.cdif + python3 -m sigmf.convert.wav input.wav + + +BLUE Converter +-------------- + +The BLUE converter handles CDIF (.cdif) recordings while placing BLUE header information into the following global fields: + +* ``blue:fixed`` - fixed header information (at start of file) +* ``blue:adjunct`` - adjunct header information (after fixed header) +* ``blue:extended`` - extended header information (at end of file) +* ``blue:keywords`` - user-defined key-value pairs + +.. autofunction:: sigmf.convert.blue.blue_to_sigmf + + +.. code-block:: python + + from sigmf.convert.blue import blue_to_sigmf + + # read BLUE, write SigMF, and return SigMFFile object + meta = blue_to_sigmf(blue_path="recording.cdif", out_path="recording.sigmf") + + # access converted data + samples = meta.read_samples() + sample_rate_hz = meta.sample_rate + + # access BLUE-specific metadata + blue_type = meta.get_global_field("blue:fixed")["type"] # e.g., 1000 + blue_version = meta.get_global_field("blue:keywords")["IO"] # e.g., "X-Midas" + + +WAV Converter +------------- + +This is useful when working with audio datasets. + +.. autofunction:: sigmf.convert.wav.wav_to_sigmf + + +.. code-block:: python + + from sigmf.convert.wav import wav_to_sigmf + + # read WAV, write SigMF, and return SigMFFile object + meta = wav_to_sigmf(wav_path="recording.wav", out_path="recording.sigmf") + + # access converted data + samples = meta.read_samples() + sample_rate_hz = meta.sample_rate \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index f845252..9d4a6ab 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -23,6 +23,7 @@ To get started, see the :doc:`quickstart` section or learn how to :ref:`install` quickstart advanced + converters developers .. toctree:: diff --git a/pyproject.toml b/pyproject.toml index 768e518..dc950af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,8 @@ dependencies = [ [project.scripts] sigmf_validate = "sigmf.validate:main" - sigmf_convert_wav = "sigmf.apps.convert_wav:main [apps]" - sigmf_convert_blue = "sigmf.apps.convert_blue:main [apps]" + sigmf_convert_wav = "sigmf.convert.wav:main" + sigmf_convert_blue = "sigmf.convert.blue:main" [project.optional-dependencies] test = [ "pylint", @@ -42,9 +42,6 @@ dependencies = [ "pytest-cov", "hypothesis", # next-gen testing framework ] - apps = [ - "scipy", # for wav i/o - ] [tool.setuptools] packages = ["sigmf"] @@ -107,6 +104,6 @@ legacy_tox_ini = ''' [testenv] usedevelop = True - deps = .[test,apps] + deps = .[test] commands = coverage run ''' diff --git a/sigmf/apps/__init__.py b/sigmf/convert/__init__.py similarity index 100% rename from sigmf/apps/__init__.py rename to sigmf/convert/__init__.py diff --git a/sigmf/apps/convert_blue.py b/sigmf/convert/blue.py similarity index 100% rename from sigmf/apps/convert_blue.py rename to sigmf/convert/blue.py diff --git a/sigmf/apps/convert_wav.py b/sigmf/convert/wav.py similarity index 80% rename from sigmf/apps/convert_wav.py rename to sigmf/convert/wav.py index 3a9b025..913353a 100755 --- a/sigmf/apps/convert_wav.py +++ b/sigmf/convert/wav.py @@ -10,12 +10,12 @@ import getpass import logging import tempfile +import wave from datetime import datetime, timezone -from os import PathLike from pathlib import Path from typing import Optional -from scipy.io import wavfile +import numpy as np from .. import SigMFFile from .. 
import __version__ as toolversion
@@ -25,19 +25,27 @@
 log = logging.getLogger()
 
 
-def convert_wav(
+def wav_to_sigmf(
     wav_path: str,
     out_path: Optional[str] = None,
     to_archive: bool = True,
     author: Optional[str] = None,
-) -> PathLike:
+) -> SigMFFile:
     """
-    Read a wav and write a sigmf archive.
+    Read a wav, write a sigmf, return SigMFFile object.
+
+    Note: Can only read PCM wav files. Use scipy.io.wavfile for broader support.
     """
     wav_path = Path(wav_path)
     wav_stem = wav_path.stem
-    samp_rate, wav_data = wavfile.read(wav_path)
-
+    with wave.open(str(wav_path), "rb") as wav_reader:
+        n_channels = wav_reader.getnchannels()
+        samp_width = wav_reader.getsampwidth()
+        samp_rate = wav_reader.getframerate()
+        n_frames = wav_reader.getnframes()
+        raw_data = wav_reader.readframes(n_frames)
+    np_dtype = f"int{samp_width * 8}"
+    wav_data = np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels)
     global_info = {
         SigMFFile.AUTHOR_KEY: getpass.getuser() if author is None else author,
         SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data),
         SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}",
@@ -93,7 +101,11 @@ def main() -> None:
     }
     logging.basicConfig(level=level_lut[min(args.verbose, 2)])
 
-    _ = convert_wav(
+    _ = wav_to_sigmf(
         wav_path=args.input,
         author=args.author,
     )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 64a16e8..95f993c 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -9,20 +9,14 @@
 import os
 import tempfile
 import unittest
+import wave
 from pathlib import Path
 
 import numpy as np
 
-try:
-    from scipy.io import wavfile
-
-    SCIPY_AVAILABLE = True
-except ImportError:
-    SCIPY_AVAILABLE = False
-
 import sigmf
-from sigmf.apps.convert_blue import convert_blue
-from sigmf.apps.convert_wav import convert_wav
+from sigmf.convert.blue import blue_to_sigmf
+from sigmf.convert.wav import wav_to_sigmf
 
 BLUE_ENV_VAR = "NONSIGMF_RECORDINGS_PATH"
@@ -32,8 +26,6 @@ class TestWAVConverter(unittest.TestCase):
 
     def setUp(self) -> None:
         """create temp wav file for testing"""
-        if not SCIPY_AVAILABLE:
-            self.skipTest("scipy is required for WAV file tests")
         self.tmp_dir = tempfile.TemporaryDirectory()
         self.tmp_path = Path(self.tmp_dir.name)
         self.wav_path = self.tmp_path / "foo.wav"
@@ -42,7 +34,17 @@ def setUp(self) -> None:
         ttt = np.linspace(0, duration_s, int(samp_rate * duration_s), endpoint=False)
         freq = 440  # A4 note
         self.audio_data = 0.5 * np.sin(2 * np.pi * freq * ttt)
-        wavfile.write(self.wav_path, samp_rate, self.audio_data.astype(np.float32))
+        # note scipy could write float wav files directly,
+        # but to avoid adding scipy as a dependency for sigmf-python,
+        # convert float audio to 16-bit PCM integer format
+        audio_int16 = (self.audio_data * 32767).astype(np.int16)
+
+        # write wav file using built-in wave module
+        with wave.open(str(self.wav_path), "wb") as wav_file:
+            wav_file.setnchannels(1)  # mono
+            wav_file.setsampwidth(2)  # 16-bit = 2 bytes
+            wav_file.setframerate(samp_rate)
+            wav_file.writeframes(audio_int16.tobytes())
 
     def tearDown(self) -> None:
         """clean up temporary directory"""
@@ -50,11 +52,10 @@ def tearDown(self) -> None:
 
     def test_wav_to_sigmf(self):
         sigmf_path = self.tmp_path / "bar"
-        _ = convert_wav(wav_path=self.wav_path, out_path=sigmf_path)
-        meta = sigmf.fromfile(sigmf_path)
+        meta = wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path)
         data = meta.read_samples()
-        # allow small numerical differences due to data type conversions
-        self.assertTrue(np.allclose(self.audio_data, data, atol=1e-8))
+        # allow numerical differences due to PCM conversion
+        self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4))
 
 
 class TestBlueConverter(unittest.TestCase):
@@ -79,18 +80,24 @@ def tearDown(self) -> None:
 
     def test_blue_to_sigmf(self):
         for bdx, bluefile in enumerate(self.bluefiles):
-            sigmf_path = self.tmp_path / f"converted_{bdx}"
-            _ = convert_blue(blue_path=bluefile, out_path=sigmf_path)
-            meta = sigmf.fromfile(sigmf_path)
+            sigmf_path = self.tmp_path / bluefile.stem
+            meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path)
 
             ### EVERYTHING BELOW HERE IS FOR DEBUGGING ONLY _ REMOVE LATER ###
             # plot stft of RF data for visual inspection
+            import matplotlib.pyplot as plt
             from scipy.signal import spectrogram
+            from swiftfox import summary
 
             samples = meta.read_samples()
+            plt.figure(figsize=(10, 10))
+            summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate"))
+            plt.figure()
+            plt.plot(samples.real)
+            plt.plot(samples.imag)
+
             freqs, times, spec = spectrogram(samples, fs=meta.get_global_field("core:sample_rate"), nperseg=1024)
             # use imshow to plot spectrogram
-            import matplotlib.pyplot as plt
             plt.figure()
             plt.imshow(
 
From 64ceecd0a9eb6c5a89313608d6275053e30a2ab8 Mon Sep 17 00:00:00 2001
From: Teque5 <
Date: Tue, 16 Dec 2025 14:27:41 -0800
Subject: [PATCH 08/15] API homologation

---
 docs/source/converters.rst |  8 +++---
 sigmf/convert/blue.py      | 55 +++++++++++++++++++++++++-------------
 sigmf/convert/wav.py       | 11 +++-----
 tests/test_convert.py      |  2 +-
 4 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/docs/source/converters.rst b/docs/source/converters.rst
index 6a7dec9..8c74e39 100644
--- a/docs/source/converters.rst
+++ b/docs/source/converters.rst
@@ -24,15 +24,15 @@ Converters can be used from the command line after ``pip install sigmf``:
 
 .. code-block:: bash
 
-    sigmf_convert_blue input.cdif
-    sigmf_convert_wav input.wav
+    sigmf_convert_blue recording.cdif
+    sigmf_convert_wav recording.wav
 
 or by using module syntax:
 
 .. code-block:: bash
 
-    python3 -m sigmf.convert.blue input.cdif
-    python3 -m sigmf.convert.wav input.wav
+    python3 -m sigmf.convert.blue recording.cdif
+    python3 -m sigmf.convert.wav recording.wav
 
 
 BLUE Converter
diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py
index 0e23c83..dc76026 100644
--- a/sigmf/convert/blue.py
+++ b/sigmf/convert/blue.py
@@ -322,7 +322,7 @@ def read_extended_header(file_path, h_fixed):
     return entries
 
 
-def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray:
+def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray:
     """
     Write SigMF data file from BLUE file samples.
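+
+    A hedged sketch of the equivalent manual copy, assuming int16 samples and
+    the standard 512-byte header (``HEADER_SIZE_BYTES``)::
+
+        np.fromfile(blue_path, dtype=np.int16, offset=512).tofile(dest_path)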
@@ -342,12 +342,17 @@ def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray:
     """
     log.debug("parsing BLUE file data values")
 
-    file_size_bytes = os.path.getsize(blue_path)
-    extended_header_data_size = h_fixed.get("ext_size")
+    # use header data_size field instead of file size calculation
+    data_size_bytes = int(h_fixed.get("data_size", 0))
     fmt = h_fixed.get("format")
 
+    log.debug(f"format: {fmt}, data_size from header: {data_size_bytes} bytes")
+
     # Determine destination path for SigMF data file
     dest_path = out_path.with_suffix(".sigmf-data")
 
     config = DATA_TYPE_CONFIGS[fmt]
     np_dtype = config["dtype"]
@@ -356,7 +361,9 @@ def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray:
 
     # calculate element size and count
     elem_size = np.dtype(np_dtype).itemsize
-    elem_count = (file_size_bytes - extended_header_data_size) // elem_size
+    elem_count = data_size_bytes // elem_size
+
+    log.debug(f"elem_size: {elem_size}, elem_count: {elem_count}, is_complex: {is_complex}")
 
     # read raw samples
     raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count)
@@ -369,6 +376,7 @@ def write_data(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray:
         else:
             # reassemble interleaved IQ samples
             samples = raw_samples[::2] + 1j * raw_samples[1::2]
+            log.debug(f"Deinterleaved {len(raw_samples)} samples into {len(samples)} complex samples")
     else:
         # scalar data
         samples = raw_samples
@@ -470,21 +478,27 @@ def get_tag(tag):
     blue_start_time = float(h_fixed.get("timecode", 0))
     blue_start_time += h_adjunct.get("xstart", 0)
     blue_start_time += float(h_keywords.get("TC_PREC", 0))
-    # timecode uses 1950-01-01 as epoch, datetime uses 1970-01-01
-    blue_epoch = blue_start_time - 631152000  # seconds between 1950 and 1970
-    # FIXME: I am unsure if the timezone is always UTC in these files
-    blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc)
 
-    capture_info = {
-        SigMFFile.DATETIME_KEY: blue_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT),
-    }
+    if blue_start_time == 0:
+        log.warning("BLUE timecode is zero or missing; datetime metadata will be absent.")
+        capture_info = {}
+    else:
+        # timecode uses 1950-01-01 as epoch, datetime uses 1970-01-01
+        blue_epoch = blue_start_time - 631152000  # seconds between 1950 and 1970
+        # FIXME: I am unsure if the timezone is always UTC in these files
+        blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc)
+
+        capture_info = {
+            SigMFFile.DATETIME_KEY: blue_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT),
+        }
 
     if get_tag("RF_FREQ") is not None:
-        # FIXME: I believe there are many possible keys related to tune frequency
+        # There may be other keys related to tune frequency
         capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ"))
 
     # actually write to SigMF
-    filenames = get_sigmf_filenames(out_path)
+    filenames = get_sigmf_filenames(out_path.stem)
 
     meta = SigMFFile(
         data_file=filenames["data_fn"],
@@ -582,12 +596,12 @@ def validate_extended_header(entries: list) -> None:
             raise SigMFConversionError(f"Invalid SAMPLE_RATE in extended header: {sample_rate}")
 
 
-def convert_blue(
+def blue_to_sigmf(
     blue_path: str,
     out_path: Optional[str] = None,
-) -> np.ndarray:
+) -> SigMFFile:
     """
-    Convert a MIDIS Bluefile to SigMF metadata and data.
+    Read a MIDAS Bluefile, write to SigMF, return SigMFFile object.
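+
+    Usage sketch, with hypothetical paths mirroring the converters
+    documentation::
+
+        meta = blue_to_sigmf(blue_path="recording.cdif", out_path="recording.sigmf")
+        samples = meta.read_samples()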
Parameters ---------- @@ -625,7 +639,7 @@ def convert_blue( h_extended = read_extended_header(blue_path, h_fixed) # write to SigMF data file - _ = write_data(blue_path, out_path, h_fixed) + _ = data_loopback(blue_path, out_path, h_fixed) log.debug(">>>>>>>>> Fixed Header") for key, _, _, _, desc in FIXED_LAYOUT: @@ -652,7 +666,8 @@ def main() -> None: Entry-point for sigmf_convert_blue """ parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("input", type=str, help="Blue (cdif) file path") + parser.add_argument("-i", "--input", type=str, required=True, help="BLUE file path") + parser.add_argument("-o", "--output", type=str, default=None, help="SigMF path") parser.add_argument("-v", "--verbose", action="count", default=0) parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -664,4 +679,8 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - convert_blue(args.input) + _ = blue_to_sigmf(blue_path=args.input, out_path=args.output) + + +if __name__ == "__main__": + main() diff --git a/sigmf/convert/wav.py b/sigmf/convert/wav.py index 913353a..32fb1f0 100755 --- a/sigmf/convert/wav.py +++ b/sigmf/convert/wav.py @@ -29,7 +29,6 @@ def wav_to_sigmf( wav_path: str, out_path: Optional[str] = None, to_archive: bool = True, - author: Optional[str] = None, ) -> SigMFFile: """ Read a wav, write a sigmf, return SigMFFile object. @@ -47,7 +46,6 @@ def wav_to_sigmf( np_dtype = f"int{samp_width * 8}" wav_data = np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) global_info = { - SigMFFile.AUTHOR_KEY: getpass.getuser() if author is None else author, SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", SigMFFile.NUM_CHANNELS_KEY: 1 if len(wav_data.shape) < 2 else wav_data.shape[1], @@ -88,8 +86,8 @@ def main() -> None: Entry-point for sigmf_convert_wav """ parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("input", type=str, help="wav path") - parser.add_argument("--author", type=str, default=None, help=f"set {SigMFFile.AUTHOR_KEY} metadata") + parser.add_argument("-i", "--input", type=str, required=True, help="WAV path") + parser.add_argument("-o", "--output", type=str, default=None, help="SigMF path") parser.add_argument("-v", "--verbose", action="count", default=0) parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -101,10 +99,7 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - _ = wav_to_sigmf( - wav_path=args.input, - author=args.author, - ) + _ = wav_to_sigmf(wav_path=args.input, out_path=args.output) if __name__ == "__main__": diff --git a/tests/test_convert.py b/tests/test_convert.py index 95f993c..4919cb5 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -54,7 +54,7 @@ def test_wav_to_sigmf(self): sigmf_path = self.tmp_path / "bar" meta = wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path) data = meta.read_samples() - # allow numerical differences due to PCM conversion + # allow numerical differences due to PCM quantization self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) From a02f5dad366e2541ce890f86e7157adae8c9d8d2 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Wed, 17 Dec 2025 15:18:59 -0800 Subject: [PATCH 09/15] add support for metadata-only (0 sample) BLUE files --- sigmf/convert/blue.py | 242 ++++++++++++++++++++++++------------------ 
sigmf/convert/wav.py | 2 - tests/test_convert.py | 58 +++++----- 3 files changed, 170 insertions(+), 132 deletions(-) diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index dc76026..be3fcb7 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -12,9 +12,9 @@ """ import argparse +import base64 import getpass import logging -import os import struct from datetime import datetime, timezone from pathlib import Path @@ -61,62 +61,63 @@ ] # fmt: on -TYPE_MAP = { - # Extended Header type map - "B": (np.int8, 1), - "I": (np.int16, 2), - "L": (np.int32, 4), - "X": (np.int64, 8), - "F": (np.float32, 4), - "D": (np.float64, 8), - "A": (np.dtype("S1"), 1), -} - HEADER_SIZE_BYTES = 512 BLOCK_SIZE_BYTES = 512 -NORMALIZATION_FACTORS = { - # format : normalization factor - "SB": 2**7 - 1, # scalar 8-bit integer - "SI": 2**15 - 1, # scalar 16-bit integer - "SL": 2**31 - 1, # scalar 32-bit integer - "CB": 2**7 - 1, # complex 8-bit integer - "CI": 2**15 - 1, # complex 16-bit integer - "CL": 2**31 - 1, # complex 32-bit integer +TYPE_MAP = { + # BLUE code to numpy dtype + "A": np.dtype("S1"), # ASCII character + "B": np.int8, + "I": np.int16, + "L": np.int32, + "X": np.int64, + "F": np.float32, + "D": np.float64, + # unsupported codes + # "P" : packed bits + # "N" : 4-bit integer } -# Data type configurations -DATA_TYPE_CONFIGS = { - "CB": {"dtype": np.int8, "complex": True, "normalize": True}, - "CI": {"dtype": np.int16, "complex": True, "normalize": True}, - "CL": {"dtype": np.int32, "complex": True, "normalize": True}, - "CF": {"dtype": np.complex64, "complex": True, "normalize": False}, - "SB": {"dtype": np.int8, "complex": False, "normalize": True}, - "SI": {"dtype": np.int16, "complex": False, "normalize": True}, - "SL": {"dtype": np.int32, "complex": False, "normalize": True}, - "SX": {"dtype": np.int64, "complex": False, "normalize": False}, - "SF": {"dtype": np.float32, "complex": False, "normalize": False}, - "SD": {"dtype": np.float64, "complex": False, "normalize": False}, -} -DATATYPE_MAP_BASE = { - # S = Scalar - "SB": "ri8", - "SI": "ri16", - "SL": "ri32", - "SX": "ri64", - "SF": "rf32", - "SD": "rf64", - # C = Complex - "CB": "ci8", - "CI": "ci16", - "CL": "ci32", - "CX": "ci64", - "CF": "cf32", - "CD": "cf32", # FIXME: should be cf64? D should be double. - # V = Vector (not supported) - # Q = Quad (not supported) -} +def blue_to_sigmf_type_str(h_fixed): + """ + Convert BLUE format code to SigMF datatype string. + + Parameters + ---------- + h_fixed : dict + Fixed Header dictionary containing 'format' and 'data_rep' fields. + + Returns + ------- + str + SigMF datatype string (e.g., 'ci16_le', 'rf32_be'). 
+ """ + # extract format code and endianness from header + format_code = h_fixed.get("format") + endianness = h_fixed.get("data_rep") + + # parse format code components + is_complex = format_code[0] == "C" + numpy_dtype = TYPE_MAP[format_code[1]] + + # compute everything from numpy dtype + dtype_obj = np.dtype(numpy_dtype) + bits = dtype_obj.itemsize * 8 # bytes to bits + + # infer sigmf type from numpy kind + sigmf_type = "i" if dtype_obj.kind in ("i", "u") else "f" + + # build datatype string + prefix = "c" if is_complex else "r" + datatype = f"{prefix}{sigmf_type}{bits}" + + # add endianness for types > 8 bits + if bits > 8: + endian_suffix = "_le" if endianness == "EEEI" else "_be" + datatype += endian_suffix + + return datatype def detect_endian(data, probe_fields=("data_size", "version")): @@ -142,7 +143,6 @@ def detect_endian(data, probe_fields=("data_size", "version")): SigMFConversionError If the endianness is unexpected. """ - # TODO: handle both types of endianess 'EEEI' or IEEE and data rep and signal rep endianness = data[8:12].decode("ascii") if endianness not in ("EEEI", "IEEE"): raise SigMFConversionError(f"Unexpected endianness: {endianness}") @@ -159,14 +159,14 @@ def detect_endian(data, probe_fields=("data_size", "version")): MAX_DATA_SIZE_FACTOR = 100 if key == "data_size": - if val <= 0 or val > len(data) * MAX_DATA_SIZE_FACTOR: + if val < 0 or val > lenf(data) * MAX_DATA_SIZE_FACTOR: ok = False break elif key == "version": if not 0 < val < 10: # expect small version number ok = False break - except Exception: + except (struct.error, ValueError, IndexError): ok = False break if ok: @@ -243,7 +243,10 @@ def read_hcb(file_path): "yunits": struct.unpack(f"{endian}i", handle.read(4))[0], } else: - h_adjunct = handle.read(256) + # read raw adjunct header as bytes and convert to base64 for JSON serialization + log.warning(f"Unknown BLUE file type {h_fixed['type']}, encoding adjunct header in metadata as base64.") + raw_adjunct = handle.read(256) + h_adjunct = {"raw_base64": base64.b64encode(raw_adjunct).decode("ascii")} ver_lut = {"1.0": "BLUE 1.0", "1.1": "BLUE 1.1", "2.0": "Platinum"} spec_str = ver_lut.get(h_keywords.get("VER", "1.0")) @@ -291,7 +294,15 @@ def read_extended_header(file_path, h_fixed): ltag = struct.unpack(f"{endian}b", handle.read(1))[0] type_char = handle.read(1).decode("ascii", errors="replace") - dtype, bytes_per_element = TYPE_MAP.get(type_char, (np.dtype("S1"), 1)) + # get dtype and compute bytes per element + if type_char in TYPE_MAP: + dtype = TYPE_MAP[type_char] + bytes_per_element = np.dtype(dtype).itemsize + else: + # fallback for unknown types + dtype = np.dtype("S1") + bytes_per_element = 1 + val_len = lkey - lext val_count = val_len // bytes_per_element if bytes_per_element else 0 @@ -303,7 +314,16 @@ def read_extended_header(file_path, h_fixed): else: value = np.frombuffer(handle.read(val_len), dtype=dtype, count=val_count) if value.size == 1: - value = value[0] + val_item = value[0] + # handle bytes first (numpy.bytes_ is also np.generic) + if isinstance(val_item, bytes): + # handle bytes from S1 dtype - convert to base64 for JSON + value = base64.b64encode(val_item).decode("ascii") + elif isinstance(val_item, np.generic): + # convert numpy scalar to native python type + value = val_item.item() + else: + value = val_item else: value = value.tolist() @@ -338,7 +358,7 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: Returns ------- numpy.ndarray - Parsed samples. + Parsed samples. 
Empty array for zero-sample files. """ log.debug("parsing BLUE file data values") @@ -348,16 +368,9 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: log.debug(f"format: {fmt}, data_size from header: {data_size_bytes} bytes") - # Determine destination path for SigMF data file - dest_path = out_path.with_suffix(".sigmf-data") - print("#" * 80) - print("Writing SigMF data to:", dest_path) - print("#" * 80) - - config = DATA_TYPE_CONFIGS[fmt] - np_dtype = config["dtype"] - is_complex = config["complex"] - should_normalize = config["normalize"] + # parse format code components + is_complex = fmt[0] == "C" + np_dtype = TYPE_MAP[fmt[1]] # calculate element size and count elem_size = np.dtype(np_dtype).itemsize @@ -365,6 +378,17 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: log.debug(f"elem_size: {elem_size}, elem_count: {elem_count}, is_complex: {is_complex}") + # check for zero-sample file (metadata-only) + if elem_count == 0: + log.info("detected zero-sample BLUE file, creating metadata-only SigMF") + return np.array([], dtype=np_dtype) + + # Determine destination path for SigMF data file + dest_path = out_path.with_suffix(".sigmf-data") + print("#" * 80) + print("Writing SigMF data to:", dest_path) + print("#" * 80) + # read raw samples raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count) @@ -381,9 +405,13 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: # scalar data samples = raw_samples + # print('dbug', samples.dtype, len(samples), get_normalization_factor(fmt)) + # normalize if needed - if should_normalize: - samples /= NORMALIZATION_FACTORS[fmt] + # if should_normalize: + # norm_factor = get_normalization_factor(fmt) + # if norm_factor: + # samples /= norm_factor # save out as SigMF IQ data file samples.tofile(dest_path) @@ -393,7 +421,9 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: return samples -def construct_sigmf(out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list) -> SigMFFile: +def construct_sigmf( + out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list, is_metadata_only: bool = False +) -> SigMFFile: """ Built & write a SigMF object from BLUE metadata. @@ -409,11 +439,13 @@ def construct_sigmf(out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: Adjunct Header h_extended : list of dict Parsed extended header entries from read_extended_header(). + is_metadata_only : bool, optional + If True, creates a metadata-only SigMF file. Returns ------- - dict - SigMF metadata structure. + SigMFFile + SigMF object. 
""" # helper to look up extended header values by tag def get_tag(tag): @@ -422,18 +454,10 @@ def get_tag(tag): return entry["value"] return None - # data_rep: 'EEEI' or 'IEEE' (little or big data endianess representation) - data_rep = h_fixed.get("data_rep") + # get sigmf datatype from blue format and endianness + datatype = blue_to_sigmf_type_str(h_fixed) - # data_format: for example 'CI' or 'SD' (data format code - real or complex, int or float) - data_format = h_fixed.get("format") - endian_suffix = "_le" if data_rep == "EEEI" else "_be" - - # get base datatype and add endianness - base_datatype = DATATYPE_MAP_BASE.get(data_format) - datatype = base_datatype + endian_suffix - - log.info(f"Using SigMF datatype: {datatype} for BLUE format {h_fixed['format']} and endianness {data_rep}.") + log.info(f"Using SigMF datatype: {datatype} for BLUE format {h_fixed['format']}") # sample rate: prefer adjunct.xdelta, else extended header SAMPLE_RATE if "xdelta" in h_adjunct: @@ -448,10 +472,8 @@ def get_tag(tag): # base global metadata global_info = { - # FIXME: what common fields are in h_fixed? "core:author": getpass.getuser(), SigMFFile.DATATYPE_KEY: datatype, - # FIXME: what is the most apt description? # SigMFFile.DESCRIPTION_KEY: ???, SigMFFile.RECORDER_KEY: "Official SigMF BLUE converter", SigMFFile.NUM_CHANNELS_KEY: num_channels, @@ -459,6 +481,10 @@ def get_tag(tag): SigMFFile.EXTENSIONS_KEY: [{"name": "blue", "version": "0.0.1", "optional": True}], } + # set metadata-only flag for zero-sample files + if is_metadata_only: + global_info[SigMFFile.METADATA_ONLY_KEY] = True + # merge HCB values into metadata global_info["blue:fixed"] = h_fixed global_info["blue:keywords"] = h_keywords @@ -485,7 +511,6 @@ def get_tag(tag): else: # timecode uses 1950-01-01 as epoch, datetime uses 1970-01-01 blue_epoch = blue_start_time - 631152000 # seconds between 1950 and 1970 - # FIXME: I am unsure if the timezone is always UTC in these files blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc) capture_info = { @@ -493,17 +518,25 @@ def get_tag(tag): } if get_tag("RF_FREQ") is not None: - # FIXME: I believe there are many possible keys related to tune frequency + # There may be other keys related to tune frequency capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ")) # actually write to SigMF filenames = get_sigmf_filenames(out_path.stem) print("dbug", filenames) - meta = SigMFFile( - data_file=filenames["data_fn"], - global_info=global_info, - ) + # for metadata-only files, don't specify data_file and skip checksum + if is_metadata_only: + meta = SigMFFile( + data_file=None, + global_info=global_info, + skip_checksum=True, + ) + else: + meta = SigMFFile( + data_file=filenames["data_fn"], + global_info=global_info, + ) meta.add_capture(0, metadata=capture_info) log.debug("created %r", meta) @@ -549,17 +582,13 @@ def validate_fixed(h_fixed: dict) -> None: for field in required: if field not in h_fixed: raise SigMFConversionError(f"Missing required Fixed Header field: {field}") - # FIXME: when could this possibly occur? 
- if h_fixed[field] is None: - raise SigMFConversionError(f"Required Fixed Header field {field} is None") - for rep_field in ["data_rep", "head_rep"]: if h_fixed[rep_field] not in ("EEEI", "IEEE"): raise SigMFConversionError(f"Invalid value for {rep_field}: {h_fixed[rep_field]}") - # FIXME: merge these lookup tables into one - if h_fixed["format"] not in DATATYPE_MAP_BASE: - raise SigMFConversionError(f"Unsupported data format: {h_fixed['format']}") - if h_fixed["format"] not in DATA_TYPE_CONFIGS: + if h_fixed["data_size"] < 0: + raise SigMFConversionError(f"Invalid data_size: {h_fixed['data_size']} (must be >= 0)") + # validate format code is supported + if len(h_fixed["format"]) != 2 or h_fixed["format"][0] not in "SC" or h_fixed["format"][1] not in TYPE_MAP: raise SigMFConversionError(f"Unsupported data format: {h_fixed['format']}") @@ -638,8 +667,15 @@ def blue_to_sigmf( # read extended header h_extended = read_extended_header(blue_path, h_fixed) - # write to SigMF data file - _ = data_loopback(blue_path, out_path, h_fixed) + # check if this is a zero-sample (metadata-only) file + data_size_bytes = int(h_fixed.get("data_size", 0)) + is_metadata_only = data_size_bytes == 0 + + # write to SigMF data file only if samples exist + if not is_metadata_only: + _ = data_loopback(blue_path, out_path, h_fixed) + else: + log.info("skipping data file creation for zero-sample BLUE file") log.debug(">>>>>>>>> Fixed Header") for key, _, _, _, desc in FIXED_LAYOUT: @@ -656,7 +692,7 @@ def blue_to_sigmf( log.debug(f"{entry['tag']:20s}:{entry['value']}") # call the SigMF conversion for metadata generation - meta = construct_sigmf(out_path, h_fixed, h_keywords, h_adjunct, h_extended) + meta = construct_sigmf(out_path, h_fixed, h_keywords, h_adjunct, h_extended, is_metadata_only) return meta diff --git a/sigmf/convert/wav.py b/sigmf/convert/wav.py index 32fb1f0..bba0d47 100755 --- a/sigmf/convert/wav.py +++ b/sigmf/convert/wav.py @@ -7,7 +7,6 @@ """converter for wav containers""" import argparse -import getpass import logging import tempfile import wave @@ -28,7 +27,6 @@ def wav_to_sigmf( wav_path: str, out_path: Optional[str] = None, - to_archive: bool = True, ) -> SigMFFile: """ Read a wav, write a sigmf, return SigMFFile object. 
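A doctest-style sketch of the new datatype mapping introduced above (outputs follow from the TYPE_MAP
and the EEEI/IEEE endianness rules added in this patch):

    >>> from sigmf.convert.blue import blue_to_sigmf_type_str
    >>> blue_to_sigmf_type_str({"format": "CI", "data_rep": "EEEI"})
    'ci16_le'
    >>> blue_to_sigmf_type_str({"format": "SF", "data_rep": "IEEE"})
    'rf32_be'
    >>> blue_to_sigmf_type_str({"format": "SB", "data_rep": "EEEI"})
    'ri8'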
diff --git a/tests/test_convert.py b/tests/test_convert.py index 4919cb5..b467c9f 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -68,7 +68,8 @@ def setUp(self) -> None: self.skipTest(f"Set {BLUE_ENV_VAR} environment variable to location with .cdif files to run test.") if not blue_path.is_dir(): self.fail(f"{blue_path} is not a valid directory.") - self.bluefiles = list(blue_path.glob("*.cdif")) + self.bluefiles = list(blue_path.glob("**/*.cdif")) + print("bluefiles", self.bluefiles) if not self.bluefiles: self.fail(f"No .cdif files found in {BLUE_ENV_VAR}.") self.tmp_dir = tempfile.TemporaryDirectory() @@ -82,30 +83,33 @@ def test_blue_to_sigmf(self): for bdx, bluefile in enumerate(self.bluefiles): sigmf_path = self.tmp_path / bluefile.stem meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path) - - ### EVERYTHING BELOW HERE IS FOR DEBUGGING ONLY _ REMOVE LATER ### - # plot stft of RF data for visual inspection - import matplotlib.pyplot as plt - from scipy.signal import spectrogram - from swiftfox import summary - - samples = meta.read_samples() - plt.figure(figsize=(10, 10)) - summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate")) - plt.figure() - plt.plot(samples.real) - plt.plot(samples.imag) - - freqs, times, spec = spectrogram(samples, fs=meta.get_global_field("core:sample_rate"), nperseg=1024) - # use imshow to plot spectrogram - - plt.figure() - plt.imshow( - 10 * np.log10(spec), aspect="auto", extent=[times[0], times[-1], freqs[0], freqs[-1]], origin="lower" - ) - plt.colorbar(label="Intensity [dB]") - plt.ylabel("Frequency [Hz]") - plt.xlabel("Time [s]") - plt.title(f"Spectrogram of {bluefile.name}") - plt.show() + print(f"Converted {bluefile} to SigMF at {sigmf_path}") + if not meta.get_global_field("core:metadata_only"): + print(meta.read_samples(count=10)) + + # ### EVERYTHING BELOW HERE IS FOR DEBUGGING ONLY _ REMOVE LATER ### + # # plot stft of RF data for visual inspection + # import matplotlib.pyplot as plt + # from scipy.signal import spectrogram + # from swiftfox import summary + + # samples = meta.read_samples() + # plt.figure(figsize=(10, 10)) + # summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate")) + # plt.figure() + # plt.plot(samples.real) + # plt.plot(samples.imag) + + # freqs, times, spec = spectrogram(samples, fs=meta.get_global_field("core:sample_rate"), nperseg=1024) + # # use imshow to plot spectrogram + + # plt.figure() + # plt.imshow( + # 10 * np.log10(spec), aspect="auto", extent=[times[0], times[-1], freqs[0], freqs[-1]], origin="lower" + # ) + # plt.colorbar(label="Intensity [dB]") + # plt.ylabel("Frequency [Hz]") + # plt.xlabel("Time [s]") + # plt.title(f"Spectrogram of {bluefile.name}") + # plt.show() self.assertIsInstance(meta, sigmf.SigMFFile) From 01ba1f95f92fa39c7594077351cb0859faf9925d Mon Sep 17 00:00:00 2001 From: Teque5 Date: Wed, 17 Dec 2025 16:39:17 -0800 Subject: [PATCH 10/15] simplify endian detection --- sigmf/convert/blue.py | 44 +++++++------------------------------------ 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index be3fcb7..28e70ff 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -120,18 +120,14 @@ def blue_to_sigmf_type_str(h_fixed): return datatype -def detect_endian(data, probe_fields=("data_size", "version")): +def detect_endian(data): """ Detect endianness of a Bluefile header. - TODO: Look at this code and see if can be improved and possibly simplified. 
-
     Parameters
     ----------
     data : bytes
         Raw header data.
-    probe_fields : tuple of str, optional
-        Field names to test for sanity checks.
 
     Returns
     -------
@@ -144,36 +140,13 @@ def detect_endian(data, probe_fields=("data_size", "version")):
         If the endianness is unexpected.
     """
     endianness = data[8:12].decode("ascii")
-    if endianness not in ("EEEI", "IEEE"):
+    if endianness == "EEEI":
+        return "<"
+    elif endianness == "IEEE":
+        return ">"
+    else:
         raise SigMFConversionError(f"Unexpected endianness: {endianness}")
 
-    for endian in ("<", ">"):
-        ok = True
-        for key, offset, size, fmt, _ in FIXED_LAYOUT:
-            if key not in probe_fields:
-                continue
-            raw = data[offset : offset + size]
-            try:
-                val = struct.unpack(endian + fmt, raw)[0]
-                # sanity checks
-                MAX_DATA_SIZE_FACTOR = 100
-
-                if key == "data_size":
-                    if val < 0 or val > len(data) * MAX_DATA_SIZE_FACTOR:
-                        ok = False
-                        break
-                elif key == "version":
-                    if not 0 < val < 10:  # expect small version number
-                        ok = False
-                        break
-            except (struct.error, ValueError, IndexError):
-                ok = False
-                break
-        if ok:
-            return endian
-    # fallback
-    return "<"
-
 
 def read_hcb(file_path):
     """
@@ -268,8 +241,6 @@ def read_extended_header(file_path, h_fixed):
         Path to the BLUE file.
     h_fixed : dict
         Fixed Header containing 'ext_size' and 'ext_start'.
-    endian : str, optional
-        Endianness ('<' for little-endian, '>' for big-endian).
 
     Returns
     -------
@@ -522,8 +493,7 @@ def get_tag(tag):
             capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ"))
 
     # actually write to SigMF
-    filenames = get_sigmf_filenames(out_path.stem)
-    print("dbug", filenames)
+    filenames = get_sigmf_filenames(out_path)
 
From 0d2d134a7b54753bc9c9769852bdae5c12b414cb Mon Sep 17 00:00:00 2001
From: Teque5
Date: Wed, 17 Dec 2025 16:43:13 -0800
Subject: [PATCH 11/15] handle duplicate keys in extended header

---
 sigmf/convert/blue.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py
index 28e70ff..60bc62d 100644
--- a/sigmf/convert/blue.py
+++ b/sigmf/convert/blue.py
@@ -461,15 +461,23 @@ def get_tag(tag):
     global_info["blue:keywords"] = h_keywords
     global_info["blue:adjunct"] = h_adjunct
 
-    # merge extended header fields
+    # merge extended header fields, handling duplicate keys
     if h_extended:
         extended = {}
+        tag_counts = {}
         for entry in h_extended:
-            key = entry.get("tag")
+            tag = entry.get("tag")
             value = entry.get("value")
             if hasattr(value, "item"):
                 value = value.item()
-            extended[key] = value
+
+            # handle duplicate tags by numbering them
+            if tag in extended:
+                tag_counts[tag] = tag_counts.get(tag, 0) + 1
+                numbered_tag = f"{tag}_{tag_counts[tag]}"
+                extended[numbered_tag] = value
+            else:
+                extended[tag] = value
         global_info["blue:extended"] = extended
 
     blue_start_time = float(h_fixed.get("timecode", 0))
@@ -557,7 +565,6 @@ def validate_fixed(h_fixed: dict) -> None:
             raise SigMFConversionError(f"Invalid value for {rep_field}: {h_fixed[rep_field]}")
     if h_fixed["data_size"] < 0:
         raise SigMFConversionError(f"Invalid data_size: {h_fixed['data_size']} (must be >= 0)")
-    # validate format code is supported
     if len(h_fixed["format"]) != 2 or h_fixed["format"][0] not in "SC" or h_fixed["format"][1] not in TYPE_MAP:
         raise SigMFConversionError(f"Unsupported data format: {h_fixed['format']}")
 
From 0681e79866a9852163f6ee39ddee6ca09ec32da6 Mon Sep 17 00:00:00 2001
From: Teque5
Date: Wed, 17 Dec 2025 17:12:46 -0800
Subject: [PATCH 12/15] remove debug, fix
cf32 typo --- sigmf/convert/blue.py | 27 ++++++--------------------- tests/test_convert.py | 25 ++++++++++--------------- 2 files changed, 16 insertions(+), 36 deletions(-) diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index 60bc62d..f3c37ec 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -145,7 +145,7 @@ def detect_endian(data): elif endianness == "IEEE": return ">" else: - raise SigMFConversionError(f"Unexpected endianness: {endianness}") + raise SigMFConversionError(f"Unsupported endianness: {endianness}") def read_hcb(file_path): @@ -313,7 +313,7 @@ def read_extended_header(file_path, h_fixed): return entries -def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: +def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> None: """ Write SigMF data file from BLUE file samples. @@ -356,41 +356,26 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> np.ndarray: # Determine destination path for SigMF data file dest_path = out_path.with_suffix(".sigmf-data") - print("#" * 80) - print("Writing SigMF data to:", dest_path) - print("#" * 80) # read raw samples raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count) if is_complex: - # complex data: already in IQIQIQ... format or native complex - if np_dtype == np.complex64: + # check if data is already complex or needs deinterleaving + if np.iscomplexobj(raw_samples): # already complex, no reassembly needed samples = raw_samples else: # reassemble interleaved IQ samples samples = raw_samples[::2] + 1j * raw_samples[1::2] - log.debug(f"Deinterleaved {len(raw_samples)} samples into {len(samples)} complex samples") else: # scalar data samples = raw_samples - # print('dbug', samples.dtype, len(samples), get_normalization_factor(fmt)) - - # normalize if needed - # if should_normalize: - # norm_factor = get_normalization_factor(fmt) - # if norm_factor: - # samples /= norm_factor - # save out as SigMF IQ data file samples.tofile(dest_path) log.info("wrote %s", dest_path) - # return the IQ data if needed for further processing if needed - return samples - def construct_sigmf( out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list, is_metadata_only: bool = False @@ -627,7 +612,7 @@ def blue_to_sigmf( implement a function that instead writes metadata only for a non-conforming dataset using the HEADER_BYTES_KEY and TRAILING_BYTES_KEY in most cases. 
""" - log.debug(f"convert {blue_path}") + log.debug(f"read {blue_path}") blue_path = Path(blue_path) if out_path is None: @@ -650,7 +635,7 @@ def blue_to_sigmf( # write to SigMF data file only if samples exist if not is_metadata_only: - _ = data_loopback(blue_path, out_path, h_fixed) + data_loopback(blue_path, out_path, h_fixed) else: log.info("skipping data file creation for zero-sample BLUE file") diff --git a/tests/test_convert.py b/tests/test_convert.py index b467c9f..c677283 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -91,25 +91,20 @@ def test_blue_to_sigmf(self): # # plot stft of RF data for visual inspection # import matplotlib.pyplot as plt # from scipy.signal import spectrogram - # from swiftfox import summary + # from swiftfox import summary, smartspec + # if meta.get_global_field("core:metadata_only"): + # print("Metadata only file, skipping plot.") + # continue # samples = meta.read_samples() - # plt.figure(figsize=(10, 10)) - # summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate")) + # # plt.figure(figsize=(10, 10)) + # summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate"), title=sigmf_path.name) # plt.figure() - # plt.plot(samples.real) - # plt.plot(samples.imag) - - # freqs, times, spec = spectrogram(samples, fs=meta.get_global_field("core:sample_rate"), nperseg=1024) + # # plt.plot(samples.real) + # # plt.plot(samples.imag) + # # plt.figure() + # spec = smartspec(samples, detail=0.5, samp_rate=meta.get_global_field("core:sample_rate")) # # use imshow to plot spectrogram - # plt.figure() - # plt.imshow( - # 10 * np.log10(spec), aspect="auto", extent=[times[0], times[-1], freqs[0], freqs[-1]], origin="lower" - # ) - # plt.colorbar(label="Intensity [dB]") - # plt.ylabel("Frequency [Hz]") - # plt.xlabel("Time [s]") - # plt.title(f"Spectrogram of {bluefile.name}") # plt.show() self.assertIsInstance(meta, sigmf.SigMFFile) From c70edd74fd0687288dc90acd22545ed767cae4a6 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Tue, 23 Dec 2025 13:15:31 -0800 Subject: [PATCH 13/15] standardize converter API * update documentation * allow converters to --archive (.sigmf) or create SigMF pairs (.sigmf-data & .sigmf-meta) * add tests --- docs/source/converters.rst | 9 ++++ sigmf/convert/blue.py | 93 ++++++++++++++++++++++++++------------ sigmf/convert/wav.py | 85 ++++++++++++++++++++++++---------- tests/test_convert.py | 38 +++++++++++----- tests/testdata.py | 10 ++++ 5 files changed, 169 insertions(+), 66 deletions(-) diff --git a/docs/source/converters.rst b/docs/source/converters.rst index 8c74e39..76e09d9 100644 --- a/docs/source/converters.rst +++ b/docs/source/converters.rst @@ -35,6 +35,15 @@ or by using module syntax: python3 -m sigmf.convert.wav recording.wav +Output Naming +~~~~~~~~~~~~~ + +All converters treat the value passed with ``-o/--output`` as a base name and ignore any existing suffix. The tools +emit ``.sigmf-data`` and ``.sigmf-meta`` files (retaining any original extensions such as ``.cdif`` or +``.tmp`` in the base). Supplying ``--archive`` packages the result as ``.sigmf`` instead of producing separate +meta/data files. 
+ + BLUE Converter -------------- diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index f3c37ec..72b0534 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -14,8 +14,10 @@ import argparse import base64 import getpass +import io import logging import struct +import tempfile from datetime import datetime, timezone from pathlib import Path from typing import Optional @@ -221,6 +223,7 @@ def read_hcb(file_path): raw_adjunct = handle.read(256) h_adjunct = {"raw_base64": base64.b64encode(raw_adjunct).decode("ascii")} + # FIXME: I've seen VER=2.0.14 ver_lut = {"1.0": "BLUE 1.0", "1.1": "BLUE 1.1", "2.0": "Platinum"} spec_str = ver_lut.get(h_keywords.get("VER", "1.0")) log.info(f"Read {h_fixed['version']} type {h_fixed['type']} using {spec_str} specification.") @@ -313,7 +316,7 @@ def read_extended_header(file_path, h_fixed): return entries -def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> None: +def data_loopback(blue_path: Path, data_path: Path, h_fixed: dict) -> None: """ Write SigMF data file from BLUE file samples. @@ -321,8 +324,8 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> None: ---------- blue_path : Path Path to the BLUE file. - out_path : Path - Path to output SigMF metadata file. + data_path : Path + Destination path for the SigMF dataset (.sigmf-data). h_fixed : dict Header Control Block dictionary. @@ -354,9 +357,6 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> None: log.info("detected zero-sample BLUE file, creating metadata-only SigMF") return np.array([], dtype=np_dtype) - # Determine destination path for SigMF data file - dest_path = out_path.with_suffix(".sigmf-data") - # read raw samples raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count) @@ -373,20 +373,26 @@ def data_loopback(blue_path: Path, out_path: Path, h_fixed: dict) -> None: samples = raw_samples # save out as SigMF IQ data file - samples.tofile(dest_path) - log.info("wrote %s", dest_path) + samples.tofile(data_path) + log.info("wrote %s", data_path) def construct_sigmf( - out_path: Path, h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list, is_metadata_only: bool = False + filenames: dict, + h_fixed: dict, + h_keywords: dict, + h_adjunct: dict, + h_extended: list, + is_metadata_only: bool = False, + create_archive: bool = False, ) -> SigMFFile: """ Built & write a SigMF object from BLUE metadata. Parameters ---------- - out_path : Path - Path to output SigMF metadata file. + filenames : dict + Mapping returned by get_sigmf_filenames containing destination paths. h_fixed : dict Fixed Header h_keywords : dict @@ -397,6 +403,8 @@ def construct_sigmf( Parsed extended header entries from read_extended_header(). is_metadata_only : bool, optional If True, creates a metadata-only SigMF file. + create_archive : bool, optional + When True, package output as SigMF archive instead of a meta/data pair. 
Returns ------- @@ -485,8 +493,7 @@ def get_tag(tag): # There may be other keys related to tune frequency capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ")) - # actually write to SigMF - filenames = get_sigmf_filenames(out_path) + # TODO: if no output path is specified, construct non-conforming metadata only SigMF # for metadata-only files, don't specify data_file and skip checksum if is_metadata_only: @@ -495,6 +502,7 @@ def get_tag(tag): global_info=global_info, skip_checksum=True, ) + meta.data_buffer = io.BytesIO() else: meta = SigMFFile( data_file=filenames["data_fn"], @@ -503,8 +511,12 @@ def get_tag(tag): meta.add_capture(0, metadata=capture_info) log.debug("created %r", meta) - meta.tofile(filenames["meta_fn"], toarchive=False) - log.info("wrote %s", filenames["meta_fn"]) + if create_archive: + meta.tofile(filenames["archive_fn"], toarchive=True) + log.info("wrote %s", filenames["archive_fn"]) + else: + meta.tofile(filenames["meta_fn"], toarchive=False) + log.info("wrote %s", filenames["meta_fn"]) return meta @@ -590,6 +602,7 @@ def validate_extended_header(entries: list) -> None: def blue_to_sigmf( blue_path: str, out_path: Optional[str] = None, + create_archive: bool = False, ) -> SigMFFile: """ Read a MIDAS Bluefile, write to SigMF, return SigMFFile object. @@ -600,6 +613,8 @@ def blue_to_sigmf( Path to the Blue file. out_path : str Path to the output SigMF metadata file. + create_archive : bool, optional + When True, package output as a .sigmf archive. Returns ------- @@ -616,10 +631,14 @@ def blue_to_sigmf( blue_path = Path(blue_path) if out_path is None: - # extension will be changed later - out_path = Path(blue_path) + base_path = blue_path else: - out_path = Path(out_path) + base_path = Path(out_path) + + filenames = get_sigmf_filenames(base_path) + + # ensure output directory exists + filenames["base_fn"].parent.mkdir(parents=True, exist_ok=True) validate_file(blue_path) @@ -631,13 +650,31 @@ def blue_to_sigmf( # check if this is a zero-sample (metadata-only) file data_size_bytes = int(h_fixed.get("data_size", 0)) - is_metadata_only = data_size_bytes == 0 - - # write to SigMF data file only if samples exist - if not is_metadata_only: - data_loopback(blue_path, out_path, h_fixed) - else: - log.info("skipping data file creation for zero-sample BLUE file") + metadata_only = data_size_bytes == 0 + + with tempfile.TemporaryDirectory() as temp_dir: + if not metadata_only: + if create_archive: + # for archives, write data to a temporary file that will be cleaned up + data_path = Path(temp_dir) / filenames["data_fn"].name + filenames["data_fn"] = data_path # update path for construct_sigmf + else: + # for file pairs, write to the final destination + data_path = filenames["data_fn"] + data_loopback(blue_path, data_path, h_fixed) + else: + log.info("skipping data file creation for zero-sample BLUE file") + + # call the SigMF conversion for metadata generation + meta = construct_sigmf( + filenames=filenames, + h_fixed=h_fixed, + h_keywords=h_keywords, + h_adjunct=h_adjunct, + h_extended=h_extended, + is_metadata_only=metadata_only, + create_archive=create_archive, + ) log.debug(">>>>>>>>> Fixed Header") for key, _, _, _, desc in FIXED_LAYOUT: @@ -653,9 +690,6 @@ def blue_to_sigmf( for entry in h_extended: log.debug(f"{entry['tag']:20s}:{entry['value']}") - # call the SigMF conversion for metadata generation - meta = construct_sigmf(out_path, h_fixed, h_keywords, h_adjunct, h_extended, is_metadata_only) - return meta @@ -667,6 +701,7 @@ def main() -> None: 
parser.add_argument("-i", "--input", type=str, required=True, help="BLUE file path") parser.add_argument("-o", "--output", type=str, default=None, help="SigMF path") parser.add_argument("-v", "--verbose", action="count", default=0) + parser.add_argument("--archive", action="store_true", help="Write a .sigmf archive instead of meta/data pair") parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -677,7 +712,7 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - _ = blue_to_sigmf(blue_path=args.input, out_path=args.output) + _ = blue_to_sigmf(blue_path=args.input, out_path=args.output, create_archive=args.archive) if __name__ == "__main__": diff --git a/sigmf/convert/wav.py b/sigmf/convert/wav.py index bba0d47..49217ca 100755 --- a/sigmf/convert/wav.py +++ b/sigmf/convert/wav.py @@ -23,26 +23,39 @@ log = logging.getLogger() +try: + from scipy.io import wavfile +except ImportError: + SCIPY_INSTALLED = False +else: + SCIPY_INSTALLED = True + def wav_to_sigmf( wav_path: str, out_path: Optional[str] = None, + create_archive: bool = False, ) -> SigMFFile: """ - Read a wav, write a sigmf, return SigMFFile object. + Read a wav, optionally write a sigmf, return SigMFFile object. - Note: Can only read PCM wav files. Use scipy.io.wavefile for broader support. + Raises + ------ + wave.Error + If the wav file is not PCM and Scipy is not installed. """ wav_path = Path(wav_path) - wav_stem = wav_path.stem - with wave.open(str(wav_path), "rb") as wav_reader: - n_channels = wav_reader.getnchannels() - samp_width = wav_reader.getsampwidth() - samp_rate = wav_reader.getframerate() - n_frames = wav_reader.getnframes() - raw_data = wav_reader.readframes(n_frames) - np_dtype = f"int{samp_width * 8}" - wav_data = np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) + if SCIPY_INSTALLED: + samp_rate, wav_data = wavfile.read(wav_path) + else: + with wave.open(str(wav_path), "rb") as wav_reader: + n_channels = wav_reader.getnchannels() + samp_width = wav_reader.getsampwidth() + samp_rate = wav_reader.getframerate() + n_frames = wav_reader.getnframes() + raw_data = wav_reader.readframes(n_frames) + np_dtype = f"int{samp_width * 8}" + wav_data = np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) global_info = { SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", @@ -58,24 +71,39 @@ def wav_to_sigmf( SigMFFile.DATETIME_KEY: wav_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), } - temp_dir = Path(tempfile.mkdtemp()) if out_path is None: - # extension will be changed - out_path = Path(wav_stem) + base_path = wav_path.with_suffix(".sigmf") else: - out_path = Path(out_path) - filenames = get_sigmf_filenames(out_path) + base_path = Path(out_path) + + filenames = get_sigmf_filenames(base_path) - data_path = temp_dir / filenames["data_fn"] - wav_data.tofile(data_path) + output_dir = filenames["meta_fn"].parent + output_dir.mkdir(parents=True, exist_ok=True) - meta = SigMFFile(data_file=data_path, global_info=global_info) - meta.add_capture(0, metadata=capture_info) - log.debug("created %r", meta) + if create_archive: + # use temporary directory for data file when creating archive + with tempfile.TemporaryDirectory() as temp_dir: + data_path = Path(temp_dir) / filenames["data_fn"].name + wav_data.tofile(data_path) + + meta = SigMFFile(data_file=data_path, global_info=global_info) + meta.add_capture(0, metadata=capture_info) + log.debug("created 
%r", meta) + + meta.tofile(filenames["archive_fn"], toarchive=True) + log.info("wrote %s", filenames["archive_fn"]) + else: + data_path = filenames["data_fn"] + wav_data.tofile(data_path) + + meta = SigMFFile(data_file=data_path, global_info=global_info) + meta.add_capture(0, metadata=capture_info) + log.debug("created %r", meta) + + meta.tofile(filenames["meta_fn"], toarchive=False) + log.info("wrote %s and %s", filenames["meta_fn"], filenames["data_fn"]) - arc_path = filenames["archive_fn"] - meta.tofile(arc_path, toarchive=True) - log.info("wrote %s", arc_path) return meta @@ -87,6 +115,9 @@ def main() -> None: parser.add_argument("-i", "--input", type=str, required=True, help="WAV path") parser.add_argument("-o", "--output", type=str, default=None, help="SigMF path") parser.add_argument("-v", "--verbose", action="count", default=0) + parser.add_argument( + "-a", "--archive", action="store_true", help="Save as SigMF archive instead of separate meta/data files." + ) parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -97,7 +128,11 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - _ = wav_to_sigmf(wav_path=args.input, out_path=args.output) + wav_path = Path(args.input) + if args.output is None: + args.output = wav_path.with_suffix(".sigmf") + + _ = wav_to_sigmf(wav_path=wav_path, out_path=args.output, create_archive=args.archive) if __name__ == "__main__": diff --git a/tests/test_convert.py b/tests/test_convert.py index c677283..5bd3051 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -18,14 +18,14 @@ from sigmf.convert.blue import blue_to_sigmf from sigmf.convert.wav import wav_to_sigmf -BLUE_ENV_VAR = "NONSIGMF_RECORDINGS_PATH" +from .testdata import NONSIGMF_REPO, NONSIGMF_ENV class TestWAVConverter(unittest.TestCase): """wav loopback test""" def setUp(self) -> None: - """create temp wav file for testing""" + """temp wav file for testing""" self.tmp_dir = tempfile.TemporaryDirectory() self.tmp_path = Path(self.tmp_dir.name) self.wav_path = self.tmp_path / "foo.wav" @@ -50,28 +50,35 @@ def tearDown(self) -> None: """clean up temporary directory""" self.tmp_dir.cleanup() - def test_wav_to_sigmf(self): - sigmf_path = self.tmp_path / "bar" + def test_wav_to_sigmf_pair(self): + sigmf_path = self.tmp_path / "bar.tmp" meta = wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path) data = meta.read_samples() # allow numerical differences due to PCM quantization self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) + filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) + self.assertTrue(filenames["data_fn"].exists(), "dataset path missing") + self.assertTrue(filenames["meta_fn"].exists(), "metadata path missing") + + def test_wav_to_sigmf_archive(self): + sigmf_path = self.tmp_path / "baz.ext" + wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path, create_archive=True) + filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) + self.assertTrue(filenames["archive_fn"].exists(), "archive path missing") class TestBlueConverter(unittest.TestCase): """As we have no blue files in the repository, test only when env path specified.""" def setUp(self) -> None: - blue_path = Path(os.getenv(BLUE_ENV_VAR, "nopath")) - if not blue_path or blue_path == Path("nopath"): + """temp paths & blue files""" + if not NONSIGMF_REPO: # skip test if environment variable not set - self.skipTest(f"Set {BLUE_ENV_VAR} environment variable to location with .cdif files to run test.") - 
if not blue_path.is_dir():
-            self.fail(f"{blue_path} is not a valid directory.")
-        self.bluefiles = list(blue_path.glob("**/*.cdif"))
+            self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with .cdif files to run test. ")
+        self.bluefiles = list(NONSIGMF_REPO.glob("**/*.cdif"))
         print("bluefiles", self.bluefiles)
         if not self.bluefiles:
-            self.fail(f"No .cdif files found in {BLUE_ENV_VAR}.")
+            self.fail(f"No .cdif files found in {NONSIGMF_ENV}.")
         self.tmp_dir = tempfile.TemporaryDirectory()
         self.tmp_path = Path(self.tmp_dir.name)
 
@@ -79,7 +86,7 @@ def tearDown(self) -> None:
         """clean up temporary directory"""
         self.tmp_dir.cleanup()
 
-    def test_blue_to_sigmf(self):
+    def test_blue_to_sigmf_pair(self):
         for bdx, bluefile in enumerate(self.bluefiles):
             sigmf_path = self.tmp_path / bluefile.stem
             meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path)
@@ -108,3 +115,10 @@ def test_blue_to_sigmf(self):
             # plt.show()
 
         self.assertIsInstance(meta, sigmf.SigMFFile)
+
+    def test_blue_to_sigmf_archive(self):
+        for bdx, bluefile in enumerate(self.bluefiles):
+            sigmf_path = self.tmp_path / f"{bluefile.stem}_archive"
+            meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path, create_archive=True)
+            print(f"Converted {bluefile} to SigMF archive at {sigmf_path}")
+            self.assertIsInstance(meta, sigmf.SigMFFile)
diff --git a/tests/testdata.py b/tests/testdata.py
index b91ad67..1bec8b7 100644
--- a/tests/testdata.py
+++ b/tests/testdata.py
@@ -6,10 +6,20 @@
 
 """Shared test data for tests."""
 
+import os
+from pathlib import Path
+
 import numpy as np
 
 from sigmf import SigMFFile, __specification__, __version__
 
+# detection for https://github.com/sigmf/example_nonsigmf_recordings
+NONSIGMF_ENV = "EXAMPLE_NONSIGMF_RECORDINGS_PATH"
+NONSIGMF_REPO = None
+_recordings_path = Path(os.getenv(NONSIGMF_ENV, "nopath"))
+if _recordings_path.is_dir():
+    NONSIGMF_REPO = Path(_recordings_path)
+
 TEST_FLOAT32_DATA = np.arange(16, dtype=np.float32)
 
 TEST_METADATA = {
 
From bc9492fd7ecc19edbe600395c9478d28c30ccadc Mon Sep 17 00:00:00 2001
From: Teque5
Date: Wed, 31 Dec 2025 15:11:53 -0800
Subject: [PATCH 14/15] fromfile improvements, BLUE & WAV NCD conversion

* fromfile() now autodetects SigMF, BLUE, & WAV formats automatically
* Converters now support conversion to non-conforming dataset without writing
  datafiles back to disk
* Add utils.get_magic_bytes() for autodetection purposes
* split tests for converters into separate files
* Validated implementation against lots of files beyond nonsigmf-examples repo
* Updated converter documentation
* Added slightly more to README
* Drop support for float WAV files; tricky to support NCD
* Fix bug in sigmffile._count_samples for NCD files
* Fix bug in read_samples when using some NCD files with header & trailing bytes
---
 README.md                  |  20 ++-
 docs/source/converters.rst |  73 ++++++----
 sigmf/convert/blue.py      | 273 ++++++++++++++++++++++++++++---------
 sigmf/convert/wav.py       | 159 ++++++++++++++++++---
 sigmf/sigmffile.py         |  78 ++++++++---
 sigmf/utils.py             |  37 ++++-
 tests/test_convert.py      | 124 -----------------
 tests/test_convert_blue.py | 120 ++++++++++++++++
 tests/test_convert_wav.py  | 204 +++++++++++++++++++++++++++
 9 files changed, 833 insertions(+), 255 deletions(-)
 mode change 100755 => 100644 sigmf/convert/wav.py
 delete mode 100644 tests/test_convert.py
 create mode 100644 tests/test_convert_blue.py
 create mode 100644 tests/test_convert_wav.py

diff --git a/README.md b/README.md
index 2dca188..af79c02 100644
--- a/README.md
+++ b/README.md
@@ -12,10 +12,26 @@ freely under the terms
GNU Lesser GPL v3 License. This module follows the SigMF specification [html](https://sigmf.org/)/[pdf](https://sigmf.github.io/SigMF/sigmf-spec.pdf) from the [spec repository](https://github.com/sigmf/SigMF). -To install the latest PyPI release, install from pip: +### Install Latest ```bash pip install sigmf ``` -**[Please visit the documentation for examples & more info.](https://sigmf.readthedocs.io/en/latest/)** +### Read SigMF + +```python +import sigmf + +# read SigMF recording +meta = sigmf.fromfile("recording.sigmf-meta") +samples = meta[0:1024] # get first 1024 samples + +# fromfile() also supports BLUE and WAV files via auto-detection +meta = sigmf.fromfile("recording.cdif") # BLUE file +meta = sigmf.fromfile("recording.wav") # WAV file +``` + +### Full API & Docs + +**[Please visit our documentation for more info.](https://sigmf.readthedocs.io/en/latest/)** diff --git a/docs/source/converters.rst b/docs/source/converters.rst index 76e09d9..5edfcd7 100644 --- a/docs/source/converters.rst +++ b/docs/source/converters.rst @@ -3,7 +3,7 @@ Format Converters ================= The SigMF Python library includes converters to import data from various file formats into SigMF format. -These converters make it easy to migrate existing RF recordings to the standardized SigMF format while preserving metadata when possible. +Converters can create standard SigMF file pairs or Non-Conforming Datasets (NCDs) that reference the original files. Overview -------- @@ -13,35 +13,52 @@ Converters are available for: * **BLUE files** - MIDAS Blue and Platinum BLUE RF recordings (``.cdif``) * **WAV files** - Audio recordings (``.wav``) -All converters return a :class:`~sigmf.SigMFFile` object that can be used immediately or saved to disk. -Converters preserve datatypes and metadata where possible. +All converters return a :class:`~sigmf.SigMFFile` object. Auto-detection is available through :func:`~sigmf.sigmffile.fromfile`. + + +Auto-Detection +~~~~~~~~~~~~~~ + +The :func:`~sigmf.sigmffile.fromfile` function automatically detects file formats and creates Non-Conforming Datasets: + +.. code-block:: python + + import sigmf + + # auto-detect and create NCD for any supported format + meta = sigmf.fromfile("recording.cdif") # BLUE file + meta = sigmf.fromfile("recording.wav") # WAV file + meta = sigmf.fromfile("recording.sigmf") # SigMF archive + + samples = meta.read_samples() Command Line Usage ~~~~~~~~~~~~~~~~~~ -Converters can be used from the command line after ``pip install sigmf``: +Converters can be used from the command line: .. code-block:: bash sigmf_convert_blue recording.cdif sigmf_convert_wav recording.wav -or by using module syntax: +or by using module execution: .. code-block:: bash - python3 -m sigmf.convert.blue recording.cdif - python3 -m sigmf.convert.wav recording.wav + python -m sigmf.convert.blue recording.cdif + python -m sigmf.convert.wav recording.wav + +Output Options +~~~~~~~~~~~~~~ -Output Naming -~~~~~~~~~~~~~ +Converters support multiple output modes: -All converters treat the value passed with ``-o/--output`` as a base name and ignore any existing suffix. The tools -emit ``.sigmf-data`` and ``.sigmf-meta`` files (retaining any original extensions such as ``.cdif`` or -``.tmp`` in the base). Supplying ``--archive`` packages the result as ``.sigmf`` instead of producing separate -meta/data files. 
+* **Standard conversion**: Creates ``.sigmf-data`` and ``.sigmf-meta`` files +* **Archive mode**: Creates single ``.sigmf`` archive with ``--archive`` +* **Non-Conforming Dataset**: Creates metadata-only file referencing original data with ``--ncd`` BLUE Converter @@ -56,38 +73,42 @@ The BLUE converter handles CDIF (.cdif) recordings while placing BLUE header inf .. autofunction:: sigmf.convert.blue.blue_to_sigmf - .. code-block:: python from sigmf.convert.blue import blue_to_sigmf - # read BLUE, write SigMF, and return SigMFFile object - meta = blue_to_sigmf(blue_path="recording.cdif", out_path="recording.sigmf") + # standard conversion + meta = blue_to_sigmf(blue_path="recording.cdif", out_path="recording") - # access converted data - samples = meta.read_samples() + # create NCD automatically (metadata-only, references original file) + meta = blue_to_sigmf(blue_path="recording.cdif") + + # access standard SigMF data & metadata + all_samples = meta.read_samples() sample_rate_hz = meta.sample_rate # access BLUE-specific metadata - blue_type = meta.get_global_field("blue:fixed")["type"] # e.g., 1000 - blue_version = meta.get_global_field("blue:keywords")["IO"] # e.g., "X-Midas" + blue_type = meta.get_global_field("blue:fixed")["type"] # e.g., 1000 + blue_version = meta.get_global_field("blue:keywords")["IO"] # e.g., "X-Midas" WAV Converter ------------- -This is useful when working with audio datasets. +Converts WAV audio recordings to SigMF format. .. autofunction:: sigmf.convert.wav.wav_to_sigmf - .. code-block:: python from sigmf.convert.wav import wav_to_sigmf - # read WAV, write SigMF, and return SigMFFile object - meta = wav_to_sigmf(wav_path="recording.wav", out_path="recording.sigmf") + # standard conversion + meta = wav_to_sigmf(wav_path="recording.wav", out_path="recording") - # access converted data - samples = meta.read_samples() + # create NCD automatically (metadata-only, references original file) + meta = wav_to_sigmf(wav_path="recording.wav") + + # access standard SigMF data & metadata + all_samples = meta.read_samples() sample_rate_hz = meta.sample_rate \ No newline at end of file diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index 72b0534..e9a0633 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -20,7 +20,7 @@ import tempfile from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import Optional, Tuple import numpy as np @@ -225,7 +225,7 @@ def read_hcb(file_path): # FIXME: I've seen VER=2.0.14 ver_lut = {"1.0": "BLUE 1.0", "1.1": "BLUE 1.1", "2.0": "Platinum"} - spec_str = ver_lut.get(h_keywords.get("VER", "1.0")) + spec_str = ver_lut.get(h_keywords.get("VER", "1.0"), "Unknown") log.info(f"Read {h_fixed['version']} type {h_fixed['type']} using {spec_str} specification.") validate_fixed(h_fixed) @@ -355,7 +355,7 @@ def data_loopback(blue_path: Path, data_path: Path, h_fixed: dict) -> None: # check for zero-sample file (metadata-only) if elem_count == 0: log.info("detected zero-sample BLUE file, creating metadata-only SigMF") - return np.array([], dtype=np_dtype) + return # read raw samples raw_samples = np.fromfile(blue_path, dtype=np_dtype, offset=HEADER_SIZE_BYTES, count=elem_count) @@ -377,22 +377,20 @@ def data_loopback(blue_path: Path, data_path: Path, h_fixed: dict) -> None: log.info("wrote %s", data_path) -def construct_sigmf( - filenames: dict, +def _build_common_metadata( h_fixed: dict, h_keywords: dict, h_adjunct: dict, h_extended: list, - is_metadata_only: bool = False, - 
create_archive: bool = False, -) -> SigMFFile: + is_ncd: bool = False, + blue_file_name: str = None, + trailing_bytes: int = 0, +) -> Tuple[dict, dict]: """ - Built & write a SigMF object from BLUE metadata. + Build common global_info and capture_info metadata for both standard and NCD SigMF files. Parameters ---------- - filenames : dict - Mapping returned by get_sigmf_filenames containing destination paths. h_fixed : dict Fixed Header h_keywords : dict @@ -400,16 +398,23 @@ def construct_sigmf( h_adjunct : dict Adjunct Header h_extended : list of dict - Parsed extended header entries from read_extended_header(). - is_metadata_only : bool, optional - If True, creates a metadata-only SigMF file. - create_archive : bool, optional - When True, package output as SigMF archive instead of a meta/data pair. + Parsed extended header entries. + is_ncd : bool, optional + If True, adds NCD-specific fields. + blue_file_name : str, optional + Original BLUE file name (required for NCD). + trailing_bytes : int, optional + Number of trailing bytes (for NCD). Returns ------- - SigMFFile - SigMF object. + tuple[dict, dict] + (global_info, capture_info) dictionaries. + + Raises + ------ + SigMFConversionError + If SigMF spec compliance is violated. """ # helper to look up extended header values by tag def get_tag(tag): @@ -420,7 +425,6 @@ def get_tag(tag): # get sigmf datatype from blue format and endianness datatype = blue_to_sigmf_type_str(h_fixed) - log.info(f"Using SigMF datatype: {datatype} for BLUE format {h_fixed['format']}") # sample rate: prefer adjunct.xdelta, else extended header SAMPLE_RATE @@ -438,16 +442,16 @@ def get_tag(tag): global_info = { "core:author": getpass.getuser(), SigMFFile.DATATYPE_KEY: datatype, - # SigMFFile.DESCRIPTION_KEY: ???, - SigMFFile.RECORDER_KEY: "Official SigMF BLUE converter", + SigMFFile.RECORDER_KEY: f"Official SigMF BLUE converter", SigMFFile.NUM_CHANNELS_KEY: num_channels, SigMFFile.SAMPLE_RATE_KEY: sample_rate_hz, SigMFFile.EXTENSIONS_KEY: [{"name": "blue", "version": "0.0.1", "optional": True}], } - # set metadata-only flag for zero-sample files - if is_metadata_only: - global_info[SigMFFile.METADATA_ONLY_KEY] = True + # add NCD-specific fields + if is_ncd: + global_info[SigMFFile.TRAILING_BYTES_KEY] = trailing_bytes + global_info[SigMFFile.DATASET_KEY] = blue_file_name # merge HCB values into metadata global_info["blue:fixed"] = h_fixed @@ -473,52 +477,32 @@ def get_tag(tag): extended[tag] = value global_info["blue:extended"] = extended + # calculate blue start time blue_start_time = float(h_fixed.get("timecode", 0)) blue_start_time += h_adjunct.get("xstart", 0) blue_start_time += float(h_keywords.get("TC_PREC", 0)) + capture_info = {} if blue_start_time == 0: log.warning("BLUE timecode is zero or missing; datetime metadata will be absent.") - capture_info = {} else: # timecode uses 1950-01-01 as epoch, datetime uses 1970-01-01 blue_epoch = blue_start_time - 631152000 # seconds between 1950 and 1970 blue_datetime = datetime.fromtimestamp(blue_epoch, tz=timezone.utc) - - capture_info = { - SigMFFile.DATETIME_KEY: blue_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), - } + capture_info[SigMFFile.DATETIME_KEY] = blue_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT) if get_tag("RF_FREQ") is not None: - # There may be other keys related to tune frequency + # it's possible other keys indicate tune frequency, but RF_FREQ is standard capture_info[SigMFFile.FREQUENCY_KEY] = float(get_tag("RF_FREQ")) - # TODO: if no output path is specified, construct non-conforming metadata 
only SigMF - - # for metadata-only files, don't specify data_file and skip checksum - if is_metadata_only: - meta = SigMFFile( - data_file=None, - global_info=global_info, - skip_checksum=True, + # validate SigMF spec compliance: metadata_only and dataset fields are mutually exclusive + if SigMFFile.METADATA_ONLY_KEY in global_info and SigMFFile.DATASET_KEY in global_info: + raise SigMFConversionError( + "SigMF spec violation: core:metadata_only MAY NOT be used in conjunction with " + "Non-Conforming Datasets or the core:dataset field" ) - meta.data_buffer = io.BytesIO() - else: - meta = SigMFFile( - data_file=filenames["data_fn"], - global_info=global_info, - ) - meta.add_capture(0, metadata=capture_info) - log.debug("created %r", meta) - if create_archive: - meta.tofile(filenames["archive_fn"], toarchive=True) - log.info("wrote %s", filenames["archive_fn"]) - else: - meta.tofile(filenames["meta_fn"], toarchive=False) - log.info("wrote %s", filenames["meta_fn"]) - - return meta + return global_info, capture_info def validate_file(blue_path: Path) -> None: @@ -599,10 +583,142 @@ def validate_extended_header(entries: list) -> None: raise SigMFConversionError(f"Invalid SAMPLE_RATE in extended header: {sample_rate}") +def construct_sigmf( + filenames: dict, + h_fixed: dict, + h_keywords: dict, + h_adjunct: dict, + h_extended: list, + is_metadata_only: bool = False, + create_archive: bool = False, +) -> SigMFFile: + """ + Built & write a SigMF object from BLUE metadata. + + Parameters + ---------- + filenames : dict + Mapping returned by get_sigmf_filenames containing destination paths. + h_fixed : dict + Fixed Header + h_keywords : dict + Custom User Keywords + h_adjunct : dict + Adjunct Header + h_extended : list of dict + Parsed extended header entries from read_extended_header(). + is_metadata_only : bool, optional + If True, creates a metadata-only SigMF file. + create_archive : bool, optional + When True, package output as SigMF archive instead of a meta/data pair. + + Returns + ------- + SigMFFile + SigMF object. + """ + # use shared helper to build common metadata + global_info, capture_info = _build_common_metadata(h_fixed, h_keywords, h_adjunct, h_extended) + + # set metadata-only flag for zero-sample files (only for non-NCD files) + if is_metadata_only: + # ensure we're not accidentally setting metadata_only for an NCD + if SigMFFile.DATASET_KEY in global_info: + raise ValueError( + "Cannot set metadata_only=True for Non-Conforming Dataset files. " + "Per SigMF spec, metadata_only MAY NOT be used with core:dataset field." + ) + global_info[SigMFFile.METADATA_ONLY_KEY] = True + + # for metadata-only files, don't specify data_file and skip checksum + if is_metadata_only: + meta = SigMFFile( + data_file=None, + global_info=global_info, + skip_checksum=True, + ) + meta.data_buffer = io.BytesIO() + else: + meta = SigMFFile( + data_file=filenames["data_fn"], + global_info=global_info, + ) + meta.add_capture(0, metadata=capture_info) + log.debug("created %r", meta) + + if create_archive: + meta.tofile(filenames["archive_fn"], toarchive=True) + log.info("wrote %s", filenames["archive_fn"]) + else: + meta.tofile(filenames["meta_fn"], toarchive=False) + log.info("wrote %s", filenames["meta_fn"]) + + return meta + + +def construct_sigmf_ncd( + blue_path: Path, + h_fixed: dict, + h_keywords: dict, + h_adjunct: dict, + h_extended: list, + header_bytes: int, + trailing_bytes: int, +) -> SigMFFile: + """ + Construct Non-Conforming Dataset SigMF metadata for BLUE file. 
+ + Parameters + ---------- + blue_path : Path + Path to the original BLUE file. + h_fixed : dict + Fixed Header + h_keywords : dict + Custom User Keywords + h_adjunct : dict + Adjunct Header + h_extended : list of dict + Parsed extended header entries from read_extended_header(). + header_bytes : int + Number of header bytes to skip. + trailing_bytes : int + Number of trailing bytes to ignore. + + Returns + ------- + SigMFFile + NCD SigMF object pointing to original BLUE file. + """ + # use shared helper to build common metadata, with NCD-specific additions + global_info, capture_info = _build_common_metadata( + h_fixed, + h_keywords, + h_adjunct, + h_extended, + is_ncd=True, + blue_file_name=blue_path.name, + trailing_bytes=trailing_bytes, + ) + + # add NCD-specific capture info + capture_info[SigMFFile.HEADER_BYTES_KEY] = header_bytes + + # create NCD metadata-only SigMF pointing to original file + meta = SigMFFile(global_info=global_info, skip_checksum=True) + meta.set_data_file(data_file=blue_path, offset=header_bytes, skip_checksum=True) + meta.data_buffer = io.BytesIO() + meta.add_capture(0, metadata=capture_info) + log.debug("created NCD SigMF: %r", meta) + + return meta + + def blue_to_sigmf( blue_path: str, out_path: Optional[str] = None, create_archive: bool = False, + create_ncd: bool = False, ) -> SigMFFile: """ Read a MIDAS Bluefile, write to SigMF, return SigMFFile object. @@ -611,24 +727,24 @@ def blue_to_sigmf( ---------- blue_path : str Path to the Blue file. - out_path : str + out_path : str, optional Path to the output SigMF metadata file. create_archive : bool, optional When True, package output as a .sigmf archive. + create_ncd : bool, optional + When True, create Non-Conforming Dataset with header_bytes and trailing_bytes. Returns ------- - numpy.ndarray - IQ Data. - - Notes - ----- - This function currently reads BLUE then writes a SigMF pair. We could also - implement a function that instead writes metadata only for a non-conforming - dataset using the HEADER_BYTES_KEY and TRAILING_BYTES_KEY in most cases. + SigMFFile + SigMF object, potentially as Non-Conforming Dataset. 
""" log.debug(f"read {blue_path}") + # auto-enable NCD when no output path is specified + if out_path is None: + create_ncd = True + blue_path = Path(blue_path) if out_path is None: base_path = blue_path @@ -648,10 +764,36 @@ def blue_to_sigmf( # read extended header h_extended = read_extended_header(blue_path, h_fixed) + # calculate NCD byte boundaries if requested + if create_ncd: + header_bytes = HEADER_SIZE_BYTES + int(h_fixed.get("ext_size", 0)) + + # for NCD, trailing_bytes = file_size - header_bytes - actual_data_size + file_size = blue_path.stat().st_size + actual_data_size = file_size - header_bytes + trailing_bytes = 0 # assume no trailing bytes for NCD unless file is smaller than expected + + log.debug( + f"BLUE NCD: file_size={file_size}, header_bytes={header_bytes}, actual_data_size={actual_data_size}, trailing_bytes={trailing_bytes}" + ) + # check if this is a zero-sample (metadata-only) file data_size_bytes = int(h_fixed.get("data_size", 0)) metadata_only = data_size_bytes == 0 + # handle NCD case where no output files are created + if create_ncd and out_path is None: + # create metadata-only SigMF for NCD pointing to original file + return construct_sigmf_ncd( + blue_path=blue_path, + h_fixed=h_fixed, + h_keywords=h_keywords, + h_adjunct=h_adjunct, + h_extended=h_extended, + header_bytes=header_bytes, + trailing_bytes=trailing_bytes, + ) + with tempfile.TemporaryDirectory() as temp_dir: if not metadata_only: if create_archive: @@ -702,6 +844,9 @@ def main() -> None: parser.add_argument("-o", "--output", type=str, default=None, help="SigMF path") parser.add_argument("-v", "--verbose", action="count", default=0) parser.add_argument("--archive", action="store_true", help="Write a .sigmf archive instead of meta/data pair") + parser.add_argument( + "--ncd", action="store_true", help="Process as Non-Conforming Dataset and write .sigmf-meta only." + ) parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -712,7 +857,7 @@ def main() -> None: } logging.basicConfig(level=level_lut[min(args.verbose, 2)]) - _ = blue_to_sigmf(blue_path=args.input, out_path=args.output, create_archive=args.archive) + _ = blue_to_sigmf(blue_path=args.input, out_path=args.output, create_archive=args.archive, create_ncd=args.ncd) if __name__ == "__main__": diff --git a/sigmf/convert/wav.py b/sigmf/convert/wav.py old mode 100755 new mode 100644 index 49217ca..e877d16 --- a/sigmf/convert/wav.py +++ b/sigmf/convert/wav.py @@ -7,6 +7,7 @@ """converter for wav containers""" import argparse +import io import logging import tempfile import wave @@ -23,43 +24,131 @@ log = logging.getLogger() -try: - from scipy.io import wavfile -except ImportError: - SCIPY_INSTALLED = False -else: - SCIPY_INSTALLED = True + +def _calculate_wav_ncd_bytes(wav_path: Path) -> tuple: + """ + Calculate header_bytes and trailing_bytes for WAV NCD. 
+ + Returns + ------- + tuple + (header_bytes, trailing_bytes) + """ + # use wave module to get basic info + with wave.open(str(wav_path), "rb") as wav_reader: + n_channels = wav_reader.getnchannels() + samp_width = wav_reader.getsampwidth() + n_frames = wav_reader.getnframes() + + # calculate sample data size in bytes + sample_bytes = n_frames * n_channels * samp_width + file_size = wav_path.stat().st_size + + # parse WAV file structure to find data chunk + with open(wav_path, "rb") as handle: + # skip RIFF header (12 bytes: 'RIFF' + size + 'WAVE') + handle.seek(12) + header_bytes = 12 + + # search for 'data' chunk + while header_bytes < file_size: + chunk_id = handle.read(4) + if len(chunk_id) != 4: + break + chunk_size = int.from_bytes(handle.read(4), "little") + + if chunk_id == b"data": + # found data chunk, header ends here + header_bytes += 8 # include chunk_id and chunk_size + break + + # skip this chunk + header_bytes += 8 + chunk_size + # ensure even byte boundary (WAV chunks are word-aligned) + if chunk_size % 2: + header_bytes += 1 + handle.seek(header_bytes) + + trailing_bytes = max(0, file_size - header_bytes - sample_bytes) + return header_bytes, trailing_bytes def wav_to_sigmf( wav_path: str, out_path: Optional[str] = None, create_archive: bool = False, + create_ncd: bool = False, ) -> SigMFFile: """ Read a wav, optionally write a sigmf, return SigMFFile object. + Parameters + ---------- + wav_path : str + Path to the WAV file. + out_path : str, optional + Path to the output SigMF metadata file. + create_archive : bool, optional + When True, package output as a .sigmf archive. + create_ncd : bool, optional + When True, create Non-Conforming Dataset with header_bytes and trailing_bytes. + + Returns + ------- + SigMFFile + SigMF object, potentially as Non-Conforming Dataset. + Raises ------ wave.Error - If the wav file is not PCM and Scipy is not installed. + If the wav file cannot be read. 
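+
+    Examples
+    --------
+    A minimal usage sketch; ``example.wav`` is a hypothetical input path:
+
+    >>> meta = wav_to_sigmf("example.wav", out_path="example.sigmf")  # doctest: +SKIP
+    >>> ncd_meta = wav_to_sigmf("example.wav")  # doctest: +SKIP
+
+    As with the BLUE converter, omitting ``out_path`` auto-enables ``create_ncd``
+    and returns a metadata-only SigMF object pointing at the original WAV file.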
""" wav_path = Path(wav_path) - if SCIPY_INSTALLED: - samp_rate, wav_data = wavfile.read(wav_path) - else: - with wave.open(str(wav_path), "rb") as wav_reader: - n_channels = wav_reader.getnchannels() - samp_width = wav_reader.getsampwidth() - samp_rate = wav_reader.getframerate() - n_frames = wav_reader.getnframes() + + # auto-enable NCD when no output path is specified + if out_path is None: + create_ncd = True + + # use built-in wave module exclusively for precise sample boundary detection + with wave.open(str(wav_path), "rb") as wav_reader: + n_channels = wav_reader.getnchannels() + samp_width = wav_reader.getsampwidth() + samp_rate = wav_reader.getframerate() + n_frames = wav_reader.getnframes() + + # for NCD support, calculate precise byte boundaries + if create_ncd: + header_bytes, trailing_bytes = _calculate_wav_ncd_bytes(wav_path) + log.debug(f"WAV NCD: header_bytes={header_bytes}, trailing_bytes={trailing_bytes}") + + # only read audio data if we're not creating NCD metadata-only + wav_data = None # initialize variable + if create_ncd and out_path is None: + # metadata-only NCD: don't read audio data + pass + else: + # normal conversion: read the audio data raw_data = wav_reader.readframes(n_frames) - np_dtype = f"int{samp_width * 8}" - wav_data = np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) + + np_dtype = f"int{samp_width * 8}" + + if wav_data is None: + # for NCD metadata-only, create dummy sample to get datatype + dummy_sample = np.array([0], dtype=np_dtype) + datatype_str = get_data_type_str(dummy_sample) + else: + # normal case: process actual audio data + wav_data = ( + np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) + if n_channels > 1 + else np.frombuffer(raw_data, dtype=np_dtype) + ) + datatype_str = get_data_type_str(wav_data) + global_info = { - SigMFFile.DATATYPE_KEY: get_data_type_str(wav_data), + SigMFFile.DATATYPE_KEY: datatype_str, SigMFFile.DESCRIPTION_KEY: f"converted from {wav_path.name}", - SigMFFile.NUM_CHANNELS_KEY: 1 if len(wav_data.shape) < 2 else wav_data.shape[1], + SigMFFile.NUM_CHANNELS_KEY: n_channels, SigMFFile.RECORDER_KEY: "Official SigMF WAV converter", SigMFFile.SAMPLE_RATE_KEY: samp_rate, } @@ -71,6 +160,33 @@ def wav_to_sigmf( SigMFFile.DATETIME_KEY: wav_datetime.strftime(SIGMF_DATETIME_ISO8601_FMT), } + if create_ncd: + # NCD requires extra fields + global_info[SigMFFile.TRAILING_BYTES_KEY] = trailing_bytes + global_info[SigMFFile.DATASET_KEY] = wav_path.name + capture_info[SigMFFile.HEADER_BYTES_KEY] = header_bytes + + # handle NCD case where no output files are created + if create_ncd and out_path is None: + # create metadata-only SigMF for NCD pointing to original file + meta = SigMFFile(global_info=global_info, skip_checksum=True) + meta.set_data_file(data_file=wav_path, offset=header_bytes, skip_checksum=True) + meta.data_buffer = io.BytesIO() + meta.add_capture(0, metadata=capture_info) + log.debug("created NCD SigMF: %r", meta) + return meta + + # if we get here, we need the actual audio data to create a new data file + if wav_data is None: + # need to read the audio data now for normal file creation + with wave.open(str(wav_path), "rb") as wav_reader: + raw_data = wav_reader.readframes(n_frames) + wav_data = ( + np.frombuffer(raw_data, dtype=np_dtype).reshape(-1, n_channels) + if n_channels > 1 + else np.frombuffer(raw_data, dtype=np_dtype) + ) + if out_path is None: base_path = wav_path.with_suffix(".sigmf") else: @@ -118,6 +234,9 @@ def main() -> None: parser.add_argument( "-a", "--archive", 
action="store_true", help="Save as SigMF archive instead of separate meta/data files." ) + parser.add_argument( + "--ncd", action="store_true", help="Process as Non-Conforming Dataset and write .sigmf-meta only." + ) parser.add_argument("--version", action="version", version=f"%(prog)s v{toolversion}") args = parser.parse_args() @@ -132,7 +251,7 @@ def main() -> None: if args.output is None: args.output = wav_path.with_suffix(".sigmf") - _ = wav_to_sigmf(wav_path=wav_path, out_path=args.output, create_archive=args.archive) + _ = wav_to_sigmf(wav_path=wav_path, out_path=args.output, create_archive=args.archive, create_ncd=args.ncd) if __name__ == "__main__": diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 81f6683..728aa0c 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -23,8 +23,8 @@ SIGMF_METADATA_EXT, SigMFArchive, ) -from .error import SigMFAccessError, SigMFError, SigMFFileError -from .utils import dict_merge +from .error import SigMFAccessError, SigMFConversionError, SigMFError, SigMFFileError +from .utils import dict_merge, get_magic_bytes class SigMFMetafile: @@ -493,9 +493,17 @@ def _count_samples(self): if self.data_file is None: sample_count = self._get_sample_count_from_annotations() else: - header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()]) file_size = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes - file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes # bytes + + # calculate actual data size accounting for offset, header_bytes, and trailing_bytes + if hasattr(self, "data_offset") and self.data_offset > 0: + # for NCD with offset, we start from offset position + file_data_size = file_size - self.data_offset - self.get_global_field(self.TRAILING_BYTES_KEY, 0) + else: + # for conforming datasets or NCD without offset, subtract header_bytes + header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()]) + file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes + sample_size = self.get_sample_size() # size of a sample in bytes num_channels = self.get_num_channels() sample_count = file_data_size // sample_size // num_channels @@ -701,6 +709,8 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F """ if count == 0: raise IOError("Number of samples must be greater than zero, or -1 for all samples.") + elif count == -1: + count = self.sample_count - start_index elif start_index + count > self.sample_count: raise IOError("Cannot read beyond EOF.") if self.data_file is None and not isinstance(self.data_buffer, io.BytesIO): @@ -711,8 +721,6 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F raise SigMFFileError("No signal data file has been associated with the metadata.") first_byte = start_index * self.get_sample_size() * self.get_num_channels() - if not self._is_conforming_dataset(): - warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous") return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False) def _read_datafile(self, first_byte, nitems, autoscale, raw_components): @@ -732,7 +740,10 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components): if self.data_file is not None: fp = open(self.data_file, "rb") - fp.seek(first_byte, 0) + # account for data_offset when seeking (important for NCDs) + seek_position = first_byte + getattr(self, "data_offset", 
0) + fp.seek(seek_position, 0) + data = np.fromfile(fp, dtype=data_type_in, count=nitems) elif self.data_buffer is not None: data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems) @@ -1060,6 +1071,7 @@ def fromarchive(archive_path, dir=None, skip_checksum=False): access SigMF archives without extracting them. """ from .archivereader import SigMFArchiveReader + return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile @@ -1067,8 +1079,8 @@ def fromfile(filename, skip_checksum=False): """ Creates and returns a SigMFFile or SigMFCollection instance with metadata loaded from the specified file. The filename may be that of either a - sigmf-meta file, a sigmf-data file, a sigmf-collection file, or a sigmf - archive. + sigmf-meta file, a sigmf-data file, a sigmf-collection file, a sigmf + archive, or a non-SigMF RF recording that can be converted (WAV, BLUE). Parameters ---------- @@ -1079,22 +1091,34 @@ def fromfile(filename, skip_checksum=False): Returns ------- - object - SigMFFile with dataset & metadata or a SigMFCollection depending on file type. + SigMFFile | SigMFCollection + A SigMFFile or a SigMFCollection depending on file type. + + Raises + ------ + SigMFFileError + If the file cannot be read as any supported format. + SigMFConversionError + If auto-detection conversion fails. """ + file_path = Path(filename) fns = get_sigmf_filenames(filename) meta_fn = fns["meta_fn"] archive_fn = fns["archive_fn"] collection_fn = fns["collection_fn"] - # extract the extension to check whether we are dealing with an archive, collection, etc. - file_path = Path(filename) - ext = file_path.suffix + # extract the extension to check file type + ext = file_path.suffix.lower() + + # group SigMF extensions for cleaner checking + sigmf_extensions = (SIGMF_METADATA_EXT, SIGMF_DATASET_EXT, SIGMF_COLLECTION_EXT, SIGMF_ARCHIVE_EXT) - if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn): + # try SigMF archive + if (ext.endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn): return fromarchive(archive_fn, skip_checksum=skip_checksum) - if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn): + # try SigMF collection + if (ext.endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn): collection_fp = open(collection_fn, "rb") bytestream_reader = codecs.getreader("utf-8") mdfile_reader = bytestream_reader(collection_fp) @@ -1104,7 +1128,8 @@ def fromfile(filename, skip_checksum=False): dir_path = meta_fn.parent return SigMFCollection(metadata=metadata, base_path=dir_path, skip_checksums=skip_checksum) - else: + # try standard SigMF metadata file + if Path.is_file(meta_fn): meta_fp = open(meta_fn, "rb") bytestream_reader = codecs.getreader("utf-8") mdfile_reader = bytestream_reader(meta_fp) @@ -1114,6 +1139,23 @@ def fromfile(filename, skip_checksum=False): data_fn = get_dataset_filename_from_metadata(meta_fn, metadata) return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum) + # try auto-detection for non-SigMF files only + if Path.is_file(file_path) and not ext.endswith(sigmf_extensions): + magic_bytes = get_magic_bytes(file_path, count=4, offset=0) + + if magic_bytes == b"RIFF": + from .convert.wav import wav_to_sigmf + + return wav_to_sigmf(file_path, create_ncd=True) + + elif magic_bytes == b"BLUE": + from .convert.blue import blue_to_sigmf + + return 
blue_to_sigmf(file_path, create_ncd=True) + + # if file doesn't exist at all or no valid files found, raise original error + raise SigMFFileError(f"Cannot read {filename} as SigMF or supported non-SigMF format.") + def get_sigmf_filenames(filename): """ @@ -1126,7 +1168,7 @@ def get_sigmf_filenames(filename): Returns ------- - dict with 'data_fn', 'meta_fn', and 'archive_fn' as keys. + dict with filename keys. """ stem_path = Path(filename) # If the path has a sigmf suffix, remove it. Otherwise do not remove the diff --git a/sigmf/utils.py b/sigmf/utils.py index 571a5e4..3c325c3 100644 --- a/sigmf/utils.py +++ b/sigmf/utils.py @@ -10,10 +10,11 @@ import sys from copy import deepcopy from datetime import datetime, timezone +from pathlib import Path import numpy as np -from .error import SigMFError +from .error import SigMFConversionError, SigMFError SIGMF_DATETIME_ISO8601_FMT = "%Y-%m-%dT%H:%M:%S.%fZ" @@ -112,3 +113,37 @@ def get_data_type_str(ray: np.ndarray) -> str: # only append endianness for types over 8 bits data_type_str += get_endian_str(ray) return data_type_str + + +def get_magic_bytes(file_path: Path, count: int = 4, offset: int = 0) -> bytes: + """ + Get magic bytes from a file to help identify file type. + + Parameters + ---------- + file_path : Path + Path to the file to read magic bytes from. + count : int, optional + Number of bytes to read. Default is 4. + offset : int, optional + Byte offset to start reading from. Default is 0. + + Returns + ------- + bytes + Magic bytes from the file. + + Raises + ------ + SigMFConversionError + If file cannot be read or is too small. + """ + try: + with open(file_path, "rb") as handle: + handle.seek(offset) + magic_bytes = handle.read(count) + if len(magic_bytes) < count: + raise SigMFConversionError(f"File {file_path} too small to read {count} magic bytes at offset {offset}") + return magic_bytes + except (IOError, OSError) as err: + raise SigMFConversionError(f"Cannot read magic bytes from {file_path}: {err}") diff --git a/tests/test_convert.py b/tests/test_convert.py deleted file mode 100644 index 5bd3051..0000000 --- a/tests/test_convert.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright: Multiple Authors -# -# This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python -# -# SPDX-License-Identifier: LGPL-3.0-or-later - -"""Tests for Converters""" - -import os -import tempfile -import unittest -import wave -from pathlib import Path - -import numpy as np - -import sigmf -from sigmf.convert.blue import blue_to_sigmf -from sigmf.convert.wav import wav_to_sigmf - -from .testdata import NONSIGMF_REPO, NONSIGMF_ENV - - -class TestWAVConverter(unittest.TestCase): - """wav loopback test""" - - def setUp(self) -> None: - """temp wav file for testing""" - self.tmp_dir = tempfile.TemporaryDirectory() - self.tmp_path = Path(self.tmp_dir.name) - self.wav_path = self.tmp_path / "foo.wav" - samp_rate = 48000 - duration_s = 0.1 - ttt = np.linspace(0, duration_s, int(samp_rate * duration_s), endpoint=False) - freq = 440 # A4 note - self.audio_data = 0.5 * np.sin(2 * np.pi * freq * ttt) - # note scipy could write float wav files directly, - # but to avoid adding scipy as a dependency for sigmf-python, - # convert float audio to 16-bit PCM integer format - audio_int16 = (self.audio_data * 32767).astype(np.int16) - - # write wav file using built-in wave module - with wave.open(str(self.wav_path), "wb") as wav_file: - wav_file.setnchannels(1) # mono - wav_file.setsampwidth(2) # 16-bit = 2 bytes - wav_file.setframerate(samp_rate) - wav_file.writeframes(audio_int16.tobytes()) - - def tearDown(self) -> None: - """clean up temporary directory""" - self.tmp_dir.cleanup() - - def test_wav_to_sigmf_pair(self): - sigmf_path = self.tmp_path / "bar.tmp" - meta = wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path) - data = meta.read_samples() - # allow numerical differences due to PCM quantization - self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) - filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) - self.assertTrue(filenames["data_fn"].exists(), "dataset path missing") - self.assertTrue(filenames["meta_fn"].exists(), "metadata path missing") - - def test_wav_to_sigmf_archive(self): - sigmf_path = self.tmp_path / "baz.ext" - wav_to_sigmf(wav_path=self.wav_path, out_path=sigmf_path, create_archive=True) - filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) - self.assertTrue(filenames["archive_fn"].exists(), "archive path missing") - - -class TestBlueConverter(unittest.TestCase): - """As we have no blue files in the repository, test only when env path specified.""" - - def setUp(self) -> None: - """temp paths & blue files""" - if not NONSIGMF_REPO: - # skip test if environment variable not set - self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with .cdif files to run test. 
") - self.bluefiles = list(NONSIGMF_REPO.glob("**/*.cdif")) - print("bluefiles", self.bluefiles) - if not self.bluefiles: - self.fail(f"No .cdif files found in {NONSIGMF_ENV}.") - self.tmp_dir = tempfile.TemporaryDirectory() - self.tmp_path = Path(self.tmp_dir.name) - - def tearDown(self) -> None: - """clean up temporary directory""" - self.tmp_dir.cleanup() - - def test_blue_to_sigmf_pair(self): - for bdx, bluefile in enumerate(self.bluefiles): - sigmf_path = self.tmp_path / bluefile.stem - meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path) - print(f"Converted {bluefile} to SigMF at {sigmf_path}") - if not meta.get_global_field("core:metadata_only"): - print(meta.read_samples(count=10)) - - # ### EVERYTHING BELOW HERE IS FOR DEBUGGING ONLY _ REMOVE LATER ### - # # plot stft of RF data for visual inspection - # import matplotlib.pyplot as plt - # from scipy.signal import spectrogram - # from swiftfox import summary, smartspec - - # if meta.get_global_field("core:metadata_only"): - # print("Metadata only file, skipping plot.") - # continue - # samples = meta.read_samples() - # # plt.figure(figsize=(10, 10)) - # summary(samples, detail=0.1, samp_rate=meta.get_global_field("core:sample_rate"), title=sigmf_path.name) - # plt.figure() - # # plt.plot(samples.real) - # # plt.plot(samples.imag) - # # plt.figure() - # spec = smartspec(samples, detail=0.5, samp_rate=meta.get_global_field("core:sample_rate")) - # # use imshow to plot spectrogram - - # plt.show() - self.assertIsInstance(meta, sigmf.SigMFFile) - - def test_blue_to_sigmf_archive(self): - for bdx, bluefile in enumerate(self.bluefiles): - sigmf_path = self.tmp_path / f"{bluefile.stem}_archive" - meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path, create_archive=True) - print(f"Converted {bluefile} to SigMF archive at {sigmf_path}") - self.assertIsInstance(meta, sigmf.SigMFFile) diff --git a/tests/test_convert_blue.py b/tests/test_convert_blue.py new file mode 100644 index 0000000..0f1ea58 --- /dev/null +++ b/tests/test_convert_blue.py @@ -0,0 +1,120 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +"""Tests for BLUE Converter""" + +import tempfile +import unittest +from pathlib import Path +from typing import cast + +import sigmf +from sigmf.convert.blue import blue_to_sigmf + +from .testdata import NONSIGMF_ENV, NONSIGMF_REPO + + +class TestBlueConverter(unittest.TestCase): + """BLUE converter tests using external files""" + + def setUp(self) -> None: + """setup paths to blue files""" + self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.tmp_dir.name) + if not NONSIGMF_REPO: + # skip test if environment variable not set + self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with BLUE files to run test.") + + # look for blue files in blue/ directory + blue_dir = NONSIGMF_REPO / "blue" + self.bluefiles = [] + if blue_dir.exists(): + for ext in ["*.cdif", "*.tmp"]: + self.bluefiles.extend(blue_dir.glob(f"**/{ext}")) + + if not self.bluefiles: + self.fail(f"No BLUE files (*.cdif, *.tmp) found in {blue_dir}.") + + def tearDown(self) -> None: + """clean up temporary directory""" + self.tmp_dir.cleanup() + + def _validate_ncd_structure(self, meta, expected_file): + """validate basic NCD structure""" + self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") + self.assertIn("core:trailing_bytes", meta._metadata["global"]) + captures = meta.get_captures() + self.assertGreater(len(captures), 0, "Should have at least one capture") + self.assertIn("core:header_bytes", captures[0]) + + # validate SigMF spec compliance: NCDs must not have metadata_only field + global_meta = meta._metadata["global"] + has_dataset = "core:dataset" in global_meta + has_metadata_only = "core:metadata_only" in global_meta + + self.assertTrue(has_dataset, "NCD should have core:dataset field") + self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") + + return captures + + def _validate_auto_detection(self, file_path): + """validate auto-detection works and returns valid NCD""" + meta_auto_raw = sigmf.fromfile(file_path) + # auto-detection should return SigMFFile, not SigMFCollection + self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) + meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) + # data_file might be Path or str, so convert both for comparison + self.assertEqual(str(meta_auto.data_file), str(file_path)) + self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) + return meta_auto + + def test_blue_to_sigmf_pair(self): + """test standard blue to sigmf conversion with file pairs""" + for bluefile in self.bluefiles: + sigmf_path = self.tmp_path / bluefile.stem + meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path) + if not meta.get_global_field("core:metadata_only"): + meta.read_samples(count=10) + self.assertIsInstance(meta, sigmf.SigMFFile) + + def test_blue_to_sigmf_archive(self): + """test blue to sigmf conversion with archive output""" + for bluefile in self.bluefiles: + sigmf_path = self.tmp_path / f"{bluefile.stem}_archive" + meta = blue_to_sigmf(blue_path=bluefile, out_path=str(sigmf_path), create_archive=True) + self.assertIsInstance(meta, sigmf.SigMFFile) + + def test_blue_to_sigmf_ncd(self): + """test blue to sigmf conversion as Non-Conforming Dataset""" + for bluefile in self.bluefiles: + meta = blue_to_sigmf(blue_path=str(bluefile), create_ncd=True) + + # validate basic NCD structure + self._validate_ncd_structure(meta, bluefile) + + # verify this is metadata-only (no separate data file 
created) + self.assertIsInstance(meta.data_buffer, type(meta.data_buffer)) + + # test that data can be read if not metadata-only + if not meta.get_global_field("core:metadata_only"): + _ = meta.read_samples(count=10) + + def test_blue_auto_detection(self): + """test automatic BLUE detection through fromfile()""" + for bluefile in self.bluefiles: + # validate auto-detection works + self._validate_auto_detection(bluefile) + + def test_blue_directory_files_ncd(self): + """test NCD conversion""" + for blue_file in self.bluefiles: + meta = blue_to_sigmf(blue_path=str(blue_file), create_ncd=True) + + # validate basic NCD structure + self._validate_ncd_structure(meta, blue_file) + + # validate auto-detection also works + self._validate_auto_detection(blue_file) diff --git a/tests/test_convert_wav.py b/tests/test_convert_wav.py new file mode 100644 index 0000000..c4e5670 --- /dev/null +++ b/tests/test_convert_wav.py @@ -0,0 +1,204 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +"""Tests for WAV Converter""" + +import tempfile +import unittest +import wave +from pathlib import Path +from typing import cast + +import numpy as np + +import sigmf +from sigmf.convert.wav import wav_to_sigmf + +from .testdata import NONSIGMF_ENV, NONSIGMF_REPO + + +class TestWAVConverter(unittest.TestCase): + """wav converter tests""" + + def setUp(self) -> None: + """temp wav file for testing""" + self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.tmp_dir.name) + self.wav_path = self.tmp_path / "foo.wav" + samp_rate = 48000 + duration_s = 0.1 + ttt = np.linspace(0, duration_s, int(samp_rate * duration_s), endpoint=False) + freq = 440 # A4 note + self.audio_data = 0.5 * np.sin(2 * np.pi * freq * ttt) + # convert float audio to 16-bit PCM integer format + audio_int16 = (self.audio_data * 32767).astype(np.int16) + + # write wav file using built-in wave module + with wave.open(str(self.wav_path), "wb") as wav_file: + wav_file.setnchannels(1) # mono + wav_file.setsampwidth(2) # 16-bit = 2 bytes + wav_file.setframerate(samp_rate) + wav_file.writeframes(audio_int16.tobytes()) + + def tearDown(self) -> None: + """clean up temporary directory""" + self.tmp_dir.cleanup() + + def _validate_ncd_structure(self, meta, expected_file): + """validate basic NCD structure""" + self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") + self.assertIn("core:trailing_bytes", meta._metadata["global"]) + captures = meta.get_captures() + self.assertGreater(len(captures), 0, "Should have at least one capture") + self.assertIn("core:header_bytes", captures[0]) + + # validate SigMF spec compliance: NCDs must not have metadata_only field + global_meta = meta._metadata["global"] + has_dataset = "core:dataset" in global_meta + has_metadata_only = "core:metadata_only" in global_meta + + self.assertTrue(has_dataset, "NCD should have core:dataset field") + self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") + + return captures + + def _validate_dataset_key(self, meta, expected_filename): + """validate DATASET_KEY is correctly set""" + dataset_filename = meta.get_global_field("core:dataset") + self.assertEqual(dataset_filename, expected_filename, "DATASET_KEY should contain filename") + self.assertIsInstance(dataset_filename, str, "DATASET_KEY should be a string") + + def _validate_auto_detection(self, file_path): + """validate auto-detection 
works and returns valid NCD""" + meta_auto_raw = sigmf.fromfile(file_path) + # auto-detection should return SigMFFile, not SigMFCollection + self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) + meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) + # data_file might be Path or str, so convert both for comparison + self.assertEqual(str(meta_auto.data_file), str(file_path)) + self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) + return meta_auto + + def test_wav_to_sigmf_pair(self): + """test standard wav to sigmf conversion with file pairs""" + sigmf_path = self.tmp_path / "bar.tmp" + meta = wav_to_sigmf(wav_path=str(self.wav_path), out_path=str(sigmf_path)) + data = meta.read_samples() + # allow numerical differences due to PCM quantization + self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) + self.assertGreater(len(data), 0, "Should read some samples") + filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) + self.assertTrue(filenames["data_fn"].exists(), "dataset path missing") + self.assertTrue(filenames["meta_fn"].exists(), "metadata path missing") + + def test_wav_to_sigmf_archive(self): + """test wav to sigmf conversion with archive output""" + sigmf_path = self.tmp_path / "baz.ext" + wav_to_sigmf(wav_path=str(self.wav_path), out_path=str(sigmf_path), create_archive=True) + filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) + self.assertTrue(filenames["archive_fn"].exists(), "archive path missing") + + def test_wav_to_sigmf_ncd(self): + """test wav to sigmf conversion as Non-Conforming Dataset""" + meta = wav_to_sigmf(wav_path=str(self.wav_path), create_ncd=True) + + # validate basic NCD structure + captures = self._validate_ncd_structure(meta, self.wav_path) + self.assertEqual(len(captures), 1, "Should have exactly one capture") + + # validate DATASET_KEY is set for NCD + self._validate_dataset_key(meta, self.wav_path.name) + + # header_bytes should be non-zero for WAV files + header_bytes = captures[0]["core:header_bytes"] + self.assertGreater(header_bytes, 0, "WAV files should have non-zero header bytes") + + # verify data can still be read correctly from NCD + data = meta.read_samples() + self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) + self.assertGreater(len(data), 0, "Should read some samples") + + # verify this is metadata-only (no separate data file created) + self.assertIsInstance(meta.data_buffer, type(meta.data_buffer)) + + def test_wav_auto_detection(self): + """test automatic WAV detection through fromfile()""" + # validate auto-detection works + meta_raw = self._validate_auto_detection(self.wav_path) + meta = cast(sigmf.SigMFFile, meta_raw) + + # validate DATASET_KEY is set for auto-detected NCD + self._validate_dataset_key(meta, self.wav_path.name) + + # verify data can be read correctly + data = meta.read_samples() + self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) + self.assertGreater(len(data), 0, "Should read some samples") + + +class TestWAVConverterWithRealFiles(unittest.TestCase): + """Test WAV converter with real example files if available""" + + def setUp(self) -> None: + """setup paths to example wav files""" + self.wav_dir = None + if NONSIGMF_REPO: + wav_path = NONSIGMF_REPO / "wav" + if wav_path.exists(): + self.wav_dir = wav_path + self.wav_files = list(wav_path.glob("*.wav")) + + def _validate_ncd_structure(self, meta, expected_file): + """validate basic NCD structure""" + self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") + 
self.assertIn("core:trailing_bytes", meta._metadata["global"]) + captures = meta.get_captures() + self.assertGreater(len(captures), 0, "Should have at least one capture") + self.assertIn("core:header_bytes", captures[0]) + + # validate SigMF spec compliance: NCDs must not have metadata_only field + global_meta = meta._metadata["global"] + has_dataset = "core:dataset" in global_meta + has_metadata_only = "core:metadata_only" in global_meta + + self.assertTrue(has_dataset, "NCD should have core:dataset field") + self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") + + return captures + + def _validate_dataset_key(self, meta, expected_filename): + """validate DATASET_KEY is correctly set""" + dataset_filename = meta.get_global_field("core:dataset") + self.assertEqual(dataset_filename, expected_filename, "DATASET_KEY should contain filename") + + def _validate_auto_detection(self, file_path): + """validate auto-detection works and returns valid NCD""" + meta_auto_raw = sigmf.fromfile(file_path) + # auto-detection should return SigMFFile, not SigMFCollection + self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) + meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) + # data_file might be Path or str, so convert both for comparison + self.assertEqual(str(meta_auto.data_file), str(file_path)) + self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) + return meta_auto + + def test_real_wav_files_ncd(self): + """test NCD conversion with real example wav files""" + if not self.wav_dir or not hasattr(self, "wav_files"): + self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with wav/ directory to run test.") + + if not self.wav_files: + self.skipTest(f"No .wav files found in {self.wav_dir}") + + for wav_file in self.wav_files: + meta = wav_to_sigmf(wav_path=str(wav_file), create_ncd=True) + + # validate basic NCD structure + self._validate_ncd_structure(meta, wav_file) + + # validate auto-detection also works + meta_auto = self._validate_auto_detection(wav_file) + self._validate_dataset_key(meta_auto, wav_file.name) From 062159b28779b289ff25122603894c39a4e610bb Mon Sep 17 00:00:00 2001 From: Teque5 Date: Fri, 2 Jan 2026 10:53:48 -0800 Subject: [PATCH 15/15] refactor tests; failing test should be fixed after feature/unify-sample-access merged --- sigmf/convert/blue.py | 4 +- tests/test_convert_blue.py | 114 +++++++------------- tests/test_convert_wav.py | 211 +++++++++++++++---------------------- 3 files changed, 127 insertions(+), 202 deletions(-) diff --git a/sigmf/convert/blue.py b/sigmf/convert/blue.py index e9a0633..9e5943c 100644 --- a/sigmf/convert/blue.py +++ b/sigmf/convert/blue.py @@ -402,7 +402,7 @@ def _build_common_metadata( is_ncd : bool, optional If True, adds NCD-specific fields. blue_file_name : str, optional - Original BLUE file name (required for NCD). + Original BLUE file name (for NCD). trailing_bytes : int, optional Number of trailing bytes (for NCD). 
@@ -450,8 +450,8 @@ def get_tag(tag): # add NCD-specific fields if is_ncd: - global_info[SigMFFile.TRAILING_BYTES_KEY] = trailing_bytes global_info[SigMFFile.DATASET_KEY] = blue_file_name + global_info[SigMFFile.TRAILING_BYTES_KEY] = trailing_bytes # merge HCB values into metadata global_info["blue:fixed"] = h_fixed diff --git a/tests/test_convert_blue.py b/tests/test_convert_blue.py index 0f1ea58..b89f87d 100644 --- a/tests/test_convert_blue.py +++ b/tests/test_convert_blue.py @@ -9,15 +9,17 @@ import tempfile import unittest from pathlib import Path -from typing import cast + +import numpy as np import sigmf from sigmf.convert.blue import blue_to_sigmf +from .test_convert_wav import _validate_ncd from .testdata import NONSIGMF_ENV, NONSIGMF_REPO -class TestBlueConverter(unittest.TestCase): +class TestBlueWithNonSigMFRepo(unittest.TestCase): """BLUE converter tests using external files""" def setUp(self) -> None: @@ -28,93 +30,57 @@ def setUp(self) -> None: # skip test if environment variable not set self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with BLUE files to run test.") - # look for blue files in blue/ directory + # glob all files in blue/ directory blue_dir = NONSIGMF_REPO / "blue" - self.bluefiles = [] + self.blue_paths = [] if blue_dir.exists(): for ext in ["*.cdif", "*.tmp"]: - self.bluefiles.extend(blue_dir.glob(f"**/{ext}")) - - if not self.bluefiles: + self.blue_paths.extend(blue_dir.glob(f"**/{ext}")) + if not self.blue_paths: self.fail(f"No BLUE files (*.cdif, *.tmp) found in {blue_dir}.") def tearDown(self) -> None: """clean up temporary directory""" self.tmp_dir.cleanup() - def _validate_ncd_structure(self, meta, expected_file): - """validate basic NCD structure""" - self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") - self.assertIn("core:trailing_bytes", meta._metadata["global"]) - captures = meta.get_captures() - self.assertGreater(len(captures), 0, "Should have at least one capture") - self.assertIn("core:header_bytes", captures[0]) - - # validate SigMF spec compliance: NCDs must not have metadata_only field - global_meta = meta._metadata["global"] - has_dataset = "core:dataset" in global_meta - has_metadata_only = "core:metadata_only" in global_meta - - self.assertTrue(has_dataset, "NCD should have core:dataset field") - self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") - - return captures - - def _validate_auto_detection(self, file_path): - """validate auto-detection works and returns valid NCD""" - meta_auto_raw = sigmf.fromfile(file_path) - # auto-detection should return SigMFFile, not SigMFCollection - self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) - meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) - # data_file might be Path or str, so convert both for comparison - self.assertEqual(str(meta_auto.data_file), str(file_path)) - self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) - return meta_auto - - def test_blue_to_sigmf_pair(self): + def test_sigmf_pair(self): """test standard blue to sigmf conversion with file pairs""" - for bluefile in self.bluefiles: - sigmf_path = self.tmp_path / bluefile.stem - meta = blue_to_sigmf(blue_path=bluefile, out_path=sigmf_path) - if not meta.get_global_field("core:metadata_only"): - meta.read_samples(count=10) + for blue_path in self.blue_paths: + sigmf_path = self.tmp_path / blue_path.stem + meta = blue_to_sigmf(blue_path=blue_path, out_path=sigmf_path) self.assertIsInstance(meta, sigmf.SigMFFile) + # 
FIXME: replace the block below with the commented assertion once PR #121 is merged
+            if not meta.get_global_field("core:metadata_only"):
+                _ = meta.read_samples(count=10)
+            # check sample read consistency
+            # np.testing.assert_array_equal(meta.read_samples(count=10), meta[0:10])
 
-    def test_blue_to_sigmf_archive(self):
+    def test_sigmf_archive(self):
         """test blue to sigmf conversion with archive output"""
-        for bluefile in self.bluefiles:
-            sigmf_path = self.tmp_path / f"{bluefile.stem}_archive"
-            meta = blue_to_sigmf(blue_path=bluefile, out_path=str(sigmf_path), create_archive=True)
-            self.assertIsInstance(meta, sigmf.SigMFFile)
-
-    def test_blue_to_sigmf_ncd(self):
-        """test blue to sigmf conversion as Non-Conforming Dataset"""
-        for bluefile in self.bluefiles:
-            meta = blue_to_sigmf(blue_path=str(bluefile), create_ncd=True)
-
-            # validate basic NCD structure
-            self._validate_ncd_structure(meta, bluefile)
-
-            # verify this is metadata-only (no separate data file created)
-            self.assertIsInstance(meta.data_buffer, type(meta.data_buffer))
+        for blue_path in self.blue_paths:
+            sigmf_path = self.tmp_path / f"{blue_path.stem}_archive"
+            meta = blue_to_sigmf(blue_path=blue_path, out_path=sigmf_path, create_archive=True)
+            # now read newly created archive
+            arc_meta = sigmf.fromfile(sigmf_path)
+            self.assertIsInstance(arc_meta, sigmf.SigMFFile)
+            # FIXME: replace the block below with the commented assertion once PR #121 is merged
+            if not arc_meta.get_global_field("core:metadata_only"):
+                _ = arc_meta.read_samples(count=10)
+            # check sample read consistency
+            # np.testing.assert_array_equal(meta.read_samples(count=10), meta[0:10])
+
+    def test_create_ncd(self):
+        """test direct NCD conversion"""
+        for blue_path in self.blue_paths:
+            meta = blue_to_sigmf(blue_path=blue_path)
+            _validate_ncd(self, meta, blue_path)
 
             # test that data can be read if not metadata-only
             if not meta.get_global_field("core:metadata_only"):
                 _ = meta.read_samples(count=10)
 
-    def test_blue_auto_detection(self):
-        """test automatic BLUE detection through fromfile()"""
-        for bluefile in self.bluefiles:
-            # validate auto-detection works
-            self._validate_auto_detection(bluefile)
-
-    def test_blue_directory_files_ncd(self):
-        """test NCD conversion"""
-        for blue_file in self.bluefiles:
-            meta = blue_to_sigmf(blue_path=str(blue_file), create_ncd=True)
-
-            # validate basic NCD structure
-            self._validate_ncd_structure(meta, blue_file)
-
-            # validate auto-detection also works
-            self._validate_auto_detection(blue_file)
+    def test_autodetect_ncd(self):
+        """test automatic NCD conversion"""
+        for blue_path in self.blue_paths:
+            meta = sigmf.fromfile(blue_path)
+            _validate_ncd(self, meta, blue_path)
diff --git a/tests/test_convert_wav.py b/tests/test_convert_wav.py
index c4e5670..12e3aba 100644
--- a/tests/test_convert_wav.py
+++ b/tests/test_convert_wav.py
@@ -10,7 +10,6 @@
 import unittest
 import wave
 from pathlib import Path
-from typing import cast
 
 import numpy as np
 
@@ -20,11 +19,28 @@
 from .testdata import NONSIGMF_ENV, NONSIGMF_REPO
 
 
+def _validate_ncd(test, meta, target_path):
+    """non-conforming dataset has a specific structure"""
+    test.assertEqual(str(meta.data_file), str(target_path), "Auto-detected NCD should point to original file")
+    test.assertIsInstance(meta, sigmf.SigMFFile)
+
+    global_info = meta.get_global_info()
+    capture_info = meta.get_captures()
+
+    # validate NCD SigMF spec compliance
+    test.assertGreater(len(capture_info), 0, "Should have at least one capture")
+    test.assertIn("core:header_bytes", capture_info[0])
+    
test.assertGreater(capture_info[0]["core:header_bytes"], 0, "Should have non-zero core:header_bytes field") + test.assertIn("core:trailing_bytes", global_info, "Should have core:trailing_bytes field.") + test.assertIn("core:dataset", global_info, "Should have core:dataset field.") + test.assertNotIn("core:metadata_only", global_info, "Should NOT have core:metadata_only field.") + + class TestWAVConverter(unittest.TestCase): - """wav converter tests""" + """Create a realistic WAV file and test conversion methods.""" def setUp(self) -> None: - """temp wav file for testing""" + """temp WAV file with tone for testing""" self.tmp_dir = tempfile.TemporaryDirectory() self.tmp_path = Path(self.tmp_dir.name) self.wav_path = self.tmp_path / "foo.wav" @@ -47,49 +63,14 @@ def tearDown(self) -> None: """clean up temporary directory""" self.tmp_dir.cleanup() - def _validate_ncd_structure(self, meta, expected_file): - """validate basic NCD structure""" - self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") - self.assertIn("core:trailing_bytes", meta._metadata["global"]) - captures = meta.get_captures() - self.assertGreater(len(captures), 0, "Should have at least one capture") - self.assertIn("core:header_bytes", captures[0]) - - # validate SigMF spec compliance: NCDs must not have metadata_only field - global_meta = meta._metadata["global"] - has_dataset = "core:dataset" in global_meta - has_metadata_only = "core:metadata_only" in global_meta - - self.assertTrue(has_dataset, "NCD should have core:dataset field") - self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") - - return captures - - def _validate_dataset_key(self, meta, expected_filename): - """validate DATASET_KEY is correctly set""" - dataset_filename = meta.get_global_field("core:dataset") - self.assertEqual(dataset_filename, expected_filename, "DATASET_KEY should contain filename") - self.assertIsInstance(dataset_filename, str, "DATASET_KEY should be a string") - - def _validate_auto_detection(self, file_path): - """validate auto-detection works and returns valid NCD""" - meta_auto_raw = sigmf.fromfile(file_path) - # auto-detection should return SigMFFile, not SigMFCollection - self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) - meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) - # data_file might be Path or str, so convert both for comparison - self.assertEqual(str(meta_auto.data_file), str(file_path)) - self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) - return meta_auto - def test_wav_to_sigmf_pair(self): """test standard wav to sigmf conversion with file pairs""" sigmf_path = self.tmp_path / "bar.tmp" meta = wav_to_sigmf(wav_path=str(self.wav_path), out_path=str(sigmf_path)) data = meta.read_samples() + self.assertGreater(len(data), 0, "Should read some samples") # allow numerical differences due to PCM quantization self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) - self.assertGreater(len(data), 0, "Should read some samples") filenames = sigmf.sigmffile.get_sigmf_filenames(sigmf_path) self.assertTrue(filenames["data_fn"].exists(), "dataset path missing") self.assertTrue(filenames["meta_fn"].exists(), "metadata path missing") @@ -104,101 +85,79 @@ def test_wav_to_sigmf_archive(self): def test_wav_to_sigmf_ncd(self): """test wav to sigmf conversion as Non-Conforming Dataset""" meta = wav_to_sigmf(wav_path=str(self.wav_path), create_ncd=True) - - # validate basic NCD structure - captures = self._validate_ncd_structure(meta, 
self.wav_path) - self.assertEqual(len(captures), 1, "Should have exactly one capture") - - # validate DATASET_KEY is set for NCD - self._validate_dataset_key(meta, self.wav_path.name) - - # header_bytes should be non-zero for WAV files - header_bytes = captures[0]["core:header_bytes"] - self.assertGreater(header_bytes, 0, "WAV files should have non-zero header bytes") + _validate_ncd(self, meta, self.wav_path) # verify data can still be read correctly from NCD data = meta.read_samples() - self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) self.assertGreater(len(data), 0, "Should read some samples") - - # verify this is metadata-only (no separate data file created) - self.assertIsInstance(meta.data_buffer, type(meta.data_buffer)) - - def test_wav_auto_detection(self): - """test automatic WAV detection through fromfile()""" - # validate auto-detection works - meta_raw = self._validate_auto_detection(self.wav_path) - meta = cast(sigmf.SigMFFile, meta_raw) - - # validate DATASET_KEY is set for auto-detected NCD - self._validate_dataset_key(meta, self.wav_path.name) - - # verify data can be read correctly - data = meta.read_samples() + # allow numerical differences due to PCM quantization self.assertTrue(np.allclose(self.audio_data, data, atol=1e-4)) - self.assertGreater(len(data), 0, "Should read some samples") -class TestWAVConverterWithRealFiles(unittest.TestCase): +class TestWAVWithNonSigMFRepo(unittest.TestCase): """Test WAV converter with real example files if available""" def setUp(self) -> None: """setup paths to example wav files""" - self.wav_dir = None - if NONSIGMF_REPO: - wav_path = NONSIGMF_REPO / "wav" - if wav_path.exists(): - self.wav_dir = wav_path - self.wav_files = list(wav_path.glob("*.wav")) - - def _validate_ncd_structure(self, meta, expected_file): - """validate basic NCD structure""" - self.assertEqual(meta.data_file, expected_file, "NCD should point to original file") - self.assertIn("core:trailing_bytes", meta._metadata["global"]) - captures = meta.get_captures() - self.assertGreater(len(captures), 0, "Should have at least one capture") - self.assertIn("core:header_bytes", captures[0]) - - # validate SigMF spec compliance: NCDs must not have metadata_only field - global_meta = meta._metadata["global"] - has_dataset = "core:dataset" in global_meta - has_metadata_only = "core:metadata_only" in global_meta - - self.assertTrue(has_dataset, "NCD should have core:dataset field") - self.assertFalse(has_metadata_only, "NCD should NOT have core:metadata_only field (spec violation)") - - return captures - - def _validate_dataset_key(self, meta, expected_filename): - """validate DATASET_KEY is correctly set""" - dataset_filename = meta.get_global_field("core:dataset") - self.assertEqual(dataset_filename, expected_filename, "DATASET_KEY should contain filename") - - def _validate_auto_detection(self, file_path): - """validate auto-detection works and returns valid NCD""" - meta_auto_raw = sigmf.fromfile(file_path) - # auto-detection should return SigMFFile, not SigMFCollection - self.assertIsInstance(meta_auto_raw, sigmf.SigMFFile) - meta_auto = cast(sigmf.SigMFFile, meta_auto_raw) - # data_file might be Path or str, so convert both for comparison - self.assertEqual(str(meta_auto.data_file), str(file_path)) - self.assertIn("core:trailing_bytes", meta_auto._metadata["global"]) - return meta_auto - - def test_real_wav_files_ncd(self): - """test NCD conversion with real example wav files""" - if not self.wav_dir or not hasattr(self, "wav_files"): - self.skipTest(f"Set 
{NONSIGMF_ENV} environment variable to path with wav/ directory to run test.")
-
-        if not self.wav_files:
-            self.skipTest(f"No .wav files found in {self.wav_dir}")
-
-        for wav_file in self.wav_files:
-            meta = wav_to_sigmf(wav_path=str(wav_file), create_ncd=True)
-
-            # validate basic NCD structure
-            self._validate_ncd_structure(meta, wav_file)
-
-            # validate auto-detection also works
-            meta_auto = self._validate_auto_detection(wav_file)
-            self._validate_dataset_key(meta_auto, wav_file.name)
+        self.tmp_dir = tempfile.TemporaryDirectory()
+        self.tmp_path = Path(self.tmp_dir.name)
+        if not NONSIGMF_REPO:
+            # skip test if environment variable not set
+            self.skipTest(f"Set {NONSIGMF_ENV} environment variable to path with WAV files to run test.")
+
+        # glob all files in wav/ directory
+        wav_dir = NONSIGMF_REPO / "wav"
+        self.wav_paths = []
+        if wav_dir.exists():
+            self.wav_paths = list(wav_dir.glob("*.wav"))
+        if not self.wav_paths:
+            self.fail(f"No WAV files (*.wav) found in {wav_dir}.")
+
+    def tearDown(self) -> None:
+        """clean up temporary directory"""
+        self.tmp_dir.cleanup()
+
+    def test_sigmf_pair(self):
+        """test standard wav to sigmf conversion with file pairs"""
+        for wav_path in self.wav_paths:
+            sigmf_path = self.tmp_path / wav_path.stem
+            meta = wav_to_sigmf(wav_path=wav_path, out_path=sigmf_path)
+            self.assertIsInstance(meta, sigmf.SigMFFile)
+            # FIXME: replace the block below with the commented assertion once PR #121 is merged
+            if not meta.get_global_field("core:metadata_only"):
+                _ = meta.read_samples(count=10)
+            # check sample read consistency
+            # np.testing.assert_array_equal(meta.read_samples(count=10), meta[0:10])
+
+    def test_sigmf_archive(self):
+        """test wav to sigmf conversion with archive output"""
+        for wav_path in self.wav_paths:
+            sigmf_path = self.tmp_path / f"{wav_path.stem}_archive"
+            meta = wav_to_sigmf(wav_path=wav_path, out_path=sigmf_path, create_archive=True)
+            # now read newly created archive
+            arc_meta = sigmf.fromfile(sigmf_path)
+            # FIXME: this error, related to sample_count being 0, should be fixed by PR #121
+            print("debug", arc_meta)
+            print("debug len", len(arc_meta))
+            print("debug sample_count", arc_meta.sample_count)
+            self.assertIsInstance(arc_meta, sigmf.SigMFFile)
+            # FIXME: replace the block below with the commented assertion once PR #121 is merged
+            if not arc_meta.get_global_field("core:metadata_only"):
+                _ = arc_meta.read_samples(count=10)
+            # check sample read consistency
+            # np.testing.assert_array_equal(meta.read_samples(count=10), meta[0:10])
+
+    def test_create_ncd(self):
+        """test direct NCD conversion"""
+        for wav_path in self.wav_paths:
+            meta = wav_to_sigmf(wav_path=wav_path)
+            _validate_ncd(self, meta, wav_path)
+
+            # test file read
+            _ = meta.read_samples(count=10)
+
+    def test_autodetect_ncd(self):
+        """test automatic NCD conversion"""
+        for wav_path in self.wav_paths:
+            meta = sigmf.fromfile(wav_path)
+            _validate_ncd(self, meta, wav_path)
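
End to end, the auto-detection path exercised by these tests can be sketched as follows; the paths are hypothetical and the printed values illustrative:

    import sigmf

    # magic bytes b"RIFF" route fromfile() to wav_to_sigmf(..., create_ncd=True)
    wav_meta = sigmf.fromfile("tone.wav")
    print(wav_meta.get_global_field("core:dataset"))  # "tone.wav"

    # magic bytes b"BLUE" route fromfile() to blue_to_sigmf(..., create_ncd=True)
    blue_meta = sigmf.fromfile("capture.cdif")
    print(blue_meta.get_captures()[0]["core:header_bytes"])  # 512 + extended header size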