From 68f0d377f51f443cad6a0eaf21b15fc31789d84a Mon Sep 17 00:00:00 2001 From: Mathieu Vachon Date: Mon, 18 May 2026 10:43:17 -0400 Subject: [PATCH] fix(phy): D8PSK R2/3 connected light-sync recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-GUI OTASim test at AWGN SNR=20 dB negotiated D8PSK R2/3 (correctly — the rate selector's clean-fading threshold of >=18 dB was genuinely crossed by the honest idle-noise SNR estimator) and then decode failed catastrophically: all 8 codewords FAIL with |llr|_avg ~ 2.7, BRAVO retransmits, ARQ stalls, QSO dies. Codex's controlled offline sweep (ofdm_snr_probe + decode_bench, both extended to take --mod and --cw-count) isolated the failure to the streaming + connected path, not the D8PSK demap/LDPC: | SNR | direct probe | connected pre-fix | connected after | | 5 | 3/8 (fail) | 0/4 | 0/4 | | 8 | 8/8 (pass) | 1/4 | 1/4 | | 10 | 8/8 (pass) | 0/4 | 0/4 | | 12 | 8/8 (pass) | 0/4 | 4/4 | | 14+ | 8/8 (pass) | 0-3/4 | 4/4 | So D8PSK R2/3 PHY closes at AWGN SNR~8 dB (Shannon-limit territory), but the connected streaming path was broken at every SNR. Root cause was the multi-candidate light-sync recovery in streaming_ofdm_decode at line ~1028: DQPSK-tuned retry window (+/-8 samples, partial-CW acceptance) doesn't handle D8PSK's tighter timing tolerance and admits low-confidence false locks as success. This change (Codex round 1): - D8PSK-only: widen retry window to {-32, -24, -16, -8, +8, +16, +24, +32}, prefer earlier candidates first (late light-sync locks show up as positive LTS phase slope). - D8PSK-only: require full fixed-frame decode to accept a retry (partial CW success no longer counts), preventing false-positive recoveries. - D8PSK-only: trigger recovery on partial-fixed-frame failures (>=2 codewords attempted, partial CW success), not just zero-CW. - Boundary safety: skip negative deltas that would underflow the ring buffer at the start of a stream. 
- Non-D8PSK behavior otherwise preserved (+/- 8 deltas, partial acceptance, same gating); the negative-delta boundary skip is not D8PSK-gated, so the default -8 retry is likewise skipped when sync_position_ < 8 before the ring buffer has filled. Also in this change: - tools/ofdm_snr_probe.cpp + tools/decode_bench.cpp: --mod and --cw-count flags so the controlled sweep is reproducible. - tools/cli_simulator.cpp: spawned OTASim's tokens now carry the admin role. cli_simulator calls SetChannel to configure the spawned daemon's channel; PR #30's admin gate denied that with the previously-operator-only tokens, breaking CLISyntheticNotch. Test harness fully owns its sandbox; production servers should not hand out admin tokens this freely. Test gate (user's unrestricted Mac): cmake --build build -j4 ctest --test-dir build --output-on-failure -j4 -> 83/83 PASS (after cli_simulator token fix; D8PSK fix doesn't regress any existing test on its own). 3-perspective check: - PHY: D8PSK demapper + LDPC unchanged; only the front-end timing-recovery policy was tightened for D8PSK's larger amplitude sensitivity at high-modulation index. - DSP: the retry-window, trigger and acceptance changes are gated on (modulation == D8PSK), so DQPSK timing recovery keeps its +/-8 window and partial acceptance. The boundary check on negative deltas applies to every modulation and avoids ring-buffer underflow. - Operator: live OTASim two-GUI handshake at SNR>=12 dB now completes via D8PSK R2/3 instead of timing out in ARQ. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/gui/modem/streaming_ofdm_decode.cpp | 43 ++++++++++++++++++------- tools/cli_simulator.cpp | 9 ++++-- tools/decode_bench.cpp | 20 +++++++----- tools/ofdm_snr_probe.cpp | 35 ++++++++++++++++---- 4 files changed, 79 insertions(+), 28 deletions(-) diff --git a/src/gui/modem/streaming_ofdm_decode.cpp b/src/gui/modem/streaming_ofdm_decode.cpp index 427cd32..5fbbaad 100644 --- a/src/gui/modem/streaming_ofdm_decode.cpp +++ b/src/gui/modem/streaming_ofdm_decode.cpp @@ -1028,15 +1028,27 @@ void StreamingDecoder::decodeCurrentFrame() { // Multi-candidate light-sync recovery (connected OFDM): // If decode fails at the detected sync point, retry nearby timing candidates. 
- // detectDataSync() scans with coarse steps, and fading can shift the best - // decode point by a few samples even when correlation looks valid. - if (!result.success && result.codewords_ok == 0 && is_ofdm && connected_) { - // Keep this recovery path tight. Moderate-fading hardware traces showed - // low-confidence syncs can pass the LLR gate, then repeated full fixed-frame LDPC - // retries burn several seconds with zero recoveries and trigger ARQ - // timeouts. Nearby timing retry is still useful for clean, high-corr - // locks, but beyond +/-8 samples the candidate is usually a bad lock. - const int retry_deltas[] = {8, -8}; + // detectDataSync() scans with coarse steps, and clean light-preamble locks can + // still land late enough to leave only part of a fixed frame decodable. + const int attempted_codewords = result.codewords_ok + result.codewords_failed; + const bool partial_fixed_ofdm_failure = + attempted_codewords >= 2 && + attempted_codewords <= v2::kMaxFixedFrameCodewords && + result.codewords_ok < attempted_codewords; + const bool d8psk_data_mode = (current_modulation_ == Modulation::D8PSK); + if (!result.success && is_ofdm && connected_ && + (result.codewords_ok == 0 || (d8psk_data_mode && partial_fixed_ofdm_failure))) { + // Keep this recovery path gated by high sync correlation. Moderate-fading + // hardware traces showed low-confidence syncs can pass the LLR gate, then + // repeated full fixed-frame LDPC retries burn several seconds with zero + // recoveries and trigger ARQ timeouts. Prefer earlier candidates first: + // late light-sync locks show up as a positive LTS phase slope. + const int d8psk_retry_deltas[] = {-32, -24, -16, -8, 8, 16, 24, 32}; + const int default_retry_deltas[] = {8, -8}; + const int* retry_deltas = d8psk_data_mode ? d8psk_retry_deltas : default_retry_deltas; + const size_t retry_delta_count = d8psk_data_mode + ? 
(sizeof(d8psk_retry_deltas) / sizeof(d8psk_retry_deltas[0])) + : (sizeof(default_retry_deltas) / sizeof(default_retry_deltas[0])); bool recovered = false; int recovered_delta = 0; uint64_t recovery_attempts = 0; @@ -1061,7 +1073,12 @@ void StreamingDecoder::decodeCurrentFrame() { }; if (allow_sync_recovery) { - for (int delta : retry_deltas) { + for (size_t retry_idx = 0; retry_idx < retry_delta_count; ++retry_idx) { + const int delta = retry_deltas[retry_idx]; + if (delta < 0 && total_fed_ < buffer_capacity_samples_ && + sync_position_ < static_cast(-delta)) { + continue; + } recovery_attempts++; size_t retry_sync = wrapRingIndexLocked(sync_position_ + buffer_capacity_samples_ + delta); @@ -1107,7 +1124,11 @@ void StreamingDecoder::decodeCurrentFrame() { } auto retry_result = decodeFrame(retry_bits, sync_snr_, sync_cfo_); - if (!(retry_result.success || retry_result.codewords_ok > 0)) { + if (d8psk_data_mode) { + if (!retry_result.success) { + continue; + } + } else if (!(retry_result.success || retry_result.codewords_ok > 0)) { continue; } diff --git a/tools/cli_simulator.cpp b/tools/cli_simulator.cpp index 3db1436..7ebdfd6 100644 --- a/tools/cli_simulator.cpp +++ b/tools/cli_simulator.cpp @@ -413,8 +413,13 @@ class LocalOtaServer { if (error) *error = "failed to write OTASim token file"; return false; } - out << kOtaAlphaToken << ":ALPHA:Alpha station\n"; - out << kOtaBravoToken << ":BRAVO:Bravo station\n"; + // cli_simulator calls SetChannel (admin-gated since PR #30) to + // configure the spawned OTASim's channel model. Both tokens get + // admin role here because the test harness fully owns its own + // sandbox; production servers should not hand out admin tokens + // this freely. 
+ out << kOtaAlphaToken << ":ALPHA:Alpha station:admin\n"; + out << kOtaBravoToken << ":BRAVO:Bravo station:admin\n"; } const int log_fd = ::open(log_path_.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0600); diff --git a/tools/decode_bench.cpp b/tools/decode_bench.cpp index fd7e3ea..ae22112 100644 --- a/tools/decode_bench.cpp +++ b/tools/decode_bench.cpp @@ -315,9 +315,11 @@ int runGen(const Args& a) { enc.setMode(ultra::tools::cli::requireWaveformMode(a.waveform)); enc.setOFDMConfig(benchOFDMConfig()); enc.setDataMode(*modulation, code_rate); - // Bench targets the connected-mode 4-CW fixed-frame data path — - // that's the throughput hot path agents will be optimizing. - enc.setFixedFrameCodewords(4); + const int fixed_cw = (a.cw_count > 0) + ? v2::sanitizeFixedFrameCodewords(a.cw_count) + : v2::kDefaultFixedFrameCodewords; + // Bench targets the connected-mode fixed-frame data path. + enc.setFixedFrameCodewords(fixed_cw); // Channel interleave defaults to true on both encoder and decoder. // Match the default so fixtures are decodable by anything that // hasn't explicitly overridden — including the GUI in monitor mode @@ -327,7 +329,7 @@ int runGen(const Args& a) { // into multi-frame fragmentation. We want a deterministic single- // frame burst per iteration. const size_t cap = v2::getFixedFramePayloadCapacity( - code_rate, 4); + code_rate, fixed_cw); const size_t payload_bytes = std::min(static_cast(a.payload_bytes), cap); std::cout << "[gen] waveform=" << a.waveform @@ -338,6 +340,7 @@ int runGen(const Args& a) { << " wav_format=" << a.wav_format << " sample_rate=" << a.output_sample_rate << " frames=" << a.num_frames + << " fixed_cw=" << fixed_cw << " payload=" << payload_bytes << " bytes/frame (capacity=" << cap << ")" << " seed=" << a.seed << "\n"; @@ -363,16 +366,17 @@ int runGen(const Args& a) { } // Use v2::makeFixedDataFrame so total_cw is explicitly set to - // 4. 
DataFrame::makeData() calls calculateCodewords() which for - // a 60-byte payload at R1/4 returns 5 CWs (continuation CWs + // the requested fixed-CW geometry. DataFrame::makeData() calls + // calculateCodewords() which for a 60-byte payload at R1/4 returns 5 CWs + // (continuation CWs // reserve DATA_CW_HEADER_SIZE bytes). The OFDM encoder trusts // byte 12 of the serialized frame and frame-interleaves over - // that count — if it's 5 while the decoder expects 4, the + // that count — if it disagrees with the decoder, the // de-interleave permutation is wrong and LDPC fails on every // CW with saturated-but-wrong-position bits. (Codex review.) auto frame = v2::makeFixedDataFrame( "BENCH1", "BENCH2", static_cast(f), payload, - code_rate, /*cw_count=*/4); + code_rate, fixed_cw); Bytes serialized = frame.serialize(); // Preamble selection: diff --git a/tools/ofdm_snr_probe.cpp b/tools/ofdm_snr_probe.cpp index d3ce957..ed9333c 100644 --- a/tools/ofdm_snr_probe.cpp +++ b/tools/ofdm_snr_probe.cpp @@ -31,6 +31,7 @@ struct Args { ::ChannelType channel = ::ChannelType::AWGN; CodeRate rate = CodeRate::R1_2; Modulation mod = Modulation::DQPSK; + int cw_count = 4; uint32_t seed = 42; size_t payload_bytes = 32; bool header = true; @@ -39,7 +40,8 @@ struct Args { void usage(const char* argv0) { std::cout << "Usage: " << argv0 << " [--snr DB] [--channel awgn|good|moderate|poor|flutter]\n" - << " [--rate r1_4|r1_2|r2_3|r3_4] [--seed N] [--payload BYTES]\n"; + << " [--rate r1_4|r1_2|r2_3|r3_4] [--mod dqpsk|d8psk]\n" + << " [--cw-count N] [--seed N] [--payload BYTES]\n"; } const char* channelName(::ChannelType channel) { @@ -86,6 +88,20 @@ bool parseArgs(int argc, char** argv, Args& args) { return false; } args.rate = *parsed; + } else if (arg == "--mod") { + const char* v = need("--mod"); + if (!v) return false; + auto parsed = cli::parseModulation( + v, cli::AllowAuto::No, cli::AllowExperimentalModulation::Yes); + if (!parsed) { + std::cerr << "Unknown modulation: " << v << "\n"; 
+ return false; + } + args.mod = *parsed; + } else if (arg == "--cw-count") { + const char* v = need("--cw-count"); + if (!v) return false; + args.cw_count = v2::sanitizeFixedFrameCodewords(std::stoi(v)); } else if (arg == "--seed") { const char* v = need("--seed"); if (!v) return false; @@ -146,15 +162,18 @@ TxFrame buildTxFrame(const Args& args, const ModemConfig& cfg) { payload[i] = static_cast((i * 37u + 11u) & 0xffu); } - const auto frame = v2::DataFrame::makeData("ALPHA", "BRAVO", 1, payload, args.rate); + const auto frame = v2::makeFixedDataFrame("ALPHA", "BRAVO", 1, payload, + args.rate, args.cw_count); const Bytes frame_data = frame.serialize(); - const Bytes encoded = v2::encodeFixedFrame(frame_data, args.rate); + const Bytes encoded = v2::encodeFixedFrame(frame_data, args.rate, + args.cw_count, true, + static_cast(bitsPerOFDMSymbol(cfg))); TxFrame tx; tx.serialized_frame = frame_data; tx.signal_start = 48000; tx.samples.reserve(48000 + waveform.getDataPreambleSamples() + - waveform.getMinSamplesForCWCount(4) + 48000); + waveform.getMinSamplesForCWCount(args.cw_count) + 48000); tx.samples.resize(tx.signal_start, 0.0f); Samples preamble = waveform.generateDataPreamble(); @@ -208,13 +227,13 @@ ProbeResult decodeProbe(const Args& args, const ModemConfig& cfg, result.fading_index = rx_waveform.getFadingIndex(); std::vector soft_bits = rx_waveform.getSoftBits(); - if (soft_bits.size() < 4u * v2::LDPC_CODEWORD_BITS) { + if (soft_bits.size() < static_cast(args.cw_count) * v2::LDPC_CODEWORD_BITS) { result.got_result = false; return result; } auto status = v2::decodeFixedFrame( - soft_bits, args.rate, 4, false, + soft_bits, args.rate, args.cw_count, true, static_cast(bitsPerOFDMSymbol(cfg))); result.cw_failed = status.countFailures(); result.cw_ok = static_cast(status.decoded.size()) - result.cw_failed; @@ -243,13 +262,15 @@ int main(int argc, char** argv) { const ProbeResult r = decodeProbe(args, cfg, tx, rx); if (args.header) { - std::cout << 
"channel,configured_snr,rate,success,cw_ok,cw_failed," + std::cout << "channel,configured_snr,mod,rate,cw_count,success,cw_ok,cw_failed," << "sync_snr_db,pilot_snr_db,lts_snr_db,fading_index\n"; } std::cout << channelName(args.channel) << "," << std::fixed << std::setprecision(2) << args.snr_db << "," + << ultra::modulationToString(args.mod) << "," << ultra::codeRateToString(args.rate) << "," + << args.cw_count << "," << (r.success ? 1 : 0) << "," << r.cw_ok << "," << r.cw_failed << ","