From 0362d1b0756cba02a21ece4fba9a7d100f29e7b9 Mon Sep 17 00:00:00 2001 From: jagdeep sidhu Date: Thu, 23 Apr 2026 16:28:23 -0700 Subject: [PATCH] fix(mnCount): keep scheduler alive on repo throws, reject oversized seed ts (Codex PR16 round 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two P2 issues Codex raised on b79c18e, both real: 1. services/mnCountLogger.js — repo.getLatestDate() sat outside runAndReschedule()'s try/catch, and the setTimeout callback fired runAndReschedule() without attaching .catch. A transient SQLite read failure there would reject the returned promise, escape as an unhandled rejection, and silently kill the logger's ability to reschedule future writes until the next process restart — exactly the failure mode the "keep sampling every UTC day" design is meant to prevent. Fix is defense-in-depth: * Wrap the entire runAndReschedule body (pre-flight repo read + sample path) in one try/catch. Any failure is treated as a tick failure: log + exponential backoff clamped to this UTC day. The scheduler loop stays alive. * Extract scheduleBackoffRetry() so the happy-path catch and the last-resort scheduler.catch share the same retry math (can't drift out of sync on future edits). * Attach a belt-and-braces .catch on the setTimeout callback that re-arms even if runAndReschedule ever starts rejecting again after a future refactor. * msUntilNextMidnightUtc is itself wrapped so a hypothetical throw in the time-math path can't kill the scheduler either. 2. lib/mnCountSeed.js — parseSeedCsv validated that ts is finite and positive but passed it straight into new Date(ts).toISOString(). ECMAScript only defines Date as valid within ±8.64e15 ms of the epoch; a finite-but-oversized ts would throw RangeError. Because the seed runs inside a single db.transaction(), a single bad row anywhere in the ~2900-row history would abort the transaction and prevent any history from loading — worse than partial-seed since even isEmpty() would stay true and every subsequent boot would retry the same broken file. Fix: introduce JS_DATE_MAX_ABS_MS (the spec's 8.64e15) and add ts > JS_DATE_MAX_ABS_MS to the existing skip-and-warn path. A malformed row is now a one-line warn in the log, not a global loss of history. Tests added (3): * scheduler survives a synchronous throw in repo.getLatestDate(): wires a brittle repo whose getLatestDate throws once, asserts the logger logs mncount_tick_failed, schedules a same-day retry, and on retry actually writes the row. Proves the scheduler is still alive after the throw. * parseSeedCsv skips oversized ts: feeds 9e15 as ts, asserts no throw, asserts the good rows around it still parse, asserts the skip was logged at warn. * seedMasternodeCount rolls forward past an oversized ts row: end-to-end check that the txn does not roll back over a single bad row. Full suite: 910/910 (was 907/907). Made-with: Cursor --- lib/mnCountSeed.js | 11 +++++ lib/mnCountSeed.test.js | 47 ++++++++++++++++++ services/mnCountLogger.js | 89 +++++++++++++++++++++++++--------- services/mnCountLogger.test.js | 68 ++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 24 deletions(-) diff --git a/lib/mnCountSeed.js b/lib/mnCountSeed.js index 2b064ac..9a4bbed 100644 --- a/lib/mnCountSeed.js +++ b/lib/mnCountSeed.js @@ -35,6 +35,16 @@ const DEFAULT_SEED_PATH = path.join( 'masternode-count.csv' ); +// Largest absolute millisecond value that the ECMAScript Date +// object represents without producing an Invalid Date. Anything +// outside this range makes `new Date(ms).toISOString()` throw +// `RangeError: Invalid time value`, which — absent the bounds +// check below — would abort the whole seed transaction and block +// the entire history from loading over a single bad row +// (Codex PR16 P2 round 2). Spec reference: ECMA-262 +// "Time Values and Time Range" (8.64e15 ms from the epoch). +const JS_DATE_MAX_ABS_MS = 8640000000000000; + function utcDateString(ms) { return new Date(ms).toISOString().slice(0, 10); } @@ -69,6 +79,7 @@ function* parseSeedCsv(text, log) { if ( !Number.isFinite(ts) || ts <= 0 || + ts > JS_DATE_MAX_ABS_MS || !Number.isInteger(total) || total < 0 ) { diff --git a/lib/mnCountSeed.test.js b/lib/mnCountSeed.test.js index b0d5c33..aa88ec2 100644 --- a/lib/mnCountSeed.test.js +++ b/lib/mnCountSeed.test.js @@ -52,6 +52,53 @@ describe('parseSeedCsv', () => { const rows = Array.from(parseSeedCsv(csv)); expect(rows).toHaveLength(2); }); + + // Codex PR16 P2 round 2: an out-of-Date-range ts used to reach + // `new Date(ts).toISOString()` and throw RangeError, which aborted + // the whole seed transaction and stopped the entire history from + // loading. parseSeedCsv must skip such rows with a warn log like + // any other malformed value. + test('skips timestamps larger than the JS Date safe range (no RangeError)', () => { + const calls = []; + const log = (level, event, meta) => calls.push({ level, event, meta }); + const csv = [ + 'Timestamp;Amount', + '1526425200000;820', + // 9e15 > 8.64e15 (JS Date max), finite, positive. Pre-fix this + // would explode `utcDateString`. + '9000000000000000;999', + // Sanity: a row BELOW the cap still loads. + '1526511600000;823', + '', + ].join('\n'); + let rows; + expect(() => { + rows = Array.from(parseSeedCsv(csv, log)); + }).not.toThrow(); + expect(rows.map((r) => r.total)).toEqual([820, 823]); + const skips = calls.filter((c) => c.event === 'mncount_seed_skip'); + expect(skips).toHaveLength(1); + expect(skips[0].meta.reason).toBe('values'); + }); + + test('a single oversized ts does not roll back the whole seed', () => { + // Even if the generator had been called from inside a transaction, + // the skip path must prevent a RangeError from aborting the txn. + const csv = [ + 'Timestamp;Amount', + '1526425200000;820', + '9000000000000000;999', + '1526511600000;823', + '', + ].join('\n'); + const db = require('./db').openDatabase(':memory:'); + const repo = createMasternodeCountRepo(db); + const result = seedMasternodeCount({ db, repo, readFile: () => csv }); + expect(result.seeded).toBe(true); + expect(result.inserted).toBe(2); + expect(repo.getAll().map((r) => r.users)).toEqual([820, 823]); + db.close(); + }); }); describe('seedMasternodeCount', () => { diff --git a/services/mnCountLogger.js b/services/mnCountLogger.js index d996a81..748d2a9 100644 --- a/services/mnCountLogger.js +++ b/services/mnCountLogger.js @@ -110,6 +110,29 @@ function createMnCountLogger({ return { date, total, inserted: result.inserted }; } + // Post-error reschedule helper used by every catch path below. + // Keeps the schedule() argument logic (backoff, midnight clamp, + // fallback when msUntilNextMidnightUtc itself rejects) in one + // place so a rescue path on the outer try/catch cannot drift out + // of sync with the main one. + function scheduleBackoffRetry() { + if (stopped) return; + currentRetryMs = Math.min(currentRetryMs * 2, MAX_RETRY_MS); + let untilMidnight; + try { + untilMidnight = msUntilNextMidnightUtc(now()); + } catch (innerErr) { + // msUntilNextMidnightUtc wraps Math + Date only; a throw here + // would be an invariant violation, but we handle it rather + // than letting the scheduler die. + log('error', 'mncount_schedule_invariant', { + err: innerErr && innerErr.message, + }); + untilMidnight = MAX_RETRY_MS; + } + schedule(Math.min(currentRetryMs, untilMidnight)); + } + function schedule(ms) { if (stopped) return; if (timer) { @@ -118,9 +141,24 @@ function createMnCountLogger({ } timer = setTimeoutImpl(() => { timer = null; - // Fire-and-forget: runAndReschedule owns its own rescheduling - // and error handling. - runAndReschedule(); + // Belt-and-braces: runAndReschedule is documented never to + // reject, but if it ever did (e.g. a future refactor drops + // the outer try/catch below) an unhandled rejection here + // would silently kill the logger until the next process + // restart. Attach a last-resort catch that logs and arms a + // short retry so the scheduler self-heals (Codex PR16 P2 + // round 2). + runAndReschedule().catch((err) => { + lastError = err && err.message; + log('error', 'mncount_scheduler_invariant', { + err: err && err.message, + }); + try { + scheduleBackoffRetry(); + } catch { + /* final fallback: give up silently rather than crash */ + } + }); }, ms); if (timer && typeof timer.unref === 'function') timer.unref(); } @@ -128,34 +166,37 @@ function createMnCountLogger({ async function runAndReschedule() { if (stopped) return; - // Fast-path: if today's row is already there (boot after a - // successful earlier tick, or a spurious re-fire on the same - // UTC day) skip the RPC entirely and arm for next midnight. - // The INSERT OR IGNORE in the repo would collapse a duplicate - // write anyway, but avoiding the RPC call keeps Core's load - // bounded and stops a same-day re-sample from shadowing the - // 00:00 snapshot with an afternoon value at the log layer. - const today = utcDateString(now()); - if (repo.getLatestDate() === today) { - if (stopped) return; - schedule(msUntilNextMidnightUtc(now())); - return; - } - + // Single outer try covers BOTH the pre-flight repo read and + // the sample path. The pre-flight `repo.getLatestDate()` call + // used to sit outside the try/catch; a transient SQLite read + // failure there would reject the returned promise and — since + // the scheduler callback doesn't await this function — silently + // kill the logger's ability to reschedule future writes + // (Codex PR16 P2 round 2). Treating any failure here as a + // tick failure (log + backoff + clamp to this UTC day) keeps + // the scheduler alive. try { + // Fast-path: if today's row is already there (boot after a + // successful earlier tick, or a spurious re-fire on the same + // UTC day) skip the RPC entirely and arm for next midnight. + // The INSERT OR IGNORE in the repo would collapse a duplicate + // write anyway, but avoiding the RPC call keeps Core's load + // bounded and stops a same-day re-sample from shadowing the + // 00:00 snapshot with an afternoon value at the log layer. + const today = utcDateString(now()); + if (repo.getLatestDate() === today) { + if (stopped) return; + schedule(msUntilNextMidnightUtc(now())); + return; + } + await sampleAndWrite('tick'); if (stopped) return; schedule(msUntilNextMidnightUtc(now())); } catch (err) { lastError = err && err.message; log('error', 'mncount_tick_failed', { err: err && err.message }); - currentRetryMs = Math.min(currentRetryMs * 2, MAX_RETRY_MS); - // Never overshoot the next day's midnight: we want at least - // one scheduled attempt BEFORE the next UTC-day boundary so a - // single bad sample cannot lose its day entirely. - const untilMidnight = msUntilNextMidnightUtc(now()); - if (stopped) return; - schedule(Math.min(currentRetryMs, untilMidnight)); + scheduleBackoffRetry(); } } diff --git a/services/mnCountLogger.test.js b/services/mnCountLogger.test.js index 2de41a9..1625230 100644 --- a/services/mnCountLogger.test.js +++ b/services/mnCountLogger.test.js @@ -431,6 +431,74 @@ describe('createMnCountLogger', () => { expect(logger.getDiagnostics().stopped).toBe(true); }); + // Codex PR16 P2 round 2: repo.getLatestDate() used to sit + // outside runAndReschedule()'s try/catch. A transient SQLite + // read failure there would reject the returned promise, the + // setTimeout callback didn't attach a .catch, and the scheduler + // silently died — daily /mnCount updates would halt until + // process restart. This test wires a repo whose getLatestDate() + // throws once on the first tick, then recovers, and asserts the + // logger stays alive and rearms. + test('scheduler survives a synchronous throw in repo.getLatestDate()', async () => { + const clockLocal = { nowMs: msAt('2024-03-15T06:00:00Z') }; + const schedLocal = makeManualScheduler(clockLocal); + const dbLocal = openDatabase(':memory:'); + const realRepo = createMasternodeCountRepo(dbLocal); + + let throwOnce = true; + const brittleRepo = { + ...realRepo, + getLatestDate: () => { + if (throwOnce) { + throwOnce = false; + throw new Error('synthetic sqlite read transient'); + } + return realRepo.getLatestDate(); + }, + }; + + const logsLocal = []; + const fetchTotalLocal = jest.fn(async () => 4242); + const loggerLocal = createMnCountLogger({ + repo: brittleRepo, + fetchTotal: fetchTotalLocal, + now: () => clockLocal.nowMs, + log: (level, event, meta) => logsLocal.push({ level, event, meta }), + setTimeoutImpl: schedLocal.setTimeoutImpl, + clearTimeoutImpl: schedLocal.clearTimeoutImpl, + }); + + try { + loggerLocal.start(); + for (let i = 0; i < 20; i++) await new Promise((r) => setImmediate(r)); + + // First pass hit the throw. The logger MUST have logged the + // failure and scheduled a retry inside today's UTC window, + // not gone silent. + expect( + logsLocal.some((l) => l.event === 'mncount_tick_failed') + ).toBe(true); + const firstPending = schedLocal.pending(); + expect(firstPending).toHaveLength(1); + expect(firstPending[0].delay).toBeGreaterThan(0); + expect(firstPending[0].delay).toBeLessThan( + msUntilNextMidnightUtc(clockLocal.nowMs) + 1 + ); + + // Second pass: getLatestDate() now returns null (empty repo), + // the sample path writes today's row, and the logger arms for + // next midnight — proof the scheduler is still alive. + await schedLocal.fireNext(); + expect(realRepo.getAll()).toEqual([ + { date: '2024-03-15', users: 4242 }, + ]); + expect(schedLocal.pending()).toHaveLength(1); + } finally { + loggerLocal.stop(); + dbLocal.close(); + } + }); + test('MAX_RETRY_MS cap keeps the backoff sane on chronic failure', async () => { // Verify the exposed constants so they cannot regress silently. expect(BASE_RETRY_MS).toBe(60 * 1000);