From c5870ac284e15750b55af13a5fcc89a9beb89522 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:28:42 -0600 Subject: [PATCH 1/2] fix(snapshot): close TOCTOU race on save/restore/delete Use temp-file + atomic rename for `snapshotSave --force` and `snapshotRestore` so two concurrent saves or a restore racing with a reader can't observe a missing or partially-written database file. `snapshotDelete` now unlinks directly and maps ENOENT to the existing "not found" error instead of a separate existsSync check. Adds two regression tests: temp-file cleanup on success, and two concurrent --force saves producing a valid SQLite destination. Closes #995 Impact: 3 functions changed, 1 affected --- src/features/snapshot.ts | 69 +++++++++++++++++++++++++++++-------- tests/unit/snapshot.test.ts | 33 ++++++++++++++++++ 2 files changed, 88 insertions(+), 14 deletions(-) diff --git a/src/features/snapshot.ts b/src/features/snapshot.ts index c4dbd469..f1473e50 100644 --- a/src/features/snapshot.ts +++ b/src/features/snapshot.ts @@ -37,24 +37,42 @@ export function snapshotSave( const dir = snapshotsDir(dbPath); const dest = path.join(dir, `${name}.db`); - if (fs.existsSync(dest)) { - if (!options.force) { - throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); - } - fs.unlinkSync(dest); - debug(`Deleted existing snapshot: ${dest}`); + if (!options.force && fs.existsSync(dest)) { + throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); } fs.mkdirSync(dir, { recursive: true }); + // VACUUM INTO a unique temp path on the same filesystem, then atomically + // rename over the destination. This closes the TOCTOU window between + // existsSync/unlinkSync/VACUUM INTO where two concurrent saves could + // observe a missing file or interleave their VACUUM writes. + const tmp = path.join(dir, `.${name}.db.tmp-${process.pid}-${Date.now()}`); + try { + fs.unlinkSync(tmp); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }); try { - db.exec(`VACUUM INTO '${dest.replace(/'/g, "''")}'`); + db.exec(`VACUUM INTO '${tmp.replace(/'/g, "''")}'`); } finally { db.close(); } + try { + fs.renameSync(tmp, dest); + } catch (err) { + try { + fs.unlinkSync(tmp); + } catch { + /* cleanup best-effort */ + } + throw err; + } + const stat = fs.statSync(dest); debug(`Snapshot saved: ${dest} (${stat.size} bytes)`); return { name, path: dest, size: stat.size }; @@ -74,16 +92,36 @@ export function snapshotRestore(name: string, options: SnapshotDbPathOptions = { throw new DbError(`Snapshot "${name}" not found at ${src}`, { file: src }); } - // Remove WAL/SHM sidecar files for a clean restore + // Remove WAL/SHM sidecars first so the old journal can't be replayed over + // the restored DB. unlink then check ENOENT — avoids the existsSync/unlinkSync + // race another process could wedge into. for (const suffix of ['-wal', '-shm']) { const sidecar = dbPath + suffix; - if (fs.existsSync(sidecar)) { + try { fs.unlinkSync(sidecar); debug(`Removed sidecar: ${sidecar}`); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; } } - fs.copyFileSync(src, dbPath); + // Copy to a temp path next to the DB, then rename atomically. Readers that + // open dbPath during restore see either the pre-restore or post-restore + // file, never a partially-written one. + fs.mkdirSync(path.dirname(dbPath), { recursive: true }); + const tmp = `${dbPath}.restore-tmp-${process.pid}-${Date.now()}`; + try { + fs.copyFileSync(src, tmp); + fs.renameSync(tmp, dbPath); + } catch (err) { + try { + fs.unlinkSync(tmp); + } catch { + /* cleanup best-effort */ + } + throw err; + } + debug(`Restored snapshot "${name}" → ${dbPath}`); } @@ -122,10 +160,13 @@ export function snapshotDelete(name: string, options: SnapshotDbPathOptions = {} const dir = snapshotsDir(dbPath); const target = path.join(dir, `${name}.db`); - if (!fs.existsSync(target)) { - throw new DbError(`Snapshot "${name}" not found at ${target}`, { file: target }); + try { + fs.unlinkSync(target); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + throw new DbError(`Snapshot "${name}" not found at ${target}`, { file: target }); + } + throw err; } - - fs.unlinkSync(target); debug(`Deleted snapshot: ${target}`); } diff --git a/tests/unit/snapshot.test.ts b/tests/unit/snapshot.test.ts index ec149252..cc02812f 100644 --- a/tests/unit/snapshot.test.ts +++ b/tests/unit/snapshot.test.ts @@ -132,6 +132,39 @@ describe('snapshotSave', () => { it('rejects invalid name', () => { expect(() => snapshotSave('bad name', { dbPath })).toThrow(/Invalid snapshot name/); }); + + it('leaves no temp files in snapshots dir after success', () => { + snapshotSave('clean', { dbPath }); + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries).toContain('clean.db'); + // Temp files are named `..db.tmp--` — none should remain. + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); + }); + + it('does not corrupt output when two --force saves race on the same name', async () => { + // Prime the target so both calls take the --force overwrite path. + snapshotSave('race', { dbPath }); + + // Issue both concurrently. Pre-fix, the existsSync/unlinkSync/VACUUM INTO + // sequence could interleave and produce a corrupt DB; post-fix, rename + // is atomic so the winner's file is intact and the loser either succeeds + // (overwriting) or fails cleanly. + const results = await Promise.allSettled([ + Promise.resolve().then(() => snapshotSave('race', { dbPath, force: true })), + Promise.resolve().then(() => snapshotSave('race', { dbPath, force: true })), + ]); + + // At least one save must have succeeded. + const succeeded = results.filter((r) => r.status === 'fulfilled'); + expect(succeeded.length).toBeGreaterThanOrEqual(1); + + // The final file must be a valid SQLite DB with the expected contents. + const finalPath = path.join(snapshotsDir(dbPath), 'race.db'); + const db = new Database(finalPath, { readonly: true }); + const rows = db.prepare('SELECT name FROM nodes').all(); + db.close(); + expect(rows).toEqual([{ name: 'hello' }]); + }); }); // ─── snapshotRestore ──────────────────────────────────────────────────── From a5cb048c3b6e842c769b59873d5cb88f9f6797db Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 22 Apr 2026 01:03:15 -0600 Subject: [PATCH 2/2] fix(snapshot): close non-force TOCTOU and fix race test (#1003) Address Greptile review feedback: 1. Non-force path still had a TOCTOU window: two concurrent callers could both pass existsSync(dest) and overwrite each other silently. Replace the final rename with linkSync(tmp, dest), which fails atomically with EEXIST if another writer won the race, then unlinkSync(tmp). The fail-fast existsSync above is retained as a cheap guard; linkSync is the authoritative check. 2. The original race regression test used Promise.resolve().then(...) on synchronous better-sqlite3 calls, so both microtasks executed sequentially and the test could not reproduce the TOCTOU it claimed to guard. Replace it with a worker_threads-based harness that spawns two workers racing on snapshotSave, plus a new non-force race test that verifies exactly one caller wins and the other sees a clear "already exists" error. 3. process.pid is shared across worker_threads in the same process, so add randomBytes(6) to the temp filename to keep concurrent callers in any thread from colliding on the same tmp path. 4. Log best-effort cleanup failures at debug so repeated tmp leaks surface in troubleshooting without spamming normal runs (per Claude review's minor suggestion). Impact: 2 functions changed, 1 affected --- src/features/snapshot.ts | 54 ++++++++++++++--- tests/unit/snapshot-race-worker.mjs | 21 +++++++ tests/unit/snapshot.test.ts | 90 ++++++++++++++++++++++++++--- 3 files changed, 149 insertions(+), 16 deletions(-) create mode 100644 tests/unit/snapshot-race-worker.mjs diff --git a/src/features/snapshot.ts b/src/features/snapshot.ts index f1473e50..f9e2d7b5 100644 --- a/src/features/snapshot.ts +++ b/src/features/snapshot.ts @@ -1,3 +1,4 @@ +import { randomBytes } from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { getDatabase } from '../db/better-sqlite3.js'; @@ -37,6 +38,8 @@ export function snapshotSave( const dir = snapshotsDir(dbPath); const dest = path.join(dir, `${name}.db`); + // Cheap fail-fast for the common non-force case; the authoritative check + // below uses an atomic linkSync that closes the TOCTOU window. if (!options.force && fs.existsSync(dest)) { throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); } @@ -44,10 +47,17 @@ export function snapshotSave( fs.mkdirSync(dir, { recursive: true }); // VACUUM INTO a unique temp path on the same filesystem, then atomically - // rename over the destination. This closes the TOCTOU window between + // place it at the destination. This closes the TOCTOU window between // existsSync/unlinkSync/VACUUM INTO where two concurrent saves could // observe a missing file or interleave their VACUUM writes. - const tmp = path.join(dir, `.${name}.db.tmp-${process.pid}-${Date.now()}`); + // + // Unique temp name: process.pid is shared across worker_threads in the + // same process, so we add random bytes to keep concurrent callers in any + // thread from colliding on the temp path. + const tmp = path.join( + dir, + `.${name}.db.tmp-${process.pid}-${Date.now()}-${randomBytes(6).toString('hex')}`, + ); try { fs.unlinkSync(tmp); } catch (err) { @@ -63,12 +73,38 @@ export function snapshotSave( } try { - fs.renameSync(tmp, dest); + if (options.force) { + // renameSync overwrites atomically — the correct semantics for --force. + fs.renameSync(tmp, dest); + } else { + // Non-force path: linkSync fails atomically with EEXIST if dest exists, + // closing the TOCTOU window between existsSync above and the final + // placement. We then unlink the temp file; on POSIX and NTFS, link + // creates a second reference so tmp can safely be removed. + try { + fs.linkSync(tmp, dest); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'EEXIST') { + throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); + } + throw err; + } + try { + fs.unlinkSync(tmp); + } catch (cleanupErr) { + // Best-effort — dest is already in place, so a leftover tmp file is + // harmless. Log at debug so repeated failures surface during + // troubleshooting without noising up normal operation. + debug(`snapshotSave: failed to remove temp file ${tmp}: ${cleanupErr}`); + } + } } catch (err) { try { fs.unlinkSync(tmp); - } catch { - /* cleanup best-effort */ + } catch (cleanupErr) { + if ((cleanupErr as NodeJS.ErrnoException).code !== 'ENOENT') { + debug(`snapshotSave: failed to remove temp file ${tmp}: ${cleanupErr}`); + } } throw err; } @@ -109,15 +145,17 @@ export function snapshotRestore(name: string, options: SnapshotDbPathOptions = { // open dbPath during restore see either the pre-restore or post-restore // file, never a partially-written one. fs.mkdirSync(path.dirname(dbPath), { recursive: true }); - const tmp = `${dbPath}.restore-tmp-${process.pid}-${Date.now()}`; + const tmp = `${dbPath}.restore-tmp-${process.pid}-${Date.now()}-${randomBytes(6).toString('hex')}`; try { fs.copyFileSync(src, tmp); fs.renameSync(tmp, dbPath); } catch (err) { try { fs.unlinkSync(tmp); - } catch { - /* cleanup best-effort */ + } catch (cleanupErr) { + if ((cleanupErr as NodeJS.ErrnoException).code !== 'ENOENT') { + debug(`snapshotRestore: failed to remove temp file ${tmp}: ${cleanupErr}`); + } } throw err; } diff --git a/tests/unit/snapshot-race-worker.mjs b/tests/unit/snapshot-race-worker.mjs new file mode 100644 index 00000000..79729054 --- /dev/null +++ b/tests/unit/snapshot-race-worker.mjs @@ -0,0 +1,21 @@ +// Worker entry used by snapshot.test.ts to create genuine concurrency +// between two snapshotSave calls. better-sqlite3 is synchronous, so +// Promise-based concurrency cannot exercise the TOCTOU race — only +// separate threads can. +// +// This file is .mjs so Node loads it directly without a TypeScript loader; +// it imports the compiled dist build when available, falling back to the +// TS source via the project's ts-resolve-hooks when running under vitest +// (which propagates the strip-types flag to workers via NODE_OPTIONS). + +import { parentPort, workerData } from 'node:worker_threads'; + +const { dbPath, name, force } = workerData; + +try { + const mod = await import('../../src/features/snapshot.ts'); + mod.snapshotSave(name, { dbPath, force }); + parentPort.postMessage({ ok: true }); +} catch (err) { + parentPort.postMessage({ ok: false, error: err?.message ?? String(err) }); +} diff --git a/tests/unit/snapshot.test.ts b/tests/unit/snapshot.test.ts index cc02812f..a532b62f 100644 --- a/tests/unit/snapshot.test.ts +++ b/tests/unit/snapshot.test.ts @@ -1,6 +1,8 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { Worker } from 'node:worker_threads'; import Database from 'better-sqlite3'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { @@ -12,6 +14,8 @@ import { validateSnapshotName, } from '../../src/features/snapshot.js'; +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + let tmpDir: string; let dbPath: string; @@ -141,21 +145,54 @@ describe('snapshotSave', () => { expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); }); + // Worker infrastructure for genuine cross-thread concurrency on + // snapshotSave. better-sqlite3 is synchronous, so Promise-based + // concurrency would queue two sequential microtasks — only separate + // threads exercise the TOCTOU race. + const nodeMajor = Number(process.versions.node.split('.')[0]); + const raceWorkerPath = path.join(__dirname, 'snapshot-race-worker.mjs'); + // --import requires a URL (file://…) or a bare/relative specifier, not a + // drive-letter path on Windows. Use the file:// URL directly. + const loaderUrl = new URL('../../scripts/ts-resolve-loader.js', import.meta.url).href; + const raceExecArgv = [ + nodeMajor >= 23 ? '--strip-types' : '--experimental-strip-types', + '--import', + loaderUrl, + ]; + const spawnSaveWorker = (workerData: { + dbPath: string; + name: string; + force: boolean; + }): Promise<{ ok: boolean; error?: string }> => + new Promise((resolve, reject) => { + const w = new Worker(raceWorkerPath, { workerData, execArgv: raceExecArgv }); + let messageReceived = false; + w.once('message', (msg) => { + messageReceived = true; + resolve(msg); + }); + w.once('error', reject); + w.once('exit', (code) => { + if (!messageReceived) { + reject(new Error(`worker exited with code ${code} before posting a message`)); + } + }); + }); + it('does not corrupt output when two --force saves race on the same name', async () => { - // Prime the target so both calls take the --force overwrite path. + // Prime the target so both workers take the --force overwrite path. snapshotSave('race', { dbPath }); - // Issue both concurrently. Pre-fix, the existsSync/unlinkSync/VACUUM INTO - // sequence could interleave and produce a corrupt DB; post-fix, rename - // is atomic so the winner's file is intact and the loser either succeeds - // (overwriting) or fails cleanly. + // Spawn two worker threads racing on the same name. Post-fix, the atomic + // rename ensures the winner's file is intact and the loser either + // overwrites cleanly or leaves no corrupt artifact. const results = await Promise.allSettled([ - Promise.resolve().then(() => snapshotSave('race', { dbPath, force: true })), - Promise.resolve().then(() => snapshotSave('race', { dbPath, force: true })), + spawnSaveWorker({ dbPath, name: 'race', force: true }), + spawnSaveWorker({ dbPath, name: 'race', force: true }), ]); // At least one save must have succeeded. - const succeeded = results.filter((r) => r.status === 'fulfilled'); + const succeeded = results.filter((r) => r.status === 'fulfilled' && r.value.ok === true); expect(succeeded.length).toBeGreaterThanOrEqual(1); // The final file must be a valid SQLite DB with the expected contents. @@ -164,6 +201,43 @@ describe('snapshotSave', () => { const rows = db.prepare('SELECT name FROM nodes').all(); db.close(); expect(rows).toEqual([{ name: 'hello' }]); + + // No temp files should leak from either worker. + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); + }); + + it('atomically rejects a concurrent non-force save when one already won', async () => { + // With no existing snapshot, two concurrent non-force saves race on the + // same name. Post-fix, the atomic linkSync(tmp, dest) makes the guard + // authoritative: exactly one must succeed, the other must fail with + // "already exists". Pre-fix, both could pass existsSync and silently + // overwrite each other. + const results = await Promise.allSettled([ + spawnSaveWorker({ dbPath, name: 'nonforce-race', force: false }), + spawnSaveWorker({ dbPath, name: 'nonforce-race', force: false }), + ]); + const fulfilled = results.filter((r) => r.status === 'fulfilled'); + expect(fulfilled).toHaveLength(2); + + const outcomes = fulfilled.map( + (r) => (r as PromiseFulfilledResult<{ ok: boolean; error?: string }>).value, + ); + const wins = outcomes.filter((o) => o.ok); + const losses = outcomes.filter((o) => !o.ok); + expect(wins).toHaveLength(1); + expect(losses).toHaveLength(1); + expect(losses[0].error).toMatch(/already exists/); + + // Final snapshot must be valid. + const finalPath = path.join(snapshotsDir(dbPath), 'nonforce-race.db'); + const db = new Database(finalPath, { readonly: true }); + const rows = db.prepare('SELECT name FROM nodes').all(); + db.close(); + expect(rows).toEqual([{ name: 'hello' }]); + + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); }); });