diff --git a/src/features/snapshot.ts b/src/features/snapshot.ts index c4dbd469..f9e2d7b5 100644 --- a/src/features/snapshot.ts +++ b/src/features/snapshot.ts @@ -1,3 +1,4 @@ +import { randomBytes } from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { getDatabase } from '../db/better-sqlite3.js'; @@ -37,24 +38,77 @@ export function snapshotSave( const dir = snapshotsDir(dbPath); const dest = path.join(dir, `${name}.db`); - if (fs.existsSync(dest)) { - if (!options.force) { - throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); - } - fs.unlinkSync(dest); - debug(`Deleted existing snapshot: ${dest}`); + // Cheap fail-fast for the common non-force case; the authoritative check + // below uses an atomic linkSync that closes the TOCTOU window. + if (!options.force && fs.existsSync(dest)) { + throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); } fs.mkdirSync(dir, { recursive: true }); + // VACUUM INTO a unique temp path on the same filesystem, then atomically + // place it at the destination. This closes the TOCTOU window between + // existsSync/unlinkSync/VACUUM INTO where two concurrent saves could + // observe a missing file or interleave their VACUUM writes. + // + // Unique temp name: process.pid is shared across worker_threads in the + // same process, so we add random bytes to keep concurrent callers in any + // thread from colliding on the temp path. + const tmp = path.join( + dir, + `.${name}.db.tmp-${process.pid}-${Date.now()}-${randomBytes(6).toString('hex')}`, + ); + try { + fs.unlinkSync(tmp); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }); try { - db.exec(`VACUUM INTO '${dest.replace(/'/g, "''")}'`); + db.exec(`VACUUM INTO '${tmp.replace(/'/g, "''")}'`); } finally { db.close(); } + try { + if (options.force) { + // renameSync overwrites atomically — the correct semantics for --force. + fs.renameSync(tmp, dest); + } else { + // Non-force path: linkSync fails atomically with EEXIST if dest exists, + // closing the TOCTOU window between existsSync above and the final + // placement. We then unlink the temp file; on POSIX and NTFS, link + // creates a second reference so tmp can safely be removed. + try { + fs.linkSync(tmp, dest); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'EEXIST') { + throw new ConfigError(`Snapshot "${name}" already exists. Use --force to overwrite.`); + } + throw err; + } + try { + fs.unlinkSync(tmp); + } catch (cleanupErr) { + // Best-effort — dest is already in place, so a leftover tmp file is + // harmless. Log at debug so repeated failures surface during + // troubleshooting without noising up normal operation. + debug(`snapshotSave: failed to remove temp file ${tmp}: ${cleanupErr}`); + } + } + } catch (err) { + try { + fs.unlinkSync(tmp); + } catch (cleanupErr) { + if ((cleanupErr as NodeJS.ErrnoException).code !== 'ENOENT') { + debug(`snapshotSave: failed to remove temp file ${tmp}: ${cleanupErr}`); + } + } + throw err; + } + const stat = fs.statSync(dest); debug(`Snapshot saved: ${dest} (${stat.size} bytes)`); return { name, path: dest, size: stat.size }; @@ -74,16 +128,38 @@ export function snapshotRestore(name: string, options: SnapshotDbPathOptions = { throw new DbError(`Snapshot "${name}" not found at ${src}`, { file: src }); } - // Remove WAL/SHM sidecar files for a clean restore + // Remove WAL/SHM sidecars first so the old journal can't be replayed over + // the restored DB. unlink then check ENOENT — avoids the existsSync/unlinkSync + // race another process could wedge into. for (const suffix of ['-wal', '-shm']) { const sidecar = dbPath + suffix; - if (fs.existsSync(sidecar)) { + try { fs.unlinkSync(sidecar); debug(`Removed sidecar: ${sidecar}`); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + } + + // Copy to a temp path next to the DB, then rename atomically. Readers that + // open dbPath during restore see either the pre-restore or post-restore + // file, never a partially-written one. + fs.mkdirSync(path.dirname(dbPath), { recursive: true }); + const tmp = `${dbPath}.restore-tmp-${process.pid}-${Date.now()}-${randomBytes(6).toString('hex')}`; + try { + fs.copyFileSync(src, tmp); + fs.renameSync(tmp, dbPath); + } catch (err) { + try { + fs.unlinkSync(tmp); + } catch (cleanupErr) { + if ((cleanupErr as NodeJS.ErrnoException).code !== 'ENOENT') { + debug(`snapshotRestore: failed to remove temp file ${tmp}: ${cleanupErr}`); + } } + throw err; } - fs.copyFileSync(src, dbPath); debug(`Restored snapshot "${name}" → ${dbPath}`); } @@ -122,10 +198,13 @@ export function snapshotDelete(name: string, options: SnapshotDbPathOptions = {} const dir = snapshotsDir(dbPath); const target = path.join(dir, `${name}.db`); - if (!fs.existsSync(target)) { - throw new DbError(`Snapshot "${name}" not found at ${target}`, { file: target }); + try { + fs.unlinkSync(target); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + throw new DbError(`Snapshot "${name}" not found at ${target}`, { file: target }); + } + throw err; } - - fs.unlinkSync(target); debug(`Deleted snapshot: ${target}`); } diff --git a/tests/unit/snapshot-race-worker.mjs b/tests/unit/snapshot-race-worker.mjs new file mode 100644 index 00000000..79729054 --- /dev/null +++ b/tests/unit/snapshot-race-worker.mjs @@ -0,0 +1,21 @@ +// Worker entry used by snapshot.test.ts to create genuine concurrency +// between two snapshotSave calls. better-sqlite3 is synchronous, so +// Promise-based concurrency cannot exercise the TOCTOU race — only +// separate threads can. +// +// This file is .mjs so Node loads it directly without a TypeScript loader; +// it imports the compiled dist build when available, falling back to the +// TS source via the project's ts-resolve-hooks when running under vitest +// (which propagates the strip-types flag to workers via NODE_OPTIONS). + +import { parentPort, workerData } from 'node:worker_threads'; + +const { dbPath, name, force } = workerData; + +try { + const mod = await import('../../src/features/snapshot.ts'); + mod.snapshotSave(name, { dbPath, force }); + parentPort.postMessage({ ok: true }); +} catch (err) { + parentPort.postMessage({ ok: false, error: err?.message ?? String(err) }); +} diff --git a/tests/unit/snapshot.test.ts b/tests/unit/snapshot.test.ts index ec149252..a532b62f 100644 --- a/tests/unit/snapshot.test.ts +++ b/tests/unit/snapshot.test.ts @@ -1,6 +1,8 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { Worker } from 'node:worker_threads'; import Database from 'better-sqlite3'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { @@ -12,6 +14,8 @@ import { validateSnapshotName, } from '../../src/features/snapshot.js'; +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + let tmpDir: string; let dbPath: string; @@ -132,6 +136,109 @@ describe('snapshotSave', () => { it('rejects invalid name', () => { expect(() => snapshotSave('bad name', { dbPath })).toThrow(/Invalid snapshot name/); }); + + it('leaves no temp files in snapshots dir after success', () => { + snapshotSave('clean', { dbPath }); + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries).toContain('clean.db'); + // Temp files are named `..db.tmp--` — none should remain. + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); + }); + + // Worker infrastructure for genuine cross-thread concurrency on + // snapshotSave. better-sqlite3 is synchronous, so Promise-based + // concurrency would queue two sequential microtasks — only separate + // threads exercise the TOCTOU race. + const nodeMajor = Number(process.versions.node.split('.')[0]); + const raceWorkerPath = path.join(__dirname, 'snapshot-race-worker.mjs'); + // --import requires a URL (file://…) or a bare/relative specifier, not a + // drive-letter path on Windows. Use the file:// URL directly. + const loaderUrl = new URL('../../scripts/ts-resolve-loader.js', import.meta.url).href; + const raceExecArgv = [ + nodeMajor >= 23 ? '--strip-types' : '--experimental-strip-types', + '--import', + loaderUrl, + ]; + const spawnSaveWorker = (workerData: { + dbPath: string; + name: string; + force: boolean; + }): Promise<{ ok: boolean; error?: string }> => + new Promise((resolve, reject) => { + const w = new Worker(raceWorkerPath, { workerData, execArgv: raceExecArgv }); + let messageReceived = false; + w.once('message', (msg) => { + messageReceived = true; + resolve(msg); + }); + w.once('error', reject); + w.once('exit', (code) => { + if (!messageReceived) { + reject(new Error(`worker exited with code ${code} before posting a message`)); + } + }); + }); + + it('does not corrupt output when two --force saves race on the same name', async () => { + // Prime the target so both workers take the --force overwrite path. + snapshotSave('race', { dbPath }); + + // Spawn two worker threads racing on the same name. Post-fix, the atomic + // rename ensures the winner's file is intact and the loser either + // overwrites cleanly or leaves no corrupt artifact. + const results = await Promise.allSettled([ + spawnSaveWorker({ dbPath, name: 'race', force: true }), + spawnSaveWorker({ dbPath, name: 'race', force: true }), + ]); + + // At least one save must have succeeded. + const succeeded = results.filter((r) => r.status === 'fulfilled' && r.value.ok === true); + expect(succeeded.length).toBeGreaterThanOrEqual(1); + + // The final file must be a valid SQLite DB with the expected contents. + const finalPath = path.join(snapshotsDir(dbPath), 'race.db'); + const db = new Database(finalPath, { readonly: true }); + const rows = db.prepare('SELECT name FROM nodes').all(); + db.close(); + expect(rows).toEqual([{ name: 'hello' }]); + + // No temp files should leak from either worker. + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); + }); + + it('atomically rejects a concurrent non-force save when one already won', async () => { + // With no existing snapshot, two concurrent non-force saves race on the + // same name. Post-fix, the atomic linkSync(tmp, dest) makes the guard + // authoritative: exactly one must succeed, the other must fail with + // "already exists". Pre-fix, both could pass existsSync and silently + // overwrite each other. + const results = await Promise.allSettled([ + spawnSaveWorker({ dbPath, name: 'nonforce-race', force: false }), + spawnSaveWorker({ dbPath, name: 'nonforce-race', force: false }), + ]); + const fulfilled = results.filter((r) => r.status === 'fulfilled'); + expect(fulfilled).toHaveLength(2); + + const outcomes = fulfilled.map( + (r) => (r as PromiseFulfilledResult<{ ok: boolean; error?: string }>).value, + ); + const wins = outcomes.filter((o) => o.ok); + const losses = outcomes.filter((o) => !o.ok); + expect(wins).toHaveLength(1); + expect(losses).toHaveLength(1); + expect(losses[0].error).toMatch(/already exists/); + + // Final snapshot must be valid. + const finalPath = path.join(snapshotsDir(dbPath), 'nonforce-race.db'); + const db = new Database(finalPath, { readonly: true }); + const rows = db.prepare('SELECT name FROM nodes').all(); + db.close(); + expect(rows).toEqual([{ name: 'hello' }]); + + const entries = fs.readdirSync(snapshotsDir(dbPath)); + expect(entries.filter((f) => f.includes('.tmp-'))).toEqual([]); + }); }); // ─── snapshotRestore ────────────────────────────────────────────────────