|
| 1 | +/** |
| 2 | + * Direction A1 parity check (Plan 008) |
| 3 | + * |
| 4 | + * Plan 008's Direction A1 narrows candidate docs by expanding the query term |
| 5 | + * over the analyzer-tokenized term dictionary, then restricting BitapSearch to |
| 6 | + * docs whose postings include any expanded token. The plan's recall gate |
| 7 | + * requires bit-identical results vs. baseline on these fixtures. |
| 8 | + * |
| 9 | + * This script verifies the BASELINE behavior (what A1 must reproduce) on the |
| 10 | + * cross-separator fixture. The structural argument below explains why A1 |
| 11 | + * cannot match it. |
| 12 | + * |
| 13 | + * Run: |
| 14 | + * node bench/direction-a-parity-check.mjs |
| 15 | + */ |
| 16 | + |
| 17 | +import Fuse from '../dist/fuse.mjs' |
| 18 | + |
/**
 * Run `query` through a Fuse-like searcher and print what came back.
 *
 * Output is one summary line (label, query, hit count) followed by one line
 * per hit showing the hit's `refIndex` and its JSON-serialized `item`.
 *
 * @param {string} label - Tag printed in front of the summary line.
 * @param {{ search: Function }} fuse - Object whose `search(query)` returns
 *   results carrying `refIndex` and `item` properties.
 * @param {string} query - Search string forwarded to `fuse.search`.
 * @returns {void}
 */
function showHits(label, fuse, query) {
  // Collect the (refIndex, item) pairs up front so the summary line can
  // report the count before any individual hit is printed.
  const pairs = []
  for (const { refIndex, item } of fuse.search(query)) {
    pairs.push([refIndex, item])
  }

  console.log(` ${label} → query="${query}" matches: ${pairs.length}`)
  pairs.forEach(([refIndex, item]) => {
    console.log(` [${refIndex}] ${JSON.stringify(item)}`)
  })
}
| 27 | + |
// ---------------------------------------------------------------------------
// Driver: prints the cross-separator fixture, the baseline hits for both Fuse
// configurations, the written argument for why A1 loses recall, and finally
// the set of baseline matches a simulated A1 candidate set would drop.
// ---------------------------------------------------------------------------

const QUERY = 'foobar'
const rule = '='.repeat(72)

console.log('Direction A1 parity check — cross-separator fixture')
console.log(rule)

// Fixture corpus. It deliberately holds the concatenated spelling ("foobar",
// one analyzer token) next to the separated spelling ("foo bar", two tokens).
// Expanding the query "foobar" over this dictionary hits the exact "foobar"
// token, narrows to that token's postings, and so DROPS the "foo bar" doc.
const docs = [
  { title: 'foobar' }, // tokenizes to ["foobar"]
  { title: 'foo bar' }, // tokenizes to ["foo", "bar"]
  { title: 'foo-bar baz' }, // tokenizes to ["foo", "bar", "baz"]
  { title: 'fizzbuzz quux' } // unrelated control
]

// Two searchers over the same corpus: one with token search enabled, one with
// the library defaults.
const fuseToken = new Fuse(docs, {
  keys: ['title'],
  useTokenSearch: true,
  includeScore: true
})
const fuseDefault = new Fuse(docs, {
  keys: ['title'],
  includeScore: true
})

console.log('\nCorpus:')
docs.forEach((doc, position) => {
  console.log(` [${position}] ${JSON.stringify(doc)}`)
})

console.log('\nBaseline behavior (what A1 must match):')
showHits('default fuzzy ', fuseDefault, QUERY)
showHits('useTokenSearch', fuseToken, QUERY)

// The written argument, grouped into paragraphs; a blank line is printed
// between consecutive paragraphs.
const structuralArgument = [
  [
    ' Baseline `useTokenSearch` runs `BitapSearch("foobar").searchIn(text)`',
    ' on EVERY record\'s field text. For doc [1] with text "foo bar", Bitap',
    ' matches "foobar" within distance budget (substituting one char to bridge',
    ' the separator).'
  ],
  [
    ' A1 expansion: walks `terms.keys()` and finds dict tokens within Bitap\'s',
    ' accept budget for "foobar". Dictionary contains: foobar, foo, bar, baz,',
    ' fizzbuzz, quux.',
    ' - "foobar" is exact → in expansion.',
    ' - "foo", "bar" (length 3) vs query (length 6): edit distance 3, well',
    ' outside default threshold; NOT in expansion.',
    ' Candidate doc set = postings of "foobar" = { doc 0 } only.',
    ' A1 then runs Bitap only on doc 0\'s text → MISSES doc 1 ("foo bar").'
  ],
  [
    ' This is a strict recall regression vs. baseline. A1 fails the',
    ' cross-separator parity gate. The plan\'s decision rule routes to',
    ' Direction B.'
  ]
]

console.log('\nStructural argument for A1 failure:')
structuralArgument.forEach((paragraph, paragraphIndex) => {
  if (paragraphIndex > 0) console.log()
  for (const line of paragraph) console.log(line)
})

// Make the regression auditable: model A1's restricted candidate set by hand
// and list every baseline match that falls outside it, i.e. what A1 would lose
// by gating on the exact-token expansion of "foobar".
const baselineRefs = new Set(
  fuseToken.search(QUERY).map(({ refIndex }) => refIndex)
)
const a1Postings = new Set([0]) // postings of "foobar"
const lostRefs = Array.from(baselineRefs).filter((ref) => !a1Postings.has(ref))

console.log(`\nRecall regression: A1 candidate set drops ${lostRefs.length} match(es): ${JSON.stringify(lostRefs)}`)
console.log(rule)
0 commit comments