From ec08457dc0c81cfb68e8d9252e3d2d950bc192c8 Mon Sep 17 00:00:00 2001 From: Rich Harris Date: Mon, 28 Oct 2024 22:10:57 -0400 Subject: [PATCH 1/4] prioritise current section when searching --- .../site-kit/src/lib/search/SearchBox.svelte | 10 +++++++- .../site-kit/src/lib/search/search-worker.ts | 4 +-- packages/site-kit/src/lib/search/search.ts | 25 ++++++++++++++++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/packages/site-kit/src/lib/search/SearchBox.svelte b/packages/site-kit/src/lib/search/SearchBox.svelte index 09b21e06c3..8727d3ea32 100644 --- a/packages/site-kit/src/lib/search/SearchBox.svelte +++ b/packages/site-kit/src/lib/search/SearchBox.svelte @@ -10,6 +10,7 @@ It appears when the user clicks on the `Search` component or presses the corresp import Icon from '../components/Icon.svelte'; import SearchResults from './SearchResults.svelte'; import SearchWorker from './search-worker.js?worker'; + import { page } from '$app/stores'; interface Props { placeholder?: string; @@ -94,7 +95,14 @@ It appears when the user clicks on the `Search` component or presses the corresp const id = uid++; pending.add(id); - worker.postMessage({ type: 'query', id, payload: $search_query }); + worker.postMessage({ + type: 'query', + id, + payload: { + query: $search_query, + path: $page.url.pathname + } + }); } }); diff --git a/packages/site-kit/src/lib/search/search-worker.ts b/packages/site-kit/src/lib/search/search-worker.ts index 6534d038bb..95d163a9da 100644 --- a/packages/site-kit/src/lib/search/search-worker.ts +++ b/packages/site-kit/src/lib/search/search-worker.ts @@ -12,8 +12,8 @@ addEventListener('message', async (event) => { } if (type === 'query') { - const query = payload; - const results = search(query); + const { query, path } = payload; + const results = search(query, path); postMessage({ type: 'results', payload: { results, query } }); } diff --git a/packages/site-kit/src/lib/search/search.ts b/packages/site-kit/src/lib/search/search.ts index 235fececeb..2dfb54274d 100644 --- a/packages/site-kit/src/lib/search/search.ts +++ b/packages/site-kit/src/lib/search/search.ts @@ -47,19 +47,36 @@ export function init(blocks: Block[]) { /** * Search for a given query in the existing index */ -export function search(query: string): BlockGroup[] { +export function search(query: string, path: string): BlockGroup[] { const escaped = query.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); const regex = new RegExp(`(^|\\b)${escaped}`, 'i'); + const parts = path.split('/'); + const blocks = indexes .flatMap((index) => index.search(query)) // @ts-expect-error flexsearch types are wrong i think? .map(lookup) .map((block, rank) => ({ block: block as Block, rank })) .sort((a, b) => { - // If rank is way lower, give that priority - if (Math.abs(a.rank - b.rank) > 3) { - return a.rank - b.rank; + // prioritise current section + const a_parts = a.block.href.split('/'); + const b_parts = b.block.href.split('/'); + + for (let i = 0; i < parts.length; i += 1) { + const a_part_matches = a_parts[i] === parts[i]; + const b_part_matches = b_parts[i] === parts[i]; + + if (!a_part_matches || !b_part_matches) { + if (a_part_matches !== b_part_matches) { + if (i > 1) { + console.log('here', a, b); + } + return a_part_matches ? -1 : 1; + } + + break; + } } const a_title_matches = regex.test(a.block.breadcrumbs.at(-1)!); From 1ed686b8c0c68c9dc9aa8cc5fd09d9ef3bc65afb Mon Sep 17 00:00:00 2001 From: Rich Harris Date: Mon, 28 Oct 2024 23:03:49 -0400 Subject: [PATCH 2/4] use weighting system --- packages/site-kit/src/lib/search/search.ts | 68 ++++++++++----------- packages/site-kit/src/lib/search/types.d.ts | 1 + 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/packages/site-kit/src/lib/search/search.ts b/packages/site-kit/src/lib/search/search.ts index 2dfb54274d..5428e27fbc 100644 --- a/packages/site-kit/src/lib/search/search.ts +++ b/packages/site-kit/src/lib/search/search.ts @@ -44,12 +44,20 @@ export function init(blocks: Block[]) { inited = true; } +const CURRENT_SECTION_BOOST = 2; +const EXACT_MATCH_BOOST = 10; +const WORD_MATCH_BOOST = 4; +const NEAR_MATCH_BOOST = 2; +const BREADCRUMB_LENGTH_BOOST = 0.2; + /** * Search for a given query in the existing index */ export function search(query: string, path: string): BlockGroup[] { const escaped = query.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); - const regex = new RegExp(`(^|\\b)${escaped}`, 'i'); + const exact_match = new RegExp(`^${escaped}$`, 'i'); + const word_match = new RegExp(`(^|\\b)${escaped}($|\\b)`, 'i'); + const near_match = new RegExp(`(^|\\b)${escaped}`, 'i'); const parts = path.split('/'); @@ -57,60 +65,50 @@ export function search(query: string, path: string): BlockGroup[] { .flatMap((index) => index.search(query)) // @ts-expect-error flexsearch types are wrong i think? .map(lookup) - .map((block, rank) => ({ block: block as Block, rank })) - .sort((a, b) => { + .map((block) => { + const block_parts = block.href.split('/'); + // prioritise current section - const a_parts = a.block.href.split('/'); - const b_parts = b.block.href.split('/'); - - for (let i = 0; i < parts.length; i += 1) { - const a_part_matches = a_parts[i] === parts[i]; - const b_part_matches = b_parts[i] === parts[i]; - - if (!a_part_matches || !b_part_matches) { - if (a_part_matches !== b_part_matches) { - if (i > 1) { - console.log('here', a, b); - } - return a_part_matches ? -1 : 1; - } - - break; - } + let score = block_parts.findIndex((part, i) => part !== parts[i]); + if (score === -1) score = block_parts.length; + score *= CURRENT_SECTION_BOOST; + + if (block.breadcrumbs.some((text) => exact_match.test(text))) { + console.log('EXACT MATCH', block.breadcrumbs); + score += EXACT_MATCH_BOOST; + } else if (block.breadcrumbs.some((text) => word_match.test(text))) { + score += WORD_MATCH_BOOST; + } else if (block.breadcrumbs.some((text) => near_match.test(text))) { + score += NEAR_MATCH_BOOST; } - const a_title_matches = regex.test(a.block.breadcrumbs.at(-1)!); - const b_title_matches = regex.test(b.block.breadcrumbs.at(-1)!); - - // massage the order a bit, so that title matches - // are given higher priority - if (a_title_matches !== b_title_matches) { - return a_title_matches ? -1 : 1; - } + // prioritise branches over leaves + score -= block.breadcrumbs.length * BREADCRUMB_LENGTH_BOOST; - return a.block.breadcrumbs.length - b.block.breadcrumbs.length || a.rank - b.rank; - }) - .map(({ block }) => block); + return { block, score }; + }); const groups: Record = {}; - for (const block of blocks) { + for (const { score, block } of blocks) { const breadcrumbs = block.breadcrumbs.slice(0, 2); const group = (groups[breadcrumbs.join('::')] ??= { breadcrumbs, - blocks: [] + blocks: [], + score: 0 }); + group.score = Math.max(score, group.score); group.blocks.push(block); } - return Object.values(groups); + return Object.values(groups).sort((a, b) => b.score - a.score); } /** * Get a block with details by its href */ export function lookup(href: string) { - return map.get(href); + return map.get(href)!; } diff --git a/packages/site-kit/src/lib/search/types.d.ts b/packages/site-kit/src/lib/search/types.d.ts index c853d0df01..a537337c85 100644 --- a/packages/site-kit/src/lib/search/types.d.ts +++ b/packages/site-kit/src/lib/search/types.d.ts @@ -8,4 +8,5 @@ export interface Block { export interface BlockGroup { breadcrumbs: string[]; blocks: Block[]; + score: number; } From cfa8d5cf9448483153f8d09b30e2d53518cf7a5e Mon Sep 17 00:00:00 2001 From: Rich Harris Date: Mon, 28 Oct 2024 23:26:27 -0400 Subject: [PATCH 3/4] lint --- apps/svelte.dev/src/routes/search/+page.server.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/svelte.dev/src/routes/search/+page.server.js b/apps/svelte.dev/src/routes/search/+page.server.js index a558f43c40..467f6d1751 100644 --- a/apps/svelte.dev/src/routes/search/+page.server.js +++ b/apps/svelte.dev/src/routes/search/+page.server.js @@ -11,7 +11,7 @@ export async function load({ url, fetch }) { const query = url.searchParams.get('q') ?? ''; - const results = query ? search(query) : []; + const results = query ? search(query, '') : []; return { query, From dbe7a022048d7723d9bfa21e3e9fc46e19af8a0d Mon Sep 17 00:00:00 2001 From: Rich Harris Date: Tue, 29 Oct 2024 18:07:26 -0400 Subject: [PATCH 4/4] sort blocks within groups --- packages/site-kit/src/lib/search/search.ts | 44 +++++++++++++++------ packages/site-kit/src/lib/search/types.d.ts | 1 - 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/packages/site-kit/src/lib/search/search.ts b/packages/site-kit/src/lib/search/search.ts index 5428e27fbc..56e022118f 100644 --- a/packages/site-kit/src/lib/search/search.ts +++ b/packages/site-kit/src/lib/search/search.ts @@ -50,6 +50,12 @@ const WORD_MATCH_BOOST = 4; const NEAR_MATCH_BOOST = 2; const BREADCRUMB_LENGTH_BOOST = 0.2; +interface Entry { + block: Block; + score: number; + rank: number; +} + /** * Search for a given query in the existing index */ @@ -65,7 +71,7 @@ export function search(query: string, path: string): BlockGroup[] { .flatMap((index) => index.search(query)) // @ts-expect-error flexsearch types are wrong i think? .map(lookup) - .map((block) => { + .map((block, rank) => { const block_parts = block.href.split('/'); // prioritise current section @@ -85,25 +91,39 @@ export function search(query: string, path: string): BlockGroup[] { // prioritise branches over leaves score -= block.breadcrumbs.length * BREADCRUMB_LENGTH_BOOST; - return { block, score }; - }); + const entry: Entry = { block, score, rank }; - const groups: Record = {}; + return entry; + }); - for (const { score, block } of blocks) { - const breadcrumbs = block.breadcrumbs.slice(0, 2); + const grouped: Record = {}; - const group = (groups[breadcrumbs.join('::')] ??= { + for (const entry of blocks) { + const breadcrumbs = entry.block.breadcrumbs.slice(0, 2); + const group = (grouped[breadcrumbs.join('::')] ??= { breadcrumbs, - blocks: [], - score: 0 + entries: [] }); - group.score = Math.max(score, group.score); - group.blocks.push(block); + group.entries.push(entry); } - return Object.values(groups).sort((a, b) => b.score - a.score); + const sorted = Object.values(grouped); + + // sort blocks within groups... + for (const group of sorted) { + group.entries.sort((a, b) => b.score - a.score || a.rank - b.rank); + } + + // ...then sort groups + sorted.sort((a, b) => b.entries[0].score - a.entries[0].score); + + return sorted.map((group) => { + return { + breadcrumbs: group.breadcrumbs, + blocks: group.entries.map((entry) => entry.block) + }; + }); } /** diff --git a/packages/site-kit/src/lib/search/types.d.ts b/packages/site-kit/src/lib/search/types.d.ts index a537337c85..c853d0df01 100644 --- a/packages/site-kit/src/lib/search/types.d.ts +++ b/packages/site-kit/src/lib/search/types.d.ts @@ -8,5 +8,4 @@ export interface Block { export interface BlockGroup { breadcrumbs: string[]; blocks: Block[]; - score: number; }