-
Notifications
You must be signed in to change notification settings - Fork 82
/
search.ts
93 lines (80 loc) · 3.11 KB
/
search.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import {readFile} from "node:fs/promises";
import {basename, dirname, join} from "node:path/posix";
import he from "he";
import MiniSearch from "minisearch";
import type {Config} from "./config.js";
import {visitMarkdownFiles} from "./files.js";
import type {Logger} from "./logger.js";
import {parseMarkdown} from "./markdown.js";
import {faint, strikethrough} from "./tty.js";
// Serialized indexes are memoized per config object so that preview does not
// rebuild the index on every request; entries are held weakly.
const indexCache = new WeakMap();

// How long a cached index is considered fresh, in milliseconds (10 minutes).
const reindexingDelay = 1000 * 60 * 10;
/** Side-effect hooks used while building the search index. */
export interface SearchIndexEffects {
  /** Receives one log line per page that is indexed or explicitly skipped. */
  logger: Logger;
}

/** Effects used when the caller supplies none: log to the console. */
const defaultEffects: SearchIndexEffects = {
  logger: console
};
/**
 * Options passed to the MiniSearch constructor. The `fields` and `storeFields`
 * entries are also serialized alongside the index by `searchIndex`.
 */
const indexOptions = {
  fields: ["title", "text", "keywords"], // fields to index for full-text search
  storeFields: ["title"], // fields to return with search results
  /**
   * Normalizes a term before it is indexed.
   *
   * @param term - A single tokenized term.
   * @returns `null` (which MiniSearch treats as "discard this term") when the
   * term contains more than six numeric characters; otherwise the term
   * truncated to 15 UTF-16 code units and lowercased.
   */
  processTerm(term: string): string | null {
    // match() returns null when there are no digits; count that as zero
    // instead of comparing undefined against a number.
    const digitCount = term.match(/\p{N}/gu)?.length ?? 0;
    return digitCount > 6 ? null : term.slice(0, 15).toLowerCase();
  }
};
/**
 * Builds the serialized full-text search index for the project's Markdown pages.
 *
 * Returns `"{}"` when `config.search` is falsy. Otherwise every Markdown file
 * under `config.root` is parsed; a page is indexed when its front matter
 * `index` value is truthy, or, if `index` is not specified, when the page is
 * listed in `config.pages` (the `/index` page always counts as listed). The
 * result is cached per config object for `reindexingDelay` milliseconds.
 *
 * @param config - The project configuration (root, pages, search, md).
 * @param effects - Logging hooks; defaults to logging on the console.
 * @returns A JSON string holding the MiniSearch index together with the
 * `fields` / `storeFields` options passed along for the client.
 */
export async function searchIndex(config: Config, effects = defaultEffects): Promise<string> {
  const {root, pages, search, md} = config;
  if (!search) return "{}";

  // Serve the cached index while it is still fresh.
  const cached = indexCache.get(config);
  if (cached && cached.freshUntil > Date.now()) return cached.json;

  // Get all the listed pages (which are indexed by default)
  const pagePaths = new Set(["/index"]);
  for (const p of pages) {
    if ("path" in p) pagePaths.add(p.path);
    else for (const {path} of p.pages) pagePaths.add(path);
  }

  // Index the pages
  const index = new MiniSearch(indexOptions);
  for (const file of visitMarkdownFiles(root)) {
    const sourcePath = join(root, file);
    const source = await readFile(sourcePath, "utf8");
    const path = `/${join(dirname(file), basename(file, ".md"))}`;
    const {body, title, data} = parseMarkdown(source, {...config, path});

    // Skip pages that opt-out of indexing, and skip unlisted pages unless
    // opted-in. We only log the first case.
    const listed = pagePaths.has(path);
    const indexed = data?.index === undefined ? listed : Boolean(data.index);
    if (!indexed) {
      if (listed) effects.logger.log(`${faint("index")} ${strikethrough(sourcePath)} ${faint("(skipped)")}`);
      continue;
    }

    // Reduce the rendered body to plain, accent-free words.
    // eslint-disable-next-line import/no-named-as-default-member
    const text = he
      .decode(
        body
          .replaceAll(/[\n\r]/g, " ") // collapse to one line so "." can span a whole <style> element
          // Lazy match: remove each <style> element on its own. A greedy ".*"
          // would also delete any page text between two <style> elements.
          .replaceAll(/<style\b.*?<\/style\b[^>]*>/gi, " ")
          .replaceAll(/<[^>]+>/g, " ") // drop remaining tags
      )
      .normalize("NFD")
      .replaceAll(/[\u0300-\u036f]/g, "") // strip combining diacritical marks
      .replace(/[^\p{L}\p{N}]/gu, " "); // keep letters & numbers

    effects.logger.log(`${faint("index")} ${sourcePath}`);
    index.add({id: md.normalizeLink(path).slice("/".length), title, text, keywords: normalizeKeywords(data?.keywords)});
  }

  // Pass the serializable index options to the client.
  const json = JSON.stringify(
    Object.assign(
      {
        options: {
          fields: indexOptions.fields,
          storeFields: indexOptions.storeFields
        }
      },
      index.toJSON()
    )
  );

  indexCache.set(config, {json, freshUntil: Date.now() + reindexingDelay});
  return json;
}
/**
 * Coerces a page's `keywords` value into a single string for indexing.
 *
 * @param keywords - Any value; only its truthiness and string form are used.
 * @returns The empty string for any falsy value (undefined, null, "", 0, …),
 * otherwise `String(keywords)` (so an array becomes its comma-joined form).
 */
function normalizeKeywords(keywords: unknown): string {
  return keywords ? String(keywords) : "";
}