Skip to content

Commit 409bd29

Browse files
mojavelinuxmarcusdacoregio
authored andcommitted
rework the search indexing process
- reconfigure the docsearch scraper to store component and version for each res - switch from the Docker image to a reusable GitHub Action - add publish-docsearch-config extension to transform Handlebars into YAML
1 parent 2fb91e6 commit 409bd29

File tree

8 files changed

+159
-47
lines changed

8 files changed

+159
-47
lines changed

.github/actions/algolia-config.json

Lines changed: 0 additions & 20 deletions
This file was deleted.

.github/actions/algolia-docsearch-scraper.sh

Lines changed: 0 additions & 21 deletions
This file was deleted.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"index_name": "spring-security-docs",
3+
"start_urls": [
4+
{{#each components}}
5+
{{#each versions}}
6+
{
7+
"url": "{{{@root.site.url}}}/{{#if (eq ./activeVersionSegment '')}}(?:$|index.html$|[a-z].*){{else}}{{{./activeVersionSegment}}}/{{/if}}",
8+
"extra_attributes": {
9+
"component": "{{#if (eq ./name 'ROOT')}}spring-security{{else}}{{{./name}}}{{/if}}",
10+
"version": "{{{./version}}}",
11+
"version_rank": {{#if (eq this ../latest)}}1{{else}}2{{/if}}
12+
}
13+
}{{#unless (and @last @../last)}},{{/unless}}
14+
{{/each}}
15+
{{/each}}
16+
],
17+
"sitemap_urls": [
18+
"{{{site.url}}}/sitemap.xml"
19+
],
20+
"scrape_start_urls": true,
21+
"stop_urls": [
22+
{{#each stopPages}}
23+
"{{{@root.site.url}}}{{{./pub.url}}}"{{#unless @last}},{{/unless}}
24+
{{/each}}
25+
],
26+
"selectors": {
27+
"default": {
28+
"lvl0": {
29+
"global": true,
30+
"selector": ".nav-panel-explore .context .title, .nav-panel-explore .context .version"
31+
},
32+
"lvl1": ".doc > h1.page",
33+
"lvl2": ".doc .sect1 > h2:first-child",
34+
"lvl3": ".doc .sect2 > h3:first-child",
35+
"lvl4": ".doc .sect3 > h4:first-child",
36+
"text": ".doc p, .doc dt, .doc td.content, .doc th.tableblock"
37+
}
38+
},
39+
"selectors_exclude": [
40+
"#section-summary"
41+
],
42+
"min_indexed_level": 1,
43+
"custom_settings": {
44+
"advancedSyntax": true,
45+
"attributesForFaceting": [
46+
"component",
47+
"version"
48+
],
49+
"attributesToRetrieve": [
50+
"anchor",
51+
"content",
52+
"hierarchy",
53+
"url",
54+
"component",
55+
"version"
56+
],
57+
"attributesToSnippet": [
58+
"content:25"
59+
],
60+
"customRanking": [
61+
"desc(weight.page_rank)",
62+
"asc(version_rank)",
63+
"desc(weight.level)",
64+
"asc(weight.position)"
65+
]
66+
}
67+
}

.github/workflows/rebuild-search-index.yml

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,21 @@ jobs:
88
runs-on: ubuntu-latest
99
steps:
1010
- name: Checkout
11-
uses: actions/checkout@v2
11+
uses: actions/checkout@v3
1212
with:
1313
fetch-depth: 5
14-
- name: Run Docsearch Scraper
15-
run: $GITHUB_WORKSPACE/.github/actions/algolia-docsearch-scraper.sh "${{ secrets.ALGOLIA_APPLICATION_ID }}" "${{ secrets.ALGOLIA_WRITE_API_KEY }}" $GITHUB_WORKSPACE/.github/actions/algolia-config.json
14+
- name: Configure Indexer
15+
run: |
16+
CONFIG_FILE=.github/actions/docsearch-config.json
17+
if [ ! -f $CONFIG_FILE ]; then
18+
curl -sL -o $CONFIG_FILE $(node -p "require('fs').readFileSync('antora-playbook.yml', 'utf8').match(/^ url: (.*)/m)[1]")/docsearch-config.json
19+
fi
20+
INDEX_NAME=$(node -p "JSON.parse(require('fs').readFileSync('$CONFIG_FILE')).index_name")
21+
echo "CONFIG_FILE=${CONFIG_FILE}" >> $GITHUB_ENV
22+
echo "INDEX_NAME_TMP=${INDEX_NAME}-${GITHUB_RUN_ID}" >> $GITHUB_ENV
23+
- name: Run Indexer
24+
uses: darrenjennings/algolia-docsearch-action@master
25+
with:
26+
algolia_application_id: ${{ secrets.ALGOLIA_APP_ID }}
27+
algolia_api_key: ${{ secrets.ALGOLIA_API_KEY }}
28+
file: ${{ env.CONFIG_FILE }}

antora-playbook-for-indexing.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
antora:
2+
extensions:
3+
- '@springio/antora-extensions/partial-build-extension'
4+
- ./lib/antora/extensions/inject-collector-config.js
5+
- '@antora/collector-extension'
6+
- ./lib/antora/extensions/version-fix.js
7+
- '@antora/atlas-extension'
8+
- '@opendevise/antora-release-line-extension'
9+
- require: '@springio/antora-extensions/tabs-migration-extension'
10+
# uncomment this option to save the migrated content to the worktree
11+
#save_result: true
12+
unwrap_example_block: always
13+
- id: publish-docsearch-config
14+
require: ./lib/antora/extensions/publish-docsearch-config
15+
template_path: ./.github/actions/docsearch-config.json.hbs
16+
site:
17+
title: Spring Security
18+
url: https://docs.spring.io/spring-security/reference
19+
robots: allow
20+
git:
21+
ensure_git_suffix: false
22+
content:
23+
sources:
24+
- url: https://github.com/spring-projects/spring-security
25+
branches: main
26+
tags: 6.0.1
27+
start_path: docs
28+
asciidoc:
29+
attributes:
30+
page-pagination: ''
31+
hide-uri-scheme: '@'
32+
tabs-sync-option: '@'
33+
extensions:
34+
- '@asciidoctor/tabs'
35+
- '@springio/asciidoctor-extensions'
36+
urls:
37+
latest_version_segment_strategy: redirect:to
38+
latest_version_segment: ''
39+
redirect_facility: httpd
40+
ui:
41+
bundle:
42+
url: https://github.com/spring-io/antora-ui-spring/releases/download/latest/ui-bundle.zip
43+
snapshot: true

antora-playbook.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ antora:
1010
# uncomment this option to save the migrated content to the worktree
1111
#save_result: true
1212
unwrap_example_block: always
13+
- require: ./lib/antora/extensions/publish-docsearch-config
14+
template_path: ./.github/actions/docsearch-config.json.hbs
1315
site:
1416
title: Spring Security
1517
url: https://docs.spring.io/spring-security/reference

build.gradle

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ plugins {
44
}
55

66
antora {
7-
version = '3.2.0-alpha.2'
7+
version = '3.2.0-alpha.2'
88
options = ['--clean', '--fetch', '--stacktrace']
99
environment = [
1010
'ALGOLIA_API_KEY': '82c7ead946afbac3cf98c32446154691',
1111
'ALGOLIA_APP_ID': '244V8V9FGG',
12-
'ALGOLIA_INDEX_NAME': 'security-docs',
13-
]
12+
'ALGOLIA_INDEX_NAME': 'spring-security-docs',
13+
]
1414
dependencies = [
1515
'@antora/atlas-extension': '1.0.0-alpha.1',
1616
'@antora/collector-extension': '1.0.0-alpha.2',
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
'use strict'
2+
3+
const fsp = require('node:fs/promises')
4+
const ospath = require('node:path')
5+
6+
/**
 * An Antora extension that generates the docsearch config file from a Handlebars template and publishes it with the
 * site, where the scraper job can retrieve it.
 *
 * On the `beforePublish` event the template is read, compiled, and rendered with this context:
 *  - `components`: components sorted by name, keeping only those whose latest version has a truthy `version`
 *  - `site`: the `site` section of the playbook
 *  - `stopPages`: published pages that declare the `page-archived` or `page-noindex` AsciiDoc attribute
 *
 * The rendered output is added to the site catalog at `docsearch-config.json`.
 *
 * @param {Object} context - The registration context passed by Antora.
 * @param {Object} context.config - The configuration of this extension entry in the playbook.
 * @param {string} [context.config.templatePath='./docsearch/config.json.hbs'] - Location of the Handlebars template;
 *   it is expanded with `@antora/expand-path-helper` using the playbook directory as the `dot` option.
 */
module.exports.register = function ({ config: { templatePath = './docsearch/config.json.hbs' } }) {
  const expandPath = this.require('@antora/expand-path-helper')
  // Use an isolated Handlebars environment so these helpers are not registered on the shared default instance.
  const handlebars = this.require('handlebars').create()
  handlebars.registerHelper('eq', (a, b) => a === b)
  handlebars.registerHelper('and', (a, b) => a && b)

  this.on('beforePublish', async ({ playbook, contentCatalog, siteCatalog }) => {
    // Resolve into a local constant instead of reassigning the captured parameter, so the configured value is never
    // overwritten and every invocation expands the original path against the current playbook directory.
    const resolvedTemplatePath = expandPath(templatePath, { dot: playbook.dir })
    const templateSrc = await fsp.readFile(resolvedTemplatePath, 'utf8')
    const templateBasename = ospath.basename(resolvedTemplatePath)
    const template = handlebars.compile(templateSrc, { noEscape: true, preventIndent: true, srcName: templateBasename })
    const components = contentCatalog.getComponentsSortedBy('name').filter((component) => component.latest.version)
    const stopPages = contentCatalog.getPages((page) => {
      // Guard against pages without AsciiDoc metadata; the `in` operator throws on undefined.
      const attributes = page.asciidoc?.attributes
      return page.out && attributes != null && ('page-archived' in attributes || 'page-noindex' in attributes)
    })
    const compiled = template({ components, site: playbook.site, stopPages })
    siteCatalog.addFile({ contents: Buffer.from(compiled), out: { path: 'docsearch-config.json' } })
  })
}

0 commit comments

Comments
 (0)