From bd2ea84028e172d245b49952fd6dc3ea9eb20c6f Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Tue, 3 Mar 2026 15:09:40 -0400 Subject: [PATCH] [WIP] Fix subpar caching usage during ingestion Signed-off-by: Juan Cruz Viotti --- test/cli/CMakeLists.txt | 2 + test/cli/index/common/rebuild-modify-cache.sh | 104 +++++++++ test/cli/index/common/rebuild-two-to-three.sh | 204 ++++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100755 test/cli/index/common/rebuild-modify-cache.sh create mode 100755 test/cli/index/common/rebuild-two-to-three.sh diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt index 0cf210ae..ec9c729b 100644 --- a/test/cli/CMakeLists.txt +++ b/test/cli/CMakeLists.txt @@ -33,6 +33,8 @@ if(ONE_INDEX) sourcemeta_one_test_cli(common index extra-files-on-rebuild) sourcemeta_one_test_cli(common index extra-directories-on-rebuild) sourcemeta_one_test_cli(common index directory-schema-same-name) + sourcemeta_one_test_cli(common index rebuild-two-to-three) + sourcemeta_one_test_cli(common index rebuild-modify-cache) sourcemeta_one_test_cli(common index rebuild-cache) sourcemeta_one_test_cli(common index rebuild-to-empty) sourcemeta_one_test_cli(common index verbose-long) diff --git a/test/cli/index/common/rebuild-modify-cache.sh b/test/cli/index/common/rebuild-modify-cache.sh new file mode 100755 index 00000000..41cf74bf --- /dev/null +++ b/test/cli/index/common/rebuild-modify-cache.sh @@ -0,0 +1,104 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << EOF > "$TMP/one.json" +{ + "url": "https://sourcemeta.com/", + "contents": { + "example": { + "contents": { + "schemas": { + "baseUri": "https://example.com/", + "path": "./schemas" + } + } + } + } +} +EOF + +mkdir "$TMP/schemas" + +cat << 'EOF' > "$TMP/schemas/a.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/a" +} +EOF + +remove_threads_information() { + expr='s/ \[[^]]*[^a-z-][^]]*\]//g' + if [ "$(uname -s)" = "Darwin" ]; then + sed -i '' "$expr" "$1" + else + sed -i "$expr" "$1" + fi +} + +# Run 1: index one schema from scratch +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 + +# Run 2: modify the schema and re-index +cat << 'EOF' > "$TMP/schemas/a.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/a", + "type": "string" +} +EOF +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 + +# Run 3: re-index with no changes. Everything should be fully cached. +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 2> "$TMP/output.txt" +remove_threads_information "$TMP/output.txt" + +cat << EOF > "$TMP/expected.txt" +Writing output to: $(realpath "$TMP")/output +Using configuration: $(realpath "$TMP")/one.json +Detecting: $(realpath "$TMP")/schemas/a.json (#1) +(100%) Resolving: a.json +(100%) Ingesting: https://sourcemeta.com/example/schemas/a +(skip) Ingesting: https://sourcemeta.com/example/schemas/a [materialise] +(100%) Analysing: https://sourcemeta.com/example/schemas/a +(skip) Analysing: https://sourcemeta.com/example/schemas/a [positions] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [locations] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [dependencies] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [stats] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [health] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [bundle] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [editor] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [blaze-exhaustive] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [blaze-fast] +(skip) Analysing: https://sourcemeta.com/example/schemas/a [metadata] +( 50%) Reviewing: schemas +(100%) Reviewing: schemas +(skip) Reviewing: schemas [dependencies] +(100%) Reworking: https://sourcemeta.com/example/schemas/a +(skip) Reworking: https://sourcemeta.com/example/schemas/a [dependents] +( 0%) Producing: explorer +(skip) Producing: explorer [search] +( 33%) Producing: example/schemas +(skip) Producing: example/schemas [directory] +( 66%) Producing: example +(skip) Producing: example [directory] +(100%) Producing: . +(skip) Producing: . [directory] +( 25%) Rendering: example/schemas +(skip) Rendering: example/schemas [directory] +( 50%) Rendering: example +(skip) Rendering: example [directory] +( 75%) Rendering: . +(skip) Rendering: . [index] +(skip) Rendering: . [not-found] +(100%) Rendering: example/schemas/a +(skip) Rendering: example/schemas/a [schema] +(skip) Producing: routes.bin [routes] +EOF + +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/cli/index/common/rebuild-two-to-three.sh b/test/cli/index/common/rebuild-two-to-three.sh new file mode 100755 index 00000000..208a1a74 --- /dev/null +++ b/test/cli/index/common/rebuild-two-to-three.sh @@ -0,0 +1,204 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << EOF > "$TMP/one.json" +{ + "url": "https://sourcemeta.com/", + "contents": { + "example": { + "contents": { + "schemas": { + "baseUri": "https://example.com/", + "path": "./schemas" + } + } + } + } +} +EOF + +mkdir "$TMP/schemas" + +cat << 'EOF' > "$TMP/schemas/a.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/a" +} +EOF + +cat << 'EOF' > "$TMP/schemas/b.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/b" +} +EOF + +remove_threads_information() { + expr='s/ \[[^]]*[^a-z-][^]]*\]//g' + if [ "$(uname -s)" = "Darwin" ]; then + sed -i '' "$expr" "$1" + else + sed -i "$expr" "$1" + fi +} + +# Run 1: index two schemas from scratch +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 + +# Run 2: add a third schema and re-index +cat << 'EOF' > "$TMP/schemas/c.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/c" +} +EOF +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 + +# Run 3: re-index with no changes. All three schemas should be fully cached. +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 2> "$TMP/output.txt" +remove_threads_information "$TMP/output.txt" +grep '(skip) Ingesting:' "$TMP/output.txt" | sort > "$TMP/ingest_actual.txt" + +cat << 'EOF' | sort > "$TMP/ingest_expected.txt" +(skip) Ingesting: https://sourcemeta.com/example/schemas/a [materialise] +(skip) Ingesting: https://sourcemeta.com/example/schemas/b [materialise] +(skip) Ingesting: https://sourcemeta.com/example/schemas/c [materialise] +EOF + +diff "$TMP/ingest_actual.txt" "$TMP/ingest_expected.txt" + +cd "$TMP/output" +find . -mindepth 1 | LC_ALL=C sort > "$TMP/manifest.txt" +cd - > /dev/null + +cat << 'EOF' > "$TMP/expected_manifest.txt" +./configuration.json +./dependency-tree.metapack +./dependency-tree.metapack.deps +./explorer +./explorer/% +./explorer/%/404.metapack +./explorer/%/404.metapack.deps +./explorer/%/directory-html.metapack +./explorer/%/directory-html.metapack.deps +./explorer/%/directory.metapack +./explorer/%/directory.metapack.deps +./explorer/%/search.metapack +./explorer/%/search.metapack.deps +./explorer/example +./explorer/example/% +./explorer/example/%/directory-html.metapack +./explorer/example/%/directory-html.metapack.deps +./explorer/example/%/directory.metapack +./explorer/example/%/directory.metapack.deps +./explorer/example/schemas +./explorer/example/schemas/% +./explorer/example/schemas/%/directory-html.metapack +./explorer/example/schemas/%/directory-html.metapack.deps +./explorer/example/schemas/%/directory.metapack +./explorer/example/schemas/%/directory.metapack.deps +./explorer/example/schemas/a +./explorer/example/schemas/a/% +./explorer/example/schemas/a/%/schema-html.metapack +./explorer/example/schemas/a/%/schema-html.metapack.deps +./explorer/example/schemas/a/%/schema.metapack +./explorer/example/schemas/a/%/schema.metapack.deps +./explorer/example/schemas/b +./explorer/example/schemas/b/% +./explorer/example/schemas/b/%/schema-html.metapack +./explorer/example/schemas/b/%/schema-html.metapack.deps +./explorer/example/schemas/b/%/schema.metapack +./explorer/example/schemas/b/%/schema.metapack.deps +./explorer/example/schemas/c +./explorer/example/schemas/c/% +./explorer/example/schemas/c/%/schema-html.metapack +./explorer/example/schemas/c/%/schema-html.metapack.deps +./explorer/example/schemas/c/%/schema.metapack +./explorer/example/schemas/c/%/schema.metapack.deps +./routes.bin +./routes.bin.deps +./schemas +./schemas/example +./schemas/example/schemas +./schemas/example/schemas/a +./schemas/example/schemas/a/% +./schemas/example/schemas/a/%/blaze-exhaustive.metapack +./schemas/example/schemas/a/%/blaze-exhaustive.metapack.deps +./schemas/example/schemas/a/%/blaze-fast.metapack +./schemas/example/schemas/a/%/blaze-fast.metapack.deps +./schemas/example/schemas/a/%/bundle.metapack +./schemas/example/schemas/a/%/bundle.metapack.deps +./schemas/example/schemas/a/%/dependencies.metapack +./schemas/example/schemas/a/%/dependencies.metapack.deps +./schemas/example/schemas/a/%/dependents.metapack +./schemas/example/schemas/a/%/dependents.metapack.deps +./schemas/example/schemas/a/%/editor.metapack +./schemas/example/schemas/a/%/editor.metapack.deps +./schemas/example/schemas/a/%/health.metapack +./schemas/example/schemas/a/%/health.metapack.deps +./schemas/example/schemas/a/%/locations.metapack +./schemas/example/schemas/a/%/locations.metapack.deps +./schemas/example/schemas/a/%/positions.metapack +./schemas/example/schemas/a/%/positions.metapack.deps +./schemas/example/schemas/a/%/schema.metapack +./schemas/example/schemas/a/%/schema.metapack.deps +./schemas/example/schemas/a/%/stats.metapack +./schemas/example/schemas/a/%/stats.metapack.deps +./schemas/example/schemas/b +./schemas/example/schemas/b/% +./schemas/example/schemas/b/%/blaze-exhaustive.metapack +./schemas/example/schemas/b/%/blaze-exhaustive.metapack.deps +./schemas/example/schemas/b/%/blaze-fast.metapack +./schemas/example/schemas/b/%/blaze-fast.metapack.deps +./schemas/example/schemas/b/%/bundle.metapack +./schemas/example/schemas/b/%/bundle.metapack.deps +./schemas/example/schemas/b/%/dependencies.metapack +./schemas/example/schemas/b/%/dependencies.metapack.deps +./schemas/example/schemas/b/%/dependents.metapack +./schemas/example/schemas/b/%/dependents.metapack.deps +./schemas/example/schemas/b/%/editor.metapack +./schemas/example/schemas/b/%/editor.metapack.deps +./schemas/example/schemas/b/%/health.metapack +./schemas/example/schemas/b/%/health.metapack.deps +./schemas/example/schemas/b/%/locations.metapack +./schemas/example/schemas/b/%/locations.metapack.deps +./schemas/example/schemas/b/%/positions.metapack +./schemas/example/schemas/b/%/positions.metapack.deps +./schemas/example/schemas/b/%/schema.metapack +./schemas/example/schemas/b/%/schema.metapack.deps +./schemas/example/schemas/b/%/stats.metapack +./schemas/example/schemas/b/%/stats.metapack.deps +./schemas/example/schemas/c +./schemas/example/schemas/c/% +./schemas/example/schemas/c/%/blaze-exhaustive.metapack +./schemas/example/schemas/c/%/blaze-exhaustive.metapack.deps +./schemas/example/schemas/c/%/blaze-fast.metapack +./schemas/example/schemas/c/%/blaze-fast.metapack.deps +./schemas/example/schemas/c/%/bundle.metapack +./schemas/example/schemas/c/%/bundle.metapack.deps +./schemas/example/schemas/c/%/dependencies.metapack +./schemas/example/schemas/c/%/dependencies.metapack.deps +./schemas/example/schemas/c/%/dependents.metapack +./schemas/example/schemas/c/%/dependents.metapack.deps +./schemas/example/schemas/c/%/editor.metapack +./schemas/example/schemas/c/%/editor.metapack.deps +./schemas/example/schemas/c/%/health.metapack +./schemas/example/schemas/c/%/health.metapack.deps +./schemas/example/schemas/c/%/locations.metapack +./schemas/example/schemas/c/%/locations.metapack.deps +./schemas/example/schemas/c/%/positions.metapack +./schemas/example/schemas/c/%/positions.metapack.deps +./schemas/example/schemas/c/%/schema.metapack +./schemas/example/schemas/c/%/schema.metapack.deps +./schemas/example/schemas/c/%/stats.metapack +./schemas/example/schemas/c/%/stats.metapack.deps +./version.json +EOF + +diff "$TMP/manifest.txt" "$TMP/expected_manifest.txt"