diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8c75e3..dc41271 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: id-token: "write" contents: "read" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install Nix uses: DeterminateSystems/determinate-nix-action@main @@ -28,10 +28,11 @@ jobs: run: nix --accept-flake-config profile add nixpkgs#omnix - name: Configure permissions - run: sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 + run: sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 + - name: Run CI - run: om ci run --include-all-dependencies - + run: om ci run + - name: Collect artifacts if: ${{ success() }} run: | @@ -46,16 +47,3 @@ jobs: path: artifacts/ retention-days: 7 - - name: Create release archive - if: ${{ success() && github.ref_type == 'tag' && github.ref == 'refs/heads/main' }} - run: | - cd release-artifacts - tar czf ../zigmark-${{ github.ref_name }}.tar.gz bin/ lib/ docs/ - cd .. 
- - - name: Create GitHub Release - if: ${{ success() && github.ref_type == 'tag' && github.ref == 'refs/heads/main' }} - uses: softprops/action-gh-release@v2 - with: - files: zigmark-${{ github.ref_name }}.tar.gz - generate_release_notes: true \ No newline at end of file diff --git a/.github/workflows/flakehub-publish-tagged.yml b/.github/workflows/flakehub-publish-tagged.yml deleted file mode 100644 index 61f90e5..0000000 --- a/.github/workflows/flakehub-publish-tagged.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: "Publish tags to FlakeHub" -on: - push: - tags: - - "v?[0-9]+.[0-9]+.[0-9]+*" - workflow_dispatch: - inputs: - tag: - description: "The existing tag to publish to FlakeHub" - type: "string" - required: true -jobs: - flakehub-publish: - runs-on: "ubuntu-latest" - permissions: - id-token: "write" - contents: "read" - steps: - - uses: "actions/checkout@v6" - with: - persist-credentials: false - ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}" - - uses: "DeterminateSystems/determinate-nix-action@v3" - - uses: "DeterminateSystems/flakehub-push@main" - with: - visibility: "unlisted" - name: "sc2in/zigmark" - tag: "${{ inputs.tag }}" - include-output-paths: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..1f7b047 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,104 @@ +name: Release + +on: + push: + tags: + - "v?[0-9]+.[0-9]+.[0-9]+*" + workflow_dispatch: + inputs: + tag: + description: "Existing tag to re-publish to FlakeHub" + type: string + required: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + system: x86_64-linux + - os: ubuntu-24.04-arm + system: aarch64-linux + + runs-on: ${{ matrix.os }} + permissions: + id-token: write + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Install Nix + uses: DeterminateSystems/determinate-nix-action@main + + - name: Setup Nix cache + uses: 
DeterminateSystems/flakehub-cache-action@main + with: + use-gha-cache: "enabled" + flakehub-flake-name: "sc2in/ZigMark" + + - name: Build + run: nix build + + - name: Package + run: | + ARCHIVE="zigmark-${{ github.ref_name }}-${{ matrix.system }}.tar.gz" + tar czf "$ARCHIVE" --dereference \ + -C result \ + bin \ + lib \ + include + echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV" + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.system }} + path: ${{ env.ARCHIVE }} + retention-days: 1 + + release: + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + + - name: Generate checksums + run: | + cd dist + sha256sum *.tar.gz > checksums.txt + + - uses: softprops/action-gh-release@v2 + with: + files: | + dist/*.tar.gz + dist/checksums.txt + generate_release_notes: true + + flakehub-publish: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}" + + - uses: DeterminateSystems/determinate-nix-action@v3 + + - uses: DeterminateSystems/flakehub-push@main + with: + visibility: unlisted + name: sc2in/zigmark + tag: "${{ inputs.tag }}" + include-output-paths: true diff --git a/README.md b/README.md index 868c91e..ea14f51 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,22 @@ A CommonMark-compliant Markdown parser and HTML renderer for Zig. Passes **all 6 Builds as both a **CLI tool** and a **C-callable shared library** (`libzigmark.so`). 
+## Performance + + +_Last updated: 2026-03-18_ + +| Command | Mean [ms] | Min [ms] | Max [ms] | Relative | +|:---|---:|---:|---:|---:| +| `zigmark (ReleaseSafe)` | 2.7 ± 1.3 | 1.5 | 14.1 | 1.40 ± 1.10 | +| `zigmark (ReleaseSmall)` | 2.7 ± 1.1 | 1.5 | 9.3 | 1.41 ± 1.06 | +| `zigmark (ReleaseFast)` | 2.4 ± 1.4 | 1.4 | 15.1 | 1.29 ± 1.07 | +| `discount` | 2.0 ± 0.9 | 1.1 | 9.8 | 1.07 ± 0.82 | +| `lowdown` | 1.9 ± 1.2 | 1.0 | 9.7 | 1.00 | +| `pandoc` | 150.2 ± 14.3 | 118.0 | 245.8 | 79.09 ± 49.62 | + + + ## Installation Add `zigmark` as a dependency in your `build.zig.zon`: @@ -67,15 +83,63 @@ zigmark -f ai README.md Produces a token-efficient AST representation suitable for LLM consumption. +### Extract Frontmatter as JSON + +```bash +zigmark -f frontmatter post.md +``` + +Parses the frontmatter block (YAML `---`, TOML `+++`, JSON `{`, or ZON `.{`) and +emits it as pretty-printed JSON. Outputs `{}` when no frontmatter is present, +so the output is always valid JSON and safe to pipe. + +```bash +# Pipe into jq +zigmark -f frontmatter post.md | jq '.title' + +# Extract a nested key +zigmark -f frontmatter post.md | jq '.extra.author' +``` + +### Edit Frontmatter + +`--format markdown` re-serialises the frontmatter (in its original format) and +passes the body through verbatim. 
Use `--set` and `--delete` to mutate fields +before writing: + +```bash +# Update a field and delete another, keep body unchanged +zigmark -f markdown --set title="New Title" --delete draft post.md + +# Set a nested key (intermediate objects are created automatically) +zigmark -f markdown --set extra.owner=SC2 post.md + +# Write the result back over the original file +zigmark -f markdown --set date=2025-06-01 post.md -o post.md +``` + +`--format normalize` does the same frontmatter handling but also reconstructs +the Markdown body from the AST, normalising headings to ATX style, links to +inline, and code blocks to fenced: + +```bash +zigmark -f normalize --set title="Clean" post.md +``` + ### Options ``` Usage: zigmark [OPTIONS] [FILE] - -h, --help Display this help and exit. - -v, --version Print version and exit. - -f, --format Output format: "html" (default), "ast", or "ai". - -o, --output Write output to FILE instead of stdout. + -h, --help Display this help and exit. + -v, --version Print version and exit. + -f, --format Output format: "html" (default), "ast", "ai", + "terminal", "frontmatter", "markdown", or "normalize". + -o, --output Write output to FILE instead of stdout. + -s, --set ... Set a frontmatter field (KEY=VALUE). Repeatable. + Applies to: markdown, normalize, frontmatter formats. + -d, --delete ... Delete a frontmatter field (dot-path). Repeatable. + Applies to: markdown, normalize, frontmatter formats. ``` ## Zig Library Usage @@ -129,6 +193,70 @@ var links = try query.links(allocator); const para_count = query.count(.paragraph); ``` +### Frontmatter + +Extract and query structured metadata from the top of a Markdown file. All four formats are normalised to `std.json.Value` for uniform access. 
+ +| Format | Opening marker | Example | +|--------|---------------|---------| +| YAML | `---` | `---\ntitle: Hello\n---` | +| TOML | `+++` | `+++\ntitle = "Hello"\n+++` | +| JSON | `{` | `{"title": "Hello"}` | +| ZON | `.{` | `.{ .title = "Hello" }` | + +```zig +const FrontMatter = zigmark.Frontmatter; + +// Parse from a full Markdown document (auto-detects format) +var fm = try FrontMatter.initFromMarkdown(allocator, markdown_source); +defer fm.deinit(); + +// Dot-separated key lookup — returns ?std.json.Value +const title = fm.get("title"); // top-level key +const host = fm.get("server.host"); // nested key +const first = fm.get("tags"); // array → .array variant + +if (title) |t| std.debug.print("title: {s}\n", .{t.string}); + +// Or parse a bare frontmatter string directly +var fm2 = try FrontMatter.init(allocator, source, .toml); +defer fm2.deinit(); + +// Mutate: set a value at a dot-separated path (creates intermediates as needed) +try fm.set("title", .{ .string = "New Title" }); +try fm.set("extra.owner", .{ .string = "SC2" }); +try fm.set("draft", .{ .bool = false }); + +// Delete a key +_ = fm.delete("draft"); + +// Deep-merge another frontmatter document (overlay keys win on conflict) +var overlay = try FrontMatter.initFromMarkdown(allocator, other_source); +defer overlay.deinit(); +try fm.merge(overlay); + +// Re-serialise in the original format (YAML/TOML/JSON/ZON) +const serialized = try fm.serialize(allocator); +defer allocator.free(serialized); +``` + +ZON frontmatter supports the full frontmatter subset: anonymous structs, array tuples, strings (with escape sequences), integers (decimal / hex / octal / binary), floats, booleans, `null`, and enum literals (returned as strings). 
+ +```zig +// ZON example +const source = + \\.{ + \\ .title = "My Post", + \\ .tags = .{ "zig", "wasm" }, + \\ .draft = false, + \\ .weight = 10, + \\ .status = .published, // enum literal → "published" + \\} +; +var fm = try FrontMatter.init(allocator, source, .zon); +defer fm.deinit(); +``` + ### Custom Renderers The renderer interface is pluggable — implement a `render(Allocator, AST.Document) ![]u8` function: @@ -147,15 +275,25 @@ The build produces `libzigmark.so` and `include/zigmark.h` — a self-contained ```c #include "zigmark.h" -ZigmarkDocument *zigmark_parse(const char *input, size_t len); -void zigmark_free_document(ZigmarkDocument *doc); - -char *zigmark_render_html(ZigmarkDocument *doc); -char *zigmark_render_ast(ZigmarkDocument *doc); -char *zigmark_render_ai(ZigmarkDocument *doc); -void zigmark_free_string(char *str); - -const char *zigmark_version(void); +ZigmarkDocument *zigmark_parse(const char *input, size_t len); +void zigmark_free_document(ZigmarkDocument *doc); + +char *zigmark_render_html(ZigmarkDocument *doc); +char *zigmark_render_ast(ZigmarkDocument *doc); +char *zigmark_render_ai(ZigmarkDocument *doc); +void zigmark_free_string(char *str); + +const char *zigmark_version(void); + +/* Frontmatter */ +ZigmarkFrontmatter *zigmark_frontmatter_parse(const char *input, size_t len); +void zigmark_frontmatter_free(ZigmarkFrontmatter *fm); +char *zigmark_frontmatter_to_json(ZigmarkFrontmatter *fm); +char *zigmark_frontmatter_get(ZigmarkFrontmatter *fm, const char *key); +char *zigmark_frontmatter_serialize(ZigmarkFrontmatter *fm); +int zigmark_frontmatter_merge(ZigmarkFrontmatter *base, const ZigmarkFrontmatter *overlay); +int zigmark_frontmatter_set(ZigmarkFrontmatter *fm, const char *path, const char *json_value); +int zigmark_frontmatter_set_raw(ZigmarkFrontmatter *fm, const char *path, const char *raw); ``` ### Example @@ -265,7 +403,7 @@ Run the GFM suite with `zig build gfm`. 
### Extensions -- **Frontmatter** — YAML (`---`) and TOML (`+++`) extraction, parsed as JSON +- **Frontmatter** — YAML (`---`), TOML (`+++`), JSON (`{`), and ZON (`.{`) extraction, all normalised to `std.json.Value` - **Footnotes** — `[^label]` references and definitions - **GFM Tables** — pipe-delimited tables with optional column alignment - **GFM Task lists** — `- [x]` / `- [ ]` items rendered as disabled checkboxes @@ -351,6 +489,9 @@ nix develop # WASM live preview demo nix run .#wasm-demo + +# Run CLI performance benchmark (compares zigmark vs discount, lowdown, and pandoc; updates README) +nix run .#bench ``` Requires **Zig 0.15.2** or later. @@ -362,13 +503,14 @@ Requires **Zig 0.15.2** or later. - **`HTMLRenderer`** — CommonMark-compliant HTML serialiser - **`ASTRenderer`** — Human-readable tree diagram with box-drawing characters - **`AIRenderer`** — Token-efficient AST representation for LLM consumption +- **`MarkdownRenderer`** — AST→Markdown normaliser; converts headings to ATX, links to inline, code blocks to fenced - **`Renderer`** — Type-erased vtable interface for pluggable output backends -- **`Frontmatter`** — YAML/TOML metadata extraction via [zig-yaml](https://github.com/kubkon/zig-yaml) and [tomlz](https://github.com/tsunaminoai/tomlz) +- **`Frontmatter`** — YAML/TOML/JSON/ZON metadata extraction, mutation (`set`, `delete`, `merge`), and re-serialisation; YAML via [zig-yaml](https://github.com/kubkon/zig-yaml), TOML via [tomlz](https://github.com/tsunaminoai/tomlz), JSON via `std.json`, ZON via a built-in recursive-descent parser - **C ABI** — Opaque-pointer API in `root.zig` exported as `libzigmark.so` ## Future Plans -- Additional renderers (LaTeX, plain text, Markdown normaliser) +- Additional renderers (LaTeX, plain text) - Streaming parser for large documents - AST modification API diff --git a/build.zig b/build.zig index 037b457..eb610b9 100644 --- a/build.zig +++ b/build.zig @@ -65,9 +65,20 @@ pub fn build(b: *std.Build) void { zigmark.addImport("mecha", 
mecha.module("mecha")); zigmark.addImport("dt", dt.module("datetime")); - // The shared library needs its own module instance so the exe doesn't - // get implicitly linked against the .so (which causes TLS / undefined - // symbol errors in ReleaseSafe). + // The shared library needs its own module instance. When the exe and .so + // share a module, lld rejects the build because the .so's PIC TLS access + // calls __tls_get_addr (provided by glibc) but the exe is fully static. + // Fixing that by adding linkLibC() compiles the module into the exe AND + // makes it dynamically linked against libc — but the .so is still never + // actually loaded at runtime because all zigmark symbols are already + // satisfied by the exe's own compiled copy. Net result: larger exe with + // a libc dependency and no size saving. + // + // The root cause is that Zig's module system always compiles code inline; + // there is no "header-only import" concept. The exe must either (a) keep + // the module compiled in (current approach, self-contained 4 MB binary) or + // (b) be rewritten to use extern C API declarations so @import("zigmark") + // is never used. (a) is the right trade-off for a CLI tool. const zigmark_lib = b.addModule("zigmark_lib", .{ .root_source_file = b.path("src/root.zig"), .target = target, @@ -228,8 +239,14 @@ pub fn build(b: *std.Build) void { } // ── spec runs wired into zig build test ─────────────────────────────────── - // Reuse the same spec_step pathway (summary mode now exits 1 on any failure). - test_step.dependOn(spec_step); + // Use --quiet: silent on full pass; dumps the table only on a regression. + // The verbose summary table is reserved for `zig build spec`. 
+ const spec_check_cmark = b.addRunArtifact(spec_exe); + spec_check_cmark.addArgs(&.{ "--quiet", "--spec", spec_txt_path.getPath(b) }); + const spec_check_gfm = b.addRunArtifact(spec_exe); + spec_check_gfm.addArgs(&.{ "--quiet", "--gfm", "--spec", gfm_spec_txt_path.getPath(b) }); + spec_check_gfm.step.dependOn(&spec_check_cmark.step); + test_step.dependOn(&spec_check_gfm.step); // ── WASM build ─────────────────────────────────────────────────────────── diff --git a/flake.nix b/flake.nix index ceb8a19..b1f12c8 100644 --- a/flake.nix +++ b/flake.nix @@ -136,6 +136,94 @@ ${pkgs.python3}/bin/python3 -m http.server "$PORT" -d zig-out/wasm ''}"; }; + + # CLI performance benchmark vs discount, lowdown and pandoc: nix run .#bench + # + # Builds zigmark (ReleaseSafe, ReleaseSmall, ReleaseFast), then runs hyperfine comparing + # them with discount, lowdown and pandoc on the CommonMark spec file. Results are written + # back into the README.md Performance section. + bench = let + bench-app = pkgs.writeShellApplication { + name = "zigmark-bench"; + runtimeInputs = with pkgs; [hyperfine pandoc discount lowdown python3 zig]; + text = '' + set -euo pipefail + REPO="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" + cd "$REPO" + + # Use the CommonMark spec as a large representative input. + # Fall back to README.md if the dependency cache is unavailable. 
+ SPEC_TXT="$(find "$REPO/.zig-cache" -name "spec.txt" -path "*/commonmark_spec*" 2>/dev/null | head -1 || true)" + if [ -z "$SPEC_TXT" ]; then + echo " spec.txt not cached yet — using README.md as benchmark input" + BENCH_FILE="$REPO/README.md" + else + BENCH_FILE="$SPEC_TXT" + fi + echo " Benchmark file: $BENCH_FILE ($(wc -c < "$BENCH_FILE") bytes)" + + echo "▸ Building zigmark (ReleaseSafe, ReleaseSmall, ReleaseFast)…" + zig build -Doptimize=ReleaseSafe && cp zig-out/bin/zigmark /tmp/zigmark-safe + zig build -Doptimize=ReleaseSmall && cp zig-out/bin/zigmark /tmp/zigmark-small + zig build -Doptimize=ReleaseFast && cp zig-out/bin/zigmark /tmp/zigmark-fast + + RESULT_MD=$(mktemp /tmp/bench-result-XXXXXX.md) + trap 'rm -f "$RESULT_MD" /tmp/zigmark-safe /tmp/zigmark-small /tmp/zigmark-fast' EXIT + + echo "▸ Running hyperfine…" + hyperfine \ + -N \ + --warmup 50 \ + --runs 500 \ + --export-markdown "$RESULT_MD" \ + --command-name "zigmark (ReleaseSafe)" "/tmp/zigmark-safe -o /dev/null $BENCH_FILE" \ + --command-name "zigmark (ReleaseSmall)" "/tmp/zigmark-small -o /dev/null $BENCH_FILE" \ + --command-name "zigmark (ReleaseFast)" "/tmp/zigmark-fast -o /dev/null $BENCH_FILE" \ + --command-name "discount" "markdown -o /dev/null $BENCH_FILE" \ + --command-name "lowdown" "lowdown -o /dev/null $BENCH_FILE" \ + --command-name "pandoc" "pandoc -o /dev/null $BENCH_FILE" + + echo "" + echo "▸ Updating README.md…" + python3 - "$REPO/README.md" "$RESULT_MD" <<'PYEOF' + import re, sys, pathlib, datetime + + readme_path = pathlib.Path(sys.argv[1]) + bench_path = pathlib.Path(sys.argv[2]) + + bench_md = bench_path.read_text() + readme = readme_path.read_text() + + today = datetime.date.today().isoformat() + new_section = ( + f"\n" + f"_Last updated: {today}_\n\n" + f"{bench_md}\n" + f"" + ) + + updated = re.sub( + r".*?", + new_section, + readme, + flags=re.DOTALL, + ) + + if updated == readme: + print(" WARNING: bench markers not found in README.md — appending.") + updated = 
readme.rstrip() + "\n\n" + new_section + "\n" + + readme_path.write_text(updated) + print(f" README.md updated.") + PYEOF + + echo "✓ Done. Benchmark results written to README.md." + ''; + }; + in { + type = "app"; + program = "${bench-app}/bin/zigmark-bench"; + }; } ); }; diff --git a/include/zigmark.h b/include/zigmark.h index 3324364..f420c6d 100644 --- a/include/zigmark.h +++ b/include/zigmark.h @@ -67,6 +67,125 @@ void zigmark_free_string(char *str); */ const char *zigmark_version(void); +/* ── Frontmatter ──────────────────────────────────────────────────────────── */ + +/** Opaque handle to parsed frontmatter metadata. */ +typedef struct ZigmarkFrontmatter ZigmarkFrontmatter; + +/** + * Parse frontmatter from a UTF-8 Markdown buffer of @p len bytes. + * + * Auto-detects the format: + * - YAML — opening @c --- + * - TOML — opening @c +++ + * - JSON — opening @c { + * - ZON — opening @c .{ + * + * @param input Pointer to the Markdown source (need not be NUL-terminated). + * @param len Length of the input in bytes. + * @return An opaque frontmatter handle, or NULL if no valid frontmatter + * is present or on parse / allocation failure. + * Free with zigmark_frontmatter_free(). + */ +ZigmarkFrontmatter *zigmark_frontmatter_parse(const char *input, size_t len); + +/** + * Free a frontmatter handle previously returned by zigmark_frontmatter_parse(). + */ +void zigmark_frontmatter_free(ZigmarkFrontmatter *fm); + +/** + * Serialize the entire frontmatter to a pretty-printed JSON string. + * + * @return A NUL-terminated JSON string, or NULL on failure. + * Free with zigmark_free_string(). + */ +char *zigmark_frontmatter_to_json(ZigmarkFrontmatter *fm); + +/** + * Look up a dot-separated key path in the frontmatter and return its value + * as a compact JSON string. + * + * Examples: @c "title", @c "extra.author", @c "tags" + * + * @param fm A handle returned by zigmark_frontmatter_parse(). + * @param key A NUL-terminated dot-separated key path. 
+ * @return A NUL-terminated JSON string for the value, or NULL if the key + * is not found or on failure. + * Free with zigmark_free_string(). + */ +char *zigmark_frontmatter_get(ZigmarkFrontmatter *fm, const char *key); + +/** + * Serialize the frontmatter back to its original format, including delimiters. + * + * The output reflects the current state of the parsed value tree, so any + * programmatic modifications are included. The format matches whatever was + * originally detected at parse time: + * + * | Format | Delimiters | Example output | + * |--------|------------|-----------------------------| + * | YAML | `---` | `---\ntitle: Hello\n---\n` | + * | TOML | `+++` | `+++\ntitle = "Hello"\n+++\n`| + * | JSON | none | `{\n "title": "Hello"\n}\n` | + * | ZON | none | `.{ .title = "Hello" }\n` | + * + * @param fm A handle returned by zigmark_frontmatter_parse(). + * @return A NUL-terminated string in the original format, or NULL on + * failure. Free with zigmark_free_string(). + */ +char *zigmark_frontmatter_serialize(ZigmarkFrontmatter *fm); + +/** + * Deep-merge @p overlay into @p base (overlay keys win for leaf conflicts). + * + * The merge is recursive for object values: keys present only in the overlay + * are added to the base; keys present in both are overwritten (scalar) or + * recursively merged (object). The base retains its original format. + * + * @param base The document to merge into; modified in place. + * @param overlay The document to merge from; left unmodified. + * @return 0 on success, -1 on failure. + */ +int zigmark_frontmatter_merge(ZigmarkFrontmatter *base, + const ZigmarkFrontmatter *overlay); + +/** + * Set a value at a dot-separated key path using a JSON-encoded value string. + * + * Intermediate objects that do not exist are created automatically. + * + * @param fm A handle returned by zigmark_frontmatter_parse(). + * @param path NUL-terminated dot-separated key path (e.g. @c "extra.owner"). 
+ * @param json_value NUL-terminated compact JSON string for the new value + * (e.g. @c "\"hello\"", @c "42", @c "true", @c "[1,2]"). + * @return 0 on success, -1 on failure (bad JSON, OOM, or bad path). + */ +int zigmark_frontmatter_set(ZigmarkFrontmatter *fm, + const char *path, + const char *json_value); + +/** + * Set a value at a dot-separated key path using an auto-typed raw string. + * + * Type inference rules (applied in order): + * - @c "true" / @c "false" → boolean + * - @c "null" → null + * - Valid integer literal (no @c .) → integer + * - Valid float literal → float + * - Everything else → string + * + * Intermediate objects that do not exist are created automatically. + * + * @param fm A handle returned by zigmark_frontmatter_parse(). + * @param path NUL-terminated dot-separated key path. + * @param raw NUL-terminated raw value string. + * @return 0 on success, -1 on failure. + */ +int zigmark_frontmatter_set_raw(ZigmarkFrontmatter *fm, + const char *path, + const char *raw); + #ifdef __cplusplus } #endif diff --git a/src/main.zig b/src/main.zig index a8294b7..aa9d5c7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,5 +1,12 @@ const std = @import("std"); +pub const std_options: std.Options = .{ + .log_scope_levels = &.{ + .{ .scope = .tokenizer, .level = .warn }, + .{ .scope = .parser, .level = .warn }, + }, +}; + const clap = @import("clap"); const zigmark = @import("zigmark"); const AST = zigmark.AST; @@ -9,17 +16,19 @@ pub fn main() !void { var gpa_impl: std.heap.GeneralPurposeAllocator(.{}) = .{}; defer _ = gpa_impl.deinit(); const gpa = gpa_impl.allocator(); - - // All allocations are tracked by the GPA; doc.deinit() frees - // everything the parser and renderers allocate. const alloc = gpa; // ── CLI definition ─────────────────────────────────────────────────────── const params = comptime clap.parseParamsComptime( \\-h, --help Display this help and exit. \\-v, --version Print version and exit. 
- \\-f, --format Output format: "html" (default), "ast", "ai", or "terminal". + \\-f, --format Output format: "html" (default), "ast", "ai", "terminal", + \\ "frontmatter", "markdown", or "normalize". \\-o, --output Write output to FILE instead of stdout. + \\-s, --set ... Set a frontmatter field (KEY=VALUE). Repeatable. + \\ Applies to: markdown, normalize, frontmatter formats. + \\-d, --delete ... Delete a frontmatter field (dot-path). Repeatable. + \\ Applies to: markdown, normalize, frontmatter formats. \\ Input markdown file (reads stdin if omitted). \\ ); @@ -53,6 +62,18 @@ pub fn main() !void { \\ \\Usage: zigmark [OPTIONS] [FILE] \\ + \\Formats: + \\ html CommonMark-compliant HTML (default) + \\ ast Human-readable AST tree diagram + \\ ai Token-efficient AI representation + \\ terminal ANSI-styled terminal output + \\ frontmatter Print parsed frontmatter as JSON + \\ markdown Passthrough: re-serialize frontmatter in its original + \\ format and pass the body through verbatim. Useful for + \\ frontmatter editing without touching the body. + \\ normalize Reconstruct normalized Markdown from the AST. Converts + \\ headings to ATX, links to inline, code blocks to fenced. + \\ \\{s} , .{ version, help_writer.buffered() }); return; @@ -89,21 +110,113 @@ pub fn main() !void { // ── Resolve format ─────────────────────────────────────────────────────── const format: []const u8 = if (res.args.format) |f| f else "html"; - // ── Parse ──────────────────────────────────────────────────────────────── + // ── Output ─────────────────────────────────────────────────────────────── + const out_file = getOutputFile(res.args.output); + defer closeOutput(res.args.output, out_file); + var out_buf: [8192]u8 = undefined; + var writer = out_file.writer(&out_buf); + + // ── Frontmatter passthrough ("markdown") ───────────────────────────────── + // Parse frontmatter, apply --set/--delete, re-serialize in original format, + // then pass the body through verbatim. 
Does NOT re-parse the body. + if (std.mem.eql(u8, format, "markdown")) { + const body_off = zigmark.Frontmatter.bodyOffset(input) orelse 0; + const body = input[body_off..]; + + if (body_off > 0) { + var fm = zigmark.Frontmatter.initFromMarkdown(alloc, input) catch |err| { + std.debug.print("error: failed to parse frontmatter: {}\n", .{err}); + return err; + }; + defer fm.deinit(); + applyFrontmatterMods(&fm, res.args.set, res.args.delete); + const fm_str = fm.serialize(alloc) catch |err| { + std.debug.print("error: failed to serialize frontmatter: {}\n", .{err}); + return err; + }; + defer alloc.free(fm_str); + writer.interface.writeAll(fm_str) catch {}; + // Ensure one blank line between frontmatter and body + if (body.len > 0 and body[0] != '\n') writer.interface.writeByte('\n') catch {}; + } + writer.interface.writeAll(body) catch {}; + writer.interface.flush() catch {}; + return; + } + + // ── Frontmatter JSON dump ("frontmatter") ──────────────────────────────── + if (std.mem.eql(u8, format, "frontmatter")) { + var fm = zigmark.Frontmatter.initFromMarkdown(alloc, input) catch |err| switch (err) { + error.InvalidFrontMatter => { + writer.interface.writeAll("{}\n") catch {}; + writer.interface.flush() catch {}; + return; + }, + else => { + std.debug.print("error: failed to parse frontmatter: {}\n", .{err}); + return err; + }, + }; + defer fm.deinit(); + applyFrontmatterMods(&fm, res.args.set, res.args.delete); + const json_out = std.json.Stringify.valueAlloc(alloc, fm.root, .{ .whitespace = .indent_2 }) catch |err| { + std.debug.print("error: failed to serialize frontmatter to JSON: {}\n", .{err}); + return err; + }; + defer alloc.free(json_out); + writer.interface.writeAll(json_out) catch {}; + writer.interface.writeAll("\n") catch {}; + writer.interface.flush() catch {}; + return; + } + + // ── Parse body (strip frontmatter for body-only formats) ───────────────── + const body_start = zigmark.Frontmatter.bodyOffset(input) orelse 0; + const body_input = 
input[body_start..]; + var parser = zigmark.Parser.init(); - var doc = parser.parseMarkdown(alloc, input) catch |err| { + var doc = parser.parseMarkdown(alloc, body_input) catch |err| { std.debug.print("error: failed to parse markdown: {}\n", .{err}); return err; }; defer doc.deinit(alloc); - // ── Output ─────────────────────────────────────────────────────────────── - const out_file = getOutputFile(res.args.output); - defer closeOutput(res.args.output, out_file); - var out_buf: [8192]u8 = undefined; - var writer = out_file.writer(&out_buf); + // ── Normalize ("normalize") — full AST→Markdown reconstruction ─────────── + if (std.mem.eql(u8, format, "normalize")) { + // Re-serialize frontmatter if present (with any mods) + if (body_start > 0) { + var fm = zigmark.Frontmatter.initFromMarkdown(alloc, input) catch null; + if (fm) |*f| { + defer f.deinit(); + applyFrontmatterMods(f, res.args.set, res.args.delete); + const fm_str = f.serialize(alloc) catch null; + if (fm_str) |s| { + defer alloc.free(s); + writer.interface.writeAll(s) catch {}; + writer.interface.writeByte('\n') catch {}; + } + } + } + const md = zigmark.MarkdownRenderer.render(alloc, doc) catch |err| { + std.debug.print("error: failed to render normalized markdown: {}\n", .{err}); + return err; + }; + defer alloc.free(md); + writer.interface.writeAll(md) catch {}; + writer.interface.flush() catch {}; + return; + } - if (std.mem.eql(u8, format, "ast")) { + // ── HTML ───────────────────────────────────────────────────────────────── + if (std.mem.eql(u8, format, "html")) { + const h = zigmark.HTMLRenderer.render(alloc, doc) catch |err| { + std.debug.print("error: failed to render HTML: {}\n", .{err}); + return err; + }; + defer alloc.free(h); + writer.interface.writeAll(h) catch {}; + writer.interface.flush() catch {}; + } else if (std.mem.eql(u8, format, "ast")) { const ast_output = zigmark.ASTRenderer.render(alloc, doc) catch |err| { std.debug.print("error: failed to render AST: {}\n", .{err}); return 
err; @@ -111,14 +224,6 @@ pub fn main() !void { defer alloc.free(ast_output); writer.interface.writeAll(ast_output) catch {}; writer.interface.flush() catch {}; - } else if (std.mem.eql(u8, format, "html")) { - const html = zigmark.HTMLRenderer.render(alloc, doc) catch |err| { - std.debug.print("error: failed to render HTML: {}\n", .{err}); - return err; - }; - defer alloc.free(html); - writer.interface.writeAll(html) catch {}; - writer.interface.flush() catch {}; } else if (std.mem.eql(u8, format, "ai")) { const a = zigmark.AIRenderer.render(alloc, doc) catch |err| { std.debug.print("error: failed to render AI AST: {}\n", .{err}); @@ -136,11 +241,30 @@ pub fn main() !void { writer.interface.writeAll(term) catch {}; writer.interface.flush() catch {}; } else { - std.debug.print("error: unknown format '{s}'. Use 'html', 'ast', 'ai', or 'terminal'.\n", .{format}); + std.debug.print( + "error: unknown format '{s}'. Use 'html', 'ast', 'ai', 'terminal', 'frontmatter', 'markdown', or 'normalize'.\n", + .{format}, + ); return error.InvalidArgument; } } +// ── Frontmatter modification helper ────────────────────────────────────────── + +/// Apply `--set` and `--delete` flags to `fm`. Errors from individual +/// operations are silently skipped so a bad flag does not abort the run. +fn applyFrontmatterMods( + fm: *zigmark.Frontmatter, + sets: []const []const u8, + deletes: []const []const u8, +) void { + for (sets) |arg| { + const fa = zigmark.Frontmatter.parseFieldArg(arg) catch continue; + fm.set(fa.path, fa.value) catch continue; + } + for (deletes) |key| _ = fm.delete(key); +} + // ── Output helpers ─────────────────────────────────────────────────────────── fn getOutputFile(output_path: ?[]const u8) std.fs.File { diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index 4a29920..ddc7ab0 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -1,8 +1,9 @@ //! Frontmatter parser for Markdown documents. //! -//! 
Extracts and parses YAML (`---`) or TOML (`+++`) frontmatter blocks -//! from the beginning of a Markdown file. The parsed key/value tree is -//! normalised into `std.json.Value` for uniform downstream access. +//! Extracts and parses YAML (`---`), TOML (`+++`), JSON (`{`), or ZON (`.{`) +//! frontmatter blocks from the beginning of a Markdown file. The parsed +//! key/value tree is normalised into `std.json.Value` for uniform downstream +//! access. const std = @import("std"); const Array = std.ArrayList; @@ -22,26 +23,34 @@ root: JsonValue, /// The raw frontmatter source text (without delimiters). source: []const u8, original: Origin, +/// Arena that owns all memory allocated by `set()` and `merge()`. +/// Lazily initialised on first mutation; freed by `deinit()`. +set_arena: ?std.heap.ArenaAllocator = null, const Origin = union(Kind) { yaml: Yaml, toml: tomlz.Table, + /// `std.json.Parsed` owns all json memory in an arena; we must NOT call + /// `deinitJsonValue` on `.root` for this variant. + json: std.json.Parsed(JsonValue), + /// Arena that owns all ZON-parsed memory; same caveat as `.json`. + zon: std.heap.ArenaAllocator, }; const Kind = enum { yaml, toml, + json, + zon, }; -/// Parse `source` as frontmatter of `input_kind` (YAML or TOML). +/// Parse `source` as frontmatter of `input_kind` (YAML, TOML, JSON, or ZON). /// Returns a `FrontMatter` whose `.root` field contains the parsed tree. 
pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter { var orig: Origin = undefined; const value: JsonValue = switch (input_kind) { .yaml => blk: { var y = Yaml{ .source = source }; - // defer y.deinit(alloc); - y.load(alloc) catch |err| switch (err) { error.ParseFailure => { std.debug.assert(y.parse_errors.errorMessageCount() > 0); @@ -56,13 +65,22 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter }, .toml => blk: { const doc = try tomlz.parser.parse(alloc, source); - // defer doc.deinit(alloc); - var val: tomlz.Value = .{ .table = doc }; orig = .{ .toml = doc }; break :blk try tomlValueToJson(alloc, &val); }, - // else => return error.UnhandledSourceType, + .json => blk: { + const parsed = try std.json.parseFromSlice(JsonValue, alloc, source, .{}); + orig = .{ .json = parsed }; + break :blk parsed.value; + }, + .zon => blk: { + var arena = std.heap.ArenaAllocator.init(alloc); + errdefer arena.deinit(); + const v = try parseZon(arena.allocator(), source); + orig = .{ .zon = arena }; + break :blk v; + }, }; return .{ .allocator = alloc, @@ -73,27 +91,339 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter } /// Release all memory owned by this `FrontMatter` instance. pub fn deinit(self: *FrontMatter) void { - deinitJsonValue(self.allocator, &self.root); switch (self.original) { - inline else => |*o| o.deinit(self.allocator), + .yaml => |*o| { + deinitJsonValue(self.allocator, &self.root); + o.deinit(self.allocator); + }, + .toml => |*o| { + deinitJsonValue(self.allocator, &self.root); + o.deinit(self.allocator); + }, + // Arena-based: frees self.root memory too — do NOT call deinitJsonValue. + .json => |*p| p.deinit(), + .zon => |*a| a.deinit(), } + if (self.set_arena) |*a| a.deinit(); } /// Extract and parse frontmatter from a full Markdown document. /// -/// The first line must be `---` (YAML) or `+++` (TOML). The -/// frontmatter extends to the next matching delimiter. 
+/// Supported opening markers: +/// `---` → YAML (closes at next `---`) +/// `+++` → TOML (closes at next `+++`) +/// `{` → JSON (self-delimiting object) +/// `.{` → ZON (self-delimiting anonymous struct) pub fn initFromMarkdown(alloc: Allocator, txt: []const u8) !FrontMatter { if (txt.len < 3) return error.InvalidFrontMatter; + + // JSON: self-delimiting object starting with '{' + if (txt[0] == '{') { + const end = findBraceEnd(txt) orelse return error.InvalidFrontMatter; + return init(alloc, txt[0..end], .json); + } + + // ZON: self-delimiting anonymous struct starting with '.{' + if (txt.len >= 2 and txt[0] == '.' and txt[1] == '{') { + const end = findBraceEnd(txt[1..]) orelse return error.InvalidFrontMatter; + return init(alloc, txt[0 .. end + 1], .zon); + } + const kind: Kind = switch (txt[0]) { '-' => .yaml, '+' => .toml, else => return error.InvalidFrontMatter, }; - const end_fm = std.mem.indexOfPos(u8, txt, 3, if (kind == .yaml) "---" else if (kind == .toml) "+++" else "") orelse return error.InvalidFrontMatter; + const end_fm = std.mem.indexOfPos(u8, txt, 3, if (kind == .yaml) "---" else "+++") orelse + return error.InvalidFrontMatter; return init(alloc, txt[3..end_fm], kind); } +// ── JSON / ZON helpers ─────────────────────────────────────────────────────── + +/// Return the index one past the `}` that closes the `{` at `txt[0]`. +/// Accounts for string literals so braces inside strings are ignored. +/// Returns `null` if the input is malformed or has no matching close. 
+fn findBraceEnd(txt: []const u8) ?usize { + if (txt.len == 0 or txt[0] != '{') return null; + var depth: usize = 0; + var in_string = false; + var i: usize = 0; + while (i < txt.len) : (i += 1) { + const c = txt[i]; + if (in_string) { + if (c == '\\') { + i += 1; // skip escaped char + } else if (c == '"') { + in_string = false; + } + } else switch (c) { + '"' => in_string = true, + '{' => depth += 1, + '}' => { + depth -= 1; + if (depth == 0) return i + 1; + }, + else => {}, + } + } + return null; +} + +/// Parse a ZON value from `source` using `alloc`. +/// Supports: anonymous struct (`.{…}`), array tuple (`.{…}`), strings, +/// numbers (int/float/hex), booleans, null, and enum literals (`.tag`). +fn parseZon(alloc: Allocator, source: []const u8) !JsonValue { + var p = ZonParser{ .src = source, .pos = 0, .alloc = alloc }; + p.skipWs(); + const v = try p.parseValue(); + return v; +} + +const ZonParser = struct { + src: []const u8, + pos: usize, + alloc: Allocator, + + fn skipWs(p: *ZonParser) void { + while (p.pos < p.src.len) { + const c = p.src[p.pos]; + if (c == ' ' or c == '\t' or c == '\n' or c == '\r') { + p.pos += 1; + } else if (c == '/' and p.pos + 1 < p.src.len and p.src[p.pos + 1] == '/') { + while (p.pos < p.src.len and p.src[p.pos] != '\n') p.pos += 1; + } else break; + } + } + + fn peek(p: *ZonParser) ?u8 { + return if (p.pos < p.src.len) p.src[p.pos] else null; + } + + fn parseValue(p: *ZonParser) anyerror!JsonValue { + p.skipWs(); + const c = p.peek() orelse return error.ZonParseError; + return switch (c) { + '.' 
=> p.parseDot(), + '"' => p.parseString(), + '\\' => p.parseMultilineString(), + '-', '0'...'9' => p.parseNumber(), + 't' => p.parseLit("true", JsonValue{ .bool = true }), + 'f' => p.parseLit("false", JsonValue{ .bool = false }), + 'n' => p.parseLit("null", JsonValue{ .null = {} }), + else => error.ZonParseError, + }; + } + + fn parseLit(p: *ZonParser, literal: []const u8, result: JsonValue) !JsonValue { + if (!std.mem.startsWith(u8, p.src[p.pos..], literal)) return error.ZonParseError; + p.pos += literal.len; + return result; + } + + fn parseDot(p: *ZonParser) !JsonValue { + p.pos += 1; // consume '.' + const next = p.peek() orelse return error.ZonParseError; + if (next == '{') return p.parseStructOrArray(); + // enum literal: .tag_name → string + const start = p.pos; + while (p.pos < p.src.len and identChar(p.src[p.pos])) p.pos += 1; + if (p.pos == start) return error.ZonParseError; + const s = try p.alloc.dupe(u8, p.src[start..p.pos]); + return JsonValue{ .string = s }; + } + + fn parseStructOrArray(p: *ZonParser) !JsonValue { + p.pos += 1; // consume '{' + p.skipWs(); + if (p.peek() == '}') { + p.pos += 1; + return JsonValue{ .object = std.json.ObjectMap.init(p.alloc) }; + } + return if (p.isStructField()) p.parseStructBody() else p.parseArrayBody(); + } + + /// Lookahead: are we at `.identifier =`? + fn isStructField(p: *ZonParser) bool { + if (p.peek() != '.') return false; + var i = p.pos + 1; + while (i < p.src.len and identChar(p.src[i])) i += 1; + if (i == p.pos + 1) return false; // no ident chars + while (i < p.src.len and wsChar(p.src[i])) i += 1; + return i < p.src.len and p.src[i] == '='; + } + + fn parseStructBody(p: *ZonParser) !JsonValue { + var obj = JsonValue{ .object = std.json.ObjectMap.init(p.alloc) }; + while (true) { + p.skipWs(); + const c = p.peek() orelse return error.ZonParseError; + if (c == '}') { p.pos += 1; break; } + if (c != '.') return error.ZonParseError; + p.pos += 1; // consume '.' 
+ const ns = p.pos; + while (p.pos < p.src.len and identChar(p.src[p.pos])) p.pos += 1; + if (p.pos == ns) return error.ZonParseError; + const key = try p.alloc.dupe(u8, p.src[ns..p.pos]); + p.skipWs(); + if (p.peek() != '=') return error.ZonParseError; + p.pos += 1; // consume '=' + const val = try p.parseValue(); + try obj.object.put(key, val); + p.skipWs(); + if (p.peek() == ',') p.pos += 1; + } + return obj; + } + + fn parseArrayBody(p: *ZonParser) !JsonValue { + var arr = JsonValue{ .array = std.json.Array.init(p.alloc) }; + while (true) { + p.skipWs(); + if (p.peek() == '}') { p.pos += 1; break; } + const val = try p.parseValue(); + try arr.array.append(val); + p.skipWs(); + if (p.peek() == ',') p.pos += 1; + } + return arr; + } + + fn parseString(p: *ZonParser) !JsonValue { + p.pos += 1; // consume '"' + var buf: std.ArrayListUnmanaged(u8) = .{}; + while (p.pos < p.src.len) { + const c = p.src[p.pos]; + if (c == '"') { p.pos += 1; break; } + if (c == '\\') { + p.pos += 1; + if (p.pos >= p.src.len) return error.ZonParseError; + const esc = p.src[p.pos]; + p.pos += 1; + switch (esc) { + 'n' => try buf.append(p.alloc, '\n'), + 't' => try buf.append(p.alloc, '\t'), + 'r' => try buf.append(p.alloc, '\r'), + '"' => try buf.append(p.alloc, '"'), + '\'' => try buf.append(p.alloc, '\''), + '\\' => try buf.append(p.alloc, '\\'), + 'x' => { + if (p.pos + 2 > p.src.len) return error.ZonParseError; + const byte = std.fmt.parseInt(u8, p.src[p.pos .. 
p.pos + 2], 16) catch + return error.ZonParseError; + p.pos += 2; + try buf.append(p.alloc, byte); + }, + 'u' => { + if (p.peek() != '{') return error.ZonParseError; + p.pos += 1; + const us = p.pos; + while (p.pos < p.src.len and p.src[p.pos] != '}') p.pos += 1; + const cp = std.fmt.parseInt(u21, p.src[us..p.pos], 16) catch + return error.ZonParseError; + if (p.pos >= p.src.len) return error.ZonParseError; + p.pos += 1; // consume '}' + var ubuf: [4]u8 = undefined; + const ulen = std.unicode.utf8Encode(cp, &ubuf) catch + return error.ZonParseError; + try buf.appendSlice(p.alloc, ubuf[0..ulen]); + }, + else => return error.ZonParseError, + } + } else { + try buf.append(p.alloc, c); + p.pos += 1; + } + } + return JsonValue{ .string = try buf.toOwnedSlice(p.alloc) }; + } + + /// ZON multi-line string: consecutive lines each starting with `\\`. + fn parseMultilineString(p: *ZonParser) !JsonValue { + var buf: std.ArrayListUnmanaged(u8) = .{}; + while (p.pos + 1 < p.src.len and + p.src[p.pos] == '\\' and p.src[p.pos + 1] == '\\') + { + p.pos += 2; + const ls = p.pos; + while (p.pos < p.src.len and p.src[p.pos] != '\n') p.pos += 1; + try buf.appendSlice(p.alloc, p.src[ls..p.pos]); + if (p.pos < p.src.len) { try buf.append(p.alloc, '\n'); p.pos += 1; } + // skip indentation before next `\\` + while (p.pos < p.src.len and (p.src[p.pos] == ' ' or p.src[p.pos] == '\t')) + p.pos += 1; + } + return JsonValue{ .string = try buf.toOwnedSlice(p.alloc) }; + } + + fn parseNumber(p: *ZonParser) !JsonValue { + const start = p.pos; + const neg = p.src[p.pos] == '-'; + if (neg) p.pos += 1; + + // hex / octal / binary prefix + if (p.pos + 1 < p.src.len and p.src[p.pos] == '0') { + switch (p.src[p.pos + 1]) { + 'x', 'X' => { + p.pos += 2; + while (p.pos < p.src.len and std.ascii.isHex(p.src[p.pos])) p.pos += 1; + const n = std.fmt.parseInt(i64, p.src[start..p.pos], 0) catch + return error.ZonParseError; + return JsonValue{ .integer = n }; + }, + 'o' => { + p.pos += 2; + while (p.pos < 
p.src.len and p.src[p.pos] >= '0' and p.src[p.pos] <= '7') + p.pos += 1; + const n = std.fmt.parseInt(i64, p.src[start..p.pos], 0) catch + return error.ZonParseError; + return JsonValue{ .integer = n }; + }, + 'b' => { + p.pos += 2; + while (p.pos < p.src.len and (p.src[p.pos] == '0' or p.src[p.pos] == '1')) + p.pos += 1; + const n = std.fmt.parseInt(i64, p.src[start..p.pos], 0) catch + return error.ZonParseError; + return JsonValue{ .integer = n }; + }, + else => {}, + } + } + + while (p.pos < p.src.len and std.ascii.isDigit(p.src[p.pos])) p.pos += 1; + + const is_float = p.pos < p.src.len and + (p.src[p.pos] == '.' or p.src[p.pos] == 'e' or p.src[p.pos] == 'E'); + if (is_float) { + if (p.src[p.pos] == '.') { + p.pos += 1; + while (p.pos < p.src.len and std.ascii.isDigit(p.src[p.pos])) p.pos += 1; + } + if (p.pos < p.src.len and (p.src[p.pos] == 'e' or p.src[p.pos] == 'E')) { + p.pos += 1; + if (p.pos < p.src.len and (p.src[p.pos] == '+' or p.src[p.pos] == '-')) + p.pos += 1; + while (p.pos < p.src.len and std.ascii.isDigit(p.src[p.pos])) p.pos += 1; + } + const f = std.fmt.parseFloat(f64, p.src[start..p.pos]) catch + return error.ZonParseError; + return JsonValue{ .float = f }; + } + const n = std.fmt.parseInt(i64, p.src[start..p.pos], 10) catch + return error.ZonParseError; + return JsonValue{ .integer = n }; + } +}; + +fn identChar(c: u8) bool { + return std.ascii.isAlphanumeric(c) or c == '_'; +} + +fn wsChar(c: u8) bool { + return c == ' ' or c == '\t' or c == '\n' or c == '\r'; +} + test { const alloc = tst.allocator; const source = @@ -187,6 +517,58 @@ fn deinitJsonValue(alloc: std.mem.Allocator, value: *std.json.Value) void { } } +/// Deep-clone a `std.json.Value` tree using `alloc`. +/// Strings and number_string values are duplicated; scalars are copied by value. +/// The caller owns all allocated memory; free with `deinitJsonValue` for +/// containers (note: string values must be freed separately if needed). 
+pub fn cloneJsonValue(alloc: Allocator, value: std.json.Value) Allocator.Error!std.json.Value { + return switch (value) { + .null => .{ .null = {} }, + .bool => |b| .{ .bool = b }, + .integer => |n| .{ .integer = n }, + .float => |f| .{ .float = f }, + .number_string => |s| .{ .number_string = try alloc.dupe(u8, s) }, + .string => |s| .{ .string = try alloc.dupe(u8, s) }, + .array => |arr| blk: { + var new_arr = try std.json.Array.initCapacity(alloc, arr.items.len); + for (arr.items) |item| { + new_arr.appendAssumeCapacity(try cloneJsonValue(alloc, item)); + } + break :blk .{ .array = new_arr }; + }, + .object => |obj| blk: { + var new_obj: std.json.ObjectMap = .init(alloc); + var it = obj.iterator(); + while (it.next()) |entry| { + const key = try alloc.dupe(u8, entry.key_ptr.*); + const val = try cloneJsonValue(alloc, entry.value_ptr.*); + try new_obj.put(key, val); + } + break :blk .{ .object = new_obj }; + }, + }; +} + +/// Recursively deep-merge `overlay` into `base` using `alloc`. +/// For `.object` values: recurse (overlay keys win on leaf conflicts). +/// For all other types: overlay value replaces the base value (cloned). +fn mergeJsonValue(alloc: Allocator, base: *std.json.Value, overlay: std.json.Value) Allocator.Error!void { + if (base.* == .object and overlay == .object) { + var it = overlay.object.iterator(); + while (it.next()) |entry| { + if (base.object.getPtr(entry.key_ptr.*)) |existing| { + try mergeJsonValue(alloc, existing, entry.value_ptr.*); + } else { + const key = try alloc.dupe(u8, entry.key_ptr.*); + const val = try cloneJsonValue(alloc, entry.value_ptr.*); + try base.object.put(key, val); + } + } + } else { + base.* = try cloneJsonValue(alloc, overlay); + } +} + /// Recursively convert a `zig-yaml` value tree into `std.json.Value`. 
pub fn yamlNodeToJson(allocator: std.mem.Allocator, node: Yaml.Value) !JsonValue { switch (node) { @@ -310,6 +692,141 @@ pub fn get(self: FrontMatter, path: []const u8) ?std.json.Value { return jsonFindByPath(self.root, path); } +/// Remove the key at `path` from `self.root`. +/// Returns `true` if the key existed and was removed, `false` otherwise. +pub fn delete(self: *FrontMatter, path: []const u8) bool { + if (path.len == 0 or self.root != .object) return false; + var segs = std.mem.tokenizeScalar(u8, path, '.'); + var current: *std.json.Value = &self.root; + while (segs.next()) |seg| { + const is_last = segs.rest().len == 0; + if (current.* != .object) return false; + if (is_last) return current.object.orderedRemove(seg); + current = current.object.getPtr(seg) orelse return false; + } + return false; +} + +/// Return the byte offset in `txt` where the Markdown body begins — +/// i.e. the first byte after the frontmatter block and its closing +/// delimiter (including a trailing newline if present). +/// +/// Returns `null` when `txt` does not start with recognizable frontmatter. +pub fn bodyOffset(txt: []const u8) ?usize { + if (txt.len < 3) return null; + + // JSON: self-delimiting `{…}` + if (txt[0] == '{') { + const end = findBraceEnd(txt) orelse return null; + return if (end < txt.len and txt[end] == '\n') end + 1 else end; + } + + // ZON: self-delimiting `.{…}` + if (txt.len >= 2 and txt[0] == '.' and txt[1] == '{') { + const end = findBraceEnd(txt[1..]) orelse return null; + const abs = end + 1; + return if (abs < txt.len and txt[abs] == '\n') abs + 1 else abs; + } + + const marker: []const u8 = switch (txt[0]) { + '-' => "---", + '+' => "+++", + else => return null, + }; + const close = std.mem.indexOfPos(u8, txt, 3, marker) orelse return null; + var end = close + 3; + if (end < txt.len and txt[end] == '\n') end += 1; + return end; +} + +/// Set (or create) a value at a dot-separated key path in `self.root`. 
+/// Intermediate objects that do not exist are created automatically. +/// The provided `value` is deep-cloned; all new allocations are owned by +/// an internal arena and freed when `deinit()` is called. +/// +/// Returns `error.InvalidFieldArg` for an empty path and +/// `error.NotAnObject` if traversal hits a non-object intermediate node. +pub fn set(self: *FrontMatter, path: []const u8, value: std.json.Value) !void { + if (path.len == 0) return error.InvalidFieldArg; + if (self.root != .object) return error.NotAnObject; + if (self.set_arena == null) + self.set_arena = std.heap.ArenaAllocator.init(self.allocator); + const alloc = self.set_arena.?.allocator(); + const owned = try cloneJsonValue(alloc, value); + var segs = std.mem.tokenizeScalar(u8, path, '.'); + var current: *std.json.Value = &self.root; + while (segs.next()) |seg| { + const is_last = segs.rest().len == 0; + if (is_last) { + const key = try alloc.dupe(u8, seg); + try current.object.put(key, owned); + return; + } + if (current.object.getPtr(seg)) |child| { + if (child.* != .object) return error.NotAnObject; + current = child; + } else { + const key = try alloc.dupe(u8, seg); + try current.object.put(key, .{ .object = .init(alloc) }); + current = current.object.getPtr(seg).?; + } + } +} + +/// Deep-merge `overlay.root` into `self.root`. +/// +/// For object values the merge is recursive: overlay keys are added or +/// overwrite matching base keys; unmatched base keys are preserved. +/// For all other value types the overlay wins outright. +/// `self` retains its original format (YAML/TOML/JSON/ZON); the overlay's +/// format is ignored. All new allocations go into the internal set-arena +/// and are freed by `deinit()`. 
+pub fn merge(self: *FrontMatter, overlay: FrontMatter) !void { + if (self.set_arena == null) + self.set_arena = std.heap.ArenaAllocator.init(self.allocator); + try mergeJsonValue(self.set_arena.?.allocator(), &self.root, overlay.root); +} + +// ── Field argument parsing ──────────────────────────────────────────────────── + +/// Parsed result of a `"key.path=value"` command-line argument. +/// Both `path` and any string `value.string` alias the original `arg` slice. +pub const FieldArg = struct { + path: []const u8, + value: std.json.Value, +}; + +/// Infer the JSON type of a raw string value (no allocation required). +/// +/// Type precedence: +/// 1. `"true"` / `"false"` → `.bool` +/// 2. `"null"` → `.null` +/// 3. Valid integer (no `.` in string) → `.integer` +/// 4. Valid float (has `.` and parses) → `.float` +/// 5. Everything else → `.string` (aliases `raw`) +pub fn inferValue(raw: []const u8) std.json.Value { + if (std.mem.eql(u8, raw, "true")) return .{ .bool = true }; + if (std.mem.eql(u8, raw, "false")) return .{ .bool = false }; + if (std.mem.eql(u8, raw, "null")) return .{ .null = {} }; + if (std.mem.indexOfScalar(u8, raw, '.') == null) { + if (std.fmt.parseInt(i64, raw, 10)) |n| return .{ .integer = n } else |_| {} + } else { + if (std.fmt.parseFloat(f64, raw)) |f| return .{ .float = f } else |_| {} + } + return .{ .string = raw }; +} + +/// Parse a `"key.path=value"` argument into a `FieldArg`. +/// +/// The `value` is type-inferred via `inferValue`; string values alias `arg`. +/// Returns `error.InvalidFieldArg` when there is no `=` or the path is empty. +pub fn parseFieldArg(arg: []const u8) error{InvalidFieldArg}!FieldArg { + const eq = std.mem.indexOfScalar(u8, arg, '=') orelse return error.InvalidFieldArg; + const path = arg[0..eq]; + if (path.len == 0) return error.InvalidFieldArg; + return .{ .path = path, .value = inferValue(arg[eq + 1 ..]) }; +} + /// Looks up a value in a std.json.Value tree using a dot-separated key path. 
/// Returns the found value, or null if any part of the path is missing. pub fn jsonFindByPath(root: std.json.Value, path: []const u8) ?std.json.Value { @@ -356,6 +873,338 @@ test "jsonFindByPath works" { try tst.expect(not_found == null); } +// ── Serialization ───────────────────────────────────────────────────────────── + +/// Serialize the frontmatter back to its original format, including delimiters. +/// +/// | Format | Output | +/// |--------|------------------------------------------------| +/// | YAML | `---\nkey: val\n---\n` | +/// | TOML | `+++\nkey = "val"\n+++\n` | +/// | JSON | Pretty-printed JSON object followed by `\n` | +/// | ZON | `.{ .key = "val" }\n` | +/// +/// Serialization always reflects the current state of `self.root`, so any +/// modifications made after parsing are included in the output. +/// +/// The caller owns the returned slice; free with `alloc.free`. +pub fn serialize(self: FrontMatter, alloc: Allocator) ![]u8 { + var aw: std.Io.Writer.Allocating = .init(alloc); + defer aw.deinit(); + const w = &aw.writer; + switch (self.original) { + .yaml => { + try w.writeAll("---\n"); + try writeYamlValue(w, self.root, 0); + try w.writeAll("---\n"); + }, + .toml => { + try w.writeAll("+++\n"); + try writeTomlDocument(alloc, w, self.root); + try w.writeAll("+++\n"); + }, + .json => { + const json = try std.json.Stringify.valueAlloc(alloc, self.root, .{ .whitespace = .indent_2 }); + defer alloc.free(json); + try w.writeAll(json); + try w.writeByte('\n'); + }, + .zon => { + try writeZonValue(w, self.root, 0); + try w.writeByte('\n'); + }, + } + return aw.toOwnedSlice(); +} + +/// Prepend the serialized frontmatter to `body` and return the full Markdown +/// document. A single newline is inserted between the frontmatter block and +/// the body when `body` is non-empty and does not already start with `\n`. +/// +/// The caller owns the returned slice; free with `alloc.free`. 
+pub fn toMarkdown(self: FrontMatter, alloc: Allocator, body: []const u8) ![]u8 { + const fm_str = try self.serialize(alloc); + defer alloc.free(fm_str); + if (body.len == 0) return alloc.dupe(u8, fm_str); + const sep: []const u8 = if (body[0] == '\n') "" else "\n"; + return std.mem.concat(alloc, u8, &.{ fm_str, sep, body }); +} + +// ── YAML emitter ────────────────────────────────────────────────────────────── + +fn writeIndent(writer: anytype, level: usize) !void { + var i: usize = 0; + while (i < level * 2) : (i += 1) try writer.writeByte(' '); +} + +/// Returns true if `s` must be wrapped in double quotes to be a valid YAML +/// plain scalar. +fn yamlNeedsQuote(s: []const u8) bool { + if (s.len == 0) return true; + for (&[_][]const u8{ "true", "false", "null", "yes", "no", "on", "off", "~" }) |kw| { + if (std.ascii.eqlIgnoreCase(s, kw)) return true; + } + switch (s[0]) { + '{', '}', '[', ']', ',', '#', '&', '*', '?', '|', '<', '>', '=', '!', '%', '@', '`', ':', '"', '\'', '\\' => return true, + '-' => if (s.len == 1 or s[1] == ' ') return true, + else => {}, + } + for (s, 0..) |c, i| { + switch (c) { + '\n', '\r', '\t' => return true, + ':' => if (i + 1 < s.len and (s[i + 1] == ' ' or s[i + 1] == '\n')) return true, + '#' => if (i > 0 and s[i - 1] == ' ') return true, + else => {}, + } + } + if (s[s.len - 1] == ':') return true; + return false; +} + +fn writeYamlString(writer: anytype, s: []const u8) !void { + if (!yamlNeedsQuote(s)) { + try writer.writeAll(s); + return; + } + try writer.writeByte('"'); + for (s) |c| { + switch (c) { + '"' => try writer.writeAll("\\\""), + '\\' => try writer.writeAll("\\\\"), + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + else => try writer.writeByte(c), + } + } + try writer.writeByte('"'); +} + +/// Write the YAML representation of `value` at the given indent level. 
+/// For objects and arrays the output always ends with a newline; scalars do not +/// emit a trailing newline (the caller is responsible for that). +fn writeYamlValue(writer: anytype, value: std.json.Value, indent: usize) !void { + switch (value) { + .null => try writer.writeAll("null"), + .bool => |b| try writer.writeAll(if (b) "true" else "false"), + .integer => |n| try writer.print("{d}", .{n}), + .float => |f| try writer.print("{d}", .{f}), + .number_string => |s| try writer.writeAll(s), + .string => |s| try writeYamlString(writer, s), + .array => |arr| { + for (arr.items) |item| { + try writeIndent(writer, indent); + switch (item) { + .object, .array => { + try writer.writeAll("-\n"); + try writeYamlValue(writer, item, indent + 1); + }, + else => { + try writer.writeAll("- "); + try writeYamlValue(writer, item, indent); + try writer.writeByte('\n'); + }, + } + } + }, + .object => |obj| { + var it = obj.iterator(); + while (it.next()) |entry| { + try writeIndent(writer, indent); + try writer.writeAll(entry.key_ptr.*); + switch (entry.value_ptr.*) { + .object, .array => { + try writer.writeAll(":\n"); + try writeYamlValue(writer, entry.value_ptr.*, indent + 1); + }, + else => { + try writer.writeAll(": "); + try writeYamlValue(writer, entry.value_ptr.*, 0); + try writer.writeByte('\n'); + }, + } + } + }, + } +} + +// ── TOML emitter ────────────────────────────────────────────────────────────── + +fn isObjectArray(arr: std.json.Array) bool { + for (arr.items) |item| { + if (item == .object) return true; + } + return false; +} + +fn writeTomlString(writer: anytype, s: []const u8) !void { + try writer.writeByte('"'); + for (s) |c| { + switch (c) { + '"' => try writer.writeAll("\\\""), + '\\' => try writer.writeAll("\\\\"), + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + else => try writer.writeByte(c), + } + } + try writer.writeByte('"'); +} + +/// Write a TOML scalar or inline array. 
Objects are not handled here — they +/// appear as section headers, emitted by `writeTomlSection`. +fn writeTomlInline(writer: anytype, value: std.json.Value) !void { + switch (value) { + .null => try writer.writeAll("\"\""), + .bool => |b| try writer.writeAll(if (b) "true" else "false"), + .integer => |n| try writer.print("{d}", .{n}), + .float => |f| try writer.print("{d}", .{f}), + .number_string => |s| try writer.writeAll(s), + .string => |s| try writeTomlString(writer, s), + .array => |arr| { + try writer.writeByte('['); + for (arr.items, 0..) |item, i| { + if (i > 0) try writer.writeAll(", "); + try writeTomlInline(writer, item); + } + try writer.writeByte(']'); + }, + .object => |obj| { + // Inline table fallback — only reached for objects nested inside arrays. + try writer.writeByte('{'); + var it = obj.iterator(); + var first = true; + while (it.next()) |entry| { + if (!first) try writer.writeAll(", "); + first = false; + try writer.writeAll(entry.key_ptr.*); + try writer.writeAll(" = "); + try writeTomlInline(writer, entry.value_ptr.*); + } + try writer.writeByte('}'); + }, + } +} + +/// Write the scalar/array key-value pairs of `obj` then recurse into sub-tables. +/// `prefix` is the dotted section path used to build `[prefix.subkey]` headers. 
+fn writeTomlSection(alloc: Allocator, writer: anytype, obj: std.json.ObjectMap, prefix: []const u8) !void { + // Pass 1 — scalars and scalar arrays + var it = obj.iterator(); + while (it.next()) |entry| { + const v = entry.value_ptr.*; + const is_table = v == .object or (v == .array and isObjectArray(v.array)); + if (!is_table) { + try writer.writeAll(entry.key_ptr.*); + try writer.writeAll(" = "); + try writeTomlInline(writer, v); + try writer.writeByte('\n'); + } + } + // Pass 2 — sub-tables and arrays of tables + it = obj.iterator(); + while (it.next()) |entry| { + const v = entry.value_ptr.*; + const sub = try std.fmt.allocPrint(alloc, "{s}.{s}", .{ prefix, entry.key_ptr.* }); + defer alloc.free(sub); + if (v == .object) { + try writer.print("\n[{s}]\n", .{sub}); + try writeTomlSection(alloc, writer, v.object, sub); + } else if (v == .array and isObjectArray(v.array)) { + for (v.array.items) |item| { + if (item != .object) continue; + try writer.print("\n[[{s}]]\n", .{sub}); + try writeTomlSection(alloc, writer, item.object, sub); + } + } + } +} + +fn writeTomlDocument(alloc: Allocator, writer: anytype, root: std.json.Value) !void { + if (root != .object) return; + const obj = root.object; + // Pass 1 — top-level scalars and scalar arrays + var it = obj.iterator(); + while (it.next()) |entry| { + const v = entry.value_ptr.*; + const is_table = v == .object or (v == .array and isObjectArray(v.array)); + if (!is_table) { + try writer.writeAll(entry.key_ptr.*); + try writer.writeAll(" = "); + try writeTomlInline(writer, v); + try writer.writeByte('\n'); + } + } + // Pass 2 — [section] and [[array-of-tables]] + it = obj.iterator(); + while (it.next()) |entry| { + const v = entry.value_ptr.*; + if (v == .object) { + try writer.print("\n[{s}]\n", .{entry.key_ptr.*}); + try writeTomlSection(alloc, writer, v.object, entry.key_ptr.*); + } else if (v == .array and isObjectArray(v.array)) { + for (v.array.items) |item| { + if (item != .object) continue; + try 
writer.print("\n[[{s}]]\n", .{entry.key_ptr.*}); + try writeTomlSection(alloc, writer, item.object, entry.key_ptr.*); + } + } + } +} + +// ── ZON emitter ─────────────────────────────────────────────────────────────── + +fn writeZonString(writer: anytype, s: []const u8) !void { + try writer.writeByte('"'); + for (s) |c| { + switch (c) { + '"' => try writer.writeAll("\\\""), + '\\' => try writer.writeAll("\\\\"), + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + else => try writer.writeByte(c), + } + } + try writer.writeByte('"'); +} + +fn writeZonValue(writer: anytype, value: std.json.Value, indent: usize) !void { + switch (value) { + .null => try writer.writeAll("null"), + .bool => |b| try writer.writeAll(if (b) "true" else "false"), + .integer => |n| try writer.print("{d}", .{n}), + .float => |f| try writer.print("{d}", .{f}), + .number_string => |s| try writer.writeAll(s), + .string => |s| try writeZonString(writer, s), + .array => |arr| { + try writer.writeAll(".{\n"); + for (arr.items) |item| { + try writeIndent(writer, indent + 1); + try writeZonValue(writer, item, indent + 1); + try writer.writeAll(",\n"); + } + try writeIndent(writer, indent); + try writer.writeByte('}'); + }, + .object => |obj| { + try writer.writeAll(".{\n"); + var it = obj.iterator(); + while (it.next()) |entry| { + try writeIndent(writer, indent + 1); + try writer.writeByte('.'); + try writer.writeAll(entry.key_ptr.*); + try writer.writeAll(" = "); + try writeZonValue(writer, entry.value_ptr.*, indent + 1); + try writer.writeAll(",\n"); + } + try writeIndent(writer, indent); + try writer.writeByte('}'); + }, + } +} + // tera integration test moved to the standalone tera package test { diff --git a/src/markdown/frontmatter_test.zig b/src/markdown/frontmatter_test.zig index fbdeadf..eaad50c 100644 --- a/src/markdown/frontmatter_test.zig +++ b/src/markdown/frontmatter_test.zig @@ -338,3 +338,525 @@ test "frontmatter: 
jsonFindByPath single key" { try tst.expect(found != null); try tst.expectEqualStrings("value", found.?.string); } + +// ── JSON frontmatter tests ─────────────────────────────────────────────────── + +test "frontmatter: JSON basic parsing" { + const alloc = tst.allocator; + const source = + \\{"title": "Hello", "count": 5} + ; + var fm = try FrontMatter.init(alloc, source, .json); + defer fm.deinit(); + + const title = fm.get("title"); + try tst.expect(title != null); + try tst.expectEqualStrings("Hello", title.?.string); + + const count = fm.get("count"); + try tst.expect(count != null); + try tst.expectEqual(@as(i64, 5), count.?.integer); +} + +test "frontmatter: JSON nested object" { + const alloc = tst.allocator; + const source = + \\{"site": {"name": "My Site", "url": "https://example.com"}} + ; + var fm = try FrontMatter.init(alloc, source, .json); + defer fm.deinit(); + + const name = fm.get("site.name"); + try tst.expect(name != null); + try tst.expectEqualStrings("My Site", name.?.string); +} + +test "frontmatter: JSON arrays" { + const alloc = tst.allocator; + const source = + \\{"tags": ["zig", "wasm", "markdown"]} + ; + var fm = try FrontMatter.init(alloc, source, .json); + defer fm.deinit(); + + const tags = fm.get("tags"); + try tst.expect(tags != null); + try tst.expect(tags.? 
== .array); + try tst.expectEqual(@as(usize, 3), tags.?.array.items.len); + try tst.expectEqualStrings("zig", tags.?.array.items[0].string); +} + +test "frontmatter: JSON booleans and null" { + const alloc = tst.allocator; + const source = + \\{"draft": true, "published": false, "extra": null} + ; + var fm = try FrontMatter.init(alloc, source, .json); + defer fm.deinit(); + + try tst.expectEqualDeep(std.json.Value{ .bool = true }, fm.get("draft").?); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, fm.get("published").?); + try tst.expectEqualDeep(std.json.Value{ .null = {} }, fm.get("extra").?); +} + +test "frontmatter: initFromMarkdown JSON" { + const alloc = tst.allocator; + const input = + \\{"title": "Test", "weight": 10} + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + try tst.expectEqualStrings("Test", fm.get("title").?.string); + try tst.expectEqual(@as(i64, 10), fm.get("weight").?.integer); +} + +// ── ZON frontmatter tests ──────────────────────────────────────────────────── + +test "frontmatter: ZON basic parsing" { + const alloc = tst.allocator; + const source = + \\.{ + \\ .title = "Hello World", + \\ .count = 42, + \\} + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + const title = fm.get("title"); + try tst.expect(title != null); + try tst.expectEqualStrings("Hello World", title.?.string); + + const count = fm.get("count"); + try tst.expect(count != null); + try tst.expectEqual(@as(i64, 42), count.?.integer); +} + +test "frontmatter: ZON nested struct" { + const alloc = tst.allocator; + const source = + \\.{ + \\ .site = .{ + \\ .name = "My Site", + \\ .url = "https://example.com", + \\ }, + \\} + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + const name = fm.get("site.name"); + try tst.expect(name != null); + try tst.expectEqualStrings("My Site", name.?.string); +} + +test "frontmatter: ZON array" { + const alloc = 
tst.allocator; + const source = + \\.{ + \\ .tags = .{ "zig", "wasm", "markdown" }, + \\} + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + const tags = fm.get("tags"); + try tst.expect(tags != null); + try tst.expect(tags.? == .array); + try tst.expectEqual(@as(usize, 3), tags.?.array.items.len); + try tst.expectEqualStrings("zig", tags.?.array.items[0].string); +} + +test "frontmatter: ZON booleans and null" { + const alloc = tst.allocator; + const source = + \\.{ + \\ .draft = true, + \\ .published = false, + \\ .extra = null, + \\} + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + try tst.expectEqualDeep(std.json.Value{ .bool = true }, fm.get("draft").?); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, fm.get("published").?); + try tst.expectEqualDeep(std.json.Value{ .null = {} }, fm.get("extra").?); +} + +test "frontmatter: ZON numbers — int, negative, float" { + const alloc = tst.allocator; + const source = + \\.{ + \\ .weight = 10, + \\ .offset = -3, + \\ .version = 1.5, + \\ .hex = 0xFF, + \\} + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + try tst.expectEqual(@as(i64, 10), fm.get("weight").?.integer); + try tst.expectEqual(@as(i64, -3), fm.get("offset").?.integer); + try tst.expectApproxEqAbs(@as(f64, 1.5), fm.get("version").?.float, 0.001); + try tst.expectEqual(@as(i64, 255), fm.get("hex").?.integer); +} + +test "frontmatter: ZON enum literal becomes string" { + const alloc = tst.allocator; + const source = + \\.{ .status = .published } + ; + var fm = try FrontMatter.init(alloc, source, .zon); + defer fm.deinit(); + + try tst.expectEqualStrings("published", fm.get("status").?.string); +} + +test "frontmatter: initFromMarkdown ZON" { + const alloc = tst.allocator; + const input = + \\.{ .title = "Test", .weight = 7 } + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + try 
tst.expectEqualStrings("Test", fm.get("title").?.string); + try tst.expectEqual(@as(i64, 7), fm.get("weight").?.integer); +} + +// ── serialize / toMarkdown tests ───────────────────────────────────────────── + +test "frontmatter: serialize YAML round-trip" { + const alloc = tst.allocator; + // Note: zig-yaml represents YAML booleans as strings, so we test + // string, integer, and float values here — types it does round-trip. + const input = + \\--- + \\title: Hello World + \\weight: 5 + \\--- + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + // Must start and end with delimiters + try tst.expect(std.mem.startsWith(u8, out, "---\n")); + try tst.expect(std.mem.endsWith(u8, out, "---\n")); + // Re-parse and verify values survived the round-trip. + // Note: zig-yaml's scalar converter tries parseFloat before parseInt, so + // integers may come back as .float — accept either representation. 
+ var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("Hello World", fm2.get("title").?.string); + const weight = fm2.get("weight").?; + switch (weight) { + .integer => |n| try tst.expectEqual(@as(i64, 5), n), + .float => |f| try tst.expectApproxEqAbs(@as(f64, 5.0), f, 0.001), + else => return error.UnexpectedType, + } +} + +test "frontmatter: serialize YAML nested and array" { + const alloc = tst.allocator; + const source = + \\tags: + \\ - zig + \\ - markdown + \\extra: + \\ owner: SC2 + ; + var fm = try FrontMatter.init(alloc, source, .yaml); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("zig", fm2.get("tags").?.array.items[0].string); + try tst.expectEqualStrings("SC2", fm2.get("extra.owner").?.string); +} + +test "frontmatter: serialize TOML round-trip" { + const alloc = tst.allocator; + const input = + \\+++ + \\title = "My Post" + \\weight = 3 + \\draft = false + \\+++ + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + try tst.expect(std.mem.startsWith(u8, out, "+++\n")); + try tst.expect(std.mem.endsWith(u8, out, "+++\n")); + var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("My Post", fm2.get("title").?.string); + try tst.expectEqual(@as(i64, 3), fm2.get("weight").?.integer); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, fm2.get("draft").?); +} + +test "frontmatter: serialize TOML nested section" { + const alloc = tst.allocator; + const source = + \\title = "Post" + \\ + \\[extra] + \\owner = "SC2" + ; + var fm = try FrontMatter.init(alloc, source, .toml); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + var fm2 = try 
FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("Post", fm2.get("title").?.string); + try tst.expectEqualStrings("SC2", fm2.get("extra.owner").?.string); +} + +test "frontmatter: serialize JSON round-trip" { + const alloc = tst.allocator; + const input = + \\{"title": "Test", "weight": 10, "draft": true} + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("Test", fm2.get("title").?.string); + try tst.expectEqual(@as(i64, 10), fm2.get("weight").?.integer); + try tst.expectEqualDeep(std.json.Value{ .bool = true }, fm2.get("draft").?); +} + +test "frontmatter: serialize ZON round-trip" { + const alloc = tst.allocator; + const input = + \\.{ .title = "ZON Post", .draft = false, .weight = 7 } + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + try tst.expectEqualStrings("ZON Post", fm2.get("title").?.string); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, fm2.get("draft").?); + try tst.expectEqual(@as(i64, 7), fm2.get("weight").?.integer); +} + +test "frontmatter: toMarkdown reattaches body" { + const alloc = tst.allocator; + const source = + \\--- + \\title: Hello + \\--- + \\ + \\## Body content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, source); + defer fm.deinit(); + + const body = "## Body content"; + const doc = try fm.toMarkdown(alloc, body); + defer alloc.free(doc); + + try tst.expect(std.mem.startsWith(u8, doc, "---\n")); + try tst.expect(std.mem.indexOf(u8, doc, "## Body content") != null); +} + +// ── set / merge / parseFieldArg tests ─────────────────────────────────────── +// 
Use an arena allocator for these tests so that cloned strings (which are +// not freed by deinitJsonValue) are reclaimed by the arena on deinit. + +test "frontmatter: set top-level key" { + var fm = try FrontMatter.init(tst.allocator, "title = \"Old\"", .toml); + defer fm.deinit(); + + try fm.set("title", .{ .string = "New" }); + try tst.expectEqualStrings("New", fm.get("title").?.string); +} + +test "frontmatter: set creates new key" { + var fm = try FrontMatter.init(tst.allocator, "title = \"Hello\"", .toml); + defer fm.deinit(); + + try fm.set("draft", .{ .bool = true }); + try tst.expectEqualDeep(std.json.Value{ .bool = true }, fm.get("draft").?); +} + +test "frontmatter: set nested key (auto-creates intermediates)" { + var fm = try FrontMatter.init(tst.allocator, "title = \"Hello\"", .toml); + defer fm.deinit(); + + try fm.set("extra.owner", .{ .string = "SC2" }); + try tst.expectEqualStrings("SC2", fm.get("extra.owner").?.string); +} + +test "frontmatter: set overwrites existing nested key" { + const source = + \\[extra] + \\owner = "Old" + ; + var fm = try FrontMatter.init(tst.allocator, source, .toml); + defer fm.deinit(); + + try fm.set("extra.owner", .{ .string = "New" }); + try tst.expectEqualStrings("New", fm.get("extra.owner").?.string); +} + +test "frontmatter: set scalar types" { + var fm = try FrontMatter.init(tst.allocator, "x = 0", .toml); + defer fm.deinit(); + + try fm.set("n", .{ .integer = 42 }); + try fm.set("f", .{ .float = 3.14 }); + try fm.set("b", .{ .bool = false }); + try fm.set("z", .{ .null = {} }); + + try tst.expectEqual(@as(i64, 42), fm.get("n").?.integer); + try tst.expectApproxEqAbs(@as(f64, 3.14), fm.get("f").?.float, 0.001); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, fm.get("b").?); + try tst.expectEqualDeep(std.json.Value{ .null = {} }, fm.get("z").?); +} + +test "frontmatter: set empty path returns error" { + var fm = try FrontMatter.init(tst.allocator, "x = 1", .toml); + defer fm.deinit(); + + try 
tst.expectError(error.InvalidFieldArg, fm.set("", .{ .integer = 1 })); +} + +test "frontmatter: merge adds overlay keys" { + var base = try FrontMatter.init(tst.allocator, "title = \"Base\"", .toml); + defer base.deinit(); + + var overlay = try FrontMatter.init(tst.allocator, "author = \"Alice\"", .toml); + defer overlay.deinit(); + + try base.merge(overlay); + + try tst.expectEqualStrings("Base", base.get("title").?.string); + try tst.expectEqualStrings("Alice", base.get("author").?.string); +} + +test "frontmatter: merge overlay key wins on conflict" { + var base = try FrontMatter.init(tst.allocator, "title = \"Old\"\ndraft = false", .toml); + defer base.deinit(); + + var overlay = try FrontMatter.init(tst.allocator, "title = \"New\"", .toml); + defer overlay.deinit(); + + try base.merge(overlay); + + try tst.expectEqualStrings("New", base.get("title").?.string); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, base.get("draft").?); +} + +test "frontmatter: merge preserves base format" { + // TOML base merged with JSON overlay — result must serialize as TOML + var base = try FrontMatter.init(tst.allocator, "title = \"Base\"", .toml); + defer base.deinit(); + + var overlay = try FrontMatter.init(tst.allocator, "{\"draft\": true}", .json); + defer overlay.deinit(); + + try base.merge(overlay); + + const out = try base.serialize(tst.allocator); + defer tst.allocator.free(out); + + try tst.expect(std.mem.startsWith(u8, out, "+++\n")); + try tst.expect(std.mem.indexOf(u8, out, "draft") != null); +} + +test "frontmatter: merge deep-merges nested objects" { + const base_src = + \\[extra] + \\owner = "SC2" + \\version = "1.0" + ; + var base = try FrontMatter.init(tst.allocator, base_src, .toml); + defer base.deinit(); + + const overlay_src = + \\[extra] + \\version = "2.0" + \\reviewed = true + ; + var overlay = try FrontMatter.init(tst.allocator, overlay_src, .toml); + defer overlay.deinit(); + + try base.merge(overlay); + + // Original key preserved + try 
tst.expectEqualStrings("SC2", base.get("extra.owner").?.string); + // Conflicting key updated by overlay + try tst.expectEqualStrings("2.0", base.get("extra.version").?.string); + // New key from overlay added + try tst.expectEqualDeep(std.json.Value{ .bool = true }, base.get("extra.reviewed").?); +} + +test "frontmatter: parseFieldArg string value" { + const fa = try FrontMatter.parseFieldArg("title=Hello World"); + try tst.expectEqualStrings("title", fa.path); + try tst.expectEqualStrings("Hello World", fa.value.string); +} + +test "frontmatter: parseFieldArg bool values" { + const t = try FrontMatter.parseFieldArg("draft=true"); + try tst.expectEqualDeep(std.json.Value{ .bool = true }, t.value); + + const f = try FrontMatter.parseFieldArg("published=false"); + try tst.expectEqualDeep(std.json.Value{ .bool = false }, f.value); +} + +test "frontmatter: parseFieldArg null value" { + const fa = try FrontMatter.parseFieldArg("extra=null"); + try tst.expectEqualDeep(std.json.Value{ .null = {} }, fa.value); +} + +test "frontmatter: parseFieldArg integer value" { + const fa = try FrontMatter.parseFieldArg("weight=42"); + try tst.expectEqual(@as(i64, 42), fa.value.integer); +} + +test "frontmatter: parseFieldArg float value" { + const fa = try FrontMatter.parseFieldArg("version=1.5"); + try tst.expectApproxEqAbs(@as(f64, 1.5), fa.value.float, 0.001); +} + +test "frontmatter: parseFieldArg nested path" { + const fa = try FrontMatter.parseFieldArg("extra.owner=SC2"); + try tst.expectEqualStrings("extra.owner", fa.path); + try tst.expectEqualStrings("SC2", fa.value.string); +} + +test "frontmatter: parseFieldArg errors" { + try tst.expectError(error.InvalidFieldArg, FrontMatter.parseFieldArg("no-equals-sign")); + try tst.expectError(error.InvalidFieldArg, FrontMatter.parseFieldArg("=value-no-path")); +} diff --git a/src/markdown/renderers/markdown.zig b/src/markdown/renderers/markdown.zig new file mode 100644 index 0000000..8c3b7b6 --- /dev/null +++ 
b/src/markdown/renderers/markdown.zig @@ -0,0 +1,601 @@ +//! Markdown renderer for the Markdown AST — CommonMark + GFM. +//! +//! Serialises an `AST.Document` back into Markdown text. The output is +//! normalised: +//! +//! - Headings are always written in ATX format (`# heading`). +//! - Indented code blocks are normalised to fenced code blocks. +//! - Fenced code blocks preserve their original fence character and length. +//! - Links are always written in inline format `[text](url)` or +//! `[text](url "title")`, regardless of how they were originally written. +//! - Blocks are separated by a blank line. +//! +//! GFM extensions rendered: +//! - Tables → `| col | col |\n|---|---|\n| cell | cell |` +//! - Task list items → `- [x] item` / `- [ ] item` +//! - Strikethrough → `~~text~~` +//! - Extended autolinks → bare URL (no angle brackets) for www links +//! - Footnote definitions → `[^label]: content` +//! - Footnote references → `[^label]` +const std = @import("std"); +const Allocator = std.mem.Allocator; +const tst = std.testing; + +const AST = @import("../ast.zig"); +const Parser = @import("../parser.zig"); + +// ── Markdown escape helpers ─────────────────────────────────────────────────── + +/// Characters that must be escaped in regular Markdown text. +fn needsEscape(c: u8) bool { + return switch (c) { + '\\', '*', '_', '`', '[', ']', '<', '>', '!', '#', '|', '~', '&' => true, + else => false, + }; +} + +/// Write `s` with Markdown special characters backslash-escaped. +fn writeEscapedText(writer: anytype, s: []const u8) !void { + for (s) |c| { + if (needsEscape(c)) try writer.writeByte('\\'); + try writer.writeByte(c); + } +} + +/// Write a link URL. We pass it through verbatim — the URL was already +/// decoded/normalised by the parser, so no additional escaping is needed +/// beyond wrapping with angle brackets when the URL contains spaces or parens. 
+fn writeLinkUrl(writer: anytype, url: []const u8) !void { + // If the URL contains spaces or unbalanced parentheses, wrap in angle brackets. + var needs_brackets = false; + for (url) |c| { + if (c == ' ' or c == '\t' or c == '\n') { + needs_brackets = true; + break; + } + } + if (needs_brackets) { + try writer.writeByte('<'); + try writer.writeAll(url); + try writer.writeByte('>'); + } else { + try writer.writeAll(url); + } +} + +/// Write a link title, double-quote-wrapped with inner double quotes escaped. +fn writeLinkTitle(writer: anytype, title: []const u8) !void { + try writer.writeByte('"'); + for (title) |c| { + if (c == '"') try writer.writeByte('\\'); + try writer.writeByte(c); + } + try writer.writeByte('"'); +} + +// ── Inline renderer ─────────────────────────────────────────────────────────── + +fn renderInline(writer: anytype, item: AST.Inline) !void { + switch (item) { + .text => |t| try writeEscapedText(writer, t.content), + + .soft_break => try writer.writeByte('\n'), + + .hard_break => try writer.writeAll(" \n"), + + .code_span => |cs| { + // Use double backticks if the content contains a backtick. 
+ const has_backtick = std.mem.indexOfScalar(u8, cs.content, '`') != null; + if (has_backtick) { + try writer.writeAll("`` "); + try writer.writeAll(cs.content); + try writer.writeAll(" ``"); + } else { + try writer.writeByte('`'); + try writer.writeAll(cs.content); + try writer.writeByte('`'); + } + }, + + .emphasis => |e| { + const m = e.marker; + try writer.writeByte(m); + for (e.children.items) |child| try renderInline(writer, child); + try writer.writeByte(m); + }, + + .strong => |s| { + const m = s.marker; + try writer.writeByte(m); + try writer.writeByte(m); + for (s.children.items) |child| try renderInline(writer, child); + try writer.writeByte(m); + try writer.writeByte(m); + }, + + .strikethrough => |s| { + try writer.writeAll("~~"); + for (s.children.items) |child| try renderInline(writer, child); + try writer.writeAll("~~"); + }, + + .link => |l| { + try writer.writeByte('['); + for (l.children.items) |child| try renderInline(writer, child); + try writer.writeAll("]("); + try writeLinkUrl(writer, l.destination.url); + if (l.destination.title) |title| { + try writer.writeByte(' '); + try writeLinkTitle(writer, title); + } + try writer.writeByte(')'); + }, + + .image => |img| { + try writer.writeAll("!["); + try writer.writeAll(img.alt_text); + try writer.writeAll("]("); + try writeLinkUrl(writer, img.destination.url); + if (img.destination.title) |title| { + try writer.writeByte(' '); + try writeLinkTitle(writer, title); + } + try writer.writeByte(')'); + }, + + .autolink => |al| { + if (al.is_gfm_www) { + // GFM extended www autolink: bare URL without angle brackets + try writer.writeAll(al.url); + } else { + try writer.writeByte('<'); + try writer.writeAll(al.url); + try writer.writeByte('>'); + } + }, + + .footnote_reference => |fr| { + try writer.writeAll("[^"); + try writer.writeAll(fr.label); + try writer.writeByte(']'); + }, + + .html_in_line => |hi| try writer.writeAll(hi.content), + } +} + +// ── Inline list renderer 
────────────────────────────────────────────────────── + +fn renderInlines(writer: anytype, inlines: []const AST.Inline) !void { + for (inlines) |item| try renderInline(writer, item); +} + +// ── Block renderer ──────────────────────────────────────────────────────────── + +/// Render a single block to `writer`. Each block ends with exactly one `\n`; +/// the caller inserts the blank-line separator `\n` between blocks. +fn renderBlock(alloc: Allocator, writer: anytype, block: AST.Block) !void { + switch (block) { + .paragraph => |p| { + try renderInlines(writer, p.children.items); + try writer.writeByte('\n'); + }, + + .heading => |h| { + // ATX heading: `## text` + var i: u8 = 0; + while (i < h.level) : (i += 1) try writer.writeByte('#'); + try writer.writeByte(' '); + try renderInlines(writer, h.children.items); + try writer.writeByte('\n'); + }, + + .code_block => |cb| { + // Normalise indented code blocks to fenced format (``` ... ```) + try writer.writeAll("```\n"); + try writer.writeAll(cb.content); + if (cb.content.len > 0 and cb.content[cb.content.len - 1] != '\n') { + try writer.writeByte('\n'); + } + try writer.writeAll("```\n"); + }, + + .fenced_code_block => |fcb| { + // Preserve original fence character and length + var i: usize = 0; + while (i < fcb.fence_length) : (i += 1) try writer.writeByte(fcb.fence_char); + if (fcb.language) |lang| try writer.writeAll(lang); + try writer.writeByte('\n'); + if (fcb.content.len > 0) { + try writer.writeAll(fcb.content); + if (fcb.content[fcb.content.len - 1] != '\n') { + try writer.writeByte('\n'); + } + } + i = 0; + while (i < fcb.fence_length) : (i += 1) try writer.writeByte(fcb.fence_char); + try writer.writeByte('\n'); + }, + + .blockquote => |bq| { + // Buffer children, then prefix every line with "> " + var inner: std.Io.Writer.Allocating = .init(alloc); + defer inner.deinit(); + for (bq.children.items, 0..) 
|child, idx| { + if (idx > 0) try inner.writer.writeByte('\n'); + try renderBlock(alloc, &inner.writer, child); + } + const buf = try inner.toOwnedSlice(); + defer alloc.free(buf); + + var line_start: usize = 0; + for (buf, 0..) |c, i| { + if (c == '\n') { + const line = buf[line_start..i]; + if (line.len == 0) { + try writer.writeAll(">\n"); + } else { + try writer.writeAll("> "); + try writer.writeAll(line); + try writer.writeByte('\n'); + } + line_start = i + 1; + } + } + // Handle any remaining content without a trailing newline + if (line_start < buf.len) { + const line = buf[line_start..]; + try writer.writeAll("> "); + try writer.writeAll(line); + try writer.writeByte('\n'); + } + }, + + .list => |lst| { + const is_ordered = lst.type == .ordered; + const start_num: usize = lst.start orelse 1; + + for (lst.items.items, 0..) |item, idx| { + // Blank line between items in a loose list + if (!lst.tight and idx > 0) try writer.writeByte('\n'); + + // Build item prefix + var prefix_buf: [16]u8 = undefined; + const prefix: []const u8 = if (is_ordered) blk: { + const num = start_num + idx; + break :blk std.fmt.bufPrint(&prefix_buf, "{d}. ", .{num}) catch unreachable; + } else "- "; + + // Task list checkbox + var task_prefix: []const u8 = ""; + if (item.task_list_checked) |checked| { + task_prefix = if (checked) "[x] " else "[ ] "; + } + + // Indent for continuation lines + // Unordered: 2 spaces, ordered: prefix.len spaces + const indent_len = prefix.len; + var indent_buf: [16]u8 = @splat(' '); + const indent: []const u8 = indent_buf[0..indent_len]; + + // Buffer the item's block children + var item_buf: std.Io.Writer.Allocating = .init(alloc); + defer item_buf.deinit(); + + for (item.children.items, 0..) 
|child, cidx| { + if (!item.tight and cidx > 0) try item_buf.writer.writeByte('\n'); + try renderBlock(alloc, &item_buf.writer, child); + } + + const content = try item_buf.toOwnedSlice(); + defer alloc.free(content); + + // Write the first line with the list prefix (and optional task prefix) + var first_line = true; + var line_start: usize = 0; + for (content, 0..) |c, i| { + if (c == '\n') { + const line = content[line_start..i]; + if (first_line) { + try writer.writeAll(prefix); + try writer.writeAll(task_prefix); + try writer.writeAll(line); + first_line = false; + } else { + // Continuation: indent if non-empty + if (line.len == 0) { + try writer.writeByte('\n'); + } else { + try writer.writeAll(indent); + try writer.writeAll(line); + } + } + try writer.writeByte('\n'); + line_start = i + 1; + } + } + // Remaining content without trailing newline + if (line_start < content.len) { + const line = content[line_start..]; + if (first_line) { + try writer.writeAll(prefix); + try writer.writeAll(task_prefix); + try writer.writeAll(line); + } else { + try writer.writeAll(indent); + try writer.writeAll(line); + } + try writer.writeByte('\n'); + } + } + }, + + .thematic_break => try writer.writeAll("---\n"), + + .html_block => |hb| try writer.writeAll(hb.content), + + .footnote_definition => |fd| { + // First line: `[^label]: first block content` + // Continuation lines indented by 4 spaces + var first = true; + for (fd.children.items) |child| { + var child_buf: std.Io.Writer.Allocating = .init(alloc); + defer child_buf.deinit(); + try renderBlock(alloc, &child_buf.writer, child); + const content = try child_buf.toOwnedSlice(); + defer alloc.free(content); + + var line_start: usize = 0; + for (content, 0..) 
|c, i| { + if (c == '\n') { + const line = content[line_start..i]; + if (first) { + try writer.writeAll("[^"); + try writer.writeAll(fd.label); + try writer.writeAll("]: "); + try writer.writeAll(line); + first = false; + } else { + if (line.len == 0) { + try writer.writeByte('\n'); + } else { + try writer.writeAll(" "); + try writer.writeAll(line); + } + } + try writer.writeByte('\n'); + line_start = i + 1; + } + } + if (line_start < content.len) { + const line = content[line_start..]; + if (first) { + try writer.writeAll("[^"); + try writer.writeAll(fd.label); + try writer.writeAll("]: "); + try writer.writeAll(line); + first = false; + } else { + try writer.writeAll(" "); + try writer.writeAll(line); + } + try writer.writeByte('\n'); + } + } + // Edge case: empty footnote definition + if (first) { + try writer.writeAll("[^"); + try writer.writeAll(fd.label); + try writer.writeAll("]:\n"); + } + }, + + .table => |tbl| { + const ncols = tbl.alignments.items.len; + + // Header row + try writer.writeByte('|'); + for (tbl.header.cells.items) |cell| { + try writer.writeByte(' '); + try renderInlines(writer, cell.children.items); + try writer.writeAll(" |"); + } + // Pad missing header cells + if (tbl.header.cells.items.len < ncols) { + var pad = tbl.header.cells.items.len; + while (pad < ncols) : (pad += 1) try writer.writeAll(" |"); + } + try writer.writeByte('\n'); + + // Delimiter row + try writer.writeByte('|'); + for (tbl.alignments.items) |al| { + switch (al) { + .none => try writer.writeAll("---|"), + .left => try writer.writeAll(":---|"), + .center => try writer.writeAll(":---:|"), + .right => try writer.writeAll("---:|"), + } + } + try writer.writeByte('\n'); + + // Body rows + for (tbl.body.items) |row| { + try writer.writeByte('|'); + for (row.cells.items) |cell| { + try writer.writeByte(' '); + try renderInlines(writer, cell.children.items); + try writer.writeAll(" |"); + } + // Pad missing body cells + if (row.cells.items.len < ncols) { + var pad = 
row.cells.items.len; + while (pad < ncols) : (pad += 1) try writer.writeAll(" |"); + } + try writer.writeByte('\n'); + } + }, + } +} + +// ── Top-level render ────────────────────────────────────────────────────────── + +/// Render `doc` to an allocator-owned Markdown byte slice. +/// +/// The output is normalised CommonMark + GFM Markdown: ATX headings, fenced +/// code blocks, and inline-style links. Blocks are separated by blank lines. +/// +/// The caller owns the returned memory and must free it when done. +pub fn render(allocator: Allocator, doc: AST.Document) ![]u8 { + var aw: std.Io.Writer.Allocating = .init(allocator); + defer aw.deinit(); + const w = &aw.writer; + + for (doc.children.items, 0..) |block, idx| { + if (idx > 0) try w.writeByte('\n'); + try renderBlock(allocator, w, block); + } + + return aw.toOwnedSlice(); +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +fn parse(alloc: Allocator, src: []const u8) !AST.Document { + var parser = Parser.init(); + defer parser.deinit(alloc); + return parser.parseMarkdown(alloc, src); +} + +fn roundtrip(alloc: Allocator, input: []const u8) ![]u8 { + var doc = try parse(alloc, input); + defer doc.deinit(alloc); + return render(alloc, doc); +} + +fn expectMd(input: []const u8, expected: []const u8) !void { + const alloc = tst.allocator; + const out = try roundtrip(alloc, input); + defer alloc.free(out); + try tst.expectEqualStrings(expected, out); +} + +test "atx heading round-trip" { + try expectMd("# Heading 1", "# Heading 1\n"); + try expectMd("## Heading 2", "## Heading 2\n"); + try expectMd("### Heading 3", "### Heading 3\n"); +} + +test "setext heading normalised to atx" { + try expectMd("Title\n=====", "# Title\n"); + try expectMd("Title\n-----", "## Title\n"); +} + +test "paragraph" { + try expectMd("Hello world", "Hello world\n"); +} + +test "multiple blocks separated by blank line" { + try expectMd("# Title\n\nParagraph.", "# Title\n\nParagraph.\n"); +} + +test 
"thematic break" { + try expectMd("---", "---\n"); + try expectMd("***", "---\n"); +} + +test "indented code block normalised to fenced" { + try expectMd(" hello\n world", "```\nhello\nworld\n```\n"); +} + +test "fenced code block preserved" { + try expectMd("```\ncode\n```", "```\ncode\n```\n"); + try expectMd("```zig\nconst x = 1;\n```", "```zig\nconst x = 1;\n```\n"); +} + +test "fenced code block with tilde fence" { + try expectMd("~~~\ncode\n~~~", "~~~\ncode\n~~~\n"); +} + +test "blockquote" { + try expectMd("> hello", "> hello\n"); +} + +test "tight unordered list" { + try expectMd("- a\n- b\n- c", "- a\n- b\n- c\n"); +} + +test "loose unordered list" { + try expectMd("- a\n\n- b", "- a\n\n- b\n"); +} + +test "ordered list" { + try expectMd("1. first\n2. second", "1. first\n2. second\n"); +} + +test "ordered list custom start" { + try expectMd("3. first\n4. second", "3. first\n4. second\n"); +} + +test "emphasis round-trip" { + try expectMd("*em*", "*em*\n"); +} + +test "strong round-trip" { + try expectMd("**bold**", "**bold**\n"); +} + +test "strikethrough" { + try expectMd("~~strike~~", "~~strike~~\n"); +} + +test "code span no backtick" { + try expectMd("`code`", "`code`\n"); +} + +test "inline link normalised" { + try expectMd("[text](https://example.com)", "[text](https://example.com)\n"); +} + +test "inline link with title" { + try expectMd("[text](/url \"title\")", "[text](/url \"title\")\n"); +} + +test "reference link normalised to inline" { + try expectMd("[foo]: /url\n\n[foo]", "[foo](/url)\n"); +} + +test "image" { + try expectMd("![alt](img.png)", "![alt](img.png)\n"); +} + +test "autolink" { + try expectMd("", "\n"); +} + +test "footnote reference" { + try expectMd("[^note]\n\n[^note]: content", "[^note]\n\n[^note]: content\n"); +} + +test "hard break" { + try expectMd("line one \nline two", "line one \nline two\n"); +} + +test "soft break" { + try expectMd("line one\nline two", "line one\nline two\n"); +} + +test "html block passthrough" { + 
try expectMd("
\nhello\n
\n", "
\nhello\n
\n"); +} + +test "gfm task list" { + try expectMd("- [x] done\n- [ ] todo", "- [x] done\n- [ ] todo\n"); +} + +test "gfm table basic" { + try expectMd( + "a | b\n---|---\n1 | 2", + "| a | b |\n|---|---|\n| 1 | 2 |\n", + ); +} diff --git a/src/root.zig b/src/root.zig index b88c486..3ffef33 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,6 +1,18 @@ //! Copyright © 2025 [Star City Security Consulting, LLC (SC2)](https://sc2.in) //! SPDX-License-Identifier: AGPL-3.0-or-later const std = @import("std"); + +// Suppress zig-yaml's verbose tokenizer/parser debug tracing so that +// `zig build test` and nix checks stay quiet. All other log scopes +// (including our own warn/err paths for spec failures) remain at the +// default level. +pub const std_options: std.Options = .{ + .log_scope_levels = &.{ + .{ .scope = .tokenizer, .level = .warn }, + .{ .scope = .parser, .level = .warn }, + }, +}; + const Allocator = std.mem.Allocator; const mem = std.mem; const testing = std.testing; @@ -21,6 +33,7 @@ const ai = @import("markdown/renderers/ai.zig"); /// Renderers const ast_mod = @import("markdown/renderers/ast_renderer.zig"); const html = @import("markdown/renderers/html.zig"); +const markdown_mod = @import("markdown/renderers/markdown.zig"); const terminal = @import("markdown/renderers/terminal.zig"); /// Pre-built renderer that serialises an `AST.Document` to CommonMark-compliant HTML. @@ -36,6 +49,10 @@ pub const AIRenderer = Renderer.create(ai); /// Pre-built renderer that serialises an `AST.Document` with ANSI terminal styling. pub const TerminalRenderer = Renderer.create(terminal); +/// Pre-built renderer that serialises an `AST.Document` back to normalised +/// CommonMark+GFM Markdown. Useful for round-trip normalization passes. +pub const MarkdownRenderer = Renderer.create(markdown_mod); + /// A type-erased rendering back-end. 
/// /// Create concrete instances with `Renderer.create`, passing any struct that @@ -175,6 +192,148 @@ export fn zigmark_version() [*:0]const u8 { return version.ptr[0..version.len :0]; } +// ── Frontmatter C ABI ───────────────────────────────────────────────────────── + +/// Internal wrapper that pairs a FrontMatter with the allocator that owns it. +const OpaqueFm = struct { + fm: Frontmatter, + allocator: Allocator, +}; + +/// Serialize a `std.json.Value` to a NUL-terminated C string allocated with +/// `alloc`. Returns null on failure. Caller must free with +/// `zigmark_free_string`. +fn jsonValueToC(alloc: Allocator, value: std.json.Value, options: std.json.Stringify.Options) ?[*:0]u8 { + const json = std.json.Stringify.valueAlloc(alloc, value, options) catch return null; + const c_str = alloc.realloc(json, json.len + 1) catch { + alloc.free(json); + return null; + }; + c_str[json.len] = 0; + return c_str[0..json.len :0]; +} + +/// Parse frontmatter from a UTF-8 Markdown buffer. +/// Auto-detects YAML (`---`), TOML (`+++`), JSON (`{`), or ZON (`.{`). +/// +/// Returns an opaque handle, or null if no valid frontmatter is present or on +/// allocation / parse failure. Free with `zigmark_frontmatter_free`. +export fn zigmark_frontmatter_parse(input: [*]const u8, len: usize) ?*OpaqueFm { + const allocator = std.heap.page_allocator; + const slice = input[0..len]; + var fm = Frontmatter.initFromMarkdown(allocator, slice) catch return null; + const wrapper = allocator.create(OpaqueFm) catch { + fm.deinit(); + return null; + }; + wrapper.* = .{ .fm = fm, .allocator = allocator }; + return wrapper; +} + +/// Free a frontmatter handle previously returned by `zigmark_frontmatter_parse`. +export fn zigmark_frontmatter_free(ptr: ?*OpaqueFm) void { + const wrapper = ptr orelse return; + wrapper.fm.deinit(); + wrapper.allocator.destroy(wrapper); +} + +/// Serialize the entire frontmatter to a pretty-printed JSON string. 
+/// +/// Returns a NUL-terminated string, or null on failure. +/// Free with `zigmark_free_string`. +export fn zigmark_frontmatter_to_json(ptr: ?*OpaqueFm) ?[*:0]u8 { + const wrapper = ptr orelse return null; + return jsonValueToC(wrapper.allocator, wrapper.fm.root, .{ .whitespace = .indent_2 }); +} + +/// Look up a dot-separated key path in the frontmatter and return its value as +/// a compact JSON string (e.g. `"title"`, `"extra.author"`, `"tags"`). +/// +/// Returns a NUL-terminated string, or null if the key is not found or on +/// failure. Free with `zigmark_free_string`. +export fn zigmark_frontmatter_get(ptr: ?*OpaqueFm, key: [*:0]const u8) ?[*:0]u8 { + const wrapper = ptr orelse return null; + var key_len: usize = 0; + while (key[key_len] != 0) : (key_len += 1) {} + const value = wrapper.fm.get(key[0..key_len]) orelse return null; + return jsonValueToC(wrapper.allocator, value, .{}); +} + +/// Serialize the frontmatter back to its original format (YAML/TOML/JSON/ZON) +/// including delimiters, reflecting any modifications made to the parsed tree. +/// +/// Returns a NUL-terminated string, or null on failure. +/// Free with `zigmark_free_string`. +export fn zigmark_frontmatter_serialize(ptr: ?*OpaqueFm) ?[*:0]u8 { + const wrapper = ptr orelse return null; + const buf = wrapper.fm.serialize(wrapper.allocator) catch return null; + const c_str = wrapper.allocator.realloc(buf, buf.len + 1) catch { + wrapper.allocator.free(buf); + return null; + }; + c_str[buf.len] = 0; + return c_str[0..buf.len :0]; +} + +/// Deep-merge `overlay` into `base` (overlay keys win for leaf conflicts). +/// The base retains its original format (YAML/TOML/JSON/ZON). +/// +/// Returns 0 on success, -1 on failure. +export fn zigmark_frontmatter_merge(base: ?*OpaqueFm, overlay: ?*OpaqueFm) c_int { + const b = base orelse return -1; + const o = overlay orelse return -1; + b.fm.merge(o.fm) catch return -1; + return 0; +} + +/// Set a value at a dot-separated key path. 
+/// +/// @param path NUL-terminated dot-separated key path (e.g. `"extra.owner"`). +/// @param json_value NUL-terminated compact JSON string for the new value +/// (e.g. `"\"hello\""`, `"42"`, `"true"`, `"[1,2,3]"`). +/// +/// Returns 0 on success, -1 on failure (parse error, OOM, or bad path). +export fn zigmark_frontmatter_set(ptr: ?*OpaqueFm, path: [*:0]const u8, json_value: [*:0]const u8) c_int { + const wrapper = ptr orelse return -1; + var path_len: usize = 0; + while (path[path_len] != 0) : (path_len += 1) {} + var val_len: usize = 0; + while (json_value[val_len] != 0) : (val_len += 1) {} + const parsed = std.json.parseFromSlice( + std.json.Value, + wrapper.allocator, + json_value[0..val_len], + .{}, + ) catch return -1; + defer parsed.deinit(); + wrapper.fm.set(path[0..path_len], parsed.value) catch return -1; + return 0; +} + +/// Set a value using auto-typed raw string (not JSON-quoted). +/// +/// Type inference rules (applied in order): +/// - `"true"` / `"false"` → bool +/// - `"null"` → null +/// - Valid integer literal (no `.`) → integer +/// - Valid float literal → float +/// - Everything else → string (value is copied) +/// +/// @param path NUL-terminated dot-separated key path. +/// @param raw NUL-terminated raw value string. +/// +/// Returns 0 on success, -1 on failure. 
+export fn zigmark_frontmatter_set_raw(ptr: ?*OpaqueFm, path: [*:0]const u8, raw: [*:0]const u8) c_int { + const wrapper = ptr orelse return -1; + var path_len: usize = 0; + while (path[path_len] != 0) : (path_len += 1) {} + var raw_len: usize = 0; + while (raw[raw_len] != 0) : (raw_len += 1) {} + const value = Frontmatter.inferValue(raw[0..raw_len]); + wrapper.fm.set(path[0..path_len], value) catch return -1; + return 0; +} + test "enhanced parse and render" { const allocator = std.testing.allocator; @@ -802,23 +961,24 @@ test "CommonMark spec compliance" { const allocator = arena.allocator(); const summary = try runSpecSummary(allocator, default_spec_path); - std.debug.print("\n{s:<40} {s:>6} {s:>6} {s:>6}\n", .{ "Section", "Pass", "Fail", "Total" }); - std.debug.print("{s:-<58}\n", .{""}); - + // Print the section table only when something fails so CI logs stay clean. var section_total: usize = 0; - for (summary.sections) |s| { - const t = s.result.total(); - section_total += t; - if (t > 0) { - std.debug.print("{s:<40} {d:>6} {d:>6} {d:>6}\n", .{ s.section, s.result.passed, s.result.failed, t }); + for (summary.sections) |s| section_total += s.result.total(); + + if (summary.all.failed > 0 or summary.all.errors > 0) { + std.debug.print("\n{s:<40} {s:>6} {s:>6} {s:>6}\n", .{ "Section", "Pass", "Fail", "Total" }); + std.debug.print("{s:-<58}\n", .{""}); + for (summary.sections) |s| { + const t = s.result.total(); + if (t > 0) + std.debug.print("{s:<40} {d:>6} {d:>6} {d:>6}\n", .{ s.section, s.result.passed, s.result.failed, t }); } + std.debug.print("{s:-<58}\n", .{""}); + std.debug.print("{s:<40} {d:>6} {d:>6} {d:>6}\n", .{ + "TOTAL", summary.all.passed, summary.all.failed, summary.all.total(), + }); } - std.debug.print("{s:-<58}\n", .{""}); - std.debug.print("{s:<40} {d:>6} {d:>6} {d:>6}\n", .{ - "TOTAL", summary.all.passed, summary.all.failed, summary.all.total(), - }); - // Verify the section breakdown covers all tests (no miscategorization). 
try testing.expectEqual(summary.all.total(), section_total); diff --git a/src/spec_runner.zig b/src/spec_runner.zig index a26f2e9..fc4b733 100644 --- a/src/spec_runner.zig +++ b/src/spec_runner.zig @@ -1,6 +1,13 @@ const std = @import("std"); const print = std.debug.print; +pub const std_options: std.Options = .{ + .log_scope_levels = &.{ + .{ .scope = .tokenizer, .level = .warn }, + .{ .scope = .parser, .level = .warn }, + }, +}; + const zigmark = @import("zigmark"); const default_spec_path = "./src/markdown/spec.txt"; @@ -19,6 +26,7 @@ pub fn main() !void { var verbose = false; var number: ?usize = null; var summary_only = false; + var quiet = false; var gfm_mode = false; var spec_path: ?[]const u8 = null; @@ -27,6 +35,8 @@ pub fn main() !void { verbose = true; } else if (std.mem.eql(u8, arg, "--summary")) { summary_only = true; + } else if (std.mem.eql(u8, arg, "--quiet") or std.mem.eql(u8, arg, "-q")) { + quiet = true; } else if (std.mem.eql(u8, arg, "--gfm")) { gfm_mode = true; } else if (std.mem.eql(u8, arg, "--section") or std.mem.eql(u8, arg, "-s")) { @@ -42,6 +52,16 @@ pub fn main() !void { const use_spec_path = spec_path orelse default_spec_path; + // --quiet: silent on full pass; dump the full table only on failure. + if (quiet) { + const failed: usize = if (gfm_mode) + try quietCheck(allocator, use_spec_path, true) + else + try quietCheck(allocator, use_spec_path, false); + if (failed > 0) std.process.exit(1); + return; + } + if (summary_only) { const failed: usize = if (gfm_mode) try printGfmSummary(allocator, use_spec_path) @@ -100,6 +120,22 @@ pub fn main() !void { } } +/// Quiet check: run the full suite, emit nothing on success. +/// On failure, print the full section table then exit 1. 
+fn quietCheck(allocator: std.mem.Allocator, spec_path: []const u8, gfm: bool) !usize { + if (gfm) { + const summary = try zigmark.runGfmSpecSummary(allocator, spec_path); + if (summary.all.failed == 0) return 0; + _ = try printGfmSummary(allocator, spec_path); + return summary.all.failed; + } else { + const summary = try zigmark.runSpecSummary(allocator, spec_path); + if (summary.all.failed == 0) return 0; + _ = try printSummary(allocator, spec_path); + return summary.all.failed; + } +} + fn printGfmSummary(allocator: std.mem.Allocator, spec_path: []const u8) !usize { const summary = try zigmark.runGfmSpecSummary(allocator, spec_path);