Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7c3117b
chore: checkpoint benchmark optimization state
raphaelmansuy Mar 25, 2026
7a2506c
Add OCR pack geometric markdown reconstruction
raphaelmansuy Mar 25, 2026
d47e81e
Add geometric service-flow markdown reconstruction
raphaelmansuy Mar 25, 2026
60887c7
Commit all outstanding benchmark work
raphaelmansuy Mar 25, 2026
1b6ef71
Add geometric AI pack markdown reconstruction
raphaelmansuy Mar 25, 2026
b8c67d9
Improve boundary metrics and land ownership reconstruction
raphaelmansuy Mar 25, 2026
5cba3eb
Remove benchmark-specific markdown renderers
raphaelmansuy Mar 25, 2026
6a8e633
Add geometric panel table reconstruction
raphaelmansuy Mar 25, 2026
fe4ae86
Add geometric panel augmentation and boundary metrics
raphaelmansuy Mar 25, 2026
e1ff432
Preserve grouped table headers and add occupancy metric
raphaelmansuy Mar 25, 2026
a5f0cfa
Recover text from page-raster infographic cards
raphaelmansuy Mar 25, 2026
80c13dc
Document rejected 00141 OCR refinement wave
raphaelmansuy Mar 25, 2026
4b66911
Checkpoint before next 50 OODA wave
raphaelmansuy Mar 25, 2026
679a60a
Reconstruct four-column layout tables from source text
raphaelmansuy Mar 25, 2026
1d0e33c
Render centered layout tables with embedded stub headers
raphaelmansuy Mar 25, 2026
4c64598
Checkpoint before next 50 OODA wave
raphaelmansuy Mar 26, 2026
4606d85
Checkpoint before geometric plate validation
raphaelmansuy Mar 26, 2026
ca5f41e
Trim top table plates before article body
raphaelmansuy Mar 26, 2026
267c392
Promote single-table report pages and score edge contamination
raphaelmansuy Mar 26, 2026
2abba53
Add geometry-driven layout renderers for benchmark dashboards
raphaelmansuy Mar 26, 2026
66177b0
Add geometry-driven stacked bar report rendering
raphaelmansuy Mar 26, 2026
db46086
Improve markdown rendering for infographic chart pages
raphaelmansuy Mar 26, 2026
62a2ae1
Improve layout chart extraction and table plate rendering
raphaelmansuy Mar 26, 2026
4ba7c67
Improve open-plate markdown dispatch and block-pair detection
raphaelmansuy Mar 26, 2026
e6c6f4b
Improve geometric markdown recovery for citations and projection sheets
raphaelmansuy Mar 26, 2026
d9eabfa
Improve geometric markdown recovery for appendix tables and paragraph…
raphaelmansuy Mar 26, 2026
b8a920c
Improve geometric caption-media markdown recovery
raphaelmansuy Mar 26, 2026
8086e50
Improve geometric figure explainer structure recovery
raphaelmansuy Mar 26, 2026
f0edeed
Cache geometric layout sources across markdown renderers
raphaelmansuy Mar 26, 2026
682fe10
Harden geometric plate and table merge dispatch
raphaelmansuy Mar 26, 2026
7fbc554
Recover benchmark frontier markdown from layout sources
raphaelmansuy Mar 26, 2026
9b92863
chore: prepare 0.2.1 release automation
raphaelmansuy Mar 26, 2026
9c9041d
Merge remote-tracking branch 'origin/main' into bench/ooda-score-impr…
raphaelmansuy Mar 26, 2026
fc9f40d
fix: clear clippy blockers for release CI
raphaelmansuy Mar 26, 2026
390c026
chore: format workspace for release ci
raphaelmansuy Mar 26, 2026
12b356f
ci: install poppler for rust matrix
raphaelmansuy Mar 26, 2026
d00b176
docs: refresh benchmark site and stabilize windows ci
raphaelmansuy Mar 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
33 changes: 32 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,23 @@ jobs:
name: Rust — build & test
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-14, windows-latest]
steps:
- uses: actions/checkout@v4
# Install Poppler with the native package manager of each runner OS.
# The `if:` guards are mutually exclusive, so only one of the three steps runs
# for a given entry of the ubuntu/macos/windows matrix.
- name: Install Poppler (Ubuntu)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y poppler-utils
- name: Install Poppler (macOS)
if: runner.os == 'macOS'
run: brew install poppler
- name: Install Poppler (Windows)
if: runner.os == 'Windows'
# PowerShell Core is selected explicitly for the Chocolatey call.
shell: pwsh
run: choco install poppler -y
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- run: cargo build
Expand Down Expand Up @@ -55,10 +68,28 @@ jobs:
# Install the platform binary into the local package directory
cp ../../target/release/libedgeparse_node.so npm/linux-x64-gnu/edgeparse-node.linux-x64-gnu.node
# Install the local platform package so require('edgeparse-linux-x64-gnu') resolves
npm install --save-dev file:./npm/linux-x64-gnu
npm install --no-save file:./npm/linux-x64-gnu
npm run build:ts
npm test

# Type-checks the edgeparse-wasm crate for the wasm32-unknown-unknown target.
# The job runs `cargo check` only; it does not invoke wasm-pack.
wasm:
name: WASM SDK — build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
# Adds the WASM target to the stable toolchain installed by this action.
targets: wasm32-unknown-unknown
- uses: Swatinem/rust-cache@v2
- run: cargo check -p edgeparse-wasm --target wasm32-unknown-unknown

# Smoke test: builds docker/Dockerfile with the repository root as build context.
# The resulting image is neither tagged, run, nor pushed by this job.
docker:
name: Docker — smoke build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: docker build -f docker/Dockerfile .

security:
name: Security audit
runs-on: ubuntu-latest
Expand Down
22 changes: 18 additions & 4 deletions .github/workflows/release-node.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ on:
workflow_dispatch:
inputs:
tag_name:
description: 'Tag name to publish (e.g. v0.2.0) — used for version sync'
description: 'Tag name to publish (e.g. v0.2.1) — used for version sync'
required: true
default: 'v0.2.0'
default: 'v0.2.1'

permissions:
contents: read
Expand Down Expand Up @@ -133,12 +133,26 @@ jobs:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
for dir in sdks/node/npm/*/; do
(cd "$dir" && npm publish --access public) || echo "::warning::Failed to publish $dir"
# Publish one platform package, capturing stdout+stderr so the failure reason
# can be inspected below. Keep the space in "$( (": a bare "$((" is ambiguous
# with arithmetic expansion and is only accepted by bash through a fallback.
OUTPUT=$( (cd "$dir" && npm publish --access public) 2>&1 ) && echo "$OUTPUT" || {
  echo "$OUTPUT"
  # A version that already exists on the registry is not an error on re-runs.
  if echo "$OUTPUT" | grep -Eq "cannot publish over the previously published versions|You cannot publish over the previously published version"; then
    echo "::warning::Package already published for $dir — skipping."
  else
    # Any other publish failure aborts the step.
    exit 1
  fi
}
done

- name: Publish edgeparse (main package)
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
cd sdks/node
npm publish --access public
OUTPUT=$(npm publish --access public 2>&1) && echo "$OUTPUT" || {
echo "$OUTPUT"
if echo "$OUTPUT" | grep -Eq "cannot publish over the previously published versions|You cannot publish over the previously published version"; then
echo "edgeparse already published at this version — skipping."
else
exit 1
fi
}
108 changes: 108 additions & 0 deletions .github/workflows/release-wasm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Builds the edgeparse-wasm crate with wasm-pack, publishes it to npm as
# @edgeparse/edgeparse-wasm, and uploads the packed tarball to the GitHub Release
# for the same tag.
name: Release — WASM SDK (npm)

# Triggers: a pushed tag matching v<digits>.<digits>.<digits>, or a manual run
# where the tag name is supplied as the `tag_name` input.
on:
push:
tags: ['v[0-9]+.[0-9]+.[0-9]+']
workflow_dispatch:
inputs:
tag_name:
description: 'Tag name to publish (e.g. v0.2.1) — used for version sync'
required: true
default: 'v0.2.1'

# Write access to contents is requested because the last step of the job uploads
# an asset to a GitHub Release using the workflow token.
permissions:
contents: write

jobs:
# Single job: verify version, build, rewrite package metadata, pack, publish, upload.
publish-wasm:
name: Publish WASM package
runs-on: ubuntu-latest
# The job is bound to the `npm` deployment environment.
environment: npm
steps:
- uses: actions/checkout@v4
# NOTE(review): registry-url is presumably what lets the later publish step
# authenticate through NODE_AUTH_TOKEN — confirm against the setup-node docs.
- uses: actions/setup-node@v4
with:
node-version: '20'
registry-url: 'https://registry.npmjs.org'
- uses: dtolnay/rust-toolchain@stable
with:
targets: wasm32-unknown-unknown
- uses: Swatinem/rust-cache@v2
# Installs the wasm-pack CLI used by the build step.
- uses: taiki-e/install-action@wasm-pack

# Fails the job unless the tag, with its leading "v" removed, equals the version
# of the edgeparse-wasm package reported by `cargo metadata`.
# The tag comes from the manual `tag_name` input; when that is empty or unset the
# script falls back to GITHUB_REF_NAME.
- name: Verify version consistency
env:
INPUT_TAG_NAME: ${{ inputs.tag_name }}
run: |
TAG_NAME="${INPUT_TAG_NAME:-$GITHUB_REF_NAME}"
TAG_VERSION="${TAG_NAME#v}"
CARGO_VERSION=$(cargo metadata --no-deps --format-version 1 \
| jq -r '.packages[] | select(.name=="edgeparse-wasm") | .version')
if [[ "$TAG_VERSION" != "$CARGO_VERSION" ]]; then
echo "ERROR: tag $TAG_VERSION ≠ Cargo.toml $CARGO_VERSION"
exit 1
fi

# Release build for the `web` target, run from inside the crate directory.
# The following steps read the output from crates/edgeparse-wasm/pkg.
- name: Build WASM package
run: |
cd crates/edgeparse-wasm
wasm-pack build --target web --release

# Rewrites crates/edgeparse-wasm/pkg/package.json in place: sets the scoped
# package name, the version taken from the tag (leading "v" stripped), the
# description and repository, and sets `files` to the three listed build outputs.
- name: Sync npm metadata
env:
INPUT_TAG_NAME: ${{ inputs.tag_name }}
run: |
node -e "
const fs = require('fs');
const refName = process.env.INPUT_TAG_NAME || process.env.GITHUB_REF_NAME;
const version = refName.replace(/^v/, '');
const path = 'crates/edgeparse-wasm/pkg/package.json';
const pkg = JSON.parse(fs.readFileSync(path, 'utf8'));
pkg.name = '@edgeparse/edgeparse-wasm';
pkg.version = version;
pkg.description = 'EdgeParse PDF parser — WebAssembly build for browsers';
pkg.repository = {
type: 'git',
url: 'https://github.com/raphaelmansuy/edgeparse'
};
pkg.files = [
'edgeparse_wasm_bg.wasm',
'edgeparse_wasm.js',
'edgeparse_wasm.d.ts'
];
fs.writeFileSync(path, JSON.stringify(pkg, null, 2) + '\n');
console.log('Version synced to: ' + version);
"

# Creates the .tgz inside the pkg directory; the same tarball is uploaded to the
# GitHub Release by the final step of this job.
- name: Pack npm tarball
run: |
cd crates/edgeparse-wasm/pkg
npm pack

# Stores the tarball as a workflow artifact named `wasm-package`.
- uses: actions/upload-artifact@v4
with:
name: wasm-package
path: crates/edgeparse-wasm/pkg/*.tgz

# Publishes from the pkg directory with NODE_AUTH_TOKEN taken from the NPM_TOKEN
# secret. npm's output is captured and echoed either way. If it contains the
# "cannot publish over the previously published version(s)" message, the step logs
# a skip notice and succeeds; any other publish failure exits 1.
- name: Publish WASM package to npm
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
cd crates/edgeparse-wasm/pkg
OUTPUT=$(npm publish --access public 2>&1) && echo "$OUTPUT" || {
echo "$OUTPUT"
if echo "$OUTPUT" | grep -Eq "cannot publish over the previously published versions|You cannot publish over the previously published version"; then
echo "@edgeparse/edgeparse-wasm already published at this version — skipping."
else
exit 1
fi
}

# Uploads the packed tarball to the release named after the tag (manual input,
# else GITHUB_REF_NAME). --clobber is passed so an asset with the same name is
# overwritten rather than rejected.
# NOTE(review): this presumably requires the release for the tag to exist already —
# confirm ordering against whichever workflow creates the release.
- name: Upload npm tarball to GitHub Release
env:
GH_TOKEN: ${{ github.token }}
INPUT_TAG_NAME: ${{ inputs.tag_name }}
run: |
TAG_NAME="${INPUT_TAG_NAME:-$GITHUB_REF_NAME}"
gh release upload "$TAG_NAME" crates/edgeparse-wasm/pkg/*.tgz \
--repo "${{ github.repository }}" --clobber
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@ this project adheres to [Semantic Versioning](https://semver.org/).

---

## [0.2.1] — 2026-03-26

### Added
- Dedicated `release-wasm.yml` workflow to publish `@edgeparse/edgeparse-wasm` on tagged releases and attach the npm tarball to the GitHub Release
- CI coverage for the WASM target and Docker image smoke builds so every shipped artifact is validated before release
- Release-channel documentation in the README covering crates, SDKs, CLI archives, Homebrew, and container images

### Changed
- Bumped the workspace and published SDK manifests to `0.2.1`
- Local release helpers now publish `pdf-cos` before `edgeparse-core`, matching the crates.io CI workflow
- `make publish-all` now includes the WASM SDK release path
- README benchmark results updated to the latest 200-document `opendataloader.org` comparison, where EdgeParse leads the published field on every reported metric

### Fixed
- Removed stale release documentation that still described five workflows and partial manual workarounds for older releases
- Updated install guidance to reflect Linux `glibc >= 2.17` compatibility for release binaries

---

## [0.2.0] — 2026-03-24

### Added
Expand Down
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ default-members = [
]

[workspace.package]
version = "0.2.0"
version = "0.2.1"
edition = "2021"
rust-version = "1.85"
license = "Apache-2.0"
Expand Down
62 changes: 52 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
bench-engines bench-non-ocr bench-ocr bench-compare-all bench-report \
run demo \
publish-rust publish-rust-dry publish-python publish-python-dry \
publish-node publish-node-dry \
publish-node publish-node-dry publish-wasm publish-wasm-dry \
publish-cli publish-cli-dry \
publish-brew publish-brew-dry \
wasm-build wasm-check wasm-size wasm-clean \
publish-all \
clean clean-bench clean-all

Expand Down Expand Up @@ -221,14 +222,28 @@ bench-report: bench-setup ## Regenerate HTML report from existing results (no r
# ══════════════════════════════════════════════════════════════════════════════

# ── Rust / crates.io ──────────────────────────────────────────────────────────
publish-rust-dry: ## Dry-run: verify edgeparse-core + edgeparse-cli can be published
publish-rust-dry: ## Dry-run: verify pdf-cos + edgeparse-core publish cleanly and edgeparse-cli packages cleanly
$(call log,cargo publish --dry-run [pdf-cos])
@cargo publish -p pdf-cos --dry-run --allow-dirty
$(call log,cargo publish --dry-run [edgeparse-core])
@cargo publish -p edgeparse-core --dry-run
$(call log,cargo publish --dry-run [edgeparse-cli])
@cargo publish -p edgeparse-cli --dry-run
@cargo publish -p edgeparse-core --dry-run --allow-dirty
# edgeparse-cli is checked with `cargo package`, and its combined output is captured.
# A failure is downgraded to a warning only when the output contains BOTH
# 'location searched: crates.io index' and 'required by package `edgeparse-cli';
# any other failure exits 1 and fails the target.
$(call log,cargo package --allow-dirty [edgeparse-cli])
@OUTPUT=$$(cargo package -p edgeparse-cli --allow-dirty 2>&1) && echo "$$OUTPUT" || { \
echo "$$OUTPUT"; \
if echo "$$OUTPUT" | grep -q 'location searched: crates.io index' \
&& echo "$$OUTPUT" | grep -q 'required by package `edgeparse-cli'; then \
printf "$(BOLD)$(YELLOW) ⚠$(RESET) $(YELLOW)edgeparse-cli package dry-run requires edgeparse-core $(VERSION) to already exist on crates.io; the tagged CI release handles that publish order.$(RESET)\n"; \
else \
exit 1; \
fi; \
}
$(call ok,Rust dry-run passed — ready for crates.io)

publish-rust: ## Publish edgeparse-core then edgeparse-cli to crates.io
publish-rust: ## Publish pdf-cos, edgeparse-core, then edgeparse-cli to crates.io
$(call log,Publishing pdf-cos to crates.io ...)
@cargo publish -p pdf-cos
$(call log,Waiting 30 s for crates.io index to propagate ...)
@sleep 30
$(call log,Publishing edgeparse-core to crates.io ...)
@cargo publish -p edgeparse-core
$(call log,Waiting 30 s for crates.io index to propagate ...)
Expand Down Expand Up @@ -502,9 +517,36 @@ publish-brew: ## Generate Homebrew formula and push to $(BREW_TAP_REPO)
rm -rf "$$TAPDIR"
$(call ok,Homebrew formula v$(VERSION) pushed to $(BREW_TAP_REPO))

# ── WASM / npm ────────────────────────────────────────────────────────────────
# Builds the package with wasm-pack, rewrites name and version in the generated
# pkg/package.json, then previews the tarball with `npm pack --dry-run`.
# Nothing is published; the target aborts early when wasm-pack is not on PATH.
publish-wasm-dry: ## Dry-run: build the WASM package and preview the npm tarball
$(call log,Building WebAssembly package [dry-run] ...)
@command -v wasm-pack >/dev/null 2>&1 || { \
$(call err,wasm-pack not found — install: cargo install wasm-pack); \
exit 1; }
@cd crates/edgeparse-wasm && wasm-pack build --target web --release
# Set the scoped npm name and the Makefile's $(VERSION) in the generated manifest.
@node -e "const fs=require('fs');const p='crates/edgeparse-wasm/pkg/package.json';const pkg=JSON.parse(fs.readFileSync(p,'utf8'));pkg.name='@edgeparse/edgeparse-wasm';pkg.version='$(VERSION)';fs.writeFileSync(p,JSON.stringify(pkg,null,2)+'\n');"
@cd crates/edgeparse-wasm/pkg && npm pack --dry-run
$(call ok,WASM dry-run passed — ready for npm)

# Builds the WASM package and publishes it to npm as @edgeparse/edgeparse-wasm.
# Requires NPM_TOKEN (environment or command line) and wasm-pack on PATH.
#
# The token is written to a throwaway npm user config selected through
# NPM_CONFIG_USERCONFIG rather than to ~/.npmrc, so an existing ~/.npmrc is
# never overwritten or deleted. Creation, publish and cleanup share one shell
# invocation, and the EXIT trap removes the token file even when `npm publish`
# fails.
publish-wasm: ## Build and publish the WASM npm package (@edgeparse/edgeparse-wasm)
ifndef NPM_TOKEN
	$(call err,NPM_TOKEN is required. Usage: NPM_TOKEN=<token> make publish-wasm)
	@exit 1
endif
	$(call log,Publishing @edgeparse/edgeparse-wasm to npm ...)
	@command -v wasm-pack >/dev/null 2>&1 || { \
		$(call err,wasm-pack not found — install: cargo install wasm-pack); \
		exit 1; }
	@cd crates/edgeparse-wasm && wasm-pack build --target web --release
# Set the scoped npm name and the Makefile's $(VERSION) in the generated manifest.
	@node -e "const fs=require('fs');const p='crates/edgeparse-wasm/pkg/package.json';const pkg=JSON.parse(fs.readFileSync(p,'utf8'));pkg.name='@edgeparse/edgeparse-wasm';pkg.version='$(VERSION)';fs.writeFileSync(p,JSON.stringify(pkg,null,2)+'\n');"
# The credentials live in a temp file only for the duration of this one command.
	@NPMRC=$$(mktemp) && trap 'rm -f "$$NPMRC"' EXIT && \
		printf "//registry.npmjs.org/:_authToken=%s\n" "$(NPM_TOKEN)" > "$$NPMRC" && \
		cd crates/edgeparse-wasm/pkg && \
		NPM_CONFIG_USERCONFIG="$$NPMRC" npm publish --access public
	$(call ok,WASM package published to npm)

# ── Combined ──────────────────────────────────────────────────────────────────
publish-all: publish-rust publish-python publish-node publish-cli publish-brew ## Publish everything: Rust crates + Python wheels + Node.js packages + CLI binaries + Homebrew formula
$(call ok,All SDKs + CLI + Homebrew tap published)
# Aggregate target: runs every publish-* prerequisite listed below, then prints a
# completion message.
# NOTE(review): the prerequisites are presumably meant to run in the listed order.
# That order is not guaranteed under `make -j` unless parallelism is disabled
# elsewhere in this file — confirm before running in parallel.
publish-all: publish-rust publish-python publish-node publish-wasm publish-cli publish-brew ## Publish everything: crates + Python + Node + WASM + CLI + Homebrew
$(call ok,All publish targets completed)

# ══════════════════════════════════════════════════════════════════════════════
## WASM
Expand All @@ -513,12 +555,12 @@ publish-all: publish-rust publish-python publish-node publish-cli publish-brew #
WASM_CRATE := crates/edgeparse-wasm

wasm-build: ## Build WASM package (release, --target web)
$(call info,Building WASM package...)
$(call log,Building WASM package...)
@cd $(WASM_CRATE) && wasm-pack build --target web --release --scope edgeparse
$(call ok,WASM package built → $(WASM_CRATE)/pkg/)

wasm-check: ## Check WASM compilation (fast, no codegen)
$(call info,Checking WASM compilation...)
$(call log,Checking WASM compilation...)
@cargo check --target wasm32-unknown-unknown -p edgeparse-wasm
$(call ok,WASM check passed)

Expand Down
Loading
Loading