diff --git a/components/ui/copy-button-listener.tsx b/components/ui/copy-button-listener.tsx index c42b21c6..0c6bc84d 100644 --- a/components/ui/copy-button-listener.tsx +++ b/components/ui/copy-button-listener.tsx @@ -4,6 +4,26 @@ import {useEffect} from 'react'; const shellLanguages = new Set(['bash', 'sh', 'shell', 'zsh']); +// Strip shell comments so pasted snippets work in zsh (which doesn't treat +// `#` as a comment in interactive mode by default). Full-line comments are +// dropped; inline comments (a `#` preceded by whitespace, outside quotes — +// i.e. bash's own comment rule) are trimmed off the end of the line. This +// leaves `$#`, `${#arr}`, `url#frag`, and `"# in a string"` untouched. +function stripShellComments(line: string): string | null { + if (line.trimStart().startsWith('#')) return null; + let inSingle = false; + let inDouble = false; + for (let i = 0; i < line.length; i++) { + const c = line[i]; + if (c === "'" && !inDouble) inSingle = !inSingle; + else if (c === '"' && !inSingle) inDouble = !inDouble; + else if (c === '#' && !inSingle && !inDouble && /\s/.test(line[i - 1] ?? '')) { + return line.slice(0, i).replace(/\s+$/, ''); + } + } + return line; +} + export default function CopyButtonListener() { useEffect(() => { const handleClick = (event: MouseEvent) => { @@ -26,7 +46,8 @@ export default function CopyButtonListener() { const textToCopy = shellLanguages.has(language ?? '') ? 
codeBlock .split('\n') - .filter(line => !line.trimStart().startsWith('#')) + .map(stripShellComments) + .filter((line): line is string => line !== null) .join('\n') : codeBlock; diff --git a/contents/docs/connecting-to-postgres.mdx b/contents/docs/connecting-to-postgres.mdx index ac447948..e119b9b1 100644 --- a/contents/docs/connecting-to-postgres.mdx +++ b/contents/docs/connecting-to-postgres.mdx @@ -71,13 +71,70 @@ This configuration can cause problems like `slot has been invalidated because it ### PlanetScale for Postgres -You should use the `default` role that PlanetScale provides, because PlanetScale user-defined roles cannot create replication slots. +#### Roles -Planetscale Postgres defaults `max_connections` to 25, which can easily be exhausted by Zero's connection pools. This will result in an error like `remaining connection slots are reserved for roles with the SUPERUSER attribute`. -You should increase this value in the Parameters section of the PlanetScale dashboard to 100 or more. +`zero-cache` should connect using the `default` role that PlanetScale provides, because PlanetScale user-defined roles cannot create replication slots. + +#### Connection Limits + +Change `max_connections` to at least 100. The default is 25, which is too low for Zero in most configurations. + +#### Pooling Make sure to only use a direct connection for the `ZERO_UPSTREAM_DB`, and use pooled URLs for `ZERO_CVR_DB`, `ZERO_CHANGE_DB`, and your API (see [Deployment](/docs/self-host)). +#### High Availability + +PlanetScale Postgres can fail over to a standby during maintenance or an outage. By default a logical replication slot does **not** survive promotion of a standby, so after a failover zero-cache would find its slot missing and re-sync every replica from scratch. + +To avoid this, first, run `zero-cache` with [`ZERO_UPSTREAM_PG_REPLICATION_SLOT_FAILOVER=true`](/docs/zero-cache-config#pg-replication-slot-failover) so it creates failover-enabled slots. 
+ +Then, run the script below to register Zero's replication slots with PlanetScale and enable the two cluster parameters failover needs: + +```bash +APP="" # your ZERO_APP_ID — on Zero Cloud this is your instance ID +ORG="" # PlanetScale organization +DB="" # PlanetScale database +BRANCH="main" +SHARD="0" + +if [ -z "$APP" ] || [ -z "$ORG" ] || [ -z "$DB" ]; then + echo "Set APP, ORG, and DB first — nothing was sent." +elif pscale api -X PATCH "organizations/${ORG}/databases/${DB}/branches/${BRANCH}/changes" --input=- >/dev/null < +Zero only uses a few slots at a time. We register the full `a–z` range with PlanetScale out of conservatism to cover potential issues where a slot doesn't get cleaned up. It also prepares us for potential future Zero versions where multiple replication-managers can run in parallel. + +Registrations don't cost anything if there is no actual slot with the same name. + + ### Neon #### Logical Replication @@ -151,6 +208,10 @@ difficult. [Hetzner](https://www.hetzner.com/) offers cheap hosted VPS that supp IPv4 addresses are only supported on the Pro plan and are an extra $4/month. +#### High Availability + +Zero does not support Supabase's high-availability automatic failover. Supabase does not currently expose the replication-slot failover configuration Zero needs, so a promotion would orphan Zero's replication slot and force a full resync. If you need this, [reach out on Discord](https://discord.rocicorp.dev/). + ### Render Render _can_ work with Zero, but requires admin/support-side setup, and does not support a few core Zero features. @@ -161,6 +222,8 @@ You also must ensure `wal_level=logical` by creating a Render support ticket. Render does not provide superuser access, but you can submit another support ticket to ask Render to create a publication with `FOR ALL TABLES` for you, and then set that publication in [App Publications](/docs/zero-cache-config#app-publications). +Zero does not support Render's high availability (HA). 
Render's standby replicates asynchronously, so a failover can drop the most recent writes — which is incompatible with a sync engine like Zero that must never miss a change. Do not enable HA for a database used as a Zero upstream. + ### Google Cloud SQL Zero works with Google Cloud SQL out of the box. In many configurations, when you connect with a user that has sufficient privileges, `zero-cache` will create its default publication automatically. diff --git a/contents/docs/otel.mdx b/contents/docs/otel.mdx index 6acea036..f40a86ef 100644 --- a/contents/docs/otel.mdx +++ b/contents/docs/otel.mdx @@ -147,6 +147,8 @@ This callback is called before sending WebSocket messages that trigger API serve | `total_lag` | Gauge | ms | End-to-end replication latency. Grows as an estimate if the next report hasn't arrived | | `events` | Counter | | Number of replication events processed | | `transactions` | Counter | | Count of replicated transactions | +| `shadow-sync-runs` | Counter | | Number of [shadow initial-sync](/docs/zero-cache-config#shadow-sync-enabled) runs. Has a `result` attribute: `success`, `error` | +| `shadow-sync-duration` | Histogram | s | Wall-clock duration of a shadow initial-sync run. Has a `result` attribute: `success`, `error` | ### zero.sync diff --git a/contents/docs/release-notes/1.6.mdx b/contents/docs/release-notes/1.6.mdx new file mode 100644 index 00000000..d333203e --- /dev/null +++ b/contents/docs/release-notes/1.6.mdx @@ -0,0 +1,59 @@ +--- +title: Zero 1.6 +description: PlanetScale Failover Support +--- + +## Installation + +```bash +npm install @rocicorp/zero@1.6 +``` + +## Upgrading + +### PlanetScale Failover + +Previous Zero versions lost replication slots after a PlanetScale Postgres failover, forcing resync. Zero 1.6 fixes this problem. To enable support, see [High Availability and Failover](/docs/connecting-to-postgres#high-availability). 
+ +## Features + +- [**Litestream Region:**](/docs/zero-cache-config#litestream-region) Added support for deployments in non-standard AWS partitions like GovCloud (thanks [@ericykim](https://github.com/ericykim)!). + +## Performance + +- [~5% CPU reduction in replication-manager benchmarks by reusing stringified payloads across subscribers](https://github.com/rocicorp/mono/pull/5900) +- [Faster `EXISTS` subqueries via the new `Cap` operator — can turn exists-heavy queries that previously timed out into ones that complete in milliseconds](https://github.com/rocicorp/mono/pull/5943) +- [Bulk-insertion optimization in Replicache via `putMany`, speeding up large sync patches (3-5x faster for typical sync batches, up to 50x+ for large preloads)](https://github.com/rocicorp/mono/pull/5380) +- [Batch deletes/upserts in `SQLiteStore` writes (~8x faster on 1k-put commits)](https://github.com/rocicorp/mono/pull/5915) +- [Batch concurrent `SQLiteStore` `get`/`has` reads into fewer database queries](https://github.com/rocicorp/mono/pull/5958) +- [Parallelize I/O during pull and rebase](https://github.com/rocicorp/mono/pull/5926) +- [Heap-based k-way merge in `fetchMergeSort` (O(log K) per row vs O(K))](https://github.com/rocicorp/mono/pull/5921) +- [Initial sync progress reporting uses `pg_class` estimates instead of full scans](https://github.com/rocicorp/mono/pull/5932) +- [De-dupe SQLite requests in flip-join when children want the same parent](https://github.com/rocicorp/mono/pull/5918) +- [`zero-sqlite3` now gathers up to 128 STAT4 samples per index, improving SQLite query planning for skewed indexed data](https://github.com/rocicorp/mono/pull/5913) + +## Fixes + +- [Returning to an app after stale-tab GC or CVR purge caused a full page reload](https://github.com/rocicorp/mono/pull/5903) +- [`"Row already exists"` assertion failures during poke processing](https://github.com/rocicorp/mono/pull/5923) +- [Deadlock during post-initial-sync `changeLog` 
reset](https://github.com/rocicorp/mono/pull/5953) +- [Zombie `ViewSyncer`s inflated `active-client-groups` metric](https://github.com/rocicorp/mono/pull/5907) +- [`ConcurrentModificationException` now reconnects instead of erroring](https://github.com/rocicorp/mono/pull/5930) +- [`zero-cache` startup errors during change-streamer init not published](https://github.com/rocicorp/mono/pull/5956) +- [`TypeError: Expected string at context.query. Got null`](https://github.com/rocicorp/mono/pull/5944) +- [Replication slots were lost after a PlanetScale Postgres failover](https://github.com/rocicorp/mono/pull/5934) +- [Replication slot creation timeouts crashed the server during backfill retries](https://github.com/rocicorp/mono/pull/5901) +- [Shadow sync threw when a synced table could not be queried by ZQL](https://github.com/rocicorp/mono/pull/5950) +- [Shadow sync now reports metrics and logs verification-success counts](https://github.com/rocicorp/mono/pull/5941) +- [WebSocket errors are now logged as warnings instead of errors](https://github.com/rocicorp/mono/pull/5842) +- [Inspector now reports last query hydration time instead of histogram](https://github.com/rocicorp/mono/pull/5924) +- [Query/mutator functions now allow omitting `args`](https://github.com/rocicorp/mono/pull/5945) +- [Reduce log volume at `INFO` level](https://github.com/rocicorp/mono/pull/5946) +- [Reclassify common Postgres config errors as warnings instead of errors](https://github.com/rocicorp/mono/pull/5981) +- [Union-fan-in queries could ignore reverse ordering](https://github.com/rocicorp/mono/pull/5980) +- [Abort `zero-cache` when ChangeDB CDC tables go missing](https://github.com/rocicorp/mono/pull/5989) +- [CVR purge failures retried immediately in a tight loop instead of backing off](https://github.com/rocicorp/mono/pull/5988) + +## Breaking Changes + +None.
diff --git a/contents/docs/release-notes/index.mdx b/contents/docs/release-notes/index.mdx index a60cf62e..3cb0ddbb 100644 --- a/contents/docs/release-notes/index.mdx +++ b/contents/docs/release-notes/index.mdx @@ -2,6 +2,7 @@ title: Release Notes --- +- [Zero 1.6: PlanetScale Failover Support](/docs/release-notes/1.6) - [Zero 1.5: Schema Change Improvements and Client Group Auth](/docs/release-notes/1.5) - [Zero 1.4: Performance and Reliability Improvements](/docs/release-notes/1.4) - [Zero 1.3: Faster Initial Sync and Other Perf Improvements](/docs/release-notes/1.3) diff --git a/contents/docs/zero-cache-config.mdx b/contents/docs/zero-cache-config.mdx index dbd5acff..386824b5 100644 --- a/contents/docs/zero-cache-config.mdx +++ b/contents/docs/zero-cache-config.mdx @@ -390,6 +390,13 @@ flag: `--litestream-port`
env: `ZERO_LITESTREAM_PORT`
default: `--port + 2` +### Litestream Region + +The AWS region for the litestream backup bucket. Required for non-standard AWS partitions (e.g. GovCloud `us-gov-west-1`) where Litestream cannot auto-detect the region. The replication-manager and view-syncers must have the same region. + +flag: `--litestream-region`
+env: `ZERO_LITESTREAM_REGION`
+ ### Litestream Restore Parallelism The number of WAL files to download in parallel when performing the initial restore of the replica from the backup. @@ -522,6 +529,14 @@ flag: `--per-user-mutation-limit-window-ms`
env: `ZERO_PER_USER_MUTATION_LIMIT_WINDOW_MS`
default: `60000` +### PG Replication Slot Failover + +For upstream Postgres 17+, creates replication slots with the `failover` flag enabled so they can be synchronized to a standby and survive a failover. This requires additional Postgres-side configuration on your provider; see [High Availability and Failover](/docs/connecting-to-postgres#high-availability). Has no effect on Postgres versions before 17. + +flag: `--upstream-pg-replication-slot-failover`
+env: `ZERO_UPSTREAM_PG_REPLICATION_SLOT_FAILOVER`
+default: `false` + ### Port The port for sync connections.