From 4ef37bdf0bdfae204ac529f323c3f29f94a78772 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 6 May 2026 12:50:50 -0700 Subject: [PATCH 1/4] Default plugins.calcite.enabled=true on the unified query path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unified query handler (`RestUnifiedQueryAction` → `TransportPPLQueryAction` → analytics-engine) builds its `UnifiedQueryContext` with an empty `Settings` map; nothing wires the cluster setting through. PPL parsing is delegated to the same `AstBuilder` the v2 path uses, and that builder gates `table` (and other Calcite-only commands) on `Settings.Key.CALCITE_ENGINE_ENABLED`. With no setting propagated, the gate sees `null`, fails the `Boolean.TRUE.equals` check, and throws UnsupportedOperationException: Table command is supported only when plugins.calcite.enabled=true even when the cluster setting is true, blocking every `table` query routed through the analytics path under `tests.analytics.force_routing=true`. The unified path is by definition Calcite-based — every query reaching `UnifiedQueryContext` flows through Calcite's planner. Default `CALCITE_ENGINE_ENABLED=true` in `buildSettings()` when the underlying map doesn't have the key. This unblocks `table` and any other AstBuilder gate that defends the same toggle, without changing v2 behavior (v2 constructs `AstBuilder` with cluster `Settings`, not the unified context). Also refresh the PPL analytics-engine routing dev doc to document the unified-context dependency and switch the per-command verification recipe from the bundle-branch `analyticsCompatibilityTest` task to the standard `integTestRemote -Dtests.analytics.{force_routing,parquet_indices}=true` — the bundle-branch task is purely for the daily coverage-report sweep. 
Signed-off-by: Kai Huang --- .../sql/api/UnifiedQueryContext.java | 8 + docs/dev/ppl-analytics-engine-coverage-sop.md | 152 +++++++ docs/dev/ppl-analytics-engine-routing.md | 426 ++++++++++++++++++ 3 files changed, 586 insertions(+) create mode 100644 docs/dev/ppl-analytics-engine-coverage-sop.md create mode 100644 docs/dev/ppl-analytics-engine-routing.md diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 4332ff17660..864b2610897 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -225,6 +225,14 @@ private Settings buildSettings() { @Override @SuppressWarnings("unchecked") public T getSettingValue(Key key) { + // The unified query path is always Calcite-based — every query reaching this context + // goes through Calcite's planner, never the v2 engine. Default + // {@code plugins.calcite.enabled} to true so AstBuilder gates (e.g. visitTableCommand) + // that protect v2-only code paths don't block valid analytics-engine queries when the + // underlying setting hasn't been propagated through Builder#setting. + if (key == Key.CALCITE_ENGINE_ENABLED && !settings.containsKey(key)) { + return (T) Boolean.TRUE; + } return (T) settings.get(key); } diff --git a/docs/dev/ppl-analytics-engine-coverage-sop.md b/docs/dev/ppl-analytics-engine-coverage-sop.md new file mode 100644 index 00000000000..710feb5f724 --- /dev/null +++ b/docs/dev/ppl-analytics-engine-coverage-sop.md @@ -0,0 +1,152 @@ +# SOP — PPL Analytics-Engine IT Coverage Report + +Run all PPL ITs through the analytics-engine path against an externally-managed +cluster, get a bucketed markdown report. Daily. + +Output: `integ-test/build/reports/analytics-compatibility/REPORT.md` + +--- + +## One-time setup + +JDK 25 + JDK 21, Rust stable, ~15 GB free disk. 
+ +```bash +mkdir -p ~/workspace && cd ~/workspace +git clone git@github.com:opensearch-project/OpenSearch.git +git clone git@github.com:opensearch-project/sql.git +( cd OpenSearch && git remote add upstream https://github.com/opensearch-project/OpenSearch.git ) +( cd sql && git remote add ahkcs git@github.com:ahkcs/sql.git ) +``` + +**OpenSearch — track `upstream/main` + apply commons-text patch:** + +`upstream/main` already has the full analytics-engine sandbox plugin set +(`analytics-engine`, `analytics-backend-{datafusion,lucene}`, `composite-engine`, +`dsl-query-executor`, `parquet-data-format`) and the `dataformat-native` rust +crate. Track main; rebase daily. + +```bash +cd ~/workspace/OpenSearch +git fetch upstream main && git checkout -B main upstream/main +``` + +In `sandbox/plugins/analytics-engine/build.gradle`, add next to the existing +`commons-math3` runtimeOnly: + +```groovy +runtimeOnly "org.apache.commons:commons-text:1.11.0" +``` + +`commons-text` is only listed under `resolutionStrategy.force` on main, which +pins the version but doesn't bundle the jar — without this `runtimeOnly` line +the cluster dies on `LevenshteinDistance` the first time a query reaches +Calcite's `SqlFunctions.`. Track upstreaming; once it lands, drop this +patch. + +**Build the native lib (one-time + after rust changes):** + +The cluster JVM loads `libopensearch_native.dylib` (rust + JNI/FFM) at startup +for DataFusion execution, parquet I/O, and native repository ops. Without it +the cluster crashes in `NativeBridge.`. + +```bash +cd ~/workspace/OpenSearch/sandbox/libs/dataformat-native/rust +cargo build --release -p opensearch-native-lib # ~5 min +``` + +--- + +## Daily run (~30 min) + +### 1. 
Get the SQL bundle worktree + +First time: +```bash +cd ~/workspace/sql +git fetch ahkcs feature/ppl-coverage-bundle +DATE=$(date +%Y-%m-%d) +git worktree add "../sql-ppl-coverage-${DATE}" ahkcs/feature/ppl-coverage-bundle +``` + +Reuse same day: +```bash +cd ~/workspace/sql-ppl-coverage-${DATE} +git fetch ahkcs && git rebase ahkcs/feature/ppl-coverage-bundle +``` + +### 2. Publish the SQL plugin to mavenLocal + +`:run` installs `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` from maven coords — +mavenLocal first, then OpenSearch CI snapshots. The SQL plugin isn't on CI +snapshots, so without this step the cluster fails to bootstrap with +`Could not resolve …`. Re-publish whenever the bundle worktree is updated. + +```bash +cd ~/workspace/sql-ppl-coverage-${DATE} +./gradlew publishToMavenLocal # ~1 min, JDK 21+ +``` + +### 3. Bring up cluster (terminal A — leave running) + +```bash +cd ~/workspace/OpenSearch +JAVA_HOME=$(mise where java@temurin-25.0.1+8.0.LTS) ./gradlew :run \ + -Dsandbox.enabled=true \ + -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ +'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ +'analytics-backend-lucene', 'composite-engine', \ +'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" +``` + +Wait for it: +```bash +until curl -sf http://localhost:9200/ >/dev/null; do sleep 4; done +``` + +### 4. Run the report (terminal B) + +```bash +cd ~/workspace/sql-ppl-coverage-${DATE} +rm -rf integ-test/build/test-results/analyticsCompatibility \ + integ-test/build/test-results/analyticsCompatibilityTest \ + integ-test/build/reports/analytics-compatibility + +./gradlew :integ-test:analyticsCompatibilityReport \ + -Dtests.rest.cluster=localhost:9200 \ + -Dtests.cluster=localhost:9300 \ + -Dtests.clustername=runTask +``` + +`BUILD SUCCESSFUL` is expected — failures are the report payload, not a build +break. + +### 5. Open the report + +```bash +open integ-test/build/reports/analytics-compatibility/REPORT.md +``` + +### 6. 
Tear down + +`Ctrl-C` terminal A. + +--- + +## When something breaks + +| Symptom | Fix | +|---|---| +| `BUILD FAILED … java.io.EOFException … KryoBackedDecoder` | `rm -rf integ-test/build/test-results/analyticsCompatibilityTest` and rerun. | +| Run finishes <10 min, top bucket is `ConnectException` | Cluster died. Tail `~/workspace/OpenSearch/build/testclusters/runTask-0/logs/runTask.log` for `fatal error`; the missing class usually needs a `runtimeOnly` dep added to `analytics-engine/build.gradle`. | +| Top bucket is `No backend can scan all requested fields` (thousands) | Parquet flag isn't reaching `TestUtils`. Confirm worktree is on `ahkcs/feature/ppl-coverage-bundle`. | +| `BUILD FAILED` immediately, log shows `[BUILD] Starting OpenSearch process / Stopping node` | `task.getClusters().clear()` is missing. Re-fetch `feature/ppl-coverage-bundle`. | + +--- + +## What the bundle branch is + +`ahkcs/feature/ppl-coverage-bundle` rebases on +`upstream/feature/mustang-ppl-integration` and adds the report task, parquet +flag, origin classifier, pass-rate fix, and testcluster auto-bind workaround. +Daily upstream sync: `git rebase upstream/feature/mustang-ppl-integration`. diff --git a/docs/dev/ppl-analytics-engine-routing.md b/docs/dev/ppl-analytics-engine-routing.md new file mode 100644 index 00000000000..e2dcf40b686 --- /dev/null +++ b/docs/dev/ppl-analytics-engine-routing.md @@ -0,0 +1,426 @@ +# Routing PPL Commands Through the Analytics-Engine Path + +A guide for wiring an existing PPL command — already working on the v2 / Calcite engine — through the analytics-engine route so that `CalciteXxxCommandIT` passes against the force-routed analytics-engine path, without needing a per-command code change in the SQL plugin. + +This doc is the playbook distilled from landing PPL `fillnull` end-to-end through the analytics engine ([opensearch-project/OpenSearch#21472][pr-fillnull]). 
Pair with [`ppl-commands.md`](ppl-commands.md) (which covers adding a *new* PPL command on the v2 / Calcite engine). + +[pr-fillnull]: https://github.com/opensearch-project/OpenSearch/pull/21472 + +## Goal + +For an existing PPL command `Xxx` whose `CalciteXxxCommandIT` currently passes on the v2 / Calcite engine but fails on the analytics-engine route, drive the analytics-engine route to parity. Two acceptance criteria: + +1. `CalciteXxxCommandIT` (in `opensearch-project/sql`) passes when run via `:integ-test:integTestRemote` with `-Dtests.analytics.force_routing=true -Dtests.analytics.parquet_indices=true`. +2. A self-contained `XxxCommandIT` lives in `opensearch-project/OpenSearch` under `sandbox/qa/analytics-engine-rest`, exercising the command through `POST /_analytics/ppl` against a parquet-backed dataset. CI for OpenSearch core can verify the analytics-engine path without checking out the SQL plugin. + +The QA IT in OpenSearch core is the long-term verification surface — once it lands, the SQL-plugin IT becomes a duplicate maintained for the v2 / Calcite path. 
+ +## Mental model — three layers + +A PPL query on the analytics-engine route passes through three independent layers: + +``` +PPL text + │ + ▼ +[ Layer 1 ] SQL plugin lowering (opensearch-project/sql) + │ PPLSyntaxParser → AstBuilder → CalciteRelNodeVisitor → RelNode tree + │ + ▼ +[ Layer 2 ] Analytics-engine planner (opensearch-project/OpenSearch · sandbox/plugins/analytics-engine) + │ Capability registry, OpenSearch{Project,Filter,Aggregate}Rule, + │ AnnotatedProjectExpression, fragment driver + │ + ▼ +[ Layer 3 ] Backend (opensearch-project/OpenSearch · sandbox/plugins/analytics-backend-datafusion) + Substrait conversion via isthmus → DataFusion native execution +``` + +Knowing which layer a missing function lives in tells you the effort: + +| Bucket | Symptom (failure stack frame) | Layer | Effort | +|---|---|---|---| +| 1 (S0) | `OpenSearchProjectRule` / `OpenSearchFilterRule` throws `No backend supports scalar function [X] among [datafusion]` | Layer 2 capability registry | One line | +| 2 (S1) | Substrait isthmus throws `Unable to convert call X` (the function exists in DataFusion but the call shape doesn't match) | Layer 2 adapter | Small adapter | +| 3 (S1–M) | DataFusion runtime throws (function doesn't exist natively) | Layer 3 UDF | Half-day–multi-day Rust | + +**Layer 1 is almost always already done** for any command that works on the v2 Calcite engine — `CalciteRelNodeVisitor.visitXxx` produces a clean RelNode that's the same regardless of which backend executes it. Verify by reading the v2 unit test (`CalcitePPLXxxTest` in `ppl/src/test/java/.../calcite/`); the printed `LogicalPlan` shows exactly what reaches Layer 2. + +## Local environment setup + +### One-time + +1. **JDK 25** — `sandbox/libs/dataformat-native` targets JDK 25 (FFM API). Without it you'll see `error: release version 25 not supported`. + ```bash + # mise / asdf / sdkman + export JAVA_HOME=/path/to/temurin-25.0.x + export PATH=$JAVA_HOME/bin:$PATH + ``` +2. 
**Native lib built** — confirm `OpenSearch/sandbox/libs/dataformat-native/rust/target/release/libopensearch_native.dylib` (macOS) or `.so` (Linux) exists. Built automatically by `:run`/`:integTest` if missing, but a stale build is the most common source of weird FFM errors. Rebuild explicitly via the cargo target if in doubt. +3. **`gh` CLI** authenticated against your OpenSearch fork for opening PRs. + +### Repos + +| Repo | Branch convention | Purpose | +|---|---|---| +| `opensearch-project/OpenSearch` | `feature/` off `upstream/main` | Where the actual fix lands (analytics-engine and DataFusion backend code) | +| `opensearch-project/sql` | `feature/` off `ahkcs/feature/ppl-coverage-bundle` until the analytics-compatibility test task lands upstream; thereafter off `upstream/feature/mustang-ppl-integration` | Test-infrastructure work, IT carryovers (rare for new functions once QA-side ITs exist) | + +In the OpenSearch checkout: `feature/mustang-fillnull-command` was the dev branch; `pr/fillnull-poc` was the rebased clean branch I pushed for the PR. Use the same split when working on a new function — keep your dev branch with extra debugging/printf commits, then cherry-pick the clean ones onto a `pr/...` branch for PR submission. + +### Maven-local prerequisites (full publish-and-run flow from scratch) + +The `:run` task pulls `opensearch-job-scheduler:3.7.0.0-SNAPSHOT` and `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` from `~/.m2/repository`. If those aren't published yet, the cluster bootstrap fails with `Could not resolve ...` or `Failed installing file:.../opensearch-sql-plugin...zip`. End-to-end: + +```bash +# 1. Publish OpenSearch core + sandbox plugins to maven local +cd /path/to/OpenSearch +JAVA_HOME=/path/to/temurin-25 ./gradlew publishToMavenLocal -Dsandbox.enabled=true + +# 2. Publish SQL plugin to maven local (any JDK 21+ works for the SQL repo) +cd /path/to/sql +./gradlew publishToMavenLocal + +# 3. 
(Optional) Publish opensearch-job-scheduler from its own repo if not already in maven local +# Most devs already have this from prior work — check via: +ls ~/.m2/repository/org/opensearch/plugin/opensearch-job-scheduler/3.7.0.0-SNAPSHOT/ + +# 4. Run the server with all required plugins (back in OpenSearch) +cd /path/to/OpenSearch +JAVA_HOME=/path/to/temurin-25 ./gradlew :run -Dsandbox.enabled=true \ + -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ + 'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ + 'analytics-backend-lucene', 'composite-engine', \ + 'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" +``` + +**`tests.jvm.argline` and `--debug-server-jvm`.** The user-facing Mustang task brief shows: +```bash +-Dtests.jvm.argline="-Djava.library.path=$(pwd)/sandbox/libs/dataformat-native/rust/target/release \ + -Dopensearch.experimental.feature.pluggable.dataformat.enabled=true" \ +--debug-server-jvm +``` +You don't need `-Dtests.jvm.argline` for `:run` — `OpenSearch/gradle/run.gradle` already injects the native-lib path and the `pluggable.dataformat.enabled` flag automatically when `parquet-data-format` or `analytics-backend-datafusion` is in `installedPlugins` (lines 93–101). The flag is only needed for `:internalClusterTest` style tasks where the test JVM (not the cluster JVM) loads the native bridge — committed to `gradle.properties` for those use cases. + +`--debug-server-jvm` is the JVM-debug-attach flag for `:run`. Add it when you want to attach a debugger on port 5005; omit for normal runs. + +**Plugin install order matters.** `opensearch-sql-plugin` declares `analytics-engine` and `opensearch-job-scheduler` as `extendedPlugins`, so those must be installed first. 
Putting `opensearch-sql-plugin` first in the list yields: +``` +IllegalArgumentException: Missing plugin [analytics-engine], dependency of [opensearch-sql] +``` + +## The development loop (Bucket 1 / S0) + +> **Branch base for the SQL plugin checkout.** Until the +> `analyticsCompatibilityTest` task and `tests.analytics.{force_routing,parquet_indices}` +> system properties land in `upstream/feature/mustang-ppl-integration`, base +> your SQL plugin work on `ahkcs/feature/ppl-coverage-bundle` (which rebases on +> top of the integration branch and adds those bits). See +> [`ppl-analytics-engine-coverage-sop.md`](ppl-analytics-engine-coverage-sop.md) +> for the standalone daily-report recipe. + +### 1. Triage — find the failure + +Start an OpenSearch test cluster locally and run the SQL-plugin's `CalciteXxxCommandIT` through the analytics-engine route. The cluster needs all 7 sandbox plugins. + +```bash +# Terminal A — OpenSearch checkout, JDK 25 +cd /path/to/OpenSearch +JAVA_HOME=/path/to/temurin-25 ./gradlew :run -Dsandbox.enabled=true \ + -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ + 'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ + 'analytics-backend-lucene', 'composite-engine', \ + 'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" +``` + +> **Plugin order matters.** `opensearch-sql-plugin` declares `analytics-engine` and `opensearch-job-scheduler` as `extendedPlugins`, so those must install first. The order above is correct. +> **Maven local prerequisites.** `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` and `opensearch-job-scheduler:3.7.0.0-SNAPSHOT` need to be in `~/.m2/repository`. Build/publish from their respective repos via `./gradlew publishToMavenLocal -Dsandbox.enabled=true` once. 
+ +```bash +# Terminal B — sql checkout, any JDK 21+ +cd /path/to/sql +./gradlew :integ-test:integTestRemote \ + -Dtests.rest.cluster=localhost:9200 \ + -Dtests.cluster=localhost:9300 \ + -Dtests.clustername=runTask \ + -Dtests.analytics.force_routing=true \ + -Dtests.analytics.parquet_indices=true \ + --tests "org.opensearch.sql.calcite.remote.CalciteXxxCommandIT" +``` + +The standard `:integTestRemote` runs against the externally-managed `:run` cluster; the two `tests.analytics.*` system properties are what flip the route. JUnit XML lands in `integ-test/build/test-results/integTestRemote/`. + +**Why those two flags exist:** +- `tests.analytics.force_routing=true` → `PPLIntegTestCase.init()` flips the cluster setting `plugins.calcite.analytics.force_routing=true`, which makes `RestUnifiedQueryAction.isAnalyticsIndex()` return `true` unconditionally so every PPL query routes through the analytics-engine path regardless of the index name pattern. +- `tests.analytics.parquet_indices=true` → `TestUtils.createIndexByRestClient` injects the parquet/composite settings on every index it creates: + ```java + indexSettings.put("number_of_shards", 1); + indexSettings.put("pluggable.dataformat.enabled", true); + indexSettings.put("pluggable.dataformat", "composite"); + indexSettings.put("composite.primary_data_format", "parquet"); + ``` + Without parquet-backed storage, the analytics-engine planner fails with `IllegalStateException: No backend can scan all requested fields on index [...]` — DataFusion can't read Lucene-backed indices. + +Without those two flags the same `:integTestRemote` invocation runs the v2 / Calcite path — that's the comparison baseline. 
+ +**Note on `:integ-test:analyticsCompatibilityTest` / `analyticsCompatibilityReport`.** These tasks (defined on `ahkcs/feature/ppl-coverage-bundle`) are *report-generation* helpers — they sweep every PPL IT through the analytics path with `ignoreFailures=true` and bucket the failures into a markdown report (see [`ppl-analytics-engine-coverage-sop.md`](ppl-analytics-engine-coverage-sop.md)). They are not needed for verifying a specific command — `:integTestRemote --tests "..."` with the two system properties is the right primitive for per-command work. + +### 2. Read the failure + +```bash +grep -E "] among [datafusion] + at OpenSearchProjectRule.annotateExpr(OpenSearchProjectRule.java:123) +``` + +**Bucket 2:** +``` +Unable to convert call ANNOTATED_PROJECT_EXPR(fp64?) +``` +or +``` +io.substrait.isthmus.SubstraitRelVisitorException: cannot convert ... +``` + +If the failure is something else entirely (e.g. `Project rule encountered unmarked child [LogicalJoin]`), the command depends on a non-S0 planner gap (joins, sub-queries, etc.) — file a separate issue, don't try to wedge it into the function PR. + +### 3. Wire the function (Bucket 1 / S0) + +For a Bucket-1 / S0 fix, edit one file: + +`OpenSearch/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionAnalyticsBackendPlugin.java` + +```java +private static final Set STANDARD_PROJECT_OPS = Set.of( + ScalarFunction.COALESCE, + ScalarFunction.CEIL, + ScalarFunction. // ← add here +); +``` + +`STANDARD_PROJECT_OPS` is fanned out across `SUPPORTED_FIELD_TYPES` and the backend's supported formats by the `projectCapabilities()` override. The constant for `` must already exist in `sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java` — if it doesn't, add it (see "Adding a new ScalarFunction enum constant" below). 
+ +**No Rust-side, convertor, or Substrait-extension changes needed for Bucket 1.** DataFusion's native runtime executes the call directly via the default Substrait extension catalog already loaded by `DataFusionPlugin.loadSubstraitExtensions`. + +### Adding a new `ScalarFunction` enum constant + +If the function isn't yet in the `ScalarFunction` enum: + +1. Open `sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java`. +2. Pick the right `Category` (`COMPARISON`, `STRING`, `MATH`, `SCALAR`, etc.). +3. Pick the right `SqlKind`. If Calcite has a dedicated `SqlKind` for the function (e.g. `SqlKind.COALESCE`, `SqlKind.CEIL`), use it. Otherwise use `SqlKind.OTHER` and rely on `fromSqlFunction` (which matches by name; the enum constant name must match `SqlFunction.getName().toUpperCase(Locale.ROOT)`). + +```java +NEW_FUNCTION(Category.MATH, SqlKind.OTHER), +``` + +### 4. Verify locally + +Restart the cluster (the `:run` task rebuilds plugins automatically from source on restart), then re-run the IT. + +```bash +# Terminal A — Ctrl-C, then re-run :run +# Terminal B — re-run :integTestRemote with the two analytics-engine system properties +``` + +Or, faster: kill the gradle wrapper PID and re-launch. + +### 5. Add the QA IT in OpenSearch core + +This is the one-time investment that pays off across every future function: a self-contained IT in `sandbox/qa/analytics-engine-rest` that exercises the command through `POST /_analytics/ppl`, no SQL plugin needed. + +#### 5a. 
Dataset resources + +Pick or add a dataset under `sandbox/qa/analytics-engine-rest/src/test/resources/datasets//`: + +``` +datasets// + ├── mapping.json ← OpenSearch mapping with a "settings" block containing "number_of_shards" + └── bulk.json ← NDJSON bulk body, MUST end with a trailing newline (see "Common pitfalls") +``` + +`DatasetProvisioner.injectParquetSettings` injects parquet/composite settings adjacent to `"number_of_shards"`, so the mapping's `settings` block must contain that key. **Existing datasets to reuse:** `clickbench` (43-column wide table, dates, lots of types), `calcs` (fillnull-friendly: nullable int/double/keyword/boolean/date columns; verbose enough for `value=` syntax tests). Add a new dataset only if neither fits. + +#### 5b. Test class + +Place under `sandbox/qa/analytics-engine-rest/src/test/java/org/opensearch/analytics/qa/CommandIT.java`. Mirror the v2 / Calcite IT in the SQL plugin (`integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteXxxCommandIT.java`) one-test-method-to-one — same query strings, same expected rows. + +```java +public class FillNullCommandIT extends AnalyticsRestTestCase { + + private static final Dataset DATASET = new Dataset("calcs", "calcs"); + private static boolean dataProvisioned = false; + + /** + * Lazily provision the dataset on first invocation. Must be called inside a test method + * (not setUp) — OpenSearchRestTestCase's static client() is not initialized until after + * @BeforeClass but is reliably available inside test bodies. 
+ */ + private void ensureDataProvisioned() throws IOException { + if (dataProvisioned == false) { + DatasetProvisioner.provision(client(), DATASET); + dataProvisioned = true; + } + } + + public void testFillNullSameValueOneField() throws IOException { + assertRows( + "source=" + DATASET.indexName + " | fields str2, num0 | fillnull with -1 in num0", + row("one", 12.3), + row("two", -12.3), + // … + row(null, -1) + ); + } + + // ── helpers ──────────────────────────────────────────────────────────── + + private static List row(Object... values) { + return Arrays.asList(values); + } + + @SafeVarargs + @SuppressWarnings("varargs") + private final void assertRows(String ppl, List... expected) throws IOException { + Map response = executePpl(ppl); + @SuppressWarnings("unchecked") + List> actualRows = (List>) response.get("rows"); + assertNotNull(actualRows); + assertEquals(expected.length, actualRows.size()); + for (int i = 0; i < expected.length; i++) { + List want = expected[i]; + List got = actualRows.get(i); + assertEquals(want.size(), got.size()); + for (int j = 0; j < want.size(); j++) { + assertCellEquals(want.get(j), got.get(j)); + } + } + } + + private Map executePpl(String ppl) throws IOException { + ensureDataProvisioned(); + Request request = new Request("POST", "/_analytics/ppl"); + request.setJsonEntity("{\"query\": \"" + escapeJson(ppl) + "\"}"); + Response response = client().performRequest(request); + return assertOkAndParse(response, "PPL: " + ppl); + } + + /** Numeric-tolerant cell comparison — JSON parsing returns Integer/Long/Double interchangeably. 
*/ + private static void assertCellEquals(Object expected, Object actual) { + if (expected == null || actual == null) { assertEquals(expected, actual); return; } + if (expected instanceof Number && actual instanceof Number) { + if (Double.compare(((Number) expected).doubleValue(), ((Number) actual).doubleValue()) != 0) { + fail("expected <" + expected + "> but was <" + actual + ">"); + } + return; + } + assertEquals(expected, actual); + } +} +``` + +For tests that should fail with a planner-side validation error (e.g. type-incompatibility errors raised in `CalciteRelNodeVisitor` preflight), use a sibling `assertErrorContains` helper that catches `ResponseException` and asserts on the response body substring. + +#### 5c. Run the QA IT + +```bash +cd /path/to/OpenSearch +JAVA_HOME=/path/to/temurin-25 ./gradlew \ + :sandbox:qa:analytics-engine-rest:integTest \ + -Dsandbox.enabled=true \ + --tests "*CommandIT" +``` + +The QA module boots its own self-contained test cluster with all 7 sandbox plugins; no need for a `:run` cluster running. JUnit XML lands at `sandbox/qa/analytics-engine-rest/build/test-results/integTest/TEST-org.opensearch.analytics.qa.CommandIT.xml`. + +### 6. Run the sandbox-wide `check` before pushing + +Always run this before opening or updating a PR: + +```bash +cd /path/to/OpenSearch +JAVA_HOME=/path/to/temurin-25 ./gradlew check -p sandbox -Dsandbox.enabled=true +``` + +This is the sandbox-tree equivalent of `./gradlew check` and runs everything OpenSearch core CI runs over the sandbox subprojects: compile (`-Werror`), unit tests, integ tests where applicable, `forbiddenApis`, `licenseHeaders`, `spotless`/`checkstyle`, `dependencyLicenses`, `thirdPartyAudit`, etc. CI fails on any of these — running locally first avoids the rebase-and-force-push churn when CI catches a missing license header or a forbidden API call. 
+ +Common things this surfaces that focused IT runs miss: +- Missing Apache 2.0 license headers on new files (`licenseHeaders`) +- Code-style violations on the new IT class (`checkstyle`/`spotless`) +- An import that pulls in a forbidden API (e.g. `java.util.Date`, `Optional.get()` in some places) +- A unit test in a sibling module broken by a planner change + +Expect 5–15 min on a warm build, longer on a clean one. If `check` is green, the PR's CI almost always is too. + +## When you need more than capabilities (Bucket 2 / S1) + +If the planner accepts the function but Substrait conversion fails (`Unable to convert call ...`): + +1. Check whether `OpenSearchProjectRule.annotateExpr` is wrapping a sub-call you weren't expecting. The recursive `AnnotatedProjectExpression` strip in `OpenSearchProject.stripAnnotations` should handle every wrapper, but if the function lives somewhere other than a `Project` (e.g. `Filter`, `Aggregate`), the corresponding rel's `stripAnnotations` may not be recursive yet — same fix applies (`RexShuttle` that recursively unwraps). +2. If the call shape itself is wrong for DataFusion (e.g. PPL emits a 3-arg call where DataFusion expects 2), add a `ScalarFunctionAdapter` in the DataFusion backend that rewrites the call into the equivalent DF-native shape. See `BackendPlanAdapter` and the `ScalarFunctionAdapter` SPI for the pattern. + +## When the runtime doesn't support the function (Bucket 3 / M) + +Implement a UDF in `sandbox/plugins/analytics-backend-datafusion/rust/src/udf.rs` and register it in `DataFusionService` startup. Roughly half a day per UDF; see existing UDF registrations for the boilerplate. + +## Common pitfalls + +- **`refresh=wait_for` hangs >60s on parquet-backed indices.** `analytics-backend-lucene`'s `LuceneCommitter.getSafeCommitInfo` is a `TODO:: with index deleter` stub. The QA module's `DatasetProvisioner` already uses `refresh=true` to sidestep this. 
The SQL plugin's `loadDataByRestClient` has a workaround keyed on `tests.analytics.parquet_indices=true`. Symptom: first test in a class run fails with `IllegalStateException: Failed to perform request` and elapsed time exactly 60s. +- **Bulk file missing trailing newline.** `DatasetProvisioner` loads `bulk.json` via `BufferedReader.lines().collect(joining("\n"))`, which drops the final newline. The bulk endpoint requires a trailing `\n`. Workaround: ensure your `bulk.json` has at least two trailing newlines (one row of `\n\n`), or the joined output will be missing the terminator and the bulk fails with `The bulk request must be terminated by a newline [\n]`. +- **`-Werror` blocks `@SafeVarargs`-eligible methods.** OpenSearch's javac is `-Werror`. Generic varargs assertion helpers need `@SafeVarargs @SuppressWarnings("varargs") private final void ...` (the `final` keyword is required for `@SafeVarargs` on instance methods; `private` alone won't compile until JDK 9+, and even then `final` is the safe choice). +- **Plugin install order during `:run`.** `opensearch-sql-plugin` extends `analytics-engine` and `opensearch-job-scheduler` — install both before the SQL plugin or you'll see `Missing plugin [analytics-engine], dependency of [opensearch-sql]`. +- **Gradle 9.x cached transform corruption.** Occasionally `:integTestRemote` / `:analyticsCompatibilityTest` will fail with `The contents of the immutable workspace ... have been modified`. Fix: `rm -rf ~/.gradle/caches/9.x/transforms/` and re-run. +- **Number-format pitfalls in QA assertions.** `assertCellEquals` must use `Double.compare(e.doubleValue(), a.doubleValue())` — Jackson parsing returns `Integer`/`Long`/`Double` based on JSON shape, and naive `.equals()` on cross-type numbers fails even when values are equal. +- **`ScalarFunction.fromSqlKind` returns null for `SqlKind.OTHER`.** If your function uses `SqlKind.OTHER`, the planner falls back to name-based resolution via `fromSqlFunction`. 
Make sure the enum constant name matches the function's SQL name uppercased. + +## PR shape + +For a single Bucket-1 function, a clean PR against `opensearch-project/OpenSearch:main` is: + +| Commit | Files | Purpose | +|---|---|---| +| 1 | `DataFusionAnalyticsBackendPlugin.java` | Add the `ScalarFunction.` to `STANDARD_PROJECT_OPS` | +| 2 | `CommandIT.java`, `datasets//{mapping,bulk}.json` (new or reused) | QA IT exercising the function via `POST /_analytics/ppl` | + +If you also need a planner fix for the same function, prepend it as commit 0 (e.g. fillnull's nested-strip fix). + +PR description should: +- State the bucket (S0 / S1 / M). +- Show the failure mode the fix addresses (paste the cluster-log stack trace). +- List the test file count: `CalciteXxxCommandIT` v2-side go from `N/M → M/M`, and `XxxCommandIT` QA-side is `M/M`. +- Frame it as the ongoing pattern: future Bucket-1 functions are one-line additions to `STANDARD_PROJECT_OPS`. + +**Before pushing**, run the sandbox-wide check (see Step 6 in the dev loop): +```bash +./gradlew check -p sandbox -Dsandbox.enabled=true +``` + +## Files to read once before starting + +- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java` +- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/{Project,Filter,Aggregate}Capability.java` +- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/BackendCapabilityProvider.java` +- `OpenSearch/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/planner/CapabilityRegistry.java` +- `OpenSearch/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/planner/rules/OpenSearchProjectRule.java` +- `OpenSearch/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionAnalyticsBackendPlugin.java` +- 
`OpenSearch/sandbox/qa/analytics-engine-rest/src/test/java/org/opensearch/analytics/qa/{AnalyticsRestTestCase,DatasetProvisioner,Dataset}.java` +- `sql/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java` — find the `visitXxx` for your command to confirm the RelNode shape it produces +- `sql/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTest.java` — the printed `LogicalPlan` shows exactly what reaches Layer 2 + +## Reference: fillnull as the worked example + +The fillnull walkthrough that produced this guide: +- PR: [opensearch-project/OpenSearch#21472][pr-fillnull] +- Function: `COALESCE` (Layer 3 wire-up via `STANDARD_PROJECT_OPS`) +- Adjacent: `CEIL` (one fillnull test uses `with ceil(...) in ...`) +- Planner fix bundled: recursive `AnnotatedProjectExpression` strip in `OpenSearchProject.stripAnnotations` (general bug exposed by nested project calls; surfaced once both COALESCE and CEIL became project-capable) +- Result: `CalciteFillNullCommandIT` 2/13 → 13/13; QA-side `FillNullCommandIT` 13/13 + +The IT-infrastructure scaffolding (`tests.analytics.force_routing`, `tests.analytics.parquet_indices`, parquet-backed index helper in `TestUtils.makeParquetBacked`) was carried over from [`ahkcs/sql@29339c9c`](https://github.com/ahkcs/sql/commit/29339c9c0316d5bad43b5bcb13cc91627b54b1d9) on `feature/mustang-bin-command` — that commit is the canonical reference for the SQL-side test infra. The bundle branch's `analyticsCompatibilityTest` / `analyticsCompatibilityReport` Gradle tasks are report-generation helpers built on top of those system properties; per-command verification just needs the two `-D` flags on `:integTestRemote`. Once a function ships its QA-side IT in `sandbox/qa/analytics-engine-rest`, the SQL-side IT route becomes verification-only (the QA IT is the source of truth). 
From 24c529181f96e0e6263b6f2f518593038f58d565 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 6 May 2026 14:30:20 -0700 Subject: [PATCH 2/4] =?UTF-8?q?Drop=20dev=20docs=20from=20this=20PR=20?= =?UTF-8?q?=E2=80=94=20track=20separately?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These local routing-and-coverage notes are useful but belong in their own review thread (or stay as untracked working notes), not in the unified-context fix. Keeping them on disk via .gitignore-style untrack so the PR stays focused on the single api/ change. Signed-off-by: Kai Huang --- docs/dev/ppl-analytics-engine-coverage-sop.md | 152 ------- docs/dev/ppl-analytics-engine-routing.md | 426 ------------------ 2 files changed, 578 deletions(-) delete mode 100644 docs/dev/ppl-analytics-engine-coverage-sop.md delete mode 100644 docs/dev/ppl-analytics-engine-routing.md diff --git a/docs/dev/ppl-analytics-engine-coverage-sop.md b/docs/dev/ppl-analytics-engine-coverage-sop.md deleted file mode 100644 index 710feb5f724..00000000000 --- a/docs/dev/ppl-analytics-engine-coverage-sop.md +++ /dev/null @@ -1,152 +0,0 @@ -# SOP — PPL Analytics-Engine IT Coverage Report - -Run all PPL ITs through the analytics-engine path against an externally-managed -cluster, get a bucketed markdown report. Daily. - -Output: `integ-test/build/reports/analytics-compatibility/REPORT.md` - ---- - -## One-time setup - -JDK 25 + JDK 21, Rust stable, ~15 GB free disk. 
- -```bash -mkdir -p ~/workspace && cd ~/workspace -git clone git@github.com:opensearch-project/OpenSearch.git -git clone git@github.com:opensearch-project/sql.git -( cd OpenSearch && git remote add upstream https://github.com/opensearch-project/OpenSearch.git ) -( cd sql && git remote add ahkcs git@github.com:ahkcs/sql.git ) -``` - -**OpenSearch — track `upstream/main` + apply commons-text patch:** - -`upstream/main` already has the full analytics-engine sandbox plugin set -(`analytics-engine`, `analytics-backend-{datafusion,lucene}`, `composite-engine`, -`dsl-query-executor`, `parquet-data-format`) and the `dataformat-native` rust -crate. Track main; rebase daily. - -```bash -cd ~/workspace/OpenSearch -git fetch upstream main && git checkout -B main upstream/main -``` - -In `sandbox/plugins/analytics-engine/build.gradle`, add next to the existing -`commons-math3` runtimeOnly: - -```groovy -runtimeOnly "org.apache.commons:commons-text:1.11.0" -``` - -`commons-text` is only listed under `resolutionStrategy.force` on main, which -pins the version but doesn't bundle the jar — without this `runtimeOnly` line -the cluster dies on `LevenshteinDistance` the first time a query reaches -Calcite's `SqlFunctions.`. Track upstreaming; once it lands, drop this -patch. - -**Build the native lib (one-time + after rust changes):** - -The cluster JVM loads `libopensearch_native.dylib` (rust + JNI/FFM) at startup -for DataFusion execution, parquet I/O, and native repository ops. Without it -the cluster crashes in `NativeBridge.`. - -```bash -cd ~/workspace/OpenSearch/sandbox/libs/dataformat-native/rust -cargo build --release -p opensearch-native-lib # ~5 min -``` - ---- - -## Daily run (~30 min) - -### 1. 
Get the SQL bundle worktree - -First time: -```bash -cd ~/workspace/sql -git fetch ahkcs feature/ppl-coverage-bundle -DATE=$(date +%Y-%m-%d) -git worktree add "../sql-ppl-coverage-${DATE}" ahkcs/feature/ppl-coverage-bundle -``` - -Reuse same day: -```bash -cd ~/workspace/sql-ppl-coverage-${DATE} -git fetch ahkcs && git rebase ahkcs/feature/ppl-coverage-bundle -``` - -### 2. Publish the SQL plugin to mavenLocal - -`:run` installs `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` from maven coords — -mavenLocal first, then OpenSearch CI snapshots. The SQL plugin isn't on CI -snapshots, so without this step the cluster fails to bootstrap with -`Could not resolve …`. Re-publish whenever the bundle worktree is updated. - -```bash -cd ~/workspace/sql-ppl-coverage-${DATE} -./gradlew publishToMavenLocal # ~1 min, JDK 21+ -``` - -### 3. Bring up cluster (terminal A — leave running) - -```bash -cd ~/workspace/OpenSearch -JAVA_HOME=$(mise where java@temurin-25.0.1+8.0.LTS) ./gradlew :run \ - -Dsandbox.enabled=true \ - -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ -'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ -'analytics-backend-lucene', 'composite-engine', \ -'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" -``` - -Wait for it: -```bash -until curl -sf http://localhost:9200/ >/dev/null; do sleep 4; done -``` - -### 4. Run the report (terminal B) - -```bash -cd ~/workspace/sql-ppl-coverage-${DATE} -rm -rf integ-test/build/test-results/analyticsCompatibility \ - integ-test/build/test-results/analyticsCompatibilityTest \ - integ-test/build/reports/analytics-compatibility - -./gradlew :integ-test:analyticsCompatibilityReport \ - -Dtests.rest.cluster=localhost:9200 \ - -Dtests.cluster=localhost:9300 \ - -Dtests.clustername=runTask -``` - -`BUILD SUCCESSFUL` is expected — failures are the report payload, not a build -break. - -### 5. Open the report - -```bash -open integ-test/build/reports/analytics-compatibility/REPORT.md -``` - -### 6. 
Tear down - -`Ctrl-C` terminal A. - ---- - -## When something breaks - -| Symptom | Fix | -|---|---| -| `BUILD FAILED … java.io.EOFException … KryoBackedDecoder` | `rm -rf integ-test/build/test-results/analyticsCompatibilityTest` and rerun. | -| Run finishes <10 min, top bucket is `ConnectException` | Cluster died. Tail `~/workspace/OpenSearch/build/testclusters/runTask-0/logs/runTask.log` for `fatal error`; the missing class usually needs a `runtimeOnly` dep added to `analytics-engine/build.gradle`. | -| Top bucket is `No backend can scan all requested fields` (thousands) | Parquet flag isn't reaching `TestUtils`. Confirm worktree is on `ahkcs/feature/ppl-coverage-bundle`. | -| `BUILD FAILED` immediately, log shows `[BUILD] Starting OpenSearch process / Stopping node` | `task.getClusters().clear()` is missing. Re-fetch `feature/ppl-coverage-bundle`. | - ---- - -## What the bundle branch is - -`ahkcs/feature/ppl-coverage-bundle` rebases on -`upstream/feature/mustang-ppl-integration` and adds the report task, parquet -flag, origin classifier, pass-rate fix, and testcluster auto-bind workaround. -Daily upstream sync: `git rebase upstream/feature/mustang-ppl-integration`. diff --git a/docs/dev/ppl-analytics-engine-routing.md b/docs/dev/ppl-analytics-engine-routing.md deleted file mode 100644 index e2dcf40b686..00000000000 --- a/docs/dev/ppl-analytics-engine-routing.md +++ /dev/null @@ -1,426 +0,0 @@ -# Routing PPL Commands Through the Analytics-Engine Path - -A guide for wiring an existing PPL command — already working on the v2 / Calcite engine — through the analytics-engine route so that `CalciteXxxCommandIT` passes against the force-routed analytics-engine path, without needing a per-command code change in the SQL plugin. - -This doc is the playbook distilled from landing PPL `fillnull` end-to-end through the analytics engine ([opensearch-project/OpenSearch#21472][pr-fillnull]). 
Pair with [`ppl-commands.md`](ppl-commands.md) (which covers adding a *new* PPL command on the v2 / Calcite engine). - -[pr-fillnull]: https://github.com/opensearch-project/OpenSearch/pull/21472 - -## Goal - -For an existing PPL command `Xxx` whose `CalciteXxxCommandIT` currently passes on the v2 / Calcite engine but fails on the analytics-engine route, drive the analytics-engine route to parity. Two acceptance criteria: - -1. `CalciteXxxCommandIT` (in `opensearch-project/sql`) passes when run via `:integ-test:integTestRemote` with `-Dtests.analytics.force_routing=true -Dtests.analytics.parquet_indices=true`. -2. A self-contained `XxxCommandIT` lives in `opensearch-project/OpenSearch` under `sandbox/qa/analytics-engine-rest`, exercising the command through `POST /_analytics/ppl` against a parquet-backed dataset. CI for OpenSearch core can verify the analytics-engine path without checking out the SQL plugin. - -The QA IT in OpenSearch core is the long-term verification surface — once it lands, the SQL-plugin IT becomes a duplicate maintained for the v2 / Calcite path. 
- -## Mental model — three layers - -A PPL query on the analytics-engine route passes through three independent layers: - -``` -PPL text - │ - ▼ -[ Layer 1 ] SQL plugin lowering (opensearch-project/sql) - │ PPLSyntaxParser → AstBuilder → CalciteRelNodeVisitor → RelNode tree - │ - ▼ -[ Layer 2 ] Analytics-engine planner (opensearch-project/OpenSearch · sandbox/plugins/analytics-engine) - │ Capability registry, OpenSearch{Project,Filter,Aggregate}Rule, - │ AnnotatedProjectExpression, fragment driver - │ - ▼ -[ Layer 3 ] Backend (opensearch-project/OpenSearch · sandbox/plugins/analytics-backend-datafusion) - Substrait conversion via isthmus → DataFusion native execution -``` - -Knowing which layer a missing function lives in tells you the effort: - -| Bucket | Symptom (failure stack frame) | Layer | Effort | -|---|---|---|---| -| 1 (S0) | `OpenSearchProjectRule` / `OpenSearchFilterRule` throws `No backend supports scalar function [X] among [datafusion]` | Layer 2 capability registry | One line | -| 2 (S1) | Substrait isthmus throws `Unable to convert call X` (the function exists in DataFusion but the call shape doesn't match) | Layer 2 adapter | Small adapter | -| 3 (S1–M) | DataFusion runtime throws (function doesn't exist natively) | Layer 3 UDF | Half-day–multi-day Rust | - -**Layer 1 is almost always already done** for any command that works on the v2 Calcite engine — `CalciteRelNodeVisitor.visitXxx` produces a clean RelNode that's the same regardless of which backend executes it. Verify by reading the v2 unit test (`CalcitePPLXxxTest` in `ppl/src/test/java/.../calcite/`); the printed `LogicalPlan` shows exactly what reaches Layer 2. - -## Local environment setup - -### One-time - -1. **JDK 25** — `sandbox/libs/dataformat-native` targets JDK 25 (FFM API). Without it you'll see `error: release version 25 not supported`. - ```bash - # mise / asdf / sdkman - export JAVA_HOME=/path/to/temurin-25.0.x - export PATH=$JAVA_HOME/bin:$PATH - ``` -2. 
**Native lib built** — confirm `OpenSearch/sandbox/libs/dataformat-native/rust/target/release/libopensearch_native.dylib` (macOS) or `.so` (Linux) exists. Built automatically by `:run`/`:integTest` if missing, but a stale build is the most common source of weird FFM errors. Rebuild explicitly via the cargo target if in doubt. -3. **`gh` CLI** authenticated against your OpenSearch fork for opening PRs. - -### Repos - -| Repo | Branch convention | Purpose | -|---|---|---| -| `opensearch-project/OpenSearch` | `feature/` off `upstream/main` | Where the actual fix lands (analytics-engine and DataFusion backend code) | -| `opensearch-project/sql` | `feature/` off `ahkcs/feature/ppl-coverage-bundle` until the analytics-compatibility test task lands upstream; thereafter off `upstream/feature/mustang-ppl-integration` | Test-infrastructure work, IT carryovers (rare for new functions once QA-side ITs exist) | - -In the OpenSearch checkout: `feature/mustang-fillnull-command` was the dev branch; `pr/fillnull-poc` was the rebased clean branch I pushed for the PR. Use the same split when working on a new function — keep your dev branch with extra debugging/printf commits, then cherry-pick the clean ones onto a `pr/...` branch for PR submission. - -### Maven-local prerequisites (full publish-and-run flow from scratch) - -The `:run` task pulls `opensearch-job-scheduler:3.7.0.0-SNAPSHOT` and `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` from `~/.m2/repository`. If those aren't published yet, the cluster bootstrap fails with `Could not resolve ...` or `Failed installing file:.../opensearch-sql-plugin...zip`. End-to-end: - -```bash -# 1. Publish OpenSearch core + sandbox plugins to maven local -cd /path/to/OpenSearch -JAVA_HOME=/path/to/temurin-25 ./gradlew publishToMavenLocal -Dsandbox.enabled=true - -# 2. Publish SQL plugin to maven local (any JDK 21+ works for the SQL repo) -cd /path/to/sql -./gradlew publishToMavenLocal - -# 3. 
(Optional) Publish opensearch-job-scheduler from its own repo if not already in maven local -# Most devs already have this from prior work — check via: -ls ~/.m2/repository/org/opensearch/plugin/opensearch-job-scheduler/3.7.0.0-SNAPSHOT/ - -# 4. Run the server with all required plugins (back in OpenSearch) -cd /path/to/OpenSearch -JAVA_HOME=/path/to/temurin-25 ./gradlew :run -Dsandbox.enabled=true \ - -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ - 'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ - 'analytics-backend-lucene', 'composite-engine', \ - 'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" -``` - -**`tests.jvm.argline` and `--debug-server-jvm`.** The user-facing Mustang task brief shows: -```bash --Dtests.jvm.argline="-Djava.library.path=$(pwd)/sandbox/libs/dataformat-native/rust/target/release \ - -Dopensearch.experimental.feature.pluggable.dataformat.enabled=true" \ ---debug-server-jvm -``` -You don't need `-Dtests.jvm.argline` for `:run` — `OpenSearch/gradle/run.gradle` already injects the native-lib path and the `pluggable.dataformat.enabled` flag automatically when `parquet-data-format` or `analytics-backend-datafusion` is in `installedPlugins` (lines 93–101). The flag is only needed for `:internalClusterTest` style tasks where the test JVM (not the cluster JVM) loads the native bridge — committed to `gradle.properties` for those use cases. - -`--debug-server-jvm` is the JVM-debug-attach flag for `:run`. Add it when you want to attach a debugger on port 5005; omit for normal runs. - -**Plugin install order matters.** `opensearch-sql-plugin` declares `analytics-engine` and `opensearch-job-scheduler` as `extendedPlugins`, so those must be installed first. 
Putting `opensearch-sql-plugin` first in the list yields: -``` -IllegalArgumentException: Missing plugin [analytics-engine], dependency of [opensearch-sql] -``` - -## The development loop (Bucket 1 / S0) - -> **Branch base for the SQL plugin checkout.** Until the -> `analyticsCompatibilityTest` task and `tests.analytics.{force_routing,parquet_indices}` -> system properties land in `upstream/feature/mustang-ppl-integration`, base -> your SQL plugin work on `ahkcs/feature/ppl-coverage-bundle` (which rebases on -> top of the integration branch and adds those bits). See -> [`ppl-analytics-engine-coverage-sop.md`](ppl-analytics-engine-coverage-sop.md) -> for the standalone daily-report recipe. - -### 1. Triage — find the failure - -Start an OpenSearch test cluster locally and run the SQL-plugin's `CalciteXxxCommandIT` through the analytics-engine route. The cluster needs all 7 sandbox plugins. - -```bash -# Terminal A — OpenSearch checkout, JDK 25 -cd /path/to/OpenSearch -JAVA_HOME=/path/to/temurin-25 ./gradlew :run -Dsandbox.enabled=true \ - -PinstalledPlugins="['opensearch-job-scheduler:3.7.0.0-SNAPSHOT', \ - 'analytics-engine', 'parquet-data-format', 'analytics-backend-datafusion', \ - 'analytics-backend-lucene', 'composite-engine', \ - 'opensearch-sql-plugin:3.7.0.0-SNAPSHOT']" -``` - -> **Plugin order matters.** `opensearch-sql-plugin` declares `analytics-engine` and `opensearch-job-scheduler` as `extendedPlugins`, so those must install first. The order above is correct. -> **Maven local prerequisites.** `opensearch-sql-plugin:3.7.0.0-SNAPSHOT` and `opensearch-job-scheduler:3.7.0.0-SNAPSHOT` need to be in `~/.m2/repository`. Build/publish from their respective repos via `./gradlew publishToMavenLocal -Dsandbox.enabled=true` once. 
- -```bash -# Terminal B — sql checkout, any JDK 21+ -cd /path/to/sql -./gradlew :integ-test:integTestRemote \ - -Dtests.rest.cluster=localhost:9200 \ - -Dtests.cluster=localhost:9300 \ - -Dtests.clustername=runTask \ - -Dtests.analytics.force_routing=true \ - -Dtests.analytics.parquet_indices=true \ - --tests "org.opensearch.sql.calcite.remote.CalciteXxxCommandIT" -``` - -The standard `:integTestRemote` runs against the externally-managed `:run` cluster; the two `tests.analytics.*` system properties are what flip the route. JUnit XML lands in `integ-test/build/test-results/integTestRemote/`. - -**Why those two flags exist:** -- `tests.analytics.force_routing=true` → `PPLIntegTestCase.init()` flips the cluster setting `plugins.calcite.analytics.force_routing=true`, which makes `RestUnifiedQueryAction.isAnalyticsIndex()` return `true` unconditionally so every PPL query routes through the analytics-engine path regardless of the index name pattern. -- `tests.analytics.parquet_indices=true` → `TestUtils.createIndexByRestClient` injects the parquet/composite settings on every index it creates: - ```java - indexSettings.put("number_of_shards", 1); - indexSettings.put("pluggable.dataformat.enabled", true); - indexSettings.put("pluggable.dataformat", "composite"); - indexSettings.put("composite.primary_data_format", "parquet"); - ``` - Without parquet-backed storage, the analytics-engine planner fails with `IllegalStateException: No backend can scan all requested fields on index [...]` — DataFusion can't read Lucene-backed indices. - -Without those two flags the same `:integTestRemote` invocation runs the v2 / Calcite path — that's the comparison baseline. 
- -**Note on `:integ-test:analyticsCompatibilityTest` / `analyticsCompatibilityReport`.** These tasks (defined on `ahkcs/feature/ppl-coverage-bundle`) are *report-generation* helpers — they sweep every PPL IT through the analytics path with `ignoreFailures=true` and bucket the failures into a markdown report (see [`ppl-analytics-engine-coverage-sop.md`](ppl-analytics-engine-coverage-sop.md)). They are not needed for verifying a specific command — `:integTestRemote --tests "..."` with the two system properties is the right primitive for per-command work. - -### 2. Read the failure - -```bash -grep -E "No backend supports|Unable to convert" build/testclusters/runTask-0/logs/runTask.log -``` - -**Bucket 1:** -``` -No backend supports scalar function [<FUNC>] among [datafusion] - at OpenSearchProjectRule.annotateExpr(OpenSearchProjectRule.java:123) -``` - -**Bucket 2:** -``` -Unable to convert call ANNOTATED_PROJECT_EXPR(fp64?) -``` -or -``` -io.substrait.isthmus.SubstraitRelVisitorException: cannot convert ... -``` - -If the failure is something else entirely (e.g. `Project rule encountered unmarked child [LogicalJoin]`), the command depends on a non-S0 planner gap (joins, sub-queries, etc.) — file a separate issue, don't try to wedge it into the function PR. - -### 3. Wire the function (Bucket 1 / S0) - -For a Bucket-1 / S0 fix, edit one file: - -`OpenSearch/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionAnalyticsBackendPlugin.java` - -```java -private static final Set<ScalarFunction> STANDARD_PROJECT_OPS = Set.of( - ScalarFunction.COALESCE, - ScalarFunction.CEIL, - ScalarFunction.<FUNC> // ← add here -); -``` - -`STANDARD_PROJECT_OPS` is fanned out across `SUPPORTED_FIELD_TYPES` and the backend's supported formats by the `projectCapabilities()` override. The constant for `<FUNC>` must already exist in `sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java` — if it doesn't, add it (see "Adding a new ScalarFunction enum constant" below). 
- -**No Rust-side, convertor, or Substrait-extension changes needed for Bucket 1.** DataFusion's native runtime executes the call directly via the default Substrait extension catalog already loaded by `DataFusionPlugin.loadSubstraitExtensions`. - -### Adding a new `ScalarFunction` enum constant - -If the function isn't yet in the `ScalarFunction` enum: - -1. Open `sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java`. -2. Pick the right `Category` (`COMPARISON`, `STRING`, `MATH`, `SCALAR`, etc.). -3. Pick the right `SqlKind`. If Calcite has a dedicated `SqlKind` for the function (e.g. `SqlKind.COALESCE`, `SqlKind.CEIL`), use it. Otherwise use `SqlKind.OTHER` and rely on `fromSqlFunction` (which matches by name; the enum constant name must match `SqlFunction.getName().toUpperCase(Locale.ROOT)`). - -```java -NEW_FUNCTION(Category.MATH, SqlKind.OTHER), -``` - -### 4. Verify locally - -Restart the cluster (the `:run` task rebuilds plugins automatically from source on restart), then re-run the IT. - -```bash -# Terminal A — Ctrl-C, then re-run :run -# Terminal B — re-run :integTestRemote with the two analytics-engine system properties -``` - -Or, faster: kill the gradle wrapper PID and re-launch. - -### 5. Add the QA IT in OpenSearch core - -This is the one-time investment that pays off across every future function: a self-contained IT in `sandbox/qa/analytics-engine-rest` that exercises the command through `POST /_analytics/ppl`, no SQL plugin needed. - -#### 5a. 
Dataset resources - -Pick or add a dataset under `sandbox/qa/analytics-engine-rest/src/test/resources/datasets/<name>/`: - -``` -datasets/<name>/ - ├── mapping.json ← OpenSearch mapping with a "settings" block containing "number_of_shards" - └── bulk.json ← NDJSON bulk body, MUST end with a trailing newline (see "Common pitfalls") -``` - -`DatasetProvisioner.injectParquetSettings` injects parquet/composite settings adjacent to `"number_of_shards"`, so the mapping's `settings` block must contain that key. **Existing datasets to reuse:** `clickbench` (43-column wide table, dates, lots of types), `calcs` (fillnull-friendly: nullable int/double/keyword/boolean/date columns; verbose enough for `value=` syntax tests). Add a new dataset only if neither fits. - -#### 5b. Test class - -Place under `sandbox/qa/analytics-engine-rest/src/test/java/org/opensearch/analytics/qa/<Xxx>CommandIT.java`. Mirror the v2 / Calcite IT in the SQL plugin (`integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteXxxCommandIT.java`) one-test-method-to-one — same query strings, same expected rows. - -```java -public class FillNullCommandIT extends AnalyticsRestTestCase { - - private static final Dataset DATASET = new Dataset("calcs", "calcs"); - private static boolean dataProvisioned = false; - - /** - * Lazily provision the dataset on first invocation. Must be called inside a test method - * (not setUp) — OpenSearchRestTestCase's static client() is not initialized until after - * @BeforeClass but is reliably available inside test bodies. 
- */ - private void ensureDataProvisioned() throws IOException { - if (dataProvisioned == false) { - DatasetProvisioner.provision(client(), DATASET); - dataProvisioned = true; - } - } - - public void testFillNullSameValueOneField() throws IOException { - assertRows( - "source=" + DATASET.indexName + " | fields str2, num0 | fillnull with -1 in num0", - row("one", 12.3), - row("two", -12.3), - // … - row(null, -1) - ); - } - - // ── helpers ──────────────────────────────────────────────────────────── - - private static List<Object> row(Object... values) { - return Arrays.asList(values); - } - - @SafeVarargs - @SuppressWarnings("varargs") - private final void assertRows(String ppl, List<Object>... expected) throws IOException { - Map<String, Object> response = executePpl(ppl); - @SuppressWarnings("unchecked") - List<List<Object>> actualRows = (List<List<Object>>) response.get("rows"); - assertNotNull(actualRows); - assertEquals(expected.length, actualRows.size()); - for (int i = 0; i < expected.length; i++) { - List<Object> want = expected[i]; - List<Object> got = actualRows.get(i); - assertEquals(want.size(), got.size()); - for (int j = 0; j < want.size(); j++) { - assertCellEquals(want.get(j), got.get(j)); - } - } - } - - private Map<String, Object> executePpl(String ppl) throws IOException { - ensureDataProvisioned(); - Request request = new Request("POST", "/_analytics/ppl"); - request.setJsonEntity("{\"query\": \"" + escapeJson(ppl) + "\"}"); - Response response = client().performRequest(request); - return assertOkAndParse(response, "PPL: " + ppl); - } - - /** Numeric-tolerant cell comparison — JSON parsing returns Integer/Long/Double interchangeably. 
*/ - private static void assertCellEquals(Object expected, Object actual) { - if (expected == null || actual == null) { assertEquals(expected, actual); return; } - if (expected instanceof Number && actual instanceof Number) { - if (Double.compare(((Number) expected).doubleValue(), ((Number) actual).doubleValue()) != 0) { - fail("expected <" + expected + "> but was <" + actual + ">"); - } - return; - } - assertEquals(expected, actual); - } -} -``` - -For tests that should fail with a planner-side validation error (e.g. type-incompatibility errors raised in `CalciteRelNodeVisitor` preflight), use a sibling `assertErrorContains` helper that catches `ResponseException` and asserts on the response body substring. - -#### 5c. Run the QA IT - -```bash -cd /path/to/OpenSearch -JAVA_HOME=/path/to/temurin-25 ./gradlew \ - :sandbox:qa:analytics-engine-rest:integTest \ - -Dsandbox.enabled=true \ - --tests "*CommandIT" -``` - -The QA module boots its own self-contained test cluster with all 7 sandbox plugins; no need for a `:run` cluster running. JUnit XML lands at `sandbox/qa/analytics-engine-rest/build/test-results/integTest/TEST-org.opensearch.analytics.qa.CommandIT.xml`. - -### 6. Run the sandbox-wide `check` before pushing - -Always run this before opening or updating a PR: - -```bash -cd /path/to/OpenSearch -JAVA_HOME=/path/to/temurin-25 ./gradlew check -p sandbox -Dsandbox.enabled=true -``` - -This is the sandbox-tree equivalent of `./gradlew check` and runs everything OpenSearch core CI runs over the sandbox subprojects: compile (`-Werror`), unit tests, integ tests where applicable, `forbiddenApis`, `licenseHeaders`, `spotless`/`checkstyle`, `dependencyLicenses`, `thirdPartyAudit`, etc. CI fails on any of these — running locally first avoids the rebase-and-force-push churn when CI catches a missing license header or a forbidden API call. 
- -Common things this surfaces that focused IT runs miss: -- Missing Apache 2.0 license headers on new files (`licenseHeaders`) -- Code-style violations on the new IT class (`checkstyle`/`spotless`) -- An import that pulls in a forbidden API (e.g. `java.util.Date`, `Optional.get()` in some places) -- A unit test in a sibling module broken by a planner change - -Expect 5–15 min on a warm build, longer on a clean one. If `check` is green, the PR's CI almost always is too. - -## When you need more than capabilities (Bucket 2 / S1) - -If the planner accepts the function but Substrait conversion fails (`Unable to convert call ...`): - -1. Check whether `OpenSearchProjectRule.annotateExpr` is wrapping a sub-call you weren't expecting. The recursive `AnnotatedProjectExpression` strip in `OpenSearchProject.stripAnnotations` should handle every wrapper, but if the function lives somewhere other than a `Project` (e.g. `Filter`, `Aggregate`), the corresponding rel's `stripAnnotations` may not be recursive yet — same fix applies (`RexShuttle` that recursively unwraps). -2. If the call shape itself is wrong for DataFusion (e.g. PPL emits a 3-arg call where DataFusion expects 2), add a `ScalarFunctionAdapter` in the DataFusion backend that rewrites the call into the equivalent DF-native shape. See `BackendPlanAdapter` and the `ScalarFunctionAdapter` SPI for the pattern. - -## When the runtime doesn't support the function (Bucket 3 / M) - -Implement a UDF in `sandbox/plugins/analytics-backend-datafusion/rust/src/udf.rs` and register it in `DataFusionService` startup. Roughly half a day per UDF; see existing UDF registrations for the boilerplate. - -## Common pitfalls - -- **`refresh=wait_for` hangs >60s on parquet-backed indices.** `analytics-backend-lucene`'s `LuceneCommitter.getSafeCommitInfo` is a `TODO:: with index deleter` stub. The QA module's `DatasetProvisioner` already uses `refresh=true` to sidestep this. 
The SQL plugin's `loadDataByRestClient` has a workaround keyed on `tests.analytics.parquet_indices=true`. Symptom: first test in a class run fails with `IllegalStateException: Failed to perform request` and elapsed time exactly 60s. -- **Bulk file missing trailing newline.** `DatasetProvisioner` loads `bulk.json` via `BufferedReader.lines().collect(joining("\n"))`, which drops the final newline. The bulk endpoint requires a trailing `\n`. Workaround: ensure your `bulk.json` has at least two trailing newlines (one row of `\n\n`), or the joined output will be missing the terminator and the bulk fails with `The bulk request must be terminated by a newline [\n]`. -- **`-Werror` blocks `@SafeVarargs`-eligible methods.** OpenSearch's javac is `-Werror`. Generic varargs assertion helpers need `@SafeVarargs @SuppressWarnings("varargs") private final void ...` (the `final` keyword is required for `@SafeVarargs` on instance methods; `private` alone won't compile until JDK 9+, and even then `final` is the safe choice). -- **Plugin install order during `:run`.** `opensearch-sql-plugin` extends `analytics-engine` and `opensearch-job-scheduler` — install both before the SQL plugin or you'll see `Missing plugin [analytics-engine], dependency of [opensearch-sql]`. -- **Gradle 9.x cached transform corruption.** Occasionally `:integTestRemote` / `:analyticsCompatibilityTest` will fail with `The contents of the immutable workspace ... have been modified`. Fix: `rm -rf ~/.gradle/caches/9.x/transforms/` and re-run. -- **Number-format pitfalls in QA assertions.** `assertCellEquals` must use `Double.compare(e.doubleValue(), a.doubleValue())` — Jackson parsing returns `Integer`/`Long`/`Double` based on JSON shape, and naive `.equals()` on cross-type numbers fails even when values are equal. -- **`ScalarFunction.fromSqlKind` returns null for `SqlKind.OTHER`.** If your function uses `SqlKind.OTHER`, the planner falls back to name-based resolution via `fromSqlFunction`. 
Make sure the enum constant name matches the function's SQL name uppercased. - -## PR shape - -For a single Bucket-1 function, a clean PR against `opensearch-project/OpenSearch:main` is: - -| Commit | Files | Purpose | -|---|---|---| -| 1 | `DataFusionAnalyticsBackendPlugin.java` | Add the `ScalarFunction.` to `STANDARD_PROJECT_OPS` | -| 2 | `CommandIT.java`, `datasets//{mapping,bulk}.json` (new or reused) | QA IT exercising the function via `POST /_analytics/ppl` | - -If you also need a planner fix for the same function, prepend it as commit 0 (e.g. fillnull's nested-strip fix). - -PR description should: -- State the bucket (S0 / S1 / M). -- Show the failure mode the fix addresses (paste the cluster-log stack trace). -- List the test file count: `CalciteXxxCommandIT` v2-side go from `N/M → M/M`, and `XxxCommandIT` QA-side is `M/M`. -- Frame it as the ongoing pattern: future Bucket-1 functions are one-line additions to `STANDARD_PROJECT_OPS`. - -**Before pushing**, run the sandbox-wide check (see Step 6 in the dev loop): -```bash -./gradlew check -p sandbox -Dsandbox.enabled=true -``` - -## Files to read once before starting - -- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java` -- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/{Project,Filter,Aggregate}Capability.java` -- `OpenSearch/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/BackendCapabilityProvider.java` -- `OpenSearch/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/planner/CapabilityRegistry.java` -- `OpenSearch/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/planner/rules/OpenSearchProjectRule.java` -- `OpenSearch/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionAnalyticsBackendPlugin.java` -- 
`OpenSearch/sandbox/qa/analytics-engine-rest/src/test/java/org/opensearch/analytics/qa/{AnalyticsRestTestCase,DatasetProvisioner,Dataset}.java` -- `sql/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java` — find the `visitXxx` for your command to confirm the RelNode shape it produces -- `sql/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTest.java` — the printed `LogicalPlan` shows exactly what reaches Layer 2 - -## Reference: fillnull as the worked example - -The fillnull walkthrough that produced this guide: -- PR: [opensearch-project/OpenSearch#21472][pr-fillnull] -- Function: `COALESCE` (Layer 3 wire-up via `STANDARD_PROJECT_OPS`) -- Adjacent: `CEIL` (one fillnull test uses `with ceil(...) in ...`) -- Planner fix bundled: recursive `AnnotatedProjectExpression` strip in `OpenSearchProject.stripAnnotations` (general bug exposed by nested project calls; surfaced once both COALESCE and CEIL became project-capable) -- Result: `CalciteFillNullCommandIT` 2/13 → 13/13; QA-side `FillNullCommandIT` 13/13 - -The IT-infrastructure scaffolding (`tests.analytics.force_routing`, `tests.analytics.parquet_indices`, parquet-backed index helper in `TestUtils.makeParquetBacked`) was carried over from [`ahkcs/sql@29339c9c`](https://github.com/ahkcs/sql/commit/29339c9c0316d5bad43b5bcb13cc91627b54b1d9) on `feature/mustang-bin-command` — that commit is the canonical reference for the SQL-side test infra. The bundle branch's `analyticsCompatibilityTest` / `analyticsCompatibilityReport` Gradle tasks are report-generation helpers built on top of those system properties; per-command verification just needs the two `-D` flags on `:integTestRemote`. Once a function ships its QA-side IT in `sandbox/qa/analytics-engine-rest`, the SQL-side IT route becomes verification-only (the QA IT is the source of truth). 
From 1d4c1130e8f1b90044c9a0e98cec442f53fd21b5 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 6 May 2026 14:31:38 -0700 Subject: [PATCH 3/4] Address @dai-chen: add CALCITE_ENGINE_ENABLED to default settings map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @dai-chen suggested either adding the setting to the default list at UnifiedQueryContext.java:123 or passing it via the .setting() API on the builder, since only the unified PPL path requires it. Going with option 1 (default-list entry) — it composes the same way every other planning-required default does, and avoids forcing every caller (the production REST handler and every IT) to remember a single magic .setting() call. The IT at UnifiedQueryOpenSearchIT.java:51 was the precedent for the .setting() approach but only one IT needs it; the production caller (RestUnifiedQueryAction) wouldn't have a natural place to wire this without either repeating it everywhere or routing through a helper. Removes the conditional in buildSettings().getSettingValue() — the default map now carries the value the same way QUERY_SIZE_LIMIT, PPL_SUBSEARCH_MAXOUT, and PPL_JOIN_SUBSEARCH_MAXOUT do. 
Signed-off-by: Kai Huang --- .../sql/api/UnifiedQueryContext.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 864b2610897..5670b5fe8b5 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -5,6 +5,7 @@ package org.opensearch.sql.api; +import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_ENGINE_ENABLED; import static org.opensearch.sql.common.setting.Settings.Key.PPL_JOIN_SUBSEARCH_MAXOUT; import static org.opensearch.sql.common.setting.Settings.Key.PPL_SUBSEARCH_MAXOUT; import static org.opensearch.sql.common.setting.Settings.Key.QUERY_SIZE_LIMIT; @@ -115,13 +116,21 @@ public static class Builder { /** * Setting values with defaults from SysLimit.DEFAULT. Only includes planning-required settings * to avoid coupling with OpenSearchSettings. + * + *

{@link Settings.Key#CALCITE_ENGINE_ENABLED} defaults to {@code true} here because the + * unified query path is by definition Calcite-based — every query reaching this context flows + * through Calcite's planner, never the v2 engine. The PPL {@link + * org.opensearch.sql.api.parser.PPLQueryParser} reuses the v2 {@code AstBuilder}, which gates + * Calcite-only commands (e.g. {@code visitTableCommand}) on this setting; without the default, + * those commands fail at parse time even when the cluster setting is true. */ private final Map settings = new HashMap( Map.of( QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(), PPL_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.subsearchLimit(), - PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit())); + PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit(), + CALCITE_ENGINE_ENABLED, true)); /** * Sets the query language frontend to be used. @@ -225,14 +234,6 @@ private Settings buildSettings() { @Override @SuppressWarnings("unchecked") public T getSettingValue(Key key) { - // The unified query path is always Calcite-based — every query reaching this context - // goes through Calcite's planner, never the v2 engine. Default - // {@code plugins.calcite.enabled} to true so AstBuilder gates (e.g. visitTableCommand) - // that protect v2-only code paths don't block valid analytics-engine queries when the - // underlying setting hasn't been propagated through Builder#setting. 
- if (key == Key.CALCITE_ENGINE_ENABLED && !settings.containsKey(key)) { - return (T) Boolean.TRUE; - } return (T) settings.get(key); } From 4ade92fe0e4cedf8b748e70346081c8b4c25cbec Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 6 May 2026 15:58:16 -0700 Subject: [PATCH 4/4] CalcitePPLRenameIT: switch to column-name-aware row matcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, {@code verifyStandardDataRows} (and a handful of bespoke {@code verifyDataRows} calls in this file) compared rows positionally — they assumed the engine emits columns in the {@code _source} iteration order the v2 / Lucene path produces. Under the analytics-engine route the parquet-backed reader returns columns in storage order, so the same 4 canonical state_country rows came back as {@code [70,"USA",4,"Jake", "California",2023]} instead of {@code [Jake,USA,California,4,2023,70]} and 12 of 22 tests failed despite the data being identical. Replace with a column-name-keyed expected-row map. The helper reads the actual schema from the response, looks up each canonical value by column name, places it at the corresponding schema position, then defers to {@code verifyDataRows} as before. The contract is identical to the existing {@code verifySchema} matcher — both are set-equality on column names — so the test no longer leaks the engine's emission order into the assertion. Each call site passes the canonical column-name list (with rename substitutions where applicable). Tests that don't rename age keep calling the no-arg form. Both paths now pass: * Analytics-engine route (`tests.analytics.force_routing=true`): 20 / 22 (the remaining 2 are `testRenameInAgg` / `testRenameWithBackticksInAgg`, blocked on the Substrait isthmus AVG-binding follow-up tracked in https://github.com/opensearch-project/OpenSearch/pull/21521). 
* v2 / Calcite route (default routing): 20 / 22 (same two tests fail with `NoClassDefFoundError: LevenshteinDistance` only when running against the OS-core `:run` cluster — that bundle is missing commons-text. CI's own integ-test cluster bundles commons-text via the SQL plugin's classloader and isn't affected.) Signed-off-by: Kai Huang --- .../calcite/remote/CalcitePPLRenameIT.java | 142 +++++++++++++++--- 1 file changed, 118 insertions(+), 24 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java index 6cd0674a2dc..0302a13b159 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLRenameIT.java @@ -14,6 +14,10 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import org.hamcrest.Matcher; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -40,7 +44,7 @@ public void testRename() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "renamed_age"); } @Test @@ -77,7 +81,7 @@ public void testRenameToMetaField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "_ID"); } @Test @@ -156,7 +160,7 @@ public void testRenameWildcardFields() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "nAME", "country", "state", 
"month", "year", "age"); } @Test @@ -171,7 +175,7 @@ public void testRenameMultipleWildcardFields() throws IOException { schema("couNTry", "string"), schema("year", "int"), schema("moNTh", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "couNTry", "state", "moNTh", "year", "age"); } @Test @@ -186,7 +190,7 @@ public void testRenameWildcardPrefix() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "new_na", "country", "state", "month", "year", "age"); } @Test @@ -212,7 +216,7 @@ public void testRenameMultipleWildcards() throws IOException { schema("country", "string"), schema("year", "int"), schema("MoNtH", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "name", "country", "state", "MoNtH", "year", "age"); } @Test @@ -261,12 +265,14 @@ public void testRenamingToExistingField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyDataRows( + // After `rename name as age`, the original name column overwrites the original age column; + // the (number) age values are gone and only the (string) name values remain under "age". 
+ verifyDataRowsByColumn( result, - rows("Jake", "USA", "California", 4, 2023), - rows("Hello", "USA", "New York", 4, 2023), - rows("John", "Canada", "Ontario", 4, 2023), - rows("Jane", "Canada", "Quebec", 4, 2023)); + rowOf("age", "Jake", "country", "USA", "state", "California", "month", 4, "year", 2023), + rowOf("age", "Hello", "country", "USA", "state", "New York", "month", 4, "year", 2023), + rowOf("age", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023), + rowOf("age", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023)); } @Test @@ -296,12 +302,12 @@ public void testRenamingNonExistentFieldToExistingField() throws IOException { schema("country", "string"), schema("year", "int"), schema("month", "int")); - verifyDataRows( + verifyDataRowsByColumn( result, - rows("Jake", "USA", "California", 4, 2023), - rows("Hello", "USA", "New York", 4, 2023), - rows("John", "Canada", "Ontario", 4, 2023), - rows("Jane", "Canada", "Quebec", 4, 2023)); + rowOf("name", "Jake", "country", "USA", "state", "California", "month", 4, "year", 2023), + rowOf("name", "Hello", "country", "USA", "state", "New York", "month", 4, "year", 2023), + rowOf("name", "John", "country", "Canada", "state", "Ontario", "month", 4, "year", 2023), + rowOf("name", "Jane", "country", "Canada", "state", "Quebec", "month", 4, "year", 2023)); } @Test @@ -345,7 +351,7 @@ public void testMultipleRenameWithoutComma() throws IOException { schema("location", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "user_name", "location", "state", "month", "year", "user_age"); } @Test @@ -363,15 +369,103 @@ public void testRenameMixedCommaAndSpace() throws IOException { schema("location", "string"), schema("year", "int"), schema("month", "int")); - verifyStandardDataRows(result); + verifyStandardDataRows(result, "user_name", "location", "state", "month", "year", "user_age"); + } + + /** + * Build a 
{@code column -> value} map from interleaved varargs ({@code key1, val1, key2, val2, + * ...}). Preserves insertion order so the expected-row mapping reads naturally at the call site. + */ + private static Map rowOf(Object... pairs) { + if (pairs.length % 2 != 0) { + throw new IllegalArgumentException("rowOf expects an even number of args (key, value, ...)"); + } + Map row = new LinkedHashMap<>(); + for (int i = 0; i < pairs.length; i += 2) { + row.put((String) pairs[i], pairs[i + 1]); + } + return row; } private void verifyStandardDataRows(JSONObject result) { - verifyDataRows( - result, - rows("Jake", "USA", "California", 4, 2023, 70), - rows("Hello", "USA", "New York", 4, 2023, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20)); + verifyStandardDataRows(result, "name", "country", "state", "month", "year", "age"); + } + + /** + * Verify the four canonical state_country rows independently of column order. + * + *

The schema check above ({@code verifySchema}) is set-equality on column names; the data row + * check {@code verifyDataRows} is positional. The two paths the analytics-engine route can take + * return columns in different orders (parquet preserves storage order, the v2 / Lucene path + * preserves {@code _source} iteration order), and either is valid given the contract {@code + * verifySchema} declares. To avoid baking either order into the test, this helper takes the + * canonical-position column names as varargs and reorders the canonical row values to match + * whatever column order the response actually returned. + * + * @param result the response JSON + * @param canonicalColumns the column names of the four canonical rows in {@code (name-or-renamed, + * country-or-renamed, state, month, year, age-or-renamed)} order. Pass the rename target + * where applicable. + */ + private void verifyStandardDataRows(JSONObject result, String... canonicalColumns) { + if (canonicalColumns.length != 6) { + throw new IllegalArgumentException( + "verifyStandardDataRows expects 6 canonical column names; got " + + canonicalColumns.length); + } + Object[][] canonicalValues = + new Object[][] { + {"Jake", "USA", "California", 4, 2023, 70}, + {"Hello", "USA", "New York", 4, 2023, 30}, + {"John", "Canada", "Ontario", 4, 2023, 25}, + {"Jane", "Canada", "Quebec", 4, 2023, 20} + }; + Map[] expectedRows = new LinkedHashMap[canonicalValues.length]; + for (int i = 0; i < canonicalValues.length; i++) { + Map row = new LinkedHashMap<>(); + for (int c = 0; c < canonicalColumns.length; c++) { + row.put(canonicalColumns[c], canonicalValues[i][c]); + } + expectedRows[i] = row; + } + verifyDataRowsByColumn(result, expectedRows); + } + + /** + * Match expected rows against the response by column name, ignoring the response's column + * emission order. For each expected row (a {@code column-name -> value} map), the value at each + * schema position is looked up by name. 
Tests using this helper become engine-order agnostic: a + * parquet-backed response and a Lucene-backed response yield the same assertion outcome as long + * as the column-name-to-value mapping agrees. + */ + @SafeVarargs + @SuppressWarnings("varargs") + private final void verifyDataRowsByColumn( + JSONObject result, Map... expectedRows) { + JSONArray schema = result.getJSONArray("schema"); + int n = schema.length(); + String[] columnOrder = new String[n]; + for (int i = 0; i < n; i++) { + columnOrder[i] = schema.getJSONObject(i).getString("name"); + } + @SuppressWarnings({"unchecked", "rawtypes"}) + Matcher[] rowMatchers = new Matcher[expectedRows.length]; + for (int r = 0; r < expectedRows.length; r++) { + Object[] reordered = new Object[n]; + for (int c = 0; c < n; c++) { + if (!expectedRows[r].containsKey(columnOrder[c])) { + throw new IllegalArgumentException( + "Expected row at index " + + r + + " is missing canonical value for response column [" + + columnOrder[c] + + "]; provided keys: " + + expectedRows[r].keySet()); + } + reordered[c] = expectedRows[r].get(columnOrder[c]); + } + rowMatchers[r] = rows(reordered); + } + verifyDataRows(result, rowMatchers); } }