diff --git a/.changeset/smart-chunking-rewrite.md b/.changeset/smart-chunking-rewrite.md new file mode 100644 index 0000000..439c292 --- /dev/null +++ b/.changeset/smart-chunking-rewrite.md @@ -0,0 +1,7 @@ +--- +"@chkit/plugin-backfill": patch +"@chkit/clickhouse": patch +"chkit": patch +--- + +Rewrite backfill chunk planning with multi-strategy smart chunking. The planner now introspects partition layout, sort key distribution, and row estimates to produce better-sized chunks using strategies like equal-width splitting, quantile ranges, temporal bucketing, string prefix splitting, and group-by-key splitting. Adds a dedicated `sdk` entry point for programmatic access to chunking internals. diff --git a/.changeset/structured-backfill-logging.md b/.changeset/structured-backfill-logging.md new file mode 100644 index 0000000..1c795e2 --- /dev/null +++ b/.changeset/structured-backfill-logging.md @@ -0,0 +1,6 @@ +--- +"chkit": patch +"@chkit/plugin-backfill": patch +--- + +Add structured logging to backfill chunk planning via `@logtape/logtape`. The smart chunking planner now logs introspection, partition planning, and per-strategy split decisions, and emits warnings when ClickHouse queries exceed 5s. Enable with `CHKIT_DEBUG=1`. 
diff --git a/bun.lock b/bun.lock index 1191f8c..2f62810 100644 --- a/bun.lock +++ b/bun.lock @@ -23,7 +23,7 @@ }, "apps/docs": { "name": "@chkit/docs", - "version": "0.0.2-beta.8", + "version": "0.0.2-beta.9", "dependencies": { "@astrojs/starlight": "^0.37.6", "astro": "^5.6.1", @@ -33,7 +33,7 @@ }, "packages/cli": { "name": "chkit", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "bin": { "chkit": "./dist/bin/chkit.js", }, @@ -42,28 +42,30 @@ "@chkit/codegen": "workspace:*", "@chkit/core": "workspace:*", "@clickhouse/client": "^1.11.0", + "@logtape/logtape": "^2.0.5", "fast-glob": "^3.3.2", }, }, "packages/clickhouse": { "name": "@chkit/clickhouse", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "@chkit/core": "workspace:*", "@clickhouse/client": "^1.11.0", + "@logtape/logtape": "^2.0.5", "p-retry": "^7.1.1", }, }, "packages/codegen": { "name": "@chkit/codegen", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "@chkit/core": "workspace:*", }, }, "packages/core": { "name": "@chkit/core", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "fast-glob": "^3.3.2", }, @@ -73,17 +75,18 @@ }, "packages/plugin-backfill": { "name": "@chkit/plugin-backfill", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "@chkit/clickhouse": "workspace:*", "@chkit/core": "workspace:*", + "@logtape/logtape": "^2.0.5", "p-map": "^7.0.4", "zod": "^4.3.6", }, }, "packages/plugin-codegen": { "name": "@chkit/plugin-codegen", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "@chkit/core": "workspace:*", "zod": "^4.3.6", @@ -91,8 +94,9 @@ }, "packages/plugin-obsessiondb": { "name": "@chkit/plugin-obsessiondb", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { + "@chkit/clickhouse": "workspace:*", "@chkit/core": "workspace:*", "@orpc/client": "1.13.4", "@orpc/contract": "1.13.4", @@ -101,7 +105,7 @@ }, 
"packages/plugin-pull": { "name": "@chkit/plugin-pull", - "version": "0.1.0-beta.19", + "version": "0.1.0-beta.20", "dependencies": { "@chkit/clickhouse": "workspace:*", "@chkit/core": "workspace:*", @@ -348,6 +352,8 @@ "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.9", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.0.3", "@jridgewell/sourcemap-codec": "^1.4.10" } }, "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ=="], + "@logtape/logtape": ["@logtape/logtape@2.0.5", "", {}, "sha512-UizDkh20ZPJVOddRxG1F77WhHdlNl/sbQgoO8T534R7XvUBMAJ9En9f35u+meW2tRsNLvjz6R87Zanwf53tspQ=="], + "@manypkg/find-root": ["@manypkg/find-root@1.1.0", "", { "dependencies": { "@babel/runtime": "^7.5.5", "@types/node": "^12.7.1", "find-up": "^4.1.0", "fs-extra": "^8.1.0" } }, "sha512-mki5uBvhHzO8kYYix/WRy2WX8S3B5wdVSc9D6KcU5lQNglP2yt58/VfLuAK49glRXChosY8ap2oJ1qgma3GUVA=="], "@manypkg/get-packages": ["@manypkg/get-packages@1.1.3", "", { "dependencies": { "@babel/runtime": "^7.5.5", "@changesets/types": "^4.0.1", "@manypkg/find-root": "^1.1.0", "fs-extra": "^8.1.0", "globby": "^11.0.0", "read-yaml-file": "^1.1.0" } }, "sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A=="], diff --git a/packages/cli/package.json b/packages/cli/package.json index 0b969b7..0d145c2 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -43,6 +43,7 @@ "@chkit/clickhouse": "workspace:*", "@chkit/codegen": "workspace:*", "@chkit/core": "workspace:*", + "@logtape/logtape": "^2.0.5", "fast-glob": "^3.3.2" } } diff --git a/packages/cli/src/bin/chkit.ts b/packages/cli/src/bin/chkit.ts index 49e300c..914c506 100644 --- a/packages/cli/src/bin/chkit.ts +++ b/packages/cli/src/bin/chkit.ts @@ -17,6 +17,7 @@ import { loadPluginRuntime } from './plugin-runtime.js' import { getInternalPlugins } from './internal-plugins/index.js' import { CLI_VERSION } from './version.js' import { debug } from 
'./debug.js' +import { configureCliLogging } from './logging.js' const WELL_KNOWN_PLUGIN_COMMANDS: Record = { codegen: 'Codegen', @@ -73,6 +74,8 @@ function collectPluginCommands(runtime: Awaited { + configureCliLogging() + const argv = process.argv.slice(2) const commandName = argv[0] debug('cli', `chkit ${CLI_VERSION} — argv: [${argv.join(', ')}]`) diff --git a/packages/cli/src/bin/debug.ts b/packages/cli/src/bin/debug.ts index 5893a18..7f8d3d3 100644 --- a/packages/cli/src/bin/debug.ts +++ b/packages/cli/src/bin/debug.ts @@ -1,22 +1,15 @@ -import process from 'node:process' +import { getLogger } from '@logtape/logtape' -const enabled = process.env.CHKIT_DEBUG === '1' || process.env.CHKIT_DEBUG === 'true' - -function timestamp(): string { - const now = new Date() - return now.toISOString().slice(11, 23) // HH:mm:ss.SSS -} +import { isDebugEnabled } from './logging.js' export function debug(category: string, message: string, detail?: unknown): void { - if (!enabled) return - const prefix = `[chkit:${category}]` + if (!isDebugEnabled()) return + const logger = getLogger(['chkit', category]) if (detail !== undefined) { - console.error(`${timestamp()} ${prefix} ${message}`, detail) - } else { - console.error(`${timestamp()} ${prefix} ${message}`) + logger.debug(message, { detail }) + return } + logger.debug(message) } -export function isDebugEnabled(): boolean { - return enabled -} +export { isDebugEnabled } from './logging.js' diff --git a/packages/cli/src/bin/logging.ts b/packages/cli/src/bin/logging.ts new file mode 100644 index 0000000..e782027 --- /dev/null +++ b/packages/cli/src/bin/logging.ts @@ -0,0 +1,34 @@ +import process from 'node:process' + +import { configureSync, getConfig, getConsoleSink, getTextFormatter } from '@logtape/logtape' + +const enabled = process.env.CHKIT_DEBUG === '1' || process.env.CHKIT_DEBUG === 'true' + +export function configureCliLogging(): void { + if (!enabled || getConfig()) return + + configureSync({ + sinks: { + console: 
getConsoleSink({ + formatter: getTextFormatter({ timestamp: 'time' }), + }), + }, + loggers: [ + { + category: 'chkit', + sinks: ['console'], + lowestLevel: 'debug', + }, + { + category: 'logtape', + sinks: ['console'], + lowestLevel: 'error', + }, + ], + reset: true, + }) +} + +export function isDebugEnabled(): boolean { + return enabled +} diff --git a/packages/cli/src/plugin.test.ts b/packages/cli/src/plugin.test.ts index ddda0ed..8ff0a2b 100644 --- a/packages/cli/src/plugin.test.ts +++ b/packages/cli/src/plugin.test.ts @@ -40,7 +40,7 @@ async function waitForParts( database: string, table: string, expectedPartitions: number, - timeoutMs = 15_000, + timeoutMs = 60_000, ): Promise { const start = Date.now() while (Date.now() - start < timeoutMs) { diff --git a/packages/clickhouse/package.json b/packages/clickhouse/package.json index e09e9a4..41f51e4 100644 --- a/packages/clickhouse/package.json +++ b/packages/clickhouse/package.json @@ -46,6 +46,7 @@ "dependencies": { "@chkit/core": "workspace:*", "@clickhouse/client": "^1.11.0", + "@logtape/logtape": "^2.0.5", "p-retry": "^7.1.1" } } diff --git a/packages/clickhouse/src/index.ts b/packages/clickhouse/src/index.ts index a62dea6..0daccea 100644 --- a/packages/clickhouse/src/index.ts +++ b/packages/clickhouse/src/index.ts @@ -1,4 +1,4 @@ -import { createClient } from '@clickhouse/client' +import { createClient, type ClickHouseSettings } from '@clickhouse/client' import { normalizeSQLFragment, type ChxConfig, @@ -6,6 +6,7 @@ import { type ProjectionDefinition, type SkipIndexDefinition, } from '@chkit/core' +import { getLogger } from '@logtape/logtape' import { parseEngineFromCreateTableQuery, parseOrderByFromCreateTableQuery, @@ -28,9 +29,11 @@ export interface QueryStatus { error?: string } +export type { ClickHouseSettings } + export interface ClickHouseExecutor { command(sql: string): Promise - query(sql: string): Promise + query(sql: string, settings?: ClickHouseSettings): Promise insert>(params: { table: 
string; values: T[] }): Promise listSchemaObjects(): Promise listTableDetails(databases: string[]): Promise @@ -249,7 +252,54 @@ export { waitForTableAbsent, } from './ddl-propagation.js' +function parseSummaryFromHeaders(headers: Record): { + read_rows: string + read_bytes: string + written_rows: string + written_bytes: string + result_rows: string + result_bytes: string + elapsed_ns: string +} | undefined { + const raw = headers['x-clickhouse-summary'] + if (!raw || typeof raw !== 'string') return undefined + try { + return JSON.parse(raw) + } catch { + return undefined + } +} + +function logProfiling( + logger: ReturnType, + query: string, + queryId: string, + summary?: { + read_rows: string + read_bytes: string + written_rows: string + written_bytes: string + result_rows?: string + result_bytes?: string + elapsed_ns: string + }, +): void { + logger.trace('Query completed: {query}', { + query, + queryId, + readRows: Number(summary?.read_rows ?? 0), + readBytes: Number(summary?.read_bytes ?? 0), + writtenRows: Number(summary?.written_rows ?? 0), + writtenBytes: Number(summary?.written_bytes ?? 0), + elapsedMs: Number(summary?.elapsed_ns ?? 0) / 1_000_000, + resultRows: Number(summary?.result_rows ?? 0), + resultBytes: Number(summary?.result_bytes ?? 0), + }) +} + export function createClickHouseExecutor(config: NonNullable): ClickHouseExecutor { + const profiler = getLogger(['chkit', 'profiling']) + const client = createClient({ url: config.url, username: config.username, @@ -259,6 +309,7 @@ export function createClickHouseExecutor(config: NonNullable { try { - await client.command({ query: sql, http_headers: { 'X-DDL': '1' } }) + const result = await client.command({ query: sql, http_headers: { 'X-DDL': '1' } }) + logProfiling(profiler, sql, result.query_id, result.summary) } catch (error) { if (isUnknownDatabaseError(error)) { - // The configured database doesn't exist yet. Retry without the - // session database so that CREATE DATABASE can succeed. 
const fallback = createClient({ url: config.url, username: config.username, @@ -296,21 +346,24 @@ export function createClickHouseExecutor(config: NonNullable(sql: string): Promise { + async query(sql: string, settings?: ClickHouseSettings): Promise { try { - const result = await client.query({ query: sql, format: 'JSONEachRow', http_headers: { 'X-DDL': '1' } }) - return result.json() + const result = await client.query({ query: sql, format: 'JSONEachRow', http_headers: { 'X-DDL': '1' }, ...(settings ? { clickhouse_settings: settings } : {}) }) + const rows = await result.json() + logProfiling(profiler, sql, result.query_id, parseSummaryFromHeaders(result.response_headers)) + return rows } catch (error) { wrapConnectionError(error, config.url) } }, async insert>(params: { table: string; values: T[] }): Promise { try { - await client.insert({ + const result = await client.insert({ table: params.table, values: params.values, format: 'JSONEachRow', }) + logProfiling(profiler, `INSERT INTO ${params.table}`, result.query_id, result.summary) } catch (error) { wrapConnectionError(error, config.url) } @@ -327,7 +380,7 @@ export function createClickHouseExecutor(config: NonNullable { try { const running = await client.query({ - query: `SELECT read_rows, read_bytes, written_rows, written_bytes, elapsed FROM clusterAllReplicas('parallel_replicas', system.processes) WHERE user = currentUser() AND query_id = {qid:String} SETTINGS skip_unavailable_shards = 1`, + query: `SELECT read_rows, read_bytes, written_rows, written_bytes, elapsed FROM clusterAllReplicas('cluster', system.processes) WHERE user = currentUser() AND query_id = {qid:String} SETTINGS skip_unavailable_shards = 1`, query_params: { qid: queryId }, format: 'JSONEachRow', }) @@ -353,7 +406,7 @@ export function createClickHouseExecutor(config: NonNullable { + const result = await client.query({ + query: sql, + format: 'JSONEachRow', + clickhouse_settings: settings as Record, + }) + return result.json() +} + +const 
plan = await generateChunkPlan({ + database: 'analytics', + table: 'events', + from: '2025-01-01T00:00:00Z', + to: '2025-02-01T00:00:00Z', + targetChunkBytes: 1_000_000_000, // ~1 GiB per chunk + query, + // 'count' is exact but slower; 'explain-estimate' is faster but approximate + rowProbeStrategy: 'count', +}) + +console.log(`${plan.chunks.length} chunks, ${plan.totalRows.toLocaleString()} rows`) +``` + +### Execute chunks against a target + +`buildChunkExecutionSql` produces the per-chunk `INSERT … SELECT` and `executeBackfill` runs them with concurrency, polling, and progress callbacks. Persist the `progress` argument anywhere you like to support resume. + +```ts +import { createClickHouseExecutor } from '@chkit/clickhouse' +import { + buildChunkExecutionSql, + executeBackfill, + type BackfillProgress, +} from '@chkit/plugin-backfill/sdk' + +const executor = createClickHouseExecutor({ + url: process.env.CLICKHOUSE_URL!, + username: 'default', + password: process.env.CLICKHOUSE_PASSWORD!, + database: 'analytics', +}) + +const chunksById = new Map(plan.chunks.map((chunk) => [chunk.id, chunk])) +let saved: BackfillProgress | undefined // load from disk for resume + +const result = await executeBackfill({ + executor, + planId: plan.planId, + chunks: plan.chunks, + buildQuery: ({ id }) => + buildChunkExecutionSql({ + planId: plan.planId, + chunk: chunksById.get(id)!, + target: 'analytics.events_backfill', + table: plan.table, + }), + concurrency: 4, + pollIntervalMs: 5_000, + resumeFrom: saved, + onProgress: async (progress) => { + saved = progress + // persist to disk / state store + }, +}) + +console.log(`done=${result.completed} failed=${result.failed}`) +``` + +### Plan persistence + +Plans contain string boundaries that may include non-UTF-8 bytes (the planner uses `latin1`-encoded byte ranges for string sort keys), so JSON-serializing a `ChunkPlan` directly will lose information. 
Use the codec helpers when you need to round-trip a plan through storage: + +```ts +import { + encodeChunkPlanForPersistence, + decodeChunkPlanFromPersistence, +} from '@chkit/plugin-backfill/sdk' + +const json = JSON.stringify(encodeChunkPlanForPersistence(plan)) +// later … +const plan2 = decodeChunkPlanFromPersistence(JSON.parse(json)) +``` + +### Logging + +The planner emits structured logs via [`@logtape/logtape`](https://logtape.org/) under the `['chkit', 'backfill']` category. Configure a sink at process start to see them — slow-query warnings (>5 s) are emitted at `warning` level, planning progress at `info`, and per-strategy decisions at `debug`. + +```ts +import { configureSync, getConsoleSink, getTextFormatter } from '@chkit/plugin-backfill/sdk' + +configureSync({ + sinks: { console: getConsoleSink({ formatter: getTextFormatter({ timestamp: 'time' }) }) }, + loggers: [{ category: 'chkit', sinks: ['console'], lowestLevel: 'info' }], + reset: true, +}) +``` + +To capture every SQL statement the planner runs (with timing, server-side stats, and per-strategy classification), wrap your `query` function instead of relying solely on logging — the wrapper sees the raw SQL and settings on every call and can record query IDs, response headers, and durations alongside the structured logs. 
+ ## License [MIT](../../LICENSE) diff --git a/packages/plugin-backfill/package.json b/packages/plugin-backfill/package.json index c4cbe63..87326bf 100644 --- a/packages/plugin-backfill/package.json +++ b/packages/plugin-backfill/package.json @@ -27,6 +27,11 @@ "source": "./src/index.ts", "types": "./dist/index.d.ts", "default": "./dist/index.js" + }, + "./sdk": { + "source": "./src/sdk.ts", + "types": "./dist/sdk.d.ts", + "default": "./dist/sdk.js" } }, "files": [ @@ -38,11 +43,14 @@ "typecheck": "tsc -p tsconfig.json --noEmit", "lint": "biome lint src", "test": "bun test src", + "test:env": "doppler run --project chkit --config ci -- bun test src", + "seed:env": "doppler run --project chkit --config ci -- bun run src/chunking/e2e/seed-datasets.script.ts", "clean": "rm -rf dist" }, "dependencies": { "@chkit/clickhouse": "workspace:*", "@chkit/core": "workspace:*", + "@logtape/logtape": "^2.0.5", "p-map": "^7.0.4", "zod": "^4.3.6" } diff --git a/packages/plugin-backfill/src/async-backfill.ts b/packages/plugin-backfill/src/async-backfill.ts index f393499..bbb7c3b 100644 --- a/packages/plugin-backfill/src/async-backfill.ts +++ b/packages/plugin-backfill/src/async-backfill.ts @@ -7,9 +7,9 @@ export interface BackfillOptions { /** Plan ID used as a namespace in deterministic query IDs */ planId: string /** The chunks to process (from buildChunks) */ - chunks: Array<{ id: string; from: string; to: string; [key: string]: unknown }> + chunks: Array<{ id: string; from?: string; to?: string; [key: string]: unknown }> /** Build the SQL for a given chunk. Called once per chunk at submit time. */ - buildQuery: (chunk: { id: string; from: string; to: string }) => string + buildQuery: (chunk: { id: string; from?: string; to?: string }) => string /** Max concurrent queries running on the server. Default: 3 */ concurrency?: number /** Polling interval in ms. 
Default: 5000 */ @@ -149,7 +149,7 @@ export async function syncProgress( const safePrefix = prefix.replace(/'/g, "''").replace(/%/g, '\\%').replace(/_/g, '\\_') const runningRows = await executor.query<{ query_id: string }>( - `SELECT query_id FROM clusterAllReplicas('parallel_replicas', system.processes) WHERE user = currentUser() AND query_id LIKE '${safePrefix}%' SETTINGS skip_unavailable_shards = 1` + `SELECT query_id FROM clusterAllReplicas('cluster', system.processes) WHERE user = currentUser() AND query_id LIKE '${safePrefix}%' SETTINGS skip_unavailable_shards = 1` ) const runningSet = new Set(runningRows.map((r) => r.query_id)) @@ -162,7 +162,7 @@ export async function syncProgress( exception: string }>( `SELECT query_id, type, written_rows, written_bytes, query_duration_ms, exception -FROM clusterAllReplicas('parallel_replicas', system.query_log) +FROM clusterAllReplicas('cluster', system.query_log) WHERE user = currentUser() AND query_id LIKE '${safePrefix}%' AND type IN ('QueryFinish', 'ExceptionWhileProcessing') diff --git a/packages/plugin-backfill/src/chunking/analyze.ts b/packages/plugin-backfill/src/chunking/analyze.ts index 7e051e1..3903b72 100644 --- a/packages/plugin-backfill/src/chunking/analyze.ts +++ b/packages/plugin-backfill/src/chunking/analyze.ts @@ -1,129 +1,15 @@ -import { hashId, randomPlanId } from '../state.js' +import { generateChunkPlan } from './planner.js' +import type { ChunkPlan, GenerateChunkPlanInput } from './types.js' -import { buildChunkBoundaries } from './build.js' -import { introspectTable, querySortKeyRanges } from './introspect.js' -import type { ChunkBoundary, PartitionInfo, PlannedChunk, SortKeyInfo } from './types.js' - -export interface AnalyzeAndChunkInput { - database: string - table: string - from?: string - to?: string - maxChunkBytes: number - requireIdempotencyToken: boolean - query: (sql: string) => Promise -} - -export interface AnalyzeAndChunkResult { - planId: string - partitions: PartitionInfo[] - 
sortKey?: SortKeyInfo - chunks: PlannedChunk[] -} +export type AnalyzeAndChunkInput = GenerateChunkPlanInput +export type AnalyzeAndChunkResult = ChunkPlan +export type AnalyzeTableInput = GenerateChunkPlanInput +export type AnalyzeTableResult = ChunkPlan export async function analyzeAndChunk(input: AnalyzeAndChunkInput): Promise { - const { partitions, sortKey, boundaries } = await analyzeTable({ - database: input.database, - table: input.table, - from: input.from, - to: input.to, - maxChunkBytes: input.maxChunkBytes, - query: input.query, - }) - - const planId = randomPlanId() - - const chunks = buildPlannedChunks({ - planId, - partitions, - boundaries, - requireIdempotencyToken: input.requireIdempotencyToken, - }) - - return { planId, partitions, sortKey, chunks } -} - -export interface AnalyzeTableInput { - database: string - table: string - from?: string - to?: string - maxChunkBytes: number - query: (sql: string) => Promise -} - -export interface AnalyzeTableResult { - partitions: PartitionInfo[] - sortKey?: SortKeyInfo - boundaries: ChunkBoundary[] + return generateChunkPlan(input) } export async function analyzeTable(input: AnalyzeTableInput): Promise { - const { partitions, sortKey } = await introspectTable({ - database: input.database, - table: input.table, - from: input.from, - to: input.to, - query: input.query, - }) - - const oversizedPartitionIds = partitions - .filter(p => p.bytesOnDisk > input.maxChunkBytes) - .map(p => p.partitionId) - - let sortKeyRanges: Map | undefined - if (sortKey && oversizedPartitionIds.length > 0) { - sortKeyRanges = await querySortKeyRanges({ - database: input.database, - table: input.table, - sortKeyColumn: sortKey.column, - partitionIds: oversizedPartitionIds, - query: input.query, - }) - } - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: input.maxChunkBytes, - sortKey, - sortKeyRanges, - }) - - return { partitions, sortKey, boundaries } -} - -export function buildPlannedChunks(input: { - 
planId: string - partitions: PartitionInfo[] - boundaries: ChunkBoundary[] - requireIdempotencyToken: boolean -}): PlannedChunk[] { - const chunks: PlannedChunk[] = [] - const partitionIndex = new Map() - - for (const boundary of input.boundaries) { - const idx = partitionIndex.get(boundary.partitionId) ?? 0 - partitionIndex.set(boundary.partitionId, idx + 1) - - const idSeed = `${input.planId}:${boundary.partitionId}:${idx}` - const chunkId = hashId(`chunk:${idSeed}`).slice(0, 16) - const token = input.requireIdempotencyToken ? hashId(`token:${idSeed}`) : '' - - const partition = input.partitions.find(p => p.partitionId === boundary.partitionId) - const from = boundary.sortKeyFrom ?? partition?.minTime ?? '' - const to = boundary.sortKeyTo ?? partition?.maxTime ?? '' - - chunks.push({ - id: chunkId, - partitionId: boundary.partitionId, - sortKeyFrom: boundary.sortKeyFrom, - sortKeyTo: boundary.sortKeyTo, - estimatedBytes: boundary.estimatedBytes, - idempotencyToken: token, - from, - to, - }) - } - - return chunks + return analyzeAndChunk(input) } diff --git a/packages/plugin-backfill/src/chunking/boundary-codec.ts b/packages/plugin-backfill/src/chunking/boundary-codec.ts new file mode 100644 index 0000000..73b8984 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/boundary-codec.ts @@ -0,0 +1,109 @@ +import type { + Chunk, + ChunkPlan, + ChunkRange, + FocusedValue, + SortKey, +} from './types.js' + +export function encodeBoundary( + value: string | undefined, + sortKey: SortKey | undefined, +): string | undefined { + if (value === undefined || sortKey === undefined) return value + if (sortKey.boundaryEncoding === 'hex-latin1') { + return Buffer.from(value, 'latin1').toString('hex') + } + return value +} + +export function decodeBoundary( + value: string | undefined, + sortKey: SortKey | undefined, +): string | undefined { + if (value === undefined || sortKey === undefined) return value + if (sortKey.boundaryEncoding === 'hex-latin1') { + return 
Buffer.from(value, 'hex').toString('latin1') + } + return value +} + +export function encodeRangesForPlan( + ranges: ChunkRange[], + sortKeys: SortKey[], +): ChunkRange[] { + return ranges.map((range) => ({ + dimensionIndex: range.dimensionIndex, + from: encodeBoundary(range.from, sortKeys[range.dimensionIndex]), + to: encodeBoundary(range.to, sortKeys[range.dimensionIndex]), + })) +} + +export function decodeRangesFromPlan( + ranges: ChunkRange[], + sortKeys: SortKey[], +): ChunkRange[] { + return ranges.map((range) => ({ + dimensionIndex: range.dimensionIndex, + from: decodeBoundary(range.from, sortKeys[range.dimensionIndex]), + to: decodeBoundary(range.to, sortKeys[range.dimensionIndex]), + })) +} + +function encodeFocusedValue( + focusedValue: FocusedValue | undefined, + sortKeys: SortKey[], +): FocusedValue | undefined { + if (!focusedValue) return undefined + return { + dimensionIndex: focusedValue.dimensionIndex, + value: encodeBoundary(focusedValue.value, sortKeys[focusedValue.dimensionIndex]) ?? focusedValue.value, + } +} + +function decodeFocusedValue( + focusedValue: FocusedValue | undefined, + sortKeys: SortKey[], +): FocusedValue | undefined { + if (!focusedValue) return undefined + return { + dimensionIndex: focusedValue.dimensionIndex, + value: decodeBoundary(focusedValue.value, sortKeys[focusedValue.dimensionIndex]) ?? 
focusedValue.value, + } +} + +export function encodeChunkForPlan(chunk: Chunk, sortKeys: SortKey[]): Chunk { + return { + ...chunk, + ranges: encodeRangesForPlan(chunk.ranges, sortKeys), + analysis: { + ...chunk.analysis, + focusedValue: encodeFocusedValue(chunk.analysis.focusedValue, sortKeys), + }, + } +} + +export function decodeChunkFromPlan(chunk: Chunk, sortKeys: SortKey[]): Chunk { + return { + ...chunk, + ranges: decodeRangesFromPlan(chunk.ranges, sortKeys), + analysis: { + ...chunk.analysis, + focusedValue: decodeFocusedValue(chunk.analysis.focusedValue, sortKeys), + }, + } +} + +export function encodeChunkPlanForPersistence(plan: ChunkPlan): ChunkPlan { + return { + ...plan, + chunks: plan.chunks.map((chunk) => encodeChunkForPlan(chunk, plan.table.sortKeys)), + } +} + +export function decodeChunkPlanFromPersistence(plan: ChunkPlan): ChunkPlan { + return { + ...plan, + chunks: plan.chunks.map((chunk) => decodeChunkFromPlan(chunk, plan.table.sortKeys)), + } +} diff --git a/packages/plugin-backfill/src/chunking/build.test.ts b/packages/plugin-backfill/src/chunking/build.test.ts deleted file mode 100644 index 71aa47e..0000000 --- a/packages/plugin-backfill/src/chunking/build.test.ts +++ /dev/null @@ -1,135 +0,0 @@ -import { describe, expect, test } from 'bun:test' - -import { buildChunkBoundaries } from './build.js' -import type { PartitionInfo, SortKeyInfo } from './types.js' - -const GiB = 1024 ** 3 - -describe('buildChunkBoundaries', () => { - test('small partition produces one chunk boundary', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T23:59:59.000Z' }, - ] - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: 10 * GiB, - }) - - expect(boundaries).toHaveLength(1) - expect(boundaries[0]?.partitionId).toBe('202501') - expect(boundaries[0]?.sortKeyFrom).toBeUndefined() - 
expect(boundaries[0]?.sortKeyTo).toBeUndefined() - expect(boundaries[0]?.estimatedBytes).toBe(5 * GiB) - }) - - test('large partition produces multiple sub-chunks with sort key ranges', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' } - const sortKeyRanges = new Map([ - ['202501', { min: '2025-01-01 00:00:00', max: '2025-01-31 00:00:00' }], - ]) - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - }) - - expect(boundaries).toHaveLength(3) - for (const b of boundaries) { - expect(b.partitionId).toBe('202501') - expect(b.sortKeyFrom).toBeDefined() - expect(b.sortKeyTo).toBeDefined() - } - }) - - test('large partition without sort key produces single chunk', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: 10 * GiB, - }) - - expect(boundaries).toHaveLength(1) - expect(boundaries[0]?.estimatedBytes).toBe(30 * GiB) - }) - - test('mixed sizes produce correct boundary counts', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 500, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - { partitionId: '202502', rows: 5000, bytesOnDisk: 25 * GiB, minTime: '2025-02-01T00:00:00.000Z', maxTime: '2025-02-28T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' } - const sortKeyRanges = new Map([ - ['202502', { min: '2025-02-01 00:00:00', max: '2025-02-28 00:00:00' }], - ]) - - const boundaries = buildChunkBoundaries({ - partitions, 
- maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - }) - - // First partition: 5 GiB < 10 GiB -> 1 boundary - // Second partition: 25 GiB / 10 GiB = 3 sub-boundaries - expect(boundaries).toHaveLength(4) - - const p1 = boundaries.filter((b) => b.partitionId === '202501') - const p2 = boundaries.filter((b) => b.partitionId === '202502') - expect(p1).toHaveLength(1) - expect(p2).toHaveLength(3) - }) - - test('large partition with min === max sort key produces single chunk', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'event_type', type: 'String', category: 'string' } - const sortKeyRanges = new Map([ - ['202501', { min: 'click', max: 'click' }], - ]) - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - }) - - expect(boundaries).toHaveLength(1) - expect(boundaries[0]?.partitionId).toBe('202501') - expect(boundaries[0]?.sortKeyFrom).toBeUndefined() - expect(boundaries[0]?.sortKeyTo).toBeUndefined() - }) - - test('numeric sort key produces numeric range sub-chunks', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 20 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'id', type: 'UInt64', category: 'numeric' } - const sortKeyRanges = new Map([ - ['202501', { min: '100', max: '200' }], - ]) - - const boundaries = buildChunkBoundaries({ - partitions, - maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - }) - - expect(boundaries).toHaveLength(2) - expect(boundaries[0]?.sortKeyFrom).toBe('100') - expect(boundaries[0]?.sortKeyTo).toBe('150') - expect(boundaries[1]?.sortKeyFrom).toBe('150') - expect(boundaries[1]?.sortKeyTo).toBe('201') - }) -}) diff --git 
a/packages/plugin-backfill/src/chunking/build.ts b/packages/plugin-backfill/src/chunking/build.ts deleted file mode 100644 index cc6693b..0000000 --- a/packages/plugin-backfill/src/chunking/build.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { splitSortKeyRange } from './splitter.js' -import type { ChunkBoundary, PartitionInfo, SortKeyInfo } from './types.js' - -export function buildChunkBoundaries(input: { - partitions: PartitionInfo[] - maxChunkBytes: number - sortKey?: SortKeyInfo - sortKeyRanges?: Map -}): ChunkBoundary[] { - const boundaries: ChunkBoundary[] = [] - - for (const partition of input.partitions) { - if (partition.bytesOnDisk <= input.maxChunkBytes) { - boundaries.push({ - partitionId: partition.partitionId, - estimatedBytes: partition.bytesOnDisk, - }) - } else if (input.sortKey && input.sortKeyRanges) { - const range = input.sortKeyRanges.get(partition.partitionId) - if (!range) { - // No range data — emit as single chunk - boundaries.push({ - partitionId: partition.partitionId, - estimatedBytes: partition.bytesOnDisk, - }) - continue - } - - // If min === max, splitting would produce empty sub-ranges; emit as single chunk - if (range.min === range.max) { - boundaries.push({ - partitionId: partition.partitionId, - estimatedBytes: partition.bytesOnDisk, - }) - continue - } - - const subCount = Math.ceil(partition.bytesOnDisk / input.maxChunkBytes) - const subRanges = splitSortKeyRange(input.sortKey.category, range.min, range.max, subCount) - const estimatedBytesPerSub = Math.ceil(partition.bytesOnDisk / subCount) - - for (const sub of subRanges) { - boundaries.push({ - partitionId: partition.partitionId, - sortKeyFrom: sub.from, - sortKeyTo: sub.to, - estimatedBytes: estimatedBytesPerSub, - }) - } - } else { - // No sort key info — emit as single chunk despite being oversized - boundaries.push({ - partitionId: partition.partitionId, - estimatedBytes: partition.bytesOnDisk, - }) - } - } - - return boundaries -} diff --git 
a/packages/plugin-backfill/src/chunking/e2e/constants.ts b/packages/plugin-backfill/src/chunking/e2e/constants.ts new file mode 100644 index 0000000..98e4738 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/e2e/constants.ts @@ -0,0 +1 @@ +export const TABLE_PREFIX = 'chkit_e2e_chunking' diff --git a/packages/plugin-backfill/src/chunking/e2e/seed-datasets.script.ts b/packages/plugin-backfill/src/chunking/e2e/seed-datasets.script.ts new file mode 100644 index 0000000..e665c5d --- /dev/null +++ b/packages/plugin-backfill/src/chunking/e2e/seed-datasets.script.ts @@ -0,0 +1,163 @@ +#!/usr/bin/env bun + +/** + * Seeds ClickHouse tables for smart-chunking E2E tests. + * + * Run once manually: + * bun run packages/plugin-backfill/src/chunking/e2e/seed-datasets.script.ts + * + * Requires CLICKHOUSE_HOST/CLICKHOUSE_URL + CLICKHOUSE_PASSWORD env vars. + * Creates tables if they don't exist, truncates them, and re-inserts data. + */ + +import { randomBytes } from 'node:crypto' +import { getRequiredEnv, createLiveExecutor } from '@chkit/clickhouse/e2e-testkit' + +import { TABLE_PREFIX } from './constants.js' + +interface DatasetConfig { + name: string + columns: string + orderBy: string + partitionBy: string + generate: () => Record<string, unknown>[] +} + +function pad(bytes: number): string { + return randomBytes(bytes).toString('hex') +} + +function dayHour(day: number, hour: number): string { + return `2026-01-${String(day).padStart(2, '0')} ${String(hour).padStart(2, '0')}:00:00` +} + +export const datasets: DatasetConfig[] = [ + { + name: 'skewed_power_law', + columns: [ + 'tenant_id String', + 'seq UInt64', + 'event_time DateTime', + 'padding String', + ].join(', '), + orderBy: '(tenant_id, seq)', + partitionBy: 'toYYYYMM(event_time)', + generate() { + const rows: Record<string, unknown>[] = [] + + // 80%: single dominant tenant — 8,000 rows + for (let i = 0; i < 8000; i++) { + rows.push({ + tenant_id: 'mega-corp', + seq: i, + event_time: dayHour(1 + (i % 28), i % 24), + padding: pad(512), + }) + } + + // 20%:
200 small tenants, 10 rows each — 2,000 rows + for (let t = 0; t < 200; t++) { + for (let i = 0; i < 10; i++) { + rows.push({ + tenant_id: `tenant-${String(t).padStart(4, '0')}`, + seq: i, + event_time: dayHour(1 + ((t * 10 + i) % 28), (t + i) % 24), + padding: pad(512), + }) + } + } + + return rows + }, + }, + { + name: 'multiple_hot_keys', + columns: [ + 'tenant_id String', + 'seq UInt64', + 'event_time DateTime', + 'padding String', + ].join(', '), + orderBy: '(tenant_id, seq)', + partitionBy: 'toYYYYMM(event_time)', + generate() { + const rows: Record[] = [] + + // 3 hot tenants, ~30% each — 3,000 rows each = 9,000 rows + for (const tenant of ['alpha-corp', 'beta-corp', 'gamma-corp']) { + for (let i = 0; i < 3000; i++) { + rows.push({ + tenant_id: tenant, + seq: i, + event_time: dayHour(1 + (i % 28), i % 24), + padding: pad(512), + }) + } + } + + // 10%: 100 small tenants, 10 rows each — 1,000 rows + for (let t = 0; t < 100; t++) { + for (let i = 0; i < 10; i++) { + rows.push({ + tenant_id: `small-${String(t).padStart(4, '0')}`, + seq: i, + event_time: dayHour(1 + ((t * 10 + i) % 28), (t + i) % 24), + padding: pad(512), + }) + } + } + + return rows + }, + }, +] + +const BATCH_SIZE = 5000 + +async function seed() { + const env = getRequiredEnv() + const executor = createLiveExecutor(env) + const db = env.clickhouseDatabase + + try { + for (const dataset of datasets) { + const table = `${TABLE_PREFIX}_${dataset.name}` + const fqn = `${db}.${table}` + console.log(`\n--- Seeding ${fqn} ---`) + + await executor.command(` + CREATE TABLE IF NOT EXISTS ${fqn} ( + ${dataset.columns} + ) ENGINE = MergeTree() + PARTITION BY ${dataset.partitionBy} + ORDER BY ${dataset.orderBy} + `) + console.log(' Table ensured.') + + await executor.command(`TRUNCATE TABLE ${fqn}`) + console.log(' Truncated.') + + const rows = dataset.generate() + for (let i = 0; i < rows.length; i += BATCH_SIZE) { + const batch = rows.slice(i, i + BATCH_SIZE) + await executor.insert({ table: fqn, values: 
batch }) + console.log(` Inserted ${Math.min(i + BATCH_SIZE, rows.length)} / ${rows.length} rows`) + } + + // Verify + const [result] = await executor.query<{ cnt: string }>( + `SELECT count() AS cnt FROM ${fqn} SETTINGS select_sequential_consistency = 1`, + ) + console.log(` Verified: ${result?.cnt} rows`) + } + } finally { + await executor.close() + } + + console.log('\nDone!') +} + +seed().catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/packages/plugin-backfill/src/chunking/e2e/smart-chunking.e2e.test.ts b/packages/plugin-backfill/src/chunking/e2e/smart-chunking.e2e.test.ts new file mode 100644 index 0000000..56aa47d --- /dev/null +++ b/packages/plugin-backfill/src/chunking/e2e/smart-chunking.e2e.test.ts @@ -0,0 +1,297 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' + +import { createClient } from '@clickhouse/client' +import { createLiveExecutor, getRequiredEnv } from '@chkit/clickhouse/e2e-testkit' +import type { ClickHouseExecutor } from '@chkit/clickhouse' + +import { analyzeAndChunk } from '../analyze.js' +import { buildChunkExecutionSql, buildWhereClauseFromChunk } from '../sql.js' +import type { Chunk, ChunkPlan, PlannerQuery } from '../types.js' + +import { TABLE_PREFIX } from './constants.js' + +// --------------------------------------------------------------------------- +// Shared setup +// --------------------------------------------------------------------------- + +let executor: ClickHouseExecutor +let plannerQuery: PlannerQuery +let closePlannerClient: () => Promise +let db: string + +beforeAll(() => { + const env = getRequiredEnv() + executor = createLiveExecutor(env) + db = env.clickhouseDatabase + + // The planner runs parallel queries via pMap, which requires a sessionless + // client to avoid ClickHouse Cloud session locking errors. 
+ const client = createClient({ + url: env.clickhouseUrl, + username: env.clickhouseUser, + password: env.clickhousePassword, + database: env.clickhouseDatabase, + clickhouse_settings: { wait_end_of_query: 1 }, + }) + + plannerQuery = async (sql: string, settings?: Record): Promise => { + const result = await client.query({ + query: sql, + format: 'JSONEachRow', + ...(settings ? { clickhouse_settings: settings } : {}), + }) + return result.json() + } + closePlannerClient = () => client.close() +}) + +afterAll(async () => { + await closePlannerClient?.() + await executor?.close() +}) + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function strategyIds(chunk: Chunk): string[] { + return chunk.analysis.lineage.map((step) => step.strategyId) +} + +async function requireSeededTable(table: string): Promise { + const [result] = await executor.query<{ cnt: string }>( + `SELECT count() AS cnt FROM ${db}.${table} SETTINGS select_sequential_consistency = 1`, + ) + const count = Number(result?.cnt ?? 0) + if (count === 0) { + throw new Error( + `Table ${db}.${table} is empty. Run the seed script first:\n` + + ` bun run seed:env`, + ) + } + return count +} + +async function getPartitionUncompressedBytes(table: string): Promise { + const rows = await executor.query<{ total: string }>(` + SELECT toString(sum(data_uncompressed_bytes)) AS total + FROM system.parts + WHERE database = '${db}' AND table = '${table}' AND active = 1 + SETTINGS select_sequential_consistency = 1 + `) + return Number(rows[0]?.total ?? 
0) +} + +async function chunkPlan(table: string, targetChunkBytes: number): Promise { + return analyzeAndChunk({ + database: db, + table, + targetChunkBytes, + query: plannerQuery, + querySettings: { enable_parallel_replicas: 0 }, + }) +} + +function buildSql(plan: ChunkPlan, chunk: Chunk): string { + return buildChunkExecutionSql({ + planId: plan.planId, + chunk, + target: `${plan.table.database}.${plan.table.table}`, + sourceTarget: `${plan.table.database}.${plan.table.table}`, + table: plan.table, + }) +} + +// --------------------------------------------------------------------------- +// Scenario 1: Skewed Power Law Distribution +// +// 80% of rows belong to a single tenant ("mega-corp"), 20% spread across +// 200 small tenants. Sort key: (tenant_id, seq). +// +// Expected behavior: +// - The system detects "mega-corp" as a hot key +// - mega-corp chunks are split on the secondary dimension (seq) +// - Small tenants are grouped into larger chunks +// - All rows are covered, no gaps or overlaps +// --------------------------------------------------------------------------- + +describe('e2e: skewed power law', () => { + const table = `${TABLE_PREFIX}_skewed_power_law` + let plan: ChunkPlan + let totalRows: number + + beforeAll(async () => { + totalRows = await requireSeededTable(table) + const uncompressedBytes = await getPartitionUncompressedBytes(table) + + // Target ~5 chunks + const targetChunkBytes = Math.floor(uncompressedBytes / 5) + plan = await chunkPlan(table, targetChunkBytes) + }, 60_000) + + test('produces multiple chunks', () => { + expect(plan.chunks.length).toBeGreaterThan(1) + }) + + test('detects mega-corp as a focused (hot) key', () => { + const focused = plan.chunks.filter( + (c) => c.analysis.focusedValue?.value === 'mega-corp', + ) + expect(focused.length).toBeGreaterThan(0) + }) + + test('mega-corp chunks are split on the secondary dimension (seq)', () => { + const megaCorpChunks = plan.chunks.filter( + (c) => 
c.analysis.focusedValue?.value === 'mega-corp', + ) + expect(megaCorpChunks.length).toBeGreaterThan(1) + + // Each mega-corp chunk should have ranges on both dimensions + for (const chunk of megaCorpChunks) { + const dims = new Set(chunk.ranges.map((r) => r.dimensionIndex)) + expect(dims.has(0)).toBe(true) // tenant_id + expect(dims.has(1)).toBe(true) // seq + } + }) + + test('mega-corp chunk boundaries on dim 1 are contiguous', () => { + const megaCorpChunks = plan.chunks + .filter((c) => c.analysis.focusedValue?.value === 'mega-corp') + .sort((a, b) => { + const aFrom = a.ranges.find((r) => r.dimensionIndex === 1)?.from ?? '' + const bFrom = b.ranges.find((r) => r.dimensionIndex === 1)?.from ?? '' + return String(aFrom).localeCompare(String(bFrom)) + }) + + for (let i = 1; i < megaCorpChunks.length; i++) { + const prev = megaCorpChunks[i - 1]?.ranges.find((r) => r.dimensionIndex === 1) + const curr = megaCorpChunks[i]?.ranges.find((r) => r.dimensionIndex === 1) + if (prev?.to !== undefined && curr?.from !== undefined) { + expect(prev.to).toBe(curr.from) + } + } + }) + + test('estimated row sum is within 20% of actual count', () => { + const estimatedTotal = plan.chunks.reduce((sum, c) => sum + c.estimate.rows, 0) + const ratio = estimatedTotal / totalRows + expect(ratio).toBeGreaterThanOrEqual(0.8) + expect(ratio).toBeLessThanOrEqual(1.2) + }) + + test('no chunk exceeds 2x the target size', () => { + for (const chunk of plan.chunks) { + expect(chunk.estimate.bytesUncompressed).toBeLessThan(plan.targetChunkBytes * 2) + } + }) + + test('every chunk produces valid execution SQL', () => { + for (const chunk of plan.chunks) { + const sql = buildSql(plan, chunk) + expect(sql).toContain('INSERT INTO') + expect(sql).toContain('_partition_id') + // mega-corp chunks should reference both sort key columns + if (chunk.analysis.focusedValue?.value === 'mega-corp') { + expect(sql).toContain('tenant_id >=') + expect(sql).toContain('seq >=') + } + } + }) + + test('executing all 
chunk queries returns the full row count', async () => { + let totalCounted = 0 + for (const chunk of plan.chunks) { + const where = buildWhereClauseFromChunk(chunk, plan.table) + const countSql = `SELECT count() AS cnt FROM ${db}.${table} WHERE ${where}` + const [row] = await executor.query<{ cnt: string }>(countSql) + totalCounted += Number(row?.cnt ?? 0) + } + + expect(totalCounted).toBe(totalRows) + }, 60_000) +}) + +// --------------------------------------------------------------------------- +// Scenario 2: Multiple Hot Keys +// +// Three tenants each hold ~30% of rows ("alpha-corp", "beta-corp", +// "gamma-corp"), with ~10% spread across 100 small tenants. +// Sort key: (tenant_id, seq). +// +// Expected behavior: +// - Each hot tenant is detected as a focused value +// - Each hot tenant is independently split on dim 1 (seq) +// - Small tenants are covered by non-focused chunks +// - All rows are accounted for with no gaps +// --------------------------------------------------------------------------- + +describe('e2e: multiple hot keys', () => { + const table = `${TABLE_PREFIX}_multiple_hot_keys` + const hotTenants = ['alpha-corp', 'beta-corp', 'gamma-corp'] + let plan: ChunkPlan + let totalRows: number + + beforeAll(async () => { + totalRows = await requireSeededTable(table) + const uncompressedBytes = await getPartitionUncompressedBytes(table) + + // Target ~10 chunks so each hot tenant (~30% = ~3x target) clearly needs splitting + const targetChunkBytes = Math.floor(uncompressedBytes / 10) + plan = await chunkPlan(table, targetChunkBytes) + }, 60_000) + + test('produces multiple chunks', () => { + expect(plan.chunks.length).toBeGreaterThan(3) + }) + + test('detects all three hot tenants as focused values', () => { + const focusedValues = new Set( + plan.chunks + .map((c) => c.analysis.focusedValue?.value) + .filter(Boolean), + ) + for (const tenant of hotTenants) { + expect(focusedValues.has(tenant)).toBe(true) + } + }) + + test('each hot tenant has 
chunks with ranges on both dimensions', () => { + for (const tenant of hotTenants) { + const tenantChunks = plan.chunks.filter( + (c) => c.analysis.focusedValue?.value === tenant, + ) + expect(tenantChunks.length).toBeGreaterThanOrEqual(1) + + for (const chunk of tenantChunks) { + const dims = new Set(chunk.ranges.map((r) => r.dimensionIndex)) + expect(dims.has(0)).toBe(true) // tenant_id + expect(dims.has(1)).toBe(true) // seq + } + } + }) + + test('estimated row sum is within 20% of actual count', () => { + const estimatedTotal = plan.chunks.reduce((sum, c) => sum + c.estimate.rows, 0) + const ratio = estimatedTotal / totalRows + expect(ratio).toBeGreaterThanOrEqual(0.8) + expect(ratio).toBeLessThanOrEqual(1.2) + }) + + test('no chunk exceeds 2x the target size', () => { + for (const chunk of plan.chunks) { + expect(chunk.estimate.bytesUncompressed).toBeLessThan(plan.targetChunkBytes * 2) + } + }) + + test('executing all chunk queries returns the full row count', async () => { + let totalCounted = 0 + for (const chunk of plan.chunks) { + const where = buildWhereClauseFromChunk(chunk, plan.table) + const countSql = `SELECT count() AS cnt FROM ${db}.${table} WHERE ${where}` + const [row] = await executor.query<{ cnt: string }>(countSql) + totalCounted += Number(row?.cnt ?? 
0) + } + + expect(totalCounted).toBe(totalRows) + }, 60_000) +}) diff --git a/packages/plugin-backfill/src/chunking/introspect.test.ts b/packages/plugin-backfill/src/chunking/introspect.test.ts deleted file mode 100644 index 431872c..0000000 --- a/packages/plugin-backfill/src/chunking/introspect.test.ts +++ /dev/null @@ -1,234 +0,0 @@ -import { describe, expect, test } from 'bun:test' - -import { introspectTable, queryPartitionInfo, querySortKeyInfo, querySortKeyRanges } from './introspect.js' - -describe('queryPartitionInfo', () => { - test('maps system.parts rows to PartitionInfo array', async () => { - const mockRows = [ - { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' }, - { partition_id: '202502', total_rows: '2000', total_bytes: '8000000', min_time: '2025-02-01 00:00:00', max_time: '2025-02-28 23:59:59' }, - ] - - const result = await queryPartitionInfo({ - database: 'default', - table: 'events', - query: async () => mockRows as never, - }) - - expect(result).toHaveLength(2) - expect(result[0]?.partitionId).toBe('202501') - expect(result[0]?.rows).toBe(1000) - expect(result[0]?.bytesOnDisk).toBe(5000000) - expect(result[1]?.partitionId).toBe('202502') - expect(result[1]?.rows).toBe(2000) - }) - - test('filters out partitions before --from', async () => { - const mockRows = [ - { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' }, - { partition_id: '202503', total_rows: '3000', total_bytes: '9000000', min_time: '2025-03-01 00:00:00', max_time: '2025-03-31 23:59:59' }, - ] - - const result = await queryPartitionInfo({ - database: 'default', - table: 'events', - from: '2025-02-01T00:00:00.000Z', - query: async () => mockRows as never, - }) - - expect(result).toHaveLength(1) - expect(result[0]?.partitionId).toBe('202503') - }) - - test('filters out partitions at or after --to', async () => { - 
const mockRows = [ - { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' }, - { partition_id: '202503', total_rows: '3000', total_bytes: '9000000', min_time: '2025-03-01 00:00:00', max_time: '2025-03-31 23:59:59' }, - ] - - const result = await queryPartitionInfo({ - database: 'default', - table: 'events', - to: '2025-03-01T00:00:00.000Z', - query: async () => mockRows as never, - }) - - expect(result).toHaveLength(1) - expect(result[0]?.partitionId).toBe('202501') - }) -}) - -describe('querySortKeyInfo', () => { - test('returns sort key info for table with DateTime sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) { - return [{ sorting_key: 'event_time' }] as T[] - } - if (sql.includes('system.columns')) { - return [{ type: 'DateTime' }] as T[] - } - return [] as T[] - } - - const result = await querySortKeyInfo({ - database: 'default', - table: 'events', - query, - }) - - expect(result).toBeDefined() - expect(result?.column).toBe('event_time') - expect(result?.type).toBe('DateTime') - expect(result?.category).toBe('datetime') - }) - - test('returns numeric category for Int64 sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) return [{ sorting_key: 'id' }] as T[] - if (sql.includes('system.columns')) return [{ type: 'Int64' }] as T[] - return [] as T[] - } - - const result = await querySortKeyInfo({ database: 'default', table: 'events', query }) - - expect(result?.category).toBe('numeric') - }) - - test('returns string category for String sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) return [{ sorting_key: 'name' }] as T[] - if (sql.includes('system.columns')) return [{ type: 'String' }] as T[] - return [] as T[] - } - - const result = await querySortKeyInfo({ database: 'default', table: 'events', query }) - - 
expect(result?.category).toBe('string') - }) - - test('extracts column name from function expression', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) return [{ sorting_key: 'toDate(event_time)' }] as T[] - if (sql.includes('system.columns')) return [{ type: 'DateTime' }] as T[] - return [] as T[] - } - - const result = await querySortKeyInfo({ database: 'default', table: 'events', query }) - - expect(result?.column).toBe('event_time') - }) - - test('returns undefined when table has no sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) return [{ sorting_key: '' }] as T[] - return [] as T[] - } - - const result = await querySortKeyInfo({ database: 'default', table: 'events', query }) - - expect(result).toBeUndefined() - }) - - test('returns first column from multi-column sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.tables')) return [{ sorting_key: 'event_time, id' }] as T[] - if (sql.includes('system.columns')) return [{ type: 'DateTime' }] as T[] - return [] as T[] - } - - const result = await querySortKeyInfo({ database: 'default', table: 'events', query }) - - expect(result?.column).toBe('event_time') - }) -}) - -describe('querySortKeyRanges', () => { - test('returns min/max per partition', async () => { - const query = async () => { - return [ - { partition_id: '202501', min_val: '2025-01-01 00:00:00', max_val: '2025-01-31 23:59:59' }, - { partition_id: '202502', min_val: '2025-02-01 00:00:00', max_val: '2025-02-28 23:59:59' }, - ] as T[] - } - - const result = await querySortKeyRanges({ - database: 'default', - table: 'events', - sortKeyColumn: 'event_time', - partitionIds: ['202501', '202502'], - query, - }) - - expect(result.size).toBe(2) - expect(result.get('202501')?.min).toBe('2025-01-01 00:00:00') - expect(result.get('202502')?.max).toBe('2025-02-28 23:59:59') - }) - - test('returns empty map for empty 
partition list', async () => { - const query = async () => [] as T[] - - const result = await querySortKeyRanges({ - database: 'default', - table: 'events', - sortKeyColumn: 'event_time', - partitionIds: [], - query, - }) - - expect(result.size).toBe(0) - }) -}) - -describe('introspectTable', () => { - test('returns partitions and sort key in a single call', async () => { - const query = async (sql: string) => { - if (sql.includes('system.parts')) { - return [ - { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' }, - ] as T[] - } - if (sql.includes('system.tables')) { - return [{ sorting_key: 'event_time' }] as T[] - } - if (sql.includes('system.columns')) { - return [{ type: 'DateTime' }] as T[] - } - return [] as T[] - } - - const result = await introspectTable({ - database: 'default', - table: 'events', - query, - }) - - expect(result.partitions).toHaveLength(1) - expect(result.partitions[0]?.partitionId).toBe('202501') - expect(result.sortKey).toBeDefined() - expect(result.sortKey?.column).toBe('event_time') - expect(result.sortKey?.category).toBe('datetime') - }) - - test('returns undefined sortKey when table has no sorting key', async () => { - const query = async (sql: string) => { - if (sql.includes('system.parts')) { - return [ - { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' }, - ] as T[] - } - if (sql.includes('system.tables')) { - return [{ sorting_key: '' }] as T[] - } - return [] as T[] - } - - const result = await introspectTable({ - database: 'default', - table: 'events', - query, - }) - - expect(result.partitions).toHaveLength(1) - expect(result.sortKey).toBeUndefined() - }) -}) diff --git a/packages/plugin-backfill/src/chunking/introspect.ts b/packages/plugin-backfill/src/chunking/introspect.ts deleted file mode 100644 index e383f6f..0000000 --- 
a/packages/plugin-backfill/src/chunking/introspect.ts +++ /dev/null @@ -1,146 +0,0 @@ -import type { PartitionInfo, SortKeyInfo } from './types.js' - -const NUMERIC_TYPES = new Set([ - 'Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256', - 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256', - 'Float32', 'Float64', -]) - -const DATETIME_TYPES = new Set(['Date', 'Date32', 'DateTime', 'DateTime64']) - -function classifySortKeyType(type: string): SortKeyInfo['category'] { - if (NUMERIC_TYPES.has(type)) return 'numeric' - if (DATETIME_TYPES.has(type)) return 'datetime' - if (type.startsWith('DateTime64(')) return 'datetime' - if (type.startsWith("DateTime('")) return 'datetime' - return 'string' -} - -export async function queryPartitionInfo(input: { - database: string - table: string - from?: string - to?: string - query: <T>(sql: string) => Promise<T[]> -}): Promise<PartitionInfo[]> { - // Force replica sync on the target table before reading system.parts. - // select_sequential_consistency is only effective on user tables, not system - // tables, so this preliminary query ensures the replica has caught up with - // all pending writes before we inspect part metadata.
- await input.query( - `SELECT 1 FROM ${input.database}.${input.table} LIMIT 1 SETTINGS select_sequential_consistency = 1` - ) - - const rows = await input.query<{ - partition_id: string - total_rows: string - total_bytes: string - min_time: string - max_time: string - }>( - `SELECT - partition_id, - toString(sum(rows)) AS total_rows, - toString(sum(bytes_on_disk)) AS total_bytes, - toString(min(min_time)) AS min_time, - toString(max(max_time)) AS max_time -FROM system.parts -WHERE database = '${input.database}' - AND table = '${input.table}' - AND active = 1 -GROUP BY partition_id -ORDER BY partition_id -SETTINGS select_sequential_consistency = 1` - ) - - const partitions: PartitionInfo[] = rows.map((row) => ({ - partitionId: row.partition_id, - rows: Number(row.total_rows), - bytesOnDisk: Number(row.total_bytes), - minTime: new Date(row.min_time).toISOString(), - maxTime: new Date(row.max_time).toISOString(), - })) - - return partitions.filter((p) => { - if (input.from && p.maxTime < input.from) return false - if (input.to && p.minTime >= input.to) return false - return true - }) -} - -export async function querySortKeyInfo(input: { - database: string - table: string - query: (sql: string) => Promise -}): Promise { - const tableRows = await input.query<{ sorting_key: string }>( - `SELECT sorting_key FROM system.tables WHERE database = '${input.database}' AND name = '${input.table}'` - ) - - const sortingKey = tableRows[0]?.sorting_key - if (!sortingKey) return undefined - - // Parse first column from sorting key (may have expressions like "toDate(event_time)") - const firstColumn = sortingKey.split(',')[0]?.trim() - if (!firstColumn) return undefined - - // If it's a function call like toDate(col), extract the column name - const match = firstColumn.match(/^\w+\((\w+)\)$/) - const columnName = match ? 
match[1] : firstColumn - if (!columnName) return undefined - - const columnRows = await input.query<{ type: string }>( - `SELECT type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}' AND name = '${columnName}'` - ) - - const type = columnRows[0]?.type - if (!type) return undefined - - return { - column: columnName, - type, - category: classifySortKeyType(type), - } -} - -export async function querySortKeyRanges(input: { - database: string - table: string - sortKeyColumn: string - partitionIds: string[] - query: <T>(sql: string) => Promise<T[]> -}): Promise<Map<string, { min: string; max: string }>> { - if (input.partitionIds.length === 0) return new Map() - - const inList = input.partitionIds.map((id) => `'${id}'`).join(', ') - const rows = await input.query<{ - partition_id: string - min_val: string - max_val: string - }>( - `SELECT _partition_id AS partition_id, toString(min(${input.sortKeyColumn})) AS min_val, toString(max(${input.sortKeyColumn})) AS max_val FROM ${input.database}.${input.table} WHERE _partition_id IN (${inList}) GROUP BY _partition_id SETTINGS select_sequential_consistency = 1` - ) - - const result = new Map() - for (const row of rows) { - result.set(row.partition_id, { min: row.min_val, max: row.max_val }) - } - return result -} - -export async function introspectTable(input: { - database: string - table: string - from?: string - to?: string - query: <T>(sql: string) => Promise<T[]> -}): Promise<{ partitions: PartitionInfo[]; sortKey?: SortKeyInfo }> { - const partitions = await queryPartitionInfo(input) - const sortKey = await querySortKeyInfo({ - database: input.database, - table: input.table, - query: input.query, - }) - - return { partitions, sortKey } -} diff --git a/packages/plugin-backfill/src/chunking/partition-slices.ts b/packages/plugin-backfill/src/chunking/partition-slices.ts new file mode 100644 index 0000000..6baa2f0 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/partition-slices.ts @@ -0,0 +1,154 @@ +import type { + ChunkEstimate, +
EstimateConfidence, + EstimateReason, + Partition, + PartitionSlice, + ChunkDerivationStep, + ChunkRange, +} from './types.js' + +export function buildRootSlice(partition: Partition): PartitionSlice { + return { + partitionId: partition.partitionId, + ranges: [], + estimate: { + rows: partition.rows, + bytesCompressed: partition.bytesCompressed, + bytesUncompressed: partition.bytesUncompressed, + confidence: 'high', + reason: 'partition-metadata', + }, + analysis: { + lineage: [], + }, + } +} + +export function buildSliceEstimate( + partition: Partition, + rows: number, + confidence: EstimateConfidence, + reason: EstimateReason, +): ChunkEstimate { + const bytesCompressed = partition.rows > 0 + ? Math.round((rows / partition.rows) * partition.bytesCompressed) + : 0 + const bytesUncompressed = partition.rows > 0 + ? Math.round((rows / partition.rows) * partition.bytesUncompressed) + : 0 + + return { + rows, + bytesCompressed, + bytesUncompressed, + confidence, + reason, + } +} + +export function buildSliceFromRows( + partition: Partition, + input: { + ranges: ChunkRange[] + rows: number + focusedValue?: PartitionSlice['analysis']['focusedValue'] + confidence: EstimateConfidence + reason: EstimateReason + lineage: ChunkDerivationStep[] + }, +): PartitionSlice { + return { + partitionId: partition.partitionId, + ranges: input.ranges, + estimate: buildSliceEstimate(partition, input.rows, input.confidence, input.reason), + analysis: { + focusedValue: input.focusedValue, + lineage: input.lineage, + }, + } +} + +export function getTargetChunkRows( + partition: Partition, + targetChunkBytes: number, +): number { + if (partition.bytesUncompressed <= 0) return partition.rows + return (targetChunkBytes * partition.rows) / partition.bytesUncompressed +} + +export function mergeAdjacentSlices( + slices: PartitionSlice[], + targetChunkBytes: number, +): PartitionSlice[] { + if (slices.length <= 1) return slices + + const merged: PartitionSlice[] = [] + let current: 
PartitionSlice | undefined + + for (const slice of slices) { + if (!current) { + current = slice + continue + } + + const canMerge = + !current.analysis.focusedValue && + !slice.analysis.focusedValue && + haveSameTrailingRanges(current.ranges, slice.ranges) && + current.estimate.bytesUncompressed + slice.estimate.bytesUncompressed <= targetChunkBytes * 1.1 + + if (!canMerge) { + merged.push(current) + current = slice + continue + } + + current = { + ...current, + ranges: mergeRanges(current.ranges, slice.ranges), + estimate: { + ...current.estimate, + rows: current.estimate.rows + slice.estimate.rows, + bytesCompressed: current.estimate.bytesCompressed + slice.estimate.bytesCompressed, + bytesUncompressed: current.estimate.bytesUncompressed + slice.estimate.bytesUncompressed, + + }, + } + } + + if (current) merged.push(current) + return merged +} + +function mergeRanges(left: ChunkRange[], right: ChunkRange[]): ChunkRange[] { + return left.map((leftRange) => { + const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex) + return rightRange === undefined + ? 
leftRange + : { + dimensionIndex: leftRange.dimensionIndex, + from: leftRange.from, + to: rightRange.to, + } + }) +} + +function haveSameTrailingRanges(left: ChunkRange[], right: ChunkRange[]): boolean { + if (left.length !== right.length) return false + + let differingDimensions = 0 + + for (const leftRange of left) { + const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex) + if (!rightRange) return false + + const same = leftRange.from === rightRange.from && leftRange.to === rightRange.to + if (!same) { + differingDimensions += 1 + if (leftRange.to !== rightRange.from) return false + } + } + + return differingDimensions <= 1 +} diff --git a/packages/plugin-backfill/src/chunking/planner.ts b/packages/plugin-backfill/src/chunking/planner.ts new file mode 100644 index 0000000..23e8da8 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/planner.ts @@ -0,0 +1,546 @@ +import pMap from 'p-map' +import { + describeSqlContext, + describeSqlOperation, + formatBytes, + getBackfillLogger, + SLOW_CLICKHOUSE_QUERY_MS, + SLOW_CLICKHOUSE_QUERY_REPEAT_INITIAL_MS, + SLOW_CLICKHOUSE_QUERY_REPEAT_MAX_MS, + summarizeSql, +} from '../logging.js' +import { buildRootSlice, mergeAdjacentSlices } from './partition-slices.js' +import { introspectPartitions, introspectSortKeys } from './services/metadata-source.js' +import { getRowProbeStrategy, getSortKeyRange, parsePlannerDateTime } from './services/row-probe.js' +import { splitSliceWithEqualWidthRanges } from './strategies/equal-width-split.js' +import { buildSingleChunkPartition } from './strategies/metadata-single-chunk.js' +import { + findQuantileBoundaryOnDimension, + splitSliceWithQuantiles, +} from './strategies/quantile-range-split.js' +import { refinePartitionSlices } from './strategies/refinement.js' +import { splitSliceWithGroupByKey } from './strategies/group-by-key-split.js' +import { buildRootStringUpperBound, splitSliceWithStringPrefixes } from 
'./strategies/string-prefix-split.js'
+import { splitSliceWithTemporalBuckets } from './strategies/temporal-bucket-split.js'
+import { getCandidateDimensions } from './strategy-policy.js'
+import type {
+  Chunk,
+  ChunkPlan,
+  GenerateChunkPlanInput,
+  Partition,
+  PartitionBuildResult,
+  PartitionSlice,
+  PlannerContext,
+  PlannerQuery,
+  SortKey,
+  TableProfile,
+} from './types.js'
+import { generateChunkId, generatePlanId } from './utils/ids.js'
+import { getChunkRange, isExactChunkRange, replaceChunkRange } from './utils/ranges.js'
+
+const MAX_SPLIT_DEPTH_MULTIPLIER = 3
+const STOP_SPLIT_FUZZ_FACTOR = 1.5
+const logger = getBackfillLogger('chunking', 'planner')
+const queryLogger = getBackfillLogger('chunking', 'clickhouse')
+
+export async function generateChunkPlan(input: GenerateChunkPlanInput): Promise<ChunkPlan> {
+  const planStartedAt = performance.now()
+  const context: PlannerContext = {
+    database: input.database,
+    table: input.table,
+    from: input.from,
+    to: input.to,
+    targetChunkBytes: input.targetChunkBytes,
+    query: createTimedPlannerQuery(input),
+    querySettings: input.querySettings,
+    rowProbeStrategy: input.rowProbeStrategy ?? 
'count', + } + + logger.info( + `starting chunk plan for ${input.database}.${input.table} (target chunk size ${formatBytes(input.targetChunkBytes)}, row probe ${context.rowProbeStrategy})` + ) + + const introspectionStartedAt = performance.now() + const partitions = await introspectPartitions(context) + const sortKeys = await introspectSortKeys(context) + const table: TableProfile = { + database: input.database, + table: input.table, + sortKeys, + } + const planId = generatePlanId() + + logger.info( + `introspection completed for ${input.database}.${input.table}: ${partitions.length} partitions, ${partitions.filter((partition) => partition.bytesUncompressed > context.targetChunkBytes).length} oversized partitions, ${sortKeys.length} sort keys (${Math.round(performance.now() - introspectionStartedAt)}ms)` + ) + + const slices: PartitionSlice[] = [] + const plannedPartitions: Partition[] = [] + for (const partition of partitions) { + const result = await planPartition(context, partition, table) + slices.push(...result.slices) + plannedPartitions.push({ + ...partition, + diagnostics: result.diagnostics, + }) + } + + const chunks = assignChunkIds(planId, slices) + const chunkBytes = chunks.map((chunk) => chunk.estimate.bytesUncompressed) + const stats = { + totalPartitions: partitions.length, + oversizedPartitions: partitions.filter((partition) => partition.bytesUncompressed > context.targetChunkBytes).length, + focusedChunks: chunks.filter((chunk) => chunk.analysis.focusedValue !== undefined).length, + totalChunks: chunks.length, + avgChunkBytes: chunkBytes.length > 0 + ? Math.round(chunkBytes.reduce((sum, value) => sum + value, 0) / chunkBytes.length) + : 0, + maxChunkBytes: chunkBytes.length > 0 ? Math.max(...chunkBytes) : 0, + minChunkBytes: chunkBytes.length > 0 ? 
Math.min(...chunkBytes) : 0,
+  }
+
+  logger.info(
+    `finished chunk plan for ${input.database}.${input.table}: ${chunks.length} chunks across ${partitions.length} partitions, ${formatBytes(partitions.reduce((sum, partition) => sum + partition.bytesUncompressed, 0))} uncompressed (${Math.round(performance.now() - planStartedAt)}ms)`
+  )
+
+  return {
+    planId,
+    generatedAt: new Date().toISOString(),
+    rowProbeStrategy: getRowProbeStrategy(context),
+    targetChunkBytes: context.targetChunkBytes,
+    table,
+    partitions: plannedPartitions,
+    chunks,
+    totalRows: partitions.reduce((sum, partition) => sum + partition.rows, 0),
+    totalBytesCompressed: partitions.reduce((sum, partition) => sum + partition.bytesCompressed, 0),
+    totalBytesUncompressed: partitions.reduce((sum, partition) => sum + partition.bytesUncompressed, 0),
+    stats,
+  }
+}
+
+async function planPartition(
+  context: PlannerContext,
+  partition: Partition,
+  table: TableProfile,
+): Promise<PartitionBuildResult> {
+  const startedAt = performance.now()
+  logger.info(
+    `planning partition ${partition.partitionId} (${partition.rows.toLocaleString()} rows, ${formatBytes(partition.bytesUncompressed)} uncompressed, target ${formatBytes(context.targetChunkBytes)})`
+  )
+
+  if (partition.bytesUncompressed <= context.targetChunkBytes || table.sortKeys.length === 0) {
+    const refined = await refinePartitionSlices(
+      context,
+      partition,
+      buildSingleChunkPartition(partition),
+      table.sortKeys,
+      false
+    )
+
+    logger.info(
+      `kept partition ${partition.partitionId} as a single chunk (${Math.round(performance.now() - startedAt)}ms, ${partition.bytesUncompressed <= context.targetChunkBytes ? 
'within target size' : 'no sort keys available'})` + ) + + return refined + } + + const rootSlice = buildRootSlice(partition) + const splitSlices = await splitSliceRecursively(context, partition, rootSlice, table.sortKeys, 0) + const mergedSlices = mergeAdjacentSlices(splitSlices, context.targetChunkBytes) + const usedDistributionFallback = mergedSlices.some((slice) => + slice.estimate.reason === 'string-prefix-distribution' || + slice.estimate.reason === 'group-by-key-distribution' || + slice.estimate.reason === 'temporal-distribution' || + slice.estimate.reason === 'equal-width-distribution' + ) + + logger.debug( + `partition ${partition.partitionId} produced ${splitSlices.length} candidate slices before refinement (${mergedSlices.length} after merge, distribution fallback ${usedDistributionFallback ? 'used' : 'not used'})` + ) + + const refined = await refinePartitionSlices( + context, + partition, + mergedSlices, + table.sortKeys, + usedDistributionFallback + ) + + logger.info( + `finished partition ${partition.partitionId}: ${refined.slices.length} chunks (${Math.round(performance.now() - startedAt)}ms)` + ) + + return refined +} + +async function splitSliceRecursively( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + depth: number, +): Promise { + if (slice.estimate.bytesUncompressed <= context.targetChunkBytes * STOP_SPLIT_FUZZ_FACTOR) { + logger.debug( + `stopped splitting slice for partition ${partition.partitionId} at depth ${depth}: ${formatBytes(slice.estimate.bytesUncompressed)} is within threshold ${formatBytes(Math.round(context.targetChunkBytes * STOP_SPLIT_FUZZ_FACTOR))}` + ) + return [slice] + } + + if (depth >= sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER) { + logger.debug( + `stopped splitting slice for partition ${partition.partitionId}: reached max depth ${sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER}` + ) + return [slice] + } + + const children = await splitOversizedSlice(context, partition, 
slice, sortKeys, depth) + if (children.length <= 1) { + logger.debug(`slice could not be split further for partition ${partition.partitionId} at depth ${depth}`) + return [slice] + } + + const finalized: PartitionSlice[] = [] + for (const child of children) { + finalized.push(...(await splitSliceRecursively(context, partition, child, sortKeys, depth + 1))) + } + + return finalized +} + +async function splitOversizedSlice( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + depth: number, +): Promise { + const candidateDimensions = getCandidateDimensions(sortKeys, slice) + + logger.debug( + `attempting oversized slice split for partition ${partition.partitionId} at depth ${depth} (${formatBytes(slice.estimate.bytesUncompressed)} uncompressed across ${candidateDimensions.length} candidate dimensions)` + ) + + for (const dimensionIndex of candidateDimensions) { + const preparedSlice = await hydrateSliceRange(context, slice, sortKeys, dimensionIndex) + if (!preparedSlice) continue + + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) continue + + const rootLike = depth === 0 + const focusedValue = findFocusedValue(preparedSlice, sortKeys) + + logger.debug( + `trying split dimension ${dimensionIndex} on ${partition.partitionId} using ${sortKey.name} (${sortKey.category})` + ) + + if (sortKey.category === 'string') { + if (rootLike) { + // First pass: equal-width EXPLAIN ESTIMATE (fast, metadata-only) + const estimateSlices = await splitWithEqualWidthEstimate(context, partition, preparedSlice, sortKeys, dimensionIndex) + if (isEffectiveSplit(preparedSlice, estimateSlices)) { + logger.debug(`equal-width estimate split succeeded for partition ${partition.partitionId}: ${estimateSlices.length} slices`) + return applyFocusedValue(estimateSlices, focusedValue) + } + } else { + // Refinement pass: full GROUP BY key to detect hot keys directly + const keySlices = await splitSliceWithGroupByKey(context, partition, 
preparedSlice, sortKeys, dimensionIndex) + if (keySlices && isEffectiveSplit(preparedSlice, keySlices)) { + logger.debug(`group-by-key split succeeded for partition ${partition.partitionId}: ${keySlices.length} slices`) + return applyFocusedValue(keySlices, focusedValue) + } + + // Single hot key: narrow the range and re-enter dispatch so focusedValue is detected + if (keySlices?.length === 1 && keySlices[0]?.analysis.focusedValue) { + const refined = keySlices[0] + const currentRange = getChunkRange(preparedSlice, dimensionIndex) + const refinedRange = getChunkRange(refined, dimensionIndex) + if (currentRange.from !== refinedRange.from || currentRange.to !== refinedRange.to) { + logger.debug(`narrowed single hot key for partition ${partition.partitionId}, re-entering dispatch`) + return splitOversizedSlice(context, partition, refined, sortKeys, depth) + } + } + + // Fallback: GROUP BY prefix when too many distinct keys + const stringSlices = await splitSliceWithStringPrefixes(context, partition, preparedSlice, sortKeys, dimensionIndex) + if (isEffectiveSplit(preparedSlice, stringSlices)) { + logger.debug(`string-prefix split succeeded for partition ${partition.partitionId}: ${stringSlices.length} slices`) + return applyFocusedValue(stringSlices, focusedValue) + } + } + } + + if (sortKey.category === 'datetime' && (!rootLike || focusedValue !== undefined)) { + const temporalSlices = await splitSliceWithTemporalBuckets( + context, + partition, + markFocusedSlice(preparedSlice, focusedValue), + sortKeys, + dimensionIndex + ) + if (isEffectiveSplit(preparedSlice, temporalSlices)) { + logger.debug(`temporal bucket split succeeded for partition ${partition.partitionId}: ${temporalSlices.length} slices`) + return applyFocusedValue(temporalSlices, focusedValue) + } + } + + const rangedSlices = await splitWithRanges(context, partition, preparedSlice, sortKeys, dimensionIndex) + if (isEffectiveSplit(preparedSlice, rangedSlices)) { + logger.debug(`range-based split succeeded 
for partition ${partition.partitionId}: ${rangedSlices.length} slices`) + return applyFocusedValue(rangedSlices, focusedValue) + } + } + + logger.debug(`no effective split found for partition ${partition.partitionId} at depth ${depth}`) + + return [slice] +} + +async function splitWithRanges( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) return [slice] + + const range = getChunkRange(slice, dimensionIndex) + if (range.from === undefined || range.to === undefined) return [slice] + if (sortKey.category === 'string' && isExactChunkRange(range)) return [slice] + + const subCount = Math.ceil(slice.estimate.bytesUncompressed / context.targetChunkBytes) + if (subCount <= 1) return [slice] + + const quantileBoundaries = await buildQuantileBoundaries(context, slice, sortKeys, dimensionIndex, subCount) + if (quantileBoundaries) { + logger.debug( + `using quantile-aligned range split for partition ${partition.partitionId} on dimension ${dimensionIndex} with ${quantileBoundaries.length} boundaries` + ) + return splitSliceWithQuantiles(context, partition, slice, sortKeys, dimensionIndex, quantileBoundaries) + } + + logger.debug( + `falling back to equal-width range split for partition ${partition.partitionId} on dimension ${dimensionIndex} with ${subCount} subranges` + ) + + return splitSliceWithEqualWidthRanges( + context, + partition, + slice, + sortKeys, + dimensionIndex, + range.from, + range.to, + subCount + ) +} + +async function splitWithEqualWidthEstimate( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const estimateContext: PlannerContext = { + ...context, + rowProbeStrategy: 'explain-estimate', + } + return splitWithRanges(estimateContext, partition, slice, sortKeys, dimensionIndex) +} + +async function 
buildQuantileBoundaries( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + subCount: number, +): Promise { + const range = getChunkRange(slice, dimensionIndex) + if (range.from === undefined || range.to === undefined) return undefined + + const steps = Array.from({ length: subCount - 1 }, (_, i) => i + 1) + const foundBoundaries = await pMap( + steps, + (step) => { + const targetCumRows = Math.round((slice.estimate.rows * step) / subCount) + return findQuantileBoundaryOnDimension(context, slice, sortKeys, dimensionIndex, targetCumRows) + }, + { concurrency: 10 }, + ) + const boundaries = [range.from, ...foundBoundaries] + + const uniqueBoundaryCount = new Set(boundaries).size + if (uniqueBoundaryCount <= Math.max(2, Math.ceil(subCount / 3))) { + logger.debug( + `discarded quantile boundaries for partition ${slice.partitionId} on dimension ${dimensionIndex} because only ${uniqueBoundaryCount} unique boundaries remained` + ) + return undefined + } + + return boundaries.concat([range.to]) +} + +async function hydrateSliceRange( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const existingRange = getChunkRange(slice, dimensionIndex) + if (existingRange.from !== undefined && existingRange.to !== undefined) { + return slice + } + + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) return undefined + + const observedRange = await getSortKeyRange(context, slice.partitionId, slice.ranges, sortKeys, sortKey) + if (!observedRange) return undefined + + logger.debug( + `hydrated missing sort-key range for partition ${slice.partitionId} on ${sortKey.name}: [${observedRange.min}, ${observedRange.max}]` + ) + + return { + ...slice, + ranges: replaceChunkRange( + slice, + dimensionIndex, + observedRange.min, + toExclusiveUpperBound(observedRange.max, sortKey) + ), + } +} + +function toExclusiveUpperBound(value: string, sortKey: SortKey): string { + if 
(sortKey.category === 'string') {
+    return buildRootStringUpperBound(value)
+  }
+  if (sortKey.category === 'datetime') {
+    return new Date(parsePlannerDateTime(value) + 1000).toISOString()
+  }
+  return String(Number(value) + 1)
+}
+
+function isEffectiveSplit(parentSlice: PartitionSlice, childSlices: PartitionSlice[]): boolean {
+  if (childSlices.length <= 1) return false
+
+  return childSlices.some((childSlice) =>
+    childSlice.estimate.rows !== parentSlice.estimate.rows ||
+    JSON.stringify(childSlice.ranges) !== JSON.stringify(parentSlice.ranges)
+  )
+}
+
+function findFocusedValue(
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+): { dimensionIndex: number; value: string } | undefined {
+  for (const range of slice.ranges) {
+    const sortKey = sortKeys[range.dimensionIndex]
+    if (sortKey?.category !== 'string') continue
+    if (isExactChunkRange(range) && range.from !== undefined) {
+      return { dimensionIndex: range.dimensionIndex, value: range.from }
+    }
+  }
+  return undefined
+}
+
+function applyFocusedValue(
+  slices: PartitionSlice[],
+  focusedValue: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice[] {
+  if (!focusedValue) return slices
+  return slices.map((slice) => markFocusedSlice(slice, focusedValue))
+}
+
+function markFocusedSlice(
+  slice: PartitionSlice,
+  focusedValue: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice {
+  if (!focusedValue) return slice
+  return {
+    ...slice,
+    analysis: {
+      ...slice.analysis,
+      focusedValue,
+    },
+  }
+}
+
+function assignChunkIds(planId: string, slices: PartitionSlice[]): Chunk[] {
+  const chunkIndexes = new Map<string, number>()
+
+  return slices.map((slice) => {
+    const currentIndex = chunkIndexes.get(slice.partitionId) ?? 
0 + chunkIndexes.set(slice.partitionId, currentIndex + 1) + return { + ...slice, + id: generateChunkId(planId, slice.partitionId, currentIndex), + } + }) +} + +function createTimedPlannerQuery( + input: Pick, +): PlannerQuery { + return async function timedPlannerQuery( + sql: string, + settings?: Record, + ): Promise { + const startedAt = performance.now() + const sqlSummary = summarizeSql(sql) + const operation = describeSqlOperation(sql) + const context = describeSqlContext(sql) + const queryLabel = context ? `${operation} (${context})` : operation + let repeatTimer: ReturnType | undefined + let repeatDelayMs = SLOW_CLICKHOUSE_QUERY_REPEAT_INITIAL_MS + const scheduleRepeatWarning = () => { + repeatTimer = setTimeout(() => { + const elapsedRepeatMs = Math.round(performance.now() - startedAt) + queryLogger.warning( + `clickhouse query still running for ${input.database}.${input.table} after ${elapsedRepeatMs}ms: ${queryLabel}` + ) + repeatDelayMs = Math.min(repeatDelayMs * 2, SLOW_CLICKHOUSE_QUERY_REPEAT_MAX_MS) + scheduleRepeatWarning() + }, repeatDelayMs) + } + const slowTimer = setTimeout(() => { + const elapsedMs = Math.round(performance.now() - startedAt) + queryLogger.warning( + `clickhouse query still running for ${input.database}.${input.table} after ${elapsedMs}ms: ${queryLabel} | ${sqlSummary}` + ) + scheduleRepeatWarning() + }, SLOW_CLICKHOUSE_QUERY_MS) + + queryLogger.debug(`clickhouse query started for ${input.database}.${input.table}: ${sqlSummary}`) + + try { + const rows = await input.query(sql, settings) + const durationMs = Math.round(performance.now() - startedAt) + + if (durationMs >= SLOW_CLICKHOUSE_QUERY_MS) { + queryLogger.debug( + `slow clickhouse query completed for ${input.database}.${input.table} in ${durationMs}ms (${rows.length} rows): ${queryLabel}` + ) + } else { + queryLogger.debug( + `clickhouse query completed for ${input.database}.${input.table} in ${durationMs}ms (${rows.length} rows): ${sqlSummary}` + ) + } + + return rows + } 
catch (error) { + queryLogger.error( + `clickhouse query failed for ${input.database}.${input.table} after ${Math.round(performance.now() - startedAt)}ms: ${sqlSummary}` + ) + throw error + } finally { + clearTimeout(slowTimer) + if (repeatTimer) clearTimeout(repeatTimer) + } + } +} diff --git a/packages/plugin-backfill/src/chunking/services/distribution-source.ts b/packages/plugin-backfill/src/chunking/services/distribution-source.ts new file mode 100644 index 0000000..48e884d --- /dev/null +++ b/packages/plugin-backfill/src/chunking/services/distribution-source.ts @@ -0,0 +1,109 @@ +import { buildWhereClauseFromRanges } from '../sql.js' +import type { + ChunkRange, + PlannerContext, + SortKey, + StringPrefixBucket, + TemporalBucket, +} from '../types.js' + +type QueryContext = Pick + +export async function probeStringPrefixDistribution( + context: QueryContext, + partitionId: string, + ranges: ChunkRange[], + sortKey: SortKey, + dimensionIndex: number, + depth: number, + sortKeys: SortKey[], +): Promise { + const range = ranges.find((candidate) => candidate.dimensionIndex === dimensionIndex) + if (!range?.from || !range.to) return [] + + const rows = await context.query<{ prefix: string; cnt: string }>(` +SELECT + substring(${sortKey.name}, 1, ${depth}) AS prefix, + count() AS cnt +FROM ${context.database}.${context.table} +WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)} +GROUP BY prefix +ORDER BY prefix`, + context.querySettings, + ) + + return rows.map((row) => ({ + value: row.prefix, + rowCount: Number(row.cnt), + isExactValue: Buffer.from(row.prefix, 'latin1').length < depth, + })) +} + +export interface StringKeyBucket { + value: string + rowCount: number +} + +export async function probeStringKeyDistribution( + context: QueryContext, + partitionId: string, + ranges: ChunkRange[], + sortKey: SortKey, + dimensionIndex: number, + sortKeys: SortKey[], + limit: number, +): Promise { + const range = ranges.find((candidate) => 
candidate.dimensionIndex === dimensionIndex) + if (!range?.from || !range.to) return undefined + + const rows = await context.query<{ key: string; cnt: string }>(` +SELECT + ${sortKey.name} AS key, + count() AS cnt +FROM ${context.database}.${context.table} +WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)} +GROUP BY key +ORDER BY cnt DESC +LIMIT ${limit + 1}`, + context.querySettings, + ) + + if (rows.length > limit) return undefined + + return rows.map((row) => ({ + value: row.key, + rowCount: Number(row.cnt), + })) +} + +export async function probeTemporalDistribution( + context: QueryContext, + partitionId: string, + ranges: ChunkRange[], + sortKeys: SortKey[], + dimensionIndex: number, + grain: 'day' | 'hour', +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey || sortKey.category !== 'datetime') return [] + + const bucketExpression = grain === 'day' + ? `toStartOfDay(${sortKey.name})` + : `toStartOfHour(${sortKey.name})` + + const rows = await context.query<{ bucket: string; cnt: string }>(` +SELECT + formatDateTime(${bucketExpression}, '%Y-%m-%dT%H:%i:%sZ') AS bucket, + count() AS cnt +FROM ${context.database}.${context.table} +WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)} +GROUP BY bucket +ORDER BY bucket`, + context.querySettings, + ) + + return rows.map((row) => ({ + start: row.bucket, + rowCount: Number(row.cnt), + })) +} diff --git a/packages/plugin-backfill/src/chunking/services/metadata-source.ts b/packages/plugin-backfill/src/chunking/services/metadata-source.ts new file mode 100644 index 0000000..f2ad3a6 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/services/metadata-source.ts @@ -0,0 +1,170 @@ +import type { Partition, PlannerContext, SortKey, SortKeyCategory } from '../types.js' + +/** ClickHouse returns timestamps without timezone — they are always UTC. 
*/ +function parseClickHouseUTCTimestamp(value: string): string { + const trimmed = value.trim() + const normalized = trimmed.includes('T') ? trimmed : `${trimmed.replace(' ', 'T')}Z` + return new Date(normalized.endsWith('Z') ? normalized : `${normalized}Z`).toISOString() +} + +const NUMERIC_TYPES = /^(U?Int|Float|Decimal)/ +const DATETIME_TYPES = /^(Date|DateTime)/ + +function classifySortKeyType(type: string): SortKeyCategory { + if (NUMERIC_TYPES.test(type)) return 'numeric' + if (DATETIME_TYPES.test(type)) return 'datetime' + return 'string' +} + +function boundaryEncodingForCategory(category: SortKeyCategory): SortKey['boundaryEncoding'] { + return category === 'string' ? 'hex-latin1' : 'literal' +} + +function splitTopLevelCsv(input: string): string[] { + const parts: string[] = [] + let current = '' + let depth = 0 + let quote: '\'' | '"' | undefined + + for (let index = 0; index < input.length; index++) { + const char = input[index] + if (char === undefined) continue + + if (quote) { + current += char + if (char === quote && input[index - 1] !== '\\') quote = undefined + continue + } + + if (char === '\'' || char === '"') { + quote = char + current += char + continue + } + + if (char === '(') { + depth += 1 + current += char + continue + } + + if (char === ')') { + depth = Math.max(0, depth - 1) + current += char + continue + } + + if (char === ',' && depth === 0) { + parts.push(current.trim()) + current = '' + continue + } + + current += char + } + + if (current.trim().length > 0) { + parts.push(current.trim()) + } + + return parts +} + +function resolveSortKeyColumn(expression: string, knownColumns: Set): string | undefined { + const trimmed = expression.trim() + if (knownColumns.has(trimmed)) return trimmed + + const identifiers = Array.from(trimmed.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g)) + .map((match) => match[0]) + .filter((identifier): identifier is string => Boolean(identifier)) + + const matches = Array.from(new 
Set(identifiers.filter((identifier) => knownColumns.has(identifier)))) + if (matches.length === 1) return matches[0] + if (knownColumns.size === 0 && identifiers.length > 0) { + return identifiers[identifiers.length - 1] + } + return undefined +} + +export async function introspectPartitions(context: PlannerContext): Promise { + await context.query( + `SELECT 1 FROM ${context.database}.${context.table} LIMIT 1 SETTINGS select_sequential_consistency = 1` + ) + + const rows = await context.query<{ + partition_id: string + total_rows: string + total_bytes: string + total_uncompressed_bytes?: string + min_time: string + max_time: string + }>(`SELECT + partition_id, + toString(sum(rows)) AS total_rows, + toString(sum(bytes_on_disk)) AS total_bytes, + toString(sum(data_uncompressed_bytes)) AS total_uncompressed_bytes, + toString(min(min_time)) AS min_time, + toString(max(max_time)) AS max_time +FROM system.parts +WHERE database = '${context.database}' + AND table = '${context.table}' + AND active = 1 +GROUP BY partition_id +ORDER BY partition_id +SETTINGS select_sequential_consistency = 1`) + + return rows + .map((row) => ({ + partitionId: row.partition_id, + rows: Number(row.total_rows), + bytesCompressed: Number(row.total_bytes), + bytesUncompressed: Number(row.total_uncompressed_bytes ?? 
row.total_bytes), + minTime: parseClickHouseUTCTimestamp(row.min_time), + maxTime: parseClickHouseUTCTimestamp(row.max_time), + })) + .filter((partition) => { + if (context.from && partition.maxTime < context.from) return false + if (context.to && partition.minTime >= context.to) return false + return true + }) +} + +export async function introspectSortKeys(context: PlannerContext): Promise { + const tableRows = await context.query<{ sorting_key: string }>( + `SELECT sorting_key FROM system.tables WHERE database = '${context.database}' AND name = '${context.table}'` + ) + + const sortingKey = tableRows[0]?.sorting_key + if (!sortingKey) return [] + + const expressions = splitTopLevelCsv(sortingKey) + if (expressions.length === 0) return [] + + const columnRows = await context.query<{ name?: string; type: string }>( + `SELECT name, type FROM system.columns WHERE database = '${context.database}' AND table = '${context.table}'` + ) + + const typeByName = new Map( + columnRows + .filter((row): row is { name: string; type: string } => Boolean(row.name)) + .map((row) => [row.name, row.type]) + ) + + const knownColumns = new Set(typeByName.keys()) + + return expressions.flatMap((expression, index) => { + const column = resolveSortKeyColumn(expression, knownColumns) + const type = column + ? typeByName.get(column) ?? columnRows[index]?.type ?? 
columnRows[0]?.type + : undefined + if (!column || !type) return [] + + const category = classifySortKeyType(type) + return [{ + name: column, + type, + category, + boundaryEncoding: boundaryEncodingForCategory(category), + }] + }) +} diff --git a/packages/plugin-backfill/src/chunking/services/row-probe.ts b/packages/plugin-backfill/src/chunking/services/row-probe.ts new file mode 100644 index 0000000..5b0e2c0 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/services/row-probe.ts @@ -0,0 +1,108 @@ +import { buildCountSql, buildEstimateSql, buildWhereClauseFromRanges } from '../sql.js' +import type { + ChunkRange, + EstimateFilter, + PlannerContext, + RowProbeStrategy, + SortKey, +} from '../types.js' + +type QueryContext = Pick + +export function getRowProbeStrategy(context: Pick): RowProbeStrategy { + return context.rowProbeStrategy +} + +export async function estimateRows( + context: PlannerContext, + filter: EstimateFilter, + sortKeys: SortKey[], +): Promise { + if (getRowProbeStrategy(context) === 'count') { + return countRowsExact(context, filter, sortKeys) + } + + const rows = await context.query>( + buildEstimateSql(filter, sortKeys, context, getRowProbeStrategy(context)), + context.querySettings, + ) + + const firstRow = rows[0] + if (!firstRow) return 0 + + for (const [key, value] of Object.entries(firstRow)) { + if (!key.toLowerCase().includes('row')) continue + const parsed = Number(value ?? 0) + if (Number.isFinite(parsed)) return parsed + } + + for (const value of Object.values(firstRow)) { + const parsed = Number(value ?? 0) + if (Number.isFinite(parsed)) return parsed + } + + return 0 +} + +export async function countRowsExact( + context: QueryContext, + filter: EstimateFilter, + sortKeys: SortKey[], +): Promise { + const rows = await context.query<{ cnt: string }>(buildCountSql(filter, sortKeys, context), context.querySettings) + return Number(rows[0]?.cnt ?? 
0)
+}
+
+export async function countRows(
+  context: QueryContext,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+): Promise<number> {
+  const filter: EstimateFilter = {
+    partitionId,
+    ranges,
+    exactDimensionIndex: undefined,
+    exactValue: undefined,
+  }
+  return countRowsExact(context, filter, sortKeys)
+}
+
+export async function countPartitionRows(
+  context: QueryContext,
+  partitionId: string,
+): Promise<number> {
+  const rows = await context.query<{ cnt: string }>(
+    `SELECT count() AS cnt FROM ${context.database}.${context.table} WHERE _partition_id = '${partitionId}'`,
+    context.querySettings,
+  )
+  return Number(rows[0]?.cnt ?? 0)
+}
+
+export async function getSortKeyRange(
+  context: QueryContext,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+  sortKey: SortKey,
+): Promise<{ min: string; max: string } | undefined> {
+  const rows = await context.query<{ minVal: string; maxVal: string }>(`
+SELECT
+  toString(min(${sortKey.name})) AS minVal,
+  toString(max(${sortKey.name})) AS maxVal
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)}`,
+    context.querySettings,
+  )
+
+  if (rows.length === 0) return undefined
+  return {
+    min: rows[0]?.minVal ?? '',
+    max: rows[0]?.maxVal ?? '',
+  }
+}
+
+export function parsePlannerDateTime(value: string): number {
+  const normalized = value.includes('T') ? value : value.replace(' ', 'T')
+  return Date.parse(normalized.endsWith('Z') ? 
normalized : `${normalized}Z`) +} diff --git a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts new file mode 100644 index 0000000..b7b4d6f --- /dev/null +++ b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts @@ -0,0 +1,451 @@ +import { describe, expect, test } from 'bun:test' + +import { analyzeAndChunk } from './analyze.js' +import { buildChunkExecutionSql } from './sql.js' +import type { Chunk, ChunkPlan } from './types.js' + +const MiB = 1024 ** 2 + +type RowValue = string | number + +interface FixtureRow { + _partition_id: string + event_time: string + [key: string]: RowValue +} + +function isoAt(day: number, hour: number, minute = 0): string { + return new Date(Date.UTC(2026, 0, day, hour, minute, 0)).toISOString() +} + +function createFixtureQuery(input: { + database: string + table: string + rows: FixtureRow[] + sortKeys: Array<{ column: string; type: string }> + bytesPerRow?: number + uncompressedBytesPerRow?: number +}) { + const bytesPerRow = input.bytesPerRow ?? 1024 + const uncompressedBytesPerRow = input.uncompressedBytesPerRow ?? 
bytesPerRow * 2 + + return async function query(sql: string): Promise { + if (sql.includes(`SELECT 1 FROM ${input.database}.${input.table} LIMIT 1`)) { + return [{ ok: 1 }] as T[] + } + + if (sql.includes('FROM system.parts')) { + return summarizePartitions(input.rows, bytesPerRow, uncompressedBytesPerRow) as T[] + } + + if (sql.includes('FROM system.tables')) { + return [{ sorting_key: input.sortKeys.map((key) => key.column).join(', ') }] as T[] + } + + if (sql.includes('FROM system.columns')) { + return input.sortKeys.map((key) => ({ name: key.column, type: key.type })) as T[] + } + + const filteredRows = filterRows(sql, input.rows) + + if (sql.startsWith('EXPLAIN ESTIMATE')) { + return [{ rows: String(filteredRows.length) }] as T[] + } + + if (sql.includes(' AS key') && sql.includes('GROUP BY key')) { + const match = sql.match(/^\s*SELECT\s+(\w+)\s+AS key/m) + const column = match?.[1] + if (!column) return [] as T[] + + const limitMatch = sql.match(/LIMIT\s+(\d+)/) + const limit = limitMatch ? Number(limitMatch[1]) : Infinity + + const grouped = new Map() + for (const row of filteredRows) { + const value = String(row[column] ?? '') + grouped.set(value, (grouped.get(value) ?? 0) + 1) + } + + return Array.from(grouped.entries()) + .sort(([, a], [, b]) => b - a) + .slice(0, limit) + .map(([key, cnt]) => ({ key, cnt: String(cnt) })) as T[] + } + + if (sql.includes('substring(')) { + const match = sql.match(/substring\((\w+), 1, (\d+)\) AS prefix/) + const column = match?.[1] + const depth = Number(match?.[2] ?? 0) + if (!column || depth <= 0) return [] as T[] + + const grouped = new Map() + for (const row of filteredRows) { + const value = String(row[column] ?? '') + const prefix = Buffer.from(value, 'latin1').subarray(0, depth).toString('latin1') + grouped.set(prefix, (grouped.get(prefix) ?? 
0) + 1) + } + + return Array.from(grouped.entries()) + .sort(([left], [right]) => compareLatin1(left, right)) + .map(([prefix, cnt]) => ({ prefix, cnt: String(cnt) })) as T[] + } + + if (sql.includes('formatDateTime(toStartOfDay(') || sql.includes('formatDateTime(toStartOfHour(')) { + const grain = sql.includes('toStartOfDay(') ? 'day' : 'hour' + const columnMatch = sql.match(/toStartOf(?:Day|Hour)\((\w+)\)/) + const column = columnMatch?.[1] + if (!column) return [] as T[] + + const grouped = new Map() + for (const row of filteredRows) { + const bucket = grain === 'day' ? toStartOfDay(String(row[column])) : toStartOfHour(String(row[column])) + grouped.set(bucket, (grouped.get(bucket) ?? 0) + 1) + } + + return Array.from(grouped.entries()) + .sort(([left], [right]) => left.localeCompare(right)) + .map(([bucket, cnt]) => ({ bucket, cnt: String(cnt) })) as T[] + } + + if (sql.includes('toString(min(') && sql.includes('toString(max(')) { + const match = sql.match(/toString\(min\((\w+)\)\) AS minVal,\s+toString\(max\(\1\)\) AS maxVal/s) + const column = match?.[1] + if (!column || filteredRows.length === 0) return [] as T[] + + const values = filteredRows.map((row) => row[column]).filter((value) => value !== undefined) + if (values.length === 0) return [] as T[] + + return [{ + minVal: formatValueForMinMax(values.reduce((current, candidate) => compareValues(candidate, current) < 0 ? candidate : current)), + maxVal: formatValueForMinMax(values.reduce((current, candidate) => compareValues(candidate, current) > 0 ? 
candidate : current)), + }] as T[] + } + + if (sql.includes('SELECT count() AS cnt')) { + return [{ cnt: String(filteredRows.length) }] as T[] + } + + return [] as T[] + } +} + +function summarizePartitions(rows: FixtureRow[], bytesPerRow: number, uncompressedBytesPerRow: number) { + const byPartition = new Map() + for (const row of rows) { + const list = byPartition.get(row._partition_id) + if (list) list.push(row) + else byPartition.set(row._partition_id, [row]) + } + + return Array.from(byPartition.entries()) + .sort(([left], [right]) => left.localeCompare(right)) + .map(([partitionId, partitionRows]) => ({ + partition_id: partitionId, + total_rows: String(partitionRows.length), + total_bytes: String(partitionRows.length * bytesPerRow), + total_uncompressed_bytes: String(partitionRows.length * uncompressedBytesPerRow), + min_time: String(partitionRows.reduce((min, row) => row.event_time < min ? row.event_time : min, partitionRows[0]?.event_time ?? '')), + max_time: String(partitionRows.reduce((max, row) => row.event_time > max ? row.event_time : max, partitionRows[0]?.event_time ?? 
'')), + })) +} + +function filterRows(sql: string, rows: FixtureRow[]): FixtureRow[] { + const whereMatch = sql.match(/WHERE\s+([\s\S]*?)(?:GROUP BY|ORDER BY|SETTINGS|$)/i) + if (!whereMatch?.[1]) return rows + + const clauses = whereMatch[1] + .split(/\s+AND\s+/) + .map((clause) => clause.replace(/\s+/g, ' ').trim()) + .filter(Boolean) + + return rows.filter((row) => clauses.every((clause) => evaluateClause(clause, row))) +} + +function evaluateClause(clause: string, row: FixtureRow): boolean { + let match = clause.match(/^_partition_id = '([^']+)'$/) + if (match) return row._partition_id === match[1] + + match = clause.match(/^(\w+) >= parseDateTimeBestEffort\('([^']+)'\)$/) + if (match) return Date.parse(String(row[match[1]])) >= Date.parse(match[2]) + + match = clause.match(/^(\w+) < parseDateTimeBestEffort\('([^']+)'\)$/) + if (match) return Date.parse(String(row[match[1]])) < Date.parse(match[2]) + + match = clause.match(/^(\w+) >= unhex\('([0-9a-f]*)'\)$/i) + if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) >= 0 + + match = clause.match(/^(\w+) < unhex\('([0-9a-f]*)'\)$/i) + if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) < 0 + + match = clause.match(/^(\w+) >= '([^']+)'$/) + if (match) return comparePrimitive(row[match[1]], match[2]) >= 0 + + match = clause.match(/^(\w+) < '([^']+)'$/) + if (match) return comparePrimitive(row[match[1]], match[2]) < 0 + + match = clause.match(/^(\w+) >= (-?\d+(?:\.\d+)?)$/) + if (match) return Number(row[match[1]]) >= Number(match[2]) + + match = clause.match(/^(\w+) < (-?\d+(?:\.\d+)?)$/) + if (match) return Number(row[match[1]]) < Number(match[2]) + + throw new Error(`Unsupported test clause: ${clause}`) +} + +function comparePrimitive(left: RowValue | undefined, right: string): number { + if (typeof left === 'number') return left - Number(right) + return String(left ?? 
'').localeCompare(right)
+}
+
+function compareValues(left: RowValue, right: RowValue): number {
+  if (typeof left === 'number' && typeof right === 'number') return left - right
+  return compareLatin1(String(left), String(right))
+}
+
+function formatValueForMinMax(value: RowValue): string {
+  return String(value)
+}
+
+function compareLatin1(left: string, right: string): number {
+  return Buffer.from(left, 'latin1').compare(Buffer.from(right, 'latin1'))
+}
+
+function toStartOfDay(value: string): string {
+  const date = new Date(value)
+  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate(), 0, 0, 0)).toISOString()
+}
+
+function toStartOfHour(value: string): string {
+  const date = new Date(value)
+  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate(), date.getUTCHours(), 0, 0)).toISOString()
+}
+
+async function planFixture(input: {
+  rows: FixtureRow[]
+  sortKeys: Array<{ column: string; type: string }>
+  maxChunkBytes: number
+}): Promise<ChunkPlan> {
+  const query = createFixtureQuery({
+    database: 'app',
+    table: 'events',
+    rows: input.rows,
+    sortKeys: input.sortKeys,
+  })
+
+  return analyzeAndChunk({
+    database: 'app',
+    table: 'events',
+    targetChunkBytes: input.maxChunkBytes,
+    query,
+  })
+}
+
+function strategyIds(chunk: Pick<Chunk, 'analysis'>): string[] {
+  return chunk.analysis.lineage.map((step) => step.strategyId)
+}
+
+function buildSqlForChunk(plan: ChunkPlan, chunk: Chunk): string {
+  return buildChunkExecutionSql({
+    planId: 'fixture-plan',
+    chunk,
+    target: 'app.events',
+    sourceTarget: 'app.events',
+    table: plan.table,
+  })
+}
+
+function requireChunk<T>(value: T | undefined, label: string): T {
+  if (value === undefined) {
+    throw new Error(`Missing expected chunk: ${label}`)
+  }
+  return value
+}
+
+describe('smart chunking integration', () => {
+  test('keeps small partitions as a single metadata chunk', async () => {
+    const rows = Array.from({ length: 12 }, (_, index) => ({
+      _partition_id: 'p_small',
+      
event_time: isoAt(1, index), + id: index, + })) + + const plan = await planFixture({ + rows, + sortKeys: [{ column: 'id', type: 'UInt64' }], + maxChunkBytes: 64 * MiB, + }) + + expect(plan.chunks).toHaveLength(1) + expect(plan.chunks[0]?.estimate.reason).toBe('partition-metadata') + expect(strategyIds(requireChunk(plan.chunks[0], 'metadata chunk'))).toHaveLength(0) + }) + + test('uses quantile range splitting for wide numeric distributions', async () => { + const rows = Array.from({ length: 120 }, (_, index) => ({ + _partition_id: 'p_quantile', + event_time: isoAt(2, index % 24), + id: index, + })) + + const plan = await planFixture({ + rows, + sortKeys: [{ column: 'id', type: 'UInt64' }], + maxChunkBytes: 60 * 1024, + }) + + expect(plan.chunks.length).toBeGreaterThanOrEqual(3) + expect(plan.chunks.every((chunk) => strategyIds(chunk).includes('quantile-range-split'))).toBe(true) + + const estimatedRows = plan.chunks.map((chunk) => chunk.estimate.rows) + expect(Math.max(...estimatedRows) - Math.min(...estimatedRows)).toBeLessThanOrEqual(4) + }) + + test('falls back to equal-width splitting when quantile boundaries collapse', async () => { + const rows = Array.from({ length: 80 }, (_, index) => ({ + _partition_id: 'p_equal', + event_time: isoAt(3, index % 24), + id: 100 + (index % 2), + })) + + const plan = await planFixture({ + rows, + sortKeys: [{ column: 'id', type: 'UInt64' }], + maxChunkBytes: 40 * 1024, + }) + + expect(plan.chunks.length).toBeGreaterThan(1) + expect(plan.chunks.some((chunk) => strategyIds(chunk).includes('equal-width-split'))).toBe(true) + expect(plan.chunks.every((chunk) => chunk.estimate.rows > 0)).toBe(true) + expect(plan.chunks.every((chunk) => + chunk.ranges.every((range) => range.from !== range.to) + )).toBe(true) + }) + + test('uses string key splitting for string-distributed partitions', async () => { + const rows: FixtureRow[] = [] + for (const prefix of ['apple', 'apricot', 'banana', 'berry', 'citrus']) { + for (let index = 0; index < 
24; index++) { + rows.push({ + _partition_id: 'p_string', + event_time: isoAt(4, index % 24), + slug: `${prefix}-${index.toString().padStart(2, '0')}`, + }) + } + } + + const plan = await planFixture({ + rows, + sortKeys: [{ column: 'slug', type: 'String' }], + maxChunkBytes: 48 * 1024, + }) + + expect(plan.chunks.length).toBeGreaterThan(2) + const usesStringStrategy = plan.chunks.some((chunk) => + strategyIds(chunk).includes('group-by-key-split') || + strategyIds(chunk).includes('string-prefix-split') + ) + expect(usesStringStrategy).toBe(true) + + const sql = buildSqlForChunk(plan, requireChunk(plan.chunks[0], 'string-key first chunk')) + expect(sql).toContain("unhex('") + }) + + test('combines string-prefix and temporal splitting for focused time windows', async () => { + const rows: FixtureRow[] = [] + + for (let day = 1; day <= 3; day++) { + for (let hour = 0; hour < 24; hour++) { + rows.push({ + _partition_id: 'p_combo_temporal', + event_time: isoAt(10 + day, hour), + user_id: 'hot', + score: 1000 + day * 24 + hour, + }) + } + } + + for (let index = 0; index < 18; index++) { + rows.push({ + _partition_id: 'p_combo_temporal', + event_time: isoAt(10, index), + user_id: `cold-${index}`, + score: index, + }) + } + + const plan = await planFixture({ + rows, + sortKeys: [ + { column: 'user_id', type: 'String' }, + { column: 'event_time', type: 'DateTime' }, + ], + maxChunkBytes: 36 * 1024, + }) + + const hotChunks = plan.chunks.filter((chunk) => + strategyIds(chunk).includes('temporal-bucket-split') && + chunk.ranges.some((range) => range.dimensionIndex === 0) && + chunk.ranges.some((range) => range.dimensionIndex === 1) + ) + + expect(hotChunks.length).toBeGreaterThan(0) + expect(hotChunks.every((chunk) => chunk.analysis.focusedValue?.value === 'hot')).toBe(true) + + const sql = buildSqlForChunk(plan, requireChunk(hotChunks[0], 'temporal combo chunk')) + expect(sql).toContain('user_id >=') + expect(sql).toContain('event_time >=') + 
expect(sql).toContain('parseDateTimeBestEffort')
+
+    const temporalRanges = hotChunks
+      .map((chunk) => chunk.ranges.find((range) => range.dimensionIndex === 1))
+      .filter((range): range is NonNullable<typeof range> => Boolean(range))
+      .sort((left, right) => String(left.from).localeCompare(String(right.from)))
+
+    for (let index = 1; index < temporalRanges.length; index++) {
+      expect(temporalRanges[index - 1]?.to).toBe(temporalRanges[index]?.from)
+    }
+  })
+
+  test('combines string-prefix and quantile splitting on secondary numeric dimensions', async () => {
+    const rows: FixtureRow[] = []
+
+    for (let index = 0; index < 96; index++) {
+      rows.push({
+        _partition_id: 'p_combo_numeric',
+        event_time: isoAt(20, index % 24),
+        account: 'vip',
+        seq: index,
+      })
+    }
+
+    for (let index = 0; index < 24; index++) {
+      rows.push({
+        _partition_id: 'p_combo_numeric',
+        event_time: isoAt(20, index % 24),
+        account: `free-${index}`,
+        seq: index,
+      })
+    }
+
+    const plan = await planFixture({
+      rows,
+      sortKeys: [
+        { column: 'account', type: 'String' },
+        { column: 'seq', type: 'UInt64' },
+      ],
+      maxChunkBytes: 48 * 1024,
+    })
+
+    const comboChunks = plan.chunks.filter((chunk) =>
+      strategyIds(chunk).includes('quantile-range-split') &&
+      chunk.ranges.some((range) => range.dimensionIndex === 0) &&
+      chunk.ranges.some((range) => range.dimensionIndex === 1)
+    )
+
+    expect(comboChunks.length).toBeGreaterThan(0)
+
+    const sql = buildSqlForChunk(plan, requireChunk(comboChunks[0], 'numeric combo chunk'))
+    expect(sql).toContain('account >=')
+    expect(sql).toContain("seq >= '")
+  })
+})
diff --git a/packages/plugin-backfill/src/chunking/splitter.test.ts b/packages/plugin-backfill/src/chunking/splitter.test.ts
deleted file mode 100644
index 16f4f3f..0000000
--- a/packages/plugin-backfill/src/chunking/splitter.test.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { describe, expect, test } from 'bun:test'
-
-import { splitSortKeyRange, stringToUint64, uint64ToString } from './splitter.js'
-
-describe('splitSortKeyRange', () => { - test('numeric: splits into equal-width ranges', () => { - const ranges = splitSortKeyRange('numeric', '100', '200', 2) - - expect(ranges).toHaveLength(2) - expect(ranges[0]?.from).toBe('100') - expect(ranges[0]?.to).toBe('150') - expect(ranges[1]?.from).toBe('150') - expect(ranges[1]?.to).toBe('201') - }) - - test('datetime: splits into equal-width time ranges', () => { - const ranges = splitSortKeyRange('datetime', '2025-01-01 00:00:00', '2025-01-31 00:00:00', 3) - - expect(ranges).toHaveLength(3) - for (const r of ranges) { - expect(r.from).toBeDefined() - expect(r.to).toBeDefined() - } - }) - - test('string: round-trips through uint64 conversion', () => { - const ranges = splitSortKeyRange('string', 'aaa', 'zzz', 2) - - expect(ranges).toHaveLength(2) - expect(ranges[0]?.from).toBeDefined() - expect(ranges[1]?.to).toBeDefined() - }) -}) - -describe('stringToUint64 / uint64ToString', () => { - test('round-trips short strings', () => { - const original = 'abc' - const n = stringToUint64(original) - const back = uint64ToString(n) - expect(back).toBe(original) - }) - - test('round-trips 8-byte strings', () => { - const original = 'abcdefgh' - const n = stringToUint64(original) - const back = uint64ToString(n) - expect(back).toBe(original) - }) - - test('truncates strings longer than 8 bytes', () => { - const n = stringToUint64('abcdefghijklmnop') - const back = uint64ToString(n) - expect(back).toBe('abcdefgh') - }) - - test('handles embedded zero bytes from arithmetic', () => { - // Simulates a computed intermediate where a middle byte is 0x00 - // e.g. 
0x6200000000000001 has zero bytes between 'b' and the trailing 0x01 - const n = 0x6200000000000001n - const result = uint64ToString(n) - expect(result).toBe('b\0\0\0\0\0\0\x01') - expect(result.length).toBe(8) - }) -}) diff --git a/packages/plugin-backfill/src/chunking/splitter.ts b/packages/plugin-backfill/src/chunking/splitter.ts deleted file mode 100644 index c3a3e5c..0000000 --- a/packages/plugin-backfill/src/chunking/splitter.ts +++ /dev/null @@ -1,86 +0,0 @@ -import type { SortKeyInfo } from './types.js' - -export function splitNumericRange(min: number, max: number, count: number): Array<{ from: string; to: string }> { - const span = max - min - const step = span / count - const ranges: Array<{ from: string; to: string }> = [] - for (let i = 0; i < count; i++) { - const from = min + i * step - const to = i === count - 1 ? max + 1 : min + (i + 1) * step - ranges.push({ from: String(from), to: String(to) }) - } - return ranges -} - -export function splitDateTimeRange(min: string, max: string, count: number): Array<{ from: string; to: string }> { - const minMs = new Date(min).getTime() - const maxMs = new Date(max).getTime() - const span = maxMs - minMs - const step = span / count - const ranges: Array<{ from: string; to: string }> = [] - for (let i = 0; i < count; i++) { - const from = new Date(minMs + i * step).toISOString() - const to = i === count - 1 - ? 
new Date(maxMs + 1).toISOString() - : new Date(minMs + (i + 1) * step).toISOString() - ranges.push({ from, to }) - } - return ranges -} - -export function stringToUint64(s: string): bigint { - let result = 0n - const bytes = Math.min(s.length, 8) - for (let i = 0; i < bytes; i++) { - result = (result << 8n) | BigInt(s.charCodeAt(i)) - } - // Pad remaining bytes with zeros - for (let i = bytes; i < 8; i++) { - result = result << 8n - } - return result -} - -export function uint64ToString(n: bigint): string { - const chars: string[] = [] - for (let i = 7; i >= 0; i--) { - const byte = Number((n >> BigInt(i * 8)) & 0xffn) - chars.push(String.fromCharCode(byte)) - } - // Trim trailing NUL bytes (padding from stringToUint64 for short strings) - let end = chars.length - while (end > 0 && chars[end - 1] === '\0') end-- - return chars.slice(0, end).join('') -} - -export function splitStringRange(min: string, max: string, count: number): Array<{ from: string; to: string }> { - const minVal = stringToUint64(min) - const maxVal = stringToUint64(max) - const span = maxVal - minVal - const step = span / BigInt(count) - const ranges: Array<{ from: string; to: string }> = [] - for (let i = 0; i < count; i++) { - const from = uint64ToString(minVal + BigInt(i) * step) - const to = i === count - 1 - ? 
uint64ToString(maxVal + 1n) - : uint64ToString(minVal + BigInt(i + 1) * step) - ranges.push({ from, to }) - } - return ranges -} - -export function splitSortKeyRange( - category: SortKeyInfo['category'], - min: string, - max: string, - count: number, -): Array<{ from: string; to: string }> { - switch (category) { - case 'numeric': - return splitNumericRange(Number(min), Number(max), count) - case 'datetime': - return splitDateTimeRange(min, max, count) - case 'string': - return splitStringRange(min, max, count) - } -} diff --git a/packages/plugin-backfill/src/chunking/sql.ts b/packages/plugin-backfill/src/chunking/sql.ts index e6b7458..cd57901 100644 --- a/packages/plugin-backfill/src/chunking/sql.ts +++ b/packages/plugin-backfill/src/chunking/sql.ts @@ -1,48 +1,101 @@ -import type { PlannedChunk, SortKeyInfo } from './types.js' +import type { + Chunk, + ChunkRange, + EstimateFilter, + PlannerContext, + RowProbeStrategy, + SortKey, + TableProfile, +} from './types.js' + + +export function quoteSqlString(value: string): string { + return `'${value.replaceAll('\\', '\\\\').replaceAll('\'', '\\\'')}'` +} + +export function formatBound(value: string, sortKey: SortKey): string { + if (sortKey.category === 'datetime') { + return `parseDateTimeBestEffort(${quoteSqlString(value)})` + } + + if (sortKey.category === 'string') { + return `unhex('${Buffer.from(value, 'latin1').toString('hex')}')` + } + + return quoteSqlString(value) +} + +export function buildWhereClauseFromRanges( + partitionId: string, + ranges: ChunkRange[], + sortKeys: SortKey[], +): string { + const conditions = [`_partition_id = ${quoteSqlString(partitionId)}`] + + for (const range of ranges) { + const sortKey = sortKeys[range.dimensionIndex] + if (!sortKey) continue + + if (range.from !== undefined) { + conditions.push(`${sortKey.name} >= ${formatBound(range.from, sortKey)}`) + } + if (range.to !== undefined) { + conditions.push(`${sortKey.name} < ${formatBound(range.to, sortKey)}`) + } + } + + return 
conditions.join('\n AND ') +} + +export function buildWhereClauseFromChunk( + chunk: Pick, + table: Pick, +): string { + return buildWhereClauseFromRanges(chunk.partitionId, chunk.ranges, table.sortKeys) +} function buildSettingsClause(token: string): string { if (token) { return `SETTINGS async_insert=0, insert_deduplication_token='${token}'` } - return `SETTINGS async_insert=0` + return 'SETTINGS async_insert=0' } -function buildSortKeyCondition( - sortKeyColumn: string, - category: SortKeyInfo['category'], - from: string, - to: string, -): string { - if (category === 'datetime') { - return ` AND ${sortKeyColumn} >= parseDateTimeBestEffort('${from}')\n AND ${sortKeyColumn} < parseDateTimeBestEffort('${to}')` - } - // numeric and string use direct comparison - return ` AND ${sortKeyColumn} >= '${from}'\n AND ${sortKeyColumn} < '${to}'` +function buildChunkConditions(chunk: Pick, sortKeys: SortKey[]): string[] { + return chunk.ranges.flatMap((range) => { + const sortKey = sortKeys[range.dimensionIndex] + if (!sortKey) return [] + + const conditions: string[] = [] + if (range.from !== undefined) { + conditions.push(`${sortKey.name} >= ${formatBound(range.from, sortKey)}`) + } + if (range.to !== undefined) { + conditions.push(`${sortKey.name} < ${formatBound(range.to, sortKey)}`) + } + return conditions + }) } -export function buildChunkSql(input: { +export function buildChunkExecutionSql(input: { planId: string - chunk: PlannedChunk + chunk: Chunk target: string - sortKey?: SortKeyInfo + table: Pick + sourceTarget?: string mvAsQuery?: string targetColumns?: string[] + idempotencyToken?: string }): string { - const header = `/* chkit backfill plan=${input.planId} chunk=${input.chunk.id} token=${input.chunk.idempotencyToken} */` - const settings = buildSettingsClause(input.chunk.idempotencyToken) - const { chunk } = input + const sourceTarget = input.sourceTarget ?? 
input.target + const header = `/* chkit backfill plan=${input.planId} chunk=${input.chunk.id} token=${input.idempotencyToken ?? ''} */` + const settings = buildSettingsClause(input.idempotencyToken ?? '') + const chunkConditions = buildChunkConditions(input.chunk, input.table.sortKeys) if (input.mvAsQuery) { - // MV replay: inject partition + sort key filters into the MV's AS query - let filtered = injectPartitionFilter(input.mvAsQuery, chunk.partitionId) - if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && input.sortKey) { - filtered = injectSortKeyFilter( - filtered, - input.sortKey.column, - input.sortKey.category, - chunk.sortKeyFrom, - chunk.sortKeyTo, - ) + let filtered = injectPartitionFilter(input.mvAsQuery, input.chunk.partitionId) + for (const condition of chunkConditions) { + filtered = injectWhereCondition(filtered, condition) } if (input.targetColumns?.length) { filtered = rewriteSelectColumns(filtered, input.targetColumns) @@ -50,48 +103,96 @@ export function buildChunkSql(input: { return [header, `INSERT INTO ${input.target}`, filtered, settings].join('\n') } - // Direct table copy const lines = [ header, `INSERT INTO ${input.target}`, - `SELECT *`, - `FROM ${input.target}`, - `WHERE _partition_id = '${chunk.partitionId}'`, + 'SELECT *', + `FROM ${sourceTarget}`, + `WHERE _partition_id = ${quoteSqlString(input.chunk.partitionId)}`, ] - if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && input.sortKey) { - lines.push(buildSortKeyCondition( - input.sortKey.column, - input.sortKey.category, - chunk.sortKeyFrom, - chunk.sortKeyTo, - )) + for (const condition of chunkConditions) { + lines.push(` AND ${condition}`) } lines.push(settings) return lines.join('\n') } -// --- SQL helpers --- +export function buildEstimateSql( + filter: EstimateFilter, + sortKeys: SortKey[], + context: PlannerContext, + rowProbeStrategy: RowProbeStrategy, +): string { + const whereClause = buildWhereClauseFromFilter(filter, sortKeys) + if 
(rowProbeStrategy === 'count') { + return `SELECT count() AS cnt FROM ${context.database}.${context.table} WHERE ${whereClause}` + } + return `EXPLAIN ESTIMATE SELECT count() FROM ${context.database}.${context.table} WHERE ${whereClause}` +} + +export function buildCountSql( + filter: EstimateFilter, + sortKeys: SortKey[], + context: Pick, +): string { + return `SELECT count() AS cnt FROM ${context.database}.${context.table} WHERE ${buildWhereClauseFromFilter(filter, sortKeys)}` +} + +function buildWhereClauseFromFilter( + filter: EstimateFilter, + sortKeys: SortKey[], +): string { + const conditions = [`_partition_id = ${quoteSqlString(filter.partitionId)}`] + + for (const range of filter.ranges) { + const sortKey = sortKeys[range.dimensionIndex] + if (!sortKey) continue + + if (filter.exactDimensionIndex === range.dimensionIndex && filter.exactValue !== undefined) { + conditions.push(`${sortKey.name} = ${formatBound(filter.exactValue, sortKey)}`) + continue + } + + if (range.from !== undefined) { + conditions.push(`${sortKey.name} >= ${formatBound(range.from, sortKey)}`) + } + if (range.to !== undefined) { + conditions.push(`${sortKey.name} < ${formatBound(range.to, sortKey)}`) + } + } + + return conditions.join(' AND ') +} function injectPartitionFilter(query: string, partitionId: string): string { - const condition = `_partition_id = '${partitionId}'` - return injectWhereCondition(query, condition) + return injectWhereCondition(query, `_partition_id = ${quoteSqlString(partitionId)}`) } export function injectSortKeyFilter( query: string, sortKeyColumn: string, - category: SortKeyInfo['category'], + category: SortKey['category'], from: string, to: string, ): string { let condition: string + if (category === 'datetime') { - condition = `${sortKeyColumn} >= parseDateTimeBestEffort('${from}')\n AND ${sortKeyColumn} < parseDateTimeBestEffort('${to}')` + condition = + `${sortKeyColumn} >= parseDateTimeBestEffort(${quoteSqlString(from)})\n` + + ` AND ${sortKeyColumn} < 
parseDateTimeBestEffort(${quoteSqlString(to)})` + } else if (category === 'string') { + condition = + `${sortKeyColumn} >= unhex('${Buffer.from(from, 'latin1').toString('hex')}')\n` + + ` AND ${sortKeyColumn} < unhex('${Buffer.from(to, 'latin1').toString('hex')}')` } else { - condition = `${sortKeyColumn} >= '${from}'\n AND ${sortKeyColumn} < '${to}'` + condition = + `${sortKeyColumn} >= ${quoteSqlString(from)}\n` + + ` AND ${sortKeyColumn} < ${quoteSqlString(to)}` } + return injectWhereCondition(query, condition) } @@ -99,40 +200,51 @@ function injectWhereCondition(query: string, condition: string): string { const trimmed = query.trimEnd() const upper = trimmed.toUpperCase() - interface KWHit { keyword: string; position: number } - const hits: KWHit[] = [] + interface KeywordHit { + keyword: string + position: number + } + + const hits: KeywordHit[] = [] let depth = 0 - for (let i = 0; i < trimmed.length; i++) { - const ch = trimmed[i] - if (ch === '(') { depth++; continue } - if (ch === ')') { depth--; continue } - if (ch === "'") { - i++ - while (i < trimmed.length && trimmed[i] !== "'") { - if (trimmed[i] === '\\') i++ - i++ + for (let index = 0; index < trimmed.length; index++) { + const char = trimmed[index] + if (char === '(') { + depth += 1 + continue + } + if (char === ')') { + depth -= 1 + continue + } + if (char === '\'') { + index += 1 + while (index < trimmed.length && trimmed[index] !== '\'') { + if (trimmed[index] === '\\') index += 1 + index += 1 } continue } if (depth !== 0) continue - - if (i > 0 && /\S/.test(trimmed[i - 1] ?? '')) continue - - const rest = upper.slice(i) - for (const kw of ['WHERE', 'GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS']) { - if (rest.startsWith(kw) && (i + kw.length >= trimmed.length || /\s/.test(trimmed[i + kw.length] ?? ''))) { - hits.push({ keyword: kw, position: i }) + if (index > 0 && /\S/.test(trimmed[index - 1] ?? 
'')) continue + + const rest = upper.slice(index) + for (const keyword of ['WHERE', 'GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS']) { + if ( + rest.startsWith(keyword) && + (index + keyword.length >= trimmed.length || /\s/.test(trimmed[index + keyword.length] ?? '')) + ) { + hits.push({ keyword, position: index }) break } } } - const whereHit = hits.find(h => h.keyword === 'WHERE') - const trailingKeywords = ['GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS'] + const whereHit = hits.find((hit) => hit.keyword === 'WHERE') const firstTrailing = hits - .filter(h => trailingKeywords.includes(h.keyword)) - .filter(h => !whereHit || h.position > whereHit.position)[0] + .filter((hit) => hit.keyword !== 'WHERE') + .filter((hit) => !whereHit || hit.position > whereHit.position)[0] const insertAt = firstTrailing ? firstTrailing.position : trimmed.length const before = trimmed.slice(0, insertAt).trimEnd() @@ -141,6 +253,7 @@ function injectWhereCondition(query: string, condition: string): string { if (whereHit) { return `${before}\n AND ${condition}${after ? `\n${after}` : ''}` } + return `${before}\nWHERE ${condition}${after ? 
`\n${after}` : ''}` } @@ -152,57 +265,85 @@ export function rewriteSelectColumns(query: string, targetColumns: string[]): st let fromPos = -1 let depth = 0 - for (let i = 0; i < trimmed.length; i++) { - const ch = trimmed[i] - if (ch === '(') { depth++; continue } - if (ch === ')') { depth--; continue } - if (ch === "'") { - i++ - while (i < trimmed.length && trimmed[i] !== "'") { - if (trimmed[i] === '\\') i++ - i++ + for (let index = 0; index < trimmed.length; index++) { + const char = trimmed[index] + if (char === '(') { + depth += 1 + continue + } + if (char === ')') { + depth -= 1 + continue + } + if (char === '\'') { + index += 1 + while (index < trimmed.length && trimmed[index] !== '\'') { + if (trimmed[index] === '\\') index += 1 + index += 1 } continue } if (depth !== 0) continue - - if (i > 0 && /\S/.test(trimmed[i - 1] ?? '')) continue - - const rest = upper.slice(i) - if (selectPos === -1 && rest.startsWith('SELECT') && (i + 6 >= trimmed.length || /\s/.test(trimmed[i + 6] ?? ''))) { - selectPos = i - } else if (selectPos !== -1 && fromPos === -1 && rest.startsWith('FROM') && (i + 4 >= trimmed.length || /\s/.test(trimmed[i + 4] ?? ''))) { - fromPos = i + if (index > 0 && /\S/.test(trimmed[index - 1] ?? '')) continue + + const rest = upper.slice(index) + if ( + selectPos === -1 && + rest.startsWith('SELECT') && + (index + 6 >= trimmed.length || /\s/.test(trimmed[index + 6] ?? '')) + ) { + selectPos = index + } else if ( + selectPos !== -1 && + fromPos === -1 && + rest.startsWith('FROM') && + (index + 4 >= trimmed.length || /\s/.test(trimmed[index + 4] ?? 
'')) + ) { + fromPos = index } } if (selectPos === -1 || fromPos === -1) return query - const projStart = selectPos + 6 - const projText = trimmed.slice(projStart, fromPos).trim() + const projectionStart = selectPos + 6 + const rawProjection = trimmed.slice(projectionStart, fromPos).trim() + let projectionPrefix = '' + let projection = rawProjection + + const distinctMatch = rawProjection.match(/^DISTINCT\b\s*/i) + if (distinctMatch) { + projectionPrefix = distinctMatch[0] ?? '' + projection = rawProjection.slice(projectionPrefix.length).trim() + } const items: string[] = [] let itemStart = 0 depth = 0 - for (let i = 0; i < projText.length; i++) { - const ch = projText[i] - if (ch === '(') { depth++; continue } - if (ch === ')') { depth--; continue } - if (ch === "'") { - i++ - while (i < projText.length && projText[i] !== "'") { - if (projText[i] === '\\') i++ - i++ + for (let index = 0; index < projection.length; index++) { + const char = projection[index] + if (char === '(') { + depth += 1 + continue + } + if (char === ')') { + depth -= 1 + continue + } + if (char === '\'') { + index += 1 + while (index < projection.length && projection[index] !== '\'') { + if (projection[index] === '\\') index += 1 + index += 1 } continue } - if (depth === 0 && ch === ',') { - items.push(projText.slice(itemStart, i).trim()) - itemStart = i + 1 + if (depth === 0 && char === ',') { + items.push(projection.slice(itemStart, index).trim()) + itemStart = index + 1 } } - items.push(projText.slice(itemStart).trim()) + items.push(projection.slice(itemStart).trim()) const aliasMap = new Map() for (const item of items) { @@ -210,38 +351,43 @@ export function rewriteSelectColumns(query: string, targetColumns: string[]): st const itemUpper = item.toUpperCase() let asPos = -1 - let d = 0 - - for (let i = 0; i < item.length; i++) { - const ch = item[i] - if (ch === '(') { d++; continue } - if (ch === ')') { d--; continue } - if (ch === "'") { - i++ - while (i < item.length && item[i] !== "'") 
{ - if (item[i] === '\\') i++ - i++ + let itemDepth = 0 + + for (let index = 0; index < item.length; index++) { + const char = item[index] + if (char === '(') { + itemDepth += 1 + continue + } + if (char === ')') { + itemDepth -= 1 + continue + } + if (char === '\'') { + index += 1 + while (index < item.length && item[index] !== '\'') { + if (item[index] === '\\') index += 1 + index += 1 } continue } - if (d !== 0) continue - if (i > 0 && /\S/.test(item[i - 1] ?? '')) continue - - const rest = itemUpper.slice(i) - if (rest.startsWith('AS') && (i + 2 >= item.length || /\s/.test(item[i + 2] ?? ''))) { - asPos = i + if (itemDepth !== 0) continue + if (index > 0 && /\S/.test(item[index - 1] ?? '')) continue + + const rest = itemUpper.slice(index) + if ( + rest.startsWith('AS') && + (index + 2 >= item.length || /\s/.test(item[index + 2] ?? '')) + ) { + asPos = index } } if (asPos !== -1) { - const alias = item.slice(asPos + 2).trim() - aliasMap.set(alias, item) + aliasMap.set(item.slice(asPos + 2).trim(), item) } } - const rewrittenCols = targetColumns.map(col => aliasMap.get(col) ?? col) - - const before = trimmed.slice(0, projStart) - const after = trimmed.slice(fromPos) - return `${before} ${rewrittenCols.join(', ')}\n${after}` + const rewrittenProjection = targetColumns.map((column) => aliasMap.get(column) ?? 
column) + return `${trimmed.slice(0, projectionStart)} ${projectionPrefix}${rewrittenProjection.join(', ')}\n${trimmed.slice(fromPos)}` } diff --git a/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts b/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts new file mode 100644 index 0000000..351e6ad --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts @@ -0,0 +1,73 @@ +import pMap from 'p-map' +import { buildSliceFromRows } from '../partition-slices.js' +import { estimateRows } from '../services/row-probe.js' +import type { + Partition, + PartitionSlice, + PlannerContext, + SortKey, +} from '../types.js' +import { replaceChunkRange } from '../utils/ranges.js' +import { buildEvenlySpacedBoundaries } from './quantile-range-split.js' + +export const DEFAULT_OVERSAMPLING_MULTIPLIER = 3 +const ESTIMATE_CONCURRENCY = 50 + +export async function splitSliceWithEqualWidthRanges( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + subCount: number, + oversamplingMultiplier: number = DEFAULT_OVERSAMPLING_MULTIPLIER, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) return [slice] + + const boundaries = Array.from( + new Set(buildEvenlySpacedBoundaries(rangeFrom, rangeTo, subCount * oversamplingMultiplier, sortKey)) + ) + if (boundaries.length <= 2) return [slice] + + const intervals: Array<{ from: string; to: string }> = [] + for (let index = 0; index < boundaries.length - 1; index++) { + const from = boundaries[index] + const to = boundaries[index + 1] + if (from === undefined || to === undefined || from === to) continue + intervals.push({ from, to }) + } + + const results = await pMap( + intervals, + async ({ from, to }) => { + const ranges = replaceChunkRange(slice, dimensionIndex, from, to) + const rows = await estimateRows( + context, + { partitionId: 
partition.partitionId, ranges }, + sortKeys, + ) + if (rows <= 0) return null + return buildSliceFromRows(partition, { + ranges, + rows, + focusedValue: slice.analysis.focusedValue, + confidence: context.rowProbeStrategy === 'count' ? 'exact' : 'low', + reason: context.rowProbeStrategy === 'count' ? 'exact-count' : 'equal-width-distribution', + lineage: slice.analysis.lineage.concat([ + { + strategyId: 'equal-width-split', + dimensionIndex, + reason: 'fallback to equal-width ranges', + }, + ]), + }) + }, + { concurrency: ESTIMATE_CONCURRENCY }, + ) + + const slices = results.filter((s): s is PartitionSlice => s !== null) + return slices.length > 0 ? slices : [slice] +} diff --git a/packages/plugin-backfill/src/chunking/strategies/group-by-key-split.ts b/packages/plugin-backfill/src/chunking/strategies/group-by-key-split.ts new file mode 100644 index 0000000..02807e8 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/group-by-key-split.ts @@ -0,0 +1,93 @@ +import { buildSliceFromRows } from '../partition-slices.js' +import { + type StringKeyBucket, + probeStringKeyDistribution, +} from '../services/distribution-source.js' +import type { + Partition, + PartitionSlice, + PlannerContext, + SortKey, +} from '../types.js' +import { compareBinaryStrings, maxBinaryString, minBinaryString } from '../utils/binary-string.js' +import { getChunkRange, replaceChunkRange } from '../utils/ranges.js' + +const KEY_LIMIT = 100 + +export async function splitSliceWithGroupByKey( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey || sortKey.category !== 'string') return undefined + + const range = getChunkRange(slice, dimensionIndex) + if (range.from === undefined || range.to === undefined) return undefined + + const buckets = await probeStringKeyDistribution( + context, + slice.partitionId, + slice.ranges, + sortKey, + 
dimensionIndex, + sortKeys, + KEY_LIMIT, + ) + + if (!buckets || buckets.length === 0) return undefined + + // Sort by value for range-ordered slice construction + const sorted = [...buckets].sort((a, b) => compareBinaryStrings(a.value, b.value)) + + return buildKeySlices(partition, slice, dimensionIndex, range.from, range.to, sorted) +} + +function buildKeySlices( + partition: Partition, + parentSlice: PartitionSlice, + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + sortedBuckets: StringKeyBucket[], +): PartitionSlice[] { + const slices: PartitionSlice[] = [] + let cursor = rangeFrom + + for (const bucket of sortedBuckets) { + const keyFrom = bucket.value + const keyTo = `${bucket.value}\0` + + // Gap slice before this key (non-hot residual between keys) + const gapFrom = maxBinaryString(cursor, rangeFrom) + const gapTo = minBinaryString(keyFrom, rangeTo) + if (compareBinaryStrings(gapFrom, gapTo) < 0) { + // There's a gap — but it has zero rows in our full distribution, + // so we skip it (all rows are accounted for by the key buckets) + } + + // Exact key slice + const sliceFrom = maxBinaryString(keyFrom, rangeFrom) + const sliceTo = minBinaryString(keyTo, rangeTo) + if (compareBinaryStrings(sliceFrom, sliceTo) < 0) { + slices.push(buildSliceFromRows(partition, { + ranges: replaceChunkRange(parentSlice, dimensionIndex, sliceFrom, sliceTo), + rows: bucket.rowCount, + focusedValue: { dimensionIndex, value: bucket.value }, + confidence: 'high', + reason: 'group-by-key-distribution', + lineage: parentSlice.analysis.lineage.concat([{ + strategyId: 'group-by-key-split', + dimensionIndex, + reason: 'split slice using full GROUP BY key distribution', + }]), + })) + } + + cursor = keyTo + } + + return slices +} diff --git a/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts b/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts new file mode 100644 index 0000000..86dca54 --- /dev/null +++ 
b/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts @@ -0,0 +1,6 @@ +import { buildRootSlice } from '../partition-slices.js' +import type { Partition, PartitionSlice } from '../types.js' + +export function buildSingleChunkPartition(partition: Partition): PartitionSlice[] { + return [buildRootSlice(partition)] +} diff --git a/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts b/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts new file mode 100644 index 0000000..d6558f8 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts @@ -0,0 +1,216 @@ +import pMap from 'p-map' +import { buildSliceFromRows } from '../partition-slices.js' +import { estimateRows, parsePlannerDateTime } from '../services/row-probe.js' +import type { + Partition, + PartitionSlice, + PlannerContext, + SortKey, +} from '../types.js' +import { bigIntToStr, strToBigInt } from '../utils/binary-string.js' +import { getChunkRange, replaceChunkRange } from '../utils/ranges.js' + +const BINARY_SEARCH_STEPS = 24 +const ESTIMATE_CONCURRENCY = 50 + +export async function splitSliceWithQuantiles( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + boundaries: string[], +): Promise { + const intervals: Array<{ from: string; to: string }> = [] + for (let index = 0; index < boundaries.length - 1; index++) { + const from = boundaries[index] + const to = boundaries[index + 1] + if (from === undefined || to === undefined || from === to) continue + intervals.push({ from, to }) + } + + const results = await pMap( + intervals, + async ({ from, to }) => { + const ranges = replaceChunkRange(slice, dimensionIndex, from, to) + const rows = await estimateRows( + context, + { partitionId: partition.partitionId, ranges }, + sortKeys, + ) + if (rows <= 0) return null + return buildSliceFromRows(partition, { + ranges, + rows, + focusedValue: 
slice.analysis.focusedValue, + confidence: context.rowProbeStrategy === 'count' ? 'exact' : 'high', + reason: context.rowProbeStrategy === 'count' ? 'exact-count' : 'quantile-estimate', + lineage: slice.analysis.lineage.concat([ + { + strategyId: 'quantile-range-split', + dimensionIndex, + reason: 'split slice into quantile-aligned ranges', + }, + ]), + }) + }, + { concurrency: ESTIMATE_CONCURRENCY }, + ) + + return results.filter((s): s is PartitionSlice => s !== null) +} + +export async function findQuantileBoundaryOnDimension( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + targetCumRows: number, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) { + throw new Error(`Missing sort key at dimension ${dimensionIndex}`) + } + + const range = getChunkRange(slice, dimensionIndex) + if (range.from === undefined || range.to === undefined) { + throw new Error(`Missing range for quantile split on dimension ${dimensionIndex}`) + } + + if (sortKey.category === 'string') { + return findStringBoundary(context, slice, sortKeys, dimensionIndex, range.from, range.to, targetCumRows) + } + if (sortKey.category === 'datetime') { + return findDateTimeBoundary(context, slice, sortKeys, dimensionIndex, range.from, range.to, targetCumRows) + } + return findNumericBoundary(context, slice, sortKeys, dimensionIndex, range.from, range.to, targetCumRows) +} + +export function buildEvenlySpacedBoundaries( + rangeFrom: string, + rangeTo: string, + subCount: number, + sortKey: SortKey, +): string[] { + if (subCount <= 1) return [rangeFrom, rangeTo] + + if (sortKey.category === 'datetime') { + const start = parsePlannerDateTime(rangeFrom) + const end = parsePlannerDateTime(rangeTo) + return Array.from({ length: subCount + 1 }, (_, index) => + new Date(start + Math.floor(((end - start) * index) / subCount)).toISOString() + ) + } + + if (sortKey.category === 'numeric') { + const start = Number(rangeFrom) + const end = 
Number(rangeTo) + return Array.from({ length: subCount + 1 }, (_, index) => + String(start + Math.floor(((end - start) * index) / subCount)) + ) + } + + const width = Math.max(rangeFrom.length, rangeTo.length) + const start = strToBigInt(rangeFrom, width) + const end = strToBigInt(rangeTo, width) + const boundaries = Array.from({ length: subCount + 1 }, (_, index) => + bigIntToStr(start + ((end - start) * BigInt(index)) / BigInt(subCount), width, width) + ) + // Use original values at endpoints to avoid round-trip length changes + boundaries[0] = rangeFrom + boundaries[boundaries.length - 1] = rangeTo + return boundaries +} + +async function findStringBoundary( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + targetCumRows: number, +): Promise { + const width = Math.max(rangeFrom.length, rangeTo.length) + let low = strToBigInt(rangeFrom, width) + let high = strToBigInt(rangeTo, width) + + for (let step = 0; step < BINARY_SEARCH_STEPS; step++) { + const midpoint = (low + high) / 2n + if (midpoint === low || midpoint === high) break + + const mid = bigIntToStr(midpoint, width, width) + const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, mid) + if (rows < targetCumRows) low = midpoint + else high = midpoint + } + + return bigIntToStr((low + high) / 2n, width, width) +} + +async function findDateTimeBoundary( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + targetCumRows: number, +): Promise { + let low = parsePlannerDateTime(rangeFrom) + let high = parsePlannerDateTime(rangeTo) + + for (let step = 0; step < BINARY_SEARCH_STEPS; step++) { + const midpoint = Math.floor((low + high) / 2) + if (midpoint === low || midpoint === high) break + + const mid = new Date(midpoint).toISOString() + const rows = await estimateRowsUntil(context, slice, sortKeys, 
dimensionIndex, rangeFrom, mid) + if (rows < targetCumRows) low = midpoint + else high = midpoint + } + + return new Date(Math.floor((low + high) / 2)).toISOString() +} + +async function findNumericBoundary( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + targetCumRows: number, +): Promise { + let low = Number(rangeFrom) + let high = Number(rangeTo) + + for (let step = 0; step < BINARY_SEARCH_STEPS; step++) { + const midpoint = Math.floor((low + high) / 2) + if (midpoint === low || midpoint === high) break + + const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, String(midpoint)) + if (rows < targetCumRows) low = midpoint + else high = midpoint + } + + return String(Math.floor((low + high) / 2)) +} + +async function estimateRowsUntil( + context: PlannerContext, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, +): Promise { + return estimateRows( + context, + { + partitionId: slice.partitionId, + ranges: replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo), + }, + sortKeys + ) +} diff --git a/packages/plugin-backfill/src/chunking/strategies/refinement.ts b/packages/plugin-backfill/src/chunking/strategies/refinement.ts new file mode 100644 index 0000000..4d27c2a --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/refinement.ts @@ -0,0 +1,128 @@ +import { buildSliceEstimate } from '../partition-slices.js' +import { countRowsExact, getRowProbeStrategy } from '../services/row-probe.js' +import type { + Partition, + PartitionBuildResult, + PartitionDiagnostics, + PartitionSlice, + PlannerContext, + SortKey, +} from '../types.js' + +const ESTIMATE_RATIO_MIN = 0.7 +const ESTIMATE_RATIO_MAX = 1.3 + +export async function refinePartitionSlices( + context: PlannerContext, + partition: Partition, + slices: PartitionSlice[], + sortKeys: SortKey[], + 
usedDistributionFallback: boolean, +): Promise { + let workingSlices = slices + let usedLowConfidenceChunkRefinement = false + + if (slices.some((slice) => slice.estimate.confidence === 'low')) { + workingSlices = await refineLowConfidenceSlices(context, partition, slices, sortKeys) + usedLowConfidenceChunkRefinement = true + } + + const diagnostics = buildPartitionDiagnostics( + partition, + workingSlices, + usedDistributionFallback, + usedLowConfidenceChunkRefinement, + false + ) + + if ( + getRowProbeStrategy(context) !== 'explain-estimate' || + !diagnostics.suspiciousEstimate + ) { + return { slices: workingSlices, diagnostics } + } + + const refinedSlices = await refineAllSlices(context, partition, workingSlices, sortKeys) + return { + slices: refinedSlices, + diagnostics: buildPartitionDiagnostics( + partition, + refinedSlices, + usedDistributionFallback, + usedLowConfidenceChunkRefinement, + true + ), + } +} + +export function buildPartitionDiagnostics( + partition: Partition, + slices: PartitionSlice[], + usedDistributionFallback: boolean, + usedLowConfidenceChunkRefinement: boolean, + usedExactCountFallback: boolean, +): PartitionDiagnostics { + const estimatedRowSum = slices.reduce((sum, slice) => sum + slice.estimate.rows, 0) + const estimateToExactRatio = partition.rows > 0 ? 
estimatedRowSum / partition.rows : 1 + + return { + estimatedRowSum, + exactPartitionRows: partition.rows, + estimateToExactRatio, + suspiciousEstimate: + estimateToExactRatio < ESTIMATE_RATIO_MIN || estimateToExactRatio > ESTIMATE_RATIO_MAX, + lowConfidenceChunkCount: slices.filter((slice) => slice.estimate.confidence === 'low').length, + usedDistributionFallback, + usedLowConfidenceChunkRefinement, + usedExactCountFallback, + } +} + +async function refineLowConfidenceSlices( + context: PlannerContext, + partition: Partition, + slices: PartitionSlice[], + sortKeys: SortKey[], +): Promise { + const refined: PartitionSlice[] = [] + + for (const slice of slices) { + if (slice.estimate.confidence !== 'low') { + refined.push(slice) + continue + } + refined.push(await refineSlice(context, partition, slice, sortKeys)) + } + + return refined +} + +async function refineAllSlices( + context: PlannerContext, + partition: Partition, + slices: PartitionSlice[], + sortKeys: SortKey[], +): Promise { + return Promise.all(slices.map((slice) => refineSlice(context, partition, slice, sortKeys))) +} + +async function refineSlice( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], +): Promise { + const rows = await countRowsExact( + context, + { + partitionId: partition.partitionId, + ranges: slice.ranges, + }, + sortKeys + ) + + return { + ...slice, + estimate: buildSliceEstimate(partition, rows, 'exact', 'exact-count'), + } +} diff --git a/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts b/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts new file mode 100644 index 0000000..5c050b6 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts @@ -0,0 +1,144 @@ +import { buildSliceFromRows } from '../partition-slices.js' +import { probeStringPrefixDistribution } from '../services/distribution-source.js' +import type { + Partition, + PartitionSlice, + 
PlannerContext, + SortKey, + StringPrefixBucket, +} from '../types.js' +import { + buildObservedStringUpperBound, + maxBinaryString, + minBinaryString, + nextPrefixValue, +} from '../utils/binary-string.js' +import { getChunkRange, replaceChunkRange } from '../utils/ranges.js' + +const TARGET_BYTES_FUZZ_FACTOR = 1.15 +const PREFIX_START_DEPTH = 1 +const PREFIX_MAX_DEPTH = 4 + +export async function splitSliceWithStringPrefixes( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey || sortKey.category !== 'string') return [] + + const range = getChunkRange(slice, dimensionIndex) + if (range.from === undefined || range.to === undefined) return [] + + return buildPrefixSlices( + context, + partition, + slice, + sortKeys, + dimensionIndex, + range.from, + range.to, + PREFIX_START_DEPTH + ) +} + +export function buildRootStringUpperBound(maxValue: string): string { + return buildObservedStringUpperBound(maxValue) +} + +async function buildPrefixSlices( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + depth: number, +): Promise { + const sortKey = sortKeys[dimensionIndex] + if (!sortKey) return [] + + const buckets = await probeStringPrefixDistribution( + context, + partition.partitionId, + replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo), + sortKey, + dimensionIndex, + depth, + sortKeys + ) + + const slices: PartitionSlice[] = [] + for (const bucket of buckets) { + if (bucket.rowCount <= 0) continue + + const bucketSlice = buildBucketSlice(partition, slice, dimensionIndex, rangeFrom, rangeTo, bucket) + if (!bucketSlice) continue + + if (bucketSlice.estimate.bytesUncompressed <= context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR) { + slices.push(bucketSlice) + continue + } + + if (!bucket.isExactValue 
&& depth < PREFIX_MAX_DEPTH) { + const bucketRange = getChunkRange(bucketSlice, dimensionIndex) + if (bucketRange.from !== undefined && bucketRange.to !== undefined) { + slices.push( + ...(await buildPrefixSlices( + context, + partition, + slice, + sortKeys, + dimensionIndex, + bucketRange.from, + bucketRange.to, + depth + 1 + )) + ) + continue + } + } + + slices.push(bucketSlice) + } + + return slices +} + +function buildBucketSlice( + partition: Partition, + parentSlice: PartitionSlice, + dimensionIndex: number, + rangeFrom: string, + rangeTo: string, + bucket: StringPrefixBucket, +): PartitionSlice | undefined { + const bucketFrom = maxBinaryString(rangeFrom, bucket.value) + const bucketUpper = bucket.isExactValue ? `${bucket.value}\0` : nextPrefixValue(bucket.value) + if (bucketUpper === undefined) return undefined + + const bucketTo = minBinaryString(rangeTo, bucketUpper) + if (bucketFrom === bucketTo) return undefined + + const focusedValue = bucket.isExactValue + ? { dimensionIndex, value: bucket.value } + : parentSlice.analysis.focusedValue + + return buildSliceFromRows(partition, { + ranges: replaceChunkRange(parentSlice, dimensionIndex, bucketFrom, bucketTo), + rows: bucket.rowCount, + focusedValue, + confidence: 'high', + reason: 'string-prefix-distribution', + lineage: parentSlice.analysis.lineage.concat([ + { + strategyId: 'string-prefix-split', + dimensionIndex, + reason: 'split slice using string prefix distribution', + }, + ]), + }) +} diff --git a/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts b/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts new file mode 100644 index 0000000..5b83f6a --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts @@ -0,0 +1,117 @@ +import { buildSliceFromRows, getTargetChunkRows } from '../partition-slices.js' +import { probeTemporalDistribution } from '../services/distribution-source.js' +import { parsePlannerDateTime } from 
'../services/row-probe.js' +import type { + Partition, + PartitionSlice, + PlannerContext, + SortKey, + TemporalBucket, +} from '../types.js' +import { getChunkRange, replaceChunkRange } from '../utils/ranges.js' + +const TARGET_BYTES_FUZZ_FACTOR = 1.15 + +export async function splitSliceWithTemporalBuckets( + context: PlannerContext, + partition: Partition, + slice: PartitionSlice, + sortKeys: SortKey[], + dimensionIndex: number, +): Promise { + const dayBuckets = await probeTemporalDistribution( + context, + partition.partitionId, + slice.ranges, + sortKeys, + dimensionIndex, + 'day' + ) + if (dayBuckets.length === 0) return [slice] + + const daySlices = buildTemporalSlices(partition, slice, dimensionIndex, dayBuckets, context.targetChunkBytes) + if (daySlices.every((candidate) => candidate.estimate.bytesUncompressed <= context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR)) { + return daySlices + } + + const hourBuckets = await probeTemporalDistribution( + context, + partition.partitionId, + slice.ranges, + sortKeys, + dimensionIndex, + 'hour' + ) + if (hourBuckets.length === 0) return daySlices + + return buildTemporalSlices(partition, slice, dimensionIndex, hourBuckets, context.targetChunkBytes) +} + +export function getPartitionEndExclusive(partition: Partition): string { + return new Date(parsePlannerDateTime(partition.maxTime) + 1000).toISOString() +} + +function buildTemporalSlices( + partition: Partition, + parentSlice: PartitionSlice, + dimensionIndex: number, + buckets: TemporalBucket[], + targetChunkBytes: number, +): PartitionSlice[] { + const targetChunkRows = getTargetChunkRows(partition, targetChunkBytes) + const slices: PartitionSlice[] = [] + let currentStart: string | undefined + let currentRows = 0 + const parentRange = getChunkRange(parentSlice, dimensionIndex) + const sliceStart = parentRange.from + const sliceEnd = parentRange.to ?? 
getPartitionEndExclusive(partition) + + for (let index = 0; index < buckets.length; index++) { + const bucket = buckets[index] + if (!bucket) continue + + const bucketStart = sliceStart && bucket.start < sliceStart ? sliceStart : bucket.start + if (currentStart === undefined) { + currentStart = bucketStart + } + + const wouldExceed = currentRows > 0 && currentRows + bucket.rowCount > targetChunkRows * TARGET_BYTES_FUZZ_FACTOR + if (wouldExceed && currentStart !== undefined && currentStart < bucketStart) { + slices.push(buildSlice(parentSlice, partition, dimensionIndex, currentStart, bucketStart, currentRows)) + currentStart = bucketStart + currentRows = 0 + } + + currentRows += bucket.rowCount + + if (index === buckets.length - 1 && currentStart !== undefined && currentStart < sliceEnd) { + slices.push(buildSlice(parentSlice, partition, dimensionIndex, currentStart, sliceEnd, currentRows)) + } + } + + return slices.length > 0 ? slices : [parentSlice] +} + +function buildSlice( + parentSlice: PartitionSlice, + partition: Partition, + dimensionIndex: number, + from: string, + to: string, + rows: number, +): PartitionSlice { + return buildSliceFromRows(partition, { + ranges: replaceChunkRange(parentSlice, dimensionIndex, from, to), + rows, + focusedValue: parentSlice.analysis.focusedValue, + confidence: 'low', + reason: 'temporal-distribution', + lineage: parentSlice.analysis.lineage.concat([ + { + strategyId: 'temporal-bucket-split', + dimensionIndex, + reason: 'split slice using temporal distribution buckets', + }, + ]), + }) +} diff --git a/packages/plugin-backfill/src/chunking/strategy-policy.test.ts b/packages/plugin-backfill/src/chunking/strategy-policy.test.ts new file mode 100644 index 0000000..04880b7 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategy-policy.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, test } from 'bun:test' + +import { getCandidateDimensions } from './strategy-policy.js' + +describe('getCandidateDimensions', () => { + 
test('preserves declared sort-key order regardless of type', () => { + expect(getCandidateDimensions([ + { name: 'event_time', type: 'DateTime', category: 'datetime', boundaryEncoding: 'literal' }, + { name: 'account_id', type: 'String', category: 'string', boundaryEncoding: 'hex-latin1' }, + { name: 'seq', type: 'UInt64', category: 'numeric', boundaryEncoding: 'literal' }, + ])).toEqual([0, 1, 2]) + }) +}) diff --git a/packages/plugin-backfill/src/chunking/strategy-policy.ts b/packages/plugin-backfill/src/chunking/strategy-policy.ts new file mode 100644 index 0000000..0b1a4d1 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/strategy-policy.ts @@ -0,0 +1,8 @@ +import type { PartitionSlice, SortKey } from './types.js' + +export function getCandidateDimensions( + sortKeys: SortKey[], + _slice?: PartitionSlice, +): number[] { + return sortKeys.map((_, index) => index) +} diff --git a/packages/plugin-backfill/src/chunking/types.ts b/packages/plugin-backfill/src/chunking/types.ts index 0c76952..6cd7910 100644 --- a/packages/plugin-backfill/src/chunking/types.ts +++ b/packages/plugin-backfill/src/chunking/types.ts @@ -1,31 +1,170 @@ -export interface PartitionInfo { +export type RowProbeStrategy = 'explain-estimate' | 'count' + +export type SortKeyCategory = 'numeric' | 'datetime' | 'string' + +export type SortKeyBoundaryEncoding = 'literal' | 'hex-latin1' + +export type EstimateConfidence = 'high' | 'low' | 'exact' + +export type EstimateReason = + | 'partition-metadata' + | 'quantile-estimate' + | 'string-prefix-distribution' + | 'group-by-key-distribution' + | 'temporal-distribution' + | 'equal-width-distribution' + | 'exact-count' + +export interface SortKey { + name: string + type: string + category: SortKeyCategory + boundaryEncoding: SortKeyBoundaryEncoding +} + +export interface ChunkRange { + dimensionIndex: number + from?: string + to?: string +} + +export interface ChunkDerivationStep { + strategyId: string + dimensionIndex?: number + reason: string 
+} + +export interface ChunkEstimate { + rows: number + bytesCompressed: number + bytesUncompressed: number + confidence: EstimateConfidence + reason: EstimateReason +} + +export interface FocusedValue { + dimensionIndex: number + value: string +} + +export interface ChunkAnalysis { + focusedValue?: FocusedValue + lineage: ChunkDerivationStep[] +} + +export interface Chunk { + id: string + partitionId: string + ranges: ChunkRange[] + estimate: ChunkEstimate + analysis: ChunkAnalysis +} + +export interface PartitionDiagnostics { + estimatedRowSum: number + exactPartitionRows: number + estimateToExactRatio: number + suspiciousEstimate: boolean + lowConfidenceChunkCount: number + usedDistributionFallback: boolean + usedLowConfidenceChunkRefinement: boolean + usedExactCountFallback: boolean +} + +export interface Partition { partitionId: string rows: number - bytesOnDisk: number + bytesCompressed: number + bytesUncompressed: number minTime: string maxTime: string + diagnostics?: PartitionDiagnostics } -export interface SortKeyInfo { - column: string - type: string - category: 'numeric' | 'datetime' | 'string' +export interface TableProfile { + database: string + table: string + sortKeys: SortKey[] +} + +export interface ChunkPlanStats { + totalPartitions: number + oversizedPartitions: number + focusedChunks: number + totalChunks: number + avgChunkBytes: number + maxChunkBytes: number + minChunkBytes: number +} + +export interface ChunkPlan { + planId: string + generatedAt: string + rowProbeStrategy: RowProbeStrategy + targetChunkBytes: number + table: TableProfile + partitions: Partition[] + chunks: Chunk[] + totalRows: number + totalBytesCompressed: number + totalBytesUncompressed: number + stats: ChunkPlanStats +} + +export type PlannerQuery = (sql: string, settings?: Record) => Promise + +export interface PlannerContext { + database: string + table: string + from?: string + to?: string + targetChunkBytes: number + query: PlannerQuery + querySettings?: Record + 
rowProbeStrategy: RowProbeStrategy } -export interface ChunkBoundary { +export interface EstimateFilter { partitionId: string - sortKeyFrom?: string - sortKeyTo?: string - estimatedBytes: number + ranges: ChunkRange[] + exactDimensionIndex?: number + exactValue?: string } -export interface PlannedChunk { - id: string +export interface StringPrefixBucket { + value: string + rowCount: number + isExactValue: boolean +} + +export interface TemporalBucket { + start: string + rowCount: number +} + +export interface PartitionSlice { partitionId: string - sortKeyFrom?: string - sortKeyTo?: string - estimatedBytes: number - idempotencyToken: string - from: string - to: string + ranges: ChunkRange[] + estimate: ChunkEstimate + analysis: ChunkAnalysis +} + +export interface PartitionBuildResult { + slices: PartitionSlice[] + diagnostics: PartitionDiagnostics +} + +export interface PlanChunkOptions { + requireIdempotencyToken: boolean +} + +export interface GenerateChunkPlanInput { + database: string + table: string + from?: string + to?: string + targetChunkBytes: number + query: PlannerQuery + querySettings?: Record + rowProbeStrategy?: RowProbeStrategy } diff --git a/packages/plugin-backfill/src/chunking/utils/binary-string.ts b/packages/plugin-backfill/src/chunking/utils/binary-string.ts new file mode 100644 index 0000000..b264f2d --- /dev/null +++ b/packages/plugin-backfill/src/chunking/utils/binary-string.ts @@ -0,0 +1,62 @@ +export function compareBinaryStrings(left: string, right: string): number { + return Buffer.from(left, 'latin1').compare(Buffer.from(right, 'latin1')) +} + +export function minBinaryString(left: string, right: string): string { + return compareBinaryStrings(left, right) <= 0 ? left : right +} + +export function maxBinaryString(left: string, right: string): string { + return compareBinaryStrings(left, right) >= 0 ? 
left : right +} + +export function nextPrefixValue(prefix: string): string | undefined { + if (prefix.length === 0) return undefined + + const buffer = Buffer.from(prefix, 'latin1') + for (let index = buffer.length - 1; index >= 0; index--) { + const byte = buffer[index] + if (byte === undefined || byte === 0xff) continue + + const next = Buffer.from(buffer.subarray(0, index + 1)) + next[index] = byte + 1 + return next.toString('latin1') + } + + return undefined +} + +export function buildObservedStringUpperBound(maxValue: string): string { + return `${maxValue}\0` +} + +export function strToBigInt(value: string, padTo: number): bigint { + const buffer = Buffer.from(value, 'latin1') + let result = 0n + + for (let index = 0; index < padTo; index++) { + const byte = index < buffer.length ? (buffer[index] ?? 0) : 0 + result = (result << 8n) | BigInt(byte) + } + + return result +} + +export function bigIntToStr(value: bigint, length: number, minLength = 0): string { + const buffer = Buffer.alloc(length) + let remaining = value + + for (let index = length - 1; index >= 0; index--) { + buffer[index] = Number(remaining & 0xffn) + remaining >>= 8n + } + + // Strip trailing null bytes so boundaries match real string values + // in ClickHouse comparisons (where "abc" < "abc\0"), but preserve + // at least minLength bytes to avoid losing meaningful trailing nulls + // (e.g. from buildObservedStringUpperBound which appends "\0"). 
+ let end = length + while (end > minLength && buffer[end - 1] === 0) end-- + + return buffer.subarray(0, end).toString('latin1') +} diff --git a/packages/plugin-backfill/src/chunking/utils/ids.ts b/packages/plugin-backfill/src/chunking/utils/ids.ts new file mode 100644 index 0000000..32c0ff3 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/utils/ids.ts @@ -0,0 +1,17 @@ +import { hashId, randomPlanId } from '../../state.js' + +export function generatePlanId(): string { + return randomPlanId() +} + +export function generateChunkId( + planId: string, + partitionId: string, + index: number, +): string { + return hashId(`chunk:${planId}:${partitionId}:${index}`).slice(0, 16) +} + +export function generateIdempotencyToken(planId: string, chunkId: string): string { + return hashId(`token:${planId}:${chunkId}`) +} diff --git a/packages/plugin-backfill/src/chunking/utils/ranges.ts b/packages/plugin-backfill/src/chunking/utils/ranges.ts new file mode 100644 index 0000000..3af1571 --- /dev/null +++ b/packages/plugin-backfill/src/chunking/utils/ranges.ts @@ -0,0 +1,31 @@ +import type { ChunkRange, PartitionSlice } from '../types.js' + +export function getChunkRange( + slice: Pick, + dimensionIndex: number, +): ChunkRange { + return ( + slice.ranges.find((range) => range.dimensionIndex === dimensionIndex) ?? 
{ + dimensionIndex, + from: undefined, + to: undefined, + } + ) +} + +export function replaceChunkRange( + slice: Pick, + dimensionIndex: number, + from: string | undefined, + to: string | undefined, +): ChunkRange[] { + return slice.ranges + .filter((range) => range.dimensionIndex !== dimensionIndex) + .concat([{ dimensionIndex, from, to }]) + .sort((left, right) => left.dimensionIndex - right.dimensionIndex) +} + +export function isExactChunkRange(range: Pick): boolean { + if (range.from === undefined || range.to === undefined) return false + return range.to === `${range.from}\0` +} diff --git a/packages/plugin-backfill/src/index.ts b/packages/plugin-backfill/src/index.ts index 3420da7..c2c8446 100644 --- a/packages/plugin-backfill/src/index.ts +++ b/packages/plugin-backfill/src/index.ts @@ -1,16 +1,6 @@ import './table-config.js' export { backfill, createBackfillPlugin } from './plugin.js' -export { executeBackfill, syncProgress } from './async-backfill.js' -export { analyzeAndChunk } from './chunking/analyze.js' -export type { - BackfillOptions, - BackfillChunkState, - BackfillProgress, - BackfillResult, -} from './async-backfill.js' export type { BackfillPlugin, BackfillPluginOptions, BackfillPluginRegistration } from './types.js' export type { PluginConfig } from './options.js' export type { BackfillTableConfig } from './table-config.js' -export type { AnalyzeAndChunkInput, AnalyzeAndChunkResult } from './chunking/analyze.js' -export type { PlannedChunk, PartitionInfo, SortKeyInfo } from './chunking/types.js' diff --git a/packages/plugin-backfill/src/logging.ts b/packages/plugin-backfill/src/logging.ts new file mode 100644 index 0000000..b2765a9 --- /dev/null +++ b/packages/plugin-backfill/src/logging.ts @@ -0,0 +1,60 @@ +import { getLogger, type Logger } from '@logtape/logtape' + +export const CHKIT_LOGGER_CATEGORY = ['chkit'] as const +export const CHKIT_BACKFILL_LOGGER_CATEGORY = [...CHKIT_LOGGER_CATEGORY, 'backfill'] as const +export const 
SLOW_CLICKHOUSE_QUERY_MS = 5000 +export const SLOW_CLICKHOUSE_QUERY_REPEAT_INITIAL_MS = 5000 +export const SLOW_CLICKHOUSE_QUERY_REPEAT_MAX_MS = 30000 + +export function getBackfillLogger(...segments: string[]): Logger { + return getLogger([...CHKIT_BACKFILL_LOGGER_CATEGORY, ...segments]) +} + +export function formatBytes(bytes: number): string { + if (bytes >= 1024 ** 4) return `${(bytes / 1024 ** 4).toFixed(1)} TiB` + if (bytes >= 1024 ** 3) return `${(bytes / 1024 ** 3).toFixed(1)} GiB` + if (bytes >= 1024 ** 2) return `${(bytes / 1024 ** 2).toFixed(1)} MiB` + if (bytes >= 1024) return `${(bytes / 1024).toFixed(1)} KiB` + return `${bytes} B` +} + +export function summarizeSql(sql: string, maxLength = 240): string { + const normalized = normalizeSql(sql) + if (normalized.length <= maxLength) return normalized + return `${normalized.slice(0, maxLength - 3)}...` +} + +export function describeSqlOperation(sql: string): string { + const normalized = normalizeSql(sql) + + const prefixDistribution = normalized.match(/^SELECT substring\((\w+), 1, \d+\) AS prefix, count\(\) AS cnt /) + if (prefixDistribution?.[1]) return `prefix distribution on ${prefixDistribution[1]}` + + const temporalDistribution = normalized.match(/^SELECT formatDateTime\(toStartOf(Day|Hour)\((\w+)\)/) + if (temporalDistribution?.[1] && temporalDistribution[2]) { + return `${temporalDistribution[1].toLowerCase()} distribution on ${temporalDistribution[2]}` + } + + const minMaxProbe = normalized.match(/^SELECT toString\(min\((\w+)\)\) AS minVal, toString\(max\(\1\)\) AS maxVal /) + if (minMaxProbe?.[1]) return `range probe on ${minMaxProbe[1]}` + + if (normalized.startsWith('SELECT count() AS cnt FROM ')) return 'row count probe' + if (normalized.startsWith('SELECT sorting_key FROM system.tables')) return 'sort key introspection' + if (normalized.startsWith('SELECT name, type FROM system.columns')) return 'column introspection' + if (normalized.startsWith('SELECT partition_id,')) return 'partition 
introspection' + if (normalized.startsWith('SELECT 1 FROM ')) return 'table existence probe' + + return summarizeSql(normalized, 100) +} + +export function describeSqlContext(sql: string): string | undefined { + const normalized = normalizeSql(sql) + const partitionId = normalized.match(/_partition_id = '([^']+)'/)?.[1] + + if (partitionId) return `partition ${partitionId}` + return undefined +} + +function normalizeSql(sql: string): string { + return sql.replace(/\s+/g, ' ').trim() +} diff --git a/packages/plugin-backfill/src/partition-planner.test.ts b/packages/plugin-backfill/src/partition-planner.test.ts deleted file mode 100644 index b10c8da..0000000 --- a/packages/plugin-backfill/src/partition-planner.test.ts +++ /dev/null @@ -1,185 +0,0 @@ -import { describe, expect, test } from 'bun:test' - -import { buildChunkBoundaries } from './chunking/build.js' -import { buildChunkSql } from './chunking/sql.js' -import { buildPlannedChunks } from './chunking/analyze.js' -import type { PartitionInfo, SortKeyInfo } from './types.js' - -const GiB = 1024 ** 3 - -function buildChunksWithSql(input: { - planId: string - target: string - partitions: PartitionInfo[] - maxChunkBytes: number - sortKey?: SortKeyInfo - sortKeyRanges?: Map - requireIdempotencyToken: boolean - mvAsQuery?: string - targetColumns?: string[] -}) { - const boundaries = buildChunkBoundaries({ - partitions: input.partitions, - maxChunkBytes: input.maxChunkBytes, - sortKey: input.sortKey, - sortKeyRanges: input.sortKeyRanges, - }) - - const planned = buildPlannedChunks({ - planId: input.planId, - partitions: input.partitions, - boundaries, - requireIdempotencyToken: input.requireIdempotencyToken, - }) - - return planned.map(chunk => ({ - ...chunk, - sqlTemplate: buildChunkSql({ - planId: input.planId, - chunk, - target: input.target, - sortKey: input.sortKey, - mvAsQuery: input.mvAsQuery, - targetColumns: input.targetColumns, - }), - })) -} - -describe('buildChunksWithSql', () => { - const basePlanId = 
'abc1234567890123' - - test('small partition produces one chunk with _partition_id filter only', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T23:59:59.000Z' }, - ] - - const chunks = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - requireIdempotencyToken: true, - }) - - expect(chunks).toHaveLength(1) - expect(chunks[0]?.sqlTemplate).toContain("WHERE _partition_id = '202501'") - expect(chunks[0]?.partitionId).toBe('202501') - expect(chunks[0]?.estimatedBytes).toBe(5 * GiB) - }) - - test('large partition with datetime sort key produces sub-chunks with parseDateTimeBestEffort', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' } - const sortKeyRanges = new Map([ - ['202501', { min: '2025-01-01 00:00:00', max: '2025-01-31 00:00:00' }], - ]) - - const chunks = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - requireIdempotencyToken: true, - }) - - expect(chunks).toHaveLength(3) - for (const chunk of chunks) { - expect(chunk.sqlTemplate).toContain("WHERE _partition_id = '202501'") - expect(chunk.sqlTemplate).toContain('event_time >= parseDateTimeBestEffort(') - expect(chunk.sqlTemplate).toContain('event_time < parseDateTimeBestEffort(') - expect(chunk.partitionId).toBe('202501') - } - }) - - test('chunk IDs are deterministic for same input', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - - const first = buildChunksWithSql({ 
- planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - requireIdempotencyToken: true, - }) - - const second = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - requireIdempotencyToken: true, - }) - - expect(first[0]?.id).toBe(second[0]?.id) - expect(first[0]?.idempotencyToken).toBe(second[0]?.idempotencyToken) - }) - - test('idempotency tokens are empty when not required', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - - const chunks = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - requireIdempotencyToken: false, - }) - - expect(chunks[0]?.idempotencyToken).toBe('') - expect(chunks[0]?.sqlTemplate).not.toContain('insert_deduplication_token') - }) - - test('SQL templates include correct INSERT and SELECT structure', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - - const chunks = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - requireIdempotencyToken: true, - }) - - const sql = chunks[0]?.sqlTemplate ?? 
'' - expect(sql).toContain(`/* chkit backfill plan=${basePlanId}`) - expect(sql).toContain('INSERT INTO default.events') - expect(sql).toContain('SELECT *') - expect(sql).toContain('FROM default.events') - expect(sql).toContain('SETTINGS async_insert=0') - }) - - test('numeric sort key sub-chunks use direct comparison', () => { - const partitions: PartitionInfo[] = [ - { partitionId: '202501', rows: 10000, bytesOnDisk: 20 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' }, - ] - const sortKey: SortKeyInfo = { column: 'id', type: 'UInt64', category: 'numeric' } - const sortKeyRanges = new Map([ - ['202501', { min: '100', max: '200' }], - ]) - - const chunks = buildChunksWithSql({ - planId: basePlanId, - target: 'default.events', - partitions, - maxChunkBytes: 10 * GiB, - sortKey, - sortKeyRanges, - requireIdempotencyToken: false, - }) - - expect(chunks).toHaveLength(2) - expect(chunks[0]?.sqlTemplate).toContain("id >= '100'") - expect(chunks[0]?.sqlTemplate).toContain("id < '150'") - expect(chunks[0]?.sqlTemplate).not.toContain('parseDateTimeBestEffort') - }) -}) diff --git a/packages/plugin-backfill/src/payload.ts b/packages/plugin-backfill/src/payload.ts index f17e096..f79d2aa 100644 --- a/packages/plugin-backfill/src/payload.ts +++ b/packages/plugin-backfill/src/payload.ts @@ -27,15 +27,13 @@ export function planPayload(output: BuildBackfillPlanOutput): { target: output.plan.target, from: output.plan.from, to: output.plan.to, - chunkCount: output.plan.chunks.length, + chunkCount: output.plan.chunkPlan.chunks.length, maxChunkBytes: output.plan.options.maxChunkBytes, sortKeyColumn: output.plan.options.sortKeyColumn, planPath: output.planPath, - strategy: output.plan.strategy, - partitionCount: output.plan.partitions?.length, - totalBytes: output.plan.partitions - ? 
output.plan.partitions.reduce((sum, p) => sum + p.bytesOnDisk, 0) - : undefined, + strategy: output.plan.execution.mode, + partitionCount: output.plan.chunkPlan.partitions.length, + totalBytes: output.plan.chunkPlan.totalBytesCompressed, } } diff --git a/packages/plugin-backfill/src/planner.test.ts b/packages/plugin-backfill/src/planner.test.ts index be8f8cb..4d83103 100644 --- a/packages/plugin-backfill/src/planner.test.ts +++ b/packages/plugin-backfill/src/planner.test.ts @@ -1,39 +1,52 @@ import { describe, expect, test } from 'bun:test' -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises' -import { join, resolve } from 'node:path' +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises' +import { dirname, join, resolve } from 'node:path' import { tmpdir } from 'node:os' import { resolveConfig } from '@chkit/core' +import { buildChunkExecutionSql, rewriteSelectColumns } from './chunking/sql.js' +import { generateIdempotencyToken } from './chunking/utils/ids.js' import { PlanSchema } from './options.js' import { buildBackfillPlan } from './planner.js' -import { injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js' -import { computeBackfillStateDir, computeEnvironmentFingerprint } from './state.js' +import { backfillPaths, computeBackfillStateDir, readPlan } from './state.js' function createMockQuery(opts: { - partitions?: Array<{ partition_id: string; total_rows: string; total_bytes: string; min_time: string; max_time: string }> + partitions?: Array<{ + partition_id: string + total_rows: string + total_bytes: string + total_uncompressed_bytes?: string + min_time: string + max_time: string + }> sortingKey?: string - sortKeyType?: string - sortKeyRanges?: Array<{ partition_id: string; min_val: string; max_val: string }> + columnRows?: Array<{ name: string; type: string }> } = {}): (sql: string) => Promise { const partitions = opts.partitions ?? 
[ - { partition_id: '202601', total_rows: '1000', total_bytes: '500000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 18:00:00' }, + { + partition_id: '202601', + total_rows: '1000', + total_bytes: '500000', + total_uncompressed_bytes: '1000000', + min_time: '2026-01-01 00:00:00', + max_time: '2026-01-01 18:00:00', + }, ] const sortingKey = opts.sortingKey ?? 'event_time' - const sortKeyType = opts.sortKeyType ?? 'DateTime' - const sortKeyRanges = opts.sortKeyRanges ?? [] + const columnRows = opts.columnRows ?? [{ name: 'event_time', type: 'DateTime' }] return async (sql: string) => { - if (sql.includes('system.parts')) return partitions as T[] - if (sql.includes('system.tables')) return [{ sorting_key: sortingKey }] as T[] - if (sql.includes('system.columns')) return [{ type: sortKeyType }] as T[] - if (sql.includes('min(') && sql.includes('max(')) return sortKeyRanges as T[] + if (sql.includes('SELECT 1 FROM')) return [{ ok: 1 }] as T[] + if (sql.includes('FROM system.parts')) return partitions as T[] + if (sql.includes('FROM system.tables')) return [{ sorting_key: sortingKey }] as T[] + if (sql.includes('FROM system.columns')) return columnRows as T[] return [] as T[] } } describe('@chkit/plugin-backfill planning', () => { - test('each plan gets a unique random id', async () => { + test('each plan gets a unique random id and canonical chunk plan', async () => { const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) const configPath = join(dir, 'clickhouse.config.ts') @@ -42,12 +55,37 @@ describe('@chkit/plugin-backfill planning', () => { schema: './schema.ts', metaDir: './chkit/meta', }) - const opts = PlanSchema.parse({ target: 'app.events', from: '2026-01-01T00:00:00.000Z', to: '2026-01-01T18:00:00.000Z' }) + const opts = PlanSchema.parse({ + target: 'app.events', + from: '2026-01-01T00:00:00.000Z', + to: '2026-01-01T18:00:00.000Z', + }) const mockQuery = createMockQuery({ partitions: [ - { partition_id: '202601a', total_rows: '500', 
total_bytes: '250000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 06:00:00' }, - { partition_id: '202601b', total_rows: '500', total_bytes: '250000', min_time: '2026-01-01 06:00:00', max_time: '2026-01-01 12:00:00' }, - { partition_id: '202601c', total_rows: '500', total_bytes: '250000', min_time: '2026-01-01 12:00:00', max_time: '2026-01-01 18:00:00' }, + { + partition_id: '202601a', + total_rows: '500', + total_bytes: '250000', + total_uncompressed_bytes: '500000', + min_time: '2026-01-01 00:00:00', + max_time: '2026-01-01 06:00:00', + }, + { + partition_id: '202601b', + total_rows: '500', + total_bytes: '250000', + total_uncompressed_bytes: '500000', + min_time: '2026-01-01 06:00:00', + max_time: '2026-01-01 12:00:00', + }, + { + partition_id: '202601c', + total_rows: '500', + total_bytes: '250000', + total_uncompressed_bytes: '500000', + min_time: '2026-01-01 12:00:00', + max_time: '2026-01-01 18:00:00', + }, ], }) @@ -56,12 +94,24 @@ describe('@chkit/plugin-backfill planning', () => { expect(first.plan.planId).not.toBe(second.plan.planId) expect(first.plan.planId).toMatch(/^[a-f0-9]{16}$/) - expect(first.plan.chunks).toHaveLength(3) - - const chunk = first.plan.chunks[0] - expect(chunk?.idempotencyToken.length).toBe(64) - expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events') - expect(chunk?.sqlTemplate).toContain(`insert_deduplication_token='${chunk?.idempotencyToken}'`) + expect(first.plan.chunkPlan.chunks).toHaveLength(3) + + const chunk = first.plan.chunkPlan.chunks[0] + const token = chunk ? generateIdempotencyToken(first.plan.planId, chunk.id) : '' + const sql = chunk + ? 
buildChunkExecutionSql({ + planId: first.plan.planId, + chunk, + target: first.plan.target, + sourceTarget: first.plan.execution.sourceTarget, + table: first.plan.chunkPlan.table, + idempotencyToken: token, + }) + : '' + + expect(token).toHaveLength(64) + expect(sql).toContain('INSERT INTO app.events') + expect(sql).toContain(`insert_deduplication_token='${token}'`) } finally { await rm(dir, { recursive: true, force: true }) } @@ -76,22 +126,13 @@ describe('@chkit/plugin-backfill planning', () => { schema: './schema.ts', metaDir: './chkit/meta', }) - const opts = PlanSchema.parse({ target: 'app.events', from: '2026-01-01T00:00:00.000Z', to: '2026-01-01T07:00:00.000Z' }) - const mockQuery = createMockQuery({ - partitions: [ - { partition_id: '202601a', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 02:00:00' }, - { partition_id: '202601b', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 02:00:00', max_time: '2026-01-01 04:00:00' }, - { partition_id: '202601c', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 04:00:00', max_time: '2026-01-01 06:00:00' }, - { partition_id: '202601d', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 06:00:00', max_time: '2026-01-01 07:00:00' }, - ], - }) - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) + const opts = PlanSchema.parse({ target: 'app.events' }) + const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() }) const raw = await readFile(output.planPath, 'utf8') - const persisted = JSON.parse(raw) as { planId: string; chunks: Array<{ id: string }> } + const persisted = JSON.parse(raw) as { planId: string; chunkPlan: { chunks: Array<{ id: string }> } } expect(persisted.planId).toBe(output.plan.planId) - expect(persisted.chunks.length).toBe(4) + expect(persisted.chunkPlan.chunks.length).toBe(1) expect(output.planPath).toContain('/plans/') 
} finally { await rm(dir, { recursive: true, force: true }) @@ -108,45 +149,24 @@ describe('@chkit/plugin-backfill planning', () => { metaDir: './chkit/meta', }) const opts = PlanSchema.parse({ target: 'app.events' }) - const mockQuery = createMockQuery({ - sortingKey: 'session_date', - sortKeyType: 'Date', + const output = await buildBackfillPlan({ + opts, + configPath, + config, + clickhouseQuery: createMockQuery({ + sortingKey: 'session_date', + columnRows: [{ name: 'session_date', type: 'Date' }], + }), }) - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - - expect(output.plan.sortKey?.column).toBe('session_date') - expect(output.plan.sortKey?.category).toBe('datetime') + expect(output.plan.chunkPlan.table.sortKeys[0]?.name).toBe('session_date') + expect(output.plan.chunkPlan.table.sortKeys[0]?.category).toBe('datetime') expect(output.plan.options.sortKeyColumn).toBe('session_date') } finally { await rm(dir, { recursive: true, force: true }) } }) - test('chunk IDs are deterministic within a plan (derived from planId)', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - - try { - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - const opts = PlanSchema.parse({ target: 'app.events' }) - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() }) - - const chunkIds = output.plan.chunks.map(c => c.id) - const uniqueIds = new Set(chunkIds) - expect(uniqueIds.size).toBe(chunkIds.length) - for (const id of chunkIds) { - expect(id).toMatch(/^[a-f0-9]{16}$/) - } - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - test('computes state dir from config by default and plugin override', () => { const config = resolveConfig({ schema: './schema.ts', @@ -161,7 +181,7 @@ describe('@chkit/plugin-backfill planning', () => { 
expect(overriddenDir).toBe(resolve('/tmp/project/custom-state')) }) - test('generates MV replay SQL when schema contains materialized view', async () => { + test('generates MV replay execution metadata and SQL when schema contains materialized view', async () => { const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) const configPath = join(dir, 'clickhouse.config.ts') const schemaPath = join(dir, 'schema.ts') @@ -196,348 +216,56 @@ export const events_mv = { metaDir: './chkit/meta', }) const opts = PlanSchema.parse({ target: 'app.events_agg' }) - const mockQuery = createMockQuery() - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - - expect(output.plan.strategy).toBe('mv_replay') - - const chunk = output.plan.chunks[0] - expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events_agg') - expect(chunk?.sqlTemplate).not.toContain('WITH _backfill_source AS (') - expect(chunk?.sqlTemplate).toContain('SELECT toStartOfHour(event_time)') - expect(chunk?.sqlTemplate).toContain('FROM app.events') - expect(chunk?.sqlTemplate).toContain('GROUP BY event_time') - expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0') - expect(chunk?.sqlTemplate).toContain(`insert_deduplication_token='${chunk?.idempotencyToken}'`) - expect(chunk?.sqlTemplate).not.toContain('FROM app.events_agg') - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - - test('MV replay rewrites SELECT columns to match target table order', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - const schemaPath = join(dir, 'schema.ts') - - try { - await writeFile( - schemaPath, - `export const sessions = { - kind: 'table', - database: 'app', - name: 'session_analytics', - columns: [ - { name: 'session_date', type: 'Date' }, - { name: 'session_id', type: 'String' }, - { name: 'skills', type: 'Array(String)' }, - { name: 'slash_commands', 
type: 'Array(String)' }, - { name: 'ingested_at', type: 'DateTime' }, - ], - engine: 'MergeTree', - primaryKey: ['session_date'], - orderBy: ['session_date', 'session_id'], -} -export const sessions_mv = { - kind: 'materialized_view', - database: 'app', - name: 'sessions_mv', - to: { database: 'app', name: 'session_analytics' }, - as: "SELECT *, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands FROM app.raw_sessions", -} -` - ) - - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - const opts = PlanSchema.parse({ target: 'app.session_analytics' }) - const mockQuery = createMockQuery() - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - - expect(output.plan.strategy).toBe('mv_replay') - - const chunk = output.plan.chunks[0] - expect(chunk?.sqlTemplate).toContain('INSERT INTO app.session_analytics') - expect(chunk?.sqlTemplate).not.toContain('INSERT INTO app.session_analytics (') - expect(chunk?.sqlTemplate).toContain( - "SELECT session_date, session_id, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands, ingested_at" - ) - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - - test('omits insert_deduplication_token when requireIdempotencyToken is false', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - - try { - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - const opts = PlanSchema.parse({ target: 'app.events', requireIdempotencyToken: false }) - const mockQuery = createMockQuery() - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - - const chunk = output.plan.chunks[0] - expect(chunk?.idempotencyToken).toBe('') - expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0') - 
expect(chunk?.sqlTemplate).not.toContain('insert_deduplication_token') - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - - test('uses partition strategy when no MV is found', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - - try { - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - const opts = PlanSchema.parse({ target: 'app.events' }) - const mockQuery = createMockQuery() - - const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - - expect(output.plan.strategy).toBe('partition') - - const chunk = output.plan.chunks[0] - expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events') - expect(chunk?.sqlTemplate).toContain('FROM app.events') - expect(chunk?.sqlTemplate).toContain('_partition_id') - expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0') - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - - test('throws when no partitions found', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - - try { - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - const opts = PlanSchema.parse({ target: 'app.events' }) - const mockQuery = createMockQuery({ partitions: [] }) + const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() }) - await expect( - buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery }) - ).rejects.toThrow('No partitions found') + expect(output.plan.execution.mode).toBe('mv_replay') + + const chunk = output.plan.chunkPlan.chunks[0] + const sql = chunk + ? 
buildChunkExecutionSql({ + planId: output.plan.planId, + chunk, + target: output.plan.target, + sourceTarget: output.plan.execution.sourceTarget, + table: output.plan.chunkPlan.table, + mvAsQuery: output.plan.execution.mvAsQuery, + targetColumns: output.plan.execution.targetColumns, + idempotencyToken: generateIdempotencyToken(output.plan.planId, chunk.id), + }) + : '' + + expect(sql).toContain('INSERT INTO app.events_agg') + expect(sql).toContain('SELECT toStartOfHour(event_time)') + expect(sql).toContain('FROM app.events') + expect(sql).toContain('GROUP BY event_time') + expect(sql).toContain('SETTINGS async_insert=0') + expect(sql).not.toContain('FROM app.events_agg') } finally { await rm(dir, { recursive: true, force: true }) } }) -}) - -describe('rewriteSelectColumns', () => { - test('reorders SELECT columns to match target table order', () => { - const query = 'SELECT *, _foo as bar, _baz as qux FROM source WHERE status = 1' - const result = rewriteSelectColumns(query, ['col_a', 'bar', 'col_b', 'qux']) - - expect(result).toContain('SELECT col_a, _foo as bar, col_b, _baz as qux') - expect(result).toContain('FROM source') - expect(result).toContain('WHERE status = 1') - }) - - test('preserves WITH clause when rewriting SELECT', () => { - const query = [ - 'WITH', - " arrayDistinct(extractAll(content, '\\w+')) AS _skills,", - " toUInt64(JSONExtractFloat(meta, 'input')) AS _input_tokens", - 'SELECT *, _skills as skills, _input_tokens as input_tokens', - 'FROM app.sessions', - 'WHERE length(content) > 0', - ].join('\n') - - const result = rewriteSelectColumns(query, ['session_id', 'skills', 'content', 'input_tokens']) - - expect(result).toContain('arrayDistinct') - expect(result).toContain('_input_tokens') - expect(result).toContain('SELECT session_id, _skills as skills, content, _input_tokens as input_tokens') - expect(result).toContain('FROM app.sessions') - expect(result).toContain('WHERE length(content) > 0') - }) - test('handles SELECT without star 
expansion', () => { - const query = 'SELECT toStartOfHour(event_time) AS event_time, count() AS cnt FROM events GROUP BY event_time' - const result = rewriteSelectColumns(query, ['cnt', 'event_time']) + test('MV replay rewrites SELECT columns to match target table order', () => { + const rewritten = rewriteSelectColumns( + "SELECT *, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands FROM app.raw_sessions", + ['session_date', 'session_id', 'skills', 'slash_commands', 'ingested_at'] + ) - expect(result).toContain('SELECT count() AS cnt, toStartOfHour(event_time) AS event_time') - expect(result).toContain('FROM events') - expect(result).toContain('GROUP BY event_time') + expect(rewritten).toContain('SELECT session_date, session_id, extractAll(content, \'skill\') AS skills, extractAll(content, \'cmd\') AS slash_commands, ingested_at') + expect(rewritten).toContain('FROM app.raw_sessions') }) - test('returns query unchanged when SELECT/FROM cannot be found', () => { - const query = 'INSERT INTO t VALUES (1, 2)' - const result = rewriteSelectColumns(query, ['a', 'b']) + test('MV replay preserves DISTINCT when rewriting projection columns', () => { + const rewritten = rewriteSelectColumns( + 'SELECT DISTINCT event_time AS ts, user_id AS uid FROM app.events', + ['uid', 'ts'] + ) - expect(result).toBe(query) + expect(rewritten).toContain('SELECT DISTINCT user_id AS uid, event_time AS ts') + expect(rewritten).toContain('FROM app.events') }) -}) - -describe('injectSortKeyFilter', () => { - const from = '2025-01-01T00:00:00.000Z' - const to = '2025-01-01T06:00:00.000Z' - - test('injects WHERE before GROUP BY for datetime filter', () => { - const query = 'SELECT toStartOfHour(event_time) AS event_time, count() AS count FROM app.events GROUP BY event_time' - const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to) - - expect(result).toContain("WHERE event_time >= parseDateTimeBestEffort('2025-01-01T00:00:00.000Z')") - 
expect(result).toContain("AND event_time < parseDateTimeBestEffort('2025-01-01T06:00:00.000Z')") - expect(result).toContain('GROUP BY event_time') - expect(result.indexOf('WHERE')).toBeLessThan(result.indexOf('GROUP BY')) - }) - - test('appends AND to existing WHERE clause', () => { - const query = 'SELECT * FROM app.events WHERE status = 1' - const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to) - - expect(result).toContain('WHERE status = 1') - expect(result).toContain("AND event_time >= parseDateTimeBestEffort('") - expect(result).toContain("AND event_time < parseDateTimeBestEffort('") - expect(result.match(/WHERE/g)?.length).toBe(1) - }) - - test('numeric sort key uses direct comparison', () => { - const query = 'SELECT * FROM app.events WHERE status = 1' - const result = injectSortKeyFilter(query, 'id', 'numeric', '100', '200') - - expect(result).toContain("AND id >= '100'") - expect(result).toContain("AND id < '200'") - expect(result).not.toContain('parseDateTimeBestEffort') - }) - - test('handles query with WHERE and QUALIFY', () => { - const query = [ - 'SELECT *, skills', - 'FROM app.sessions AS s', - 'WHERE length(timestamps) > 0', - "QUALIFY ROW_NUMBER() OVER (PARTITION BY s.id ORDER BY s.ts DESC) = 1", - ].join('\n') - const result = injectSortKeyFilter(query, 'session_date', 'datetime', from, to) - - expect(result).toContain('WHERE length(timestamps) > 0') - expect(result).toContain("AND session_date >= parseDateTimeBestEffort('") - expect(result.indexOf('AND session_date')).toBeLessThan(result.indexOf('QUALIFY')) - }) - - test('handles MV query with WITH column expressions', () => { - const query = [ - 'WITH', - " arrayDistinct(arrayFilter(x -> x != '', extractAll(content, '\\\\w+'))) AS _skills", - 'SELECT', - ' id,', - ' _skills as skills,', - ' ts', - 'FROM app.sessions', - 'WHERE length(content) > 0', - ].join('\n') - const result = injectSortKeyFilter(query, 'ts', 'datetime', from, to) - - 
expect(result.match(/WHERE/g)?.length).toBe(1) - expect(result).toContain("AND ts >= parseDateTimeBestEffort('") - expect(result).toContain('arrayDistinct') - }) - - test('injects WHERE at end when query has no WHERE and no trailing clauses', () => { - const query = 'SELECT * FROM app.events' - const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to) - - expect(result).toContain("WHERE event_time >= parseDateTimeBestEffort('") - expect(result).toContain("AND event_time < parseDateTimeBestEffort('") - }) - - test('ignores WHERE inside parenthesized subquery', () => { - const query = 'SELECT * FROM (SELECT * FROM app.events WHERE inner = 1) AS sub GROUP BY id' - const result = injectSortKeyFilter(query, 'ts', 'datetime', from, to) - - expect(result).toContain("WHERE ts >= parseDateTimeBestEffort('") - expect(result.indexOf("WHERE ts")).toBeLessThan(result.indexOf('GROUP BY')) - expect(result).toContain('WHERE inner = 1') - }) -}) - -describe('computeEnvironmentFingerprint', () => { - test('returns undefined when clickhouse is undefined', () => { - expect(computeEnvironmentFingerprint(undefined)).toBeUndefined() - }) - - test('returns correct structure with fingerprint, url origin, and database', () => { - const env = computeEnvironmentFingerprint({ - url: 'https://my-cluster.clickhouse.cloud:8443/some/path', - database: 'analytics', - }) - expect(env).toBeDefined() - expect(env?.fingerprint).toMatch(/^[a-f0-9]{16}$/) - expect(env?.url).toBe('https://my-cluster.clickhouse.cloud:8443') - expect(env?.database).toBe('analytics') - }) - - test('same URL+database produces same fingerprint', () => { - const a = computeEnvironmentFingerprint({ url: 'https://host:8443/path', database: 'db1' }) - const b = computeEnvironmentFingerprint({ url: 'https://host:8443/other', database: 'db1' }) - - expect(a?.fingerprint).toBe(b?.fingerprint) - }) - - test('different database produces different fingerprint', () => { - const a = computeEnvironmentFingerprint({ url: 
'https://host:8443', database: 'staging' }) - const b = computeEnvironmentFingerprint({ url: 'https://host:8443', database: 'production' }) - - expect(a?.fingerprint).not.toBe(b?.fingerprint) - }) - - test('different host produces different fingerprint', () => { - const a = computeEnvironmentFingerprint({ url: 'https://staging.ch.cloud:8443', database: 'db' }) - const b = computeEnvironmentFingerprint({ url: 'https://prod.ch.cloud:8443', database: 'db' }) - - expect(a?.fingerprint).not.toBe(b?.fingerprint) - }) -}) - -describe('environment binding in plan', () => { - test('plan includes environment when clickhouse is provided', async () => { - const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) - const configPath = join(dir, 'clickhouse.config.ts') - - try { - const config = resolveConfig({ - schema: './schema.ts', - metaDir: './chkit/meta', - }) - - const output = await buildBackfillPlan({ - opts: PlanSchema.parse({ target: 'app.events' }), - configPath, - config, - clickhouse: { url: 'https://my-cluster.ch.cloud:8443', database: 'analytics' }, - clickhouseQuery: createMockQuery(), - }) - - expect(output.plan.environment).toBeDefined() - expect(output.plan.environment?.fingerprint).toMatch(/^[a-f0-9]{16}$/) - expect(output.plan.environment?.url).toBe('https://my-cluster.ch.cloud:8443') - expect(output.plan.environment?.database).toBe('analytics') - } finally { - await rm(dir, { recursive: true, force: true }) - } - }) - - test('plan omits environment when clickhouse connection info is not provided', async () => { + test('omits idempotency token when disabled', async () => { const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) const configPath = join(dir, 'clickhouse.config.ts') @@ -546,50 +274,57 @@ describe('environment binding in plan', () => { schema: './schema.ts', metaDir: './chkit/meta', }) + const opts = PlanSchema.parse({ target: 'app.events', requireIdempotencyToken: false }) + const output = await buildBackfillPlan({ opts, 
configPath, config, clickhouseQuery: createMockQuery() }) - const output = await buildBackfillPlan({ - opts: PlanSchema.parse({ target: 'app.events' }), - configPath, - config, - clickhouseQuery: createMockQuery(), - }) - - expect(output.plan.environment).toBeUndefined() + const chunk = output.plan.chunkPlan.chunks[0] + const sql = chunk + ? buildChunkExecutionSql({ + planId: output.plan.planId, + chunk, + target: output.plan.target, + sourceTarget: output.plan.execution.sourceTarget, + table: output.plan.chunkPlan.table, + idempotencyToken: '', + }) + : '' + + expect(output.plan.execution.requireIdempotencyToken).toBe(false) + expect(sql).toContain('SETTINGS async_insert=0') + expect(sql).not.toContain('insert_deduplication_token') } finally { await rm(dir, { recursive: true, force: true }) } }) - test('plan includes environment from different clickhouse configs', async () => { + test('rejects persisted legacy plans with an actionable error', async () => { const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-')) const configPath = join(dir, 'clickhouse.config.ts') + const planId = 'deadbeefdeadbeef' try { const config = resolveConfig({ schema: './schema.ts', metaDir: './chkit/meta', }) - const opts = PlanSchema.parse({ target: 'app.events' }) - - const staging = await buildBackfillPlan({ - opts, - configPath, - config, - clickhouse: { url: 'https://staging.ch.cloud:8443', database: 'analytics' }, - clickhouseQuery: createMockQuery(), - }) - - const production = await buildBackfillPlan({ - opts, + const stateDir = computeBackfillStateDir(config, configPath) + const { planPath } = backfillPaths(stateDir, planId) + await mkdir(dirname(planPath), { recursive: true }) + + await writeFile(planPath, JSON.stringify({ + planId, + target: 'app.events', + createdAt: '2026-01-01T00:00:00.000Z', + from: '2026-01-01T00:00:00.000Z', + to: '2026-01-01T01:00:00.000Z', + chunks: [], + })) + + await expect(readPlan({ + planId, configPath, config, - clickhouse: { url: 
'https://prod.ch.cloud:8443', database: 'analytics' }, - clickhouseQuery: createMockQuery(), - }) - - expect(staging.plan.environment?.url).toBe('https://staging.ch.cloud:8443') - expect(production.plan.environment?.url).toBe('https://prod.ch.cloud:8443') - expect(staging.plan.environment?.fingerprint).not.toBe(production.plan.environment?.fingerprint) + })).rejects.toThrow('uses a previous chunking format') } finally { await rm(dir, { recursive: true, force: true }) } diff --git a/packages/plugin-backfill/src/planner.ts b/packages/plugin-backfill/src/planner.ts index 6f24e02..055c1b2 100644 --- a/packages/plugin-backfill/src/planner.ts +++ b/packages/plugin-backfill/src/planner.ts @@ -1,10 +1,10 @@ import { dirname } from 'node:path' -import { loadSchemaDefinitions } from '@chkit/core/schema-loader' import type { ResolvedChxConfig } from '@chkit/core' +import { loadSchemaDefinitions } from '@chkit/core/schema-loader' -import { analyzeAndChunk } from './chunking/analyze.js' -import { buildChunkSql } from './chunking/sql.js' +import { encodeChunkPlanForPersistence } from './chunking/boundary-codec.js' +import { generateChunkPlan } from './chunking/planner.js' import { findMvForTarget } from './detect.js' import { BackfillConfigError } from './errors.js' import type { PlanOptions } from './options.js' @@ -12,20 +12,18 @@ import { backfillPaths, computeBackfillStateDir, computeEnvironmentFingerprint, + nowIso, writeJson, } from './state.js' -import type { - BackfillChunk, - BuildBackfillPlanOutput, - PartitionInfo, -} from './types.js' +import type { BuildBackfillPlanOutput } from './types.js' export async function buildBackfillPlan(input: { opts: PlanOptions configPath: string config: Pick clickhouse?: { url: string; database: string } - clickhouseQuery: (sql: string) => Promise + clickhouseQuery: (sql: string, settings?: Record) => Promise + querySettings?: Record }): Promise { const { opts } = input const [database, table] = opts.target.split('.') @@ -33,33 +31,36 
@@ export async function buildBackfillPlan(input: { throw new BackfillConfigError('Invalid target format. Expected .') } - const env = computeEnvironmentFingerprint(input.clickhouse) - - // 1. Analyze table and build planned chunks - const { planId, partitions, sortKey, chunks: plannedChunks } = await analyzeAndChunk({ + const chunkPlan = await generateChunkPlan({ database, table, from: opts.from, to: opts.to, - maxChunkBytes: opts.maxChunkBytes, - requireIdempotencyToken: opts.requireIdempotencyToken, + targetChunkBytes: opts.maxChunkBytes, query: input.clickhouseQuery, + querySettings: input.querySettings, }) - if (partitions.length === 0) { + const firstPartition = chunkPlan.partitions[0] + if (!firstPartition) { throw new BackfillConfigError( `No partitions found for ${opts.target}${opts.from || opts.to ? ' within the specified time range' : ''}. The table may be empty.` ) } - const firstPartition = partitions[0] as PartitionInfo - const derivedFrom = opts.from ?? partitions.reduce((min, p) => (p.minTime < min ? p.minTime : min), firstPartition.minTime) - const derivedTo = opts.to ?? partitions.reduce((max, p) => (p.maxTime > max ? p.maxTime : max), firstPartition.maxTime) + const env = computeEnvironmentFingerprint(input.clickhouse) + const derivedFrom = opts.from ?? chunkPlan.partitions.reduce( + (min, partition) => (partition.minTime < min ? partition.minTime : min), + firstPartition.minTime + ) + const derivedTo = opts.to ?? chunkPlan.partitions.reduce( + (max, partition) => (partition.maxTime > max ? partition.maxTime : max), + firstPartition.maxTime + ) const stateDir = computeBackfillStateDir(input.config, input.configPath, opts.stateDir) - const paths = backfillPaths(stateDir, planId) + const paths = backfillPaths(stateDir, chunkPlan.planId) - // 2. 
Detect MV for replay strategy let mvAsQuery: string | undefined let targetColumns: string[] | undefined @@ -71,62 +72,37 @@ export async function buildBackfillPlan(input: { if (mv) { mvAsQuery = mv.as const tableDef = definitions.find( - (d) => d.kind === 'table' && d.database === database && d.name === table + (definition) => definition.kind === 'table' && definition.database === database && definition.name === table ) - if (tableDef && tableDef.kind === 'table') { - targetColumns = tableDef.columns.map((c) => c.name) + if (tableDef?.kind === 'table') { + targetColumns = tableDef.columns.map((column) => column.name) } } } catch { - // Schema load failed — fall back to direct copy + // Schema load failed, fall back to direct copy. } - // 3. Stamp SQL on each planned chunk to produce BackfillChunk[] - const chunks: BackfillChunk[] = plannedChunks.map(planned => { - const sqlTemplate = buildChunkSql({ - planId, - chunk: planned, - target: opts.target, - sortKey, - mvAsQuery, - targetColumns, - }) - - return { - id: planned.id, - from: planned.from, - to: planned.to, - status: 'pending' as const, - attempts: 0, - idempotencyToken: planned.idempotencyToken, - sqlTemplate, - partitionId: planned.partitionId, - estimatedBytes: planned.estimatedBytes, - ...(planned.sortKeyFrom !== undefined ? { sortKeyFrom: planned.sortKeyFrom } : {}), - ...(planned.sortKeyTo !== undefined ? { sortKeyTo: planned.sortKeyTo } : {}), - } - }) - - const strategy = mvAsQuery ? 'mv_replay' : 'partition' - const plan = { - planId, + planId: chunkPlan.planId, target: opts.target, - createdAt: '1970-01-01T00:00:00.000Z', - status: 'planned' as const, - strategy: strategy as 'partition' | 'mv_replay', + createdAt: nowIso(), ...(env ? { environment: env } : {}), from: derivedFrom, to: derivedTo, - chunks, - partitions, - sortKey, + chunkPlan, + execution: { + mode: mvAsQuery ? 'mv_replay' as const : 'copy' as const, + sourceTarget: opts.target, + ...(mvAsQuery ? 
{ mvAsQuery } : {}), + ...(targetColumns ? { targetColumns } : {}), + requireIdempotencyToken: opts.requireIdempotencyToken, + }, options: { maxChunkBytes: opts.maxChunkBytes, maxParallelChunks: opts.maxParallelChunks, maxRetriesPerChunk: opts.maxRetriesPerChunk, requireIdempotencyToken: opts.requireIdempotencyToken, - sortKeyColumn: sortKey?.column, + sortKeyColumn: chunkPlan.table.sortKeys[0]?.name, }, policy: { requireDryRunBeforeRun: opts.requireDryRunBeforeRun, @@ -140,7 +116,10 @@ export async function buildBackfillPlan(input: { }, } - await writeJson(paths.planPath, plan) + await writeJson(paths.planPath, { + ...plan, + chunkPlan: encodeChunkPlanForPersistence(plan.chunkPlan), + }) return { plan, diff --git a/packages/plugin-backfill/src/plugin.test.ts b/packages/plugin-backfill/src/plugin.test.ts index 275fe78..b84c98b 100644 --- a/packages/plugin-backfill/src/plugin.test.ts +++ b/packages/plugin-backfill/src/plugin.test.ts @@ -1,7 +1,16 @@ import { describe, expect, test } from 'bun:test' +import { readFileSync } from 'node:fs' +import * as sdk from './sdk.js' +import * as root from './index.js' import { backfill, createBackfillPlugin } from './plugin.js' +const pluginBackfillPackage = JSON.parse( + readFileSync(new URL('../package.json', import.meta.url), 'utf8') +) as { + exports: Record +} + describe('@chkit/plugin-backfill plugin surface', () => { test('exposes commands and typed registration helper', () => { const plugin = createBackfillPlugin() @@ -21,4 +30,34 @@ describe('@chkit/plugin-backfill plugin surface', () => { expect(registration.enabled).toBe(true) expect(registration.options?.maxParallelChunks).toBe(4) }) + + test('keeps internals off the package root and exposes them via sdk', () => { + expect(root).not.toHaveProperty('analyzeAndChunk') + expect(root).not.toHaveProperty('executeBackfill') + + expect(sdk).toHaveProperty('analyzeAndChunk') + expect(sdk).toHaveProperty('configureSync') + expect(sdk).toHaveProperty('generateChunkPlan') + 
expect(sdk).toHaveProperty('getBackfillLogger') + expect(sdk).toHaveProperty('getConsoleSink') + expect(sdk).toHaveProperty('executeBackfill') + expect(sdk).toHaveProperty('buildChunkExecutionSql') + expect(sdk).toHaveProperty('buildWhereClauseFromChunk') + expect(sdk).toHaveProperty('encodeChunkPlanForPersistence') + expect(sdk).toHaveProperty('decodeChunkPlanFromPersistence') + expect(sdk).toHaveProperty('generateIdempotencyToken') + }) + + test('package exports declare root and sdk subpath separately', () => { + expect(pluginBackfillPackage.exports['.']).toEqual({ + source: './src/index.ts', + types: './dist/index.d.ts', + default: './dist/index.js', + }) + expect(pluginBackfillPackage.exports['./sdk']).toEqual({ + source: './src/sdk.ts', + types: './dist/sdk.d.ts', + default: './dist/sdk.js', + }) + }) }) diff --git a/packages/plugin-backfill/src/plugin.ts b/packages/plugin-backfill/src/plugin.ts index 53079d8..449c58c 100644 --- a/packages/plugin-backfill/src/plugin.ts +++ b/packages/plugin-backfill/src/plugin.ts @@ -2,6 +2,8 @@ import { createClickHouseExecutor } from '@chkit/clickhouse' import { wrapPluginRun } from '@chkit/core' import { executeBackfill, type BackfillProgress } from './async-backfill.js' +import { buildChunkExecutionSql } from './chunking/sql.js' +import { generateIdempotencyToken } from './chunking/utils/ids.js' import { BackfillConfigError } from './errors.js' import { PLAN_FLAGS, @@ -112,11 +114,22 @@ async function runBackfill(input: { const result = await executeBackfill({ executor: db, planId: plan.planId, - chunks: plan.chunks.map((c) => ({ id: c.id, from: c.from, to: c.to })), + chunks: plan.chunkPlan.chunks.map((chunk) => ({ id: chunk.id })), buildQuery: (chunk) => { - const planChunk = plan.chunks.find((c) => c.id === chunk.id) + const planChunk = plan.chunkPlan.chunks.find((candidate) => candidate.id === chunk.id) if (!planChunk) throw new Error(`Chunk ${chunk.id} not found in plan`) - return planChunk.sqlTemplate + return 
buildChunkExecutionSql({ + planId: plan.planId, + chunk: planChunk, + target: plan.target, + sourceTarget: plan.execution.sourceTarget, + table: plan.chunkPlan.table, + mvAsQuery: plan.execution.mvAsQuery, + targetColumns: plan.execution.targetColumns, + idempotencyToken: plan.execution.requireIdempotencyToken + ? generateIdempotencyToken(plan.planId, planChunk.id) + : '', + }) }, concurrency: input.concurrency, pollIntervalMs: input.pollIntervalMs, @@ -205,22 +218,25 @@ export function createBackfillPlugin(options: PluginConfig = {}): BackfillPlugin configPath: context.configPath, config: context.config, clickhouse: context.config.clickhouse, - clickhouseQuery: async (sql: string) => { - const result = await db.query(sql) + clickhouseQuery: async (sql: string, settings?: Record) => { + const result = await db.query(sql, settings) return result as T[] }, + // ObsessionDB (ClickHouse Cloud) enables parallel replicas by default, + // which inflates aggregate results (count, GROUP BY). Disable for planning + // queries until ObsessionDB handles it at the profile level. + querySettings: { enable_parallel_replicas: 0 }, }) const payload = planPayload(output) if (context.jsonMode) { context.print(payload) } else { - const partitionCount = output.plan.partitions?.length ?? 0 - const totalBytes = output.plan.partitions - ? formatBytes(output.plan.partitions.reduce((sum, p) => sum + p.bytesOnDisk, 0)) - : 'unknown' - const sortKeyLabel = output.plan.sortKey - ? `, sort key: ${output.plan.sortKey.column} (${output.plan.sortKey.category})` + const partitionCount = output.plan.chunkPlan.partitions.length + const totalBytes = formatBytes(output.plan.chunkPlan.totalBytesCompressed) + const primarySortKey = output.plan.chunkPlan.table.sortKeys[0] + const sortKeyLabel = primarySortKey + ? 
`, sort key: ${primarySortKey.name} (${primarySortKey.category})` : '' context.print( `Backfill plan ${payload.planId} for ${payload.target} (${payload.chunkCount} chunks across ${partitionCount} partitions, ~${totalBytes}${sortKeyLabel}) -> ${payload.planPath}` diff --git a/packages/plugin-backfill/src/queries.ts b/packages/plugin-backfill/src/queries.ts index 66780ef..ffdef75 100644 --- a/packages/plugin-backfill/src/queries.ts +++ b/packages/plugin-backfill/src/queries.ts @@ -35,8 +35,8 @@ export async function getBackfillStatus(input: { target: plan.target, status: 'planned', totals: { - total: plan.chunks.length, - pending: plan.chunks.length, + total: plan.chunkPlan.chunks.length, + pending: plan.chunkPlan.chunks.length, submitted: 0, running: 0, done: 0, @@ -108,7 +108,7 @@ export async function getBackfillDoctorReport(input: { planId: plan.planId, target: plan.target, status: 'planned' as const, - totals: { total: plan.chunks.length, pending: plan.chunks.length, submitted: 0, running: 0, done: 0, failed: 0 }, + totals: { total: plan.chunkPlan.chunks.length, pending: plan.chunkPlan.chunks.length, submitted: 0, running: 0, done: 0, failed: 0 }, rowsWritten: 0, updatedAt: plan.createdAt, runPath: paths.runPath, diff --git a/packages/plugin-backfill/src/sdk.ts b/packages/plugin-backfill/src/sdk.ts new file mode 100644 index 0000000..99dcbf8 --- /dev/null +++ b/packages/plugin-backfill/src/sdk.ts @@ -0,0 +1,48 @@ +export * from '@logtape/logtape' + +export { executeBackfill, syncProgress } from './async-backfill.js' +export { analyzeAndChunk, analyzeTable } from './chunking/analyze.js' +export { + decodeChunkPlanFromPersistence, + encodeChunkPlanForPersistence, +} from './chunking/boundary-codec.js' +export { generateChunkPlan } from './chunking/planner.js' +export { + CHKIT_BACKFILL_LOGGER_CATEGORY, + CHKIT_LOGGER_CATEGORY, + getBackfillLogger, +} from './logging.js' +export { + buildChunkExecutionSql, + buildWhereClauseFromChunk, + injectSortKeyFilter, + 
rewriteSelectColumns, +} from './chunking/sql.js' +export { generateIdempotencyToken } from './chunking/utils/ids.js' + +export type { + BackfillOptions, + BackfillChunkState, + BackfillProgress, + BackfillResult, +} from './async-backfill.js' + +export type { + AnalyzeAndChunkInput, + AnalyzeAndChunkResult, + AnalyzeTableInput, + AnalyzeTableResult, +} from './chunking/analyze.js' + +export type { + Chunk, + ChunkDerivationStep, + ChunkPlan, + ChunkRange, + EstimateConfidence, + EstimateReason, + FocusedValue, + Partition, + PartitionDiagnostics, + SortKey, +} from './chunking/types.js' diff --git a/packages/plugin-backfill/src/state.ts b/packages/plugin-backfill/src/state.ts index 45dd900..2b47d52 100644 --- a/packages/plugin-backfill/src/state.ts +++ b/packages/plugin-backfill/src/state.ts @@ -5,6 +5,7 @@ import { dirname, join, resolve } from 'node:path' import type { ResolvedChxConfig } from '@chkit/core' +import { decodeChunkPlanFromPersistence } from './chunking/boundary-codec.js' import { BackfillConfigError } from './errors.js' import type { BackfillEnvironment, @@ -89,6 +90,13 @@ async function readJsonMaybe(filePath: string): Promise { return JSON.parse(await readFile(filePath, 'utf8')) as T } +function decodePlan(plan: BackfillPlanState): BackfillPlanState { + return { + ...plan, + chunkPlan: decodeChunkPlanFromPersistence(plan.chunkPlan), + } +} + export async function writeJson(filePath: string, value: unknown): Promise { await mkdir(dirname(filePath), { recursive: true }) await writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, 'utf8') @@ -102,12 +110,21 @@ export async function readPlan(input: { }): Promise { const stateDir = computeBackfillStateDir(input.config, input.configPath, input.stateDir) const paths = backfillPaths(stateDir, input.planId) - const plan = await readJsonMaybe(paths.planPath) - if (!plan) { + const rawPlan = await readJsonMaybe>(paths.planPath) + if (!rawPlan) { throw new BackfillConfigError(`Backfill plan not found: 
${paths.planPath}`) } + + if (!('chunkPlan' in rawPlan)) { + throw new BackfillConfigError( + `Backfill plan ${input.planId} uses a previous chunking format and can no longer be loaded. Recreate the plan.` + ) + } + + const plan = rawPlan as unknown as BackfillPlanState + return { - plan, + plan: decodePlan(plan), planPath: paths.planPath, stateDir, } @@ -132,7 +149,7 @@ export function summarizeRunStatus( plan: BackfillPlanState, ): BackfillStatusSummary { const totals = { - total: plan.chunks.length, + total: plan.chunkPlan.chunks.length, pending: 0, submitted: 0, running: 0, @@ -141,7 +158,7 @@ export function summarizeRunStatus( } let rowsWritten = 0 - for (const chunk of plan.chunks) { + for (const chunk of plan.chunkPlan.chunks) { const state = run.progress[chunk.id] if (!state) { totals.pending += 1 diff --git a/packages/plugin-backfill/src/types.ts b/packages/plugin-backfill/src/types.ts index f3b50da..4f02865 100644 --- a/packages/plugin-backfill/src/types.ts +++ b/packages/plugin-backfill/src/types.ts @@ -1,7 +1,9 @@ import type { ChxInlinePluginRegistration, ResolvedChxConfig } from '@chkit/core' import type { BackfillProgress } from './async-backfill.js' -import type { PartitionInfo, SortKeyInfo } from './chunking/types.js' +import type { + ChunkPlan, +} from './chunking/types.js' import type { PluginConfig } from './options.js' /** @deprecated Use {@link PluginConfig} instead. 
*/ @@ -16,42 +18,41 @@ export interface BackfillEnvironment { export type BackfillPlanStatus = 'planned' | 'running' | 'paused' | 'completed' | 'failed' | 'cancelled' -export type { ChunkBoundary, PartitionInfo, PlannedChunk, SortKeyInfo } from './chunking/types.js' - -export interface BackfillChunk { - id: string - from: string - to: string - status: 'pending' | 'running' | 'done' | 'failed' | 'skipped' - attempts: number - idempotencyToken: string - sqlTemplate: string - lastError?: string - partitionId: string - estimatedBytes: number - sortKeyFrom?: string - sortKeyTo?: string +export type { + Chunk, + ChunkDerivationStep, + ChunkPlan, + ChunkRange, + EstimateConfidence, + EstimateReason, + FocusedValue, + Partition, + PartitionDiagnostics, + SortKey, +} from './chunking/types.js' + +export interface BackfillExecutionPlan { + mode: 'copy' | 'mv_replay' + sourceTarget: string + mvAsQuery?: string + targetColumns?: string[] + requireIdempotencyToken: boolean } export interface BackfillPlanState { planId: string target: string createdAt: string - status: BackfillPlanStatus - strategy?: 'table' | 'mv_replay' | 'partition' environment?: BackfillEnvironment from: string to: string - chunks: BackfillChunk[] - partitions?: PartitionInfo[] - sortKey?: SortKeyInfo + chunkPlan: ChunkPlan + execution: BackfillExecutionPlan options: { - chunkHours?: number maxChunkBytes?: number maxParallelChunks: number maxRetriesPerChunk: number requireIdempotencyToken: boolean - timeColumn?: string sortKeyColumn?: string } policy: { diff --git a/packages/plugin-obsessiondb/package.json b/packages/plugin-obsessiondb/package.json index 3e4eee5..01b459a 100644 --- a/packages/plugin-obsessiondb/package.json +++ b/packages/plugin-obsessiondb/package.json @@ -41,6 +41,7 @@ "clean": "rm -rf dist" }, "dependencies": { + "@chkit/clickhouse": "workspace:*", "@chkit/core": "workspace:*", "@orpc/client": "1.13.4", "@orpc/contract": "1.13.4", diff --git a/packages/plugin-obsessiondb/src/index.ts 
b/packages/plugin-obsessiondb/src/index.ts index 77efd28..d17c6bd 100644 --- a/packages/plugin-obsessiondb/src/index.ts +++ b/packages/plugin-obsessiondb/src/index.ts @@ -12,6 +12,10 @@ import { loadSelectedService } from './service/storage.js' export { loadCredentials, resolveBaseUrl, type Credentials } from './auth/index.js' export { createJobsClient, type JobsClient } from './backfill/index.js' +export { + loadSelectedService, +} from './service/storage.js' +export type { SelectedService } from './service/types.js' export type ObsessionDBPluginOptions = Record diff --git a/thoughts/smart-chunking-e2e-scenarios.md b/thoughts/smart-chunking-e2e-scenarios.md new file mode 100644 index 0000000..d165593 --- /dev/null +++ b/thoughts/smart-chunking-e2e-scenarios.md @@ -0,0 +1,194 @@ +# Smart Chunking E2E Test Scenarios + +Remaining scenarios to implement. Each gets its own table in the seed script and a `describe` block in `smart-chunking.e2e.test.ts`. + +Implemented so far: +- [x] Scenario 1: Skewed Power Law (80/20 single hot key) +- [x] Scenario 2: Multiple Hot Keys (3 tenants at ~30% each) + +--- + +## Scenario 3: Empty Ranges / Sparse Numeric Sort Key + +**Table:** `chkit_e2e_chunking_sparse_numeric` +**Sort key:** `(id UInt64)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- ~5,000 rows with `id` in range `[1, 10]` +- ~5,000 rows with `id` in range `[1_000_000, 1_000_010]` +- No values between 10 and 1,000,000 +- Padding column for byte control + +**What this tests:** +- Equal-width splitting will carve the huge numeric gap into many empty intervals +- Quantile binary search must handle the gap without producing empty chunks +- The system should not emit chunks with 0 rows +- After merge, only chunks covering the two clusters should remain +- Full row coverage despite the sparse distribution + +**Key assertions:** +- No chunk has 0 estimated rows +- All chunks produced have `estimate.rows > 0` +- Total counted rows = total actual rows +- Chunk count 
is reasonable (not dozens of empty chunks) + +--- + +## Scenario 4: Single Distinct Value in Sort Key + +**Table:** `chkit_e2e_chunking_single_value` +**Sort key:** `(status String, seq UInt64)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- 10,000 rows all with `status = 'active'`, `seq` 0-9999 +- Single partition, padding for byte volume + +**What this tests:** +- Every splitting strategy on dimension 0 should fail (quantile boundaries collapse, equal-width produces identical bounds, group-by-key returns 1 value) +- The system must fall through to dimension 1 (seq) and split there +- Or: produce a single chunk if seq splitting isn't needed +- Must not infinite-loop or error when no split is possible on dim 0 + +**Key assertions:** +- Plan completes without error +- If partition is oversized: chunks are split on dim 1 (seq), not dim 0 +- Total counted rows = total actual rows +- No duplicate coverage + +--- + +## Scenario 5: Very Long String Keys with Shared Prefixes + +**Table:** `chkit_e2e_chunking_long_prefix` +**Sort key:** `(url String)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- 10,000 rows where `url` follows pattern: `https://example.com/api/v2/resources/XXXX` + where `XXXX` is a 4-digit incrementing ID (0000-9999) +- All values share a 39-character prefix; differ only in the last 4 characters +- Single partition + +**What this tests:** +- `string-prefix-split` at depths 1-4 will see a single bucket (prefix is 39 chars) +- The system must fall through to quantile or equal-width splitting +- The dynamic BigInt width (from our fix) must handle 40+ char strings correctly +- Boundary computation must have enough precision in the suffix to split evenly + +**Key assertions:** +- Plan completes, produces multiple chunks +- Chunks have boundaries that differentiate in the suffix portion +- Full row coverage +- No chunks with 0 rows (the long shared prefix shouldn't confuse the splitter) + +--- + +## Scenario 6: DateTime Sort Key 
with Burst Traffic + +**Table:** `chkit_e2e_chunking_datetime_burst` +**Sort key:** `(event_time DateTime)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- 500 rows spread across 30 days of January 2026 (background traffic) +- 9,500 rows all within a single hour: `2026-01-15 14:00:00` to `2026-01-15 14:59:59` +- Single partition, padding for byte volume + +**What this tests:** +- Day-level temporal bucketing produces one massive day and many tiny ones +- Hour-level fallback kicks in for Jan 15 +- If 95% is within one hour, even hour-level bucketing can't split further +- Must fall through to quantile splitting on the datetime dimension itself +- Tests the full temporal cascade: day -> hour -> quantile + +**Key assertions:** +- Plan completes, produces multiple chunks +- The burst hour is split into multiple chunks (not left as one oversized chunk) +- Background traffic days are merged into larger chunks (not 30 tiny chunks) +- Full row coverage +- Reasonable chunk sizes (within 2-3x target) + +--- + +## Scenario 7: Three-Dimension Compound Key + +**Table:** `chkit_e2e_chunking_three_dim` +**Sort key:** `(region String, tenant_id String, event_time DateTime)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- 5 regions: `us-east`, `us-west`, `eu-west`, `ap-south`, `ap-east` +- Per region: 1 hot tenant with 1,500 rows + 10 small tenants with 10 rows each +- Hot tenant rows spread across 7 days in January 2026 +- Total: 5 * (1500 + 100) = 8,000 rows + +**What this tests:** +- Recursion through 3 dimensions (max depth = 3 * 3 = 9) +- Dimension 0 (region) splits into ~5 sub-ranges +- Dimension 1 (tenant_id) identifies hot tenant per region +- Dimension 2 (event_time) splits hot tenants by time +- Final chunks should carry ranges on all three dimensions + +**Key assertions:** +- Plan completes within timeout +- Hot tenants are detected as focused values +- Some chunks have ranges on all 3 dimensions +- Full row coverage +- Chunk count is reasonable 
(not exponential blowup) + +--- + +## Scenario 8: Partition at Exact Fuzz Factor Boundary + +**Table:** `chkit_e2e_chunking_fuzz_boundary` +**Sort key:** `(id UInt64)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- Two partitions (January and February 2026) +- January: rows sized to be exactly at `targetChunkBytes * 1.0` +- February: rows sized to be exactly at `targetChunkBytes * 1.6` (above the 1.5x fuzz factor) +- Controlled via row count and padding size + +**What this tests:** +- The stop condition `<= target * 1.5` +- January partition (1.0x) should produce exactly 1 chunk +- February partition (1.6x) should be split into 2+ chunks +- Boundary arithmetic of the fuzz factor + +**Key assertions:** +- January partition: exactly 1 chunk +- February partition: 2+ chunks +- Full row coverage in both partitions + +**Implementation note:** This requires querying `system.parts` after seeding to learn the actual uncompressed bytes, then computing the target from the smaller partition's size. The seed might need iterative adjustment to hit the right byte ratio. 
+ +--- + +## Scenario 9: Mixed Type Sort Keys (Numeric + String) + +**Table:** `chkit_e2e_chunking_mixed_types` +**Sort key:** `(priority UInt8, slug String)` +**Partition by:** `toYYYYMM(event_time)` + +**Dataset:** +- `priority` has 3 distinct values: 1, 2, 3 +- Priority 1: 1,000 rows with 100 distinct slugs +- Priority 2: 6,000 rows with 50 distinct slugs (hot priority) +- Priority 3: 3,000 rows with 200 distinct slugs +- Slugs are short strings like `item-XXXX` + +**What this tests:** +- Numeric dimension with very low cardinality (3 values) +- Quantile splitting will likely collapse on dim 0 (only 3 values) +- Equal-width on dim 0 should produce 3 intervals matching the 3 values +- Oversized priority-2 bucket must then split on dim 1 (slug) +- Tests cross-type dimension interaction + +**Key assertions:** +- All three priorities are represented in chunks +- Priority 2 chunks are split on the slug dimension +- Full row coverage +- No chunks span multiple priority values (each chunk's dim 0 range should be tight)