diff --git a/.config/nextest.toml b/.config/nextest.toml index 478e1fa70..9016789f5 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,38 +3,119 @@ retries = 3 test-threads = 15 +[test-groups] +# Keep these known noisy shared-cluster/perf tests from overlapping with each +# other, while still allowing unrelated tests to use the remaining default +# concurrency budget. +stateful-heavy = { max-threads = 1 } + [[profile.default.overrides]] filter = 'test(test_scenario_08_subscription_reconnect)' slow-timeout = { period = "60s", terminate-after = 2 } [[profile.default.overrides]] filter = 'test(test_setup_complete_environment)' -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(idle_autocommit_transaction_checks_add_no_extra_allocations)' # This perf/allocation regression is stable in isolation but noisy under full workspace contention. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(autocommit_read_write_latency_regression_stays_within_five_percent)' # This perf regression compares two nearly identical code paths and should run in isolation. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_sequential_insert_100)' # This PG perf check is stable in isolation but noisy under full-suite contention. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_cross_verify_latency)' # This latency check is sensitive to concurrent heavy tests and should run in isolation. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_local_memory_stays_bounded_under_batch_insert_and_scan)' # This memory-bound perf check is meaningful only without competing suite load. 
-threads-required = 15 +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_http_consume_direct_multi_user_publishers_no_missing_changes)' +# These topic smoke tests are stable in isolation but can contend under full workspace load. +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_http_consume_preserves_impersonated_user_and_payloads)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_consume_option_matrix_start_batch_auto_ack_modes)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_high_load_two_consumers_same_group_single_delivery)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_four_consumers_same_group_no_duplicates)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_ack_failure_recovery_no_message_loss_with_latency)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_fan_out_different_groups_receive_all)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_event_counter_integrity_through_multiple_outages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_gradual_latency_ramp_forces_reconnect_then_recovers)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_heavy_write_burst_during_outage_all_delivered)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_large_initial_snapshot_survives_repeated_outages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_latency_spike_during_initial_snapshot_recovers)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_proxy_server_down_during_live_updates_resumes)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 
'test(test_loading_snapshot_with_live_writes_resumes_without_duplicate_rows)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_proxy_three_subscriptions_resume_after_server_bounce)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_shared_connection_recovers_subscriptions_in_different_stages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_cli_syntax_error_handling)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_connection_timeout_option)' +# Pure options test, but full-suite CLI subprocess load can make nextest assign leaked output +# handles here; keep it isolated from subprocess-heavy tests. +test-group = "stateful-heavy" [profile.ci] # Store test results in JUnit format diff --git a/Cargo.lock b/Cargo.lock index f68dbcf35..688d9ded2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1161,9 +1161,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.60" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ "find-msvc-tools", "jobserver", @@ -2281,9 +2281,9 @@ dependencies = [ [[package]] name = "datafusion-functions-json" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75184ea8f2a291183525f40f8b86e2e596655fa052b7d9ab23af97b19e332c31" +checksum = "13ff70cb2c1960f03ba647aa2813fb1efba4c33bc221d973dd4a462a6376359a" dependencies = [ "datafusion", "jiter", @@ -3787,9 +3787,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.95" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" dependencies = [ "cfg-if", "futures-util", @@ -3814,7 +3814,7 @@ dependencies = [ [[package]] name = "kalam-cli" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "assert_cmd", @@ -3856,7 +3856,7 @@ dependencies = [ [[package]] name = "kalam-client" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalamdb-configs", "kalamdb-server", @@ -3870,14 +3870,14 @@ dependencies = [ [[package]] name = "kalam-consumer" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalam-client", ] [[package]] name = "kalam-consumer-wasm" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "base64", "js-sys", @@ -3891,7 +3891,7 @@ dependencies = [ [[package]] name = "kalam-link-dart" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "flutter_rust_bridge", @@ -3903,7 +3903,7 @@ dependencies = [ [[package]] name = "kalam-pg-api" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -3915,7 +3915,7 @@ dependencies = [ [[package]] name = "kalam-pg-client" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "arrow-ipc", @@ -3924,6 +3924,7 @@ dependencies = [ "bytes", "kalam-pg-api", "kalam-pg-common", + "kalamdb-commons", "kalamdb-pg", "ntest", "serde_json", @@ -3934,7 +3935,7 @@ dependencies = [ [[package]] name = "kalam-pg-common" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "datafusion-common", "serde", @@ -3943,7 +3944,7 @@ dependencies = [ [[package]] name = "kalam-pg-extension" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -3974,7 +3975,7 @@ dependencies = [ [[package]] name = "kalam-pg-fdw" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "datafusion-common", "kalam-pg-api", @@ -3985,7 +3986,7 @@ dependencies = 
[ [[package]] name = "kalam-pg-types" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalam-pg-common", "kalamdb-commons", @@ -3993,7 +3994,7 @@ dependencies = [ [[package]] name = "kalamdb-api" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "actix-files", "actix-multipart", @@ -4036,7 +4037,7 @@ dependencies = [ [[package]] name = "kalamdb-auth" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "actix-web", "anyhow", @@ -4066,7 +4067,7 @@ dependencies = [ [[package]] name = "kalamdb-commons" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "arrow-schema", @@ -4093,7 +4094,7 @@ dependencies = [ [[package]] name = "kalamdb-configs" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "ipnet", @@ -4104,7 +4105,7 @@ dependencies = [ [[package]] name = "kalamdb-core" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "arrow", @@ -4160,7 +4161,7 @@ dependencies = [ [[package]] name = "kalamdb-datafusion-sources" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "arrow-schema", @@ -4178,7 +4179,7 @@ dependencies = [ [[package]] name = "kalamdb-dba" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "datafusion", @@ -4196,7 +4197,7 @@ dependencies = [ [[package]] name = "kalamdb-dialect" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "arrow", @@ -4214,7 +4215,7 @@ dependencies = [ [[package]] name = "kalamdb-filestore" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "bytes", @@ -4245,7 +4246,7 @@ dependencies = [ [[package]] name = "kalamdb-handlers" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalamdb-commons", "kalamdb-core", @@ -4260,7 +4261,7 @@ dependencies = [ [[package]] name = "kalamdb-handlers-admin" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "chrono", @@ -4280,7 +4281,7 @@ dependencies = [ 
[[package]] name = "kalamdb-handlers-ddl" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "chrono", @@ -4304,7 +4305,7 @@ dependencies = [ [[package]] name = "kalamdb-handlers-stream" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "datafusion", @@ -4321,7 +4322,7 @@ dependencies = [ [[package]] name = "kalamdb-handlers-support" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "datafusion", @@ -4338,7 +4339,7 @@ dependencies = [ [[package]] name = "kalamdb-handlers-user" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "kalamdb-auth", @@ -4354,7 +4355,7 @@ dependencies = [ [[package]] name = "kalamdb-jobs" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "async-trait", "chrono", @@ -4380,7 +4381,7 @@ dependencies = [ [[package]] name = "kalamdb-live" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -4408,7 +4409,7 @@ dependencies = [ [[package]] name = "kalamdb-macros" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "proc-macro2", "quote", @@ -4417,7 +4418,7 @@ dependencies = [ [[package]] name = "kalamdb-observability" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "cc", "chrono", @@ -4430,7 +4431,7 @@ dependencies = [ [[package]] name = "kalamdb-pg" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "arrow-ipc", @@ -4457,7 +4458,7 @@ dependencies = [ [[package]] name = "kalamdb-plan-cache" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "datafusion", "kalamdb-commons", @@ -4466,7 +4467,7 @@ dependencies = [ [[package]] name = "kalamdb-publisher" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "dashmap 6.1.0", @@ -4483,7 +4484,7 @@ dependencies = [ [[package]] name = "kalamdb-raft" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "async-trait", "chrono", @@ -4519,7 +4520,7 @@ 
dependencies = [ [[package]] name = "kalamdb-server" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "actix-cors", "actix-web", @@ -4579,7 +4580,7 @@ dependencies = [ [[package]] name = "kalamdb-server-auth" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "log", "rcgen", @@ -4589,7 +4590,7 @@ dependencies = [ [[package]] name = "kalamdb-session" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalamdb-commons", "tokio", @@ -4597,7 +4598,7 @@ dependencies = [ [[package]] name = "kalamdb-session-datafusion" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -4610,7 +4611,7 @@ dependencies = [ [[package]] name = "kalamdb-sharding" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "kalamdb-commons", "kalamdb-configs", @@ -4619,7 +4620,7 @@ dependencies = [ [[package]] name = "kalamdb-store" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "anyhow", "async-trait", @@ -4641,7 +4642,7 @@ dependencies = [ [[package]] name = "kalamdb-streams" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "chrono", "dashmap 6.1.0", @@ -4655,7 +4656,7 @@ dependencies = [ [[package]] name = "kalamdb-system" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -4680,7 +4681,7 @@ dependencies = [ [[package]] name = "kalamdb-tables" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -4713,20 +4714,19 @@ dependencies = [ [[package]] name = "kalamdb-transactions" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "datafusion", "futures-util", "kalamdb-commons", "kalamdb-datafusion-sources", "serde", - "serde_json", "tokio", ] [[package]] name = "kalamdb-vector" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "async-trait", "bytes", @@ -4748,7 +4748,7 @@ dependencies = [ [[package]] name = "kalamdb-views" -version = "0.4.2-rc.3" +version = 
"0.4.3-rc.4" dependencies = [ "arrow", "async-trait", @@ -4842,9 +4842,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libflate" @@ -4888,9 +4888,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" -version = "0.1.46" +version = "0.1.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc89deee4af0429081d2a518c0431ae068222a5a262a3bc6ff4d8535ec2e02fe" +checksum = "2d1eacfa31c33ec25e873c136ba5669f00f9866d0688bea7be4d3f7e43067df6" dependencies = [ "cc", "cty", @@ -4931,7 +4931,7 @@ dependencies = [ [[package]] name = "link-common" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "aws-lc-rs", "base64", @@ -6679,13 +6679,13 @@ dependencies = [ [[package]] name = "rpassword" -version = "7.4.0" +version = "7.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66d4c8b64f049c6721ec8ccec37ddfc3d641c4a7fca57e8f2a89de509c73df39" +checksum = "2501c67132bd19c3005b0111fba298907ef002c8c1cf68e25634707e38bf66fe" dependencies = [ "libc", "rtoolbox", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7605,9 +7605,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.51.1" +version = "1.52.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" +checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" dependencies = [ "bytes", "libc", @@ -8291,9 +8291,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.118" +version = "0.2.120" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" dependencies = [ "cfg-if", "once_cell", @@ -8304,9 +8304,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.68" +version = "0.4.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" +checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" dependencies = [ "js-sys", "wasm-bindgen", @@ -8314,9 +8314,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8324,9 +8324,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" dependencies = [ "bumpalo", "proc-macro2", @@ -8337,9 +8337,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.118" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" dependencies = [ "unicode-ident", ] @@ -8406,9 +8406,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.95" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" dependencies = [ "js-sys", "wasm-bindgen", @@ -9095,9 +9095,9 @@ dependencies = [ [[package]] name = "zip" -version = "8.5.1" +version = "8.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcab981e19633ebcf0b001ddd37dd802996098bc1864f90b7c5d970ce76c1d59" +checksum = "2d04a6b5381502aa6087c94c669499eb1602eb9c5e8198e534de571f7154809b" dependencies = [ "crc32fast", "flate2", diff --git a/Cargo.toml b/Cargo.toml index 7a8662c68..195c77c13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,7 @@ members = [ exclude = ["benchv2"] [workspace.package] -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" edition = "2021" rust-version = "1.92" authors = ["KalamDB Team"] @@ -73,7 +73,7 @@ anyhow = "1.0.102" log = "0.4.29" # Async runtime -tokio = { version = "1.51.1", features = ["rt-multi-thread", "macros", "sync", "time", "fs", "io-util", "io-std", "net", "signal", "parking_lot"] } +tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros", "sync", "time", "fs", "io-util", "io-std", "net", "signal", "parking_lot"] } tokio-stream = { version = "0.1", features = ["net"] } # HTTP client (with HTTP/2 support) - using rustls-tls for cross-compilation compatibility @@ -109,7 +109,7 @@ datafusion = { version = "53.1.0", default-features = false, features = ["sql", datafusion-datasource = { version = "53.1.0", default-features = false } datafusion-common = { version = "53.1.0", default-features = false } datafusion-expr = { version = "53.1.0" } -datafusion-functions-json = { version = "0.53.0" } +datafusion-functions-json = { version = "0.53.1" } sqlparser = { version = "0.61.0" } parquet = { version = "58.1.0", default-features = false, features = ["snap", "zstd", "arrow", "async"] } @@ -180,7 +180,7 @@ cookie = "0.18" colored = "3.1.1" fern = "0.7" indicatif = "0.18.4" -rpassword = "7.3" 
+rpassword = "7.5.1" dirs = "6.0" term_size = "0.3" crossterm = "0.29.0" @@ -197,7 +197,7 @@ dashmap = "6.1" bytes = "1.11.1" http-body = "1.0.0" http-body-util = "0.1.2" -cc = "1.2.60" +cc = "1.2.61" proc-macro2 = "1.0.106" quote = "1.0.44" syn = { version = "2.0.117", features = ["full", "extra-traits"] } @@ -217,17 +217,17 @@ storekey = "0.11" moka = { version = "0.12.15", features = ["future", "sync"] } ntest = "0.9.5" ipnet = "2.11.0" -wasm-bindgen = { version = "0.2.118" } -wasm-bindgen-futures = { version = "0.4.68" } -js-sys = { version = "0.3.95" } -libc = "0.2.185" -libmimalloc-sys = { version = "0.1.46", features = ["extended"] } -web-sys = { version = "0.3.95" } +wasm-bindgen = { version = "0.2.120" } +wasm-bindgen-futures = { version = "0.4.70" } +js-sys = { version = "0.3.97" } +libc = "0.2.186" +libmimalloc-sys = { version = "0.1.47", features = ["extended"] } +web-sys = { version = "0.3.97" } tsify = { version = "0.5.6", default-features = false, features = ["js"] } serde-wasm-bindgen = "0.6.5" flate2 = "1.1.9" tar = "0.4" -zip = { version = "8.5.1", default-features = false, features = ["deflate"] } +zip = { version = "8.6.0", default-features = false, features = ["deflate"] } miniz_oxide = "0.9.1" flutter_rust_bridge = "=2.12.0" rust-embed = { version = "8.11.0", features = ["compression", "include-exclude"] } diff --git a/backend/build.rs b/backend/build.rs index b8164120f..97c0fc83b 100644 --- a/backend/build.rs +++ b/backend/build.rs @@ -3,9 +3,11 @@ // - Falls back to version.toml if git is not available (e.g., Docker builds) // - Builds the Admin UI for kalamdb-api release builds (must run before rust-embed) -use std::fs; -use std::path::{Path, PathBuf}; -use std::process::Command; +use std::{ + fs, + path::{Path, PathBuf}, + process::Command, +}; fn main() { let package_name = std::env::var("CARGO_PKG_NAME").unwrap_or_default(); @@ -129,7 +131,8 @@ fn build_ui_if_release(repo_root: &Path) { let index_file = dist_dir.join("index.html"); if 
!index_file.exists() { panic!( - "SKIP_UI_BUILD is set but ui/dist/index.html does not exist! Build UI first or unset SKIP_UI_BUILD." + "SKIP_UI_BUILD is set but ui/dist/index.html does not exist! Build UI first or \ + unset SKIP_UI_BUILD." ); } return; @@ -156,7 +159,8 @@ fn build_ui_if_release(repo_root: &Path) { println!("cargo:warning=Building UI for release..."); - // Use isolated cargo dirs for nested wasm-pack to avoid lock contention with the outer cargo build. + // Use isolated cargo dirs for nested wasm-pack to avoid lock contention with the outer cargo + // build. let isolated_target = repo_root.join("ui").join(".cargo-target"); let isolated_home = repo_root.join("ui").join(".cargo-home"); @@ -184,13 +188,15 @@ fn build_ui_if_release(repo_root: &Path) { Ok(status) if status.success() => {}, Ok(status) => { panic!( - "npm install failed with status: {} - UI dependencies are required for release builds!", + "npm install failed with status: {} - UI dependencies are required for \ + release builds!", status ); }, Err(e) => { panic!( - "Failed to run npm install: {} - UI dependencies are required for release builds!", + "Failed to run npm install: {} - UI dependencies are required for release \ + builds!", e ); }, diff --git a/backend/crates/kalamdb-api/src/http/auth/audit.rs b/backend/crates/kalamdb-api/src/http/auth/audit.rs index bbdd4b771..a0fe9c791 100644 --- a/backend/crates/kalamdb-api/src/http/auth/audit.rs +++ b/backend/crates/kalamdb-api/src/http/auth/audit.rs @@ -1,9 +1,12 @@ +use std::sync::Arc; + use chrono::Utc; -use kalamdb_commons::models::{AuditLogId, ConnectionInfo}; -use kalamdb_commons::UserId; +use kalamdb_commons::{ + models::{AuditLogId, ConnectionInfo}, + UserId, +}; use kalamdb_core::app_context::AppContext; use kalamdb_system::AuditLogEntry; -use std::sync::Arc; use uuid::Uuid; pub(crate) async fn record_admin_login( diff --git a/backend/crates/kalamdb-api/src/http/auth/login.rs b/backend/crates/kalamdb-api/src/http/auth/login.rs 
index 20e14ced3..3cfccac04 100644 --- a/backend/crates/kalamdb-api/src/http/auth/login.rs +++ b/backend/crates/kalamdb-api/src/http/auth/login.rs @@ -2,23 +2,25 @@ //! //! POST /v1/api/auth/login - Authenticates a user and returns JWT tokens +use std::sync::Arc; + use actix_web::{web, HttpRequest, HttpResponse}; use chrono::{Duration, Utc}; -use kalamdb_auth::providers::jwt_auth::create_and_sign_refresh_token; use kalamdb_auth::{ authenticate, create_and_sign_token, create_auth_cookie, create_refresh_cookie, - extract_client_ip_secure, AuthRequest, CookieConfig, UserRepository, + extract_client_ip_secure, providers::jwt_auth::create_and_sign_refresh_token, AuthRequest, + CookieConfig, UserRepository, }; use kalamdb_commons::Role; use kalamdb_configs::AuthSettings; use kalamdb_core::app_context::AppContext; -use std::sync::Arc; +use kalamdb_jobs::health_monitor::record_activity_now; -use super::audit; -use super::map_auth_error_to_response; -use super::models::{AuthErrorResponse, LoginRequest, LoginResponse, UserInfo}; +use super::{ + audit, map_auth_error_to_response, + models::{AuthErrorResponse, LoginRequest, LoginResponse, UserInfo}, +}; use crate::limiter::RateLimiter; -use kalamdb_jobs::health_monitor::record_activity_now; /// POST /v1/api/auth/login /// diff --git a/backend/crates/kalamdb-api/src/http/auth/me.rs b/backend/crates/kalamdb-api/src/http/auth/me.rs index fe0b3fb95..813218af8 100644 --- a/backend/crates/kalamdb-api/src/http/auth/me.rs +++ b/backend/crates/kalamdb-api/src/http/auth/me.rs @@ -2,13 +2,16 @@ //! //! 
GET /v1/api/auth/me - Returns information about the currently authenticated user +use std::sync::Arc; + use actix_web::{web, HttpRequest, HttpResponse}; use kalamdb_auth::{authenticate, extract_client_ip_secure, AuthRequest, UserRepository}; use kalamdb_commons::Role; -use std::sync::Arc; -use super::models::{CurrentUserResponse, UserInfo}; -use super::{extract_bearer_or_cookie_token, map_auth_error_to_response}; +use super::{ + extract_bearer_or_cookie_token, map_auth_error_to_response, + models::{CurrentUserResponse, UserInfo}, +}; /// GET /v1/api/auth/me /// diff --git a/backend/crates/kalamdb-api/src/http/auth/mod.rs b/backend/crates/kalamdb-api/src/http/auth/mod.rs index 5ee420056..facb939ae 100644 --- a/backend/crates/kalamdb-api/src/http/auth/mod.rs +++ b/backend/crates/kalamdb-api/src/http/auth/mod.rs @@ -19,17 +19,15 @@ mod me; mod refresh; mod setup; +use actix_web::{HttpRequest, HttpResponse}; +use kalamdb_auth::{extract_auth_token, extract_refresh_token, AuthError}; pub(crate) use login::login_handler; pub(crate) use logout::logout_handler; pub(crate) use me::me_handler; +use models::AuthErrorResponse; pub(crate) use refresh::refresh_handler; pub(crate) use setup::{server_setup_handler, setup_status_handler}; -use actix_web::HttpRequest; -use actix_web::HttpResponse; -use kalamdb_auth::{extract_auth_token, extract_refresh_token, AuthError}; -use models::AuthErrorResponse; - /// Map authentication errors to HTTP responses /// /// Uses generic error messages to prevent user enumeration attacks. 
@@ -141,11 +139,12 @@ pub(crate) fn extract_refresh_or_bearer_token(req: &HttpRequest) -> Result Value { let body = to_bytes(response.into_body()) .await diff --git a/backend/crates/kalamdb-api/src/http/auth/models/login_response.rs b/backend/crates/kalamdb-api/src/http/auth/models/login_response.rs index 3e90e986d..9cd4cad5c 100644 --- a/backend/crates/kalamdb-api/src/http/auth/models/login_response.rs +++ b/backend/crates/kalamdb-api/src/http/auth/models/login_response.rs @@ -1,8 +1,9 @@ //! Login response model -use super::UserInfo; use serde::Serialize; +use super::UserInfo; + /// Login response body #[derive(Debug, Serialize)] pub struct LoginResponse { diff --git a/backend/crates/kalamdb-api/src/http/auth/models/mod.rs b/backend/crates/kalamdb-api/src/http/auth/models/mod.rs index dbc341b3c..16c0ecebe 100644 --- a/backend/crates/kalamdb-api/src/http/auth/models/mod.rs +++ b/backend/crates/kalamdb-api/src/http/auth/models/mod.rs @@ -14,5 +14,4 @@ pub use login_request::LoginRequest; pub use login_response::LoginResponse; pub use setup_request::ServerSetupRequest; pub use setup_response::ServerSetupResponse; -pub use user_info::CurrentUserResponse; -pub use user_info::UserInfo; +pub use user_info::{CurrentUserResponse, UserInfo}; diff --git a/backend/crates/kalamdb-api/src/http/auth/models/setup_request.rs b/backend/crates/kalamdb-api/src/http/auth/models/setup_request.rs index 7b3496a0b..41eca51b8 100644 --- a/backend/crates/kalamdb-api/src/http/auth/models/setup_request.rs +++ b/backend/crates/kalamdb-api/src/http/auth/models/setup_request.rs @@ -1,8 +1,9 @@ //! 
Server setup request model -use super::login_request::{validate_password_length, validate_user_length}; use serde::Deserialize; +use super::login_request::{validate_password_length, validate_user_length}; + /// Server setup request body #[derive(Debug, Deserialize)] #[serde(deny_unknown_fields)] diff --git a/backend/crates/kalamdb-api/src/http/auth/models/setup_response.rs b/backend/crates/kalamdb-api/src/http/auth/models/setup_response.rs index 8e4c4a2a1..b297d4715 100644 --- a/backend/crates/kalamdb-api/src/http/auth/models/setup_response.rs +++ b/backend/crates/kalamdb-api/src/http/auth/models/setup_response.rs @@ -1,8 +1,9 @@ //! Server setup response model -use super::UserInfo; use serde::Serialize; +use super::UserInfo; + /// Server setup response body #[derive(Debug, Serialize)] pub struct ServerSetupResponse { diff --git a/backend/crates/kalamdb-api/src/http/auth/models/user_info.rs b/backend/crates/kalamdb-api/src/http/auth/models/user_info.rs index 97df35fd5..1adbba80a 100644 --- a/backend/crates/kalamdb-api/src/http/auth/models/user_info.rs +++ b/backend/crates/kalamdb-api/src/http/auth/models/user_info.rs @@ -1,7 +1,6 @@ //! User info model -use kalamdb_commons::models::UserId; -use kalamdb_commons::Role; +use kalamdb_commons::{models::UserId, Role}; use serde::Serialize; /// User info returned in authentication responses diff --git a/backend/crates/kalamdb-api/src/http/auth/refresh.rs b/backend/crates/kalamdb-api/src/http/auth/refresh.rs index e55164bf4..a7568ff87 100644 --- a/backend/crates/kalamdb-api/src/http/auth/refresh.rs +++ b/backend/crates/kalamdb-api/src/http/auth/refresh.rs @@ -2,20 +2,21 @@ //! //! 
POST /v1/api/auth/refresh - Refreshes the JWT token if still valid +use std::sync::Arc; + use actix_web::{web, HttpRequest, HttpResponse}; use chrono::{Duration, Utc}; -use kalamdb_auth::providers::jwt_auth::{ - create_and_sign_refresh_token, validate_jwt_token, TokenType, -}; use kalamdb_auth::{ create_and_sign_token, create_auth_cookie, create_refresh_cookie, extract_client_ip_secure, + providers::jwt_auth::{create_and_sign_refresh_token, validate_jwt_token, TokenType}, CookieConfig, UserRepository, }; use kalamdb_configs::AuthSettings; -use std::sync::Arc; -use super::models::{AuthErrorResponse, LoginResponse, UserInfo}; -use super::{extract_refresh_or_bearer_token, map_auth_error_to_response}; +use super::{ + extract_refresh_or_bearer_token, map_auth_error_to_response, + models::{AuthErrorResponse, LoginResponse, UserInfo}, +}; use crate::limiter::RateLimiter; /// POST /v1/api/auth/refresh @@ -168,9 +169,10 @@ pub async fn refresh_handler( #[cfg(test)] mod tests { - use super::*; use kalamdb_auth::providers::jwt_auth::JwtClaims; + use super::*; + #[test] fn refresh_endpoint_only_accepts_refresh_token_type() { let now = chrono::Utc::now().timestamp() as usize; diff --git a/backend/crates/kalamdb-api/src/http/auth/setup.rs b/backend/crates/kalamdb-api/src/http/auth/setup.rs index 95234163a..fd5b1a2b3 100644 --- a/backend/crates/kalamdb-api/src/http/auth/setup.rs +++ b/backend/crates/kalamdb-api/src/http/auth/setup.rs @@ -3,6 +3,8 @@ //! POST /v1/api/auth/setup - Initial server setup (localhost only) //! 
GET /v1/api/auth/status - Check server setup status +use std::sync::Arc; + use actix_web::{web, HttpRequest, HttpResponse}; use kalamdb_auth::{ errors::error::AuthError, @@ -10,12 +12,12 @@ use kalamdb_auth::{ security::password::{hash_password, validate_password}, UserRepository, }; -use kalamdb_commons::models::{StorageId, UserId}; -use kalamdb_commons::{AuthType, Role}; +use kalamdb_commons::{ + models::{StorageId, UserId}, + AuthType, Role, +}; use kalamdb_configs::AuthSettings; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::User; -use std::sync::Arc; +use kalamdb_system::{providers::storages::models::StorageMode, User}; use super::models::{AuthErrorResponse, ServerSetupRequest, ServerSetupResponse, UserInfo}; use crate::limiter::RateLimiter; @@ -186,13 +188,15 @@ pub async fn server_setup_handler( match user_repo.get_user_by_id(&dba_user_id).await { Ok(existing_user) => { log::info!( - "Server setup raced with another caller; reusing existing DBA user '{}'", + "Server setup raced with another caller; reusing existing DBA user \ + '{}'", dba_user_id.as_str() ); return HttpResponse::Ok().json(build_setup_response( &existing_user, format!( - "Server setup already completed for DBA user '{}'. Please login to continue.", + "Server setup already completed for DBA user '{}'. Please login \ + to continue.", existing_user.user_id.as_str() ), )); @@ -224,7 +228,8 @@ pub async fn server_setup_handler( HttpResponse::Ok().json(build_setup_response( &dba_user, format!( - "Server setup complete. Root password configured and DBA user '{}' created. Please login to continue.", + "Server setup complete. Root password configured and DBA user '{}' created. 
Please \ + login to continue.", dba_user_id.as_str() ), )) diff --git a/backend/crates/kalamdb-api/src/http/cluster/health.rs b/backend/crates/kalamdb-api/src/http/cluster/health.rs index ba6c37ec1..7e71fa138 100644 --- a/backend/crates/kalamdb-api/src/http/cluster/health.rs +++ b/backend/crates/kalamdb-api/src/http/cluster/health.rs @@ -1,11 +1,14 @@ //! Cluster health endpoint handler +use std::sync::Arc; + use actix_web::{web, HttpRequest, HttpResponse}; use kalamdb_auth::extract_client_ip_secure; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::metrics::{BUILD_DATE, SERVER_VERSION}; +use kalamdb_core::{ + app_context::AppContext, + metrics::{BUILD_DATE, SERVER_VERSION}, +}; use kalamdb_raft::{NodeStatus, RaftExecutor, ServerStateExt}; -use std::sync::Arc; use super::models::{ClusterHealthResponse, NodeHealth}; diff --git a/backend/crates/kalamdb-api/src/http/cluster/models/cluster_health_response.rs b/backend/crates/kalamdb-api/src/http/cluster/models/cluster_health_response.rs index b8cf5ebbc..8f3438c31 100644 --- a/backend/crates/kalamdb-api/src/http/cluster/models/cluster_health_response.rs +++ b/backend/crates/kalamdb-api/src/http/cluster/models/cluster_health_response.rs @@ -1,8 +1,9 @@ //! Cluster health response model -use super::NodeHealth; use serde::Serialize; +use super::NodeHealth; + /// Response for cluster health endpoint #[derive(Serialize)] pub struct ClusterHealthResponse { diff --git a/backend/crates/kalamdb-api/src/http/files/download.rs b/backend/crates/kalamdb-api/src/http/files/download.rs index d592cf691..c2d377e6e 100644 --- a/backend/crates/kalamdb-api/src/http/files/download.rs +++ b/backend/crates/kalamdb-api/src/http/files/download.rs @@ -1,14 +1,13 @@ //! 
File download handler +use std::sync::Arc; + use actix_web::{get, web, HttpResponse, Responder}; use kalamdb_auth::AuthSessionExtractor; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableAccess; +use kalamdb_commons::{models::TableId, schemas::TableType, TableAccess}; use kalamdb_core::app_context::AppContext; use kalamdb_session::{can_access_shared_table, can_impersonate_role, AuthSession}; use kalamdb_system::FileRef; -use std::sync::Arc; use super::models::DownloadQuery; use crate::http::sql::models::{ErrorCode, SqlResponse}; @@ -66,7 +65,8 @@ pub async fn download_file( }, Ok(Err(e)) => { log::warn!( - "Failed to resolve impersonation target for file download: user_id={}, error={}", + "Failed to resolve impersonation target for file download: user_id={}, \ + error={}", requested_user_id, e ); @@ -78,7 +78,8 @@ pub async fn download_file( }, Err(e) => { log::warn!( - "Failed to resolve impersonation target for file download: user_id={}, error={}", + "Failed to resolve impersonation target for file download: user_id={}, \ + error={}", requested_user_id, e ); @@ -160,7 +161,7 @@ pub async fn download_file( .await { Ok(data) => { - //TODO: Get content type from the stored file metadata + // TODO: Get content type from the stored file metadata // Guess content type from file extension in file_id let content_type = guess_content_type(&file_id); @@ -194,9 +195,10 @@ fn guess_content_type(file_id: &str) -> String { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::Role; + use super::*; + #[test] fn download_impersonation_respects_target_role_matrix() { assert!(can_impersonate_role(Role::System, Role::System)); diff --git a/backend/crates/kalamdb-api/src/http/files/export_download.rs b/backend/crates/kalamdb-api/src/http/files/export_download.rs index 708a942ad..4fd94437c 100644 --- a/backend/crates/kalamdb-api/src/http/files/export_download.rs +++ 
b/backend/crates/kalamdb-api/src/http/files/export_download.rs @@ -7,11 +7,12 @@ //! //! The requesting user must be the owner of the export, or an admin. +use std::sync::Arc; + use actix_web::{get, web, HttpResponse, Responder}; use kalamdb_auth::AuthSessionExtractor; use kalamdb_core::app_context::AppContext; use kalamdb_session::{is_admin_role, AuthSession}; -use std::sync::Arc; use crate::http::sql::models::{ErrorCode, SqlResponse}; diff --git a/backend/crates/kalamdb-api/src/http/health.rs b/backend/crates/kalamdb-api/src/http/health.rs index 157660d4b..0d76a53e0 100644 --- a/backend/crates/kalamdb-api/src/http/health.rs +++ b/backend/crates/kalamdb-api/src/http/health.rs @@ -21,10 +21,12 @@ pub(crate) async fn healthcheck_handler(req: HttpRequest) -> HttpResponse { #[cfg(test)] mod tests { - use super::healthcheck_handler; + use std::net::SocketAddr; + use actix_web::{body::to_bytes, http::StatusCode, test::TestRequest}; use serde_json::Value; - use std::net::SocketAddr; + + use super::healthcheck_handler; async fn execute_healthcheck(req: actix_web::HttpRequest) -> (StatusCode, Value) { let response = healthcheck_handler(req).await; diff --git a/backend/crates/kalamdb-api/src/http/sql/execute.rs b/backend/crates/kalamdb-api/src/http/sql/execute.rs index 3999b4bd8..f2bed72cd 100644 --- a/backend/crates/kalamdb-api/src/http/sql/execute.rs +++ b/backend/crates/kalamdb-api/src/http/sql/execute.rs @@ -13,42 +13,43 @@ //! //! ## Performance notes //! -//! - `extract_file_placeholders` is called **once** in the handler; the result -//! is passed into `execute_file_upload_path` to avoid rescanning. -//! - `req_for_forward` (clones `sql` + `params`) is built lazily — only when -//! forwarding is actually needed. -//! - In the batch loop, `params` is **moved** on the last iteration instead of -//! cloned, eliminating one allocation per single-statement request (>90% of -//! traffic). -//! - Content-type detection uses ASCII-case-insensitive comparison without -//! 
allocating a lowercase copy. -//! - `EXECUTE AS USER` prefix detection uses a fixed-length slice comparison -//! instead of uppercasing the entire input string. +//! - `extract_file_placeholders` is called **once** in the handler; the result is passed into +//! `execute_file_upload_path` to avoid rescanning. +//! - `req_for_forward` (clones `sql` + `params`) is built lazily — only when forwarding is actually +//! needed. +//! - In the batch loop, `params` is **moved** on the last iteration instead of cloned, eliminating +//! one allocation per single-statement request (>90% of traffic). +//! - Content-type detection uses ASCII-case-insensitive comparison without allocating a lowercase +//! copy. +//! - `EXECUTE AS USER` prefix detection uses a fixed-length slice comparison instead of uppercasing +//! the entire input string. + +use std::{sync::Arc, time::Instant}; use actix_web::{post, web, HttpRequest, HttpResponse, Responder}; use kalamdb_auth::AuthSessionExtractor; use kalamdb_commons::models::NamespaceId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::executor::SqlExecutor; -use kalamdb_core::sql::SqlImpersonationService; +use kalamdb_core::{ + app_context::AppContext, + sql::{context::ExecutionContext, executor::SqlExecutor, SqlImpersonationService}, +}; +use kalamdb_jobs::health_monitor::record_activity_now; use kalamdb_raft::GroupId; use kalamdb_session::AuthSession; -use std::sync::Arc; -use std::time::Instant; use uuid::Uuid; -use super::execution_paths::{execute_batch_path, execute_file_upload_path}; -use super::file_utils::extract_file_placeholders; -use super::forward::forward_sql_if_follower; -use super::helpers::parse_scalar_params; -use super::models::{ErrorCode, QueryRequest, SqlResponse}; -use super::request::{parse_incoming_payload, took_ms, validate_sql_length}; -use super::statements::{ - authorized_username, split_and_prepare_statements, PreparedApiExecutionStatement, +use 
super::{ + execution_paths::{execute_batch_path, execute_file_upload_path}, + file_utils::extract_file_placeholders, + forward::{forward_sql_if_follower, prepared_statement_target_group}, + helpers::parse_scalar_params, + models::{ErrorCode, QueryRequest, SqlResponse}, + request::{parse_incoming_payload, took_ms, validate_sql_length}, + statements::{ + authorized_username, split_and_prepare_statements, PreparedApiExecutionStatement, + }, }; use crate::limiter::RateLimiter; -use kalamdb_jobs::health_monitor::record_activity_now; #[inline] fn batch_requires_request_id(prepared_statements: &[PreparedApiExecutionStatement]) -> bool { @@ -77,8 +78,8 @@ fn batch_requires_request_id(prepared_statements: &[PreparedApiExecutionStatemen /// Accepts either JSON or multipart/form-data payloads. /// /// - JSON: `sql` plus optional `params` and `namespace_id`. -/// - Multipart: `sql`, optional `params` (JSON array), optional `namespace_id`, -/// and file parts named `file:` for FILE("name") placeholders. +/// - Multipart: `sql`, optional `params` (JSON array), optional `namespace_id`, and file parts +/// named `file:` for FILE("name") placeholders. /// /// Multiple statements can be separated by semicolons and will be executed sequentially. /// File uploads require a single SQL statement. @@ -138,33 +139,60 @@ pub async fn execute_sql_v1( let base_session = app_context.base_session_context(); let mut exec_ctx = ExecutionContext::from_session(session, Arc::clone(&base_session)) .with_namespace_id(default_namespace.clone()); - let is_meta_leader = app_context.executor().is_leader(GroupId::Meta).await; - // 5. File uploads must go to the leader - if files_present && !is_meta_leader { - return HttpResponse::ServiceUnavailable().json(SqlResponse::error( - ErrorCode::NotLeader, - "File uploads must be sent to the current leader", - took_ms(start_time), - )); + // 5. Split, parse, and classify SQL statements once. 
Follower forwarding reuses + // this metadata so read-only follower requests do not pay a second parse pass. + let prepared_statements = + match split_and_prepare_statements(&sql, &exec_ctx, sql_executor.get_ref(), start_time) { + Ok(stmts) => stmts, + Err(resp) => return resp, + }; + + if exec_ctx.request_id().is_none() && batch_requires_request_id(&prepared_statements) { + exec_ctx = exec_ctx.with_request_id(Uuid::now_v7().to_string()); } - // 6. Forward to leader if this node is a follower (non-file path). - if !files_present && !is_meta_leader { - if exec_ctx.request_id().is_none() { - exec_ctx = exec_ctx.with_request_id(Uuid::now_v7().to_string()); + // 6. Multipart uploads cannot use the normal gRPC SQL forwarder because the + // file payload is local to this HTTP request. Instead, reject early with a + // standard leader hint before any file staging happens so the client can retry. + if files_present { + if let Some(target_group) = prepared_statements.iter().find_map(|statement| { + prepared_statement_target_group(statement, app_context.get_ref(), exec_ctx.user_id()) + }) { + if !app_context.executor().is_leader(target_group).await { + let leader_addr = match target_group { + GroupId::DataSharedShard(_) => app_context.leader_addr_for_shared().await, + GroupId::DataUserShard(_) => { + app_context.leader_addr_for_user(exec_ctx.user_id()).await + }, + _ => None, + }; + let message = match leader_addr { + Some(addr) => format!("Not leader for shard. Leader: {}", addr), + None => "Not leader for shard. Leader unknown".to_string(), + }; + return HttpResponse::ServiceUnavailable().json(SqlResponse::error( + ErrorCode::NotLeader, + &message, + took_ms(start_time), + )); + } } + } - let req_for_forward = QueryRequest { - sql: sql.clone(), - params: params_json.clone(), - namespace_id: namespace_id.clone(), - }; + // 7. Forward leader-routed operations to their actual group leader if this + // node is a follower for the target group (non-file path). 
A node may be + // Meta leader while another node leads the relevant data shard, so this + // check must not be gated by Meta leadership. + if !files_present { if let Some(response) = forward_sql_if_follower( &http_req, - &req_for_forward, + &sql, + ¶ms_json, + &namespace_id, app_context.get_ref(), - &default_namespace, + &prepared_statements, + exec_ctx.user_id(), exec_ctx.request_id(), ) .await @@ -173,7 +201,7 @@ pub async fn execute_sql_v1( } } - // 7. Parse query parameters + // 8. Parse query parameters let params = match parse_scalar_params(¶ms_json) { Ok(p) => p, Err(err) => { @@ -185,17 +213,6 @@ pub async fn execute_sql_v1( }, }; - // 8. Split, parse, and classify SQL statements - let prepared_statements = - match split_and_prepare_statements(&sql, &exec_ctx, sql_executor.get_ref(), start_time) { - Ok(stmts) => stmts, - Err(resp) => return resp, - }; - - if exec_ctx.request_id().is_none() && batch_requires_request_id(&prepared_statements) { - exec_ctx = exec_ctx.with_request_id(Uuid::now_v7().to_string()); - } - let auth_username = authorized_username(&exec_ctx); let impersonation_service = SqlImpersonationService::new(Arc::clone(app_context.get_ref())); diff --git a/backend/crates/kalamdb-api/src/http/sql/execution_paths.rs b/backend/crates/kalamdb-api/src/http/sql/execution_paths.rs index e9e9d4f33..8e2fcbbdb 100644 --- a/backend/crates/kalamdb-api/src/http/sql/execution_paths.rs +++ b/backend/crates/kalamdb-api/src/http/sql/execution_paths.rs @@ -1,30 +1,37 @@ +use std::{collections::HashMap, sync::Arc, time::Instant}; + use actix_web::{http::StatusCode, HttpRequest, HttpResponse}; use bytes::Bytes; -use kalamdb_commons::models::NamespaceId; -use kalamdb_commons::schemas::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::schema_registry::SchemaRegistry; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::executor::request_transaction_state::RequestTransactionState; 
-use kalamdb_core::sql::executor::{PreparedExecutionStatement, ScalarValue, SqlExecutor}; -use kalamdb_core::sql::SqlImpersonationService; +use kalamdb_commons::{models::NamespaceId, schemas::TableType}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + schema_registry::SchemaRegistry, + sql::{ + context::ExecutionContext, + executor::{ + request_transaction_state::RequestTransactionState, PreparedExecutionStatement, + ScalarValue, SqlExecutor, + }, + SqlImpersonationService, + }, +}; use kalamdb_sql::classifier::SqlStatementKind; use kalamdb_system::FileSubfolderState; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Instant; - -use super::file_utils::{stage_and_finalize_files, substitute_file_placeholders}; -use super::forward::handle_not_leader_error; -use super::helpers::{ - cleanup_files, execute_single_statement, execute_single_statement_raw, - execution_result_to_query_result, stream_sql_rows_response, -}; -use super::models::{ErrorCode, QueryRequest, QueryResult, SqlResponse}; -use super::request::took_ms; -use super::statements::{ - classify_sql, resolve_execute_as_user, resolve_result_username, PreparedApiExecutionStatement, + +use super::{ + file_utils::{stage_and_finalize_files, substitute_file_placeholders}, + forward::handle_not_leader_error, + helpers::{ + cleanup_files, execute_single_statement, execute_single_statement_raw, + execution_result_to_query_result, stream_sql_rows_response, + }, + models::{ErrorCode, QueryRequest, QueryResult, SqlResponse}, + request::took_ms, + statements::{ + classify_sql, resolve_execute_as_user, resolve_result_username, + PreparedApiExecutionStatement, + }, }; #[inline] @@ -54,6 +61,18 @@ fn is_table_discovery_error_message(message: &str) -> bool { || message.contains("unknown table") } +#[inline] +fn is_leader_routing_error_message(message: &str) -> bool { + message.contains("not leader") + || message.contains("not_leader") + || message.contains("unknown leader") + || 
message.contains("no cluster leader") + || message.contains("no raft leader") + || message.contains("forward request to cluster leader") + || message.contains("failed to forward request to cluster leader") + || message.contains("forward to leader") +} + #[inline] fn is_safe_validation_error_message(message: &str) -> bool { (message.contains("column") && message.contains("not found")) @@ -71,6 +90,9 @@ fn is_safe_validation_error_message(message: &str) -> bool { #[inline] fn classify_sql_error(err: &KalamDbError) -> (StatusCode, ErrorCode, bool) { match err { + KalamDbError::NotLeader { .. } => { + (StatusCode::SERVICE_UNAVAILABLE, ErrorCode::NotLeader, true) + }, KalamDbError::PermissionDenied(_) | KalamDbError::Unauthorized(_) => { (StatusCode::FORBIDDEN, ErrorCode::PermissionDenied, true) }, @@ -105,7 +127,9 @@ fn classify_sql_error(err: &KalamDbError) -> (StatusCode, ErrorCode, bool) { }, KalamDbError::ExecutionError(message) => { let message_lower = message.to_lowercase(); - if is_permission_error_message(&message_lower) { + if is_leader_routing_error_message(&message_lower) { + (StatusCode::SERVICE_UNAVAILABLE, ErrorCode::NotLeader, true) + } else if is_permission_error_message(&message_lower) { (StatusCode::FORBIDDEN, ErrorCode::PermissionDenied, true) } else if is_safe_validation_error_message(&message_lower) { (StatusCode::BAD_REQUEST, ErrorCode::SqlExecutionError, true) @@ -169,6 +193,19 @@ fn build_statement_error_response( ); } + let err_msg = err.to_string(); + if is_leader_routing_error_message(&err_msg.to_lowercase()) { + return build_sql_error_response( + StatusCode::SERVICE_UNAVAILABLE, + ErrorCode::NotLeader, + &format!("Statement {} failed: {}", statement_index, err_msg), + Some(sql), + took, + is_admin, + true, + ); + } + build_sql_error_response( StatusCode::BAD_REQUEST, ErrorCode::SqlExecutionError, @@ -233,7 +270,8 @@ pub(super) async fn execute_file_upload_path( None => { return 
HttpResponse::BadRequest().json(SqlResponse::error_for_privilege( ErrorCode::InvalidInput, - "Could not determine target table from SQL. Use fully qualified table name (namespace.table).", + "Could not determine target table from SQL. Use fully qualified table name \ + (namespace.table).", took_ms(start_time), exec_ctx.is_admin(), )); @@ -259,8 +297,8 @@ pub(super) async fn execute_file_upload_path( return HttpResponse::BadRequest().json(SqlResponse::error_for_privilege( ErrorCode::SqlExecutionError, &format!( - "EXECUTE AS USER is not allowed on SHARED tables (table '{}'). \ - AS USER impersonation is only supported for USER tables.", + "EXECUTE AS USER is not allowed on SHARED tables (table '{}'). AS USER \ + impersonation is only supported for USER tables.", table_id ), took_ms(start_time), @@ -523,8 +561,8 @@ pub(super) async fn execute_batch_path( return HttpResponse::BadRequest().json(SqlResponse::error_for_privilege( ErrorCode::SqlExecutionError, &format!( - "EXECUTE AS USER is not allowed on SHARED tables (table '{}'). \ - AS USER impersonation is only supported for USER tables.", + "EXECUTE AS USER is not allowed on SHARED tables (table '{}'). AS USER \ + impersonation is only supported for USER tables.", table_id ), took_ms(start_time), diff --git a/backend/crates/kalamdb-api/src/http/sql/file_utils.rs b/backend/crates/kalamdb-api/src/http/sql/file_utils.rs index 83304d408..1874e4e25 100644 --- a/backend/crates/kalamdb-api/src/http/sql/file_utils.rs +++ b/backend/crates/kalamdb-api/src/http/sql/file_utils.rs @@ -6,17 +6,19 @@ //! - Substituting placeholders with FileRef JSON //! 
- Staging and finalizing files +use std::collections::HashMap; + use actix_multipart::Multipart; use actix_web::{web, Either}; use bytes::{Bytes, BytesMut}; use futures_util::StreamExt; -use kalamdb_commons::models::ids::StorageId; -use kalamdb_commons::models::{NamespaceId, TableId, UserId}; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{ + models::{ids::StorageId, NamespaceId, TableId, UserId}, + schemas::TableType, +}; use kalamdb_configs::FileUploadSettings; use kalamdb_filestore::FileStorageService; use kalamdb_system::{FileRef, FileSubfolderState}; -use std::collections::HashMap; use super::models::{ErrorCode, FileError, ParsedMultipartRequest, ParsedSqlPayload, QueryRequest}; diff --git a/backend/crates/kalamdb-api/src/http/sql/forward.rs b/backend/crates/kalamdb-api/src/http/sql/forward.rs index f394b28a4..e41d8b14f 100644 --- a/backend/crates/kalamdb-api/src/http/sql/forward.rs +++ b/backend/crates/kalamdb-api/src/http/sql/forward.rs @@ -1,16 +1,23 @@ //! SQL forwarding to leader node in cluster mode. 
+use std::{sync::Arc, time::Instant}; + use actix_web::{HttpRequest, HttpResponse}; -use kalamdb_commons::models::{NamespaceId, NodeId}; -use kalamdb_commons::Role; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_raft::{ClusterClient, ForwardSqlRequest, GroupId, RaftExecutor}; -use std::sync::Arc; -use std::time::Instant; +use kalamdb_commons::{ + models::{NamespaceId, NodeId, UserId}, + schemas::TableType, +}; +use kalamdb_core::{app_context::AppContext, error::KalamDbError}; +use kalamdb_raft::{ClusterClient, ForwardSqlRequest, GroupId, RaftExecutor, ShardRouter}; +use kalamdb_sql::classifier::SqlStatementKind; +use serde_json::Value as JsonValue; +use uuid::Uuid; -use super::helpers::parse_forward_params; -use super::models::{ErrorCode, QueryRequest, SqlResponse}; +use super::{ + helpers::parse_forward_params, + models::{ErrorCode, QueryRequest, SqlResponse}, + statements::PreparedApiExecutionStatement, +}; fn header_to_string(req: &HttpRequest, name: &str) -> Option { req.headers().get(name).and_then(|v| v.to_str().ok()).map(|v| v.to_string()) @@ -49,9 +56,77 @@ fn cluster_client_for(app_context: &AppContext) -> Result Option { + let executor = app_context.executor(); + let raft_executor = executor.as_any().downcast_ref::()?; + let config = raft_executor.manager().config(); + let router = ShardRouter::new(config.user_shards, config.shared_shards); + + match table_type { + TableType::User | TableType::Stream => { + Some(GroupId::DataUserShard(router.user_shard_id(user_id))) + }, + TableType::Shared => Some(GroupId::DataSharedShard(router.shared_shard_id())), + TableType::System => Some(GroupId::Meta), + } +} + +fn prepared_statement_table_type( + statement: &PreparedApiExecutionStatement, + app_context: &AppContext, +) -> Option { + statement.prepared_statement.table_type.or_else(|| { + statement + .prepared_statement + .table_id + .as_ref() + .and_then(|table_id| app_context.schema_registry().get(table_id)) + 
.map(|cached| cached.table_entry().table_type) + }) +} + +pub(crate) fn prepared_statement_target_group( + statement: &PreparedApiExecutionStatement, + app_context: &AppContext, + user_id: &UserId, +) -> Option { + let Some(classified) = statement.prepared_statement.classified_statement.as_ref() else { + return None; + }; + + if matches!(classified.kind(), SqlStatementKind::Select) { + return prepared_statement_table_type(statement, app_context).and_then(|table_type| { + match table_type { + TableType::User | TableType::Shared | TableType::Stream => { + data_group_for_table_type(app_context, table_type, user_id) + }, + TableType::System => None, + } + }); + } + + if !classified.is_write_operation() { + return None; + } + + match classified.kind() { + SqlStatementKind::Insert(_) | SqlStatementKind::Update(_) | SqlStatementKind::Delete(_) => { + prepared_statement_table_type(statement, app_context) + .and_then(|table_type| data_group_for_table_type(app_context, table_type, user_id)) + .or(Some(GroupId::Meta)) + }, + _ => Some(GroupId::Meta), + } +} + async fn forward_sql_grpc( target: ForwardTarget, http_req: &HttpRequest, @@ -87,6 +162,9 @@ async fn forward_sql_grpc( let response = match target { ForwardTarget::Leader => client.forward_sql_to_leader(grpc_req).await, + ForwardTarget::GroupLeader(group_id) => { + client.forward_sql_to_group_leader(group_id, grpc_req).await + }, ForwardTarget::Node(node_id) => client.forward_sql_to_node(node_id, grpc_req).await, }; @@ -115,58 +193,57 @@ async fn forward_sql_grpc( Some(HttpResponse::build(status).content_type("application/json").body(response.body)) } -/// Forwards write operations to the leader node in cluster mode. +/// Forwards leader-routed operations to the appropriate leader node in cluster mode. 
pub async fn forward_sql_if_follower( http_req: &HttpRequest, - req: &QueryRequest, + sql: &str, + params_json: &Option>, + namespace_id: &Option, app_context: &Arc, - default_namespace: &NamespaceId, + prepared_statements: &[PreparedApiExecutionStatement], + user_id: &UserId, request_id: Option<&str>, ) -> Option { let start_time = Instant::now(); let executor = app_context.executor(); - if executor.is_leader(GroupId::Meta).await { - return None; - } + let write_targets: Vec = prepared_statements + .iter() + .filter_map(|statement| { + prepared_statement_target_group(statement, app_context.as_ref(), user_id) + }) + .collect(); - let statements = match kalamdb_sql::split_statements(&req.sql) { - Ok(stmts) => stmts, - Err(_) => { - return forward_sql_grpc( - ForwardTarget::Leader, - http_req, - req, - app_context.as_ref(), - request_id, - start_time, - ) - .await - }, - }; + if let Some(first_target) = write_targets.first().copied() { + let target_group = if write_targets.iter().all(|target| *target == first_target) { + first_target + } else { + GroupId::Meta + }; - let has_write = statements.iter().any(|sql| { - let classify_sql = - kalamdb_sql::execute_as::extract_inner_sql(sql).unwrap_or_else(|| sql.to_string()); - let stmt = kalamdb_sql::classifier::SqlStatement::classify_and_parse( - &classify_sql, - default_namespace, - Role::System, - ) - .unwrap_or_else(|_| { - kalamdb_sql::classifier::SqlStatement::new( - classify_sql, - kalamdb_sql::classifier::SqlStatementKind::Unknown, - ) - }); - stmt.is_write_operation() - }); + if executor.is_leader(target_group).await { + return None; + } + + let generated_request_id; + let request_id = match request_id { + Some(request_id) => Some(request_id), + None => { + generated_request_id = Uuid::now_v7().to_string(); + Some(generated_request_id.as_str()) + }, + }; + + let req = QueryRequest { + sql: sql.to_string(), + params: params_json.clone(), + namespace_id: namespace_id.clone(), + }; - if has_write { return 
forward_sql_grpc( - ForwardTarget::Leader, + ForwardTarget::GroupLeader(target_group), http_req, - req, + &req, app_context.as_ref(), request_id, start_time, diff --git a/backend/crates/kalamdb-api/src/http/sql/helpers/converter.rs b/backend/crates/kalamdb-api/src/http/sql/helpers/converter.rs index 28f57e8e0..9795a3332 100644 --- a/backend/crates/kalamdb-api/src/http/sql/helpers/converter.rs +++ b/backend/crates/kalamdb-api/src/http/sql/helpers/converter.rs @@ -1,9 +1,11 @@ //! Arrow to JSON conversion helpers use arrow::record_batch::RecordBatch; -use kalamdb_commons::conversions::{mask_sensitive_rows_for_role, schema_fields_from_arrow_schema}; -use kalamdb_commons::models::Role; -use kalamdb_commons::schemas::SchemaField; +use kalamdb_commons::{ + conversions::{mask_sensitive_rows_for_role, schema_fields_from_arrow_schema}, + models::Role, + schemas::SchemaField, +}; use kalamdb_core::providers::arrow_json_conversion::record_batch_to_json_arrays; use super::super::models::QueryResult; @@ -68,16 +70,20 @@ pub fn success_response_suffix(row_count: usize, as_user: &str, took: f64) -> St #[cfg(test)] mod tests { - use super::*; - use arrow::array::RecordBatch; - use arrow::datatypes::{DataType, Field, Schema}; - use kalamdb_commons::conversions::{ - with_kalam_column_flags_metadata, with_kalam_data_type_metadata, - }; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::schemas::{FieldFlag, FieldFlags}; use std::sync::Arc; + use arrow::{ + array::RecordBatch, + datatypes::{DataType, Field, Schema}, + }; + use kalamdb_commons::{ + conversions::{with_kalam_column_flags_metadata, with_kalam_data_type_metadata}, + models::datatypes::KalamDataType, + schemas::{FieldFlag, FieldFlags}, + }; + + use super::*; + #[test] fn test_record_batch_to_query_result_includes_flags_and_omits_empty() { let id_field = with_kalam_column_flags_metadata( diff --git a/backend/crates/kalamdb-api/src/http/sql/helpers/executor.rs 
b/backend/crates/kalamdb-api/src/http/sql/helpers/executor.rs index a486cbfcc..b6481167a 100644 --- a/backend/crates/kalamdb-api/src/http/sql/helpers/executor.rs +++ b/backend/crates/kalamdb-api/src/http/sql/helpers/executor.rs @@ -1,14 +1,15 @@ //! SQL execution helpers -use kalamdb_commons::models::UserId; -use kalamdb_commons::Role; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::executor::{PreparedExecutionStatement, ScalarValue, SqlExecutor}; -use kalamdb_core::sql::ExecutionResult; use std::sync::Arc; -use super::super::models::QueryResult; -use super::converter::record_batch_to_query_result; +use kalamdb_commons::{models::UserId, Role}; +use kalamdb_core::sql::{ + context::ExecutionContext, + executor::{PreparedExecutionStatement, ScalarValue, SqlExecutor}, + ExecutionResult, +}; + +use super::{super::models::QueryResult, converter::record_batch_to_query_result}; pub async fn execute_single_statement_raw( metadata: &PreparedExecutionStatement, diff --git a/backend/crates/kalamdb-api/src/http/sql/helpers/files.rs b/backend/crates/kalamdb-api/src/http/sql/helpers/files.rs index 8db57006d..9806eaf2e 100644 --- a/backend/crates/kalamdb-api/src/http/sql/helpers/files.rs +++ b/backend/crates/kalamdb-api/src/http/sql/helpers/files.rs @@ -1,11 +1,13 @@ //! 
File cleanup helpers -use kalamdb_commons::models::ids::StorageId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_system::FileRef; use std::collections::HashMap; +use kalamdb_commons::{ + models::{ids::StorageId, TableId, UserId}, + schemas::TableType, +}; +use kalamdb_system::FileRef; + /// Cleanup files after SQL error pub async fn cleanup_files( file_refs: &HashMap, diff --git a/backend/crates/kalamdb-api/src/http/sql/helpers/params.rs b/backend/crates/kalamdb-api/src/http/sql/helpers/params.rs index 178002e1e..67bd08e2c 100644 --- a/backend/crates/kalamdb-api/src/http/sql/helpers/params.rs +++ b/backend/crates/kalamdb-api/src/http/sql/helpers/params.rs @@ -1,7 +1,8 @@ //! Parameter parsing helpers -use kalamdb_core::providers::arrow_json_conversion::json_value_to_scalar_strict; -use kalamdb_core::sql::executor::ScalarValue; +use kalamdb_core::{ + providers::arrow_json_conversion::json_value_to_scalar_strict, sql::executor::ScalarValue, +}; use kalamdb_raft::ForwardSqlParam; use serde_json::Value as JsonValue; @@ -71,10 +72,11 @@ pub fn parse_forward_params( #[cfg(test)] mod tests { - use super::*; use kalamdb_raft::forward_sql_param; use serde_json::json; + use super::*; + #[test] fn parse_forward_params_preserves_scalar_types() { let params = Some(vec![ diff --git a/backend/crates/kalamdb-api/src/http/sql/helpers/streaming.rs b/backend/crates/kalamdb-api/src/http/sql/helpers/streaming.rs index a6f9f9956..b38d90319 100644 --- a/backend/crates/kalamdb-api/src/http/sql/helpers/streaming.rs +++ b/backend/crates/kalamdb-api/src/http/sql/helpers/streaming.rs @@ -1,9 +1,11 @@ use actix_web::{error::ErrorInternalServerError, HttpResponse}; use bytes::Bytes; use futures_util::stream; -use kalamdb_commons::conversions::{mask_sensitive_rows_for_role, schema_fields_from_arrow_schema}; -use kalamdb_commons::models::{KalamCellValue, Role}; -use kalamdb_commons::schemas::SchemaField; +use kalamdb_commons::{ + 
conversions::{mask_sensitive_rows_for_role, schema_fields_from_arrow_schema}, + models::{KalamCellValue, Role}, + schemas::SchemaField, +}; use kalamdb_core::providers::arrow_json_conversion::record_batch_to_json_arrays; use super::converter::{resolve_arrow_schema, row_result_prefix, success_response_suffix}; diff --git a/backend/crates/kalamdb-api/src/http/sql/models/parsed_payload.rs b/backend/crates/kalamdb-api/src/http/sql/models/parsed_payload.rs index 1decfca50..35247542f 100644 --- a/backend/crates/kalamdb-api/src/http/sql/models/parsed_payload.rs +++ b/backend/crates/kalamdb-api/src/http/sql/models/parsed_payload.rs @@ -1,9 +1,10 @@ //! Parsed SQL payload models +use std::collections::HashMap; + use bytes::Bytes; use kalamdb_commons::models::NamespaceId; use serde_json::Value as JsonValue; -use std::collections::HashMap; use super::ErrorCode; diff --git a/backend/crates/kalamdb-api/src/http/sql/models/sql_response.rs b/backend/crates/kalamdb-api/src/http/sql/models/sql_response.rs index 1f7457642..d0cb80bc8 100644 --- a/backend/crates/kalamdb-api/src/http/sql/models/sql_response.rs +++ b/backend/crates/kalamdb-api/src/http/sql/models/sql_response.rs @@ -2,12 +2,14 @@ //! //! This module defines the structure for SQL execution responses from the `/v1/api/sql` endpoint. 
-use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::KalamCellValue; -use kalamdb_commons::schemas::SchemaField; -use serde::{Deserialize, Serialize, Serializer}; use std::fmt; +use kalamdb_commons::{ + models::{datatypes::KalamDataType, KalamCellValue}, + schemas::SchemaField, +}; +use serde::{Deserialize, Serialize, Serializer}; + /// Custom serializer to limit took field to 3 decimal places fn serialize_took(took: &f64, serializer: S) -> Result where @@ -210,7 +212,8 @@ pub struct SqlResponse { /// Array of result sets, one per executed statement pub results: Vec, - /// Total execution time in milliseconds (with fractional precision, limited to 3 decimal places) + /// Total execution time in milliseconds (with fractional precision, limited to 3 decimal + /// places) #[serde(serialize_with = "serialize_took")] pub took: f64, diff --git a/backend/crates/kalamdb-api/src/http/sql/request.rs b/backend/crates/kalamdb-api/src/http/sql/request.rs index e0f7d7d2a..234ff0f9a 100644 --- a/backend/crates/kalamdb-api/src/http/sql/request.rs +++ b/backend/crates/kalamdb-api/src/http/sql/request.rs @@ -1,11 +1,14 @@ -use actix_multipart::Multipart; -use actix_web::{web, Either, FromRequest, HttpRequest, HttpResponse}; use std::time::Instant; -use super::file_utils::parse_sql_payload; -use super::models::{ErrorCode, ParsedSqlPayload, QueryRequest, SqlResponse}; +use actix_multipart::Multipart; +use actix_web::{web, Either, FromRequest, HttpRequest, HttpResponse}; use kalamdb_core::app_context::AppContext; +use super::{ + file_utils::parse_sql_payload, + models::{ErrorCode, ParsedSqlPayload, QueryRequest, SqlResponse}, +}; + #[inline] pub(super) fn took_ms(start_time: Instant) -> f64 { start_time.elapsed().as_secs_f64() * 1000.0 diff --git a/backend/crates/kalamdb-api/src/http/sql/statements.rs b/backend/crates/kalamdb-api/src/http/sql/statements.rs index 4223f5c41..a78453c47 100644 --- a/backend/crates/kalamdb-api/src/http/sql/statements.rs +++ 
b/backend/crates/kalamdb-api/src/http/sql/statements.rs @@ -1,13 +1,20 @@ -use actix_web::HttpResponse; -use kalamdb_commons::models::{NamespaceId, UserId}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::executor::{PreparedExecutionStatement, SqlExecutor}; -use kalamdb_core::sql::SqlImpersonationService; use std::time::Instant; -use super::models::{ErrorCode, SqlResponse}; -use super::request::took_ms; +use actix_web::HttpResponse; +use kalamdb_commons::models::{NamespaceId, UserId}; +use kalamdb_core::{ + error::KalamDbError, + sql::{ + context::ExecutionContext, + executor::{PreparedExecutionStatement, SqlExecutor}, + SqlImpersonationService, + }, +}; + +use super::{ + models::{ErrorCode, SqlResponse}, + request::took_ms, +}; #[derive(Debug)] pub(super) struct ParsedExecutionStatement { @@ -42,11 +49,7 @@ pub(super) async fn resolve_execute_as_user( ) -> Result, KalamDbError> { match statement.execute_as_username.as_ref() { Some(target_username) => impersonation_service - .resolve_execute_as_user( - exec_ctx.user_id(), - exec_ctx.user_role(), - target_username, - ) + .resolve_execute_as_user(exec_ctx.user_id(), exec_ctx.user_role(), target_username) .await .map(Some), None => Ok(None), diff --git a/backend/crates/kalamdb-api/src/http/topics/ack.rs b/backend/crates/kalamdb-api/src/http/topics/ack.rs index 12b24e606..d779882de 100644 --- a/backend/crates/kalamdb-api/src/http/topics/ack.rs +++ b/backend/crates/kalamdb-api/src/http/topics/ack.rs @@ -2,12 +2,13 @@ //! //! 
POST /v1/api/topics/ack - Acknowledge offset for consumer group +use std::sync::Arc; + use actix_web::{post, web, HttpResponse, Responder}; use kalamdb_auth::AuthSessionExtractor; use kalamdb_commons::Role; use kalamdb_core::app_context::AppContext; use kalamdb_session::AuthSession; -use std::sync::Arc; use super::models::{AckRequest, AckResponse, TopicErrorResponse}; diff --git a/backend/crates/kalamdb-api/src/http/topics/consume.rs b/backend/crates/kalamdb-api/src/http/topics/consume.rs index 70d11225e..7a9db63d8 100644 --- a/backend/crates/kalamdb-api/src/http/topics/consume.rs +++ b/backend/crates/kalamdb-api/src/http/topics/consume.rs @@ -5,12 +5,13 @@ //! Uses the standard topic_message_schema for consistent field structure //! across SQL CONSUME and HTTP API responses. +use std::sync::Arc; + use actix_web::{post, web, HttpResponse, Responder}; use kalamdb_auth::AuthSessionExtractor; use kalamdb_commons::Role; use kalamdb_core::app_context::AppContext; use kalamdb_session::AuthSession; -use std::sync::Arc; use super::models::{ ConsumeRequest, ConsumeResponse, StartPosition, TopicErrorResponse, TopicMessage, diff --git a/backend/crates/kalamdb-api/src/http/topics/models/consume_request.rs b/backend/crates/kalamdb-api/src/http/topics/models/consume_request.rs index 0bdab1bc2..c1fb93173 100644 --- a/backend/crates/kalamdb-api/src/http/topics/models/consume_request.rs +++ b/backend/crates/kalamdb-api/src/http/topics/models/consume_request.rs @@ -1,9 +1,10 @@ //! 
Consume request model -use super::StartPosition; use kalamdb_commons::models::{ConsumerGroupId, TopicId}; use serde::Deserialize; +use super::StartPosition; + fn default_start_position() -> StartPosition { StartPosition::Latest } diff --git a/backend/crates/kalamdb-api/src/http/topics/models/consume_response.rs b/backend/crates/kalamdb-api/src/http/topics/models/consume_response.rs index 320ef566c..0e4d5cf98 100644 --- a/backend/crates/kalamdb-api/src/http/topics/models/consume_response.rs +++ b/backend/crates/kalamdb-api/src/http/topics/models/consume_response.rs @@ -1,8 +1,9 @@ //! Consume response model -use super::TopicMessage; use serde::Serialize; +use super::TopicMessage; + /// Response for POST /api/topics/consume #[derive(Debug, Serialize)] pub struct ConsumeResponse { diff --git a/backend/crates/kalamdb-api/src/http/topics/models/topic_message.rs b/backend/crates/kalamdb-api/src/http/topics/models/topic_message.rs index 86bfed1d8..9c6d30053 100644 --- a/backend/crates/kalamdb-api/src/http/topics/models/topic_message.rs +++ b/backend/crates/kalamdb-api/src/http/topics/models/topic_message.rs @@ -22,5 +22,5 @@ pub struct TopicMessage { #[serde(skip_serializing_if = "Option::is_none")] pub user: Option, /// Operation type that triggered this message (Insert, Update, Delete) - pub op: String, //TODO: Use TopicOp instead + pub op: String, // TODO: Use TopicOp instead } diff --git a/backend/crates/kalamdb-api/src/limiter/mod.rs b/backend/crates/kalamdb-api/src/limiter/mod.rs index 47fa964d3..3c0367e6f 100644 --- a/backend/crates/kalamdb-api/src/limiter/mod.rs +++ b/backend/crates/kalamdb-api/src/limiter/mod.rs @@ -1,6 +1,7 @@ //! Rate limiting and connection guard module //! -//! This module provides lightweight, zero-copy rate limiting for REST API and WebSocket connections. +//! This module provides lightweight, zero-copy rate limiting for REST API and WebSocket +//! connections. //! //! ## Components //! 
diff --git a/backend/crates/kalamdb-api/src/limiter/rate_limiter.rs b/backend/crates/kalamdb-api/src/limiter/rate_limiter.rs index fb9095678..118c01531 100644 --- a/backend/crates/kalamdb-api/src/limiter/rate_limiter.rs +++ b/backend/crates/kalamdb-api/src/limiter/rate_limiter.rs @@ -3,15 +3,21 @@ //! Uses Moka cache for automatic TTL-based cleanup and high concurrency. //! Optimized for zero-copy access patterns where possible. -use super::token_bucket::TokenBucket; +use std::{ + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, + time::Duration, +}; + use kalamdb_commons::models::{ConnectionInfo, UserId}; use kalamdb_configs::RateLimitSettings; use kalamdb_live::ConnectionId; use moka::sync::Cache; use parking_lot::Mutex; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::Arc; -use std::time::Duration; + +use super::token_bucket::TokenBucket; /// Rate limiter for users and connections /// @@ -215,9 +221,10 @@ impl Default for RateLimiter { #[cfg(test)] mod tests { - use super::*; use std::thread; + use super::*; + fn test_config(max_queries: u32, max_subs: u32, max_msgs: u32) -> RateLimitSettings { RateLimitSettings { max_queries_per_sec: max_queries, diff --git a/backend/crates/kalamdb-api/src/limiter/token_bucket.rs b/backend/crates/kalamdb-api/src/limiter/token_bucket.rs index 5ae98cab7..f80282997 100644 --- a/backend/crates/kalamdb-api/src/limiter/token_bucket.rs +++ b/backend/crates/kalamdb-api/src/limiter/token_bucket.rs @@ -98,9 +98,10 @@ impl TokenBucket { #[cfg(test)] mod tests { - use super::*; use std::thread; + use super::*; + #[test] fn test_token_bucket_basic() { let mut bucket = TokenBucket::new(10, 10, Duration::from_secs(1)); diff --git a/backend/crates/kalamdb-api/src/routes.rs b/backend/crates/kalamdb-api/src/routes.rs index 1622a0cbc..00d4009a3 100644 --- a/backend/crates/kalamdb-api/src/routes.rs +++ b/backend/crates/kalamdb-api/src/routes.rs @@ -2,9 +2,10 @@ //! //! 
This module configures all HTTP and WebSocket routes for the KalamDB API. -use crate::{http, ui, ws}; use actix_web::web; +use crate::{http, ui, ws}; + /// Configure API routes for KalamDB /// /// Health check endpoints (both point to same handler, localhost-only): diff --git a/backend/crates/kalamdb-api/src/ui/embedded.rs b/backend/crates/kalamdb-api/src/ui/embedded.rs index c45c96865..6a883ceef 100644 --- a/backend/crates/kalamdb-api/src/ui/embedded.rs +++ b/backend/crates/kalamdb-api/src/ui/embedded.rs @@ -58,15 +58,10 @@ pub async fn serve_embedded_ui(req: HttpRequest) -> HttpResponse { // No UI built - show helpful message HttpResponse::NotFound().body( - "\ -

Admin UI Not Built

\ -

The Admin UI assets were not found in the binary.

\ -

To build the UI:

\ -
    \ -
  1. Run cd ui && npm install && npm run build
  2. \ -
  3. Rebuild the server with cargo build
  4. \ -
\ - ", + "

Admin UI Not Built

The Admin UI assets were not found in the \ + binary.

To build the UI:

  1. Run cd ui && npm install && npm run \ + build
  2. Rebuild the server with cargo \ + build
", ) } diff --git a/backend/crates/kalamdb-api/src/ui/mod.rs b/backend/crates/kalamdb-api/src/ui/mod.rs index 7ab2b849f..0aeea1b0b 100644 --- a/backend/crates/kalamdb-api/src/ui/mod.rs +++ b/backend/crates/kalamdb-api/src/ui/mod.rs @@ -3,9 +3,10 @@ #[cfg(feature = "embedded-ui")] mod embedded; -use actix_web::{web, HttpResponse}; use std::path::PathBuf; +use actix_web::{web, HttpResponse}; + #[derive(Debug, Clone)] pub struct UiRuntimeConfig { backend_origin: Option, @@ -67,7 +68,8 @@ pub fn configure_filesystem_ui_routes( let index_path = ui_path.join("index.html"); let index_content = std::fs::read_to_string(&index_path).unwrap_or_else(|_| { - "

Admin UI not built

Run 'pnpm build' in ui/ directory

" + "

Admin UI not built

Run 'pnpm build' in ui/ \ + directory

" .to_string() }); let index_content = web::Data::new(index_content); diff --git a/backend/crates/kalamdb-api/src/ws/compression.rs b/backend/crates/kalamdb-api/src/ws/compression.rs index c22d0649a..c15b7eca6 100644 --- a/backend/crates/kalamdb-api/src/ws/compression.rs +++ b/backend/crates/kalamdb-api/src/ws/compression.rs @@ -9,10 +9,10 @@ //! unavoidable without a per-connection write pipeline; this keeps it to //! exactly one modestly-sized allocation rather than several doublings. -use flate2::write::GzEncoder; -use flate2::Compression; use std::io::Write; +use flate2::{write::GzEncoder, Compression}; + /// Default compression threshold in bytes (512 bytes) /// Messages smaller than this are sent uncompressed pub const COMPRESSION_THRESHOLD: usize = 512; diff --git a/backend/crates/kalamdb-api/src/ws/context.rs b/backend/crates/kalamdb-api/src/ws/context.rs index dd2c4e4e4..33d31742f 100644 --- a/backend/crates/kalamdb-api/src/ws/context.rs +++ b/backend/crates/kalamdb-api/src/ws/context.rs @@ -1,17 +1,14 @@ use std::sync::Arc; -use kalamdb_auth::UserRepository; -use kalamdb_commons::models::UserId; +use kalamdb_auth::{AuthRequest, UserRepository}; use kalamdb_commons::websocket::ProtocolOptions; -use kalamdb_commons::Role; use kalamdb_core::app_context::AppContext; use kalamdb_live::{ConnectionsManager, LiveQueryManager}; use crate::limiter::RateLimiter; pub(super) struct UpgradeAuth { - pub(super) user_id: UserId, - pub(super) role: Role, + pub(super) auth_request: AuthRequest, pub(super) protocol: ProtocolOptions, } diff --git a/backend/crates/kalamdb-api/src/ws/events/auth.rs b/backend/crates/kalamdb-api/src/ws/events/auth.rs index ddc7196c0..f1560739e 100644 --- a/backend/crates/kalamdb-api/src/ws/events/auth.rs +++ b/backend/crates/kalamdb-api/src/ws/events/auth.rs @@ -7,15 +7,18 @@ //! Only JWT token authentication is accepted for WebSocket connections. //! This keeps user/password auth limited to the login endpoint. 
+use std::sync::Arc; + use actix_ws::Session; use kalamdb_auth::{authenticate, extract_user_id_for_audit, AuthRequest, UserRepository}; -use kalamdb_commons::models::{ConnectionInfo, UserId}; -use kalamdb_commons::websocket::{ProtocolOptions, WsAuthCredentials}; -use kalamdb_commons::{Role, WebSocketMessage}; +use kalamdb_commons::{ + models::{ConnectionInfo, UserId}, + websocket::{ProtocolOptions, WsAuthCredentials}, + Role, WebSocketMessage, +}; use kalamdb_core::app_context::AppContext; use kalamdb_live::SharedConnectionState; use log::debug; -use std::sync::Arc; use tracing::Instrument; use super::{send_auth_error, send_json}; @@ -56,7 +59,42 @@ pub async fn handle_authenticate( WsAuthCredentials::Jwt { token } => AuthRequest::Jwt { token }, }; - authenticate_with_request( + authenticate_ws_request( + connection_state, + client_ip, + auth_request, + protocol, + session, + user_repo, + compression, + ) + .await +} + +/// Authenticate a request that was supplied during the HTTP upgrade. +/// +/// This runs after the WebSocket upgrade has completed so the TCP/WebSocket +/// handshake is not blocked on JWT validation or user lookup. +pub async fn handle_upgrade_auth( + connection_state: &SharedConnectionState, + client_ip: &ConnectionInfo, + auth_request: AuthRequest, + protocol: ProtocolOptions, + session: &mut Session, + rate_limiter: &Arc, + user_repo: &Arc, + compression: bool, +) -> Result<(), String> { + if !rate_limiter.check_auth_rate(client_ip) { + let _ = send_auth_error( + session.clone(), + "Too many authentication attempts. Please retry shortly.", + ) + .await; + return Err("Auth rate limit exceeded".to_string()); + } + + authenticate_ws_request( connection_state, client_ip, auth_request, @@ -71,8 +109,8 @@ pub async fn handle_authenticate( /// Complete WebSocket authentication after a user has been validated. /// /// This is the single source of truth for post-validation auth steps. 
-/// Called from both the header-auth fast path (handler.rs) and the -/// message-auth path (authenticate_with_request). Consolidates: +/// Called from both the upgrade-header auth path and the explicit +/// Authenticate message path. Consolidates: /// - Marking the connection as authenticated /// - Setting the negotiated protocol /// - Sending the AuthSuccess response @@ -129,7 +167,7 @@ pub async fn send_current_auth_success( } /// Internal function that handles authentication for any AuthRequest type -async fn authenticate_with_request( +async fn authenticate_ws_request( connection_state: &SharedConnectionState, connection_info: &ConnectionInfo, auth_request: AuthRequest, diff --git a/backend/crates/kalamdb-api/src/ws/events/batch.rs b/backend/crates/kalamdb-api/src/ws/events/batch.rs index aed3e4af3..220844a4e 100644 --- a/backend/crates/kalamdb-api/src/ws/events/batch.rs +++ b/backend/crates/kalamdb-api/src/ws/events/batch.rs @@ -2,19 +2,17 @@ //! //! Handles the NextBatch message for paginated initial data fetching. 
+use std::sync::Arc; + use actix_ws::Session; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::websocket::BatchControl; -use kalamdb_commons::WebSocketMessage; +use kalamdb_commons::{ids::SeqId, websocket::BatchControl, WebSocketMessage}; use kalamdb_core::providers::arrow_json_conversion::row_into_json_map; use kalamdb_live::{LiveQueryManager, SharedConnectionState}; use log::error; -use std::sync::Arc; use tracing::debug; -use crate::ws::models::WsErrorCode; - use super::{send_error, send_message}; +use crate::ws::models::WsErrorCode; /// Handle next batch request /// @@ -43,12 +41,7 @@ pub async fn handle_next_batch( { Ok(result) => { // Use BatchControl::new() which handles status based on batch_num and has_more - let batch_control = BatchControl::new( - batch_num, - result.has_more, - result.last_seq, - result.snapshot_end_seq, - ); + let batch_control = BatchControl::new(batch_num, result.has_more, result.last_seq); debug!( "Sending batch {}: {} rows, has_more={}", diff --git a/backend/crates/kalamdb-api/src/ws/events/cleanup.rs b/backend/crates/kalamdb-api/src/ws/events/cleanup.rs index cf1300228..a6b765b56 100644 --- a/backend/crates/kalamdb-api/src/ws/events/cleanup.rs +++ b/backend/crates/kalamdb-api/src/ws/events/cleanup.rs @@ -2,9 +2,10 @@ //! //! Handles cleanup when a WebSocket connection is closed. 
+use std::sync::Arc; + use kalamdb_live::{ConnectionsManager, LiveQueryManager, SharedConnectionState}; use log::debug; -use std::sync::Arc; use crate::limiter::RateLimiter; diff --git a/backend/crates/kalamdb-api/src/ws/events/mod.rs b/backend/crates/kalamdb-api/src/ws/events/mod.rs index 0e2300280..b00702fe2 100644 --- a/backend/crates/kalamdb-api/src/ws/events/mod.rs +++ b/backend/crates/kalamdb-api/src/ws/events/mod.rs @@ -19,11 +19,12 @@ pub mod subscription; pub mod unsubscribe; use actix_ws::{CloseCode, CloseReason, Session}; -use kalamdb_commons::websocket::SerializationType; -use kalamdb_commons::WebSocketMessage; +use kalamdb_commons::{websocket::SerializationType, WebSocketMessage}; -use crate::ws::compression::{is_gzip, maybe_compress}; -use crate::ws::models::{Notification, WsErrorCode}; +use crate::ws::{ + compression::{is_gzip, maybe_compress}, + models::{Notification, WsErrorCode}, +}; /// Send auth error and close (takes ownership of session to close it) pub async fn send_auth_error(mut session: Session, message: &str) -> Result<(), ()> { diff --git a/backend/crates/kalamdb-api/src/ws/events/subscription.rs b/backend/crates/kalamdb-api/src/ws/events/subscription.rs index 25f2b9cd8..6117ffdfa 100644 --- a/backend/crates/kalamdb-api/src/ws/events/subscription.rs +++ b/backend/crates/kalamdb-api/src/ws/events/subscription.rs @@ -2,18 +2,19 @@ //! //! Handles the Subscribe message for live query subscriptions. 
+use std::sync::Arc; + use actix_ws::Session; -use kalamdb_commons::websocket::{BatchControl, SubscriptionRequest, MAX_ROWS_PER_BATCH}; -use kalamdb_commons::WebSocketMessage; +use kalamdb_commons::{ + websocket::{BatchControl, SubscriptionRequest, MAX_ROWS_PER_BATCH}, + WebSocketMessage, +}; use kalamdb_core::providers::arrow_json_conversion::row_into_json_map; use kalamdb_live::{InitialDataOptions, LiveQueryManager, SharedConnectionState}; use log::{debug, error, warn}; -use std::sync::Arc; - -use crate::limiter::RateLimiter; -use crate::ws::models::WsErrorCode; use super::{send_error, send_message}; +use crate::{limiter::RateLimiter, ws::models::WsErrorCode}; /// Handle subscription request /// @@ -72,7 +73,7 @@ pub async fn handle_subscribe( // - batch_size: Hint for server-side batch sizing let initial_opts = subscription_options.map(|options| { if let Some(from_seq) = options.from { - InitialDataOptions::batch(Some(from_seq), options.snapshot_end_seq, batch_size) + InitialDataOptions::batch(Some(from_seq), None, batch_size) } else if let Some(n) = options.last_rows { InitialDataOptions::last(n as usize) } else { @@ -101,11 +102,10 @@ pub async fn handle_subscribe( 0, // batch_num initial.has_more, initial.last_seq, - initial.snapshot_end_seq, ) } else { // No initial data - empty result, ready immediately - BatchControl::new(0, false, None, None) + BatchControl::new(0, false, None) }; let ack = WebSocketMessage::subscription_ack( diff --git a/backend/crates/kalamdb-api/src/ws/events/unsubscribe.rs b/backend/crates/kalamdb-api/src/ws/events/unsubscribe.rs index 2356518eb..d7f26330b 100644 --- a/backend/crates/kalamdb-api/src/ws/events/unsubscribe.rs +++ b/backend/crates/kalamdb-api/src/ws/events/unsubscribe.rs @@ -2,10 +2,11 @@ //! //! Handles the Unsubscribe message for live query subscriptions. 
+use std::sync::Arc; + use kalamdb_commons::models::LiveQueryId; use kalamdb_live::{LiveQueryManager, SharedConnectionState}; use log::debug; -use std::sync::Arc; use crate::limiter::RateLimiter; diff --git a/backend/crates/kalamdb-api/src/ws/handler.rs b/backend/crates/kalamdb-api/src/ws/handler.rs index c489ff583..701bf6023 100644 --- a/backend/crates/kalamdb-api/src/ws/handler.rs +++ b/backend/crates/kalamdb-api/src/ws/handler.rs @@ -11,16 +11,19 @@ //! - Subscriptions stored in ConnectionState.subscriptions //! - No local tracking needed - everything is in ConnectionState +use std::sync::Arc; + use actix_web::{get, web, Error, HttpRequest, HttpResponse}; use kalamdb_auth::UserRepository; use kalamdb_core::app_context::AppContext; use kalamdb_live::{ConnectionId, ConnectionsManager, LiveQueryManager}; use log::{debug, warn}; -use std::sync::Arc; -use super::context::WsHandlerContext; -use super::protocol::{authenticate_upgrade, compression_enabled_from_query, validate_origin}; -use super::runtime::run_websocket; +use super::{ + context::WsHandlerContext, + protocol::{compression_enabled_from_query, parse_upgrade_auth, validate_origin}, + runtime::run_websocket, +}; use crate::limiter::RateLimiter; /// GET /v1/ws - Establish WebSocket connection @@ -52,10 +55,7 @@ pub async fn websocket_handler( } let compression_enabled = compression_enabled_from_query(&req); - let pre_auth = match authenticate_upgrade(&req, user_repo.get_ref()).await { - Ok(pre_auth) => pre_auth, - Err(response) => return Ok(response), - }; + let pre_auth = parse_upgrade_auth(&req); let connection_id = ConnectionId::new(uuid::Uuid::new_v4().simple().to_string()); let client_ip = kalamdb_auth::extract_client_ip_secure(&req); diff --git a/backend/crates/kalamdb-api/src/ws/messages.rs b/backend/crates/kalamdb-api/src/ws/messages.rs index 0fbe4cb6a..413741f3c 100644 --- a/backend/crates/kalamdb-api/src/ws/messages.rs +++ b/backend/crates/kalamdb-api/src/ws/messages.rs @@ -1,21 +1,25 @@ +use 
std::{io::Read, sync::Arc}; + use actix_ws::Session; use bytes::Bytes; use flate2::read::GzDecoder; use kalamdb_auth::UserRepository; -use kalamdb_commons::models::ConnectionInfo; -use kalamdb_commons::websocket::{ClientMessage, SerializationType}; +use kalamdb_commons::{ + models::ConnectionInfo, + websocket::{ClientMessage, SerializationType}, +}; use kalamdb_live::{LiveQueryManager, SharedConnectionState}; -use std::io::Read; -use std::sync::Arc; -use super::events::{ - auth::{handle_authenticate, send_current_auth_success}, - batch::handle_next_batch, - send_error, - subscription::handle_subscribe, - unsubscribe::handle_unsubscribe, +use super::{ + events::{ + auth::{handle_authenticate, send_current_auth_success}, + batch::handle_next_batch, + send_error, + subscription::handle_subscribe, + unsubscribe::handle_unsubscribe, + }, + models::WsErrorCode, }; -use super::models::WsErrorCode; use crate::limiter::RateLimiter; #[allow(clippy::too_many_arguments)] diff --git a/backend/crates/kalamdb-api/src/ws/mod.rs b/backend/crates/kalamdb-api/src/ws/mod.rs index 4660cd83a..2eb33448d 100644 --- a/backend/crates/kalamdb-api/src/ws/mod.rs +++ b/backend/crates/kalamdb-api/src/ws/mod.rs @@ -37,25 +37,26 @@ pub(crate) use handler::websocket_handler; #[cfg(test)] mod tests { - use super::websocket_handler; - use crate::limiter::RateLimiter; + use std::{collections::HashMap, net::TcpListener, sync::Arc}; + use actix_web::{web, App, HttpServer}; use futures_util::StreamExt; use kalamdb_auth::{create_and_sign_token, CoreUsersRepo, UserRepository}; - use kalamdb_commons::models::{KalamCellValue, UserId}; - use kalamdb_commons::websocket::{ChangeType, SharedChangePayload, WireNotification}; - use kalamdb_commons::Role; - use kalamdb_core::app_context::AppContext; - use kalamdb_core::test_helpers::test_app_context_simple; + use kalamdb_commons::{ + models::{KalamCellValue, UserId}, + websocket::{ChangeType, SharedChangePayload, WireNotification}, + Role, + }; + use 
kalamdb_core::{app_context::AppContext, test_helpers::test_app_context_simple}; use kalamdb_live::ConnectionsManager; - use kalamdb_system::providers::storages::models::StorageMode; - use kalamdb_system::{AuthType, User}; - use std::collections::HashMap; - use std::net::TcpListener; - use std::sync::Arc; - use tokio_tungstenite::connect_async; - use tokio_tungstenite::tungstenite::client::IntoClientRequest; - use tokio_tungstenite::tungstenite::Message; + use kalamdb_system::{providers::storages::models::StorageMode, AuthType, User}; + use tokio_tungstenite::{ + connect_async, + tungstenite::{client::IntoClientRequest, Message}, + }; + + use super::websocket_handler; + use crate::limiter::RateLimiter; struct WsTestContext { server: actix_web::dev::ServerHandle, diff --git a/backend/crates/kalamdb-api/src/ws/models/ws_context.rs b/backend/crates/kalamdb-api/src/ws/models/ws_context.rs index 7fee1d3e9..98595e1e6 100644 --- a/backend/crates/kalamdb-api/src/ws/models/ws_context.rs +++ b/backend/crates/kalamdb-api/src/ws/models/ws_context.rs @@ -1,9 +1,10 @@ //! 
WebSocket connection context +use std::sync::Arc; + use kalamdb_auth::UserRepository; use kalamdb_core::app_context::AppContext; use kalamdb_live::{ConnectionsManager, LiveQueryManager}; -use std::sync::Arc; use crate::limiter::RateLimiter; diff --git a/backend/crates/kalamdb-api/src/ws/protocol.rs b/backend/crates/kalamdb-api/src/ws/protocol.rs index b0e53eb5d..ba15b44ae 100644 --- a/backend/crates/kalamdb-api/src/ws/protocol.rs +++ b/backend/crates/kalamdb-api/src/ws/protocol.rs @@ -1,8 +1,7 @@ use actix_web::{HttpRequest, HttpResponse}; use actix_ws::ProtocolError; -use kalamdb_auth::{authenticate, AuthRequest, UserRepository}; +use kalamdb_auth::AuthRequest; use kalamdb_commons::websocket::{CompressionType, ProtocolOptions, SerializationType}; -use std::sync::Arc; use super::context::UpgradeAuth; @@ -63,38 +62,25 @@ pub(super) fn validate_origin( Ok(()) } -pub(super) async fn authenticate_upgrade( - req: &HttpRequest, - user_repo: &Arc, -) -> Result, HttpResponse> { +pub(super) fn parse_upgrade_auth(req: &HttpRequest) -> Option { let Some(auth_header) = req.headers().get("Authorization") else { - return Ok(None); + return None; }; let Ok(auth_str) = auth_header.to_str() else { - return Ok(None); + return None; }; let Some(token) = auth_str.strip_prefix("Bearer ") else { - return Ok(None); - }; - - let client_ip_for_auth = kalamdb_auth::extract_client_ip_secure(req); - let auth_request = AuthRequest::Jwt { - token: token.to_string(), + return None; }; - match authenticate(auth_request, &client_ip_for_auth, user_repo).await { - Ok(result) => Ok(Some(UpgradeAuth { - user_id: result.user.user_id, - role: result.user.role, - protocol: parse_protocol_from_query(req.query_string()), - })), - Err(_) => { - log::warn!("WebSocket upgrade rejected: invalid Bearer token"); - Err(HttpResponse::Unauthorized().body("Invalid token")) + Some(UpgradeAuth { + auth_request: AuthRequest::Jwt { + token: token.to_string(), }, - } + protocol: 
parse_protocol_from_query(req.query_string()), + }) } pub(super) fn is_expected_ws_disconnect(error: &ProtocolError) -> bool { @@ -122,16 +108,20 @@ pub(super) fn is_expected_ws_disconnect(error: &ProtocolError) -> bool { #[cfg(test)] mod tests { - use super::{parse_protocol_from_query, validate_origin}; + use std::sync::Arc; + use actix_web::{http::StatusCode, test::TestRequest}; - use kalamdb_commons::websocket::{CompressionType, SerializationType}; - use kalamdb_commons::NodeId; + use kalamdb_commons::{ + websocket::{CompressionType, SerializationType}, + NodeId, + }; use kalamdb_configs::ServerConfig; use kalamdb_core::app_context::AppContext; use kalamdb_store::test_utils::InMemoryBackend; - use std::sync::Arc; use uuid::Uuid; + use super::{parse_protocol_from_query, validate_origin}; + fn test_app_context_with_origin_policy( cors_allowed_origins: Vec, strict_ws_origin_check: bool, diff --git a/backend/crates/kalamdb-api/src/ws/runtime.rs b/backend/crates/kalamdb-api/src/ws/runtime.rs index d1ba119ad..dc4fb4397 100644 --- a/backend/crates/kalamdb-api/src/ws/runtime.rs +++ b/backend/crates/kalamdb-api/src/ws/runtime.rs @@ -7,14 +7,16 @@ use kalamdb_jobs::health_monitor::{ use kalamdb_live::{ConnectionEvent, ConnectionRegistration}; use log::{debug, error, info, warn}; -use super::context::{UpgradeAuth, WsHandlerContext}; -use super::events::{ - auth::complete_ws_auth, cleanup::cleanup_connection, send_error, send_json, - send_wire_notification, +use super::{ + context::{UpgradeAuth, WsHandlerContext}, + events::{ + auth::handle_upgrade_auth, cleanup::cleanup_connection, send_error, send_json, + send_wire_notification, + }, + messages::{handle_binary_message, handle_text_message}, + models::WsErrorCode, + protocol::is_expected_ws_disconnect, }; -use super::messages::{handle_binary_message, handle_text_message}; -use super::models::WsErrorCode; -use super::protocol::is_expected_ws_disconnect; pub(super) async fn run_websocket( client_ip: 
kalamdb_commons::models::ConnectionInfo, @@ -38,16 +40,36 @@ pub(super) async fn run_websocket( if let Some(auth) = pre_auth { connection_state.mark_auth_started(); - let _ = complete_ws_auth( + if handle_upgrade_auth( &connection_state, - auth.user_id, - auth.role, + &client_ip, + auth.auth_request, auth.protocol, &mut session, + &handler_context.rate_limiter, + &handler_context.user_repo, handler_context.compression_enabled, ) - .await; - debug!("WebSocket pre-authenticated from header: {}", connection_id); + .await + .is_err() + { + let _ = session + .close(Some(CloseReason { + code: CloseCode::Policy, + description: Some("Authentication failed".into()), + })) + .await; + cleanup_connection( + &connection_state, + &handler_context.connection_registry, + &handler_context.rate_limiter, + &handler_context.live_query_manager, + ) + .await; + decrement_websocket_sessions(); + return; + } + debug!("WebSocket authenticated from upgrade header: {}", connection_id); } loop { diff --git a/backend/crates/kalamdb-auth/src/errors/error.rs b/backend/crates/kalamdb-auth/src/errors/error.rs index bde265d4a..f3ba5653b 100644 --- a/backend/crates/kalamdb-auth/src/errors/error.rs +++ b/backend/crates/kalamdb-auth/src/errors/error.rs @@ -1,9 +1,9 @@ // Authentication error types for KalamDB -use crate::oidc::OidcError; - use thiserror::Error; +use crate::oidc::OidcError; + /// Errors that can occur during authentication and authorization. 
#[derive(Error, Debug)] pub enum AuthError { diff --git a/backend/crates/kalamdb-auth/src/helpers/basic_auth.rs b/backend/crates/kalamdb-auth/src/helpers/basic_auth.rs index 25bb87df9..477d82c4b 100644 --- a/backend/crates/kalamdb-auth/src/helpers/basic_auth.rs +++ b/backend/crates/kalamdb-auth/src/helpers/basic_auth.rs @@ -1,8 +1,9 @@ // HTTP Basic Authentication parser -use crate::errors::error::{AuthError, AuthResult}; use base64::prelude::*; +use crate::errors::error::{AuthError, AuthResult}; + /// Parse HTTP Basic Auth header and extract credentials. /// /// Expected format: `Authorization: Basic ` diff --git a/backend/crates/kalamdb-auth/src/helpers/cookie.rs b/backend/crates/kalamdb-auth/src/helpers/cookie.rs index bb017a07d..1b091f99d 100644 --- a/backend/crates/kalamdb-auth/src/helpers/cookie.rs +++ b/backend/crates/kalamdb-auth/src/helpers/cookie.rs @@ -42,8 +42,8 @@ fn build_token_cookie<'a>( cookie::time::OffsetDateTime::from_unix_timestamp(expiry.timestamp()).unwrap_or_else( |_| { log::warn!( - "JWT expiry timestamp {} is out of OffsetDateTime range; \ - falling back to current time plus 24 h", + "JWT expiry timestamp {} is out of OffsetDateTime range; falling back to \ + current time plus 24 h", expiry.timestamp() ); cookie::time::OffsetDateTime::now_utc() + cookie::time::Duration::hours(24) diff --git a/backend/crates/kalamdb-auth/src/helpers/extractor.rs b/backend/crates/kalamdb-auth/src/helpers/extractor.rs index e83951f3d..6ffc942b0 100644 --- a/backend/crates/kalamdb-auth/src/helpers/extractor.rs +++ b/backend/crates/kalamdb-auth/src/helpers/extractor.rs @@ -32,16 +32,16 @@ //! //! 
``` +use std::{fmt, future::Future, pin::Pin, sync::Arc}; + use actix_web::{dev::Payload, http::StatusCode, FromRequest, HttpRequest, ResponseError}; -use std::fmt; -use std::future::Future; -use std::pin::Pin; -use std::sync::Arc; -use crate::errors::error::AuthError; -use crate::helpers::ip_extractor::extract_client_ip_secure; -use crate::repository::user_repo::UserRepository; -use crate::services::unified::{authenticate, AuthRequest}; +use crate::{ + errors::error::AuthError, + helpers::ip_extractor::extract_client_ip_secure, + repository::user_repo::UserRepository, + services::unified::{authenticate, AuthRequest}, +}; /// Error type for authentication extraction. /// @@ -184,10 +184,11 @@ impl From for AuthExtractError { #[cfg(test)] mod tests { - use super::*; use actix_web::body::to_bytes; use serde_json::Value; + use super::*; + #[test] fn test_auth_extract_error_codes() { let err = AuthExtractError::new(AuthError::MissingAuthorization("test".to_string()), 10.0); @@ -313,7 +314,9 @@ impl FromRequest for AuthSessionExtractor { let took = start_time.elapsed().as_secs_f64() * 1000.0; return Err(AuthExtractError::new( AuthError::DatabaseError( - "User repository not configured. Ensure Arc is registered as app data.".to_string(), + "User repository not configured. Ensure Arc is \ + registered as app data." + .to_string(), ), took, )); @@ -350,7 +353,8 @@ impl FromRequest for AuthSessionExtractor { let took = start_time.elapsed().as_secs_f64() * 1000.0; return Err(AuthExtractError::new( AuthError::InvalidCredentials( - "This endpoint requires a Bearer token. Basic authentication is not supported." + "This endpoint requires a Bearer token. Basic authentication is not \ + supported." 
.to_string(), ), took, diff --git a/backend/crates/kalamdb-auth/src/helpers/ip_extractor.rs b/backend/crates/kalamdb-auth/src/helpers/ip_extractor.rs index 329a92355..56b9abb23 100644 --- a/backend/crates/kalamdb-auth/src/helpers/ip_extractor.rs +++ b/backend/crates/kalamdb-auth/src/helpers/ip_extractor.rs @@ -3,14 +3,16 @@ //! This module provides secure extraction of client IP addresses from HTTP requests, //! with protection against header spoofing attacks that attempt to bypass localhost checks. -use actix_web::http::header::{HeaderMap, HeaderValue}; -use actix_web::HttpRequest; +use std::{net::IpAddr, sync::RwLock}; + +use actix_web::{ + http::header::{HeaderMap, HeaderValue}, + HttpRequest, +}; use ipnet::IpNet; use kalamdb_commons::models::ConnectionInfo; use log::warn; use once_cell::sync::Lazy; -use std::net::IpAddr; -use std::sync::RwLock; static TRUSTED_PROXY_RANGES: Lazy>> = Lazy::new(|| RwLock::new(Vec::new())); @@ -106,9 +108,9 @@ fn extract_proxy_header_ip( if is_localhost_address(candidate) { warn!( - "Security: Rejected localhost value in trusted {} header: '{}'. Using peer_addr instead.", - header_name, - candidate + "Security: Rejected localhost value in trusted {} header: '{}'. 
Using peer_addr \ + instead.", + header_name, candidate ); return None; } @@ -143,9 +145,10 @@ pub fn is_localhost_address(ip: &str) -> bool { #[cfg(test)] mod tests { - use super::*; use actix_web::http::header::{HeaderMap, HeaderName, HeaderValue}; + use super::*; + #[test] fn test_is_localhost_address() { // IPv4 localhost diff --git a/backend/crates/kalamdb-auth/src/lib.rs b/backend/crates/kalamdb-auth/src/lib.rs index 042d9c3e9..4972c5fbc 100644 --- a/backend/crates/kalamdb-auth/src/lib.rs +++ b/backend/crates/kalamdb-auth/src/lib.rs @@ -36,7 +36,7 @@ pub use providers::jwt_auth::{ JwtClaims, TokenType, DEFAULT_JWT_EXPIRY_HOURS, KALAMDB_ISSUER, }; pub use repository::user_repo::{CachedUsersRepo, CoreUsersRepo, UserRepository}; -pub use services::login_tracker::{LoginTracker, LoginTrackingConfig}; -pub use services::unified::{ - authenticate, extract_user_id_for_audit, AuthRequest, AuthenticationResult, +pub use services::{ + login_tracker::{LoginTracker, LoginTrackingConfig}, + unified::{authenticate, extract_user_id_for_audit, AuthRequest, AuthenticationResult}, }; diff --git a/backend/crates/kalamdb-auth/src/models/impersonation.rs b/backend/crates/kalamdb-auth/src/models/impersonation.rs index 0d3d0bbf1..ef78998d7 100644 --- a/backend/crates/kalamdb-auth/src/models/impersonation.rs +++ b/backend/crates/kalamdb-auth/src/models/impersonation.rs @@ -22,8 +22,7 @@ //! // Context used for audit logging //! 
``` -use kalamdb_commons::models::UserId; -use kalamdb_commons::Role; +use kalamdb_commons::{models::UserId, Role}; /// Origin of the impersonation request #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/backend/crates/kalamdb-auth/src/oidc/utils.rs b/backend/crates/kalamdb-auth/src/oidc/utils.rs index 22fef22aa..233ee6880 100644 --- a/backend/crates/kalamdb-auth/src/oidc/utils.rs +++ b/backend/crates/kalamdb-auth/src/oidc/utils.rs @@ -1,8 +1,8 @@ -use crate::oidc::OidcError; -use base64::engine::general_purpose::URL_SAFE_NO_PAD; -use base64::Engine as _; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use jsonwebtoken::{decode_header, Algorithm}; +use crate::oidc::OidcError; + /// Extract the `alg` field from the JWT header without verifying the signature. pub(crate) fn extract_algorithm_unverified(token: &str) -> Result { let header = decode_header(token).map_err(|error| { diff --git a/backend/crates/kalamdb-auth/src/oidc/validator.rs b/backend/crates/kalamdb-auth/src/oidc/validator.rs index 4c8028818..876d9b11f 100644 --- a/backend/crates/kalamdb-auth/src/oidc/validator.rs +++ b/backend/crates/kalamdb-auth/src/oidc/validator.rs @@ -1,11 +1,15 @@ -use crate::oidc::{OidcConfig, OidcError}; -use jsonwebtoken::jwk::{Jwk, JwkSet}; -use jsonwebtoken::{decode, decode_header, Algorithm, DecodingKey, Validation}; +use std::{collections::HashMap, sync::Arc}; + +use jsonwebtoken::{ + decode, decode_header, + jwk::{Jwk, JwkSet}, + Algorithm, DecodingKey, Validation, +}; use serde::Deserialize; -use std::collections::HashMap; -use std::sync::Arc; use tokio::sync::RwLock; +use crate::oidc::{OidcConfig, OidcError}; + /// OIDC JWT validator with per-issuer JWKS caching. 
#[derive(Clone)] pub(crate) struct OidcValidator { diff --git a/backend/crates/kalamdb-auth/src/providers/jwt_auth.rs b/backend/crates/kalamdb-auth/src/providers/jwt_auth.rs index 2a0d48ddd..b140e4d68 100644 --- a/backend/crates/kalamdb-auth/src/providers/jwt_auth.rs +++ b/backend/crates/kalamdb-auth/src/providers/jwt_auth.rs @@ -1,14 +1,15 @@ // JWT authentication and validation module -use crate::errors::error::{AuthError, AuthResult}; -pub(crate) use crate::oidc::{extract_algorithm_unverified, extract_issuer_unverified}; -pub use crate::oidc::{JwtClaims, TokenType, DEFAULT_JWT_EXPIRY_HOURS}; -use jsonwebtoken::errors::ErrorKind; use jsonwebtoken::{ - decode, decode_header, encode, Algorithm, DecodingKey, EncodingKey, Header, Validation, + decode, decode_header, encode, errors::ErrorKind, Algorithm, DecodingKey, EncodingKey, Header, + Validation, }; use kalamdb_commons::{Role, UserId}; +use crate::errors::error::{AuthError, AuthResult}; +pub(crate) use crate::oidc::{extract_algorithm_unverified, extract_issuer_unverified}; +pub use crate::oidc::{JwtClaims, TokenType, DEFAULT_JWT_EXPIRY_HOURS}; + /// Default issuer for KalamDB-issued tokens. pub const KALAMDB_ISSUER: &str = "kalamdb"; @@ -142,9 +143,10 @@ pub fn is_internal_issuer(issuer: &str) -> bool { #[cfg(test)] mod tests { - use super::*; use jsonwebtoken::{encode, EncodingKey, Header}; + use super::*; + fn create_test_token(secret: &str, exp_offset_secs: i64) -> String { create_test_token_with_type(secret, exp_offset_secs, Some(TokenType::Access)) } diff --git a/backend/crates/kalamdb-auth/src/providers/jwt_config.rs b/backend/crates/kalamdb-auth/src/providers/jwt_config.rs index 077bcf34b..b9023c4d4 100644 --- a/backend/crates/kalamdb-auth/src/providers/jwt_config.rs +++ b/backend/crates/kalamdb-auth/src/providers/jwt_config.rs @@ -1,12 +1,16 @@ // JWT configuration cache, trusted issuer parsing, and OIDC validator registry. 
-use crate::errors::error::{AuthError, AuthResult}; -use crate::oidc::{OidcConfig, OidcValidator}; -use crate::providers::jwt_auth; -use once_cell::sync::OnceCell; use std::collections::HashMap; + +use once_cell::sync::OnceCell; use tokio::sync::RwLock; +use crate::{ + errors::error::{AuthError, AuthResult}, + oidc::{OidcConfig, OidcValidator}, + providers::jwt_auth, +}; + /// Cached JWT configuration for performance. /// /// Holds the HS256 shared secret for internal tokens, the list of trusted diff --git a/backend/crates/kalamdb-auth/src/repository/user_repo.rs b/backend/crates/kalamdb-auth/src/repository/user_repo.rs index a602962bd..d7bd287bd 100644 --- a/backend/crates/kalamdb-auth/src/repository/user_repo.rs +++ b/backend/crates/kalamdb-auth/src/repository/user_repo.rs @@ -1,9 +1,10 @@ -use crate::errors::error::AuthResult; +use std::{sync::Arc, time::Duration}; + use kalamdb_commons::UserId; use kalamdb_system::{User, UsersTableProvider}; use moka::sync::Cache; -use std::sync::Arc; -use std::time::Duration; + +use crate::errors::error::AuthResult; /// Abstraction over user persistence for authentication flows. /// diff --git a/backend/crates/kalamdb-auth/src/security/password.rs b/backend/crates/kalamdb-auth/src/security/password.rs index 92754b70d..c600571ef 100644 --- a/backend/crates/kalamdb-auth/src/security/password.rs +++ b/backend/crates/kalamdb-auth/src/security/password.rs @@ -1,9 +1,10 @@ // Password hashing and validation module -use crate::errors::error::{AuthError, AuthResult}; +use std::{collections::HashSet, sync::OnceLock}; + use bcrypt::{hash, verify, DEFAULT_COST}; -use std::collections::HashSet; -use std::sync::OnceLock; + +use crate::errors::error::{AuthError, AuthResult}; /// Bcrypt cost factor for password hashing. /// Higher values = more secure but slower. 
diff --git a/backend/crates/kalamdb-auth/src/services/login_tracker.rs b/backend/crates/kalamdb-auth/src/services/login_tracker.rs index 8b2a0c890..382dbf427 100644 --- a/backend/crates/kalamdb-auth/src/services/login_tracker.rs +++ b/backend/crates/kalamdb-auth/src/services/login_tracker.rs @@ -3,13 +3,16 @@ //! Provides functionality for tracking failed login attempts and //! implementing account lockout to prevent brute-force attacks. -use crate::errors::error::{AuthError, AuthResult}; -use crate::repository::user_repo::UserRepository; -use kalamdb_system::User; -use kalamdb_system::{DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS}; -use log::{info, warn}; use std::sync::Arc; +use kalamdb_system::{User, DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS}; +use log::{info, warn}; + +use crate::{ + errors::error::{AuthError, AuthResult}, + repository::user_repo::UserRepository, +}; + /// Configuration for login tracking behavior #[derive(Debug, Clone)] pub struct LoginTrackingConfig { diff --git a/backend/crates/kalamdb-auth/src/services/unified/bearer.rs b/backend/crates/kalamdb-auth/src/services/unified/bearer.rs index f59272fa8..d90783c92 100644 --- a/backend/crates/kalamdb-auth/src/services/unified/bearer.rs +++ b/backend/crates/kalamdb-auth/src/services/unified/bearer.rs @@ -1,16 +1,20 @@ -use crate::errors::error::{AuthError, AuthResult}; -use crate::models::context::AuthenticatedUser; -use crate::oidc::OidcError; -use crate::providers::jwt_auth; -use crate::providers::jwt_config; -use crate::providers::jwt_config::JwtConfig; -use crate::repository::user_repo::UserRepository; -use jsonwebtoken::Algorithm; -use kalamdb_commons::models::{ConnectionInfo, UserId}; -use kalamdb_commons::AuthType; use std::sync::Arc; + +use jsonwebtoken::Algorithm; +use kalamdb_commons::{ + models::{ConnectionInfo, UserId}, + AuthType, +}; use tracing::Instrument; +use crate::{ + errors::error::{AuthError, AuthResult}, + models::context::AuthenticatedUser, 
+ oidc::OidcError, + providers::{jwt_auth, jwt_config, jwt_config::JwtConfig}, + repository::user_repo::UserRepository, +}; + pub(super) async fn authenticate_bearer( token: &str, connection_info: &ConnectionInfo, @@ -126,9 +130,9 @@ async fn validate_bearer_token( Algorithm::HS256 | Algorithm::HS384 | Algorithm::HS512 => { if !jwt_auth::is_internal_issuer(issuer) { return Err(AuthError::MalformedAuthorization(format!( - "HS256 tokens are only accepted for internally-issued tokens \ - (iss='{}'). External provider tokens must use an asymmetric \ - algorithm (RS256 / ES256). Received iss='{}'.", + "HS256 tokens are only accepted for internally-issued tokens (iss='{}'). \ + External provider tokens must use an asymmetric algorithm (RS256 / ES256). \ + Received iss='{}'.", jwt_auth::KALAMDB_ISSUER, issuer ))); diff --git a/backend/crates/kalamdb-auth/src/services/unified/mod.rs b/backend/crates/kalamdb-auth/src/services/unified/mod.rs index 279568136..fd3712da3 100644 --- a/backend/crates/kalamdb-auth/src/services/unified/mod.rs +++ b/backend/crates/kalamdb-auth/src/services/unified/mod.rs @@ -5,20 +5,22 @@ mod bearer; mod password; mod types; -use crate::errors::error::{AuthError, AuthResult}; -use crate::models::context::AuthenticatedUser; -use crate::providers::jwt_config; -use crate::repository::user_repo::UserRepository; -use crate::services::login_tracker::LoginTracker; -use once_cell::sync::Lazy; use std::sync::Arc; -use tracing::Instrument; +pub use audit::extract_user_id_for_audit; use bearer::authenticate_bearer; +use once_cell::sync::Lazy; use password::authenticate_user_password; +use tracing::Instrument; pub use types::{AuthMethod, AuthRequest, AuthenticationResult}; -pub use audit::extract_user_id_for_audit; +use crate::{ + errors::error::{AuthError, AuthResult}, + models::context::AuthenticatedUser, + providers::jwt_config, + repository::user_repo::UserRepository, + services::login_tracker::LoginTracker, +}; /// Cached login tracker instance. 
static LOGIN_TRACKER: Lazy = Lazy::new(LoginTracker::new); @@ -146,9 +148,10 @@ fn record_authenticated_span(user: &AuthenticatedUser) { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::UserId; + use super::*; + #[test] fn test_auth_method_debug() { assert_eq!(format!("{:?}", AuthMethod::Basic), "Basic"); diff --git a/backend/crates/kalamdb-auth/src/services/unified/password.rs b/backend/crates/kalamdb-auth/src/services/unified/password.rs index f1e7f424e..8edcbc561 100644 --- a/backend/crates/kalamdb-auth/src/services/unified/password.rs +++ b/backend/crates/kalamdb-auth/src/services/unified/password.rs @@ -1,15 +1,17 @@ -use crate::errors::error::{AuthError, AuthResult}; -use crate::helpers::basic_auth; -use crate::models::context::AuthenticatedUser; -use crate::repository::user_repo::UserRepository; -use crate::security::password; -use kalamdb_commons::models::ConnectionInfo; -use kalamdb_commons::{AuthType, Role, UserId}; -use log::debug; use std::sync::Arc; + +use kalamdb_commons::{models::ConnectionInfo, AuthType, Role, UserId}; +use log::debug; use tracing::Instrument; use super::LOGIN_TRACKER; +use crate::{ + errors::error::{AuthError, AuthResult}, + helpers::basic_auth, + models::context::AuthenticatedUser, + repository::user_repo::UserRepository, + security::password, +}; /// Authenticate using Basic Auth header. /// diff --git a/backend/crates/kalamdb-auth/src/services/unified/types.rs b/backend/crates/kalamdb-auth/src/services/unified/types.rs index 9773498e8..0105414a4 100644 --- a/backend/crates/kalamdb-auth/src/services/unified/types.rs +++ b/backend/crates/kalamdb-auth/src/services/unified/types.rs @@ -1,6 +1,7 @@ -use crate::models::context::AuthenticatedUser; pub use kalamdb_session::AuthMethod; +use crate::models::context::AuthenticatedUser; + /// Authentication request that can come from HTTP or WebSocket. 
#[derive(Debug, Clone)] pub enum AuthRequest { diff --git a/backend/crates/kalamdb-auth/tests/test_password_security.rs b/backend/crates/kalamdb-auth/tests/test_password_security.rs index 514c2ac08..442e79f71 100644 --- a/backend/crates/kalamdb-auth/tests/test_password_security.rs +++ b/backend/crates/kalamdb-auth/tests/test_password_security.rs @@ -65,8 +65,8 @@ async fn test_concurrent_bcrypt_non_blocking() { // We allow up to 100ms for CI variability assert!( duration.as_millis() < 100, - "Concurrent operations took too long: {:?}ms (expected <100ms). \ - This suggests operations are blocking the async runtime.", + "Concurrent operations took too long: {:?}ms (expected <100ms). This suggests operations \ + are blocking the async runtime.", duration.as_millis() ); diff --git a/backend/crates/kalamdb-commons/src/constants.rs b/backend/crates/kalamdb-commons/src/constants.rs index 933c433fd..b9f974f78 100644 --- a/backend/crates/kalamdb-commons/src/constants.rs +++ b/backend/crates/kalamdb-commons/src/constants.rs @@ -1,12 +1,12 @@ //! System-wide constants for KalamDB. //! //! This module centralizes constant definitions used across all crates, including: -//! - Column family names (non-system) +//! - Logical storage partition names (non-system) //! - Reserved identifiers -/// RocksDB column family names. +/// Logical storage partition names. /// -/// Provides centralized naming for all column families used in KalamDB storage. +/// Provides centralized naming for storage partitions used in KalamDB. 
pub struct ColumnFamilyNames; #[allow(non_upper_case_globals)] @@ -16,13 +16,13 @@ impl ColumnFamilyNames { /// Unified information_schema tables (replaces system_table_schemas + system_columns) pub const INFORMATION_SCHEMA_TABLES: &'static str = "information_schema_tables"; - /// Prefix for user table column families (appended with table name) + /// Prefix for user table partitions (appended with table name) pub const USER_TABLE_PREFIX: &'static str = "user_"; - /// Prefix for shared table column families (appended with table name) + /// Prefix for shared table partitions (appended with table name) pub const SHARED_TABLE_PREFIX: &'static str = "shared_"; - /// Prefix for stream table column families (appended with table name) + /// Prefix for stream table partitions (appended with table name) pub const STREAM_TABLE_PREFIX: &'static str = "stream_"; } diff --git a/backend/crates/kalamdb-commons/src/conversions/arrow_conversion.rs b/backend/crates/kalamdb-commons/src/conversions/arrow_conversion.rs index 58d95a518..8c4fff92b 100644 --- a/backend/crates/kalamdb-commons/src/conversions/arrow_conversion.rs +++ b/backend/crates/kalamdb-commons/src/conversions/arrow_conversion.rs @@ -2,11 +2,11 @@ //! //! Provides bidirectional conversion between KalamDataType and Apache Arrow DataType. 
-use crate::models::datatypes::KalamDataType; -use arrow_schema::DataType as ArrowDataType; -use arrow_schema::{Field, TimeUnit}; +use arrow_schema::{DataType as ArrowDataType, Field, TimeUnit}; use thiserror::Error; +use crate::models::datatypes::KalamDataType; + #[derive(Error, Debug)] pub enum ArrowConversionError { #[error("Unsupported Arrow type: {0:?}")] diff --git a/backend/crates/kalamdb-commons/src/conversions/arrow_json_conversion.rs b/backend/crates/kalamdb-commons/src/conversions/arrow_json_conversion.rs index 799464839..3b6cc6702 100644 --- a/backend/crates/kalamdb-commons/src/conversions/arrow_json_conversion.rs +++ b/backend/crates/kalamdb-commons/src/conversions/arrow_json_conversion.rs @@ -30,20 +30,21 @@ //! - Timestamps serialized as raw microsecond values (numbers) //! - Boolean, String, Float values as native JSON types -use crate::errors::CommonError; -// Chrono no longer needed - DataFusion handles timestamp serialization natively -use crate::models::rows::Row; -use crate::models::KalamCellValue; -use arrow::array::*; -use arrow::datatypes::{DataType, Field, SchemaRef, TimeUnit}; -use arrow::record_batch::RecordBatch; +use std::{collections::BTreeMap, convert::TryFrom, sync::Arc}; + +use arrow::{ + array::*, + datatypes::{DataType, Field, SchemaRef, TimeUnit}, + record_batch::RecordBatch, +}; use datafusion_common::{DataFusionError, ScalarValue}; use serde_json::Value as JsonValue; -use std::collections::BTreeMap; -use std::convert::TryFrom; -use std::sync::Arc; use uuid::Uuid; +// Chrono no longer needed - DataFusion handles timestamp serialization natively +use crate::models::rows::Row; +use crate::{errors::CommonError, models::KalamCellValue}; + /// Type alias for Arc to improve readability type ArrayRef = Arc; @@ -72,9 +73,9 @@ pub fn coerce_rows(rows: Vec, schema: &SchemaRef) -> Result, Strin } } - // 2. Coerce existing values in-place using get_mut + mem::replace. 
- // This avoids rebuilding the entire BTreeMap (no remove/re-insert, - // no new String key allocations for existing columns). + // 2. Coerce existing values in-place using get_mut + mem::replace. This avoids + // rebuilding the entire BTreeMap (no remove/re-insert, no new String key allocations + // for existing columns). for field in schema.fields() { if let Some(val) = row.values.get_mut(field.name().as_str()) { let owned = std::mem::replace(val, ScalarValue::Null); @@ -420,7 +421,8 @@ fn coerce_uuid_scalar(value: ScalarValue, field: &Field) -> Result { if size != 16 || bytes.len() != 16 { return Err(format!( - "UUID fixed binary literal must be 16 bytes for column '{}', got size {} and len {}", + "UUID fixed binary literal must be 16 bytes for column '{}', got size {} and \ + len {}", field.name(), size, bytes.len() @@ -434,10 +436,11 @@ fn coerce_uuid_scalar(value: ScalarValue, field: &Field) -> Result) -> Row { let mut values = BTreeMap::new(); @@ -582,7 +585,8 @@ mod tests { // detects no-op UPDATEs (same logical value, potentially different ScalarValue type). 
/// Build a schema matching the user's chat.messages table: - /// id TEXT PK, thread_id TEXT, role TEXT, content TEXT, created_at TIMESTAMP, _seq INT64, _commit_seq UINT64, _deleted BOOLEAN + /// id TEXT PK, thread_id TEXT, role TEXT, content TEXT, created_at TIMESTAMP, _seq INT64, + /// _commit_seq UINT64, _deleted BOOLEAN fn chat_messages_schema() -> Arc { Arc::new(Schema::new(vec![ Field::new("id", DataType::Utf8, false), @@ -682,7 +686,8 @@ mod tests { #[test] fn noop_update_without_coercion_would_fail_for_timestamp() { - // Demonstrates the bug: without coercion, Int64(1704067200000000) != TimestampMicrosecond(1704067200000000) + // Demonstrates the bug: without coercion, Int64(1704067200000000) != + // TimestampMicrosecond(1704067200000000) let update_val = ScalarValue::Int64(Some(1704067200000000)); let stored_val = ScalarValue::TimestampMicrosecond(Some(1704067200000000), None); @@ -793,8 +798,7 @@ pub fn arrow_value_to_scalar( array: &dyn Array, row_idx: usize, ) -> Result { - use arrow::array::*; - use arrow::datatypes::*; + use arrow::{array::*, datatypes::*}; if array.is_null(row_idx) { return Ok(ScalarValue::try_from(array.data_type()).unwrap_or(ScalarValue::Null)); @@ -1108,10 +1112,7 @@ where let mut json_values = Vec::with_capacity(values.len()); for index in 0..values.len() { let scalar = ScalarValue::try_from_array(values.as_ref(), index).map_err(|error| { - CommonError::invalid_input(format!( - "Failed to extract list element scalar: {}", - error - )) + CommonError::invalid_input(format!("Failed to extract list element scalar: {}", error)) })?; json_values.push(scalar_value_to_json(&scalar)?.0); } @@ -1166,8 +1167,8 @@ pub fn record_batch_to_json_rows( /// /// **Used by:** REST API `/v1/api/sql` endpoint for query results /// -/// Returns rows as arrays of values where each value's position corresponds to the schema field index. 
-/// Example output: `[["123", "Alice", 1699000000000000], ["456", "Bob", 1699000001000000]]` +/// Returns rows as arrays of values where each value's position corresponds to the schema field +/// index. Example output: `[["123", "Alice", 1699000000000000], ["456", "Bob", 1699000001000000]]` /// /// # Arguments /// * `batch` - Arrow RecordBatch from DataFusion query execution diff --git a/backend/crates/kalamdb-commons/src/conversions/mod.rs b/backend/crates/kalamdb-commons/src/conversions/mod.rs index d30e87b9a..5d4e181de 100644 --- a/backend/crates/kalamdb-commons/src/conversions/mod.rs +++ b/backend/crates/kalamdb-commons/src/conversions/mod.rs @@ -45,8 +45,6 @@ pub mod scalar_bytes; #[cfg(feature = "conversions")] pub mod scalar_json; #[cfg(feature = "conversions")] -pub mod serde_row; -#[cfg(feature = "conversions")] pub mod scalar_numeric; #[cfg(feature = "conversions")] pub mod scalar_size; @@ -54,6 +52,8 @@ pub mod scalar_size; pub mod scalar_string; #[cfg(feature = "schema-metadata")] pub mod schema_metadata; +#[cfg(feature = "conversions")] +pub mod serde_row; // Re-export commonly used functions at the module root for convenience #[cfg(feature = "conversions")] @@ -63,8 +63,6 @@ pub use scalar_bytes::scalar_value_to_bytes; #[cfg(feature = "conversions")] pub use scalar_json::{json_value_to_scalar_for_column, scalar_to_json_for_column}; #[cfg(feature = "conversions")] -pub use serde_row::{row_to_serde_model, serde_model_to_row}; -#[cfg(feature = "conversions")] pub use scalar_numeric::{as_f64, scalar_to_f64, scalar_to_i64}; #[cfg(feature = "conversions")] pub use scalar_size::estimate_scalar_value_size; @@ -78,3 +76,5 @@ pub use schema_metadata::{ with_kalam_column_flags_metadata, with_kalam_data_type_metadata, KALAM_COLUMN_FLAGS_METADATA_KEY, KALAM_DATA_TYPE_METADATA_KEY, }; +#[cfg(feature = "conversions")] +pub use serde_row::{row_to_serde_model, serde_model_to_row}; diff --git a/backend/crates/kalamdb-commons/src/conversions/scalar_bytes.rs 
b/backend/crates/kalamdb-commons/src/conversions/scalar_bytes.rs index f626ab0b1..2e2752bb5 100644 --- a/backend/crates/kalamdb-commons/src/conversions/scalar_bytes.rs +++ b/backend/crates/kalamdb-commons/src/conversions/scalar_bytes.rs @@ -1,7 +1,7 @@ //! Centralized datatype and value conversion utilities //! -//! This module provides unified conversion functions for mapping between different data representations -//! (ScalarValue, bytes, Arrow types, etc.) used throughout the KalamDB codebase. +//! This module provides unified conversion functions for mapping between different data +//! representations (ScalarValue, bytes, Arrow types, etc.) used throughout the KalamDB codebase. //! //! The goal is to eliminate duplication of similar conversion logic scattered across multiple files //! and provide a single source of truth for all datatype conversions. diff --git a/backend/crates/kalamdb-commons/src/conversions/scalar_json.rs b/backend/crates/kalamdb-commons/src/conversions/scalar_json.rs index 439fe54c0..e3e7e02c1 100644 --- a/backend/crates/kalamdb-commons/src/conversions/scalar_json.rs +++ b/backend/crates/kalamdb-commons/src/conversions/scalar_json.rs @@ -260,10 +260,10 @@ fn extract_string(scalar: &ScalarValue) -> Option { #[cfg(test)] mod tests { - use super::{json_value_to_scalar_for_column, scalar_to_json_for_column}; use datafusion_common::ScalarValue; use serde_json::json; + use super::{json_value_to_scalar_for_column, scalar_to_json_for_column}; use crate::models::datatypes::KalamDataType; #[test] diff --git a/backend/crates/kalamdb-commons/src/conversions/scalar_string.rs b/backend/crates/kalamdb-commons/src/conversions/scalar_string.rs index 0e4cd3aa7..8426db96e 100644 --- a/backend/crates/kalamdb-commons/src/conversions/scalar_string.rs +++ b/backend/crates/kalamdb-commons/src/conversions/scalar_string.rs @@ -109,7 +109,8 @@ pub fn parse_string_as_scalar( other => { // For any other type, use string representation and let caller handle coercion 
log::warn!( - "Unsupported data type {:?} for string parsing, using Utf8 representation for value '{}'", + "Unsupported data type {:?} for string parsing, using Utf8 representation for \ + value '{}'", other, value_str ); diff --git a/backend/crates/kalamdb-commons/src/conversions/schema_metadata.rs b/backend/crates/kalamdb-commons/src/conversions/schema_metadata.rs index 1d766a912..210ce44ef 100644 --- a/backend/crates/kalamdb-commons/src/conversions/schema_metadata.rs +++ b/backend/crates/kalamdb-commons/src/conversions/schema_metadata.rs @@ -3,10 +3,10 @@ //! Centralized helpers for reading/writing Arrow Field metadata keys used by KalamDB. //! This ensures consistent key names and serialization across the codebase. -use crate::models::datatypes::KalamDataType; -use crate::schemas::FieldFlags; use arrow_schema::Field; +use crate::{models::datatypes::KalamDataType, schemas::FieldFlags}; + /// Metadata key for serialized KalamDataType. pub const KALAM_DATA_TYPE_METADATA_KEY: &str = "kalam_data_type"; /// Metadata key for serialized `FieldFlags`. 
@@ -57,8 +57,7 @@ pub fn with_kalam_column_flags_metadata(mut field: Field, flags: &FieldFlags) -> pub fn schema_fields_from_arrow_schema( arrow_schema: &arrow_schema::SchemaRef, ) -> Vec { - use crate::conversions::arrow_conversion::FromArrowType; - use crate::models::datatypes::KalamDataType; + use crate::{conversions::arrow_conversion::FromArrowType, models::datatypes::KalamDataType}; arrow_schema .fields() diff --git a/backend/crates/kalamdb-commons/src/conversions/serde_row.rs b/backend/crates/kalamdb-commons/src/conversions/serde_row.rs index e0c99cd13..168661a27 100644 --- a/backend/crates/kalamdb-commons/src/conversions/serde_row.rs +++ b/backend/crates/kalamdb-commons/src/conversions/serde_row.rs @@ -1,20 +1,25 @@ +use std::collections::BTreeMap; + use datafusion_common::ScalarValue; -use serde::de::DeserializeOwned; -use serde::Serialize; +use serde::{de::DeserializeOwned, Serialize}; use serde_json::{Map, Value}; -use std::collections::BTreeMap; -use crate::conversions::{json_value_to_scalar_for_column, scalar_to_json_for_column}; -use crate::models::rows::Row; -use crate::schemas::TableDefinition; +use crate::{ + conversions::{json_value_to_scalar_for_column, scalar_to_json_for_column}, + models::rows::Row, + schemas::TableDefinition, +}; /// Serde-based fallback for metadata models that still bridge through JSON values. /// /// This is intentionally kept at a crate boundary helper so callers can wrap the /// string error in their local error type without duplicating the mapping logic. 
-pub fn serde_model_to_row(model: &T, table_def: &TableDefinition) -> Result { - let value = serde_json::to_value(model) - .map_err(|error| format!("model serialize failed: {error}"))?; +pub fn serde_model_to_row( + model: &T, + table_def: &TableDefinition, +) -> Result { + let value = + serde_json::to_value(model).map_err(|error| format!("model serialize failed: {error}"))?; let object = value .as_object() .ok_or_else(|| "model serialize failed: expected JSON object".to_string())?; @@ -31,15 +36,14 @@ pub fn serde_model_to_row(model: &T, table_def: &TableDefinition) } /// Inverse of [`serde_model_to_row`]. -pub fn row_to_serde_model(row: &Row, table_def: &TableDefinition) -> Result { +pub fn row_to_serde_model( + row: &Row, + table_def: &TableDefinition, +) -> Result { let mut object = Map::new(); for column in &table_def.columns { - let scalar = row - .values - .get(&column.column_name) - .cloned() - .unwrap_or(ScalarValue::Null); + let scalar = row.values.get(&column.column_name).cloned().unwrap_or(ScalarValue::Null); let json_value = scalar_to_json_for_column(&scalar, &column.data_type) .map_err(|error| format!("scalar->json conversion failed: {error}"))?; object.insert(column.column_name.clone(), json_value); diff --git a/backend/crates/kalamdb-commons/src/helpers/arrow_utils.rs b/backend/crates/kalamdb-commons/src/helpers/arrow_utils.rs index 4a44b1f00..a6f81172e 100644 --- a/backend/crates/kalamdb-commons/src/helpers/arrow_utils.rs +++ b/backend/crates/kalamdb-commons/src/helpers/arrow_utils.rs @@ -4,19 +4,22 @@ //! with common column types, reducing code duplication across system table //! providers and other RecordBatch construction sites. 
-use arrow::array::{ - ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, LargeStringArray, StringArray, StringBuilder, TimestampMicrosecondArray, - TimestampMillisecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, +use std::sync::Arc; + +use arrow::{ + array::{ + ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, + Int8Array, LargeStringArray, StringArray, StringBuilder, TimestampMicrosecondArray, + TimestampMillisecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, + }, + compute, + compute::kernels::aggregate::{max_string, min_string}, + datatypes::SchemaRef, + error::ArrowError, + record_batch::RecordBatch, }; -use arrow::compute; -use arrow::compute::kernels::aggregate::{max_string, min_string}; -use arrow::datatypes::SchemaRef; -use arrow::error::ArrowError; -use arrow::record_batch::RecordBatch; pub use arrow_schema::DataType as ArrowDataType; use arrow_schema::{Field, Schema, TimeUnit}; -use std::sync::Arc; /// Arrow UTF-8 string type. 
pub fn arrow_utf8() -> ArrowDataType { @@ -447,9 +450,10 @@ pub fn is_boolean(data_type: &ArrowDataType) -> bool { #[cfg(test)] mod tests { - use super::*; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use super::*; + fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("id", DataType::Utf8, false), diff --git a/backend/crates/kalamdb-commons/src/helpers/file_helpers.rs b/backend/crates/kalamdb-commons/src/helpers/file_helpers.rs index c9aae9db0..e4a1cb363 100644 --- a/backend/crates/kalamdb-commons/src/helpers/file_helpers.rs +++ b/backend/crates/kalamdb-commons/src/helpers/file_helpers.rs @@ -16,9 +16,10 @@ use std::path::{Path, PathBuf}; /// # Examples /// /// ``` -/// use kalamdb_commons::file_helpers::normalize_dir_path; /// use std::path::Path; /// +/// use kalamdb_commons::file_helpers::normalize_dir_path; +/// /// let abs = normalize_dir_path("./data"); /// assert!(abs.is_empty() || Path::new(&abs).is_absolute()); /// ``` diff --git a/backend/crates/kalamdb-commons/src/helpers/string_interner.rs b/backend/crates/kalamdb-commons/src/helpers/string_interner.rs index 9bc6e0bac..4580a5c03 100644 --- a/backend/crates/kalamdb-commons/src/helpers/string_interner.rs +++ b/backend/crates/kalamdb-commons/src/helpers/string_interner.rs @@ -21,12 +21,13 @@ //! let updated_col = SYSTEM_COLUMNS.updated; //! 
``` -use crate::constants::SystemColumnNames; -use dashmap::mapref::entry::Entry; -use dashmap::DashMap; -use once_cell::sync::Lazy; use std::sync::Arc; +use dashmap::{mapref::entry::Entry, DashMap}; +use once_cell::sync::Lazy; + +use crate::constants::SystemColumnNames; + /// Global string interner static INTERNER: Lazy, ()>> = Lazy::new(DashMap::new); @@ -133,9 +134,10 @@ pub fn clear() { #[cfg(test)] mod tests { - use super::*; use std::thread; + use super::*; + #[test] fn test_intern_same_string_returns_same_arc() { let s1 = intern("test_column_unique_1"); diff --git a/backend/crates/kalamdb-commons/src/ids/row_id.rs b/backend/crates/kalamdb-commons/src/ids/row_id.rs index 4a827c294..c64fde99e 100644 --- a/backend/crates/kalamdb-commons/src/ids/row_id.rs +++ b/backend/crates/kalamdb-commons/src/ids/row_id.rs @@ -4,13 +4,17 @@ //! - UserTableRowId: Composite key with user_id and _seq for user-scoped tables //! - SharedTableRowId: Alias to SeqId for shared tables (no user scoping) -use crate::ids::SeqId; -use crate::models::UserId; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::StorageKey; -use serde::{Deserialize, Serialize}; use std::cmp::Ordering; +use serde::{Deserialize, Serialize}; + +use crate::{ + ids::SeqId, + models::UserId, + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, +}; + /// Composite key for user table rows: {user_id}:{_seq} /// /// **MVCC Architecture**: Similar to TableId pattern, this is a composite struct diff --git a/backend/crates/kalamdb-commons/src/ids/seq_id.rs b/backend/crates/kalamdb-commons/src/ids/seq_id.rs index de5901684..7a26ccf6c 100644 --- a/backend/crates/kalamdb-commons/src/ids/seq_id.rs +++ b/backend/crates/kalamdb-commons/src/ids/seq_id.rs @@ -3,11 +3,14 @@ //! This module provides a wrapper around Snowflake IDs for use as sequence identifiers //! in the MVCC architecture. Each SeqId represents a unique version of a row. 
-use crate::ids::SnowflakeGenerator; -use crate::StorageKey; +use std::{ + fmt, + time::{SystemTime, UNIX_EPOCH}, +}; + use serde::{Deserialize, Serialize}; -use std::fmt; -use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::{ids::SnowflakeGenerator, StorageKey}; /// Sequence ID for MVCC versioning /// @@ -107,7 +110,7 @@ impl SeqId { /// so the returned SeqId encompasses every Snowflake generated at or before /// `timestamp_millis`. pub fn max_id_for_timestamp(timestamp_millis: u64) -> Result { - //let normalized = timestamp_millis.max(Self::EPOCH); + // let normalized = timestamp_millis.max(Self::EPOCH); let id = SnowflakeGenerator::max_id_for_timestamp(timestamp_millis)?; Ok(Self::new(id)) } diff --git a/backend/crates/kalamdb-commons/src/ids/snowflake.rs b/backend/crates/kalamdb-commons/src/ids/snowflake.rs index f2d844a54..b80051337 100644 --- a/backend/crates/kalamdb-commons/src/ids/snowflake.rs +++ b/backend/crates/kalamdb-commons/src/ids/snowflake.rs @@ -1,7 +1,8 @@ // Snowflake ID generator -use parking_lot::Mutex; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use parking_lot::Mutex; + /// Snowflake ID generator for time-ordered unique identifiers /// /// Format (64 bits): @@ -224,9 +225,10 @@ impl Default for SnowflakeGenerator { #[cfg(test)] mod tests { - use super::*; use std::collections::HashSet; + use super::*; + #[test] fn test_snowflake_generation() { let gen = SnowflakeGenerator::new(1); @@ -366,8 +368,7 @@ mod tests { #[test] fn test_concurrent_generation() { - use std::sync::Arc; - use std::thread; + use std::{sync::Arc, thread}; let gen = Arc::new(SnowflakeGenerator::new(1)); let mut handles = vec![]; diff --git a/backend/crates/kalamdb-commons/src/lib.rs b/backend/crates/kalamdb-commons/src/lib.rs index f739cac0b..5578cb9c3 100644 --- a/backend/crates/kalamdb-commons/src/lib.rs +++ b/backend/crates/kalamdb-commons/src/lib.rs @@ -31,8 +31,8 @@ //! ## Example Usage //! //! ```rust -//! 
use kalamdb_commons::models::{UserId, NamespaceId, TableName}; -//! use kalamdb_system::{User, Job, LiveQuery}; +//! use kalamdb_commons::models::{NamespaceId, TableName, UserId}; +//! use kalamdb_system::{Job, LiveQuery, User}; //! //! let user_id = UserId::new("user_123"); //! let namespace_id = NamespaceId::default(); @@ -78,9 +78,7 @@ pub use errors::{CommonError, NotLeaderError, Result}; pub use helpers::arrow_utils; #[cfg(feature = "arrow-utils")] pub use helpers::arrow_utils::{empty_batch, RecordBatchBuilder}; -pub use helpers::file_helpers; -pub use helpers::security; -pub use helpers::string_interner; +pub use helpers::{file_helpers, security, string_interner}; pub use models::{ // Phase 15 (008-schema-consolidation): Re-export schema types datatypes, diff --git a/backend/crates/kalamdb-commons/src/models/auth_type.rs b/backend/crates/kalamdb-commons/src/models/auth_type.rs index 58afbf150..72b594de2 100644 --- a/backend/crates/kalamdb-commons/src/models/auth_type.rs +++ b/backend/crates/kalamdb-commons/src/models/auth_type.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/datatypes/kalam_data_type.rs b/backend/crates/kalamdb-commons/src/models/datatypes/kalam_data_type.rs index f4f00d16f..d7270742c 100644 --- a/backend/crates/kalamdb-commons/src/models/datatypes/kalam_data_type.rs +++ b/backend/crates/kalamdb-commons/src/models/datatypes/kalam_data_type.rs @@ -3,9 +3,10 @@ //! This enum represents all supported data types in the system with deterministic //! wire format tags for efficient serialization. 
-use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + /// Unified data type enum with wire format tags /// /// Each variant has an associated tag byte for wire format serialization: @@ -213,7 +214,7 @@ mod tests { assert_eq!( KalamDataType::Decimal { precision: 10, - scale: 2 + scale: 2, } .tag(), 0x0F @@ -268,7 +269,7 @@ mod tests { assert_eq!( KalamDataType::Decimal { precision: 10, - scale: 2 + scale: 2, } .sql_name(), "DECIMAL(10, 2)" diff --git a/backend/crates/kalamdb-commons/src/models/datatypes/mod.rs b/backend/crates/kalamdb-commons/src/models/datatypes/mod.rs index 7901f49cf..4934ed140 100644 --- a/backend/crates/kalamdb-commons/src/models/datatypes/mod.rs +++ b/backend/crates/kalamdb-commons/src/models/datatypes/mod.rs @@ -23,7 +23,8 @@ //! - Numeric: Float32, Float64, Decimal(precision, scale) //! - String: Utf8, LargeUtf8 //! - Binary: Binary, LargeBinary -//! - Temporal: Date32, Date64, Timestamp(unit, tz), Time32(unit), Time64(unit), Duration(unit), Interval(unit) +//! - Temporal: Date32, Date64, Timestamp(unit, tz), Time32(unit), Time64(unit), Duration(unit), +//! Interval(unit) //! - Complex: List(inner), LargeList(inner), Struct(fields), Map(key, value, sorted) //! - Special: Uuid, Json //! diff --git a/backend/crates/kalamdb-commons/src/models/datatypes/wire_format.rs b/backend/crates/kalamdb-commons/src/models/datatypes/wire_format.rs index d5cc950f8..1bc87cbad 100644 --- a/backend/crates/kalamdb-commons/src/models/datatypes/wire_format.rs +++ b/backend/crates/kalamdb-commons/src/models/datatypes/wire_format.rs @@ -3,10 +3,12 @@ //! Provides efficient binary serialization with tag bytes for type identification. //! 
Format: [tag byte][optional dimension for EMBEDDING] -use crate::models::datatypes::KalamDataType; use std::io::{Read, Write}; + use thiserror::Error; +use crate::models::datatypes::KalamDataType; + #[derive(Error, Debug)] pub enum WireFormatError { #[error("IO error: {0}")] @@ -81,9 +83,10 @@ impl WireFormat for KalamDataType { #[cfg(test)] mod tests { - use super::*; use std::io::Cursor; + use super::*; + #[test] fn test_simple_type_round_trip() { let types = vec![ diff --git a/backend/crates/kalamdb-commons/src/models/ids/audit_log_id.rs b/backend/crates/kalamdb-commons/src/models/ids/audit_log_id.rs index bea2477e5..161ddaa56 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/audit_log_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/audit_log_id.rs @@ -1,9 +1,10 @@ // File: backend/crates/kalamdb-commons/src/models/audit_log_id.rs // Type-safe wrapper for audit log identifiers. -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + use crate::StorageKey; /// Type-safe wrapper for audit log identifiers stored in system.audit_log. diff --git a/backend/crates/kalamdb-commons/src/models/ids/job_id.rs b/backend/crates/kalamdb-commons/src/models/ids/job_id.rs index 28a7fab62..cf371ae7a 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/job_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/job_id.rs @@ -1,9 +1,10 @@ // File: backend/crates/kalamdb-commons/src/models/job_id.rs // Type-safe wrapper for job identifiers -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + use crate::StorageKey; /// Type-safe wrapper for job identifiers in system.jobs table. 
diff --git a/backend/crates/kalamdb-commons/src/models/ids/job_node_id.rs b/backend/crates/kalamdb-commons/src/models/ids/job_node_id.rs index 0fcf92d4e..bec861ff3 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/job_node_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/job_node_id.rs @@ -1,9 +1,12 @@ -use serde::{Deserialize, Deserializer, Serialize}; use std::fmt; +use serde::{Deserialize, Deserializer, Serialize}; + use super::{JobId, NodeId}; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::StorageKey; +use crate::{ + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, +}; /// Unique identifier for a job run on a specific node. /// diff --git a/backend/crates/kalamdb-commons/src/models/ids/live_query_id.rs b/backend/crates/kalamdb-commons/src/models/ids/live_query_id.rs index ac1f5eee3..90a1c576f 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/live_query_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/live_query_id.rs @@ -1,11 +1,15 @@ // File: backend/crates/kalamdb-commons/src/models/live_query_id.rs // Type-safe composite identifier for live query subscriptions -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; -use crate::models::{ConnectionId, UserId}; -use crate::{encode_prefix, StorageKey}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use crate::{ + encode_prefix, + models::{ConnectionId, UserId}, + StorageKey, +}; /// Unique identifier for live query subscriptions. /// @@ -111,7 +115,8 @@ impl LiveQueryId { let subscription = parts.next(); if user.is_none() || connection.is_none() || subscription.is_none() { return Err(format!( - "Invalid live_query_id format: {}. Expected: {{user_id}}-{{connection_id}}-{{subscription_id}}", + "Invalid live_query_id format: {}. 
Expected: \ + {{user_id}}-{{connection_id}}-{{subscription_id}}", s )); } diff --git a/backend/crates/kalamdb-commons/src/models/ids/manifest_id.rs b/backend/crates/kalamdb-commons/src/models/ids/manifest_id.rs index f1b6ce7b6..75dfcb454 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/manifest_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/manifest_id.rs @@ -5,8 +5,10 @@ use std::fmt; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::{StorageKey, TableId, UserId}; +use crate::{ + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, TableId, UserId, +}; /// Type-safe wrapper for manifest cache identifiers. /// diff --git a/backend/crates/kalamdb-commons/src/models/ids/namespace_id.rs b/backend/crates/kalamdb-commons/src/models/ids/namespace_id.rs index ba00a1131..e70ad0442 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/namespace_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/namespace_id.rs @@ -1,13 +1,17 @@ //! Type-safe wrapper for namespace identifiers. -use std::fmt; -use std::sync::{Arc, OnceLock}; +use std::{ + fmt, + sync::{Arc, OnceLock}, +}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::constants::{RESERVED_NAMESPACE_NAMES, SYSTEM_NAMESPACE}; -use crate::StorageKey; +use crate::{ + constants::{RESERVED_NAMESPACE_NAMES, SYSTEM_NAMESPACE}, + StorageKey, +}; /// Error returned when a namespace ID fails validation. #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/backend/crates/kalamdb-commons/src/models/ids/node_id.rs b/backend/crates/kalamdb-commons/src/models/ids/node_id.rs index 06e569937..eb7ad316d 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/node_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/node_id.rs @@ -6,9 +6,10 @@ //! - Live query routing //! 
- Distributed coordination -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + /// Node identifier for cluster deployments /// /// Uses u64 to match OpenRaft's NodeId type for seamless integration. diff --git a/backend/crates/kalamdb-commons/src/models/ids/row_id.rs b/backend/crates/kalamdb-commons/src/models/ids/row_id.rs index e0c9ff14d..138db7714 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/row_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/row_id.rs @@ -1,9 +1,10 @@ // File: backend/crates/kalamdb-commons/src/models/row_id.rs // Type-safe row identifier for shared and stream tables -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + /// Type-safe row identifier for shared and stream tables. /// /// This newtype wrapper provides compile-time safety for row IDs, diff --git a/backend/crates/kalamdb-commons/src/models/ids/shard_id.rs b/backend/crates/kalamdb-commons/src/models/ids/shard_id.rs index 53538be52..932ec89f0 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/shard_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/shard_id.rs @@ -3,9 +3,10 @@ //! Used in storage path templates for sharded data distribution. //! Format: `shard_{number}` (e.g., `shard_0`, `shard_42`) -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + /// Type-safe shard identifier. /// /// Wraps a u32 shard number. 
When formatted for storage paths, diff --git a/backend/crates/kalamdb-commons/src/models/ids/table_id.rs b/backend/crates/kalamdb-commons/src/models/ids/table_id.rs index 21ac18183..135d3d337 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/table_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/table_id.rs @@ -1,13 +1,16 @@ // File: backend/crates/kalamdb-commons/src/models/table_id.rs // Composite key for system.tables entries -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use super::namespace_id::NamespaceId; -use crate::models::schemas::TableName; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::StorageKey; +use crate::{ + models::schemas::TableName, + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, +}; /// Composite key for system.tables entries: (namespace_id, table_name) /// @@ -124,9 +127,10 @@ impl<'de> Deserialize<'de> for TableId { where D: Deserializer<'de>, { - use serde::de::{Error, Visitor}; use std::fmt; + use serde::de::{Error, Visitor}; + struct TableIdVisitor; impl<'de> Visitor<'de> for TableIdVisitor { diff --git a/backend/crates/kalamdb-commons/src/models/ids/table_version_id.rs b/backend/crates/kalamdb-commons/src/models/ids/table_version_id.rs index d6cc946ec..4c2130eda 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/table_version_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/table_version_id.rs @@ -7,12 +7,15 @@ //! - Latest pointer: (namespace, table, VERSION_KIND_LATEST) //! 
- Versioned: (namespace, table, VERSION_KIND_VERSIONED, version) -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + use super::table_id::TableId; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::StorageKey; +use crate::{ + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, +}; /// Marker for the "latest" version pointer pub const LATEST_MARKER: &str = ""; @@ -199,8 +202,7 @@ impl StorageKey for TableVersionId { #[cfg(test)] mod tests { use super::*; - use crate::models::ids::NamespaceId; - use crate::models::schemas::TableName; + use crate::models::{ids::NamespaceId, schemas::TableName}; fn test_table_id() -> TableId { TableId::new(NamespaceId::default(), TableName::new("users")) diff --git a/backend/crates/kalamdb-commons/src/models/ids/transaction_id.rs b/backend/crates/kalamdb-commons/src/models/ids/transaction_id.rs index cc3926610..20cb7afdf 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/transaction_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/transaction_id.rs @@ -1,6 +1,4 @@ -use std::fmt; -use std::str::FromStr; -use std::sync::Arc; +use std::{fmt, str::FromStr, sync::Arc}; #[cfg(feature = "serde")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/backend/crates/kalamdb-commons/src/models/ids/user_id.rs b/backend/crates/kalamdb-commons/src/models/ids/user_id.rs index 2b9790b4f..b7b2aaa5a 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/user_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/user_id.rs @@ -1,7 +1,9 @@ //! Type-safe wrapper for user identifiers. 
-use std::fmt; -use std::sync::{Arc, OnceLock}; +use std::{ + fmt, + sync::{Arc, OnceLock}, +}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/ids/user_row_id.rs b/backend/crates/kalamdb-commons/src/models/ids/user_row_id.rs index 10fac0c3e..9859fccbd 100644 --- a/backend/crates/kalamdb-commons/src/models/ids/user_row_id.rs +++ b/backend/crates/kalamdb-commons/src/models/ids/user_row_id.rs @@ -1,12 +1,15 @@ // File: backend/crates/kalamdb-commons/src/models/user_row_id.rs // Composite key for user-scoped table rows -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + use super::user_id::UserId; -use crate::storage_key::{decode_key, encode_key, encode_prefix}; -use crate::StorageKey; +use crate::{ + storage_key::{decode_key, encode_key, encode_prefix}, + StorageKey, +}; /// Composite key for user-scoped table rows: (user_id, row_id) /// diff --git a/backend/crates/kalamdb-commons/src/models/kalam_cell_value.rs b/backend/crates/kalamdb-commons/src/models/kalam_cell_value.rs index 3d22dbe86..2553f023a 100644 --- a/backend/crates/kalamdb-commons/src/models/kalam_cell_value.rs +++ b/backend/crates/kalamdb-commons/src/models/kalam_cell_value.rs @@ -22,10 +22,10 @@ //! {"id":"..."} // Object (e.g., FILE column) //! ``` +use std::{fmt, ops::Deref}; + use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; -use std::fmt; -use std::ops::Deref; /// A single cell value in a query result row or subscription notification. /// diff --git a/backend/crates/kalamdb-commons/src/models/mod.rs b/backend/crates/kalamdb-commons/src/models/mod.rs index 61fa8121f..7efad96f4 100644 --- a/backend/crates/kalamdb-commons/src/models/mod.rs +++ b/backend/crates/kalamdb-commons/src/models/mod.rs @@ -12,8 +12,8 @@ //! ## Examples //! //! ```rust -//! use kalamdb_commons::models::{UserId, NamespaceId, TableName}; -//! use kalamdb_system::{User, Job, LiveQuery}; +//! 
use kalamdb_commons::models::{NamespaceId, TableName, UserId}; +//! use kalamdb_system::{Job, LiveQuery, User}; //! //! let user_id = UserId::new("user_123"); //! let namespace_id = NamespaceId::default(); @@ -55,21 +55,19 @@ pub mod pg_operations; // Re-export all types from submodules for convenience pub use auth_type::AuthType; +pub use connection::ConnectionInfo; pub use ids::*; pub use kalam_cell_value::KalamCellValue; pub use oauth_provider::OAuthProvider; pub use payload_mode::PayloadMode; pub use read_context::ReadContext; pub use role::Role; +#[cfg(feature = "rows")] +pub use rows::{KTableRow, StreamTableRow, SystemTableRow, UserTableRow}; pub use schemas::{TableAccess, TableName}; pub use topic_op::TopicOp; pub use transaction::{OperationKind, TransactionOrigin, TransactionState}; -#[cfg(feature = "rows")] -pub use rows::{KTableRow, StreamTableRow, SystemTableRow, UserTableRow}; - -pub use connection::ConnectionInfo; - #[cfg(test)] mod tests { use super::*; diff --git a/backend/crates/kalamdb-commons/src/models/payload_mode.rs b/backend/crates/kalamdb-commons/src/models/payload_mode.rs index 80e65a1b0..99aab0e7d 100644 --- a/backend/crates/kalamdb-commons/src/models/payload_mode.rs +++ b/backend/crates/kalamdb-commons/src/models/payload_mode.rs @@ -1,7 +1,6 @@ //! Payload mode enumeration for topic messages. 
-use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/pg_operations.rs b/backend/crates/kalamdb-commons/src/models/pg_operations.rs index e6391dc88..036a16842 100644 --- a/backend/crates/kalamdb-commons/src/models/pg_operations.rs +++ b/backend/crates/kalamdb-commons/src/models/pg_operations.rs @@ -6,9 +6,10 @@ #[cfg(feature = "arrow")] use arrow::record_batch::RecordBatch; -use crate::models::rows::Row; -use crate::models::UserId; -use crate::{TableId, TableType}; +use crate::{ + models::{rows::Row, UserId}, + TableId, TableType, +}; /// Domain-typed scan request. pub struct ScanRequest { diff --git a/backend/crates/kalamdb-commons/src/models/role.rs b/backend/crates/kalamdb-commons/src/models/role.rs index bdf655c83..cef09db82 100644 --- a/backend/crates/kalamdb-commons/src/models/role.rs +++ b/backend/crates/kalamdb-commons/src/models/role.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/rows/k_table_row.rs b/backend/crates/kalamdb-commons/src/models/rows/k_table_row.rs index 7b094b5f4..b05089b42 100644 --- a/backend/crates/kalamdb-commons/src/models/rows/k_table_row.rs +++ b/backend/crates/kalamdb-commons/src/models/rows/k_table_row.rs @@ -1,8 +1,8 @@ -use super::row::Row; -use crate::ids::SeqId; -use crate::models::UserId; use serde::{Deserialize, Serialize}; +use super::row::Row; +use crate::{ids::SeqId, models::UserId}; + /// Unified table row model for User and Stream tables /// /// **Phase 13: Provider Consolidation** diff --git a/backend/crates/kalamdb-commons/src/models/rows/row.rs b/backend/crates/kalamdb-commons/src/models/rows/row.rs index f62a74fee..f62ca975e 100644 --- a/backend/crates/kalamdb-commons/src/models/rows/row.rs +++ 
b/backend/crates/kalamdb-commons/src/models/rows/row.rs @@ -1,11 +1,8 @@ +use std::{cmp::Ordering, collections::BTreeMap, sync::Arc}; + use arrow::array::{Array, FixedSizeListArray, Float32Array}; use datafusion_common::ScalarValue; -use serde::de; -use serde::ser::SerializeMap; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::cmp::Ordering; -use std::collections::BTreeMap; -use std::sync::Arc; +use serde::{de, ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; use thiserror::Error; /// A unified Row representation that holds DataFusion ScalarValues diff --git a/backend/crates/kalamdb-commons/src/models/rows/stream_table_row.rs b/backend/crates/kalamdb-commons/src/models/rows/stream_table_row.rs index bc2004275..9cd4081b8 100644 --- a/backend/crates/kalamdb-commons/src/models/rows/stream_table_row.rs +++ b/backend/crates/kalamdb-commons/src/models/rows/stream_table_row.rs @@ -1,13 +1,15 @@ -use super::{KTableRow, Row}; -use crate::ids::SeqId; -use crate::models::UserId; use serde::{Deserialize, Serialize}; +use super::{KTableRow, Row}; +use crate::{ids::SeqId, models::UserId}; + /// Stream table row entity /// /// **Design Notes**: -/// - Removed: event_id (redundant with _seq), timestamp (embedded in _seq Snowflake ID), row_id, inserted_at, _updated -/// - Kept: user_id (event owner), _seq (unique version ID with embedded timestamp), fields (all event data) +/// - Removed: event_id (redundant with _seq), timestamp (embedded in _seq Snowflake ID), row_id, +/// inserted_at, _updated +/// - Kept: user_id (event owner), _seq (unique version ID with embedded timestamp), fields (all +/// event data) /// - Note: NO _deleted field (stream tables don't use soft deletes, only TTL eviction) #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct StreamTableRow { diff --git a/backend/crates/kalamdb-commons/src/models/rows/system_table_row.rs b/backend/crates/kalamdb-commons/src/models/rows/system_table_row.rs index 
3c715824c..9bb3ed6d7 100644 --- a/backend/crates/kalamdb-commons/src/models/rows/system_table_row.rs +++ b/backend/crates/kalamdb-commons/src/models/rows/system_table_row.rs @@ -1,6 +1,7 @@ -use super::Row; use serde::{Deserialize, Serialize}; +use super::Row; + /// Generic persisted row representation for system tables. /// /// System providers can keep typed models for business logic and convert them diff --git a/backend/crates/kalamdb-commons/src/models/rows/user_table_row.rs b/backend/crates/kalamdb-commons/src/models/rows/user_table_row.rs index 3057cc2c2..333210444 100644 --- a/backend/crates/kalamdb-commons/src/models/rows/user_table_row.rs +++ b/backend/crates/kalamdb-commons/src/models/rows/user_table_row.rs @@ -1,13 +1,14 @@ -use super::{KTableRow, Row}; -use crate::ids::SeqId; -use crate::models::UserId; use serde::{Deserialize, Serialize}; +use super::{KTableRow, Row}; +use crate::{ids::SeqId, models::UserId}; + /// User table row data /// /// **MVCC Architecture (Phase 12, User Story 5)**: /// - Removed: row_id (redundant with _seq), _updated (timestamp embedded in _seq Snowflake ID) -/// - Kept: user_id (row owner), _seq (version identifier with embedded timestamp), `_commit_seq` (commit-order visibility), _deleted (tombstone), fields (all user columns including PK) +/// - Kept: user_id (row owner), _seq (version identifier with embedded timestamp), `_commit_seq` +/// (commit-order visibility), _deleted (tombstone), fields (all user columns including PK) /// /// **Note on System Column Naming**: /// The underscore prefix (`_seq`, `_deleted`) follows SQL convention for system-managed columns. diff --git a/backend/crates/kalamdb-commons/src/models/schemas/column_default.rs b/backend/crates/kalamdb-commons/src/models/schemas/column_default.rs index e2bdf2fe4..2ce400c72 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/column_default.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/column_default.rs @@ -1,7 +1,6 @@ //! 
Column default value specification -use serde::de::Error as DeError; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer}; use serde_json::Value as JsonValue; /// Represents the default value for a column @@ -195,9 +194,10 @@ impl ColumnDefault { #[cfg(test)] mod tests { - use super::*; use serde_json::json; + use super::*; + #[test] fn test_none_default() { let default = ColumnDefault::none(); diff --git a/backend/crates/kalamdb-commons/src/models/schemas/column_definition.rs b/backend/crates/kalamdb-commons/src/models/schemas/column_definition.rs index 30916a839..bee136376 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/column_definition.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/column_definition.rs @@ -1,9 +1,9 @@ //! Column definition for table schemas -use crate::models::datatypes::KalamDataType; -use crate::models::schemas::column_default::ColumnDefault; use serde::{Deserialize, Serialize}; +use crate::models::{datatypes::KalamDataType, schemas::column_default::ColumnDefault}; + /// Complete definition of a table column. /// Fields ordered for optimal memory alignment (8-byte types first). 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -157,9 +157,10 @@ impl ColumnDefinition { #[cfg(test)] mod tests { - use super::*; use serde_json::json; + use super::*; + #[test] fn test_simple_column() { let col = ColumnDefinition::simple(1, "name", 1, KalamDataType::Text); diff --git a/backend/crates/kalamdb-commons/src/models/schemas/field_flag.rs b/backend/crates/kalamdb-commons/src/models/schemas/field_flag.rs index 375b77ac8..cc97fbb0c 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/field_flag.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/field_flag.rs @@ -1,6 +1,7 @@ -use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; +use serde::{Deserialize, Serialize}; + pub type FieldFlags = BTreeSet; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] diff --git a/backend/crates/kalamdb-commons/src/models/schemas/schema_field.rs b/backend/crates/kalamdb-commons/src/models/schemas/schema_field.rs index 358cdf2a8..55301c963 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/schema_field.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/schema_field.rs @@ -3,13 +3,15 @@ //! This module defines the `SchemaField` struct used in REST API responses //! to provide type-safe schema information to clients. 
-use crate::conversions::read_kalam_column_flags_metadata; -use crate::models::datatypes::KalamDataType; -use crate::schemas::ColumnDefinition; -use crate::schemas::{FieldFlag, FieldFlags}; use arrow_schema::Field; use serde::{Deserialize, Serialize}; +use crate::{ + conversions::read_kalam_column_flags_metadata, + models::datatypes::KalamDataType, + schemas::{ColumnDefinition, FieldFlag, FieldFlags}, +}; + /// A field in the result schema returned by SQL queries /// /// Contains all the information a client needs to properly interpret @@ -111,9 +113,10 @@ impl SchemaField { #[cfg(test)] mod tests { - use super::*; use serde_json; + use super::*; + #[test] fn test_schema_field_serialization() { let field = SchemaField::new("user_id", KalamDataType::BigInt, 0); diff --git a/backend/crates/kalamdb-commons/src/models/schemas/schema_version.rs b/backend/crates/kalamdb-commons/src/models/schemas/schema_version.rs index dac651e7e..4eed9af30 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/schema_version.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/schema_version.rs @@ -59,9 +59,10 @@ impl SchemaVersion { #[cfg(test)] mod tests { - use super::*; use chrono::TimeZone; + use super::*; + #[test] fn test_new_schema_version() { let version = SchemaVersion::new(1, "Initial schema", "{}"); diff --git a/backend/crates/kalamdb-commons/src/models/schemas/table_access.rs b/backend/crates/kalamdb-commons/src/models/schemas/table_access.rs index 9f48ec10a..4f585cd51 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/table_access.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/table_access.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/schemas/table_definition.rs b/backend/crates/kalamdb-commons/src/models/schemas/table_definition.rs index 0540c92e9..26d0ae058 100644 --- 
a/backend/crates/kalamdb-commons/src/models/schemas/table_definition.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/table_definition.rs @@ -1,18 +1,24 @@ //! Table definition - single source of truth for table schemas //! -//! **Phase 16 Schema Versioning**: Schema history is now stored separately using TableVersionId keys. -//! Each TableDefinition stores only its current `schema_version: u32`. -//! Historical versions are stored as separate entries: `{tableId}{version:08}` -> TableDefinition -//! The latest pointer `{tableId}` points to the current version. - -use crate::conversions::{with_kalam_column_flags_metadata, with_kalam_data_type_metadata}; -use crate::models::datatypes::{ArrowConversionError, ToArrowType}; -use crate::models::schemas::{ColumnDefinition, SchemaField, TableOptions, TableType}; -use crate::{NamespaceId, TableName}; +//! **Phase 16 Schema Versioning**: Schema history is now stored separately using TableVersionId +//! keys. Each TableDefinition stores only its current `schema_version: u32`. +//! Historical versions are stored as separate entries: `{tableId}{version:08}` -> +//! TableDefinition The latest pointer `{tableId}` points to the current version. 
+ +use std::sync::Arc; + use arrow_schema::{Field, Schema as ArrowSchema}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::sync::Arc; + +use crate::{ + conversions::{with_kalam_column_flags_metadata, with_kalam_data_type_metadata}, + models::{ + datatypes::{ArrowConversionError, ToArrowType}, + schemas::{ColumnDefinition, SchemaField, TableOptions, TableType}, + }, + NamespaceId, TableName, +}; /// Complete definition of a table including schema, history, and options /// @@ -417,9 +423,10 @@ impl TableDefinition { #[cfg(test)] mod tests { use super::*; - use crate::models::datatypes::KalamDataType; - use crate::models::schemas::ColumnDefault; - use crate::{NamespaceId, TableName}; + use crate::{ + models::{datatypes::KalamDataType, schemas::ColumnDefault}, + NamespaceId, TableName, + }; fn sample_columns() -> Vec { vec![ diff --git a/backend/crates/kalamdb-commons/src/models/schemas/table_name.rs b/backend/crates/kalamdb-commons/src/models/schemas/table_name.rs index bac81154c..58ad76416 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/table_name.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/table_name.rs @@ -1,7 +1,6 @@ //! Type-safe wrapper for table names. 
-use std::fmt; -use std::sync::Arc; +use std::{fmt, sync::Arc}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/models/schemas/table_options.rs b/backend/crates/kalamdb-commons/src/models/schemas/table_options.rs index 0a65e02a0..5eddddd1a 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/table_options.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/table_options.rs @@ -4,9 +4,13 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::{schemas::policy::FlushPolicy, StorageId, TableAccess}; -/// **Q: How does per-user storage assignment work with use_user_storage option?** → A: Lookup chain: table.use_user_storage=true → check user.storage_mode → if "region" use user.storage_id, if "table" use table.storage_id fallback +/// **Q: How does per-user storage assignment work with use_user_storage option?** → A: Lookup +/// chain: table.use_user_storage=true → check user.storage_mode → if "region" use user.storage_id, +/// if "table" use table.storage_id fallback /// - *Impact*: User Story 2, User Story 10 (user management), new storage assignment logic -/// - *Rationale*: Enables data sovereignty (users in EU region → EU S3 bucket). Flexible fallback prevents orphaned data. user.storage_mode="table" allows per-table override when needed. Supports multi-tenant SaaS scenarios with region-specific compliance. +/// - *Rationale*: Enables data sovereignty (users in EU region → EU S3 bucket). Flexible fallback +/// prevents orphaned data. user.storage_mode="table" allows per-table override when needed. +/// Supports multi-tenant SaaS scenarios with region-specific compliance. 
/// /// Table options for USER tables #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -382,7 +386,8 @@ mod tests { assert_eq!(TableOptions::user().cache_ttl_seconds(), None); assert_eq!(TableOptions::shared().cache_ttl_seconds(), None); // Shared tables no longer have caching assert_eq!(TableOptions::stream(3600).cache_ttl_seconds(), None); - assert_eq!(TableOptions::system().cache_ttl_seconds(), None); // Updated to return None per implementation + assert_eq!(TableOptions::system().cache_ttl_seconds(), None); // Updated to return None per + // implementation } #[test] diff --git a/backend/crates/kalamdb-commons/src/models/schemas/table_type.rs b/backend/crates/kalamdb-commons/src/models/schemas/table_type.rs index c48bae31a..aab7b6a63 100644 --- a/backend/crates/kalamdb-commons/src/models/schemas/table_type.rs +++ b/backend/crates/kalamdb-commons/src/models/schemas/table_type.rs @@ -1,16 +1,20 @@ //! Table type classification +use std::{fmt, str::FromStr}; + use serde::{Deserialize, Serialize}; -use std::fmt; -use std::str::FromStr; /// Enum representing the type of table in KalamDB. 
/// /// Each table type has associated type-safe options: -/// - **User** → `UserTableOptions`: Per-user tables with user-specific partitioning (e.g., `user_123/conversations`) -/// - **Shared** → `SharedTableOptions`: Shared tables accessible across all users (e.g., `categories`) -/// - **Stream** → `StreamTableOptions`: Event stream tables with TTL-based eviction (e.g., `chat_events`) -/// - **System** → `SystemTableOptions`: Internal system metadata tables (e.g., `information_schema.tables`) +/// - **User** → `UserTableOptions`: Per-user tables with user-specific partitioning (e.g., +/// `user_123/conversations`) +/// - **Shared** → `SharedTableOptions`: Shared tables accessible across all users (e.g., +/// `categories`) +/// - **Stream** → `StreamTableOptions`: Event stream tables with TTL-based eviction (e.g., +/// `chat_events`) +/// - **System** → `SystemTableOptions`: Internal system metadata tables (e.g., +/// `information_schema.tables`) #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum TableType { /// Per-user tables with user-specific partitioning @@ -18,11 +22,13 @@ pub enum TableType { User, /// Shared tables accessible across all users - /// Options: `SharedTableOptions` (access_level, enable_cache, cache_ttl_seconds, compression, enable_replication) + /// Options: `SharedTableOptions` (access_level, enable_cache, cache_ttl_seconds, compression, + /// enable_replication) Shared, /// Event stream tables with TTL-based eviction - /// Options: `StreamTableOptions` (ttl_seconds, eviction_strategy, max_stream_size_bytes, enable_compaction, watermark_delay_seconds, compression) + /// Options: `StreamTableOptions` (ttl_seconds, eviction_strategy, max_stream_size_bytes, + /// enable_compaction, watermark_delay_seconds, compression) Stream, /// Internal system metadata tables diff --git a/backend/crates/kalamdb-commons/src/models/topic_op.rs b/backend/crates/kalamdb-commons/src/models/topic_op.rs index 8e6e195ba..faeac83e7 
100644 --- a/backend/crates/kalamdb-commons/src/models/topic_op.rs +++ b/backend/crates/kalamdb-commons/src/models/topic_op.rs @@ -1,7 +1,6 @@ //! Topic operation type enumeration. -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-commons/src/serialization.rs b/backend/crates/kalamdb-commons/src/serialization.rs index f53ef94dc..d5e702f1e 100644 --- a/backend/crates/kalamdb-commons/src/serialization.rs +++ b/backend/crates/kalamdb-commons/src/serialization.rs @@ -24,12 +24,12 @@ pub use envelope::{encode_envelope_inline, CodecKind, EntityEnvelope}; /// ## Example /// /// ```rust -/// use serde::{Deserialize, Serialize}; /// use kalamdb_commons::serialization::KSerializable; +/// use serde::{Deserialize, Serialize}; /// -/// #[derive(Serialize, Deserialize, )] +/// #[derive(Serialize, Deserialize)] /// struct MyEntity { -/// id: String, +/// id: String, /// value: i64, /// } /// diff --git a/backend/crates/kalamdb-commons/src/serialization/envelope.rs b/backend/crates/kalamdb-commons/src/serialization/envelope.rs index 2dd798e92..2eff9350d 100644 --- a/backend/crates/kalamdb-commons/src/serialization/envelope.rs +++ b/backend/crates/kalamdb-commons/src/serialization/envelope.rs @@ -1,7 +1,9 @@ use serde::{Deserialize, Serialize}; -use crate::serialization::generated::entity_envelope_generated::kalamdb::serialization as fb; -use crate::storage::StorageError; +use crate::{ + serialization::generated::entity_envelope_generated::kalamdb::serialization as fb, + storage::StorageError, +}; type Result = std::result::Result; diff --git a/backend/crates/kalamdb-commons/src/serialization/generated/entity_envelope_generated.rs b/backend/crates/kalamdb-commons/src/serialization/generated/entity_envelope_generated.rs index d2d239083..9afa848a8 100644 --- a/backend/crates/kalamdb-commons/src/serialization/generated/entity_envelope_generated.rs +++ 
b/backend/crates/kalamdb-commons/src/serialization/generated/entity_envelope_generated.rs @@ -315,16 +315,17 @@ pub mod kalamdb { ::flatbuffers::size_prefixed_root_with_opts::>(opts, buf) } #[inline] - /// Assumes, without verification, that a buffer of bytes contains a EntityEnvelope and returns it. - /// # Safety + /// Assumes, without verification, that a buffer of bytes contains a EntityEnvelope and + /// returns it. # Safety /// Callers must trust the given bytes do indeed contain a valid `EntityEnvelope`. pub unsafe fn root_as_entity_envelope_unchecked(buf: &[u8]) -> EntityEnvelope<'_> { unsafe { ::flatbuffers::root_unchecked::(buf) } } #[inline] - /// Assumes, without verification, that a buffer of bytes contains a size prefixed EntityEnvelope and returns it. - /// # Safety - /// Callers must trust the given bytes do indeed contain a valid size prefixed `EntityEnvelope`. + /// Assumes, without verification, that a buffer of bytes contains a size prefixed + /// EntityEnvelope and returns it. # Safety + /// Callers must trust the given bytes do indeed contain a valid size prefixed + /// `EntityEnvelope`. pub unsafe fn size_prefixed_root_as_entity_envelope_unchecked( buf: &[u8], ) -> EntityEnvelope<'_> { diff --git a/backend/crates/kalamdb-commons/src/serialization/generated/row_models_generated.rs b/backend/crates/kalamdb-commons/src/serialization/generated/row_models_generated.rs index d0cb78729..10035b53a 100644 --- a/backend/crates/kalamdb-commons/src/serialization/generated/row_models_generated.rs +++ b/backend/crates/kalamdb-commons/src/serialization/generated/row_models_generated.rs @@ -13,17 +13,20 @@ pub mod kalamdb { #[deprecated( since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." + note = "Use associated constants instead. This will no longer be generated in \ + 2021." )] pub const ENUM_MIN_SCALAR_TAG: u16 = 0; #[deprecated( since = "2.0.0", - note = "Use associated constants instead. 
This will no longer be generated in 2021." + note = "Use associated constants instead. This will no longer be generated in \ + 2021." )] pub const ENUM_MAX_SCALAR_TAG: u16 = 24; #[deprecated( since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." + note = "Use associated constants instead. This will no longer be generated in \ + 2021." )] #[allow(non_camel_case_types)] pub const ENUM_VALUES_SCALAR_TAG: [ScalarTag; 25] = [ @@ -1612,16 +1615,17 @@ pub mod kalamdb { ::flatbuffers::size_prefixed_root_with_opts::>(opts, buf) } #[inline] - /// Assumes, without verification, that a buffer of bytes contains a RowPayload and returns it. - /// # Safety + /// Assumes, without verification, that a buffer of bytes contains a RowPayload and + /// returns it. # Safety /// Callers must trust the given bytes do indeed contain a valid `RowPayload`. pub unsafe fn root_as_row_payload_unchecked(buf: &[u8]) -> RowPayload<'_> { unsafe { ::flatbuffers::root_unchecked::(buf) } } #[inline] - /// Assumes, without verification, that a buffer of bytes contains a size prefixed RowPayload and returns it. - /// # Safety - /// Callers must trust the given bytes do indeed contain a valid size prefixed `RowPayload`. + /// Assumes, without verification, that a buffer of bytes contains a size prefixed + /// RowPayload and returns it. # Safety + /// Callers must trust the given bytes do indeed contain a valid size prefixed + /// `RowPayload`. 
pub unsafe fn size_prefixed_root_as_row_payload_unchecked( buf: &[u8], ) -> RowPayload<'_> { diff --git a/backend/crates/kalamdb-commons/src/serialization/row_codec.rs b/backend/crates/kalamdb-commons/src/serialization/row_codec.rs index d6ae36b40..a525815bf 100644 --- a/backend/crates/kalamdb-commons/src/serialization/row_codec.rs +++ b/backend/crates/kalamdb-commons/src/serialization/row_codec.rs @@ -1,17 +1,24 @@ -use std::collections::BTreeMap; -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; -use arrow::array::{Array, FixedSizeListArray, Float32Array}; -use arrow::datatypes::Float32Type; +use arrow::{ + array::{Array, FixedSizeListArray, Float32Array}, + datatypes::Float32Type, +}; use datafusion_common::ScalarValue; -use crate::ids::SeqId; -use crate::models::rows::{Row, UserTableRow}; -use crate::models::UserId; -use crate::serialization::generated::row_models_generated::kalamdb::serialization::row as fb_row; -use crate::serialization::schema::ROW_SCHEMA_VERSION; -use crate::serialization::{decode_enveloped, encode_envelope_inline, CodecKind}; -use crate::storage::StorageError; +use crate::{ + ids::SeqId, + models::{ + rows::{Row, UserTableRow}, + UserId, + }, + serialization::{ + decode_enveloped, encode_envelope_inline, + generated::row_models_generated::kalamdb::serialization::row as fb_row, + schema::ROW_SCHEMA_VERSION, CodecKind, + }, + storage::StorageError, +}; type Result = std::result::Result; diff --git a/backend/crates/kalamdb-commons/src/storage.rs b/backend/crates/kalamdb-commons/src/storage.rs index 9860a3959..c3598b2f2 100644 --- a/backend/crates/kalamdb-commons/src/storage.rs +++ b/backend/crates/kalamdb-commons/src/storage.rs @@ -10,12 +10,12 @@ //! - get/put/delete for key-value access //! - batch for atomic multi-operation transactions //! - scan for range queries -//! - partition management (maps to column families in RocksDB, trees in Sled, etc.) +//! - partition management (mapped to backend-native keyspaces) //! //! 
## Partition Model //! //! Since different backends have different concepts for data organization: -//! - **RocksDB**: Partition = Column Family +//! - **RocksDB**: Partition = key prefix inside a fixed physical column-family set //! - **Sled**: Partition = Tree //! - **Redis**: Partition = Key Prefix //! - **In-Memory**: Partition = HashMap namespace @@ -25,7 +25,7 @@ //! ## Example Usage //! //! ```rust -//! use kalamdb_commons::storage::{StorageBackend, Partition, Operation}; +//! use kalamdb_commons::storage::{Operation, Partition, StorageBackend}; //! //! fn store_user_data(backend: &S, user_id: &str, data: &[u8]) { //! let partition = Partition::new(format!("user_{}", user_id)); @@ -59,8 +59,7 @@ //! } //! ``` -use std::any::Any; -use std::fmt; +use std::{any::Any, fmt}; /// Type alias for a boxed key-value iterator to simplify function signatures pub type KvIterator<'a> = Box, Vec)> + Send + 'a>; @@ -71,7 +70,7 @@ pub type Result = std::result::Result; /// Errors that can occur during storage operations. #[derive(Debug, Clone)] pub enum StorageError { - /// Partition (column family, tree, namespace) not found + /// Partition (backend-native keyspace) not found PartitionNotFound(String), /// Generic I/O error from underlying storage @@ -115,7 +114,7 @@ impl std::error::Error for StorageError {} /// /// Partitions provide a way to organize data into separate namespaces. /// Different backends map partitions to their native concepts: -/// - RocksDB: Column Family +/// - RocksDB: prefixed logical keyspace inside a fixed physical column family /// - Sled: Tree /// - Redis: Key prefix /// - In-memory: HashMap namespace diff --git a/backend/crates/kalamdb-commons/src/storage_key.rs b/backend/crates/kalamdb-commons/src/storage_key.rs index 0f6b1db9c..2d609c1e6 100644 --- a/backend/crates/kalamdb-commons/src/storage_key.rs +++ b/backend/crates/kalamdb-commons/src/storage_key.rs @@ -8,8 +8,8 @@ //! RocksDB stores keys in lexicographic (byte-by-byte) order. 
Naive encoding //! strategies like `{len:1byte}{string_bytes}` break ordering: //! -//! - "bob" → [3, b, o, b] sorts BEFORE "alice" → [5, a, l, i, c, e] -//! because 3 < 5, even though "alice" < "bob" lexicographically +//! - "bob" → [3, b, o, b] sorts BEFORE "alice" → [5, a, l, i, c, e] because 3 < 5, even though +//! "alice" < "bob" lexicographically //! //! The `storekey` crate uses escape-sequence encoding that preserves the //! natural lexicographic order of strings and tuples. diff --git a/backend/crates/kalamdb-commons/src/system_tables.rs b/backend/crates/kalamdb-commons/src/system_tables.rs index 67f8cf6de..bb8886859 100644 --- a/backend/crates/kalamdb-commons/src/system_tables.rs +++ b/backend/crates/kalamdb-commons/src/system_tables.rs @@ -14,7 +14,7 @@ use crate::constants::ColumnFamilyNames; use crate::models::TableId; -/// Memory/performance profile applied to a RocksDB column family. +/// Memory/performance profile applied to a storage partition or physical RocksDB CF. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ColumnFamilyProfile { /// System metadata tables and compatibility partitions. @@ -70,7 +70,8 @@ pub enum SystemTable { ServerLogs, /// system.cluster - Raft cluster status and metrics (computed on-demand) Cluster, - /// system.cluster_groups - Per-Raft-group membership and replication status (computed on-demand) + /// system.cluster_groups - Per-Raft-group membership and replication status (computed + /// on-demand) ClusterGroups, /// system.datatypes - Supported data type mappings (computed on-demand) Datatypes, @@ -137,8 +138,8 @@ impl SystemTable { ) } - /// Get the column family name in RocksDB (e.g., "system_users") - /// Returns None for views (they have no storage backing) + /// Get the logical storage partition name (e.g., "system_users"). + /// Returns None for views because they have no storage backing. 
pub fn column_family_name(&self) -> Option<&'static str> { match self { SystemTable::Users => Some("system_users"), @@ -285,9 +286,10 @@ impl SystemTable { /// Allocates each Partition once and returns a reference, /// avoiding repeated String allocations across the codebase. pub fn partition(&self) -> Option<&'static crate::storage::Partition> { - use crate::storage::Partition; use once_cell::sync::Lazy; + use crate::storage::Partition; + static USERS: Lazy = Lazy::new(|| Partition::new("system_users")); static NAMESPACES: Lazy = Lazy::new(|| Partition::new("system_namespaces")); static SCHEMAS: Lazy = Lazy::new(|| Partition::new("system_schemas")); @@ -400,9 +402,10 @@ impl StoragePartition { /// Returns a shared Partition reference for this named partition. pub fn partition(&self) -> &'static crate::storage::Partition { - use crate::storage::Partition; use once_cell::sync::Lazy; + use crate::storage::Partition; + static INFO: Lazy = Lazy::new(|| Partition::new(StoragePartition::InformationSchemaTables.name())); static USERNAME_IDX: Lazy = @@ -433,9 +436,18 @@ impl StoragePartition { } } -/// Classify an arbitrary RocksDB column-family name into a typed tuning profile. +/// Classify a storage partition or physical RocksDB CF name into a typed tuning profile. 
#[must_use] pub fn classify_column_family_name(name: &str) -> ColumnFamilyProfile { + match name { + "system_meta" => return ColumnFamilyProfile::SystemMeta, + "system_index" => return ColumnFamilyProfile::SystemIndex, + "hot_data" => return ColumnFamilyProfile::HotData, + "hot_index" => return ColumnFamilyProfile::HotIndex, + "raft_data" => return ColumnFamilyProfile::Raft, + _ => {}, + } + if let Ok(table) = SystemTable::from_name(name) { if table.column_family_name().is_some() { return table.column_family_profile(); @@ -446,10 +458,6 @@ pub fn classify_column_family_name(name: &str) -> ColumnFamilyProfile { return partition.column_family_profile(); } - if name == "raft_data" { - return ColumnFamilyProfile::Raft; - } - if name == "topic_messages" { return ColumnFamilyProfile::HotData; } diff --git a/backend/crates/kalamdb-commons/src/websocket.rs b/backend/crates/kalamdb-commons/src/websocket.rs index 0d04270f5..62b3e9c3b 100644 --- a/backend/crates/kalamdb-commons/src/websocket.rs +++ b/backend/crates/kalamdb-commons/src/websocket.rs @@ -92,20 +92,21 @@ //! } //! ``` -use crate::ids::SeqId; -use crate::models::rows::Row; -use crate::models::KalamCellValue; -use crate::models::UserId; -use crate::schemas::SchemaField; pub use crate::websocket_auth::WsAuthCredentials; +use crate::{ + ids::SeqId, + models::{rows::Row, KalamCellValue, UserId}, + schemas::SchemaField, +}; // Simple Row type for WASM (JSON only) #[cfg(feature = "wasm")] pub type Row = serde_json::Map; +use std::collections::{BTreeMap, HashMap}; + use datafusion_common::ScalarValue; use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, HashMap}; /// Wire-format serialization type negotiated during authentication. 
/// @@ -327,11 +328,6 @@ pub struct SubscriptionOptions { /// Typically set automatically during reconnection to resume from last received event #[serde(skip_serializing_if = "Option::is_none", alias = "from_seq_id")] pub from: Option, - - /// Preserve the original snapshot boundary across reconnects while the - /// initial load is still in progress. - #[serde(skip_serializing_if = "Option::is_none")] - pub snapshot_end_seq: Option, } /// Batch control metadata for paginated initial data loading @@ -355,9 +351,6 @@ pub struct BatchControl { /// The SeqId of the last row in this batch (used for next request) pub last_seq_id: Option, - - /// The snapshot boundary (max SeqId at start of load) - pub snapshot_end_seq: Option, } /// Status of the initial data loading process @@ -622,7 +615,6 @@ impl BatchControl { BatchStatus::Ready }, last_seq_id: None, - snapshot_end_seq: None, } } @@ -641,17 +633,11 @@ impl BatchControl { BatchStatus::Ready }, last_seq_id: None, - snapshot_end_seq: None, } } /// Create batch control with all fields specified - pub fn new( - batch_num: u32, - has_more: bool, - last_seq_id: Option, - snapshot_end_seq: Option, - ) -> Self { + pub fn new(batch_num: u32, has_more: bool, last_seq_id: Option) -> Self { let status = if batch_num == 0 { if has_more { BatchStatus::Loading @@ -669,7 +655,6 @@ impl BatchControl { has_more, status, last_seq_id, - snapshot_end_seq, } } } @@ -863,9 +848,8 @@ impl WireNotification { // back to the allocating path when we actually see a byte that would // need JSON escaping. 
let sid_bytes = self.subscription_id.as_bytes(); - let needs_escape = sid_bytes - .iter() - .any(|&b| b == b'\\' || b == b'"' || b < 0x20 || b >= 0x7f); + let needs_escape = + sid_bytes.iter().any(|&b| b == b'\\' || b == b'"' || b < 0x20 || b >= 0x7f); if needs_escape { let escaped = self.subscription_id.replace('\\', "\\\\").replace('"', "\\\""); buf.extend_from_slice(escaped.as_bytes()); @@ -903,11 +887,12 @@ impl ChangeType { #[cfg(test)] mod tests { + use std::{collections::BTreeMap, sync::Arc}; + + use datafusion_common::ScalarValue; + use super::*; use crate::models::rows::Row; - use datafusion_common::ScalarValue; - use std::collections::BTreeMap; - use std::sync::Arc; fn create_test_row(id: i64, message: &str) -> Row { let mut values = BTreeMap::new(); @@ -1045,8 +1030,7 @@ mod tests { #[test] fn test_next_batch_request() { - use crate::ids::SeqId; - use crate::websocket::ClientMessage; + use crate::{ids::SeqId, websocket::ClientMessage}; let msg = ClientMessage::next_batch("sub-1".to_string(), Some(SeqId::new(100))); let json = serde_json::to_string(&msg).unwrap(); diff --git a/backend/crates/kalamdb-commons/tests/test_unified_types.rs b/backend/crates/kalamdb-commons/tests/test_unified_types.rs index e659cc024..3bd7e57c4 100644 --- a/backend/crates/kalamdb-commons/tests/test_unified_types.rs +++ b/backend/crates/kalamdb-commons/tests/test_unified_types.rs @@ -2,9 +2,10 @@ //! //! 
Tests that all 13 KalamDataTypes convert to Arrow and back losslessly +use std::sync::Arc; + use arrow::datatypes::{DataType as ArrowDataType, Field, TimeUnit}; use kalamdb_commons::models::datatypes::{FromArrowType, KalamDataType, ToArrowType}; -use std::sync::Arc; #[test] fn test_all_kalambdata_types_convert_to_arrow_losslessly() { @@ -127,7 +128,8 @@ fn test_type_conversion_performance() { let ops_per_sec = total_ops as f64 / elapsed.as_secs_f64(); println!( - "✅ Type conversion performance: {:.0} ops/sec ({} iterations × {} types × 2 directions = {} ops in {:?})", + "✅ Type conversion performance: {:.0} ops/sec ({} iterations × {} types × 2 directions = \ + {} ops in {:?})", ops_per_sec, iterations, test_types.len(), diff --git a/backend/crates/kalamdb-configs/src/config/cluster.rs b/backend/crates/kalamdb-configs/src/config/cluster.rs index c9fbd9c82..e63b2eea5 100644 --- a/backend/crates/kalamdb-configs/src/config/cluster.rs +++ b/backend/crates/kalamdb-configs/src/config/cluster.rs @@ -300,7 +300,8 @@ fn validate_advertised_address( if let Ok(addr) = trimmed.parse::() { if addr.ip().is_unspecified() && has_peers { return Err(format!( - "{} must not use an unspecified/wildcard address ({}) when peers are configured. Use a reachable hostname or IP instead", + "{} must not use an unspecified/wildcard address ({}) when peers are configured. 
\ + Use a reachable hostname or IP instead", field_name, value )); } diff --git a/backend/crates/kalamdb-configs/src/config/loader.rs b/backend/crates/kalamdb-configs/src/config/loader.rs index 7d7b668f8..0f20e0b7e 100644 --- a/backend/crates/kalamdb-configs/src/config/loader.rs +++ b/backend/crates/kalamdb-configs/src/config/loader.rs @@ -1,9 +1,7 @@ -use super::trusted_proxies::parse_trusted_proxy_entries; -use super::types::ServerConfig; +use std::{fs, net::IpAddr, path::Path}; + +use super::{trusted_proxies::parse_trusted_proxy_entries, types::ServerConfig}; use crate::file_helpers::normalize_dir_path; -use std::fs; -use std::net::IpAddr; -use std::path::Path; fn is_localhost_host(host: &str) -> bool { let trimmed = host.trim().trim_matches('[').trim_matches(']'); @@ -212,7 +210,8 @@ impl ServerConfig { && !has_configured_origin_policy(&self.security.cors.allowed_origins) { return Err(anyhow::anyhow!( - "Non-localhost HTTP exposure requires security.cors.allowed_origins to be configured (empty is not allowed)" + "Non-localhost HTTP exposure requires security.cors.allowed_origins to be \ + configured (empty is not allowed)" )); } @@ -222,8 +221,10 @@ impl ServerConfig { #[cfg(test)] mod tests { - use super::super::cluster::{ClusterConfig, PeerConfig}; - use super::*; + use super::{ + super::cluster::{ClusterConfig, PeerConfig}, + *, + }; fn local_cluster_config() -> ClusterConfig { ClusterConfig { diff --git a/backend/crates/kalamdb-configs/src/config/override.rs b/backend/crates/kalamdb-configs/src/config/override.rs index 51e5dec60..5a5e13577 100644 --- a/backend/crates/kalamdb-configs/src/config/override.rs +++ b/backend/crates/kalamdb-configs/src/config/override.rs @@ -1,7 +1,10 @@ -use super::cluster::{ClusterConfig, PeerConfig}; -use super::types::ServerConfig; use std::env; +use super::{ + cluster::{ClusterConfig, PeerConfig}, + types::ServerConfig, +}; + fn parse_csv_env_list(value: &str) -> Vec { value .split(',') @@ -16,7 +19,8 @@ impl ServerConfig { 
/// Supported environment variables (T030): /// - KALAMDB_SERVER_HOST: Override server.host /// - KALAMDB_SERVER_PORT: Override server.port - /// - KALAMDB_SERVER_PUBLIC_ORIGIN: Override server.public_origin (empty keeps Admin UI browser-origin fallback) + /// - KALAMDB_SERVER_PUBLIC_ORIGIN: Override server.public_origin (empty keeps Admin UI + /// browser-origin fallback) /// - KALAMDB_LOG_LEVEL: Override logging.level /// - KALAMDB_LOGS_DIR: Override logging.logs_path /// - KALAMDB_LOG_TO_CONSOLE: Override logging.log_to_console @@ -25,7 +29,8 @@ impl ServerConfig { /// - KALAMDB_OTLP_PROTOCOL: Override logging.otlp.protocol ("grpc" | "http") /// - KALAMDB_OTLP_SERVICE_NAME: Override logging.otlp.service_name /// - KALAMDB_OTLP_TIMEOUT_MS: Override logging.otlp.timeout_ms - /// - KALAMDB_DATA_DIR: Override storage.data_path (base directory for rocksdb, storage, snapshots) + /// - KALAMDB_DATA_DIR: Override storage.data_path (base directory for rocksdb, storage, + /// snapshots) /// - KALAMDB_CLUSTER_ID: Override cluster.cluster_id /// - KALAMDB_NODE_ID: Override cluster.node_id (alias: KALAMDB_CLUSTER_NODE_ID) /// - KALAMDB_CLUSTER_RPC_ADDR: Override cluster.rpc_addr @@ -37,9 +42,13 @@ impl ServerConfig { /// - KALAMDB_JWT_EXPIRY_HOURS: Override auth.jwt_expiry_hours /// - KALAMDB_COOKIE_SECURE: Override auth.cookie_secure /// - KALAMDB_ALLOW_REMOTE_SETUP: Override auth.allow_remote_setup - /// - KALAMDB_SECURITY_CORS_ALLOWED_ORIGINS: Override security.cors.allowed_origins with a comma-separated list or "*" + /// - KALAMDB_SECURITY_CORS_ALLOWED_ORIGINS: Override security.cors.allowed_origins with a + /// comma-separated list or "*" /// - KALAMDB_SECURITY_TRUSTED_PROXY_RANGES: Override security.trusted_proxy_ranges - /// - KALAMDB_RATE_LIMIT_AUTH_REQUESTS_PER_IP_PER_SEC: Override rate_limit.max_auth_requests_per_ip_per_sec + /// - KALAMDB_RATE_LIMIT_AUTH_REQUESTS_PER_IP_PER_SEC: Override + /// rate_limit.max_auth_requests_per_ip_per_sec + /// - 
KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS: Override topics.visibility_timeout_secs + /// (alias: KALAMDB_VISIBILITY_TIMEOUT_SECS) /// - KALAMDB_WEBSOCKET_CLIENT_TIMEOUT_SECS: Override websocket.client_timeout_secs /// - KALAMDB_WEBSOCKET_AUTH_TIMEOUT_SECS: Override websocket.auth_timeout_secs /// - KALAMDB_WEBSOCKET_HEARTBEAT_INTERVAL_SECS: Override websocket.heartbeat_interval_secs @@ -80,6 +89,14 @@ impl ServerConfig { self.logging.level = level; } + let topic_visibility_timeout = env::var("KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS") + .or_else(|_| env::var("KALAMDB_VISIBILITY_TIMEOUT_SECS")); + if let Ok(val) = topic_visibility_timeout { + self.topics.visibility_timeout_secs = val.parse().map_err(|_| { + anyhow::anyhow!("Invalid KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS value: {}", val) + })?; + } + // Logs directory path if let Ok(path) = env::var("KALAMDB_LOGS_DIR") { self.logging.logs_path = path; @@ -286,7 +303,8 @@ fn parse_cluster_peers(value: &str) -> anyhow::Result> { let parts: Vec<&str> = entry.split('@').collect(); if parts.len() < 3 || parts.len() > 4 { return Err(anyhow::anyhow!( - "Invalid KALAMDB_CLUSTER_PEERS entry '{}'. Expected format: node_id@rpc_addr@api_addr[@rpc_server_name]", + "Invalid KALAMDB_CLUSTER_PEERS entry '{}'. 
Expected format: \ + node_id@rpc_addr@api_addr[@rpc_server_name]", entry )); } @@ -309,9 +327,10 @@ fn parse_cluster_peers(value: &str) -> anyhow::Result> { #[cfg(test)] mod tests { - use super::*; use std::sync::{Mutex, MutexGuard, OnceLock}; + use super::*; + static ENV_MUTEX: OnceLock> = OnceLock::new(); fn acquire_env_lock() -> MutexGuard<'static, ()> { @@ -406,6 +425,34 @@ mod tests { env::remove_var("KALAMDB_SECURITY_CORS_ALLOWED_ORIGINS"); } + #[test] + fn test_env_override_topic_visibility_timeout_secs() { + let _guard = acquire_env_lock(); + env::remove_var("KALAMDB_VISIBILITY_TIMEOUT_SECS"); + env::set_var("KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS", "7"); + + let mut config = ServerConfig::default(); + config.apply_env_overrides().unwrap(); + + assert_eq!(config.topics.visibility_timeout_secs, 7); + + env::remove_var("KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS"); + } + + #[test] + fn test_env_override_topic_visibility_timeout_secs_legacy_alias() { + let _guard = acquire_env_lock(); + env::remove_var("KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS"); + env::set_var("KALAMDB_VISIBILITY_TIMEOUT_SECS", "5"); + + let mut config = ServerConfig::default(); + config.apply_env_overrides().unwrap(); + + assert_eq!(config.topics.visibility_timeout_secs, 5); + + env::remove_var("KALAMDB_VISIBILITY_TIMEOUT_SECS"); + } + #[test] fn test_env_override_trusted_proxy_ranges() { let _guard = acquire_env_lock(); diff --git a/backend/crates/kalamdb-configs/src/config/trusted_proxies.rs b/backend/crates/kalamdb-configs/src/config/trusted_proxies.rs index 5ae720a38..1f491ec33 100644 --- a/backend/crates/kalamdb-configs/src/config/trusted_proxies.rs +++ b/backend/crates/kalamdb-configs/src/config/trusted_proxies.rs @@ -1,6 +1,7 @@ +use std::net::IpAddr; + use anyhow::anyhow; use ipnet::IpNet; -use std::net::IpAddr; /// Parse trusted proxy entries from configuration. 
/// @@ -30,9 +31,10 @@ fn parse_trusted_proxy_entry(entry: &str) -> anyhow::Result { #[cfg(test)] mod tests { - use super::*; use std::net::IpAddr; + use super::*; + #[test] fn parses_single_ip_and_cidr_entries() { let parsed = parse_trusted_proxy_entries(&[ diff --git a/backend/crates/kalamdb-configs/src/config/types.rs b/backend/crates/kalamdb-configs/src/config/types.rs index 6ecf62787..901e481ff 100644 --- a/backend/crates/kalamdb-configs/src/config/types.rs +++ b/backend/crates/kalamdb-configs/src/config/types.rs @@ -1,7 +1,9 @@ -use super::defaults::*; -use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use serde::{Deserialize, Serialize}; + +use super::defaults::*; + /// Main server configuration #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ServerConfig { @@ -227,8 +229,8 @@ pub struct ServerSettings { #[serde(default = "default_api_version")] pub api_version: String, /// Enable HTTP/2 protocol support (default: true) - /// When true, server uses bind_auto_h2c() for automatic HTTP/1.1 and HTTP/2 cleartext negotiation - /// When false, server only supports HTTP/1.1 + /// When true, server uses bind_auto_h2c() for automatic HTTP/1.1 and HTTP/2 cleartext + /// negotiation When false, server only supports HTTP/1.1 #[serde(default = "default_enable_http2")] pub enable_http2: bool, /// Path to the Admin UI static files (e.g., "./ui/dist") diff --git a/backend/crates/kalamdb-configs/src/file_helpers.rs b/backend/crates/kalamdb-configs/src/file_helpers.rs index 3cf23db63..d4ff8cd52 100644 --- a/backend/crates/kalamdb-configs/src/file_helpers.rs +++ b/backend/crates/kalamdb-configs/src/file_helpers.rs @@ -16,9 +16,10 @@ use std::path::{Path, PathBuf}; /// # Examples /// /// ``` -/// use kalamdb_configs::file_helpers::normalize_dir_path; /// use std::path::Path; /// +/// use kalamdb_configs::file_helpers::normalize_dir_path; +/// /// let abs = normalize_dir_path("./data"); /// assert!(abs.is_empty() || Path::new(&abs).is_absolute()); 
/// ``` diff --git a/backend/crates/kalamdb-configs/src/lib.rs b/backend/crates/kalamdb-configs/src/lib.rs index a5e6e8480..f03eed1fa 100644 --- a/backend/crates/kalamdb-configs/src/lib.rs +++ b/backend/crates/kalamdb-configs/src/lib.rs @@ -5,5 +5,4 @@ pub mod config; pub mod file_helpers; -pub use config::defaults; -pub use config::*; +pub use config::{defaults, *}; diff --git a/backend/crates/kalamdb-core/src/app_context.rs b/backend/crates/kalamdb-core/src/app_context.rs index 561d57bb2..9f73399bd 100644 --- a/backend/crates/kalamdb-core/src/app_context.rs +++ b/backend/crates/kalamdb-core/src/app_context.rs @@ -3,22 +3,19 @@ //! Provides access to all core resources with simplified 3-parameter initialization. //! Uses constants from kalamdb_commons for table prefixes. -use crate::applier::UnifiedApplier; -use crate::job_waker::JobWaker; -use crate::live_adapters::SchemaRegistryLookup; -use crate::schema_registry::SchemaRegistry; -use crate::sql::datafusion_session::DataFusionSessionFactory; -use crate::sql::executor::SqlExecutor; -use crate::sql::table_functions::{CoreVectorSearchRuntime, VectorSearchTableFunction}; -use crate::transactions::{CommitSequenceTracker, TransactionCoordinator}; -use crate::views::system_schema_provider::SystemSchemaProvider; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + use async_trait::async_trait; -use datafusion::catalog::SchemaProvider; -use datafusion::prelude::SessionContext; +use datafusion::{catalog::SchemaProvider, prelude::SessionContext}; use kalamdb_auth::{CoreUsersRepo, UserRepository}; -use kalamdb_commons::constants::SYSTEM_NAMESPACE; -use kalamdb_commons::models::{NamespaceId, TransactionOrigin, UserId}; -use kalamdb_commons::{constants::ColumnFamilyNames, NodeId}; +use kalamdb_commons::{ + constants::{ColumnFamilyNames, SYSTEM_NAMESPACE}, + models::{NamespaceId, TransactionOrigin, UserId}, + NodeId, +}; use kalamdb_configs::ServerConfig; use kalamdb_filestore::StorageRegistry; use kalamdb_live::{ @@ 
-31,14 +28,26 @@ use kalamdb_sharding::{GroupId, ShardRouter}; use kalamdb_store::StorageBackend; use kalamdb_system::{ClusterCoordinator, Namespace, SystemTablesRegistry}; use kalamdb_tables::{SharedTableStore, UserTableStore}; -use kalamdb_views::sessions::{PgSessionSnapshot, SessionsSnapshotCallback}; -use kalamdb_views::transactions::{TransactionSnapshot, TransactionsSnapshotCallback}; +use kalamdb_views::{ + sessions::{PgSessionSnapshot, SessionsSnapshotCallback}, + transactions::{TransactionSnapshot, TransactionsSnapshotCallback}, +}; use once_cell::sync::OnceCell; -use std::sync::Arc; -use std::time::{Duration, Instant}; -use crate::metrics::runtime::collect_runtime_metrics; -use crate::schema_registry::TablesSchemaRegistryAdapter; +use crate::{ + applier::UnifiedApplier, + job_waker::JobWaker, + live_adapters::SchemaRegistryLookup, + metrics::runtime::collect_runtime_metrics, + schema_registry::{SchemaRegistry, TablesSchemaRegistryAdapter}, + sql::{ + datafusion_session::DataFusionSessionFactory, + executor::SqlExecutor, + table_functions::{CoreVectorSearchRuntime, VectorSearchTableFunction}, + }, + transactions::{CommitSequenceTracker, TransactionCoordinator}, + views::system_schema_provider::SystemSchemaProvider, +}; struct SchemaRegistryTopicPrimaryKeyLookup { schema_registry: Arc, @@ -203,20 +212,15 @@ impl AppContext { /// /// # Example /// ```no_run - /// use kalamdb_core::app_context::AppContext; /// use kalamdb_commons::NodeId; + /// use kalamdb_core::app_context::AppContext; /// # use kalamdb_store::StorageBackend; /// # use std::sync::Arc; /// /// let backend: Arc = todo!(); /// let node_id = NodeId::new("prod-node-1".to_string()); // From server.toml /// let config = ServerConfig::from_file("server.toml").unwrap(); - /// AppContext::init( - /// backend, - /// node_id, - /// "data/storage".to_string(), - /// config, - /// ); + /// AppContext::init(backend, node_id, "data/storage".to_string(), config); /// ``` pub fn init( storage_backend: Arc, @@ 
-280,7 +284,8 @@ impl AppContext { ColumnFamilyNames::SHARED_TABLE_PREFIX.to_string(), )); - // Create system table providers registry FIRST (needed by StorageRegistry and information_schema) + // Create system table providers registry FIRST (needed by StorageRegistry and + // information_schema) let system_tables = Arc::new(SystemTablesRegistry::new(storage_backend.clone())); // Create storage registry (uses StoragesTableProvider from system_tables) @@ -321,9 +326,10 @@ impl AppContext { // Register system schema with lazy loading // Use constant catalog name "kalam" - configured in DataFusionSessionFactory - let catalog = base_session_context - .catalog("kalam") - .expect("Catalog 'kalam' not found - ensure DataFusionSessionFactory is properly configured"); + let catalog = base_session_context.catalog("kalam").expect( + "Catalog 'kalam' not found - ensure DataFusionSessionFactory is properly \ + configured", + ); // Register the system schema provider with the catalog // Views are created on first access, not eagerly at startup @@ -471,13 +477,12 @@ impl AppContext { let visibility_timeout = Duration::from_secs(config.topics.visibility_timeout_secs); let topic_primary_key_lookup: Arc = Arc::new(SchemaRegistryTopicPrimaryKeyLookup::new(schema_registry.clone())); - let topic_publisher = Arc::new( - TopicPublisherService::with_visibility_timeout_and_primary_key_lookup( + let topic_publisher = + Arc::new(TopicPublisherService::with_visibility_timeout_and_primary_key_lookup( storage_backend.clone(), visibility_timeout, Some(topic_primary_key_lookup), - ), - ); + )); // Create the shared committed snapshot tracker used by the transaction coordinator let commit_sequence_tracker = Arc::new(CommitSequenceTracker::new(0)); @@ -538,7 +543,8 @@ impl AppContext { topic_publisher.refresh_topics_cache(topics); topic_publisher.restore_offset_counters(); log::info!( - "Restored {} topics into TopicPublisherService cache (routes={}, offsets ready)", + "Restored {} topics into 
TopicPublisherService cache (routes={}, offsets \ + ready)", count, topic_publisher.cache_stats().total_routes, ); @@ -748,8 +754,10 @@ impl AppContext { /// Maps node_id string to 10-bit integer (0-1023) using CRC32 hash. /// This ensures consistent worker_id across server restarts. fn extract_worker_id(node_id: &NodeId) -> u16 { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; let mut hasher = DefaultHasher::new(); node_id.as_u64().hash(&mut hasher); @@ -769,9 +777,10 @@ impl AppContext { /// /// # Example /// ```no_run - /// use kalamdb_core::app_context::AppContext; /// use std::sync::Arc; /// + /// use kalamdb_core::app_context::AppContext; + /// /// let app_context = AppContext::new_test(); /// let sys_cols = app_context.system_columns_service(); /// let (snowflake_id, updated_ns, deleted) = sys_cols.handle_insert(None).unwrap(); @@ -871,13 +880,12 @@ impl AppContext { let visibility_timeout = Duration::from_secs(config.topics.visibility_timeout_secs); let topic_primary_key_lookup: Arc = Arc::new(SchemaRegistryTopicPrimaryKeyLookup::new(schema_registry.clone())); - let topic_publisher = Arc::new( - TopicPublisherService::with_visibility_timeout_and_primary_key_lookup( + let topic_publisher = + Arc::new(TopicPublisherService::with_visibility_timeout_and_primary_key_lookup( storage_backend.clone(), visibility_timeout, Some(topic_primary_key_lookup), - ), - ); + )); // Create transaction snapshot tracker for tests let commit_sequence_tracker = Arc::new(CommitSequenceTracker::new(0)); @@ -1237,7 +1245,7 @@ impl AppContext { } /// Register the shared SqlExecutor (called once during bootstrap) - pub fn set_sql_executor(&self, executor: Arc) { + pub fn set_sql_executor(self: &Arc, executor: Arc) { // Wire live query manager's InitialDataFetcher with the SQL executor adapter if let Some(lqm) = self.live_query_manager.get() { let adapter = 
Arc::new(crate::live_adapters::SqlExecutorAdapter::new( @@ -1245,6 +1253,10 @@ impl AppContext { Arc::clone(&self.base_session_context), )); lqm.set_sql_executor(adapter); + + let barrier = + Arc::new(crate::live_adapters::RaftApplyBarrierAdapter::new(Arc::clone(self))); + lqm.set_apply_barrier(barrier); } if self.sql_executor.set(executor).is_err() { diff --git a/backend/crates/kalamdb-core/src/applier/applier.rs b/backend/crates/kalamdb-core/src/applier/applier.rs index f0d8d0e1a..af0d78277 100644 --- a/backend/crates/kalamdb-core/src/applier/applier.rs +++ b/backend/crates/kalamdb-core/src/applier/applier.rs @@ -3,23 +3,23 @@ //! All commands flow through Raft, even in single-node mode. //! This ensures the same code path is tested in both modes. -use async_trait::async_trait; -use chrono::Utc; use std::sync::Arc; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{TableDefinition, TableType}; -use kalamdb_commons::models::{NamespaceId, StorageId, TableId, TransactionId, UserId}; +use async_trait::async_trait; +use chrono::Utc; +use kalamdb_commons::models::{ + rows::Row, + schemas::{TableDefinition, TableType}, + NamespaceId, StorageId, TableId, TransactionId, UserId, +}; use kalamdb_raft::{ DataResponse, GroupId, MetaCommand, RaftExecutor, SharedDataCommand, UserDataCommand, }; use kalamdb_sharding::ShardRouter; -use kalamdb_system::Storage; -use kalamdb_system::User; +use kalamdb_system::{Storage, User}; use kalamdb_transactions::StagedMutation; -use super::error::ApplierError; -use super::executor::CommandExecutorImpl; +use super::{error::ApplierError, executor::CommandExecutorImpl}; use crate::app_context::AppContext; /// Unified Applier trait - the single interface for all command execution @@ -278,7 +278,8 @@ impl RaftApplier { if let Some(existing_group) = expected_group { if existing_group != group_id { return Err(ApplierError::Validation(format!( - "explicit transactions must remain within one data raft group; '{}' mapped 
to {:?} while prior mutations mapped to {:?}", + "explicit transactions must remain within one data raft group; '{}' \ + mapped to {:?} while prior mutations mapped to {:?}", mutation.table_id, group_id, existing_group ))); } diff --git a/backend/crates/kalamdb-core/src/applier/error.rs b/backend/crates/kalamdb-core/src/applier/error.rs index cfa6caacb..353d0a669 100644 --- a/backend/crates/kalamdb-core/src/applier/error.rs +++ b/backend/crates/kalamdb-core/src/applier/error.rs @@ -1,6 +1,7 @@ //! Error types for the unified applier use std::fmt; + use thiserror::Error; /// Errors that can occur during command application diff --git a/backend/crates/kalamdb-core/src/applier/executor/ddl.rs b/backend/crates/kalamdb-core/src/applier/executor/ddl.rs index dc015dcee..fc970cb0f 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/ddl.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/ddl.rs @@ -4,13 +4,18 @@ use std::sync::Arc; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; - -use crate::app_context::AppContext; -use crate::applier::executor::utils::{run_blocking_applier, with_plan_cache_invalidation}; -use crate::applier::ApplierError; +use kalamdb_commons::{ + models::{schemas::TableDefinition, TableId}, + schemas::TableType, +}; + +use crate::{ + app_context::AppContext, + applier::{ + executor::utils::{run_blocking_applier, with_plan_cache_invalidation}, + ApplierError, + }, +}; /// Executor for DDL (Data Definition Language) operations pub struct DdlExecutor { @@ -74,12 +79,9 @@ impl DdlExecutor { let table_def = table_def.clone(); with_plan_cache_invalidation(app_context, move |app_context: Arc| async move { run_blocking_applier(move || { - app_context - .schema_registry() - .register_table(table_def.clone()) - .map_err(|e| { - ApplierError::Execution(format!("Failed to register altered table: {}", e)) - })?; + 
app_context.schema_registry().register_table(table_def.clone()).map_err(|e| { + ApplierError::Execution(format!("Failed to register altered table: {}", e)) + })?; log::debug!( "CommandExecutorImpl: Updated schema cache and provider for {}", @@ -89,7 +91,8 @@ impl DdlExecutor { if let Some(cached) = app_context.schema_registry().get(&table_id) { if let Ok(schema) = cached.arrow_schema() { log::debug!( - "CommandExecutorImpl: ALTER TABLE {} complete - Arrow schema now has {} fields: {:?}", + "CommandExecutorImpl: ALTER TABLE {} complete - Arrow schema now has \ + {} fields: {:?}", table_id.full_name(), schema.fields().len(), schema.fields().iter().map(|f| f.name()).collect::>() @@ -132,17 +135,24 @@ impl DdlExecutor { #[cfg(test)] mod tests { - use super::DdlExecutor; - use crate::sql::context::ExecutionContext; - use crate::sql::executor::SqlExecutor; - use crate::test_helpers::test_app_context_simple; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; - use kalamdb_commons::models::{NamespaceId, TableId, TableName}; - use kalamdb_commons::schemas::{ColumnDefault, TableType}; - use kalamdb_commons::{Role, UserId}; use std::sync::Arc; + use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + NamespaceId, TableId, TableName, + }, + schemas::{ColumnDefault, TableType}, + Role, UserId, + }; + + use super::DdlExecutor; + use crate::{ + sql::{context::ExecutionContext, executor::SqlExecutor}, + test_helpers::test_app_context_simple, + }; + #[tokio::test] async fn ddl_applied_via_applier_clears_plan_cache() { let app_ctx = test_app_context_simple(); diff --git a/backend/crates/kalamdb-core/src/applier/executor/dml.rs b/backend/crates/kalamdb-core/src/applier/executor/dml.rs index b6342ba5d..18d56f60f 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/dml.rs +++ 
b/backend/crates/kalamdb-core/src/applier/executor/dml.rs @@ -10,29 +10,35 @@ //! - Unified: Same code path for standalone and cluster modes //! - Provider-agnostic: Handles User, Stream, and Shared table types -use std::collections::HashSet; -use std::sync::Arc; - -use kalamdb_commons::ids::StreamTableRowId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TopicOp, TransactionId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::websocket::{ChangeNotification, ChangeType}; -use kalamdb_commons::TableId; +use std::{collections::HashSet, sync::Arc}; + +use kalamdb_commons::{ + ids::StreamTableRowId, + models::{rows::Row, OperationKind, TopicOp, TransactionId, UserId}, + schemas::TableType, + websocket::{ChangeNotification, ChangeType}, + TableId, +}; use kalamdb_raft::TransactionApplyResult; use kalamdb_system::{NotificationService as NotificationServiceTrait, TopicPublisher}; +use kalamdb_tables::{utils::base as table_base, StreamTableRow}; use kalamdb_transactions::StagedMutation; -use crate::app_context::AppContext; -use crate::applier::error::ApplierError; -use crate::applier::executor::utils::fileref_util::{ - collect_file_refs_from_row, collect_replaced_file_refs_for_update, delete_file_refs_best_effort, +use crate::{ + app_context::AppContext, + applier::{ + error::ApplierError, + executor::utils::fileref_util::{ + collect_file_refs_from_row, collect_replaced_file_refs_for_update, + delete_file_refs_best_effort, + }, + }, + providers::{ + base::{find_row_by_pk, BaseTableProvider}, + SharedTableProvider, StreamTableProvider, UserTableProvider, + }, + transactions::{CommitSideEffectPlan, FanoutOwnerScope}, }; -use crate::providers::base::{find_row_by_pk, BaseTableProvider}; -use crate::providers::{SharedTableProvider, StreamTableProvider, UserTableProvider}; -use crate::transactions::{CommitSideEffectPlan, FanoutOwnerScope}; -use kalamdb_tables::utils::base as table_base; -use 
kalamdb_tables::StreamTableRow; /// Executor for DML operations (Data Plane) /// @@ -49,6 +55,11 @@ impl DmlExecutor { Self { app_context } } + #[inline] + fn observe_commit_seq(&self, commit_seq: u64) { + self.app_context.commit_sequence_tracker().observe_committed(commit_seq); + } + async fn load_provider( &self, table_id: &TableId, @@ -147,12 +158,14 @@ impl DmlExecutor { .insert_batch_with_commit_seq(user_id, rows.to_vec(), commit_seq) .await .map_err(|e| ApplierError::Execution(format!("Failed to insert batch: {}", e)))?; + self.observe_commit_seq(commit_seq); log::debug!("DmlExecutor: Inserted {} rows into {}", row_ids.len(), table_id); Ok(row_ids.len()) } else if let Some(provider) = provider_arc.as_any().downcast_ref::() { let row_ids = provider.insert_batch(user_id, rows.to_vec()).await.map_err(|e| { ApplierError::Execution(format!("Failed to insert stream batch: {}", e)) })?; + self.observe_commit_seq(commit_seq); log::debug!("DmlExecutor: Inserted {} stream rows into {}", row_ids.len(), table_id); Ok(row_ids.len()) } else { @@ -228,6 +241,7 @@ impl DmlExecutor { &replaced_refs, ) .await; + self.observe_commit_seq(commit_seq); Ok(1) } else { Ok(0) @@ -305,6 +319,9 @@ impl DmlExecutor { } } log::debug!("DmlExecutor: Deleted {} rows from {}", deleted_count, table_id); + if deleted_count > 0 { + self.observe_commit_seq(commit_seq); + } Ok(deleted_count) } else if let Some(provider) = provider_arc.as_any().downcast_ref::() { let mut deleted_count = 0; @@ -358,6 +375,7 @@ impl DmlExecutor { .insert_batch_with_commit_seq(rows.to_vec(), commit_seq) .await .map_err(|e| ApplierError::Execution(format!("Failed to insert batch: {}", e)))?; + self.observe_commit_seq(commit_seq); log::debug!("DmlExecutor: Inserted {} shared rows into {}", row_ids.len(), table_id); Ok(row_ids.len()) } else { @@ -434,6 +452,7 @@ impl DmlExecutor { &replaced_refs, ) .await; + self.observe_commit_seq(commit_seq); } log::debug!( @@ -512,6 +531,9 @@ impl DmlExecutor { } 
log::debug!("DmlExecutor: Deleted {} shared rows from {}", deleted_count, table_id); + if deleted_count > 0 { + self.observe_commit_seq(commit_seq); + } Ok(deleted_count) } else { Err(ApplierError::Execution(format!( @@ -613,7 +635,8 @@ impl DmlExecutor { format!("{}|{}|{}", mutation.table_id, user_id.as_str(), mutation.primary_key); if !seen_insert_keys.insert(batch_key) { return Err(ApplierError::Execution(format!( - "Failed to insert batch row: Already exists: Primary key violation: value '{}' appears multiple times in the transaction batch for column '{}'", + "Failed to insert batch row: Already exists: Primary key violation: value \ + '{}' appears multiple times in the transaction batch for column '{}'", mutation.primary_key, provider.primary_key_field_name() ))); @@ -675,7 +698,8 @@ impl DmlExecutor { let batch_key = format!("{}|{}", mutation.table_id, mutation.primary_key); if !seen_insert_keys.insert(batch_key) { return Err(ApplierError::Execution(format!( - "Failed to insert batch row: Already exists: Primary key violation: value '{}' appears multiple times in the transaction batch for column '{}'", + "Failed to insert batch row: Already exists: Primary key violation: value \ + '{}' appears multiple times in the transaction batch for column '{}'", mutation.primary_key, provider.primary_key_field_name() ))); @@ -695,10 +719,20 @@ impl DmlExecutor { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + ) -> Result { + let commit_seq = self.app_context.commit_sequence_tracker().allocate_next(); + self.apply_user_transaction_batch_with_commit_seq(transaction_id, mutations, commit_seq) + .await + } + + pub async fn apply_user_transaction_batch_with_commit_seq( + &self, + transaction_id: &TransactionId, + mutations: &[StagedMutation], + commit_seq: u64, ) -> Result { self.prevalidate_user_transaction_batch(transaction_id, mutations).await?; - let commit_seq = self.app_context.commit_sequence_tracker().allocate_next(); let mut affected_rows = 0; 
let mut side_effect_plan = CommitSideEffectPlan::new(transaction_id.clone()); @@ -774,6 +808,7 @@ impl DmlExecutor { .app_context .notification_service() .dispatch_commit_plan(&side_effect_plan); + self.observe_commit_seq(commit_seq); return Ok(TransactionApplyResult { rows_affected: affected_rows, @@ -881,6 +916,7 @@ impl DmlExecutor { let notifications_sent = self.app_context.notification_service().dispatch_commit_plan(&side_effect_plan); + self.observe_commit_seq(commit_seq); Ok(TransactionApplyResult { rows_affected: affected_rows, @@ -895,10 +931,20 @@ impl DmlExecutor { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + ) -> Result { + let commit_seq = self.app_context.commit_sequence_tracker().allocate_next(); + self.apply_shared_transaction_batch_with_commit_seq(transaction_id, mutations, commit_seq) + .await + } + + pub async fn apply_shared_transaction_batch_with_commit_seq( + &self, + transaction_id: &TransactionId, + mutations: &[StagedMutation], + commit_seq: u64, ) -> Result { self.prevalidate_shared_transaction_batch(transaction_id, mutations).await?; - let commit_seq = self.app_context.commit_sequence_tracker().allocate_next(); let mut affected_rows = 0; let mut side_effect_plan = CommitSideEffectPlan::new(transaction_id.clone()); @@ -964,6 +1010,7 @@ impl DmlExecutor { let notifications_sent = self.app_context.notification_service().dispatch_commit_plan(&side_effect_plan); + self.observe_commit_seq(commit_seq); return Ok(TransactionApplyResult { rows_affected: affected_rows, @@ -1059,6 +1106,7 @@ impl DmlExecutor { let notifications_sent = self.app_context.notification_service().dispatch_commit_plan(&side_effect_plan); + self.observe_commit_seq(commit_seq); Ok(TransactionApplyResult { rows_affected: affected_rows, @@ -1092,14 +1140,6 @@ impl DmlExecutor { return false; } - let is_leader = match user_id { - Some(user_id) => self.app_context.is_leader_for_user(user_id).await, - None => 
self.app_context.is_leader_for_shared().await, - }; - if !is_leader { - return false; - } - let op = Self::topic_op_for_change(¬ification.change_type); if let Err(error) = topic_publisher.publish_for_table( ¬ification.table_id, diff --git a/backend/crates/kalamdb-core/src/applier/executor/mod.rs b/backend/crates/kalamdb-core/src/applier/executor/mod.rs index 7abd10ec0..9f631f97d 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/mod.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/mod.rs @@ -40,7 +40,6 @@ mod storage; mod user; mod utils; -use crate::app_context::AppContext; use std::sync::Arc; pub use ddl::DdlExecutor; @@ -49,6 +48,8 @@ pub use namespace::NamespaceExecutor; pub use storage::StorageExecutor; pub use user::UserExecutor; +use crate::app_context::AppContext; + /// The unified command executor /// /// This is the SINGLE place where all database mutations happen. diff --git a/backend/crates/kalamdb-core/src/applier/executor/namespace.rs b/backend/crates/kalamdb-core/src/applier/executor/namespace.rs index 1a7334899..5b28781fe 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/namespace.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/namespace.rs @@ -9,9 +9,10 @@ use std::sync::Arc; use kalamdb_commons::models::NamespaceId; use kalamdb_system::Namespace; -use crate::app_context::AppContext; -use crate::applier::executor::utils::run_blocking_applier; -use crate::applier::ApplierError; +use crate::{ + app_context::AppContext, + applier::{executor::utils::run_blocking_applier, ApplierError}, +}; /// Executor for namespace operations pub struct NamespaceExecutor { diff --git a/backend/crates/kalamdb-core/src/applier/executor/storage.rs b/backend/crates/kalamdb-core/src/applier/executor/storage.rs index ee16f66fa..ec0eee0e3 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/storage.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/storage.rs @@ -9,9 +9,10 @@ use std::sync::Arc; use 
kalamdb_commons::models::StorageId; use kalamdb_system::Storage; -use crate::app_context::AppContext; -use crate::applier::executor::utils::run_blocking_applier; -use crate::applier::ApplierError; +use crate::{ + app_context::AppContext, + applier::{executor::utils::run_blocking_applier, ApplierError}, +}; /// Executor for storage operations pub struct StorageExecutor { diff --git a/backend/crates/kalamdb-core/src/applier/executor/user.rs b/backend/crates/kalamdb-core/src/applier/executor/user.rs index 32d6e3e5c..22250cc5c 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/user.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/user.rs @@ -9,9 +9,10 @@ use std::sync::Arc; use kalamdb_commons::models::UserId; use kalamdb_system::User; -use crate::app_context::AppContext; -use crate::applier::executor::utils::run_blocking_applier; -use crate::applier::ApplierError; +use crate::{ + app_context::AppContext, + applier::{executor::utils::run_blocking_applier, ApplierError}, +}; /// Executor for user management operations pub struct UserExecutor { diff --git a/backend/crates/kalamdb-core/src/applier/executor/utils/fileref_util.rs b/backend/crates/kalamdb-core/src/applier/executor/utils/fileref_util.rs index 9b3a85aef..fb66ac361 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/utils/fileref_util.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/utils/fileref_util.rs @@ -1,9 +1,9 @@ use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; +use kalamdb_commons::{ + models::{datatypes::KalamDataType, rows::Row, UserId}, + schemas::TableType, + TableId, +}; use kalamdb_system::FileRef; use crate::app_context::AppContext; diff --git a/backend/crates/kalamdb-core/src/applier/executor/utils/mod.rs 
b/backend/crates/kalamdb-core/src/applier/executor/utils/mod.rs index 547c47dea..ddba5de5f 100644 --- a/backend/crates/kalamdb-core/src/applier/executor/utils/mod.rs +++ b/backend/crates/kalamdb-core/src/applier/executor/utils/mod.rs @@ -1,10 +1,8 @@ pub mod fileref_util; -use std::future::Future; -use std::sync::Arc; +use std::{future::Future, sync::Arc}; -use crate::app_context::AppContext; -use crate::applier::ApplierError; +use crate::{app_context::AppContext, applier::ApplierError}; pub(super) async fn run_blocking_applier(operation: F) -> Result where diff --git a/backend/crates/kalamdb-core/src/applier/mod.rs b/backend/crates/kalamdb-core/src/applier/mod.rs index f82930dea..e062b1fbd 100644 --- a/backend/crates/kalamdb-core/src/applier/mod.rs +++ b/backend/crates/kalamdb-core/src/applier/mod.rs @@ -45,16 +45,15 @@ pub mod executor; pub mod raft; // Re-exports +use std::sync::Arc; + pub use applier::{RaftApplier, UnifiedApplier}; pub use command::{CommandResult, CommandType, Validate}; pub use error::ApplierError; pub use executor::CommandExecutorImpl; - // Re-export Raft appliers pub use raft::{ProviderMetaApplier, ProviderSharedDataApplier, ProviderUserDataApplier}; -use std::sync::Arc; - use crate::app_context::AppContext; /// Create the unified Raft applier diff --git a/backend/crates/kalamdb-core/src/applier/raft/mod.rs b/backend/crates/kalamdb-core/src/applier/raft/mod.rs index 3f6379db0..64e6aa9cd 100644 --- a/backend/crates/kalamdb-core/src/applier/raft/mod.rs +++ b/backend/crates/kalamdb-core/src/applier/raft/mod.rs @@ -5,7 +5,7 @@ //! //! The traits are defined in kalamdb-raft: //! - `MetaApplier`: For namespace, table, storage, user, job operations -//! - `UserDataApplier`: For user table data operations +//! - `UserDataApplier`: For user table data operations //! 
- `SharedDataApplier`: For shared table data operations mod provider_meta_applier; diff --git a/backend/crates/kalamdb-core/src/applier/raft/provider_meta_applier.rs b/backend/crates/kalamdb-core/src/applier/raft/provider_meta_applier.rs index beb88a5e0..729cc498d 100644 --- a/backend/crates/kalamdb-core/src/applier/raft/provider_meta_applier.rs +++ b/backend/crates/kalamdb-core/src/applier/raft/provider_meta_applier.rs @@ -6,21 +6,21 @@ //! //! Used by the Raft state machine to apply replicated commands on followers. -use crate::app_context::AppContext; -use crate::applier::executor::CommandExecutorImpl; -use crate::applier::ApplierError; -use async_trait::async_trait; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::models::{JobId, NamespaceId, NodeId, StorageId, TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_raft::applier::MetaApplier; -use kalamdb_raft::RaftError; -use kalamdb_system::providers::jobs::models::Job; -use kalamdb_system::JobStatus; -use kalamdb_system::User; -use kalamdb_system::{JobNode, Storage}; use std::sync::Arc; +use async_trait::async_trait; +use kalamdb_commons::{ + models::{schemas::TableDefinition, JobId, NamespaceId, NodeId, StorageId, TableId, UserId}, + schemas::TableType, +}; +use kalamdb_raft::{applier::MetaApplier, RaftError}; +use kalamdb_system::{providers::jobs::models::Job, JobNode, JobStatus, Storage, User}; + +use crate::{ + app_context::AppContext, + applier::{executor::CommandExecutorImpl, ApplierError}, +}; + /// Unified applier that persists all metadata operations to system tables /// /// This is used by the Raft state machine on follower nodes to apply diff --git a/backend/crates/kalamdb-core/src/applier/raft/provider_shared_data_applier.rs b/backend/crates/kalamdb-core/src/applier/raft/provider_shared_data_applier.rs index ed5392a30..75d5f8e45 100644 --- a/backend/crates/kalamdb-core/src/applier/raft/provider_shared_data_applier.rs +++ 
b/backend/crates/kalamdb-core/src/applier/raft/provider_shared_data_applier.rs @@ -5,17 +5,18 @@ //! //! Called by SharedDataStateMachine after Raft consensus on all nodes. -use async_trait::async_trait; use std::sync::Arc; -use crate::app_context::AppContext; -use crate::applier::executor::CommandExecutorImpl; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableId; +use async_trait::async_trait; +use kalamdb_commons::{ + models::{rows::Row, TransactionId}, + TableId, +}; use kalamdb_raft::{RaftError, SharedDataApplier, TransactionApplyResult}; use kalamdb_transactions::StagedMutation; +use crate::{app_context::AppContext, applier::executor::CommandExecutorImpl}; + /// SharedDataApplier implementation using Unified Command Executor /// /// This is called by the Raft state machine when applying committed commands. @@ -35,12 +36,17 @@ impl ProviderSharedDataApplier { #[async_trait] impl SharedDataApplier for ProviderSharedDataApplier { - async fn insert(&self, table_id: &TableId, rows: &[Row]) -> Result { + async fn insert( + &self, + table_id: &TableId, + rows: &[Row], + commit_seq: u64, + ) -> Result { log::debug!("ProviderSharedDataApplier: Inserting into {} ({} rows)", table_id, rows.len()); self.executor .dml() - .insert_shared_data(table_id, rows) + .insert_shared_data_with_commit_seq(table_id, rows, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -50,12 +56,13 @@ impl SharedDataApplier for ProviderSharedDataApplier { table_id: &TableId, updates: &[Row], filter: Option<&str>, + commit_seq: u64, ) -> Result { log::debug!("ProviderSharedDataApplier: Updating {} ({} rows)", table_id, updates.len()); self.executor .dml() - .update_shared_data(table_id, updates, filter) + .update_shared_data_with_commit_seq(table_id, updates, filter, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -64,12 +71,13 @@ impl SharedDataApplier for ProviderSharedDataApplier { 
&self, table_id: &TableId, pk_values: Option<&[String]>, + commit_seq: u64, ) -> Result { log::debug!("ProviderSharedDataApplier: Deleting from {}", table_id); self.executor .dml() - .delete_shared_data(table_id, pk_values) + .delete_shared_data_with_commit_seq(table_id, pk_values, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -78,10 +86,11 @@ impl SharedDataApplier for ProviderSharedDataApplier { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + commit_seq: u64, ) -> Result { self.executor .dml() - .apply_shared_transaction_batch(transaction_id, mutations) + .apply_shared_transaction_batch_with_commit_seq(transaction_id, mutations, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } diff --git a/backend/crates/kalamdb-core/src/applier/raft/provider_user_data_applier.rs b/backend/crates/kalamdb-core/src/applier/raft/provider_user_data_applier.rs index 876600674..024d54b7b 100644 --- a/backend/crates/kalamdb-core/src/applier/raft/provider_user_data_applier.rs +++ b/backend/crates/kalamdb-core/src/applier/raft/provider_user_data_applier.rs @@ -5,17 +5,18 @@ //! //! Called by UserDataStateMachine after Raft consensus on all nodes. -use async_trait::async_trait; use std::sync::Arc; -use crate::app_context::AppContext; -use crate::applier::executor::CommandExecutorImpl; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{TransactionId, UserId}; -use kalamdb_commons::TableId; +use async_trait::async_trait; +use kalamdb_commons::{ + models::{rows::Row, TransactionId, UserId}, + TableId, +}; use kalamdb_raft::{RaftError, TransactionApplyResult, UserDataApplier}; use kalamdb_transactions::StagedMutation; +use crate::{app_context::AppContext, applier::executor::CommandExecutorImpl}; + /// UserDataApplier implementation using Unified Command Executor /// /// This is called by the Raft state machine when applying committed commands. 
@@ -40,6 +41,7 @@ impl UserDataApplier for ProviderUserDataApplier { table_id: &TableId, user_id: &UserId, rows: &[Row], + commit_seq: u64, ) -> Result { log::debug!( "ProviderUserDataApplier: Inserting into {} for user {} ({} rows)", @@ -50,7 +52,7 @@ impl UserDataApplier for ProviderUserDataApplier { self.executor .dml() - .insert_user_data(table_id, user_id, rows) + .insert_user_data_with_commit_seq(table_id, user_id, rows, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -61,6 +63,7 @@ impl UserDataApplier for ProviderUserDataApplier { user_id: &UserId, updates: &[Row], filter: Option<&str>, + commit_seq: u64, ) -> Result { log::debug!( "ProviderUserDataApplier: Updating {} for user {} ({} rows)", @@ -71,7 +74,7 @@ impl UserDataApplier for ProviderUserDataApplier { self.executor .dml() - .update_user_data(table_id, user_id, updates, filter) + .update_user_data_with_commit_seq(table_id, user_id, updates, filter, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -81,12 +84,13 @@ impl UserDataApplier for ProviderUserDataApplier { table_id: &TableId, user_id: &UserId, pk_values: Option<&[String]>, + commit_seq: u64, ) -> Result { log::debug!("ProviderUserDataApplier: Deleting from {} for user {}", table_id, user_id); self.executor .dml() - .delete_user_data(table_id, user_id, pk_values) + .delete_user_data_with_commit_seq(table_id, user_id, pk_values, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } @@ -95,10 +99,11 @@ impl UserDataApplier for ProviderUserDataApplier { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + commit_seq: u64, ) -> Result { self.executor .dml() - .apply_user_transaction_batch(transaction_id, mutations) + .apply_user_transaction_batch_with_commit_seq(transaction_id, mutations, commit_seq) .await .map_err(|e| RaftError::provider(e.to_string())) } diff --git a/backend/crates/kalamdb-core/src/cluster_handler.rs 
b/backend/crates/kalamdb-core/src/cluster_handler.rs index a147a63cc..45fac493e 100644 --- a/backend/crates/kalamdb-core/src/cluster_handler.rs +++ b/backend/crates/kalamdb-core/src/cluster_handler.rs @@ -2,16 +2,17 @@ //! //! Handles incoming inter-node gRPC calls and dispatches them into core services. -use std::sync::Arc; -use std::time::Instant; +use std::{sync::Arc, time::Instant}; use kalamdb_auth::{authenticate, AuthRequest, CoreUsersRepo, UserRepository}; -use kalamdb_commons::conversions::{ - mask_sensitive_rows_for_role, record_batch_to_json_arrays, schema_fields_from_arrow_schema, +use kalamdb_commons::{ + conversions::{ + mask_sensitive_rows_for_role, record_batch_to_json_arrays, schema_fields_from_arrow_schema, + }, + models::{ConnectionInfo, KalamCellValue, NamespaceId, UserId}, + schemas::SchemaField, + Role, }; -use kalamdb_commons::models::{ConnectionInfo, KalamCellValue, NamespaceId, UserId}; -use kalamdb_commons::schemas::SchemaField; -use kalamdb_commons::Role; use kalamdb_raft::{ forward_sql_param, ClusterMessageHandler, ForwardSqlParam, ForwardSqlRequest, ForwardSqlResponsePayload, GetNodeInfoRequest, GetNodeInfoResponse, PingRequest, RaftExecutor, @@ -19,11 +20,13 @@ use kalamdb_raft::{ use kalamdb_session::{AuthMethod, AuthSession}; use serde::Serialize; -use crate::app_context::AppContext; -use crate::sql::context::ExecutionContext; -use crate::sql::executor::PreparedExecutionStatement; -use crate::sql::ExecutionResult; -use crate::sql::SqlImpersonationService; +use crate::{ + app_context::AppContext, + sql::{ + context::ExecutionContext, executor::PreparedExecutionStatement, ExecutionResult, + SqlImpersonationService, + }, +}; // ── Response types (match SqlResponse JSON shape, no intermediate Value tree) ── @@ -228,9 +231,7 @@ impl CoreClusterHandler { let (sql, execute_as_username) = match kalamdb_sql::execute_as::parse_execute_as(statement)? 
{ - Some(envelope) => { - (envelope.inner_sql, Some(envelope.username)) - }, + Some(envelope) => (envelope.inner_sql, Some(envelope.username)), None => (trimmed.to_string(), None), }; @@ -429,7 +430,8 @@ impl ClusterMessageHandler for CoreClusterHandler { 400, "SQL_EXECUTION_ERROR", &format!( - "Statement {} failed: EXECUTE AS USER is not allowed on SHARED tables (table '{}'). AS USER impersonation is only supported for USER tables.", + "Statement {} failed: EXECUTE AS USER is not allowed on SHARED tables \ + (table '{}'). AS USER impersonation is only supported for USER tables.", idx + 1, table_name ), @@ -557,17 +559,22 @@ impl ClusterMessageHandler for CoreClusterHandler { #[cfg(test)] mod tests { - use super::CoreClusterHandler; - use crate::sql::ExecutionResult; - use arrow::array::{Int64Array, StringArray}; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; + use std::sync::Arc; + + use arrow::{ + array::{Int64Array, StringArray}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, + }; use datafusion::scalar::ScalarValue; - use kalamdb_commons::conversions::with_kalam_data_type_metadata; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::models::Role; + use kalamdb_commons::{ + conversions::with_kalam_data_type_metadata, + models::{datatypes::KalamDataType, Role}, + }; use kalamdb_raft::ForwardSqlParam; - use std::sync::Arc; + + use super::CoreClusterHandler; + use crate::sql::ExecutionResult; #[test] fn forwarded_rows_include_schema_and_rows() { diff --git a/backend/crates/kalamdb-core/src/error.rs b/backend/crates/kalamdb-core/src/error.rs index 0962cb0aa..53e604f7f 100644 --- a/backend/crates/kalamdb-core/src/error.rs +++ b/backend/crates/kalamdb-core/src/error.rs @@ -135,7 +135,10 @@ pub enum KalamDbError { #[error("Parameter count exceeded: maximum {max} parameters allowed, got {actual}")] ParamCountExceeded { max: usize, actual: usize }, - #[error("Parameter size 
exceeded: parameter at index {index} is {actual_bytes} bytes (max {max_bytes} bytes)")] + #[error( + "Parameter size exceeded: parameter at index {index} is {actual_bytes} bytes (max \ + {max_bytes} bytes)" + )] ParamSizeExceeded { index: usize, max_bytes: usize, diff --git a/backend/crates/kalamdb-core/src/error_extensions.rs b/backend/crates/kalamdb-core/src/error_extensions.rs index 738b7828f..28ee90bd4 100644 --- a/backend/crates/kalamdb-core/src/error_extensions.rs +++ b/backend/crates/kalamdb-core/src/error_extensions.rs @@ -22,7 +22,6 @@ //! // Invalid operation errors //! validate_input() //! .into_invalid_operation("Input validation failed")?; -//! //! ``` use crate::error::KalamDbError; diff --git a/backend/crates/kalamdb-core/src/lib.rs b/backend/crates/kalamdb-core/src/lib.rs index 7ffd88730..0c2741579 100644 --- a/backend/crates/kalamdb-core/src/lib.rs +++ b/backend/crates/kalamdb-core/src/lib.rs @@ -3,7 +3,8 @@ //! This crate provides the core storage functionality for KalamDB, a high-performance //! distributed database with: //! -//! - **Namespace/Table Management**: Multi-tenant data isolation with user, shared, and stream tables +//! - **Namespace/Table Management**: Multi-tenant data isolation with user, shared, and stream +//! tables //! - **Arrow Integration**: Native Apache Arrow columnar storage for efficient analytics //! - **RocksDB Backend**: Fast write path with sub-millisecond latency //! - **Parquet Storage**: Compressed columnar format for flushed segments diff --git a/backend/crates/kalamdb-core/src/live_adapters.rs b/backend/crates/kalamdb-core/src/live_adapters.rs index 02a754c0a..ec3b6e3ea 100644 --- a/backend/crates/kalamdb-core/src/live_adapters.rs +++ b/backend/crates/kalamdb-core/src/live_adapters.rs @@ -3,19 +3,31 @@ //! These bridge the boundary between the live-query crate and the core server, //! so kalamdb-live never depends on kalamdb-core directly. 
-use crate::schema_registry::SchemaRegistry; -use crate::sql::context::{ExecutionContext, ExecutionResult}; -use crate::sql::executor::SqlExecutor; +use std::sync::Arc; + use arrow::datatypes::Schema as ArrowSchema; use async_trait::async_trait; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::prelude::SessionContext; -use kalamdb_commons::models::{ReadContext, TableId, UserId}; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_commons::Role; -use kalamdb_live::error::LiveError; -use kalamdb_live::traits::{LiveSchemaLookup, LiveSqlExecutor}; -use std::sync::Arc; +use datafusion::{arrow::record_batch::RecordBatch, prelude::SessionContext}; +use kalamdb_commons::{ + models::{ReadContext, TableId, UserId}, + schemas::TableDefinition, + Role, TableType, +}; +use kalamdb_live::{ + error::LiveError, + traits::{LiveApplyBarrier, LiveSchemaLookup, LiveSqlExecutor}, +}; +use kalamdb_raft::{GroupId, RaftExecutor}; +use kalamdb_sharding::ShardRouter; + +use crate::{ + app_context::AppContext, + schema_registry::SchemaRegistry, + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::SqlExecutor, + }, +}; /// Adapts [`SchemaRegistry`] to the [`LiveSchemaLookup`] trait. pub struct SchemaRegistryLookup { @@ -82,3 +94,56 @@ impl LiveSqlExecutor for SqlExecutorAdapter { } } } + +/// Adapts the cluster executor to a live snapshot apply barrier. 
+pub struct RaftApplyBarrierAdapter { + app_context: Arc, +} + +impl RaftApplyBarrierAdapter { + pub fn new(app_context: Arc) -> Self { + Self { app_context } + } + + fn table_group(&self, table_type: TableType, user_id: &UserId) -> Option { + let executor = self.app_context.executor(); + let raft_executor = executor.as_any().downcast_ref::()?; + let manager = raft_executor.manager(); + let router = ShardRouter::new(manager.config().user_shards, manager.config().shared_shards); + + match table_type { + TableType::User | TableType::Stream => { + Some(GroupId::DataUserShard(router.user_shard_id(user_id))) + }, + TableType::Shared => Some(GroupId::DataSharedShard(router.shared_shard_id())), + TableType::System => Some(GroupId::Meta), + } + } +} + +#[async_trait] +impl LiveApplyBarrier for RaftApplyBarrierAdapter { + async fn wait_for_table_apply_barrier( + &self, + _table_id: &TableId, + table_type: TableType, + user_id: &UserId, + ) -> Result<(), LiveError> { + let Some(group_id) = self.table_group(table_type, user_id) else { + return Ok(()); + }; + + let executor = self.app_context.executor(); + let Some(raft_executor) = executor.as_any().downcast_ref::() else { + return Ok(()); + }; + + let manager = raft_executor.manager(); + + manager + .wait_for_local_apply_barrier(group_id, manager.config().replication_timeout) + .await + .map(|_| ()) + .map_err(|err| LiveError::ExecutionError(err.to_string())) + } +} diff --git a/backend/crates/kalamdb-core/src/manifest/flush/base.rs b/backend/crates/kalamdb-core/src/manifest/flush/base.rs index 1b2223290..9b86cc166 100644 --- a/backend/crates/kalamdb-core/src/manifest/flush/base.rs +++ b/backend/crates/kalamdb-core/src/manifest/flush/base.rs @@ -18,9 +18,10 @@ //! Implementations (users.rs, shared.rs, streams.rs) //! 
``` -use crate::error::KalamDbError; use serde::{Deserialize, Serialize}; +use crate::error::KalamDbError; + /// Metadata for user table flush operations #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] pub struct UserTableFlushMetadata { @@ -132,8 +133,12 @@ impl FlushDedupStats { table_ref ); log::debug!( - "📊 [FLUSH DEDUP] Version resolution complete: {} rows → {} unique (dedup: {:.1}%, deleted: {})", - self.rows_before_dedup, self.rows_after_dedup, self.dedup_ratio(), self.deleted_count + "📊 [FLUSH DEDUP] Version resolution complete: {} rows → {} unique (dedup: {:.1}%, \ + deleted: {})", + self.rows_before_dedup, + self.rows_after_dedup, + self.dedup_ratio(), + self.deleted_count ); log::debug!( "📊 [FLUSH DEDUP] Final: {} rows to flush ({} tombstones filtered)", @@ -183,19 +188,25 @@ pub trait TableFlush: Send + Sync { pub mod config { /// Number of rows to process per batch during scan pub const BATCH_SIZE: usize = 10000; + + /// Number of hot-storage keys to remove in one indexed-store delete batch. 
+ pub const DELETE_BATCH_SIZE: usize = 4096; } /// Common helper functions for flush operations pub mod helpers { - use crate::error::KalamDbError; - use crate::error_extensions::KalamDbResultExt; - use crate::providers::arrow_json_conversion::json_rows_to_arrow_batch; - use datafusion::arrow::datatypes::SchemaRef; - use datafusion::arrow::record_batch::RecordBatch; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::constants::SystemColumnNames; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::next_storage_key_bytes; + use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + scalar::ScalarValue, + }; + use kalamdb_commons::{ + constants::SystemColumnNames, models::rows::Row, next_storage_key_bytes, + }; + + use crate::{ + error::KalamDbError, error_extensions::KalamDbResultExt, + providers::arrow_json_conversion::json_rows_to_arrow_batch, + }; /// Extract primary key field name from Arrow schema /// diff --git a/backend/crates/kalamdb-core/src/manifest/flush/mod.rs b/backend/crates/kalamdb-core/src/manifest/flush/mod.rs index 0c66703c0..e9c118b82 100644 --- a/backend/crates/kalamdb-core/src/manifest/flush/mod.rs +++ b/backend/crates/kalamdb-core/src/manifest/flush/mod.rs @@ -19,15 +19,15 @@ //! 
``` pub mod base; +pub(crate) mod scope_writer; pub mod shared; pub mod users; // Re-export common types +// Re-export common configuration and helpers pub use base::{ - FlushDedupStats, FlushJobResult, FlushMetadata, SharedTableFlushMetadata, TableFlush, - UserTableFlushMetadata, + config, helpers, FlushDedupStats, FlushJobResult, FlushMetadata, SharedTableFlushMetadata, + TableFlush, UserTableFlushMetadata, }; -// Re-export common configuration and helpers -pub use base::{config, helpers}; pub use shared::SharedTableFlushJob; pub use users::UserTableFlushJob; diff --git a/backend/crates/kalamdb-core/src/manifest/flush/scope_writer.rs b/backend/crates/kalamdb-core/src/manifest/flush/scope_writer.rs new file mode 100644 index 000000000..5344e8283 --- /dev/null +++ b/backend/crates/kalamdb-core/src/manifest/flush/scope_writer.rs @@ -0,0 +1,184 @@ +use datafusion::arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; +use kalamdb_commons::{models::rows::Row, schemas::TableType, TableId, UserId}; +use std::sync::Arc; + +use super::base::helpers; +use crate::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::FlushManifestHelper, + schema_registry::SchemaRegistry, + vector::{flush_shared_scope_vectors, flush_user_scope_vectors}, +}; + +pub(crate) struct FlushScopeWriteResult { + pub rows_count: usize, + pub destination_path: String, +} + +pub(crate) struct FlushScopeWriter<'a> { + app_context: &'a Arc, + table_id: &'a TableId, + table_type: TableType, + schema: &'a SchemaRef, + unified_cache: &'a SchemaRegistry, + manifest_helper: &'a FlushManifestHelper, + bloom_filter_columns: &'a [String], + indexed_columns: &'a [(u64, String)], +} + +impl<'a> FlushScopeWriter<'a> { + pub(crate) fn new( + app_context: &'a Arc, + table_id: &'a TableId, + table_type: TableType, + schema: &'a SchemaRef, + unified_cache: &'a SchemaRegistry, + manifest_helper: &'a FlushManifestHelper, + bloom_filter_columns: &'a [String], + 
indexed_columns: &'a [(u64, String)], + ) -> Self { + Self { + app_context, + table_id, + table_type, + schema, + unified_cache, + manifest_helper, + bloom_filter_columns, + indexed_columns, + } + } + + pub(crate) fn rows_to_record_batch( + &self, + rows: Vec<(Vec, Row)>, + ) -> Result { + helpers::rows_into_arrow_batch(self.schema, rows) + } + + pub(crate) fn write_scope( + &self, + user_id: Option<&UserId>, + rows: Vec<(Vec, Row)>, + ) -> Result { + if rows.is_empty() { + return Err(KalamDbError::InvalidOperation( + "flush scope writer requires at least one row".to_string(), + )); + } + + let rows_count = rows.len(); + let batch = self.rows_to_record_batch(rows)?; + let cached = self.unified_cache.get(self.table_id).ok_or_else(|| { + KalamDbError::TableNotFound(format!("Table not found: {}", self.table_id)) + })?; + let storage_cached = cached + .storage_cached(&self.app_context.storage_registry()) + .into_kalamdb_error("Failed to get storage cache")?; + + if let Some(user_id) = user_id { + let storage_path = storage_cached + .get_relative_path(self.table_type, self.table_id, Some(user_id)) + .full_path; + + if !storage_path.contains(user_id.as_str()) { + log::error!( + "🚨 RLS VIOLATION: Flush storage path does NOT contain user_id! 
user={}, \ + path={}", + user_id.as_str(), + storage_path + ); + return Err(KalamDbError::Other(format!( + "RLS violation: flush path missing user_id isolation for user {}", + user_id.as_str() + ))); + } + } + + let batch_number = self.manifest_helper.get_next_batch_number(self.table_id, user_id)?; + let batch_filename = FlushManifestHelper::generate_batch_filename(batch_number); + let temp_filename = FlushManifestHelper::generate_temp_filename(batch_number); + + let temp_path = storage_cached + .get_file_path(self.table_type, self.table_id, user_id, &temp_filename) + .full_path; + let destination_path = storage_cached + .get_file_path(self.table_type, self.table_id, user_id, &batch_filename) + .full_path; + + if let Err(err) = self.manifest_helper.mark_syncing(self.table_id, user_id) { + log::warn!( + "⚠️ Failed to mark manifest as syncing for {} (user_id={:?}): {} \ + (continuing)", + self.table_id, + user_id.map(UserId::as_str), + err + ); + } + + let (min_seq, max_seq) = FlushManifestHelper::extract_seq_range(&batch); + let column_stats = FlushManifestHelper::extract_column_stats(&batch, self.indexed_columns); + let row_count = batch.num_rows() as u64; + + log::debug!("📝 [ATOMIC] Writing Parquet to temp path: {}, rows={}", temp_path, rows_count); + let result = storage_cached + .write_parquet_sync( + self.table_type, + self.table_id, + user_id, + &temp_filename, + self.schema.clone(), + vec![batch], + Some(self.bloom_filter_columns.to_vec()), + ) + .into_kalamdb_error("Filestore error")?; + + log::debug!("📝 [ATOMIC] Renaming {} -> {}", temp_path, destination_path); + storage_cached + .rename_sync(self.table_type, self.table_id, user_id, &temp_filename, &batch_filename) + .into_kalamdb_error("Failed to rename Parquet file to final location")?; + + let schema_version = helpers::get_schema_version(self.unified_cache, self.table_id); + self.manifest_helper.update_manifest_after_flush_with_stats( + self.table_id, + user_id, + batch_filename, + min_seq, + max_seq, + 
column_stats, + row_count, + result.size_bytes, + schema_version, + )?; + + match (self.table_type, user_id) { + (TableType::User, Some(user_id)) => flush_user_scope_vectors( + self.app_context, + self.table_id, + user_id, + self.schema, + &storage_cached, + )?, + (TableType::Shared, None) => flush_shared_scope_vectors( + self.app_context, + self.table_id, + self.schema, + &storage_cached, + )?, + (table_type, user_id) => { + return Err(KalamDbError::InvalidOperation(format!( + "invalid flush scope: table_type={:?}, user_id={:?}", + table_type, + user_id.map(UserId::as_str) + ))); + }, + } + + Ok(FlushScopeWriteResult { + rows_count, + destination_path, + }) + } +} diff --git a/backend/crates/kalamdb-core/src/manifest/flush/shared.rs b/backend/crates/kalamdb-core/src/manifest/flush/shared.rs index 6acb646ef..cb88af299 100644 --- a/backend/crates/kalamdb-core/src/manifest/flush/shared.rs +++ b/backend/crates/kalamdb-core/src/manifest/flush/shared.rs @@ -3,24 +3,27 @@ //! Flushes shared table data from RocksDB to a single Parquet file. //! All rows are written to one file per flush operation. 
-use super::base::{FlushJobResult, FlushMetadata, TableFlush}; -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::manifest::{FlushManifestHelper, ManifestService}; -use crate::schema_registry::SchemaRegistry; -use crate::vector::flush_shared_scope_vectors; +use std::sync::Arc; + use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::SharedTableRowId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::StorageKey; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::SharedTableRowId, + models::{rows::Row, TableId}, + StorageKey, +}; use kalamdb_store::EntityStore; use kalamdb_tables::{SharedTableIndexedStore, SharedTableRow}; -use std::sync::Arc; + +use super::base::{config, helpers, FlushDedupStats, FlushJobResult, FlushMetadata, TableFlush}; +use super::scope_writer::FlushScopeWriter; +use crate::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::{FlushManifestHelper, ManifestService}, + schema_registry::SchemaRegistry, +}; /// Shared table flush job /// @@ -84,27 +87,21 @@ impl SharedTableFlushJob { } } - /// Get current schema version for the table - fn get_schema_version(&self) -> u32 { - super::base::helpers::get_schema_version(&self.unified_cache, &self.table_id) - } - - /// Generate batch filename using manifest max_batch (T115) - /// Returns (batch_number, filename) - fn generate_batch_filename(&self) -> Result<(u64, String), KalamDbError> { - let batch_number = self.manifest_helper.get_next_batch_number(&self.table_id, None)?; - let filename = FlushManifestHelper::generate_batch_filename(batch_number); - log::debug!( - "[MANIFEST] Generated batch filename: {} (batch_number={})", - filename, - batch_number - ); 
- Ok((batch_number, filename)) + fn scan_batch_size(&self) -> usize { + self.app_context.config().flush.flush_batch_size.max(1) } - /// Convert stored rows to Arrow RecordBatch without JSON round-trips - fn rows_to_record_batch(&self, rows: &[(Vec, Row)]) -> Result { - super::base::helpers::rows_to_arrow_batch(&self.schema, rows) + fn scope_writer(&self) -> FlushScopeWriter<'_> { + FlushScopeWriter::new( + &self.app_context, + &self.table_id, + kalamdb_commons::schemas::TableType::Shared, + &self.schema, + &self.unified_cache, + &self.manifest_helper, + &self.bloom_filter_columns, + &self.indexed_columns, + ) } /// Delete flushed rows from RocksDB after successful Parquet write @@ -113,21 +110,22 @@ impl SharedTableFlushJob { return Ok(()); } - let parsed_keys: Result, _> = keys - .iter() - .map(|key_bytes| { - kalamdb_commons::ids::SharedTableRowId::from_bytes(key_bytes) - .into_invalid_operation("Invalid key bytes") - }) - .collect(); - let parsed_keys = parsed_keys?; - - // Batch delete: single RocksDB batch write for all main + index entries. 
- self.store - .delete_batch(&parsed_keys) - .into_kalamdb_error("Failed to delete flushed rows")?; + for chunk in keys.chunks(config::DELETE_BATCH_SIZE) { + let parsed_keys: Result, _> = chunk + .iter() + .map(|key_bytes| { + kalamdb_commons::ids::SharedTableRowId::from_bytes(key_bytes) + .into_invalid_operation("Invalid key bytes") + }) + .collect(); + let parsed_keys = parsed_keys?; + + self.store + .delete_batch(&parsed_keys) + .into_kalamdb_error("Failed to delete flushed rows")?; + } - log::debug!("Deleted {} flushed rows from storage", parsed_keys.len()); + log::debug!("Deleted {} flushed rows from storage", keys.len()); Ok(()) } } @@ -136,7 +134,6 @@ impl TableFlush for SharedTableFlushJob { fn execute(&self) -> Result { log::debug!("🔄 Starting shared table flush: table={}", self.table_id); - use super::base::{config, helpers, FlushDedupStats}; use std::collections::HashMap; // Get primary key field name from schema @@ -151,10 +148,11 @@ impl TableFlush for SharedTableFlushJob { // Batched scan with cursor let mut cursor: Option = None; + let scan_batch_size = self.scan_batch_size(); loop { let batch = self .store - .scan_typed_with_prefix_and_start(None, cursor.as_ref(), config::BATCH_SIZE) + .scan_typed_with_prefix_and_start(None, cursor.as_ref(), scan_batch_size) .map_err(|e| { log::error!("❌ Failed to scan rows for shared table={}: {}", self.table_id, e); KalamDbError::Other(format!("Failed to scan rows: {}", e)) @@ -203,7 +201,7 @@ impl TableFlush for SharedTableFlushJob { } // Check if we got fewer rows than batch size (end of data) - if batch_len < config::BATCH_SIZE { + if batch_len < scan_batch_size { break; } } @@ -220,8 +218,7 @@ impl TableFlush for SharedTableFlushJob { continue; } - let row_data = - helpers::add_system_columns(row.fields.clone(), row._seq.as_i64(), false); + let row_data = helpers::add_system_columns(row.fields, row._seq.as_i64(), false); rows.push((key_bytes, row_data)); } @@ -248,98 +245,14 @@ impl TableFlush for 
SharedTableFlushJob { self.table_id ); - // Convert rows to RecordBatch - let batch = self.rows_to_record_batch(&rows)?; - - // T114-T115: Generate batch filename using manifest (sequential numbering) - let (batch_number, batch_filename) = self.generate_batch_filename()?; - let temp_filename = FlushManifestHelper::generate_temp_filename(batch_number); - let cached = self.unified_cache.get(&self.table_id).ok_or_else(|| { - KalamDbError::TableNotFound(format!("Table not found: {}", self.table_id)) - })?; - - let storage_cached = cached - .storage_cached(&self.app_context.storage_registry()) - .into_kalamdb_error("Failed to get storage cache")?; - - let temp_path = storage_cached - .get_file_path(TableType::Shared, &self.table_id, None, &temp_filename) - .full_path; - let destination_path = storage_cached - .get_file_path(TableType::Shared, &self.table_id, None, &batch_filename) - .full_path; - - // Use cached bloom_filter_columns and indexed_columns (fetched once at job construction) - // This avoids per-flush lookups and matches UserTableFlushJob optimization pattern - let bloom_filter_columns = &self.bloom_filter_columns; - let indexed_columns = &self.indexed_columns; - - log::debug!("🌸 Bloom filters enabled for columns: {:?}", bloom_filter_columns); - - // ===== ATOMIC FLUSH PATTERN ===== - // Step 1: Mark manifest as syncing (flush in progress) - // If crash occurs after this, we know a flush was in progress - if let Err(e) = self.manifest_helper.mark_syncing(&self.table_id, None) { - log::warn!("⚠️ Failed to mark manifest as syncing (continuing anyway): {}", e); - } - - // Extract manifest stats from the batch BEFORE writing Parquet, - // so we can move the batch into write_parquet_sync without cloning. 
- let (min_seq, max_seq) = FlushManifestHelper::extract_seq_range(&batch); - let column_stats = FlushManifestHelper::extract_column_stats(&batch, indexed_columns); - let row_count = batch.num_rows() as u64; - - // Step 2: Write Parquet to TEMP location first (consumes batch — no clone) - log::debug!("📝 [ATOMIC] Writing Parquet to temp path: {}, rows={}", temp_path, rows_count); - let result = storage_cached - .write_parquet_sync( - TableType::Shared, - &self.table_id, - None, - &temp_filename, - self.schema.clone(), - vec![batch], - Some(bloom_filter_columns.clone()), - ) - .into_kalamdb_error("Filestore error")?; - - // Step 3: Rename temp file to final location (atomic operation) - log::debug!("📝 [ATOMIC] Renaming {} -> {}", temp_path, destination_path); - storage_cached - .rename_sync(TableType::Shared, &self.table_id, None, &temp_filename, &batch_filename) - .into_kalamdb_error("Failed to rename Parquet file to final location")?; - + let write_result = self.scope_writer().write_scope(None, rows)?; log::info!( "✅ Flushed {} rows for shared table={} to {}", rows_count, self.table_id, - destination_path + write_result.destination_path ); - let size_bytes = result.size_bytes; - - // Update manifest using pre-extracted stats (batch was consumed above) - let schema_version = self.get_schema_version(); - self.manifest_helper.update_manifest_after_flush_with_stats( - &self.table_id, - None, - batch_filename.clone(), - min_seq, - max_seq, - column_stats, - row_count, - size_bytes, - schema_version, - )?; - - // Flush vector hot-staging artifacts for embedding columns in shared scope. - flush_shared_scope_vectors( - &self.app_context, - &self.table_id, - &self.schema, - &storage_cached, - )?; - // Delete ALL flushed rows from RocksDB (including old versions) log::info!( "📊 [FLUSH CLEANUP] Deleting {} rows from hot storage (including {} old versions)", @@ -352,11 +265,9 @@ impl TableFlush for SharedTableFlushJob { // to avoid blocking job completion. 
Removing the inline compact() call here // eliminates a redundant double-compaction. - let parquet_path = destination_path; - Ok(FlushJobResult { rows_flushed: rows_count, - parquet_files: vec![parquet_path], + parquet_files: vec![write_result.destination_path], metadata: FlushMetadata::shared_table(), }) } diff --git a/backend/crates/kalamdb-core/src/manifest/flush/users.rs b/backend/crates/kalamdb-core/src/manifest/flush/users.rs index 14774c75d..9505e71c7 100644 --- a/backend/crates/kalamdb-core/src/manifest/flush/users.rs +++ b/backend/crates/kalamdb-core/src/manifest/flush/users.rs @@ -3,25 +3,28 @@ //! Flushes user table data from RocksDB to Parquet files, grouping by UserId. //! Each user's data is written to a separate Parquet file for RLS isolation. -use super::base::{config, helpers, FlushDedupStats, FlushJobResult, FlushMetadata, TableFlush}; -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::manifest::{FlushManifestHelper, ManifestService}; -use crate::schema_registry::SchemaRegistry; -use crate::vector::flush_user_scope_vectors; +use std::{collections::HashMap, sync::Arc}; + use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::UserTableRowId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::StorageKey; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::UserTableRowId, + models::{rows::Row, TableId, UserId}, + schemas::TableType, + StorageKey, +}; use kalamdb_store::entity_store::EntityStore; -use kalamdb_tables::UserTableIndexedStore; -use std::collections::HashMap; -use std::sync::Arc; +use kalamdb_tables::{UserTableIndexedStore, UserTableRow}; + +use super::base::{config, helpers, FlushDedupStats, FlushJobResult, FlushMetadata, 
TableFlush}; +use super::scope_writer::FlushScopeWriter; +use crate::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::{FlushManifestHelper, ManifestService}, + schema_registry::SchemaRegistry, +}; /// User table flush job /// @@ -82,156 +85,69 @@ impl UserTableFlushJob { } } - /// Get current schema version for the table - fn get_schema_version(&self) -> u32 { - super::base::helpers::get_schema_version(&self.unified_cache, &self.table_id) + fn scan_batch_size(&self) -> usize { + self.app_context.config().flush.flush_batch_size.max(1) } - /// Convert JSON rows to Arrow RecordBatch - fn rows_to_record_batch(&self, rows: &[(Vec, Row)]) -> Result { - super::base::helpers::rows_to_arrow_batch(&self.schema, rows) + fn scope_writer(&self) -> FlushScopeWriter<'_> { + FlushScopeWriter::new( + &self.app_context, + &self.table_id, + TableType::User, + &self.schema, + &self.unified_cache, + &self.manifest_helper, + &self.bloom_filter_columns, + &self.indexed_columns, + ) } - /// Flush accumulated rows for a single user to Parquet - fn flush_user_data( + fn rows_from_versions( + latest_versions: HashMap, UserTableRow, i64)>, + stats: &mut FlushDedupStats, + ) -> Vec<(Vec, Row)> { + let mut rows = Vec::with_capacity(latest_versions.len()); + + for (_pk_value, (key_bytes, row, _seq)) in latest_versions { + if row._deleted { + stats.tombstones_filtered += 1; + continue; + } + + let row_data = helpers::add_system_columns(row.fields, row._seq.as_i64(), false); + rows.push((key_bytes, row_data)); + } + + rows + } + + fn flush_user_versions( &self, user_id: &UserId, - rows: &[(Vec, Row)], + latest_versions: HashMap, UserTableRow, i64)>, + keys_to_delete: Vec>, parquet_files: &mut Vec, - bloom_filter_columns: &[String], - indexed_columns: &[(u64, String)], + stats: &mut FlushDedupStats, ) -> Result { + stats.rows_after_dedup += latest_versions.len(); + + let rows = Self::rows_from_versions(latest_versions, stats); if rows.is_empty() 
{ return Ok(0); } - let rows_count = rows.len(); log::debug!( "💾 Flushing {} rows for user {} (table={})", - rows_count, + rows.len(), user_id.as_str(), self.table_id ); - // Convert rows to RecordBatch - let batch = self.rows_to_record_batch(rows)?; - - // Resolve storage path for this user - let user_id_typed = user_id; - let cached = self.unified_cache.get(&self.table_id).ok_or_else(|| { - KalamDbError::TableNotFound(format!("Table not found: {}", self.table_id)) - })?; - let storage_cached = cached - .storage_cached(&self.app_context.storage_registry()) - .into_kalamdb_error("Failed to get storage cache")?; - let storage_path = storage_cached - .get_relative_path(TableType::User, &self.table_id, Some(&user_id_typed)) - .full_path; - - // RLS ASSERTION: Verify storage path contains user_id - if !storage_path.contains(user_id.as_str()) { - log::error!( - "🚨 RLS VIOLATION: Flush storage path does NOT contain user_id! user={}, path={}", - user_id.as_str(), - storage_path - ); - return Err(KalamDbError::Other(format!( - "RLS violation: flush path missing user_id isolation for user {}", - user_id.as_str() - ))); - } - - // Generate batch filename using manifest - let batch_number = self - .manifest_helper - .get_next_batch_number(&self.table_id, Some(&user_id_typed))?; - let batch_filename = FlushManifestHelper::generate_batch_filename(batch_number); - let temp_filename = FlushManifestHelper::generate_temp_filename(batch_number); - - let temp_path = storage_cached - .get_file_path(TableType::User, &self.table_id, Some(&user_id_typed), &temp_filename) - .full_path; - let destination_path = storage_cached - .get_file_path(TableType::User, &self.table_id, Some(&user_id_typed), &batch_filename) - .full_path; - - // ===== ATOMIC FLUSH PATTERN ===== - // Step 1: Mark manifest as syncing (flush in progress) - if let Err(e) = self.manifest_helper.mark_syncing(&self.table_id, Some(&user_id_typed)) { - log::warn!( - "⚠️ Failed to mark manifest as syncing for user {}: {} 
(continuing)", - user_id.as_str(), - e - ); - } + let write_result = self.scope_writer().write_scope(Some(user_id), rows)?; + parquet_files.push(write_result.destination_path); + self.delete_flushed_keys(&keys_to_delete)?; - // Extract manifest stats from the batch BEFORE writing Parquet, - // so we can move the batch into write_parquet_sync without cloning. - let (min_seq, max_seq) = FlushManifestHelper::extract_seq_range(&batch); - let column_stats = FlushManifestHelper::extract_column_stats(&batch, indexed_columns); - let row_count = batch.num_rows() as u64; - - // Step 2: Write Parquet to TEMP location first (consumes batch — no clone) - log::debug!("📝 [ATOMIC] Writing Parquet to temp path: {}, rows={}", temp_path, rows_count); - let result = storage_cached - .write_parquet_sync( - TableType::User, - &self.table_id, - Some(&user_id_typed), - &temp_filename, - self.schema.clone(), - vec![batch], - Some(bloom_filter_columns.to_vec()), - ) - .into_kalamdb_error("Filestore error")?; - - // Step 3: Rename temp file to final location (atomic operation) - log::debug!("📝 [ATOMIC] Renaming {} -> {}", temp_path, destination_path); - storage_cached - .rename_sync( - TableType::User, - &self.table_id, - Some(&user_id_typed), - &temp_filename, - &batch_filename, - ) - .into_kalamdb_error("Failed to rename Parquet file to final location")?; - - let size_bytes = result.size_bytes; - - // Update manifest using pre-extracted stats (batch was consumed above) - let schema_version = self.get_schema_version(); - self.manifest_helper.update_manifest_after_flush_with_stats( - &self.table_id, - Some(&user_id_typed), - batch_filename.clone(), - min_seq, - max_seq, - column_stats, - row_count, - size_bytes, - schema_version, - )?; - - // Flush vector hot-staging artifacts for embedding columns in this user scope. 
- flush_user_scope_vectors( - &self.app_context, - &self.table_id, - &user_id_typed, - &self.schema, - &storage_cached, - )?; - - log::debug!( - "✅ Flushed {} rows for user {} to {} (batch={})", - rows_count, - user_id, - destination_path, - batch_number - ); - - parquet_files.push(destination_path); - Ok(rows_count) + Ok(write_result.rows_count) } /// Delete flushed rows from RocksDB @@ -240,20 +156,20 @@ impl UserTableFlushJob { return Ok(()); } - let parsed_keys: Result, _> = keys - .iter() - .map(|key_bytes| { - kalamdb_commons::ids::UserTableRowId::from_storage_key(key_bytes) - .into_invalid_operation("Invalid key bytes") - }) - .collect(); - let parsed_keys = parsed_keys?; - - // Batch delete: single RocksDB batch write for all main + index entries. - // Much faster than per-key delete() which does get+batch per key. - self.store - .delete_batch(&parsed_keys) - .into_kalamdb_error("Failed to delete flushed rows")?; + for chunk in keys.chunks(config::DELETE_BATCH_SIZE) { + let parsed_keys: Result, _> = chunk + .iter() + .map(|key_bytes| { + kalamdb_commons::ids::UserTableRowId::from_storage_key(key_bytes) + .into_invalid_operation("Invalid key bytes") + }) + .collect(); + let parsed_keys = parsed_keys?; + + self.store + .delete_batch(&parsed_keys) + .into_kalamdb_error("Failed to delete flushed rows")?; + } log::debug!("Deleted {} flushed rows from storage", keys.len()); Ok(()) @@ -272,22 +188,24 @@ impl TableFlush for UserTableFlushJob { let pk_field = helpers::extract_pk_field_name(&self.schema); log::debug!("📊 [FLUSH DEDUP] Using primary key field: {}", pk_field); - // Map: (user_id, pk_value) -> (key_bytes, row, _seq) - let mut latest_versions: HashMap< - (UserId, String), - (Vec, kalamdb_tables::UserTableRow, i64), - > = HashMap::new(); - // Track ALL keys to delete (including old versions) - // Pre-allocate with reasonable capacity to reduce reallocations during scan - let mut all_keys_to_delete: Vec> = Vec::with_capacity(1024); + // UserTableRowId is 
ordered by (user_id, seq), so we only keep one user scope + // in memory at a time while still resolving all versions for that user. + let mut current_user: Option = None; + let mut latest_versions: HashMap, UserTableRow, i64)> = HashMap::new(); + let mut keys_to_delete: Vec> = Vec::with_capacity(1024); let mut stats = FlushDedupStats::default(); + let mut parquet_files: Vec = Vec::new(); + let mut total_rows_flushed = 0; + let mut users_count = 0; + let mut error_messages: Vec = Vec::new(); + let scan_batch_size = self.scan_batch_size(); // Batched scan with cursor let mut cursor: Option = None; loop { let batch = self .store - .scan_typed_with_prefix_and_start(None, cursor.as_ref(), config::BATCH_SIZE) + .scan_typed_with_prefix_and_start(None, cursor.as_ref(), scan_batch_size) .map_err(|e| { log::error!("❌ Failed to scan table={}: {}", self.table_id, e); KalamDbError::Other(format!("Failed to scan table: {}", e)) @@ -310,74 +228,108 @@ impl TableFlush for UserTableFlushJob { stats.rows_before_dedup += batch_len; for (row_id, row) in batch { - // Track ALL keys for deletion (before dedup) - all_keys_to_delete.push(row_id.storage_key()); - - // Parse user_id from key let user_id = row_id.user_id().clone(); + if current_user.as_ref().is_some_and(|current| current != &user_id) { + let finished_user = current_user.take().expect("current user exists"); + match self.flush_user_versions( + &finished_user, + std::mem::take(&mut latest_versions), + std::mem::take(&mut keys_to_delete), + &mut parquet_files, + &mut stats, + ) { + Ok(rows_count) => { + total_rows_flushed += rows_count; + if rows_count > 0 { + users_count += 1; + } + }, + Err(e) => { + let error_msg = + format!("Failed to flush user {}: {}", finished_user.as_str(), e); + log::error!("{}. 
Rows remain in hot storage.", error_msg); + error_messages.push(error_msg); + }, + } + } + + if current_user.is_none() { + current_user = Some(user_id.clone()); + } + + keys_to_delete.push(row_id.storage_key()); + // Extract PK value from fields let seq_val = row._seq.as_i64(); let pk_value = helpers::extract_pk_value(&row.fields, &pk_field, seq_val); - let group_key = (user_id.clone(), pk_value.clone()); - // Track deleted rows if row._deleted { stats.deleted_count += 1; } - // Keep MAX(_seq) per (user_id, pk_value) - match latest_versions.get(&group_key) { + // Keep MAX(_seq) per PK within the current user scope. + match latest_versions.get(&pk_value) { Some((_existing_key, _existing_row, existing_seq)) => { if seq_val > *existing_seq { - log::trace!("[FLUSH DEDUP] Replacing user={}, pk={}: old_seq={}, new_seq={}, deleted={}", - user_id.as_str(), pk_value, existing_seq, seq_val, row._deleted); - latest_versions.insert(group_key, (row_id.storage_key(), row, seq_val)); + log::trace!( + "[FLUSH DEDUP] Replacing user={}, pk={}: old_seq={}, new_seq={}, \ + deleted={}", + user_id.as_str(), + pk_value, + existing_seq, + seq_val, + row._deleted + ); + latest_versions.insert(pk_value, (row_id.storage_key(), row, seq_val)); } }, None => { - latest_versions.insert(group_key, (row_id.storage_key(), row, seq_val)); + latest_versions.insert(pk_value, (row_id.storage_key(), row, seq_val)); }, } } // Check if we got fewer rows than batch size (end of data) - if batch_len < config::BATCH_SIZE { + if batch_len < scan_batch_size { break; } } - stats.rows_after_dedup = latest_versions.len(); - - // STEP 2: Filter out deleted rows (tombstones) - let mut rows_by_user: HashMap, Row)>> = HashMap::new(); - - for ((user_id, _pk_value), (key_bytes, row, _seq)) in latest_versions { - // Skip soft-deleted rows (tombstones) - if row._deleted { - stats.tombstones_filtered += 1; - continue; + if let Some(finished_user) = current_user.take() { + match self.flush_user_versions( + &finished_user, 
+ latest_versions, + keys_to_delete, + &mut parquet_files, + &mut stats, + ) { + Ok(rows_count) => { + total_rows_flushed += rows_count; + if rows_count > 0 { + users_count += 1; + } + }, + Err(e) => { + let error_msg = + format!("Failed to flush user {}: {}", finished_user.as_str(), e); + log::error!("{}. Rows remain in hot storage.", error_msg); + error_messages.push(error_msg); + }, } - - // Convert to JSON and inject system columns - let row_data = - helpers::add_system_columns(row.fields.clone(), row._seq.as_i64(), false); - - rows_by_user.entry(user_id).or_default().push((key_bytes, row_data)); } // Log dedup statistics stats.log_summary(&self.table_id.to_string()); - let rows_to_flush = rows_by_user.values().map(|v| v.len()).sum::(); log::debug!( "📊 [FLUSH USER] Partitioned into {} users, {} rows to flush", - rows_by_user.len(), - rows_to_flush + users_count, + total_rows_flushed ); // If no rows to flush, return early - if rows_by_user.is_empty() { + if total_rows_flushed == 0 && error_messages.is_empty() { log::debug!( "⚠️ No rows to flush for user table={} (empty table or all deleted)", self.table_id @@ -389,72 +341,32 @@ impl TableFlush for UserTableFlushJob { }); } - // Flush each user's data to separate Parquet file - let mut parquet_files: Vec = Vec::new(); - let mut total_rows_flushed = 0; - let mut error_messages: Vec = Vec::new(); - let mut flush_succeeded = true; - - for (user_id, rows) in &rows_by_user { - match self.flush_user_data( - user_id, - rows, - &mut parquet_files, - &self.bloom_filter_columns, - &self.indexed_columns, - ) { - Ok(rows_count) => { - total_rows_flushed += rows_count; - }, - Err(e) => { - let error_msg = format!( - "Failed to flush {} rows for user {}: {}", - rows.len(), - user_id.as_str(), - e - ); - log::error!("{}. 
Rows kept in buffer.", error_msg); - error_messages.push(error_msg); - flush_succeeded = false; - }, - } - } - - // Only delete ALL rows (including old versions) if ALL users flushed successfully - if flush_succeeded { - log::debug!( - "📊 [FLUSH CLEANUP] Deleting {} rows from hot storage (including {} old versions)", - all_keys_to_delete.len(), - all_keys_to_delete.len() - rows_by_user.values().map(|v| v.len()).sum::() - ); - if let Err(e) = self.delete_flushed_keys(&all_keys_to_delete) { - log::error!("Failed to delete flushed rows: {}", e); - error_messages.push(format!("Failed to delete flushed rows: {}", e)); - } - - // Manifest cache is already updated during flush; keep entries to - // ensure system.manifest reflects the latest segments. - } - // If any user flush failed, treat entire job as failed if !error_messages.is_empty() { let summary = format!( - "One or more user partitions failed to flush ({} errors). Rows flushed before failure: {}. First error: {}", - error_messages.len(), total_rows_flushed, + "One or more user partitions failed to flush ({} errors). Rows flushed before \ + failure: {}. 
First error: {}", + error_messages.len(), + total_rows_flushed, error_messages.first().cloned().unwrap_or_else(|| "unknown error".to_string()) ); log::error!("❌ User table flush failed: table={} — {}", self.table_id, summary); return Err(KalamDbError::Other(summary)); } - log::debug!("✅ User table flush completed: table={}, rows_flushed={}, users_count={}, parquet_files={}", - self.table_id, - total_rows_flushed, rows_by_user.len(), parquet_files.len()); + log::debug!( + "✅ User table flush completed: table={}, rows_flushed={}, users_count={}, \ + parquet_files={}", + self.table_id, + total_rows_flushed, + users_count, + parquet_files.len() + ); Ok(FlushJobResult { rows_flushed: total_rows_flushed, parquet_files, - metadata: FlushMetadata::user_table(rows_by_user.len(), error_messages), + metadata: FlushMetadata::user_table(users_count, error_messages), }) } diff --git a/backend/crates/kalamdb-core/src/manifest/flush_helper.rs b/backend/crates/kalamdb-core/src/manifest/flush_helper.rs index a8d578e57..6969b993b 100644 --- a/backend/crates/kalamdb-core/src/manifest/flush_helper.rs +++ b/backend/crates/kalamdb-core/src/manifest/flush_helper.rs @@ -3,19 +3,16 @@ //! Centralizes manifest-related logic used during flush operations to eliminate //! code duplication between user and shared table flush implementations. 
+use std::{collections::HashMap, path::Path, sync::Arc}; + +use datafusion::arrow::{array::*, compute, record_batch::RecordBatch}; +use kalamdb_commons::{ + arrow_utils::compute_min_max, constants::SystemColumnNames, ids::SeqId, TableId, UserId, +}; +use kalamdb_system::{ColumnStats, Manifest, SegmentMetadata}; + use super::ManifestService; use crate::error::KalamDbError; -use datafusion::arrow::array::*; -use datafusion::arrow::compute; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::arrow_utils::compute_min_max; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::{TableId, UserId}; -use kalamdb_system::{ColumnStats, Manifest, SegmentMetadata}; -use std::collections::HashMap; -use std::path::Path; -use std::sync::Arc; /// Helper for manifest operations during flush pub struct FlushManifestHelper { @@ -232,7 +229,8 @@ impl FlushManifestHelper { })?; log::debug!( - "[MANIFEST] ✅ Updated manifest and cache: {} (user_id={:?}, file={}, rows={}, size={} bytes)", + "[MANIFEST] ✅ Updated manifest and cache: {} (user_id={:?}, file={}, rows={}, \ + size={} bytes)", table_id, user_id.map(|u| u.as_str()), file_path.display(), @@ -318,11 +316,15 @@ impl FlushManifestHelper { #[cfg(test)] mod tests { - use super::*; - use datafusion::arrow::array::{Int64Array, StringArray}; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; use std::sync::Arc as StdArc; + use datafusion::arrow::{ + array::{Int64Array, StringArray}, + datatypes::{DataType, Field, Schema}, + }; + + use super::*; + #[test] fn test_generate_batch_filename() { assert_eq!(FlushManifestHelper::generate_batch_filename(0), "batch-0.parquet"); diff --git a/backend/crates/kalamdb-core/src/manifest/mod.rs b/backend/crates/kalamdb-core/src/manifest/mod.rs index b8cfe59b1..f37451663 100644 --- a/backend/crates/kalamdb-core/src/manifest/mod.rs +++ b/backend/crates/kalamdb-core/src/manifest/mod.rs @@ -11,8 +11,7 @@ pub mod flush; mod 
flush_helper; -pub use kalamdb_tables::manifest::manifest_helpers; -pub use kalamdb_tables::manifest::planner; +pub use kalamdb_tables::manifest::{manifest_helpers, planner}; mod service; pub use flush::{ @@ -20,6 +19,7 @@ pub use flush::{ TableFlush, UserTableFlushJob, UserTableFlushMetadata, }; pub use flush_helper::FlushManifestHelper; -pub use kalamdb_tables::manifest::{ensure_manifest_ready, load_row_from_parquet_by_seq}; -pub use kalamdb_tables::manifest::{ManifestAccessPlanner, RowGroupSelection}; +pub use kalamdb_tables::manifest::{ + ensure_manifest_ready, load_row_from_parquet_by_seq, ManifestAccessPlanner, RowGroupSelection, +}; pub use service::ManifestService; diff --git a/backend/crates/kalamdb-core/src/manifest/service.rs b/backend/crates/kalamdb-core/src/manifest/service.rs index 2b6fcc8d6..4ab56b54f 100644 --- a/backend/crates/kalamdb-core/src/manifest/service.rs +++ b/backend/crates/kalamdb-core/src/manifest/service.rs @@ -7,24 +7,23 @@ //! //! Key type: (TableId, Option) for type-safe cache access. 
+use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + use bytes::Bytes; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::{ManifestId, TableId, UserId}; +use kalamdb_commons::{ids::SeqId, ManifestId, TableId, UserId}; use kalamdb_configs::ManifestCacheSettings; use kalamdb_filestore::StorageRegistry; use kalamdb_store::{StorageBackend, StorageError}; -use kalamdb_system::providers::ManifestTableProvider; -use kalamdb_system::{ - FileSubfolderState, Manifest, ManifestCacheEntry, SegmentMetadata, SyncState, -}; use kalamdb_system::{ + providers::ManifestTableProvider, FileSubfolderState, Manifest, ManifestCacheEntry, ManifestService as ManifestServiceTrait, SchemaRegistry as SchemaRegistryTrait, + SegmentMetadata, SyncState, }; use kalamdb_tables::TableError; use log::{debug, info, warn}; -use std::collections::HashMap; -use std::collections::HashSet; -use std::sync::Arc; const MAX_MANIFEST_SCAN_LIMIT: usize = 100000; @@ -983,10 +982,10 @@ impl ManifestServiceTrait for ManifestService { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::{NamespaceId, TableName}; - use kalamdb_store::test_utils::InMemoryBackend; - use kalamdb_store::StorageBackend; + use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; + + use super::*; fn create_test_service() -> ManifestService { let backend: Arc = Arc::new(InMemoryBackend::new()); diff --git a/backend/crates/kalamdb-core/src/operations/scan.rs b/backend/crates/kalamdb-core/src/operations/scan.rs index f045759af..cdca1707e 100644 --- a/backend/crates/kalamdb-core/src/operations/scan.rs +++ b/backend/crates/kalamdb-core/src/operations/scan.rs @@ -1,13 +1,14 @@ use std::sync::Arc; -use arrow::datatypes::DataType; -use arrow::record_batch::RecordBatch; -use datafusion::physical_plan::{collect, ExecutionPlan}; -use datafusion::prelude::{col, lit, SessionContext}; +use arrow::{datatypes::DataType, record_batch::RecordBatch}; +use datafusion::{ + physical_plan::{collect, ExecutionPlan}, + 
prelude::{col, lit, SessionContext}, +}; +use kalamdb_commons::TableId; use super::error::OperationError; use crate::schema_registry::SchemaRegistry; -use kalamdb_commons::TableId; /// Convert a string filter value to a typed DataFusion `Expr` literal /// based on the Arrow column type. Falls back to string literal for unknown types. diff --git a/backend/crates/kalamdb-core/src/operations/service.rs b/backend/crates/kalamdb-core/src/operations/service.rs index b4f5afc0e..165ec5149 100644 --- a/backend/crates/kalamdb-core/src/operations/service.rs +++ b/backend/crates/kalamdb-core/src/operations/service.rs @@ -1,28 +1,31 @@ -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; use async_trait::async_trait; use datafusion::prelude::SessionContext; -use kalamdb_commons::models::pg_operations::{ - DeleteRequest, InsertRequest, MutationResult, ScanRequest, ScanResult, UpdateRequest, +use kalamdb_commons::{ + models::{ + pg_operations::{ + DeleteRequest, InsertRequest, MutationResult, ScanRequest, ScanResult, UpdateRequest, + }, + rows::Row, + OperationKind, ReadContext, Role, TransactionId, TransactionOrigin, UserId, + }, + NamespaceId, TableType, }; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{ - OperationKind, ReadContext, Role, TransactionId, TransactionOrigin, UserId, -}; -use kalamdb_commons::{NamespaceId, TableType}; use kalamdb_pg::{LivePgTransaction, OperationExecutor}; use kalamdb_session_datafusion::SessionUserContext; use kalamdb_transactions::{ build_insert_staged_mutations, TransactionQueryContext, TransactionQueryExtension, }; -use std::collections::BTreeMap; use tonic::Status; use super::scan; -use crate::app_context::AppContext; -use crate::sql::ExecutionContext; -use crate::transactions::{ - CoordinatorAccessValidator, CoordinatorOverlayView, ExecutionOwnerKey, StagedMutation, +use crate::{ + app_context::AppContext, + sql::ExecutionContext, + transactions::{ + CoordinatorAccessValidator, CoordinatorOverlayView, 
ExecutionOwnerKey, StagedMutation, + }, }; /// Domain-typed operation executor for Tier-2 (typed) callers. @@ -225,7 +228,10 @@ impl OperationService { #[async_trait] impl OperationExecutor for OperationService { - async fn active_transaction(&self, session_id: &str) -> Result, Status> { + async fn active_transaction( + &self, + session_id: &str, + ) -> Result, Status> { let Some((transaction_id, handle)) = self.active_transaction_handle_for_session(Some(session_id))? else { @@ -467,22 +473,30 @@ fn require_user_id(user_id: Option, operation: &str) -> Result Row { Row { diff --git a/backend/crates/kalamdb-core/src/operations/table_cleanup.rs b/backend/crates/kalamdb-core/src/operations/table_cleanup.rs index 943a63f25..e4b459c7d 100644 --- a/backend/crates/kalamdb-core/src/operations/table_cleanup.rs +++ b/backend/crates/kalamdb-core/src/operations/table_cleanup.rs @@ -3,15 +3,19 @@ //! These functions are shared by the `DropTableHandler` (in `kalamdb-handlers`) //! and the `CleanupExecutor` (in `kalamdb-jobs`). 
-use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::schema_registry::SchemaRegistry; -use kalamdb_commons::models::{StorageId, TableId}; -use kalamdb_commons::schemas::TableType; -use serde::{Deserialize, Serialize}; use std::sync::Arc; +use kalamdb_commons::{ + models::{StorageId, TableId}, + schemas::TableType, +}; +use serde::{Deserialize, Serialize}; + +use crate::{ + app_context::AppContext, error::KalamDbError, error_extensions::KalamDbResultExt, + schema_registry::SchemaRegistry, +}; + /// Cleanup operation types #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/backend/crates/kalamdb-core/src/providers/arrow_json_conversion.rs b/backend/crates/kalamdb-core/src/providers/arrow_json_conversion.rs index 459ffaba6..1226f95ff 100644 --- a/backend/crates/kalamdb-core/src/providers/arrow_json_conversion.rs +++ b/backend/crates/kalamdb-core/src/providers/arrow_json_conversion.rs @@ -1,11 +1,14 @@ -use crate::error::KalamDbError; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::conversions::arrow_json_conversion as commons; -use kalamdb_commons::errors::CommonError; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::KalamCellValue; use std::collections::HashMap; +use datafusion::arrow::record_batch::RecordBatch; +use kalamdb_commons::{ + conversions::arrow_json_conversion as commons, + errors::CommonError, + models::{rows::Row, KalamCellValue}, +}; + +use crate::error::KalamDbError; + fn map_error(err: CommonError) -> KalamDbError { KalamDbError::InvalidOperation(err.to_string()) } diff --git a/backend/crates/kalamdb-core/src/providers/mod.rs b/backend/crates/kalamdb-core/src/providers/mod.rs index 6f17f8aa7..fb72d2b81 100644 --- a/backend/crates/kalamdb-core/src/providers/mod.rs +++ b/backend/crates/kalamdb-core/src/providers/mod.rs @@ -3,7 +3,6 @@ pub mod arrow_json_conversion; 
pub use kalamdb_tables::utils::*; - // Preserve module paths for internal imports pub use kalamdb_tables::utils::{ base, core, parquet, pk, row_utils, shared, streams, unified_dml, users, version_resolution, diff --git a/backend/crates/kalamdb-core/src/schema_registry/cached_table_data.rs b/backend/crates/kalamdb-core/src/schema_registry/cached_table_data.rs index 999653f4f..755dcf949 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/cached_table_data.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/cached_table_data.rs @@ -1,16 +1,19 @@ -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use datafusion::datasource::TableProvider; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::models::{StorageId, TableId}; -use kalamdb_commons::schemas::{TableOptions, TableType}; -use kalamdb_commons::TableAccess; +use kalamdb_commons::{ + constants::SystemColumnNames, + models::{schemas::TableDefinition, StorageId, TableId}, + schemas::{TableOptions, TableType}, + TableAccess, +}; use kalamdb_filestore::StorageCached; use parking_lot::RwLock; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; + +use crate::{app_context::AppContext, error::KalamDbError, error_extensions::KalamDbResultExt}; /// Lightweight table info for file operations #[derive(Debug, Clone)] @@ -208,7 +211,8 @@ impl CachedTableData { /// with built-in path template resolution. ObjectStore instances are /// cached per-storage in StorageRegistry, not per-table. 
/// - /// **Performance**: First call builds store (~50-200μs for cloud), subsequent calls return cached Arc (~1μs) + /// **Performance**: First call builds store (~50-200μs for cloud), subsequent calls return + /// cached Arc (~1μs) /// /// # Returns /// Arc-wrapped StorageCached for zero-copy sharing across operations diff --git a/backend/crates/kalamdb-core/src/schema_registry/mod.rs b/backend/crates/kalamdb-core/src/schema_registry/mod.rs index bb829b973..d40953d63 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/mod.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/mod.rs @@ -1,21 +1,18 @@ //! Schema Registry module for KalamDB Core //! //! Provides unified caching and metadata management for table schemas. -//! All schema-related functionality has been consolidated here from the former kalamdb-registry crate. +//! All schema-related functionality has been consolidated here from the former kalamdb-registry +//! crate. pub mod cached_table_data; pub mod projection; pub mod registry; pub use cached_table_data::{CachedTableData, TableEntry}; -pub use kalamdb_commons::helpers::string_interner::SystemColumns; -pub use kalamdb_views::error::RegistryError; - -pub use kalamdb_system::SystemColumnsService; -pub use projection::{project_batch, schemas_compatible}; -pub use registry::SchemaRegistry; -pub use registry::TablesSchemaRegistryAdapter; - // Re-export common types from kalamdb_commons for convenience pub use kalamdb_commons::models::{NamespaceId, TableName, UserId}; -pub use kalamdb_commons::schemas::TableType; +pub use kalamdb_commons::{helpers::string_interner::SystemColumns, schemas::TableType}; +pub use kalamdb_system::SystemColumnsService; +pub use kalamdb_views::error::RegistryError; +pub use projection::{project_batch, schemas_compatible}; +pub use registry::{SchemaRegistry, TablesSchemaRegistryAdapter}; diff --git a/backend/crates/kalamdb-core/src/schema_registry/projection.rs 
b/backend/crates/kalamdb-core/src/schema_registry/projection.rs index d7c579646..a9d8da95c 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/projection.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/projection.rs @@ -9,13 +9,17 @@ //! This ensures that all RecordBatches returned from Parquet files match the current //! table schema, regardless of when the file was written. -use super::RegistryError; -use arrow::array::{ArrayRef, NullArray}; -use arrow::compute::cast; -use arrow::datatypes::{DataType, Schema, SchemaRef}; -use arrow::record_batch::RecordBatch; use std::sync::Arc; +use arrow::{ + array::{ArrayRef, NullArray}, + compute::cast, + datatypes::{DataType, Schema, SchemaRef}, + record_batch::RecordBatch, +}; + +use super::RegistryError; + /// Project a RecordBatch from an old schema to a new schema /// /// # Arguments @@ -189,9 +193,12 @@ fn types_compatible(old_type: &DataType, new_type: &DataType) -> bool { #[cfg(test)] mod tests { + use arrow::{ + array::{Int32Array, Int64Array, StringArray}, + datatypes::Field, + }; + use super::*; - use arrow::array::{Int32Array, Int64Array, StringArray}; - use arrow::datatypes::Field; #[test] fn test_project_batch_same_schema() { diff --git a/backend/crates/kalamdb-core/src/schema_registry/registry/core.rs b/backend/crates/kalamdb-core/src/schema_registry/registry/core.rs index 2f24113c0..aa2f3b3b0 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/registry/core.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/registry/core.rs @@ -1,28 +1,36 @@ //! 
Core implementation of SchemaRegistry -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::schema_registry::cached_table_data::CachedTableData; +// use kalamdb_system::NotificationService as NotificationServiceTrait; +use std::{ + collections::{HashMap, HashSet}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, OnceLock, + }, +}; + use chrono::Utc; use dashmap::DashMap; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::datasource::TableProvider; -use datafusion::logical_expr::expr::ScalarFunction as ScalarFunctionExpr; -use datafusion::logical_expr::Expr; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::conversions::json_value_to_scalar; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::models::{StorageId, TableId, TableVersionId}; -use kalamdb_commons::schemas::{ColumnDefault, ColumnDefinition, TableType}; -use kalamdb_commons::SystemTable; +use datafusion::{ + arrow::datatypes::SchemaRef, + datasource::TableProvider, + logical_expr::{expr::ScalarFunction as ScalarFunctionExpr, Expr}, +}; +use kalamdb_commons::{ + constants::SystemColumnNames, + conversions::json_value_to_scalar, + datatypes::KalamDataType, + models::{schemas::TableDefinition, StorageId, TableId, TableVersionId}, + schemas::{ColumnDefault, ColumnDefinition, TableType}, + SystemTable, +}; use kalamdb_live::models::ChangeNotification; use kalamdb_system::{NotificationService, SchemaRegistry as SchemaRegistryTrait}; -// use kalamdb_system::NotificationService as NotificationServiceTrait; -use std::collections::{HashMap, HashSet}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::{Arc, OnceLock}; + +use crate::{ + app_context::AppContext, error::KalamDbError, error_extensions::KalamDbResultExt, + schema_registry::cached_table_data::CachedTableData, +}; #[derive(Debug, Default)] struct 
SystemSchemaReconcileStats { @@ -211,7 +219,8 @@ impl SchemaRegistry { if stats.created > 0 || stats.upgraded > 0 { log::info!( - "[SchemaRegistry] System schema reconciliation: created={}, upgraded={}, unchanged={}", + "[SchemaRegistry] System schema reconciliation: created={}, upgraded={}, \ + unchanged={}", stats.created, stats.upgraded, stats.unchanged @@ -555,10 +564,6 @@ impl SchemaRegistry { &self, table_def: &TableDefinition, ) -> Result, KalamDbError> { - use crate::providers::{ - SharedTableProvider, StreamTableProvider, TableProviderCore, UserTableProvider, - }; - use crate::schema_registry::TablesSchemaRegistryAdapter; use kalamdb_commons::schemas::TableOptions; use kalamdb_sharding::ShardRouter; use kalamdb_tables::{ @@ -566,6 +571,13 @@ impl SchemaRegistry { StreamTableStoreConfig, }; + use crate::{ + providers::{ + SharedTableProvider, StreamTableProvider, TableProviderCore, UserTableProvider, + }, + schema_registry::TablesSchemaRegistryAdapter, + }; + let app_ctx = self.app_context(); let table_id = TableId::from_strings(table_def.namespace_id.as_str(), table_def.table_name.as_str()); @@ -911,7 +923,8 @@ impl SchemaRegistry { // Check if table already exists - if so, deregister it first if schema.table_exist(table_name) { log::debug!( - "[SchemaRegistry] Table {} already registered in DataFusion; deregistering before re-registration", + "[SchemaRegistry] Table {} already registered in DataFusion; deregistering \ + before re-registration", table_id ); match schema.deregister_table(table_name) { @@ -922,9 +935,11 @@ impl SchemaRegistry { ); }, Ok(None) => { - // Table existed but deregister returned None - shouldn't happen but handle it + // Table existed but deregister returned None - shouldn't happen but handle + // it log::warn!( - "[SchemaRegistry] table_exist returned true but deregister_table returned None for {}", + "[SchemaRegistry] table_exist returned true but deregister_table \ + returned None for {}", table_id ); }, @@ -1233,12 +1248,15 @@ 
impl Default for SchemaRegistry { #[cfg(test)] mod tests { - use super::SchemaRegistry; use chrono::{Duration, Utc}; - use kalamdb_commons::datatypes::KalamDataType; - use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition}; - use kalamdb_commons::schemas::{ColumnDefault, TableOptions, TableType}; - use kalamdb_commons::{NamespaceId, TableName}; + use kalamdb_commons::{ + datatypes::KalamDataType, + models::schemas::{ColumnDefinition, TableDefinition}, + schemas::{ColumnDefault, TableOptions, TableType}, + NamespaceId, TableName, + }; + + use super::SchemaRegistry; fn base_table_definition() -> TableDefinition { TableDefinition::new( diff --git a/backend/crates/kalamdb-core/src/schema_registry/registry/mod.rs b/backend/crates/kalamdb-core/src/schema_registry/registry/mod.rs index a78474b4e..05f1b00eb 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/registry/mod.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/registry/mod.rs @@ -4,4 +4,5 @@ pub mod core; pub mod tables_adapter; pub use core::SchemaRegistry; + pub use tables_adapter::TablesSchemaRegistryAdapter; diff --git a/backend/crates/kalamdb-core/src/schema_registry/registry/tables_adapter.rs b/backend/crates/kalamdb-core/src/schema_registry/registry/tables_adapter.rs index 22afb7366..448a9c145 100644 --- a/backend/crates/kalamdb-core/src/schema_registry/registry/tables_adapter.rs +++ b/backend/crates/kalamdb-core/src/schema_registry/registry/tables_adapter.rs @@ -1,11 +1,13 @@ -use crate::error::KalamDbError; -use crate::schema_registry::SchemaRegistry; +use std::sync::Arc; + use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::schemas::TableDefinition as CommonsTableDefinition; -use kalamdb_commons::{StorageId, TableId}; +use kalamdb_commons::{ + models::schemas::TableDefinition as CommonsTableDefinition, StorageId, TableId, +}; use kalamdb_system::SchemaRegistry as SchemaRegistryTrait; use kalamdb_tables::TableError; -use std::sync::Arc; + +use 
crate::{error::KalamDbError, schema_registry::SchemaRegistry}; /// Adapter to expose SchemaRegistry with TableError for kalamdb-tables providers. pub struct TablesSchemaRegistryAdapter { diff --git a/backend/crates/kalamdb-core/src/slow_query_logger.rs b/backend/crates/kalamdb-core/src/slow_query_logger.rs index bfd35f808..1a9f93185 100644 --- a/backend/crates/kalamdb-core/src/slow_query_logger.rs +++ b/backend/crates/kalamdb-core/src/slow_query_logger.rs @@ -3,14 +3,13 @@ //! Logs queries that exceed a configurable threshold to a separate slow.log file. //! Designed for minimal performance overhead using async file I/O. -use crate::schema_registry::TableType; +use std::{fs::OpenOptions, io::Write, path::Path, sync::Arc}; + use kalamdb_commons::models::{TableName, UserId}; -use std::fs::OpenOptions; -use std::io::Write; -use std::path::Path; -use std::sync::Arc; use tokio::sync::mpsc; +use crate::schema_registry::TableType; + /// Slow query log entry #[derive(Debug, Clone)] pub struct SlowQueryEntry { @@ -18,7 +17,8 @@ pub struct SlowQueryEntry { pub duration_secs: f64, pub row_count: usize, pub user_id: UserId, - pub table_type: TableType, //use backend/crates/kalamdb-commons/src/models/schemas/table_type.rs + pub table_type: TableType, /* use backend/crates/kalamdb-commons/src/models/schemas/ + * table_type.rs */ pub table_name: Option, pub timestamp: i64, } @@ -34,7 +34,8 @@ impl SlowQueryLogger { /// /// # Arguments /// * `log_path` - Path to slow.log file - /// * `threshold_ms` - Minimum duration to log in milliseconds (queries faster than this are ignored) + /// * `threshold_ms` - Minimum duration to log in milliseconds (queries faster than this are + /// ignored) /// /// # Returns /// Arc-wrapped logger instance @@ -64,7 +65,8 @@ impl SlowQueryLogger { .unwrap_or_else(|| "unknown".to_string()); let log_line = format!( - "[{}] SLOW QUERY - user={}, table={} ({}), duration={:.3}s, rows={}, query={}\n", + "[{}] SLOW QUERY - user={}, table={} ({}), 
duration={:.3}s, rows={}, \ + query={}\n", timestamp, entry.user_id, table_info, @@ -167,9 +169,10 @@ impl SlowQueryLogger { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::UserId; + use super::*; + #[tokio::test] async fn test_slow_query_logger_threshold() { let logger = SlowQueryLogger::new("/tmp/test_slow.log".to_string(), 1200); // 1.2 seconds diff --git a/backend/crates/kalamdb-core/src/sql/context/execution_context.rs b/backend/crates/kalamdb-core/src/sql/context/execution_context.rs index 00170ba9a..30ea7a340 100644 --- a/backend/crates/kalamdb-core/src/sql/context/execution_context.rs +++ b/backend/crates/kalamdb-core/src/sql/context/execution_context.rs @@ -1,10 +1,10 @@ +use std::sync::Arc; + use datafusion::prelude::SessionContext; -use kalamdb_commons::models::ReadContext; -use kalamdb_commons::{NamespaceId, Role, UserId}; +use kalamdb_commons::{models::ReadContext, NamespaceId, Role, UserId}; use kalamdb_session::AuthSession; use kalamdb_session_datafusion::SessionUserContext; use once_cell::sync::OnceCell; -use std::sync::Arc; /// Unified execution context for SQL queries /// @@ -212,8 +212,9 @@ impl ExecutionContext { /// Create a per-request SessionContext with current user_id and role injected /// - /// Clones the base SessionState and injects the current user_id and role into config.extensions. - /// The clone is relatively cheap (~1-2μs) because most fields are Arc-wrapped. + /// Clones the base SessionState and injects the current user_id and role into + /// config.extensions. The clone is relatively cheap (~1-2μs) because most fields are + /// Arc-wrapped. /// /// # What Gets Cloned /// - session_id: String (~50 bytes) @@ -262,7 +263,7 @@ impl ExecutionContext { /// This reads `datafusion.catalog.default_schema` from the session configuration. 
/// The default schema is set to "default" initially and can be changed using: /// - `USE namespace` - /// - `USE NAMESPACE namespace` + /// - `USE NAMESPACE namespace` /// - `SET NAMESPACE namespace` /// /// # Returns diff --git a/backend/crates/kalamdb-core/src/sql/context/execution_result.rs b/backend/crates/kalamdb-core/src/sql/context/execution_result.rs index 3423ebbad..ae86f773c 100644 --- a/backend/crates/kalamdb-core/src/sql/context/execution_result.rs +++ b/backend/crates/kalamdb-core/src/sql/context/execution_result.rs @@ -1,5 +1,4 @@ -use arrow::array::RecordBatch; -use arrow::datatypes::SchemaRef; +use arrow::{array::RecordBatch, datatypes::SchemaRef}; /// Result type for SQL execution #[derive(Debug, Clone)] diff --git a/backend/crates/kalamdb-core/src/sql/context/mod.rs b/backend/crates/kalamdb-core/src/sql/context/mod.rs index dfe10b83e..96f72627a 100644 --- a/backend/crates/kalamdb-core/src/sql/context/mod.rs +++ b/backend/crates/kalamdb-core/src/sql/context/mod.rs @@ -12,11 +12,9 @@ mod execution_context; mod execution_result; +// Re-export DataFusion's ScalarValue for convenience +pub use datafusion::scalar::ScalarValue; pub use execution_context::ExecutionContext; pub use execution_result::ExecutionResult; - // Re-export SessionUserContext from kalamdb-session-datafusion for TableProviders. pub use kalamdb_session_datafusion::SessionUserContext; - -// Re-export DataFusion's ScalarValue for convenience -pub use datafusion::scalar::ScalarValue; diff --git a/backend/crates/kalamdb-core/src/sql/datafusion_session.rs b/backend/crates/kalamdb-core/src/sql/datafusion_session.rs index efa538d77..53ff3d745 100644 --- a/backend/crates/kalamdb-core/src/sql/datafusion_session.rs +++ b/backend/crates/kalamdb-core/src/sql/datafusion_session.rs @@ -11,19 +11,27 @@ //! - `max_partitions`: Maximum partitions per query (enables parallel execution) //! 
- `batch_size`: Arrow batch size for record processing (default: 8192) -use crate::sql::functions::{ - CosineDistanceFunction, CurrentRoleFunction, CurrentUserFunction, SnowflakeIdFunction, - UlidFunction, UuidV7Function, +use std::sync::Arc; + +use datafusion::{ + error::Result as DataFusionResult, + execution::{ + context::{SessionContext, SessionState}, + memory_pool::GreedyMemoryPool, + runtime_env::RuntimeEnvBuilder, + }, + logical_expr::ScalarUDF, + prelude::SessionConfig, }; -use crate::sql::table_functions::VectorSearchTableFunction; -use datafusion::error::Result as DataFusionResult; -use datafusion::execution::context::{SessionContext, SessionState}; -use datafusion::execution::memory_pool::GreedyMemoryPool; -use datafusion::execution::runtime_env::RuntimeEnvBuilder; -use datafusion::logical_expr::ScalarUDF; -use datafusion::prelude::SessionConfig; use kalamdb_configs::DataFusionSettings; -use std::sync::Arc; + +use crate::sql::{ + functions::{ + CosineDistanceFunction, CurrentRoleFunction, CurrentUserFunction, SnowflakeIdFunction, + UlidFunction, UuidV7Function, + }, + table_functions::VectorSearchTableFunction, +}; // KalamSessionState removed (ExecutionContext used at higher layer) @@ -154,8 +162,7 @@ impl DataFusionSessionFactory { // This replaces the former custom json_extract_scalar UDF and the PG-side // SQL rewrite layer with the community-maintained datafusion-functions-json // crate which handles operator planning natively inside DataFusion. - datafusion_functions_json::register_all(ctx) - .expect("failed to register JSON functions"); + datafusion_functions_json::register_all(ctx).expect("failed to register JSON functions"); // Register COSINE_DISTANCE(vector, query_vector) for ORDER BY similarity search syntax. 
ctx.register_udf(ScalarUDF::from(CosineDistanceFunction::new())); diff --git a/backend/crates/kalamdb-core/src/sql/executor/default_ordering.rs b/backend/crates/kalamdb-core/src/sql/executor/default_ordering.rs index 38b78f596..6fad37007 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/default_ordering.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/default_ordering.rs @@ -14,13 +14,13 @@ //! The default ordering is by primary key columns in ASC order. //! If no primary key is defined, we fall back to _seq (system sequence column). -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use datafusion::logical_expr::{LogicalPlan, SortExpr}; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::models::TableId; use std::sync::Arc; +use datafusion::logical_expr::{LogicalPlan, SortExpr}; +use kalamdb_commons::{constants::SystemColumnNames, models::TableId}; + +use crate::{app_context::AppContext, error::KalamDbError}; + /// Check if a LogicalPlan already has an ORDER BY clause at the top level /// /// This traverses up from the root to find if any Sort node exists. diff --git a/backend/crates/kalamdb-core/src/sql/executor/handler_adapter.rs b/backend/crates/kalamdb-core/src/sql/executor/handler_adapter.rs index 789287675..20ed5a90b 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/handler_adapter.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/handler_adapter.rs @@ -3,14 +3,21 @@ //! This module provides a zero-boilerplate way to register typed handlers //! in the HandlerRegistry without writing custom adapters for each handler. 
-use crate::error::KalamDbError; -use crate::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use crate::sql::executor::handler_registry::{SqlHandlerFuture, SqlStatementHandler}; -use crate::sql::executor::handlers::typed::TypedStatementHandler; -use kalamdb_sql::classifier::SqlStatement; -use kalamdb_sql::DdlAst; use std::marker::PhantomData; +use kalamdb_sql::{classifier::SqlStatement, DdlAst}; + +use crate::{ + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::{ + handler_registry::{SqlHandlerFuture, SqlStatementHandler}, + handlers::typed::TypedStatementHandler, + }, + }, +}; + /// Generic adapter that bridges TypedStatementHandler to SqlStatementHandler /// /// This eliminates the need to write custom adapter implementations for each handler. diff --git a/backend/crates/kalamdb-core/src/sql/executor/handler_registry.rs b/backend/crates/kalamdb-core/src/sql/executor/handler_registry.rs index 97f19dfdc..68eb6aeaa 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/handler_registry.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/handler_registry.rs @@ -11,16 +11,20 @@ //! - **Testability**: Easy to mock handlers for unit tests //! 
- **Zero Overhead**: Registry lookup via DashMap is <1μs (vs 50-100ns for match) -use crate::error::KalamDbError; -use crate::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use crate::sql::executor::handler_adapter::{DynamicHandlerAdapter, TypedHandlerAdapter}; +use std::{future::Future, pin::Pin, sync::Arc}; + use dashmap::DashMap; use kalamdb_sql::classifier::SqlStatement; -use std::future::Future; -use std::pin::Pin; -use std::sync::Arc; use tracing::Instrument; +use crate::{ + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handler_adapter::{DynamicHandlerAdapter, TypedHandlerAdapter}, + }, +}; + pub type SqlHandlerFuture<'a, T> = Pin + Send + 'a>>; /// Trait for handlers that can process any SqlStatement variant @@ -87,7 +91,8 @@ pub struct HandlerRegistry { impl HandlerRegistry { /// Create an empty handler registry. /// - /// Handler registration is performed externally via `kalamdb_handlers::register_all_handlers()`. + /// Handler registration is performed externally via + /// `kalamdb_handlers::register_all_handlers()`. pub fn new() -> Self { Self { handlers: DashMap::new(), @@ -210,10 +215,13 @@ impl HandlerRegistry { #[cfg(test)] mod tests { + use kalamdb_commons::{ + models::{NamespaceId, UserId}, + Role, + }; + use super::*; use crate::test_helpers::{create_test_session_simple, test_app_context_simple}; - use kalamdb_commons::models::{NamespaceId, UserId}; - use kalamdb_commons::Role; fn test_context() -> ExecutionContext { ExecutionContext::new(UserId::from("test_user"), Role::Dba, create_test_session_simple()) diff --git a/backend/crates/kalamdb-core/src/sql/executor/handlers/mod.rs b/backend/crates/kalamdb-core/src/sql/executor/handlers/mod.rs index 9700427c5..6ebffbdd9 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/handlers/mod.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/handlers/mod.rs @@ -15,19 +15,21 @@ //! 
- **helpers**: Shared helper functions (future) //! - **audit**: Audit logging (future) -use crate::error::KalamDbError; -use kalamdb_sql::classifier::SqlStatement; use std::future::Future; +use kalamdb_sql::classifier::SqlStatement; + +use crate::error::KalamDbError; + // Typed handler trait (stays in core; handler impls are in kalamdb-handlers) pub mod typed; // Re-export core types from executor/models for convenience -pub use crate::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; - // Re-export legacy placeholder handlers pub use typed::TypedStatementHandler; +pub use crate::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; + /// Common trait for SQL statement handlers /// /// All statement handlers should implement this trait to provide a consistent @@ -106,7 +108,7 @@ pub trait StatementHandler: Send + Sync { context: &'a ExecutionContext, ) -> impl Future> + Send + 'a { // Default implementation: delegate to AuthorizationHandler - //AuthorizationHandler::check_authorization(context, statement) + // AuthorizationHandler::check_authorization(context, statement) let result = statement .check_authorization(context.user_role()) .map_err(KalamDbError::PermissionDenied); diff --git a/backend/crates/kalamdb-core/src/sql/executor/handlers/typed.rs b/backend/crates/kalamdb-core/src/sql/executor/handlers/typed.rs index dbdfcbe83..47739cb10 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/handlers/typed.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/handlers/typed.rs @@ -1,9 +1,11 @@ //! 
Typed statement handler trait over parsed AST statements +use std::future::Future; + +use kalamdb_sql::DdlAst; + use super::{ExecutionContext, ExecutionResult, ScalarValue}; use crate::error::KalamDbError; -use kalamdb_sql::DdlAst; -use std::future::Future; #[allow(async_fn_in_trait)] pub trait TypedStatementHandler: Send + Sync { diff --git a/backend/crates/kalamdb-core/src/sql/executor/mod.rs b/backend/crates/kalamdb-core/src/sql/executor/mod.rs index a8645413c..8e8041759 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/mod.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/mod.rs @@ -15,13 +15,13 @@ pub mod request_transaction_state; mod sql_executor; mod transaction_batch_insert; -use crate::sql::executor::handler_registry::HandlerRegistry; -use crate::sql::plan_cache::SqlCacheRegistry; +use std::sync::Arc; + pub use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{models::TableId, schemas::TableType}; use kalamdb_sql::classifier::SqlStatement; -use std::sync::Arc; + +use crate::sql::{executor::handler_registry::HandlerRegistry, plan_cache::SqlCacheRegistry}; /// Public facade for SQL execution routing. pub struct SqlExecutor { diff --git a/backend/crates/kalamdb-core/src/sql/executor/parameter_binding.rs b/backend/crates/kalamdb-core/src/sql/executor/parameter_binding.rs index c35e6bbab..0030d974b 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/parameter_binding.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/parameter_binding.rs @@ -5,9 +5,7 @@ //! - DataFusion LogicalPlan placeholder replacement ($1, $2, ...) //! 
- ScalarValue type checking -use datafusion::common::ParamValues; -use datafusion::logical_expr::LogicalPlan; -use datafusion::scalar::ScalarValue; +use datafusion::{common::ParamValues, logical_expr::LogicalPlan, scalar::ScalarValue}; use crate::error::KalamDbError; diff --git a/backend/crates/kalamdb-core/src/sql/executor/request_transaction_state.rs b/backend/crates/kalamdb-core/src/sql/executor/request_transaction_state.rs index 4a01b9cb4..7b3283ff4 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/request_transaction_state.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/request_transaction_state.rs @@ -1,13 +1,15 @@ -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; +use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + sync::Arc, +}; use kalamdb_commons::models::{TransactionId, TransactionOrigin}; -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::sql::context::ExecutionContext; -use crate::transactions::ExecutionOwnerKey; +use crate::{ + app_context::AppContext, error::KalamDbError, sql::context::ExecutionContext, + transactions::ExecutionOwnerKey, +}; #[derive(Debug, Clone)] pub struct RequestTransactionState<'a> { diff --git a/backend/crates/kalamdb-core/src/sql/executor/sql_executor.rs b/backend/crates/kalamdb-core/src/sql/executor/sql_executor.rs index cbf1d1bca..77158b2a6 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/sql_executor.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/sql_executor.rs @@ -1,22 +1,35 @@ -use super::{PreparedExecutionStatement, SqlExecutor}; -use crate::error::KalamDbError; -use crate::sql::executor::handler_registry::HandlerRegistry; -use crate::sql::executor::request_transaction_state::RequestTransactionState; -use crate::sql::plan_cache::{PlanCacheKey, SqlCacheRegistry, SqlCacheRegistryConfig}; -use crate::sql::{ExecutionContext, ExecutionResult}; -use 
crate::transactions::CoordinatorAccessValidator; +use std::{sync::Arc, time::Duration}; + use arrow::array::RecordBatch; -use datafusion::prelude::SessionContext; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::conversions::arrow_json_conversion::arrow_value_to_scalar; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::{NamespaceId, TableId, TransactionId}; -use kalamdb_commons::Role; +use datafusion::{ + dataframe::DataFrame, logical_expr::LogicalPlan, prelude::SessionContext, scalar::ScalarValue, +}; +use kalamdb_commons::{ + conversions::arrow_json_conversion::arrow_value_to_scalar, + models::{NamespaceId, TableId, TransactionId}, + schemas::TableType, + Role, +}; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind, StatementClassificationError}; use kalamdb_transactions::{TransactionQueryContext, TransactionQueryExtension}; -use std::sync::Arc; -use std::time::Duration; use tracing::Instrument; +use uuid::Uuid; + +use super::{PreparedExecutionStatement, SqlExecutor}; +use crate::{ + error::KalamDbError, + sql::{ + executor::{ + default_ordering::apply_default_order_by, + handler_registry::HandlerRegistry, + parameter_binding::{replace_placeholders_in_plan, validate_params}, + request_transaction_state::RequestTransactionState, + }, + plan_cache::{PlanCacheKey, SqlCacheRegistry, SqlCacheRegistryConfig}, + ExecutionContext, ExecutionResult, + }, + transactions::CoordinatorAccessValidator, +}; #[derive(Debug, Clone, Copy)] enum DmlKind { @@ -26,7 +39,7 @@ enum DmlKind { } impl SqlExecutor { - async fn try_execute_embedding_literal_insert_via_applier( + async fn try_execute_literal_insert_via_applier( &self, sql: &str, metadata: &PreparedExecutionStatement, @@ -45,18 +58,6 @@ impl SqlExecutor { } } - let Some(cached_table) = self.app_context.schema_registry().get(table_id) else { - return Ok(None); - }; - let has_embedding_columns = cached_table - .table - .columns - .iter() - .any(|column| 
matches!(column.data_type, KalamDataType::Embedding(_))); - if !has_embedding_columns { - return Ok(None); - } - let dialect = sqlparser::dialect::GenericDialect {}; let parsed_statements = kalamdb_sql::parser::utils::parse_sql_statements(sql, &dialect) .map_err(|error| KalamDbError::InvalidSql(error.to_string()))?; @@ -587,7 +588,7 @@ impl SqlExecutor { ); // Enter the span for the entire execution async { - let classified = metadata.classified_statement.clone().ok_or_else(|| { + let classified = metadata.classified_statement.as_ref().ok_or_else(|| { KalamDbError::InvalidSql( "Missing pre-classified statement metadata for SQL execution".to_string(), ) @@ -628,7 +629,7 @@ impl SqlExecutor { SqlStatementKind::Insert(_) => { if params.is_empty() { if let Some(result) = self - .try_execute_embedding_literal_insert_via_applier( + .try_execute_literal_insert_via_applier( classified.as_str(), metadata, exec_ctx, @@ -686,7 +687,8 @@ impl SqlExecutor { | SqlStatementKind::CreateView(_) | SqlStatementKind::CreateNamespace(_) | SqlStatementKind::DropNamespace(_) => { - let result = self.handler_registry.handle(classified, params, exec_ctx).await; + let result = + self.handler_registry.handle(classified.clone(), params, exec_ctx).await; // Clear plan cache after DDL to invalidate any cached plans // that may reference the modified schema if result.is_ok() { @@ -697,7 +699,7 @@ impl SqlExecutor { }, // All other statements: Delegate to handler registry (no cache invalidation needed) - _ => self.handler_registry.handle(classified, params, exec_ctx).await, + _ => self.handler_registry.handle(classified.clone(), params, exec_ctx).await, }; // Record row count in the span @@ -718,6 +720,152 @@ impl SqlExecutor { .await } + fn should_stage_autocommit_dml( + &self, + metadata: &PreparedExecutionStatement, + exec_ctx: &ExecutionContext, + ) -> Result { + if self.active_request_transaction_id(exec_ctx)?.is_some() { + return Ok(false); + } + + let Some(table_id) = 
metadata.table_id.as_ref() else { + return Ok(false); + }; + + let Some(cached_table) = self.app_context.schema_registry().get(table_id) else { + return Ok(false); + }; + + let table_type: TableType = cached_table.table.table_type.into(); + Ok(matches!(table_type, TableType::User | TableType::Shared)) + } + + async fn execute_autocommit_dml_via_transaction( + &self, + sql: &str, + metadata: &PreparedExecutionStatement, + params: Vec, + exec_ctx: &ExecutionContext, + dml_kind: DmlKind, + ) -> Result { + let owned_exec_ctx; + let dml_exec_ctx = if exec_ctx.request_id().is_some() { + exec_ctx + } else { + owned_exec_ctx = + exec_ctx.clone().with_request_id(format!("sql-autocommit-{}", Uuid::now_v7())); + &owned_exec_ctx + }; + + let mut request_state = RequestTransactionState::from_execution_context(dml_exec_ctx)? + .ok_or_else(|| { + KalamDbError::InvalidOperation( + "autocommit DML requires a request-scoped execution context".to_string(), + ) + })?; + request_state.sync_from_coordinator(&self.app_context); + + if request_state.is_active() { + return self + .execute_dml_via_datafusion_inner(sql, metadata, params, dml_exec_ctx, dml_kind) + .await; + } + + request_state.begin(&self.app_context)?; + let result = self + .execute_dml_via_datafusion_inner(sql, metadata, params, dml_exec_ctx, dml_kind) + .await; + + match result { + Ok(result) => match request_state.commit(&self.app_context).await { + Ok(_) => Ok(result), + Err(error) => { + let _ = request_state.rollback_if_active(&self.app_context); + Err(error) + }, + }, + Err(error) => { + let _ = request_state.rollback_if_active(&self.app_context); + Err(error) + }, + } + } + + async fn execute_dml_via_datafusion( + &self, + sql: &str, + metadata: &PreparedExecutionStatement, + params: Vec, + exec_ctx: &ExecutionContext, + dml_kind: DmlKind, + ) -> Result { + if self.should_stage_autocommit_dml(metadata, exec_ctx)? 
{ + return self + .execute_autocommit_dml_via_transaction(sql, metadata, params, exec_ctx, dml_kind) + .await; + } + + self.execute_dml_via_datafusion_inner(sql, metadata, params, exec_ctx, dml_kind) + .await + } + + async fn plan_dml_with_provider_reload( + &self, + execution_sql: &str, + original_sql: &str, + exec_ctx: &ExecutionContext, + ) -> Result<(SessionContext, DataFrame), KalamDbError> { + let session = self.create_session_with_transaction_context(exec_ctx)?; + let plan_start = std::time::Instant::now(); + + match session.sql(execution_sql).await { + Ok(df) => { + tracing::debug!( + plan_ms = (plan_start.elapsed().as_micros() as f64 / 1000.0), + "sql.dml_plan" + ); + Ok((session, df)) + }, + Err(error) if Self::is_table_not_found_error(&error) => { + if let Err(load_err) = self.load_existing_tables().await { + log::warn!( + target: "sql::dml", + "⚠️ Failed to reload table providers after missing table in DML | sql='{}' | error='{}'", + original_sql, + load_err + ); + } + + let retry_session = self.create_session_with_transaction_context(exec_ctx)?; + let retry_start = std::time::Instant::now(); + let retry_df = retry_session.sql(execution_sql).await.map_err(|retry_error| { + self.log_sql_error(original_sql, exec_ctx, retry_error) + })?; + tracing::debug!( + plan_ms = (retry_start.elapsed().as_micros() as f64 / 1000.0), + reloaded_providers = true, + "sql.dml_plan" + ); + Ok((retry_session, retry_df)) + }, + Err(error) => Err(self.log_sql_error(original_sql, exec_ctx, error)), + } + } + + fn cache_and_bind_dml_plan( + &self, + cache_key: &PlanCacheKey, + planned_df: DataFrame, + params: &[ScalarValue], + ) -> Result { + let template_plan = planned_df.logical_plan().clone(); + self.sql_cache_registry + .plan_cache() + .insert(cache_key.clone(), template_plan.clone()); + replace_placeholders_in_plan(template_plan, params) + } + #[tracing::instrument( name = "sql.dml_datafusion", skip_all, @@ -726,7 +874,7 @@ impl SqlExecutor { rows_affected = 
tracing::field::Empty, ) )] - async fn execute_dml_via_datafusion( + async fn execute_dml_via_datafusion_inner( &self, sql: &str, metadata: &PreparedExecutionStatement, @@ -739,10 +887,6 @@ impl SqlExecutor { let execution_sql = kalamdb_sql::rewrite_context_functions_for_datafusion(sql); let execution_sql: &str = &execution_sql; - use crate::sql::executor::parameter_binding::{ - replace_placeholders_in_plan, validate_params, - }; - if !params.is_empty() { validate_params(¶ms)?; } @@ -750,34 +894,7 @@ impl SqlExecutor { // Parameterized DML: reuse cached template plans and only bind placeholders per request. // This avoids reparsing/replanning the same INSERT/UPDATE/DELETE shape repeatedly. let df = if params.is_empty() { - let session = self.create_session_with_transaction_context(exec_ctx)?; - let plan_start = std::time::Instant::now(); - match session.sql(execution_sql).await { - Ok(df) => { - tracing::debug!(plan_ms = %plan_start.elapsed().as_micros() as f64 / 1000.0, "sql.dml_plan"); - df - }, - Err(e) => { - if Self::is_table_not_found_error(&e) { - if let Err(load_err) = self.load_existing_tables().await { - log::warn!( - target: "sql::dml", - "⚠️ Failed to reload table providers after missing table in DML | sql='{}' | error='{}'", - sql, - load_err - ); - } - let retry_session = - self.create_session_with_transaction_context(exec_ctx)?; - retry_session - .sql(execution_sql) - .await - .map_err(|e2| self.log_sql_error(sql, exec_ctx, e2))? 
- } else { - return Err(self.log_sql_error(sql, exec_ctx, e)); - } - }, - } + self.plan_dml_with_provider_reload(execution_sql, sql, exec_ctx).await?.1 } else { let cache_key = PlanCacheKey::new( exec_ctx.default_namespace().clone(), @@ -790,103 +907,36 @@ impl SqlExecutor { let bound_plan = replace_placeholders_in_plan((*template_plan).clone(), ¶ms)?; match session.execute_logical_plan(bound_plan).await { Ok(df) => df, - Err(e) => { + Err(error) => { + if let Some(not_leader_err) = Self::try_not_leader_error(&error) { + return Err(not_leader_err); + } + log::warn!( target: "sql::dml", "Failed to execute cached DML plan, reparsing SQL: {}", - e + error ); - match session.sql(execution_sql).await { - Ok(planned_df) => { - let template_plan = planned_df.logical_plan().clone(); - self.sql_cache_registry - .plan_cache() - .insert(cache_key.clone(), template_plan.clone()); - let rebound_plan = - replace_placeholders_in_plan(template_plan, ¶ms)?; - session - .execute_logical_plan(rebound_plan) - .await - .map_err(|e2| Self::datafusion_to_execution_error(e2))? - }, - Err(e) => { - if Self::is_table_not_found_error(&e) { - if let Err(load_err) = self.load_existing_tables().await { - log::warn!( - target: "sql::dml", - "⚠️ Failed to reload table providers after missing table in DML | sql='{}' | error='{}'", - sql, - load_err - ); - } - let retry_session = - self.create_session_with_transaction_context(exec_ctx)?; - let retry_df = retry_session - .sql(execution_sql) - .await - .map_err(|e2| self.log_sql_error(sql, exec_ctx, e2))?; - let template_plan = retry_df.logical_plan().clone(); - self.sql_cache_registry - .plan_cache() - .insert(cache_key.clone(), template_plan.clone()); - let rebound_plan = - replace_placeholders_in_plan(template_plan, ¶ms)?; - retry_session - .execute_logical_plan(rebound_plan) - .await - .map_err(|e3| Self::datafusion_to_execution_error(e3))? 
- } else { - return Err(self.log_sql_error(sql, exec_ctx, e)); - } - }, - } - }, - } - } else { - match session.sql(execution_sql).await { - Ok(planned_df) => { - let template_plan = planned_df.logical_plan().clone(); - self.sql_cache_registry - .plan_cache() - .insert(cache_key.clone(), template_plan.clone()); - let bound_plan = replace_placeholders_in_plan(template_plan, ¶ms)?; - session - .execute_logical_plan(bound_plan) + let (plan_session, planned_df) = self + .plan_dml_with_provider_reload(execution_sql, sql, exec_ctx) + .await?; + let rebound_plan = + self.cache_and_bind_dml_plan(&cache_key, planned_df, ¶ms)?; + plan_session + .execute_logical_plan(rebound_plan) .await - .map_err(|e2| Self::datafusion_to_execution_error(e2))? - }, - Err(e) => { - if Self::is_table_not_found_error(&e) { - if let Err(load_err) = self.load_existing_tables().await { - log::warn!( - target: "sql::dml", - "⚠️ Failed to reload table providers after missing table in DML | sql='{}' | error='{}'", - sql, - load_err - ); - } - let retry_session = - self.create_session_with_transaction_context(exec_ctx)?; - let retry_df = retry_session - .sql(execution_sql) - .await - .map_err(|e2| self.log_sql_error(sql, exec_ctx, e2))?; - - let template_plan = retry_df.logical_plan().clone(); - self.sql_cache_registry - .plan_cache() - .insert(cache_key.clone(), template_plan.clone()); - let bound_plan = replace_placeholders_in_plan(template_plan, ¶ms)?; - retry_session - .execute_logical_plan(bound_plan) - .await - .map_err(|e3| Self::datafusion_to_execution_error(e3))? - } else { - return Err(self.log_sql_error(sql, exec_ctx, e)); - } + .map_err(Self::datafusion_to_execution_error)? }, } + } else { + let (plan_session, planned_df) = + self.plan_dml_with_provider_reload(execution_sql, sql, exec_ctx).await?; + let bound_plan = self.cache_and_bind_dml_plan(&cache_key, planned_df, ¶ms)?; + plan_session + .execute_logical_plan(bound_plan) + .await + .map_err(Self::datafusion_to_execution_error)? 
} }; @@ -918,10 +968,6 @@ impl SqlExecutor { ) -> Result { let execution_sql = kalamdb_sql::rewrite_context_functions_for_datafusion(sql); let execution_sql: &str = &execution_sql; - use crate::sql::executor::default_ordering::apply_default_order_by; - use crate::sql::executor::parameter_binding::{ - replace_placeholders_in_plan, validate_params, - }; // Validate parameters if present if !params.is_empty() { @@ -931,7 +977,8 @@ impl SqlExecutor { let session = self.create_session_with_transaction_context(exec_ctx)?; // Try cached template plan first (works for both plain and parameterized SQL). - // Key excludes user_id because LogicalPlan is user-agnostic - filtering happens at scan time. + // Key excludes user_id because LogicalPlan is user-agnostic - filtering happens at scan + // time. let cache_key = PlanCacheKey::new( exec_ctx.default_namespace().clone(), exec_ctx.user_role(), @@ -1310,7 +1357,7 @@ impl SqlExecutor { /// Called during server startup to restore table access after restart. 
/// Loads table definitions from the store and creates/registers: /// - UserTableShared instances for USER tables - /// - SharedTableProvider instances for SHARED tables + /// - SharedTableProvider instances for SHARED tables /// - StreamTableProvider instances for STREAM tables /// /// # Returns diff --git a/backend/crates/kalamdb-core/src/sql/executor/transaction_batch_insert.rs b/backend/crates/kalamdb-core/src/sql/executor/transaction_batch_insert.rs index 8527c3284..6685f5719 100644 --- a/backend/crates/kalamdb-core/src/sql/executor/transaction_batch_insert.rs +++ b/backend/crates/kalamdb-core/src/sql/executor/transaction_batch_insert.rs @@ -1,28 +1,35 @@ -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::schema_registry::CachedTableData; -use crate::sql::plan_cache::{ - FastInsertDefaultEntry, FastInsertDefaultTemplate, FastInsertMetadata, InsertMetadataCacheKey, - SqlCacheRegistry, +use std::{ + collections::BTreeMap, + sync::{Arc, OnceLock}, }; -use crate::sql::ExecutionContext; + use chrono::Utc; use datafusion::scalar::ScalarValue; -use kalamdb_commons::conversions::arrow_json_conversion::coerce_rows; -use kalamdb_commons::conversions::json_value_to_scalar; -use kalamdb_commons::ids::SnowflakeGenerator; -use kalamdb_commons::models::rows::row::Row; -use kalamdb_commons::models::{TransactionId, UserId}; -use kalamdb_commons::schemas::{ColumnDefault, TableType}; -use kalamdb_commons::TableId; +use kalamdb_commons::{ + conversions::{arrow_json_conversion::coerce_rows, json_value_to_scalar}, + ids::SnowflakeGenerator, + models::{rows::row::Row, TransactionId, UserId}, + schemas::{ColumnDefault, TableType}, + TableId, +}; use kalamdb_transactions::build_insert_staged_mutations; use sqlparser::ast::{Expr, SetExpr, Statement}; -use std::collections::BTreeMap; -use std::sync::{Arc, OnceLock}; use ulid::Ulid; use uuid::Uuid; use super::helpers::ast_parsing; +use crate::{ + app_context::AppContext, + error::KalamDbError, + 
schema_registry::CachedTableData, + sql::{ + plan_cache::{ + FastInsertDefaultEntry, FastInsertDefaultTemplate, FastInsertMetadata, + InsertMetadataCacheKey, SqlCacheRegistry, + }, + ExecutionContext, + }, +}; static INSERT_DEFAULT_SNOWFLAKE_GENERATOR: OnceLock = OnceLock::new(); @@ -258,7 +265,8 @@ fn prepare_default_template( ColumnDefault::FunctionCall { name, args } => { if !args.is_empty() { return Err(KalamDbError::InvalidOperation(format!( - "Default function '{}' with arguments is not supported in transaction batch INSERT", + "Default function '{}' with arguments is not supported in transaction batch \ + INSERT", name ))); } diff --git a/backend/crates/kalamdb-core/src/sql/functions/current_role.rs b/backend/crates/kalamdb-core/src/sql/functions/current_role.rs index 8b993d012..585009dc0 100644 --- a/backend/crates/kalamdb-core/src/sql/functions/current_role.rs +++ b/backend/crates/kalamdb-core/src/sql/functions/current_role.rs @@ -3,16 +3,18 @@ //! This module provides a user-defined function for DataFusion that returns the current user's role //! from the session context. 
-use datafusion::arrow::array::{ArrayRef, StringArray}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{any::Any, sync::Arc}; + +use datafusion::{ + arrow::array::{ArrayRef, StringArray}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, +}; +use kalamdb_commons::{ + arrow_utils::{arrow_utf8, ArrowDataType}, + Role, }; -use kalamdb_commons::arrow_utils::{arrow_utf8, ArrowDataType}; -use kalamdb_commons::Role; use kalamdb_session_datafusion::SessionUserContext; -use std::any::Any; -use std::sync::Arc; /// CURRENT_ROLE() scalar function implementation /// @@ -50,11 +52,8 @@ impl ScalarUDFImpl for CurrentRoleFunction { return Err(DataFusionError::Plan("CURRENT_ROLE() takes no arguments".to_string())); } - let session_ctx = args - .config_options - .extensions - .get::() - .ok_or_else(|| { + let session_ctx = + args.config_options.extensions.get::().ok_or_else(|| { DataFusionError::Execution( "CURRENT_ROLE() failed: session user context not found".to_string(), ) @@ -75,9 +74,10 @@ impl ScalarUDFImpl for CurrentRoleFunction { #[cfg(test)] mod tests { - use super::*; use datafusion::logical_expr::ScalarUDF; + use super::*; + #[test] fn test_current_role_function_creation() { let func_impl = CurrentRoleFunction::new(); diff --git a/backend/crates/kalamdb-core/src/sql/functions/current_user.rs b/backend/crates/kalamdb-core/src/sql/functions/current_user.rs index dcf527f64..7653ad117 100644 --- a/backend/crates/kalamdb-core/src/sql/functions/current_user.rs +++ b/backend/crates/kalamdb-core/src/sql/functions/current_user.rs @@ -2,15 +2,15 @@ //! //! Returns the current user_id from the session context. 
-use datafusion::arrow::array::{ArrayRef, StringArray}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{any::Any, sync::Arc}; + +use datafusion::{ + arrow::array::{ArrayRef, StringArray}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, }; use kalamdb_commons::arrow_utils::{arrow_utf8, ArrowDataType}; use kalamdb_session_datafusion::SessionUserContext; -use std::any::Any; -use std::sync::Arc; /// KDB_CURRENT_USER() scalar function implementation /// @@ -48,11 +48,8 @@ impl ScalarUDFImpl for CurrentUserFunction { return Err(DataFusionError::Plan("KDB_CURRENT_USER() takes no arguments".to_string())); } - let session_ctx = args - .config_options - .extensions - .get::() - .ok_or_else(|| { + let session_ctx = + args.config_options.extensions.get::().ok_or_else(|| { DataFusionError::Execution( "KDB_CURRENT_USER() failed: session user context not found".to_string(), ) diff --git a/backend/crates/kalamdb-core/src/sql/functions/snowflake_id.rs b/backend/crates/kalamdb-core/src/sql/functions/snowflake_id.rs index d8cadf354..eb4523010 100644 --- a/backend/crates/kalamdb-core/src/sql/functions/snowflake_id.rs +++ b/backend/crates/kalamdb-core/src/sql/functions/snowflake_id.rs @@ -8,16 +8,21 @@ //! //! This ensures time-ordered, unique IDs suitable for PRIMARY KEY columns. 
-use datafusion::arrow::array::{ArrayRef, Int64Array}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{ + any::Any, + sync::{ + atomic::{AtomicU16, Ordering}, + Arc, + }, + time::{SystemTime, UNIX_EPOCH}, +}; + +use datafusion::{ + arrow::array::{ArrayRef, Int64Array}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, }; use kalamdb_commons::arrow_utils::{arrow_int64, ArrowDataType}; -use std::any::Any; -use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; // Snowflake ID format constants const TIMESTAMP_BITS: u64 = 41; @@ -132,10 +137,12 @@ impl ScalarUDFImpl for SnowflakeIdFunction { #[cfg(test)] mod tests { - use super::*; - use datafusion::logical_expr::ScalarUDF; use std::collections::HashSet; + use datafusion::logical_expr::ScalarUDF; + + use super::*; + #[test] fn test_snowflake_id_function_creation() { let func_impl = SnowflakeIdFunction::new(); diff --git a/backend/crates/kalamdb-core/src/sql/functions/ulid.rs b/backend/crates/kalamdb-core/src/sql/functions/ulid.rs index 54fe9687c..0860f4649 100644 --- a/backend/crates/kalamdb-core/src/sql/functions/ulid.rs +++ b/backend/crates/kalamdb-core/src/sql/functions/ulid.rs @@ -11,14 +11,14 @@ //! ULIDs are URL-safe, case-insensitive, and time-ordered, making them //! suitable for PRIMARY KEY columns and correlation IDs. 
-use datafusion::arrow::array::{ArrayRef, StringArray}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{any::Any, sync::Arc}; + +use datafusion::{ + arrow::array::{ArrayRef, StringArray}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, }; use kalamdb_commons::arrow_utils::{arrow_utf8, ArrowDataType}; -use std::any::Any; -use std::sync::Arc; use ulid::Ulid; /// ULID() scalar function implementation @@ -91,10 +91,12 @@ impl ScalarUDFImpl for UlidFunction { #[cfg(test)] mod tests { - use super::*; - use datafusion::logical_expr::ScalarUDF; use std::collections::HashSet; + use datafusion::logical_expr::ScalarUDF; + + use super::*; + #[test] fn test_ulid_function_creation() { let func_impl = UlidFunction::new(); diff --git a/backend/crates/kalamdb-core/src/sql/functions/uuid_v7.rs b/backend/crates/kalamdb-core/src/sql/functions/uuid_v7.rs index ca1da7c14..1a5ed8b56 100644 --- a/backend/crates/kalamdb-core/src/sql/functions/uuid_v7.rs +++ b/backend/crates/kalamdb-core/src/sql/functions/uuid_v7.rs @@ -5,20 +5,20 @@ //! //! UUIDv7 format: //! - 48 bits: Unix timestamp in milliseconds -//! - 12 bits: randomized version and variant bits +//! - 12 bits: randomized version and variant bits //! - 62 bits: random data //! //! UUIDv7 provides time-ordered UUIDs suitable for PRIMARY KEY columns //! while maintaining global uniqueness. 
-use datafusion::arrow::array::{ArrayRef, StringArray}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{any::Any, sync::Arc}; + +use datafusion::{ + arrow::array::{ArrayRef, StringArray}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, }; use kalamdb_commons::arrow_utils::{arrow_utf8, ArrowDataType}; -use std::any::Any; -use std::sync::Arc; use uuid::Uuid; /// UUID_V7() scalar function implementation @@ -88,10 +88,12 @@ impl ScalarUDFImpl for UuidV7Function { #[cfg(test)] mod tests { - use super::*; - use datafusion::logical_expr::ScalarUDF; use std::collections::HashSet; + use datafusion::logical_expr::ScalarUDF; + + use super::*; + #[test] fn test_uuid_v7_function_creation() { let func_impl = UuidV7Function::new(); @@ -175,17 +177,15 @@ mod tests { // Test removed - testing internal DataFusion behavior that changed in newer versions // The signature() method already validates no arguments are accepted - /* - #[test] - fn test_uuid_v7_with_arguments_fails() { - let func_impl = UuidV7Function::new(); - let args = vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ - "arg", - ])))]; - let result = func_impl.invoke(&args); - assert!(result.is_err()); - } - */ + // #[test] + // fn test_uuid_v7_with_arguments_fails() { + // let func_impl = UuidV7Function::new(); + // let args = vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ + // "arg", + // ])))]; + // let result = func_impl.invoke(&args); + // assert!(result.is_err()); + // } #[test] fn test_uuid_v7_return_type() { diff --git a/backend/crates/kalamdb-core/src/sql/impersonation.rs b/backend/crates/kalamdb-core/src/sql/impersonation.rs index 6e16cb943..55fd9e408 100644 --- a/backend/crates/kalamdb-core/src/sql/impersonation.rs +++ 
b/backend/crates/kalamdb-core/src/sql/impersonation.rs @@ -1,14 +1,17 @@ -use crate::app_context::AppContext; -use crate::error::KalamDbError; +use std::sync::Arc; + use chrono::Utc; -use kalamdb_commons::models::{AuditLogId, UserId}; -use kalamdb_commons::Role; +use kalamdb_commons::{ + models::{AuditLogId, UserId}, + Role, +}; use kalamdb_session::can_impersonate_role; use kalamdb_system::AuditLogEntry; use serde_json::json; -use std::sync::Arc; use uuid::Uuid; +use crate::{app_context::AppContext, error::KalamDbError}; + /// Core service for SQL "execute as user" resolution and authorization. pub struct SqlImpersonationService { app_context: Arc, diff --git a/backend/crates/kalamdb-core/src/sql/mod.rs b/backend/crates/kalamdb-core/src/sql/mod.rs index e3de2a9d8..186a84aa4 100644 --- a/backend/crates/kalamdb-core/src/sql/mod.rs +++ b/backend/crates/kalamdb-core/src/sql/mod.rs @@ -36,12 +36,11 @@ pub mod plan_cache; pub mod table_functions; pub use context::{ExecutionContext, ScalarValue}; -pub use datafusion_session::DataFusionSessionFactory; // KalamSessionState removed in v3 refactor -pub use executor::handlers::ExecutionResult; -pub use executor::SqlExecutor; +pub use datafusion_session::DataFusionSessionFactory; /* KalamSessionState removed in v3 + * refactor */ +pub use executor::{handlers::ExecutionResult, SqlExecutor}; pub use functions::{CosineDistanceFunction, CurrentRoleFunction, CurrentUserFunction}; pub use impersonation::SqlImpersonationService; -pub use table_functions::VectorSearchTableFunction; - pub use kalamdb_session::SessionError as TableAccessError; pub use kalamdb_session_datafusion::PermissionChecker; +pub use table_functions::VectorSearchTableFunction; diff --git a/backend/crates/kalamdb-core/src/sql/table_functions/runtime.rs b/backend/crates/kalamdb-core/src/sql/table_functions/runtime.rs index 7661993dc..6f85c4d78 100644 --- a/backend/crates/kalamdb-core/src/sql/table_functions/runtime.rs +++ 
b/backend/crates/kalamdb-core/src/sql/table_functions/runtime.rs @@ -1,9 +1,13 @@ -use crate::app_context::AppContext; +use std::sync::Weak; + use datafusion::common::{DataFusionError, Result}; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{ + models::{TableId, UserId}, + schemas::TableType, +}; use kalamdb_vector::{VectorSearchRuntime, VectorSearchScope}; -use std::sync::Weak; + +use crate::app_context::AppContext; #[derive(Debug, Clone)] pub struct CoreVectorSearchRuntime { diff --git a/backend/crates/kalamdb-core/src/test_helpers.rs b/backend/crates/kalamdb-core/src/test_helpers.rs index 0daaa0890..3587b34b4 100644 --- a/backend/crates/kalamdb-core/src/test_helpers.rs +++ b/backend/crates/kalamdb-core/src/test_helpers.rs @@ -4,23 +4,23 @@ //! They have no dependency on `kalamdb-jobs`, so they are safe to use from other crates' //! test code via `kalamdb-core = { ..., features = ["test-helpers"] }` in dev-dependencies. 
-use crate::app_context::AppContext; -use datafusion::prelude::SessionContext; -use kalamdb_commons::models::{NodeId, StorageId}; -use kalamdb_store::test_utils::TestDb; -use kalamdb_store::StorageBackend; -use kalamdb_system::{StoragePartition, SystemTable}; +#[cfg(any(test, feature = "test-helpers"))] +use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::Arc; +#[cfg(any(test, feature = "test-helpers"))] +use std::sync::Once; +use datafusion::prelude::SessionContext; // ── Imports needed by init_test_app_context / test_app_context ───────────────── #[cfg(any(test, feature = "test-helpers"))] use kalamdb_commons::models::NamespaceId; +use kalamdb_commons::models::{NodeId, StorageId}; +use kalamdb_store::{test_utils::TestDb, StorageBackend}; +use kalamdb_system::{StoragePartition, SystemTable}; #[cfg(any(test, feature = "test-helpers"))] use once_cell::sync::OnceCell; -#[cfg(any(test, feature = "test-helpers"))] -use std::sync::atomic::{AtomicU16, Ordering}; -#[cfg(any(test, feature = "test-helpers"))] -use std::sync::Once; + +use crate::app_context::AppContext; #[cfg(any(test, feature = "test-helpers"))] static TEST_DB: OnceCell> = OnceCell::new(); diff --git a/backend/crates/kalamdb-core/src/transactions/commit_result.rs b/backend/crates/kalamdb-core/src/transactions/commit_result.rs index 482084a48..9a81d0439 100644 --- a/backend/crates/kalamdb-core/src/transactions/commit_result.rs +++ b/backend/crates/kalamdb-core/src/transactions/commit_result.rs @@ -1,8 +1,5 @@ use chrono::{DateTime, Utc}; - -use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableType; - +use kalamdb_commons::{models::TransactionId, TableType}; // Re-export fanout types from kalamdb-live (canonical source) pub use kalamdb_live::fanout::{ CommitSideEffectPlan, FanoutDispatchPlan, FanoutOwnerScope, TransactionSideEffects, diff --git a/backend/crates/kalamdb-core/src/transactions/coordinator.rs b/backend/crates/kalamdb-core/src/transactions/coordinator.rs index 
52bbcde00..3dc36166f 100644 --- a/backend/crates/kalamdb-core/src/transactions/coordinator.rs +++ b/backend/crates/kalamdb-core/src/transactions/coordinator.rs @@ -1,27 +1,25 @@ -use std::collections::HashSet; -use std::sync::{Arc, Weak}; -use std::time::{Duration, Instant}; - -use dashmap::DashMap; -use tokio::runtime::Handle; -use tokio::time::MissedTickBehavior; -use uuid::Uuid; +use std::{ + collections::HashSet, + sync::{Arc, Weak}, + time::{Duration, Instant}, +}; -use kalamdb_commons::models::{ - NodeId, TableId, TransactionId, TransactionOrigin, TransactionState, UserId, +use dashmap::{mapref::entry::Entry, DashMap}; +use kalamdb_commons::{ + models::{NodeId, TableId, TransactionId, TransactionOrigin, TransactionState, UserId}, + TableType, }; -use kalamdb_commons::TableType; use kalamdb_raft::RaftExecutor; use kalamdb_sharding::{GroupId, ShardRouter}; - -use crate::app_context::AppContext; -use crate::error::KalamDbError; +use tokio::{runtime::Handle, time::MissedTickBehavior}; +use uuid::Uuid; use super::{ ActiveTransactionMetric, CommitSequenceTracker, ExecutionOwnerKey, StagedMutation, TransactionCommitResult, TransactionHandle, TransactionOverlay, TransactionRaftBinding, TransactionWriteSet, }; +use crate::{app_context::AppContext, error::KalamDbError}; /// In-memory coordinator for active explicit transactions. 
#[derive(Debug)] @@ -60,7 +58,8 @@ impl TransactionCoordinator { }, Err(error) => { log::warn!( - "transaction timeout sweeper was not started because no Tokio runtime is active: {}", + "transaction timeout sweeper was not started because no Tokio runtime is \ + active: {}", error ); }, @@ -73,13 +72,6 @@ impl TransactionCoordinator { owner_id: Arc, origin: TransactionOrigin, ) -> Result { - if self.active_by_owner.contains_key(&owner_key) { - return Err(KalamDbError::Conflict(format!( - "owner '{}' already has an active transaction", - owner_id - ))); - } - let transaction_id = TransactionId::new(Uuid::now_v7().to_string()); let snapshot_commit_seq = self.commit_sequence_tracker.current_committed(); let handle = TransactionHandle::new( @@ -92,9 +84,17 @@ impl TransactionCoordinator { Instant::now(), ); - self.active_by_owner.insert(owner_key, transaction_id.clone()); - self.active_by_id.insert(transaction_id.clone(), handle); - Ok(transaction_id) + match self.active_by_owner.entry(owner_key) { + Entry::Occupied(_) => Err(KalamDbError::Conflict(format!( + "owner '{}' already has an active transaction", + owner_id + ))), + Entry::Vacant(entry) => { + self.active_by_id.insert(transaction_id.clone(), handle); + entry.insert(transaction_id.clone()); + Ok(transaction_id) + }, + } } pub fn stage( @@ -322,7 +322,7 @@ impl TransactionCoordinator { }; let affected_rows = write_set.affected_rows(); - let mutations = write_set.ordered_mutations.clone(); + let mutations = write_set.ordered_mutations; let response = match self .app_context @@ -482,7 +482,8 @@ impl TransactionCoordinator { } => { if bound_group_id != group_id { Err(KalamDbError::InvalidOperation(format!( - "explicit transaction '{}' is already bound to data raft group '{}' and cannot access table '{}' in group '{}'", + "explicit transaction '{}' is already bound to data raft group '{}' and \ + cannot access table '{}' in group '{}'", transaction_id, bound_group_id, table_id, group_id ))) } else { @@ -750,7 
+751,8 @@ impl TransactionCoordinator { .map(|node_id| node_id.to_string()) .unwrap_or_else(|| "unknown".to_string()); KalamDbError::InvalidOperation(format!( - "transaction '{}' was aborted because leader for bound raft group '{}' changed from node '{}' to '{}'; retry in a new transaction", + "transaction '{}' was aborted because leader for bound raft group '{}' changed from \ + node '{}' to '{}'; retry in a new transaction", transaction_id, group_id, prior_leader_node_id, current_leader )) } diff --git a/backend/crates/kalamdb-core/src/transactions/handle.rs b/backend/crates/kalamdb-core/src/transactions/handle.rs index 3130e886b..22e6eba03 100644 --- a/backend/crates/kalamdb-core/src/transactions/handle.rs +++ b/backend/crates/kalamdb-core/src/transactions/handle.rs @@ -1,11 +1,8 @@ -use std::collections::HashSet; -use std::sync::Arc; -use std::time::Instant; +use std::{collections::HashSet, sync::Arc, time::Instant}; use kalamdb_commons::models::{TableId, TransactionId, TransactionOrigin, TransactionState}; -use super::binding::TransactionRaftBinding; -use super::ExecutionOwnerKey; +use super::{binding::TransactionRaftBinding, ExecutionOwnerKey}; /// Hot transaction metadata kept separate from the staged write buffer. 
#[derive(Debug, Clone)] diff --git a/backend/crates/kalamdb-core/src/transactions/mod.rs b/backend/crates/kalamdb-core/src/transactions/mod.rs index d05220e56..65da81405 100644 --- a/backend/crates/kalamdb-core/src/transactions/mod.rs +++ b/backend/crates/kalamdb-core/src/transactions/mod.rs @@ -17,6 +17,7 @@ pub use commit_result::{ pub use commit_sequence::CommitSequenceTracker; pub use coordinator::TransactionCoordinator; pub use handle::TransactionHandle; +pub use kalamdb_transactions::{TransactionOverlay, TransactionOverlayEntry}; pub use metrics::ActiveTransactionMetric; pub use overlay_view::{ CoordinatorAccessValidator, CoordinatorMutationSink, CoordinatorOverlayView, @@ -24,5 +25,3 @@ pub use overlay_view::{ pub use owner::ExecutionOwnerKey; pub use staged_mutation::StagedMutation; pub use write_set::TransactionWriteSet; - -pub use kalamdb_transactions::{TransactionOverlay, TransactionOverlayEntry}; diff --git a/backend/crates/kalamdb-core/src/transactions/overlay_view.rs b/backend/crates/kalamdb-core/src/transactions/overlay_view.rs index f82eb0c2d..c2a033f35 100644 --- a/backend/crates/kalamdb-core/src/transactions/overlay_view.rs +++ b/backend/crates/kalamdb-core/src/transactions/overlay_view.rs @@ -1,8 +1,9 @@ use std::sync::Arc; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TableId, TransactionId, UserId}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + models::{rows::Row, OperationKind, TableId, TransactionId, UserId}, + TableType, +}; use kalamdb_transactions::{ TransactionAccessError, TransactionAccessValidator, TransactionMutationSink, TransactionOverlay, TransactionOverlayView, diff --git a/backend/crates/kalamdb-core/src/transactions/owner.rs b/backend/crates/kalamdb-core/src/transactions/owner.rs index bdda8f395..69c26784c 100644 --- a/backend/crates/kalamdb-core/src/transactions/owner.rs +++ b/backend/crates/kalamdb-core/src/transactions/owner.rs @@ -1,6 +1,7 @@ -use 
crate::error::KalamDbError; use uuid::Uuid; +use crate::error::KalamDbError; + #[inline] fn invalid_pg_session_id(session_id: &str, reason: &str) -> KalamDbError { KalamDbError::InvalidOperation(format!("invalid pg session id '{}': {}", session_id, reason)) diff --git a/backend/crates/kalamdb-core/src/transactions/write_set.rs b/backend/crates/kalamdb-core/src/transactions/write_set.rs index 7ee23c55d..6a67449d4 100644 --- a/backend/crates/kalamdb-core/src/transactions/write_set.rs +++ b/backend/crates/kalamdb-core/src/transactions/write_set.rs @@ -115,14 +115,15 @@ impl TransactionWriteSet { #[cfg(test)] mod tests { - use datafusion_common::ScalarValue; use std::collections::BTreeMap; + use datafusion_common::ScalarValue; + use kalamdb_commons::{ + models::{rows::Row, NamespaceId, OperationKind, TableId, TableName}, + TableType, + }; + use super::*; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{NamespaceId, TableId}; - use kalamdb_commons::models::{OperationKind, TableName}; - use kalamdb_commons::TableType; fn row(values: &[(&'static str, ScalarValue)]) -> Row { let mut fields = BTreeMap::new(); diff --git a/backend/crates/kalamdb-core/src/vector/hot_flush.rs b/backend/crates/kalamdb-core/src/vector/hot_flush.rs index 8b9bc94e9..03a27aeb6 100644 --- a/backend/crates/kalamdb-core/src/vector/hot_flush.rs +++ b/backend/crates/kalamdb-core/src/vector/hot_flush.rs @@ -1,6 +1,5 @@ -use crate::app_context::AppContext; -use crate::error::KalamDbError; -use crate::manifest::ManifestService; +use std::sync::Arc; + use datafusion::arrow::datatypes::SchemaRef; use kalamdb_commons::models::{TableId, UserId}; use kalamdb_filestore::StorageCached; @@ -10,7 +9,8 @@ use kalamdb_vector::{ flush_user_scope_vectors as flush_user_scope_vectors_impl, VectorFlushError, VectorManifestStore, }; -use std::sync::Arc; + +use crate::{app_context::AppContext, error::KalamDbError, manifest::ManifestService}; struct CoreVectorManifestStore<'a> { manifest_service: &'a 
ManifestService, diff --git a/backend/crates/kalamdb-core/src/views/mod.rs b/backend/crates/kalamdb-core/src/views/mod.rs index aaa76c5fd..f4c961f22 100644 --- a/backend/crates/kalamdb-core/src/views/mod.rs +++ b/backend/crates/kalamdb-core/src/views/mod.rs @@ -1,6 +1,5 @@ // System schema provider wiring stays in core (depends on SchemaRegistry) pub mod system_schema_provider; -pub use system_schema_provider::SystemSchemaProvider; - // Re-export only the remaining view module used from kalamdb-core. pub use kalamdb_views::describe; +pub use system_schema_provider::SystemSchemaProvider; diff --git a/backend/crates/kalamdb-core/src/views/system_schema_provider.rs b/backend/crates/kalamdb-core/src/views/system_schema_provider.rs index a36f78e05..a7b8835a9 100644 --- a/backend/crates/kalamdb-core/src/views/system_schema_provider.rs +++ b/backend/crates/kalamdb-core/src/views/system_schema_provider.rs @@ -5,34 +5,37 @@ //! Virtual views (stats, settings, etc.) are created lazily on first access and //! stored back into SchemaRegistry. 
+use std::{any::Any, path::PathBuf, sync::Arc}; + use async_trait::async_trait; -use datafusion::catalog::SchemaProvider; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::error::Result as DataFusionResult; +use datafusion::{ + catalog::SchemaProvider, + datasource::{TableProvider, TableType}, + error::Result as DataFusionResult, +}; use kalamdb_commons::SystemTable; use kalamdb_configs::ServerConfig; use kalamdb_raft::CommandExecutor; use kalamdb_session_datafusion::secure_provider; use kalamdb_system::SystemTablesRegistry; +use kalamdb_views::{ + cluster::create_cluster_provider, + cluster_groups::create_cluster_groups_provider, + columns_view::create_columns_view_provider, + datatypes::{DatatypesTableProvider, DatatypesView}, + describe::DescribeView, + live::{LiveTableProvider, LiveView}, + server_logs::create_server_logs_provider, + sessions::{SessionsTableProvider, SessionsView}, + settings::{SettingsTableProvider, SettingsView}, + stats::{StatsTableProvider, StatsView}, + tables_view::create_tables_view_provider, + transactions::{TransactionsTableProvider, TransactionsView}, + view_base::ViewTableProvider, +}; use parking_lot::RwLock; -use std::any::Any; -use std::path::PathBuf; -use std::sync::Arc; use crate::schema_registry::SchemaRegistry; -use kalamdb_views::cluster::create_cluster_provider; -use kalamdb_views::cluster_groups::create_cluster_groups_provider; -use kalamdb_views::columns_view::create_columns_view_provider; -use kalamdb_views::datatypes::{DatatypesTableProvider, DatatypesView}; -use kalamdb_views::describe::DescribeView; -use kalamdb_views::live::{LiveTableProvider, LiveView}; -use kalamdb_views::server_logs::create_server_logs_provider; -use kalamdb_views::sessions::{SessionsTableProvider, SessionsView}; -use kalamdb_views::settings::{SettingsTableProvider, SettingsView}; -use kalamdb_views::stats::{StatsTableProvider, StatsView}; -use kalamdb_views::tables_view::create_tables_view_provider; -use 
kalamdb_views::transactions::{TransactionsTableProvider, TransactionsView}; -use kalamdb_views::view_base::ViewTableProvider; /// Configuration for view initialization pub struct ViewConfig { @@ -290,9 +293,10 @@ impl SchemaProvider for SystemSchemaProvider { #[cfg(test)] mod tests { - use super::*; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + fn create_test_provider() -> SystemSchemaProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); let system_tables = Arc::new(SystemTablesRegistry::new(backend)); diff --git a/backend/crates/kalamdb-core/tests/autocommit_perf_regression.rs b/backend/crates/kalamdb-core/tests/autocommit_perf_regression.rs index 139c08aea..8944b588a 100644 --- a/backend/crates/kalamdb-core/tests/autocommit_perf_regression.rs +++ b/backend/crates/kalamdb-core/tests/autocommit_perf_regression.rs @@ -1,24 +1,30 @@ mod support; -use std::alloc::{GlobalAlloc, Layout, System}; -use std::future::Future; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::Instant; - -use kalamdb_commons::models::pg_operations::{InsertRequest, ScanRequest}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{NamespaceId, NodeId, StorageId, TableId, TableName}; -use kalamdb_commons::TableType; +use std::{ + alloc::{GlobalAlloc, Layout, System}, + future::Future, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, + }, + time::Instant, +}; + +use kalamdb_commons::{ + models::{ + pg_operations::{InsertRequest, ScanRequest}, + rows::Row, + NamespaceId, NodeId, StorageId, TableId, TableName, + }, + TableType, +}; use kalamdb_configs::ServerConfig; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::operations::service::OperationService; +use kalamdb_core::{app_context::AppContext, operations::service::OperationService}; use kalamdb_pg::OperationExecutor; -use kalamdb_store::test_utils::TestDb; -use kalamdb_store::StorageBackend; -use 
kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::{Storage, StoragePartition, SystemTable}; - +use kalamdb_store::{test_utils::TestDb, StorageBackend}; +use kalamdb_system::{ + providers::storages::models::StorageType, Storage, StoragePartition, SystemTable, +}; use support::{create_cluster_app_context, create_shared_table, row, unique_namespace}; const VALID_IDLE_SESSION_ID: &str = "pg-7101-deadbeef"; @@ -373,7 +379,8 @@ async fn idle_autocommit_transaction_checks_add_no_extra_allocations() { .await; assert_eq!( scan_with_idle_session, scan_without_session, - "idle transaction lookup changed scan allocations: without_session={scan_without_session:?} with_session={scan_with_idle_session:?}" + "idle transaction lookup changed scan allocations: \ + without_session={scan_without_session:?} with_session={scan_with_idle_session:?}" ); let write_without_session = @@ -384,7 +391,8 @@ async fn idle_autocommit_transaction_checks_add_no_extra_allocations() { .await; assert_eq!( write_with_idle_session, write_without_session, - "idle transaction lookup changed write allocations: without_session={write_without_session:?} with_session={write_with_idle_session:?}" + "idle transaction lookup changed write allocations: \ + without_session={write_without_session:?} with_session={write_with_idle_session:?}" ); } @@ -503,11 +511,9 @@ async fn autocommit_read_write_latency_regression_stays_within_five_percent() { let read_candidate_ns = median_nanos(&read_candidate_samples); println!( - "autocommit perf regression medians: write baseline={}ns candidate={}ns, read baseline={}ns candidate={}ns", - write_baseline_ns, - write_candidate_ns, - read_baseline_ns, - read_candidate_ns + "autocommit perf regression medians: write baseline={}ns candidate={}ns, read \ + baseline={}ns candidate={}ns", + write_baseline_ns, write_candidate_ns, read_baseline_ns, read_candidate_ns ); assert!(app_ctx.transaction_coordinator().active_metrics().is_empty()); diff --git 
a/backend/crates/kalamdb-core/tests/snapshot_isolation.rs b/backend/crates/kalamdb-core/tests/snapshot_isolation.rs index c7a866aee..89c0564e1 100644 --- a/backend/crates/kalamdb-core/tests/snapshot_isolation.rs +++ b/backend/crates/kalamdb-core/tests/snapshot_isolation.rs @@ -1,18 +1,20 @@ mod support; -use std::collections::BTreeMap; -use std::sync::Arc; +use std::{collections::BTreeMap, sync::Arc}; use datafusion_common::ScalarValue; -use kalamdb_commons::conversions::arrow_json_conversion::record_batch_to_json_rows; -use kalamdb_commons::models::pg_operations::{InsertRequest, ScanRequest}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; -use kalamdb_commons::models::{NamespaceId, TableId, TableName}; -use kalamdb_commons::schemas::ColumnDefault; -use kalamdb_commons::{TableAccess, TableType}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::operations::service::OperationService; +use kalamdb_commons::{ + conversions::arrow_json_conversion::record_batch_to_json_rows, + models::{ + pg_operations::{InsertRequest, ScanRequest}, + rows::Row, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + NamespaceId, TableId, TableName, + }, + schemas::ColumnDefault, + TableAccess, TableType, +}; +use kalamdb_core::{app_context::AppContext, operations::service::OperationService}; use kalamdb_pg::OperationExecutor; use support::create_cluster_app_context; diff --git a/backend/crates/kalamdb-core/tests/sql_insert_transaction_semantics.rs b/backend/crates/kalamdb-core/tests/sql_insert_transaction_semantics.rs index 754d63356..f10ab17a0 100644 --- a/backend/crates/kalamdb-core/tests/sql_insert_transaction_semantics.rs +++ b/backend/crates/kalamdb-core/tests/sql_insert_transaction_semantics.rs @@ -1,12 +1,9 @@ mod support; use datafusion_common::ScalarValue; -use kalamdb_commons::models::UserId; -use kalamdb_commons::Role; -use kalamdb_core::sql::context::ExecutionContext; 
-use kalamdb_core::sql::context::ExecutionResult; +use kalamdb_commons::{models::UserId, Role}; +use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult}; use kalamdb_tables::UserTableProvider; - use support::{ create_cluster_app_context, create_executor, create_user_table, execute_ok, request_exec_ctx, unique_namespace, diff --git a/backend/crates/kalamdb-core/tests/support/mod.rs b/backend/crates/kalamdb-core/tests/support/mod.rs index 2ccb0bd70..6359958c9 100644 --- a/backend/crates/kalamdb-core/tests/support/mod.rs +++ b/backend/crates/kalamdb-core/tests/support/mod.rs @@ -1,27 +1,39 @@ #![allow(dead_code)] -use std::collections::BTreeMap; -use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::Arc; +use std::{ + collections::BTreeMap, + sync::{ + atomic::{AtomicU16, Ordering}, + Arc, + }, +}; use chrono::Utc; use datafusion_common::ScalarValue; -use kalamdb_commons::conversions::arrow_json_conversion::record_batch_to_json_rows; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; -use kalamdb_commons::models::{NamespaceId, StorageId, TableId, TableName, UserId}; -use kalamdb_commons::schemas::ColumnDefault; -use kalamdb_commons::{NodeId, Role, TableAccess, TableType}; +use kalamdb_commons::{ + conversions::arrow_json_conversion::record_batch_to_json_rows, + models::{ + datatypes::KalamDataType, + rows::Row, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + NamespaceId, StorageId, TableId, TableName, UserId, + }, + schemas::ColumnDefault, + NodeId, Role, TableAccess, TableType, +}; use kalamdb_configs::ServerConfig; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult}; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::executor::request_transaction_state::RequestTransactionState; -use 
kalamdb_core::sql::executor::SqlExecutor; +use kalamdb_core::{ + app_context::AppContext, + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::{ + handler_registry::HandlerRegistry, request_transaction_state::RequestTransactionState, + SqlExecutor, + }, + }, +}; use kalamdb_store::test_utils::TestDb; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; use uuid::Uuid; static TEST_PORT_OFFSET: AtomicU16 = AtomicU16::new(0); diff --git a/backend/crates/kalamdb-core/tests/system_transactions_view.rs b/backend/crates/kalamdb-core/tests/system_transactions_view.rs index 9aeb45138..37827008f 100644 --- a/backend/crates/kalamdb-core/tests/system_transactions_view.rs +++ b/backend/crates/kalamdb-core/tests/system_transactions_view.rs @@ -1,28 +1,25 @@ mod support; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Duration; +use std::{collections::HashMap, sync::Arc, time::Duration}; use datafusion_common::ScalarValue; use kalamdb_auth::{create_and_sign_token, services::unified::init_auth_config}; -use kalamdb_commons::conversions::arrow_json_conversion::record_batch_to_json_rows; -use kalamdb_commons::models::pg_operations::InsertRequest; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{KalamCellValue, TransactionId, UserId}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + conversions::arrow_json_conversion::record_batch_to_json_rows, + models::{pg_operations::InsertRequest, rows::Row, KalamCellValue, TransactionId, UserId}, + TableType, +}; use kalamdb_configs::ServerConfig; -use kalamdb_core::operations::service::OperationService; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::ExecutionResult; -use kalamdb_core::transactions::ExecutionOwnerKey; +use kalamdb_core::{ + app_context::AppContext, operations::service::OperationService, sql::context::ExecutionResult, + 
transactions::ExecutionOwnerKey, +}; use kalamdb_pg::{ BeginTransactionRequest, InsertRpcRequest, KalamPgService, OpenSessionRequest, OperationExecutor, PgService, RollbackTransactionRequest, ScanRpcRequest, }; use kalamdb_raft::RaftExecutor; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::{AuthType, Role, User}; +use kalamdb_system::{providers::storages::models::StorageMode, AuthType, Role, User}; use support::{ create_cluster_app_context, create_cluster_app_context_with_config, create_executor, create_shared_table, execute_ok, insert_sql, observer_exec_ctx, request_exec_ctx, @@ -83,7 +80,11 @@ fn shared_insert_request( } } -async fn open_session(app_ctx: &Arc, service: &KalamPgService, session_label: &str) -> String { +async fn open_session( + app_ctx: &Arc, + service: &KalamPgService, + session_label: &str, +) -> String { init_auth_config(&app_ctx.config().auth, &app_ctx.config().oauth); let bridge_user_id = UserId::new(format!("{}_bridge_dba", session_label.replace('-', "_"))); @@ -123,9 +124,10 @@ async fn open_session(app_ctx: &Arc, service: &KalamPgService, sessi session_id: String::new(), current_schema: None, }); - request - .metadata_mut() - .insert("authorization", format!("Bearer {}", token).parse().expect("valid auth metadata")); + request.metadata_mut().insert( + "authorization", + format!("Bearer {}", token).parse().expect("valid auth metadata"), + ); service .open_session(request) @@ -200,7 +202,8 @@ async fn system_transactions_shows_active_pg_and_sql_transactions_while_sessions execute_ok( &executor, &observer_ctx, - "SELECT transaction_id, owner_id, origin, state, write_count FROM system.transactions ORDER BY origin, transaction_id", + "SELECT transaction_id, owner_id, origin, state, write_count FROM system.transactions \ + ORDER BY origin, transaction_id", ) .await, ); @@ -229,7 +232,8 @@ async fn system_transactions_shows_active_pg_and_sql_transactions_while_sessions execute_ok( &executor, &observer_ctx, - 
"SELECT session_id, transaction_id, transaction_state FROM system.sessions ORDER BY session_id", + "SELECT session_id, transaction_id, transaction_state FROM system.sessions ORDER BY \ + session_id", ) .await, ); @@ -314,7 +318,8 @@ async fn pg_passive_timeout_hides_stale_transaction_fields_from_sessions_view() &executor, &observer_ctx, &format!( - "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions WHERE session_id = '{session_id}'" + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}'" ), ) .await, @@ -337,7 +342,8 @@ async fn pg_passive_timeout_hides_stale_transaction_fields_from_sessions_view() &executor, &observer_ctx, &format!( - "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions WHERE session_id = '{session_id}'" + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}'" ), ) .await, @@ -352,7 +358,8 @@ async fn pg_passive_timeout_hides_stale_transaction_fields_from_sessions_view() &executor, &observer_ctx, &format!( - "SELECT transaction_id FROM system.transactions WHERE transaction_id = '{transaction_id}'" + "SELECT transaction_id FROM system.transactions WHERE transaction_id = \ + '{transaction_id}'" ), ) .await, @@ -395,7 +402,8 @@ async fn pg_timeout_after_write_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions WHERE session_id = '{session_id}'" + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}'" ), ) .await, @@ -416,7 +424,8 @@ async fn pg_timeout_after_write_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT transaction_id, state, write_count FROM system.transactions WHERE transaction_id = '{transaction_id}'" + "SELECT transaction_id, state, 
write_count FROM system.transactions WHERE \ + transaction_id = '{transaction_id}'" ), ) .await, @@ -453,7 +462,8 @@ async fn pg_timeout_after_write_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions WHERE session_id = '{session_id}'" + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}'" ), ) .await, @@ -468,7 +478,8 @@ async fn pg_timeout_after_write_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT transaction_id FROM system.transactions WHERE transaction_id = '{transaction_id}'" + "SELECT transaction_id FROM system.transactions WHERE transaction_id = \ + '{transaction_id}'" ), ) .await, @@ -506,7 +517,8 @@ async fn pg_timeout_after_read_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT transaction_id, state, write_count FROM system.transactions WHERE transaction_id = '{transaction_id}'" + "SELECT transaction_id, state, write_count FROM system.transactions WHERE \ + transaction_id = '{transaction_id}'" ), ) .await, @@ -552,7 +564,8 @@ async fn pg_timeout_after_read_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions WHERE session_id = '{session_id}'" + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}'" ), ) .await, @@ -567,7 +580,8 @@ async fn pg_timeout_after_read_clears_sessions_and_transactions_views() { &executor, &observer_ctx, &format!( - "SELECT transaction_id FROM system.transactions WHERE transaction_id = '{transaction_id}'" + "SELECT transaction_id FROM system.transactions WHERE transaction_id = \ + '{transaction_id}'" ), ) .await, diff --git a/backend/crates/kalamdb-core/tests/test_all_sql_functions.rs 
b/backend/crates/kalamdb-core/tests/test_all_sql_functions.rs index b1e3f9d1c..f3d967817 100644 --- a/backend/crates/kalamdb-core/tests/test_all_sql_functions.rs +++ b/backend/crates/kalamdb-core/tests/test_all_sql_functions.rs @@ -5,12 +5,12 @@ //! - ID generation functions: SNOWFLAKE_ID(), UUID_V7(), ULID() //! - Function usage in SELECT, WHERE, INSERT, UPDATE, DELETE statements +use std::sync::Arc; + use datafusion::prelude::SessionContext; use kalamdb_commons::{Role, UserId}; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::datafusion_session::DataFusionSessionFactory; +use kalamdb_core::sql::{context::ExecutionContext, datafusion_session::DataFusionSessionFactory}; use kalamdb_session::AuthSession; -use std::sync::Arc; fn create_test_session() -> Arc { let factory = @@ -131,7 +131,10 @@ async fn test_all_context_functions_together() { let session = exec_ctx.create_session_with_user(); let result = session - .sql("SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, KDB_CURRENT_ROLE() AS role") + .sql( + "SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, \ + KDB_CURRENT_ROLE() AS role", + ) .await .unwrap(); let batches = result.collect().await.unwrap(); @@ -308,11 +311,8 @@ async fn test_context_and_id_functions_together() { let result = session .sql( - "SELECT KDB_CURRENT_USER() AS username, \ - KDB_CURRENT_ROLE() AS role, \ - SNOWFLAKE_ID() AS snowflake_id, \ - UUID_V7() AS uuid_v7, \ - ULID() AS ulid", + "SELECT KDB_CURRENT_USER() AS username, KDB_CURRENT_ROLE() AS role, SNOWFLAKE_ID() AS \ + snowflake_id, UUID_V7() AS uuid_v7, ULID() AS ulid", ) .await .unwrap(); @@ -370,9 +370,8 @@ async fn test_snowflake_id_generates_multiple_unique_ids() { // Query that generates multiple IDs let result = session .sql( - "SELECT SNOWFLAKE_ID() AS id1 UNION ALL \ - SELECT SNOWFLAKE_ID() UNION ALL \ - SELECT SNOWFLAKE_ID()", + "SELECT SNOWFLAKE_ID() AS id1 UNION ALL SELECT SNOWFLAKE_ID() UNION ALL 
SELECT \ + SNOWFLAKE_ID()", ) .await .unwrap(); @@ -394,11 +393,9 @@ async fn test_context_function_in_case_statement() { let result = session .sql( - "SELECT CASE \ - WHEN KDB_CURRENT_ROLE() = 'dba' THEN 'Administrator' \ - WHEN KDB_CURRENT_ROLE() = 'user' THEN 'Regular User' \ - ELSE 'Unknown' \ - END AS role_description", + "SELECT CASE WHEN KDB_CURRENT_ROLE() = 'dba' THEN 'Administrator' WHEN \ + KDB_CURRENT_ROLE() = 'user' THEN 'Regular User' ELSE 'Unknown' END AS \ + role_description", ) .await .unwrap(); @@ -416,10 +413,7 @@ async fn test_id_function_in_case_statement() { let result = session .sql( - "SELECT CASE \ - WHEN SNOWFLAKE_ID() > 0 THEN 'Valid ID' \ - ELSE 'Invalid ID' \ - END AS id_check", + "SELECT CASE WHEN SNOWFLAKE_ID() > 0 THEN 'Valid ID' ELSE 'Invalid ID' END AS id_check", ) .await .unwrap(); @@ -528,10 +522,8 @@ async fn test_example_all_context_functions() { // This example demonstrates all three context functions working together let result = session .sql( - "SELECT \ - KDB_CURRENT_USER() AS username, \ - KDB_CURRENT_USER() AS user_id, \ - KDB_CURRENT_ROLE() AS role", + "SELECT KDB_CURRENT_USER() AS username, KDB_CURRENT_USER() AS user_id, \ + KDB_CURRENT_ROLE() AS role", ) .await .unwrap(); @@ -548,12 +540,7 @@ async fn test_example_all_id_functions() { // This example demonstrates all three ID generation functions let result = session - .sql( - "SELECT \ - SNOWFLAKE_ID() AS snowflake_id, \ - UUID_V7() AS uuid_v7, \ - ULID() AS ulid", - ) + .sql("SELECT SNOWFLAKE_ID() AS snowflake_id, UUID_V7() AS uuid_v7, ULID() AS ulid") .await .unwrap(); let batches = result.collect().await.unwrap(); @@ -570,10 +557,8 @@ async fn test_example_mixed_functions() { // Mix context and ID functions let result = session .sql( - "SELECT \ - KDB_CURRENT_USER() AS current_user, \ - SNOWFLAKE_ID() AS new_record_id, \ - KDB_CURRENT_ROLE() AS admin_role", + "SELECT KDB_CURRENT_USER() AS current_user, SNOWFLAKE_ID() AS new_record_id, \ + KDB_CURRENT_ROLE() AS 
admin_role", ) .await .unwrap(); diff --git a/backend/crates/kalamdb-core/tests/test_context_functions.rs b/backend/crates/kalamdb-core/tests/test_context_functions.rs index 334a15521..14aebd799 100644 --- a/backend/crates/kalamdb-core/tests/test_context_functions.rs +++ b/backend/crates/kalamdb-core/tests/test_context_functions.rs @@ -1,11 +1,12 @@ -//! Integration tests for SQL context functions: KDB_CURRENT_USER(), KDB_CURRENT_USER(), KDB_CURRENT_ROLE() +//! Integration tests for SQL context functions: KDB_CURRENT_USER(), KDB_CURRENT_USER(), +//! KDB_CURRENT_ROLE() + +use std::sync::Arc; use datafusion::prelude::SessionContext; use kalamdb_commons::{Role, UserId}; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::datafusion_session::DataFusionSessionFactory; +use kalamdb_core::sql::{context::ExecutionContext, datafusion_session::DataFusionSessionFactory}; use kalamdb_session::AuthSession; -use std::sync::Arc; fn create_test_session() -> Arc { let factory = @@ -214,7 +215,10 @@ async fn test_all_three_functions_together() { let session = exec_ctx.create_session_with_user(); let result = session - .sql("SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, KDB_CURRENT_ROLE() AS role") + .sql( + "SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, \ + KDB_CURRENT_ROLE() AS role", + ) .await; assert!(result.is_ok(), "Query failed: {:?}", result.err()); @@ -267,7 +271,8 @@ async fn test_context_function_execution_uses_rewritten_aliases() { let result = session .sql( - "SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, KDB_CURRENT_ROLE() AS role", + "SELECT KDB_CURRENT_USER() AS current_user, KDB_CURRENT_USER() AS user_id, \ + KDB_CURRENT_ROLE() AS role", ) .await; assert!(result.is_ok(), "Query failed: {:?}", result.err()); diff --git a/backend/crates/kalamdb-core/tests/test_cte_support.rs b/backend/crates/kalamdb-core/tests/test_cte_support.rs index 59479d6dd..469fe89e6 100644 
--- a/backend/crates/kalamdb-core/tests/test_cte_support.rs +++ b/backend/crates/kalamdb-core/tests/test_cte_support.rs @@ -8,18 +8,20 @@ //! - CTEs with JOINs //! - CTEs with filtering +use std::sync::Arc; + use chrono::Utc; -use kalamdb_commons::models::StorageId; -use kalamdb_commons::{NodeId, Role, UserId}; +use kalamdb_commons::{models::StorageId, NodeId, Role, UserId}; use kalamdb_configs::ServerConfig; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult}; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::executor::SqlExecutor; +use kalamdb_core::{ + app_context::AppContext, + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::{handler_registry::HandlerRegistry, SqlExecutor}, + }, +}; use kalamdb_store::test_utils::TestDb; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; -use std::sync::Arc; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; /// Helper to create a fully-wired SqlExecutor with all handlers registered. 
fn create_executor(app_context: Arc) -> SqlExecutor { @@ -101,7 +103,8 @@ async fn setup_test_table( // Create a test table executor .execute( - "CREATE USER TABLE test_ns.employees (id INT PRIMARY KEY, name TEXT, department TEXT, salary INT)", + "CREATE USER TABLE test_ns.employees (id INT PRIMARY KEY, name TEXT, department TEXT, \ + salary INT)", exec_ctx, vec![], ) @@ -110,11 +113,16 @@ async fn setup_test_table( // Insert test data let insert_queries = vec![ - "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (1, 'Alice', 'Engineering', 100000)", - "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (2, 'Bob', 'Engineering', 90000)", - "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (3, 'Charlie', 'Sales', 80000)", - "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (4, 'Diana', 'Sales', 85000)", - "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (5, 'Eve', 'Marketing', 75000)", + "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (1, 'Alice', \ + 'Engineering', 100000)", + "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (2, 'Bob', \ + 'Engineering', 90000)", + "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (3, 'Charlie', \ + 'Sales', 80000)", + "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (4, 'Diana', \ + 'Sales', 85000)", + "INSERT INTO test_ns.employees (id, name, department, salary) VALUES (5, 'Eve', \ + 'Marketing', 75000)", ]; for query in insert_queries { diff --git a/backend/crates/kalamdb-core/tests/test_flush_operations.rs b/backend/crates/kalamdb-core/tests/test_flush_operations.rs index 204c241a6..19607a725 100644 --- a/backend/crates/kalamdb-core/tests/test_flush_operations.rs +++ b/backend/crates/kalamdb-core/tests/test_flush_operations.rs @@ -2,12 +2,14 @@ //! //! These tests cover utility functions and error scenarios in flush operations. 
-use datafusion::arrow::datatypes::{DataType, Field, Schema}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::manifest::flush::helpers; -use kalamdb_core::manifest::{FlushJobResult, FlushMetadata, TableFlush}; use std::sync::Arc; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use kalamdb_core::{ + error::KalamDbError, + manifest::{flush::helpers, FlushJobResult, FlushMetadata, TableFlush}, +}; + #[test] fn test_extract_pk_field_name_with_non_system_column() { let schema = Arc::new(Schema::new(vec![ diff --git a/backend/crates/kalamdb-core/tests/test_information_schema_columns.rs b/backend/crates/kalamdb-core/tests/test_information_schema_columns.rs index ceab54b85..7b5798097 100644 --- a/backend/crates/kalamdb-core/tests/test_information_schema_columns.rs +++ b/backend/crates/kalamdb-core/tests/test_information_schema_columns.rs @@ -3,11 +3,12 @@ //! This test verifies that the information_schema.columns table is properly //! registered and can be queried via SQL. 
+use std::sync::Arc; + use kalamdb_commons::NodeId; use kalamdb_configs::ServerConfig; use kalamdb_core::app_context::AppContext; use kalamdb_store::test_utils::TestDb; -use std::sync::Arc; /// Helper to create AppContext with temporary RocksDB for testing async fn create_test_app_context() -> (Arc, TestDb) { @@ -33,10 +34,8 @@ async fn test_information_schema_columns_query() { let session = app_ctx.base_session_context(); // Query information_schema.columns - let sql = "SELECT table_catalog, table_schema, table_name, column_name \ - FROM information_schema.columns \ - WHERE table_name = 'jobs' \ - ORDER BY ordinal_position \ + let sql = "SELECT table_catalog, table_schema, table_name, column_name FROM \ + information_schema.columns WHERE table_name = 'jobs' ORDER BY ordinal_position \ LIMIT 5"; let result = session.sql(sql).await; @@ -63,10 +62,8 @@ async fn test_information_schema_columns_shows_system_jobs() { let session = app_ctx.base_session_context(); // Query for system.jobs columns specifically - let sql = "SELECT column_name, data_type, is_nullable \ - FROM information_schema.columns \ - WHERE table_schema = 'system' AND table_name = 'jobs' \ - ORDER BY ordinal_position"; + let sql = "SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE \ + table_schema = 'system' AND table_name = 'jobs' ORDER BY ordinal_position"; let result = session.sql(sql).await; assert!(result.is_ok(), "Query failed: {:?}", result.err()); diff --git a/backend/crates/kalamdb-core/tests/test_manifest_cache.rs b/backend/crates/kalamdb-core/tests/test_manifest_cache.rs index 0a3d70c80..f9e53d09f 100644 --- a/backend/crates/kalamdb-core/tests/test_manifest_cache.rs +++ b/backend/crates/kalamdb-core/tests/test_manifest_cache.rs @@ -8,13 +8,13 @@ //! - T099: restore_from_rocksdb() after server restart //! - T100: SHOW MANIFEST returns all cached entries //! 
- T101: cache eviction and re-population +use std::sync::Arc; + use kalamdb_commons::{NamespaceId, TableId, TableName, UserId}; use kalamdb_configs::ManifestCacheSettings; use kalamdb_core::manifest::ManifestService; use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; -use kalamdb_system::providers::ManifestTableProvider; -use kalamdb_system::{Manifest, SyncState}; -use std::sync::Arc; +use kalamdb_system::{providers::ManifestTableProvider, Manifest, SyncState}; fn create_test_service_with_config(config: ManifestCacheSettings) -> ManifestService { let backend: Arc = Arc::new(InMemoryBackend::new()); @@ -127,8 +127,8 @@ fn test_update_after_flush_atomic_write() { // let config = ManifestCacheSettings::default(); // // Service 1: Add entries -// let service1 = ManifestService::new(Arc::clone(&backend), "/tmp/test".to_string(), config.clone()); -// let namespace1 = NamespaceId::new("ns1"); +// let service1 = ManifestService::new(Arc::clone(&backend), "/tmp/test".to_string(), +// config.clone()); let namespace1 = NamespaceId::new("ns1"); // let table1 = TableName::new("products"); // let table_id1 = TableId::new(namespace1.clone(), table1.clone()); // let manifest1 = create_test_manifest("ns1", "products", Some("u_123")); diff --git a/backend/crates/kalamdb-core/tests/test_manifest_operations.rs b/backend/crates/kalamdb-core/tests/test_manifest_operations.rs index 1dea324a3..a31570f21 100644 --- a/backend/crates/kalamdb-core/tests/test_manifest_operations.rs +++ b/backend/crates/kalamdb-core/tests/test_manifest_operations.rs @@ -3,12 +3,13 @@ //! These tests cover error scenarios, edge cases, and failure modes //! that could occur during manifest read/write operations. 
-use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::StoredScalarValue; -use kalamdb_commons::{NamespaceId, TableId, TableName, UserId}; -use kalamdb_system::{Manifest, SegmentMetadata}; use std::collections::HashMap; +use kalamdb_commons::{ + ids::SeqId, models::rows::StoredScalarValue, NamespaceId, TableId, TableName, UserId, +}; +use kalamdb_system::{Manifest, SegmentMetadata}; + #[test] fn test_manifest_serialization_deserialization() { let table_id = TableId::new(NamespaceId::new("test_ns"), TableName::new("test_table")); diff --git a/backend/crates/kalamdb-core/tests/test_typed_handlers.rs b/backend/crates/kalamdb-core/tests/test_typed_handlers.rs index 36c0f2533..c1fbdf784 100644 --- a/backend/crates/kalamdb-core/tests/test_typed_handlers.rs +++ b/backend/crates/kalamdb-core/tests/test_typed_handlers.rs @@ -2,26 +2,31 @@ //! //! Shows how the executor classifies SQL, parses once, and dispatches to typed handlers. +use std::sync::Arc; + use chrono::Utc; -use kalamdb_commons::models::StorageId; -use kalamdb_commons::models::UserId; -use kalamdb_commons::NodeId; -use kalamdb_commons::Role; +use kalamdb_commons::{ + models::{StorageId, UserId}, + NodeId, Role, +}; use kalamdb_configs::ServerConfig; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::ExecutionContext; -use kalamdb_core::sql::context::ExecutionResult; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::executor::SqlExecutor; -use kalamdb_jobs::executors::{ - BackupExecutor, CleanupExecutor, CompactExecutor, FlushExecutor, JobRegistry, RestoreExecutor, - RetentionExecutor, StreamEvictionExecutor, UserCleanupExecutor, VectorIndexExecutor, +use kalamdb_core::{ + app_context::AppContext, + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::{handler_registry::HandlerRegistry, SqlExecutor}, + }, +}; +use kalamdb_jobs::{ + executors::{ + BackupExecutor, CleanupExecutor, CompactExecutor, FlushExecutor, 
JobRegistry, + RestoreExecutor, RetentionExecutor, StreamEvictionExecutor, UserCleanupExecutor, + VectorIndexExecutor, + }, + JobsManager, }; -use kalamdb_jobs::JobsManager; use kalamdb_store::test_utils::TestDb; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; -use std::sync::Arc; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; fn create_executor(app_context: Arc) -> SqlExecutor { let registry = Arc::new(HandlerRegistry::new()); @@ -229,7 +234,10 @@ async fn test_storage_flush_table_returns_noop_when_flush_already_in_progress() .expect("second flush should be treated as no-op success"); match second_flush { ExecutionResult::Success { message } => { - assert!(message.contains("Storage flush skipped: a flush is already queued or running for table 'flush_busy.docs'")); + assert!(message.contains( + "Storage flush skipped: a flush is already queued or running for table \ + 'flush_busy.docs'" + )); }, other => panic!("Expected success result for second flush, got {:?}", other), } diff --git a/backend/crates/kalamdb-core/tests/test_vector_search_sql.rs b/backend/crates/kalamdb-core/tests/test_vector_search_sql.rs index 18db6e625..813cc01d7 100644 --- a/backend/crates/kalamdb-core/tests/test_vector_search_sql.rs +++ b/backend/crates/kalamdb-core/tests/test_vector_search_sql.rs @@ -1,31 +1,39 @@ +use std::sync::Arc; + use chrono::Utc; -use datafusion::arrow::array::{Array, Int32Array, Int64Array}; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{StorageId, TableId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::{NodeId, Role, UserId}; +use datafusion::arrow::{ + array::{Array, Int32Array, Int64Array}, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + ids::SeqId, + models::{StorageId, TableId}, + schemas::TableType, + NodeId, Role, UserId, +}; use kalamdb_configs::ServerConfig; -use 
kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult}; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::executor::SqlExecutor; -use kalamdb_core::vector::flush_shared_scope_vectors; -use kalamdb_jobs::executors::{ - BackupExecutor, CleanupExecutor, CompactExecutor, FlushExecutor, JobRegistry, RestoreExecutor, - RetentionExecutor, StreamEvictionExecutor, UserCleanupExecutor, VectorIndexExecutor, +use kalamdb_core::{ + app_context::AppContext, + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::{handler_registry::HandlerRegistry, SqlExecutor}, + }, + vector::flush_shared_scope_vectors, +}; +use kalamdb_jobs::{ + executors::{ + BackupExecutor, CleanupExecutor, CompactExecutor, FlushExecutor, JobRegistry, + RestoreExecutor, RetentionExecutor, StreamEvictionExecutor, UserCleanupExecutor, + VectorIndexExecutor, + }, + JobsManager, }; -use kalamdb_jobs::JobsManager; -use kalamdb_store::test_utils::TestDb; -use kalamdb_store::EntityStore; -use kalamdb_store::Partition; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::{Storage, VectorMetric}; +use kalamdb_store::{test_utils::TestDb, EntityStore, Partition}; +use kalamdb_system::{providers::storages::models::StorageType, Storage, VectorMetric}; use kalamdb_vector::{ new_indexed_shared_vector_hot_store, shared_vector_ops_partition_name, shared_vector_pk_index_partition_name, SharedVectorHotOpId, VectorHotOp, VectorHotOpType, }; -use std::sync::Arc; fn create_executor(app_context: Arc) -> SqlExecutor { let registry = Arc::new(HandlerRegistry::new()); @@ -186,14 +194,18 @@ async fn setup_vector_table(executor: &SqlExecutor, exec_ctx: &ExecutionContext) execute_sql( executor, exec_ctx, - "CREATE SHARED TABLE test_ns.documents (id INT PRIMARY KEY, title TEXT, embedding EMBEDDING(3), embedding_alt EMBEDDING(3))", + "CREATE SHARED TABLE test_ns.documents (id INT PRIMARY KEY, title 
TEXT, embedding \ + EMBEDDING(3), embedding_alt EMBEDDING(3))", ) .await; let inserts = [ - "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (1, 'Doc A', '[1.0,0.0,0.0]', '[0.0,1.0,0.0]')", - "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (2, 'Doc B', '[0.9,0.1,0.0]', '[0.2,0.8,0.0]')", - "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (3, 'Doc C', '[0.0,1.0,0.0]', '[1.0,0.0,0.0]')", + "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (1, 'Doc A', \ + '[1.0,0.0,0.0]', '[0.0,1.0,0.0]')", + "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (2, 'Doc B', \ + '[0.9,0.1,0.0]', '[0.2,0.8,0.0]')", + "INSERT INTO test_ns.documents (id, title, embedding, embedding_alt) VALUES (3, 'Doc C', \ + '[0.0,1.0,0.0]', '[1.0,0.0,0.0]')", ]; for sql in inserts { @@ -213,7 +225,8 @@ async fn test_cosine_distance_order_by_syntax_on_table() { let result = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 2", + "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') \ + LIMIT 2", ) .await; @@ -452,7 +465,8 @@ async fn test_multiple_vector_indexes_flush_and_selection() { let nearest_embedding = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 1", + "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') \ + LIMIT 1", ) .await; match nearest_embedding { @@ -468,7 +482,8 @@ async fn test_multiple_vector_indexes_flush_and_selection() { let nearest_alt = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding_alt, '[1.0,0.0,0.0]') LIMIT 1", + "SELECT id FROM test_ns.documents ORDER BY COSINE_DISTANCE(embedding_alt, \ + '[1.0,0.0,0.0]') LIMIT 1", ) .await; match nearest_alt { @@ -686,7 +701,8 
@@ async fn test_cosine_distance_query_combines_hot_and_cold_rows_after_index_flush let result = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_overlay ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM test_ns.docs_overlay ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') \ + LIMIT 3", ) .await; @@ -886,7 +902,8 @@ async fn test_cosine_distance_query_combines_hot_and_cold_rows() { let result = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_tiered ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 2", + "SELECT id FROM test_ns.docs_tiered ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') \ + LIMIT 2", ) .await; @@ -913,7 +930,8 @@ async fn test_vector_delete_stages_hot_tombstone_and_flushes_cleanup() { execute_sql( &executor, &exec_ctx, - "CREATE SHARED TABLE test_ns.docs_delete_cleanup (id INT PRIMARY KEY, title TEXT, embedding EMBEDDING(3))", + "CREATE SHARED TABLE test_ns.docs_delete_cleanup (id INT PRIMARY KEY, title TEXT, \ + embedding EMBEDDING(3))", ) .await; execute_sql( @@ -926,19 +944,22 @@ async fn test_vector_delete_stages_hot_tombstone_and_flushes_cleanup() { execute_sql( &executor, &exec_ctx, - "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (1, 'base', '[1.0,0.0,0.0]')", + "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (1, 'base', \ + '[1.0,0.0,0.0]')", ) .await; execute_sql( &executor, &exec_ctx, - "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (2, 'near', '[0.95,0.05,0.0]')", + "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (2, 'near', \ + '[0.95,0.05,0.0]')", ) .await; execute_sql( &executor, &exec_ctx, - "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (3, 'far', '[0.0,1.0,0.0]')", + "INSERT INTO test_ns.docs_delete_cleanup (id, title, embedding) VALUES (3, 'far', \ + '[0.0,1.0,0.0]')", ) .await; @@ -992,7 +1013,8 @@ async fn 
test_vector_delete_stages_hot_tombstone_and_flushes_cleanup() { let hot_query = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_delete_cleanup ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM test_ns.docs_delete_cleanup ORDER BY COSINE_DISTANCE(embedding, \ + '[1.0,0.0,0.0]') LIMIT 3", ) .await; match hot_query { @@ -1037,7 +1059,8 @@ async fn test_vector_delete_stages_hot_tombstone_and_flushes_cleanup() { let cold_query = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_delete_cleanup ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM test_ns.docs_delete_cleanup ORDER BY COSINE_DISTANCE(embedding, \ + '[1.0,0.0,0.0]') LIMIT 3", ) .await; match cold_query { @@ -1063,7 +1086,8 @@ async fn test_multiple_vector_indexes_with_mixed_rows_and_null_embeddings() { execute_sql( &executor, &exec_ctx, - "CREATE SHARED TABLE test_ns.docs_mixed_vectors (id INT PRIMARY KEY, title TEXT, category TEXT, score DOUBLE, embedding EMBEDDING(3), embedding_alt EMBEDDING(3))", + "CREATE SHARED TABLE test_ns.docs_mixed_vectors (id INT PRIMARY KEY, title TEXT, category \ + TEXT, score DOUBLE, embedding EMBEDDING(3), embedding_alt EMBEDDING(3))", ) .await; execute_sql( @@ -1080,10 +1104,14 @@ async fn test_multiple_vector_indexes_with_mixed_rows_and_null_embeddings() { .await; let rows = [ - "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, embedding_alt) VALUES (1, 'alpha', 'a', 10.5, '[1.0,0.0,0.0]', '[0.0,1.0,0.0]')", - "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, embedding_alt) VALUES (2, 'beta', 'b', 9.0, NULL, '[0.02,0.98,0.0]')", - "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, embedding_alt) VALUES (3, 'gamma', 'c', 8.5, '[0.8,0.2,0.0]', NULL)", - "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, embedding_alt) VALUES (4, 'delta', 'd', 7.5, '[0.0,1.0,0.0]', 
'[1.0,0.0,0.0]')", + "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, \ + embedding_alt) VALUES (1, 'alpha', 'a', 10.5, '[1.0,0.0,0.0]', '[0.0,1.0,0.0]')", + "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, \ + embedding_alt) VALUES (2, 'beta', 'b', 9.0, NULL, '[0.02,0.98,0.0]')", + "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, \ + embedding_alt) VALUES (3, 'gamma', 'c', 8.5, '[0.8,0.2,0.0]', NULL)", + "INSERT INTO test_ns.docs_mixed_vectors (id, title, category, score, embedding, \ + embedding_alt) VALUES (4, 'delta', 'd', 7.5, '[0.0,1.0,0.0]', '[1.0,0.0,0.0]')", ]; for sql in rows { execute_sql(&executor, &exec_ctx, sql).await; @@ -1129,7 +1157,8 @@ async fn test_multiple_vector_indexes_with_mixed_rows_and_null_embeddings() { let result_primary = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_mixed_vectors ORDER BY COSINE_DISTANCE(embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM test_ns.docs_mixed_vectors ORDER BY COSINE_DISTANCE(embedding, \ + '[1.0,0.0,0.0]') LIMIT 3", ) .await; match result_primary { @@ -1147,7 +1176,8 @@ async fn test_multiple_vector_indexes_with_mixed_rows_and_null_embeddings() { let result_alt = execute_sql( &executor, &exec_ctx, - "SELECT id FROM test_ns.docs_mixed_vectors ORDER BY COSINE_DISTANCE(embedding_alt, '[0.0,1.0,0.0]') LIMIT 3", + "SELECT id FROM test_ns.docs_mixed_vectors ORDER BY COSINE_DISTANCE(embedding_alt, \ + '[0.0,1.0,0.0]') LIMIT 3", ) .await; match result_alt { diff --git a/backend/crates/kalamdb-core/tests/transaction_buffer_limit.rs b/backend/crates/kalamdb-core/tests/transaction_buffer_limit.rs index 84759d6e0..f5af040b2 100644 --- a/backend/crates/kalamdb-core/tests/transaction_buffer_limit.rs +++ b/backend/crates/kalamdb-core/tests/transaction_buffer_limit.rs @@ -2,8 +2,7 @@ mod support; use std::sync::Arc; -use kalamdb_commons::models::pg_operations::InsertRequest; -use 
kalamdb_commons::TableType; +use kalamdb_commons::{models::pg_operations::InsertRequest, TableType}; use kalamdb_configs::ServerConfig; use kalamdb_core::operations::service::OperationService; use kalamdb_pg::OperationExecutor; diff --git a/backend/crates/kalamdb-core/tests/transaction_cluster_failover.rs b/backend/crates/kalamdb-core/tests/transaction_cluster_failover.rs index dd97458cf..cf777d147 100644 --- a/backend/crates/kalamdb-core/tests/transaction_cluster_failover.rs +++ b/backend/crates/kalamdb-core/tests/transaction_cluster_failover.rs @@ -1,10 +1,8 @@ mod support; -use kalamdb_commons::models::NodeId; -use kalamdb_commons::TransactionState; +use kalamdb_commons::{models::NodeId, TransactionState}; use kalamdb_core::transactions::TransactionRaftBinding; use ntest::timeout; - use support::{ create_cluster_app_context, create_executor, create_shared_table, execute_err, execute_ok, insert_sql, observer_exec_ctx, request_exec_ctx, request_transaction_state, select_names, diff --git a/backend/crates/kalamdb-core/tests/transaction_cluster_group_rejection.rs b/backend/crates/kalamdb-core/tests/transaction_cluster_group_rejection.rs index 1900f05fa..3ab8835ba 100644 --- a/backend/crates/kalamdb-core/tests/transaction_cluster_group_rejection.rs +++ b/backend/crates/kalamdb-core/tests/transaction_cluster_group_rejection.rs @@ -2,7 +2,6 @@ mod support; use kalamdb_core::transactions::TransactionRaftBinding; use ntest::timeout; - use support::{ create_cluster_app_context, create_executor, create_shared_table, create_user_table, execute_err, execute_ok, insert_sql, request_exec_ctx, request_transaction_state, select_names, diff --git a/backend/crates/kalamdb-core/tests/transaction_commit_live_fanout.rs b/backend/crates/kalamdb-core/tests/transaction_commit_live_fanout.rs index abe29125f..850733afb 100644 --- a/backend/crates/kalamdb-core/tests/transaction_commit_live_fanout.rs +++ b/backend/crates/kalamdb-core/tests/transaction_commit_live_fanout.rs @@ -1,21 +1,18 @@ 
mod support; -use std::sync::Arc; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{ - ConnectionId, LiveQueryId, OperationKind, TransactionOrigin, UserId, +use kalamdb_commons::{ + models::{rows::Row, ConnectionId, LiveQueryId, OperationKind, TransactionOrigin, UserId}, + websocket::ChangeType, + TableType, }; -use kalamdb_commons::websocket::ChangeType; -use kalamdb_commons::TableType; use kalamdb_core::transactions::{ExecutionOwnerKey, StagedMutation}; use kalamdb_live::models::{ NotificationSender, SubscriptionFlowControl, SubscriptionHandle, SubscriptionRuntimeMetadata, }; -use tokio::sync::mpsc; - use support::{create_cluster_app_context, create_shared_table, row, unique_namespace}; +use tokio::sync::mpsc; fn make_shared_handle( subscription_id: &str, diff --git a/backend/crates/kalamdb-core/tests/transaction_races.rs b/backend/crates/kalamdb-core/tests/transaction_races.rs index 7802494d4..d399b77be 100644 --- a/backend/crates/kalamdb-core/tests/transaction_races.rs +++ b/backend/crates/kalamdb-core/tests/transaction_races.rs @@ -1,13 +1,10 @@ mod support; -use std::sync::Arc; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; -use kalamdb_commons::models::pg_operations::InsertRequest; -use kalamdb_commons::TableType; +use kalamdb_commons::{models::pg_operations::InsertRequest, TableType}; use kalamdb_configs::ServerConfig; -use kalamdb_core::operations::service::OperationService; -use kalamdb_core::transactions::ExecutionOwnerKey; +use kalamdb_core::{operations::service::OperationService, transactions::ExecutionOwnerKey}; use kalamdb_pg::OperationExecutor; use support::{ create_cluster_app_context, create_cluster_app_context_with_config, create_executor, @@ -61,7 +58,8 @@ async fn repeated_commit_vs_rollback_clears_coordinator_state() { let rollback_ok = rollback_result.is_ok(); assert_ne!( commit_ok, rollback_ok, - "exactly one terminal path should win: 
commit={commit_result:?} rollback={rollback_result:?}" + "exactly one terminal path should win: commit={commit_result:?} \ + rollback={rollback_result:?}" ); assert!(app_ctx.transaction_coordinator().active_metrics().is_empty()); diff --git a/backend/crates/kalamdb-core/tests/transaction_stream_table_rejection.rs b/backend/crates/kalamdb-core/tests/transaction_stream_table_rejection.rs index 46f0a4d64..e7307c13c 100644 --- a/backend/crates/kalamdb-core/tests/transaction_stream_table_rejection.rs +++ b/backend/crates/kalamdb-core/tests/transaction_stream_table_rejection.rs @@ -2,9 +2,10 @@ mod support; use std::sync::Arc; -use kalamdb_commons::models::pg_operations::InsertRequest; -use kalamdb_commons::models::{TableId, TableName}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + models::{pg_operations::InsertRequest, TableId, TableName}, + TableType, +}; use kalamdb_core::operations::service::OperationService; use kalamdb_pg::OperationExecutor; use support::{create_cluster_app_context, row, unique_namespace}; diff --git a/backend/crates/kalamdb-core/tests/transaction_timeout.rs b/backend/crates/kalamdb-core/tests/transaction_timeout.rs index 3b9dcc22c..d1172fc97 100644 --- a/backend/crates/kalamdb-core/tests/transaction_timeout.rs +++ b/backend/crates/kalamdb-core/tests/transaction_timeout.rs @@ -1,10 +1,8 @@ mod support; -use std::sync::Arc; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; -use kalamdb_commons::models::pg_operations::InsertRequest; -use kalamdb_commons::TableType; +use kalamdb_commons::{models::pg_operations::InsertRequest, TableType}; use kalamdb_configs::ServerConfig; use kalamdb_core::operations::service::OperationService; use kalamdb_pg::OperationExecutor; diff --git a/backend/crates/kalamdb-core/tests/transaction_user_batch_commit.rs b/backend/crates/kalamdb-core/tests/transaction_user_batch_commit.rs index 7face1c7f..e853d1356 100644 --- a/backend/crates/kalamdb-core/tests/transaction_user_batch_commit.rs +++ 
b/backend/crates/kalamdb-core/tests/transaction_user_batch_commit.rs @@ -3,16 +3,17 @@ mod support; use std::sync::Arc; use datafusion_common::ScalarValue; -use kalamdb_commons::models::pg_operations::InsertRequest; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TransactionOrigin, UserId}; -use kalamdb_commons::TableType; -use kalamdb_core::operations::service::OperationService; -use kalamdb_core::transactions::{ExecutionOwnerKey, StagedMutation}; +use kalamdb_commons::{ + models::{pg_operations::InsertRequest, rows::Row, OperationKind, TransactionOrigin, UserId}, + TableType, +}; +use kalamdb_core::{ + operations::service::OperationService, + transactions::{ExecutionOwnerKey, StagedMutation}, +}; use kalamdb_pg::OperationExecutor; use kalamdb_sharding::ShardRouter; use kalamdb_tables::UserTableProvider; - use support::{create_cluster_app_context, create_user_table, row, unique_namespace}; fn insert_mutation( diff --git a/backend/crates/kalamdb-datafusion-sources/src/exec.rs b/backend/crates/kalamdb-datafusion-sources/src/exec.rs index bb58e8fe0..2f58e3166 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/exec.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/exec.rs @@ -5,29 +5,34 @@ //! single monolithic plan type across families with very different semantics //! (MVCC merge, one-shot views, vector TVFs, overlay). 
-use std::any::Any; -use std::cmp::Ordering; -use std::collections::{BTreeMap, HashMap}; -use std::fmt; -use std::sync::Arc; - -use async_trait::async_trait; -use arrow::array::{Array, BooleanArray, Int64Array, StringArray, UInt64Array}; -use arrow::compute; -use arrow::record_batch::RecordBatch; +use std::{ + any::Any, + cmp::Ordering, + collections::{BTreeMap, HashMap}, + fmt, + sync::Arc, +}; + +use arrow::{ + array::{Array, BooleanArray, Int64Array, StringArray, UInt64Array}, + compute, + record_batch::RecordBatch, +}; use arrow_schema::SchemaRef; -use crate::stats::single_partition_plan_properties; -use crate::stream::one_shot_batch_stream; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::execution::{SendableRecordBatchStream, TaskContext}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::conversions::arrow_json_conversion::arrow_value_to_scalar; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::serialization::row_codec::RowMetadata; +use async_trait::async_trait; +use datafusion::{ + error::{DataFusionError, Result as DataFusionResult}, + execution::{SendableRecordBatchStream, TaskContext}, + physical_expr::PhysicalExpr, + physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties}, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + constants::SystemColumnNames, conversions::arrow_json_conversion::arrow_value_to_scalar, + ids::SeqId, models::rows::Row, serialization::row_codec::RowMetadata, +}; + +use crate::{stats::single_partition_plan_properties, stream::one_shot_batch_stream}; /// Apply provider-side filter, projection, and limit handling to a deferred /// source batch after the source has materialized its raw rows. 
@@ -103,13 +108,7 @@ pub fn prefers_version( where S: Ord, { - version_ordering( - candidate_commit_seq, - candidate_seq, - current_commit_seq, - current_seq, - ) - .is_gt() + version_ordering(candidate_commit_seq, candidate_seq, current_commit_seq, current_seq).is_gt() } /// Shared version candidate used by metadata-first MVCC merge helpers. @@ -198,17 +197,10 @@ where { let hot_iter = hot_candidates.into_iter(); let cold_iter = cold_candidates.into_iter(); - let estimated_capacity = hot_iter - .size_hint() - .0 - .saturating_add(cold_iter.size_hint().0) - .max(64); + let estimated_capacity = hot_iter.size_hint().0.saturating_add(cold_iter.size_hint().0).max(64); let mut best: HashMap> = HashMap::with_capacity(estimated_capacity); - for candidate in hot_iter - .map(Candidate::Hot) - .chain(cold_iter.map(Candidate::Cold)) - { + for candidate in hot_iter.map(Candidate::Hot).chain(cold_iter.map(Candidate::Cold)) { if !is_visible_at_snapshot(candidate.commit_seq(), snapshot_commit_seq) { continue; } @@ -315,12 +307,8 @@ where R: VersionedRow, { select_latest_versions( - hot_rows - .into_iter() - .map(|row| version_candidate_from_row(pk_name, &row, ())), - cold_rows - .into_iter() - .map(|row| version_candidate_from_row(pk_name, &row, ())), + hot_rows.into_iter().map(|row| version_candidate_from_row(pk_name, &row, ())), + cold_rows.into_iter().map(|row| version_candidate_from_row(pk_name, &row, ())), snapshot_commit_seq, false, ) @@ -335,12 +323,7 @@ pub fn count_resolved_from_metadata( ) -> DataFusionResult { let cold_metadata = parquet_batch_to_metadata(cold_batch, pk_name)?; - Ok(count_merged_rows( - pk_name, - hot_metadata, - cold_metadata, - snapshot_commit_seq, - )) + Ok(count_merged_rows(pk_name, hot_metadata, cold_metadata, snapshot_commit_seq)) } pub fn merge_versioned_rows( @@ -500,19 +483,17 @@ impl<'a> ParquetBatchDecoder<'a> { .fields() .iter() .position(|field| field.name() == SystemColumnNames::COMMIT_SEQ); - let pk_idx = pk_name.and_then(|name| { - 
schema.fields().iter().position(|field| field.name() == name) - }); - - let seq_array = batch - .column(seq_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| DataFusionError::Execution("_seq column is not Int64Array".to_string()))?; + let pk_idx = + pk_name.and_then(|name| schema.fields().iter().position(|field| field.name() == name)); + + let seq_array = + batch.column(seq_idx).as_any().downcast_ref::().ok_or_else(|| { + DataFusionError::Execution("_seq column is not Int64Array".to_string()) + })?; let deleted_array = deleted_idx.and_then(|idx| batch.column(idx).as_any().downcast_ref::()); - let commit_seq_array = commit_seq_idx - .and_then(|idx| batch.column(idx).as_any().downcast_ref::()); + let commit_seq_array = + commit_seq_idx.and_then(|idx| batch.column(idx).as_any().downcast_ref::()); let pk_string_array = pk_idx.and_then(|idx| batch.column(idx).as_any().downcast_ref::()); let value_column_indices = schema diff --git a/backend/crates/kalamdb-datafusion-sources/src/lib.rs b/backend/crates/kalamdb-datafusion-sources/src/lib.rs index 6bb0a7375..63c3a4ca8 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/lib.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/lib.rs @@ -9,14 +9,12 @@ //! //! ## Module boundaries //! -//! - [`provider`]: scan descriptors, capability matrix, and thin -//! `TableProvider`-adjacent traits. -//! - [`exec`]: shared [`ExecutionPlan`][datafusion::physical_plan::ExecutionPlan] -//! scaffolding built on the DataFusion 53.x surface. +//! - [`provider`]: scan descriptors, capability matrix, and thin `TableProvider`-adjacent traits. +//! - [`exec`]: shared [`ExecutionPlan`][datafusion::physical_plan::ExecutionPlan] scaffolding built +//! on the DataFusion 53.x surface. //! - [`stream`]: [`SendableRecordBatchStream`][datafusion::execution::SendableRecordBatchStream] //! adapters that preserve Arrow buffer sharing where possible. -//! - [`pruning`]: filter, projection, limit, and pruning descriptors reused by -//! 
every source family. +//! - [`pruning`]: filter, projection, limit, and pruning descriptors reused by every source family. //! - [`stats`]: partition statistics and `PlanProperties` builders. //! //! ## Library policy diff --git a/backend/crates/kalamdb-datafusion-sources/src/provider.rs b/backend/crates/kalamdb-datafusion-sources/src/provider.rs index b0b29d69f..8343c2738 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/provider.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/provider.rs @@ -7,9 +7,10 @@ use std::sync::Arc; use arrow_schema::SchemaRef; -use crate::pruning::{FilterRequest, LimitRequest, ProjectionRequest, PruningRequest}; use datafusion::logical_expr::{utils::expr_to_columns, Expr, TableProviderFilterPushDown}; +use crate::pruning::{FilterRequest, LimitRequest, ProjectionRequest, PruningRequest}; + /// Capability reporting for a single pushdown filter. /// /// Mirrors [`TableProviderFilterPushDown`] but is produced by @@ -99,7 +100,9 @@ impl ScanDescriptor { pub fn projection_request(&self) -> ProjectionRequest { match self.projection.as_deref() { - Some(indices) => ProjectionRequest { columns: Some(Arc::from(indices)) }, + Some(indices) => ProjectionRequest { + columns: Some(Arc::from(indices)), + }, None => ProjectionRequest::full(), } } @@ -115,11 +118,7 @@ impl ScanDescriptor { } pub fn pruning_request(&self) -> PruningRequest { - PruningRequest::new( - self.projection_request(), - self.filter_request(), - self.limit_request(), - ) + PruningRequest::new(self.projection_request(), self.filter_request(), self.limit_request()) } } @@ -165,8 +164,7 @@ pub fn merged_projection_for_filters( match projection { Some(indices) if filters.is_empty() => Some(indices.clone()), Some(indices) => { - let mut needed: std::collections::HashSet = - indices.iter().copied().collect(); + let mut needed: std::collections::HashSet = indices.iter().copied().collect(); let mut filter_columns = std::collections::HashSet::new(); for filter in filters { 
let _ = expr_to_columns(filter, &mut filter_columns); diff --git a/backend/crates/kalamdb-datafusion-sources/src/pruning.rs b/backend/crates/kalamdb-datafusion-sources/src/pruning.rs index 04456b0fb..607432860 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/pruning.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/pruning.rs @@ -156,7 +156,5 @@ fn references_column(filter: &Expr, column_name: &str) -> bool { return false; } - columns - .iter() - .any(|column| column.name.eq_ignore_ascii_case(column_name)) + columns.iter().any(|column| column.name.eq_ignore_ascii_case(column_name)) } diff --git a/backend/crates/kalamdb-datafusion-sources/src/stats.rs b/backend/crates/kalamdb-datafusion-sources/src/stats.rs index 48ccd75ed..b0259c0d1 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/stats.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/stats.rs @@ -4,10 +4,14 @@ //! `Statistics` and `PlanProperties` types so providers can emit trustworthy //! metadata without duplicating builder code. -use datafusion::common::stats::Precision; -use datafusion::physical_expr::EquivalenceProperties; -use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType}; -use datafusion::physical_plan::{Partitioning, PlanProperties, Statistics}; +use datafusion::{ + common::stats::Precision, + physical_expr::EquivalenceProperties, + physical_plan::{ + execution_plan::{Boundedness, EmissionType}, + Partitioning, PlanProperties, Statistics, + }, +}; /// Build conservative [`PlanProperties`] for a single-partition, bounded /// source. Sources with richer guarantees should call [`PlanProperties::new`] diff --git a/backend/crates/kalamdb-datafusion-sources/src/stream.rs b/backend/crates/kalamdb-datafusion-sources/src/stream.rs index 8f9319311..eaa768047 100644 --- a/backend/crates/kalamdb-datafusion-sources/src/stream.rs +++ b/backend/crates/kalamdb-datafusion-sources/src/stream.rs @@ -6,16 +6,19 @@ //! 
preserved wherever possible: the adapters forward `RecordBatch` slices and //! never copy the underlying buffers. -use std::future::Future; -use std::pin::Pin; -use std::task::{Context, Poll}; +use std::{ + future::Future, + pin::Pin, + task::{Context, Poll}, +}; use arrow::record_batch::RecordBatch; use arrow_schema::SchemaRef; -use datafusion::error::DataFusionError; -use datafusion::execution::RecordBatchStream; -use datafusion::physical_plan::SendableRecordBatchStream; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::{ + error::DataFusionError, + execution::RecordBatchStream, + physical_plan::{stream::RecordBatchStreamAdapter, SendableRecordBatchStream}, +}; use futures_util::{stream, Stream}; /// Wrap a one-shot async batch producer as a [`SendableRecordBatchStream`]. @@ -76,7 +79,7 @@ impl Stream for LimitedRecordBatchStream { self.done = true; Poll::Ready(Some(Ok(batch.slice(0, take)))) } - } + }, other => other, } } diff --git a/backend/crates/kalamdb-datafusion-sources/tests/current_api_surface.rs b/backend/crates/kalamdb-datafusion-sources/tests/current_api_surface.rs index 41a46b256..305112b90 100644 --- a/backend/crates/kalamdb-datafusion-sources/tests/current_api_surface.rs +++ b/backend/crates/kalamdb-datafusion-sources/tests/current_api_surface.rs @@ -26,8 +26,7 @@ fn uses_current_plan_properties_surface() { #[allow(dead_code)] fn uses_current_record_batch_stream_surface() { - use datafusion::execution::RecordBatchStream; - use datafusion::physical_plan::SendableRecordBatchStream; + use datafusion::{execution::RecordBatchStream, physical_plan::SendableRecordBatchStream}; fn _assert_trait() {} fn _expect(_s: SendableRecordBatchStream) {} } @@ -37,10 +36,11 @@ fn uses_shared_exec_and_stream_helpers() { use std::sync::Arc; use arrow_schema::Schema; - use datafusion::error::{DataFusionError, Result as DataFusionResult}; - use datafusion::physical_plan::SendableRecordBatchStream; - use 
kalamdb_datafusion_sources::exec::projected_schema; - use kalamdb_datafusion_sources::stream::one_shot_batch_stream; + use datafusion::{ + error::{DataFusionError, Result as DataFusionResult}, + physical_plan::SendableRecordBatchStream, + }; + use kalamdb_datafusion_sources::{exec::projected_schema, stream::one_shot_batch_stream}; fn _project(schema: Arc) -> DataFusionResult> { projected_schema(&schema, None) diff --git a/backend/crates/kalamdb-datafusion-sources/tests/descriptor_contract.rs b/backend/crates/kalamdb-datafusion-sources/tests/descriptor_contract.rs index 55c99ac49..c078cf1e1 100644 --- a/backend/crates/kalamdb-datafusion-sources/tests/descriptor_contract.rs +++ b/backend/crates/kalamdb-datafusion-sources/tests/descriptor_contract.rs @@ -54,14 +54,8 @@ fn scan_descriptor_exposes_pruning_requests() { #[test] fn filter_capability_maps_to_datafusion() { use datafusion::logical_expr::TableProviderFilterPushDown as DfPushDown; - assert!(matches!( - DfPushDown::from(FilterCapability::Exact), - DfPushDown::Exact - )); - assert!(matches!( - DfPushDown::from(FilterCapability::Inexact), - DfPushDown::Inexact - )); + assert!(matches!(DfPushDown::from(FilterCapability::Exact), DfPushDown::Exact)); + assert!(matches!(DfPushDown::from(FilterCapability::Inexact), DfPushDown::Inexact)); assert!(matches!( DfPushDown::from(FilterCapability::Unsupported), DfPushDown::Unsupported @@ -73,10 +67,7 @@ struct StubSource { } impl SourceProvider for StubSource { - fn filter_capability( - &self, - filter: &datafusion::logical_expr::Expr, - ) -> FilterCapability { + fn filter_capability(&self, filter: &datafusion::logical_expr::Expr) -> FilterCapability { use datafusion::logical_expr::Expr; match filter { Expr::BinaryExpr(_) => FilterCapability::Exact, @@ -99,7 +90,9 @@ impl SourceProvider for StubSource { #[test] fn source_provider_trait_reports_and_builds_descriptor() { - let src = StubSource { schema: test_schema() }; + let src = StubSource { + schema: test_schema(), + }; let 
filter = col("id").eq(lit(1i64)); assert_eq!(src.filter_capability(&filter), FilterCapability::Exact); let desc = src.scan_descriptor(Some(&vec![0]), &[filter], Some(5)); diff --git a/backend/crates/kalamdb-dba/src/bootstrap.rs b/backend/crates/kalamdb-dba/src/bootstrap.rs index c7c302ac2..090b7ba35 100644 --- a/backend/crates/kalamdb-dba/src/bootstrap.rs +++ b/backend/crates/kalamdb-dba/src/bootstrap.rs @@ -1,9 +1,13 @@ -use crate::error::Result; -use crate::models::{bootstrap_table_definitions, DBA_NAMESPACE}; +use std::sync::Arc; + use kalamdb_commons::models::{NamespaceId, TableId}; use kalamdb_core::app_context::AppContext; use kalamdb_system::Namespace; -use std::sync::Arc; + +use crate::{ + error::Result, + models::{bootstrap_table_definitions, DBA_NAMESPACE}, +}; pub fn initialize_dba_namespace(app_context: Arc) -> Result<()> { ensure_namespace_exists(app_context.as_ref())?; diff --git a/backend/crates/kalamdb-dba/src/error.rs b/backend/crates/kalamdb-dba/src/error.rs index eb623215e..d3d0a0fa5 100644 --- a/backend/crates/kalamdb-dba/src/error.rs +++ b/backend/crates/kalamdb-dba/src/error.rs @@ -1,5 +1,4 @@ -use kalamdb_core::applier::ApplierError; -use kalamdb_core::error::KalamDbError; +use kalamdb_core::{applier::ApplierError, error::KalamDbError}; use kalamdb_system::SystemError; use kalamdb_tables::TableError; use thiserror::Error; diff --git a/backend/crates/kalamdb-dba/src/lib.rs b/backend/crates/kalamdb-dba/src/lib.rs index 8b357e709..e1d7b631c 100644 --- a/backend/crates/kalamdb-dba/src/lib.rs +++ b/backend/crates/kalamdb-dba/src/lib.rs @@ -10,4 +10,6 @@ pub use error::{DbaError, Result}; pub use repository::{ DbaRegistry, NotificationsRepository, SharedTableRepository, StatsRepository, }; -pub use stats_recorder::{record_stats_snapshot, start_stats_recorder}; +pub use stats_recorder::{ + record_stats_snapshot, start_startup_stats_snapshot, start_stats_recorder, +}; diff --git a/backend/crates/kalamdb-dba/src/mapping.rs 
b/backend/crates/kalamdb-dba/src/mapping.rs index 8a438db7e..0d092790f 100644 --- a/backend/crates/kalamdb-dba/src/mapping.rs +++ b/backend/crates/kalamdb-dba/src/mapping.rs @@ -1,9 +1,11 @@ +use kalamdb_commons::{ + conversions::{row_to_serde_model, serde_model_to_row}, + models::rows::Row, + schemas::TableDefinition, +}; +use serde::{de::DeserializeOwned, Serialize}; + use crate::error::{DbaError, Result}; -use kalamdb_commons::conversions::{row_to_serde_model, serde_model_to_row}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::schemas::TableDefinition; -use serde::de::DeserializeOwned; -use serde::Serialize; pub fn model_to_row(model: &T, table_def: &TableDefinition) -> Result { serde_model_to_row(model, table_def).map_err(DbaError::Serialization) diff --git a/backend/crates/kalamdb-dba/src/models/mod.rs b/backend/crates/kalamdb-dba/src/models/mod.rs index d5e0b8c2c..a227d6158 100644 --- a/backend/crates/kalamdb-dba/src/models/mod.rs +++ b/backend/crates/kalamdb-dba/src/models/mod.rs @@ -1,12 +1,12 @@ mod notification; mod stats; -use crate::error::Result; use kalamdb_commons::schemas::TableDefinition; - pub use notification::NotificationRow; pub use stats::StatsRow; +use crate::error::Result; + pub const DBA_NAMESPACE: &str = "dba"; pub fn bootstrap_table_definitions() -> Result> { diff --git a/backend/crates/kalamdb-dba/src/models/notification.rs b/backend/crates/kalamdb-dba/src/models/notification.rs index feb6a90ef..2a2ba0a8e 100644 --- a/backend/crates/kalamdb-dba/src/models/notification.rs +++ b/backend/crates/kalamdb-dba/src/models/notification.rs @@ -1,9 +1,12 @@ -use crate::models::DBA_NAMESPACE; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{NamespaceId, TableId, TableName, UserId}; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{NamespaceId, TableId, TableName, UserId}, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; +use crate::models::DBA_NAMESPACE; + #[table( name = 
"notifications", namespace = "dba", diff --git a/backend/crates/kalamdb-dba/src/models/stats.rs b/backend/crates/kalamdb-dba/src/models/stats.rs index f7001771d..3049b2cfc 100644 --- a/backend/crates/kalamdb-dba/src/models/stats.rs +++ b/backend/crates/kalamdb-dba/src/models/stats.rs @@ -1,11 +1,14 @@ -use crate::models::DBA_NAMESPACE; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{NamespaceId, TableId, TableName}; -use kalamdb_commons::schemas::{TableDefinition, TableOptions}; -use kalamdb_commons::TableAccess; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{NamespaceId, TableId, TableName}, + schemas::{TableDefinition, TableOptions}, + TableAccess, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; +use crate::models::DBA_NAMESPACE; + #[table( name = "stats", namespace = "dba", diff --git a/backend/crates/kalamdb-dba/src/repository/mod.rs b/backend/crates/kalamdb-dba/src/repository/mod.rs index 5fba4ef0c..082944321 100644 --- a/backend/crates/kalamdb-dba/src/repository/mod.rs +++ b/backend/crates/kalamdb-dba/src/repository/mod.rs @@ -2,13 +2,12 @@ mod notifications_repository; mod shared_repository; mod stats_repository; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_core::app_context::AppContext; -use serde::Serialize; use std::sync::Arc; +use kalamdb_commons::{models::TableId, schemas::TableDefinition}; +use kalamdb_core::app_context::AppContext; pub use notifications_repository::NotificationsRepository; +use serde::Serialize; pub use shared_repository::SharedTableRepository; pub use stats_repository::StatsRepository; diff --git a/backend/crates/kalamdb-dba/src/repository/notifications_repository.rs b/backend/crates/kalamdb-dba/src/repository/notifications_repository.rs index c7ba9f0c8..11d0d4660 100644 --- a/backend/crates/kalamdb-dba/src/repository/notifications_repository.rs +++ 
b/backend/crates/kalamdb-dba/src/repository/notifications_repository.rs @@ -1,5 +1,7 @@ -use crate::models::NotificationRow; -use crate::repository::{RepositoryModel, SharedTableRepository}; +use crate::{ + models::NotificationRow, + repository::{RepositoryModel, SharedTableRepository}, +}; pub type NotificationsRepository = SharedTableRepository; diff --git a/backend/crates/kalamdb-dba/src/repository/shared_repository.rs b/backend/crates/kalamdb-dba/src/repository/shared_repository.rs index 638ef89fc..a1e221f3d 100644 --- a/backend/crates/kalamdb-dba/src/repository/shared_repository.rs +++ b/backend/crates/kalamdb-dba/src/repository/shared_repository.rs @@ -1,11 +1,15 @@ -use crate::error::{DbaError, Result}; -use crate::mapping::model_to_row; -use crate::repository::{current_definition, RepositoryModel}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::providers::base::find_row_by_pk; -use kalamdb_core::providers::SharedTableProvider; -use std::marker::PhantomData; -use std::sync::Arc; +use std::{marker::PhantomData, sync::Arc}; + +use kalamdb_core::{ + app_context::AppContext, + providers::{base::find_row_by_pk, SharedTableProvider}, +}; + +use crate::{ + error::{DbaError, Result}, + mapping::model_to_row, + repository::{current_definition, RepositoryModel}, +}; #[derive(Clone)] pub struct SharedTableRepository { diff --git a/backend/crates/kalamdb-dba/src/repository/stats_repository.rs b/backend/crates/kalamdb-dba/src/repository/stats_repository.rs index 158dc9787..112273e68 100644 --- a/backend/crates/kalamdb-dba/src/repository/stats_repository.rs +++ b/backend/crates/kalamdb-dba/src/repository/stats_repository.rs @@ -1,5 +1,7 @@ -use crate::models::StatsRow; -use crate::repository::{RepositoryModel, SharedTableRepository}; +use crate::{ + models::StatsRow, + repository::{RepositoryModel, SharedTableRepository}, +}; pub type StatsRepository = SharedTableRepository; diff --git a/backend/crates/kalamdb-dba/src/stats_recorder.rs 
b/backend/crates/kalamdb-dba/src/stats_recorder.rs index 30bea9ece..a47098f85 100644 --- a/backend/crates/kalamdb-dba/src/stats_recorder.rs +++ b/backend/crates/kalamdb-dba/src/stats_recorder.rs @@ -1,29 +1,35 @@ -use crate::error::{DbaError, Result}; -use crate::mapping::model_to_row; -use crate::models::StatsRow; +use std::{sync::Arc, time::Duration}; + use chrono::Utc; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::providers::SharedTableProvider; -use kalamdb_tables::utils::row_utils::system_user_id; -use kalamdb_tables::BaseTableProvider; -use std::sync::Arc; -use std::time::Duration; -use tokio::task::JoinHandle; -use tokio::time::{interval_at, Instant, MissedTickBehavior}; +use kalamdb_core::{app_context::AppContext, providers::SharedTableProvider}; +use kalamdb_tables::{utils::row_utils::system_user_id, BaseTableProvider}; +use tokio::{ + task::JoinHandle, + time::{interval_at, Instant, MissedTickBehavior}, +}; + +use crate::{ + error::{DbaError, Result}, + mapping::model_to_row, + models::StatsRow, +}; const DEFAULT_STATS_RECORD_INTERVAL: Duration = Duration::from_secs(30); const DBA_STATS_MAINTENANCE_INTERVAL: Duration = Duration::from_secs(12 * 60 * 60); const DELETE_BATCH_SIZE: usize = 1_000; -pub async fn start_stats_recorder(app_context: Arc) -> Result> { - let recorded = record_stats_snapshot(app_context.clone()).await?; - log::debug!("Recorded {} startup system.stats samples into dba.stats", recorded); +const STARTUP_STATS_INITIAL_DELAY: Duration = Duration::from_secs(1); - if let Err(error) = prune_expired_stats(app_context.clone()).await { - log::warn!("Failed to prune expired dba.stats samples during startup: {}", error); - } +pub fn start_startup_stats_snapshot(app_context: Arc) -> JoinHandle<()> { + tokio::spawn(async move { + record_startup_snapshot(app_context).await; + }) +} +pub fn start_stats_recorder(app_context: Arc) -> Result> { Ok(tokio::spawn(async move { + record_startup_snapshot(app_context.clone()).await; + let mut 
ticker = interval_at( Instant::now() + DEFAULT_STATS_RECORD_INTERVAL, DEFAULT_STATS_RECORD_INTERVAL, @@ -55,6 +61,23 @@ pub async fn start_stats_recorder(app_context: Arc) -> Result) { + tokio::time::sleep(STARTUP_STATS_INITIAL_DELAY).await; + + match record_stats_snapshot(app_context.clone()).await { + Ok(recorded) => { + log::debug!("Recorded {} startup system.stats samples into dba.stats", recorded); + }, + Err(error) => { + log::warn!("Failed to record startup dba.stats snapshot: {}", error); + }, + } + + if let Err(error) = prune_expired_stats(app_context).await { + log::warn!("Failed to prune expired dba.stats samples during startup: {}", error); + } +} + pub async fn record_stats_snapshot(app_context: Arc) -> Result { let sampled_at = Utc::now().timestamp_millis(); let node_id = app_context.node_id().to_string(); diff --git a/backend/crates/kalamdb-dialect/src/batch_execution.rs b/backend/crates/kalamdb-dialect/src/batch_execution.rs index 0215cd42a..cafb2ab2a 100644 --- a/backend/crates/kalamdb-dialect/src/batch_execution.rs +++ b/backend/crates/kalamdb-dialect/src/batch_execution.rs @@ -4,13 +4,16 @@ //! Handles quoted strings, comments, and whitespace to avoid breaking on //! semicolons that appear inside literals or comment blocks. -use crate::dialect::KalamDbDialect; -use crate::execute_as::parse_execute_as; -use crate::parser::utils::parse_single_statement; -use crate::parser::utils::parse_sql_statements; -use sqlparser::ast::Spanned; -use sqlparser::ast::Statement; -use sqlparser::tokenizer::{Location, Span}; +use sqlparser::{ + ast::{Spanned, Statement}, + tokenizer::{Location, Span}, +}; + +use crate::{ + dialect::KalamDbDialect, + execute_as::parse_execute_as, + parser::utils::{parse_single_statement, parse_sql_statements}, +}; /// Error produced when parsing a batch SQL string fails. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -524,12 +527,14 @@ mod tests { #[test] fn parse_batch_statements_preserves_create_table_prefix_when_spans_are_partial() { - let sql = "CREATE TABLE demo.notes (id BIGINT PRIMARY KEY, content TEXT) WITH (TYPE='USER', STORAGE_ID='local'); SELECT 1;"; + let sql = "CREATE TABLE demo.notes (id BIGINT PRIMARY KEY, content TEXT) WITH \ + (TYPE='USER', STORAGE_ID='local'); SELECT 1;"; let statements = parse_batch_statements(sql).unwrap(); assert_eq!(statements.len(), 2); assert_eq!( statements[0], - "CREATE TABLE demo.notes (id BIGINT PRIMARY KEY, content TEXT) WITH (TYPE='USER', STORAGE_ID='local')" + "CREATE TABLE demo.notes (id BIGINT PRIMARY KEY, content TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local')" ); assert_eq!(statements[1], "SELECT 1"); } diff --git a/backend/crates/kalamdb-dialect/src/classifier/engine/core.rs b/backend/crates/kalamdb-dialect/src/classifier/engine/core.rs index 77decc163..b5c36c5dd 100644 --- a/backend/crates/kalamdb-dialect/src/classifier/engine/core.rs +++ b/backend/crates/kalamdb-dialect/src/classifier/engine/core.rs @@ -1,10 +1,12 @@ -use crate::classifier::types::{SqlStatement, SqlStatementKind, StatementClassificationError}; -use crate::ddl::*; -use crate::parser::utils::{collect_non_whitespace_tokens, parse_sql_statements, tokens_to_words}; -use kalamdb_commons::models::NamespaceId; -use kalamdb_commons::Role; +use kalamdb_commons::{models::NamespaceId, Role}; use kalamdb_session::is_admin_role; +use crate::{ + classifier::types::{SqlStatement, SqlStatementKind, StatementClassificationError}, + ddl::*, + parser::utils::{collect_non_whitespace_tokens, parse_sql_statements, tokens_to_words}, +}; + impl SqlStatement { /// Wrap a parsed statement into SqlStatement with sql_text fn wrap(sql: &str, parser: F) -> Result @@ -321,7 +323,7 @@ impl SqlStatement { }) }, - // Cluster operations - require admin (except CLUSTER LIST which is read-only) + // Cluster operations - require admin ["CLUSTER", 
"SNAPSHOT", ..] => { if !is_admin { return Err(StatementClassificationError::Unauthorized( @@ -410,6 +412,78 @@ impl SqlStatement { })?; Ok(Self::new(sql.to_string(), SqlStatementKind::ClusterTransferLeader(node_id))) }, + ["CLUSTER", "JOIN", node_id, ..] => { + if !is_admin { + return Err(StatementClassificationError::Unauthorized( + "Admin privileges (DBA or System role) required for cluster operations" + .to_string(), + )); + } + + let node_id = node_id.parse::().map_err(|_| { + StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires a numeric node id".to_string(), + } + })?; + let original_parts: Vec<&str> = sql.split_whitespace().collect(); + let upper_parts: Vec = + original_parts.iter().map(|part| part.to_ascii_uppercase()).collect(); + let (rpc_addr, api_addr) = if upper_parts.get(3).map(String::as_str) == Some("RPC") + { + let rpc_addr = original_parts.get(4).ok_or_else(|| { + StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires RPC address".to_string(), + } + })?; + if upper_parts.get(5).map(String::as_str) != Some("API") { + return Err(StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires API address".to_string(), + }); + } + let api_addr = original_parts.get(6).ok_or_else(|| { + StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires API address".to_string(), + } + })?; + ((*rpc_addr).to_string(), (*api_addr).to_string()) + } else { + let rpc_addr = original_parts.get(3).ok_or_else(|| { + StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires RPC address".to_string(), + } + })?; + let api_addr = original_parts.get(4).ok_or_else(|| { + StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER JOIN requires API address".to_string(), + } + })?; + ((*rpc_addr).to_string(), (*api_addr).to_string()) + }; 
+ + Ok(Self::new( + sql.to_string(), + SqlStatementKind::ClusterJoin { + node_id, + rpc_addr, + api_addr, + }, + )) + }, + ["CLUSTER", "REBALANCE", ..] => { + if !is_admin { + return Err(StatementClassificationError::Unauthorized( + "Admin privileges (DBA or System role) required for cluster operations" + .to_string(), + )); + } + Ok(Self::new(sql.to_string(), SqlStatementKind::ClusterRebalance)) + }, ["CLUSTER", "STEPDOWN", ..] | ["CLUSTER", "STEP-DOWN", ..] => { if !is_admin { return Err(StatementClassificationError::Unauthorized( @@ -428,20 +502,16 @@ impl SqlStatement { } Ok(Self::new(sql.to_string(), SqlStatementKind::ClusterClear)) }, - ["CLUSTER", "LIST", ..] => { - // Read-only, allowed for all users - Ok(Self::new(sql.to_string(), SqlStatementKind::ClusterList)) - }, - ["CLUSTER", "STATUS", ..] | ["CLUSTER", "LS", ..] => { - // Read-only, allowed for all users - Ok(Self::new(sql.to_string(), SqlStatementKind::ClusterList)) - }, - ["CLUSTER", "JOIN", ..] | ["CLUSTER", "LEAVE", ..] => { - Err(StatementClassificationError::InvalidSql { - sql: sql.to_string(), - message: "CLUSTER JOIN/LEAVE commands were removed".to_string(), - }) - }, + ["CLUSTER", "LIST", ..] + | ["CLUSTER", "STATUS", ..] + | ["CLUSTER", "LS", ..] => Err(StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER LIST is a CLI-only command. Use \\cluster list in kalam, or query system.cluster and system.cluster_groups directly.".to_string(), + }), + ["CLUSTER", "LEAVE", ..] => Err(StatementClassificationError::InvalidSql { + sql: sql.to_string(), + message: "CLUSTER LEAVE is not supported yet".to_string(), + }), // Transaction control (no parsing needed - just markers) ["BEGIN", ..] | ["START", "TRANSACTION", ..] => { @@ -696,14 +766,15 @@ impl SqlStatement { | SqlStatementKind::ClusterPurge(_) | SqlStatementKind::ClusterTriggerElection | SqlStatementKind::ClusterTransferLeader(_) + | SqlStatementKind::ClusterJoin { .. 
} + | SqlStatementKind::ClusterRebalance | SqlStatementKind::ClusterStepdown - | SqlStatementKind::ClusterClear - | SqlStatementKind::ClusterList => Err( - "Admin privileges (DBA or System role) required for storage and cluster operations" - .to_string(), - ), + | SqlStatementKind::ClusterClear => Err("Admin privileges (DBA or System role) \ + required for storage and cluster operations" + .to_string()), - // User management requires admin privileges (except for self-modification in ALTER USER) + // User management requires admin privileges (except for self-modification in ALTER + // USER) SqlStatementKind::CreateUser(_) | SqlStatementKind::DropUser(_) => { Err("Admin privileges (DBA or System role) required for user management" .to_string()) @@ -716,10 +787,9 @@ impl SqlStatement { // Namespace DDL requires admin privileges SqlStatementKind::CreateNamespace(_) | SqlStatementKind::AlterNamespace(_) - | SqlStatementKind::DropNamespace(_) => { - Err("Admin privileges (DBA or System role) required for namespace operations" - .to_string()) - }, + | SqlStatementKind::DropNamespace(_) => Err("Admin privileges (DBA or System role) \ + required for namespace operations" + .to_string()), // Read-only operations on system tables are allowed for all authenticated users SqlStatementKind::ShowNamespaces(_) @@ -731,7 +801,8 @@ impl SqlStatement { | SqlStatementKind::DescribeTable(_) | SqlStatementKind::UseNamespace(_) => Ok(()), - // CREATE TABLE/VIEW, DROP TABLE, STORAGE FLUSH/COMPACT, ALTER TABLE - defer to ownership checks + // CREATE TABLE/VIEW, DROP TABLE, STORAGE FLUSH/COMPACT, ALTER TABLE - defer to + // ownership checks SqlStatementKind::CreateTable(_) | SqlStatementKind::CreateView(_) | SqlStatementKind::AlterTable(_) @@ -772,10 +843,9 @@ impl SqlStatement { SqlStatementKind::CreateTopic(_) | SqlStatementKind::DropTopic(_) | SqlStatementKind::ClearTopic(_) - | SqlStatementKind::AddTopicSource(_) => { - Err("Admin privileges (DBA or System role) required for topic 
management" - .to_string()) - }, + | SqlStatementKind::AddTopicSource(_) => Err("Admin privileges (DBA or System role) \ + required for topic management" + .to_string()), // Backup/Restore requires admin SqlStatementKind::BackupDatabase(_) | SqlStatementKind::RestoreDatabase(_) => { diff --git a/backend/crates/kalamdb-dialect/src/classifier/types.rs b/backend/crates/kalamdb-dialect/src/classifier/types.rs index dc5731553..d94deee78 100644 --- a/backend/crates/kalamdb-dialect/src/classifier/types.rs +++ b/backend/crates/kalamdb-dialect/src/classifier/types.rs @@ -101,12 +101,18 @@ pub enum SqlStatementKind { ClusterTriggerElection, /// CLUSTER TRANSFER-LEADER - Transfer leadership ClusterTransferLeader(u64), + /// CLUSTER JOIN - Add a node at runtime + ClusterJoin { + node_id: u64, + rpc_addr: String, + api_addr: String, + }, + /// CLUSTER REBALANCE - Best-effort leader redistribution + ClusterRebalance, /// CLUSTER STEPDOWN - Attempt leader stepdown ClusterStepdown, /// CLUSTER CLEAR - Clear old snapshots ClusterClear, - /// CLUSTER LIST - List cluster nodes - ClusterList, // ===== Job Management ===== /// KILL JOB @@ -292,11 +298,10 @@ impl SqlStatement { | SqlStatementKind::ClusterPurge(_) | SqlStatementKind::ClusterTriggerElection | SqlStatementKind::ClusterTransferLeader(_) + | SqlStatementKind::ClusterJoin { .. } + | SqlStatementKind::ClusterRebalance | SqlStatementKind::ClusterStepdown | SqlStatementKind::ClusterClear => true, - - // Read-only cluster inspection can run on any node - SqlStatementKind::ClusterList => false, } } @@ -329,9 +334,10 @@ impl SqlStatement { SqlStatementKind::ClusterPurge(_) => "CLUSTER PURGE", SqlStatementKind::ClusterTriggerElection => "CLUSTER TRIGGER ELECTION", SqlStatementKind::ClusterTransferLeader(_) => "CLUSTER TRANSFER-LEADER", + SqlStatementKind::ClusterJoin { .. 
} => "CLUSTER JOIN", + SqlStatementKind::ClusterRebalance => "CLUSTER REBALANCE", SqlStatementKind::ClusterStepdown => "CLUSTER STEPDOWN", SqlStatementKind::ClusterClear => "CLUSTER CLEAR", - SqlStatementKind::ClusterList => "CLUSTER LIST", SqlStatementKind::KillJob(_) => "KILL JOB", SqlStatementKind::KillLiveQuery(_) => "KILL LIVE QUERY", SqlStatementKind::BeginTransaction => "BEGIN", diff --git a/backend/crates/kalamdb-dialect/src/compatibility.rs b/backend/crates/kalamdb-dialect/src/compatibility.rs index 0f07d8f28..5d5ecd8e9 100644 --- a/backend/crates/kalamdb-dialect/src/compatibility.rs +++ b/backend/crates/kalamdb-dialect/src/compatibility.rs @@ -4,11 +4,11 @@ //! data types into Arrow data types that KalamDB understands. Centralising //! these conversions keeps the CREATE TABLE parsers in sync across crates. +use std::string::String; + use arrow::datatypes::{DataType, IntervalUnit, TimeUnit}; use kalamdb_commons::models::datatypes::{FromArrowType, KalamDataType}; -use sqlparser::ast::DataType::*; -use sqlparser::ast::{DataType as SQLDataType, ObjectName}; -use std::string::String; +use sqlparser::ast::{DataType as SQLDataType, DataType::*, ObjectName}; /// Map a parsed `sqlparser` data type into an Arrow data type while accounting /// for PostgreSQL/MySQL aliases (e.g. `SERIAL`, `INT4`, `AUTO_INCREMENT`). 
@@ -146,10 +146,9 @@ fn map_custom_type(name: &ObjectName, modifiers: &[String]) -> Result { // Extract dimension from modifiers if modifiers.len() != 1 { - return Err( - "EMBEDDING type requires exactly one dimension parameter, e.g., EMBEDDING(384)" - .to_string(), - ); + return Err("EMBEDDING type requires exactly one dimension parameter, e.g., \ + EMBEDDING(384)" + .to_string()); } let dim_str = &modifiers[0]; @@ -196,9 +195,10 @@ fn map_custom_type(name: &ObjectName, modifiers: &[String]) -> Result SQLDataType { SQLDataType::Custom( ObjectName(vec![sqlparser::ast::ObjectNamePart::Identifier(Ident::new(name))]), @@ -397,11 +397,17 @@ pub fn format_mysql_column_not_found(column_name: &str) -> String { /// use kalamdb_dialect::compatibility::format_mysql_syntax_error; /// /// let msg = format_mysql_syntax_error("FROM", 1); -/// assert_eq!(msg, "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'FROM' at line 1"); +/// assert_eq!( +/// msg, +/// "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that \ +/// corresponds to your MySQL server version for the right syntax to use near 'FROM' at line \ +/// 1" +/// ); /// ``` pub fn format_mysql_syntax_error(token: &str, line: usize) -> String { format!( - "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '{}' at line {}", + "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that \ + corresponds to your MySQL server version for the right syntax to use near '{}' at line {}", token, line ) } @@ -448,7 +454,9 @@ mod error_formatting_tests { fn test_mysql_syntax_error() { assert_eq!( format_mysql_syntax_error("FROM", 1), - "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use 
near 'FROM' at line 1" + "ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that \ + corresponds to your MySQL server version for the right syntax to use near 'FROM' at \ + line 1" ); } } diff --git a/backend/crates/kalamdb-dialect/src/ddl.rs b/backend/crates/kalamdb-dialect/src/ddl.rs index f1f7775b2..60a9a96e7 100644 --- a/backend/crates/kalamdb-dialect/src/ddl.rs +++ b/backend/crates/kalamdb-dialect/src/ddl.rs @@ -47,6 +47,8 @@ pub use drop_table::{DropTableStatement, TableKind}; pub use export_commands::{ExportUserDataStatement, ShowExportStatement}; pub use flush_commands::{FlushAllTablesStatement, FlushTableStatement}; pub use job_commands::{parse_job_command, JobCommand}; +// Re-export SubscriptionOptions from kalamdb_commons for convenience +pub use kalamdb_commons::websocket::SubscriptionOptions; pub use kill_live_query::KillLiveQueryStatement; pub use manifest_commands::ShowManifestStatement; pub use restore_namespace::RestoreDatabaseStatement; @@ -58,8 +60,6 @@ pub use storage_commands::{ ShowStoragesStatement, }; pub use subscribe_commands::SubscribeStatement; -// Re-export SubscriptionOptions from kalamdb_commons for convenience -pub use kalamdb_commons::websocket::SubscriptionOptions; pub use topic_commands::{ AckStatement, AddTopicSourceStatement, ClearTopicStatement, ConsumePosition, ConsumeStatement, CreateTopicStatement, DropTopicStatement, diff --git a/backend/crates/kalamdb-dialect/src/ddl/alter_namespace.rs b/backend/crates/kalamdb-dialect/src/ddl/alter_namespace.rs index 54376531a..34081c777 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/alter_namespace.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/alter_namespace.rs @@ -3,11 +3,12 @@ //! Parses SQL statements like: //! 
- ALTER NAMESPACE app SET OPTIONS (key1 = 'value1', key2 = 'value2') -use crate::ddl::DdlResult; +use std::collections::HashMap; use kalamdb_commons::models::NamespaceId; use serde_json::Value as JsonValue; -use std::collections::HashMap; + +use crate::ddl::DdlResult; /// ALTER NAMESPACE statement #[derive(Debug, Clone, PartialEq)] @@ -85,7 +86,8 @@ impl AlterNamespaceStatement { let mut options = HashMap::new(); - // Simple parsing: split by comma (doesn't handle commas in strings, but good enough for now) + // Simple parsing: split by comma (doesn't handle commas in strings, but good enough for + // now) for pair in inner.split(',') { let mut parts = pair.splitn(2, '=').map(|s| s.trim()); let key = parts.next(); diff --git a/backend/crates/kalamdb-dialect/src/ddl/alter_table.rs b/backend/crates/kalamdb-dialect/src/ddl/alter_table.rs index 69521f308..d1f06570f 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/alter_table.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/alter_table.rs @@ -5,21 +5,24 @@ //! - ALTER TABLE messages DROP COLUMN age //! 
- ALTER TABLE messages MODIFY COLUMN age BIGINT -use crate::ddl::DdlResult; -use crate::parser::utils::parse_sql_statements; - -use crate::compatibility::map_sql_type_to_kalam; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::{NamespaceId, TableAccess, TableName}; -use kalamdb_commons::schemas::ColumnDefault; +use kalamdb_commons::{ + models::{datatypes::KalamDataType, NamespaceId, TableAccess, TableName}, + schemas::ColumnDefault, +}; use kalamdb_system::VectorMetric; use once_cell::sync::Lazy; use regex::{Captures, Regex}; -use sqlparser::ast::{ - AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, - DropBehavior, Expr, Ident, ObjectName, SqlOption, Statement, Value, +use sqlparser::{ + ast::{ + AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, + DropBehavior, Expr, Ident, ObjectName, SqlOption, Statement, Value, + }, + dialect::GenericDialect, +}; + +use crate::{ + compatibility::map_sql_type_to_kalam, ddl::DdlResult, parser::utils::parse_sql_statements, }; -use sqlparser::dialect::GenericDialect; /// Column alteration operation #[derive(Debug, Clone, PartialEq)] @@ -517,9 +520,10 @@ fn extract_access_level(option: &SqlOption) -> DdlResult> { "DBA" => TableAccess::Dba, other => { return Err(format!( - "Invalid ACCESS_LEVEL '{}'. Supported values: PUBLIC, PRIVATE, RESTRICTED, DBA", - other - )) + "Invalid ACCESS_LEVEL '{}'. Supported values: PUBLIC, PRIVATE, \ + RESTRICTED, DBA", + other + )) }, }; return Ok(Some(access_level)); diff --git a/backend/crates/kalamdb-dialect/src/ddl/compact_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/compact_commands.rs index fd3b8127e..bd1a70907 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/compact_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/compact_commands.rs @@ -22,10 +22,10 @@ //! STORAGE COMPACT ALL IN namespace; //! 
``` -use crate::ddl::parsing; -use crate::parser::utils::normalize_sql; use kalamdb_commons::{NamespaceId, TableName}; +use crate::{ddl::parsing, parser::utils::normalize_sql}; + const ERR_EXPECTED_NAMESPACE: &str = "Expected STORAGE COMPACT ALL IN namespace"; /// STORAGE COMPACT TABLE statement diff --git a/backend/crates/kalamdb-dialect/src/ddl/create_namespace.rs b/backend/crates/kalamdb-dialect/src/ddl/create_namespace.rs index 6e4d202a5..61ed1fb9b 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/create_namespace.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/create_namespace.rs @@ -4,9 +4,10 @@ //! - CREATE NAMESPACE app //! - CREATE NAMESPACE IF NOT EXISTS app -use crate::ddl::{parsing, DdlResult}; use kalamdb_commons::models::NamespaceId; +use crate::ddl::{parsing, DdlResult}; + /// CREATE NAMESPACE statement #[derive(Debug, Clone, PartialEq)] pub struct CreateNamespaceStatement { diff --git a/backend/crates/kalamdb-dialect/src/ddl/create_table/parser.rs b/backend/crates/kalamdb-dialect/src/ddl/create_table/parser.rs index 4eb4f83c1..1dc4acab2 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/create_table/parser.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/create_table/parser.rs @@ -1,17 +1,23 @@ -use super::types::CreateTableStatement; -use crate::compatibility::map_sql_type_to_kalam; -use crate::parser::utils::{format_span, parse_sql_statements}; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + use arrow::datatypes::{Field, Schema}; -use kalamdb_commons::conversions::with_kalam_data_type_metadata; -use kalamdb_commons::models::datatypes::ToArrowType; -use kalamdb_commons::models::{NamespaceId, StorageId, TableAccess, TableName}; -use kalamdb_commons::schemas::policy::FlushPolicy; -use kalamdb_commons::schemas::{ColumnDefault, TableType}; +use kalamdb_commons::{ + conversions::with_kalam_data_type_metadata, + models::{datatypes::ToArrowType, NamespaceId, StorageId, TableAccess, TableName}, + schemas::{policy::FlushPolicy, 
ColumnDefault, TableType}, +}; use once_cell::sync::Lazy; use regex::Regex; use sqlparser::ast::{ColumnOption, CreateTable, ObjectNamePart, Statement, TableConstraint}; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; + +use super::types::CreateTableStatement; +use crate::{ + compatibility::map_sql_type_to_kalam, + parser::utils::{format_span, parse_sql_statements}, +}; static RE_ALPHANUMERIC: Lazy = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]+$").unwrap()); static RE_STORAGE_ID: Lazy = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_-]+$").unwrap()); @@ -67,7 +73,9 @@ impl CreateTableStatement { TableName::from(name.0[1].to_string().as_str()), ) } else { - return Err("Invalid table name format. Expected 'table_name' or 'namespace.table_name'".to_string()); + return Err("Invalid table name format. Expected 'table_name' or \ + 'namespace.table_name'" + .to_string()); }; // Validate names @@ -78,12 +86,10 @@ impl CreateTableStatement { }); let location = span.map(format_span); return Err(format!( - "Invalid namespace name '{}'. Only alphanumeric characters and underscores are allowed{}.", + "Invalid namespace name '{}'. Only alphanumeric characters and \ + underscores are allowed{}.", namespace_id, - location - .as_deref() - .map(|s| format!(" ({})", s)) - .unwrap_or_default() + location.as_deref().map(|s| format!(" ({})", s)).unwrap_or_default() )); } if !RE_ALPHANUMERIC.is_match(table_name.as_str()) { @@ -93,12 +99,10 @@ impl CreateTableStatement { }); let location = span.map(format_span); return Err(format!( - "Invalid table name '{}'. Only alphanumeric characters and underscores are allowed{}.", + "Invalid table name '{}'. 
Only alphanumeric characters and underscores \ + are allowed{}.", table_name, - location - .as_deref() - .map(|s| format!(" ({})", s)) - .unwrap_or_default() + location.as_deref().map(|s| format!(" ({})", s)).unwrap_or_default() )); } @@ -125,14 +129,20 @@ impl CreateTableStatement { match key_str.as_str() { "TYPE" => { - let requested_type = TableType::from_str_opt(&value_str).ok_or_else(|| { - format!("Invalid TYPE option '{}'. Supported: USER, SHARED, STREAM", value_str) - })?; + let requested_type = TableType::from_str_opt(&value_str) + .ok_or_else(|| { + format!( + "Invalid TYPE option '{}'. Supported: USER, SHARED, \ + STREAM", + value_str + ) + })?; if let Some(prefix_type) = create_prefix_table_type { if requested_type != prefix_type { return Err(format!( - "Conflicting table type definitions: CREATE {:?} TABLE vs TYPE option {:?}", + "Conflicting table type definitions: CREATE {:?} \ + TABLE vs TYPE option {:?}", prefix_type, requested_type )); } @@ -142,7 +152,11 @@ impl CreateTableStatement { }, "STORAGE_ID" => { if !RE_STORAGE_ID.is_match(&value_str) { - return Err(format!("Invalid STORAGE_ID '{}'. Only alphanumeric, underscore, and hyphen allowed.", value_str)); + return Err(format!( + "Invalid STORAGE_ID '{}'. Only alphanumeric, underscore, \ + and hyphen allowed.", + value_str + )); } storage_id = Some(StorageId::from(value_str)); }, @@ -159,7 +173,11 @@ impl CreateTableStatement { let key = kv.next(); let value = kv.next(); if key.is_none() || value.is_none() { - return Err(format!("Invalid FLUSH_POLICY format '{}'. Expected 'key:value'", part)); + return Err(format!( + "Invalid FLUSH_POLICY format '{}'. 
Expected \ + 'key:value'", + part + )); } match key.unwrap().to_uppercase().as_str() { "ROWS" => { @@ -195,10 +213,9 @@ impl CreateTableStatement { interval_seconds: interval, } } else { - return Err( - "FLUSH_POLICY must specify 'rows' or 'interval' > 0" - .to_string(), - ); + return Err("FLUSH_POLICY must specify 'rows' or 'interval' \ + > 0" + .to_string()); }; // Validate policy immediately @@ -222,7 +239,13 @@ impl CreateTableStatement { "PRIVATE" => Some(TableAccess::Private), "RESTRICTED" => Some(TableAccess::Restricted), "DBA" => Some(TableAccess::Dba), - _ => return Err(format!("Invalid ACCESS_LEVEL '{}'. Supported: PUBLIC, PRIVATE, RESTRICTED, DBA", value_str)), + _ => { + return Err(format!( + "Invalid ACCESS_LEVEL '{}'. Supported: PUBLIC, \ + PRIVATE, RESTRICTED, DBA", + value_str + )) + }, }; }, _ => return Err(format!("Unknown table option '{}'", key_str)), @@ -282,7 +305,8 @@ impl CreateTableStatement { let col_name = col.name.value; if !RE_ALPHANUMERIC.is_match(&col_name) { return Err(format!( - "Invalid column name '{}'. Only alphanumeric characters and underscores are allowed.", + "Invalid column name '{}'. 
Only alphanumeric characters and \ + underscores are allowed.", col_name )); } diff --git a/backend/crates/kalamdb-dialect/src/ddl/create_table/types.rs b/backend/crates/kalamdb-dialect/src/ddl/create_table/types.rs index 7c523514c..74b3baed8 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/create_table/types.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/create_table/types.rs @@ -1,9 +1,10 @@ +use std::{collections::HashMap, sync::Arc}; + use arrow::datatypes::Schema; -use kalamdb_commons::models::{NamespaceId, StorageId, TableAccess, TableName}; -use kalamdb_commons::schemas::policy::FlushPolicy; -use kalamdb_commons::schemas::{ColumnDefault, TableType}; -use std::collections::HashMap; -use std::sync::Arc; +use kalamdb_commons::{ + models::{NamespaceId, StorageId, TableAccess, TableName}, + schemas::{policy::FlushPolicy, ColumnDefault, TableType}, +}; /// Unified CREATE TABLE statement that works for USER, SHARED, and STREAM tables #[derive(Debug, Clone, PartialEq)] diff --git a/backend/crates/kalamdb-dialect/src/ddl/create_view.rs b/backend/crates/kalamdb-dialect/src/ddl/create_view.rs index 7ccf83d4d..47b27bd1e 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/create_view.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/create_view.rs @@ -3,11 +3,13 @@ //! Produces a typed AST that mirrors sqlparser's semantics so kalamdb-core can //! route CREATE VIEW requests through a dedicated handler. -use crate::ddl::DdlResult; -use crate::parser::utils::parse_sql_statements; use kalamdb_commons::models::{NamespaceId, TableName}; -use sqlparser::ast::{ObjectName, Statement}; -use sqlparser::dialect::GenericDialect; +use sqlparser::{ + ast::{ObjectName, Statement}, + dialect::GenericDialect, +}; + +use crate::{ddl::DdlResult, parser::utils::parse_sql_statements}; /// Typed representation of a CREATE VIEW statement. 
#[derive(Debug, Clone, PartialEq, Eq)] diff --git a/backend/crates/kalamdb-dialect/src/ddl/describe_table.rs b/backend/crates/kalamdb-dialect/src/ddl/describe_table.rs index b77026c88..7a01db7f1 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/describe_table.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/describe_table.rs @@ -6,10 +6,10 @@ //! - DESC TABLE table_name //! - DESCRIBE TABLE table_name HISTORY (show schema versions) -use crate::ddl::DdlResult; - use kalamdb_commons::models::{NamespaceId, TableName}; +use crate::ddl::DdlResult; + /// DESCRIBE TABLE statement #[derive(Debug, Clone, PartialEq)] pub struct DescribeTableStatement { diff --git a/backend/crates/kalamdb-dialect/src/ddl/drop_namespace.rs b/backend/crates/kalamdb-dialect/src/ddl/drop_namespace.rs index 74bad3cb4..214e326f4 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/drop_namespace.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/drop_namespace.rs @@ -6,9 +6,10 @@ //! - DROP NAMESPACE app CASCADE //! - DROP NAMESPACE IF EXISTS app CASCADE -use crate::ddl::{parsing, DdlResult}; use kalamdb_commons::models::NamespaceId; +use crate::ddl::{parsing, DdlResult}; + /// DROP NAMESPACE statement #[derive(Debug, Clone, PartialEq)] pub struct DropNamespaceStatement { diff --git a/backend/crates/kalamdb-dialect/src/ddl/drop_table.rs b/backend/crates/kalamdb-dialect/src/ddl/drop_table.rs index ae0f18ab3..2209a15a3 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/drop_table.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/drop_table.rs @@ -2,14 +2,16 @@ //! //! Parses SQL statements like: //! - DROP USER TABLE messages -//! - DROP SHARED TABLE conversations +//! - DROP SHARED TABLE conversations //! - DROP STREAM TABLE events //! 
- DROP TABLE IF EXISTS messages -use crate::ddl::DdlResult; +use kalamdb_commons::{ + models::{NamespaceId, TableName}, + schemas::TableType, +}; -use kalamdb_commons::models::{NamespaceId, TableName}; -use kalamdb_commons::schemas::TableType; +use crate::ddl::DdlResult; /// Table categories supported by DROP TABLE statements. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/backend/crates/kalamdb-dialect/src/ddl/flush_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/flush_commands.rs index b31306659..cb9eacd0e 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/flush_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/flush_commands.rs @@ -57,7 +57,7 @@ //! ## Examples //! //! ```rust -//! use kalamdb_dialect::ddl::flush_commands::{FlushTableStatement, FlushAllTablesStatement}; +//! use kalamdb_dialect::ddl::flush_commands::{FlushAllTablesStatement, FlushTableStatement}; //! //! # fn main() -> Result<(), Box> { //! // Parse STORAGE FLUSH TABLE @@ -74,10 +74,10 @@ //! Parsers for STORAGE FLUSH TABLE and STORAGE FLUSH ALL commands (US4). -use crate::ddl::parsing; -use crate::parser::utils::normalize_sql; use kalamdb_commons::{NamespaceId, TableName}; +use crate::{ddl::parsing, parser::utils::normalize_sql}; + const ERR_EXPECTED_NAMESPACE: &str = "Expected STORAGE FLUSH ALL IN namespace"; /// STORAGE FLUSH TABLE statement diff --git a/backend/crates/kalamdb-dialect/src/ddl/job_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/job_commands.rs index f8f407da7..ebf9fef11 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/job_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/job_commands.rs @@ -20,14 +20,14 @@ //! ## Examples //! //! ```rust,no_run -//! use kalamdb_dialect::ddl::job_commands::{JobCommand, parse_job_command}; +//! use kalamdb_dialect::ddl::job_commands::{parse_job_command, JobCommand}; //! //! // Parse KILL JOB command //! let cmd = parse_job_command("KILL JOB 'flush-001'").unwrap(); //! match cmd { //! 
JobCommand::Kill { job_id } => { //! println!("Cancelling job: {}", job_id); -//! } +//! }, //! } //! ``` @@ -39,7 +39,7 @@ pub enum JobCommand { /// Kill (cancel) a running job Kill { /// Job ID to cancel - job_id: String, //TODO: use JobId type? + job_id: String, // TODO: use JobId type? }, } @@ -124,7 +124,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } @@ -135,7 +135,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } @@ -146,7 +146,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } @@ -157,7 +157,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } @@ -168,7 +168,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } @@ -179,7 +179,7 @@ mod tests { assert_eq!( cmd, JobCommand::Kill { - job_id: "flush-001".to_string() + job_id: "flush-001".to_string(), } ); } diff --git a/backend/crates/kalamdb-dialect/src/ddl/kill_live_query.rs b/backend/crates/kalamdb-dialect/src/ddl/kill_live_query.rs index 73cf73c5b..23e6714ea 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/kill_live_query.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/kill_live_query.rs @@ -3,10 +3,10 @@ //! Parses SQL statements like: //! 
- KILL LIVE QUERY 'user123-conn_abc-messages-updatedMessages' -use crate::ddl::DdlResult; - use kalamdb_commons::models::{ConnectionId, LiveQueryId, UserId}; +use crate::ddl::DdlResult; + /// KILL LIVE QUERY statement #[derive(Debug, Clone, PartialEq)] pub struct KillLiveQueryStatement { diff --git a/backend/crates/kalamdb-dialect/src/ddl/parsing.rs b/backend/crates/kalamdb-dialect/src/ddl/parsing.rs index 2fe073e3d..c786bbad4 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/parsing.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/parsing.rs @@ -243,7 +243,8 @@ pub fn parse_table_reference(table_ref: &str) -> DdlResult<(Option, Stri for keyword in &sql_keywords { if upper_ref.contains(&format!(" {}", keyword)) || upper_ref.starts_with(keyword) { return Err(format!( - "Invalid table reference '{}'. Table references should be simple identifiers like 'table' or 'namespace.table', not SQL statements", + "Invalid table reference '{}'. Table references should be simple identifiers like \ + 'table' or 'namespace.table', not SQL statements", table_ref )); } diff --git a/backend/crates/kalamdb-dialect/src/ddl/show_table_stats.rs b/backend/crates/kalamdb-dialect/src/ddl/show_table_stats.rs index a2fbbf355..44d5fdd16 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/show_table_stats.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/show_table_stats.rs @@ -4,15 +4,15 @@ //! - SHOW STATS FOR TABLE table_name //! 
- SHOW STATS FOR TABLE namespace.table_name -use crate::ddl::DdlResult; - use kalamdb_commons::models::{NamespaceId, TableName}; +use crate::ddl::DdlResult; + /// SHOW TABLE STATS statement #[derive(Debug, Clone, PartialEq)] pub struct ShowTableStatsStatement { /// Optional namespace (if qualified name used) - pub namespace_id: Option, //TODO: consider making this mandatory + pub namespace_id: Option, // TODO: consider making this mandatory /// Table name to show statistics for pub table_name: TableName, diff --git a/backend/crates/kalamdb-dialect/src/ddl/show_tables.rs b/backend/crates/kalamdb-dialect/src/ddl/show_tables.rs index a311ea960..215d918a6 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/show_tables.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/show_tables.rs @@ -4,9 +4,10 @@ //! - SHOW TABLES //! - SHOW TABLES IN namespace -use crate::ddl::{parsing, DdlResult}; use kalamdb_commons::models::NamespaceId; +use crate::ddl::{parsing, DdlResult}; + /// SHOW TABLES statement #[derive(Debug, Clone, PartialEq)] pub struct ShowTablesStatement { diff --git a/backend/crates/kalamdb-dialect/src/ddl/subscribe_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/subscribe_commands.rs index ca5004207..c5c70afe7 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/subscribe_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/subscribe_commands.rs @@ -51,13 +51,14 @@ //! ``` //! 
``` +use kalamdb_commons::{websocket::SubscriptionOptions, NamespaceId, TableName}; +use sqlparser::{ + ast::{ObjectName, ObjectNamePart, SetExpr, Statement, TableFactor}, + dialect::{GenericDialect, PostgreSqlDialect}, +}; + use super::DdlResult; -use crate::parser::query_parser::QueryParser; -use crate::parser::utils::parse_sql_statements; -use kalamdb_commons::websocket::SubscriptionOptions; -use kalamdb_commons::{NamespaceId, TableName}; -use sqlparser::ast::{ObjectName, ObjectNamePart, SetExpr, Statement, TableFactor}; -use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; +use crate::parser::{query_parser::QueryParser, utils::parse_sql_statements}; /// SUBSCRIBE TO statement for live query subscriptions. /// @@ -95,7 +96,8 @@ impl SubscribeStatement { /// assert_eq!(stmt.select_query, "SELECT * FROM app.messages"); /// /// // Custom column selection - /// let stmt = SubscribeStatement::parse("SUBSCRIBE TO SELECT event_type FROM app.messages").unwrap(); + /// let stmt = + /// SubscribeStatement::parse("SUBSCRIBE TO SELECT event_type FROM app.messages").unwrap(); /// assert_eq!(stmt.select_query, "SELECT event_type FROM app.messages"); /// ``` pub fn parse(sql: &str) -> DdlResult { @@ -292,10 +294,12 @@ impl SubscribeStatement { /// ``` /// use kalamdb_dialect::ddl::subscribe_commands::SubscribeStatement; /// - /// let stmt = SubscribeStatement::parse("SUBSCRIBE TO app.messages WHERE user_id = 'alice'").unwrap(); + /// let stmt = + /// SubscribeStatement::parse("SUBSCRIBE TO app.messages WHERE user_id = 'alice'").unwrap(); /// assert_eq!(stmt.to_select_sql(), "SELECT * FROM app.messages WHERE user_id = 'alice'"); /// - /// let stmt = SubscribeStatement::parse("SUBSCRIBE TO SELECT event_type FROM app.messages").unwrap(); + /// let stmt = + /// SubscribeStatement::parse("SUBSCRIBE TO SELECT event_type FROM app.messages").unwrap(); /// assert_eq!(stmt.to_select_sql(), "SELECT event_type FROM app.messages"); /// ``` pub fn to_select_sql(&self) -> String { @@ 
-362,7 +366,11 @@ fn parse_subscribe_options(options_str: &str) -> DdlResult from = Some(SeqId::new(seq_val)); }, _ => { - return Err(format!("Unknown subscription option: '{}'. Valid options are: last_rows, batch_size, from", key)); + return Err(format!( + "Unknown subscription option: '{}'. Valid options are: last_rows, \ + batch_size, from", + key + )); }, } } else { @@ -374,7 +382,6 @@ fn parse_subscribe_options(options_str: &str) -> DdlResult batch_size, last_rows, from, - snapshot_end_seq: None, }) } @@ -609,7 +616,8 @@ mod tests { #[test] fn test_parse_subscribe_with_where_and_multiple_options() { let stmt = SubscribeStatement::parse( - "SUBSCRIBE TO app.messages WHERE user_id = 'alice' OPTIONS (last_rows=50, batch_size=25)", + "SUBSCRIBE TO app.messages WHERE user_id = 'alice' OPTIONS (last_rows=50, \ + batch_size=25)", ) .unwrap(); assert_eq!(stmt.namespace, NamespaceId::from("app")); diff --git a/backend/crates/kalamdb-dialect/src/ddl/topic_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/topic_commands.rs index f96966258..d165c42df 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/topic_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/topic_commands.rs @@ -6,10 +6,13 @@ //! - ALTER TOPIC ADD SOURCE: Add a table route to a topic //! - CONSUME FROM: Consume messages from a topic -use crate::parser::utils::{extract_identifier, extract_keyword_value, normalize_sql}; -use crate::DdlAst; use kalamdb_commons::models::{PayloadMode, TableId, TopicOp}; +use crate::{ + parser::utils::{extract_identifier, extract_keyword_value, normalize_sql}, + DdlAst, +}; + /// CREATE TOPIC statement /// /// Syntax: @@ -334,7 +337,7 @@ pub fn parse_ack(sql: &str) -> Result { // Helper functions -//TODO: We aready have a method inside tableId for this. Refactor to use that. +// TODO: We aready have a method inside tableId for this. Refactor to use that. 
fn parse_table_id(table_str: &str) -> Result { // Support both "table" and "namespace.table" formats if table_str.contains('.') { diff --git a/backend/crates/kalamdb-dialect/src/ddl/use_namespace.rs b/backend/crates/kalamdb-dialect/src/ddl/use_namespace.rs index eff07ad37..cd86cc584 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/use_namespace.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/use_namespace.rs @@ -8,9 +8,10 @@ //! This sets the default schema for the current session using DataFusion's //! native configuration: `datafusion.catalog.default_schema` -use crate::ddl::DdlResult; use kalamdb_commons::models::NamespaceId; +use crate::ddl::DdlResult; + /// USE NAMESPACE statement /// /// Changes the default schema for unqualified table names in the current session. diff --git a/backend/crates/kalamdb-dialect/src/ddl/user_commands.rs b/backend/crates/kalamdb-dialect/src/ddl/user_commands.rs index 4543fc183..1013ae3d0 100644 --- a/backend/crates/kalamdb-dialect/src/ddl/user_commands.rs +++ b/backend/crates/kalamdb-dialect/src/ddl/user_commands.rs @@ -7,11 +7,12 @@ //! //! Uses sqlparser-rs tokenizer for consistent identifier and string handling. 
-use kalamdb_commons::AuthType; -use kalamdb_commons::Role; +use kalamdb_commons::{AuthType, Role}; use serde::{Deserialize, Serialize}; -use sqlparser::dialect::GenericDialect; -use sqlparser::tokenizer::{Token, Tokenizer}; +use sqlparser::{ + dialect::GenericDialect, + tokenizer::{Token, Tokenizer}, +}; /// Common error type for user command parsing #[derive(Debug, Clone, PartialEq)] @@ -426,7 +427,8 @@ mod tests { // CREATE USER tests #[test] fn test_create_user_with_password_quoted() { - let sql = "CREATE USER 'alice' WITH PASSWORD 'secure123' ROLE developer EMAIL 'alice@example.com'"; + let sql = "CREATE USER 'alice' WITH PASSWORD 'secure123' ROLE developer EMAIL \ + 'alice@example.com'"; let stmt = CreateUserStatement::parse(sql).unwrap(); assert_eq!(stmt.username, "alice"); assert_eq!(stmt.auth_type, AuthType::Password); diff --git a/backend/crates/kalamdb-dialect/src/lib.rs b/backend/crates/kalamdb-dialect/src/lib.rs index 3128a66ff..4a316c996 100644 --- a/backend/crates/kalamdb-dialect/src/lib.rs +++ b/backend/crates/kalamdb-dialect/src/lib.rs @@ -2,8 +2,7 @@ //! //! This crate is the extraction point for KalamDB SQL parsing concerns. 
-use kalamdb_commons::models::NamespaceId; -use kalamdb_commons::Role; +use kalamdb_commons::{models::NamespaceId, Role}; pub mod batch_execution; pub mod classifier; @@ -36,13 +35,12 @@ pub use ddl::{ pub use ddl_parent::DdlAst; pub use dialect::KalamDbDialect; pub use execute_as::{extract_inner_sql, parse_execute_as, ExecuteAsEnvelope}; -pub use parser::query_parser::{QueryParseError, QueryParser, SubscriptionQueryAnalysis}; -pub use parser::SqlParser; pub use parser::{ extract_dml_table_id, extract_dml_table_id_fast, extract_dml_table_id_from_statement, insert_column_names_from_statement, insert_columns_match, normalize_context_keyword_calls_for_sqlparser, parse_single_statement, - rewrite_context_functions_for_datafusion, + query_parser::{QueryParseError, QueryParser, SubscriptionQueryAnalysis}, + rewrite_context_functions_for_datafusion, SqlParser, }; pub use validation::{ validate_column_name, validate_namespace_name, validate_table_name, ValidationError, diff --git a/backend/crates/kalamdb-dialect/src/parser/extensions.rs b/backend/crates/kalamdb-dialect/src/parser/extensions.rs index 08175076b..21ab70505 100644 --- a/backend/crates/kalamdb-dialect/src/parser/extensions.rs +++ b/backend/crates/kalamdb-dialect/src/parser/extensions.rs @@ -12,6 +12,16 @@ //! These parsers complement the standard SQL parser and are invoked //! when the standard parser doesn't recognize the syntax. 
+// Re-export SubscriptionOptions from kalamdb_commons +pub use kalamdb_commons::websocket::SubscriptionOptions; + +// Re-export flush/compact commands +pub use crate::ddl::compact_commands::{CompactAllTablesStatement, CompactTableStatement}; +pub use crate::ddl::flush_commands::{FlushAllTablesStatement, FlushTableStatement}; +// Job commands (KILL JOB) +pub use crate::ddl::job_commands::{parse_job_command, JobCommand}; +// Manifest cache commands (SHOW MANIFEST CACHE) +pub use crate::ddl::manifest_commands::ShowManifestStatement; /// Re-export existing KalamDB-specific parsers for convenience. /// /// These parsers handle commands that are unique to KalamDB and not part @@ -21,33 +31,18 @@ pub use crate::ddl::storage_commands::{ AlterStorageStatement, CreateStorageStatement, DropStorageStatement, ShowStoragesStatement, }; - -// Re-export flush/compact commands -pub use crate::ddl::compact_commands::{CompactAllTablesStatement, CompactTableStatement}; -pub use crate::ddl::flush_commands::{FlushAllTablesStatement, FlushTableStatement}; - -// Job commands (KILL JOB) -pub use crate::ddl::job_commands::{parse_job_command, JobCommand}; - // Subscribe commands (SUBSCRIBE TO) pub use crate::ddl::subscribe_commands::SubscribeStatement; -// Re-export SubscriptionOptions from kalamdb_commons -pub use kalamdb_commons::websocket::SubscriptionOptions; - // Topic pub/sub commands pub use crate::ddl::topic_commands::{ AddTopicSourceStatement, ClearTopicStatement, ConsumePosition, ConsumeStatement, CreateTopicStatement, DropTopicStatement, }; - // User commands (CREATE USER, ALTER USER, DROP USER) pub use crate::ddl::user_commands::{ AlterUserStatement, CreateUserStatement, DropUserStatement, UserModification, }; -// Manifest cache commands (SHOW MANIFEST CACHE) -pub use crate::ddl::manifest_commands::ShowManifestStatement; - /// Extension statement types that don't fit into standard SQL. 
/// /// This enum represents KalamDB-specific commands that extend SQL @@ -100,12 +95,18 @@ pub enum ExtensionStatement { ClusterTriggerElection, /// CLUSTER TRANSFER-LEADER command ClusterTransferLeader { node_id: u64 }, + /// CLUSTER JOIN command + ClusterJoin { + node_id: u64, + rpc_addr: String, + api_addr: String, + }, + /// CLUSTER REBALANCE command + ClusterRebalance, /// CLUSTER STEPDOWN command ClusterStepdown, /// CLUSTER CLEAR command ClusterClear, - /// CLUSTER LIST command - ClusterList, } impl ExtensionStatement { @@ -353,22 +354,20 @@ impl ExtensionStatement { .find(|p| p.parse::().is_ok()) .and_then(|p| p.parse::().ok()) }) - .or_else(|| { - original_parts - .get(2) - .and_then(|p| p.parse::().ok()) - }); + .or_else(|| original_parts.get(2).and_then(|p| p.parse::().ok())); if let Some(upto) = upto { return Ok(ExtensionStatement::ClusterPurge { upto }); } - return Err("CLUSTER PURGE requires --upto or a numeric index".to_string()); - } + return Err( + "CLUSTER PURGE requires --upto or a numeric index".to_string() + ); + }, "TRIGGER" => { if parts.get(2) == Some(&"ELECTION") { return Ok(ExtensionStatement::ClusterTriggerElection); } - } + }, "TRIGGER-ELECTION" => return Ok(ExtensionStatement::ClusterTriggerElection), "TRANSFER" => { if parts.get(2) == Some(&"LEADER") { @@ -378,26 +377,78 @@ impl ExtensionStatement { } return Err("CLUSTER TRANSFER LEADER requires a node id".to_string()); } - } + }, "TRANSFER-LEADER" => { let node_id = parts.get(2).and_then(|id| id.parse::().ok()); if let Some(node_id) = node_id { return Ok(ExtensionStatement::ClusterTransferLeader { node_id }); } return Err("CLUSTER TRANSFER-LEADER requires a node id".to_string()); - } + }, + "JOIN" => { + let original_parts: Vec<&str> = sql.split_whitespace().collect(); + let node_id = original_parts + .get(2) + .and_then(|id| id.parse::().ok()) + .ok_or_else(|| { + "CLUSTER JOIN requires a numeric node id".to_string() + })?; + let upper_parts: Vec = + original_parts.iter().map(|part| 
part.to_ascii_uppercase()).collect(); + let (rpc_addr, api_addr) = + if upper_parts.get(3).map(String::as_str) == Some("RPC") { + let rpc_addr = original_parts.get(4).ok_or_else(|| { + "CLUSTER JOIN requires RPC address".to_string() + })?; + if upper_parts.get(5).map(String::as_str) != Some("API") { + return Err("CLUSTER JOIN requires API address".to_string()); + } + let api_addr = original_parts.get(6).ok_or_else(|| { + "CLUSTER JOIN requires API address".to_string() + })?; + ((*rpc_addr).to_string(), (*api_addr).to_string()) + } else { + let rpc_addr = original_parts.get(3).ok_or_else(|| { + "CLUSTER JOIN requires RPC address".to_string() + })?; + let api_addr = original_parts.get(4).ok_or_else(|| { + "CLUSTER JOIN requires API address".to_string() + })?; + ((*rpc_addr).to_string(), (*api_addr).to_string()) + }; + + return Ok(ExtensionStatement::ClusterJoin { + node_id, + rpc_addr, + api_addr, + }); + }, + "REBALANCE" => return Ok(ExtensionStatement::ClusterRebalance), "STEPDOWN" | "STEP-DOWN" => return Ok(ExtensionStatement::ClusterStepdown), "CLEAR" => return Ok(ExtensionStatement::ClusterClear), - "LIST" | "LS" | "STATUS" => return Ok(ExtensionStatement::ClusterList), - "JOIN" | "LEAVE" => { - return Err("CLUSTER JOIN/LEAVE commands were removed".to_string()); + "LIST" | "LS" | "STATUS" => { + return Err( + "CLUSTER LIST is a CLI-only command. Use \\cluster list in kalam, or query system.cluster and system.cluster_groups directly." + .to_string(), + ); + }, + "LEAVE" => { + return Err("CLUSTER LEAVE is not supported yet".to_string()); + }, + _ => { + return Err("Unknown CLUSTER subcommand. Supported: SNAPSHOT, PURGE, \ + TRIGGER ELECTION, TRANSFER-LEADER, JOIN, REBALANCE, STEPDOWN, \ + CLEAR" + .to_string()) }, - _ => return Err("Unknown CLUSTER subcommand. Supported: SNAPSHOT, PURGE, TRIGGER ELECTION, TRANSFER-LEADER, STEPDOWN, CLEAR, LIST".to_string()), } } } - Err("Unknown KalamDB extension command. 
Supported commands: CREATE/ALTER/DROP/SHOW STORAGE, STORAGE FLUSH, STORAGE COMPACT, KILL JOB, SUBSCRIBE TO, CREATE/ALTER/DROP USER, SHOW MANIFEST".to_string()) + Err("Unknown KalamDB extension command. Supported commands: CREATE/ALTER/DROP/SHOW \ + STORAGE, STORAGE FLUSH, STORAGE COMPACT, KILL JOB, SUBSCRIBE TO, CREATE/ALTER/DROP \ + USER, SHOW MANIFEST" + .to_string()) } } @@ -407,12 +458,9 @@ mod tests { #[test] fn test_parse_create_storage() { - let sql = "CREATE STORAGE my_storage \ - TYPE filesystem \ - NAME 'My Storage' \ - BASE_DIRECTORY '/data' \ - SHARED_TABLES_TEMPLATE '{namespace}/{table}/' \ - USER_TABLES_TEMPLATE '{namespace}/{table}/{userId}/'"; + let sql = "CREATE STORAGE my_storage TYPE filesystem NAME 'My Storage' BASE_DIRECTORY \ + '/data' SHARED_TABLES_TEMPLATE '{namespace}/{table}/' USER_TABLES_TEMPLATE \ + '{namespace}/{table}/{userId}/'"; let result = ExtensionStatement::parse(sql); if let Err(ref e) = result { eprintln!("Parse error: {}", e); @@ -471,11 +519,30 @@ mod tests { } #[test] - fn test_parse_cluster_join_removed() { - let sql = "CLUSTER JOIN 10.0.0.2:9188"; + fn test_parse_cluster_join() { + let sql = "CLUSTER JOIN 2 10.0.0.2:9188 http://10.0.0.2:8080"; let result = ExtensionStatement::parse(sql); - assert!(result.is_err()); - assert!(result.unwrap_err().contains("CLUSTER JOIN/LEAVE commands were removed")); + + match result.unwrap() { + ExtensionStatement::ClusterJoin { + node_id, + rpc_addr, + api_addr, + } => { + assert_eq!(node_id, 2); + assert_eq!(rpc_addr, "10.0.0.2:9188"); + assert_eq!(api_addr, "http://10.0.0.2:8080"); + }, + other => panic!("unexpected statement: {:?}", other), + } + } + + #[test] + fn test_parse_cluster_rebalance() { + let sql = "CLUSTER REBALANCE"; + let result = ExtensionStatement::parse(sql); + + assert!(matches!(result.unwrap(), ExtensionStatement::ClusterRebalance)); } #[test] @@ -483,6 +550,6 @@ mod tests { let sql = "CLUSTER LEAVE"; let result = ExtensionStatement::parse(sql); 
assert!(result.is_err()); - assert!(result.unwrap_err().contains("CLUSTER JOIN/LEAVE commands were removed")); + assert!(result.unwrap_err().contains("CLUSTER LEAVE is not supported yet")); } } diff --git a/backend/crates/kalamdb-dialect/src/parser/mod.rs b/backend/crates/kalamdb-dialect/src/parser/mod.rs index 70fb4f3ba..6f291ba92 100644 --- a/backend/crates/kalamdb-dialect/src/parser/mod.rs +++ b/backend/crates/kalamdb-dialect/src/parser/mod.rs @@ -6,7 +6,8 @@ //! ## Architecture //! //! - **standard.rs**: Wraps sqlparser-rs for ANSI SQL, PostgreSQL, and MySQL syntax -//! - **extensions.rs**: Custom parsers for KalamDB-specific commands (CREATE STORAGE, STORAGE FLUSH, STORAGE COMPACT, etc.) +//! - **extensions.rs**: Custom parsers for KalamDB-specific commands (CREATE STORAGE, STORAGE +//! FLUSH, STORAGE COMPACT, etc.) //! - **system.rs**: Parsers for system table queries //! - **utils.rs**: Common parsing utilities (keyword extraction, normalization, etc.) //! - **query_parser.rs**: Safe SQL query parsing for live queries and subscriptions diff --git a/backend/crates/kalamdb-dialect/src/parser/query_parser.rs b/backend/crates/kalamdb-dialect/src/parser/query_parser.rs index 721208820..95a12403d 100644 --- a/backend/crates/kalamdb-dialect/src/parser/query_parser.rs +++ b/backend/crates/kalamdb-dialect/src/parser/query_parser.rs @@ -3,13 +3,12 @@ //! Uses sqlparser-rs for safe SQL parsing to prevent SQL injection attacks //! and ensure proper handling of edge cases. 
-use crate::parser::utils::parse_sql_statements; use kalamdb_commons::constants::SystemColumnNames; use sqlparser::ast::{ Expr, GroupByExpr, Query, Select, SelectItem, SetExpr, Statement, TableFactor, TableWithJoins, }; -use crate::dialect::KalamDbDialect; +use crate::{dialect::KalamDbDialect, parser::utils::parse_sql_statements}; /// Error type for query parsing #[derive(Debug, Clone)] @@ -283,13 +282,17 @@ impl QueryParser { fn validate_subscription_query_ast(query: &Query) -> Result<(), QueryParseError> { if query.with.is_some() { return Err(QueryParseError::InvalidSql( - "Subscription query does not support WITH clauses. Only SELECT ... FROM ... [WHERE ...] is supported.".to_string(), + "Subscription query does not support WITH clauses. Only SELECT ... FROM ... \ + [WHERE ...] is supported." + .to_string(), )); } if query.order_by.is_some() { return Err(QueryParseError::InvalidSql( - "Subscription query does not support ORDER BY. Only SELECT ... FROM ... [WHERE ...] is supported.".to_string(), + "Subscription query does not support ORDER BY. Only SELECT ... FROM ... [WHERE \ + ...] is supported." + .to_string(), )); } @@ -338,7 +341,9 @@ impl QueryParser { if Self::has_group_by(&select.group_by) { return Err(QueryParseError::InvalidSql( - "Subscription query does not support GROUP BY. Only SELECT ... FROM ... [WHERE ...] is supported.".to_string(), + "Subscription query does not support GROUP BY. Only SELECT ... FROM ... [WHERE \ + ...] is supported." + .to_string(), )); } @@ -351,7 +356,9 @@ impl QueryParser { let table_with_joins = &select.from[0]; if !table_with_joins.joins.is_empty() { return Err(QueryParseError::InvalidSql( - "Subscription query does not support JOIN clauses. Only SELECT ... FROM ... [WHERE ...] is supported.".to_string(), + "Subscription query does not support JOIN clauses. Only SELECT ... FROM ... \ + [WHERE ...] is supported." 
+ .to_string(), )); } diff --git a/backend/crates/kalamdb-dialect/src/parser/system.rs b/backend/crates/kalamdb-dialect/src/parser/system.rs index ec4e3726c..55289c8c4 100644 --- a/backend/crates/kalamdb-dialect/src/parser/system.rs +++ b/backend/crates/kalamdb-dialect/src/parser/system.rs @@ -2,12 +2,11 @@ //! //! Parses SQL statements targeting system tables using sqlparser-rs. -use crate::parser::utils::parse_sql_statements; use anyhow::{anyhow, Result}; pub use kalamdb_system::SystemTable; use sqlparser::ast::Statement; -use crate::dialect::KalamDbDialect; +use crate::{dialect::KalamDbDialect, parser::utils::parse_sql_statements}; /// SQL statement types supported for system tables #[derive(Debug, Clone)] diff --git a/backend/crates/kalamdb-dialect/src/parser/utils.rs b/backend/crates/kalamdb-dialect/src/parser/utils.rs index 2946d9aea..eb43b5051 100644 --- a/backend/crates/kalamdb-dialect/src/parser/utils.rs +++ b/backend/crates/kalamdb-dialect/src/parser/utils.rs @@ -4,17 +4,20 @@ //! custom parsers (CREATE STORAGE, STORAGE FLUSH, KILL JOB, etc.). 
use core::ops::ControlFlow; + use kalamdb_commons::TableId; use once_cell::sync::Lazy; use regex::Regex; -use sqlparser::ast::{ - BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, - FunctionArguments, Ident, ObjectName, ObjectNamePart, Statement, TableFactor, TableObject, - VisitMut, VisitorMut, +use sqlparser::{ + ast::{ + BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, + FunctionArguments, Ident, ObjectName, ObjectNamePart, Statement, TableFactor, TableObject, + VisitMut, VisitorMut, + }, + dialect::Dialect, + parser::{Parser, ParserError, ParserOptions}, + tokenizer::{Span, Token}, }; -use sqlparser::dialect::Dialect; -use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::{Span, Token}; use crate::dialect::KalamDbDialect; @@ -112,11 +115,7 @@ fn rewrite_json_operators_for_datafusion(sql: &str) -> String { } fn statements_to_sql(statements: &[Statement]) -> String { - statements - .iter() - .map(ToString::to_string) - .collect::>() - .join("; ") + statements.iter().map(ToString::to_string).collect::>().join("; ") } struct JsonOperatorRewriter; @@ -144,10 +143,7 @@ fn rewrite_json_expr(expr: &Expr) -> Option { _ => return None, }; - Some(make_function_call( - function_name, - vec![(**left).clone(), (**right).clone()], - )) + Some(make_function_call(function_name, vec![(**left).clone(), (**right).clone()])) } fn make_function_call(name: &str, args: Vec) -> Expr { @@ -610,9 +606,10 @@ fn find_whole_word(haystack: &str, needle: &str) -> Option { #[cfg(test)] mod tests { - use super::*; use sqlparser::dialect::GenericDialect; + use super::*; + #[test] fn test_normalize_sql() { assert_eq!(normalize_sql(" SELECT * ;"), "SELECT *"); @@ -734,10 +731,7 @@ mod tests { "SELECT CURRENT_USER(), CURRENT_USER_ID(), CURRENT_ROLE()", ); // CURRENT_USER_ID() is an alias for CURRENT_USER() (both return user id). 
- assert_eq!( - rewritten, - "SELECT KDB_CURRENT_USER(), KDB_CURRENT_USER(), KDB_CURRENT_ROLE()" - ); + assert_eq!(rewritten, "SELECT KDB_CURRENT_USER(), KDB_CURRENT_USER(), KDB_CURRENT_ROLE()"); } #[test] @@ -750,35 +744,23 @@ mod tests { #[test] fn test_rewrite_json_arrow_operator_for_datafusion() { - let rewritten = rewrite_context_functions_for_datafusion( - "SELECT doc->'profile' AS profile FROM docs", - ); - assert_eq!( - rewritten, - "SELECT json_get_json(doc, 'profile') AS profile FROM docs" - ); + let rewritten = + rewrite_context_functions_for_datafusion("SELECT doc->'profile' AS profile FROM docs"); + assert_eq!(rewritten, "SELECT json_get_json(doc, 'profile') AS profile FROM docs"); } #[test] fn test_rewrite_json_long_arrow_operator_for_datafusion() { - let rewritten = rewrite_context_functions_for_datafusion( - "SELECT doc->>'name' AS name FROM docs", - ); - assert_eq!( - rewritten, - "SELECT json_as_text(doc, 'name') AS name FROM docs" - ); + let rewritten = + rewrite_context_functions_for_datafusion("SELECT doc->>'name' AS name FROM docs"); + assert_eq!(rewritten, "SELECT json_as_text(doc, 'name') AS name FROM docs"); } #[test] fn test_rewrite_json_question_operator_for_datafusion() { - let rewritten = rewrite_context_functions_for_datafusion( - "SELECT doc ? 'customer_id' FROM docs", - ); - assert_eq!( - rewritten, - "SELECT json_contains(doc, 'customer_id') FROM docs" - ); + let rewritten = + rewrite_context_functions_for_datafusion("SELECT doc ? 
'customer_id' FROM docs"); + assert_eq!(rewritten, "SELECT json_contains(doc, 'customer_id') FROM docs"); } #[test] @@ -788,7 +770,8 @@ mod tests { ); assert_eq!( rewritten, - "SELECT json_as_text(json_get_json(json_get_json(doc, 'user'), 'address'), 'zip') AS zip FROM docs" + "SELECT json_as_text(json_get_json(json_get_json(doc, 'user'), 'address'), 'zip') AS \ + zip FROM docs" ); } @@ -799,7 +782,8 @@ mod tests { ); assert_eq!( rewritten, - "SELECT json_as_text(doc, 'priority') AS p FROM docs WHERE json_as_text(doc, 'status') = 'active'" + "SELECT json_as_text(doc, 'priority') AS p FROM docs WHERE json_as_text(doc, \ + 'status') = 'active'" ); } } diff --git a/backend/crates/kalamdb-dialect/src/validation.rs b/backend/crates/kalamdb-dialect/src/validation.rs index b1f21bcc2..4c5f977e7 100644 --- a/backend/crates/kalamdb-dialect/src/validation.rs +++ b/backend/crates/kalamdb-dialect/src/validation.rs @@ -1,12 +1,13 @@ //! Naming validation for namespaces, tables, and columns. +use std::collections::HashSet; + use kalamdb_commons::constants::{SystemColumnNames, RESERVED_NAMESPACE_NAMES}; use once_cell::sync::Lazy; use sqlparser::keywords::{ Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX, RESERVED_FOR_COLUMN_ALIAS, RESERVED_FOR_IDENTIFIER, RESERVED_FOR_TABLE_ALIAS, RESERVED_FOR_TABLE_FACTOR, }; -use std::collections::HashSet; pub static RESERVED_NAMESPACES: Lazy> = Lazy::new(|| RESERVED_NAMESPACE_NAMES.iter().copied().collect()); diff --git a/backend/crates/kalamdb-filestore/src/core/factory.rs b/backend/crates/kalamdb-filestore/src/core/factory.rs index 551718797..0418b60aa 100644 --- a/backend/crates/kalamdb-filestore/src/core/factory.rs +++ b/backend/crates/kalamdb-filestore/src/core/factory.rs @@ -13,29 +13,29 @@ //! and applied to all remote storage backends (S3, GCS, Azure). 
#[cfg(any(feature = "cloud-aws", feature = "cloud-gcp", feature = "cloud-azure"))] -use crate::core::paths::parse_remote_url; -use crate::error::{FilestoreError, Result}; +use std::time::Duration; +use std::{path::PathBuf, sync::Arc}; + use kalamdb_configs::config::types::RemoteStorageTimeouts; -use kalamdb_system::providers::storages::models::{ - StorageLocationConfig, StorageLocationConfigError, +use kalamdb_system::{ + providers::storages::models::{StorageLocationConfig, StorageLocationConfigError}, + Storage, }; -use kalamdb_system::Storage; #[cfg(feature = "cloud-aws")] use object_store::aws::AmazonS3Builder; #[cfg(feature = "cloud-azure")] use object_store::azure::MicrosoftAzureBuilder; #[cfg(feature = "cloud-gcp")] use object_store::gcp::GoogleCloudStorageBuilder; -use object_store::local::LocalFileSystem; -use object_store::path::Path as ObjectStorePath; -use object_store::prefix::PrefixStore; #[cfg(any(feature = "cloud-aws", feature = "cloud-gcp", feature = "cloud-azure"))] use object_store::ClientOptions; -use object_store::ObjectStore; -use std::path::PathBuf; -use std::sync::Arc; +use object_store::{ + local::LocalFileSystem, path::Path as ObjectStorePath, prefix::PrefixStore, ObjectStore, +}; + #[cfg(any(feature = "cloud-aws", feature = "cloud-gcp", feature = "cloud-azure"))] -use std::time::Duration; +use crate::core::paths::parse_remote_url; +use crate::error::{FilestoreError, Result}; /// Build an `ObjectStore` instance from a Storage entity. 
/// @@ -267,11 +267,11 @@ mod tests { #[test] fn test_build_object_store_filesystem() { - use kalamdb_commons::models::ids::StorageId; - use kalamdb_system::providers::storages::models::StorageType; - use kalamdb_system::Storage; use std::env; + use kalamdb_commons::models::ids::StorageId; + use kalamdb_system::{providers::storages::models::StorageType, Storage}; + let temp_dir = env::temp_dir().join("kalamdb_test_build_store"); let _ = std::fs::remove_dir_all(&temp_dir); std::fs::create_dir_all(&temp_dir).unwrap(); diff --git a/backend/crates/kalamdb-filestore/src/core/runtime.rs b/backend/crates/kalamdb-filestore/src/core/runtime.rs index 1071369a5..5ade5346d 100644 --- a/backend/crates/kalamdb-filestore/src/core/runtime.rs +++ b/backend/crates/kalamdb-filestore/src/core/runtime.rs @@ -1,16 +1,16 @@ +use std::{future::Future, sync::OnceLock}; + use crate::error::{FilestoreError, Result}; -use std::future::Future; -use std::sync::OnceLock; /// Run an async operation in a synchronous context. /// /// This function handles the tricky case of needing to call async code from sync context. 
/// /// **Strategy**: -/// - If we're in a tokio multi-thread runtime context, use `block_in_place` which allows -/// blocking while letting other tasks run on other threads -/// - If we're in a tokio current-thread runtime (common in tests), spawn the work on a -/// background thread that uses a shared runtime to avoid nested block_on calls +/// - If we're in a tokio multi-thread runtime context, use `block_in_place` which allows blocking +/// while letting other tasks run on other threads +/// - If we're in a tokio current-thread runtime (common in tests), spawn the work on a background +/// thread that uses a shared runtime to avoid nested block_on calls /// - If no runtime exists, use the shared runtime's block_on /// /// **Why this matters for object_store**: diff --git a/backend/crates/kalamdb-filestore/src/files/file_service.rs b/backend/crates/kalamdb-filestore/src/files/file_service.rs index 4dacf2f6e..f3431da7d 100644 --- a/backend/crates/kalamdb-filestore/src/files/file_service.rs +++ b/backend/crates/kalamdb-filestore/src/files/file_service.rs @@ -3,17 +3,21 @@ //! Handles file finalization (staging → permanent) and cleanup operations. //! Uses StorageRegistry to get the correct storage per table. 
-use crate::error::{FilestoreError, Result}; -use crate::files::staging::{StagedFile, StagingManager}; -use crate::registry::{StorageCached, StorageRegistry}; +use std::{fs, sync::Arc}; + use bytes::Bytes; -use kalamdb_commons::ids::SnowflakeGenerator; -use kalamdb_commons::models::ids::StorageId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{ + ids::SnowflakeGenerator, + models::{ids::StorageId, TableId, UserId}, + schemas::TableType, +}; use kalamdb_system::{FileRef, FileSubfolderState}; -use std::fs; -use std::sync::Arc; + +use crate::{ + error::{FilestoreError, Result}, + files::staging::{StagedFile, StagingManager}, + registry::{StorageCached, StorageRegistry}, +}; /// File storage service for managing FILE column uploads. /// diff --git a/backend/crates/kalamdb-filestore/src/files/staging.rs b/backend/crates/kalamdb-filestore/src/files/staging.rs index eb72af0f5..50b9b30a3 100644 --- a/backend/crates/kalamdb-filestore/src/files/staging.rs +++ b/backend/crates/kalamdb-filestore/src/files/staging.rs @@ -3,12 +3,16 @@ //! Files are staged in a temp directory before being finalized to their //! permanent location. This enables atomic operations and cleanup on failure. -use crate::error::{FilestoreError, Result}; +use std::{ + fs, + io::Write, + path::{Path, PathBuf}, +}; + use bytes::Bytes; use sha2::{Digest, Sha256}; -use std::fs; -use std::io::Write; -use std::path::{Path, PathBuf}; + +use crate::error::{FilestoreError, Result}; /// A staged file with computed metadata. 
#[derive(Debug, Clone)] @@ -273,9 +277,10 @@ fn detect_mime_type(filename: &str, data: &[u8]) -> String { #[cfg(test)] mod tests { - use super::*; use std::env; + use super::*; + #[test] fn test_sanitize_path_component() { assert_eq!(sanitize_path_component("hello-world_123.txt"), "hello-world_123.txt"); diff --git a/backend/crates/kalamdb-filestore/src/health/service.rs b/backend/crates/kalamdb-filestore/src/health/service.rs index 71b3ef2e7..d2b6605f4 100644 --- a/backend/crates/kalamdb-filestore/src/health/service.rs +++ b/backend/crates/kalamdb-filestore/src/health/service.rs @@ -1,19 +1,18 @@ //! Storage health check service implementation. -use super::models::{ConnectivityTestResult, StorageHealthResult}; -use crate::core::factory::build_object_store; -use crate::error::{FilestoreError, Result}; +use std::{path::Path, sync::Arc, time::Instant}; + use bytes::Bytes; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; -use object_store::path::Path as ObjectPath; -use object_store::ObjectStore; -use object_store::ObjectStoreExt; -use std::path::Path; -use std::sync::Arc; -use std::time::Instant; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; +use object_store::{path::Path as ObjectPath, ObjectStore, ObjectStoreExt}; use sysinfo::Disks; +use super::models::{ConnectivityTestResult, StorageHealthResult}; +use crate::{ + core::factory::build_object_store, + error::{FilestoreError, Result}, +}; + /// Test file content used for health checks. const HEALTH_CHECK_CONTENT: &[u8] = b"KALAMDB_HEALTH\n"; diff --git a/backend/crates/kalamdb-filestore/src/health/tests.rs b/backend/crates/kalamdb-filestore/src/health/tests.rs index acf858c56..df273a567 100644 --- a/backend/crates/kalamdb-filestore/src/health/tests.rs +++ b/backend/crates/kalamdb-filestore/src/health/tests.rs @@ -1,12 +1,15 @@ //! Tests for the storage health service. 
-use super::models::{HealthStatus, StorageHealthResult}; -use super::service::StorageHealthService; -use kalamdb_commons::models::StorageId; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; use std::env; +use kalamdb_commons::models::StorageId; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; + +use super::{ + models::{HealthStatus, StorageHealthResult}, + service::StorageHealthService, +}; + fn create_test_storage(base_directory: &str) -> Storage { let now = chrono::Utc::now().timestamp(); Storage { diff --git a/backend/crates/kalamdb-filestore/src/lib.rs b/backend/crates/kalamdb-filestore/src/lib.rs index 72bb53e19..f3daa3d29 100644 --- a/backend/crates/kalamdb-filestore/src/lib.rs +++ b/backend/crates/kalamdb-filestore/src/lib.rs @@ -33,6 +33,8 @@ pub use error::{FilestoreError, Result}; pub use files::{FileStorageService, StagedFile, StagingManager}; pub use health::{ConnectivityTestResult, HealthStatus, StorageHealthResult, StorageHealthService}; pub use manifest::json::{manifest_exists, read_manifest_json, write_manifest_json}; -pub use parquet::reader::{parse_parquet_stream, RecordBatchFileStream}; -pub use parquet::writer::ParquetWriteResult; +pub use parquet::{ + reader::{parse_parquet_stream, RecordBatchFileStream}, + writer::ParquetWriteResult, +}; pub use registry::{StorageCached, StorageRegistry}; diff --git a/backend/crates/kalamdb-filestore/src/manifest/json.rs b/backend/crates/kalamdb-filestore/src/manifest/json.rs index 698e1e70d..50f39d2bc 100644 --- a/backend/crates/kalamdb-filestore/src/manifest/json.rs +++ b/backend/crates/kalamdb-filestore/src/manifest/json.rs @@ -3,11 +3,16 @@ //! Provides read/write operations for manifest.json files with atomic writes. //! Supports both local and remote storage backends. 
-use crate::error::{FilestoreError, Result}; -use crate::registry::StorageCached; use bytes::Bytes; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{ + models::{TableId, UserId}, + schemas::TableType, +}; + +use crate::{ + error::{FilestoreError, Result}, + registry::StorageCached, +}; /// Read manifest.json from storage. /// @@ -70,15 +75,16 @@ pub async fn manifest_exists( #[cfg(test)] mod tests { + use std::{env, fs}; + + use kalamdb_commons::{ + models::{ids::StorageId, TableId}, + schemas::TableType, + }; + use kalamdb_system::{providers::storages::models::StorageType, Storage}; + use super::*; use crate::registry::StorageCached; - use kalamdb_commons::models::ids::StorageId; - use kalamdb_commons::models::TableId; - use kalamdb_commons::schemas::TableType; - use kalamdb_system::providers::storages::models::StorageType; - use kalamdb_system::Storage; - use std::env; - use std::fs; fn create_test_storage(temp_dir: &std::path::Path) -> Storage { let now = chrono::Utc::now().timestamp_millis(); diff --git a/backend/crates/kalamdb-filestore/src/parquet/reader.rs b/backend/crates/kalamdb-filestore/src/parquet/reader.rs index fd82805d5..1a0f0099d 100644 --- a/backend/crates/kalamdb-filestore/src/parquet/reader.rs +++ b/backend/crates/kalamdb-filestore/src/parquet/reader.rs @@ -6,11 +6,11 @@ //! # Performance Features //! //! - **Column Projection**: Only read the columns you need, reducing I/O and memory -//! - **Row Group Pruning via Bloom Filters**: Skip entire row groups where the bloom -//! filter reports a value is "definitely not present" -//! - **Streaming I/O**: All reads use `ParquetObjectReader` — reads only the footer -//! eagerly and fetches column chunks on demand via range requests (remote) or -//! file seeks (local). No full-file downloads. +//! - **Row Group Pruning via Bloom Filters**: Skip entire row groups where the bloom filter reports +//! a value is "definitely not present" +//! 
- **Streaming I/O**: All reads use `ParquetObjectReader` — reads only the footer eagerly and +//! fetches column chunks on demand via range requests (remote) or file seeks (local). No +//! full-file downloads. //! //! # Usage Tiers //! @@ -18,17 +18,17 @@ //! |----------|:-:|:-:|----------| //! | `parse_parquet_stream` | Optional | ✓ | General streaming read (recommended) | -use crate::error::{FilestoreError, Result}; +use std::{pin::Pin, sync::Arc}; + use arrow::record_batch::RecordBatch; -use datafusion::parquet::arrow::async_reader::{ - ParquetObjectReader, ParquetRecordBatchStreamBuilder, +use datafusion::parquet::arrow::{ + async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}, + ProjectionMask, }; -use datafusion::parquet::arrow::ProjectionMask; use futures_util::TryStreamExt; -use object_store::path::Path as ObjectPath; -use object_store::ObjectStore; -use std::pin::Pin; -use std::sync::Arc; +use object_store::{path::Path as ObjectPath, ObjectStore}; + +use crate::error::{FilestoreError, Result}; // ========== Async streaming reader (ObjectStore-backed) ========== @@ -97,21 +97,21 @@ fn resolve_column_indices( #[cfg(test)] mod tests { + use std::{env, fs, sync::Arc}; + + use arrow::{ + array::{Array, BooleanArray, Float64Array, Int64Array, StringArray}, + record_batch::RecordBatch, + }; + use kalamdb_commons::{ + arrow_utils::{field_boolean, field_float64, field_int64, field_utf8, schema}, + models::{ids::StorageId, TableId}, + schemas::TableType, + }; + use kalamdb_system::{providers::storages::models::StorageType, Storage}; + use super::*; use crate::registry::StorageCached; - use arrow::array::{Array, BooleanArray, Float64Array, Int64Array, StringArray}; - use arrow::record_batch::RecordBatch; - use kalamdb_commons::arrow_utils::{ - field_boolean, field_float64, field_int64, field_utf8, schema, - }; - use kalamdb_commons::models::ids::StorageId; - use kalamdb_commons::models::TableId; - use kalamdb_commons::schemas::TableType; - use 
kalamdb_system::providers::storages::models::StorageType; - use kalamdb_system::Storage; - use std::env; - use std::fs; - use std::sync::Arc; fn create_test_storage(temp_dir: &std::path::Path) -> Storage { let now = chrono::Utc::now().timestamp_millis(); diff --git a/backend/crates/kalamdb-filestore/src/parquet/writer.rs b/backend/crates/kalamdb-filestore/src/parquet/writer.rs index ee813c267..bf3bb1349 100644 --- a/backend/crates/kalamdb-filestore/src/parquet/writer.rs +++ b/backend/crates/kalamdb-filestore/src/parquet/writer.rs @@ -2,16 +2,21 @@ //! //! Provides serialization helpers for Parquet writes managed by StorageCached. -use crate::error::{FilestoreError, Result}; -use arrow::array::{Array, Int64Array}; -use arrow::datatypes::SchemaRef; -use arrow::record_batch::RecordBatch; +use arrow::{ + array::{Array, Int64Array}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; use bytes::Bytes; use datafusion::arrow::compute::{self, SortOptions}; use kalamdb_commons::constants::SystemColumnNames; -use parquet::arrow::ArrowWriter; -use parquet::basic::{Compression, ZstdLevel}; -use parquet::file::properties::WriterProperties; +use parquet::{ + arrow::ArrowWriter, + basic::{Compression, ZstdLevel}, + file::properties::WriterProperties, +}; + +use crate::error::{FilestoreError, Result}; /// Result of a Parquet write operation. 
#[derive(Debug, Clone)] @@ -155,10 +160,12 @@ fn is_sorted_by_seq(batch: &RecordBatch, seq_idx: usize) -> Result { #[cfg(test)] mod tests { - use super::*; + use std::sync::Arc; + use arrow::array::StringArray; use kalamdb_commons::arrow_utils::{field_utf8, schema}; - use std::sync::Arc; + + use super::*; fn make_test_batch() -> (SchemaRef, Vec) { let schema = schema(vec![field_utf8("name", false)]); diff --git a/backend/crates/kalamdb-filestore/src/paths/mod.rs b/backend/crates/kalamdb-filestore/src/paths/mod.rs index afcfd5faa..de8889df7 100644 --- a/backend/crates/kalamdb-filestore/src/paths/mod.rs +++ b/backend/crates/kalamdb-filestore/src/paths/mod.rs @@ -3,9 +3,10 @@ //! This module is intentionally scoped to StorageCached usage to avoid //! duplicate path logic elsewhere in the crate. -use kalamdb_commons::models::{TableId, UserId}; use std::borrow::Cow; +use kalamdb_commons::models::{TableId, UserId}; + /// Template resolution utilities for storage path templates. pub(crate) struct TemplateResolver; @@ -113,9 +114,10 @@ impl PathResolver { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + // ==================== TemplateResolver tests ==================== #[test] diff --git a/backend/crates/kalamdb-filestore/src/registry/storage_cached.rs b/backend/crates/kalamdb-filestore/src/registry/storage_cached.rs index 8523c90a0..4f4385216 100644 --- a/backend/crates/kalamdb-filestore/src/registry/storage_cached.rs +++ b/backend/crates/kalamdb-filestore/src/registry/storage_cached.rs @@ -4,25 +4,29 @@ //! the complete file-operation API for KalamDB cold storage. All operations are //! async-first; thin `_sync` wrappers delegate via `run_blocking`. 
-use super::operations::{ - DeletePrefixResult, DeleteResult, ExistsResult, FileInfo, GetResult, ListResult, PathResult, - PutResult, RenameResult, -}; -use crate::core::runtime::run_blocking; -use crate::error::{FilestoreError, Result}; -use crate::parquet::writer::{serialize_to_parquet, ParquetWriteResult}; -use crate::paths::{PathResolver, TemplateResolver}; -use arrow::datatypes::SchemaRef; -use arrow::record_batch::RecordBatch; +use std::sync::Arc; + +use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use bytes::Bytes; use futures_util::StreamExt; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; +use kalamdb_commons::{ + models::{TableId, UserId}, + schemas::TableType, +}; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; use object_store::{path::Path as ObjectPath, ObjectStore, ObjectStoreExt}; use parking_lot::RwLock; -use std::sync::Arc; + +use super::operations::{ + DeletePrefixResult, DeleteResult, ExistsResult, FileInfo, GetResult, ListResult, PathResult, + PutResult, RenameResult, +}; +use crate::{ + core::runtime::run_blocking, + error::{FilestoreError, Result}, + parquet::writer::{serialize_to_parquet, ParquetWriteResult}, + paths::{PathResolver, TemplateResolver}, +}; /// Unified storage interface with lazy `ObjectStore` and template resolution. 
/// @@ -607,12 +611,13 @@ fn extract_parquet_filenames(list_result: &ListResult) -> Vec { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::ids::StorageId; - use kalamdb_commons::{NamespaceId, TableName}; - use kalamdb_system::providers::storages::models::StorageType; use std::env; + use kalamdb_commons::{models::ids::StorageId, NamespaceId, TableName}; + use kalamdb_system::providers::storages::models::StorageType; + + use super::*; + fn create_test_storage() -> Storage { // Create a unique temp directory for each test invocation let unique_id = std::time::SystemTime::now() diff --git a/backend/crates/kalamdb-filestore/src/registry/storage_registry.rs b/backend/crates/kalamdb-filestore/src/registry/storage_registry.rs index f25f52d5e..b3566eb4f 100644 --- a/backend/crates/kalamdb-filestore/src/registry/storage_registry.rs +++ b/backend/crates/kalamdb-filestore/src/registry/storage_registry.rs @@ -4,14 +4,17 @@ //! Includes an in-memory cache to avoid repeated RocksDB lookups when multiple tables //! share the same storage. -use crate::error::{FilestoreError, Result}; -use crate::registry::storage_cached::StorageCached; +use std::sync::Arc; + use dashmap::DashMap; use kalamdb_commons::models::StorageId; use kalamdb_configs::config::types::RemoteStorageTimeouts; -use kalamdb_system::Storage; -use kalamdb_system::StoragesTableProvider; -use std::sync::Arc; +use kalamdb_system::{Storage, StoragesTableProvider}; + +use crate::{ + error::{FilestoreError, Result}, + registry::storage_cached::StorageCached, +}; /// Registry for managing storage backends /// @@ -333,10 +336,12 @@ impl StorageRegistry { #[cfg(test)] mod tests { - use super::*; - use kalamdb_store::test_utils::InMemoryBackend; use std::sync::Arc; + use kalamdb_store::test_utils::InMemoryBackend; + + use super::*; + // These are unit tests for template validation logic. // Use an in-memory backend so validation tests do not depend on RocksDB lifecycle. 
diff --git a/backend/crates/kalamdb-filestore/src/tests/bloom_filter_pk_test.rs b/backend/crates/kalamdb-filestore/src/tests/bloom_filter_pk_test.rs index 1f3c5f546..f890f4e32 100644 --- a/backend/crates/kalamdb-filestore/src/tests/bloom_filter_pk_test.rs +++ b/backend/crates/kalamdb-filestore/src/tests/bloom_filter_pk_test.rs @@ -4,19 +4,21 @@ //! This enables efficient point query filtering (WHERE id = X) by skipping batch files //! where the Bloom filter indicates "definitely not present". -use crate::registry::StorageCached; -use arrow::array::{Int64Array, StringArray}; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::record_batch::RecordBatch; +use std::{env, fs, sync::Arc}; + +use arrow::{ + array::{Int64Array, StringArray}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, +}; use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; -use kalamdb_commons::models::ids::StorageId; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_system::providers::storages::models::StorageType; -use kalamdb_system::Storage; -use std::env; -use std::fs; -use std::sync::Arc; +use kalamdb_commons::{ + models::{ids::StorageId, TableId}, + schemas::TableType, +}; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; + +use crate::registry::StorageCached; fn create_test_storage(temp_dir: &std::path::Path) -> Storage { let now = chrono::Utc::now().timestamp_millis(); diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/backup/backup_database.rs b/backend/crates/kalamdb-handlers/crates/admin/src/backup/backup_database.rs index da431b0e1..034f8ce44 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/backup/backup_database.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/backup/backup_database.rs @@ -1,15 +1,19 @@ //! 
Typed handler for BACKUP DATABASE statement +use std::sync::Arc; + use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::backup::BackupParams; -use kalamdb_jobs::AppContextJobsExt; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::backup::BackupParams, AppContextJobsExt}; use kalamdb_sql::ddl::BackupDatabaseStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for BACKUP DATABASE TO '' pub struct BackupDatabaseHandler { diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/backup/restore_database.rs b/backend/crates/kalamdb-handlers/crates/admin/src/backup/restore_database.rs index 9304c9074..7ec9dd587 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/backup/restore_database.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/backup/restore_database.rs @@ -1,15 +1,19 @@ //! 
Typed handler for RESTORE DATABASE statement +use std::sync::Arc; + use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::restore::RestoreParams; -use kalamdb_jobs::AppContextJobsExt; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::restore::RestoreParams, AppContextJobsExt}; use kalamdb_sql::ddl::RestoreDatabaseStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for RESTORE DATABASE FROM '' pub struct RestoreDatabaseHandler { @@ -61,7 +65,8 @@ impl TypedStatementHandler for RestoreDatabaseHandler Ok(ExecutionResult::Success { message: format!( - "Database restore started from '{}'. Job ID: {}. Server restart required after completion.", + "Database restore started from '{}'. Job ID: {}. Server restart required after \ + completion.", statement.backup_path, job_id.as_str() ), diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/clear.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/clear.rs index f1ca487a8..cd6a9be9e 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/clear.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/clear.rs @@ -2,13 +2,15 @@ //! //! 
Clears old snapshots from the cluster storage -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use std::sync::Arc; + +use super::result_rows::cluster_clear_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterClearHandler { app_context: Arc, @@ -41,12 +43,15 @@ impl StatementHandler for ClusterClearHandler { let snapshots_dir = config.storage.resolved_snapshots_dir(); if !snapshots_dir.exists() { - return Ok(ExecutionResult::Success { - message: format!( - "No snapshots directory found at: {}\nNothing to clear.", - snapshots_dir.display() - ), - }); + return cluster_clear_rows( + snapshots_dir.display().to_string(), + false, + 0, + 0, + 0, + 0, + Vec::new(), + ); } // Count files before clearing @@ -114,35 +119,20 @@ impl StatementHandler for ClusterClearHandler { } } - // Build response message - let mut message = format!( - "Cluster clear completed\n\ - Snapshots directory: {}\n\ - Total snapshots found: {} ({:.2} MB)\n\ - Snapshots cleared: {} ({:.2} MB freed)", - snapshots_dir.display(), - total_files, - total_size as f64 / 1024.0 / 1024.0, - cleared_files, - cleared_size as f64 / 1024.0 / 1024.0 - ); - - if !errors.is_empty() { - message.push_str(&format!("\n\nErrors ({}):", errors.len())); - for error in errors.iter().take(5) { - message.push_str(&format!("\n - {}", error)); - } - if errors.len() > 5 { - message.push_str(&format!("\n ... 
and {} more errors", errors.len() - 5)); - } - } - log::info!( "CLUSTER CLEAR completed: {} files cleared, {:.2} MB freed", cleared_files, cleared_size as f64 / 1024.0 / 1024.0 ); - Ok(ExecutionResult::Success { message }) + cluster_clear_rows( + snapshots_dir.display().to_string(), + true, + total_files as u64, + total_size, + cleared_files as u64, + cleared_size, + errors, + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/join.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/join.rs new file mode 100644 index 000000000..6d685fe79 --- /dev/null +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/join.rs @@ -0,0 +1,70 @@ +//! CLUSTER JOIN handler +//! +//! Adds a node to all Raft groups at runtime as a learner, waits for catch-up, +//! promotes it to voter, then requests best-effort data leader rebalancing. + +use std::sync::Arc; + +use super::result_rows::cluster_join_rows; +use kalamdb_commons::models::NodeId; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, +}; +use kalamdb_raft::RaftExecutor; +use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; + +pub struct ClusterJoinHandler { + app_context: Arc, +} + +impl ClusterJoinHandler { + pub fn new(app_context: Arc) -> Self { + Self { app_context } + } +} + +impl StatementHandler for ClusterJoinHandler { + async fn execute( + &self, + statement: SqlStatement, + _params: Vec, + ctx: &ExecutionContext, + ) -> Result { + let SqlStatementKind::ClusterJoin { + node_id, + rpc_addr, + api_addr, + } = statement.kind() + else { + return Err(KalamDbError::InvalidOperation(format!( + "CLUSTER JOIN handler received wrong statement type: {}", + statement.name() + ))); + }; + + log::info!( + "CLUSTER JOIN initiated by user: {} (node={}, rpc={}, api={})", + ctx.user_id(), + node_id, + rpc_addr, + api_addr + ); + + let executor = 
self.app_context.executor(); + let Some(raft_executor) = executor.as_any().downcast_ref::() else { + return Err(KalamDbError::InvalidOperation( + "CLUSTER JOIN requires cluster mode (Raft executor not available)".to_string(), + )); + }; + + raft_executor + .manager() + .add_node(NodeId::from(*node_id), rpc_addr.clone(), api_addr.clone()) + .await + .map_err(|e| KalamDbError::InvalidOperation(format!("Failed to join node: {}", e)))?; + + cluster_join_rows(*node_id, rpc_addr, api_addr, true) + } +} diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/list.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/list.rs deleted file mode 100644 index 20e179352..000000000 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/list.rs +++ /dev/null @@ -1,276 +0,0 @@ -//! CLUSTER LIST handler -//! -//! Lists all nodes in the cluster with their groups and health status -//! Provides a formatted display for debugging and cluster overview - -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, -}; -use kalamdb_raft::{GroupId, NodeRole, RaftExecutor}; -use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; - -pub struct ClusterListHandler { - app_context: Arc, -} - -impl ClusterListHandler { - pub fn new(app_context: Arc) -> Self { - Self { app_context } - } -} - -impl StatementHandler for ClusterListHandler { - async fn execute( - &self, - statement: SqlStatement, - _params: Vec, - ctx: &ExecutionContext, - ) -> Result { - if !matches!(statement.kind(), SqlStatementKind::ClusterList) { - return Err(KalamDbError::InvalidOperation(format!( - "CLUSTER LIST handler received wrong statement type: {}", - statement.name() - ))); - } - - log::info!("CLUSTER LIST queried by user: {}", ctx.user_id()); - - // Get the RaftExecutor to access cluster info - let executor = 
self.app_context.executor(); - let Some(raft_executor) = executor.as_any().downcast_ref::() else { - return Err(KalamDbError::InvalidOperation( - "CLUSTER LIST requires cluster mode (Raft executor not available)".to_string(), - )); - }; - - // Fan-out GetNodeInfo to all peers so get_cluster_info() can return full data. - // This is fire-and-update: each peer response populates the cache before - // get_cluster_info() reads it below. - raft_executor.refresh_peer_stats().await; - - let manager = raft_executor.manager(); - let cluster_info = executor.get_cluster_info(); - - // Build formatted output - let mut output = String::new(); - - // Header with cluster info - output.push_str(&format!( - "╔══════════════════════════════════════════════════════════════════╗\n\ - ║ CLUSTER OVERVIEW ║\n\ - ╠══════════════════════════════════════════════════════════════════╣\n\ - ║ Cluster ID: {:<54} ║\n\ - ║ Mode: {:<60} ║\n\ - ║ Current Node: {:<52} ║\n\ - ║ Total Groups: {:<52} ║\n\ - ║ • Meta: 1 ║\n\ - ║ • User Shards: {:<49} ║\n\ - ║ • Shared Shards: {:<47} ║\n\ - ╠══════════════════════════════════════════════════════════════════╣\n", - cluster_info.cluster_id, - if cluster_info.is_cluster_mode { - "Cluster" - } else { - "Single-Node" - }, - cluster_info.current_node_id.as_u64(), - cluster_info.total_groups, - cluster_info.user_shards, - cluster_info.shared_shards, - )); - - // Cluster-wide metrics (if available) - if let Some(term) = cluster_info.last_log_index { - output.push_str(&format!("║ Raft Term: {:<55} ║\n", cluster_info.current_term)); - output.push_str(&format!("║ Last Log Index: {:<50} ║\n", term)); - if let Some(applied) = cluster_info.last_applied { - output.push_str(&format!("║ Last Applied: {:<52} ║\n", applied)); - } - if let Some(ms) = cluster_info.millis_since_quorum_ack { - output.push_str(&format!("║ Quorum ACK: {:<54} ║\n", format!("{}ms ago", ms))); - } - output - .push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - } - 
- // Nodes section - output.push_str("║ NODES ║\n"); - output.push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - - for node in &cluster_info.nodes { - let self_marker = if node.is_self { " (this)" } else { "" }; - let leader_marker = if node.is_leader { " ★" } else { "" }; - - // Status with color codes (ANSI escape sequences) - let status_str = format!("{:?}", node.status); - let role_str = match node.role { - NodeRole::Leader => "LEADER", - NodeRole::Follower => "FOLLOWER", - NodeRole::Candidate => "CANDIDATE", - NodeRole::Learner => "LEARNER", - NodeRole::Shutdown => "SHUTDOWN", - }; - - output.push_str(&format!( - "║ Node {}{}{:<57} ║\n", - node.node_id.as_u64(), - self_marker, - leader_marker - )); - output.push_str(&format!("║ Role: {:<58} ║\n", role_str)); - output.push_str(&format!("║ Status: {:<56} ║\n", status_str)); - output.push_str(&format!("║ API: {:<59} ║\n", node.api_addr)); - output.push_str(&format!("║ RPC: {:<59} ║\n", node.rpc_addr)); - output.push_str(&format!( - "║ Groups Leading: {:<48} ║\n", - format!("{}/{}", node.groups_leading, node.total_groups) - )); - - if let Some(term) = node.current_term { - output.push_str(&format!("║ Term: {:<58} ║\n", term)); - } - if let Some(applied) = node.last_applied_log { - output.push_str(&format!("║ Last Applied: {:<50} ║\n", applied)); - } - if let Some(lag) = node.replication_lag { - output.push_str(&format!( - "║ Replication Lag: {:<47} ║\n", - format!("{} entries", lag) - )); - } - if let Some(pct) = node.catchup_progress_pct { - output.push_str(&format!("║ Catchup Progress: {:<46} ║\n", format!("{}%", pct))); - } - - output - .push_str("║ ║\n"); - } - - // Group status summary - output.push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - output.push_str("║ GROUP STATUS SUMMARY ║\n"); - output.push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - - // Collect group metrics - let all_groups = 
manager.all_group_ids(); - let mut leaders = 0; - let mut followers = 0; - let mut unknown = 0; - - for group_id in &all_groups { - if manager.is_leader(*group_id) { - leaders += 1; - } else if manager.current_leader(*group_id).is_some() { - followers += 1; - } else { - unknown += 1; - } - } - - output.push_str(&format!("║ Leading: {:<57} ║\n", leaders)); - output.push_str(&format!("║ Following: {:<55} ║\n", followers)); - if unknown > 0 { - output.push_str(&format!("║ Unknown/Pending: {:<49} ║\n", unknown)); - } - - // Show sample of groups (first 5 of each type) - output.push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - output.push_str("║ Group │ Type │ State │ Leader │ Snapshot │ Applied ║\n"); - output.push_str("╠══════════════════════════════════════════════════════════════════╣\n"); - - // Meta group - if let Some(metrics) = manager.group_metrics(GroupId::Meta) { - let state = format!("{:?}", metrics.state); - let leader = metrics - .current_leader - .map(|id| id.to_string()) - .unwrap_or_else(|| "-".to_string()); - let snapshot = - metrics.snapshot.map(|l| l.index.to_string()).unwrap_or_else(|| "-".to_string()); - let applied = metrics - .last_applied - .map(|l| l.index.to_string()) - .unwrap_or_else(|| "-".to_string()); - output.push_str(&format!( - "║ {:<5} │ {:<11} │ {:<8} │ {:<6} │ {:<8} │ {:<10} ║\n", - "Meta", - "meta", - &state[..state.len().min(8)], - leader, - snapshot, - applied - )); - } - - // Sample user shards (first 3) - for shard in 0..cluster_info.user_shards.min(3) { - let group_id = GroupId::DataUserShard(shard); - if let Some(metrics) = manager.group_metrics(group_id) { - let state = format!("{:?}", metrics.state); - let leader = metrics - .current_leader - .map(|id| id.to_string()) - .unwrap_or_else(|| "-".to_string()); - let snapshot = metrics - .snapshot - .map(|l| l.index.to_string()) - .unwrap_or_else(|| "-".to_string()); - let applied = metrics - .last_applied - .map(|l| l.index.to_string()) - 
.unwrap_or_else(|| "-".to_string()); - output.push_str(&format!( - "║ U{:<4} │ {:<11} │ {:<8} │ {:<6} │ {:<8} │ {:<10} ║\n", - shard, - "user_data", - &state[..state.len().min(8)], - leader, - snapshot, - applied - )); - } - } - if cluster_info.user_shards > 3 { - output.push_str(&format!( - "║ ... │ ({} more user shards) ║\n", - cluster_info.user_shards - 3 - )); - } - - // Sample shared shards (first 2) - for shard in 0..cluster_info.shared_shards.min(2) { - let group_id = GroupId::DataSharedShard(shard); - if let Some(metrics) = manager.group_metrics(group_id) { - let state = format!("{:?}", metrics.state); - let leader = metrics - .current_leader - .map(|id| id.to_string()) - .unwrap_or_else(|| "-".to_string()); - let snapshot = metrics - .snapshot - .map(|l| l.index.to_string()) - .unwrap_or_else(|| "-".to_string()); - let applied = metrics - .last_applied - .map(|l| l.index.to_string()) - .unwrap_or_else(|| "-".to_string()); - output.push_str(&format!( - "║ S{:<4} │ {:<11} │ {:<8} │ {:<6} │ {:<8} │ {:<10} ║\n", - shard, - "shared_data", - &state[..state.len().min(8)], - leader, - snapshot, - applied - )); - } - } - - output.push_str("╚══════════════════════════════════════════════════════════════════╝\n"); - - Ok(ExecutionResult::Success { message: output }) - } -} diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/mod.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/mod.rs index 79ad3911b..d6600860f 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/mod.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/mod.rs @@ -5,21 +5,25 @@ //! - CLUSTER PURGE: Purge logs up to index //! - CLUSTER TRIGGER ELECTION: Trigger leader election //! - CLUSTER TRANSFER-LEADER: Transfer leadership +//! - CLUSTER JOIN: Add a node at runtime +//! - CLUSTER REBALANCE: Best-effort data leader redistribution //! - CLUSTER STEPDOWN: Attempt leader stepdown //! - CLUSTER CLEAR: Clear old snapshots -//! 
- CLUSTER LIST: List cluster nodes pub mod clear; -pub mod list; +pub mod join; pub mod purge; +pub mod rebalance; +mod result_rows; pub mod snapshot; pub mod stepdown; pub mod transfer_leader; pub mod trigger_election; pub use clear::ClusterClearHandler; -pub use list::ClusterListHandler; +pub use join::ClusterJoinHandler; pub use purge::ClusterPurgeHandler; +pub use rebalance::ClusterRebalanceHandler; pub use snapshot::ClusterSnapshotHandler; pub use stepdown::ClusterStepdownHandler; pub use transfer_leader::ClusterTransferLeaderHandler; diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/purge.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/purge.rs index 1483f3374..917ca9cff 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/purge.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/purge.rs @@ -2,14 +2,16 @@ //! //! Purges Raft logs up to the specified index across all groups. -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_raft::RaftExecutor; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterPurgeHandler { app_context: Arc, @@ -50,24 +52,14 @@ impl StatementHandler for ClusterPurgeHandler { .await .map_err(|e| KalamDbError::InvalidOperation(format!("Failed to purge logs: {}", e)))?; - let success_count = results.iter().filter(|r| r.success).count(); - let total_count = results.len(); - let failed_count = total_count - success_count; - - let mut message = format!( - "Cluster purge completed: {}/{} groups purged successfully (upto={})", - 
success_count, total_count, upto - ); - - if failed_count > 0 { - message.push_str("\n\nFailed groups:"); - for result in results.iter().filter(|r| !r.success) { - if let Some(ref err) = result.error { - message.push_str(&format!("\n - {:?}: {}", result.group_id, err)); - } - } - } - - Ok(ExecutionResult::Success { message }) + cluster_group_action_rows( + "purge", + None, + Some(*upto), + None, + results + .into_iter() + .map(|result| (result.group_id.to_string(), result.success, result.error, None)), + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/rebalance.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/rebalance.rs new file mode 100644 index 000000000..4ff3bf22c --- /dev/null +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/rebalance.rs @@ -0,0 +1,64 @@ +//! CLUSTER REBALANCE handler +//! +//! Requests best-effort data-group leader redistribution. + +use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, +}; +use kalamdb_raft::RaftExecutor; +use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; + +pub struct ClusterRebalanceHandler { + app_context: Arc, +} + +impl ClusterRebalanceHandler { + pub fn new(app_context: Arc) -> Self { + Self { app_context } + } +} + +impl StatementHandler for ClusterRebalanceHandler { + async fn execute( + &self, + statement: SqlStatement, + _params: Vec, + ctx: &ExecutionContext, + ) -> Result { + if !matches!(statement.kind(), SqlStatementKind::ClusterRebalance) { + return Err(KalamDbError::InvalidOperation(format!( + "CLUSTER REBALANCE handler received wrong statement type: {}", + statement.name() + ))); + } + + log::info!("CLUSTER REBALANCE initiated by user: {}", ctx.user_id()); + + let executor = self.app_context.executor(); + let Some(raft_executor) = 
executor.as_any().downcast_ref::() else { + return Err(KalamDbError::InvalidOperation( + "CLUSTER REBALANCE requires cluster mode (Raft executor not available)".to_string(), + )); + }; + + let results = + raft_executor.manager().rebalance_data_leaders().await.map_err(|e| { + KalamDbError::InvalidOperation(format!("Failed to rebalance: {}", e)) + })?; + + cluster_group_action_rows( + "rebalance", + None, + None, + None, + results + .into_iter() + .map(|result| (result.group_id.to_string(), result.success, result.error, None)), + ) + } +} diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/result_rows.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/result_rows.rs new file mode 100644 index 000000000..aeb93ea16 --- /dev/null +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/result_rows.rs @@ -0,0 +1,168 @@ +use std::sync::Arc; + +use arrow::{ + array::{ArrayRef, BooleanArray, StringArray, UInt64Array}, + record_batch::RecordBatch, +}; +use kalamdb_commons::arrow_utils::{field_boolean, field_uint64, field_utf8, schema}; +use kalamdb_core::{error::KalamDbError, sql::context::ExecutionResult}; + +pub fn cluster_group_action_rows( + action: &str, + target_node_id: Option, + upto: Option, + snapshots_dir: Option, + results: I, +) -> Result +where + I: IntoIterator, Option)>, +{ + let mut group_ids = Vec::new(); + let mut successes = Vec::new(); + let mut errors = Vec::new(); + let mut snapshot_indices = Vec::new(); + + for (group_id, success, error, snapshot_index) in results { + group_ids.push(Some(group_id)); + successes.push(Some(success)); + errors.push(error); + snapshot_indices.push(snapshot_index); + } + + if group_ids.is_empty() { + group_ids.push(None); + successes.push(None); + errors.push(None); + snapshot_indices.push(None); + } + + let row_count = group_ids.len(); + let schema = schema(vec![ + field_utf8("action", false), + field_utf8("group_id", true), + field_boolean("success", true), + field_utf8("error", true), 
+ field_uint64("snapshot_index", true), + field_uint64("target_node_id", true), + field_uint64("upto", true), + field_utf8("snapshots_dir", true), + ]); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec![action.to_string(); row_count])) as ArrayRef, + Arc::new(StringArray::from(group_ids)) as ArrayRef, + Arc::new(BooleanArray::from(successes)) as ArrayRef, + Arc::new(StringArray::from(errors)) as ArrayRef, + Arc::new(UInt64Array::from(snapshot_indices)) as ArrayRef, + Arc::new(UInt64Array::from(vec![target_node_id; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![upto; row_count])) as ArrayRef, + Arc::new(StringArray::from(vec![snapshots_dir; row_count])) as ArrayRef, + ], + ) + .map_err(|e| { + KalamDbError::SerializationError(format!( + "Failed to build cluster action result rows: {}", + e + )) + })?; + + Ok(ExecutionResult::Rows { + batches: vec![batch], + row_count, + schema: Some(schema), + }) +} + +pub fn cluster_join_rows( + node_id: u64, + rpc_addr: &str, + api_addr: &str, + rebalance_requested: bool, +) -> Result { + let schema = schema(vec![ + field_utf8("action", false), + field_uint64("node_id", false), + field_utf8("rpc_addr", false), + field_utf8("api_addr", false), + field_boolean("rebalance_requested", false), + ]); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec!["join"])) as ArrayRef, + Arc::new(UInt64Array::from(vec![node_id])) as ArrayRef, + Arc::new(StringArray::from(vec![rpc_addr])) as ArrayRef, + Arc::new(StringArray::from(vec![api_addr])) as ArrayRef, + Arc::new(BooleanArray::from(vec![rebalance_requested])) as ArrayRef, + ], + ) + .map_err(|e| { + KalamDbError::SerializationError(format!("Failed to build cluster join result rows: {}", e)) + })?; + + Ok(ExecutionResult::Rows { + batches: vec![batch], + row_count: 1, + schema: Some(schema), + }) +} + +pub fn cluster_clear_rows( + snapshots_dir: String, + snapshots_dir_exists: bool, + 
total_snapshots_found: u64, + total_size_bytes: u64, + snapshots_cleared: u64, + cleared_size_bytes: u64, + errors: Vec, +) -> Result { + let row_count = errors.len().max(1); + let error_count = errors.len() as u64; + let error_rows = if errors.is_empty() { + vec![None] + } else { + errors.into_iter().map(Some).collect() + }; + + let schema = schema(vec![ + field_utf8("action", false), + field_utf8("snapshots_dir", false), + field_boolean("snapshots_dir_exists", false), + field_uint64("total_snapshots_found", false), + field_uint64("total_size_bytes", false), + field_uint64("snapshots_cleared", false), + field_uint64("cleared_size_bytes", false), + field_uint64("error_count", false), + field_utf8("error", true), + ]); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec!["clear"; row_count])) as ArrayRef, + Arc::new(StringArray::from(vec![snapshots_dir; row_count])) as ArrayRef, + Arc::new(BooleanArray::from(vec![snapshots_dir_exists; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![total_snapshots_found; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![total_size_bytes; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![snapshots_cleared; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![cleared_size_bytes; row_count])) as ArrayRef, + Arc::new(UInt64Array::from(vec![error_count; row_count])) as ArrayRef, + Arc::new(StringArray::from(error_rows)) as ArrayRef, + ], + ) + .map_err(|e| { + KalamDbError::SerializationError(format!( + "Failed to build cluster clear result rows: {}", + e + )) + })?; + + Ok(ExecutionResult::Rows { + batches: vec![batch], + row_count, + schema: Some(schema), + }) +} diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/snapshot.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/snapshot.rs index 7ee2cd11f..462761bbe 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/snapshot.rs +++ 
b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/snapshot.rs @@ -2,14 +2,16 @@ //! //! Forces all Raft logs to be written to snapshots across the cluster -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_raft::RaftExecutor; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterSnapshotHandler { app_context: Arc, @@ -47,57 +49,21 @@ impl StatementHandler for ClusterSnapshotHandler { let manager = raft_executor.manager(); - // Trigger snapshots for all groups let results = manager.trigger_all_snapshots().await.map_err(|e| { KalamDbError::InvalidOperation(format!("Failed to trigger snapshots: {}", e)) })?; - // Build response message - let success_count = results.iter().filter(|r| r.success).count(); - let total_count = results.len(); - let failed_count = total_count - success_count; - - // Get snapshot directory from config let config = self.app_context.config(); let snapshots_dir = config.storage.resolved_snapshots_dir(); - // Build detailed message - let mut message = format!( - "Cluster snapshot completed: {}/{} snapshots triggered successfully\n\ - Snapshots directory: {}", - success_count, - total_count, - snapshots_dir.display() - ); - - if failed_count > 0 { - message.push_str("\n\nFailed groups:"); - for result in results.iter().filter(|r| !r.success) { - if let Some(ref err) = result.error { - message.push_str(&format!("\n - {:?}: {}", result.group_id, err)); - } - } - } - - // Add snapshot index info for successfully triggered groups - let snapshots_with_idx: Vec<_> = - results.iter().filter(|r| 
r.success && r.snapshot_index.is_some()).collect(); - - if !snapshots_with_idx.is_empty() { - message.push_str("\n\nSnapshot indices:"); - for result in snapshots_with_idx.iter().take(5) { - if let Some(idx) = result.snapshot_index { - message.push_str(&format!("\n - {:?}: index {}", result.group_id, idx)); - } - } - if snapshots_with_idx.len() > 5 { - message - .push_str(&format!("\n ... and {} more groups", snapshots_with_idx.len() - 5)); - } - } - - log::info!("CLUSTER SNAPSHOT completed: {}/{} snapshots", success_count, total_count); - - Ok(ExecutionResult::Success { message }) + cluster_group_action_rows( + "snapshot", + None, + None, + Some(snapshots_dir.display().to_string()), + results.into_iter().map(|result| { + (result.group_id.to_string(), result.success, result.error, result.snapshot_index) + }), + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/stepdown.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/stepdown.rs index b06fce8b8..5f3765dc4 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/stepdown.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/stepdown.rs @@ -2,14 +2,16 @@ //! //! Attempts to step down leaders for all Raft groups. 
-use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_raft::RaftExecutor; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterStepdownHandler { app_context: Arc, @@ -49,24 +51,14 @@ impl StatementHandler for ClusterStepdownHandler { KalamDbError::InvalidOperation(format!("Failed to step down leaders: {}", e)) })?; - let success_count = results.iter().filter(|r| r.success).count(); - let total_count = results.len(); - let failed_count = total_count - success_count; - - let mut message = format!( - "Cluster stepdown completed: {}/{} groups requested", - success_count, total_count - ); - - if failed_count > 0 { - message.push_str("\n\nFailed groups:"); - for result in results.iter().filter(|r| !r.success) { - if let Some(ref err) = result.error { - message.push_str(&format!("\n - {:?}: {}", result.group_id, err)); - } - } - } - - Ok(ExecutionResult::Success { message }) + cluster_group_action_rows( + "stepdown", + None, + None, + None, + results + .into_iter() + .map(|result| (result.group_id.to_string(), result.success, result.error, None)), + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/transfer_leader.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/transfer_leader.rs index e34cf3aed..63f0e79a3 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/transfer_leader.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/transfer_leader.rs @@ -2,15 +2,17 @@ //! //! Attempts to transfer leadership for all Raft groups to the specified node. 
+use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; use kalamdb_commons::models::NodeId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_raft::RaftExecutor; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterTransferLeaderHandler { app_context: Arc, @@ -56,31 +58,14 @@ impl StatementHandler for ClusterTransferLeaderHandler { KalamDbError::InvalidOperation(format!("Failed to transfer leadership: {}", e)) })?; - let success_count = results.iter().filter(|r| r.success).count(); - let total_count = results.len(); - let failed_count = total_count - success_count; - - let mut message = if success_count == 0 && failed_count == total_count { - format!( - "CLUSTER TRANSFER-LEADER is unsupported in the current OpenRaft version (target={}).", - node_id - ) - } else { - format!( - "Cluster transfer-leader completed: {}/{} groups requested (target={})", - success_count, total_count, node_id - ) - }; - - if failed_count > 0 { - message.push_str("\n\nFailed groups:"); - for result in results.iter().filter(|r| !r.success) { - if let Some(ref err) = result.error { - message.push_str(&format!("\n - {:?}: {}", result.group_id, err)); - } - } - } - - Ok(ExecutionResult::Success { message }) + cluster_group_action_rows( + "transfer-leader", + Some(*node_id), + None, + None, + results + .into_iter() + .map(|result| (result.group_id.to_string(), result.success, result.error, None)), + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/trigger_election.rs b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/trigger_election.rs index 0b62ff7fa..8be5ceaf3 100644 
--- a/backend/crates/kalamdb-handlers/crates/admin/src/cluster/trigger_election.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/cluster/trigger_election.rs @@ -2,14 +2,16 @@ //! //! Triggers leader election for all Raft groups. -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::{ - ExecutionContext, ExecutionResult, ScalarValue, StatementHandler, +use std::sync::Arc; + +use super::result_rows::cluster_group_action_rows; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::executor::handlers::{ExecutionContext, ExecutionResult, ScalarValue, StatementHandler}, }; use kalamdb_raft::RaftExecutor; use kalamdb_sql::classifier::{SqlStatement, SqlStatementKind}; -use std::sync::Arc; pub struct ClusterTriggerElectionHandler { app_context: Arc, @@ -50,24 +52,14 @@ impl StatementHandler for ClusterTriggerElectionHandler { KalamDbError::InvalidOperation(format!("Failed to trigger elections: {}", e)) })?; - let success_count = results.iter().filter(|r| r.success).count(); - let total_count = results.len(); - let failed_count = total_count - success_count; - - let mut message = format!( - "Cluster trigger election completed: {}/{} groups triggered successfully", - success_count, total_count - ); - - if failed_count > 0 { - message.push_str("\n\nFailed groups:"); - for result in results.iter().filter(|r| !r.success) { - if let Some(ref err) = result.error { - message.push_str(&format!("\n - {:?}: {}", result.group_id, err)); - } - } - } - - Ok(ExecutionResult::Success { message }) + cluster_group_action_rows( + "trigger-election", + None, + None, + None, + results + .into_iter() + .map(|result| (result.group_id.to_string(), result.success, result.error, None)), + ) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_all.rs b/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_all.rs index f77d30cb9..6ffe72f9b 100644 --- 
a/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_all.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_all.rs @@ -1,17 +1,23 @@ //! Typed handler for STORAGE COMPACT ALL statement -use kalamdb_commons::models::{TableId, TableName}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::compact::CompactParams; -use kalamdb_jobs::AppContextJobsExt; +use std::sync::Arc; + +use kalamdb_commons::{ + models::{TableId, TableName}, + schemas::TableType, + JobId, +}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::compact::CompactParams, AppContextJobsExt}; use kalamdb_sql::ddl::CompactAllTablesStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for STORAGE COMPACT ALL pub struct CompactAllTablesHandler { diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_table.rs b/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_table.rs index 700d3065b..d0e15ac1a 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_table.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/compact/compact_table.rs @@ -1,17 +1,19 @@ //! 
Typed handler for STORAGE COMPACT TABLE statement -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::compact::CompactParams; -use kalamdb_jobs::AppContextJobsExt; +use std::sync::Arc; + +use kalamdb_commons::{models::TableId, schemas::TableType, JobId}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::compact::CompactParams, AppContextJobsExt}; use kalamdb_sql::ddl::CompactTableStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for STORAGE COMPACT TABLE pub struct CompactTableHandler { diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/export/export_user_data.rs b/backend/crates/kalamdb-handlers/crates/admin/src/export/export_user_data.rs index 9365c58f7..93e695057 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/export/export_user_data.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/export/export_user_data.rs @@ -1,15 +1,19 @@ //! 
Typed handler for EXPORT USER DATA statement +use std::sync::Arc; + use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::user_export::UserExportParams; -use kalamdb_jobs::AppContextJobsExt; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::user_export::UserExportParams, AppContextJobsExt}; use kalamdb_sql::ddl::ExportUserDataStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for EXPORT USER DATA /// @@ -54,7 +58,8 @@ impl TypedStatementHandler for ExportUserDataHandler { Ok(ExecutionResult::Success { message: format!( - "User data export started. Job ID: {}. Use SHOW EXPORT to check status and get the download link.", + "User data export started. Job ID: {}. Use SHOW EXPORT to check status and get \ + the download link.", job_id.as_str() ), }) diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/export/show_export.rs b/backend/crates/kalamdb-handlers/crates/admin/src/export/show_export.rs index 2dfb3f467..d0b51bafd 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/export/show_export.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/export/show_export.rs @@ -1,16 +1,25 @@ //! 
Typed handler for SHOW EXPORT statement -use arrow::array::{RecordBatch, StringArray, TimestampMicrosecondArray}; -use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use std::sync::Arc; + +use arrow::{ + array::{RecordBatch, StringArray, TimestampMicrosecondArray}, + datatypes::{DataType, Field, Schema, TimeUnit}, +}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_jobs::AppContextJobsExt; use kalamdb_sql::ddl::ShowExportStatement; -use kalamdb_system::providers::jobs::models::{Job, JobFilter, JobSortField, SortOrder}; -use kalamdb_system::JobType; -use std::sync::Arc; +use kalamdb_system::{ + providers::jobs::models::{Job, JobFilter, JobSortField, SortOrder}, + JobType, +}; /// Handler for SHOW EXPORT /// @@ -48,9 +57,9 @@ impl ShowExportHandler { /// Extract export_id from job parameters JSON fn extract_export_id(job: &Job) -> Option { - job.parameters.as_ref().and_then(|v| { - v.get("export_id").and_then(|e| e.as_str().map(String::from)) - }) + job.parameters + .as_ref() + .and_then(|v| v.get("export_id").and_then(|e| e.as_str().map(String::from))) } } diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_all.rs b/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_all.rs index 4283691ea..74d216e40 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_all.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_all.rs @@ -1,15 +1,19 @@ //! 
Typed handler for STORAGE FLUSH ALL statement +use std::sync::Arc; + use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::flush::FlushParams; -use kalamdb_jobs::AppContextJobsExt; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::flush::FlushParams, AppContextJobsExt}; use kalamdb_sql::ddl::FlushAllTablesStatement; use kalamdb_system::JobType; -use std::sync::Arc; use tokio::task::JoinSet; /// Handler for STORAGE FLUSH ALL diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_table.rs b/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_table.rs index bcdc75da1..428465807 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_table.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/flush/flush_table.rs @@ -1,16 +1,19 @@ //! 
Typed handler for STORAGE FLUSH TABLE statement -use kalamdb_commons::models::TableId; -use kalamdb_commons::{JobId, TableType}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::flush::FlushParams; -use kalamdb_jobs::AppContextJobsExt; +use std::sync::Arc; + +use kalamdb_commons::{models::TableId, JobId, TableType}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{executors::flush::FlushParams, AppContextJobsExt}; use kalamdb_sql::ddl::FlushTableStatement; use kalamdb_system::JobType; -use std::sync::Arc; /// Handler for STORAGE FLUSH TABLE pub struct FlushTableHandler { @@ -81,7 +84,8 @@ impl TypedStatementHandler for FlushTableHandler { Err(KalamDbError::IdempotentConflict(_)) => { return Ok(ExecutionResult::Success { message: format!( - "Storage flush skipped: a flush is already queued or running for table '{}.{}'", + "Storage flush skipped: a flush is already queued or running for table \ + '{}.{}'", statement.namespace.as_str(), statement.table_name.as_str() ), diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_job.rs b/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_job.rs index 3b9ba2d91..7a41933ee 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_job.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_job.rs @@ -1,13 +1,18 @@ //! 
Typed handler for KILL JOB statement +use std::sync::Arc; + use kalamdb_commons::JobId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_jobs::AppContextJobsExt; use kalamdb_sql::ddl::JobCommand; -use std::sync::Arc; /// Handler for KILL JOB pub struct KillJobHandler { diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_live_query.rs b/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_live_query.rs index 529d43370..1f7f58753 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_live_query.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/jobs/kill_live_query.rs @@ -1,12 +1,17 @@ //! 
Typed handler for KILL LIVE QUERY statement -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::KillLiveQueryStatement; use std::sync::Arc; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::KillLiveQueryStatement; + /// Handler for KILL LIVE QUERY pub struct KillLiveQueryHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/admin/src/lib.rs b/backend/crates/kalamdb-handlers/crates/admin/src/lib.rs index 59bcc4f2c..6ce1c2af9 100644 --- a/backend/crates/kalamdb-handlers/crates/admin/src/lib.rs +++ b/backend/crates/kalamdb-handlers/crates/admin/src/lib.rs @@ -7,11 +7,11 @@ mod helpers; pub mod jobs; pub mod system; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; +use std::sync::Arc; + +use kalamdb_core::{app_context::AppContext, sql::executor::handler_registry::HandlerRegistry}; use kalamdb_handlers_support::{register_dynamic_handler, register_typed_handler}; use kalamdb_sql::classifier::SqlStatementKind; -use std::sync::Arc; pub fn register_admin_handlers(registry: &HandlerRegistry, app_context: Arc) { use kalamdb_commons::models::{LiveQueryId, NamespaceId, TableName}; @@ -86,6 +86,20 @@ pub fn register_admin_handlers(registry: &HandlerRegistry, app_context: Arc for ShowManifestCacheHandler { #[cfg(test)] mod tests { + use kalamdb_core::{app_context::AppContext, test_helpers::create_test_session_simple}; + use super::*; - use kalamdb_core::app_context::AppContext; - use kalamdb_core::test_helpers::create_test_session_simple; #[tokio::test] async fn test_show_manifest_cache_empty() { diff --git 
a/backend/crates/kalamdb-handlers/crates/ddl/src/lib.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/lib.rs index fc11a5191..0ffbb9190 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/lib.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/lib.rs @@ -4,19 +4,22 @@ pub mod storage; pub mod table; pub mod view; +use std::{collections::HashMap, sync::Arc}; + use datafusion::arrow::datatypes::Schema as ArrowSchema; -use kalamdb_commons::models::{NamespaceId, StorageId, TableName}; -use kalamdb_commons::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; +use kalamdb_commons::{ + models::{NamespaceId, StorageId, TableName}, + TableType, +}; +use kalamdb_core::{app_context::AppContext, sql::executor::handler_registry::HandlerRegistry}; use kalamdb_handlers_support::register_typed_handler; -use kalamdb_sql::classifier::SqlStatementKind; -use kalamdb_sql::ddl::{ - AlterTableStatement, CreateTableStatement, CreateViewStatement, DescribeTableStatement, - DropTableStatement, ShowTableStatsStatement, ShowTablesStatement, +use kalamdb_sql::{ + classifier::SqlStatementKind, + ddl::{ + AlterTableStatement, CreateTableStatement, CreateViewStatement, DescribeTableStatement, + DropTableStatement, ShowTableStatsStatement, ShowTablesStatement, + }, }; -use std::collections::HashMap; -use std::sync::Arc; pub fn register_ddl_handlers(registry: &HandlerRegistry, app_context: Arc) { register_typed_handler!( diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/alter.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/alter.rs index bbed7eef2..4316dbdad 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/alter.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/alter.rs @@ -1,13 +1,19 @@ //! 
Typed DDL handler for ALTER NAMESPACE statements -use crate::helpers::guards::require_admin; +use std::sync::Arc; + use kalamdb_commons::models::NamespaceId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::AlterNamespaceStatement; -use std::sync::Arc; + +use crate::helpers::guards::require_admin; /// Typed handler for ALTER NAMESPACE statements pub struct AlterNamespaceHandler { @@ -42,7 +48,8 @@ impl TypedStatementHandler for AlterNamespaceHandler { })?; // Update namespace options (merge with existing options) - let mut current_options = namespace.options + let mut current_options = namespace + .options .clone() .and_then(|v| v.as_object().cloned()) .unwrap_or_default(); @@ -93,12 +100,13 @@ impl TypedStatementHandler for AlterNamespaceHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role}; + use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/create.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/create.rs index fdc394628..e46be5ba8 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/create.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/create.rs @@ -6,21 +6,32 @@ //! 
When a namespace is created, it is also registered as a DataFusion schema //! so that queries like `SELECT * FROM namespace.table` work correctly. -use crate::helpers::guards::require_admin; +use std::sync::Arc; + use datafusion::catalog::MemorySchemaProvider; use kalamdb_commons::models::{NamespaceId, UserId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::CreateNamespaceStatement; -use std::sync::Arc; + +use crate::helpers::guards::require_admin; /// Typed handler for CREATE NAMESPACE statements pub struct CreateNamespaceHandler { app_context: Arc, } +fn is_namespace_already_exists_error(error: &impl std::fmt::Display) -> bool { + let message = error.to_string().to_ascii_lowercase(); + message.contains("namespace") && message.contains("already exists") +} + impl CreateNamespaceHandler { pub fn new(app_context: Arc) -> Self { Self { app_context } @@ -89,6 +100,7 @@ impl TypedStatementHandler for CreateNamespaceHandler if existing.is_some() { if statement.if_not_exists { + self.register_namespace_schema(&namespace_id)?; let message = format!("Namespace '{}' already exists", name); return Ok(ExecutionResult::Success { message }); } @@ -102,14 +114,30 @@ impl TypedStatementHandler for CreateNamespaceHandler // In standalone mode, the executor calls the provider directly let executor = self.app_context.executor(); let created_by = Some(UserId::new(context.user_id().as_str())); - let cmd = kalamdb_raft::MetaCommand::CreateNamespace { - namespace_id: namespace_id.clone(), - created_by, + let cmd = if statement.if_not_exists { + 
kalamdb_raft::MetaCommand::CreateNamespaceIfNotExists { + namespace_id: namespace_id.clone(), + created_by, + } + } else { + kalamdb_raft::MetaCommand::CreateNamespace { + namespace_id: namespace_id.clone(), + created_by, + } }; - executor.execute_meta(cmd).await.map_err(|e| { - KalamDbError::ExecutionError(format!("Failed to create namespace via executor: {}", e)) - })?; + if let Err(error) = executor.execute_meta(cmd).await { + if statement.if_not_exists && is_namespace_already_exists_error(&error) { + self.register_namespace_schema(&namespace_id)?; + let message = format!("Namespace '{}' already exists", name); + return Ok(ExecutionResult::Success { message }); + } + + return Err(KalamDbError::ExecutionError(format!( + "Failed to create namespace via executor: {}", + error + ))); + } // Register namespace as DataFusion schema for SQL queries self.register_namespace_schema(&namespace_id)?; @@ -140,16 +168,25 @@ impl TypedStatementHandler for CreateNamespaceHandler #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; + use kalamdb_commons::{models::UserId, Role}; use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use kalamdb_system::Namespace; + use super::*; + fn test_context() -> ExecutionContext { ExecutionContext::new(UserId::from("test_user"), Role::Dba, create_test_session_simple()) } + #[test] + fn test_namespace_already_exists_error_recognizer() { + assert!(is_namespace_already_exists_error(&"Namespace 'app' already exists")); + assert!(is_namespace_already_exists_error( + &"Already exists: Namespace 'app' already exists" + )); + assert!(!is_namespace_already_exists_error(&"Table 'app.t' already exists")); + } + #[ignore = "Requires Raft for CREATE NAMESPACE"] #[tokio::test] async fn test_typed_create_namespace() { diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/drop.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/drop.rs index 
2a43c9de6..06156c056 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/drop.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/drop.rs @@ -3,16 +3,26 @@ //! When a namespace is dropped, its DataFusion schema becomes unavailable. //! Any queries referencing tables in the dropped namespace will fail. -use crate::helpers::audit; -use crate::helpers::guards::{block_anonymous_write, require_admin}; -use crate::table::drop::{capture_storage_cleanup_details, schedule_drop_table_cleanup}; +use std::sync::Arc; + use kalamdb_commons::models::{NamespaceId, TableId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::DropNamespaceStatement; -use std::sync::Arc; + +use crate::{ + helpers::{ + audit, + guards::{block_anonymous_write, require_admin}, + }, + table::drop::{capture_storage_cleanup_details, schedule_drop_table_cleanup}, +}; /// Typed handler for DROP NAMESPACE statements pub struct DropNamespaceHandler { @@ -156,17 +166,25 @@ impl TypedStatementHandler for DropNamespaceHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; - use kalamdb_commons::models::{TableName, UserId}; - use kalamdb_commons::schemas::TableType; - use kalamdb_commons::Role; + use std::{ + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, + }; + + use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + TableName, UserId, + }, + schemas::TableType, + Role, + }; use 
kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use kalamdb_store::EntityStore; use kalamdb_system::Namespace; - use std::sync::Arc; - use std::time::{SystemTime, UNIX_EPOCH}; + + use super::*; fn init_app_context() -> Arc { test_app_context_simple() diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/show.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/show.rs index 4933c4182..e28034c57 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/show.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/show.rs @@ -1,12 +1,17 @@ //! Typed DDL handler for SHOW NAMESPACES statements -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::ShowNamespacesStatement; use std::sync::Arc; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::ShowNamespacesStatement; + /// Typed handler for SHOW NAMESPACES statements pub struct ShowNamespacesHandler { app_context: Arc, @@ -62,12 +67,13 @@ impl TypedStatementHandler for ShowNamespacesHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role}; + use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/use_namespace.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/use_namespace.rs 
index f9f8dc3a2..ec4ea98b6 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/use_namespace.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/namespace/use_namespace.rs @@ -6,13 +6,18 @@ //! After executing `USE namespace1`, queries like `SELECT * FROM users` //! will resolve to `kalam.namespace1.users`. -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::UseNamespaceStatement; use std::sync::Arc; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::UseNamespaceStatement; + /// Handler for USE NAMESPACE / USE / SET NAMESPACE statements /// /// Uses DataFusion's native `datafusion.catalog.default_schema` configuration @@ -84,11 +89,11 @@ impl TypedStatementHandler for UseNamespaceHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; + use kalamdb_commons::{models::UserId, Role}; use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + use super::*; + fn test_context() -> ExecutionContext { ExecutionContext::new(UserId::from("test_user"), Role::User, create_test_session_simple()) } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/alter.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/alter.rs index f6ba2e6f8..12d2e05c2 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/alter.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/alter.rs @@ -1,14 +1,20 @@ //! 
Typed DDL handler for ALTER STORAGE statements -use crate::helpers::guards::require_admin; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use std::sync::Arc; + +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_filestore::StorageHealthService; use kalamdb_sql::ddl::AlterStorageStatement; -use std::sync::Arc; + +use crate::helpers::guards::require_admin; /// Typed handler for ALTER STORAGE statements pub struct AlterStorageHandler { @@ -107,7 +113,8 @@ impl TypedStatementHandler for AlterStorageHandler { .error .unwrap_or_else(|| "Unknown storage health error".to_string()); return Err(KalamDbError::InvalidOperation(format!( - "Storage health check failed (status {}, readable={}, writable={}, listable={}, deletable={}, latency {} ms): {}", + "Storage health check failed (status {}, readable={}, writable={}, listable={}, \ + deletable={}, latency {} ms): {}", health_result.status, health_result.readable, health_result.writable, @@ -149,12 +156,13 @@ impl TypedStatementHandler for AlterStorageHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::{Role, StorageId}; + use std::sync::Arc; + + use kalamdb_commons::{models::UserId, Role, StorageId}; use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use kalamdb_system::Storage; - use std::sync::Arc; + + use super::*; fn init_app_context() -> Arc { test_app_context_simple() diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/check.rs 
b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/check.rs index ff61bc703..5189b8a22 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/check.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/check.rs @@ -1,19 +1,25 @@ //! Typed DDL handler for STORAGE CHECK statements -use crate::helpers::guards::require_admin; -use arrow::array::{ - ArrayRef, BooleanBuilder, Int64Builder, StringBuilder, TimestampMillisecondBuilder, +use std::sync::Arc; + +use arrow::{ + array::{ArrayRef, BooleanBuilder, Int64Builder, StringBuilder, TimestampMillisecondBuilder}, + datatypes::{DataType, Field, Schema, TimeUnit}, + record_batch::RecordBatch, +}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, }; -use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; -use arrow::record_batch::RecordBatch; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; use kalamdb_filestore::{HealthStatus, StorageHealthService}; use kalamdb_sql::ddl::CheckStorageStatement; -use std::sync::Arc; + +use crate::helpers::guards::require_admin; /// Typed handler for STORAGE CHECK statements pub struct CheckStorageHandler { @@ -149,12 +155,13 @@ impl TypedStatementHandler for CheckStorageHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::{Role, StorageId}; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role, StorageId}; + use kalamdb_core::test_helpers::{create_test_session_simple, 
test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/create.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/create.rs index 260a0d45a..38b8a528f 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/create.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/create.rs @@ -1,17 +1,22 @@ //! Typed DDL handler for CREATE STORAGE statements -use crate::helpers::guards::require_admin; -use crate::helpers::storage::ensure_filesystem_directory; +use std::sync::Arc; + use kalamdb_commons::models::StorageId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_filestore::StorageHealthService; use kalamdb_sql::ddl::CreateStorageStatement; use kalamdb_system::StorageType; -use std::sync::Arc; + +use crate::helpers::{guards::require_admin, storage::ensure_filesystem_directory}; /// Typed handler for CREATE STORAGE statements pub struct CreateStorageHandler { @@ -133,7 +138,8 @@ impl TypedStatementHandler for CreateStorageHandler { .error .unwrap_or_else(|| "Unknown storage health error".to_string()); return Err(KalamDbError::InvalidOperation(format!( - "Storage health check failed (status {}, readable={}, writable={}, listable={}, deletable={}, latency {} ms): {}", + "Storage health check failed (status {}, readable={}, writable={}, listable={}, \ + deletable={}, latency {} ms): {}", health_result.status, health_result.readable, 
health_result.writable, @@ -167,12 +173,13 @@ impl TypedStatementHandler for CreateStorageHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::{Role, StorageId}; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role, StorageId}; + use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/drop.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/drop.rs index f7d364b63..a6cd65ef4 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/drop.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/drop.rs @@ -1,13 +1,19 @@ //! Typed DDL handler for DROP STORAGE statements -use crate::helpers::guards::require_admin; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::DropStorageStatement; use std::sync::Arc; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::DropStorageStatement; + +use crate::helpers::guards::require_admin; + /// Typed handler for DROP STORAGE statements pub struct DropStorageHandler { app_context: Arc, @@ -112,12 +118,13 @@ impl TypedStatementHandler for DropStorageHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::{Role, StorageId}; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use 
kalamdb_commons::{models::UserId, Role, StorageId}; + use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/show.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/show.rs index 187244874..47bfd5fd0 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/storage/show.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/storage/show.rs @@ -1,12 +1,17 @@ //! Typed DDL handler for SHOW STORAGES statements -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::ShowStoragesStatement; use std::sync::Arc; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::ShowStoragesStatement; + /// Typed handler for SHOW STORAGES statements pub struct ShowStoragesHandler { app_context: Arc, @@ -54,12 +59,13 @@ impl TypedStatementHandler for ShowStoragesHandler { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; - use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role}; + use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; + + use super::*; + fn init_app_context() -> Arc { test_app_context_simple() } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/alter.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/alter.rs index b6bc8ad90..56b840008 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/alter.rs +++ 
b/backend/crates/kalamdb-handlers/crates/ddl/src/table/alter.rs @@ -1,15 +1,24 @@ //! Typed DDL handler for ALTER TABLE statements -use crate::helpers::guards::block_system_namespace_modification; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use std::sync::Arc; + // Note: table_registration moved to unified applier commands use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition}; -use kalamdb_commons::models::{NamespaceId, TableId, UserId}; -use kalamdb_commons::schemas::{ColumnDefault, TableType}; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; +use kalamdb_commons::{ + models::{ + schemas::{ColumnDefinition, TableDefinition}, + NamespaceId, TableId, UserId, + }, + schemas::{ColumnDefault, TableType}, +}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::{AlterTableStatement, ColumnOperation}; use kalamdb_store::Partition; use kalamdb_system::{VectorEngine, VectorIndexState, VectorMetric}; @@ -18,7 +27,8 @@ use kalamdb_vector::{ shared_vector_pk_index_partition_name, user_vector_ops_partition_name, user_vector_pk_index_partition_name, }; -use std::sync::Arc; + +use crate::helpers::guards::block_system_namespace_modification; /// Typed handler for ALTER TABLE statements pub struct AlterTableHandler { @@ -495,7 +505,8 @@ impl TypedStatementHandler for AlterTableHandler { audit::persist_audit_entry(&self.app_context, &audit_entry).await?; log::info!( - "✅ ALTER TABLE succeeded: {}.{} | operation: {} | new_version: {} | table_type: {:?}", + "✅ ALTER TABLE succeeded: {}.{} | operation: {} | new_version: {} | table_type: \ + {:?}", namespace_id.as_str(), statement.table_name.as_str(), change_desc, 
@@ -830,7 +841,11 @@ fn apply_alter_operation( ))); } if is_system_column(new_column_name) { - log::error!("❌ ALTER TABLE failed: Cannot rename column to reserved system column name '{}'", new_column_name); + log::error!( + "❌ ALTER TABLE failed: Cannot rename column to reserved system column name \ + '{}'", + new_column_name + ); return Err(KalamDbError::InvalidOperation(format!( "Cannot rename column to '{}': reserved system column name", new_column_name diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/create.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/create.rs index 5286090ec..2c33ce022 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/create.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/table/create.rs @@ -1,14 +1,18 @@ //! Typed DDL handler for CREATE TABLE statements -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_sql::ddl::CreateTableStatement; use std::sync::Arc; +use kalamdb_commons::{models::TableId, schemas::TableType}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_sql::ddl::CreateTableStatement; + /// Typed handler for CREATE TABLE statements (all table types: USER, SHARED, STREAM) pub struct CreateTableHandler { app_context: Arc, @@ -43,8 +47,7 @@ impl TypedStatementHandler for CreateTableHandler { _params: Vec, context: &ExecutionContext, ) -> Result { - use crate::helpers::audit; - use crate::helpers::table_creation; + use crate::helpers::{audit, table_creation}; let mut statement = statement; let effective_type = Self::resolve_table_type(&statement, context); 
@@ -132,14 +135,17 @@ impl TypedStatementHandler for CreateTableHandler { #[cfg(test)] mod tests { - use super::*; use arrow::datatypes::{DataType, Field, Schema}; - use kalamdb_commons::models::{NamespaceId, UserId}; - use kalamdb_commons::schemas::TableType; - use kalamdb_commons::Role; + use kalamdb_commons::{ + models::{NamespaceId, UserId}, + schemas::TableType, + Role, + }; use kalamdb_core::test_helpers::{create_test_session_simple, test_app_context_simple}; use kalamdb_system::Storage; + use super::*; + fn create_test_context(role: Role) -> ExecutionContext { ExecutionContext::new(UserId::new("test_user"), role, create_test_session_simple()) } diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/describe.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/describe.rs index 0a53291b7..dcc641f04 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/describe.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/table/describe.rs @@ -1,13 +1,18 @@ //! 
Typed DDL handler for DESCRIBE TABLE statements +use std::sync::Arc; + use kalamdb_commons::models::{NamespaceId, TableId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_core::views::describe::DescribeView; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, + views::describe::DescribeView, +}; use kalamdb_sql::ddl::DescribeTableStatement; -use std::sync::Arc; /// Typed handler for DESCRIBE TABLE statements pub struct DescribeTableHandler { diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/drop.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/drop.rs index ab90977a3..c557cc9da 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/drop.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/table/drop.rs @@ -3,20 +3,29 @@ //! This module provides both the DROP TABLE handler and reusable cleanup functions //! for table deletion operations (used by both DDL handler and CleanupExecutor). 
-use crate::helpers::audit; -use crate::helpers::guards::{block_anonymous_write, block_system_namespace_modification}; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::operations::table_cleanup::cleanup_table_data_internal; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; -use kalamdb_jobs::executors::cleanup::{CleanupOperation, CleanupParams, StorageCleanupDetails}; -use kalamdb_jobs::AppContextJobsExt; +use std::sync::Arc; + +use kalamdb_commons::{models::TableId, schemas::TableType}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + operations::table_cleanup::cleanup_table_data_internal, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; +use kalamdb_jobs::{ + executors::cleanup::{CleanupOperation, CleanupParams, StorageCleanupDetails}, + AppContextJobsExt, +}; use kalamdb_sql::ddl::DropTableStatement; use kalamdb_system::JobType; -use std::sync::Arc; + +use crate::helpers::{ + audit, + guards::{block_anonymous_write, block_system_namespace_modification}, +}; /// Typed handler for DROP TABLE statements pub struct DropTableHandler { @@ -155,7 +164,8 @@ impl TypedStatementHandler for DropTableHandler { let is_owner = false; if !kalamdb_session::can_delete_table(context.user_role(), actual_type, is_owner) { log::error!( - "❌ DROP TABLE {}.{}: Insufficient privileges (user: {}, role: {:?}, table_type: {:?})", + "❌ DROP TABLE {}.{}: Insufficient privileges (user: {}, role: {:?}, table_type: \ + {:?})", statement.namespace_id.as_str(), statement.table_name.as_str(), context.user_id().as_str(), @@ -352,21 +362,26 @@ impl TypedStatementHandler for DropTableHandler { #[cfg(test)] mod tests { - use super::{cleanup_table_data_internal, 
DropTableHandler}; - use crate::table::create::CreateTableHandler; + use std::{collections::HashMap, sync::Arc}; + use arrow::datatypes::{DataType, Field, Schema}; - use kalamdb_commons::models::{NamespaceId, TableName, UserId}; - use kalamdb_commons::schemas::TableType; - use kalamdb_commons::Role; - use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult}; - use kalamdb_core::sql::executor::handlers::TypedStatementHandler; - use kalamdb_core::test_helpers::{ - create_test_session_simple, test_app_context, test_app_context_simple, + use kalamdb_commons::{ + models::{NamespaceId, TableName, UserId}, + schemas::TableType, + Role, + }; + use kalamdb_core::{ + sql::{ + context::{ExecutionContext, ExecutionResult}, + executor::handlers::TypedStatementHandler, + }, + test_helpers::{create_test_session_simple, test_app_context, test_app_context_simple}, }; use kalamdb_sql::ddl::{CreateTableStatement, DropTableStatement, TableKind}; use kalamdb_store::EntityStore; - use std::collections::HashMap; - use std::sync::Arc; + + use super::{cleanup_table_data_internal, DropTableHandler}; + use crate::table::create::CreateTableHandler; fn create_test_context(role: Role) -> ExecutionContext { ExecutionContext::new(UserId::new("test_user"), role, create_test_session_simple()) diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/show.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/show.rs index 790ac3078..ad9954e25 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/show.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/table/show.rs @@ -1,18 +1,25 @@ //! 
Typed DDL handler for SHOW TABLES statements -use datafusion::arrow::array::{ - ArrayRef, Int32Array, RecordBatch, StringBuilder, TimestampMicrosecondArray, +use std::sync::Arc; + +use datafusion::{ + arrow::{ + array::{ArrayRef, Int32Array, RecordBatch, StringBuilder, TimestampMicrosecondArray}, + datatypes::SchemaRef, + }, + datasource::TableProvider, }; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::datasource::TableProvider; use kalamdb_commons::schemas::TableDefinition; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::ShowTablesStatement; -use std::sync::Arc; /// Typed handler for SHOW TABLES statements pub struct ShowTablesHandler { diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/table/show_stats.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/table/show_stats.rs index eec4fc894..d52246366 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/table/show_stats.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/table/show_stats.rs @@ -1,15 +1,22 @@ //! 
Typed DDL handler for SHOW STATS statements +use std::sync::Arc; + use datafusion::arrow::array::{ArrayRef, RecordBatch, StringArray, UInt64Array}; -use kalamdb_commons::arrow_utils::{field_uint64, field_utf8, schema}; -use kalamdb_commons::models::{NamespaceId, TableId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_commons::{ + arrow_utils::{field_uint64, field_utf8, schema}, + models::{NamespaceId, TableId}, +}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::ShowTableStatsStatement; -use std::sync::Arc; /// Typed handler for SHOW STATS statements pub struct ShowStatsHandler { diff --git a/backend/crates/kalamdb-handlers/crates/ddl/src/view/create.rs b/backend/crates/kalamdb-handlers/crates/ddl/src/view/create.rs index 33ffcb262..004f1ed2a 100644 --- a/backend/crates/kalamdb-handlers/crates/ddl/src/view/create.rs +++ b/backend/crates/kalamdb-handlers/crates/ddl/src/view/create.rs @@ -4,14 +4,19 @@ //! registration to the shared base SessionContext so subsequent per-user //! sessions inherit the view definition. 
+use std::sync::Arc; + use kalamdb_commons::models::NamespaceId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::CreateViewStatement; -use std::sync::Arc; /// Handler for CREATE VIEW statements pub struct CreateViewHandler { @@ -123,11 +128,12 @@ impl TypedStatementHandler for CreateViewHandler { #[cfg(test)] mod tests { - use super::*; use arrow::array::Int64Array; use kalamdb_commons::models::{NamespaceId, UserId}; use kalamdb_core::test_helpers::test_app_context_simple; + use super::*; + #[tokio::test] async fn create_view_registers_and_is_queryable() { let app_ctx = test_app_context_simple(); diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/lib.rs b/backend/crates/kalamdb-handlers/crates/stream/src/lib.rs index b3632ac61..dd9dce7f0 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/lib.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/lib.rs @@ -1,15 +1,17 @@ pub mod result_rows; pub mod topics; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; +use std::sync::Arc; + +use kalamdb_core::{app_context::AppContext, sql::executor::handler_registry::HandlerRegistry}; use kalamdb_handlers_support::register_typed_handler; -use kalamdb_sql::classifier::SqlStatementKind; -use kalamdb_sql::ddl::{ - AckStatement, AddTopicSourceStatement, ClearTopicStatement, ConsumePosition, ConsumeStatement, - CreateTopicStatement, DropTopicStatement, +use kalamdb_sql::{ + classifier::SqlStatementKind, + 
ddl::{ + AckStatement, AddTopicSourceStatement, ClearTopicStatement, ConsumePosition, + ConsumeStatement, CreateTopicStatement, DropTopicStatement, + }, }; -use std::sync::Arc; pub fn register_stream_handlers(registry: &HandlerRegistry, app_context: Arc) { use kalamdb_commons::models::{PayloadMode, TableId, TopicId}; diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/result_rows.rs b/backend/crates/kalamdb-handlers/crates/stream/src/result_rows.rs index bd83a2295..00d92cb58 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/result_rows.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/result_rows.rs @@ -1,11 +1,11 @@ +use std::sync::Arc; + use datafusion::arrow::{ array::{ArrayRef, Int32Array, Int64Array, StringBuilder}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::ExecutionResult; -use std::sync::Arc; +use kalamdb_core::{error::KalamDbError, sql::context::ExecutionResult}; pub fn ack_result( topic_name: &str, diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/ack.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/ack.rs index a779fa8f3..8cce140f5 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/ack.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/ack.rs @@ -1,11 +1,17 @@ -use crate::result_rows; +use std::sync::Arc; + use kalamdb_commons::models::{ConsumerGroupId, TopicId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::AckStatement; -use std::sync::Arc; + +use 
crate::result_rows; pub struct AckHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/add_source.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/add_source.rs index 3a74a6cec..a1438c35a 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/add_source.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/add_source.rs @@ -1,11 +1,16 @@ +use std::sync::Arc; + use kalamdb_commons::models::TopicId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::AddTopicSourceStatement; use kalamdb_system::providers::topics::models::TopicRoute; -use std::sync::Arc; pub struct AddTopicSourceHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/clear.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/clear.rs index 8149f8666..b6a90d3b2 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/clear.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/clear.rs @@ -1,10 +1,15 @@ -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use std::sync::Arc; + +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_jobs::AppContextJobsExt; use kalamdb_sql::ddl::ClearTopicStatement; -use std::sync::Arc; pub 
struct ClearTopicHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/consume.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/consume.rs index a305da072..99f6eade8 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/consume.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/consume.rs @@ -1,15 +1,20 @@ +use std::sync::Arc; + use datafusion::arrow::{ array::{ArrayRef, BinaryBuilder, Int32Array, Int64Array, StringBuilder}, record_batch::RecordBatch, }; use kalamdb_commons::models::{ConsumerGroupId, TopicId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::{ConsumePosition, ConsumeStatement}; use kalamdb_tables::topics::topic_message_schema::topic_message_schema; -use std::sync::Arc; pub struct ConsumeHandler { app_context: Arc, @@ -37,22 +42,34 @@ impl TypedStatementHandler for ConsumeHandler { let topic_publisher = self.app_context.topic_publisher(); let limit = statement.limit.unwrap_or(100) as usize; let partition_id = 0u32; - - let start_offset = match (&statement.position, &statement.group_id) { - (ConsumePosition::Offset(offset), _) => *offset, - (ConsumePosition::Earliest, None) => 0, - (ConsumePosition::Latest, _) => topic_publisher - .latest_offset(&topic_id, partition_id) - .map_err(|e| KalamDbError::InvalidOperation(e.to_string()))? 
- .map(|offset| offset + 1) - .unwrap_or(0), - (ConsumePosition::Earliest, Some(_)) => 0, + let group_id = + statement.group_id.as_ref().map(|group_name| ConsumerGroupId::new(group_name)); + + let committed_offset = group_id.as_ref().and_then(|group_id| { + topic_publisher.get_group_offsets(&topic_id, group_id).ok().and_then(|offsets| { + offsets + .iter() + .find(|offset| offset.partition_id == partition_id) + .map(|offset| offset.last_acked_offset + 1) + }) + }); + + let start_offset = match committed_offset { + Some(committed) => committed, + None => match statement.position { + ConsumePosition::Offset(offset) => offset, + ConsumePosition::Earliest => 0, + ConsumePosition::Latest => topic_publisher + .latest_offset(&topic_id, partition_id) + .map_err(|e| KalamDbError::InvalidOperation(e.to_string()))? + .map(|offset| offset + 1) + .unwrap_or(0), + }, }; - let messages = if let Some(group_name) = &statement.group_id { - let group_id = ConsumerGroupId::new(group_name); + let messages = if let Some(group_id) = group_id.as_ref() { topic_publisher - .fetch_messages_for_group(&topic_id, &group_id, partition_id, start_offset, limit) + .fetch_messages_for_group(&topic_id, group_id, partition_id, start_offset, limit) .map_err(|e| KalamDbError::InvalidOperation(e.to_string()))? 
} else { topic_publisher @@ -109,11 +126,10 @@ impl TypedStatementHandler for ConsumeHandler { KalamDbError::SerializationError(format!("Failed to create RecordBatch: {}", e)) })?; - if let Some(group_name) = &statement.group_id { + if let Some(group_id) = group_id.as_ref() { if let Some(last_msg) = messages.last() { - let group_id = ConsumerGroupId::new(group_name); topic_publisher - .ack_offset(&topic_id, &group_id, partition_id, last_msg.offset) + .ack_offset(&topic_id, group_id, partition_id, last_msg.offset) .map_err(|e| { KalamDbError::InvalidOperation(format!("Failed to commit offset: {}", e)) })?; diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/create.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/create.rs index 9a357c2f4..383132bf0 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/create.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/create.rs @@ -1,11 +1,16 @@ +use std::sync::Arc; + use kalamdb_commons::models::{NamespaceId, TopicId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::CreateTopicStatement; use kalamdb_system::providers::topics::models::Topic; -use std::sync::Arc; pub struct CreateTopicHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/stream/src/topics/drop.rs b/backend/crates/kalamdb-handlers/crates/stream/src/topics/drop.rs index dfe548c3f..47ee6c0f8 100644 --- a/backend/crates/kalamdb-handlers/crates/stream/src/topics/drop.rs +++ b/backend/crates/kalamdb-handlers/crates/stream/src/topics/drop.rs @@ -1,11 +1,16 @@ +use std::sync::Arc; + use 
kalamdb_commons::models::TopicId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_jobs::AppContextJobsExt; use kalamdb_sql::ddl::DropTopicStatement; -use std::sync::Arc; pub struct DropTopicHandler { app_context: Arc, diff --git a/backend/crates/kalamdb-handlers/crates/support/src/audit.rs b/backend/crates/kalamdb-handlers/crates/support/src/audit.rs index 19a06974e..60912ad7e 100644 --- a/backend/crates/kalamdb-handlers/crates/support/src/audit.rs +++ b/backend/crates/kalamdb-handlers/crates/support/src/audit.rs @@ -5,8 +5,7 @@ use chrono::Utc; use kalamdb_commons::models::AuditLogId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::ExecutionContext; +use kalamdb_core::{error::KalamDbError, sql::context::ExecutionContext}; use kalamdb_system::AuditLogEntry; /// Create an audit log entry for a SQL operation @@ -140,9 +139,10 @@ pub fn log_auth_event( } } -use kalamdb_core::app_context::AppContext; use std::sync::Arc; +use kalamdb_core::app_context::AppContext; + /// Persist an audit entry to the system.audit_logs table /// /// Delegates to provider's async method which handles spawn_blocking internally. 
@@ -160,11 +160,13 @@ pub async fn persist_audit_entry( #[cfg(test)] mod tests { - use super::*; + use std::sync::Arc; + use datafusion::prelude::SessionContext; use kalamdb_commons::{Role, UserId}; use kalamdb_session::AuthSession; - use std::sync::Arc; + + use super::*; fn test_session() -> Arc { Arc::new(SessionContext::new()) diff --git a/backend/crates/kalamdb-handlers/crates/support/src/guards.rs b/backend/crates/kalamdb-handlers/crates/support/src/guards.rs index 769a52b43..a279bf8ee 100644 --- a/backend/crates/kalamdb-handlers/crates/support/src/guards.rs +++ b/backend/crates/kalamdb-handlers/crates/support/src/guards.rs @@ -4,8 +4,7 @@ //! These helpers consolidate repeated validation patterns across handlers. use kalamdb_commons::models::NamespaceId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::ExecutionContext; +use kalamdb_core::{error::KalamDbError, sql::context::ExecutionContext}; /// Block modifications (ALTER, DROP, CREATE) on system namespaces. /// @@ -112,7 +111,9 @@ pub fn block_anonymous_write( if context.is_anonymous() { log::warn!("❌ {} blocked: Anonymous users cannot perform write operations", operation); return Err(KalamDbError::Unauthorized( - "Anonymous users can only SELECT from public tables. Please authenticate to perform write operations.".to_string() + "Anonymous users can only SELECT from public tables. Please authenticate to perform \ + write operations." 
+ .to_string(), )); } Ok(()) @@ -120,12 +121,13 @@ pub fn block_anonymous_write( #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::UserId; - use kalamdb_commons::Role; - use kalamdb_session::AuthSession; use std::sync::Arc; + use kalamdb_commons::{models::UserId, Role}; + use kalamdb_session::AuthSession; + + use super::*; + fn create_context(role: Role) -> ExecutionContext { ExecutionContext::new( UserId::from("test_user"), diff --git a/backend/crates/kalamdb-handlers/crates/support/src/table_creation.rs b/backend/crates/kalamdb-handlers/crates/support/src/table_creation.rs index dbda8a716..1c51c2d6e 100644 --- a/backend/crates/kalamdb-handlers/crates/support/src/table_creation.rs +++ b/backend/crates/kalamdb-handlers/crates/support/src/table_creation.rs @@ -2,15 +2,18 @@ //! //! Provides unified logic for creating all table types (USER/SHARED/STREAM) -use kalamdb_commons::models::{NamespaceId, StorageId, TableAccess, TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::Role; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; +use std::sync::Arc; + +use kalamdb_commons::{ + models::{NamespaceId, StorageId, TableAccess, TableId, UserId}, + schemas::TableType, + Role, +}; +use kalamdb_core::{ + app_context::AppContext, error::KalamDbError, error_extensions::KalamDbResultExt, +}; use kalamdb_sql::ddl::CreateTableStatement; use kalamdb_system::providers::storages::models::StorageType; -use std::sync::Arc; /// Unified CREATE TABLE handler for all table types (USER/SHARED/STREAM) /// @@ -143,7 +146,8 @@ fn log_table_created( match &table_def.table_options { TableOptions::User(opts) => { log::info!( - "✅ USER TABLE created: {} | storage: {} | columns: {} | pk: {} | system_columns: [_seq, _deleted]", + "✅ USER TABLE created: {} | storage: {} | columns: {} | pk: {} | system_columns: \ + [_seq, _deleted]", table_id, 
opts.storage_id.as_str(), table_def.columns.len(), @@ -152,7 +156,8 @@ fn log_table_created( }, TableOptions::Shared(opts) => { log::info!( - "✅ SHARED TABLE created: {} | storage: {} | columns: {} | pk: {} | access_level: {:?} | system_columns: [_seq, _deleted]", + "✅ SHARED TABLE created: {} | storage: {} | columns: {} | pk: {} | access_level: \ + {:?} | system_columns: [_seq, _deleted]", table_id, opts.storage_id.as_str(), table_def.columns.len(), @@ -198,9 +203,11 @@ pub fn build_table_definition( user_id: &UserId, user_role: Role, ) -> Result { - use kalamdb_commons::datatypes::{FromArrowType, KalamDataType}; - use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; - use kalamdb_commons::schemas::ColumnDefault; + use kalamdb_commons::{ + datatypes::{FromArrowType, KalamDataType}, + models::schemas::{ColumnDefinition, TableDefinition, TableOptions}, + schemas::ColumnDefault, + }; let table_id_str = format!("{}.{}", stmt.namespace_id.as_str(), stmt.table_name.as_str()); @@ -380,7 +387,8 @@ pub fn build_table_definition( }, (TableOptions::Shared(opts), TableType::Shared) => { opts.storage_id = storage_id.clone(); - // Only override access_level if explicitly specified in SQL; otherwise keep the default (Private) + // Only override access_level if explicitly specified in SQL; otherwise keep the default + // (Private) if let Some(access) = stmt.access_level { opts.access_level = Some(access); } diff --git a/backend/crates/kalamdb-handlers/crates/user/src/lib.rs b/backend/crates/kalamdb-handlers/crates/user/src/lib.rs index 7f04032d8..c4679806c 100644 --- a/backend/crates/kalamdb-handlers/crates/user/src/lib.rs +++ b/backend/crates/kalamdb-handlers/crates/user/src/lib.rs @@ -1,15 +1,15 @@ pub mod helpers; pub mod user; +use std::sync::Arc; + use kalamdb_commons::AuthType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; +use kalamdb_core::{app_context::AppContext, 
sql::executor::handler_registry::HandlerRegistry}; use kalamdb_handlers_support::register_typed_handler; -use kalamdb_sql::classifier::SqlStatementKind; -use kalamdb_sql::ddl::{ - AlterUserStatement, CreateUserStatement, DropUserStatement, UserModification, +use kalamdb_sql::{ + classifier::SqlStatementKind, + ddl::{AlterUserStatement, CreateUserStatement, DropUserStatement, UserModification}, }; -use std::sync::Arc; pub fn register_user_handlers( registry: &HandlerRegistry, diff --git a/backend/crates/kalamdb-handlers/crates/user/src/user/alter.rs b/backend/crates/kalamdb-handlers/crates/user/src/user/alter.rs index 6bf83a93c..23e65cfc4 100644 --- a/backend/crates/kalamdb-handlers/crates/user/src/user/alter.rs +++ b/backend/crates/kalamdb-handlers/crates/user/src/user/alter.rs @@ -1,15 +1,21 @@ //! Typed handler for ALTER USER statement +use std::sync::Arc; + use kalamdb_auth::security::password::{ hash_password, validate_password_with_policy, PasswordPolicy, }; -use kalamdb_commons::{Role, UserId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_commons::UserId; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::{AlterUserStatement, UserModification}; -use std::sync::Arc; +use kalamdb_system::Role; /// Handler for ALTER USER pub struct AlterUserHandler { diff --git a/backend/crates/kalamdb-handlers/crates/user/src/user/create.rs b/backend/crates/kalamdb-handlers/crates/user/src/user/create.rs index 82967d91e..0208fdb0e 100644 --- a/backend/crates/kalamdb-handlers/crates/user/src/user/create.rs +++ b/backend/crates/kalamdb-handlers/crates/user/src/user/create.rs @@ -1,17 +1,22 @@ //! 
Typed handler for CREATE USER statement +use std::sync::Arc; + use kalamdb_auth::security::password::{ hash_password, validate_password_with_policy, PasswordPolicy, }; use kalamdb_commons::{AuthType, UserId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::CreateUserStatement; use kalamdb_system::{AuthData, User}; -use std::sync::Arc; /// Handler for CREATE USER pub struct CreateUserHandler { diff --git a/backend/crates/kalamdb-handlers/crates/user/src/user/drop.rs b/backend/crates/kalamdb-handlers/crates/user/src/user/drop.rs index 5fb29a9c1..7f3ece977 100644 --- a/backend/crates/kalamdb-handlers/crates/user/src/user/drop.rs +++ b/backend/crates/kalamdb-handlers/crates/user/src/user/drop.rs @@ -1,12 +1,17 @@ //! 
Typed handler for DROP USER statement +use std::sync::Arc; + use kalamdb_commons::UserId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_sql::ddl::DropUserStatement; -use std::sync::Arc; /// Handler for DROP USER pub struct DropUserHandler { diff --git a/backend/crates/kalamdb-handlers/src/lib.rs b/backend/crates/kalamdb-handlers/src/lib.rs index f21123168..9f181f531 100644 --- a/backend/crates/kalamdb-handlers/src/lib.rs +++ b/backend/crates/kalamdb-handlers/src/lib.rs @@ -12,20 +12,19 @@ pub mod subscription; -pub use kalamdb_handlers_admin::{backup, cluster, compact, export, flush, jobs, system}; -pub use kalamdb_handlers_ddl::{namespace, storage, table, view}; -pub use kalamdb_handlers_stream::topics; -pub use kalamdb_handlers_support::{audit, guards, table_creation}; -pub use kalamdb_handlers_user::user; +use std::sync::Arc; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; +use kalamdb_core::{app_context::AppContext, sql::executor::handler_registry::HandlerRegistry}; use kalamdb_handlers_admin::register_admin_handlers; +pub use kalamdb_handlers_admin::{backup, cluster, compact, export, flush, jobs, system}; use kalamdb_handlers_ddl::register_ddl_handlers; +pub use kalamdb_handlers_ddl::{namespace, storage, table, view}; use kalamdb_handlers_stream::register_stream_handlers; +pub use kalamdb_handlers_stream::topics; use kalamdb_handlers_support::register_typed_handler; +pub use kalamdb_handlers_support::{audit, guards, table_creation}; use kalamdb_handlers_user::register_user_handlers; -use std::sync::Arc; +pub 
use kalamdb_handlers_user::user; /// Register all SQL statement handlers into the given registry. /// diff --git a/backend/crates/kalamdb-handlers/src/subscription/subscribe.rs b/backend/crates/kalamdb-handlers/src/subscription/subscribe.rs index 76ae49abd..6d438bcbc 100644 --- a/backend/crates/kalamdb-handlers/src/subscription/subscribe.rs +++ b/backend/crates/kalamdb-handlers/src/subscription/subscribe.rs @@ -1,12 +1,17 @@ //! Typed handler for SUBSCRIBE statement -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::{ExecutionContext, ExecutionResult, ScalarValue}; -use kalamdb_core::sql::executor::handlers::TypedStatementHandler; +use std::sync::Arc; + +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + context::{ExecutionContext, ExecutionResult, ScalarValue}, + executor::handlers::TypedStatementHandler, + }, +}; use kalamdb_handlers_support::audit; use kalamdb_sql::ddl::SubscribeStatement; -use std::sync::Arc; use uuid::Uuid; /// Handler for SUBSCRIBE TO (Live Query) diff --git a/backend/crates/kalamdb-jobs/src/executors/backup.rs b/backend/crates/kalamdb-jobs/src/executors/backup.rs index a940b7a97..c9dacaaa0 100644 --- a/backend/crates/kalamdb-jobs/src/executors/backup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/backup.rs @@ -21,13 +21,14 @@ //! } //! 
``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::{fs, path::Path}; + use async_trait::async_trait; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::fs; -use std::path::Path; + +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; /// Typed parameters for full database backup operations #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/backend/crates/kalamdb-jobs/src/executors/cleanup.rs b/backend/crates/kalamdb-jobs/src/executors/cleanup.rs index b2bb1e538..2389baa4a 100644 --- a/backend/crates/kalamdb-jobs/src/executors/cleanup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/cleanup.rs @@ -19,20 +19,22 @@ //! } //! ``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::sync::Arc; + use async_trait::async_trait; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::operations::table_cleanup::{ - cleanup_metadata_internal, cleanup_parquet_files_internal, cleanup_table_data_internal, +use kalamdb_commons::{schemas::TableType, TableId}; +// Re-export so consumers can keep importing from this module. +pub use kalamdb_core::operations::table_cleanup::{CleanupOperation, StorageCleanupDetails}; +use kalamdb_core::{ + error::KalamDbError, + operations::table_cleanup::{ + cleanup_metadata_internal, cleanup_parquet_files_internal, cleanup_table_data_internal, + }, }; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::sync::Arc; -// Re-export so consumers can keep importing from this module. 
-pub use kalamdb_core::operations::table_cleanup::{CleanupOperation, StorageCleanupDetails}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; /// Typed parameters for cleanup operations (T191) #[derive(Debug, Clone, Serialize, Deserialize)] @@ -133,7 +135,8 @@ impl JobExecutor for CleanupExecutor { // Build success message with metrics let message = format!( - "Cleaned up table {} successfully - {} rows deleted, {} bytes freed, {} cache entries invalidated", + "Cleaned up table {} successfully - {} rows deleted, {} bytes freed, {} cache entries \ + invalidated", table_id, rows_deleted, bytes_freed, cache_entries_invalidated ); @@ -161,9 +164,9 @@ impl Default for CleanupExecutor { #[cfg(test)] mod tests { + use kalamdb_commons::{NamespaceId, StorageId, TableName}; + use super::*; - use kalamdb_commons::StorageId; - use kalamdb_commons::{NamespaceId, TableName}; #[test] fn test_executor_properties() { diff --git a/backend/crates/kalamdb-jobs/src/executors/compact.rs b/backend/crates/kalamdb-jobs/src/executors/compact.rs index 539fd1e1a..23173d562 100644 --- a/backend/crates/kalamdb-jobs/src/executors/compact.rs +++ b/backend/crates/kalamdb-jobs/src/executors/compact.rs @@ -18,17 +18,18 @@ //! } //! 
``` -use crate::executors::shared_table_cleanup::cleanup_empty_shared_scope_if_needed; -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; -use kalamdb_commons::constants::ColumnFamilyNames; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; +use kalamdb_commons::{schemas::TableType, TableId}; use kalamdb_core::error::KalamDbError; -use kalamdb_store::storage_trait::{Partition, StorageBackendAsync}; +use kalamdb_store::storage_trait::StorageBackendAsync; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{ + shared_table_cleanup::cleanup_empty_shared_scope_if_needed, + table_partition::hot_table_partition, JobContext, JobDecision, JobExecutor, JobParams, +}; + /// Typed parameters for compaction operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompactParams { @@ -82,26 +83,21 @@ impl JobExecutor for CompactExecutor { let table_id = params.table_id.clone(); let table_type = params.table_type; - let partition_name = match table_type { - TableType::User => format!("{}{}", ColumnFamilyNames::USER_TABLE_PREFIX, table_id), - TableType::Shared => { - format!("{}{}", ColumnFamilyNames::SHARED_TABLE_PREFIX, table_id) - }, - TableType::Stream => { - return Ok(JobDecision::Failed { - message: "STORAGE COMPACT TABLE is not supported for STREAM tables".to_string(), - exception_trace: None, - }) - }, - TableType::System => { - return Ok(JobDecision::Failed { - message: "STORAGE COMPACT TABLE is not supported for SYSTEM tables".to_string(), - exception_trace: None, - }) - }, + let Some(partition) = hot_table_partition(table_type, &table_id) else { + let table_kind = match table_type { + TableType::Stream => "STREAM", + TableType::System => "SYSTEM", + TableType::User | TableType::Shared => unreachable!(), + }; + + return Ok(JobDecision::Failed { + message: format!( + "STORAGE COMPACT TABLE is not supported for {} tables", + table_kind + ), + 
exception_trace: None, + }); }; - - let partition = Partition::new(partition_name); ctx.log_debug(&format!("Running RocksDB compaction for partition {}", partition.name())); let backend = ctx.app_ctx.storage_backend(); diff --git a/backend/crates/kalamdb-jobs/src/executors/executor_trait.rs b/backend/crates/kalamdb-jobs/src/executors/executor_trait.rs index 62ce9dfeb..609ed15ac 100644 --- a/backend/crates/kalamdb-jobs/src/executors/executor_trait.rs +++ b/backend/crates/kalamdb-jobs/src/executors/executor_trait.rs @@ -37,16 +37,17 @@ //! } //! ``` -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; +// Note: CancellationToken is not available in tokio::sync in older versions +// We'll use a simple atomic bool for now +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; + +use kalamdb_core::{app_context::AppContext, error::KalamDbError}; use kalamdb_system::JobType; use log::{debug, error, info, trace, warn}; use serde::{Deserialize, Serialize}; -use std::sync::Arc; - -// Note: CancellationToken is not available in tokio::sync in older versions -// We'll use a simple atomic bool for now -use std::sync::atomic::{AtomicBool, Ordering}; /// Simple cancellation token wrapper #[derive(Clone)] @@ -295,7 +296,8 @@ pub trait JobExecutor: Send + Sync { /// - Local file cleanup /// - RocksDB compaction /// - /// By default, delegates to `execute()` for backwards compatibility with single-phase executors. + /// By default, delegates to `execute()` for backwards compatibility with single-phase + /// executors. /// /// # Returns /// - `Ok(JobDecision::Completed { .. 
})` - Local work completed successfully @@ -346,9 +348,10 @@ pub trait JobExecutor: Send + Sync { #[cfg(test)] mod tests { - use super::*; use kalamdb_core::test_helpers::test_app_context_simple; + use super::*; + #[test] fn test_job_decision_completed() { let decision = JobDecision::Completed { diff --git a/backend/crates/kalamdb-jobs/src/executors/flush.rs b/backend/crates/kalamdb-jobs/src/executors/flush.rs index eef31f217..199fd7e75 100644 --- a/backend/crates/kalamdb-jobs/src/executors/flush.rs +++ b/backend/crates/kalamdb-jobs/src/executors/flush.rs @@ -27,19 +27,34 @@ //! } //! ``` -use crate::executors::shared_table_cleanup::cleanup_empty_shared_scope_if_needed; -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::{future::Future, sync::Arc}; + use async_trait::async_trait; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::manifest::flush::{SharedTableFlushJob, TableFlush, UserTableFlushJob}; +use datafusion::arrow::datatypes::SchemaRef; +use kalamdb_commons::{schemas::TableType, TableId}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::{ + flush::{FlushJobResult, SharedTableFlushJob, TableFlush, UserTableFlushJob}, + ManifestService, + }, + providers::{SharedTableProvider, UserTableProvider}, + schema_registry::SchemaRegistry, +}; use kalamdb_store::EntityStore; use kalamdb_system::JobType; +use kalamdb_tables::{SharedTableIndexedStore, UserTableIndexedStore}; use serde::{Deserialize, Serialize}; -use std::sync::Arc; +use tokio::sync::Semaphore; + +use crate::executors::{ + shared_table_cleanup::cleanup_empty_shared_scope_if_needed, + table_partition::hot_table_partition, JobContext, JobDecision, JobExecutor, JobParams, +}; + +const MAX_POST_FLUSH_TASKS: usize = 2; /// Typed 
parameters for flush operations (T189) #[derive(Debug, Clone, Serialize, Deserialize)] @@ -60,6 +75,85 @@ impl JobParams for FlushParams { } } +enum FlushTarget { + User(Arc), + Shared(Arc), +} + +impl FlushTarget { + fn resolve( + schema_registry: &SchemaRegistry, + table_id: &TableId, + table_type: TableType, + ) -> Result, KalamDbError> { + let provider_arc = match table_type { + TableType::User | TableType::Shared => { + schema_registry.get_provider(table_id).ok_or_else(|| { + KalamDbError::NotFound(format!( + "Table provider not registered for {} (id={})", + table_id, table_id + )) + })? + }, + TableType::Stream | TableType::System => return Ok(None), + }; + + match table_type { + TableType::User => { + let provider = + provider_arc.as_any().downcast_ref::().ok_or_else(|| { + KalamDbError::InvalidOperation( + "Cached provider type mismatch for user table".into(), + ) + })?; + + Ok(Some(Self::User(provider.store()))) + }, + TableType::Shared => { + let provider = provider_arc + .as_any() + .downcast_ref::() + .ok_or_else(|| { + KalamDbError::InvalidOperation( + "Cached provider type mismatch for shared table".into(), + ) + })?; + + Ok(Some(Self::Shared(provider.store()))) + }, + TableType::Stream | TableType::System => Ok(None), + } + } + + fn log_message(&self) -> &'static str { + match self { + FlushTarget::User(_) => "Executing UserTableFlushJob (non-blocking)", + FlushTarget::Shared(_) => "Executing SharedTableFlushJob (non-blocking)", + } + } + + fn has_min_rows(self, min_rows: usize) -> bool { + match self { + FlushTarget::User(store) => { + let partition = store.partition(); + store + .backend() + .scan(&partition, None, None, Some(min_rows)) + .map(|iter| iter.count() >= min_rows) + .unwrap_or(true) + }, + FlushTarget::Shared(store) => { + let partition = store.partition(); + store + .backend() + .scan(&partition, None, None, Some(min_rows)) + .map(|iter| iter.count() >= min_rows) + .unwrap_or(true) + }, + } + } +} + /// Flush Job Executor /// /// 
Executes flush operations for buffered table data. @@ -72,19 +166,96 @@ impl JobParams for FlushParams { /// **Phase 2 - Leader Actions (leader only)**: /// Full flush: read from RocksDB, write Parquet files to storage, update manifest, /// delete flushed rows from RocksDB. -pub struct FlushExecutor; +pub struct FlushExecutor { + post_flush_permits: Arc, +} impl FlushExecutor { /// Create a new FlushExecutor pub fn new() -> Self { - Self + Self { + post_flush_permits: Arc::new(Semaphore::new(MAX_POST_FLUSH_TASKS)), + } + } + + async fn run_blocking_flush( + task: F, + error_context: &'static str, + ) -> Result + where + F: FnOnce() -> Result + Send + 'static, + { + tokio::task::spawn_blocking(task) + .await + .map_err(|err| KalamDbError::InvalidOperation(format!("Flush task panicked: {}", err)))? + .into_kalamdb_error(error_context) + } + + async fn execute_target_flush( + target: FlushTarget, + app_ctx: Arc, + table_id: Arc, + schema: SchemaRef, + schema_registry: Arc, + manifest_service: Arc, + ) -> Result { + match target { + FlushTarget::User(store) => { + let flush_job = UserTableFlushJob::new( + app_ctx, + table_id, + store, + schema, + schema_registry, + manifest_service, + ); + + Self::run_blocking_flush(move || flush_job.execute(), "User table flush failed") + .await + }, + FlushTarget::Shared(store) => { + let flush_job = SharedTableFlushJob::new( + app_ctx, + table_id, + store, + schema, + schema_registry, + manifest_service, + ); + + Self::run_blocking_flush(move || flush_job.execute(), "Shared table flush failed") + .await + }, + } + } + + fn try_spawn_post_flush_task(&self, task_name: &'static str, table_id: &TableId, future: F) + where + F: Future + Send + 'static, + { + let permit = match Arc::clone(&self.post_flush_permits).try_acquire_owned() { + Ok(permit) => permit, + Err(_) => { + log::trace!( + "Skipping {} for {}; post-flush maintenance is saturated", + task_name, + table_id + ); + return; + }, + }; + + tokio::task::spawn(async move { + let 
_permit = permit; + future.await; + }); } /// Internal flush implementation used by both execute() and execute_leader() async fn do_flush(&self, ctx: &JobContext) -> Result { // Parameters already validated in JobContext - type-safe access let params = ctx.params(); - let table_id = Arc::new(params.table_id.clone()); + let table_id = params.table_id.clone(); let table_type = params.table_type; log::trace!("[{}] Flushing {} (type: {:?})", ctx.job_id, table_id, table_type); @@ -119,120 +290,44 @@ impl FlushExecutor { }); } + let target = match FlushTarget::resolve(&schema_registry, &table_id, table_type)? { + Some(target) => target, + None => match table_type { + TableType::Stream => { + ctx.log_trace("Stream table flush not yet implemented"); + return Ok(JobDecision::Completed { + message: Some(format!( + "Stream flush skipped (not implemented) for {}", + table_id + )), + }); + }, + TableType::System => { + return Err(KalamDbError::InvalidOperation( + "Cannot flush SYSTEM tables".to_string(), + )); + }, + TableType::User | TableType::Shared => unreachable!(), + }, + }; + // Get current Arrow schema from the registry (already includes system columns) let schema = schema_registry .get_arrow_schema(&table_id) .into_kalamdb_error(&format!("Arrow schema not found for {}", table_id))?; - // Get current schema version for manifest recording - // Phase 16: Will be used when writing SegmentMetadata.schema_version - // let _current_schema_version = table_def.schema_version; + ctx.log_trace(target.log_message()); - // Execute flush based on table type - // Use spawn_blocking to avoid blocking the async runtime during RocksDB I/O - let result = match table_type { - TableType::User => { - ctx.log_trace("Executing UserTableFlushJob (non-blocking)"); - - // IMPORTANT: Use the per-table UserTableStore (created at table registration) - // instead of the generic prefix-only user_table_store() created in AppContext. 
- // The generic store points to partition "user_" (no namespace/table suffix) and - // cannot see actual row data stored under per-table partitions like - // "user_:". Using it caused runtime errors: - // Not found: user_ - // Retrieve the UserTableProvider instance to access the correct store. - let provider_arc = schema_registry.get_provider(&table_id).ok_or_else(|| { - KalamDbError::NotFound(format!( - "User table provider not registered for {} (id={})", - table_id, table_id - )) - })?; - - // Downcast to UserTableProvider to access store - let provider = provider_arc - .as_any() - .downcast_ref::() - .ok_or_else(|| { - KalamDbError::InvalidOperation( - "Cached provider type mismatch for user table".into(), - ) - })?; - - let store = provider.store(); - - let flush_job = UserTableFlushJob::new( - app_ctx.clone(), - table_id.clone(), - store, - schema.clone(), - schema_registry.clone(), - app_ctx.manifest_service(), - ); - - // Execute in blocking thread pool to avoid starving async runtime - tokio::task::spawn_blocking(move || flush_job.execute()) - .await - .map_err(|e| { - KalamDbError::InvalidOperation(format!("Flush task panicked: {}", e)) - })? - .into_kalamdb_error("User table flush failed")? 
- }, - TableType::Shared => { - ctx.log_trace("Executing SharedTableFlushJob (non-blocking)"); - - // Get the SharedTableProvider from the schema registry to reuse the cached store - let provider_arc = schema_registry.get_provider(&table_id).ok_or_else(|| { - KalamDbError::NotFound(format!( - "Shared table provider not registered for {} (id={})", - table_id, table_id - )) - })?; - - // Downcast to SharedTableProvider to access store - let provider = provider_arc - .as_any() - .downcast_ref::() - .ok_or_else(|| { - KalamDbError::InvalidOperation( - "Cached provider type mismatch for shared table".into(), - ) - })?; - - let store = provider.store(); - - let flush_job = SharedTableFlushJob::new( - app_ctx.clone(), - table_id.clone(), - store, - schema.clone(), - schema_registry.clone(), - app_ctx.manifest_service(), - ); - - // Execute in blocking thread pool to avoid starving async runtime - tokio::task::spawn_blocking(move || flush_job.execute()) - .await - .map_err(|e| { - KalamDbError::InvalidOperation(format!("Flush task panicked: {}", e)) - })? - .into_kalamdb_error("Shared table flush failed")? 
- }, - TableType::Stream => { - ctx.log_trace("Stream table flush not yet implemented"); - // Streams: return Completed (no-op) for idempotency and clarity - return Ok(JobDecision::Completed { - message: Some(format!( - "Stream flush skipped (not implemented) for {}", - table_id - )), - }); - }, - TableType::System => { - return Err(KalamDbError::InvalidOperation( - "Cannot flush SYSTEM tables".to_string(), - )); - }, - }; + let table_id_arc = Arc::new(table_id.clone()); + let result = Self::execute_target_flush( + target, + app_ctx.clone(), + table_id_arc, + schema, + schema_registry.clone(), + app_ctx.manifest_service(), + ) + .await?; log::debug!( "[{}] Flush operation completed: {} rows flushed, {} files created", @@ -241,55 +336,13 @@ impl FlushExecutor { result.parquet_files.len() ); - // Fire-and-forget: compact RocksDB partition after flush to reclaim - // space from tombstones. Compaction is an optimisation, not a - // correctness requirement, so we must not block the job from being - // marked "completed". With max_background_jobs=2, synchronous - // compaction under concurrent flush load was the root cause of - // 90-120 s stalls observed in smoke tests. 
- let compact_table_type = table_type; - let compact_table_id = table_id.clone(); - let compact_backend = app_ctx.storage_backend(); - if matches!(compact_table_type, TableType::User | TableType::Shared) { - tokio::task::spawn(async move { - let partition_name = match compact_table_type { - TableType::User => { - use kalamdb_commons::constants::ColumnFamilyNames; - format!("{}{}", ColumnFamilyNames::USER_TABLE_PREFIX, compact_table_id) - }, - TableType::Shared => { - use kalamdb_commons::constants::ColumnFamilyNames; - format!("{}{}", ColumnFamilyNames::SHARED_TABLE_PREFIX, compact_table_id) - }, - _ => return, - }; - use kalamdb_store::storage_trait::Partition; - let partition = Partition::new(partition_name); - match tokio::task::spawn_blocking(move || { - compact_backend.compact_partition(&partition) - }) - .await - { - Ok(Ok(())) => { - log::trace!("Post-flush compaction completed for {}", compact_table_id); - }, - Ok(Err(e)) => { - log::warn!("Post-flush compaction failed (non-critical): {}", e); - }, - Err(e) => { - log::warn!("Post-flush compaction task panicked: {}", e); - }, - } - }); - } - // Fire-and-forget: check if the shared table scope is empty and clean // up cold segments if so. Also non-blocking to avoid stalling. if matches!(table_type, TableType::Shared) { let cleanup_app_ctx = app_ctx.clone(); - let cleanup_table_id = (*table_id).clone(); + let cleanup_table_id = table_id.clone(); let cleanup_job_id = ctx.job_id.clone(); - tokio::task::spawn(async move { + self.try_spawn_post_flush_task("post-flush shared cleanup", &table_id, async move { // Build a minimal JobContext just for the helper let params = FlushParams { table_id: cleanup_table_id.clone(), @@ -305,6 +358,34 @@ impl FlushExecutor { }); } + // Fire-and-forget: compact RocksDB partition after flush to reclaim + // space from tombstones. Compaction is an optimisation, not a + // correctness requirement, so we must not block the job from being + // marked "completed". 
With max_background_jobs=2, synchronous + // compaction under concurrent flush load was the root cause of + // 90-120 s stalls observed in smoke tests. + if let Some(partition) = hot_table_partition(table_type, &table_id) { + let compact_table_id = table_id.clone(); + let compact_backend = app_ctx.storage_backend(); + self.try_spawn_post_flush_task("post-flush compaction", &table_id, async move { + match tokio::task::spawn_blocking(move || { + compact_backend.compact_partition(&partition) + }) + .await + { + Ok(Ok(())) => { + log::trace!("Post-flush compaction completed for {}", compact_table_id); + }, + Ok(Err(err)) => { + log::warn!("Post-flush compaction failed (non-critical): {}", err); + }, + Err(err) => { + log::warn!("Post-flush compaction task panicked: {}", err); + }, + } + }); + } + Ok(JobDecision::Completed { message: Some(format!( "Flushed {} successfully ({} rows, {} files)", @@ -347,49 +428,35 @@ impl JobExecutor for FlushExecutor { None => return Ok(false), }; - // Minimum rows needed: flush_threshold or 1 (just check for any data) - let min_rows = params.flush_threshold.unwrap_or(1) as usize; - - // Only check for User and Shared tables - match table_def.table_type { - TableType::User => { - if let Some(provider_arc) = schema_registry.get_provider(¶ms.table_id) { - if let Some(provider) = provider_arc - .as_any() - .downcast_ref::( - ) { - let store = provider.store(); - let partition = store.partition(); - let has_enough = store - .backend() - .scan(&partition, None, None, Some(min_rows)) - .map(|iter| iter.count() >= min_rows) - .unwrap_or(true); // on error, assume data exists - return Ok(has_enough); - } - } - Ok(false) - }, - TableType::Shared => { - if let Some(provider_arc) = schema_registry.get_provider(¶ms.table_id) { - if let Some(provider) = provider_arc - .as_any() - .downcast_ref::( - ) { - let store = provider.store(); - let partition = store.partition(); - let has_enough = store - .backend() - .scan(&partition, None, None, 
Some(min_rows)) - .map(|iter| iter.count() >= min_rows) - .unwrap_or(true); - return Ok(has_enough); - } - } - Ok(false) - }, - _ => Ok(true), + if table_def.table_type != params.table_type { + return Ok(false); } + + // Minimum rows needed: flush_threshold or 1 (just check for any data) + let min_rows = params.flush_threshold.unwrap_or(1).max(1) as usize; + + let target = + match FlushTarget::resolve(&schema_registry, ¶ms.table_id, table_def.table_type) { + Ok(Some(target)) => target, + Ok(None) => return Ok(true), + Err(err) => { + log::trace!( + "Flush pre-validation skipped {} because target resolution failed: {}", + params.table_id, + err + ); + return Ok(false); + }, + }; + + tokio::task::spawn_blocking(move || target.has_min_rows(min_rows)) + .await + .map_err(|err| { + KalamDbError::InvalidOperation(format!( + "Flush pre-validation task panicked: {}", + err + )) + }) } /// Legacy single-phase execute - delegates to do_flush for backward compatibility @@ -438,9 +505,10 @@ impl Default for FlushExecutor { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::NamespaceId; + use super::*; + #[test] fn test_executor_properties() { let executor = FlushExecutor::new(); diff --git a/backend/crates/kalamdb-jobs/src/executors/job_cleanup.rs b/backend/crates/kalamdb-jobs/src/executors/job_cleanup.rs index 6934c0530..0235550ea 100644 --- a/backend/crates/kalamdb-jobs/src/executors/job_cleanup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/job_cleanup.rs @@ -15,12 +15,13 @@ //! } //! 
``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; + /// Typed parameters for job cleanup operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct JobCleanupParams { diff --git a/backend/crates/kalamdb-jobs/src/executors/manifest_eviction.rs b/backend/crates/kalamdb-jobs/src/executors/manifest_eviction.rs index 5b8d677c8..acda66077 100644 --- a/backend/crates/kalamdb-jobs/src/executors/manifest_eviction.rs +++ b/backend/crates/kalamdb-jobs/src/executors/manifest_eviction.rs @@ -19,13 +19,14 @@ //! - eviction_interval_seconds: How often the job runs (default: 600s = 10 minutes) //! - eviction_ttl_days: How many days before an unaccessed manifest is evicted (default: 7 days) -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::sync::Arc; + use async_trait::async_trait; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; +use kalamdb_core::{app_context::AppContext, error::KalamDbError}; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::sync::Arc; + +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; fn default_ttl_days() -> u64 { 7 diff --git a/backend/crates/kalamdb-jobs/src/executors/mod.rs b/backend/crates/kalamdb-jobs/src/executors/mod.rs index f79308901..c65e47694 100644 --- a/backend/crates/kalamdb-jobs/src/executors/mod.rs +++ b/backend/crates/kalamdb-jobs/src/executors/mod.rs @@ -7,7 +7,8 @@ //! - `JobDecision`: Result type for job execution (Completed, Retry, Failed) //! - `JobContext`: Execution context with app access and auto-prefixed logging //! - `JobRegistry`: Thread-safe registry mapping JobType to executors -//! 
- Concrete executors: Flush, Cleanup, Retention, StreamEviction, UserCleanup, Compact, Backup, Restore, ManifestEviction +//! - Concrete executors: Flush, Cleanup, Retention, StreamEviction, UserCleanup, Compact, Backup, +//! Restore, ManifestEviction pub mod executor_trait; pub mod registry; @@ -23,6 +24,7 @@ pub mod restore; pub mod retention; pub(crate) mod shared_table_cleanup; pub mod stream_eviction; +pub(crate) mod table_partition; pub mod topic_cleanup; pub mod topic_retention; pub mod user_cleanup; @@ -31,16 +33,15 @@ pub mod vector_index; // Re-export key types // Export core trait and types -pub use executor_trait::{CancellationToken, JobContext, JobDecision, JobExecutor, JobParams}; -pub use registry::JobRegistry; - // Re-export concrete executors pub use backup::BackupExecutor; pub use cleanup::CleanupExecutor; pub use compact::CompactExecutor; +pub use executor_trait::{CancellationToken, JobContext, JobDecision, JobExecutor, JobParams}; pub use flush::FlushExecutor; pub use job_cleanup::JobCleanupExecutor; pub use manifest_eviction::ManifestEvictionExecutor; +pub use registry::JobRegistry; pub use restore::RestoreExecutor; pub use retention::RetentionExecutor; pub use stream_eviction::StreamEvictionExecutor; diff --git a/backend/crates/kalamdb-jobs/src/executors/registry.rs b/backend/crates/kalamdb-jobs/src/executors/registry.rs index a7a1559dd..f4065a052 100644 --- a/backend/crates/kalamdb-jobs/src/executors/registry.rs +++ b/backend/crates/kalamdb-jobs/src/executors/registry.rs @@ -11,16 +11,16 @@ //! This allows storing executors with different parameter types in a single collection //! while preserving type safety at execution time through runtime deserialization. 
-use super::executor_trait::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::{collections::HashMap, sync::Arc}; + use async_trait::async_trait; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::SerdeJsonResultExt; -use kalamdb_system::Job; -use kalamdb_system::JobType; +use kalamdb_core::{ + app_context::AppContext, error::KalamDbError, error_extensions::SerdeJsonResultExt, +}; +use kalamdb_system::{Job, JobType}; use parking_lot::RwLock; -use std::collections::HashMap; -use std::sync::Arc; + +use super::executor_trait::{JobContext, JobDecision, JobExecutor, JobParams}; /// Type-erased job executor trait for heterogeneous storage /// @@ -205,9 +205,10 @@ where /// # Example /// /// ```no_run -/// use kalamdb_core::jobs::executors::{JobRegistry, flush::FlushExecutor}; /// use std::sync::Arc; /// +/// use kalamdb_core::jobs::executors::{flush::FlushExecutor, JobRegistry}; +/// /// let registry = JobRegistry::new(); /// /// // Register executors (type-safe at registration) @@ -466,13 +467,14 @@ impl Default for JobRegistry { #[cfg(test)] mod tests { - use super::*; - use crate::executors::JobParams; use kalamdb_commons::models::{JobId, NodeId}; use kalamdb_core::test_helpers::test_app_context_simple; use kalamdb_system::JobStatus; use serde::{Deserialize, Serialize}; + use super::*; + use crate::executors::JobParams; + #[derive(Clone, Serialize, Deserialize)] struct MockParams { value: i32, diff --git a/backend/crates/kalamdb-jobs/src/executors/restore.rs b/backend/crates/kalamdb-jobs/src/executors/restore.rs index 915cfc83e..0877f05ed 100644 --- a/backend/crates/kalamdb-jobs/src/executors/restore.rs +++ b/backend/crates/kalamdb-jobs/src/executors/restore.rs @@ -24,13 +24,14 @@ //! ## IMPORTANT //! Restore requires a server restart after completion to reload the restored data. 
-use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::{fs, path::Path}; + use async_trait::async_trait; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::fs; -use std::path::Path; + +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; /// Typed parameters for full database restore operations #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/backend/crates/kalamdb-jobs/src/executors/retention.rs b/backend/crates/kalamdb-jobs/src/executors/retention.rs index fd541997a..e28cebc70 100644 --- a/backend/crates/kalamdb-jobs/src/executors/retention.rs +++ b/backend/crates/kalamdb-jobs/src/executors/retention.rs @@ -20,14 +20,15 @@ //! } //! ``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::sync::Arc; + use async_trait::async_trait; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; +use kalamdb_commons::{schemas::TableType, TableId}; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::sync::Arc; + +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; /// Typed parameters for retention operations (T192) #[derive(Debug, Clone, Serialize, Deserialize)] @@ -102,8 +103,8 @@ impl JobExecutor for RetentionExecutor { )); // TODO: Implement actual retention enforcement logic - // Current limitation: UserTableRow/SharedTableRow/StreamTableRow don't have deleted_at field yet - // When adding soft-delete support to table rows: + // Current limitation: UserTableRow/SharedTableRow/StreamTableRow don't have deleted_at + // field yet When adding soft-delete support to table rows: // 1. Add `deleted_at: Option` field to UserTableRow/SharedTableRow/StreamTableRow // 2. Scan table using store.scan_prefix() (no filter needed - small datasets) // 3. 
Filter rows where deleted_at.is_some() && deleted_at.unwrap() < cutoff_time @@ -147,9 +148,10 @@ impl Default for RetentionExecutor { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::{NamespaceId, TableName}; + use super::*; + #[test] fn test_executor_properties() { let executor = RetentionExecutor::new(); diff --git a/backend/crates/kalamdb-jobs/src/executors/shared_table_cleanup.rs b/backend/crates/kalamdb-jobs/src/executors/shared_table_cleanup.rs index 5f86e17a7..a5cc385eb 100644 --- a/backend/crates/kalamdb-jobs/src/executors/shared_table_cleanup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/shared_table_cleanup.rs @@ -1,7 +1,10 @@ -use crate::executors::{JobContext, JobParams}; use kalamdb_commons::TableId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::providers::{row_utils::system_user_id, BaseTableProvider, SharedTableProvider}; +use kalamdb_core::{ + error::KalamDbError, + providers::{row_utils::system_user_id, BaseTableProvider, SharedTableProvider}, +}; + +use crate::executors::{JobContext, JobParams}; pub(crate) async fn cleanup_empty_shared_scope_if_needed( ctx: &JobContext, diff --git a/backend/crates/kalamdb-jobs/src/executors/stream_eviction.rs b/backend/crates/kalamdb-jobs/src/executors/stream_eviction.rs index fb31506a4..6ba0b6363 100644 --- a/backend/crates/kalamdb-jobs/src/executors/stream_eviction.rs +++ b/backend/crates/kalamdb-jobs/src/executors/stream_eviction.rs @@ -21,23 +21,29 @@ //! } //! 
``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; +use std::{ + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; + use async_trait::async_trait; -use kalamdb_commons::ids::{SeqId, SnowflakeGenerator}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_core::providers::StreamTableProvider; +use kalamdb_commons::{ + ids::{SeqId, SnowflakeGenerator}, + schemas::TableType, + TableId, +}; #[cfg(test)] use kalamdb_core::schema_registry::TablesSchemaRegistryAdapter; +use kalamdb_core::{ + app_context::AppContext, error::KalamDbError, error_extensions::KalamDbResultExt, + providers::StreamTableProvider, +}; #[cfg(test)] use kalamdb_sharding::ShardRouter; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; fn default_batch_size() -> u64 { 10000 @@ -234,30 +240,34 @@ impl Default for StreamEvictionExecutor { #[cfg(test)] mod tests { - use super::*; + use std::{collections::HashMap, sync::Arc}; + use chrono::Utc; use datafusion::datasource::TableProvider; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::models::schemas::{ - ColumnDefinition, TableDefinition, TableOptions, TableType, + use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableOptions, TableType}, + TableId, TableName, UserId, + }, + ChangeNotification, JobId, NamespaceId, NodeId, + }; + use kalamdb_core::{ + app_context::AppContext, + providers::{ + arrow_json_conversion::json_to_row, + base::{BaseTableProvider, TableProviderCore}, + StreamTableProvider, + }, + test_helpers::test_app_context_simple, }; - use kalamdb_commons::models::{TableId, TableName, UserId}; - use 
kalamdb_commons::{ChangeNotification, JobId, NamespaceId, NodeId}; - use kalamdb_core::app_context::AppContext; - use kalamdb_core::providers::arrow_json_conversion::json_to_row; - use kalamdb_core::providers::base::{BaseTableProvider, TableProviderCore}; - use kalamdb_core::providers::StreamTableProvider; - use kalamdb_core::test_helpers::test_app_context_simple; - use kalamdb_system::providers::jobs::models::Job; - use kalamdb_system::NotificationService; - use kalamdb_system::SchemaRegistry; - use kalamdb_tables::utils::TableServices; - use kalamdb_tables::StreamTableStoreConfig; + use kalamdb_system::{providers::jobs::models::Job, NotificationService, SchemaRegistry}; + use kalamdb_tables::{utils::TableServices, StreamTableStoreConfig}; use serde_json::json; - use std::collections::HashMap; - use std::sync::Arc; use tokio::time::{sleep, Duration}; + use super::*; + fn make_job(id: &str, job_type: JobType, _ns: &str) -> Job { let now = chrono::Utc::now().timestamp_millis(); Job { @@ -439,15 +449,13 @@ mod tests { sleep(Duration::from_millis(1500)).await; let mut job = make_job("SE-evict", JobType::StreamEviction, harness.namespace.as_str()); - job.parameters = Some( - serde_json::json!({ - "namespace_id": harness.namespace.as_str(), - "table_name": harness.table_name_value.clone(), - "table_type": "Stream", - "ttl_seconds": 1, - "batch_size": 100 - }), - ); + job.parameters = Some(serde_json::json!({ + "namespace_id": harness.namespace.as_str(), + "table_name": harness.table_name_value.clone(), + "table_type": "Stream", + "ttl_seconds": 1, + "batch_size": 100 + })); let params = StreamEvictionParams { table_id: harness.table_id.clone(), diff --git a/backend/crates/kalamdb-jobs/src/executors/table_partition.rs b/backend/crates/kalamdb-jobs/src/executors/table_partition.rs new file mode 100644 index 000000000..fdd2f7a47 --- /dev/null +++ b/backend/crates/kalamdb-jobs/src/executors/table_partition.rs @@ -0,0 +1,12 @@ +use 
kalamdb_commons::{constants::ColumnFamilyNames, schemas::TableType, TableId}; +use kalamdb_store::storage_trait::Partition; + +pub(crate) fn hot_table_partition(table_type: TableType, table_id: &TableId) -> Option { + let partition_name = match table_type { + TableType::User => format!("{}{}", ColumnFamilyNames::USER_TABLE_PREFIX, table_id), + TableType::Shared => format!("{}{}", ColumnFamilyNames::SHARED_TABLE_PREFIX, table_id), + TableType::Stream | TableType::System => return None, + }; + + Some(Partition::new(partition_name)) +} diff --git a/backend/crates/kalamdb-jobs/src/executors/topic_cleanup.rs b/backend/crates/kalamdb-jobs/src/executors/topic_cleanup.rs index 05b60c696..128ebb3df 100644 --- a/backend/crates/kalamdb-jobs/src/executors/topic_cleanup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/topic_cleanup.rs @@ -21,13 +21,14 @@ //! } //! ``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; use kalamdb_commons::models::TopicId; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; + /// Typed parameters for topic cleanup operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TopicCleanupParams { @@ -135,7 +136,8 @@ impl JobExecutor for TopicCleanupExecutor { // This job is responsible for cleaning up the actual data (messages + offsets). 
let message = format!( - "Cleaned up topic '{}' - {} consumer group offsets deleted, {} messages deleted, {} bytes freed", + "Cleaned up topic '{}' - {} consumer group offsets deleted, {} messages deleted, {} \ + bytes freed", topic_name, offsets_deleted, messages_deleted, bytes_freed ); diff --git a/backend/crates/kalamdb-jobs/src/executors/topic_retention.rs b/backend/crates/kalamdb-jobs/src/executors/topic_retention.rs index d25f406f9..21b449ea5 100644 --- a/backend/crates/kalamdb-jobs/src/executors/topic_retention.rs +++ b/backend/crates/kalamdb-jobs/src/executors/topic_retention.rs @@ -20,13 +20,14 @@ //! } //! ``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; use kalamdb_commons::models::TopicId; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; + /// Typed parameters for topic retention operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TopicRetentionParams { diff --git a/backend/crates/kalamdb-jobs/src/executors/user_cleanup.rs b/backend/crates/kalamdb-jobs/src/executors/user_cleanup.rs index 867f9d70e..0fda97acc 100644 --- a/backend/crates/kalamdb-jobs/src/executors/user_cleanup.rs +++ b/backend/crates/kalamdb-jobs/src/executors/user_cleanup.rs @@ -19,13 +19,14 @@ //! } //! 
``` -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; use kalamdb_commons::models::UserId; use kalamdb_core::error::KalamDbError; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; + /// Typed parameters for user cleanup operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct UserCleanupParams { diff --git a/backend/crates/kalamdb-jobs/src/executors/user_export.rs b/backend/crates/kalamdb-jobs/src/executors/user_export.rs index 78ccf685e..07b96e0ff 100644 --- a/backend/crates/kalamdb-jobs/src/executors/user_export.rs +++ b/backend/crates/kalamdb-jobs/src/executors/user_export.rs @@ -1,11 +1,11 @@ //! User Export Job Executor //! //! Exports all user tables for a specific user across all namespaces by: -//! 1. Triggering a flush job for every user table (ensures all buffered RocksDB -//! writes are persisted as Parquet files). +//! 1. Triggering a flush job for every user table (ensures all buffered RocksDB writes are +//! persisted as Parquet files). //! 2. Waiting for all flush jobs to reach a terminal state. -//! 3. Reading the raw Parquet files from `StorageCached` and packaging them -//! into a ZIP archive at `{data_path}/exports/{user_id}/{export_id}.zip`. +//! 3. Reading the raw Parquet files from `StorageCached` and packaging them into a ZIP archive at +//! `{data_path}/exports/{user_id}/{export_id}.zip`. //! //! ## Parameters Format //! ```json @@ -15,27 +15,31 @@ //! } //! 
``` -use crate::executors::flush::FlushParams; -use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; -use crate::AppContextJobsExt; +use std::{ + collections::HashMap, + fs, + io::{Cursor, Write}, + time::Duration, +}; + use async_trait::async_trait; -use kalamdb_commons::ids::UserTableRowId; -use kalamdb_commons::models::UserId; -use kalamdb_commons::schemas::{TableOptions, TableType}; -use kalamdb_commons::{JobId, TableId}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::providers::UserTableProvider; +use kalamdb_commons::{ + ids::UserTableRowId, + models::UserId, + schemas::{TableOptions, TableType}, + JobId, TableId, +}; +use kalamdb_core::{error::KalamDbError, providers::UserTableProvider}; use kalamdb_store::EntityStore; -use kalamdb_system::JobStatus; -use kalamdb_system::JobType; +use kalamdb_system::{JobStatus, JobType}; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::fs; -use std::io::{Cursor, Write}; -use std::time::Duration; use tokio::time::sleep; -use zip::write::SimpleFileOptions; -use zip::ZipWriter; +use zip::{write::SimpleFileOptions, ZipWriter}; + +use crate::{ + executors::{flush::FlushParams, JobContext, JobDecision, JobExecutor, JobParams}, + AppContextJobsExt, +}; /// Maximum time to wait for all flush jobs to complete. const FLUSH_WAIT_TIMEOUT: Duration = Duration::from_secs(5 * 60); // 5 min diff --git a/backend/crates/kalamdb-jobs/src/executors/vector_index.rs b/backend/crates/kalamdb-jobs/src/executors/vector_index.rs index 486cb9852..5d936ce03 100644 --- a/backend/crates/kalamdb-jobs/src/executors/vector_index.rs +++ b/backend/crates/kalamdb-jobs/src/executors/vector_index.rs @@ -3,15 +3,20 @@ //! Flushes per-column vector hot staging into cold snapshot artifacts and updates //! vector metadata embedded in manifest.json. 
-use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; use async_trait::async_trait; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::vector::{flush_shared_scope_vectors, flush_user_scope_vectors}; +use kalamdb_commons::{ + models::{TableId, UserId}, + schemas::TableType, +}; +use kalamdb_core::{ + error::KalamDbError, + vector::{flush_shared_scope_vectors, flush_user_scope_vectors}, +}; use kalamdb_system::JobType; use serde::{Deserialize, Serialize}; +use crate::executors::{JobContext, JobDecision, JobExecutor, JobParams}; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VectorIndexParams { pub table_id: TableId, diff --git a/backend/crates/kalamdb-jobs/src/flush_scheduler.rs b/backend/crates/kalamdb-jobs/src/flush_scheduler.rs index fcbe3edcf..99ff664a1 100644 --- a/backend/crates/kalamdb-jobs/src/flush_scheduler.rs +++ b/backend/crates/kalamdb-jobs/src/flush_scheduler.rs @@ -1,10 +1,57 @@ -use crate::executors::flush::FlushParams; -use crate::JobsManager; -use kalamdb_commons::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; +use std::{collections::HashSet, sync::Arc}; + +use kalamdb_commons::{TableId, TableType}; +use kalamdb_core::{app_context::AppContext, error::KalamDbError, manifest::ManifestService}; use kalamdb_system::JobType; -use std::sync::Arc; + +use crate::{ + executors::flush::FlushParams, + scheduler_common::{ + classify_schedule_error, hourly_date_key, hourly_table_idempotency_key, ScheduleErrorKind, + }, + JobsManager, +}; + +#[derive(Debug, Default)] +struct PendingFlushScan { + table_ids: Vec, + pending_entries: usize, + duplicate_entries: usize, + read_errors: usize, +} + +fn collect_pending_flush_tables( + manifest_service: &ManifestService, +) -> Result { + let pending_iter = manifest_service + .pending_manifest_ids_iter() + .map_err(|err| 
KalamDbError::Other(format!("Pending manifest scan failed: {}", err)))?; + + let mut scan = PendingFlushScan::default(); + let mut seen_tables = HashSet::new(); + + for manifest_id_result in pending_iter { + let manifest_id = match manifest_id_result { + Ok(id) => id, + Err(err) => { + scan.read_errors += 1; + log::warn!("FlushScheduler: failed to read pending manifest entry: {}", err); + continue; + }, + }; + + scan.pending_entries += 1; + + let table_id = manifest_id.table_id().clone(); + if seen_tables.insert(table_id.clone()) { + scan.table_ids.push(table_id); + } else { + scan.duplicate_entries += 1; + } + } + + Ok(scan) +} /// Periodic scheduler that checks for tables with pending (unflushed) writes /// and creates flush jobs for them. @@ -27,30 +74,18 @@ impl FlushScheduler { ) -> Result<(), KalamDbError> { let manifest_service = app_context.manifest_service(); let default_row_limit = app_context.config().flush.default_row_limit as u64; - - let pending_iter = manifest_service - .pending_manifest_ids_iter() - .map_err(|e| KalamDbError::Other(format!("Pending manifest scan failed: {}", e)))?; + let pending_scan = collect_pending_flush_tables(&manifest_service)?; let schema_registry = app_context.schema_registry(); let mut tables_checked: u32 = 0; let mut jobs_created: u32 = 0; + let date_key = hourly_date_key(); - for manifest_id_result in pending_iter { - let manifest_id = match manifest_id_result { - Ok(id) => id, - Err(e) => { - log::warn!("FlushScheduler: failed to read pending manifest entry: {}", e); - continue; - }, - }; - - let table_id = manifest_id.table_id().clone(); - + for table_id in &pending_scan.table_ids { // Look up the table definition to determine its type // Use async variant to avoid blocking tokio worker on RocksDB cache miss - let table_def = match schema_registry.get_table_if_exists_async(&table_id).await { + let table_def = match schema_registry.get_table_if_exists_async(table_id).await { Ok(Some(def)) => def, Ok(None) => { // Table 
dropped after pending write was recorded — skip @@ -84,15 +119,13 @@ impl FlushScheduler { .unwrap_or(default_row_limit); let params = FlushParams { - table_id: table_id.clone(), + table_id: (*table_id).clone(), table_type: table_def.table_type, flush_threshold: Some(flush_threshold), }; // Hourly idempotency key prevents duplicate flush jobs - let now = chrono::Utc::now(); - let date_key = now.format("%Y-%m-%d-%H").to_string(); - let idempotency_key = format!("FL:{}:{}", table_id, date_key); + let idempotency_key = hourly_table_idempotency_key(JobType::Flush, table_id, &date_key); match jobs_manager .create_job_typed(JobType::Flush, params, Some(idempotency_key), None) @@ -106,37 +139,46 @@ impl FlushScheduler { table_id ); }, - Err(e) => { - let err_msg = e.to_string(); - if err_msg.contains("already running") || err_msg.contains("already exists") { + Err(err) => match classify_schedule_error(&err) { + ScheduleErrorKind::AlreadyActive => { log::trace!( "FlushScheduler: flush job for {} already exists (idempotent)", table_id ); - } else if err_msg.contains("pre-validation returned false") { + }, + ScheduleErrorKind::PreValidationSkipped => { log::trace!( "FlushScheduler: flush job for {} skipped (no data to flush)", table_id ); - } else { + }, + ScheduleErrorKind::Other => { log::warn!( "FlushScheduler: failed to create flush job for {}: {}", table_id, - e + err ); - } + }, }, } } if tables_checked > 0 { log::trace!( - "FlushScheduler: checked {} table(s), created {} flush job(s)", + "FlushScheduler: scanned {} pending manifest entries, checked {} table(s), \ + skipped {} duplicate pending entries, created {} flush job(s)", + pending_scan.pending_entries, tables_checked, + pending_scan.duplicate_entries, jobs_created ); } else { - log::trace!("FlushScheduler: no tables with pending writes found"); + log::trace!( + "FlushScheduler: no tables with pending writes found (pending_entries={}, \ + read_errors={})", + pending_scan.pending_entries, + 
pending_scan.read_errors + ); } Ok(()) diff --git a/backend/crates/kalamdb-jobs/src/health_monitor.rs b/backend/crates/kalamdb-jobs/src/health_monitor.rs index 51256e8f5..7257346f4 100644 --- a/backend/crates/kalamdb-jobs/src/health_monitor.rs +++ b/backend/crates/kalamdb-jobs/src/health_monitor.rs @@ -1,8 +1,6 @@ -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use std::sync::Arc; -use std::time::Duration; +use std::{sync::Arc, time::Duration}; +use kalamdb_core::{app_context::AppContext, error::KalamDbError}; // Re-export the WebSocket session tracking functions from kalamdb-observability pub use kalamdb_observability::{ decrement_websocket_sessions, get_websocket_session_count, idle_duration, diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/actions.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/actions.rs index b8c8e543a..127e684ca 100644 --- a/backend/crates/kalamdb-jobs/src/jobs_manager/actions.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/actions.rs @@ -1,14 +1,16 @@ -use super::types::JobsManager; -use crate::executors::JobParams; use chrono::Utc; use kalamdb_commons::JobId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; +use kalamdb_core::{error::KalamDbError, error_extensions::KalamDbResultExt}; use kalamdb_raft::commands::MetaCommand; -use kalamdb_system::providers::jobs::models::{Job, JobOptions}; -use kalamdb_system::{JobStatus, JobType}; +use kalamdb_system::{ + providers::jobs::models::{Job, JobOptions}, + JobStatus, JobType, +}; use log::Level; +use super::types::JobsManager; +use crate::executors::JobParams; + impl JobsManager { /// Insert a job in the database asynchronously via Raft or direct write /// @@ -33,7 +35,8 @@ impl JobsManager { /// /// # Arguments /// * `job_type` - Type of job to create - /// * `parameters` - Job parameters as JSON value (should contain namespace_id, table_name, etc.) 
+ /// * `parameters` - Job parameters as JSON value (should contain namespace_id, table_name, + /// etc.) /// * `idempotency_key` - Optional key to prevent duplicate jobs /// * `options` - Optional job creation options (retry, priority, queue) /// @@ -147,14 +150,16 @@ impl JobsManager { /// Create a job with type-safe parameters /// - /// **Type-Safe Alternative**: Accepts JobParams trait implementations for compile-time validation + /// **Type-Safe Alternative**: Accepts JobParams trait implementations for compile-time + /// validation /// /// # Type Parameters /// * `T` - JobParams implementation (FlushParams, CleanupParams, etc.) /// /// # Arguments /// * `job_type` - Type of job to create - /// * `params` - Typed parameters (automatically validated and serialized, should contain namespace_id, table_name) + /// * `params` - Typed parameters (automatically validated and serialized, should contain + /// namespace_id, table_name) /// * `idempotency_key` - Optional key to prevent duplicate jobs /// * `options` - Optional job configuration (retries, priority, queue) /// @@ -174,6 +179,18 @@ impl JobsManager { // Validate parameters before serialization params.validate()?; + // Fast duplicate rejection before executor pre-validation. Flush pre-validation can scan + // RocksDB, so avoid doing that work for jobs we already know are active. create_job() + // still performs the same check after pre-validation to keep the race window closed. + if let Some(ref key) = idempotency_key { + if self.has_active_job_with_key(key).await? 
{ + return Err(KalamDbError::IdempotentConflict(format!( + "Job with idempotency key '{}' is already running or queued", + key + ))); + } + } + // Serialize to JSON for storage and pre-validation let params_json = serde_json::to_string(¶ms) .into_kalamdb_error("Failed to serialize job parameters")?; diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/mod.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/mod.rs index 9cffc3b6a..367e93dfe 100644 --- a/backend/crates/kalamdb-jobs/src/jobs_manager/mod.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/mod.rs @@ -1,6 +1,7 @@ //! Unified Job Management System //! -//! **Phase 9 (US6)**: Single JobsManager with typed JobIds, richer statuses, idempotency, retry/backoff, dedicated logging +//! **Phase 9 (US6)**: Single JobsManager with typed JobIds, richer statuses, idempotency, +//! retry/backoff, dedicated logging //! //! This module provides a centralized job management system with: //! - Typed JobIds with prefixes (FL, CL, RT, SE, UC, CO, BK, RS) diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/queries.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/queries.rs index b6c05ce24..06b69dff8 100644 --- a/backend/crates/kalamdb-jobs/src/jobs_manager/queries.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/queries.rs @@ -1,9 +1,11 @@ -use super::types::JobsManager; use kalamdb_commons::JobId; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_system::providers::jobs::models::{Job, JobFilter}; -use kalamdb_system::JobStatus; +use kalamdb_core::{error::KalamDbError, error_extensions::KalamDbResultExt}; +use kalamdb_system::{ + providers::jobs::models::{Job, JobFilter}, + JobStatus, +}; + +use super::types::JobsManager; impl JobsManager { /// Get job details diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/runner.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/runner.rs index 19349cc57..613c208d8 100644 --- 
a/backend/crates/kalamdb-jobs/src/jobs_manager/runner.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/runner.rs @@ -1,24 +1,26 @@ -use super::types::JobsManager; -use super::utils::log_job; -use crate::executors::JobDecision; -use crate::AppContextJobsExt; -use crate::{FlushScheduler, HealthMonitor, StreamEvictionScheduler}; +use std::{collections::VecDeque, sync::Arc}; + use kalamdb_commons::{JobId, NodeId}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_raft::commands::MetaCommand; -use kalamdb_raft::GroupId; -use kalamdb_system::providers::jobs::models::{Job, JobFilter}; -use kalamdb_system::JobNode; -use kalamdb_system::JobStatus; +use kalamdb_core::{error::KalamDbError, error_extensions::KalamDbResultExt}; +use kalamdb_raft::{commands::MetaCommand, GroupId}; +use kalamdb_system::{ + providers::jobs::models::{Job, JobFilter}, + JobNode, JobStatus, +}; use log::Level; -use std::sync::Arc; -use tokio::sync::mpsc; -use tokio::sync::Semaphore; -use tokio::task::JoinSet; -use tokio::time::{sleep, Duration, Instant}; +use tokio::{ + sync::{mpsc, Semaphore}, + task::JoinSet, + time::{sleep, Duration, Instant}, +}; use tracing::Instrument; +use super::{types::JobsManager, utils::log_job}; +use crate::{ + executors::JobDecision, AppContextJobsExt, FlushScheduler, HealthMonitor, + StreamEvictionScheduler, +}; + const JOB_NODE_QUORUM_POLL_MS: u64 = 250; const JOB_NODE_QUORUM_TIMEOUT_SECS: u64 = 10; @@ -280,6 +282,7 @@ impl JobsManager { let semaphore = Arc::new(Semaphore::new(max_concurrent)); let job_manager = self.get_attached_app_context().job_manager(); let mut join_set = JoinSet::new(); + let mut pending_awakened_jobs = VecDeque::new(); // Adaptive idle polling (reduces CPU in empty systems) let idle_poll_min_ms: u64 = 500; @@ -304,97 +307,106 @@ impl JobsManager { } } - // Event-driven loop: await job wakeups or periodic ticks - let job_id_opt = tokio::select! 
{ - biased; - // Priority 1: awakened jobs from state machine - Some(job_id) = awake_receiver.recv() => Some(job_id), - // Priority 2: fallback polling for crash recovery/retries - _ = poll_interval.tick() => None, - // Periodic leadership check - _ = leadership_interval.tick() => { - let leader_now = self.is_cluster_leader().await; - if leader_now && !was_leader { - log::info!("[JobLoop] This node became leader - handling failover"); - self.handle_leader_failover().await; - } else if !leader_now && was_leader { - log::info!("[JobLoop] This node lost leadership"); - } - was_leader = leader_now; - is_leader = leader_now; - continue; - } - // Periodic health metrics logging (all nodes) - _ = health_interval.tick() => { - let app_ctx = self.get_attached_app_context(); - if let Err(e) = HealthMonitor::log_metrics(app_ctx).await { - log::warn!("Failed to log health metrics: {}", e); - } - continue; - } - // Periodic WAL cleanup: flush all RocksDB memtables so idle CFs - // don't pin WAL files forever (prevents WAL file accumulation) - _ = async { - if wal_cleanup_enabled { - let interval = wal_cleanup_interval - .as_mut() - .expect("wal cleanup interval missing"); - interval.tick().await; - } - }, if wal_cleanup_enabled => { - let app_ctx = self.get_attached_app_context(); - let backend = app_ctx.storage_backend(); - match tokio::task::spawn_blocking(move || backend.flush_all_memtables()).await { - Ok(Ok(())) => { - log::debug!("WAL cleanup: flushed all memtables"); - }, - Ok(Err(e)) => { - log::warn!("WAL cleanup flush_all_memtables failed: {}", e); - }, - Err(e) => { - log::warn!("WAL cleanup task join failed: {}", e); - }, + // Event-driven loop: preserve awakened jobs until a worker permit is available. + // Otherwise a full semaphore can drop a wakeup and leave the job dependent on + // fallback polling. + let job_id_opt = if let Some(job_id) = pending_awakened_jobs.pop_front() { + Some(job_id) + } else { + tokio::select! 
{ + biased; + // Priority 1: awakened jobs from state machine + Some(job_id) = awake_receiver.recv() => Some(job_id), + // Priority 2: fallback polling for crash recovery/retries + _ = poll_interval.tick() => None, + // Periodic leadership check + _ = leadership_interval.tick() => { + let leader_now = self.is_cluster_leader().await; + if leader_now && !was_leader { + log::info!("[JobLoop] This node became leader - handling failover"); + self.handle_leader_failover().await; + } else if !leader_now && was_leader { + log::info!("[JobLoop] This node lost leadership"); + } + was_leader = leader_now; + is_leader = leader_now; + continue; } - continue; - } - // Periodic stream eviction job creation (leader-only) - _ = async { - if stream_eviction_enabled { - let interval = stream_eviction_interval - .as_mut() - .expect("stream eviction interval missing"); - interval.tick().await; + // Periodic health metrics logging (all nodes) + _ = health_interval.tick() => { + let app_ctx = self.get_attached_app_context(); + if let Err(e) = HealthMonitor::log_metrics(app_ctx).await { + log::warn!("Failed to log health metrics: {}", e); + } + continue; } - }, if stream_eviction_enabled => { - if is_leader { + // Periodic WAL cleanup: flush all RocksDB memtables so idle CFs + // don't pin WAL files forever (prevents WAL file accumulation) + _ = async { + if wal_cleanup_enabled { + let interval = wal_cleanup_interval + .as_mut() + .expect("wal cleanup interval missing"); + interval.tick().await; + } + }, if wal_cleanup_enabled => { let app_ctx = self.get_attached_app_context(); - if let Err(e) = StreamEvictionScheduler::check_and_schedule(&app_ctx, self).await { - log::warn!("Failed to check stream eviction: {}", e); + let backend = app_ctx.storage_backend(); + match tokio::task::spawn_blocking(move || backend.flush_all_memtables()).await { + Ok(Ok(())) => { + log::debug!("WAL cleanup: flushed all memtables"); + }, + Ok(Err(e)) => { + log::warn!("WAL cleanup flush_all_memtables failed: 
{}", e); + }, + Err(e) => { + log::warn!("WAL cleanup task join failed: {}", e); + }, } + continue; } - continue; - } - // Periodic flush scheduler (leader-only) — creates flush jobs - // for tables with pending writes in RocksDB - _ = async { - if flush_check_enabled { - let interval = flush_check_interval - .as_mut() - .expect("flush check interval missing"); - interval.tick().await; + // Periodic stream eviction job creation (leader-only) + _ = async { + if stream_eviction_enabled { + let interval = stream_eviction_interval + .as_mut() + .expect("stream eviction interval missing"); + interval.tick().await; + } + }, if stream_eviction_enabled => { + if is_leader { + let app_ctx = self.get_attached_app_context(); + if let Err(e) = StreamEvictionScheduler::check_and_schedule(&app_ctx, self).await { + log::warn!("Failed to check stream eviction: {}", e); + } + } + continue; } - }, if flush_check_enabled => { - if is_leader { - let app_ctx = self.get_attached_app_context(); - if let Err(e) = FlushScheduler::check_and_schedule(&app_ctx, self).await { - log::warn!("Failed to check periodic flush: {}", e); + // Periodic flush scheduler (leader-only) — creates flush jobs + // for tables with pending writes in RocksDB + _ = async { + if flush_check_enabled { + let interval = flush_check_interval + .as_mut() + .expect("flush check interval missing"); + interval.tick().await; } + }, if flush_check_enabled => { + if is_leader { + let app_ctx = self.get_attached_app_context(); + if let Err(e) = FlushScheduler::check_and_schedule(&app_ctx, self).await { + log::warn!("Failed to check periodic flush: {}", e); + } + } + continue; } - continue; } }; if semaphore.available_permits() == 0 { + if let Some(job_id) = job_id_opt { + pending_awakened_jobs.push_front(job_id); + } if let Some(Err(err)) = join_set.join_next().await { log::error!("Job task panicked: {}", err); } @@ -404,6 +416,9 @@ impl JobsManager { let permit = match Arc::clone(&semaphore).try_acquire_owned() { Ok(permit) 
=> permit, Err(_) => { + if let Some(job_id) = job_id_opt { + pending_awakened_jobs.push_front(job_id); + } tokio::task::yield_now().await; continue; }, @@ -772,7 +787,8 @@ impl JobsManager { &job_id, &Level::Warn, &format!( - "Quorum timeout (completed {}/{}); proceeding with leader actions", + "Quorum timeout (completed {}/{}); proceeding with leader \ + actions", completed, total ), ); diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/types.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/types.rs index 3d62018b9..46e9d05b7 100644 --- a/backend/crates/kalamdb-jobs/src/jobs_manager/types.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/types.rs @@ -1,11 +1,15 @@ -use crate::executors::JobRegistry; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Weak, +}; + use kalamdb_commons::{JobId, NodeId}; use kalamdb_core::app_context::AppContext; use kalamdb_system::{JobNodesTableProvider, JobsTableProvider}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Weak}; use tokio::sync::mpsc; +use crate::executors::JobRegistry; + /// Unified Job Manager /// /// Provides centralized job creation, execution, tracking, and lifecycle management. 
diff --git a/backend/crates/kalamdb-jobs/src/jobs_manager/utils.rs b/backend/crates/kalamdb-jobs/src/jobs_manager/utils.rs index cd0acf12e..b6f311dbd 100644 --- a/backend/crates/kalamdb-jobs/src/jobs_manager/utils.rs +++ b/backend/crates/kalamdb-jobs/src/jobs_manager/utils.rs @@ -1,14 +1,12 @@ -use super::types::JobsManager; use chrono::Utc; use kalamdb_commons::{JobId, NodeId}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::error_extensions::KalamDbResultExt; -use kalamdb_raft::commands::MetaCommand; -use kalamdb_raft::NodeStatus; -use kalamdb_system::providers::jobs::models::JobFilter; -use kalamdb_system::{JobStatus, JobType}; +use kalamdb_core::{error::KalamDbError, error_extensions::KalamDbResultExt}; +use kalamdb_raft::{commands::MetaCommand, NodeStatus}; +use kalamdb_system::{providers::jobs::models::JobFilter, JobStatus, JobType}; use log::Level; +use super::types::JobsManager; + /// Lazy-formatting version of log_job_event. /// Avoids String allocation when the log level is disabled. macro_rules! log_job { diff --git a/backend/crates/kalamdb-jobs/src/leader_failover.rs b/backend/crates/kalamdb-jobs/src/leader_failover.rs index 462a7991c..c7c231f71 100644 --- a/backend/crates/kalamdb-jobs/src/leader_failover.rs +++ b/backend/crates/kalamdb-jobs/src/leader_failover.rs @@ -21,16 +21,17 @@ //! - **Backup jobs**: May need special handling (check partial backups) //! 
- **Restore jobs**: Should be failed and require manual restart +use std::{collections::HashSet, sync::Arc}; + use chrono::Utc; use kalamdb_commons::models::{JobId, NodeId}; -use kalamdb_system::providers::jobs::models::Job; -use kalamdb_system::{JobFilter, JobSortField, JobStatus, JobType, SortOrder}; -use std::collections::HashSet; -use std::sync::Arc; +use kalamdb_core::error::KalamDbError; +use kalamdb_system::{ + providers::jobs::models::Job, JobFilter, JobSortField, JobStatus, JobType, JobsTableProvider, + SortOrder, +}; use crate::leader_guard::LeaderOnlyJobGuard; -use kalamdb_core::error::KalamDbError; -use kalamdb_system::JobsTableProvider; /// How long to wait before considering a job orphaned (in seconds) const ORPHAN_DETECTION_TIMEOUT_SECS: i64 = 300; // 5 minutes diff --git a/backend/crates/kalamdb-jobs/src/leader_guard.rs b/backend/crates/kalamdb-jobs/src/leader_guard.rs index 02d97f26a..d06e70687 100644 --- a/backend/crates/kalamdb-jobs/src/leader_guard.rs +++ b/backend/crates/kalamdb-jobs/src/leader_guard.rs @@ -19,12 +19,12 @@ //! This ensures that even with leadership changes mid-execution, //! only one node executes each job. 
-use chrono::Utc; -use kalamdb_raft::{CommandExecutor, GroupId, MetaCommand, MetaResponse}; use std::sync::Arc; +use chrono::Utc; use kalamdb_commons::models::{JobId, NodeId}; use kalamdb_core::error::KalamDbError; +use kalamdb_raft::{CommandExecutor, GroupId, MetaCommand, MetaResponse}; /// Result of checking leadership status #[derive(Debug, Clone, PartialEq, Eq)] @@ -177,14 +177,16 @@ impl LeaderOnlyJobGuard { #[cfg(test)] mod tests { - use super::*; + use std::sync::atomic::{AtomicBool, Ordering}; + use async_trait::async_trait; use kalamdb_commons::models::UserId; use kalamdb_raft::{ ClusterInfo, ClusterNodeInfo, DataResponse, NodeRole, NodeStatus, Result, SharedDataCommand, UserDataCommand, }; - use std::sync::atomic::{AtomicBool, Ordering}; + + use super::*; /// Mock CommandExecutor for testing #[derive(Debug)] diff --git a/backend/crates/kalamdb-jobs/src/lib.rs b/backend/crates/kalamdb-jobs/src/lib.rs index c8418f83e..d13535836 100644 --- a/backend/crates/kalamdb-jobs/src/lib.rs +++ b/backend/crates/kalamdb-jobs/src/lib.rs @@ -11,6 +11,7 @@ pub mod executors; pub mod flush_scheduler; pub mod health_monitor; pub mod jobs_manager; +pub(crate) mod scheduler_common; pub mod stream_eviction; // ============================================================================ @@ -24,11 +25,10 @@ pub use executors::{JobContext, JobDecision, JobExecutor as JobExecutorTrait, Jo pub use flush_scheduler::FlushScheduler; pub use health_monitor::HealthMonitor; pub use jobs_manager::JobsManager; -pub use stream_eviction::StreamEvictionScheduler; - // Phase 16 exports (cluster mode) pub use leader_failover::{JobRecoveryAction, LeaderFailoverHandler, RecoveryReport}; pub use leader_guard::{LeaderOnlyJobGuard, LeadershipStatus}; +pub use stream_eviction::StreamEvictionScheduler; // ============================================================================ // JobWaker implementation (bridges kalamdb-core trait → JobsManager) @@ -43,9 +43,10 @@ impl 
kalamdb_core::job_waker::JobWaker for JobsManager { // ============================================================================ // Extension trait: ergonomic `.job_manager()` on AppContext // ============================================================================ -use kalamdb_core::app_context::AppContext; use std::sync::Arc; +use kalamdb_core::app_context::AppContext; + /// Extension trait that provides typed access to the `JobsManager` stored /// inside `AppContext` (which stores it as `Arc`). pub trait AppContextJobsExt { diff --git a/backend/crates/kalamdb-jobs/src/scheduler_common.rs b/backend/crates/kalamdb-jobs/src/scheduler_common.rs new file mode 100644 index 000000000..b33384f00 --- /dev/null +++ b/backend/crates/kalamdb-jobs/src/scheduler_common.rs @@ -0,0 +1,78 @@ +use kalamdb_commons::TableId; +use kalamdb_core::error::KalamDbError; +use kalamdb_system::JobType; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ScheduleErrorKind { + AlreadyActive, + PreValidationSkipped, + Other, +} + +pub(crate) fn hourly_date_key() -> String { + chrono::Utc::now().format("%Y-%m-%d-%H").to_string() +} + +pub(crate) fn hourly_table_idempotency_key( + job_type: JobType, + table_id: &TableId, + date_key: &str, +) -> String { + format!("{}:{}:{}", job_type.short_prefix(), table_id, date_key) +} + +pub(crate) fn classify_schedule_error(error: &KalamDbError) -> ScheduleErrorKind { + match error { + KalamDbError::IdempotentConflict(_) => ScheduleErrorKind::AlreadyActive, + KalamDbError::Other(message) if is_pre_validation_skip(message) => { + ScheduleErrorKind::PreValidationSkipped + }, + _ => { + let message = error.to_string(); + if message.contains("already running") || message.contains("already exists") { + ScheduleErrorKind::AlreadyActive + } else if is_pre_validation_skip(&message) { + ScheduleErrorKind::PreValidationSkipped + } else { + ScheduleErrorKind::Other + } + }, + } +} + +fn is_pre_validation_skip(message: &str) -> bool { + 
message.contains("pre-validation returned false") +} + +#[cfg(test)] +mod tests { + use kalamdb_commons::{NamespaceId, TableName}; + + use super::*; + + #[test] + fn hourly_table_key_uses_job_prefix() { + let table_id = TableId::new(NamespaceId::default(), TableName::new("events")); + + assert_eq!( + hourly_table_idempotency_key(JobType::Flush, &table_id, "2026-04-29-10"), + format!("FL:{}:2026-04-29-10", table_id) + ); + } + + #[test] + fn classify_idempotent_conflict() { + let error = KalamDbError::IdempotentConflict("duplicate".to_string()); + + assert_eq!(classify_schedule_error(&error), ScheduleErrorKind::AlreadyActive); + } + + #[test] + fn classify_pre_validation_skip() { + let error = KalamDbError::Other( + "Job flush skipped: pre-validation returned false (nothing to do)".to_string(), + ); + + assert_eq!(classify_schedule_error(&error), ScheduleErrorKind::PreValidationSkipped); + } +} diff --git a/backend/crates/kalamdb-jobs/src/stream_eviction.rs b/backend/crates/kalamdb-jobs/src/stream_eviction.rs index edf3f2708..f806dd231 100644 --- a/backend/crates/kalamdb-jobs/src/stream_eviction.rs +++ b/backend/crates/kalamdb-jobs/src/stream_eviction.rs @@ -1,11 +1,19 @@ -use crate::JobsManager; -use kalamdb_commons::models::{schemas::TableOptions, TableId}; -use kalamdb_commons::TableType; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_system::JobType; use std::sync::Arc; +use kalamdb_commons::{ + models::{schemas::TableOptions, TableId}, + TableType, +}; +use kalamdb_core::{app_context::AppContext, error::KalamDbError}; +use kalamdb_system::JobType; + +use crate::{ + scheduler_common::{ + classify_schedule_error, hourly_date_key, hourly_table_idempotency_key, ScheduleErrorKind, + }, + JobsManager, +}; + /// Scheduler for stream table eviction jobs pub struct StreamEvictionScheduler; @@ -24,8 +32,9 @@ impl StreamEvictionScheduler { let mut stream_tables_found = 0; let mut jobs_created = 0; + let date_key = 
hourly_date_key(); - //Loop over the tables + // Loop over the tables for table in tables.iter() { // Only process STREAM tables if table.table_type != TableType::Stream { @@ -55,9 +64,8 @@ impl StreamEvictionScheduler { }; // Generate idempotency key (hourly granularity) - let now = chrono::Utc::now(); - let date_key = now.format("%Y-%m-%d-%H").to_string(); - let idempotency_key = format!("SE:{}:{}", table_id, date_key); + let idempotency_key = + hourly_table_idempotency_key(JobType::StreamEviction, &table_id, &date_key); // Create eviction job match jobs_manager @@ -73,23 +81,28 @@ impl StreamEvictionScheduler { ttl_seconds ); }, - Err(e) => { - let err_msg = e.to_string(); + Err(err) => match classify_schedule_error(&err) { // Idempotency errors are expected (job already exists for this hour) - if err_msg.contains("already running") || err_msg.contains("already exists") { + ScheduleErrorKind::AlreadyActive => { log::trace!( "Stream eviction job for {} already exists (idempotent)", table_id ); - } else if err_msg.contains("pre-validation returned false") { + }, + ScheduleErrorKind::PreValidationSkipped => { // Pre-validation skipped job creation (nothing to evict) log::trace!( "Stream eviction job for {} skipped (no expired rows)", table_id ); - } else { - log::warn!("Failed to create stream eviction job for {}: {}", table_id, e); - } + }, + ScheduleErrorKind::Other => { + log::warn!( + "Failed to create stream eviction job for {}: {}", + table_id, + err + ); + }, }, } } diff --git a/backend/crates/kalamdb-live/src/fanout.rs b/backend/crates/kalamdb-live/src/fanout.rs index 1e467ea17..bc53d6c58 100644 --- a/backend/crates/kalamdb-live/src/fanout.rs +++ b/backend/crates/kalamdb-live/src/fanout.rs @@ -5,10 +5,12 @@ //! coordinator and consumed by the notification service. 
use datafusion::scalar::ScalarValue; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{TableId, TransactionId, UserId}; -use kalamdb_commons::websocket::ChangeNotification; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::SeqId, + models::{TableId, TransactionId, UserId}, + websocket::ChangeNotification, +}; /// Owner scope for live fanout: either a shared table or a user-scoped table. /// diff --git a/backend/crates/kalamdb-live/src/helpers/filter_eval.rs b/backend/crates/kalamdb-live/src/helpers/filter_eval.rs index 571524770..461a61472 100644 --- a/backend/crates/kalamdb-live/src/helpers/filter_eval.rs +++ b/backend/crates/kalamdb-live/src/helpers/filter_eval.rs @@ -8,14 +8,19 @@ //! and stored in SubscriptionState. This module provides evaluation functions //! to match row data against the expression. -use crate::error::LiveError; -use datafusion::scalar::ScalarValue; -use datafusion::sql::sqlparser::ast::{BinaryOperator, Expr, Statement, Value}; -use datafusion::sql::sqlparser::dialect::PostgreSqlDialect; -use datafusion::sql::sqlparser::parser::Parser; +use datafusion::{ + scalar::ScalarValue, + sql::sqlparser::{ + ast::{BinaryOperator, Expr, Statement, Value}, + dialect::PostgreSqlDialect, + parser::Parser, + }, +}; use kalamdb_commons::models::rows::Row; use regex::RegexBuilder; +use crate::error::LiveError; + /// Parse a WHERE clause string into an Expr AST /// /// # Arguments @@ -411,10 +416,12 @@ fn lookup_column_value(row_data: &Row, column_name: &str) -> Option #[cfg(test)] mod tests { - use super::*; - use serde_json::json; use std::collections::BTreeMap; + use serde_json::json; + + use super::*; + fn to_row(value: serde_json::Value) -> Row { let object = value.as_object().expect("test rows must be JSON objects").clone(); diff --git a/backend/crates/kalamdb-live/src/helpers/initial_data.rs b/backend/crates/kalamdb-live/src/helpers/initial_data.rs index 
0ea572435..a7efe8b30 100644 --- a/backend/crates/kalamdb-live/src/helpers/initial_data.rs +++ b/backend/crates/kalamdb-live/src/helpers/initial_data.rs @@ -4,20 +4,22 @@ // Provides "changes since timestamp" functionality to populate client state // before real-time notifications begin. -use crate::error::{LiveError, LiveResultExt}; -use crate::traits::{LiveSchemaLookup, LiveSqlExecutor}; +use std::{collections::BTreeMap, fmt::Write, sync::Arc}; + use datafusion::arrow::array::{Array, Int64Array}; use datafusion_common::ScalarValue; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{ReadContext, TableId}; -use kalamdb_commons::Role; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::SeqId, + models::{rows::Row, ReadContext, TableId}, + Role, TableType, +}; use once_cell::sync::OnceCell; -use std::collections::BTreeMap; -use std::fmt::Write; -use std::sync::Arc; + +use crate::{ + error::{LiveError, LiveResultExt}, + traits::{LiveSchemaLookup, LiveSqlExecutor}, +}; /// Options for fetching initial data when subscribing to a live query #[derive(Debug, Clone)] @@ -30,6 +32,12 @@ pub struct InitialDataOptions { /// Used to define the snapshot boundary pub until_seq: Option, + /// Fetch changes after this deterministic commit sequence (exclusive). + pub since_commit_seq: Option, + + /// Fetch changes up to this deterministic commit sequence (inclusive). 
+ pub until_commit_seq: Option, + /// Maximum number of rows to return (batch size) /// Default: 100 pub limit: usize, @@ -48,6 +56,8 @@ impl Default for InitialDataOptions { Self { since_seq: None, until_seq: None, + since_commit_seq: None, + until_commit_seq: None, limit: 100, include_deleted: false, fetch_last: false, @@ -61,6 +71,8 @@ impl InitialDataOptions { Self { since_seq: Some(seq), until_seq: None, + since_commit_seq: None, + until_commit_seq: None, limit: 100, include_deleted: false, fetch_last: false, @@ -73,6 +85,8 @@ impl InitialDataOptions { Self { since_seq: None, until_seq: None, + since_commit_seq: None, + until_commit_seq: None, limit, include_deleted: false, fetch_last: true, @@ -84,6 +98,8 @@ impl InitialDataOptions { Self { since_seq, until_seq, + since_commit_seq: None, + until_commit_seq: None, limit: batch_size, include_deleted: false, fetch_last: false, @@ -101,6 +117,17 @@ impl InitialDataOptions { self.include_deleted = true; self } + + /// Set deterministic commit-sequence resume bounds. + pub fn with_commit_range( + mut self, + since_commit_seq: Option, + until_commit_seq: Option, + ) -> Self { + self.since_commit_seq = since_commit_seq; + self.until_commit_seq = until_commit_seq; + self + } } /// Result of an initial data fetch @@ -113,11 +140,17 @@ pub struct InitialDataResult { /// Used for pagination (passed as since_seq in next request) pub last_seq: Option, + /// Deterministic commit sequence of the last row in the result. + pub last_commit_seq: Option, + /// Whether there are more rows available in the snapshot range pub has_more: bool, /// The snapshot boundary used for this fetch pub snapshot_end_seq: Option, + + /// Deterministic snapshot boundary used for this fetch. 
+ pub snapshot_end_commit_seq: Option, } /// Service for fetching initial data when subscribing to live queries @@ -177,8 +210,10 @@ impl InitialDataFetcher { return Ok(InitialDataResult { rows: Vec::new(), last_seq: None, + last_commit_seq: None, has_more: false, snapshot_end_seq: None, + snapshot_end_commit_seq: None, }); } @@ -190,12 +225,16 @@ impl InitialDataFetcher { // Build SELECT clause: either specific columns or * // Always include _seq column for pagination, even if not in projections + let has_commit_seq = self.table_has_column(table_id, SystemColumnNames::COMMIT_SEQ)?; let select_clause = if let Some(cols) = projections { - // Ensure _seq is always included for pagination tracking + // Ensure system resume columns are always included for pagination tracking. let mut columns = cols.to_vec(); if !columns.iter().any(|c| c == SystemColumnNames::SEQ) { columns.push(SystemColumnNames::SEQ.to_string()); } + if has_commit_seq && !columns.iter().any(|c| c == SystemColumnNames::COMMIT_SEQ) { + columns.push(SystemColumnNames::COMMIT_SEQ.to_string()); + } columns.join(", ") } else { "*".to_string() @@ -212,7 +251,17 @@ impl InitialDataFetcher { } // Add ORDER BY — use write! to avoid intermediate format! 
allocations - if options.fetch_last { + if has_commit_seq && options.since_commit_seq.is_some() { + let direction = if options.fetch_last { "DESC" } else { "ASC" }; + let _ = write!( + sql, + " ORDER BY {} {}, {} {}", + SystemColumnNames::COMMIT_SEQ, + direction, + SystemColumnNames::SEQ, + direction + ); + } else if options.fetch_last { let _ = write!(sql, " ORDER BY {} DESC", SystemColumnNames::SEQ); } else { let _ = write!(sql, " ORDER BY {} ASC", SystemColumnNames::SEQ); @@ -228,7 +277,7 @@ impl InitialDataFetcher { // Convert batches to Rows // Pre-allocate with limit+1 since that's the max we'll fetch - let mut rows_with_seq: Vec<(SeqId, Row)> = Vec::with_capacity(limit + 1); + let mut rows_with_seq: Vec<(SeqId, Option, Row)> = Vec::with_capacity(limit + 1); for batch in batches { let schema = batch.schema(); @@ -243,6 +292,17 @@ impl InitialDataFetcher { .ok_or_else(|| { LiveError::Other(format!("{} column is not Int64", SystemColumnNames::SEQ)) })?; + let commit_seq_array = if has_commit_seq { + let commit_idx = schema.index_of(SystemColumnNames::COMMIT_SEQ).map_err(|_| { + LiveError::Other(format!( + "Result missing {} column", + SystemColumnNames::COMMIT_SEQ + )) + })?; + Some(batch.column(commit_idx)) + } else { + None + }; let num_rows = batch.num_rows(); let num_cols = batch.num_columns(); @@ -251,6 +311,9 @@ impl InitialDataFetcher { let mut row_map = BTreeMap::new(); for col_idx in 0..num_cols { let col_name = schema.field(col_idx).name(); + if col_name == SystemColumnNames::COMMIT_SEQ { + continue; + } let col_array = batch.column(col_idx); let value = ScalarValue::try_from_array(col_array, row_idx) .into_serialization_error("Failed to convert to ScalarValue")?; @@ -259,11 +322,26 @@ impl InitialDataFetcher { let seq_val = seq_array.value(row_idx); let seq_id = SeqId::from(seq_val); - rows_with_seq.push((seq_id, Row::new(row_map))); + let commit_seq = commit_seq_array + .as_ref() + .and_then(|array| ScalarValue::try_from_array(array, row_idx).ok()) 
+ .and_then(|value| match value { + ScalarValue::UInt64(Some(commit_seq)) => Some(commit_seq), + ScalarValue::Int64(Some(commit_seq)) if commit_seq >= 0 => { + Some(commit_seq as u64) + }, + _ => None, + }); + rows_with_seq.push((seq_id, commit_seq, Row::new(row_map))); } } - rows_with_seq.sort_unstable_by_key(|(seq_id, _)| *seq_id); + if has_commit_seq && options.since_commit_seq.is_some() { + rows_with_seq + .sort_unstable_by_key(|(seq_id, commit_seq, _)| (commit_seq.unwrap_or(0), *seq_id)); + } else { + rows_with_seq.sort_unstable_by_key(|(seq_id, _, _)| *seq_id); + } if options.fetch_last { rows_with_seq.reverse(); } @@ -283,55 +361,103 @@ impl InitialDataFetcher { } // Determine snapshot boundary - let last_seq = rows_with_seq.last().map(|(seq, _)| *seq); + let last_seq = rows_with_seq.last().map(|(seq, _, _)| *seq); + let last_commit_seq = rows_with_seq.last().and_then(|(_, commit_seq, _)| *commit_seq); let snapshot_end_seq = options.until_seq.or(last_seq); + let snapshot_end_commit_seq = options.until_commit_seq.or(last_commit_seq); - let rows: Vec = rows_with_seq.into_iter().map(|(_, row)| row).collect(); + let rows: Vec = rows_with_seq.into_iter().map(|(_, _, row)| row).collect(); Ok(InitialDataResult { rows, last_seq, + last_commit_seq, has_more, snapshot_end_seq, + snapshot_end_commit_seq, }) } /// Compute snapshot end sequence for a subscription. /// - /// Fast path: since `_seq` is a Snowflake ID with embedded timestamp, the - /// maximum possible `_seq` at the current wall-clock millisecond is an - /// upper bound on every row already written. Any write performed *after* - /// this boundary is computed will get a strictly larger `_seq` (different - /// timestamp component) and therefore flow through the live notification - /// path, not the initial snapshot. 
- /// - /// This removes an entire DataFusion execution from the subscribe critical - /// path (previously ~several ms to tens of ms depending on planning cost), - /// which is one of the biggest wins for time-to-first-row. + /// Compute the snapshot boundary from rows already materialized on this node. /// - /// All arguments are accepted for API compatibility; `role`, `table_id`, - /// `table_type`, `options`, and `where_clause` are unused on the fast path. + /// The boundary deliberately uses local `MAX(_seq)` instead of a wall-clock + /// Snowflake upper bound. On a follower, the wall-clock bound can include + /// leader commits that have not applied locally yet; using the local max keeps + /// the initial snapshot and buffered notification gate aligned with this + /// replica's actual storage state. pub async fn compute_snapshot_end_seq( &self, - _live_id: &kalamdb_commons::models::LiveQueryId, - _role: Role, - _table_id: &TableId, - _table_type: TableType, - _options: &InitialDataOptions, - _where_clause: Option<&str>, + live_id: &kalamdb_commons::models::LiveQueryId, + role: Role, + table_id: &TableId, + table_type: TableType, + options: &InitialDataOptions, + where_clause: Option<&str>, ) -> Result, LiveError> { - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(SeqId::EPOCH); - - match SeqId::max_id_for_timestamp(now_ms) { - Ok(seq) => Ok(Some(seq)), - Err(e) => Err(LiveError::Other(format!("Failed to compute snapshot boundary: {}", e))), + self.compute_snapshot_end_seq_sql_fallback( + live_id, + role, + table_id, + table_type, + options, + where_clause, + ) + .await + } + + /// Compute the deterministic commit-sequence snapshot boundary for tables + /// that expose `_commit_seq`. 
+ pub async fn compute_snapshot_end_commit_seq( + &self, + live_id: &kalamdb_commons::models::LiveQueryId, + role: Role, + table_id: &TableId, + table_type: TableType, + options: &InitialDataOptions, + where_clause: Option<&str>, + ) -> Result, LiveError> { + if !self.table_has_column(table_id, SystemColumnNames::COMMIT_SEQ)? { + return Ok(None); + } + + let user_id = live_id.user_id().clone(); + let table_name = table_id.full_name(); + let mut sql = format!( + "SELECT MAX({}) AS max_commit_seq FROM {}", + SystemColumnNames::COMMIT_SEQ, + table_name + ); + + let where_clauses = + self.build_where_clauses(table_id, table_type, options, where_clause)?; + if !where_clauses.is_empty() { + sql.push_str(" WHERE "); + sql.push_str(&where_clauses.join(" AND ")); + } + + let batches = self + .sql_executor()? + .execute_for_batches(&sql, user_id, role, ReadContext::Internal) + .await?; + + if batches.is_empty() || batches[0].num_rows() == 0 { + return Ok(None); + } + + let batch = &batches[0]; + let value = ScalarValue::try_from_array(batch.column(0), 0) + .into_serialization_error("Failed to convert max_commit_seq")?; + + match value { + ScalarValue::UInt64(Some(commit_seq)) => Ok(Some(commit_seq)), + ScalarValue::Int64(Some(commit_seq)) if commit_seq >= 0 => Ok(Some(commit_seq as u64)), + ScalarValue::Null | ScalarValue::UInt64(None) | ScalarValue::Int64(None) => Ok(None), + _ => Err(LiveError::Other("max_commit_seq column is not an integer".to_string())), } } - #[allow(dead_code)] async fn compute_snapshot_end_seq_sql_fallback( &self, live_id: &kalamdb_commons::models::LiveQueryId, @@ -386,11 +512,46 @@ impl InitialDataFetcher { ) -> Result, LiveError> { let mut where_clauses = Vec::new(); - if let Some(since) = options.since_seq { - where_clauses.push(format!("{} > {}", SystemColumnNames::SEQ, since.as_i64())); - } - if let Some(until) = options.until_seq { - where_clauses.push(format!("{} <= {}", SystemColumnNames::SEQ, until.as_i64())); + let has_commit_seq = 
self.table_has_column(table_id, SystemColumnNames::COMMIT_SEQ)?; + + if has_commit_seq { + match (options.since_commit_seq, options.since_seq) { + (Some(since_commit), Some(since_seq)) => where_clauses.push(format!( + "({commit_col} > {since_commit} OR ({commit_col} = {since_commit} AND \ + {seq_col} > {since_seq}))", + commit_col = SystemColumnNames::COMMIT_SEQ, + seq_col = SystemColumnNames::SEQ, + since_seq = since_seq.as_i64() + )), + (Some(since_commit), None) => where_clauses.push(format!( + "{} > {}", + SystemColumnNames::COMMIT_SEQ, + since_commit + )), + (None, Some(since_seq)) => where_clauses.push(format!( + "{} > {}", + SystemColumnNames::SEQ, + since_seq.as_i64() + )), + (None, None) => {}, + } + + if let Some(until_commit_seq) = options.until_commit_seq { + where_clauses.push(format!( + "{} <= {}", + SystemColumnNames::COMMIT_SEQ, + until_commit_seq + )); + } else if let Some(until_seq) = options.until_seq { + where_clauses.push(format!("{} <= {}", SystemColumnNames::SEQ, until_seq.as_i64())); + } + } else { + if let Some(since) = options.since_seq { + where_clauses.push(format!("{} > {}", SystemColumnNames::SEQ, since.as_i64())); + } + if let Some(until) = options.until_seq { + where_clauses.push(format!("{} <= {}", SystemColumnNames::SEQ, until.as_i64())); + } } if !options.include_deleted @@ -415,8 +576,53 @@ impl InitialDataFetcher { #[cfg(test)] mod tests { + use arrow::{ + array::Int64Array, + datatypes::{DataType, Field, Schema}, + }; + use async_trait::async_trait; + use datafusion::arrow::record_batch::RecordBatch; + use kalamdb_commons::{ + models::{LiveQueryId, NamespaceId, TableName, UserId}, + schemas::TableDefinition, + }; + use parking_lot::Mutex; + use super::*; + struct EmptySchemaLookup; + + impl LiveSchemaLookup for EmptySchemaLookup { + fn get_table_definition(&self, _table_id: &TableId) -> Option> { + None + } + + fn get_arrow_schema(&self, _table_id: &TableId) -> Result, LiveError> { + Ok(Arc::new(Schema::empty())) + } + } + + 
struct MaxSeqExecutor { + seen_sql: Mutex>, + } + + #[async_trait] + impl LiveSqlExecutor for MaxSeqExecutor { + async fn execute_for_batches( + &self, + sql: &str, + _user_id: kalamdb_commons::models::UserId, + _role: Role, + _read_context: ReadContext, + ) -> Result, LiveError> { + *self.seen_sql.lock() = Some(sql.to_string()); + let schema = Arc::new(Schema::new(vec![Field::new("max_seq", DataType::Int64, true)])); + let batch = RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![42]))]) + .map_err(|err| LiveError::Other(err.to_string()))?; + Ok(vec![batch]) + } + } + #[test] fn test_initial_data_options_default() { let options = InitialDataOptions::default(); @@ -454,4 +660,39 @@ mod tests { assert_eq!(options.limit, 200); assert!(options.include_deleted); } + + #[tokio::test] + async fn snapshot_boundary_uses_local_max_seq_query() { + let fetcher = InitialDataFetcher::new(Arc::new(EmptySchemaLookup)); + let executor = Arc::new(MaxSeqExecutor { + seen_sql: Mutex::new(None), + }); + fetcher.set_sql_executor(executor.clone()); + + let table_id = TableId::new(NamespaceId::from("app"), TableName::from("items")); + let live_id = LiveQueryId::new( + UserId::new("u1"), + kalamdb_commons::models::ConnectionId::new("c1"), + "sub1".to_string(), + ); + let options = InitialDataOptions::default().with_deleted(); + + let boundary = fetcher + .compute_snapshot_end_seq( + &live_id, + Role::User, + &table_id, + TableType::User, + &options, + None, + ) + .await + .expect("snapshot boundary"); + + assert_eq!(boundary, Some(SeqId::from(42))); + assert_eq!( + executor.seen_sql.lock().as_deref(), + Some("SELECT MAX(_seq) AS max_seq FROM app.items") + ); + } } diff --git a/backend/crates/kalamdb-live/src/lib.rs b/backend/crates/kalamdb-live/src/lib.rs index f73c10f86..8f56adb55 100644 --- a/backend/crates/kalamdb-live/src/lib.rs +++ b/backend/crates/kalamdb-live/src/lib.rs @@ -20,9 +20,23 @@ pub mod subscription; pub mod traits; // Re-export types from kalamdb-commons 
(canonical source) -pub use kalamdb_commons::models::{ConnectionId, LiveQueryId, TableId, UserId}; -pub use kalamdb_commons::NodeId; - +// Re-export fanout types +pub use fanout::{ + CommitSideEffectPlan, FanoutDispatchPlan, FanoutOwnerScope, TransactionSideEffects, +}; +// Re-export from helpers +pub use helpers::{ + filter_eval::{matches as filter_matches, parse_where_clause}, + initial_data::{InitialDataFetcher, InitialDataOptions, InitialDataResult}, +}; +pub use kalamdb_commons::{ + models::{ConnectionId, LiveQueryId, TableId, UserId}, + NodeId, +}; +// Re-export from kalamdb-publisher crate +pub use kalamdb_publisher::{TopicCacheStats, TopicPrimaryKeyLookup, TopicPublisherService}; +// Re-export from manager modules +pub use manager::{ConnectionsManager, LiveQueryManager}; // Re-export from models (consolidated model definitions) pub use models::{ BufferedNotification, ChangeNotification, ChangeType, ConnectionEvent, ConnectionRegistration, @@ -30,24 +44,6 @@ pub use models::{ NotificationSender, SharedConnectionState, SubscriptionFlowControl, SubscriptionHandle, SubscriptionResult, SubscriptionState, EVENT_CHANNEL_CAPACITY, NOTIFICATION_CHANNEL_CAPACITY, }; - -// Re-export from manager modules -pub use manager::{ConnectionsManager, LiveQueryManager}; - -// Re-export from helpers -pub use helpers::{ - filter_eval::{matches as filter_matches, parse_where_clause}, - initial_data::{InitialDataFetcher, InitialDataOptions, InitialDataResult}, -}; - // Re-export from other modules pub use notification::NotificationService; pub use subscription::SubscriptionService; - -// Re-export fanout types -pub use fanout::{ - CommitSideEffectPlan, FanoutDispatchPlan, FanoutOwnerScope, TransactionSideEffects, -}; - -// Re-export from kalamdb-publisher crate -pub use kalamdb_publisher::{TopicCacheStats, TopicPrimaryKeyLookup, TopicPublisherService}; diff --git a/backend/crates/kalamdb-live/src/manager/connections_manager.rs 
b/backend/crates/kalamdb-live/src/manager/connections_manager.rs index fe04166e7..18b6166b0 100644 --- a/backend/crates/kalamdb-live/src/manager/connections_manager.rs +++ b/backend/crates/kalamdb-live/src/manager/connections_manager.rs @@ -19,23 +19,31 @@ //! - Dead TCP detected at the client (write failure) rather than waiting for timeout //! - O(N) atomic reads per tick — no mpsc events, no synchronised wakeups -use super::super::models::{ - ConnectionEvent, ConnectionRegistration, ConnectionState, SharedConnectionState, - SubscriptionHandle, EVENT_CHANNEL_CAPACITY, NOTIFICATION_CHANNEL_CAPACITY, +use std::{ + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, + }, + time::{Duration, Instant}, }; -use dashmap::DashMap; -use kalamdb_commons::models::{ConnectionId, ConnectionInfo, LiveQueryId, TableId, UserId}; -use kalamdb_commons::NodeId; + +use dashmap::{mapref::entry::Entry, DashMap}; #[cfg(any(test, feature = "test-helpers"))] use kalamdb_commons::WireNotification; +use kalamdb_commons::{ + models::{ConnectionId, ConnectionInfo, LiveQueryId, TableId, UserId}, + NodeId, +}; use kalamdb_system::{LiveQuery, LiveQueryStatus}; use log::{debug, info, warn}; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; +use super::super::models::{ + ConnectionEvent, ConnectionRegistration, ConnectionState, SharedConnectionState, + SubscriptionHandle, EVENT_CHANNEL_CAPACITY, NOTIFICATION_CHANNEL_CAPACITY, +}; + /// Connections Manager /// /// Responsibilities: @@ -185,9 +193,7 @@ impl ConnectionsManager { return None; } - // DoS protection: reject if at max connections - let current = self.total_connections.load(Ordering::Acquire); - if current >= self.max_connections { + if !self.try_reserve_connection_slot() { warn!( "Rejecting connection {}: max connections ({}) reached", connection_id, self.max_connections @@ -203,9 +209,16 @@ impl 
ConnectionsManager { ConnectionState::new(connection_id.clone(), client_ip, notification_tx, event_tx); let shared_state = Arc::new(state); - self.connections.insert(connection_id.clone(), Arc::clone(&shared_state)); - let count = self.total_connections.fetch_add(1, Ordering::AcqRel) + 1; - self.peak_connections.fetch_max(count, Ordering::AcqRel); + match self.connections.entry(connection_id.clone()) { + Entry::Vacant(entry) => { + entry.insert(Arc::clone(&shared_state)); + }, + Entry::Occupied(_) => { + self.release_connection_slot(); + warn!("Rejecting duplicate connection id: {}", connection_id); + return None; + }, + } Some(ConnectionRegistration { connection_id, @@ -215,6 +228,38 @@ impl ConnectionsManager { }) } + fn try_reserve_connection_slot(&self) -> bool { + let mut current = self.total_connections.load(Ordering::Acquire); + loop { + if current >= self.max_connections { + return false; + } + + match self.total_connections.compare_exchange_weak( + current, + current + 1, + Ordering::AcqRel, + Ordering::Acquire, + ) { + Ok(_) => { + self.peak_connections.fetch_max(current + 1, Ordering::AcqRel); + return true; + }, + Err(observed) => current = observed, + } + } + } + + fn release_connection_slot(&self) { + if self + .total_connections + .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| current.checked_sub(1)) + .is_err() + { + warn!("Connection count release requested while count was already zero"); + } + } + /// Unregister a connection and all its subscriptions /// /// Returns the list of removed LiveQueryIds for cleanup. 
@@ -223,7 +268,7 @@ impl ConnectionsManager { let removed_live_ids = if let Some((_, shared_state)) = self.connections.remove(connection_id) { - self.total_connections.fetch_sub(1, Ordering::AcqRel); + self.release_connection_slot(); // Remove from user_table_subscriptions and shared_table_subscriptions indices if let Some(user_id) = shared_state.user_id() { @@ -276,7 +321,8 @@ impl ConnectionsManager { // NOTE: Actual subscription storage is in ConnectionState.subscriptions // These methods maintain secondary indices for efficient notification routing - /// Add subscription to user_table_subscriptions index (called by LiveQueryManager after adding to ConnectionState) + /// Add subscription to user_table_subscriptions index (called by LiveQueryManager after adding + /// to ConnectionState) /// /// Uses lightweight SubscriptionHandle for the index instead of cloning full state. pub fn index_subscription( @@ -343,7 +389,8 @@ impl ConnectionsManager { self.peak_subscriptions.fetch_max(count, Ordering::AcqRel); } - /// Remove subscription from indices (called by LiveQueryManager after removing from ConnectionState) + /// Remove subscription from indices (called by LiveQueryManager after removing from + /// ConnectionState) pub fn unindex_subscription( &self, user_id: &UserId, @@ -465,7 +512,8 @@ impl ConnectionsManager { Ok(_) => {}, Err(mpsc::error::TrySendError::Full(_)) | Err(mpsc::error::TrySendError::Closed(_)) => { - // Handler is likely stalled or gone; unregister immediately so shutdown doesn't wait forever. + // Handler is likely stalled or gone; unregister immediately so shutdown doesn't + // wait forever. 
force_unregister.push(conn_id); }, } @@ -549,7 +597,8 @@ impl ConnectionsManager { table_name: subscription.table_id.table_name().clone(), user_id: user_id.clone(), query: runtime_metadata.query().to_string(), - options: runtime_metadata.options_json() + options: runtime_metadata + .options_json() .and_then(|s| serde_json::from_str(s).ok()), status: LiveQueryStatus::Active, created_at: runtime_metadata.created_at_ms(), @@ -902,9 +951,10 @@ mod tests { // ==================== Shared Table Subscription Tests ==================== - use super::super::super::models::{SubscriptionFlowControl, SubscriptionRuntimeMetadata}; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::super::super::models::{SubscriptionFlowControl, SubscriptionRuntimeMetadata}; + /// Helper: create a SubscriptionHandle with pre-completed flow control fn create_test_handle( notification_tx: tokio::sync::mpsc::Sender>, @@ -1007,6 +1057,7 @@ mod tests { initial_load: Some(super::super::super::models::InitialLoadState { batch_size: 100, snapshot_end_seq: None, + snapshot_end_commit_seq: None, current_batch_num: 0, flow_control: Arc::new(SubscriptionFlowControl::new()), }), diff --git a/backend/crates/kalamdb-live/src/manager/queries_manager.rs b/backend/crates/kalamdb-live/src/manager/queries_manager.rs index d848210ac..e4468589c 100644 --- a/backend/crates/kalamdb-live/src/manager/queries_manager.rs +++ b/backend/crates/kalamdb-live/src/manager/queries_manager.rs @@ -11,22 +11,31 @@ //! When data is applied on any node (leader or follower), the provider's methods //! fire local notifications - no need for separate HTTP cluster broadcast. 
-use crate::error::LiveError; -use crate::helpers::filter_eval::parse_where_clause; -use crate::helpers::initial_data::{InitialDataFetcher, InitialDataOptions, InitialDataResult}; -use crate::manager::ConnectionsManager; -use crate::models::{SharedConnectionState, SubscriptionResult}; -use crate::subscription::SubscriptionService; -use crate::traits::LiveSchemaLookup; +use std::sync::Arc; + use datafusion::sql::sqlparser::ast::Expr; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{ConnectionId, LiveQueryId, NamespaceId, TableId, TableName, UserId}; -use kalamdb_commons::schemas::{SchemaField, TableDefinition}; -use kalamdb_commons::websocket::SubscriptionRequest; -use kalamdb_commons::{NodeId, Role}; +use kalamdb_commons::{ + ids::SeqId, + models::{ConnectionId, LiveQueryId, NamespaceId, TableId, TableName, UserId}, + schemas::{SchemaField, TableDefinition}, + websocket::SubscriptionRequest, + NodeId, Role, +}; use kalamdb_sql::parser::query_parser::QueryParser; use kalamdb_system::LiveQuery as SystemLiveQuery; -use std::sync::Arc; +use tokio::sync::OnceCell; + +use crate::{ + error::LiveError, + helpers::{ + filter_eval::parse_where_clause, + initial_data::{InitialDataFetcher, InitialDataOptions, InitialDataResult}, + }, + manager::ConnectionsManager, + models::{SharedConnectionState, SubscriptionResult}, + subscription::SubscriptionService, + traits::{LiveApplyBarrier, LiveSchemaLookup}, +}; /// Live query manager pub struct LiveQueryManager { @@ -34,6 +43,7 @@ pub struct LiveQueryManager { registry: Arc, initial_data_fetcher: Arc, schema_lookup: Arc, + apply_barrier: OnceCell>, node_id: NodeId, // Delegated services @@ -53,19 +63,25 @@ impl LiveQueryManager { // Row-level security filters data to only their rows during query execution Ok(()) }, - kalamdb_commons::TableType::System if !is_admin => Err(LiveError::PermissionDenied( - format!("Cannot subscribe to system table '{}': insufficient privileges. 
Only DBA and system roles can subscribe to system tables.", table_id) - )), + kalamdb_commons::TableType::System if !is_admin => { + Err(LiveError::PermissionDenied(format!( + "Cannot subscribe to system table '{}': insufficient privileges. Only DBA and \ + system roles can subscribe to system tables.", + table_id + ))) + }, kalamdb_commons::TableType::Shared => { // SHARED tables require access-level check: // - Public: any authenticated user can subscribe // - Private/Restricted: only DBA/System/Service roles - let access_level = kalamdb_session::permissions::shared_table_access_level(table_def); + let access_level = + kalamdb_session::permissions::shared_table_access_level(table_def); if kalamdb_session::permissions::can_access_shared_table(access_level, user_role) { Ok(()) } else { Err(LiveError::PermissionDenied(format!( - "Cannot subscribe to shared table '{}': access level '{}' requires elevated privileges.", + "Cannot subscribe to shared table '{}': access level '{}' requires \ + elevated privileges.", table_id, access_level ))) } @@ -119,6 +135,7 @@ impl LiveQueryManager { registry, initial_data_fetcher, schema_lookup, + apply_barrier: OnceCell::new(), node_id, subscription_service, } @@ -129,6 +146,13 @@ impl LiveQueryManager { self.initial_data_fetcher.set_sql_executor(executor); } + /// Wire an optional apply barrier used before follower snapshots. + pub fn set_apply_barrier(&self, barrier: Arc) { + if self.apply_barrier.set(barrier).is_err() { + log::warn!("LiveApplyBarrier already initialized in LiveQueryManager"); + } + } + /// Get the node_id for this manager pub fn node_id(&self) -> &NodeId { &self.node_id @@ -211,12 +235,11 @@ impl LiveQueryManager { let table_id = TableId::new(namespace_id.clone(), table_name); if namespace_id.is_system_namespace() && !matches!(user_role, Role::Dba | Role::System) { - return Err(LiveError::PermissionDenied( - format!( - "Cannot subscribe to system table '{}': insufficient privileges. 
Only DBA and system roles can subscribe to system tables.", - table_id - ), - )); + return Err(LiveError::PermissionDenied(format!( + "Cannot subscribe to system table '{}': insufficient privileges. Only DBA and \ + system roles can subscribe to system tables.", + table_id + ))); } // Look up table definition from in-memory cache. @@ -232,6 +255,14 @@ impl LiveQueryManager { // - SHARED tables: Access-level gated (public OK, private/restricted require elevated role) Self::validate_table_subscription_permission(user_role, &table_def, &table_id)?; + if initial_data_options.is_some() { + if let Some(barrier) = self.apply_barrier.get() { + barrier + .wait_for_table_apply_barrier(&table_id, table_def.table_type, &user_id) + .await?; + } + } + // Determine batch size let batch_size = request .options @@ -298,11 +329,28 @@ impl LiveQueryManager { .unwrap_or_else(|| SeqId::from(0)) }; + let snapshot_commit_seq = if fetch_options.until_commit_seq.is_some() { + fetch_options.until_commit_seq + } else { + self.initial_data_fetcher + .compute_snapshot_end_commit_seq( + &live_id, + user_role, + &table_id, + table_def.table_type, + &fetch_options, + where_clause.as_deref(), + ) + .await? 
+ }; + fetch_options.until_seq = Some(snapshot_seq); - self.subscription_service.update_snapshot_end_seq( + fetch_options.until_commit_seq = snapshot_commit_seq; + self.subscription_service.update_snapshot_boundaries( connection_state, &request.id, - snapshot_seq, + Some(snapshot_seq), + snapshot_commit_seq, ); self.initial_data_fetcher @@ -399,7 +447,8 @@ impl LiveQueryManager { since_seq, initial_load.snapshot_end_seq, initial_load.batch_size, - ); + ) + .with_commit_range(None, initial_load.snapshot_end_commit_seq); self.initial_data_fetcher .fetch_initial_data( @@ -465,12 +514,17 @@ impl LiveQueryManager { #[cfg(test)] mod tests { + use kalamdb_commons::{ + models::{NamespaceId, TableId, TableName}, + schemas::{ + table_options::{SharedTableOptions, SystemTableOptions}, + TableDefinition, TableOptions, + }, + Role, TableAccess, TableType, + }; + use super::LiveQueryManager; use crate::error::LiveError; - use kalamdb_commons::models::{NamespaceId, TableId, TableName}; - use kalamdb_commons::schemas::table_options::{SharedTableOptions, SystemTableOptions}; - use kalamdb_commons::schemas::{TableDefinition, TableOptions}; - use kalamdb_commons::{Role, TableAccess, TableType}; fn table_id() -> TableId { TableId::new(NamespaceId::from("shared"), TableName::from("events")) diff --git a/backend/crates/kalamdb-live/src/models/connection.rs b/backend/crates/kalamdb-live/src/models/connection.rs index 3433456c1..48a0b4fb6 100644 --- a/backend/crates/kalamdb-live/src/models/connection.rs +++ b/backend/crates/kalamdb-live/src/models/connection.rs @@ -2,23 +2,29 @@ //! //! These models support both live query WebSocket subscriptions and topic consumer connections. 
+use std::{ + collections::{HashMap, VecDeque}, + hash::{DefaultHasher, Hash, Hasher}, + sync::{ + atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}, + Arc, OnceLock, Weak, + }, + time::{Instant, SystemTime, UNIX_EPOCH}, +}; + use datafusion::sql::sqlparser::ast::Expr; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{ConnectionId, ConnectionInfo, LiveQueryId, TableId, UserId}; -use kalamdb_commons::websocket::WireNotification; -use kalamdb_commons::websocket::{CompressionType, ProtocolOptions, SerializationType}; -use kalamdb_commons::Role; +use kalamdb_commons::{ + ids::SeqId, + models::{ConnectionId, ConnectionInfo, LiveQueryId, TableId, UserId}, + websocket::{CompressionType, ProtocolOptions, SerializationType, WireNotification}, + Role, +}; use parking_lot::{Mutex, RwLock}; -use std::collections::{HashMap, VecDeque}; -use std::hash::{DefaultHasher, Hash, Hasher}; -use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering}; -use std::sync::{Arc, OnceLock, Weak}; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; use tokio::sync::mpsc; -/// Get current epoch time in milliseconds (for lock-free heartbeat tracking) +/// Get current epoch time in milliseconds (for lock-free heartbeat and metadata tracking) #[inline] -fn epoch_millis() -> u64 { +pub(crate) fn epoch_millis() -> u64 { SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_millis() as u64 } @@ -70,17 +76,19 @@ fn intern_subscription_str(value: &str) -> Arc { subscription_string_pool().lock().intern(value) } +/// Maximum live-query subscriptions allowed on a single WebSocket connection. +pub const MAX_SUBSCRIPTIONS_PER_CONNECTION: usize = 100; + /// Maximum pending notifications per connection before dropping new ones. -/// Keep this modest: large snapshot catch-up is handled by per-subscription -/// flow control, while a smaller live buffer reduces worst-case memory per -/// slow connection. 
At 100k concurrent idle connections this directly -/// governs the per-connection memory floor. -pub const NOTIFICATION_CHANNEL_CAPACITY: usize = 256; +/// This must cover one full table fanout for a saturated shared WebSocket. +/// The benchmark and backend contract allow 100 subscriptions per connection; +/// 128 keeps a small cushion while staying tight for idle-connection memory. +pub const NOTIFICATION_CHANNEL_CAPACITY: usize = 128; /// Maximum pending control events per connection. /// Only a few event kinds exist (auth timeout, heartbeat timeout, shutdown), /// so a small queue is sufficient and reduces fixed per-connection footprint. -pub const EVENT_CHANNEL_CAPACITY: usize = 8; +pub const EVENT_CHANNEL_CAPACITY: usize = 1; /// Maximum buffered notifications per subscription while initial snapshot loading is in progress. /// @@ -186,7 +194,12 @@ impl SubscriptionRuntimeMetadata { #[inline] pub fn record_delivery(&self) { - self.last_update_ms.store(epoch_millis() as i64, Ordering::Release); + self.record_delivery_at(epoch_millis()); + } + + #[inline] + pub fn record_delivery_at(&self, epoch_millis: u64) { + self.last_update_ms.store(epoch_millis as i64, Ordering::Release); self.changes.fetch_add(1, Ordering::AcqRel); } } @@ -195,6 +208,7 @@ impl SubscriptionRuntimeMetadata { #[derive(Debug, Clone)] pub struct BufferedNotification { pub seq: Option, + pub commit_seq: Option, pub notification: Arc, } @@ -202,7 +216,9 @@ pub struct BufferedNotification { #[derive(Debug)] pub struct SubscriptionFlowControl { snapshot_end_seq: AtomicI64, + snapshot_end_commit_seq: AtomicU64, has_snapshot: AtomicBool, + has_commit_snapshot: AtomicBool, initial_complete: AtomicBool, buffer: Mutex>, } @@ -211,25 +227,49 @@ impl SubscriptionFlowControl { pub fn new() -> Self { Self { snapshot_end_seq: AtomicI64::new(0), + snapshot_end_commit_seq: AtomicU64::new(0), has_snapshot: AtomicBool::new(false), + has_commit_snapshot: AtomicBool::new(false), initial_complete: 
AtomicBool::new(false), buffer: Mutex::new(VecDeque::new()), } } pub fn set_snapshot_end_seq(&self, snapshot_end_seq: Option) { + self.set_snapshot_boundaries(snapshot_end_seq, None); + } + + pub fn set_snapshot_boundaries( + &self, + snapshot_end_seq: Option, + snapshot_end_commit_seq: Option, + ) { if let Some(seq) = snapshot_end_seq { self.snapshot_end_seq.store(seq.as_i64(), Ordering::Release); self.has_snapshot.store(true, Ordering::Release); + } else { + self.has_snapshot.store(false, Ordering::Release); + } + + if let Some(commit_seq) = snapshot_end_commit_seq { + self.snapshot_end_commit_seq.store(commit_seq, Ordering::Release); + self.has_commit_snapshot.store(true, Ordering::Release); + } else { + self.has_commit_snapshot.store(false, Ordering::Release); + } - let max_seq = seq.as_i64(); + if snapshot_end_seq.is_some() || snapshot_end_commit_seq.is_some() { + let max_seq = snapshot_end_seq.map(|seq| seq.as_i64()); let mut buffer = self.buffer.lock(); buffer.retain(|item| match item.seq { - Some(item_seq) => item_seq.as_i64() > max_seq, - None => true, + Some(item_seq) if snapshot_end_commit_seq.is_none() => { + max_seq.map(|seq| item_seq.as_i64() > seq).unwrap_or(true) + }, + _ => match (snapshot_end_commit_seq, item.commit_seq) { + (Some(max_commit), Some(item_commit)) => item_commit > max_commit, + _ => true, + }, }); - } else { - self.has_snapshot.store(false, Ordering::Release); } } @@ -241,6 +281,14 @@ impl SubscriptionFlowControl { } } + pub fn snapshot_end_commit_seq(&self) -> Option { + if self.has_commit_snapshot.load(Ordering::Acquire) { + Some(self.snapshot_end_commit_seq.load(Ordering::Acquire)) + } else { + None + } + } + pub fn is_initial_complete(&self) -> bool { self.initial_complete.load(Ordering::Acquire) } @@ -249,23 +297,44 @@ impl SubscriptionFlowControl { self.initial_complete.store(true, Ordering::Release); } - pub fn buffer_notification(&self, notification: Arc, seq: Option) { + pub fn buffer_notification( + &self, + notification: 
Arc, + seq: Option, + commit_seq: Option, + ) { let mut buffer = self.buffer.lock(); if buffer.len() >= MAX_BUFFERED_NOTIFICATIONS_PER_SUBSCRIPTION { buffer.pop_front(); } - buffer.push_back(BufferedNotification { seq, notification }); + buffer.push_back(BufferedNotification { + seq, + commit_seq, + notification, + }); } pub fn drain_buffered_notifications(&self) -> Vec { let mut buffer = self.buffer.lock(); // Sort in-place via contiguous slice, then drain — avoids a second Vec allocation let slice = buffer.make_contiguous(); - slice.sort_by(|a, b| match (a.seq, b.seq) { - (Some(a_seq), Some(b_seq)) => a_seq.as_i64().cmp(&b_seq.as_i64()), - (Some(_), None) => std::cmp::Ordering::Less, - (None, Some(_)) => std::cmp::Ordering::Greater, - (None, None) => std::cmp::Ordering::Equal, + slice.sort_by(|a, b| { + let commit_order = match (a.commit_seq, b.commit_seq) { + (Some(a_commit), Some(b_commit)) => a_commit.cmp(&b_commit), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + }; + if commit_order != std::cmp::Ordering::Equal { + return commit_order; + } + + match (a.seq, b.seq) { + (Some(a_seq), Some(b_seq)) => a_seq.as_i64().cmp(&b_seq.as_i64()), + (Some(_), None) => std::cmp::Ordering::Less, + (None, Some(_)) => std::cmp::Ordering::Greater, + (None, None) => std::cmp::Ordering::Equal, + } }); buffer.drain(..).collect() } @@ -278,6 +347,8 @@ pub struct InitialLoadState { pub batch_size: usize, /// Snapshot boundary SeqId for consistent batch loading pub snapshot_end_seq: Option, + /// Deterministic snapshot boundary for reconnects across followers + pub snapshot_end_commit_seq: Option, /// Current batch number for pagination tracking (0-indexed) /// Incremented after each batch is sent pub current_batch_num: u32, @@ -538,10 +609,23 @@ impl ConnectionState { /// Update snapshot_end_seq for a subscription. 
pub fn update_snapshot_end_seq(&self, subscription_id: &str, snapshot_end_seq: Option) { + self.update_snapshot_boundaries(subscription_id, snapshot_end_seq, None); + } + + /// Update snapshot boundaries for a subscription. + pub fn update_snapshot_boundaries( + &self, + subscription_id: &str, + snapshot_end_seq: Option, + snapshot_end_commit_seq: Option, + ) { if let Some(sub) = self.subscriptions.write().get_mut(subscription_id) { if let Some(initial_load) = sub.initial_load.as_mut() { initial_load.snapshot_end_seq = snapshot_end_seq; - initial_load.flow_control.set_snapshot_end_seq(snapshot_end_seq); + initial_load.snapshot_end_commit_seq = snapshot_end_commit_seq; + initial_load + .flow_control + .set_snapshot_boundaries(snapshot_end_seq, snapshot_end_commit_seq); } } } @@ -572,6 +656,7 @@ impl ConnectionState { let buffered = flow_control.drain_buffered_notifications(); let mut sent = 0usize; + let delivery_timestamp_ms = epoch_millis(); for item in buffered { if let Err(e) = self.notification_tx.try_send(item.notification) { if matches!(e, mpsc::error::TrySendError::Full(_)) { @@ -582,7 +667,7 @@ impl ConnectionState { break; } } else { - runtime_metadata.record_delivery(); + runtime_metadata.record_delivery_at(delivery_timestamp_ms); sent += 1; } } @@ -614,9 +699,10 @@ pub struct ConnectionRegistration { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::websocket::{ChangeType, SharedChangePayload}; + use super::*; + fn make_notification(subscription_id: &str) -> Arc { Arc::new(WireNotification { subscription_id: Arc::from(subscription_id), @@ -629,7 +715,11 @@ mod tests { let flow_control = SubscriptionFlowControl::new(); for seq in 1..=2_048 { - flow_control.buffer_notification(make_notification("sub-1"), Some(SeqId::from(seq))); + flow_control.buffer_notification( + make_notification("sub-1"), + Some(SeqId::from(seq)), + None, + ); } let buffered = flow_control.drain_buffered_notifications(); @@ -650,9 +740,21 @@ mod tests { fn 
test_subscription_flow_control_drains_in_seq_order() { let flow_control = SubscriptionFlowControl::new(); - flow_control.buffer_notification(make_notification("sub-ordered"), Some(SeqId::from(9))); - flow_control.buffer_notification(make_notification("sub-ordered"), Some(SeqId::from(3))); - flow_control.buffer_notification(make_notification("sub-ordered"), Some(SeqId::from(6))); + flow_control.buffer_notification( + make_notification("sub-ordered"), + Some(SeqId::from(9)), + None, + ); + flow_control.buffer_notification( + make_notification("sub-ordered"), + Some(SeqId::from(3)), + None, + ); + flow_control.buffer_notification( + make_notification("sub-ordered"), + Some(SeqId::from(6)), + None, + ); let buffered = flow_control.drain_buffered_notifications(); let seqs: Vec<_> = buffered diff --git a/backend/crates/kalamdb-live/src/models/mod.rs b/backend/crates/kalamdb-live/src/models/mod.rs index b3e5dffc1..03edda95e 100644 --- a/backend/crates/kalamdb-live/src/models/mod.rs +++ b/backend/crates/kalamdb-live/src/models/mod.rs @@ -7,12 +7,13 @@ pub mod connection; pub mod subscription; +pub(crate) use connection::epoch_millis; + // Re-export commonly used types pub use connection::{ BufferedNotification, ConnectionEvent, ConnectionRegistration, ConnectionState, EventReceiver, EventSender, InitialLoadState, NotificationReceiver, NotificationSender, SharedConnectionState, SubscriptionFlowControl, SubscriptionHandle, SubscriptionRuntimeMetadata, SubscriptionState, - EVENT_CHANNEL_CAPACITY, NOTIFICATION_CHANNEL_CAPACITY, + EVENT_CHANNEL_CAPACITY, MAX_SUBSCRIPTIONS_PER_CONNECTION, NOTIFICATION_CHANNEL_CAPACITY, }; - pub use subscription::{ChangeNotification, ChangeType, SubscriptionResult}; diff --git a/backend/crates/kalamdb-live/src/models/subscription.rs b/backend/crates/kalamdb-live/src/models/subscription.rs index 8b774fe05..b7bbf4cb7 100644 --- a/backend/crates/kalamdb-live/src/models/subscription.rs +++ b/backend/crates/kalamdb-live/src/models/subscription.rs @@ 
-1,9 +1,9 @@ //! Subscription and notification result models -use crate::helpers::initial_data::InitialDataResult; -use kalamdb_commons::models::LiveQueryId; -use kalamdb_commons::schemas::SchemaField; pub use kalamdb_commons::websocket::{ChangeNotification, ChangeType}; +use kalamdb_commons::{models::LiveQueryId, schemas::SchemaField}; + +use crate::helpers::initial_data::InitialDataResult; /// Result of registering a live query subscription with initial data #[derive(Debug)] diff --git a/backend/crates/kalamdb-live/src/notification.rs b/backend/crates/kalamdb-live/src/notification.rs index 31a48c6c0..4806e87e2 100644 --- a/backend/crates/kalamdb-live/src/notification.rs +++ b/backend/crates/kalamdb-live/src/notification.rs @@ -9,26 +9,36 @@ //! Used by: //! - WebSocket live query subscribers -use super::helpers::filter_eval::matches as filter_matches; -use super::manager::ConnectionsManager; -use super::models::{ChangeNotification, ChangeType, SubscriptionHandle}; -use crate::error::LiveError; -use crate::fanout::{CommitSideEffectPlan, FanoutOwnerScope}; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::conversions::arrow_json_conversion::scalar_value_to_json; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{LiveQueryId, TableId, UserId}; -use kalamdb_commons::websocket::{RowData, SharedChangePayload, WireNotification}; +use std::{ + collections::HashMap, + hash::{Hash, Hasher}, + sync::Arc, +}; + +use kalamdb_commons::{ + constants::SystemColumnNames, + conversions::arrow_json_conversion::scalar_value_to_json, + ids::SeqId, + models::{rows::Row, LiveQueryId, TableId, UserId}, + websocket::{RowData, SharedChangePayload, WireNotification}, +}; use kalamdb_system::NotificationService as NotificationServiceTrait; -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; use tokio::sync::mpsc; +use super::{ + helpers::filter_eval::matches as filter_matches, + 
manager::ConnectionsManager, + models::{epoch_millis, ChangeNotification, ChangeType, SubscriptionHandle}, +}; +use crate::{ + error::LiveError, + fanout::{CommitSideEffectPlan, FanoutOwnerScope}, +}; + /// Number of sharded notification workers. -/// Deterministic routing by table_id hash preserves per-table ordering -/// while achieving parallelism across different tables. +/// Deterministic routing by owner scope preserves ordering for a shared table +/// or for one user's slice of a user-scoped table, while parallelizing +/// fanout across different tables and different users. /// /// Scales with available CPUs (up to a hard cap) so multi-core deployments /// can fan out across more tables in parallel. Falls back to 4 on the @@ -36,8 +46,9 @@ use tokio::sync::mpsc; fn num_notify_workers() -> usize { // Cap at 16 to bound DashMap contention and worker overhead. // Minimum of 4 preserves previous baseline behavior on small machines. - let cpus = - std::thread::available_parallelism().map(std::num::NonZeroUsize::get).unwrap_or(4); + let cpus = std::thread::available_parallelism() + .map(std::num::NonZeroUsize::get) + .unwrap_or(4); cpus.clamp(4, 16) } @@ -48,7 +59,7 @@ const NOTIFY_QUEUE_PER_WORKER: usize = 4_096; /// For single-table fan-out at high subscriber counts (e.g. 100K on one /// table all hashing to one worker), spawning per-chunk lets the tokio /// runtime parallelise delivery across its thread pool. 
-const SHARED_NOTIFY_CHUNK_SIZE: usize = 512; +const SHARED_NOTIFY_CHUNK_SIZE: usize = 2_048; struct NotificationTask { user_id: Option, @@ -86,11 +97,28 @@ fn extract_seq(change_notification: &ChangeNotification) -> Option { }) } +#[inline] +fn extract_commit_seq(change_notification: &ChangeNotification) -> Option { + use datafusion::scalar::ScalarValue; + change_notification + .row_data + .values + .get(SystemColumnNames::COMMIT_SEQ) + .and_then(|value| match value { + ScalarValue::UInt64(Some(commit_seq)) => Some(*commit_seq), + ScalarValue::Int64(Some(commit_seq)) if *commit_seq >= 0 => Some(*commit_seq as u64), + _ => None, + }) +} + /// Convert a Row to a projected RowData map (`HashMap`). /// Includes `_seq` always. When `projections` is `None`, includes all columns. fn project_row(row: &Row, projections: &Option>>) -> Result { let mut map = HashMap::new(); for (col, sv) in &row.values { + if col == SystemColumnNames::COMMIT_SEQ { + continue; + } let include = match projections { None => true, Some(proj) => col == SystemColumnNames::SEQ || proj.iter().any(|p| p == col), @@ -220,24 +248,32 @@ fn try_deliver( handle: &SubscriptionHandle, notification: Arc, seq_value: Option, + commit_seq: Option, + delivery_timestamp_ms: u64, ) -> bool { if let Some(flow_control) = handle.flow_control.as_ref() { if !flow_control.is_initial_complete() { - if let Some(snapshot_seq) = flow_control.snapshot_end_seq() { + if let Some(snapshot_commit_seq) = flow_control.snapshot_end_commit_seq() { + if let Some(commit_seq) = commit_seq { + if commit_seq <= snapshot_commit_seq { + return false; + } + } + } else if let Some(snapshot_seq) = flow_control.snapshot_end_seq() { if let Some(seq) = seq_value { if seq.as_i64() <= snapshot_seq { return false; } } } - flow_control.buffer_notification(Arc::clone(¬ification), seq_value); + flow_control.buffer_notification(Arc::clone(¬ification), seq_value, commit_seq); return false; } } match handle.notification_tx.try_send(notification) { Ok(()) 
=> { - handle.runtime_metadata.record_delivery(); + handle.runtime_metadata.record_delivery_at(delivery_timestamp_ms); true }, Err(e) => { @@ -251,7 +287,8 @@ fn try_deliver( }, TrySendError::Closed(_) => { log::debug!( - "Notification channel closed for subscription_id={}, connection likely disconnected", + "Notification channel closed for subscription_id={}, connection likely \ + disconnected", handle.subscription_id ); }, @@ -310,12 +347,19 @@ impl NotificationService { service } - /// Route a table_id to a deterministic worker index. + /// Route a notification to a deterministic worker index. + /// + /// Shared-table notifications stay keyed by table to preserve global order. + /// User-scoped notifications include the owner user_id so one hot user table + /// does not serialize all subscribers for every user through a single worker. #[inline] - fn worker_index(&self, table_id: &TableId) -> usize { + fn worker_index(&self, user_id: Option<&UserId>, table_id: &TableId) -> usize { let mut hasher = std::collections::hash_map::DefaultHasher::new(); table_id.namespace_id().as_str().hash(&mut hasher); table_id.table_name().as_str().hash(&mut hasher); + if let Some(user_id) = user_id { + user_id.as_str().hash(&mut hasher); + } hasher.finish() as usize % self.worker_txs.len() } @@ -369,15 +413,25 @@ impl NotificationService { table_id: TableId, notification: ChangeNotification, ) { - let worker_idx = self.worker_index(&table_id); + let worker_idx = self.worker_index(user_id.as_ref(), &table_id); let task = NotificationTask { user_id, table_id, notification, }; if let Err(e) = self.worker_txs[worker_idx].try_send(task) { - if matches!(e, mpsc::error::TrySendError::Full(_)) { - log::warn!("Notification worker {} queue full, dropping notification", worker_idx); + if let mpsc::error::TrySendError::Full(task) = e { + let owner_scope = match &task.user_id { + Some(user_id) => format!("user:{}", user_id.as_str()), + None => "shared".to_string(), + }; + log::warn!( + 
"Notification worker {} queue full for table={} owner_scope={}, dropping \ + notification", + worker_idx, + task.table_id, + owner_scope, + ); } } } @@ -415,6 +469,8 @@ impl NotificationService { all_handles: Arc>, ) -> Result { let seq_value = extract_seq(&change_notification); + let commit_seq = extract_commit_seq(&change_notification); + let delivery_timestamp_ms = epoch_millis(); let change_type = change_notification.change_type.clone(); let pk_columns = Arc::new(change_notification.pk_columns); let new_row = Arc::new(change_notification.row_data); @@ -427,13 +483,16 @@ impl NotificationService { // Small fan-out: inline dispatch directly from DashMap refs (no clone/spawn overhead) if handle_count <= SHARED_NOTIFY_CHUNK_SIZE { + let chunk_handles = all_handles.iter().map(|entry| entry.value().clone()).collect(); return dispatch_chunk( - all_handles.iter().map(|entry| entry.value().clone()), + chunk_handles, &new_row, old_row.as_deref(), &change_type, &pk_columns, seq_value, + commit_seq, + delivery_timestamp_ms, ); } @@ -441,14 +500,46 @@ impl NotificationService { // parallelise delivery across its thread pool. When all subscribers // are on the same table they hash to one notification worker — // spawning is the only way to utilise multiple cores for the fan-out. 
- let handles_vec: Vec = - all_handles.iter().map(|entry| entry.value().clone()).collect(); - let table_id = table_id.clone(); let mut tasks = Vec::new(); + let mut chunk_handles = Vec::with_capacity(SHARED_NOTIFY_CHUNK_SIZE); + + for entry in all_handles.iter() { + chunk_handles.push(entry.value().clone()); + + if chunk_handles.len() < SHARED_NOTIFY_CHUNK_SIZE { + continue; + } + + let ready_handles = + std::mem::replace(&mut chunk_handles, Vec::with_capacity(SHARED_NOTIFY_CHUNK_SIZE)); + let new_row = Arc::clone(&new_row); + let old_row = old_row.as_ref().map(Arc::clone); + let change_type = change_type.clone(); + let pk_columns = Arc::clone(&pk_columns); + let table_id = table_id.clone(); + + tasks.push(tokio::spawn(async move { + match dispatch_chunk( + ready_handles, + &new_row, + old_row.as_deref(), + &change_type, + &pk_columns, + seq_value, + commit_seq, + delivery_timestamp_ms, + ) { + Ok(count) => count, + Err(e) => { + log::error!("Notification dispatch error for table {}: {}", table_id, e); + 0 + }, + } + })); + } - for chunk in handles_vec.chunks(SHARED_NOTIFY_CHUNK_SIZE) { - let chunk_handles: Vec = chunk.to_vec(); + if !chunk_handles.is_empty() { let new_row = Arc::clone(&new_row); let old_row = old_row.as_ref().map(Arc::clone); let change_type = change_type.clone(); @@ -457,12 +548,14 @@ impl NotificationService { tasks.push(tokio::spawn(async move { match dispatch_chunk( - chunk_handles.into_iter(), + chunk_handles, &new_row, old_row.as_deref(), &change_type, &pk_columns, seq_value, + commit_seq, + delivery_timestamp_ms, ) { Ok(count) => count, Err(e) => { @@ -490,17 +583,38 @@ impl NotificationService { /// identical projections share a single `Arc`, avoiding /// redundant Row→RowData conversion. The shared payload then reuses cached JSON /// and MessagePack bytes across subscribers that serialize the same change. 
-fn dispatch_chunk( - handles: I, +fn dispatch_chunk( + handles: Vec, new_row: &Row, old_row: Option<&Row>, change_type: &ChangeType, pk_columns: &[String], seq_value: Option, -) -> Result -where - I: IntoIterator, -{ + commit_seq: Option, + delivery_timestamp_ms: u64, +) -> Result { + if handles + .iter() + .all(|handle| handle.filter_expr.is_none() && handle.projections.is_none()) + { + let payload = + Arc::new(build_shared_payload(change_type, new_row, old_row, pk_columns, &None)?); + let mut count = 0usize; + + for handle in handles { + let notification = Arc::new(WireNotification { + subscription_id: Arc::clone(&handle.subscription_id), + payload: Arc::clone(&payload), + }); + + if try_deliver(&handle, notification, seq_value, commit_seq, delivery_timestamp_ms) { + count += 1; + } + } + + return Ok(count); + } + // Cache: projection requirements → shared payload (built once per projection group). let mut cache: HashMap> = HashMap::new(); let mut count = 0usize; @@ -544,7 +658,7 @@ where payload, }); - if try_deliver(&handle, notification, seq_value) { + if try_deliver(&handle, notification, seq_value, commit_seq, delivery_timestamp_ms) { count += 1; } } @@ -576,15 +690,22 @@ impl NotificationServiceTrait for NotificationService { #[cfg(test)] mod tests { - use super::*; - use crate::helpers::filter_eval::parse_where_clause; - use crate::models::{SubscriptionFlowControl, SubscriptionHandle, SubscriptionRuntimeMetadata}; + use std::{collections::BTreeMap, time::Duration}; + use datafusion::scalar::ScalarValue; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{ConnectionId, NamespaceId, TableName}; - use kalamdb_commons::NodeId; - use std::collections::BTreeMap; - use std::time::Duration; + use kalamdb_commons::{ + models::{rows::Row, ConnectionId, NamespaceId, TableName}, + NodeId, + }; + + use super::*; + use crate::{ + helpers::filter_eval::parse_where_clause, + models::{ + SubscriptionFlowControl, SubscriptionHandle, 
SubscriptionRuntimeMetadata, + MAX_SUBSCRIPTIONS_PER_CONNECTION, NOTIFICATION_CHANNEL_CAPACITY, + }, + }; fn make_table_id(ns: &str, table: &str) -> TableId { TableId::new(NamespaceId::from(ns), TableName::from(table)) @@ -691,6 +812,49 @@ mod tests { assert!(rx_skip.try_recv().is_err(), "filtered subscriber should not receive"); } + #[tokio::test] + #[ntest::timeout(1000)] + async fn saturated_shared_connection_fanout_fits_notification_channel() { + assert!(NOTIFICATION_CHANNEL_CAPACITY >= MAX_SUBSCRIPTIONS_PER_CONNECTION); + + let table_id = make_table_id("shared", "scale_sub"); + let connection_id = ConnectionId::new("scale-conn"); + let subscriptions = Arc::new(dashmap::DashMap::new()); + let (tx, mut rx) = mpsc::channel(NOTIFICATION_CHANNEL_CAPACITY); + + for index in 0..MAX_SUBSCRIPTIONS_PER_CONNECTION { + let subscription_id = format!("scale_{}", index); + let flow = Arc::new(SubscriptionFlowControl::new()); + flow.mark_initial_complete(); + + subscriptions.insert( + LiveQueryId::new( + UserId::new(format!("user_{}", index)), + connection_id.clone(), + subscription_id.clone(), + ), + make_shared_handle(&subscription_id, tx.clone(), flow, None, None), + ); + } + + let delivered = NotificationService::dispatch_to_subscribers( + &table_id, + ChangeNotification::insert(table_id.clone(), make_row(1, "probe", 1)), + subscriptions, + ) + .await + .expect("fanout should succeed"); + + assert_eq!(delivered, MAX_SUBSCRIPTIONS_PER_CONNECTION); + + let mut received = 0usize; + while rx.try_recv().is_ok() { + received += 1; + } + + assert_eq!(received, MAX_SUBSCRIPTIONS_PER_CONNECTION); + } + #[tokio::test] async fn test_notify_async_user_table_projection_keeps_seq() { let registry = ConnectionsManager::new( @@ -826,12 +990,14 @@ mod tests { let row = make_row(9, "shared", 9); let delivered = dispatch_chunk( - vec![handle_a, handle_b].into_iter(), + vec![handle_a, handle_b], &row, None, &ChangeType::Insert, &[], Some(SeqId::from(9)), + None, + epoch_millis(), ) 
.expect("dispatch succeeds"); assert_eq!(delivered, 2); @@ -871,12 +1037,14 @@ mod tests { let row = make_row(17, "separate", 17); let delivered = dispatch_chunk( - vec![handle_id, handle_body].into_iter(), + vec![handle_id, handle_body], &row, None, &ChangeType::Insert, &[], Some(SeqId::from(17)), + None, + epoch_millis(), ) .expect("dispatch succeeds"); assert_eq!(delivered, 2); diff --git a/backend/crates/kalamdb-live/src/subscription.rs b/backend/crates/kalamdb-live/src/subscription.rs index e3502e2ed..42d4d5b94 100644 --- a/backend/crates/kalamdb-live/src/subscription.rs +++ b/backend/crates/kalamdb-live/src/subscription.rs @@ -9,20 +9,26 @@ //! Active subscriptions are tracked in node-local memory and surfaced through //! `system.live`. Table mutations still replicate through Raft separately. -use super::manager::ConnectionsManager; -use super::models::{ - InitialLoadState, SharedConnectionState, SubscriptionFlowControl, SubscriptionHandle, - SubscriptionRuntimeMetadata, SubscriptionState, -}; -use crate::error::{LiveError, LiveResultExt}; +use std::sync::Arc; + use chrono::Utc; use datafusion::sql::sqlparser::ast::Expr; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{ConnectionId, LiveQueryId, TableId, UserId}; -use kalamdb_commons::websocket::SubscriptionRequest; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + ids::SeqId, + models::{ConnectionId, LiveQueryId, TableId, UserId}, + websocket::SubscriptionRequest, + TableType, +}; use log::debug; -use std::sync::Arc; + +use super::{ + manager::ConnectionsManager, + models::{ + InitialLoadState, SharedConnectionState, SubscriptionFlowControl, SubscriptionHandle, + SubscriptionRuntimeMetadata, SubscriptionState, MAX_SUBSCRIPTIONS_PER_CONNECTION, + }, +}; +use crate::error::{LiveError, LiveResultExt}; /// Service for managing subscriptions /// @@ -70,7 +76,6 @@ impl SubscriptionService { })?; // Prevent DoS via excessive subscriptions per connection - const 
MAX_SUBSCRIPTIONS_PER_CONNECTION: usize = 100; if connection_state.subscription_count() >= MAX_SUBSCRIPTIONS_PER_CONNECTION { return Err(LiveError::InvalidOperation(format!( "Maximum subscriptions ({}) per connection exceeded", @@ -119,6 +124,7 @@ impl SubscriptionService { initial_load: flow_control.as_ref().map(|flow_control| InitialLoadState { batch_size, snapshot_end_seq: None, + snapshot_end_commit_seq: None, current_batch_num: 0, flow_control: Arc::clone(flow_control), }), @@ -179,6 +185,21 @@ impl SubscriptionService { } /// Unregister a single live query subscription + + /// Update sequence and commit snapshot boundaries after initial data planning. + pub fn update_snapshot_boundaries( + &self, + connection_state: &SharedConnectionState, + subscription_id: &str, + snapshot_end_seq: Option, + snapshot_end_commit_seq: Option, + ) { + connection_state.update_snapshot_boundaries( + subscription_id, + snapshot_end_seq, + snapshot_end_commit_seq, + ); + } pub async fn unregister_subscription( &self, connection_state: &SharedConnectionState, diff --git a/backend/crates/kalamdb-live/src/traits.rs b/backend/crates/kalamdb-live/src/traits.rs index b1c0ec98a..5d134385f 100644 --- a/backend/crates/kalamdb-live/src/traits.rs +++ b/backend/crates/kalamdb-live/src/traits.rs @@ -4,14 +4,34 @@ //! system (schema registry, SQL executor). Implementations are provided by //! kalamdb-core during wiring. -use crate::error::LiveError; +use std::sync::Arc; + use arrow::datatypes::Schema as ArrowSchema; use async_trait::async_trait; use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::models::{ReadContext, TableId, UserId}; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_commons::Role; -use std::sync::Arc; +use kalamdb_commons::{ + models::{ReadContext, TableId, UserId}, + schemas::TableDefinition, + Role, TableType, +}; + +use crate::error::LiveError; + +/// Optional cluster apply barrier used before live snapshots on follower nodes. 
+/// +/// Implementations should wait until the local replica has applied every log +/// entry it already knows for the target table's Raft group. This keeps the +/// initial snapshot boundary aligned with local storage before notifications are +/// buffered and replayed. +#[async_trait] +pub trait LiveApplyBarrier: Send + Sync { + async fn wait_for_table_apply_barrier( + &self, + table_id: &TableId, + table_type: TableType, + user_id: &UserId, + ) -> Result<(), LiveError>; +} /// Schema operations needed by the live subsystem. /// diff --git a/backend/crates/kalamdb-macros/src/lib.rs b/backend/crates/kalamdb-macros/src/lib.rs index a9b6e89cf..8e83b5644 100644 --- a/backend/crates/kalamdb-macros/src/lib.rs +++ b/backend/crates/kalamdb-macros/src/lib.rs @@ -425,7 +425,8 @@ impl ColumnArgs { other => syn::Error::new( proc_macro2::Span::call_site(), format!( - "unsupported default '{}'; expected None, Literal(true|false), or Function(NAME)", + "unsupported default '{}'; expected None, Literal(true|false), or \ + Function(NAME)", other ), ) diff --git a/backend/crates/kalamdb-observability/src/activity.rs b/backend/crates/kalamdb-observability/src/activity.rs index f5bb8626e..3f4b49abc 100644 --- a/backend/crates/kalamdb-observability/src/activity.rs +++ b/backend/crates/kalamdb-observability/src/activity.rs @@ -1,5 +1,7 @@ -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::Duration, +}; fn epoch_millis() -> u64 { std::time::SystemTime::now() diff --git a/backend/crates/kalamdb-observability/src/health_monitor.rs b/backend/crates/kalamdb-observability/src/health_monitor.rs index 6a53e6a84..b8e237a77 100644 --- a/backend/crates/kalamdb-observability/src/health_monitor.rs +++ b/backend/crates/kalamdb-observability/src/health_monitor.rs @@ -173,7 +173,9 @@ impl HealthMonitor { if let (Some(memory_mb), Some(cpu_usage)) = (metrics.memory_mb, metrics.cpu_usage) { log::debug!( - "Health metrics: 
Memory: {} MB | CPU: {:.2}% | Open Files: {} | Storage Partitions: {} | Namespaces: {} | Tables: {} | Subscriptions: {} ({} connections, {} ws sessions) | Jobs: {} running, {} queued, {} failed (total: {})", + "Health metrics: Memory: {} MB | CPU: {:.2}% | Open Files: {} | Storage \ + Partitions: {} | Namespaces: {} | Tables: {} | Subscriptions: {} ({} \ + connections, {} ws sessions) | Jobs: {} running, {} queued, {} failed (total: {})", memory_mb, cpu_usage, open_files_text, @@ -190,7 +192,9 @@ impl HealthMonitor { ); } else { log::debug!( - "Health metrics: Open Files: {} | Storage Partitions: {} | Namespaces: {} | Tables: {} | Subscriptions: {} ({} connections, {} ws sessions) | Jobs: {} running, {} queued, {} failed (total: {})", + "Health metrics: Open Files: {} | Storage Partitions: {} | Namespaces: {} | \ + Tables: {} | Subscriptions: {} ({} connections, {} ws sessions) | Jobs: {} \ + running, {} queued, {} failed (total: {})", open_files_text, storage_partitions, metrics.namespace_count, diff --git a/backend/crates/kalamdb-observability/src/runtime_metrics.rs b/backend/crates/kalamdb-observability/src/runtime_metrics.rs index 4da9b1d8d..9b5369928 100644 --- a/backend/crates/kalamdb-observability/src/runtime_metrics.rs +++ b/backend/crates/kalamdb-observability/src/runtime_metrics.rs @@ -1,8 +1,9 @@ -use crate::allocator_metrics::{collect_allocator_metrics, AllocatorMetrics}; -use std::sync::Mutex; -use std::time::Instant; +use std::{sync::Mutex, time::Instant}; + use sysinfo::{MemoryRefreshKind, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; +use crate::allocator_metrics::{collect_allocator_metrics, AllocatorMetrics}; + /// Reusable System instance to avoid repeated allocation/deallocation. /// sysinfo docs explicitly recommend reusing the same System instance. /// Creating System::new_all() every 30s causes severe heap fragmentation @@ -97,7 +98,8 @@ impl RuntimeMetrics { /// Render a concise log line for the console. 
pub fn to_log_string(&self) -> String { format!( - "uptime={} mem={}MB source={} rss={}MB gap={}MB used={}MB cpu={} pid={} threads={} sys_mem={}MB/{}MB", + "uptime={} mem={}MB source={} rss={}MB gap={}MB used={}MB cpu={} pid={} threads={} \ + sys_mem={}MB/{}MB", self.uptime_human, self.memory_mb.unwrap_or(0), self.memory_usage_source, diff --git a/backend/crates/kalamdb-pg/src/operation_executor.rs b/backend/crates/kalamdb-pg/src/operation_executor.rs index e8a58c410..ba29dad91 100644 --- a/backend/crates/kalamdb-pg/src/operation_executor.rs +++ b/backend/crates/kalamdb-pg/src/operation_executor.rs @@ -3,20 +3,25 @@ use std::str::FromStr; use arrow::record_batch::RecordBatch; use arrow_ipc::writer::StreamWriter; use async_trait::async_trait; -use tonic::Status; - -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{TransactionId, UserId}; -use kalamdb_commons::{TableId, TableType}; - -use crate::session_registry::LivePgTransaction; -use crate::service::{ScanRpcRequest, ScanRpcResponse}; -use crate::{DeleteRpcRequest, InsertRpcRequest, UpdateRpcRequest}; - // Re-export domain types from kalamdb-commons (canonical location). pub use kalamdb_commons::models::pg_operations::{ DeleteRequest, InsertRequest, MutationResult, ScanRequest, ScanResult, UpdateRequest, }; +use kalamdb_commons::{ + models::{ + rows::{Row, StoredScalarValue}, + TransactionId, UserId, + }, + TableId, TableType, +}; +use serde_json::Value; +use tonic::Status; + +use crate::{ + service::{ScanRpcRequest, ScanRpcResponse}, + session_registry::LivePgTransaction, + DeleteRpcRequest, InsertRpcRequest, UpdateRpcRequest, +}; /// Domain-typed query executor. 
/// @@ -28,7 +33,10 @@ pub trait OperationExecutor: Send + Sync + 'static { async fn execute_insert(&self, request: InsertRequest) -> Result; async fn execute_update(&self, request: UpdateRequest) -> Result; async fn execute_delete(&self, request: DeleteRequest) -> Result; - async fn active_transaction(&self, _session_id: &str) -> Result, Status> { + async fn active_transaction( + &self, + _session_id: &str, + ) -> Result, Status> { Ok(None) } async fn begin_transaction(&self, _session_id: &str) -> Result, Status> { @@ -81,9 +89,68 @@ pub fn parse_user_id(raw: Option<&str>) -> Option { raw.map(str::trim).filter(|s| !s.is_empty()).map(UserId::new) } +fn stored_scalar_from_json_value(value: &Value) -> StoredScalarValue { + match value { + Value::Null => StoredScalarValue::Null, + Value::Bool(value) => StoredScalarValue::Boolean(Some(*value)), + Value::Number(value) => { + if let Some(value) = value.as_i64() { + StoredScalarValue::Int64(Some(value.to_string())) + } else if let Some(value) = value.as_u64() { + StoredScalarValue::UInt64(Some(value.to_string())) + } else if let Some(value) = value.as_f64() { + StoredScalarValue::Float64(Some(value)) + } else { + StoredScalarValue::Fallback(value.to_string()) + } + }, + Value::String(value) => StoredScalarValue::Utf8(Some(value.clone())), + Value::Array(_) | Value::Object(_) => StoredScalarValue::Fallback(value.to_string()), + } +} + +fn parse_row_value(value: &Value) -> Result { + match value { + Value::Object(values) => { + let row = values + .iter() + .map(|(column_name, value)| { + Ok((column_name.clone(), stored_scalar_from_json_value(value).into())) + }) + .collect::, Status>>()?; + Ok(Row::from_vec(row)) + }, + _ => Err(Status::invalid_argument("invalid row JSON: expected object payload")), + } +} + pub fn parse_row(json: &str) -> Result { - serde_json::from_str::(json) - .map_err(|e| Status::invalid_argument(format!("invalid row JSON: {}", e))) + if let Ok(row) = serde_json::from_str::(json) { + return Ok(row); + 
} + + let value = serde_json::from_str::(json) + .map_err(|e| Status::invalid_argument(format!("invalid row JSON: {}", e)))?; + + parse_row_value(&value) +} + +fn parse_rows_json(json: &str) -> Result, Status> { + if let Ok(rows) = serde_json::from_str::>(json) { + return Ok(rows); + } + + if let Ok(row) = serde_json::from_str::(json) { + return Ok(vec![row]); + } + + let value = serde_json::from_str::(json) + .map_err(|e| Status::invalid_argument(format!("invalid row JSON: {}", e)))?; + + match value { + Value::Array(rows) => rows.iter().map(parse_row_value).collect(), + other => Ok(vec![parse_row_value(&other)?]), + } } /// Encode Arrow RecordBatches into IPC bytes for gRPC transport. @@ -166,7 +233,7 @@ pub fn insert_request_from_rpc(rpc: &InsertRpcRequest) -> Result Result { let table_id = parse_table_id(&rpc.namespace, &rpc.table_name)?; let table_type = parse_table_type(&rpc.table_type)?; - let updates = vec![parse_row(&rpc.updates_json)?]; + let updates = parse_rows_json(&rpc.updates_json)?; Ok(UpdateRequest { table_id, table_type, @@ -177,6 +244,80 @@ pub fn update_request_from_rpc(rpc: &UpdateRpcRequest) -> Result Result { let table_id = parse_table_id(&rpc.namespace, &rpc.table_name)?; diff --git a/backend/crates/kalamdb-pg/src/service.rs b/backend/crates/kalamdb-pg/src/service.rs index 2c6ed29b0..6f5b006a3 100644 --- a/backend/crates/kalamdb-pg/src/service.rs +++ b/backend/crates/kalamdb-pg/src/service.rs @@ -4,13 +4,14 @@ use std::sync::Arc; #[cfg(feature = "server")] use async_trait::async_trait; #[cfg(feature = "server")] -use kalamdb_auth::{authenticate, helpers::basic_auth, AuthRequest, AuthenticationResult, UserRepository}; +use kalamdb_auth::{ + authenticate, helpers::basic_auth, AuthRequest, AuthenticationResult, UserRepository, +}; #[cfg(feature = "server")] use kalamdb_commons::models::{ConnectionInfo, TransactionId}; -use tonic::codegen::*; #[cfg(feature = "server")] use tonic::Request; -use tonic::{Response, Status}; +use tonic::{codegen::*, 
Response, Status}; use tonic_prost::ProstCodec; #[cfg(feature = "server")] @@ -925,7 +926,8 @@ impl KalamPgService { && self.bearer_user_repo.is_none() { log::warn!( - "PG RPC service is running without mTLS, pg_auth_token, or bearer auth; requests are unauthenticated" + "PG RPC service is running without mTLS, pg_auth_token, or bearer auth; requests \ + are unauthenticated" ); } } @@ -967,14 +969,15 @@ impl KalamPgService { return Ok(()); } - self.session_registry - .validate_session(session_id) - .map(|_| ()) - .map_err(|reason| match reason { - "session lease expired" => Status::unauthenticated("session lease expired – re-authenticate via open_session"), + self.session_registry.validate_session(session_id).map(|_| ()).map_err( + |reason| match reason { + "session lease expired" => Status::unauthenticated( + "session lease expired – re-authenticate via open_session", + ), "session not authenticated" => Status::unauthenticated("session not authenticated"), _ => Status::unauthenticated("invalid or expired session"), - }) + }, + ) } async fn authenticate_admin_metadata( @@ -991,27 +994,29 @@ impl KalamPgService { return Err(Status::unauthenticated(missing_header_message)); } - let connection_info = ConnectionInfo::new(request.remote_addr().map(|addr| addr.to_string())); + let connection_info = + ConnectionInfo::new(request.remote_addr().map(|addr| addr.to_string())); let auth_request = if provided .get(..6) .map(|prefix| prefix.eq_ignore_ascii_case("Basic ")) .unwrap_or(false) { - let (user, password) = basic_auth::parse_basic_auth_header(provided) - .map_err(|error| Status::unauthenticated(format!("authentication failed: {}", error)))?; + let (user, password) = + basic_auth::parse_basic_auth_header(provided).map_err(|error| { + Status::unauthenticated(format!("authentication failed: {}", error)) + })?; AuthRequest::Credentials { user, password } } else { AuthRequest::Header(provided.to_string()) }; - let auth_result = authenticate(auth_request, &connection_info, 
repo) - .await - .map_err(|error| Status::unauthenticated(format!("authentication failed: {}", error)))?; + let auth_result = + authenticate(auth_request, &connection_info, repo).await.map_err(|error| { + Status::unauthenticated(format!("authentication failed: {}", error)) + })?; if !auth_result.user.is_admin() { - return Err(Status::permission_denied( - "pg rpc requires a DBA or system account", - )); + return Err(Status::permission_denied("pg rpc requires a DBA or system account")); } Ok(Some(auth_result)) @@ -1143,7 +1148,8 @@ impl KalamPgService { Ok(None) => {}, Err(status) => { log::debug!( - "PG tracked_transaction_id: authoritative lookup failed for session '{}' with {}: {}; falling back to pinned session state", + "PG tracked_transaction_id: authoritative lookup failed for session '{}' \ + with {}: {}; falling back to pinned session state", session_id, status.code(), status.message() @@ -1167,7 +1173,8 @@ impl KalamPgService { self.session_registry .clear_transaction_state_if_matches(session_id, Some(transaction_id)); log::debug!( - "PG {}: cleared local transaction bookkeeping for session '{}' tx '{}' after executor returned {}: {}", + "PG {}: cleared local transaction bookkeeping for session '{}' tx '{}' after executor \ + returned {}: {}", rpc_name, session_id, transaction_id, @@ -1187,7 +1194,8 @@ impl KalamPgService { match executor.rollback_transaction(session_id, transaction_id).await { Ok(_) => { log::debug!( - "PG {}: finalized terminal transaction cleanup for session '{}' tx '{}' via rollback", + "PG {}: finalized terminal transaction cleanup for session '{}' tx '{}' \ + via rollback", rpc_name, session_id, transaction_id @@ -1197,7 +1205,8 @@ impl KalamPgService { if Self::should_reconcile_local_transaction_state(&rollback_status) => { log::debug!( - "PG {}: rollback cleanup for session '{}' tx '{}' also returned terminal state {}: {}", + "PG {}: rollback cleanup for session '{}' tx '{}' also returned terminal \ + state {}: {}", rpc_name, 
session_id, transaction_id, @@ -1207,7 +1216,8 @@ impl KalamPgService { }, Err(rollback_status) => { log::warn!( - "PG {}: rollback cleanup for session '{}' tx '{}' failed after terminal error {}: {}", + "PG {}: rollback cleanup for session '{}' tx '{}' failed after terminal \ + error {}: {}", rpc_name, session_id, transaction_id, @@ -1343,7 +1353,11 @@ impl PgService for KalamPgService { .to_string(); if provided != expected.as_str() { if self - .authenticate_admin_metadata(&request, &provided, "missing authorization header") + .authenticate_admin_metadata( + &request, + &provided, + "missing authorization header", + ) .await? .is_none() { @@ -1421,7 +1435,8 @@ impl PgService for KalamPgService { }, Err(status) => { log::warn!( - "PG close_session: proceeding after remote rollback error for session '{}' tx '{}': {}", + "PG close_session: proceeding after remote rollback error for session \ + '{}' tx '{}': {}", session_id, transaction_id, status diff --git a/backend/crates/kalamdb-pg/src/session_registry.rs b/backend/crates/kalamdb-pg/src/session_registry.rs index 3a72c38b4..a6a07002b 100644 --- a/backend/crates/kalamdb-pg/src/session_registry.rs +++ b/backend/crates/kalamdb-pg/src/session_registry.rs @@ -1,9 +1,14 @@ +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicI64, Ordering}, + Arc, + }, + time::{SystemTime, UNIX_EPOCH}, +}; + use dashmap::DashMap; use kalamdb_commons::models::{TransactionId, TransactionState}; -use std::collections::HashMap; -use std::sync::atomic::{AtomicI64, Ordering}; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; use uuid::Uuid; const STALE_IDLE_SESSION_TTL_MS: i64 = 5_000; @@ -185,7 +190,8 @@ impl RemotePgSession { } fn with_live_transaction(mut self, live_transaction: Option<&LivePgTransaction>) -> Self { - self.transaction_id = live_transaction.map(|transaction| transaction.transaction_id().clone()); + self.transaction_id = + live_transaction.map(|transaction| transaction.transaction_id().clone()); 
self.transaction_state = live_transaction.map(LivePgTransaction::transaction_state); self.transaction_has_writes = live_transaction.map(LivePgTransaction::transaction_has_writes).unwrap_or(false); @@ -332,7 +338,8 @@ impl SessionRegistry { client_addr: Option<&str>, bridge_auth: BridgeAuth, ) -> RemotePgSession { - let session_id = normalize_optional(session_id).unwrap_or_else(|| Uuid::now_v7().to_string()); + let session_id = + normalize_optional(session_id).unwrap_or_else(|| Uuid::now_v7().to_string()); let now_ms = current_timestamp_ms(); self.maybe_prune_stale_local_idle_sessions(now_ms); @@ -443,11 +450,7 @@ impl SessionRegistry { log::warn!( "PG session '{}': auto-rolling back stale transaction '{}' before starting new one", session_id, - session - .transaction_id - .as_ref() - .map(TransactionId::as_str) - .unwrap_or("?") + session.transaction_id.as_ref().map(TransactionId::as_str).unwrap_or("?") ); session.transaction_id = None; session.transaction_state = None; @@ -500,11 +503,8 @@ impl SessionRegistry { } if session.transaction_id.as_ref() != Some(transaction_id) { - let current_tx = session - .transaction_id - .as_ref() - .map(TransactionId::as_str) - .unwrap_or(""); + let current_tx = + session.transaction_id.as_ref().map(TransactionId::as_str).unwrap_or(""); return Err(format!( "transaction ID mismatch: expected '{}', got '{}'", current_tx, transaction_id @@ -557,11 +557,8 @@ impl SessionRegistry { } if session.transaction_id.as_ref() != Some(transaction_id) { - let current_tx = session - .transaction_id - .as_ref() - .map(TransactionId::as_str) - .unwrap_or(""); + let current_tx = + session.transaction_id.as_ref().map(TransactionId::as_str).unwrap_or(""); return Err(format!( "transaction ID mismatch: expected '{}', got '{}'", current_tx, transaction_id @@ -668,9 +665,10 @@ impl SessionRegistry { #[cfg(test)] mod tests { - use super::*; use uuid::Uuid; + use super::*; + #[test] fn open_or_get_creates_session() { let registry = 
SessionRegistry::default(); diff --git a/backend/crates/kalamdb-pg/tests/service_auth_tx.rs b/backend/crates/kalamdb-pg/tests/service_auth_tx.rs index 10e6c71e4..54bf91a47 100644 --- a/backend/crates/kalamdb-pg/tests/service_auth_tx.rs +++ b/backend/crates/kalamdb-pg/tests/service_auth_tx.rs @@ -1,18 +1,23 @@ +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, Mutex, +}; + use async_trait::async_trait; use bytes::Bytes; -use kalamdb_auth::{create_and_sign_token, services::unified::init_auth_config, AuthError, AuthResult, UserRepository}; +use kalamdb_auth::{ + create_and_sign_token, services::unified::init_auth_config, AuthError, AuthResult, + UserRepository, +}; use kalamdb_commons::models::TransactionId; use kalamdb_configs::{AuthSettings, OAuthSettings}; use kalamdb_pg::{ BeginTransactionRequest, CloseSessionRequest, CommitTransactionRequest, DeleteRequest, - ExecuteQueryRpcRequest, ExecuteSqlRpcRequest, InsertRequest, KalamPgService, MutationResult, - LivePgTransaction, OpenSessionRequest, OperationExecutor, PgService, PgServiceServer, PingRequest, + ExecuteQueryRpcRequest, ExecuteSqlRpcRequest, InsertRequest, KalamPgService, LivePgTransaction, + MutationResult, OpenSessionRequest, OperationExecutor, PgService, PgServiceServer, PingRequest, RollbackTransactionRequest, ScanRequest, ScanResult, UpdateRequest, }; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::{AuthType, Role, User}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex}; +use kalamdb_system::{providers::storages::models::StorageMode, AuthType, Role, User}; use tonic::Request; const VALID_DBA_BASIC_AUTH: &str = "Basic cGdfYnJpZGdlX3VzZXI6c2VjcmV0LXBhc3M="; @@ -55,7 +60,8 @@ impl StaticUserRepo { Self { user: User { user_id: kalamdb_commons::UserId::new("pg_bridge_user"), - password_hash: "$2b$12$unusedhashunusedhashunusedhashunusedhashunusedhashu".to_string(), + password_hash: 
"$2b$12$unusedhashunusedhashunusedhashunusedhashunusedhashu" + .to_string(), role, email: Some("pg-bridge@example.com".to_string()), auth_type: AuthType::Password, @@ -169,14 +175,16 @@ impl OperationExecutor for RecordingExecutor { &self, session_id: &str, ) -> Result, tonic::Status> { - Ok(self - .active_tx - .lock() - .expect("recording executor active tx") - .clone() - .map(|transaction_id| { - LivePgTransaction::new(session_id.to_string(), transaction_id, kalamdb_pg::TransactionState::OpenRead, false) - })) + Ok(self.active_tx.lock().expect("recording executor active tx").clone().map( + |transaction_id| { + LivePgTransaction::new( + session_id.to_string(), + transaction_id, + kalamdb_pg::TransactionState::OpenRead, + false, + ) + }, + )) } async fn begin_transaction( @@ -198,10 +206,7 @@ impl OperationExecutor for RecordingExecutor { transaction_id: &TransactionId, ) -> Result, tonic::Status> { self.commit_calls.fetch_add(1, Ordering::Relaxed); - self.active_tx - .lock() - .expect("recording executor commit active tx") - .take(); + self.active_tx.lock().expect("recording executor commit active tx").take(); Ok(Some(transaction_id.clone())) } @@ -211,10 +216,7 @@ impl OperationExecutor for RecordingExecutor { transaction_id: &TransactionId, ) -> Result, tonic::Status> { self.rollback_calls.fetch_add(1, Ordering::Relaxed); - self.active_tx - .lock() - .expect("recording executor rollback active tx") - .take(); + self.active_tx.lock().expect("recording executor rollback active tx").take(); Ok(Some(transaction_id.clone())) } @@ -267,7 +269,12 @@ impl OperationExecutor for BeginRollbackNotFoundExecutor { .expect("stale executor active tx") .clone() .map(|transaction_id| { - LivePgTransaction::new(session_id.to_string(), transaction_id, kalamdb_pg::TransactionState::OpenRead, false) + LivePgTransaction::new( + session_id.to_string(), + transaction_id, + kalamdb_pg::TransactionState::OpenRead, + false, + ) })) } @@ -301,10 +308,7 @@ impl OperationExecutor for 
BeginRollbackNotFoundExecutor { _session_id: &str, transaction_id: &TransactionId, ) -> Result, tonic::Status> { - self.active_tx - .lock() - .expect("stale executor rollback active tx") - .take(); + self.active_tx.lock().expect("stale executor rollback active tx").take(); Err(tonic::Status::failed_precondition(format!( "transaction '{}' not found", transaction_id @@ -354,14 +358,16 @@ impl OperationExecutor for CommitNotFoundExecutor { &self, session_id: &str, ) -> Result, tonic::Status> { - Ok(self - .active_tx - .lock() - .expect("commit-missing executor active tx") - .clone() - .map(|transaction_id| { - LivePgTransaction::new(session_id.to_string(), transaction_id, kalamdb_pg::TransactionState::OpenRead, false) - })) + Ok(self.active_tx.lock().expect("commit-missing executor active tx").clone().map( + |transaction_id| { + LivePgTransaction::new( + session_id.to_string(), + transaction_id, + kalamdb_pg::TransactionState::OpenRead, + false, + ) + }, + )) } async fn begin_transaction( @@ -381,10 +387,7 @@ impl OperationExecutor for CommitNotFoundExecutor { _session_id: &str, transaction_id: &TransactionId, ) -> Result, tonic::Status> { - self.active_tx - .lock() - .expect("commit-missing executor commit active tx") - .take(); + self.active_tx.lock().expect("commit-missing executor commit active tx").take(); Err(tonic::Status::failed_precondition(format!( "transaction '{}' not found during commit", transaction_id @@ -448,7 +451,12 @@ impl OperationExecutor for RollbackCommittedExecutor { .expect("rollback-committed executor active tx") .clone() .map(|transaction_id| { - LivePgTransaction::new(session_id.to_string(), transaction_id, kalamdb_pg::TransactionState::OpenRead, false) + LivePgTransaction::new( + session_id.to_string(), + transaction_id, + kalamdb_pg::TransactionState::OpenRead, + false, + ) })) } @@ -613,10 +621,7 @@ impl OperationExecutor for CapturingSqlExecutor { } async fn execute_query(&self, sql: &str) -> Result<(String, Vec), tonic::Status> { - 
self.last_query_sql - .lock() - .expect("capture query sql") - .replace(sql.to_string()); + self.last_query_sql.lock().expect("capture query sql").replace(sql.to_string()); Ok(("ok".to_string(), Vec::new())) } } @@ -995,10 +1000,7 @@ async fn execute_query_passes_json_operator_sql_through_without_rewrite() { .expect("query sql should be captured"); // SQL is now forwarded as-is; DataFusion handles the operator natively. - assert_eq!( - captured, - "SELECT doc->>'name' AS name FROM docs" - ); + assert_eq!(captured, "SELECT doc->>'name' AS name FROM docs"); } #[tokio::test] @@ -1081,10 +1083,9 @@ async fn close_session_rolls_back_via_configured_operation_executor() { .transaction_id; assert!(!tx_id.is_empty()); - service.session_registry().clear_transaction_state_if_matches( - &session_id, - Some(&TransactionId::new(tx_id.clone())), - ); + service + .session_registry() + .clear_transaction_state_if_matches(&session_id, Some(&TransactionId::new(tx_id.clone()))); service .close_session(plain_request(CloseSessionRequest { @@ -1120,10 +1121,9 @@ async fn begin_transaction_reclaims_stale_remote_transaction_via_executor() { .into_inner() .transaction_id; - service.session_registry().clear_transaction_state_if_matches( - &session_id, - Some(&TransactionId::new(tx_id)), - ); + service + .session_registry() + .clear_transaction_state_if_matches(&session_id, Some(&TransactionId::new(tx_id))); service .begin_transaction(plain_request(BeginTransactionRequest { @@ -1172,10 +1172,7 @@ async fn begin_transaction_reconciles_local_state_when_stale_remote_tx_is_missin assert_eq!(first_tx, TX_ID_STALE); assert_eq!(replacement_tx, TX_ID_REPLACEMENT); - let session = service - .session_registry() - .get(&session_id) - .expect("session remains open"); + let session = service.session_registry().get(&session_id).expect("session remains open"); assert_eq!(session.transaction_id(), Some(TX_ID_REPLACEMENT)); } @@ -1212,10 +1209,7 @@ async fn 
commit_transaction_clears_local_state_when_remote_tx_is_already_gone() .expect_err("remote missing transaction should still report an error"); assert_eq!(err.code(), tonic::Code::FailedPrecondition); - let session = service - .session_registry() - .get(&session_id) - .expect("session remains open"); + let session = service.session_registry().get(&session_id).expect("session remains open"); assert_eq!(session.transaction_id(), None); assert_eq!(session.transaction_state(), None); } diff --git a/backend/crates/kalamdb-pg/tests/session_registry.rs b/backend/crates/kalamdb-pg/tests/session_registry.rs index 5d629ae94..fb908d877 100644 --- a/backend/crates/kalamdb-pg/tests/session_registry.rs +++ b/backend/crates/kalamdb-pg/tests/session_registry.rs @@ -82,9 +82,7 @@ fn session_registry_commit_wrong_tx_id_fails() { let _tx_id = registry.begin_transaction("s1").expect("begin"); let wrong_tx_id = TransactionId::new("01960f7b-3d16-7d6d-b26c-7e4db6f25f8d"); - let err = registry - .commit_transaction("s1", &wrong_tx_id) - .expect_err("wrong tx id"); + let err = registry.commit_transaction("s1", &wrong_tx_id).expect_err("wrong tx id"); assert!(err.contains("mismatch")); } diff --git a/backend/crates/kalamdb-pg/tests/support/mod.rs b/backend/crates/kalamdb-pg/tests/support/mod.rs index aa062ea10..812ba3de3 100644 --- a/backend/crates/kalamdb-pg/tests/support/mod.rs +++ b/backend/crates/kalamdb-pg/tests/support/mod.rs @@ -1,23 +1,26 @@ #![allow(dead_code)] -use std::collections::HashMap; -use std::io::Cursor; -use std::sync::Arc; +use std::{collections::HashMap, io::Cursor, sync::Arc}; use arrow::record_batch::RecordBatch; use arrow_ipc::reader::StreamReader; use datafusion_common::ScalarValue; -use kalamdb_commons::conversions::arrow_json_conversion::record_batch_to_json_rows; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; -use 
kalamdb_commons::models::KalamCellValue; -use kalamdb_commons::models::{NamespaceId, TableId, TableName, TransactionId}; -use kalamdb_commons::schemas::ColumnDefault; -use kalamdb_commons::{TableAccess, TableType}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::operations::service::OperationService; -use kalamdb_core::test_helpers::{test_app_context, test_app_context_simple}; +use kalamdb_commons::{ + conversions::arrow_json_conversion::record_batch_to_json_rows, + models::{ + datatypes::KalamDataType, + rows::Row, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + KalamCellValue, NamespaceId, TableId, TableName, TransactionId, + }, + schemas::ColumnDefault, + TableAccess, TableType, +}; +use kalamdb_core::{ + app_context::AppContext, + operations::service::OperationService, + test_helpers::{test_app_context, test_app_context_simple}, +}; use kalamdb_pg::{ BeginTransactionRequest, CloseSessionRequest, CommitTransactionRequest, InsertRpcRequest, KalamPgService, OpenSessionRequest, PgService, RollbackTransactionRequest, ScanRpcRequest, diff --git a/backend/crates/kalamdb-pg/tests/transaction_canonical_id.rs b/backend/crates/kalamdb-pg/tests/transaction_canonical_id.rs index 427c1735c..1470614b1 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_canonical_id.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_canonical_id.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ await_user_leader, begin_transaction, commit_transaction, insert_user_row, new_cluster_user_service_with_tables, open_session, parse_transaction_id, diff --git a/backend/crates/kalamdb-pg/tests/transaction_cluster_failover.rs b/backend/crates/kalamdb-pg/tests/transaction_cluster_failover.rs index 5b767e940..2f6e36059 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_cluster_failover.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_cluster_failover.rs @@ -3,12 +3,10 @@ mod support; use std::collections::BTreeMap; use 
datafusion_common::ScalarValue; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::NodeId; +use kalamdb_commons::models::{rows::Row, NodeId}; use kalamdb_core::transactions::TransactionRaftBinding; use kalamdb_pg::{InsertRpcRequest, PgService, RollbackTransactionRequest}; use ntest::timeout; - use support::{ await_shared_leader, begin_transaction, new_cluster_service_with_tables, open_session, parse_transaction_id, request, scan_shared_rows, diff --git a/backend/crates/kalamdb-pg/tests/transaction_cluster_group_rejection.rs b/backend/crates/kalamdb-pg/tests/transaction_cluster_group_rejection.rs index 0ff5080ee..6cd176c02 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_cluster_group_rejection.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_cluster_group_rejection.rs @@ -4,11 +4,9 @@ use std::collections::BTreeMap; use datafusion_common::ScalarValue; use kalamdb_commons::models::rows::Row; -use kalamdb_core::test_helpers::test_app_context; -use kalamdb_core::transactions::TransactionRaftBinding; +use kalamdb_core::{test_helpers::test_app_context, transactions::TransactionRaftBinding}; use kalamdb_pg::{InsertRpcRequest, PgService}; use ntest::timeout; - use support::{ await_user_leader, begin_transaction, build_service, create_shared_table, create_user_table, insert_user_row, open_session, parse_transaction_id, request, rollback_transaction, diff --git a/backend/crates/kalamdb-pg/tests/transaction_commit.rs b/backend/crates/kalamdb-pg/tests/transaction_commit.rs index 9745ebc95..a7d75e5ab 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_commit.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_commit.rs @@ -3,7 +3,6 @@ mod support; use kalamdb_commons::models::UserId; use kalamdb_sharding::ShardRouter; use ntest::timeout; - use support::{ await_user_leader, begin_transaction, commit_transaction, insert_user_row, new_cluster_user_service_with_tables, open_session, parse_transaction_id, scan_user_rows, diff --git 
a/backend/crates/kalamdb-pg/tests/transaction_disconnect.rs b/backend/crates/kalamdb-pg/tests/transaction_disconnect.rs index 384966a61..45a04f2d4 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_disconnect.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_disconnect.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ begin_transaction, insert_shared_row, new_service_with_tables, open_session, rollback_transaction, scan_shared_rows, diff --git a/backend/crates/kalamdb-pg/tests/transaction_isolation.rs b/backend/crates/kalamdb-pg/tests/transaction_isolation.rs index 1bcd3cd99..9e71a1c24 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_isolation.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_isolation.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ await_shared_leader, begin_transaction, commit_transaction, insert_shared_row, new_cluster_service_with_tables, open_session, scan_shared_rows, diff --git a/backend/crates/kalamdb-pg/tests/transaction_races.rs b/backend/crates/kalamdb-pg/tests/transaction_races.rs index 2d6509365..c8b5381de 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_races.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_races.rs @@ -2,12 +2,11 @@ mod support; use std::sync::Arc; -use ntest::timeout; - use kalamdb_core::transactions::ExecutionOwnerKey; use kalamdb_pg::{ CloseSessionRequest, CommitTransactionRequest, PgService, RollbackTransactionRequest, }; +use ntest::timeout; use support::{ begin_transaction, insert_shared_row, new_service_with_tables, open_session, parse_transaction_id, request, scan_shared_rows, @@ -74,7 +73,8 @@ async fn pg_commit_vs_rollback_repeats_without_leaking_state() { let rollback_ok = rollback_result.is_ok(); assert_ne!( commit_ok, rollback_ok, - "exactly one PG terminal RPC should succeed: commit={commit_result:?} rollback={rollback_result:?}" + "exactly one PG terminal RPC should succeed: commit={commit_result:?} \ + 
rollback={rollback_result:?}" ); if commit_ok { committed_names.push(row_name); diff --git a/backend/crates/kalamdb-pg/tests/transaction_read_your_writes.rs b/backend/crates/kalamdb-pg/tests/transaction_read_your_writes.rs index d22637b42..15508b157 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_read_your_writes.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_read_your_writes.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ begin_transaction, insert_shared_row, new_service_with_tables, open_session, rollback_transaction, scan_shared_rows, diff --git a/backend/crates/kalamdb-pg/tests/transaction_rollback.rs b/backend/crates/kalamdb-pg/tests/transaction_rollback.rs index 0dab34539..b908a04ec 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_rollback.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_rollback.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ begin_transaction, insert_shared_row, new_service_with_tables, open_session, rollback_transaction, scan_shared_rows, diff --git a/backend/crates/kalamdb-pg/tests/transaction_session_close.rs b/backend/crates/kalamdb-pg/tests/transaction_session_close.rs index cc8c788be..95e1317bb 100644 --- a/backend/crates/kalamdb-pg/tests/transaction_session_close.rs +++ b/backend/crates/kalamdb-pg/tests/transaction_session_close.rs @@ -1,7 +1,6 @@ mod support; use ntest::timeout; - use support::{ begin_transaction, close_session, insert_shared_row, new_service_with_tables, open_session, scan_shared_rows, diff --git a/backend/crates/kalamdb-plan-cache/src/lib.rs b/backend/crates/kalamdb-plan-cache/src/lib.rs index ab42f4090..816c8a5bd 100644 --- a/backend/crates/kalamdb-plan-cache/src/lib.rs +++ b/backend/crates/kalamdb-plan-cache/src/lib.rs @@ -1,10 +1,8 @@ -use datafusion::logical_expr::LogicalPlan; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::{NamespaceId, Role, TableId}; +use std::{sync::Arc, 
time::Duration}; + +use datafusion::{logical_expr::LogicalPlan, scalar::ScalarValue}; +use kalamdb_commons::{schemas::TableType, NamespaceId, Role, TableId}; use moka::sync::Cache; -use std::sync::Arc; -use std::time::Duration; const DEFAULT_PLAN_MAX_ENTRIES: u64 = 1000; const DEFAULT_IDLE_TTL_SECS: u64 = 900; @@ -211,11 +209,12 @@ impl Default for SqlCacheRegistry { #[cfg(test)] mod tests { - use super::*; - use datafusion::common::DFSchema; - use datafusion::logical_expr::EmptyRelation; use std::sync::Arc; + use datafusion::{common::DFSchema, logical_expr::EmptyRelation}; + + use super::*; + #[test] fn clearing_registry_invalidates_all_caches() { let registry = SqlCacheRegistry::default(); diff --git a/backend/crates/kalamdb-publisher/src/offset.rs b/backend/crates/kalamdb-publisher/src/offset.rs index b764ef7b8..1014fa938 100644 --- a/backend/crates/kalamdb-publisher/src/offset.rs +++ b/backend/crates/kalamdb-publisher/src/offset.rs @@ -1,9 +1,12 @@ //! Atomic offset allocation for topic partitions. +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use dashmap::DashMap; use kalamdb_commons::models::TopicId; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; #[derive(Clone, Debug, PartialEq, Eq, Hash)] struct TopicPartitionKey { diff --git a/backend/crates/kalamdb-publisher/src/payload.rs b/backend/crates/kalamdb-publisher/src/payload.rs index 1609508cb..ea4c4a393 100644 --- a/backend/crates/kalamdb-publisher/src/payload.rs +++ b/backend/crates/kalamdb-publisher/src/payload.rs @@ -145,8 +145,10 @@ pub(crate) fn hash_row(row: &Row) -> u64 { } } - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; let mut hasher = DefaultHasher::new(); // Fallback: hash column names only @@ -159,8 +161,10 @@ pub(crate) fn hash_row(row: &Row) -> u64 { /// Hash a serialized topic key using the same stable hash as partition selection. 
pub(crate) fn hash_key(key: &str) -> u64 { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; let mut hasher = DefaultHasher::new(); key.hash(&mut hasher); diff --git a/backend/crates/kalamdb-publisher/src/service.rs b/backend/crates/kalamdb-publisher/src/service.rs index 836025021..1021bf74a 100644 --- a/backend/crates/kalamdb-publisher/src/service.rs +++ b/backend/crates/kalamdb-publisher/src/service.rs @@ -7,8 +7,10 @@ //! - Track consumer group offsets //! - Provide fast TableId → Topics lookup -use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; +use std::{ + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; use dashmap::DashMap; use kalamdb_commons::{ @@ -23,10 +25,7 @@ use kalamdb_system::providers::{ }; use kalamdb_tables::{TopicMessage, TopicMessageStore}; -use crate::models::TopicCacheStats; -use crate::offset::OffsetAllocator; -use crate::payload; -use crate::routing::RouteCache; +use crate::{models::TopicCacheStats, offset::OffsetAllocator, payload, routing::RouteCache}; /// Lookup primary-key columns for a table so topic keys can be derived from /// stable row identity instead of the full row payload. @@ -140,6 +139,41 @@ impl ClaimState { self.cursor = next; } } + + /// Return the next server-owned cursor and maximum contiguous fetch size + /// before a still-pending claim. 
+ fn next_available_window(&self, requested_limit: usize) -> (u64, usize) { + let mut next = self.cursor; + + loop { + let mut advanced = false; + for claim in &self.pending { + if claim.start <= next && next < claim.end_exclusive { + next = claim.end_exclusive; + advanced = true; + } + } + + if !advanced { + break; + } + } + + let next_pending_start = self + .pending + .iter() + .filter(|claim| claim.start > next) + .map(|claim| claim.start) + .min(); + + let available_offsets = next_pending_start + .map(|claim_start| claim_start.saturating_sub(next)) + .unwrap_or(u64::MAX); + let available_limit = + requested_limit.min(available_offsets.try_into().unwrap_or(usize::MAX)); + + (next, available_limit) + } } /// Topic Publisher Service — unified service for all topic operations. @@ -518,7 +552,11 @@ impl TopicPublisherService { ); let msg_id = message.id(); - //TODO: Use the store to serialize the message directly to avoid redundant serialization in TopicMessage::new and TopicMessageStore::put. This would require refactoring TopicMessage to separate the in-memory model from the serialized form, or adding a method to get the pre-encoded bytes without going through the full struct construction. + // TODO: Use the store to serialize the message directly to avoid redundant + // serialization in TopicMessage::new and TopicMessageStore::put. This would + // require refactoring TopicMessage to separate the in-memory model from the + // serialized form, or adding a method to get the pre-encoded bytes without + // going through the full struct construction. let key_encoded = kalamdb_commons::StorageKey::storage_key(&msg_id); let value_encoded = kalamdb_commons::KSerializable::encode(&message).map_err(|e| { @@ -560,10 +598,10 @@ impl TopicPublisherService { /// Fetch messages for a consumer group while claiming offsets in-memory. 
/// /// Guarantees: - /// - Concurrent consumers in the same group and partition never receive - /// overlapping offset ranges (serialized via DashMap entry lock). - /// - If a consumer does not ack within [`VISIBILITY_TIMEOUT`], the - /// claimed range expires and is re-delivered to the next consumer. + /// - Concurrent consumers in the same group and partition never receive overlapping offset + /// ranges (serialized via DashMap entry lock). + /// - If a consumer does not ack within [`VISIBILITY_TIMEOUT`], the claimed range expires and is + /// re-delivered to the next consumer. pub fn fetch_messages_for_group( &self, topic_id: &TopicId, @@ -572,33 +610,61 @@ impl TopicPublisherService { start_offset: u64, limit: usize, ) -> Result> { + if limit == 0 { + return Ok(Vec::new()); + } + let cursor_key = GroupPartitionKey::new(topic_id, group_id, partition_id); - let mut state = self - .group_claim_state - .entry(cursor_key) - .or_insert_with(|| ClaimState::new(start_offset)); - // Expire stale claims so crashed consumers don't block delivery. - state.expire_stale_claims(Instant::now(), self.visibility_timeout); + loop { + let (effective_start, effective_limit) = { + let mut state = self + .group_claim_state + .entry(cursor_key.clone()) + .or_insert_with(|| ClaimState::new(start_offset)); - let effective_start = state.cursor.max(start_offset); + // Expire stale claims so crashed consumers don't block delivery. 
+ state.expire_stale_claims(Instant::now(), self.visibility_timeout); + state.next_available_window(limit) + }; - let messages = self - .message_store - .fetch_messages(topic_id, partition_id, effective_start, limit) - .map_err(|e| CommonError::Internal(format!("Failed to fetch messages: {}", e)))?; + if effective_limit == 0 { + return Ok(Vec::new()); + } + + let messages = self + .message_store + .fetch_messages(topic_id, partition_id, effective_start, effective_limit) + .map_err(|e| CommonError::Internal(format!("Failed to fetch messages: {}", e)))?; + + let Some(last_message) = messages.last() else { + return Ok(messages); + }; + + let claim_start = + messages.first().map(|message| message.offset).unwrap_or(effective_start); + let end_exclusive = last_message.offset + 1; + let claimed_at = Instant::now(); + let mut state = self + .group_claim_state + .entry(cursor_key.clone()) + .or_insert_with(|| ClaimState::new(start_offset)); + + state.expire_stale_claims(claimed_at, self.visibility_timeout); + let (current_start, _) = state.next_available_window(limit); + if current_start != effective_start { + continue; + } - if !messages.is_empty() { - let end_exclusive = messages.last().unwrap().offset + 1; state.cursor = end_exclusive; state.pending.push(PendingClaim { - start: effective_start, + start: claim_start, end_exclusive, - claimed_at: Instant::now(), + claimed_at, }); - } - Ok(messages) + return Ok(messages); + } } /// Get the latest offset for a topic partition. 
@@ -749,14 +815,22 @@ impl kalamdb_system::TopicPublisher for TopicPublisherService { #[cfg(test)] mod tests { - use super::*; use std::collections::HashSet; + use std::sync::{ + atomic::{AtomicBool, Ordering}, + Condvar, Mutex as StdMutex, + }; + use std::time::Duration as StdDuration; + use std::{sync::mpsc, thread}; use datafusion::scalar::ScalarValue; use kalamdb_commons::models::{NamespaceId, PayloadMode, TableName}; + use kalamdb_store::storage_trait::{KvIterator, Operation, Partition, StorageBackend}; use kalamdb_store::test_utils::InMemoryBackend; use kalamdb_system::providers::topics::TopicRoute; + use super::*; + struct FixedPrimaryKeyLookup { columns: Vec, } @@ -815,6 +889,132 @@ mod tests { ) } + struct PausingScanBackend { + inner: InMemoryBackend, + pause_next_scan: AtomicBool, + scan_started: (StdMutex, Condvar), + release_scan: (StdMutex, Condvar), + } + + impl PausingScanBackend { + fn new() -> Self { + Self { + inner: InMemoryBackend::new(), + pause_next_scan: AtomicBool::new(false), + scan_started: (StdMutex::new(false), Condvar::new()), + release_scan: (StdMutex::new(false), Condvar::new()), + } + } + + fn pause_next_scan(&self) { + self.pause_next_scan.store(true, Ordering::SeqCst); + *self.scan_started.0.lock().unwrap() = false; + *self.release_scan.0.lock().unwrap() = false; + } + + fn wait_for_paused_scan(&self) { + let (lock, cvar) = &self.scan_started; + let started = lock.lock().unwrap(); + let (started, _) = cvar + .wait_timeout_while(started, StdDuration::from_secs(1), |started| !*started) + .unwrap(); + assert!(*started, "first consumer should enter the paused storage scan"); + } + + fn release_paused_scan(&self) { + let (lock, cvar) = &self.release_scan; + *lock.lock().unwrap() = true; + cvar.notify_all(); + } + } + + impl StorageBackend for PausingScanBackend { + fn get( + &self, + partition: &Partition, + key: &[u8], + ) -> kalamdb_store::storage_trait::Result>> { + self.inner.get(partition, key) + } + + fn put( + &self, + 
partition: &Partition, + key: &[u8], + value: &[u8], + ) -> kalamdb_store::storage_trait::Result<()> { + self.inner.put(partition, key, value) + } + + fn delete( + &self, + partition: &Partition, + key: &[u8], + ) -> kalamdb_store::storage_trait::Result<()> { + self.inner.delete(partition, key) + } + + fn batch(&self, operations: Vec) -> kalamdb_store::storage_trait::Result<()> { + self.inner.batch(operations) + } + + fn scan( + &self, + partition: &Partition, + prefix: Option<&[u8]>, + start_key: Option<&[u8]>, + limit: Option, + ) -> kalamdb_store::storage_trait::Result> { + if self.pause_next_scan.swap(false, Ordering::SeqCst) { + let (started_lock, started_cvar) = &self.scan_started; + *started_lock.lock().unwrap() = true; + started_cvar.notify_all(); + + let (release_lock, release_cvar) = &self.release_scan; + let released = release_lock.lock().unwrap(); + let (released, _) = release_cvar + .wait_timeout_while(released, StdDuration::from_secs(2), |released| !*released) + .unwrap(); + assert!(*released, "paused scan should be released by the test"); + } + + self.inner.scan(partition, prefix, start_key, limit) + } + + fn partition_exists(&self, partition: &Partition) -> bool { + self.inner.partition_exists(partition) + } + + fn create_partition( + &self, + partition: &Partition, + ) -> kalamdb_store::storage_trait::Result<()> { + self.inner.create_partition(partition) + } + + fn list_partitions(&self) -> kalamdb_store::storage_trait::Result> { + self.inner.list_partitions() + } + + fn drop_partition( + &self, + partition: &Partition, + ) -> kalamdb_store::storage_trait::Result<()> { + self.inner.drop_partition(partition) + } + + fn compact_partition( + &self, + partition: &Partition, + ) -> kalamdb_store::storage_trait::Result<()> { + self.inner.compact_partition(partition) + } + + fn stats(&self) -> kalamdb_store::storage_trait::StorageStats { + self.inner.stats() + } + } + #[test] fn test_service_creation() { let backend = Arc::new(InMemoryBackend::new()); @@ 
-931,8 +1131,12 @@ mod tests { let topic_id = TopicId::new("pk_batch_topic"); let partitions = 32; - let topic = - create_test_topic_with_partitions(topic_id.clone(), table_id.clone(), TopicOp::Insert, partitions); + let topic = create_test_topic_with_partitions( + topic_id.clone(), + table_id.clone(), + TopicOp::Insert, + partitions, + ); service.add_topic(topic); let first = create_test_row(7, "alpha"); @@ -1026,6 +1230,79 @@ mod tests { ); } + #[test] + fn test_group_fetch_does_not_hold_claim_state_during_storage_scan() { + let backend = Arc::new(PausingScanBackend::new()); + let storage_backend: Arc = backend.clone(); + let service = Arc::new(TopicPublisherService::new(storage_backend)); + + let ns = NamespaceId::new("test_ns"); + let table_id = TableId::new(ns.clone(), TableName::from("events")); + let topic_id = TopicId::new("nonblocking_claim_topic"); + let group_id = ConsumerGroupId::new("nonblocking_claim_group"); + + let topic = create_test_topic_with_partitions( + topic_id.clone(), + table_id.clone(), + TopicOp::Insert, + 1, + ); + service.add_topic(topic); + + for idx in 0..30 { + let row = create_test_row(idx, &format!("event_{}", idx)); + service.publish_message(&table_id, TopicOp::Insert, &row, None).unwrap(); + } + + backend.pause_next_scan(); + + let first_service = service.clone(); + let first_topic = topic_id.clone(); + let first_group = group_id.clone(); + let first_handle = thread::spawn(move || { + first_service + .fetch_messages_for_group(&first_topic, &first_group, 0, 0, 10) + .unwrap() + }); + + backend.wait_for_paused_scan(); + + let (tx, rx) = mpsc::channel(); + let second_service = service.clone(); + let second_topic = topic_id.clone(); + let second_group = group_id.clone(); + thread::spawn(move || { + let batch = second_service + .fetch_messages_for_group(&second_topic, &second_group, 0, 0, 10) + .unwrap(); + let _ = tx.send(batch); + }); + + let second_batch = match rx.recv_timeout(StdDuration::from_millis(100)) { + Ok(batch) => batch, 
+ Err(_) => { + backend.release_paused_scan(); + let _ = first_handle.join(); + panic!("second consumer should not wait for the first consumer's storage scan"); + }, + }; + + backend.release_paused_scan(); + let first_batch = first_handle.join().unwrap(); + + let first_offsets: HashSet = + first_batch.iter().map(|message| message.offset).collect(); + let second_offsets: HashSet = + second_batch.iter().map(|message| message.offset).collect(); + + assert_eq!(first_offsets.len(), 10); + assert_eq!(second_offsets.len(), 10); + assert!( + first_offsets.is_disjoint(&second_offsets), + "concurrent same-group fetches must reserve disjoint offsets" + ); + } + #[test] fn test_out_of_order_ack_does_not_regress_offset() { let backend = Arc::new(InMemoryBackend::new()); @@ -1133,6 +1410,100 @@ mod tests { } } + #[test] + fn test_expired_claim_redelivery_skips_still_pending_ranges() { + let backend = Arc::new(InMemoryBackend::new()); + let service = + TopicPublisherService::with_visibility_timeout(backend, StdDuration::from_millis(80)); + + let ns = NamespaceId::new("test_ns"); + let table_id = TableId::new(ns.clone(), TableName::from("events")); + let topic_id = TopicId::new("partial_expiry_topic"); + let group_id = ConsumerGroupId::new("partial_expiry_group"); + + let topic = create_test_topic_with_partitions( + topic_id.clone(), + table_id.clone(), + TopicOp::Insert, + 1, + ); + service.add_topic(topic); + + for idx in 0..30 { + let row = create_test_row(idx, &format!("event_{}", idx)); + service.publish_message(&table_id, TopicOp::Insert, &row, None).unwrap(); + } + + let first = service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 10).unwrap(); + assert_eq!(first.first().map(|message| message.offset), Some(0)); + + thread::sleep(StdDuration::from_millis(50)); + + let second = service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 10).unwrap(); + assert_eq!(second.first().map(|message| message.offset), Some(10)); + + thread::sleep(StdDuration::from_millis(50)); 
+ + let redelivered = service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 10).unwrap(); + assert_eq!(redelivered.first().map(|message| message.offset), Some(0)); + + let next = service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 10).unwrap(); + assert_eq!( + next.first().map(|message| message.offset), + Some(20), + "fetch should skip the still-pending 10..20 range after redelivering 0..10" + ); + } + + #[test] + fn test_expired_claim_redelivery_uses_group_cursor_not_client_position() { + let backend = Arc::new(InMemoryBackend::new()); + let service = + TopicPublisherService::with_visibility_timeout(backend, StdDuration::from_millis(120)); + + let ns = NamespaceId::new("test_ns"); + let table_id = TableId::new(ns.clone(), TableName::from("events")); + let topic_id = TopicId::new("position_ahead_recovery_topic"); + let group_id = ConsumerGroupId::new("position_ahead_recovery_group"); + + let topic = create_test_topic_with_partitions( + topic_id.clone(), + table_id.clone(), + TopicOp::Insert, + 1, + ); + service.add_topic(topic); + + for idx in 0..480 { + let row = create_test_row(idx, &format!("event_{}", idx)); + service.publish_message(&table_id, TopicOp::Insert, &row, None).unwrap(); + } + + let crashed_claim = + service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 160).unwrap(); + assert_eq!(crashed_claim.first().map(|message| message.offset), Some(0)); + assert_eq!(crashed_claim.last().map(|message| message.offset), Some(159)); + + thread::sleep(StdDuration::from_millis(80)); + + let active_tail_claim = + service.fetch_messages_for_group(&topic_id, &group_id, 0, 0, 120).unwrap(); + assert_eq!(active_tail_claim.first().map(|message| message.offset), Some(160)); + assert_eq!(active_tail_claim.last().map(|message| message.offset), Some(279)); + + thread::sleep(StdDuration::from_millis(60)); + + let recovered_prefix = + service.fetch_messages_for_group(&topic_id, &group_id, 0, 280, 120).unwrap(); + 
assert_eq!(recovered_prefix.first().map(|message| message.offset), Some(0)); + assert_eq!(recovered_prefix.last().map(|message| message.offset), Some(119)); + + let recovered_gap = + service.fetch_messages_for_group(&topic_id, &group_id, 0, 120, 120).unwrap(); + assert_eq!(recovered_gap.first().map(|message| message.offset), Some(120)); + assert_eq!(recovered_gap.last().map(|message| message.offset), Some(159)); + } + #[test] fn test_empty_partition_returns_empty() { let backend = Arc::new(InMemoryBackend::new()); diff --git a/backend/crates/kalamdb-raft/src/applier/meta_applier.rs b/backend/crates/kalamdb-raft/src/applier/meta_applier.rs index b59a9736f..b020c2ca4 100644 --- a/backend/crates/kalamdb-raft/src/applier/meta_applier.rs +++ b/backend/crates/kalamdb-raft/src/applier/meta_applier.rs @@ -10,12 +10,11 @@ //! providers (namespaces, tables, storages, users, jobs). use async_trait::async_trait; -use kalamdb_commons::models::schemas::{TableDefinition, TableType}; -use kalamdb_commons::models::{JobId, NamespaceId, NodeId, StorageId, TableId, UserId}; -use kalamdb_system::providers::jobs::models::Job; -use kalamdb_system::JobStatus; -use kalamdb_system::Storage; -use kalamdb_system::User; +use kalamdb_commons::models::{ + schemas::{TableDefinition, TableType}, + JobId, NamespaceId, NodeId, StorageId, TableId, UserId, +}; +use kalamdb_system::{providers::jobs::models::Job, JobStatus, Storage, User}; use crate::RaftError; @@ -319,12 +318,18 @@ impl MetaApplier for NoOpMetaApplier { #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::{AuthType, NamespaceId, TableName}; - use kalamdb_commons::Role; + use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }; + + use kalamdb_commons::{ + models::{AuthType, NamespaceId, TableName}, + Role, + }; use kalamdb_system::JobType; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; + + use super::*; /// Mock applier that tracks all operations struct MockMetaApplier { diff --git 
a/backend/crates/kalamdb-raft/src/applier/mod.rs b/backend/crates/kalamdb-raft/src/applier/mod.rs index 99d56f16a..2f758871b 100644 --- a/backend/crates/kalamdb-raft/src/applier/mod.rs +++ b/backend/crates/kalamdb-raft/src/applier/mod.rs @@ -16,7 +16,6 @@ mod user_data_applier; // Unified Meta applier pub use meta_applier::{MetaApplier, NoOpMetaApplier}; - // Data appliers (split into separate files for better organization) pub use shared_data_applier::{NoOpSharedDataApplier, SharedDataApplier}; pub use user_data_applier::{NoOpUserDataApplier, UserDataApplier}; diff --git a/backend/crates/kalamdb-raft/src/applier/shared_data_applier.rs b/backend/crates/kalamdb-raft/src/applier/shared_data_applier.rs index ba6bac991..5629ecf84 100644 --- a/backend/crates/kalamdb-raft/src/applier/shared_data_applier.rs +++ b/backend/crates/kalamdb-raft/src/applier/shared_data_applier.rs @@ -6,8 +6,7 @@ //! The implementation lives in kalamdb-core using provider infrastructure. use async_trait::async_trait; -use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableId; +use kalamdb_commons::{models::TransactionId, TableId}; use kalamdb_transactions::StagedMutation; use crate::{RaftError, TransactionApplyResult}; @@ -30,6 +29,7 @@ pub trait SharedDataApplier: Send + Sync { &self, table_id: &TableId, rows: &[kalamdb_commons::models::rows::Row], + commit_seq: u64, ) -> Result; /// Update rows in a shared table @@ -46,6 +46,7 @@ pub trait SharedDataApplier: Send + Sync { table_id: &TableId, updates: &[kalamdb_commons::models::rows::Row], filter: Option<&str>, + commit_seq: u64, ) -> Result; /// Delete rows from a shared table @@ -60,6 +61,7 @@ pub trait SharedDataApplier: Send + Sync { &self, table_id: &TableId, pk_values: Option<&[String]>, + commit_seq: u64, ) -> Result; /// Apply an explicit-transaction write set inside one state-machine cycle. 
@@ -67,6 +69,7 @@ pub trait SharedDataApplier: Send + Sync { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + commit_seq: u64, ) -> Result; } @@ -79,6 +82,7 @@ impl SharedDataApplier for NoOpSharedDataApplier { &self, _table_id: &TableId, _rows: &[kalamdb_commons::models::rows::Row], + _commit_seq: u64, ) -> Result { Ok(0) } @@ -88,6 +92,7 @@ impl SharedDataApplier for NoOpSharedDataApplier { _table_id: &TableId, _updates: &[kalamdb_commons::models::rows::Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { Ok(0) } @@ -96,6 +101,7 @@ impl SharedDataApplier for NoOpSharedDataApplier { &self, _table_id: &TableId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> Result { Ok(0) } @@ -104,6 +110,7 @@ impl SharedDataApplier for NoOpSharedDataApplier { &self, _transaction_id: &TransactionId, _mutations: &[StagedMutation], + _commit_seq: u64, ) -> Result { Ok(TransactionApplyResult::default()) } @@ -111,10 +118,14 @@ impl SharedDataApplier for NoOpSharedDataApplier { #[cfg(test)] mod tests { - use super::*; + use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }; + use kalamdb_commons::models::{NamespaceId, TableName}; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; + + use super::*; /// Mock shared applier for testing struct MockSharedDataApplier { @@ -135,6 +146,7 @@ mod tests { &self, _table_id: &TableId, rows: &[kalamdb_commons::models::rows::Row], + _commit_seq: u64, ) -> Result { self.insert_count.fetch_add(1, Ordering::SeqCst); Ok(rows.len()) @@ -145,6 +157,7 @@ mod tests { _table_id: &TableId, _updates: &[kalamdb_commons::models::rows::Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { Ok(1) } @@ -153,6 +166,7 @@ mod tests { &self, _table_id: &TableId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> Result { Ok(1) } @@ -161,6 +175,7 @@ mod tests { &self, _transaction_id: &TransactionId, mutations: &[StagedMutation], + _commit_seq: u64, ) -> Result { Ok(TransactionApplyResult 
{ rows_affected: mutations.len(), @@ -178,7 +193,7 @@ mod tests { let table_id = TableId::new(NamespaceId::from("shared_ns"), TableName::from("shared_table")); - let result = applier.insert(&table_id, &[]).await; + let result = applier.insert(&table_id, &[], 1).await; assert!(result.is_ok()); assert_eq!(applier.insert_count.load(Ordering::SeqCst), 1); } @@ -188,8 +203,8 @@ mod tests { let applier = NoOpSharedDataApplier; let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); - assert_eq!(applier.insert(&table_id, &[]).await.unwrap(), 0); - assert_eq!(applier.update(&table_id, &[], None).await.unwrap(), 0); - assert_eq!(applier.delete(&table_id, None).await.unwrap(), 0); + assert_eq!(applier.insert(&table_id, &[], 1).await.unwrap(), 0); + assert_eq!(applier.update(&table_id, &[], None, 1).await.unwrap(), 0); + assert_eq!(applier.delete(&table_id, None, 1).await.unwrap(), 0); } } diff --git a/backend/crates/kalamdb-raft/src/applier/user_data_applier.rs b/backend/crates/kalamdb-raft/src/applier/user_data_applier.rs index 681af7a03..57e04f692 100644 --- a/backend/crates/kalamdb-raft/src/applier/user_data_applier.rs +++ b/backend/crates/kalamdb-raft/src/applier/user_data_applier.rs @@ -7,8 +7,10 @@ //! The implementation lives in kalamdb-core using provider infrastructure. 
use async_trait::async_trait; -use kalamdb_commons::models::{TransactionId, UserId}; -use kalamdb_commons::TableId; +use kalamdb_commons::{ + models::{TransactionId, UserId}, + TableId, +}; use kalamdb_transactions::StagedMutation; use crate::{RaftError, TransactionApplyResult}; @@ -39,6 +41,7 @@ pub trait UserDataApplier: Send + Sync { table_id: &TableId, user_id: &UserId, rows: &[kalamdb_commons::models::rows::Row], + commit_seq: u64, ) -> Result; /// Update rows in a user table @@ -57,6 +60,7 @@ pub trait UserDataApplier: Send + Sync { user_id: &UserId, updates: &[kalamdb_commons::models::rows::Row], filter: Option<&str>, + commit_seq: u64, ) -> Result; /// Delete rows from a user table @@ -73,6 +77,7 @@ pub trait UserDataApplier: Send + Sync { table_id: &TableId, user_id: &UserId, pk_values: Option<&[String]>, + commit_seq: u64, ) -> Result; /// Apply an explicit-transaction write set inside one state-machine cycle. @@ -80,6 +85,7 @@ pub trait UserDataApplier: Send + Sync { &self, transaction_id: &TransactionId, mutations: &[StagedMutation], + commit_seq: u64, ) -> Result; } @@ -93,6 +99,7 @@ impl UserDataApplier for NoOpUserDataApplier { _table_id: &TableId, _user_id: &UserId, _rows: &[kalamdb_commons::models::rows::Row], + _commit_seq: u64, ) -> Result { Ok(0) } @@ -103,6 +110,7 @@ impl UserDataApplier for NoOpUserDataApplier { _user_id: &UserId, _updates: &[kalamdb_commons::models::rows::Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { Ok(0) } @@ -112,6 +120,7 @@ impl UserDataApplier for NoOpUserDataApplier { _table_id: &TableId, _user_id: &UserId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> Result { Ok(0) } @@ -120,6 +129,7 @@ impl UserDataApplier for NoOpUserDataApplier { &self, _transaction_id: &TransactionId, _mutations: &[StagedMutation], + _commit_seq: u64, ) -> Result { Ok(TransactionApplyResult::default()) } @@ -127,10 +137,14 @@ impl UserDataApplier for NoOpUserDataApplier { #[cfg(test)] mod tests { - use super::*; + use 
std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }; + use kalamdb_commons::models::{NamespaceId, TableName}; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; + + use super::*; /// Mock applier that tracks calls for testing struct MockUserDataApplier { @@ -164,6 +178,7 @@ mod tests { _table_id: &TableId, _user_id: &UserId, rows: &[kalamdb_commons::models::rows::Row], + _commit_seq: u64, ) -> Result { self.insert_count.fetch_add(1, Ordering::SeqCst); Ok(rows.len()) @@ -175,6 +190,7 @@ mod tests { _user_id: &UserId, _updates: &[kalamdb_commons::models::rows::Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { self.update_count.fetch_add(1, Ordering::SeqCst); Ok(1) @@ -185,6 +201,7 @@ mod tests { _table_id: &TableId, _user_id: &UserId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> Result { self.delete_count.fetch_add(1, Ordering::SeqCst); Ok(1) @@ -194,6 +211,7 @@ mod tests { &self, _transaction_id: &TransactionId, mutations: &[StagedMutation], + _commit_seq: u64, ) -> Result { Ok(TransactionApplyResult { rows_affected: mutations.len(), @@ -211,7 +229,7 @@ mod tests { let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); let user_id = UserId::from("user_123"); - let result = applier.insert(&table_id, &user_id, &[]).await; + let result = applier.insert(&table_id, &user_id, &[], 1).await; assert!(result.is_ok()); assert_eq!(applier.get_counts(), (1, 0, 0)); } @@ -222,7 +240,7 @@ mod tests { let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); let user_id = UserId::from("user_123"); - let result = applier.update(&table_id, &user_id, &[], None).await; + let result = applier.update(&table_id, &user_id, &[], None, 1).await; assert!(result.is_ok()); assert_eq!(applier.get_counts(), (0, 1, 0)); } @@ -233,7 +251,7 @@ mod tests { let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); let user_id = 
UserId::from("user_123"); - let result = applier.delete(&table_id, &user_id, None).await; + let result = applier.delete(&table_id, &user_id, None, 1).await; assert!(result.is_ok()); assert_eq!(result.unwrap(), 1); assert_eq!(applier.get_counts(), (0, 0, 1)); @@ -245,9 +263,9 @@ mod tests { let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); let user_id = UserId::from("user_123"); - assert_eq!(applier.insert(&table_id, &user_id, &[]).await.unwrap(), 0); - assert_eq!(applier.update(&table_id, &user_id, &[], None).await.unwrap(), 0); - assert_eq!(applier.delete(&table_id, &user_id, None).await.unwrap(), 0); + assert_eq!(applier.insert(&table_id, &user_id, &[], 1).await.unwrap(), 0); + assert_eq!(applier.update(&table_id, &user_id, &[], None, 1).await.unwrap(), 0); + assert_eq!(applier.delete(&table_id, &user_id, None, 1).await.unwrap(), 0); } #[tokio::test] @@ -257,11 +275,11 @@ mod tests { let user_id = UserId::from("user_123"); // Update with filter - let result = applier.update(&table_id, &user_id, &[], Some("filter_value")).await; + let result = applier.update(&table_id, &user_id, &[], Some("filter_value"), 1).await; assert!(result.is_ok()); // Delete with filter - let result = applier.delete(&table_id, &user_id, Some(&["pk_1".to_string()])).await; + let result = applier.delete(&table_id, &user_id, Some(&["pk_1".to_string()]), 1).await; assert!(result.is_ok()); assert_eq!(applier.get_counts(), (0, 1, 1)); @@ -273,10 +291,10 @@ mod tests { let table_id = TableId::new(NamespaceId::from("test_ns"), TableName::from("test_table")); let user_id = UserId::from("user_123"); - applier.insert(&table_id, &user_id, &[]).await.unwrap(); - applier.insert(&table_id, &user_id, &[]).await.unwrap(); - applier.update(&table_id, &user_id, &[], None).await.unwrap(); - applier.delete(&table_id, &user_id, None).await.unwrap(); + applier.insert(&table_id, &user_id, &[], 1).await.unwrap(); + applier.insert(&table_id, &user_id, &[], 2).await.unwrap(); 
+ applier.update(&table_id, &user_id, &[], None, 3).await.unwrap(); + applier.delete(&table_id, &user_id, None, 4).await.unwrap(); assert_eq!(applier.get_counts(), (2, 1, 1)); } diff --git a/backend/crates/kalamdb-raft/src/cluster_types.rs b/backend/crates/kalamdb-raft/src/cluster_types.rs index 373edb2a7..31f614b20 100644 --- a/backend/crates/kalamdb-raft/src/cluster_types.rs +++ b/backend/crates/kalamdb-raft/src/cluster_types.rs @@ -2,14 +2,13 @@ //! //! Re-export OpenRaft's ServerState and provide a NodeStatus enum for node health tracking. -use openraft::ServerState; -use serde::{Deserialize, Serialize}; -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; +use openraft::ServerState; // Re-export OpenRaft's ServerState as NodeRole for consistency // ServerState has: Leader, Follower, Learner, Candidate, Shutdown pub use openraft::ServerState as NodeRole; +use serde::{Deserialize, Serialize}; /// Helper trait to convert ServerState to string pub trait ServerStateExt { diff --git a/backend/crates/kalamdb-raft/src/codec/command_codec.rs b/backend/crates/kalamdb-raft/src/codec/command_codec.rs index 7353a6c59..02059b661 100644 --- a/backend/crates/kalamdb-raft/src/codec/command_codec.rs +++ b/backend/crates/kalamdb-raft/src/codec/command_codec.rs @@ -1,9 +1,8 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize}; -use crate::error::RaftError; use crate::{ - DataResponse, MetaCommand, MetaResponse, RaftCommand, RaftResponse, SharedDataCommand, - UserDataCommand, + error::RaftError, DataResponse, MetaCommand, MetaResponse, RaftCommand, RaftResponse, + SharedDataCommand, UserDataCommand, }; const COMMAND_WIRE_VERSION: u16 = 1; @@ -119,13 +118,16 @@ pub fn decode_raft_response(bytes: &[u8]) -> Result { #[cfg(test)] mod tests { + use std::collections::BTreeMap; + + use kalamdb_commons::{ + models::{rows::Row, NamespaceId, NodeId, TableName, UserId}, + TableId, TableType, + }; + use kalamdb_transactions::StagedMutation; + use super::*; use 
crate::{DataResponse, MetaResponse}; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{NamespaceId, NodeId, TableName, UserId}; - use kalamdb_commons::{TableId, TableType}; - use kalamdb_transactions::StagedMutation; - use std::collections::BTreeMap; #[test] fn meta_response_roundtrip() { diff --git a/backend/crates/kalamdb-raft/src/commands/data_response.rs b/backend/crates/kalamdb-raft/src/commands/data_response.rs index 78843e8fc..c25e74b58 100644 --- a/backend/crates/kalamdb-raft/src/commands/data_response.rs +++ b/backend/crates/kalamdb-raft/src/commands/data_response.rs @@ -3,6 +3,22 @@ //! Shared response type for both user and shared data operations. use serde::{Deserialize, Serialize}; +use crate::GroupId; + +const COMMIT_SEQ_GROUP_BITS: u32 = 16; +const COMMIT_SEQ_GROUP_MASK: u64 = (1u64 << COMMIT_SEQ_GROUP_BITS) - 1; + +/// Build a deterministic commit marker from a committed Raft position. +/// +/// This is stable across replicas for the same group/log entry. It preserves +/// ordering within a Raft group without letting followers allocate their own +/// local `_commit_seq` values. 
+pub fn commit_seq_from_log_position(group_id: GroupId, log_index: u64) -> u64 { + log_index + .saturating_mul(1u64 << COMMIT_SEQ_GROUP_BITS) + .saturating_add(group_id.as_u64() & COMMIT_SEQ_GROUP_MASK) +} + #[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)] pub struct TransactionApplyResult { pub rows_affected: usize, @@ -87,11 +103,11 @@ mod tests { }) .is_ok()); assert!(DataResponse::Subscribed { - subscription_id: "sub_1".to_string() + subscription_id: "sub_1".to_string(), } .is_ok()); assert!(!DataResponse::Error { - message: "error".to_string() + message: "error".to_string(), } .is_ok()); } @@ -113,7 +129,7 @@ mod tests { ); assert_eq!( DataResponse::Error { - message: "err".to_string() + message: "err".to_string(), } .rows_affected(), 0 diff --git a/backend/crates/kalamdb-raft/src/commands/meta.rs b/backend/crates/kalamdb-raft/src/commands/meta.rs index e4d78da48..7dc88072f 100644 --- a/backend/crates/kalamdb-raft/src/commands/meta.rs +++ b/backend/crates/kalamdb-raft/src/commands/meta.rs @@ -10,13 +10,14 @@ //! for data groups. 
use chrono::{DateTime, Utc}; -use kalamdb_commons::models::schemas::{TableDefinition, TableType}; -use kalamdb_commons::models::{JobId, NamespaceId, NodeId, StorageId, UserId}; -use kalamdb_commons::TableId; -use kalamdb_system::providers::jobs::models::Job; -use kalamdb_system::JobStatus; -use kalamdb_system::Storage; -use kalamdb_system::User; +use kalamdb_commons::{ + models::{ + schemas::{TableDefinition, TableType}, + JobId, NamespaceId, NodeId, StorageId, UserId, + }, + TableId, +}; +use kalamdb_system::{providers::jobs::models::Job, JobStatus, Storage, User}; use serde::{Deserialize, Serialize}; /// Commands for the unified metadata Raft group @@ -176,13 +177,27 @@ pub enum MetaCommand { reason: String, cancelled_at: DateTime, }, + + // ========================================================================= + // Compatibility-Appended Operations + // ========================================================================= + /// Create a new namespace, succeeding if it already exists. + /// + /// Keep this variant at the end so existing bincode-encoded Raft log + /// discriminants remain stable for older variants. + CreateNamespaceIfNotExists { + namespace_id: NamespaceId, + created_by: Option, + }, } impl MetaCommand { /// Returns the category of this command for logging/metrics pub fn category(&self) -> &'static str { match self { - Self::CreateNamespace { .. } | Self::DeleteNamespace { .. } => "namespace", + Self::CreateNamespace { .. } + | Self::CreateNamespaceIfNotExists { .. } + | Self::DeleteNamespace { .. } => "namespace", Self::CreateTable { .. } | Self::AlterTable { .. } | Self::DropTable { .. } => "table", Self::RegisterStorage { .. } | Self::UnregisterStorage { .. } => "storage", Self::CreateUser { .. 
} @@ -288,9 +303,10 @@ impl MetaResponse { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::{AuthType, Role}; + use super::*; + fn test_user() -> User { User { user_id: UserId::from("test_user"), diff --git a/backend/crates/kalamdb-raft/src/commands/mod.rs b/backend/crates/kalamdb-raft/src/commands/mod.rs index 74482775c..c8e5d7176 100644 --- a/backend/crates/kalamdb-raft/src/commands/mod.rs +++ b/backend/crates/kalamdb-raft/src/commands/mod.rs @@ -18,10 +18,9 @@ mod shared_data; mod user_data; // Unified Meta commands -pub use meta::{MetaCommand, MetaResponse}; - // Data commands (split into separate files for better organization) -pub use data_response::{DataResponse, TransactionApplyResult}; +pub use data_response::{commit_seq_from_log_position, DataResponse, TransactionApplyResult}; +pub use meta::{MetaCommand, MetaResponse}; pub use shared_data::SharedDataCommand; pub use user_data::UserDataCommand; diff --git a/backend/crates/kalamdb-raft/src/commands/shared_data.rs b/backend/crates/kalamdb-raft/src/commands/shared_data.rs index 1b82c5ac6..3e908219e 100644 --- a/backend/crates/kalamdb-raft/src/commands/shared_data.rs +++ b/backend/crates/kalamdb-raft/src/commands/shared_data.rs @@ -4,8 +4,7 @@ //! `Meta` group's last applied index on the leader at proposal time. Followers //! buffer data commands until local `Meta` has applied at least that index. 
-use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableId; +use kalamdb_commons::{models::TransactionId, TableId}; use serde::{Deserialize, Serialize}; /// Commands for shared data shards (1 shard by default) @@ -111,9 +110,10 @@ impl SharedDataCommand { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + #[test] fn test_shared_data_command_watermark() { let mut cmd = SharedDataCommand::Update { diff --git a/backend/crates/kalamdb-raft/src/commands/user_data.rs b/backend/crates/kalamdb-raft/src/commands/user_data.rs index 79de43df6..58fb32c57 100644 --- a/backend/crates/kalamdb-raft/src/commands/user_data.rs +++ b/backend/crates/kalamdb-raft/src/commands/user_data.rs @@ -4,8 +4,10 @@ //! `Meta` group's last applied index on the leader at proposal time. Followers //! buffer data commands until local `Meta` has applied at least that index. -use kalamdb_commons::models::{TransactionId, UserId}; -use kalamdb_commons::TableId; +use kalamdb_commons::{ + models::{TransactionId, UserId}, + TableId, +}; use serde::{Deserialize, Serialize}; /// Commands for user data shards (32 shards by default) @@ -116,9 +118,10 @@ impl UserDataCommand { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + #[test] fn test_user_data_command_watermark_get_set() { let mut cmd = UserDataCommand::Insert { diff --git a/backend/crates/kalamdb-raft/src/error.rs b/backend/crates/kalamdb-raft/src/error.rs index 88e4ae714..a79b91b18 100644 --- a/backend/crates/kalamdb-raft/src/error.rs +++ b/backend/crates/kalamdb-raft/src/error.rs @@ -53,7 +53,10 @@ pub enum RaftError { Timeout(std::time::Duration), /// Replication timeout - command committed but not all nodes applied - #[error("Replication timeout for group {group}: committed at {committed_log_id} but not all nodes applied within {timeout_ms}ms")] + #[error( + "Replication timeout for group {group}: committed 
at {committed_log_id} but not all nodes \ + applied within {timeout_ms}ms" + )] ReplicationTimeout { group: String, committed_log_id: String, @@ -179,7 +182,7 @@ mod tests { assert!(RaftError::ReplicationTimeout { group: "g1".to_string(), committed_log_id: "1-100".to_string(), - timeout_ms: 5000 + timeout_ms: 5000, } .is_retryable()); diff --git a/backend/crates/kalamdb-raft/src/executor/raft.rs b/backend/crates/kalamdb-raft/src/executor/raft.rs index 968885aa0..8c017c607 100644 --- a/backend/crates/kalamdb-raft/src/executor/raft.rs +++ b/backend/crates/kalamdb-raft/src/executor/raft.rs @@ -3,31 +3,36 @@ //! This executor routes commands through Raft groups for consensus //! before applying them to the local state machine. -use std::collections::{BTreeMap, BTreeSet}; -use std::sync::Arc; -use std::time::Instant; +use std::{ + collections::{BTreeMap, BTreeSet}, + sync::Arc, + time::{Duration, Instant}, +}; use async_trait::async_trait; use dashmap::DashMap; +use kalamdb_commons::models::{NodeId, UserId}; use kalamdb_observability::collect_runtime_metrics; use kalamdb_pg::KalamPgService; -use openraft::ServerState; - -use kalamdb_commons::models::{NodeId, UserId}; use kalamdb_sharding::ShardRouter; +use openraft::ServerState; -use crate::cluster_types::NodeStatus; -use crate::network::cluster_client::ClusterClient; -use crate::network::cluster_handler::ClusterMessageHandler; -use crate::network::models::GetNodeInfoResponse; use crate::{ - manager::RaftManager, ClusterInfo, ClusterNodeInfo, CommandExecutor, DataResponse, GroupId, - KalamNode, MetaCommand, MetaResponse, RaftError, SharedDataCommand, UserDataCommand, + cluster_types::NodeStatus, + manager::RaftManager, + network::{ + cluster_client::ClusterClient, cluster_handler::ClusterMessageHandler, + models::GetNodeInfoResponse, + }, + ClusterInfo, ClusterNodeInfo, CommandExecutor, DataResponse, GroupId, KalamNode, MetaCommand, + MetaResponse, RaftError, SharedDataCommand, UserDataCommand, }; /// Result type for 
executor operations type Result = std::result::Result; +const PEER_STATS_REFRESH_MIN_INTERVAL: Duration = Duration::from_secs(5); + /// Cluster mode executor using Raft consensus. /// /// Routes commands through the appropriate Raft group leader, @@ -46,6 +51,9 @@ pub struct RaftExecutor { /// Keyed by `NodeId`. Only populated in cluster mode when the node has peers. /// Stale entries are acceptable — they are replaced on the next refresh. peer_stats_cache: Arc>, + /// Last successful peer stats refresh. Cluster health and UI surfaces poll frequently, + /// so bound fanout churn and reuse slightly stale cached peer metrics between polls. + peer_stats_cache_refreshed_at: Arc>>, } impl std::fmt::Debug for RaftExecutor { @@ -65,6 +73,7 @@ impl RaftExecutor { cluster_handler: tokio::sync::OnceCell::new(), pg_service: tokio::sync::OnceCell::new(), peer_stats_cache: Arc::new(DashMap::new()), + peer_stats_cache_refreshed_at: Arc::new(tokio::sync::Mutex::new(None)), } } @@ -107,11 +116,21 @@ impl RaftExecutor { /// This is called by `\cluster list` and future callers that need /// fresh per-node data before rendering cluster state. 
pub async fn refresh_peer_stats(&self) { + let mut refreshed_at = self.peer_stats_cache_refreshed_at.lock().await; + if refreshed_at + .as_ref() + .is_some_and(|instant| instant.elapsed() < PEER_STATS_REFRESH_MIN_INTERVAL) + { + return; + } + let client = ClusterClient::new(Arc::clone(&self.manager)); let fresh = client.gather_all_node_infos(2_000).await; + let refreshed_now = Instant::now(); for (node_id, resp) in fresh { self.peer_stats_cache.insert(node_id, resp); } + *refreshed_at = Some(refreshed_now); } /// Compute the shard for a user based on their ID diff --git a/backend/crates/kalamdb-raft/src/executor/trait_def.rs b/backend/crates/kalamdb-raft/src/executor/trait_def.rs index 5bfe44bf3..755d8d37c 100644 --- a/backend/crates/kalamdb-raft/src/executor/trait_def.rs +++ b/backend/crates/kalamdb-raft/src/executor/trait_def.rs @@ -1,16 +1,16 @@ //! CommandExecutor trait definition -use async_trait::async_trait; use std::fmt::Debug; +use async_trait::async_trait; use kalamdb_commons::models::{NodeId, UserId}; -use crate::cluster_types::{NodeRole, NodeStatus}; -use crate::commands::{ - DataResponse, MetaCommand, MetaResponse, SharedDataCommand, UserDataCommand, +use crate::{ + cluster_types::{NodeRole, NodeStatus}, + commands::{DataResponse, MetaCommand, MetaResponse, SharedDataCommand, UserDataCommand}, + error::Result, + GroupId, }; -use crate::error::Result; -use crate::GroupId; /// Information about a cluster node #[derive(Debug, Clone)] diff --git a/backend/crates/kalamdb-raft/src/lib.rs b/backend/crates/kalamdb-raft/src/lib.rs index 087db122a..ddd9f1210 100644 --- a/backend/crates/kalamdb-raft/src/lib.rs +++ b/backend/crates/kalamdb-raft/src/lib.rs @@ -49,29 +49,29 @@ pub mod storage; // Re-exports - Meta layer pub use applier::{MetaApplier, NoOpMetaApplier}; -pub use commands::{MetaCommand, MetaResponse, RaftCommand, RaftResponse}; -pub use state_machine::MetaStateMachine; - // Re-exports - Data layer pub use applier::{NoOpSharedDataApplier, 
NoOpUserDataApplier, SharedDataApplier, UserDataApplier}; -pub use commands::{DataResponse, SharedDataCommand, TransactionApplyResult, UserDataCommand}; -pub use state_machine::{SharedDataStateMachine, UserDataStateMachine}; - // Re-exports - Core types pub use cluster_types::{NodeRole, NodeStatus, ServerStateExt}; +pub use commands::{ + commit_seq_from_log_position, DataResponse, MetaCommand, MetaResponse, RaftCommand, + RaftResponse, SharedDataCommand, TransactionApplyResult, UserDataCommand, +}; pub use error::{RaftError, Result}; pub use executor::{ClusterInfo, ClusterNodeInfo, CommandExecutor, RaftExecutor}; -pub use kalamdb_sharding::{ClusterConfig as RaftClusterConfig, PeerConfig}; -pub use kalamdb_sharding::{GroupId, ShardRouter}; +pub use kalamdb_sharding::{ClusterConfig as RaftClusterConfig, GroupId, PeerConfig, ShardRouter}; pub use manager::{ PeerNode, RaftGroup, RaftManager, RaftManagerConfig, SnapshotInfo, SnapshotsSummary, DEFAULT_SHARED_DATA_SHARDS, DEFAULT_USER_DATA_SHARDS, }; pub use network::{ - forward_sql_param, ClusterClient, ClusterMessageHandler, ClusterServiceImpl, ForwardSqlParam, - ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, GetNodeInfoRequest, - GetNodeInfoResponse, NoOpClusterHandler, PingRequest, PingResponse, + forward_sql_param, start_rpc_server, ClusterClient, ClusterMessageHandler, ClusterServiceImpl, + ForwardSqlParam, ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, + GetNodeInfoRequest, GetNodeInfoResponse, NoOpClusterHandler, PingRequest, PingResponse, + RaftNetwork, RaftNetworkFactory, RaftService, +}; +pub use state_machine::{ + serde_helpers, ApplyResult, KalamStateMachine, MetaStateMachine, SharedDataStateMachine, + StateMachineSnapshot, UserDataStateMachine, }; -pub use network::{start_rpc_server, RaftNetwork, RaftNetworkFactory, RaftService}; -pub use state_machine::{serde_helpers, ApplyResult, KalamStateMachine, StateMachineSnapshot}; pub use storage::{KalamNode, KalamRaftStorage, 
KalamTypeConfig}; diff --git a/backend/crates/kalamdb-raft/src/manager/config.rs b/backend/crates/kalamdb-raft/src/manager/config.rs index 27f0c5bde..1525b485e 100644 --- a/backend/crates/kalamdb-raft/src/manager/config.rs +++ b/backend/crates/kalamdb-raft/src/manager/config.rs @@ -6,7 +6,6 @@ use std::time::Duration; use kalamdb_commons::models::NodeId; - pub use kalamdb_configs::RpcTlsConfig; /// Default number of user data shards diff --git a/backend/crates/kalamdb-raft/src/manager/raft_group.rs b/backend/crates/kalamdb-raft/src/manager/raft_group.rs index 5cb8536e5..b929d9a6a 100644 --- a/backend/crates/kalamdb-raft/src/manager/raft_group.rs +++ b/backend/crates/kalamdb-raft/src/manager/raft_group.rs @@ -2,19 +2,22 @@ //! //! Represents a single Raft consensus group with its own log, state machine, and network. -use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; use kalamdb_commons::models::NodeId; use kalamdb_store::StorageBackend; -use openraft::storage::Adaptor; -use openraft::{Config, Raft, RaftMetrics}; +use openraft::{storage::Adaptor, Config, Raft, RaftMetrics}; use parking_lot::RwLock; -use crate::network::RaftNetworkFactory; -use crate::state_machine::KalamStateMachine; -use crate::storage::{KalamNode, KalamRaftStorage, KalamTypeConfig}; -use crate::{GroupId, RaftError}; +use crate::{ + network::{RaftChannelPool, RaftNetworkFactory}, + state_machine::KalamStateMachine, + storage::{KalamNode, KalamRaftStorage, KalamTypeConfig}, + GroupId, RaftError, +}; /// Type alias for the openraft Raft instance pub type RaftInstance = Raft; @@ -44,11 +47,20 @@ pub struct RaftGroup { impl RaftGroup { /// Create a new Raft group with in-memory storage (not yet started) pub fn new(group_id: GroupId, state_machine: SM) -> Self { + Self::new_with_channel_pool(group_id, state_machine, RaftNetworkFactory::new_channel_pool()) + } + + /// Create a new Raft group with in-memory storage and a shared channel pool. 
+ pub fn new_with_channel_pool( + group_id: GroupId, + state_machine: SM, + channel_pool: RaftChannelPool, + ) -> Self { Self { group_id, raft: RwLock::new(None), storage: Arc::new(KalamRaftStorage::new(group_id, state_machine)), - network_factory: RaftNetworkFactory::new(group_id), + network_factory: RaftNetworkFactory::new_with_channel_pool(group_id, channel_pool), } } @@ -61,6 +73,23 @@ impl RaftGroup { state_machine: SM, backend: Arc, snapshots_dir: std::path::PathBuf, + ) -> Result { + Self::new_persistent_with_channel_pool( + group_id, + state_machine, + backend, + snapshots_dir, + RaftNetworkFactory::new_channel_pool(), + ) + } + + /// Create a new persistent Raft group backed by a shared channel pool. + pub fn new_persistent_with_channel_pool( + group_id: GroupId, + state_machine: SM, + backend: Arc, + snapshots_dir: std::path::PathBuf, + channel_pool: RaftChannelPool, ) -> Result { let storage = KalamRaftStorage::new_persistent(group_id, state_machine, backend, snapshots_dir) @@ -72,7 +101,7 @@ impl RaftGroup { group_id, raft: RwLock::new(None), storage: Arc::new(storage), - network_factory: RaftNetworkFactory::new(group_id), + network_factory: RaftNetworkFactory::new_with_channel_pool(group_id, channel_pool), }) } @@ -107,6 +136,69 @@ impl RaftGroup { self.storage.get_last_applied() } + /// Wait until this node has applied every log entry already known locally. 
+ pub async fn wait_for_local_apply_barrier(&self, timeout: Duration) -> Result { + let metrics = + self.metrics().ok_or_else(|| RaftError::NotStarted(self.group_id.to_string()))?; + let committed_index = self.storage.get_committed().map(|log_id| log_id.index).unwrap_or(0); + let snapshot_index = metrics.snapshot.map(|log_id| log_id.index).unwrap_or(0); + let target_index = committed_index.max(snapshot_index); + + if target_index == 0 { + return Ok(0); + } + + let start = Instant::now(); + if let Some(mut applied_rx) = self.storage.state_machine().subscribe_last_applied() { + loop { + let applied_index = *applied_rx.borrow(); + if applied_index >= target_index { + return Ok(applied_index); + } + + let elapsed = start.elapsed(); + if elapsed >= timeout { + return Err(RaftError::ReplicationTimeout { + group: self.group_id.to_string(), + committed_log_id: target_index.to_string(), + timeout_ms: timeout.as_millis() as u64, + }); + } + + match tokio::time::timeout(timeout - elapsed, applied_rx.changed()).await { + Ok(Ok(())) => {}, + Ok(Err(_)) => break, + Err(_) => { + return Err(RaftError::ReplicationTimeout { + group: self.group_id.to_string(), + committed_log_id: target_index.to_string(), + timeout_ms: timeout.as_millis() as u64, + }); + }, + } + } + } + + let poll_interval = Duration::from_millis(5); + + loop { + let applied_index = self.storage.state_machine().last_applied_index(); + if applied_index >= target_index { + return Ok(applied_index); + } + + if start.elapsed() > timeout { + return Err(RaftError::ReplicationTimeout { + group: self.group_id.to_string(), + committed_log_id: target_index.to_string(), + timeout_ms: timeout.as_millis() as u64, + }); + } + + tokio::time::sleep(poll_interval).await; + } + } + /// Start the Raft group with the given node ID and configuration /// /// This initializes the Raft instance and begins participating in consensus. 
@@ -449,8 +541,9 @@ impl RaftGroup { .map_err(|e| RaftError::Proposal(format!("{:?}", e)))?; // Deserialize the response based on command type using centralized serde_helpers. - // The state machine returns MetaResponse or DataResponse directly, not wrapped in RaftResponse. - // If the state machine short-circuits with NoOp, response.data can be empty; treat as Ok. + // The state machine returns MetaResponse or DataResponse directly, not wrapped in + // RaftResponse. If the state machine short-circuits with NoOp, response.data can be + // empty; treat as Ok. let response_obj = if response.data.is_empty() { match command { crate::RaftCommand::Meta(_) => crate::RaftResponse::Meta(crate::MetaResponse::Ok), @@ -543,8 +636,10 @@ impl RaftGroup { // This applies to ALL groups (Meta and Data shards) if log_index > 0 { log::debug!( - "Waiting for {} log index {} to be applied locally for read-your-writes consistency", - self.group_id, log_index + "Waiting for {} log index {} to be applied locally \ + for read-your-writes consistency", + self.group_id, + log_index ); // Poll the state machine until the log is applied @@ -557,17 +652,24 @@ impl RaftGroup { self.storage.state_machine().last_applied_index(); if applied >= log_index { log::debug!( - "{} log index {} applied locally (current: {}), read-your-writes consistency achieved", - self.group_id, log_index, applied + "{} log index {} applied locally (current: \ + {}), read-your-writes consistency achieved", + self.group_id, + log_index, + applied ); break; } if start.elapsed() > timeout { log::warn!( - "Timeout waiting for {} log index {} to be applied locally (current: {}). \ - Read-your-writes consistency may not be guaranteed.", - self.group_id, log_index, applied + "Timeout waiting for {} log index {} to be \ + applied locally (current: {}). 
\ + Read-your-writes consistency may not be \ + guaranteed.", + self.group_id, + log_index, + applied ); break; } @@ -628,32 +730,10 @@ impl RaftGroup { leader_node_id: NodeId, command: Vec, ) -> Result<(Vec, u64), RaftError> { - use crate::network::{ClientProposalRequest, RaftClient}; - let channel = - self.network_factory.get_or_create_channel(leader_node_id).ok_or_else(|| { - RaftError::Network(format!( - "No channel available for leader node {}", - leader_node_id - )) - })?; - - let mut client = RaftClient::new(channel); - - // Send the proposal - let mut request = tonic::Request::new(ClientProposalRequest { - group_id: self.group_id.to_string(), - command, - }); - self.network_factory - .add_outgoing_rpc_metadata(&mut request) - .map_err(|e| RaftError::Network(format!("Failed to add RPC metadata: {}", e)))?; - - let response = client - .client_proposal(request) - .await - .map_err(|e| RaftError::Network(format!("gRPC error forwarding proposal: {}", e)))?; - - let inner = response.into_inner(); + let inner = self + .network_factory + .send_client_proposal(leader_node_id, self.group_id, command) + .await?; if inner.success { Ok((inner.payload, inner.log_index)) @@ -688,7 +768,8 @@ impl RaftGroup { /// supported, we just log and continue - the cluster will re-elect. pub async fn transfer_leadership(&self, target_node_id: NodeId) -> Result<(), RaftError> { Err(RaftError::InvalidState(format!( - "Leadership transfer is unsupported in current OpenRaft version for group {} (target node {})", + "Leadership transfer is unsupported in current OpenRaft version for group {} (target \ + node {})", self.group_id, target_node_id ))) } diff --git a/backend/crates/kalamdb-raft/src/manager/raft_manager.rs b/backend/crates/kalamdb-raft/src/manager/raft_manager.rs index 2308aad91..1eff16ce9 100644 --- a/backend/crates/kalamdb-raft/src/manager/raft_manager.rs +++ b/backend/crates/kalamdb-raft/src/manager/raft_manager.rs @@ -5,27 +5,32 @@ //! 
- DataUserShard(0..N): User table data shards (default 32) //! - DataSharedShard(0..M): Shared table data shards (default 1) -use std::collections::BTreeSet; -use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; -use std::sync::Arc; -use std::time::Duration; +use std::{ + collections::{BTreeSet, HashMap}, + net::{IpAddr, SocketAddr, ToSocketAddrs}, + sync::Arc, + time::Duration, +}; use kalamdb_commons::models::{NodeId, TableId}; use kalamdb_sharding::ShardRouter; -use kalamdb_store::raft_storage::RAFT_PARTITION_NAME; -use kalamdb_store::{Partition, StorageBackend}; +use kalamdb_store::{raft_storage::RAFT_PARTITION_NAME, Partition, StorageBackend}; use openraft::RaftMetrics; use parking_lot::RwLock; use tonic::transport::{Certificate, ClientTlsConfig, Identity}; -use crate::manager::config::RaftManagerConfig; -use crate::manager::RaftGroup; -use crate::network::cluster_service::cluster_client::ClusterServiceClient; -use crate::network::cluster_service::PingRequest; -use crate::state_machine::KalamStateMachine; -use crate::state_machine::{MetaStateMachine, SharedDataStateMachine, UserDataStateMachine}; -use crate::storage::KalamNode; -use crate::{GroupId, RaftError}; +use crate::{ + manager::{config::RaftManagerConfig, RaftGroup}, + network::{ + cluster_service::{cluster_client::ClusterServiceClient, PingRequest}, + RaftNetworkFactory, + }, + state_machine::{ + KalamStateMachine, MetaStateMachine, SharedDataStateMachine, UserDataStateMachine, + }, + storage::KalamNode, + GroupId, RaftError, +}; const RPC_CLUSTER_ID_HEADER: &str = "x-kalamdb-cluster-id"; const RPC_NODE_ID_HEADER: &str = "x-kalamdb-node-id"; @@ -100,6 +105,9 @@ pub struct RaftManager { /// Cluster configuration config: RaftManagerConfig, + /// Nodes known at runtime, including dynamically joined nodes. 
+ runtime_peers: RwLock>, + /// Number of user shards (cached from config) user_shards_count: u32, @@ -151,20 +159,37 @@ async fn promote_learner( } impl RaftManager { + fn configured_peers_summary(&self) -> String { + self.config + .peers + .iter() + .map(|peer| { + format!("node {} (rpc={}, api={})", peer.node_id, peer.rpc_addr, peer.api_addr) + }) + .collect::>() + .join(", ") + } + /// Create a new Raft manager with in-memory storage (for testing or standalone mode) pub fn new(config: RaftManagerConfig) -> Self { let user_shards_count = config.user_shards; let shared_shards_count = config.shared_shards; + let channel_pool = RaftNetworkFactory::new_channel_pool(); // Create unified meta group - let meta = Arc::new(RaftGroup::new(GroupId::Meta, MetaStateMachine::new())); + let meta = Arc::new(RaftGroup::new_with_channel_pool( + GroupId::Meta, + MetaStateMachine::new(), + Arc::clone(&channel_pool), + )); // Create user data shards (configurable) let user_data_shards: Vec<_> = (0..user_shards_count) .map(|shard_id| { - Arc::new(RaftGroup::new( + Arc::new(RaftGroup::new_with_channel_pool( GroupId::DataUserShard(shard_id), UserDataStateMachine::new(shard_id), + Arc::clone(&channel_pool), )) }) .collect(); @@ -172,9 +197,10 @@ impl RaftManager { // Create shared data shards (configurable) let shared_data_shards: Vec<_> = (0..shared_shards_count) .map(|shard_id| { - Arc::new(RaftGroup::new( + Arc::new(RaftGroup::new_with_channel_pool( GroupId::DataSharedShard(shard_id), SharedDataStateMachine::new(shard_id), + Arc::clone(&channel_pool), )) }) .collect(); @@ -186,6 +212,7 @@ impl RaftManager { shared_data_shards, started: RwLock::new(false), config, + runtime_peers: RwLock::new(HashMap::new()), user_shards_count, shared_shards_count, cluster_init_handle: RwLock::new(None), @@ -205,6 +232,7 @@ impl RaftManager { ) -> Result { let user_shards_count = config.user_shards; let shared_shards_count = config.shared_shards; + let channel_pool = 
RaftNetworkFactory::new_channel_pool(); // Ensure the raft_data partition exists let partition = Partition::new(RAFT_PARTITION_NAME); @@ -223,21 +251,23 @@ impl RaftManager { })?; // Create unified meta group with persistent storage - let meta = Arc::new(RaftGroup::new_persistent( + let meta = Arc::new(RaftGroup::new_persistent_with_channel_pool( GroupId::Meta, MetaStateMachine::new(), backend.clone(), snapshots_dir.clone(), + Arc::clone(&channel_pool), )?); // Create user data shards with persistent storage let user_data_shards: Vec<_> = (0..user_shards_count) .map(|shard_id| { - RaftGroup::new_persistent( + RaftGroup::new_persistent_with_channel_pool( GroupId::DataUserShard(shard_id), UserDataStateMachine::new(shard_id), backend.clone(), snapshots_dir.clone(), + Arc::clone(&channel_pool), ) .map(Arc::new) }) @@ -246,11 +276,12 @@ impl RaftManager { // Create shared data shards with persistent storage let shared_data_shards: Vec<_> = (0..shared_shards_count) .map(|shard_id| { - RaftGroup::new_persistent( + RaftGroup::new_persistent_with_channel_pool( GroupId::DataSharedShard(shard_id), SharedDataStateMachine::new(shard_id), backend.clone(), snapshots_dir.clone(), + Arc::clone(&channel_pool), ) .map(Arc::new) }) @@ -269,6 +300,7 @@ impl RaftManager { shared_data_shards, started: RwLock::new(false), config, + runtime_peers: RwLock::new(HashMap::new()), user_shards_count, shared_shards_count, cluster_init_handle: RwLock::new(None), @@ -299,6 +331,29 @@ impl RaftManager { } } + /// Wait until the local replica has applied every log entry already known + /// for a Raft group. 
+ pub async fn wait_for_local_apply_barrier( + &self, + group_id: GroupId, + timeout: std::time::Duration, + ) -> Result { + match group_id { + GroupId::Meta => self.meta.wait_for_local_apply_barrier(timeout).await, + GroupId::DataUserShard(shard) if shard < self.user_shards_count => { + self.user_data_shards[shard as usize] + .wait_for_local_apply_barrier(timeout) + .await + }, + GroupId::DataSharedShard(shard) if shard < self.shared_shards_count => { + self.shared_data_shards[shard as usize] + .wait_for_local_apply_barrier(timeout) + .await + }, + _ => Err(RaftError::GroupNotFound(group_id.to_string())), + } + } + /// Check if the manager has been started pub fn is_started(&self) -> bool { *self.started.read() @@ -313,25 +368,24 @@ impl RaftManager { return Ok(()); } - log::debug!( - "Starting Raft Cluster: node={} rpc={} api={}", + log::info!( + "[CLUSTER] Node {} starting Raft services (rpc={}, api={})", self.node_id, self.config.rpc_addr, self.config.api_addr ); - // log::info!("Groups: {} (1 meta + {}u + {}s) │ Peers: {}", - // self.group_count(), self.user_shards_count, self.shared_shards_count, - // self.config.peers.len()); - for peer in &self.config.peers { + if self.config.peers.is_empty() { + log::info!("[CLUSTER] No other configured cluster nodes"); + } else { log::info!( - "[CLUSTER] Peer node_id={}: rpc={}, api={}", - peer.node_id, - peer.rpc_addr, - peer.api_addr + "[CLUSTER] Other configured cluster nodes ({}): {}", + self.config.peers.len(), + self.configured_peers_summary() ); } - // Register this node for leader forwarding (covers self-forward when leader detection lags). + // Register this node for leader forwarding (covers self-forward when leader detection + // lags). 
self.register_peer( self.node_id, self.config.rpc_addr.clone(), @@ -413,8 +467,10 @@ impl RaftManager { if already_initialized { let meta_last_applied = self.meta.get_last_applied().map(|id| id.index).unwrap_or(0); log::info!( - "Cluster already initialized (meta last_applied={}); skipping group initialization", - meta_last_applied + "[CLUSTER] Membership already initialized on node {} (meta last_applied={}); \ + skipping bootstrap", + self.node_id, + meta_last_applied, ); } else { // Initialize unified meta group @@ -441,10 +497,11 @@ impl RaftManager { ); } - // After initialization, wait for peer nodes to come online before adding them to the cluster. - // This prevents OpenRaft from generating thousands of connection errors when trying to - // replicate to offline nodes. We wait for each peer's RPC endpoint to respond before - // calling add_node(), which ensures a clean cluster formation with minimal error logs. + // After initialization, wait for peer nodes to come online before adding them to the + // cluster. This prevents OpenRaft from generating thousands of connection errors + // when trying to replicate to offline nodes. We wait for each peer's RPC endpoint + // to respond before calling add_node(), which ensures a clean cluster formation + // with minimal error logs. let should_attempt_peer_join = if !already_initialized { // First boot: always attempt to add configured peers. 
true @@ -490,9 +547,14 @@ impl RaftManager { } if attempt == 1 || attempt % 5 == 0 { log::debug!( - "Waiting for node {} to become leader for all groups (attempt {}/{}, meta={}, user={}, shared={})...", - node_id, attempt, max_leadership_wait, - is_meta_leader, all_user_leaders, all_shared_leaders + "Waiting for node {} to become leader for all groups (attempt {}/{}, \ + meta={}, user={}, shared={})...", + node_id, + attempt, + max_leadership_wait, + is_meta_leader, + all_user_leaders, + all_shared_leaders ); } tokio::time::sleep(Duration::from_millis(200)).await; @@ -507,15 +569,22 @@ impl RaftManager { return; } - log::info!("Waiting for {} peer nodes to come online...", peers.len()); + log::info!( + "[CLUSTER] Bootstrap node {} is connecting {} configured peer(s) to the \ + cluster", + node_id, + peers.len() + ); // Peer wait configuration from RaftManagerConfig for peer in &peers { log::info!( - " Waiting for peer node_id={} (rpc={}) to be online...", + "[CLUSTER] Waiting for configured peer {} to come online (rpc={}, \ + api={})", peer.node_id, - peer.rpc_addr + peer.rpc_addr, + peer.api_addr, ); // Wait for the peer's RPC endpoint to respond @@ -532,7 +601,8 @@ impl RaftManager { { Ok(_) => { log::info!( - " ✓ Peer {} is online, adding to cluster...", + "[CLUSTER] Configured peer {} is online; starting join \ + sequence", peer.node_id ); @@ -548,15 +618,11 @@ impl RaftManager { ) .await { - Ok(_) => { - log::info!( - " ✓ Peer {} joined cluster successfully", - peer.node_id - ); - }, + Ok(_) => {}, Err(e) => { log::error!( - " ✗ Failed to add peer {} to cluster: {}", + "[CLUSTER] Failed to add configured peer {} to the \ + cluster: {}", peer.node_id, e ); @@ -564,11 +630,15 @@ impl RaftManager { } }, Err(e) => { - log::error!(" ✗ Peer {} did not come online: {}", peer.node_id, e); + log::error!( + "[CLUSTER] Configured peer {} did not come online: {}", + peer.node_id, + e + ); }, } } - log::info!("Cluster formation complete"); + log::info!("[CLUSTER] Cluster 
formation complete"); }); let mut guard = self.cluster_init_handle.write(); @@ -737,7 +807,7 @@ impl RaftManager { replication_timeout: Duration, ) -> Result<(), RaftError> { log::info!( - "[CLUSTER] Node {} joining cluster (rpc={}, api={})", + "[CLUSTER] Starting join sequence for node {} (rpc={}, api={})", node_id, rpc_addr, api_addr @@ -848,11 +918,24 @@ impl RaftManager { promote_learner(shard, node_id).await?; } + self.register_peer(node_id, rpc_addr.clone(), api_addr.clone()); + log::info!( - "[CLUSTER] ✓ Node {} joined cluster successfully (added to {} groups)", + "[CLUSTER] Node {} joined cluster successfully and now participates in {} Raft \ + groups", node_id, self.group_count() ); + + let rebalance_results = self.rebalance_data_leaders().await?; + let rebalance_success = rebalance_results.iter().filter(|result| result.success).count(); + log::info!( + "[CLUSTER] Best-effort data leader rebalance requested for {}/{} groups after node {} \ + join", + rebalance_success, + rebalance_results.len(), + node_id + ); Ok(()) } @@ -862,14 +945,36 @@ impl RaftManager { } fn is_known_cluster_node(&self, node_id: NodeId) -> bool { + self.rpc_addr_for_node(node_id).is_some() + } + + fn node_for_node(&self, node_id: NodeId) -> Option { + if let Some(node) = self.runtime_peers.read().get(&node_id) { + return Some(node.clone()); + } + if node_id == self.config.node_id { - return true; + return Some(KalamNode::new( + self.config.rpc_addr.clone(), + self.config.api_addr.clone(), + )); } - self.config.peers.iter().any(|peer| peer.node_id == node_id) + + if let Some(peer) = self.config.peers.iter().find(|peer| peer.node_id == node_id) { + return Some(KalamNode::new(peer.rpc_addr.clone(), peer.api_addr.clone())); + } + + self.meta.metrics().and_then(|metrics| { + metrics + .membership_config + .nodes() + .find(|(candidate_id, _)| **candidate_id == node_id.as_u64()) + .map(|(_, node)| node.clone()) + }) } - fn peer_for_node(&self, node_id: NodeId) -> 
Option<&crate::manager::PeerNode> { - self.config.peers.iter().find(|peer| peer.node_id == node_id) + fn rpc_addr_for_node(&self, node_id: NodeId) -> Option { + self.node_for_node(node_id).map(|node| node.rpc_addr) } fn extract_host_from_rpc_addr(rpc_addr: &str) -> Option<&str> { @@ -903,11 +1008,11 @@ impl RaftManager { return true; } - let Some(peer) = self.peer_for_node(node_id) else { + let Some(rpc_addr) = self.rpc_addr_for_node(node_id) else { return false; }; - let Some(host) = Self::extract_host_from_rpc_addr(&peer.rpc_addr) else { + let Some(host) = Self::extract_host_from_rpc_addr(&rpc_addr) else { return false; }; @@ -1210,6 +1315,8 @@ impl RaftManager { pub fn register_peer(&self, node_id: NodeId, rpc_addr: String, api_addr: String) { let node = KalamNode::new(rpc_addr, api_addr); + self.runtime_peers.write().insert(node_id, node.clone()); + // Register with unified meta group self.meta.register_peer(node_id, node.clone()); @@ -1229,6 +1336,12 @@ impl RaftManager { /// /// Returns `None` if the node is not registered. 
pub fn get_peer_channel(&self, node_id: NodeId) -> Option { + if let Some(channel) = self.meta.network_factory().get_or_create_channel(node_id) { + return Some(channel); + } + + let node = self.node_for_node(node_id)?; + self.meta.register_peer(node_id, node); self.meta.network_factory().get_or_create_channel(node_id) } @@ -1416,9 +1529,10 @@ impl RaftManager { group_id: GroupId, payload: &[u8], ) -> Result, RaftError> { - use crate::state_machine::{decode, encode}; use openraft::raft::VoteRequest; + use crate::state_machine::{decode, encode}; + let raft = self.get_raft_instance(group_id)?; let request: VoteRequest = decode(payload)?; @@ -1436,10 +1550,13 @@ impl RaftManager { group_id: GroupId, payload: &[u8], ) -> Result, RaftError> { - use crate::state_machine::{decode, encode}; - use crate::storage::KalamTypeConfig; use openraft::raft::AppendEntriesRequest; + use crate::{ + state_machine::{decode, encode}, + storage::KalamTypeConfig, + }; + let raft = self.get_raft_instance(group_id)?; let request: AppendEntriesRequest = decode(payload)?; @@ -1457,10 +1574,13 @@ impl RaftManager { group_id: GroupId, payload: &[u8], ) -> Result, RaftError> { - use crate::state_machine::{decode, encode}; - use crate::storage::KalamTypeConfig; use openraft::raft::InstallSnapshotRequest; + use crate::{ + state_machine::{decode, encode}, + storage::KalamTypeConfig, + }; + let raft = self.get_raft_instance(group_id)?; let request: InstallSnapshotRequest = decode(payload)?; @@ -1634,6 +1754,25 @@ impl RaftManager { results } + async fn run_action_for_data_groups(&self, action: ClusterAction) -> Vec { + let mut results = + Vec::with_capacity(self.user_data_shards.len() + self.shared_data_shards.len()); + + for (i, shard) in self.user_data_shards.iter().enumerate() { + results.push( + Self::run_action_for_group(GroupId::DataUserShard(i as u32), shard, action).await, + ); + } + + for (i, shard) in self.shared_data_shards.iter().enumerate() { + results.push( + 
Self::run_action_for_group(GroupId::DataSharedShard(i as u32), shard, action).await, + ); + } + + results + } + /// Trigger elections for all Raft groups pub async fn trigger_all_elections(&self) -> Result, RaftError> { Ok(self.run_action_for_all_groups(ClusterAction::TriggerElection).await) @@ -1654,6 +1793,14 @@ impl RaftManager { .await) } + /// Best-effort data leader rebalance. + /// + /// OpenRaft 0.9 does not expose a targeted leadership-transfer API here, so this asks local + /// leaders for user/shared data groups to step down and lets Raft elect replacements. + pub async fn rebalance_data_leaders(&self) -> Result, RaftError> { + Ok(self.run_action_for_data_groups(ClusterAction::StepDown).await) + } + /// Attempt to step down leaders for all Raft groups pub async fn step_down_all(&self) -> Result, RaftError> { Ok(self.run_action_for_all_groups(ClusterAction::StepDown).await) @@ -1790,10 +1937,14 @@ impl RaftManager { // Give time for any leadership-transition side effects to settle. 
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; log::info!( - "[CLUSTER] Leadership transfer attempts completed (explicit transfer may be unsupported by current OpenRaft version)" + "[CLUSTER] Leadership transfer attempts completed (explicit transfer may be \ + unsupported by current OpenRaft version)" ); } else { - log::warn!("[CLUSTER] No peers available for leadership transfer - cluster may experience brief unavailability"); + log::warn!( + "[CLUSTER] No peers available for leadership transfer - cluster may \ + experience brief unavailability" + ); } } @@ -1837,15 +1988,23 @@ impl RaftManager { /// Get the total number of cluster nodes (self + peers) pub fn total_nodes(&self) -> usize { - 1 + self.config.peers.len() + let mut nodes = BTreeSet::new(); + nodes.insert(self.config.node_id.as_u64()); + nodes.extend(self.config.peers.iter().map(|peer| peer.node_id.as_u64())); + nodes.extend(self.runtime_peers.read().keys().map(|node_id| node_id.as_u64())); + if let Some(metrics) = self.meta.metrics() { + nodes.extend(metrics.membership_config.nodes().map(|(node_id, _)| *node_id)); + } + nodes.len() } } #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + fn test_config() -> RaftManagerConfig { RaftManagerConfig { node_id: NodeId::new(1), @@ -1915,6 +2074,27 @@ mod tests { ); } + #[test] + fn test_runtime_registered_peer_is_authorized() { + let manager = RaftManager::new(test_config()); + let node_id = NodeId::new(2); + + assert!(!manager.is_known_cluster_node(node_id)); + + manager.register_peer(node_id, "127.0.0.1:5002".to_string(), "127.0.0.1:3002".to_string()); + + assert!(manager.is_known_cluster_node(node_id)); + assert_eq!(manager.total_nodes(), 2); + assert!(manager.is_allowed_peer_remote_addr( + node_id, + "127.0.0.1:61000".parse::().unwrap(), + )); + assert!(!manager.is_allowed_peer_remote_addr( + node_id, + "127.0.0.2:61000".parse::().unwrap(), + )); + } + #[test] fn 
test_is_leader_before_start() { let manager = RaftManager::new(test_config()); diff --git a/backend/crates/kalamdb-raft/src/network/cluster_client.rs b/backend/crates/kalamdb-raft/src/network/cluster_client.rs index 37d9b3500..7d1838a72 100644 --- a/backend/crates/kalamdb-raft/src/network/cluster_client.rs +++ b/backend/crates/kalamdb-raft/src/network/cluster_client.rs @@ -3,17 +3,19 @@ //! Provides a small, typed API for inter-node cluster RPCs that are not part //! of Raft log replication (notify-followers, forward-sql, ping). -use std::sync::Arc; +use std::{future::Future, sync::Arc}; use kalamdb_commons::models::NodeId; - -use super::cluster_service::cluster_client::ClusterServiceClient; -use super::models::{ - ForwardSqlRequest, ForwardSqlResponse, GetNodeInfoRequest, GetNodeInfoResponse, PingRequest, - PingResponse, +use tonic::{transport::Channel, Request, Response, Status}; + +use super::{ + cluster_service::cluster_client::ClusterServiceClient, + models::{ + ForwardSqlRequest, ForwardSqlResponse, GetNodeInfoRequest, GetNodeInfoResponse, + PingRequest, PingResponse, + }, }; -use crate::manager::RaftManager; -use crate::{GroupId, RaftError}; +use crate::{manager::RaftManager, GroupId, RaftError}; /// High-level cluster RPC client built on top of the shared Raft channel pool. 
#[derive(Clone)] @@ -27,15 +29,62 @@ impl ClusterClient { Self { manager } } + fn client_for_node( + &self, + target_node_id: NodeId, + ) -> Result, RaftError> { + let channel = self + .manager + .get_peer_channel(target_node_id) + .ok_or_else(|| RaftError::Network(format!("No channel for node {}", target_node_id)))?; + + Ok(ClusterServiceClient::new(channel)) + } + + fn request_with_metadata(&self, payload: T) -> Result, RaftError> { + let mut request = Request::new(payload); + self.manager.add_outgoing_rpc_metadata(&mut request)?; + Ok(request) + } + + async fn call_node( + &self, + target_node_id: NodeId, + method: &'static str, + payload: T, + call: F, + ) -> Result + where + F: FnOnce(ClusterServiceClient, Request) -> Fut, + Fut: Future, Status>>, + { + let client = self.client_for_node(target_node_id)?; + let request = self.request_with_metadata(payload)?; + let response = call(client, request).await.map_err(|e| { + RaftError::Network(format!("gRPC {} to node {} failed: {}", method, target_node_id, e)) + })?; + + Ok(response.into_inner()) + } + /// Forward SQL to the current Meta leader. pub async fn forward_sql_to_leader( &self, request: ForwardSqlRequest, + ) -> Result { + self.forward_sql_to_group_leader(GroupId::Meta, request).await + } + + /// Forward SQL to the current leader of a specific Raft group. 
+ pub async fn forward_sql_to_group_leader( + &self, + group_id: GroupId, + request: ForwardSqlRequest, ) -> Result { let leader_node_id = self .manager - .current_leader(GroupId::Meta) - .ok_or_else(|| RaftError::Network("No meta leader available".to_string()))?; + .current_leader(group_id) + .ok_or_else(|| RaftError::Network(format!("No leader available for {}", group_id)))?; self.forward_sql_to_node(leader_node_id, request).await } @@ -46,40 +95,22 @@ impl ClusterClient { target_node_id: NodeId, request: ForwardSqlRequest, ) -> Result { - let channel = self - .manager - .get_peer_channel(target_node_id) - .ok_or_else(|| RaftError::Network(format!("No channel for node {}", target_node_id)))?; - - let mut client = ClusterServiceClient::new(channel); - let mut grpc_request = tonic::Request::new(request); - self.manager.add_outgoing_rpc_metadata(&mut grpc_request)?; - - let response = client.forward_sql(grpc_request).await.map_err(|e| { - RaftError::Network(format!("gRPC forward_sql to node {} failed: {}", target_node_id, e)) - })?; - - Ok(response.into_inner()) + self.call_node(target_node_id, "forward_sql", request, |mut client, request| async move { + client.forward_sql(request).await + }) + .await } /// Ping a specific peer node. 
pub async fn ping_peer(&self, target_node_id: NodeId) -> Result { - let channel = self - .manager - .get_peer_channel(target_node_id) - .ok_or_else(|| RaftError::Network(format!("No channel for node {}", target_node_id)))?; - - let mut client = ClusterServiceClient::new(channel); - let mut grpc_request = tonic::Request::new(PingRequest { + let request = PingRequest { from_node_id: self.manager.node_id().as_u64(), - }); - self.manager.add_outgoing_rpc_metadata(&mut grpc_request)?; - - let response = client.ping(grpc_request).await.map_err(|e| { - RaftError::Network(format!("gRPC ping to node {} failed: {}", target_node_id, e)) - })?; + }; - Ok(response.into_inner()) + self.call_node(target_node_id, "ping", request, |mut client, request| async move { + client.ping(request).await + }) + .await } /// Fetch live node statistics from a specific peer. @@ -90,25 +121,14 @@ impl ClusterClient { &self, target_node_id: NodeId, ) -> Result { - let channel = self - .manager - .get_peer_channel(target_node_id) - .ok_or_else(|| RaftError::Network(format!("No channel for node {}", target_node_id)))?; - - let mut client = ClusterServiceClient::new(channel); - let mut grpc_request = tonic::Request::new(GetNodeInfoRequest { + let request = GetNodeInfoRequest { from_node_id: self.manager.node_id().as_u64(), - }); - self.manager.add_outgoing_rpc_metadata(&mut grpc_request)?; - - let response = client.get_node_info(grpc_request).await.map_err(|e| { - RaftError::Network(format!( - "gRPC get_node_info to node {} failed: {}", - target_node_id, e - )) - })?; + }; - Ok(response.into_inner()) + self.call_node(target_node_id, "get_node_info", request, |mut client, request| async move { + client.get_node_info(request).await + }) + .await } /// Fan-out `GetNodeInfo` to every known peer (excluding self) **in parallel**. 
@@ -124,6 +144,7 @@ impl ClusterClient { timeout_ms: u64, ) -> std::collections::HashMap { use std::time::Duration; + use tokio::time::timeout; let self_id = self.manager.node_id(); diff --git a/backend/crates/kalamdb-raft/src/network/cluster_handler.rs b/backend/crates/kalamdb-raft/src/network/cluster_handler.rs index 78d40b0dc..b073c8e5c 100644 --- a/backend/crates/kalamdb-raft/src/network/cluster_handler.rs +++ b/backend/crates/kalamdb-raft/src/network/cluster_handler.rs @@ -16,10 +16,12 @@ use std::sync::Arc; use tonic::{Request, Response, Status}; -use super::cluster_service::cluster_server::ClusterService; -use super::models::{ - ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, GetNodeInfoRequest, - GetNodeInfoResponse, PingRequest, PingResponse, +use super::{ + cluster_service::cluster_server::ClusterService, + models::{ + ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, GetNodeInfoRequest, + GetNodeInfoResponse, PingRequest, PingResponse, + }, }; use crate::manager::RaftManager; diff --git a/backend/crates/kalamdb-raft/src/network/cluster_service.rs b/backend/crates/kalamdb-raft/src/network/cluster_service.rs index d0039fab1..458f6544b 100644 --- a/backend/crates/kalamdb-raft/src/network/cluster_service.rs +++ b/backend/crates/kalamdb-raft/src/network/cluster_service.rs @@ -22,7 +22,6 @@ use tonic_prost::ProstCodec; // ─── Request/Response Messages ────────────────────────────────────────────── // All message types live in the `models` sub-module; they are re-exported here // so that the rest of this file (client, server, tests) can use them directly. 
- pub use super::models::{ ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, GetNodeInfoRequest, GetNodeInfoResponse, PingRequest, PingResponse, @@ -31,9 +30,10 @@ pub use super::models::{ // ─── gRPC Client ──────────────────────────────────────────────────────────── pub mod cluster_client { - use super::*; use tonic::codegen::*; + use super::*; + /// Cluster service gRPC client #[derive(Debug, Clone)] pub struct ClusterServiceClient { @@ -113,10 +113,12 @@ pub mod cluster_client { // ─── gRPC Server ──────────────────────────────────────────────────────────── pub mod cluster_server { - use super::*; use std::sync::Arc; + use tonic::codegen::*; + use super::*; + /// Cluster service trait — implement this to handle incoming cluster RPCs. #[async_trait::async_trait] pub trait ClusterService: std::marker::Send + std::marker::Sync + 'static { diff --git a/backend/crates/kalamdb-raft/src/network/mod.rs b/backend/crates/kalamdb-raft/src/network/mod.rs index d129e7b24..7855390e6 100644 --- a/backend/crates/kalamdb-raft/src/network/mod.rs +++ b/backend/crates/kalamdb-raft/src/network/mod.rs @@ -29,10 +29,6 @@ pub mod cluster_handler; pub mod cluster_service; pub mod models; -pub use network::{RaftNetwork, RaftNetworkFactory}; -pub use service::raft_client::RaftClient; -pub use service::{start_rpc_server, ClientProposalRequest, ClientProposalResponse, RaftService}; - // Cluster messaging re-exports pub use cluster_client::ClusterClient; pub use cluster_handler::{ClusterMessageHandler, ClusterServiceImpl, NoOpClusterHandler}; @@ -40,3 +36,8 @@ pub use models::{ forward_sql_param, ForwardSqlParam, ForwardSqlRequest, ForwardSqlResponse, ForwardSqlResponsePayload, GetNodeInfoRequest, GetNodeInfoResponse, PingRequest, PingResponse, }; +pub use network::{RaftChannelPool, RaftNetwork, RaftNetworkFactory}; +pub use service::{ + raft_client::RaftClient, start_rpc_server, ClientProposalRequest, ClientProposalResponse, + RaftService, +}; diff --git 
a/backend/crates/kalamdb-raft/src/network/network.rs b/backend/crates/kalamdb-raft/src/network/network.rs index 3f57d371e..0f50e6867 100644 --- a/backend/crates/kalamdb-raft/src/network/network.rs +++ b/backend/crates/kalamdb-raft/src/network/network.rs @@ -2,29 +2,37 @@ //! //! Provides the network transport for Raft RPCs using gRPC (tonic). -use std::collections::HashMap; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use openraft::error::{ - InstallSnapshotError, NetworkError, RPCError, RaftError, RemoteError, Unreachable, -}; -use openraft::network::{ - RPCOption, RaftNetwork as OpenRaftNetwork, RaftNetworkFactory as OpenRaftNetworkFactory, +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, }; -use openraft::raft::{ - AppendEntriesRequest, AppendEntriesResponse, InstallSnapshotRequest, InstallSnapshotResponse, - VoteRequest, VoteResponse, + +use kalamdb_commons::models::NodeId; +use openraft::{ + error::{InstallSnapshotError, NetworkError, RPCError, RaftError, RemoteError, Unreachable}, + network::{ + RPCOption, RaftNetwork as OpenRaftNetwork, RaftNetworkFactory as OpenRaftNetworkFactory, + }, + raft::{ + AppendEntriesRequest, AppendEntriesResponse, InstallSnapshotRequest, + InstallSnapshotResponse, VoteRequest, VoteResponse, + }, }; use parking_lot::RwLock; +use serde::{de::DeserializeOwned, Serialize}; use tonic::transport::{Certificate, Channel, ClientTlsConfig, Identity}; -use kalamdb_commons::models::NodeId; +use crate::{ + manager::{PeerNode, RpcTlsConfig}, + storage::{KalamNode, KalamTypeConfig}, + GroupId, +}; -use crate::manager::{PeerNode, RpcTlsConfig}; -use crate::storage::{KalamNode, KalamTypeConfig}; -use crate::GroupId; +pub type RaftChannelPool = Arc>; /// Simple connection error wrapper for openraft compatibility #[derive(Debug)] @@ -131,7 +139,8 @@ impl ConnectionTracker { if now.duration_since(entry.last_log) >= 
retry_interval { entry.last_log = now; log::warn!( - "Raft node {} in group {} left the cluster - trying to reconnect #{} (interval={}ms): {}", + "Raft node {} in group {} left the cluster - trying to reconnect #{} \ + (interval={}ms): {}", target, self.group_id, entry.retry_count, @@ -217,41 +226,47 @@ impl RaftNetwork { request.metadata_mut().insert("x-kalamdb-node-id", node_id); Ok(()) } -} -impl OpenRaftNetwork for RaftNetwork { - async fn append_entries( + fn build_raft_rpc_request( + &self, + rpc_kind: crate::network::service::RaftRpcKind, + rpc: &T, + ) -> Result, ConnectionError> { + let rpc_type = rpc_kind.as_str(); + let payload = crate::state_machine::encode(rpc) + .map_err(|e| ConnectionError(format!("failed to encode {}: {}", rpc_type, e)))?; + let mut request = tonic::Request::new(crate::network::service::RaftRpcRequest { + group_id: self.group_id.to_string(), + rpc_type: rpc_type.to_owned(), + payload, + }); + self.add_outgoing_rpc_metadata(&mut request)?; + Ok(request) + } + + async fn send_raft_rpc( &mut self, - rpc: AppendEntriesRequest, - _option: RPCOption, - ) -> Result, RPCError>> { + rpc_kind: crate::network::service::RaftRpcKind, + rpc: &T, + ) -> Result> + where + E: std::error::Error + Send + Sync + 'static, + T: Serialize, + { if !self.connection_tracker.should_attempt(self.target) { return Err(RPCError::Unreachable(Unreachable::new(&ConnectionError( "reconnect backoff".to_string(), )))); } - // Get channel let channel = self.get_channel().map_err(|e| RPCError::Unreachable(Unreachable::new(&e)))?; - - // Serialize request - let request_bytes = crate::state_machine::encode(&rpc) + let request = self + .build_raft_rpc_request(rpc_kind, rpc) .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - // Create gRPC request let mut client = crate::network::service::raft_client::RaftClient::new(channel); - - let mut grpc_request = tonic::Request::new(crate::network::service::RaftRpcRequest { - group_id: self.group_id.to_string(), - rpc_type: 
"append_entries".to_string(), - payload: request_bytes, - }); - self.add_outgoing_rpc_metadata(&mut grpc_request) - .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - - // Send request - let response = match client.raft_rpc(grpc_request).await { + let response = match client.raft_rpc(request).await { Ok(response) => { self.connection_tracker.record_success(self.target); response @@ -262,23 +277,48 @@ impl OpenRaftNetwork for RaftNetwork { }, }; - // Deserialize response - let inner = response.into_inner(); - if !inner.error.is_empty() { - self.connection_tracker.record_failure(self.target, &inner.error); - return Err(RPCError::RemoteError(RemoteError::new( - self.target, - RaftError::Fatal(openraft::error::Fatal::Panicked), - ))); + Ok(response.into_inner()) + } + + fn decode_raft_rpc_response( + &self, + rpc_kind: crate::network::service::RaftRpcKind, + response: crate::network::service::RaftRpcResponse, + remote_error: E, + ) -> Result> + where + E: std::error::Error + Send + Sync + 'static, + T: DeserializeOwned, + { + if !response.error.is_empty() { + self.connection_tracker.record_failure(self.target, &response.error); + return Err(RPCError::RemoteError(RemoteError::new(self.target, remote_error))); } - let result: AppendEntriesResponse = crate::state_machine::decode(&inner.payload) - .map_err(|e| { - self.connection_tracker.record_failure(self.target, &e.to_string()); - RPCError::Network(NetworkError::new(&e)) - })?; + crate::state_machine::decode(&response.payload).map_err(|e| { + self.connection_tracker.record_failure(self.target, &e.to_string()); + RPCError::Network(NetworkError::new(&ConnectionError(format!( + "failed to decode {} response: {}", + rpc_kind, e + )))) + }) + } +} - Ok(result) +impl OpenRaftNetwork for RaftNetwork { + async fn append_entries( + &mut self, + rpc: AppendEntriesRequest, + _option: RPCOption, + ) -> Result, RPCError>> { + let response = self + .send_raft_rpc(crate::network::service::RaftRpcKind::AppendEntries, &rpc) + 
.await?; + self.decode_raft_rpc_response( + crate::network::service::RaftRpcKind::AppendEntries, + response, + RaftError::Fatal(openraft::error::Fatal::Panicked), + ) } async fn install_snapshot( @@ -289,60 +329,14 @@ impl OpenRaftNetwork for RaftNetwork { InstallSnapshotResponse, RPCError>, > { - if !self.connection_tracker.should_attempt(self.target) { - return Err(RPCError::Unreachable(Unreachable::new(&ConnectionError( - "reconnect backoff".to_string(), - )))); - } - - // Get channel - let channel = - self.get_channel().map_err(|e| RPCError::Unreachable(Unreachable::new(&e)))?; - - // Serialize request - let request_bytes = crate::state_machine::encode(&rpc) - .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - - // Create gRPC request - let mut client = crate::network::service::raft_client::RaftClient::new(channel); - - let mut grpc_request = tonic::Request::new(crate::network::service::RaftRpcRequest { - group_id: self.group_id.to_string(), - rpc_type: "install_snapshot".to_string(), - payload: request_bytes, - }); - self.add_outgoing_rpc_metadata(&mut grpc_request) - .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - - // Send request - let response = match client.raft_rpc(grpc_request).await { - Ok(response) => { - self.connection_tracker.record_success(self.target); - response - }, - Err(e) => { - self.connection_tracker.record_failure(self.target, &e.to_string()); - return Err(RPCError::Network(NetworkError::new(&e))); - }, - }; - - // Deserialize response - let inner = response.into_inner(); - if !inner.error.is_empty() { - self.connection_tracker.record_failure(self.target, &inner.error); - return Err(RPCError::RemoteError(RemoteError::new( - self.target, - RaftError::Fatal(openraft::error::Fatal::Panicked), - ))); - } - - let result: InstallSnapshotResponse = crate::state_machine::decode(&inner.payload) - .map_err(|e| { - self.connection_tracker.record_failure(self.target, &e.to_string()); - RPCError::Network(NetworkError::new(&e)) - 
})?; - - Ok(result) + let response = self + .send_raft_rpc(crate::network::service::RaftRpcKind::InstallSnapshot, &rpc) + .await?; + self.decode_raft_rpc_response( + crate::network::service::RaftRpcKind::InstallSnapshot, + response, + RaftError::Fatal(openraft::error::Fatal::Panicked), + ) } async fn vote( @@ -350,60 +344,12 @@ impl OpenRaftNetwork for RaftNetwork { rpc: VoteRequest, _option: RPCOption, ) -> Result, RPCError>> { - if !self.connection_tracker.should_attempt(self.target) { - return Err(RPCError::Unreachable(Unreachable::new(&ConnectionError( - "reconnect backoff".to_string(), - )))); - } - - // Get channel - let channel = - self.get_channel().map_err(|e| RPCError::Unreachable(Unreachable::new(&e)))?; - - // Serialize request - let request_bytes = crate::state_machine::encode(&rpc) - .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - - // Create gRPC request - let mut client = crate::network::service::raft_client::RaftClient::new(channel); - - let mut grpc_request = tonic::Request::new(crate::network::service::RaftRpcRequest { - group_id: self.group_id.to_string(), - rpc_type: "vote".to_string(), - payload: request_bytes, - }); - self.add_outgoing_rpc_metadata(&mut grpc_request) - .map_err(|e| RPCError::Network(NetworkError::new(&e)))?; - - // Send request - let response = match client.raft_rpc(grpc_request).await { - Ok(response) => { - self.connection_tracker.record_success(self.target); - response - }, - Err(e) => { - self.connection_tracker.record_failure(self.target, &e.to_string()); - return Err(RPCError::Network(NetworkError::new(&e))); - }, - }; - - // Deserialize response - let inner = response.into_inner(); - if !inner.error.is_empty() { - self.connection_tracker.record_failure(self.target, &inner.error); - return Err(RPCError::RemoteError(RemoteError::new( - self.target, - RaftError::Fatal(openraft::error::Fatal::Panicked), - ))); - } - - let result: VoteResponse = - crate::state_machine::decode(&inner.payload).map_err(|e| { - 
self.connection_tracker.record_failure(self.target, &e.to_string()); - RPCError::Network(NetworkError::new(&e)) - })?; - - Ok(result) + let response = self.send_raft_rpc(crate::network::service::RaftRpcKind::Vote, &rpc).await?; + self.decode_raft_rpc_response( + crate::network::service::RaftRpcKind::Vote, + response, + RaftError::Fatal(openraft::error::Fatal::Panicked), + ) } } @@ -415,7 +361,7 @@ pub struct RaftNetworkFactory { /// Known nodes in the cluster nodes: Arc>>, /// Cached gRPC channels (node_id -> channel) - channels: Arc>, + channels: RaftChannelPool, /// Connection retry tracker connection_tracker: ConnectionTracker, /// Outgoing RPC auth identity (set during group start) @@ -425,12 +371,22 @@ pub struct RaftNetworkFactory { } impl RaftNetworkFactory { + /// Create a shared channel pool for all Raft groups on a node. + pub fn new_channel_pool() -> RaftChannelPool { + Arc::new(dashmap::DashMap::new()) + } + /// Create a new network factory pub fn new(group_id: GroupId) -> Self { + Self::new_with_channel_pool(group_id, Self::new_channel_pool()) + } + + /// Create a new network factory backed by a shared channel pool. + pub fn new_with_channel_pool(group_id: GroupId, channels: RaftChannelPool) -> Self { Self { group_id, nodes: Arc::new(RwLock::new(HashMap::new())), - channels: Arc::new(dashmap::DashMap::new()), + channels, connection_tracker: ConnectionTracker::new(group_id, Duration::from_secs(3)), auth_identity: Arc::new(RwLock::new(None)), tls_material: Arc::new(RwLock::new(None)), @@ -483,8 +439,10 @@ impl RaftNetworkFactory { ) -> Result<(), crate::RaftError> { if !tls.enabled { let mut guard = self.tls_material.write(); - *guard = None; - self.channels.clear(); + if guard.is_some() { + *guard = None; + self.channels.clear(); + } return Ok(()); } @@ -616,6 +574,35 @@ impl RaftNetworkFactory { Some(ch) } + /// Send a follower-to-leader proposal over the shared Raft gRPC channel pool. 
+ pub async fn send_client_proposal( + &self, + target_node_id: NodeId, + group_id: GroupId, + command: Vec, + ) -> Result { + let channel = self.get_or_create_channel(target_node_id).ok_or_else(|| { + crate::RaftError::Network(format!( + "No channel available for leader node {}", + target_node_id + )) + })?; + + let mut client = crate::network::service::raft_client::RaftClient::new(channel); + let mut request = tonic::Request::new(crate::network::service::ClientProposalRequest { + group_id: group_id.to_string(), + command, + }); + self.add_outgoing_rpc_metadata(&mut request) + .map_err(|e| crate::RaftError::Network(format!("Failed to add RPC metadata: {}", e)))?; + + let response = client.client_proposal(request).await.map_err(|e| { + crate::RaftError::Network(format!("gRPC error forwarding proposal: {}", e)) + })?; + + Ok(response.into_inner()) + } + /// Get all registered peer node IDs and their info. /// /// Used by [`super::cluster_client::ClusterClient`] for broadcasting. @@ -639,8 +626,8 @@ impl OpenRaftNetworkFactory for RaftNetworkFactory { } else { let ch = self.build_channel(target_node_id, node).unwrap_or_else(|e| { log::error!( - "[group {}] Cannot build gRPC channel to node {}: {}. \ - Check network configuration and peer addresses.", + "[group {}] Cannot build gRPC channel to node {}: {}. Check network \ + configuration and peer addresses.", self.group_id, target_node_id, e @@ -656,8 +643,8 @@ impl OpenRaftNetworkFactory for RaftNetworkFactory { let auth_identity = self.auth_identity.read().clone().unwrap_or_else(|| { log::error!( - "[group {}] RPC auth identity is not configured. \ - Ensure configure_rpc_auth_identity() is called before the Raft node starts.", + "[group {}] RPC auth identity is not configured. 
Ensure \ + configure_rpc_auth_identity() is called before the Raft node starts.", self.group_id ); panic!("RPC auth identity is not configured for group {}", self.group_id) diff --git a/backend/crates/kalamdb-raft/src/network/service.rs b/backend/crates/kalamdb-raft/src/network/service.rs index dbce6448b..f80e420ad 100644 --- a/backend/crates/kalamdb-raft/src/network/service.rs +++ b/backend/crates/kalamdb-raft/src/network/service.rs @@ -6,13 +6,14 @@ //! - Raft consensus RPCs (vote, append_entries, install_snapshot) //! - Client proposal forwarding (forward proposals from followers to leader) +use std::{io::ErrorKind, time::Duration}; + use kalamdb_pg::{KalamPgService, PgServiceServer}; -use std::io::ErrorKind; -use std::time::Duration; -use tokio::sync::oneshot; -use tokio::time::sleep; -use tonic::transport::{Certificate, Identity, ServerTlsConfig}; -use tonic::{Request, Response, Status}; +use tokio::{sync::oneshot, time::sleep}; +use tonic::{ + transport::{Certificate, Identity, ServerTlsConfig}, + Request, Response, Status, +}; use tonic_prost::ProstCodec; /// Raft RPC request message @@ -43,6 +44,43 @@ pub struct RaftRpcResponse { pub error: String, } +/// Canonical Raft RPC operation names carried in [`RaftRpcRequest`]. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RaftRpcKind { + Vote, + AppendEntries, + InstallSnapshot, +} + +impl RaftRpcKind { + pub const fn as_str(self) -> &'static str { + match self { + Self::Vote => "vote", + Self::AppendEntries => "append_entries", + Self::InstallSnapshot => "install_snapshot", + } + } +} + +impl std::fmt::Display for RaftRpcKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +impl std::str::FromStr for RaftRpcKind { + type Err = String; + + fn from_str(value: &str) -> Result { + match value { + "vote" => Ok(Self::Vote), + "append_entries" => Ok(Self::AppendEntries), + "install_snapshot" => Ok(Self::InstallSnapshot), + other => Err(format!("Unknown RPC type: {}", other)), + } + } +} + /// Client proposal request (for forwarding from follower to leader) #[derive(Clone, PartialEq, prost::Message)] pub struct ClientProposalRequest { @@ -83,9 +121,10 @@ pub struct ClientProposalResponse { /// Generated gRPC client module pub mod raft_client { - use super::*; use tonic::codegen::*; + use super::*; + /// Raft RPC client #[derive(Debug, Clone)] pub struct RaftClient { @@ -144,9 +183,10 @@ pub mod raft_client { /// Generated gRPC server module pub mod raft_server { - use super::*; use tonic::codegen::*; + use super::*; + /// Raft service trait #[async_trait::async_trait] pub trait Raft: std::marker::Send + std::marker::Sync + 'static { @@ -260,9 +300,10 @@ pub mod raft_server { } } -use crate::manager::RaftManager; use std::sync::Arc; +use crate::manager::RaftManager; + /// Raft gRPC service implementation #[derive(Clone)] pub struct RaftService { @@ -292,18 +333,15 @@ impl raft_server::Raft for RaftService { .parse::() .map_err(|e| Status::invalid_argument(format!("Invalid group ID: {}", e)))?; - // Route to appropriate Raft group - let result = match req.rpc_type.as_str() { - "vote" => self.manager.handle_vote(group_id, &req.payload).await, - "append_entries" => 
self.manager.handle_append_entries(group_id, &req.payload).await, - "install_snapshot" => { - self.manager.handle_install_snapshot(group_id, &req.payload).await + let rpc_kind = req.rpc_type.parse::().map_err(Status::invalid_argument)?; + + let result = match rpc_kind { + RaftRpcKind::Vote => self.manager.handle_vote(group_id, &req.payload).await, + RaftRpcKind::AppendEntries => { + self.manager.handle_append_entries(group_id, &req.payload).await }, - _ => { - return Err(Status::invalid_argument(format!( - "Unknown RPC type: {}", - req.rpc_type - ))); + RaftRpcKind::InstallSnapshot => { + self.manager.handle_install_snapshot(group_id, &req.payload).await }, }; @@ -501,7 +539,8 @@ pub async fn start_rpc_server( match tokio::time::timeout(tokio::time::Duration::from_secs(5), rx).await { Ok(Ok(Ok(()))) => { log::info!( - "✓ Raft RPC server started on {} (advertising as {})", + "[CLUSTER] Node {} RPC listener is up on {} and advertising {}", + manager.config().node_id, bind_addr, advertise_addr ); @@ -572,11 +611,11 @@ mod tests { fn test_request_message() { let req = RaftRpcRequest { group_id: "MetaSystem".to_string(), - rpc_type: "vote".to_string(), + rpc_type: RaftRpcKind::Vote.to_string(), payload: vec![1, 2, 3], }; assert_eq!(req.group_id, "MetaSystem"); - assert_eq!(req.rpc_type, "vote"); + assert_eq!(req.rpc_type, RaftRpcKind::Vote.as_str()); } } diff --git a/backend/crates/kalamdb-raft/src/state_machine/meta.rs b/backend/crates/kalamdb-raft/src/state_machine/meta.rs index e86e6fd56..91d2d7ae9 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/meta.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/meta.rs @@ -9,18 +9,28 @@ //! //! Runs in the unified Meta Raft group (replaces MetaSystem + MetaUsers + MetaJobs). 
+use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use async_trait::async_trait; use serde::{Deserialize, Serialize}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; use super::{ decode as bincode_decode, encode as bincode_encode, ApplyResult, KalamStateMachine, StateMachineSnapshot, }; -use crate::applier::MetaApplier; -use crate::commands::{MetaCommand, MetaResponse}; -use crate::{GroupId, RaftError}; +use crate::{ + applier::MetaApplier, + commands::{MetaCommand, MetaResponse}, + GroupId, RaftError, +}; + +fn is_namespace_already_exists_error(error: &RaftError) -> bool { + let message = error.to_string().to_ascii_lowercase(); + message.contains("namespace") && message.contains("already exists") +} // ============================================================================= // Snapshot Structure @@ -56,6 +66,8 @@ pub struct MetaStateMachine { last_applied_index: AtomicU64, /// Last applied log term last_applied_term: AtomicU64, + /// Notifies waiters when the applied index advances. + last_applied_tx: tokio::sync::watch::Sender, /// Approximate data size in bytes approximate_size: AtomicU64, @@ -78,9 +90,11 @@ impl MetaStateMachine { /// /// Use `set_applier` to inject persistence after construction. 
pub fn new() -> Self { + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), applier: parking_lot::RwLock::new(None), } @@ -88,9 +102,11 @@ impl MetaStateMachine { /// Create a new MetaStateMachine with an applier pub fn with_applier(applier: Arc) -> Self { + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), applier: parking_lot::RwLock::new(Some(applier)), } @@ -117,6 +133,10 @@ impl MetaStateMachine { self.last_applied_index.load(Ordering::Acquire) } + fn publish_last_applied(&self, index: u64) { + self.last_applied_tx.send_replace(index); + } + /// Apply a meta command async fn apply_command(&self, cmd: MetaCommand) -> Result { let applier = { @@ -160,6 +180,35 @@ impl MetaStateMachine { }) }, + MetaCommand::CreateNamespaceIfNotExists { + namespace_id, + created_by, + } => { + log::debug!( + "MetaStateMachine: CreateNamespaceIfNotExists {:?} by {:?}", + namespace_id, + created_by + ); + + let message = if let Some(ref applier) = applier { + match applier.create_namespace(&namespace_id, created_by.as_ref()).await { + Ok(message) => message, + Err(error) if is_namespace_already_exists_error(&error) => { + format!("Namespace '{}' already exists", namespace_id) + }, + Err(error) => return Err(error), + } + } else { + String::new() + }; + + self.approximate_size.fetch_add(100, Ordering::Relaxed); + Ok(MetaResponse::NamespaceCreated { + namespace_id, + message, + }) + }, + MetaCommand::DeleteNamespace { namespace_id } => { log::debug!("MetaStateMachine: DeleteNamespace {:?}", namespace_id); @@ -519,6 +568,7 @@ impl KalamStateMachine for MetaStateMachine { // Update last applied self.last_applied_index.store(index, Ordering::Release); self.last_applied_term.store(term, 
Ordering::Release); + self.publish_last_applied(index); // Notify data shards that meta has advanced (for watermark draining) super::get_coordinator().advance(index); @@ -532,6 +582,21 @@ impl KalamStateMachine for MetaStateMachine { self.last_applied_index.load(Ordering::Acquire) } + fn subscribe_last_applied(&self) -> Option> { + Some(self.last_applied_tx.subscribe()) + } + + fn mark_applied_index(&self, index: u64, term: u64) { + let last_applied = self.last_applied_index.load(Ordering::Acquire); + if index <= last_applied { + return; + } + + self.last_applied_index.store(index, Ordering::Release); + self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); + } + fn last_applied_term(&self) -> u64 { self.last_applied_term.load(Ordering::Acquire) } @@ -555,6 +620,7 @@ impl KalamStateMachine for MetaStateMachine { self.last_applied_index.store(snapshot.last_applied_index, Ordering::Release); self.last_applied_term.store(snapshot.last_applied_term, Ordering::Release); + self.publish_last_applied(snapshot.last_applied_index); log::info!( "MetaStateMachine: Restored snapshot at index {} term {}", diff --git a/backend/crates/kalamdb-raft/src/state_machine/meta_coordinator.rs b/backend/crates/kalamdb-raft/src/state_machine/meta_coordinator.rs index 2f155d0eb..0c69a0a48 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/meta_coordinator.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/meta_coordinator.rs @@ -7,16 +7,19 @@ //! - Atomic tracking of current meta index //! 
- Notification mechanism for data shards to wake up and drain -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use tokio::sync::Notify; /// Coordinates meta index updates and notifies data shards /// /// Used by: /// - MetaStateMachine: calls `advance()` after applying each entry -/// - Data state machines: call `current_index()` to check watermark, -/// and `subscribe()` to get notified when meta advances +/// - Data state machines: call `current_index()` to check watermark, and `subscribe()` to get +/// notified when meta advances #[derive(Debug)] pub struct MetadataCoordinator { /// Current meta group last_applied_index diff --git a/backend/crates/kalamdb-raft/src/state_machine/mod.rs b/backend/crates/kalamdb-raft/src/state_machine/mod.rs index 9b78a80a3..2d881762d 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/mod.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/mod.rs @@ -25,17 +25,13 @@ mod trait_def; mod user_data; // Re-export serialization helpers for convenience -pub use serde_helpers::{decode, encode}; - -pub use trait_def::{ApplyResult, KalamStateMachine, StateMachineSnapshot}; - // Unified Meta state machine pub use meta::MetaStateMachine; - -// Data state machines -pub use shared_data::SharedDataStateMachine; -pub use user_data::UserDataStateMachine; - // Watermark coordination pub use meta_coordinator::{get_coordinator, init_coordinator, MetadataCoordinator}; pub use pending_buffer::{PendingBuffer, PendingCommand}; +pub use serde_helpers::{decode, encode}; +// Data state machines +pub use shared_data::SharedDataStateMachine; +pub use trait_def::{ApplyResult, KalamStateMachine, StateMachineSnapshot}; +pub use user_data::UserDataStateMachine; diff --git a/backend/crates/kalamdb-raft/src/state_machine/pending_buffer.rs b/backend/crates/kalamdb-raft/src/state_machine/pending_buffer.rs index be794ee44..78403d1f1 100644 --- 
a/backend/crates/kalamdb-raft/src/state_machine/pending_buffer.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/pending_buffer.rs @@ -6,9 +6,10 @@ //! This ensures correct ordering: data commands are not applied until all //! dependent metadata (tables, users, storages) has been applied locally. +use std::collections::BTreeMap; + use parking_lot::RwLock; use serde::{Deserialize, Serialize}; -use std::collections::BTreeMap; /// A command waiting to be applied once Meta catches up #[derive(Clone, Debug, Serialize, Deserialize)] diff --git a/backend/crates/kalamdb-raft/src/state_machine/serde_helpers.rs b/backend/crates/kalamdb-raft/src/state_machine/serde_helpers.rs index 6e189a65a..2bdaf5bbd 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/serde_helpers.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/serde_helpers.rs @@ -4,9 +4,10 @@ //! self-describing, schema-evolution-friendly (add/remove/reorder fields), //! and supports any serde key type including integer-keyed maps. -use crate::error::RaftError; use serde::{de::DeserializeOwned, Serialize}; +use crate::error::RaftError; + /// Encode a value to bytes using MessagePack with named fields. 
pub fn encode(value: &T) -> Result, RaftError> { rmp_serde::to_vec_named(value).map_err(|e| RaftError::Serialization(e.to_string())) @@ -19,9 +20,10 @@ pub fn decode(bytes: &[u8]) -> Result { #[cfg(test)] mod tests { - use super::*; use serde::{Deserialize, Serialize}; + use super::*; + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] struct TestData { id: u64, @@ -52,10 +54,12 @@ mod tests { #[test] fn test_entry_payload_membership_roundtrip() { - use crate::storage::{KalamNode, KalamTypeConfig}; - use openraft::{EntryPayload, Membership}; use std::collections::BTreeMap; + use openraft::{EntryPayload, Membership}; + + use crate::storage::{KalamNode, KalamTypeConfig}; + let node = KalamNode::new("127.0.0.1:9081", "http://127.0.0.1:8081"); let mut nodes = BTreeMap::new(); nodes.insert(1u64, node); @@ -80,10 +84,12 @@ mod tests { #[test] fn test_entry_payload_membership_with_two_nodes() { - use crate::storage::{KalamNode, KalamTypeConfig}; - use openraft::{EntryPayload, Membership}; use std::collections::BTreeMap; + use openraft::{EntryPayload, Membership}; + + use crate::storage::{KalamNode, KalamTypeConfig}; + let node1 = KalamNode::new("127.0.0.1:9081", "http://127.0.0.1:8081"); let node2 = KalamNode::new("127.0.0.1:9082", "http://127.0.0.1:8082"); let mut nodes = BTreeMap::new(); diff --git a/backend/crates/kalamdb-raft/src/state_machine/shared_data.rs b/backend/crates/kalamdb-raft/src/state_machine/shared_data.rs index 048145cd3..481a807af 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/shared_data.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/shared_data.rs @@ -12,30 +12,34 @@ //! to catch up before applying. This ensures data operations don't run before //! their dependent metadata (tables) is applied locally. 
+use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use async_trait::async_trait; +use kalamdb_commons::{ + models::{OperationKind, TransactionId}, + TableId, TableType, +}; +use kalamdb_transactions::StagedMutation; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; - -use crate::applier::SharedDataApplier; -use crate::{DataResponse, GroupId, RaftCommand, RaftError, SharedDataCommand}; -use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableType; -use kalamdb_transactions::StagedMutation; use super::{ - decode as bincode_decode, encode as bincode_encode, ApplyResult, KalamStateMachine, - StateMachineSnapshot, + decode as bincode_decode, encode as bincode_encode, get_coordinator, ApplyResult, + KalamStateMachine, PendingBuffer, PendingCommand, StateMachineSnapshot, +}; +use crate::{ + applier::SharedDataApplier, commit_seq_from_log_position, DataResponse, GroupId, RaftCommand, + RaftError, SharedDataCommand, }; -use super::{get_coordinator, PendingBuffer, PendingCommand}; /// Row operation tracking (for metrics) #[derive(Debug, Clone, Serialize, Deserialize)] struct SharedOperation { - table_namespace: String, - table_name: String, - operation: String, // "insert", "update", "delete" + table_id: TableId, + operation: OperationKind, row_count: u64, } @@ -88,6 +92,8 @@ pub struct SharedDataStateMachine { last_applied_index: AtomicU64, /// Last applied log term last_applied_term: AtomicU64, + /// Notifies waiters when the applied index advances. 
+ last_applied_tx: tokio::sync::watch::Sender, /// Approximate data size in bytes approximate_size: AtomicU64, /// Total operations processed @@ -114,10 +120,12 @@ impl std::fmt::Debug for SharedDataStateMachine { impl SharedDataStateMachine { /// Create a new SharedDataStateMachine pub fn new(shard: u32) -> Self { + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { shard, last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), total_operations: AtomicU64::new(0), recent_operations: RwLock::new(Vec::new()), @@ -128,10 +136,12 @@ impl SharedDataStateMachine { /// Create a new SharedDataStateMachine with an applier pub fn with_applier(shard: u32, applier: Arc) -> Self { + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { shard, last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), total_operations: AtomicU64::new(0), recent_operations: RwLock::new(Vec::new()), @@ -173,7 +183,8 @@ impl SharedDataStateMachine { for pending in drained { let cmd = Self::decode_apply_command(&pending.command_bytes)?; - let _ = self.apply_decoded_command(cmd).await?; + let commit_seq = commit_seq_from_log_position(self.group_id(), pending.log_index); + let _ = self.apply_decoded_command(cmd, commit_seq).await?; log::debug!( "SharedDataStateMachine[{}]: Applied buffered command log_index={}", self.shard, @@ -189,8 +200,28 @@ impl SharedDataStateMachine { self.pending_buffer.len() } + fn publish_last_applied(&self, index: u64) { + self.last_applied_tx.send_replace(index); + } + + fn record_operation(&self, table_id: TableId, operation: OperationKind, row_count: usize) { + let mut ops = self.recent_operations.write(); + ops.push(SharedOperation { + table_id, + operation, + row_count: row_count as u64, + }); + if ops.len() > 100 { + ops.remove(0); + } + } + /// Apply a shared data command - async fn 
apply_command(&self, cmd: SharedDataCommand) -> Result { + async fn apply_command( + &self, + cmd: SharedDataCommand, + commit_seq: u64, + ) -> Result { // Get applier reference let applier = { let guard = self.applier.read(); @@ -208,7 +239,7 @@ impl SharedDataStateMachine { // Persist data via applier if available let rows_affected = if let Some(ref a) = applier { - match a.insert(&table_id, &rows).await { + match a.insert(&table_id, &rows, commit_seq).await { Ok(count) => count, Err(e) => { log::warn!( @@ -227,22 +258,7 @@ impl SharedDataStateMachine { 0 }; - // Track operation - let op = SharedOperation { - table_namespace: table_id.namespace_id().as_str().to_string(), - table_name: table_id.table_name().as_str().to_string(), - operation: "insert".to_string(), - row_count: rows_affected as u64, - }; - - { - let mut ops = self.recent_operations.write(); - ops.push(op); - // Keep only last 100 operations - if ops.len() > 100 { - ops.remove(0); - } - } + self.record_operation(table_id, OperationKind::Insert, rows_affected); self.total_operations.fetch_add(1, Ordering::Relaxed); self.approximate_size.fetch_add(rows.len() as u64, Ordering::Relaxed); @@ -259,7 +275,7 @@ impl SharedDataStateMachine { log::debug!("SharedDataStateMachine[{}]: Update {:?}", self.shard, table_id); let rows_affected = if let Some(ref a) = applier { - match a.update(&table_id, &updates, filter.as_deref()).await { + match a.update(&table_id, &updates, filter.as_deref(), commit_seq).await { Ok(count) => count, Err(e) => { log::warn!( @@ -278,20 +294,7 @@ impl SharedDataStateMachine { 0 }; - let op = SharedOperation { - table_namespace: table_id.namespace_id().as_str().to_string(), - table_name: table_id.table_name().as_str().to_string(), - operation: "update".to_string(), - row_count: rows_affected as u64, - }; - - { - let mut ops = self.recent_operations.write(); - ops.push(op); - if ops.len() > 100 { - ops.remove(0); - } - } + self.record_operation(table_id, OperationKind::Update, 
rows_affected); self.total_operations.fetch_add(1, Ordering::Relaxed); Ok(DataResponse::RowsAffected(rows_affected)) @@ -305,7 +308,7 @@ impl SharedDataStateMachine { log::debug!("SharedDataStateMachine[{}]: Delete from {:?}", self.shard, table_id); let rows_affected = if let Some(ref a) = applier { - match a.delete(&table_id, pk_values.as_deref()).await { + match a.delete(&table_id, pk_values.as_deref(), commit_seq).await { Ok(count) => count, Err(e) => { log::warn!( @@ -324,20 +327,7 @@ impl SharedDataStateMachine { 0 }; - let op = SharedOperation { - table_namespace: table_id.namespace_id().as_str().to_string(), - table_name: table_id.table_name().as_str().to_string(), - operation: "delete".to_string(), - row_count: rows_affected as u64, - }; - - { - let mut ops = self.recent_operations.write(); - ops.push(op); - if ops.len() > 100 { - ops.remove(0); - } - } + self.record_operation(table_id, OperationKind::Delete, rows_affected); self.total_operations.fetch_add(1, Ordering::Relaxed); Ok(DataResponse::RowsAffected(rows_affected)) @@ -368,13 +358,14 @@ impl SharedDataStateMachine { async fn apply_decoded_command( &self, cmd: SharedApplyCommand, + commit_seq: u64, ) -> Result { match cmd { - SharedApplyCommand::Shared(command) => self.apply_command(command).await, + SharedApplyCommand::Shared(command) => self.apply_command(command, commit_seq).await, SharedApplyCommand::TransactionCommit { transaction_id, mutations, - } => self.apply_transaction_commit(transaction_id, mutations).await, + } => self.apply_transaction_commit(transaction_id, mutations, commit_seq).await, } } @@ -382,6 +373,7 @@ impl SharedDataStateMachine { &self, transaction_id: TransactionId, mutations: Vec, + commit_seq: u64, ) -> Result { if mutations.iter().any(|mutation| mutation.table_type != TableType::Shared) { return Ok(DataResponse::error( @@ -398,7 +390,7 @@ impl SharedDataStateMachine { return Ok(DataResponse::error("No applier set, transaction commit not persisted")); }; - match 
applier.apply_transaction_batch(&transaction_id, &mutations).await { + match applier.apply_transaction_batch(&transaction_id, &mutations, commit_seq).await { Ok(result) => { self.total_operations.fetch_add(1, Ordering::Relaxed); Ok(DataResponse::TransactionCommitted(result)) @@ -445,7 +437,8 @@ impl KalamStateMachine for SharedDataStateMachine { let current_meta = get_coordinator().current_index(); if required_meta > current_meta { log::debug!( - "SharedDataStateMachine[{}]: Buffering command (required_meta={} > current_meta={})", + "SharedDataStateMachine[{}]: Buffering command (required_meta={} > \ + current_meta={})", self.shard, required_meta, current_meta @@ -460,6 +453,7 @@ impl KalamStateMachine for SharedDataStateMachine { // Mark as applied (buffered) to satisfy Raft log progress self.last_applied_index.store(index, Ordering::Release); self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); return Ok(ApplyResult::NoOp); } @@ -471,11 +465,13 @@ impl KalamStateMachine for SharedDataStateMachine { } // Apply current command - let response = self.apply_decoded_command(cmd).await?; + let commit_seq = commit_seq_from_log_position(self.group_id(), index); + let response = self.apply_decoded_command(cmd, commit_seq).await?; // Update last applied self.last_applied_index.store(index, Ordering::Release); self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); // Serialize response let response_data = crate::codec::command_codec::encode_data_response(&response)?; @@ -487,6 +483,21 @@ impl KalamStateMachine for SharedDataStateMachine { self.last_applied_index.load(Ordering::Acquire) } + fn subscribe_last_applied(&self) -> Option> { + Some(self.last_applied_tx.subscribe()) + } + + fn mark_applied_index(&self, index: u64, term: u64) { + let last_applied = self.last_applied_index.load(Ordering::Acquire); + if index <= last_applied { + return; + } + + self.last_applied_index.store(index, 
Ordering::Release); + self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); + } + fn last_applied_term(&self) -> u64 { self.last_applied_term.load(Ordering::Acquire) } @@ -525,10 +536,14 @@ impl KalamStateMachine for SharedDataStateMachine { self.total_operations.store(data.total_operations, Ordering::Release); self.last_applied_index.store(snapshot.last_applied_index, Ordering::Release); self.last_applied_term.store(snapshot.last_applied_term, Ordering::Release); + self.publish_last_applied(snapshot.last_applied_index); log::info!( - "SharedDataStateMachine[{}]: Restored from snapshot at index {}, term {}, {} pending commands", - self.shard, snapshot.last_applied_index, snapshot.last_applied_term, + "SharedDataStateMachine[{}]: Restored from snapshot at index {}, term {}, {} pending \ + commands", + self.shard, + snapshot.last_applied_index, + snapshot.last_applied_term, self.pending_buffer.len() ); @@ -542,19 +557,26 @@ impl KalamStateMachine for SharedDataStateMachine { #[cfg(test)] mod tests { - use super::*; - use async_trait::async_trait; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::NamespaceId; - use kalamdb_commons::models::OperationKind; - use kalamdb_commons::TableId; use std::collections::BTreeMap; + use async_trait::async_trait; + use kalamdb_commons::{ + models::{rows::Row, NamespaceId, OperationKind}, + TableId, + }; + + use super::*; + struct TransactionBatchSharedApplier; #[async_trait] impl SharedDataApplier for TransactionBatchSharedApplier { - async fn insert(&self, _table_id: &TableId, rows: &[Row]) -> Result { + async fn insert( + &self, + _table_id: &TableId, + rows: &[Row], + _commit_seq: u64, + ) -> Result { Ok(rows.len()) } @@ -563,6 +585,7 @@ mod tests { _table_id: &TableId, _updates: &[Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { Ok(1) } @@ -571,6 +594,7 @@ mod tests { &self, _table_id: &TableId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> 
Result { Ok(1) } @@ -579,10 +603,11 @@ mod tests { &self, _transaction_id: &TransactionId, mutations: &[StagedMutation], + commit_seq: u64, ) -> Result { Ok(crate::TransactionApplyResult { rows_affected: mutations.len(), - commit_seq: 91, + commit_seq, notifications_sent: 0, manifest_updates: 0, publisher_events: 0, @@ -653,9 +678,10 @@ mod tests { { let ops = sm.recent_operations.read(); assert_eq!(ops.len(), 3); - assert_eq!(ops[0].operation, "insert"); - assert_eq!(ops[1].operation, "update"); - assert_eq!(ops[2].operation, "delete"); + assert_eq!(ops[0].operation, OperationKind::Insert); + assert_eq!(ops[1].operation, OperationKind::Update); + assert_eq!(ops[2].operation, OperationKind::Delete); + assert_eq!(ops[0].table_id, TableId::new(NamespaceId::default(), "settings".into())); } } @@ -688,7 +714,10 @@ mod tests { match response { DataResponse::TransactionCommitted(result) => { assert_eq!(result.rows_affected, 1); - assert_eq!(result.commit_seq, 91); + assert_eq!( + result.commit_seq, + commit_seq_from_log_position(GroupId::DataSharedShard(0), 1) + ); }, other => panic!("unexpected response: {:?}", other), } diff --git a/backend/crates/kalamdb-raft/src/state_machine/trait_def.rs b/backend/crates/kalamdb-raft/src/state_machine/trait_def.rs index 421688f11..1dd73c402 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/trait_def.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/trait_def.rs @@ -123,6 +123,17 @@ pub trait KalamStateMachine: Send + Sync { /// - Recovery (start applying from this index + 1) fn last_applied_index(&self) -> u64; + /// Subscribe to last-applied index changes when the implementation supports it. + fn subscribe_last_applied(&self) -> Option> { + None + } + + /// Mark a non-command Raft entry as applied in the state-machine watermark. + /// + /// Blank and membership entries do not call [`Self::apply`], but apply barriers still need the + /// state-machine watermark to advance past them. 
+ fn mark_applied_index(&self, _index: u64, _term: u64) {} + /// Get the last applied log term fn last_applied_term(&self) -> u64; diff --git a/backend/crates/kalamdb-raft/src/state_machine/user_data.rs b/backend/crates/kalamdb-raft/src/state_machine/user_data.rs index 20d3bc09b..282355706 100644 --- a/backend/crates/kalamdb-raft/src/state_machine/user_data.rs +++ b/backend/crates/kalamdb-raft/src/state_machine/user_data.rs @@ -11,23 +11,25 @@ //! to catch up before applying. This ensures data operations don't run before //! their dependent metadata (tables, users) is applied locally. +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Arc, +}; + use async_trait::async_trait; +use kalamdb_commons::{models::TransactionId, TableType}; +use kalamdb_transactions::StagedMutation; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; - -use crate::applier::UserDataApplier; -use crate::{DataResponse, GroupId, RaftCommand, RaftError, UserDataCommand}; -use kalamdb_commons::models::TransactionId; -use kalamdb_commons::TableType; -use kalamdb_transactions::StagedMutation; use super::{ - decode as bincode_decode, encode as bincode_encode, ApplyResult, KalamStateMachine, - StateMachineSnapshot, + decode as bincode_decode, encode as bincode_encode, get_coordinator, ApplyResult, + KalamStateMachine, PendingBuffer, PendingCommand, StateMachineSnapshot, +}; +use crate::{ + applier::UserDataApplier, commit_seq_from_log_position, DataResponse, GroupId, RaftCommand, + RaftError, UserDataCommand, }; -use super::{get_coordinator, PendingBuffer, PendingCommand}; /// Snapshot data for UserDataStateMachine #[derive(Debug, Clone, Serialize, Deserialize)] @@ -78,6 +80,8 @@ pub struct UserDataStateMachine { last_applied_index: AtomicU64, /// Last applied log term last_applied_term: AtomicU64, + /// Notifies waiters when the applied index advances. 
+ last_applied_tx: tokio::sync::watch::Sender, /// Approximate data size in bytes approximate_size: AtomicU64, /// Total operations processed @@ -102,11 +106,12 @@ impl std::fmt::Debug for UserDataStateMachine { impl UserDataStateMachine { /// Create a new UserDataStateMachine for the specified shard pub fn new(shard: u32) -> Self { - assert!(shard < 32, "Shard must be 0-31"); + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { shard, last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), total_operations: AtomicU64::new(0), applier: RwLock::new(None), @@ -116,11 +121,12 @@ impl UserDataStateMachine { /// Create a new UserDataStateMachine with an applier pub fn with_applier(shard: u32, applier: Arc) -> Self { - assert!(shard < 32, "Shard must be 0-31"); + let (last_applied_tx, _) = tokio::sync::watch::channel(0); Self { shard, last_applied_index: AtomicU64::new(0), last_applied_term: AtomicU64::new(0), + last_applied_tx, approximate_size: AtomicU64::new(0), total_operations: AtomicU64::new(0), applier: RwLock::new(Some(applier)), @@ -158,7 +164,8 @@ impl UserDataStateMachine { for pending in drained { let cmd = Self::decode_apply_command(&pending.command_bytes)?; - let _ = self.apply_decoded_command(cmd).await?; + let commit_seq = commit_seq_from_log_position(self.group_id(), pending.log_index); + let _ = self.apply_decoded_command(cmd, commit_seq).await?; log::debug!( "UserDataStateMachine[{}]: Applied buffered command log_index={}", self.shard, @@ -174,9 +181,17 @@ impl UserDataStateMachine { self.pending_buffer.len() } + fn publish_last_applied(&self, index: u64) { + self.last_applied_tx.send_replace(index); + } + /// Apply a user data command /// Note: user_id is extracted from inside each command variant - async fn apply_command(&self, cmd: UserDataCommand) -> Result { + async fn apply_command( + &self, + cmd: UserDataCommand, + commit_seq: u64, + ) -> Result { // Get 
applier reference let applier = { let guard = self.applier.read(); @@ -199,7 +214,7 @@ impl UserDataStateMachine { // Persist data via applier if available let rows_affected = if let Some(ref a) = applier { - match a.insert(&table_id, &user_id, &rows).await { + match a.insert(&table_id, &user_id, &rows, commit_seq).await { Ok(count) => count, Err(e) => { // Convert applier errors to DataResponse::Error @@ -236,7 +251,10 @@ impl UserDataStateMachine { log::debug!("UserDataStateMachine[{}]: Update {:?}", self.shard, table_id); let rows_affected = if let Some(ref a) = applier { - match a.update(&table_id, &user_id, &updates, filter.as_deref()).await { + match a + .update(&table_id, &user_id, &updates, filter.as_deref(), commit_seq) + .await + { Ok(count) => count, Err(e) => { log::warn!( @@ -268,7 +286,7 @@ impl UserDataStateMachine { log::debug!("UserDataStateMachine[{}]: Delete from {:?}", self.shard, table_id); let rows_affected = if let Some(ref a) = applier { - match a.delete(&table_id, &user_id, pk_values.as_deref()).await { + match a.delete(&table_id, &user_id, pk_values.as_deref(), commit_seq).await { Ok(count) => count, Err(e) => { log::warn!( @@ -316,13 +334,14 @@ impl UserDataStateMachine { async fn apply_decoded_command( &self, cmd: UserApplyCommand, + commit_seq: u64, ) -> Result { match cmd { - UserApplyCommand::User(command) => self.apply_command(command).await, + UserApplyCommand::User(command) => self.apply_command(command, commit_seq).await, UserApplyCommand::TransactionCommit { transaction_id, mutations, - } => self.apply_transaction_commit(transaction_id, mutations).await, + } => self.apply_transaction_commit(transaction_id, mutations, commit_seq).await, } } @@ -330,6 +349,7 @@ impl UserDataStateMachine { &self, transaction_id: TransactionId, mutations: Vec, + commit_seq: u64, ) -> Result { if mutations .iter() @@ -349,7 +369,7 @@ impl UserDataStateMachine { return Ok(DataResponse::error("No applier set, transaction commit not persisted")); }; - 
match applier.apply_transaction_batch(&transaction_id, &mutations).await { + match applier.apply_transaction_batch(&transaction_id, &mutations, commit_seq).await { Ok(result) => { self.total_operations.fetch_add(1, Ordering::Relaxed); Ok(DataResponse::TransactionCommitted(result)) @@ -390,7 +410,8 @@ impl KalamStateMachine for UserDataStateMachine { let current_meta = get_coordinator().current_index(); if required_meta > current_meta { log::debug!( - "UserDataStateMachine[{}]: Buffering command (required_meta={} > current_meta={})", + "UserDataStateMachine[{}]: Buffering command (required_meta={} > \ + current_meta={})", self.shard, required_meta, current_meta @@ -405,6 +426,7 @@ impl KalamStateMachine for UserDataStateMachine { // Mark as applied (buffered) to satisfy Raft log progress self.last_applied_index.store(index, Ordering::Release); self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); return Ok(ApplyResult::NoOp); } @@ -416,11 +438,13 @@ impl KalamStateMachine for UserDataStateMachine { } // Apply current command - let response = self.apply_decoded_command(cmd).await?; + let commit_seq = commit_seq_from_log_position(self.group_id(), index); + let response = self.apply_decoded_command(cmd, commit_seq).await?; // Update last applied self.last_applied_index.store(index, Ordering::Release); self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); // Serialize response let response_data = crate::codec::command_codec::encode_data_response(&response)?; @@ -432,6 +456,21 @@ impl KalamStateMachine for UserDataStateMachine { self.last_applied_index.load(Ordering::Acquire) } + fn subscribe_last_applied(&self) -> Option> { + Some(self.last_applied_tx.subscribe()) + } + + fn mark_applied_index(&self, index: u64, term: u64) { + let last_applied = self.last_applied_index.load(Ordering::Acquire); + if index <= last_applied { + return; + } + + self.last_applied_index.store(index, Ordering::Release); + 
self.last_applied_term.store(term, Ordering::Release); + self.publish_last_applied(index); + } + fn last_applied_term(&self) -> u64 { self.last_applied_term.load(Ordering::Acquire) } @@ -471,10 +510,14 @@ impl KalamStateMachine for UserDataStateMachine { self.total_operations.store(data.total_operations, Ordering::Release); self.last_applied_index.store(snapshot.last_applied_index, Ordering::Release); self.last_applied_term.store(snapshot.last_applied_term, Ordering::Release); + self.publish_last_applied(snapshot.last_applied_index); log::info!( - "UserDataStateMachine[{}]: Restored from snapshot at index {}, term {}, {} pending commands", - self.shard, snapshot.last_applied_index, snapshot.last_applied_term, + "UserDataStateMachine[{}]: Restored from snapshot at index {}, term {}, {} pending \ + commands", + self.shard, + snapshot.last_applied_index, + snapshot.last_applied_term, self.pending_buffer.len() ); @@ -488,14 +531,16 @@ impl KalamStateMachine for UserDataStateMachine { #[cfg(test)] mod tests { - use super::*; - use async_trait::async_trait; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::NamespaceId; - use kalamdb_commons::models::OperationKind; - use kalamdb_commons::{TableId, UserId}; use std::collections::BTreeMap; + use async_trait::async_trait; + use kalamdb_commons::{ + models::{rows::Row, NamespaceId, OperationKind}, + TableId, UserId, + }; + + use super::*; + struct TransactionBatchUserApplier; #[async_trait] @@ -505,6 +550,7 @@ mod tests { _table_id: &TableId, _user_id: &UserId, rows: &[Row], + _commit_seq: u64, ) -> Result { Ok(rows.len()) } @@ -515,6 +561,7 @@ mod tests { _user_id: &UserId, _updates: &[Row], _filter: Option<&str>, + _commit_seq: u64, ) -> Result { Ok(1) } @@ -524,6 +571,7 @@ mod tests { _table_id: &TableId, _user_id: &UserId, _pk_values: Option<&[String]>, + _commit_seq: u64, ) -> Result { Ok(1) } @@ -532,10 +580,11 @@ mod tests { &self, _transaction_id: &TransactionId, mutations: &[StagedMutation], + 
commit_seq: u64, ) -> Result { Ok(crate::TransactionApplyResult { rows_affected: mutations.len(), - commit_seq: 77, + commit_seq, notifications_sent: 0, manifest_updates: 0, publisher_events: 0, @@ -591,7 +640,10 @@ mod tests { match response { DataResponse::TransactionCommitted(result) => { assert_eq!(result.rows_affected, 1); - assert_eq!(result.commit_seq, 77); + assert_eq!( + result.commit_seq, + commit_seq_from_log_position(GroupId::DataUserShard(0), 1) + ); }, other => panic!("unexpected response: {:?}", other), } diff --git a/backend/crates/kalamdb-raft/src/storage/raft_store.rs b/backend/crates/kalamdb-raft/src/storage/raft_store.rs index 10d48ddb5..06e2dbc36 100644 --- a/backend/crates/kalamdb-raft/src/storage/raft_store.rs +++ b/backend/crates/kalamdb-raft/src/storage/raft_store.rs @@ -8,29 +8,37 @@ //! This module supports both in-memory storage (for testing) and persistent //! storage via `kalamdb-store::RaftPartitionStore` (for production). -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::io::Cursor; -use std::ops::RangeBounds; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; - -use kalamdb_store::raft_storage::{ - RaftLogEntry, RaftLogId, RaftPartitionStore, RaftSnapshotData, RaftSnapshotMeta, RaftVote, +use std::{ + collections::BTreeMap, + fmt::Debug, + io::Cursor, + ops::RangeBounds, + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; + +use kalamdb_store::{ + raft_storage::{ + RaftLogEntry, RaftLogId, RaftPartitionStore, RaftSnapshotData, RaftSnapshotMeta, RaftVote, + }, + StorageBackend, }; -use kalamdb_store::StorageBackend; -use openraft::storage::{LogState, RaftLogReader, RaftStorage, Snapshot}; use openraft::{ + storage::{LogState, RaftLogReader, RaftStorage, Snapshot}, Entry, EntryPayload, LogId, OptionalSend, RaftSnapshotBuilder, SnapshotMeta, StorageError, StorageIOError, StoredMembership, Vote, }; use parking_lot::RwLock; use 
serde::{Deserialize, Serialize}; -use crate::state_machine::{decode, encode, KalamStateMachine}; -use crate::storage::types::{KalamNode, KalamTypeConfig}; -use crate::GroupId; +use crate::{ + state_machine::{decode, encode, KalamStateMachine}, + storage::types::{KalamNode, KalamTypeConfig}, + GroupId, +}; /// Stored snapshot data #[derive(Debug, Clone)] @@ -114,12 +122,12 @@ const LOG_CACHE_MAX_BYTES: usize = 16 * 1024 * 1024; /// /// ## Storage Modes /// -/// - **In-memory only**: When created with `new()`, all data is stored in memory. -/// Suitable for testing or single-node deployments where durability isn't critical. +/// - **In-memory only**: When created with `new()`, all data is stored in memory. Suitable for +/// testing or single-node deployments where durability isn't critical. /// -/// - **Persistent**: When created with `new_persistent()`, log entries, votes, and -/// metadata are durably stored via `RaftPartitionStore`. A bounded in-memory cache -/// keeps recent entries for fast access. +/// - **Persistent**: When created with `new_persistent()`, log entries, votes, and metadata are +/// durably stored via `RaftPartitionStore`. A bounded in-memory cache keeps recent entries for +/// fast access. 
pub struct KalamRaftStorage { /// Which Raft group this storage belongs to group_id: GroupId, @@ -315,7 +323,8 @@ impl KalamRaftStorage { .unwrap_or(0); log::debug!( - "KalamRaftStorage[{}]: Recovered state - last_applied={:?}, last_purged={:?}, committed={:?}, vote={:?}", + "KalamRaftStorage[{}]: Recovered state - last_applied={:?}, last_purged={:?}, \ + committed={:?}, vote={:?}", group_id, last_applied.map(|id| id.index), last_purged.map(|id| id.index), @@ -399,7 +408,9 @@ impl KalamRaftStorage { let restore_ms = restore_start.elapsed().as_secs_f64() * 1000.0; log::info!( - "KalamRaftStorage[{}]: Restored state machine from snapshot (last_applied_index={}, last_applied_term={}) - deserialize: {:.2}ms, restore: {:.2}ms, total: {:.2}ms", + "KalamRaftStorage[{}]: Restored state machine from snapshot \ + (last_applied_index={}, last_applied_term={}) - deserialize: {:.2}ms, restore: \ + {:.2}ms, total: {:.2}ms", self.group_id, sm_data.state_applied_index, sm_data.state_applied_term, @@ -430,6 +441,11 @@ impl KalamRaftStorage { *self.last_applied.read() } + /// Get the committed log ID from storage. 
+ pub fn get_committed(&self) -> Option> { + *self.committed.read() + } + /// Check if this storage has any persisted Raft state /// /// Returns true if we have a vote, log entries, or last_applied state, @@ -881,6 +897,7 @@ impl RaftStorage match &entry.payload { EntryPayload::Blank => { + self.state_machine.mark_applied_index(index, term); results.push(Vec::new()); }, EntryPayload::Normal(data) => { @@ -919,6 +936,7 @@ impl RaftStorage } } } + self.state_machine.mark_applied_index(index, term); results.push(Vec::new()); }, } @@ -1102,18 +1120,23 @@ impl RaftLogReader DataFusionError { DataFusionError::Plan(err.to_string()) diff --git a/backend/crates/kalamdb-session-datafusion/src/secured_provider.rs b/backend/crates/kalamdb-session-datafusion/src/secured_provider.rs index 701228565..f73035325 100644 --- a/backend/crates/kalamdb-session-datafusion/src/secured_provider.rs +++ b/backend/crates/kalamdb-session-datafusion/src/secured_provider.rs @@ -1,17 +1,19 @@ -use crate::permissions::{check_system_table_access, session_error_to_datafusion}; +use std::{any::Any, fmt::Debug, sync::Arc}; + use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion::catalog::Session; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::error::Result as DataFusionResult; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_plan::ExecutionPlan; +use datafusion::{ + catalog::Session, + datasource::{TableProvider, TableType}, + error::Result as DataFusionResult, + logical_expr::{Expr, TableProviderFilterPushDown}, + physical_plan::ExecutionPlan, +}; use kalamdb_commons::models::TableId; -use std::any::Any; -use std::fmt::Debug; -use std::sync::Arc; use tracing::Instrument; +use crate::permissions::{check_system_table_access, session_error_to_datafusion}; + pub struct SecuredSystemTableProvider { inner: Arc, table_id: TableId, @@ -96,12 +98,15 @@ pub fn secure_provider( #[cfg(test)] mod tests { - use 
super::*; - use arrow::array::StringArray; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow::{ + array::StringArray, + datatypes::{DataType, Field, Schema}, + }; use datafusion::datasource::MemTable; use kalamdb_commons::{NamespaceId, TableName}; + use super::*; + fn create_mock_provider() -> Arc { let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)])); let array = StringArray::from(vec!["test_id"]); diff --git a/backend/crates/kalamdb-session/src/auth_session.rs b/backend/crates/kalamdb-session/src/auth_session.rs index 471f9d855..cd399256a 100644 --- a/backend/crates/kalamdb-session/src/auth_session.rs +++ b/backend/crates/kalamdb-session/src/auth_session.rs @@ -1,9 +1,11 @@ //! Authenticated Session Context -use crate::UserContext; -use kalamdb_commons::models::{ConnectionInfo, ReadContext, Role, UserId}; use std::sync::Arc; +use kalamdb_commons::models::{ConnectionInfo, ReadContext, Role, UserId}; + +use crate::UserContext; + /// Authentication method used for the session #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum AuthMethod { diff --git a/backend/crates/kalamdb-session/src/error.rs b/backend/crates/kalamdb-session/src/error.rs index e58473938..93ef326a2 100644 --- a/backend/crates/kalamdb-session/src/error.rs +++ b/backend/crates/kalamdb-session/src/error.rs @@ -1,8 +1,9 @@ //! Session error types -use kalamdb_commons::models::{NamespaceId, Role, TableName}; use std::fmt; +use kalamdb_commons::models::{NamespaceId, Role, TableName}; + /// Result type for session operations pub type SessionResult = Result; diff --git a/backend/crates/kalamdb-session/src/permissions.rs b/backend/crates/kalamdb-session/src/permissions.rs index ab565fa07..504d36c80 100644 --- a/backend/crates/kalamdb-session/src/permissions.rs +++ b/backend/crates/kalamdb-session/src/permissions.rs @@ -4,10 +4,13 @@ //! DataFusion session extraction and provider wrappers live in //! `kalamdb-session-datafusion`. 
+use kalamdb_commons::{ + models::{NamespaceId, Role, TableName}, + schemas::{TableDefinition, TableOptions, TableType}, + TableAccess, +}; + use crate::error::SessionError; -use kalamdb_commons::models::{NamespaceId, Role, TableName}; -use kalamdb_commons::schemas::{TableDefinition, TableOptions, TableType}; -use kalamdb_commons::TableAccess; /// Check if a role can access system tables. /// diff --git a/backend/crates/kalamdb-sharding/src/group_id.rs b/backend/crates/kalamdb-sharding/src/group_id.rs index b88100c04..051fc2445 100644 --- a/backend/crates/kalamdb-sharding/src/group_id.rs +++ b/backend/crates/kalamdb-sharding/src/group_id.rs @@ -1,12 +1,11 @@ //! Raft Group ID definitions //! -//! KalamDB uses Multi-Raft with 34 groups: +//! KalamDB uses Multi-Raft with independently configured groups: //! - 1 unified metadata group (Meta) -//! - 32 user data shards (user tables) -//! - 1 shared data shard (shared tables) +//! - N user data shards (user tables) +//! - M shared data shards (shared tables) -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -24,8 +23,8 @@ pub const DEFAULT_SHARED_SHARDS: u32 = 1; /// ## Structure /// /// - **Meta**: Unified metadata group (namespaces, tables, storages, users, jobs) -/// - **DataUserShard(0..31)**: User table data shards -/// - **DataSharedShard(0)**: Shared table data shard +/// - **DataUserShard(N)**: User table data shards +/// - **DataSharedShard(N)**: Shared table data shards #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum GroupId { @@ -34,11 +33,10 @@ pub enum GroupId { Meta, // === Data Groups (33) === - /// User table data shard (0..31) + /// User table data shard /// Routes by: user_id % num_user_shards DataUserShard(u32), - /// Shared table data shard (0 for Phase 1) - /// Future: shard by table_id or row key + /// Shared table data shard 
DataSharedShard(u32), } @@ -89,8 +87,8 @@ impl GroupId { pub fn from_u64(id: u64) -> Option { match id { 10 => Some(GroupId::Meta), - 100..=131 => Some(GroupId::DataUserShard((id - 100) as u32)), - 200..=231 => Some(GroupId::DataSharedShard((id - 200) as u32)), + 100..=199 => Some(GroupId::DataUserShard((id - 100) as u32)), + 200.. => u32::try_from(id - 200).ok().map(GroupId::DataSharedShard), _ => None, } } diff --git a/backend/crates/kalamdb-sharding/src/lib.rs b/backend/crates/kalamdb-sharding/src/lib.rs index 2aaa397d4..1371078cc 100644 --- a/backend/crates/kalamdb-sharding/src/lib.rs +++ b/backend/crates/kalamdb-sharding/src/lib.rs @@ -1,15 +1,14 @@ mod group_id; -use kalamdb_commons::models::{TableId, UserId}; use std::hash::{Hash, Hasher}; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; - // Re-export GroupId and related types pub use group_id::{GroupId, DEFAULT_SHARED_SHARDS, DEFAULT_USER_SHARDS}; +use kalamdb_commons::models::{TableId, UserId}; // Re-export cluster config types for shared consumption pub use kalamdb_configs::{ClusterConfig, PeerConfig}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; /// Shard kind used across stream and data shards. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/backend/crates/kalamdb-store/src/backends/mod.rs b/backend/crates/kalamdb-store/src/backends/mod.rs index a630157d9..c541a9f14 100644 --- a/backend/crates/kalamdb-store/src/backends/mod.rs +++ b/backend/crates/kalamdb-store/src/backends/mod.rs @@ -1,4 +1,4 @@ //! Backend-specific storage engine implementations. 
#[cfg(feature = "rocksdb")] -pub mod rocksdb; \ No newline at end of file +pub mod rocksdb; diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/backend.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/backend.rs index 646d5491d..b2fc18ac9 100644 --- a/backend/crates/kalamdb-store/src/backends/rocksdb/backend.rs +++ b/backend/crates/kalamdb-store/src/backends/rocksdb/backend.rs @@ -1,17 +1,26 @@ //! RocksDB implementation of the StorageBackend trait. //! //! This module provides a concrete implementation of `StorageBackend` using RocksDB -//! as the underlying storage engine. It maps the generic partition concept to -//! RocksDB column families. +//! as the underlying storage engine. Logical partitions are encoded as key prefixes +//! inside a small fixed set of physical RocksDB column families. -use super::cf_tuning::apply_cf_settings; -use super::init::create_block_options_with_cache; +use std::{collections::HashMap, sync::Arc}; + +use kalamdb_configs::RocksDbSettings; +use rocksdb::{BoundColumnFamily, Cache, IteratorMode, Options, PrefixRange, WriteOptions, DB}; + +use super::{ + cf_tuning::apply_cf_settings, + init::create_block_options_with_cache, + keyspace::{ + decode_logical_partition_registry_key, logical_partition_registry_key, + logical_partition_registry_prefix, next_prefix_bound, partition_key_prefix, + physical_cf_for_partition, physical_key, SYSTEM_META_CF, + }, +}; use crate::storage_trait::{ Operation, Partition, Result, StorageBackend, StorageError, StorageStats, }; -use kalamdb_configs::RocksDbSettings; -use rocksdb::{BoundColumnFamily, Cache, IteratorMode, Options, PrefixRange, WriteOptions, DB}; -use std::sync::Arc; const ESTIMATE_NUM_KEYS_PROPERTY: &str = "rocksdb.estimate-num-keys"; const ESTIMATE_LIVE_DATA_SIZE_PROPERTY: &str = "rocksdb.estimate-live-data-size"; @@ -22,6 +31,15 @@ const LIVE_SST_FILES_SIZE_PROPERTY: &str = "rocksdb.live-sst-files-size"; const TOTAL_SST_FILES_SIZE_PROPERTY: &str = 
"rocksdb.total-sst-files-size";
 const ALL_MEMTABLES_SIZE_PROPERTY: &str = "rocksdb.cur-size-all-mem-tables";
 const PENDING_COMPACTION_BYTES_PROPERTY: &str = "rocksdb.estimate-pending-compaction-bytes";
+const DROP_PARTITION_BATCH_SIZE: usize = 4_096;
+
+#[inline]
+fn prefixed_physical_key(partition_prefix: &[u8], user_key: &[u8]) -> Vec<u8> {
+    let mut physical_key = Vec::with_capacity(partition_prefix.len() + user_key.len());
+    physical_key.extend_from_slice(partition_prefix);
+    physical_key.extend_from_slice(user_key);
+    physical_key
+}
 
 /// RocksDB implementation of the StorageBackend trait.
 pub struct RocksDBBackend {
@@ -30,6 +48,7 @@
     settings: RocksDbSettings,
     block_cache: Cache,
     known_cf_names: std::sync::RwLock<Vec<String>>,
+    logical_partition_names: std::sync::RwLock<Vec<String>>,
 }
 
 impl RocksDBBackend {
@@ -42,15 +61,17 @@
         let mut write_opts = WriteOptions::default();
         write_opts.set_sync(sync_writes);
         write_opts.disable_wal(disable_wal);
-        let block_cache =
-            Cache::new_lru_cache(std::cmp::min(settings.block_cache_size, 1024 * 1024));
-        Self {
+        let block_cache = Cache::new_lru_cache(settings.block_cache_size);
+        let backend = Self {
             db,
             write_opts,
             settings,
             block_cache,
             known_cf_names: std::sync::RwLock::new(Vec::new()),
-        }
+            logical_partition_names: std::sync::RwLock::new(Vec::new()),
+        };
+        backend.load_logical_partitions();
+        backend
     }
 
     /// Creates a new RocksDB backend with the given database handle.
@@ -68,15 +89,107 @@
         Self::new_internal(db, sync_writes, disable_wal, settings)
     }
 
-    /// Set the known column family names.
+    /// Set the known physical column family names.
pub fn set_known_cf_names(&self, names: Vec) { *self.known_cf_names.write().unwrap() = names; } fn get_cf(&self, partition: &Partition) -> Result>> { + let cf_name = physical_cf_for_partition(partition.name()); + self.db + .cf_handle(cf_name) + .ok_or_else(|| StorageError::PartitionNotFound(cf_name.to_string())) + } + + fn ensure_physical_cf(&self, cf_name: &str) -> Result<()> { + if self.db.cf_handle(cf_name).is_some() { + self.track_physical_cf(cf_name); + return Ok(()); + } + + let mut opts = Options::default(); + apply_cf_settings(&mut opts, &self.settings, cf_name); + opts.set_block_based_table_factory(&create_block_options_with_cache(&self.block_cache)); + + match self.db.create_cf(cf_name, &opts) { + Ok(()) => { + self.track_physical_cf(cf_name); + Ok(()) + }, + Err(e) => { + let msg = e.to_string(); + if msg.contains("Column family already exists") + || msg.contains("column family already exists") + { + self.track_physical_cf(cf_name); + return Ok(()); + } + Err(StorageError::IoError(msg)) + }, + } + } + + fn track_physical_cf(&self, cf_name: &str) { + if let Ok(mut names) = self.known_cf_names.write() { + if !names.iter().any(|name| name == cf_name) { + names.push(cf_name.to_string()); + } + } + } + + fn track_logical_partition(&self, partition_name: &str) { + if let Ok(mut names) = self.logical_partition_names.write() { + if !names.iter().any(|name| name == partition_name) { + names.push(partition_name.to_string()); + } + } + } + + fn load_logical_partitions(&self) { + let Some(cf) = self.db.cf_handle(SYSTEM_META_CF) else { + return; + }; + let prefix = logical_partition_registry_prefix(); + let mut readopts = rocksdb::ReadOptions::default(); + readopts.set_iterate_range(PrefixRange(prefix.clone())); + + let names: Vec = self + .db + .iterator_cf_opt( + &cf, + readopts, + IteratorMode::From(prefix.as_slice(), rocksdb::Direction::Forward), + ) + .filter_map(|item| { + item.ok().and_then(|(key, _)| { + 
decode_logical_partition_registry_key(&key).map(str::to_string) + }) + }) + .collect(); + + if let Ok(mut tracked) = self.logical_partition_names.write() { + *tracked = names; + } + } + + fn persist_logical_partition(&self, partition_name: &str) -> Result<()> { + let cf = self + .db + .cf_handle(SYSTEM_META_CF) + .ok_or_else(|| StorageError::PartitionNotFound(SYSTEM_META_CF.to_string()))?; + self.db + .put_cf_opt(&cf, logical_partition_registry_key(partition_name), b"", &self.write_opts) + .map_err(|e| StorageError::IoError(e.to_string())) + } + + fn remove_logical_partition(&self, partition_name: &str) -> Result<()> { + let cf = self + .db + .cf_handle(SYSTEM_META_CF) + .ok_or_else(|| StorageError::PartitionNotFound(SYSTEM_META_CF.to_string()))?; self.db - .cf_handle(partition.name()) - .ok_or_else(|| StorageError::PartitionNotFound(partition.name().to_string())) + .delete_cf_opt(&cf, logical_partition_registry_key(partition_name), &self.write_opts) + .map_err(|e| StorageError::IoError(e.to_string())) } fn tracked_cf_names(&self) -> Vec { @@ -88,7 +201,7 @@ impl RocksDBBackend { } fn tracked_partition_count(&self) -> usize { - match self.known_cf_names.read() { + match self.logical_partition_names.read() { Ok(names) => names.iter().filter(|name| name.as_str() != "default").count(), Err(_) => 0, } @@ -105,24 +218,45 @@ impl RocksDBBackend { impl StorageBackend for RocksDBBackend { fn get(&self, partition: &Partition, key: &[u8]) -> Result>> { - let _span = tracing::trace_span!("rocksdb.get", partition = %partition.name()).entered(); + let _span = tracing::trace_span!( + "rocksdb.get", + partition = %partition.name(), + physical_cf = physical_cf_for_partition(partition.name()) + ) + .entered(); let cf = self.get_cf(partition)?; - self.db.get_cf(&cf, key).map_err(|e| StorageError::IoError(e.to_string())) + let physical_key = physical_key(partition.name(), key); + self.db + .get_cf(&cf, physical_key) + .map_err(|e| StorageError::IoError(e.to_string())) } fn put(&self, 
partition: &Partition, key: &[u8], value: &[u8]) -> Result<()> { - let _span = tracing::trace_span!("rocksdb.put", partition = %partition.name(), value_len = value.len()).entered(); + let _span = tracing::trace_span!( + "rocksdb.put", + partition = %partition.name(), + physical_cf = physical_cf_for_partition(partition.name()), + value_len = value.len() + ) + .entered(); let cf = self.get_cf(partition)?; + let physical_key = physical_key(partition.name(), key); self.db - .put_cf_opt(&cf, key, value, &self.write_opts) + .put_cf_opt(&cf, physical_key, value, &self.write_opts) .map_err(|e| StorageError::IoError(e.to_string())) } fn delete(&self, partition: &Partition, key: &[u8]) -> Result<()> { - let _span = tracing::trace_span!("rocksdb.delete", partition = %partition.name()).entered(); + let _span = tracing::trace_span!( + "rocksdb.delete", + partition = %partition.name(), + physical_cf = physical_cf_for_partition(partition.name()) + ) + .entered(); let cf = self.get_cf(partition)?; + let physical_key = physical_key(partition.name(), key); self.db - .delete_cf_opt(&cf, key, &self.write_opts) + .delete_cf_opt(&cf, physical_key, &self.write_opts) .map_err(|e| StorageError::IoError(e.to_string())) } @@ -130,22 +264,37 @@ impl StorageBackend for RocksDBBackend { let _span = tracing::debug_span!("rocksdb.batch", op_count = operations.len()).entered(); use rocksdb::WriteBatch; + if operations.is_empty() { + return Ok(()); + } + let mut batch = WriteBatch::default(); + let mut partition_cache: HashMap>, Vec)> = + HashMap::with_capacity(operations.len().min(16)); for op in operations { - match op { + let (partition, key, value) = match op { Operation::Put { partition, key, value, - } => { - let cf = self.get_cf(&partition)?; - batch.put_cf(&cf, key, value); - }, - Operation::Delete { partition, key } => { + } => (partition, key, Some(value)), + Operation::Delete { partition, key } => (partition, key, None), + }; + + let partition_name = partition.name(); + let cache_entry = 
match partition_cache.entry(partition_name.to_string()) { + std::collections::hash_map::Entry::Occupied(entry) => entry.into_mut(), + std::collections::hash_map::Entry::Vacant(entry) => { let cf = self.get_cf(&partition)?; - batch.delete_cf(&cf, key); + entry.insert((cf, partition_key_prefix(partition_name))) }, + }; + let physical_key = prefixed_physical_key(&cache_entry.1, &key); + + match value { + Some(value) => batch.put_cf(&cache_entry.0, physical_key, value), + None => batch.delete_cf(&cache_entry.0, physical_key), } } @@ -161,32 +310,50 @@ impl StorageBackend for RocksDBBackend { start_key: Option<&[u8]>, limit: Option, ) -> Result, Vec)> + Send + '_>> { - let _span = tracing::trace_span!("rocksdb.scan", partition = %partition.name(), has_prefix = prefix.is_some(), limit = ?limit).entered(); + let _span = tracing::trace_span!( + "rocksdb.scan", + partition = %partition.name(), + physical_cf = physical_cf_for_partition(partition.name()), + has_prefix = prefix.is_some(), + limit = ?limit + ) + .entered(); use rocksdb::Direction; let cf = self.get_cf(partition)?; let snapshot = self.db.snapshot(); - let prefix_vec = prefix.map(|p| p.to_vec()); - - let iter_mode = if let Some(start) = start_key { - IteratorMode::From(start, Direction::Forward) - } else if let Some(p) = &prefix_vec { - IteratorMode::From(p.as_slice(), Direction::Forward) + let partition_prefix = partition_key_prefix(partition.name()); + let user_prefix = prefix.map(|p| p.to_vec()); + let physical_prefix = user_prefix.as_ref().map_or_else( + || partition_prefix.clone(), + |prefix| { + let mut physical_prefix = partition_prefix.clone(); + physical_prefix.extend_from_slice(prefix); + physical_prefix + }, + ); + let physical_start = start_key.map(|start| { + let mut physical_start = partition_prefix.clone(); + physical_start.extend_from_slice(start); + physical_start + }); + + let iter_mode = if let Some(start) = &physical_start { + IteratorMode::From(start.as_slice(), Direction::Forward) } else { - 
IteratorMode::Start + IteratorMode::From(physical_prefix.as_slice(), Direction::Forward) }; let mut readopts = rocksdb::ReadOptions::default(); readopts.set_snapshot(&snapshot); - if let Some(p) = &prefix_vec { - readopts.set_iterate_range(PrefixRange(p.clone())); - } + readopts.set_iterate_range(PrefixRange(physical_prefix.clone())); let inner = self.db.iterator_cf_opt(&cf, readopts, iter_mode); struct SnapshotScanIter<'a, D: rocksdb::DBAccess> { _snapshot: rocksdb::SnapshotWithThreadMode<'a, D>, inner: rocksdb::DBIteratorWithThreadMode<'a, D>, - prefix: Option>, + partition_prefix: Vec, + user_prefix: Option>, remaining: Option, } @@ -200,8 +367,12 @@ impl StorageBackend for RocksDBBackend { match self.inner.next()? { Ok((k, v)) => { - if let Some(ref p) = self.prefix { - if !k.starts_with(p) { + if !k.starts_with(&self.partition_prefix) { + return None; + } + let logical_key = &k[self.partition_prefix.len()..]; + if let Some(ref prefix) = self.user_prefix { + if !logical_key.starts_with(prefix) { return None; } } @@ -210,7 +381,7 @@ impl StorageBackend for RocksDBBackend { *left -= 1; } } - Some((k.to_vec(), v.to_vec())) + Some((logical_key.to_vec(), v.to_vec())) }, Err(_) => None, } @@ -220,7 +391,8 @@ impl StorageBackend for RocksDBBackend { let iter = SnapshotScanIter:: { _snapshot: snapshot, inner, - prefix: prefix_vec, + partition_prefix, + user_prefix, remaining: limit, }; @@ -245,16 +417,28 @@ impl StorageBackend for RocksDBBackend { let cf = self.get_cf(partition)?; let snapshot = self.db.snapshot(); - let prefix_vec = prefix.map(|p| p.to_vec()); + let partition_prefix = partition_key_prefix(partition.name()); + let user_prefix = prefix.map(|p| p.to_vec()); + let physical_prefix = user_prefix.as_ref().map_or_else( + || partition_prefix.clone(), + |prefix| { + let mut physical_prefix = partition_prefix.clone(); + physical_prefix.extend_from_slice(prefix); + physical_prefix + }, + ); + let physical_start = start_key.map(|start| { + let mut physical_start = 
partition_prefix.clone(); + physical_start.extend_from_slice(start); + physical_start + }); let mut readopts = rocksdb::ReadOptions::default(); readopts.set_snapshot(&snapshot); - if let Some(p) = &prefix_vec { - readopts.set_iterate_range(PrefixRange(p.clone())); - } + readopts.set_iterate_range(PrefixRange(physical_prefix.clone())); - let iter_mode = if let Some(start) = start_key { - IteratorMode::From(start, Direction::Reverse) + let iter_mode = if let Some(start) = &physical_start { + IteratorMode::From(start.as_slice(), Direction::Reverse) } else { IteratorMode::End }; @@ -264,7 +448,8 @@ impl StorageBackend for RocksDBBackend { struct SnapshotReverseScanIter<'a, D: rocksdb::DBAccess> { _snapshot: rocksdb::SnapshotWithThreadMode<'a, D>, inner: rocksdb::DBIteratorWithThreadMode<'a, D>, - prefix: Option>, + partition_prefix: Vec, + user_prefix: Option>, remaining: Option, } @@ -278,8 +463,12 @@ impl StorageBackend for RocksDBBackend { match self.inner.next()? { Ok((k, v)) => { - if let Some(ref prefix) = self.prefix { - if !k.starts_with(prefix) { + if !k.starts_with(&self.partition_prefix) { + return None; + } + let logical_key = &k[self.partition_prefix.len()..]; + if let Some(ref prefix) = self.user_prefix { + if !logical_key.starts_with(prefix) { return None; } } @@ -288,7 +477,7 @@ impl StorageBackend for RocksDBBackend { *left -= 1; } } - Some((k.to_vec(), v.to_vec())) + Some((logical_key.to_vec(), v.to_vec())) }, Err(_) => None, } @@ -298,7 +487,8 @@ impl StorageBackend for RocksDBBackend { let iter = SnapshotReverseScanIter:: { _snapshot: snapshot, inner, - prefix: prefix_vec, + partition_prefix, + user_prefix, remaining: limit, }; @@ -306,41 +496,21 @@ impl StorageBackend for RocksDBBackend { } fn partition_exists(&self, partition: &Partition) -> bool { - self.db.cf_handle(partition.name()).is_some() + self.logical_partition_names + .read() + .map(|names| names.iter().any(|name| name == partition.name())) + .unwrap_or(false) } fn 
create_partition(&self, partition: &Partition) -> Result<()> { - if self.partition_exists(partition) { - return Ok(()); - } - - let mut opts = Options::default(); - apply_cf_settings(&mut opts, &self.settings, partition.name()); - opts.set_block_based_table_factory(&create_block_options_with_cache(&self.block_cache)); - match self.db.create_cf(partition.name(), &opts) { - Ok(()) => { - if let Ok(mut names) = self.known_cf_names.write() { - let name = partition.name().to_string(); - if !names.contains(&name) { - names.push(name); - } - } - Ok(()) - }, - Err(e) => { - let msg = e.to_string(); - if msg.contains("Column family already exists") - || msg.contains("column family already exists") - { - return Ok(()); - } - Err(StorageError::IoError(msg)) - }, - } + self.ensure_physical_cf(physical_cf_for_partition(partition.name()))?; + self.persist_logical_partition(partition.name())?; + self.track_logical_partition(partition.name()); + Ok(()) } fn list_partitions(&self) -> Result> { - let names = self.known_cf_names.read().unwrap(); + let names = self.logical_partition_names.read().unwrap(); let partitions = names .iter() .filter(|name| *name != "default") @@ -350,24 +520,68 @@ impl StorageBackend for RocksDBBackend { } fn drop_partition(&self, partition: &Partition) -> Result<()> { - if !self.partition_exists(partition) { - return Ok(()); - } + let cf = self.get_cf(partition)?; + let prefix = partition_key_prefix(partition.name()); + + loop { + let (batch, deleted_count) = { + let snapshot = self.db.snapshot(); + let mut readopts = rocksdb::ReadOptions::default(); + readopts.set_snapshot(&snapshot); + readopts.set_iterate_range(PrefixRange(prefix.clone())); + + let mut batch = rocksdb::WriteBatch::default(); + let mut deleted_count = 0usize; + + for item in self.db.iterator_cf_opt( + &cf, + readopts, + IteratorMode::From(prefix.as_slice(), rocksdb::Direction::Forward), + ) { + let (key, _) = item.map_err(|e| StorageError::IoError(e.to_string()))?; + if 
!key.starts_with(&prefix) { + break; + } - self.db - .drop_cf(partition.name()) - .map_err(|e| StorageError::IoError(e.to_string()))?; + batch.delete_cf(&cf, key.as_ref()); + deleted_count += 1; + if deleted_count >= DROP_PARTITION_BATCH_SIZE { + break; + } + } - if let Ok(mut names) = self.known_cf_names.write() { + (batch, deleted_count) + }; + + if deleted_count == 0 { + break; + } + + self.db + .write_opt(batch, &self.write_opts) + .map_err(|e| StorageError::IoError(e.to_string()))?; + + if deleted_count < DROP_PARTITION_BATCH_SIZE { + break; + } + } + + if let Ok(mut names) = self.logical_partition_names.write() { names.retain(|n| n != partition.name()); } + self.remove_logical_partition(partition.name())?; Ok(()) } fn compact_partition(&self, partition: &Partition) -> Result<()> { let cf = self.get_cf(partition)?; - self.db.compact_range_cf(&cf, None::<&[u8]>, None::<&[u8]>); + let start = partition_key_prefix(partition.name()); + if let Some(end) = next_prefix_bound(&start) { + self.db.compact_range_cf(&cf, Some(start.as_slice()), Some(end.as_slice())); + } else { + self.db.compact_range_cf(&cf, Some(start.as_slice()), None::<&[u8]>); + } Ok(()) } @@ -395,8 +609,10 @@ impl StorageBackend for RocksDBBackend { } fn backup_to(&self, backup_dir: &std::path::Path) -> crate::storage_trait::Result<()> { - use rocksdb::backup::{BackupEngine, BackupEngineOptions}; - use rocksdb::Env; + use rocksdb::{ + backup::{BackupEngine, BackupEngineOptions}, + Env, + }; std::fs::create_dir_all(backup_dir).map_err(|e| { crate::storage_trait::StorageError::Other(format!( @@ -431,8 +647,10 @@ impl StorageBackend for RocksDBBackend { } fn restore_from(&self, backup_dir: &std::path::Path) -> crate::storage_trait::Result<()> { - use rocksdb::backup::{BackupEngine, BackupEngineOptions, RestoreOptions}; - use rocksdb::Env; + use rocksdb::{ + backup::{BackupEngine, BackupEngineOptions, RestoreOptions}, + Env, + }; let opts = BackupEngineOptions::new(backup_dir).map_err(|e| { 
crate::storage_trait::StorageError::Other(format!( @@ -486,6 +704,10 @@ impl StorageBackend for RocksDBBackend { "storage_partition_count".to_string(), self.tracked_partition_count().to_string(), ), + ( + "rocksdb_physical_cf_count".to_string(), + self.tracked_cf_names().len().to_string(), + ), ( "rocksdb_estimate_num_keys".to_string(), self.sum_cf_property(ESTIMATE_NUM_KEYS_PROPERTY).to_string(), @@ -528,10 +750,13 @@ impl StorageBackend for RocksDBBackend { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::{encode_key, encode_prefix}; + use kalamdb_configs::RocksDbSettings; use tempfile::TempDir; + use super::super::init::RocksDbInit; + use super::*; + fn create_test_db() -> (Arc, TempDir) { let temp_dir = TempDir::new().unwrap(); let mut opts = Options::default(); @@ -707,6 +932,46 @@ mod tests { let _ = partitions.len(); } + #[test] + fn test_logical_partitions_survive_reopen() { + let temp_dir = TempDir::new().unwrap(); + let db_path = temp_dir.path().to_string_lossy().into_owned(); + let partition = Partition::new("shared_default:profiles"); + + { + let init = RocksDbInit::with_defaults(db_path.clone()); + let (db, cf_names) = init.open_with_cf_names().unwrap(); + let backend = RocksDBBackend::with_options_and_settings( + db, + false, + false, + RocksDbSettings::default(), + ); + backend.set_known_cf_names(cf_names); + + backend.create_partition(&partition).unwrap(); + backend.put(&partition, b"key1", b"value1").unwrap(); + + assert!(backend.partition_exists(&partition)); + } + + { + let init = RocksDbInit::with_defaults(db_path); + let (db, cf_names) = init.open_with_cf_names().unwrap(); + let backend = RocksDBBackend::with_options_and_settings( + db, + false, + false, + RocksDbSettings::default(), + ); + backend.set_known_cf_names(cf_names); + + assert!(backend.partition_exists(&partition)); + assert_eq!(backend.get(&partition, b"key1").unwrap(), Some(b"value1".to_vec())); + assert_eq!(backend.list_partitions().unwrap(), vec![partition]); + } + 
} + #[test] fn test_drop_partition() { let (db, _temp) = create_test_db(); @@ -744,4 +1009,4 @@ mod tests { assert!(stats.contains_key("rocksdb_memtables_size_bytes")); assert!(stats.contains_key("rocksdb_pending_compaction_bytes")); } -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/cf_tuning.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/cf_tuning.rs index 339de0251..a3a1a02db 100644 --- a/backend/crates/kalamdb-store/src/backends/rocksdb/cf_tuning.rs +++ b/backend/crates/kalamdb-store/src/backends/rocksdb/cf_tuning.rs @@ -28,4 +28,4 @@ fn profile_settings( ColumnFamilyProfile::HotIndex => &settings.cf_profiles.hot_index, ColumnFamilyProfile::Raft => &settings.cf_profiles.raft, } -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/init.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/init.rs index c4c9ad24b..656327c5e 100644 --- a/backend/crates/kalamdb-store/src/backends/rocksdb/init.rs +++ b/backend/crates/kalamdb-store/src/backends/rocksdb/init.rs @@ -1,18 +1,20 @@ //! RocksDB initialization utilities for KalamDB. //! //! Provides a thin helper to open a RocksDB instance with required -//! system column families present. +//! fixed physical column families present. + +use std::{path::Path, sync::Arc}; -use super::cf_tuning::{apply_cf_settings, apply_db_settings}; use anyhow::Result; -use kalamdb_commons::system_tables::StoragePartition; -use kalamdb_commons::SystemTable; use kalamdb_configs::RocksDbSettings; use rocksdb::{BlockBasedOptions, Cache, ColumnFamilyDescriptor, Options, DB}; -use std::path::Path; -use std::sync::Arc; -/// RocksDB initializer for creating/opening a database with system CFs. +use super::{ + cf_tuning::{apply_cf_settings, apply_db_settings}, + keyspace::fixed_column_families, +}; + +/// RocksDB initializer for creating/opening a database with fixed physical CFs. 
pub struct RocksDbInit { db_path: String, settings: RocksDbSettings, @@ -32,7 +34,7 @@ impl RocksDbInit { Self::new(db_path, RocksDbSettings::default()) } - /// Open or create the RocksDB database and ensure system CFs exist. + /// Open or create the RocksDB database and ensure fixed physical CFs exist. pub fn open(&self) -> Result> { let mut db_opts = Options::default(); db_opts.create_if_missing(true); @@ -98,24 +100,7 @@ impl RocksDbInit { _ => vec!["default".to_string()], }; - for table in SystemTable::all_tables().iter() { - if let Some(name) = table.column_family_name() { - if !existing.iter().any(|existing_name| existing_name == name) { - existing.push(name.to_string()); - } - } - } - - let extra_partitions = [ - StoragePartition::InformationSchemaTables.name(), - StoragePartition::SystemUsersUsernameIdx.name(), - StoragePartition::SystemUsersRoleIdx.name(), - StoragePartition::SystemUsersDeletedAtIdx.name(), - StoragePartition::ManifestCache.name(), - StoragePartition::SystemJobsStatusIdx.name(), - ]; - - for name in extra_partitions { + for name in fixed_column_families() { if !existing.iter().any(|existing_name| existing_name == name) { existing.push(name.to_string()); } @@ -134,4 +119,4 @@ pub(crate) fn create_block_options_with_cache(cache: &Cache) -> BlockBasedOption block_opts.set_pin_top_level_index_and_filter(true); block_opts.set_whole_key_filtering(true); block_opts -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/keyspace.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/keyspace.rs new file mode 100644 index 000000000..ff9f4b855 --- /dev/null +++ b/backend/crates/kalamdb-store/src/backends/rocksdb/keyspace.rs @@ -0,0 +1,87 @@ +use kalamdb_commons::system_tables::{classify_column_family_name, ColumnFamilyProfile}; + +pub(crate) const SYSTEM_META_CF: &str = "system_meta"; +pub(crate) const SYSTEM_INDEX_CF: &str = "system_index"; +pub(crate) const HOT_DATA_CF: &str = "hot_data"; +pub(crate) const 
HOT_INDEX_CF: &str = "hot_index";
+pub(crate) const RAFT_CF: &str = "raft_data";
+
+const PARTITION_KEY_PREFIX_TAG: u8 = 0x70;
+const LOGICAL_PARTITION_REGISTRY_TAG: u8 = 0x71;
+
+pub(crate) fn fixed_column_families() -> &'static [&'static str] {
+    &[
+        "default",
+        SYSTEM_META_CF,
+        SYSTEM_INDEX_CF,
+        HOT_DATA_CF,
+        HOT_INDEX_CF,
+        RAFT_CF,
+    ]
+}
+
+pub(crate) fn physical_cf_for_partition(partition_name: &str) -> &'static str {
+    match classify_column_family_name(partition_name) {
+        ColumnFamilyProfile::SystemMeta => SYSTEM_META_CF,
+        ColumnFamilyProfile::SystemIndex => SYSTEM_INDEX_CF,
+        ColumnFamilyProfile::HotData => HOT_DATA_CF,
+        ColumnFamilyProfile::HotIndex => HOT_INDEX_CF,
+        ColumnFamilyProfile::Raft => RAFT_CF,
+    }
+}
+
+pub(crate) fn partition_key_prefix(partition_name: &str) -> Vec<u8> {
+    let name = partition_name.as_bytes();
+    let mut prefix = Vec::with_capacity(1 + std::mem::size_of::<u32>() + name.len());
+    prefix.push(PARTITION_KEY_PREFIX_TAG);
+    prefix.extend_from_slice(&(name.len() as u32).to_be_bytes());
+    prefix.extend_from_slice(name);
+    prefix
+}
+
+pub(crate) fn physical_key(partition_name: &str, user_key: &[u8]) -> Vec<u8> {
+    let mut key = partition_key_prefix(partition_name);
+    key.extend_from_slice(user_key);
+    key
+}
+
+pub(crate) fn logical_partition_registry_prefix() -> Vec<u8> {
+    vec![LOGICAL_PARTITION_REGISTRY_TAG]
+}
+
+pub(crate) fn logical_partition_registry_key(partition_name: &str) -> Vec<u8> {
+    let name = partition_name.as_bytes();
+    let mut key = Vec::with_capacity(1 + std::mem::size_of::<u32>() + name.len());
+    key.push(LOGICAL_PARTITION_REGISTRY_TAG);
+    key.extend_from_slice(&(name.len() as u32).to_be_bytes());
+    key.extend_from_slice(name);
+    key
+}
+
+pub(crate) fn decode_logical_partition_registry_key(key: &[u8]) -> Option<&str> {
+    if key.len() < 1 + std::mem::size_of::<u32>() || key[0] != LOGICAL_PARTITION_REGISTRY_TAG {
+        return None;
+    }
+
+    let len_start = 1;
+    let len_end = len_start + std::mem::size_of::<u32>();
+    let name_len = u32::from_be_bytes(key[len_start..len_end].try_into().ok()?) as usize;
+    let name_end = len_end.checked_add(name_len)?;
+    if key.len() != name_end {
+        return None;
+    }
+
+    std::str::from_utf8(&key[len_end..name_end]).ok()
+}
+
+pub(crate) fn next_prefix_bound(prefix: &[u8]) -> Option<Vec<u8>> {
+    let mut bound = prefix.to_vec();
+    for index in (0..bound.len()).rev() {
+        if bound[index] != u8::MAX {
+            bound[index] += 1;
+            bound.truncate(index + 1);
+            return Some(bound);
+        }
+    }
+    None
+}
diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/mod.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/mod.rs
index 6b1f3c644..842858b8b 100644
--- a/backend/crates/kalamdb-store/src/backends/rocksdb/mod.rs
+++ b/backend/crates/kalamdb-store/src/backends/rocksdb/mod.rs
@@ -3,16 +3,16 @@
 mod backend;
 mod cf_tuning;
 mod init;
+mod keyspace;
 pub mod test_utils;
 
-use std::path::Path;
-use std::sync::Arc;
-
-use crate::storage_trait::StorageBackend;
+use std::{path::Path, sync::Arc};
 
 pub use backend::RocksDBBackend;
 pub use init::RocksDbInit;
 
+use crate::storage_trait::StorageBackend;
+
 /// Open the RocksDB storage backend and return it through the generic storage trait.
 pub fn open_storage_backend(
     db_path: &Path,
@@ -28,4 +28,4 @@ pub fn open_storage_backend(
     ));
     backend.set_known_cf_names(cf_names.clone());
     Ok((backend, cf_names.len()))
-}
\ No newline at end of file
+}
diff --git a/backend/crates/kalamdb-store/src/backends/rocksdb/test_utils.rs b/backend/crates/kalamdb-store/src/backends/rocksdb/test_utils.rs
index 26619073e..3c2c1692d 100644
--- a/backend/crates/kalamdb-store/src/backends/rocksdb/test_utils.rs
+++ b/backend/crates/kalamdb-store/src/backends/rocksdb/test_utils.rs
@@ -1,14 +1,16 @@
 //! RocksDB-backed test helpers for kalamdb-store.
+use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; + use anyhow::Result; use rocksdb::{Options, DB}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; use tempfile::TempDir; -use crate::storage_trait::StorageBackend; - use super::{RocksDBBackend, RocksDbInit}; +use crate::storage_trait::StorageBackend; /// Test database wrapper that automatically cleans up on drop. pub struct TestDb { @@ -101,4 +103,4 @@ mod tests { assert!(test_db.db.cf_handle("user_table:app:messages").is_some()); } -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-store/src/entity_store.rs b/backend/crates/kalamdb-store/src/entity_store.rs index 1d640b1ed..41b5f4d9d 100644 --- a/backend/crates/kalamdb-store/src/entity_store.rs +++ b/backend/crates/kalamdb-store/src/entity_store.rs @@ -53,12 +53,15 @@ //! let retrieved = store.get(&user_id).unwrap().unwrap(); //! ``` -use crate::async_utils::run_blocking_result; -use crate::storage_trait::{Partition, Result, StorageBackend, StorageError}; -use kalamdb_commons::{next_storage_key_bytes, KSerializable, StorageKey}; -use std::collections::VecDeque; use std::sync::Arc; +use kalamdb_commons::{next_storage_key_bytes, KSerializable, StorageKey}; + +use crate::{ + async_utils::run_blocking_result, + storage_trait::{Partition, Result, StorageBackend, StorageError}, +}; + /// Directional scanning for entity stores #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ScanDirection { @@ -115,7 +118,7 @@ where /// **Rationale**: Direct backend access bypasses type safety and proper key serialization. /// All operations should go through EntityStore methods which ensure: /// - Proper key serialization via StorageKey trait - /// - Type-safe deserialization via KSerializable trait + /// - Type-safe deserialization via KSerializable trait /// - Consistent error handling /// - Future optimizations (caching, batching, etc.) 
#[doc(hidden)] @@ -160,28 +163,32 @@ where }, ScanDirection::Older => { let start_bytes = start_key.map(|k| k.storage_key()); - let iter = self.backend().scan(&partition, None, None, None)?; - let mut collected: VecDeque<(Vec, Vec)> = - VecDeque::with_capacity(limit.min(MAX_PREALLOC_CAPACITY)); + let scan_limit = if start_bytes.is_some() { + limit.saturating_add(1) + } else { + limit + }; + let iter = self.backend().scan_reverse( + &partition, + None, + start_bytes.as_deref(), + Some(scan_limit), + )?; + let mut rows = Vec::with_capacity(limit.min(MAX_PREALLOC_CAPACITY)); for (key_bytes, value_bytes) in iter { if let Some(start) = &start_bytes { if key_bytes.as_slice() >= start.as_slice() { - break; + continue; } } - if collected.len() == limit { - collected.pop_front(); - } - collected.push_back((key_bytes, value_bytes)); - } - - let mut rows = Vec::with_capacity(collected.len().min(MAX_PREALLOC_CAPACITY)); - for (key_bytes, value_bytes) in collected.into_iter().rev() { let key = K::from_storage_key(&key_bytes) .map_err(StorageError::SerializationError)?; let entity = self.deserialize(&value_bytes)?; rows.push(Ok((key, entity))); + if rows.len() >= limit { + break; + } } Ok(Box::new(rows.into_iter())) @@ -651,28 +658,31 @@ where }, ScanDirection::Older => { let start_bytes = start_key.map(|k| k.storage_key()); - let iter = self.backend().scan(&partition, None, None, None)?; - let mut collected: Vec> = Vec::new(); + let scan_limit = if start_bytes.is_some() { + limit.saturating_add(1) + } else { + limit + }; + let iter = self.backend().scan_reverse( + &partition, + None, + start_bytes.as_deref(), + Some(scan_limit), + )?; + let mut keys = Vec::with_capacity(limit.min(MAX_PREALLOC_CAPACITY)); for (key_bytes, _) in iter { if let Some(start) = &start_bytes { if key_bytes.as_slice() >= start.as_slice() { - break; + continue; } } - collected.push(key_bytes); - } - - if collected.len() > limit { - let drain_end = collected.len() - limit; - collected.drain(0..drain_end); 
- } - - let mut keys = Vec::with_capacity(collected.len()); - for key_bytes in collected { let key = K::from_storage_key(&key_bytes) .map_err(StorageError::SerializationError)?; keys.push(key); + if keys.len() >= limit { + break; + } } Ok(keys) }, @@ -823,7 +833,8 @@ where .await } - /// Async version of `scan_typed_with_prefix_and_start()` - scans entities with typed prefix and start key. + /// Async version of `scan_typed_with_prefix_and_start()` - scans entities with typed prefix and + /// start key. /// /// Uses `spawn_blocking` internally to prevent blocking the async runtime. async fn scan_typed_with_prefix_and_start_async( @@ -933,10 +944,12 @@ use kalamdb_commons::models::{Role, TableAccess}; #[cfg(test)] mod tests { - use super::*; + use std::sync::Arc; + use kalamdb_commons::models::{Role, TableAccess, UserId}; use serde_json::Value as JsonValue; - use std::sync::Arc; + + use super::*; // Mock implementation for testing struct MockStore { diff --git a/backend/crates/kalamdb-store/src/index/secondary_index.rs b/backend/crates/kalamdb-store/src/index/secondary_index.rs index 78b25d2f9..f39377e43 100644 --- a/backend/crates/kalamdb-store/src/index/secondary_index.rs +++ b/backend/crates/kalamdb-store/src/index/secondary_index.rs @@ -1,7 +1,9 @@ +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + use super::extractor::{FunctionExtractor, IndexKeyExtractor}; use crate::storage_trait::{Partition, Result, StorageBackend, StorageError}; -use serde::{Deserialize, Serialize}; -use std::sync::Arc; /// Secondary index implementation for entity stores. /// @@ -240,7 +242,8 @@ where { assert!( self.unique, - "get_primary_key() only works for unique indexes. Use get_primary_keys() for non-unique indexes." + "get_primary_key() only works for unique indexes. Use get_primary_keys() for \ + non-unique indexes." ); match self.backend.get(&self.partition, index_key.as_ref())? 
{ @@ -306,11 +309,12 @@ where #[cfg(all(test, feature = "rocksdb"))] mod tests { - use super::*; - use crate::{RocksDBBackend, RocksDbInit}; use serde::{Deserialize, Serialize}; use tempfile::TempDir; + use super::*; + use crate::{RocksDBBackend, RocksDbInit}; + #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] struct TestUser { user_id: String, diff --git a/backend/crates/kalamdb-store/src/indexed_store.rs b/backend/crates/kalamdb-store/src/indexed_store.rs index 2c1fcf133..f3dfb7a8f 100644 --- a/backend/crates/kalamdb-store/src/indexed_store.rs +++ b/backend/crates/kalamdb-store/src/indexed_store.rs @@ -96,16 +96,19 @@ //! let running_jobs = store.scan_by_index(0, Some(&[JobStatus::Running as u8]), Some(10))?; //! ``` -use crate::async_utils::run_blocking_result; -use crate::entity_store::{EntityIterator, EntityStore}; -use crate::storage_trait::{Operation, Partition, Result, StorageBackend, StorageError}; -use kalamdb_commons::{KSerializable, StorageKey}; use std::sync::Arc; #[cfg(feature = "datafusion")] use datafusion::logical_expr::{Expr, Operator}; #[cfg(feature = "datafusion")] use datafusion::scalar::ScalarValue; +use kalamdb_commons::{KSerializable, StorageKey}; + +use crate::{ + async_utils::run_blocking_result, + entity_store::{EntityIterator, EntityStore}, + storage_trait::{Operation, Partition, Result, StorageBackend, StorageError}, +}; // ============================================================================ // IndexDefinition Trait @@ -327,7 +330,6 @@ where None } - /// Finds the "best" index for a set of DataFusion filters. /// /// Strategy: pick the index that yields the longest prefix (more selective). 
@@ -1067,11 +1069,11 @@ pub fn extract_i64_equality(filter: &Expr) -> Option<(&str, i64)> { #[cfg(test)] mod tests { + use kalamdb_commons::{JobId, NodeId}; + use kalamdb_system::{providers::jobs::models::Job, JobStatus, JobType}; + use super::*; use crate::test_utils::InMemoryBackend; - use kalamdb_commons::{JobId, NodeId}; - use kalamdb_system::providers::jobs::models::Job; - use kalamdb_system::{JobStatus, JobType}; // Test index: Jobs by status struct TestStatusIndex; diff --git a/backend/crates/kalamdb-store/src/lib.rs b/backend/crates/kalamdb-store/src/lib.rs index df6e6cf40..28c1fca75 100644 --- a/backend/crates/kalamdb-store/src/lib.rs +++ b/backend/crates/kalamdb-store/src/lib.rs @@ -30,34 +30,28 @@ pub mod storage_trait; #[cfg(feature = "rocksdb")] pub use backends::rocksdb::{RocksDBBackend, RocksDbInit}; -pub use storage_trait::{ - Operation, Partition, StorageBackend, StorageBackendAsync, StorageError, StorageStats, -}; - -// Re-export StorageKey from kalamdb-commons to avoid import inconsistency -pub use kalamdb_commons::StorageKey; - // Phase 14: Export new type-safe EntityStore traits pub use entity_store::{ CrossUserTableStore, EntityStore, EntityStoreAsync, // Async versions using spawn_blocking internally }; - // Export index types pub use index::{FunctionExtractor, IndexKeyExtractor, SecondaryIndex}; - -// Phase 15: Export IndexedEntityStore for automatic index management -pub use indexed_store::{IndexDefinition, IndexedEntityStore}; - #[cfg(feature = "datafusion")] pub use indexed_store::{extract_i64_equality, extract_string_equality}; - +// Phase 15: Export IndexedEntityStore for automatic index management +pub use indexed_store::{IndexDefinition, IndexedEntityStore}; +// Re-export StorageKey from kalamdb-commons to avoid import inconsistency +pub use kalamdb_commons::StorageKey; // Phase 17: Export Raft storage types pub use raft_storage::{ GroupId, RaftLogEntry, RaftLogId, RaftPartitionStore, RaftSnapshotData, RaftSnapshotMeta, RaftVote, 
RAFT_PARTITION_NAME, }; +pub use storage_trait::{ + Operation, Partition, StorageBackend, StorageBackendAsync, StorageError, StorageStats, +}; // Make test_utils available for testing in dependent crates pub mod test_utils; @@ -78,8 +72,6 @@ pub fn open_storage_backend( #[cfg(not(feature = "rocksdb"))] { let _ = (db_path, settings); - anyhow::bail!( - "kalamdb-store was built without the rocksdb backend feature enabled" - ) + anyhow::bail!("kalamdb-store was built without the rocksdb backend feature enabled") } } diff --git a/backend/crates/kalamdb-store/src/raft_storage.rs b/backend/crates/kalamdb-store/src/raft_storage.rs index 5968a0427..3db8d66ad 100644 --- a/backend/crates/kalamdb-store/src/raft_storage.rs +++ b/backend/crates/kalamdb-store/src/raft_storage.rs @@ -40,13 +40,14 @@ //! store.save_vote(&vote)?; //! ``` -use crate::storage_trait::{Operation, Partition, Result, StorageBackend}; -use kalamdb_commons::KSerializable; -use serde::{Deserialize, Serialize}; use std::sync::Arc; +use kalamdb_commons::KSerializable; // Re-export GroupId from kalamdb-sharding pub use kalamdb_sharding::GroupId; +use serde::{Deserialize, Serialize}; + +use crate::storage_trait::{Operation, Partition, Result, StorageBackend}; /// The single partition name for all Raft data. pub const RAFT_PARTITION_NAME: &str = "raft_data"; diff --git a/backend/crates/kalamdb-store/src/storage_trait.rs b/backend/crates/kalamdb-store/src/storage_trait.rs index 3c320c87c..577170d9e 100644 --- a/backend/crates/kalamdb-store/src/storage_trait.rs +++ b/backend/crates/kalamdb-store/src/storage_trait.rs @@ -10,12 +10,12 @@ //! - get/put/delete for key-value access //! - batch for atomic multi-operation transactions //! - scan for range queries -//! - partition management (maps to column families in RocksDB, trees in Sled, etc.) +//! - partition management (mapped to backend-native keyspaces) //! //! ## Partition Model //! //! Since different backends have different concepts for data organization: -//! 
- **RocksDB**: Partition = Column Family +//! - **RocksDB**: Partition = key prefix inside a fixed physical column-family set //! - **Sled**: Partition = Tree //! - **Redis**: Partition = Key Prefix //! - **In-Memory**: Partition = HashMap namespace @@ -25,7 +25,7 @@ //! ## Example Usage //! //! ```rust -//! use kalamdb_store::storage_trait::{StorageBackend, Partition, Operation}; +//! use kalamdb_store::storage_trait::{Operation, Partition, StorageBackend}; //! //! fn store_user_data(backend: &S, user_id: &str, data: &[u8]) { //! let partition = Partition::new(format!("user_{}", user_id)); @@ -59,8 +59,7 @@ //! } //! ``` -use std::collections::BTreeMap; -use std::path::Path; +use std::{collections::BTreeMap, path::Path}; pub use kalamdb_commons::storage::{KvIterator, Operation, Partition, Result, StorageError}; @@ -113,7 +112,8 @@ pub trait StorageBackend: Send + Sync { /// /// ## Parameters /// - `prefix`: If Some, only return keys starting with this prefix - /// - `start_key`: If Some, start scanning from this key (inclusive). Must be >= prefix if both are set. + /// - `start_key`: If Some, start scanning from this key (inclusive). Must be >= prefix if both + /// are set. /// - `limit`: If Some, return at most this many entries fn scan( &self, @@ -186,11 +186,11 @@ pub trait StorageBackend: Send + Sync { /// compaction (no-op for backends without compaction). fn compact_partition(&self, partition: &Partition) -> Result<()>; - /// Flush all memtables across all partitions (column families) to SST files. + /// Flush all memtables across the backend to SST files. /// /// This allows RocksDB to reclaim WAL files that are pinned by unflushed - /// memtables in idle column families. Without periodic flushing, WAL files - /// accumulate indefinitely when only a subset of CFs receive writes. + /// memtables in idle physical keyspaces. Without periodic flushing, WAL files + /// can accumulate indefinitely when only a subset of physical stores receive writes. 
/// /// Default implementation is a no-op for backends without WAL. fn flush_all_memtables(&self) -> Result<()> { @@ -210,7 +210,8 @@ pub trait StorageBackend: Send + Sync { )) } - /// Restores the storage engine from a backup directory created by [`StorageBackend::backup_to`]. + /// Restores the storage engine from a backup directory created by + /// [`StorageBackend::backup_to`]. /// /// For RocksDB this calls `BackupEngine::restore_from_latest_backup`, overwriting the /// current data directory. **The server must be restarted after this call** to reload @@ -229,7 +230,6 @@ pub trait StorageBackend: Send + Sync { /// (for example `storage_backend` and `storage_partition_count`) and any /// engine-specific keys under a stable engine prefix such as `rocksdb_*`. fn stats(&self) -> StorageStats; - } /// Extension trait providing async versions of StorageBackend methods. diff --git a/backend/crates/kalamdb-store/src/test_utils.rs b/backend/crates/kalamdb-store/src/test_utils.rs index c8961dc9e..d0fadb8e5 100644 --- a/backend/crates/kalamdb-store/src/test_utils.rs +++ b/backend/crates/kalamdb-store/src/test_utils.rs @@ -2,13 +2,14 @@ //! //! Provides generic test helpers plus feature-gated backend-specific helpers. -use std::collections::{BTreeMap, HashMap}; -use std::sync::RwLock; - -use crate::storage_trait::{Operation, Partition, StorageBackend, StorageStats}; +use std::{ + collections::{BTreeMap, HashMap}, + sync::RwLock, +}; #[cfg(feature = "rocksdb")] pub use crate::backends::rocksdb::test_utils::TestDb; +use crate::storage_trait::{Operation, Partition, StorageBackend, StorageStats}; /// In-memory implementation of StorageBackend for testing. 
/// @@ -186,10 +187,7 @@ impl StorageBackend for InMemoryBackend { StorageStats::from([ ("storage_backend".to_string(), "in_memory".to_string()), - ( - "storage_partition_count".to_string(), - partition_count.to_string(), - ), + ("storage_partition_count".to_string(), partition_count.to_string()), ]) } } diff --git a/backend/crates/kalamdb-streams/src/config.rs b/backend/crates/kalamdb-streams/src/config.rs index 8173ce877..9d48f15f9 100644 --- a/backend/crates/kalamdb-streams/src/config.rs +++ b/backend/crates/kalamdb-streams/src/config.rs @@ -1,7 +1,9 @@ -use crate::time_bucket::StreamTimeBucket; +use std::path::PathBuf; + use kalamdb_commons::models::TableId; use kalamdb_sharding::ShardRouter; -use std::path::PathBuf; + +use crate::time_bucket::StreamTimeBucket; /// Stream log configuration. #[derive(Debug, Clone)] diff --git a/backend/crates/kalamdb-streams/src/file_store.rs b/backend/crates/kalamdb-streams/src/file_store.rs index 85b8ca87a..fc8021a0e 100644 --- a/backend/crates/kalamdb-streams/src/file_store.rs +++ b/backend/crates/kalamdb-streams/src/file_store.rs @@ -1,26 +1,44 @@ -use crate::config::StreamLogConfig; -use crate::error::{Result, StreamLogError}; -use crate::record::StreamLogRecord; -use crate::store_trait::StreamLogStore; -use crate::time_bucket::StreamTimeBucket; -use crate::utils::{cleanup_empty_dir, parse_log_window, read_dirs, read_files}; +use std::{ + collections::{HashMap, HashSet}, + fmt, + fs::{self, File, OpenOptions}, + io::{BufReader, BufWriter, Read, Write}, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + time::Instant, +}; + use chrono::{Datelike, TimeZone, Timelike, Utc}; -use dashmap::{DashMap, DashSet}; -use kalamdb_commons::ids::StreamTableRowId; -use kalamdb_commons::models::{StreamTableRow, TableId, UserId}; -use std::collections::{HashMap, HashSet}; -use std::fmt; -use std::fs::{self, File, OpenOptions}; -use std::io::{BufReader, BufWriter, Read, Write}; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; -use 
std::time::Instant; - -/// Write buffer capacity per segment file handle (256 KB). +use dashmap::DashMap; +use kalamdb_commons::{ + ids::StreamTableRowId, + models::{StreamTableRow, TableId, UserId}, +}; + +use crate::{ + config::StreamLogConfig, + error::{Result, StreamLogError}, + record::StreamLogRecord, + store_trait::StreamLogStore, + time_bucket::StreamTimeBucket, + utils::{cleanup_empty_dir, parse_log_window, parse_tmp_log_window, visit_dirs, visit_files}, +}; + +/// Write buffer capacity per segment file handle (64 KB). +/// +/// This keeps per-active-segment memory bounded while still amortising small +/// append records into fewer `write()` syscalls. +const SEGMENT_BUF_CAPACITY: usize = 64 * 1024; + +/// Maximum cached segment writers per stream table. /// -/// Larger buffers amortise `write()` syscalls — with ~500-byte records a 256 KB -/// buffer holds ~500 records before the OS sees a single `write()`. -const SEGMENT_BUF_CAPACITY: usize = 256 * 1024; +/// Each cached segment owns a file descriptor and a write buffer, so this cap +/// prevents high-cardinality stream users from turning into unbounded memory and +/// open-file growth. +const MAX_OPEN_SEGMENTS: usize = 256; + +/// Number of cold segment writers to close when the cache exceeds its cap. +const SEGMENT_EVICT_BATCH: usize = 32; /// Cached state for an open segment file. struct SegmentWriter { @@ -33,19 +51,18 @@ struct SegmentWriter { /// /// Optimised for high-throughput concurrent writes: /// -/// * **Cached file handles** — open segment files are kept in a sharded -/// `DashMap`, eliminating open / close syscall overhead per write. -/// * **Sharded write buffers** — each segment has its own 256 KB `BufWriter`, -/// reducing flush frequency while enabling per-user parallelism. -/// * **Directory cache** — avoids repeated `create_dir_all` syscalls. -/// * **Batch writes** — multiple records targeting the same segment share a -/// single lock acquisition. 
+/// * **Cached file handles** — open segment files are kept in a sharded `DashMap`, eliminating open +/// / close syscall overhead per write. +/// * **Bounded write buffers** — each segment has its own 64 KB `BufWriter`, reducing flush +/// frequency while enabling per-user parallelism. +/// * **Bounded cache** — old segment writers are flushed and closed when too many users/windows are +/// active at once. +/// * **Batch writes** — multiple records targeting the same segment share a single lock +/// acquisition. pub struct FileStreamLogStore { config: StreamLogConfig, /// Cached open segment writers keyed by log-file path. segments: DashMap>>, - /// Parent directories that have already been created. - created_dirs: DashSet, } impl fmt::Debug for FileStreamLogStore { @@ -53,7 +70,6 @@ impl fmt::Debug for FileStreamLogStore { f.debug_struct("FileStreamLogStore") .field("config", &self.config) .field("open_segments", &self.segments.len()) - .field("cached_dirs", &self.created_dirs.len()) .finish() } } @@ -63,7 +79,6 @@ impl FileStreamLogStore { Self { config, segments: DashMap::new(), - created_dirs: DashSet::new(), } } @@ -106,11 +121,7 @@ impl FileStreamLogStore { } } for path in to_remove { - if let Some((_, writer)) = self.segments.remove(&path) { - if let Ok(mut g) = writer.lock() { - let _ = g.writer.flush(); - } - } + self.close_segment(&path); } } @@ -137,6 +148,26 @@ impl FileStreamLogStore { ) } + pub fn append_row( + &self, + table_id: &TableId, + user_id: &UserId, + row_id: &StreamTableRowId, + row: &StreamTableRow, + ) -> Result<()> { + self.ensure_table(table_id)?; + let ts = row_id.seq().timestamp_millis(); + let window_start = self.window_start_ms(ts); + let path = self.log_path(user_id, window_start); + self.append_record( + &path, + StreamLogRecord::Put { + row_id: row_id.clone(), + row: row.clone(), + }, + ) + } + pub fn delete_old_logs_with_count(&self, before_time: u64) -> Result { let mut deleted = 0usize; let base_dir = &self.config.base_dir; 
@@ -144,36 +175,24 @@ impl FileStreamLogStore { return Ok(0); } - let bucket_dirs = read_dirs(base_dir)?; - for bucket_dir in bucket_dirs { - let shard_dirs = read_dirs(&bucket_dir)?; - for shard_dir in shard_dirs { - let user_dirs = read_dirs(&shard_dir)?; - for user_dir in user_dirs { - let log_files = read_files(&user_dir)?; - for log_file in log_files { - if let Some(window_start) = parse_log_window(&log_file) { - let window_end = - window_start.saturating_add(self.config.bucket.duration_ms()); - if window_end < before_time { - // Flush and drop the cached writer before deleting from disk. - if let Some((_, writer)) = self.segments.remove(&log_file) { - if let Ok(mut g) = writer.lock() { - let _ = g.writer.flush(); - } - } - if fs::remove_file(&log_file).is_ok() { - deleted += 1; - } - } + visit_dirs(base_dir, |bucket_dir| { + visit_dirs(&bucket_dir, |shard_dir| { + visit_dirs(&shard_dir, |user_dir| { + visit_files(&user_dir, |log_file| { + if self.delete_if_expired(&log_file, before_time)? 
{ + deleted += 1; } - } + Ok(true) + })?; cleanup_empty_dir(&user_dir); - } + Ok(true) + })?; cleanup_empty_dir(&shard_dir); - } + Ok(true) + })?; cleanup_empty_dir(&bucket_dir); - } + Ok(true) + })?; cleanup_empty_dir(base_dir); @@ -186,27 +205,22 @@ impl FileStreamLogStore { return Ok(false); } - let bucket_dirs = read_dirs(base_dir)?; - for bucket_dir in bucket_dirs { - let shard_dirs = read_dirs(&bucket_dir)?; - for shard_dir in shard_dirs { - let user_dirs = read_dirs(&shard_dir)?; - for user_dir in user_dirs { - let log_files = read_files(&user_dir)?; - for log_file in log_files { - if let Some(window_start) = parse_log_window(&log_file) { - let window_end = - window_start.saturating_add(self.config.bucket.duration_ms()); - if window_end < before_time { - return Ok(true); - } + let mut found = false; + visit_dirs(base_dir, |bucket_dir| { + visit_dirs(&bucket_dir, |shard_dir| { + visit_dirs(&shard_dir, |user_dir| { + visit_files(&user_dir, |log_file| { + if self.is_expired_log_or_tmp(&log_file, before_time) { + found = true; + return Ok(false); } - } - } - } - } + Ok(true) + }) + }) + }) + })?; - Ok(false) + Ok(found) } pub fn list_user_ids(&self) -> Result> { @@ -216,20 +230,72 @@ impl FileStreamLogStore { return Ok(Vec::new()); } - let bucket_dirs = read_dirs(base_dir)?; - for bucket_dir in bucket_dirs { - let shard_dirs = read_dirs(&bucket_dir)?; - for shard_dir in shard_dirs { - let user_dirs = read_dirs(&shard_dir)?; - for user_dir in user_dirs { + visit_dirs(base_dir, |bucket_dir| { + visit_dirs(&bucket_dir, |shard_dir| { + visit_dirs(&shard_dir, |user_dir| { if let Some(name) = user_dir.file_name().and_then(|n| n.to_str()) { users.insert(UserId::new(name)); } - } + Ok(true) + }) + }) + })?; + + Ok(users.into_iter().collect()) + } + + fn is_expired_log_or_tmp(&self, path: &Path, before_time: u64) -> bool { + let Some(window_start) = parse_log_window(path).or_else(|| parse_tmp_log_window(path)) + else { + return false; + }; + let window_end = 
window_start.saturating_add(self.config.bucket.duration_ms()); + window_end < before_time + } + + fn delete_if_expired(&self, path: &Path, before_time: u64) -> Result { + if !self.is_expired_log_or_tmp(path, before_time) { + return Ok(false); + } + + self.close_segment(path); + match fs::remove_file(path) { + Ok(()) => Ok(true), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), + Err(err) => Err(StreamLogError::Io(err.to_string())), + } + } + + fn close_segment(&self, path: &Path) -> bool { + if let Some((_, writer)) = self.segments.remove(path) { + if let Ok(mut g) = writer.lock() { + let _ = g.writer.flush(); } + return true; } + false + } - Ok(users.into_iter().collect()) + fn prune_open_segments_if_needed(&self) { + let current_len = self.segments.len(); + if current_len <= MAX_OPEN_SEGMENTS { + return; + } + + let over_limit = current_len.saturating_sub(MAX_OPEN_SEGMENTS); + let remove_count = over_limit.saturating_add(SEGMENT_EVICT_BATCH).min(current_len); + let mut candidates = Vec::with_capacity(current_len.min(remove_count * 2)); + + for entry in self.segments.iter() { + if let Ok(g) = entry.value().try_lock() { + candidates.push((g.last_write, entry.key().clone())); + } + } + + candidates.sort_by_key(|(last_write, _)| *last_write); + for (_, path) in candidates.into_iter().take(remove_count) { + self.close_segment(&path); + } } fn ensure_table(&self, table_id: &TableId) -> Result<()> { @@ -304,10 +370,7 @@ impl FileStreamLogStore { /// skip redundant `create_dir_all` calls. 
fn ensure_parent_dir(&self, path: &Path) -> Result<()> { if let Some(parent) = path.parent() { - if !self.created_dirs.contains(parent) { - fs::create_dir_all(parent).map_err(|e| StreamLogError::Io(e.to_string()))?; - self.created_dirs.insert(parent.to_path_buf()); - } + fs::create_dir_all(parent).map_err(|e| StreamLogError::Io(e.to_string()))?; } Ok(()) } @@ -335,7 +398,9 @@ impl FileStreamLogStore { // Use `entry` API so a concurrent creation by another thread is // handled correctly — whichever was inserted first wins. - Ok(self.segments.entry(path.to_path_buf()).or_insert(writer).value().clone()) + let segment = self.segments.entry(path.to_path_buf()).or_insert(writer).value().clone(); + self.prune_open_segments_if_needed(); + Ok(segment) } /// Serialise `record` and write the length-prefixed frame to `writer`. @@ -372,10 +437,12 @@ impl FileStreamLogStore { } } - fn read_records(path: &Path) -> Result> { + fn visit_records(path: &Path, mut visitor: F) -> Result + where + F: FnMut(StreamLogRecord) -> Result, + { let file = File::open(path).map_err(|e| StreamLogError::Io(e.to_string()))?; let mut reader = BufReader::new(file); - let mut records = Vec::new(); loop { let mut len_buf = [0u8; 4]; match reader.read_exact(&mut len_buf) { @@ -389,11 +456,30 @@ impl FileStreamLogStore { } let len = u32::from_le_bytes(len_buf) as usize; let mut payload = vec![0u8; len]; - reader.read_exact(&mut payload).map_err(|e| StreamLogError::Io(e.to_string()))?; + match reader.read_exact(&mut payload) { + Ok(_) => {}, + Err(err) => { + if err.kind() == std::io::ErrorKind::UnexpectedEof { + break; + } + return Err(StreamLogError::Io(err.to_string())); + }, + } let record = flexbuffers::from_slice::(&payload) .map_err(|e| StreamLogError::Serialization(e.to_string()))?; - records.push(record); + if !visitor(record)? 
{ + return Ok(false); + } } + Ok(true) + } + + fn read_records(path: &Path) -> Result> { + let mut records = Vec::new(); + Self::visit_records(path, |record| { + records.push(record); + Ok(true) + })?; Ok(records) } @@ -404,61 +490,66 @@ impl FileStreamLogStore { return Ok(entries); } - let bucket_dirs = read_dirs(base_dir)?; - for bucket_dir in bucket_dirs { - let shard_dirs = read_dirs(&bucket_dir)?; - for shard_dir in shard_dirs { - let user_dir = shard_dir.join(user_id.as_str()); - if !user_dir.exists() { - continue; - } - let log_files = read_files(&user_dir)?; - for log_file in log_files { - if let Some(window_start) = parse_log_window(&log_file) { - entries.push((window_start, log_file)); - } - } + let shard = self.config.shard_router.route_stream_user(user_id).folder_name(); + visit_dirs(base_dir, |bucket_dir| { + let user_dir = bucket_dir.join(&shard).join(user_id.as_str()); + if !user_dir.exists() { + return Ok(true); } - } + visit_files(&user_dir, |log_file| { + if let Some(window_start) = parse_log_window(&log_file) { + entries.push((window_start, log_file)); + } + Ok(true) + })?; + Ok(true) + })?; Ok(entries) } - fn read_range_internal( + fn list_log_files_for_user_in_range( &self, user_id: &UserId, start_time: u64, end_time: u64, - limit: usize, - ) -> Result> { - let mut entries = self.list_log_files_for_user(user_id)?; - if entries.is_empty() || limit == 0 { - return Ok(Vec::new()); - } - + ) -> Result> { let bucket_ms = self.config.bucket.duration_ms(); + let mut entries = self.list_log_files_for_user(user_id)?; entries.retain(|(window_start, _)| { let window_end = window_start.saturating_add(bucket_ms); *window_start <= end_time && window_end >= start_time }); entries.sort_by_key(|(window_start, _)| *window_start); + Ok(entries) + } + fn list_log_files_for_user_latest(&self, user_id: &UserId) -> Result> { + let mut entries = self.list_log_files_for_user(user_id)?; + entries.sort_by(|a, b| b.0.cmp(&a.0)); + Ok(entries) + } + + fn 
collect_range_from_entries( + &self, + entries: &[(u64, PathBuf)], + limit: usize, + ) -> Result> { let mut results: Vec<(StreamTableRowId, StreamTableRow)> = Vec::new(); let mut deleted: HashSet = HashSet::new(); - for (_window_start, path) in &entries { + for (_window_start, path) in entries { self.flush_segment(path); - let records = Self::read_records(path)?; - for record in records { + let should_continue = Self::visit_records(path, |record| { match record { StreamLogRecord::Put { row_id, row } => { let seq = row_id.seq().as_i64(); if deleted.contains(&seq) { - continue; + return Ok(true); } results.push((row_id, row)); if results.len() >= limit { - return Ok(results); + return Ok(false); } }, StreamLogRecord::Delete { row_id } => { @@ -467,24 +558,45 @@ impl FileStreamLogStore { results.retain(|(existing_id, _)| existing_id.seq().as_i64() != seq); }, } + Ok(true) + })?; + if !should_continue { + break; } } Ok(results) } + fn read_range_internal( + &self, + user_id: &UserId, + start_time: u64, + end_time: u64, + limit: usize, + ) -> Result> { + if limit == 0 { + return Ok(Vec::new()); + } + + let entries = self.list_log_files_for_user_in_range(user_id, start_time, end_time)?; + if entries.is_empty() { + return Ok(Vec::new()); + } + + self.collect_range_from_entries(&entries, limit) + } + fn read_latest_internal( &self, user_id: &UserId, limit: usize, ) -> Result> { - let mut entries = self.list_log_files_for_user(user_id)?; + let entries = self.list_log_files_for_user_latest(user_id)?; if entries.is_empty() || limit == 0 { return Ok(Vec::new()); } - entries.sort_by(|a, b| b.0.cmp(&a.0)); - let mut results: Vec<(StreamTableRowId, StreamTableRow)> = Vec::new(); let mut deleted: HashSet = HashSet::new(); @@ -586,19 +698,24 @@ impl Drop for FileStreamLogStore { #[cfg(test)] mod tests { - use super::FileStreamLogStore; - use crate::config::StreamLogConfig; - use crate::store_trait::StreamLogStore; - use crate::time_bucket::StreamTimeBucket; + use std::{ + 
collections::{BTreeMap, HashMap}, + fs, + path::PathBuf, + }; + use chrono::{Datelike, TimeZone, Timelike}; use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::{SeqId, SnowflakeGenerator, StreamTableRowId}; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{NamespaceId, StreamTableRow, TableId, TableName, UserId}; + use kalamdb_commons::{ + ids::{SeqId, SnowflakeGenerator, StreamTableRowId}, + models::{rows::Row, NamespaceId, StreamTableRow, TableId, TableName, UserId}, + }; use kalamdb_sharding::ShardRouter; - use std::collections::{BTreeMap, HashMap}; - use std::fs; - use std::path::PathBuf; + + use super::{FileStreamLogStore, MAX_OPEN_SEGMENTS}; + use crate::{ + config::StreamLogConfig, store_trait::StreamLogStore, time_bucket::StreamTimeBucket, + }; fn temp_base_dir(prefix: &str) -> PathBuf { let now = std::time::SystemTime::now() @@ -673,6 +790,20 @@ mod tests { } } + fn create_store( + base_dir: PathBuf, + table_id: TableId, + shard_router: ShardRouter, + bucket: StreamTimeBucket, + ) -> FileStreamLogStore { + FileStreamLogStore::new(StreamLogConfig { + base_dir, + shard_router, + bucket, + table_id, + }) + } + #[test] fn test_delete_old_logs_cleans_files_and_folders() { let base_dir = temp_base_dir("kalamdb_streams_delete_old"); @@ -680,12 +811,7 @@ mod tests { let shard_router = ShardRouter::new(4, 1); let bucket = StreamTimeBucket::Hour; - let store = FileStreamLogStore::new(StreamLogConfig { - base_dir: base_dir.clone(), - shard_router: shard_router.clone(), - bucket, - table_id: table_id.clone(), - }); + let store = create_store(base_dir.clone(), table_id.clone(), shard_router.clone(), bucket); let user_id = UserId::new("user-1"); let now_ms = chrono::Utc::now().timestamp_millis() as u64; @@ -744,16 +870,47 @@ mod tests { let _ = fs::remove_dir_all(&base_dir); } + #[test] + fn test_delete_old_logs_cleans_stale_tmp_files() { + let base_dir = temp_base_dir("kalamdb_streams_delete_tmp"); + let table_id = 
TableId::new(NamespaceId::new("test_ns"), TableName::new("events")); + let shard_router = ShardRouter::new(4, 1); + let bucket = StreamTimeBucket::Hour; + let store = create_store(base_dir.clone(), table_id, shard_router.clone(), bucket); + + let user_id = UserId::new("user-tmp"); + let now_ms = chrono::Utc::now().timestamp_millis() as u64; + let old_ts = now_ms.saturating_sub(3 * 60 * 60 * 1000); + let old_window = window_start_ms(bucket, old_ts); + let old_bucket = bucket_folder(bucket, old_window); + let tmp_path = base_dir + .join(old_bucket) + .join(shard_router.route_stream_user(&user_id).folder_name()) + .join(user_id.as_str()) + .join(format!("{}.log.tmp", old_window)); + fs::create_dir_all(tmp_path.parent().unwrap()).unwrap(); + fs::write(&tmp_path, b"partial").unwrap(); + + let deleted = store + .delete_old_logs_with_count(now_ms.saturating_sub(60 * 60 * 1000)) + .expect("delete_old_logs_with_count failed"); + + assert_eq!(deleted, 1); + assert!(!tmp_path.exists(), "expected stale tmp log file to be deleted"); + + let _ = fs::remove_dir_all(&base_dir); + } + #[test] fn test_concurrent_writes_from_multiple_users() { let base_dir = temp_base_dir("kalamdb_streams_concurrent"); let table_id = TableId::new(NamespaceId::new("test_ns"), TableName::new("events")); - let store = std::sync::Arc::new(FileStreamLogStore::new(StreamLogConfig { - base_dir: base_dir.clone(), - shard_router: ShardRouter::new(4, 1), - bucket: StreamTimeBucket::Hour, - table_id: table_id.clone(), - })); + let store = std::sync::Arc::new(create_store( + base_dir.clone(), + table_id.clone(), + ShardRouter::new(4, 1), + StreamTimeBucket::Hour, + )); let num_users: usize = 50; let writes_per_user: usize = 100; @@ -768,9 +925,7 @@ mod tests { let seq = SeqId::new((i * 10000 + j + 1) as i64); let row_id = StreamTableRowId::new(user_id.clone(), seq); let row = build_row(&user_id, seq); - let mut rows = HashMap::new(); - rows.insert(row_id, row); - store.append_rows(&tid, &user_id, rows).unwrap(); 
+ store.append_row(&tid, &user_id, &row_id, &row).unwrap(); } })); } @@ -799,24 +954,56 @@ mod tests { let _ = fs::remove_dir_all(&base_dir); } + #[test] + fn test_open_segment_cache_is_bounded() { + let base_dir = temp_base_dir("kalamdb_streams_segment_cap"); + let table_id = TableId::new(NamespaceId::new("test_ns"), TableName::new("events")); + let store = create_store( + base_dir.clone(), + table_id.clone(), + ShardRouter::new(4, 1), + StreamTimeBucket::Hour, + ); + + let user_count = MAX_OPEN_SEGMENTS + 64; + for i in 0..user_count { + let user_id = UserId::new(format!("user-{}", i)); + let seq = SeqId::new((i + 1) as i64); + let row_id = StreamTableRowId::new(user_id.clone(), seq); + let row = build_row(&user_id, seq); + store.append_row(&table_id, &user_id, &row_id, &row).unwrap(); + } + + assert!( + store.open_segment_count() <= MAX_OPEN_SEGMENTS, + "open segment cache exceeded cap: {} > {}", + store.open_segment_count(), + MAX_OPEN_SEGMENTS + ); + + let first_user = UserId::new("user-0"); + let read = store.read_with_limit(&table_id, &first_user, 10).unwrap(); + assert_eq!(read.len(), 1, "evicted segment should remain readable"); + + let _ = fs::remove_dir_all(&base_dir); + } + #[test] fn test_flush_all_and_close_idle() { let base_dir = temp_base_dir("kalamdb_streams_flush"); let table_id = TableId::new(NamespaceId::new("test_ns"), TableName::new("events")); - let store = FileStreamLogStore::new(StreamLogConfig { - base_dir: base_dir.clone(), - shard_router: ShardRouter::new(4, 1), - bucket: StreamTimeBucket::Hour, - table_id: table_id.clone(), - }); + let store = create_store( + base_dir.clone(), + table_id.clone(), + ShardRouter::new(4, 1), + StreamTimeBucket::Hour, + ); let user_id = UserId::new("user-flush"); let seq = SeqId::new(42); let row_id = StreamTableRowId::new(user_id.clone(), seq); let row = build_row(&user_id, seq); - let mut rows = HashMap::new(); - rows.insert(row_id, row); - store.append_rows(&table_id, &user_id, rows).unwrap(); + 
store.append_row(&table_id, &user_id, &row_id, &row).unwrap(); assert_eq!(store.open_segment_count(), 1); store.flush_all().unwrap(); diff --git a/backend/crates/kalamdb-streams/src/memory_store.rs b/backend/crates/kalamdb-streams/src/memory_store.rs index ae94d02d8..9dd2bef0b 100644 --- a/backend/crates/kalamdb-streams/src/memory_store.rs +++ b/backend/crates/kalamdb-streams/src/memory_store.rs @@ -4,14 +4,22 @@ //! that don't need persistence. Data is organized by user_id and then by timestamp //! for efficient time-range queries. -use crate::config::StreamLogConfig; -use crate::error::{Result, StreamLogError}; -use crate::record::StreamLogRecord; -use crate::store_trait::StreamLogStore; -use kalamdb_commons::ids::StreamTableRowId; -use kalamdb_commons::models::{StreamTableRow, TableId, UserId}; -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::sync::RwLock; +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + sync::RwLock, +}; + +use kalamdb_commons::{ + ids::StreamTableRowId, + models::{StreamTableRow, TableId, UserId}, +}; + +use crate::{ + config::StreamLogConfig, + error::{Result, StreamLogError}, + record::StreamLogRecord, + store_trait::StreamLogStore, +}; /// Key for the in-memory store: (user_id string, timestamp_ms, row_id bytes for uniqueness) type RowKey = (String, u64, Vec); @@ -91,6 +99,37 @@ impl MemoryStreamLogStore { Ok(()) } + pub fn append_row( + &self, + table_id: &TableId, + _user_id: &UserId, + row_id: &StreamTableRowId, + row: &StreamTableRow, + ) -> Result<()> { + self.ensure_table(table_id)?; + let key = self.make_key(row_id); + let user_id = row_id.user_id().clone(); + let mut state = self + .state + .write() + .map_err(|e| StreamLogError::Io(format!("Failed to acquire write lock: {}", e)))?; + let was_new = state + .data + .insert( + key, + StreamLogRecord::Put { + row_id: row_id.clone(), + row: row.clone(), + }, + ) + .is_none(); + if was_new { + *state.per_user_entry_counts.entry(user_id.clone()).or_default() += 1; 
+ } + self.evict_excess_user_rows(&mut state, &user_id); + Ok(()) + } + /// Delete old logs before a given timestamp and return count of deleted entries. pub fn delete_old_logs_with_count(&self, before_time: u64) -> Result { let mut state = self @@ -376,13 +415,16 @@ impl StreamLogStore for MemoryStreamLogStore { #[cfg(test)] mod tests { - use super::*; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{NamespaceId, TableName}; use std::collections::BTreeMap; + use datafusion::scalar::ScalarValue; + use kalamdb_commons::{ + ids::SeqId, + models::{rows::Row, NamespaceId, TableName}, + }; + + use super::*; + fn build_row(user_id: &UserId, seq: SeqId) -> StreamTableRow { let values: BTreeMap = BTreeMap::new(); StreamTableRow { diff --git a/backend/crates/kalamdb-streams/src/record.rs b/backend/crates/kalamdb-streams/src/record.rs index 1ac924c79..8ec795a6d 100644 --- a/backend/crates/kalamdb-streams/src/record.rs +++ b/backend/crates/kalamdb-streams/src/record.rs @@ -1,5 +1,4 @@ -use kalamdb_commons::ids::StreamTableRowId; -use kalamdb_commons::models::StreamTableRow; +use kalamdb_commons::{ids::StreamTableRowId, models::StreamTableRow}; use serde::{Deserialize, Serialize}; /// Log record stored in the commit log. diff --git a/backend/crates/kalamdb-streams/src/store_trait.rs b/backend/crates/kalamdb-streams/src/store_trait.rs index 234195d94..328b271d9 100644 --- a/backend/crates/kalamdb-streams/src/store_trait.rs +++ b/backend/crates/kalamdb-streams/src/store_trait.rs @@ -1,8 +1,12 @@ -use crate::error::Result; -use kalamdb_commons::ids::StreamTableRowId; -use kalamdb_commons::models::{StreamTableRow, TableId, UserId}; use std::collections::HashMap; +use kalamdb_commons::{ + ids::StreamTableRowId, + models::{StreamTableRow, TableId, UserId}, +}; + +use crate::error::Result; + /// Stream log storage trait. 
pub trait StreamLogStore: Send + Sync { fn append_rows( diff --git a/backend/crates/kalamdb-streams/src/utils.rs b/backend/crates/kalamdb-streams/src/utils.rs index 889b3a762..cfbbc1f03 100644 --- a/backend/crates/kalamdb-streams/src/utils.rs +++ b/backend/crates/kalamdb-streams/src/utils.rs @@ -1,6 +1,9 @@ +use std::{ + fs, + path::{Path, PathBuf}, +}; + use crate::error::{Result, StreamLogError}; -use std::fs; -use std::path::{Path, PathBuf}; pub(crate) fn parse_log_window(path: &Path) -> Option { let file_name = path.file_name()?.to_string_lossy(); @@ -8,28 +11,42 @@ pub(crate) fn parse_log_window(path: &Path) -> Option { trimmed.parse::().ok() } -pub(crate) fn read_dirs(path: &Path) -> Result> { - let mut dirs = Vec::new(); +pub(crate) fn parse_tmp_log_window(path: &Path) -> Option { + let file_name = path.file_name()?.to_string_lossy(); + let trimmed = file_name.strip_suffix(".log.tmp")?; + trimmed.parse::().ok() +} + +pub(crate) fn visit_dirs(path: &Path, mut visitor: F) -> Result +where + F: FnMut(PathBuf) -> Result, +{ for entry in fs::read_dir(path).map_err(|e| StreamLogError::Io(e.to_string()))? { let entry = entry.map_err(|e| StreamLogError::Io(e.to_string()))?; let path = entry.path(); if path.is_dir() { - dirs.push(path); + if !visitor(path)? { + return Ok(false); + } } } - Ok(dirs) + Ok(true) } -pub(crate) fn read_files(path: &Path) -> Result> { - let mut files = Vec::new(); +pub(crate) fn visit_files(path: &Path, mut visitor: F) -> Result +where + F: FnMut(PathBuf) -> Result, +{ for entry in fs::read_dir(path).map_err(|e| StreamLogError::Io(e.to_string()))? { let entry = entry.map_err(|e| StreamLogError::Io(e.to_string()))?; let path = entry.path(); if path.is_file() { - files.push(path); + if !visitor(path)? 
{ + return Ok(false); + } } } - Ok(files) + Ok(true) } pub(crate) fn cleanup_empty_dir(path: &Path) { diff --git a/backend/crates/kalamdb-system/src/impls/mod.rs b/backend/crates/kalamdb-system/src/impls/mod.rs index 97ccac929..8aa75d682 100644 --- a/backend/crates/kalamdb-system/src/impls/mod.rs +++ b/backend/crates/kalamdb-system/src/impls/mod.rs @@ -1,9 +1,10 @@ -use crate::{Manifest, ManifestCacheEntry}; +use std::sync::Arc; + use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::{StorageId, TableId, UserId}; +use kalamdb_commons::{models::schemas::TableDefinition, StorageId, TableId, UserId}; use kalamdb_store::StorageError; -use std::sync::Arc; + +use crate::{Manifest, ManifestCacheEntry}; // Notification service trait for data change notifications mod notification_service; diff --git a/backend/crates/kalamdb-system/src/initialization.rs b/backend/crates/kalamdb-system/src/initialization.rs index 44edebae0..9c9410ecb 100644 --- a/backend/crates/kalamdb-system/src/initialization.rs +++ b/backend/crates/kalamdb-system/src/initialization.rs @@ -5,10 +5,12 @@ //! This function remains as a compatibility no-op for callers that still invoke //! it during bootstrap. -use crate::error::SystemError; -use kalamdb_store::StorageBackend; use std::sync::Arc; +use kalamdb_store::StorageBackend; + +use crate::error::SystemError; + /// Compatibility no-op. 
/// /// System tables are now initialized and evolved through schema reconciliation @@ -17,16 +19,18 @@ pub async fn initialize_system_tables( _storage_backend: Arc, ) -> Result<(), SystemError> { log::debug!( - "initialize_system_tables() is a no-op; system schema reconciliation is handled by SchemaRegistry" + "initialize_system_tables() is a no-op; system schema reconciliation is handled by \ + SchemaRegistry" ); Ok(()) } #[cfg(test)] mod tests { - use super::*; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + #[tokio::test] async fn initialization_is_noop_and_succeeds() { let backend: Arc = Arc::new(InMemoryBackend::new()); diff --git a/backend/crates/kalamdb-system/src/lib.rs b/backend/crates/kalamdb-system/src/lib.rs index 78232a852..aa459eb88 100644 --- a/backend/crates/kalamdb-system/src/lib.rs +++ b/backend/crates/kalamdb-system/src/lib.rs @@ -48,43 +48,40 @@ pub use impls::{ ClusterCoordinator, ManifestService, NotificationService, SchemaRegistry, TopicPublisher, }; pub use initialization::initialize_system_tables; -pub use registry::SystemTablesRegistry; -pub use services::SystemColumnsService; +// Re-export from kalamdb-commons for convenience +pub use kalamdb_commons::models::{AuthType, OAuthProvider, Role}; // Re-export SystemTable and StoragePartition from kalamdb_commons for consistent usage pub use kalamdb_commons::{schemas, NamespaceId, StoragePartition, SystemTable, TableName}; - // Re-export DataFusion session security adapters for convenience pub use kalamdb_session_datafusion::{ check_system_table_access, secure_provider, SecuredSystemTableProvider, SessionUserContext, }; - -// Re-export all providers -pub use providers::{ - AuditLogsTableProvider, InMemoryChecker, JobNodesTableProvider, JobsTableProvider, - ManifestTableProvider, NamespacesTableProvider, SchemasTableProvider, StoragesTableProvider, - UsersTableProvider, -}; - -// Re-export live query models for convenience -pub use providers::live::models::{LiveQuery, 
LiveQueryStatus}; - +// Re-export other system table models for convenience +pub use providers::audit_logs::models::AuditLogEntry; // Re-export job models for convenience pub use providers::jobs::models::{ Job, JobFilter, JobOptions, JobSortField, JobStatus, JobType, SortOrder, }; - -// Re-export other system table models for convenience -pub use providers::audit_logs::models::AuditLogEntry; -pub use providers::job_nodes::models::JobNode; -pub use providers::manifest::models::{ - ColumnStats, FileRef, FileSubfolderState, Manifest, ManifestCacheEntry, SegmentMetadata, - SegmentStatus, SyncState, VectorEngine, VectorIndexMetadata, VectorIndexState, VectorMetric, +// Re-export live query models for convenience +pub use providers::live::models::{LiveQuery, LiveQueryStatus}; +pub use providers::{ + job_nodes::models::JobNode, + manifest::models::{ + ColumnStats, FileRef, FileSubfolderState, Manifest, ManifestCacheEntry, SegmentMetadata, + SegmentStatus, SyncState, VectorEngine, VectorIndexMetadata, VectorIndexState, + VectorMetric, + }, + namespaces::models::Namespace, + storages::models::{Storage, StorageType}, + users::models::{ + AuthData, User, DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS, + }, }; -pub use providers::namespaces::models::Namespace; -pub use providers::storages::models::{Storage, StorageType}; -pub use providers::users::models::{ - AuthData, User, DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS, +// Re-export all providers +pub use providers::{ + AuditLogsTableProvider, InMemoryChecker, JobNodesTableProvider, JobsTableProvider, + ManifestTableProvider, NamespacesTableProvider, SchemasTableProvider, StoragesTableProvider, + UsersTableProvider, }; - -// Re-export from kalamdb-commons for convenience -pub use kalamdb_commons::models::{AuthType, OAuthProvider, Role}; +pub use registry::SystemTablesRegistry; +pub use services::SystemColumnsService; diff --git 
a/backend/crates/kalamdb-system/src/providers/audit_logs/audit_logs_provider.rs b/backend/crates/kalamdb-system/src/providers/audit_logs/audit_logs_provider.rs index 4faa60ca0..a1b1b8c22 100644 --- a/backend/crates/kalamdb-system/src/providers/audit_logs/audit_logs_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/audit_logs/audit_logs_provider.rs @@ -3,23 +3,31 @@ //! This module provides a DataFusion TableProvider implementation for the system.audit_log table. //! Uses the EntityStore architecture with type-safe keys (AuditLogId). -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::audit_logs::models::AuditLogEntry; -use crate::providers::base::{ - extract_filter_value, system_rows_to_batch, SimpleProviderDefinition, -}; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::models::AuditLogId; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::{EntityStore, EntityStoreAsync}; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, +}; +use kalamdb_commons::{ + models::{rows::SystemTableRow, AuditLogId}, + schemas::TableDefinition, + SystemTable, +}; +use kalamdb_store::{ + entity_store::{EntityStore, EntityStoreAsync}, + IndexedEntityStore, StorageBackend, +}; + +use crate::{ + error::{SystemError, SystemResultExt}, + providers::{ + audit_logs::models::AuditLogEntry, + base::{extract_filter_value, system_rows_to_batch, SimpleProviderDefinition}, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; + /// System.audit_log table provider using EntityStore architecture #[derive(Clone)] pub struct 
AuditLogsTableProvider { @@ -174,14 +182,14 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { - use super::*; use arrow::array::Array; - use datafusion::arrow::array::TimestampMicrosecondArray; - use datafusion::datasource::TableProvider; + use datafusion::{arrow::array::TimestampMicrosecondArray, datasource::TableProvider}; use kalamdb_commons::UserId; use kalamdb_store::test_utils::InMemoryBackend; use serde_json::json; + use super::*; + fn create_test_provider() -> AuditLogsTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); AuditLogsTableProvider::new(backend) diff --git a/backend/crates/kalamdb-system/src/providers/audit_logs/models/audit_log.rs b/backend/crates/kalamdb-system/src/providers/audit_logs/models/audit_log.rs index 352b962a3..ec91f56f2 100644 --- a/backend/crates/kalamdb-system/src/providers/audit_logs/models/audit_log.rs +++ b/backend/crates/kalamdb-system/src/providers/audit_logs/models/audit_log.rs @@ -1,7 +1,9 @@ //! Audit log entry for administrative actions. 
-use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::ids::{AuditLogId, UserId}; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::ids::{AuditLogId, UserId}, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-system/src/providers/base.rs b/backend/crates/kalamdb-system/src/providers/base.rs index 19ed30cd4..004b8a349 100644 --- a/backend/crates/kalamdb-system/src/providers/base.rs +++ b/backend/crates/kalamdb-system/src/providers/base.rs @@ -14,21 +14,23 @@ use std::sync::Arc; use arrow::array::RecordBatch; use async_trait::async_trait; -use datafusion::catalog::Session; -use datafusion::common::{DataFusionError, DFSchema}; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::ExecutionPlan; -use kalamdb_datafusion_sources::exec::{ - finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource, +use datafusion::{ + catalog::Session, + common::{DFSchema, DataFusionError}, + logical_expr::{Expr, TableProviderFilterPushDown}, + physical_expr::PhysicalExpr, + physical_plan::ExecutionPlan, }; -use kalamdb_datafusion_sources::pruning::{ - FilterRequest, LimitRequest, ProjectionRequest, PruningRequest, +use kalamdb_commons::{ + conversions::json_rows_to_arrow_batch, + models::rows::{Row, SystemTableRow}, + KSerializable, StorageKey, +}; +use kalamdb_datafusion_sources::{ + exec::{finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource}, + provider::{combined_filter, pushdown_results_for_filters, FilterCapability}, + pruning::{FilterRequest, LimitRequest, ProjectionRequest, PruningRequest}, }; -use kalamdb_datafusion_sources::provider::{combined_filter, pushdown_results_for_filters, FilterCapability}; -use kalamdb_commons::conversions::json_rows_to_arrow_batch; -use kalamdb_commons::models::rows::{Row, SystemTableRow}; -use kalamdb_commons::{KSerializable, StorageKey}; use 
kalamdb_store::{EntityStore, IndexedEntityStore}; use crate::error::SystemError; @@ -95,8 +97,7 @@ where K: StorageKey + Clone + Send + Sync + 'static, V: KSerializable + Clone + Send + Sync + 'static, { - use datafusion::logical_expr::Operator; - use datafusion::scalar::ScalarValue; + use datafusion::{logical_expr::Operator, scalar::ScalarValue}; let mut start_key: Option = None; let mut prefix: Option = None; @@ -130,15 +131,15 @@ where let store = provider.store(); let mut pairs: Vec<(K, V)> = Vec::new(); if let Some((index_idx, index_prefix)) = store.find_best_index_for_filters(&filters) { - let iter = store - .scan_by_index_iter(index_idx, Some(&index_prefix), scan_limit) - .map_err(|error| { + let iter = store.scan_by_index_iter(index_idx, Some(&index_prefix), scan_limit).map_err( + |error| { DataFusionError::Execution(format!( "Failed to scan {} by index: {}", provider.table_name(), error )) - })?; + }, + )?; let effective_limit = scan_limit.unwrap_or(100_000); for result in iter { @@ -155,15 +156,13 @@ where } } } else { - let iter = store - .scan_iterator(prefix.as_ref(), start_key.as_ref()) - .map_err(|error| { - DataFusionError::Execution(format!( - "Failed to create iterator for {}: {}", - provider.table_name(), - error - )) - })?; + let iter = store.scan_iterator(prefix.as_ref(), start_key.as_ref()).map_err(|error| { + DataFusionError::Execution(format!( + "Failed to create iterator for {}: {}", + provider.table_name(), + error + )) + })?; let effective_limit = scan_limit.unwrap_or(100_000); for result in iter { @@ -175,11 +174,7 @@ where } }, Err(error) => { - log::warn!( - "Error during scan of {}: {}", - provider.table_name(), - error - ); + log::warn!("Error during scan of {}: {}", provider.table_name(), error); }, } } @@ -264,14 +259,15 @@ where let filters = self.pruning.filters.filters.as_ref().to_vec(); let limit = self.pruning.limit.limit; let table_name = self.provider.table_name().to_string(); - let batch = tokio::task::spawn_blocking(move 
|| build_indexed_batch(provider, filters, limit)) - .await - .map_err(|error| { - DataFusionError::Execution(format!( - "{} scan task failed: {}", - table_name, error - )) - })??; + let batch = + tokio::task::spawn_blocking(move || build_indexed_batch(provider, filters, limit)) + .await + .map_err(|error| { + DataFusionError::Execution(format!( + "{} scan task failed: {}", + table_name, error + )) + })??; finalize_deferred_batch( batch, @@ -420,15 +416,17 @@ where None }; - Ok(Arc::new(DeferredBatchExec::new(Arc::new( - IndexedSystemScanSource:: { - provider: self.clone(), - pruning, - physical_filter, - output_schema, - _marker: std::marker::PhantomData, - }, - )))) + Ok(Arc::new(DeferredBatchExec::new(Arc::new(IndexedSystemScanSource::< + Self, + K, + V, + > { + provider: self.clone(), + pruning, + physical_filter, + output_schema, + _marker: std::marker::PhantomData, + })))) } } @@ -528,8 +526,7 @@ where /// Helper function to extract string equality filter value for a column pub fn extract_filter_value(filters: &[Expr], column_name: &str) -> Option { - use datafusion::logical_expr::Operator; - use datafusion::scalar::ScalarValue; + use datafusion::{logical_expr::Operator, scalar::ScalarValue}; for expr in filters { if let Expr::BinaryExpr(binary) = expr { @@ -550,8 +547,7 @@ pub fn extract_range_filters( filters: &[Expr], column_name: &str, ) -> (Option, Option) { - use datafusion::logical_expr::Operator; - use datafusion::scalar::ScalarValue; + use datafusion::{logical_expr::Operator, scalar::ScalarValue}; let mut start = None; let mut end = None; @@ -576,17 +572,24 @@ pub fn extract_range_filters( #[cfg(test)] mod tests { - use super::*; - use datafusion::arrow::array::{RecordBatch, StringArray}; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; - use datafusion::execution::context::SessionContext; - use datafusion::logical_expr::{col, lit}; - use datafusion::physical_plan::collect; + use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Mutex, + 
}; + + use datafusion::{ + arrow::{ + array::{RecordBatch, StringArray}, + datatypes::{DataType, Field, Schema}, + }, + execution::context::SessionContext, + logical_expr::{col, lit}, + physical_plan::collect, + }; use kalamdb_commons::{KSerializable, StorageKey}; - use kalamdb_store::test_utils::InMemoryBackend; - use kalamdb_store::{IndexedEntityStore, StorageBackend}; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Mutex; + use kalamdb_store::{test_utils::InMemoryBackend, IndexedEntityStore, StorageBackend}; + + use super::*; #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] struct DummyValue { @@ -752,9 +755,12 @@ mod tests { } RecordBatch::try_new( self.arrow_schema(), - vec![Arc::new(StringArray::from(ids)), Arc::new(StringArray::from(values))], + vec![ + Arc::new(StringArray::from(ids)), + Arc::new(StringArray::from(values)), + ], ) - .map_err(|e| SystemError::Other(e.to_string())) + .map_err(|e| SystemError::Other(e.to_string())) } } @@ -838,9 +844,7 @@ mod tests { // Deferred scans should not touch storage during planning. 
assert_eq!(backend.scan_calls(), 0); - let batches = collect(plan, state.task_ctx()) - .await - .expect("collect deferred system scan"); + let batches = collect(plan, state.task_ctx()).await.expect("collect deferred system scan"); assert_eq!(batches.iter().map(|batch| batch.num_rows()).sum::(), 1); assert_eq!(backend.scan_calls(), 1); @@ -877,14 +881,9 @@ mod tests { let state = ctx.state(); let filter = col("value").eq(lit("match")); - let plan = provider - .base_system_scan(&state, None, &[filter], Some(1)) - .await - .unwrap(); + let plan = provider.base_system_scan(&state, None, &[filter], Some(1)).await.unwrap(); - let batches = collect(plan, state.task_ctx()) - .await - .expect("collect filtered system scan"); + let batches = collect(plan, state.task_ctx()).await.expect("collect filtered system scan"); assert_eq!(batches.iter().map(|batch| batch.num_rows()).sum::(), 1); let values = batches @@ -912,14 +911,9 @@ mod tests { let state = ctx.state(); let filter = col("value").eq(lit("match")); - let plan = provider - .base_simple_scan(&state, None, &[filter], Some(1)) - .await - .unwrap(); + let plan = provider.base_simple_scan(&state, None, &[filter], Some(1)).await.unwrap(); - let batches = collect(plan, state.task_ctx()) - .await - .expect("collect filtered simple scan"); + let batches = collect(plan, state.task_ctx()).await.expect("collect filtered simple scan"); assert_eq!(batches.iter().map(|batch| batch.num_rows()).sum::(), 1); assert_eq!(limits.lock().unwrap().as_slice(), &[None]); } diff --git a/backend/crates/kalamdb-system/src/providers/job_nodes/job_nodes_provider.rs b/backend/crates/kalamdb-system/src/providers/job_nodes/job_nodes_provider.rs index c66015c2b..e5dd6c8e8 100644 --- a/backend/crates/kalamdb-system/src/providers/job_nodes/job_nodes_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/job_nodes/job_nodes_provider.rs @@ -1,20 +1,25 @@ //! 
System.job_nodes table provider -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{system_rows_to_batch, IndexedProviderDefinition}; -use crate::providers::job_nodes::models::JobNode; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use crate::JobStatus; -use chrono::Utc; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::models::JobNodeId; -use kalamdb_commons::{JobId, NodeId, SystemTable}; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use chrono::Utc; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use kalamdb_commons::{ + models::{rows::SystemTableRow, JobNodeId}, + next_storage_key_bytes, JobId, NodeId, StorageKey, SystemTable, +}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + +use crate::{ + error::{SystemError, SystemResultExt}, + providers::{ + base::{system_rows_to_batch, IndexedProviderDefinition}, + job_nodes::models::JobNode, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, + JobStatus, +}; + pub type JobNodesStore = IndexedEntityStore; #[derive(Clone)] @@ -76,32 +81,46 @@ impl JobNodesTableProvider { statuses: &[JobStatus], limit: usize, ) -> Result, SystemError> { + const PAGE_SIZE: usize = 256; + let prefix = JobNodeId::prefix_for_node(node_id); - let scan_limit = if limit == 0 { - 10_000 - } else { - limit.saturating_mul(10) - }; - let rows = { - let store = self.store.clone(); - tokio::task::spawn_blocking(move || { - store.scan_with_raw_prefix(&prefix, None, scan_limit) - }) - .await - .into_system_error("scan_async job_nodes join error")? - .into_system_error("scan_async job_nodes error")? 
- }; + let mut start_key: Option> = None; + let mut filtered = Vec::new(); - let mut filtered = Vec::with_capacity(rows.len()); - for (_, row) in rows { - let node = Self::decode_job_node_row(&row)?; - if statuses.contains(&node.status) { - filtered.push(node); + loop { + let rows = { + let store = self.store.clone(); + let prefix = prefix.clone(); + let start_key = start_key.clone(); + tokio::task::spawn_blocking(move || { + store.scan_with_raw_prefix(&prefix, start_key.as_deref(), PAGE_SIZE) + }) + .await + .into_system_error("scan_async job_nodes join error")? + .into_system_error("scan_async job_nodes error")? + }; + + if rows.is_empty() { + break; } - } - if limit > 0 && filtered.len() > limit { - filtered.truncate(limit); + let next_start_key = + rows.last().map(|(key, _)| next_storage_key_bytes(&key.storage_key())); + + for (_, row) in rows { + let node = Self::decode_job_node_row(&row)?; + if statuses.contains(&node.status) { + filtered.push(node); + if limit > 0 && filtered.len() >= limit { + return Ok(filtered); + } + } + } + + if next_start_key.is_none() { + break; + } + start_key = next_start_key; } Ok(filtered) @@ -200,3 +219,60 @@ crate::impl_indexed_system_table_provider!( definition = provider_definition, build_batch = create_batch ); + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; + + use super::*; + + fn provider() -> JobNodesTableProvider { + let backend: Arc = Arc::new(InMemoryBackend::new()); + JobNodesTableProvider::new(backend) + } + + fn job_node(job_id: &str, node_id: NodeId, status: JobStatus, now: i64) -> JobNode { + JobNode { + job_id: JobId::new(job_id), + node_id, + status, + error_message: None, + created_at: now, + updated_at: now, + started_at: None, + finished_at: None, + } + } + + #[test] + fn list_for_node_with_statuses_scans_past_terminal_entries() { + let provider = provider(); + let node_id = NodeId::from(1u64); + let now = Utc::now().timestamp_millis(); 
+ + for idx in 0..300 { + provider + .create_job_node(job_node( + &format!("AA-{idx:012}"), + node_id, + JobStatus::Completed, + now, + )) + .expect("create completed job_node"); + } + + provider + .create_job_node(job_node("ZZ-queued", node_id, JobStatus::Queued, now)) + .expect("create queued job_node"); + + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime"); + let rows = runtime + .block_on(provider.list_for_node_with_statuses_async(&node_id, &[JobStatus::Queued], 1)) + .expect("list queued job_nodes"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].job_id, JobId::new("ZZ-queued")); + } +} diff --git a/backend/crates/kalamdb-system/src/providers/job_nodes/models/job_node.rs b/backend/crates/kalamdb-system/src/providers/job_nodes/models/job_node.rs index af19e78fd..f112c2d6a 100644 --- a/backend/crates/kalamdb-system/src/providers/job_nodes/models/job_node.rs +++ b/backend/crates/kalamdb-system/src/providers/job_nodes/models/job_node.rs @@ -1,11 +1,14 @@ //! Job-node execution state for system.job_nodes table. -use crate::JobStatus; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::ids::{JobId, JobNodeId, NodeId}; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::ids::{JobId, JobNodeId, NodeId}, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; +use crate::JobStatus; + #[table(name = "job_nodes", comment = "Per-node job execution state")] #[derive(Serialize, Deserialize, Clone, Debug, PartialEq)] pub struct JobNode { diff --git a/backend/crates/kalamdb-system/src/providers/jobs/jobs_indexes.rs b/backend/crates/kalamdb-system/src/providers/jobs/jobs_indexes.rs index 3776cb26e..da2c0f57c 100644 --- a/backend/crates/kalamdb-system/src/providers/jobs/jobs_indexes.rs +++ b/backend/crates/kalamdb-system/src/providers/jobs/jobs_indexes.rs @@ -2,16 +2,16 @@ //! //! This module defines secondary indexes for the system.jobs table. 
-use crate::providers::jobs::models::Job; -use crate::system_row_mapper::system_row_to_model; -use crate::JobStatus; -use crate::StoragePartition; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::JobId; -use kalamdb_store::IndexDefinition; use std::sync::Arc; +use kalamdb_commons::{models::rows::SystemTableRow, storage::Partition, JobId}; +use kalamdb_store::IndexDefinition; + +use crate::{ + providers::jobs::models::Job, system_row_mapper::system_row_to_model, JobStatus, + StoragePartition, +}; + /// Index for querying jobs by status + created_at (sorted). /// /// Key format: `[status_byte][created_at_be][job_id_bytes]` @@ -89,7 +89,8 @@ impl IndexDefinition for JobIdempotencyKeyIndex { /// Convert JobStatus to a u8 for index key ordering. /// -/// Order: New(0) < Queued(1) < Running(2) < Retrying(3) < Completed(4) < Failed(5) < Cancelled(6) < Skipped(7) +/// Order: New(0) < Queued(1) < Running(2) < Retrying(3) < Completed(4) < Failed(5) < Cancelled(6) < +/// Skipped(7) pub fn status_to_u8(status: JobStatus) -> u8 { match status { JobStatus::New => 0, @@ -128,11 +129,11 @@ pub fn create_jobs_indexes() -> Vec Job { let now = chrono::Utc::now().timestamp_millis(); Job { diff --git a/backend/crates/kalamdb-system/src/providers/jobs/jobs_provider.rs b/backend/crates/kalamdb-system/src/providers/jobs/jobs_provider.rs index 542604ea0..34db17782 100644 --- a/backend/crates/kalamdb-system/src/providers/jobs/jobs_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/jobs/jobs_provider.rs @@ -17,21 +17,22 @@ //! //! 
Note: namespace_id and table_name are now stored in the parameters JSON field -use super::jobs_indexes::{create_jobs_indexes, status_to_u8}; -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{system_rows_to_batch, IndexedProviderDefinition}; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use crate::JobStatus; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::JobId; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; -use super::models::{Job, JobFilter, JobSortField, SortOrder}; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use kalamdb_commons::{models::rows::SystemTableRow, JobId, SystemTable}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + +use super::{ + jobs_indexes::{create_jobs_indexes, status_to_u8}, + models::{Job, JobFilter, JobSortField, SortOrder}, +}; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::base::{system_rows_to_batch, IndexedProviderDefinition}, + system_row_mapper::{model_to_system_row, system_row_to_model}, + JobStatus, +}; /// Type alias for the indexed jobs store pub type JobsStore = IndexedEntityStore; @@ -178,7 +179,8 @@ impl JobsTableProvider { /// Optimized: When filtering by status and sorting by CreatedAt ASC, uses /// the `JobStatusCreatedAtIndex` for efficient prefix scanning. 
pub fn list_jobs_filtered(&self, filter: &JobFilter) -> Result, SystemError> { - // Optimization: If filtering by status(es) and sorting by CreatedAt ASC, use the status index + // Optimization: If filtering by status(es) and sorting by CreatedAt ASC, use the status + // index let use_index = filter.sort_by == Some(JobSortField::CreatedAt) && filter.sort_order == Some(SortOrder::Asc) && (filter.status.is_some() || filter.statuses.is_some()); @@ -270,7 +272,8 @@ impl JobsTableProvider { &self, filter: JobFilter, ) -> Result, SystemError> { - // Optimization: If filtering by status(es) and sorting by CreatedAt ASC, use the status index + // Optimization: If filtering by status(es) and sorting by CreatedAt ASC, use the status + // index let use_index = filter.sort_by == Some(JobSortField::CreatedAt) && filter.sort_order == Some(SortOrder::Asc) && (filter.status.is_some() || filter.statuses.is_some()); @@ -612,12 +615,13 @@ crate::impl_indexed_system_table_provider!( #[cfg(test)] mod tests { - use super::*; - use crate::{JobStatus, JobType}; use datafusion::datasource::TableProvider; use kalamdb_commons::NodeId; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + use crate::{JobStatus, JobType}; + fn create_test_provider() -> JobsTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); JobsTableProvider::new(backend) diff --git a/backend/crates/kalamdb-system/src/providers/jobs/models/job.rs b/backend/crates/kalamdb-system/src/providers/jobs/models/job.rs index f3561f5f3..824913042 100644 --- a/backend/crates/kalamdb-system/src/providers/jobs/models/job.rs +++ b/backend/crates/kalamdb-system/src/providers/jobs/models/job.rs @@ -2,10 +2,14 @@ //! //! Represents a background job (flush, retention, cleanup, etc.). 
-use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::ids::{JobId, NamespaceId, NodeId}; -use kalamdb_commons::models::TableName; -use kalamdb_commons::KSerializable; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{ + ids::{JobId, NamespaceId, NodeId}, + TableName, + }, + KSerializable, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -20,7 +24,8 @@ use super::{JobStatus, JobType}; /// - `job_id`: Unique job identifier (e.g., "job_123456") /// - `job_type`: Type of job (Flush, Compact, Cleanup, Backup, Restore) /// - `status`: Job status (Running, Completed, Failed, Cancelled) -/// - `parameters`: Optional JSON object containing job parameters (includes namespace_id, table_name, etc.) +/// - `parameters`: Optional JSON object containing job parameters (includes namespace_id, +/// table_name, etc.) /// - `result`: Optional result message (for completed jobs) /// - `trace`: Optional stack trace (for failed jobs) /// - `memory_used`: Optional memory usage in bytes @@ -332,16 +337,16 @@ impl Job { /// Extract namespace_id from parameters JSON pub fn namespace_id(&self) -> Option { - self.parameters.as_ref().and_then(|p| { - p.get("namespace_id")?.as_str().map(NamespaceId::new) - }) + self.parameters + .as_ref() + .and_then(|p| p.get("namespace_id")?.as_str().map(NamespaceId::new)) } /// Extract table_name from parameters JSON pub fn table_name(&self) -> Option { - self.parameters.as_ref().and_then(|p| { - p.get("table_name")?.as_str().map(TableName::new) - }) + self.parameters + .as_ref() + .and_then(|p| p.get("table_name")?.as_str().map(TableName::new)) } /// Set parameters (JSON value) diff --git a/backend/crates/kalamdb-system/src/providers/jobs/models/job_status.rs b/backend/crates/kalamdb-system/src/providers/jobs/models/job_status.rs index 7b06e48df..3c8a474e2 100644 --- a/backend/crates/kalamdb-system/src/providers/jobs/models/job_status.rs +++ 
b/backend/crates/kalamdb-system/src/providers/jobs/models/job_status.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-system/src/providers/jobs/models/job_type.rs b/backend/crates/kalamdb-system/src/providers/jobs/models/job_type.rs index 60227b15b..9df1645bd 100644 --- a/backend/crates/kalamdb-system/src/providers/jobs/models/job_type.rs +++ b/backend/crates/kalamdb-system/src/providers/jobs/models/job_type.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-system/src/providers/live/models/live_query.rs b/backend/crates/kalamdb-system/src/providers/live/models/live_query.rs index 7e965c8e4..ef1637f51 100644 --- a/backend/crates/kalamdb-system/src/providers/live/models/live_query.rs +++ b/backend/crates/kalamdb-system/src/providers/live/models/live_query.rs @@ -1,15 +1,18 @@ //! Live query subscription row model for the system.live view. -use super::LiveQueryStatus; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{ - ids::{LiveQueryId, NamespaceId, UserId}, - NodeId, TableName, +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{ + ids::{LiveQueryId, NamespaceId, UserId}, + NodeId, TableName, + }, }; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; use serde_json::Value; +use super::LiveQueryStatus; + /// Live query subscription row model for `system.live`. /// /// Represents an active live query subscription (WebSocket connection). 
diff --git a/backend/crates/kalamdb-system/src/providers/live/models/live_query_status.rs b/backend/crates/kalamdb-system/src/providers/live/models/live_query_status.rs index 5001e1da3..74914dbf5 100644 --- a/backend/crates/kalamdb-system/src/providers/live/models/live_query_status.rs +++ b/backend/crates/kalamdb-system/src/providers/live/models/live_query_status.rs @@ -1,5 +1,4 @@ -use std::fmt; -use std::str::FromStr; +use std::{fmt, str::FromStr}; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-system/src/providers/manifest/manifest_definition.rs b/backend/crates/kalamdb-system/src/providers/manifest/manifest_definition.rs index becacb941..ccc5c1e40 100644 --- a/backend/crates/kalamdb-system/src/providers/manifest/manifest_definition.rs +++ b/backend/crates/kalamdb-system/src/providers/manifest/manifest_definition.rs @@ -1,10 +1,11 @@ +use std::sync::OnceLock; + use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; -use std::sync::OnceLock; pub fn manifest_table_definition() -> TableDefinition { let columns = vec![ diff --git a/backend/crates/kalamdb-system/src/providers/manifest/manifest_indexes.rs b/backend/crates/kalamdb-system/src/providers/manifest/manifest_indexes.rs index b591e5141..f5148c816 100644 --- a/backend/crates/kalamdb-system/src/providers/manifest/manifest_indexes.rs +++ b/backend/crates/kalamdb-system/src/providers/manifest/manifest_indexes.rs @@ -2,13 +2,13 @@ //! //! This module defines secondary indexes for the system.manifest table. 
-use crate::StoragePartition; +use std::sync::Arc; + use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::{ManifestId, StorageKey}; +use kalamdb_commons::{models::rows::SystemTableRow, storage::Partition, ManifestId, StorageKey}; use kalamdb_store::IndexDefinition; -use std::sync::Arc; + +use crate::StoragePartition; /// Index for querying manifests by PendingWrite state. /// @@ -60,12 +60,16 @@ pub fn create_manifest_indexes() -> Vec SystemTableRow { let mut fields = BTreeMap::new(); fields.insert("sync_state".to_string(), ScalarValue::Utf8(Some(sync_state.to_string()))); diff --git a/backend/crates/kalamdb-system/src/providers/manifest/manifest_provider.rs b/backend/crates/kalamdb-system/src/providers/manifest/manifest_provider.rs index bdce99d4f..f0086aef1 100644 --- a/backend/crates/kalamdb-system/src/providers/manifest/manifest_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/manifest/manifest_provider.rs @@ -3,23 +3,28 @@ //! This module provides a DataFusion TableProvider implementation for the system.manifest table. //! Exposes manifest cache entries as a queryable system table. 
-use super::{ - create_manifest_indexes, manifest_arrow_schema, manifest_table_definition, Manifest, SyncState, +use std::sync::{Arc, OnceLock, RwLock}; + +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, }; -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{extract_filter_value, SimpleProviderDefinition}; -use crate::providers::manifest::ManifestCacheEntry; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::{ManifestId, StorageKey, TableId}; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend, StorageError}; +use kalamdb_commons::{models::rows::SystemTableRow, ManifestId, StorageKey, TableId}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend, StorageError}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::sync::{Arc, OnceLock, RwLock}; + +use super::{ + create_manifest_indexes, manifest_arrow_schema, manifest_table_definition, Manifest, SyncState, +}; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::{ + base::{extract_filter_value, SimpleProviderDefinition}, + manifest::ManifestCacheEntry, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; #[derive(Debug, Clone, Serialize, Deserialize)] struct ManifestStorageRow { @@ -423,11 +428,11 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { + use kalamdb_commons::{NamespaceId, TableId, TableName}; + use kalamdb_store::{entity_store::EntityStore, test_utils::InMemoryBackend}; + use super::*; use crate::providers::manifest::{Manifest, ManifestCacheEntry, SyncState}; - use kalamdb_commons::{NamespaceId, TableId, TableName}; - use 
kalamdb_store::entity_store::EntityStore; - use kalamdb_store::test_utils::InMemoryBackend; #[tokio::test] async fn test_empty_manifest_table() { diff --git a/backend/crates/kalamdb-system/src/providers/manifest/models/manifest.rs b/backend/crates/kalamdb-system/src/providers/manifest/models/manifest.rs index 0ef02d912..45bec0c32 100644 --- a/backend/crates/kalamdb-system/src/providers/manifest/models/manifest.rs +++ b/backend/crates/kalamdb-system/src/providers/manifest/models/manifest.rs @@ -11,14 +11,16 @@ //! `ColumnStats.min/max` use `StoredScalarValue` for typed scalar stats and //! proper JSON output for manifest.json files. -use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use kalamdb_commons::{ + ids::SeqId, + models::{rows::StoredScalarValue, TableId}, + UserId, +}; +use serde::{Deserialize, Serialize}; + use super::FileSubfolderState; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::StoredScalarValue; -use kalamdb_commons::models::TableId; -use kalamdb_commons::UserId; /// Synchronization state of a cached manifest entry. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] @@ -549,7 +551,7 @@ pub struct Manifest { /// This is embedded in manifest.json so index artifacts can be managed with /// the same atomic flush lifecycle used for segment metadata. 
#[serde(default)] - //TODO: Dont include in json if empty + // TODO: Dont include in json if empty pub vector_indexes: HashMap, } @@ -685,9 +687,10 @@ impl Manifest { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::{NamespaceId, TableName}; + use super::*; + #[test] fn test_manifest_cache_entry_json_roundtrip() { // This test ensures ManifestCacheEntry can be serialized/deserialized diff --git a/backend/crates/kalamdb-system/src/providers/mod.rs b/backend/crates/kalamdb-system/src/providers/mod.rs index ff1f38306..d20a4d184 100644 --- a/backend/crates/kalamdb-system/src/providers/mod.rs +++ b/backend/crates/kalamdb-system/src/providers/mod.rs @@ -7,8 +7,8 @@ //! - Model types with `#[table]` derive are the source of truth for `TableDefinition` //! - `*TableProvider` structs implement DataFusion's `TableProvider` trait //! - Providers memoize Arrow schemas with local `OnceLock` caches -//! - `base` centralizes deferred scan execution so planning stays lightweight -//! and provider families share one filter/projection execution model +//! - `base` centralizes deferred scan execution so planning stays lightweight and provider families +//! 
share one filter/projection execution model pub mod audit_logs; pub mod base; @@ -24,22 +24,17 @@ pub mod topics; pub mod users; // Re-export base traits +// Re-export all providers +pub use audit_logs::{AuditLogEntry, AuditLogsTableProvider}; pub use base::{ extract_filter_value, extract_range_filters, SimpleSystemTableScan, SystemTableScan, }; - -// Re-export all providers -pub use audit_logs::AuditLogEntry; -pub use audit_logs::AuditLogsTableProvider; pub use job_nodes::JobNodesTableProvider; pub use jobs::JobsTableProvider; -pub use manifest::{InMemoryChecker, ManifestTableProvider}; +pub use manifest::{manifest_table_definition, InMemoryChecker, ManifestTableProvider}; pub use namespaces::NamespacesTableProvider; pub use storages::StoragesTableProvider; -pub use tables::SchemasTableProvider; +pub use tables::{schemas_table_definition, SchemasTableProvider}; pub use topic_offsets::TopicOffsetsTableProvider; pub use topics::TopicsTableProvider; pub use users::UsersTableProvider; - -pub use manifest::manifest_table_definition; -pub use tables::schemas_table_definition; diff --git a/backend/crates/kalamdb-system/src/providers/namespaces/models/namespace.rs b/backend/crates/kalamdb-system/src/providers/namespaces/models/namespace.rs index 4afd0aae5..4d9a9cc38 100644 --- a/backend/crates/kalamdb-system/src/providers/namespaces/models/namespace.rs +++ b/backend/crates/kalamdb-system/src/providers/namespaces/models/namespace.rs @@ -1,7 +1,6 @@ //! Namespace entity for system.namespaces table. 
-use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::ids::NamespaceId; +use kalamdb_commons::{datatypes::KalamDataType, models::ids::NamespaceId}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -24,15 +23,15 @@ use serde_json::Value; /// ## Example /// /// ```rust -/// use kalamdb_system::Namespace; /// use kalamdb_commons::NamespaceId; +/// use kalamdb_system::Namespace; /// /// let namespace = Namespace { /// namespace_id: NamespaceId::default(), -/// name: "default".to_string(), -/// created_at: 1730000000000, -/// options: Some(serde_json::json!({})), -/// table_count: 0, +/// name: "default".to_string(), +/// created_at: 1730000000000, +/// options: Some(serde_json::json!({})), +/// table_count: 0, /// }; /// ``` /// Namespace struct with fields ordered for optimal memory alignment. @@ -90,7 +89,7 @@ pub struct Namespace { default = "None", comment = "Number of tables in this namespace" )] - pub table_count: i32, //TODO: Remove this field and calculate on the fly + pub table_count: i32, // TODO: Remove this field and calculate on the fly } impl Namespace { diff --git a/backend/crates/kalamdb-system/src/providers/namespaces/namespaces_provider.rs b/backend/crates/kalamdb-system/src/providers/namespaces/namespaces_provider.rs index 6e7c9b839..1e9a9b57b 100644 --- a/backend/crates/kalamdb-system/src/providers/namespaces/namespaces_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/namespaces/namespaces_provider.rs @@ -3,22 +3,24 @@ //! This module provides a DataFusion TableProvider implementation for the system.namespaces table. //! Uses the new EntityStore architecture with NamespaceId keys. 
-use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{ - extract_filter_value, system_rows_to_batch, SimpleProviderDefinition, -}; -use crate::providers::namespaces::models::Namespace; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::NamespaceId; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, +}; +use kalamdb_commons::{models::rows::SystemTableRow, NamespaceId, SystemTable}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + +use crate::{ + error::{SystemError, SystemResultExt}, + providers::{ + base::{extract_filter_value, system_rows_to_batch, SimpleProviderDefinition}, + namespaces::models::Namespace, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; + /// System.namespaces table provider using EntityStore architecture #[derive(Clone)] pub struct NamespacesTableProvider { @@ -187,10 +189,11 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { - use super::*; use datafusion::datasource::TableProvider; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + fn create_test_provider() -> NamespacesTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); NamespacesTableProvider::new(backend) diff --git a/backend/crates/kalamdb-system/src/providers/storages/models/location_config.rs b/backend/crates/kalamdb-system/src/providers/storages/models/location_config.rs index 52b4d1335..1780be5cf 100644 --- a/backend/crates/kalamdb-system/src/providers/storages/models/location_config.rs +++ 
b/backend/crates/kalamdb-system/src/providers/storages/models/location_config.rs @@ -1,6 +1,7 @@ -use super::{AzureStorageConfig, GcsStorageConfig, LocalStorageConfig, S3StorageConfig}; use serde::{Deserialize, Serialize}; +use super::{AzureStorageConfig, GcsStorageConfig, LocalStorageConfig, S3StorageConfig}; + /// Type-safe JSON configuration for `system.storages` locations. /// /// Stored as raw JSON text in the `config_json` column, but can be decoded diff --git a/backend/crates/kalamdb-system/src/providers/storages/models/storage.rs b/backend/crates/kalamdb-system/src/providers/storages/models/storage.rs index 68c6b442c..87fca52f4 100644 --- a/backend/crates/kalamdb-system/src/providers/storages/models/storage.rs +++ b/backend/crates/kalamdb-system/src/providers/storages/models/storage.rs @@ -1,12 +1,12 @@ //! Storage configuration entity for system.storages table. -use super::{StorageLocationConfig, StorageLocationConfigError, StorageType}; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::ids::StorageId; +use kalamdb_commons::{datatypes::KalamDataType, models::ids::StorageId}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; use serde_json::Value; +use super::{StorageLocationConfig, StorageLocationConfigError, StorageType}; + /// Storage configuration in system_storages table #[table( name = "storages", @@ -72,7 +72,8 @@ pub struct Storage { nullable = true, primary_key = false, default = "None", - comment = "Storage credentials JSON (WARNING: stored as plaintext - use environment variables for sensitive credentials)" + comment = "Storage credentials JSON (WARNING: stored as plaintext - use environment \ + variables for sensitive credentials)" )] pub credentials: Option, /// Storage backend parameters encoded as JSON. @@ -137,10 +138,7 @@ pub struct Storage { impl Storage { /// Decode `config_json` into a type-safe `StorageLocationConfig`. 
pub fn location_config(&self) -> Result { - let raw = self - .config_json - .as_ref() - .ok_or(StorageLocationConfigError::MissingConfigJson)?; + let raw = self.config_json.as_ref().ok_or(StorageLocationConfigError::MissingConfigJson)?; serde_json::from_value::(raw.clone()) .map_err(|e| StorageLocationConfigError::InvalidJson(e.to_string())) diff --git a/backend/crates/kalamdb-system/src/providers/storages/models/storage_mode.rs b/backend/crates/kalamdb-system/src/providers/storages/models/storage_mode.rs index 536a7f438..d5a29e0ee 100644 --- a/backend/crates/kalamdb-system/src/providers/storages/models/storage_mode.rs +++ b/backend/crates/kalamdb-system/src/providers/storages/models/storage_mode.rs @@ -1,6 +1,6 @@ +use std::{fmt, str::FromStr}; + use serde::{Deserialize, Serialize}; -use std::fmt; -use std::str::FromStr; /// Enum representing storage mode preferences for users. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] diff --git a/backend/crates/kalamdb-system/src/providers/storages/models/storage_type.rs b/backend/crates/kalamdb-system/src/providers/storages/models/storage_type.rs index f42c8eb5a..86f58fb68 100644 --- a/backend/crates/kalamdb-system/src/providers/storages/models/storage_type.rs +++ b/backend/crates/kalamdb-system/src/providers/storages/models/storage_type.rs @@ -1,6 +1,7 @@ -use serde::{Deserialize, Serialize}; use std::fmt; +use serde::{Deserialize, Serialize}; + /// Enum representing the type of storage backend in KalamDB. /// /// - Filesystem: Local or network filesystem storage diff --git a/backend/crates/kalamdb-system/src/providers/storages/storages_provider.rs b/backend/crates/kalamdb-system/src/providers/storages/storages_provider.rs index 21ae1f89e..0c6135f4f 100644 --- a/backend/crates/kalamdb-system/src/providers/storages/storages_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/storages/storages_provider.rs @@ -3,22 +3,24 @@ //! 
This module provides a DataFusion TableProvider implementation for the system.storages table. //! Uses the new EntityStore architecture with StorageId keys. -use crate::error::SystemError; -use crate::providers::base::{ - extract_filter_value, system_rows_to_batch, SimpleProviderDefinition, -}; -use crate::providers::storages::models::Storage; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::StorageId; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, +}; +use kalamdb_commons::{models::rows::SystemTableRow, StorageId, SystemTable}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + +use crate::{ + error::SystemError, + providers::{ + base::{extract_filter_value, system_rows_to_batch, SimpleProviderDefinition}, + storages::models::Storage, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; + /// System.storages table provider using EntityStore architecture #[derive(Clone)] pub struct StoragesTableProvider { @@ -171,11 +173,12 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { - use super::*; - use crate::StorageType; use datafusion::datasource::TableProvider; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + use crate::StorageType; + fn create_test_provider() -> StoragesTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); StoragesTableProvider::new(backend) @@ -268,7 +271,10 @@ mod tests { // Scan let batch = provider.scan_all_storages().unwrap(); assert_eq!(batch.num_rows(), 1); - assert_eq!(batch.num_columns(), 11); // 
storage_id, storage_name, description, storage_type, base_directory, credentials, config_json, shared_tables_template, user_tables_template, created_at, updated_at + assert_eq!(batch.num_columns(), 11); // storage_id, storage_name, description, storage_type, + // base_directory, credentials, config_json, + // shared_tables_template, user_tables_template, + // created_at, updated_at } #[tokio::test] diff --git a/backend/crates/kalamdb-system/src/providers/tables/schemas_definition.rs b/backend/crates/kalamdb-system/src/providers/tables/schemas_definition.rs index 89db2a29c..e829160eb 100644 --- a/backend/crates/kalamdb-system/src/providers/tables/schemas_definition.rs +++ b/backend/crates/kalamdb-system/src/providers/tables/schemas_definition.rs @@ -1,10 +1,11 @@ +use std::sync::OnceLock; + use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; -use std::sync::OnceLock; pub fn schemas_table_definition() -> TableDefinition { let columns = vec![ diff --git a/backend/crates/kalamdb-system/src/providers/tables/schemas_provider.rs b/backend/crates/kalamdb-system/src/providers/tables/schemas_provider.rs index 946d88baa..f7cc1473a 100644 --- a/backend/crates/kalamdb-system/src/providers/tables/schemas_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/tables/schemas_provider.rs @@ -3,17 +3,26 @@ //! Phase 16: Consolidated provider using single store with TableVersionId keys. //! Exposes all table versions with is_latest flag for schema history queries. 
-use super::{new_schemas_store, schemas_arrow_schema, SchemasStore}; -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{extract_filter_value, SimpleProviderDefinition}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::array::RecordBatch; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::{TableDefinition, TableOptions}; +use std::{ + collections::HashMap, + sync::{Arc, OnceLock}, +}; + +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, +}; +use kalamdb_commons::{ + models::TableId, + schemas::{TableDefinition, TableOptions}, +}; use kalamdb_store::StorageBackend; -use std::collections::HashMap; -use std::sync::{Arc, OnceLock}; + +use super::{new_schemas_store, schemas_arrow_schema, SchemasStore}; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::base::{extract_filter_value, SimpleProviderDefinition}, +}; /// System.tables table provider using consolidated store with versioning #[derive(Clone)] @@ -150,7 +159,8 @@ impl SchemasTableProvider { /// The `is_latest` column indicates which version is the current active schema. pub fn scan_all_tables(&self) -> Result { // Return ALL versions including historical ones for schema evolution support. - // The store contains both `` pointers and `N` rows; we expose only versioned rows. + // The store contains both `` pointers and `N` rows; we expose only versioned + // rows. let entries = self.store.scan_all_with_versions()?; // First pass: find max version for each table. 
@@ -233,11 +243,9 @@ impl SchemasTableProvider { versions .into_iter() .map(|(table_id, def)| { - let is_latest = max_versions - .get(&table_id) - .copied() - .unwrap_or(def.schema_version) - == def.schema_version; + let is_latest = + max_versions.get(&table_id).copied().unwrap_or(def.schema_version) + == def.schema_version; (def, is_latest) }) .collect(), @@ -350,16 +358,19 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { - use super::*; - use datafusion::datasource::TableProvider; - use datafusion::logical_expr::{col, lit}; - use kalamdb_commons::datatypes::KalamDataType; - use kalamdb_commons::schemas::{ - ColumnDefinition, TableDefinition, TableOptions, TableType as KalamTableType, + use datafusion::{ + datasource::TableProvider, + logical_expr::{col, lit}, + }; + use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableOptions, TableType as KalamTableType}, + NamespaceId, TableId, TableName, }; - use kalamdb_commons::{NamespaceId, TableId, TableName}; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + fn create_test_provider() -> SchemasTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); SchemasTableProvider::new(backend) @@ -514,9 +525,7 @@ mod tests { table_def.schema_version = 2; provider.update_table(&table_id, &table_def).unwrap(); - let batch = provider - .build_versions_batch_for_namespace(table_id.namespace_id()) - .unwrap(); + let batch = provider.build_versions_batch_for_namespace(table_id.namespace_id()).unwrap(); assert_eq!(batch.num_rows(), 2); diff --git a/backend/crates/kalamdb-system/src/providers/tables/schemas_store.rs b/backend/crates/kalamdb-system/src/providers/tables/schemas_store.rs index 83c57f2d9..ffd7d1375 100644 --- a/backend/crates/kalamdb-system/src/providers/tables/schemas_store.rs +++ b/backend/crates/kalamdb-system/src/providers/tables/schemas_store.rs @@ -10,14 +10,20 @@ //! - Efficient range scans for version history //! 
- Single storage partition for simplicity -use crate::SystemTable; -use kalamdb_commons::models::{NamespaceId, TableId, TableVersionId}; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_commons::storage::Partition; -use kalamdb_store::entity_store::{CrossUserTableStore, EntityStore}; -use kalamdb_store::StorageBackend; use std::sync::Arc; +use kalamdb_commons::{ + models::{NamespaceId, TableId, TableVersionId}, + schemas::TableDefinition, + storage::Partition, +}; +use kalamdb_store::{ + entity_store::{CrossUserTableStore, EntityStore}, + StorageBackend, +}; + +use crate::SystemTable; + /// Store for `system.schemas` definitions. /// /// Uses `TableVersionId` keys to support both latest-pointer and historical @@ -240,12 +246,7 @@ impl SchemasStore { .namespace_id() .as_str() .cmp(right_id.namespace_id().as_str()) - .then_with(|| { - left_id - .table_name() - .as_str() - .cmp(right_id.table_name().as_str()) - }) + .then_with(|| left_id.table_name().as_str().cmp(right_id.table_name().as_str())) .then_with(|| left_def.schema_version.cmp(&right_def.schema_version)) }); @@ -291,12 +292,14 @@ impl SchemasStore { #[cfg(test)] mod tests { + use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, Role, TableId, TableName, + }; + use kalamdb_store::{test_utils::InMemoryBackend, CrossUserTableStore}; + use super::*; - use kalamdb_commons::datatypes::KalamDataType; - use kalamdb_commons::schemas::{ColumnDefinition, TableDefinition, TableOptions, TableType}; - use kalamdb_commons::{NamespaceId, Role, TableId, TableName}; - use kalamdb_store::test_utils::InMemoryBackend; - use kalamdb_store::CrossUserTableStore; fn create_test_store() -> SchemasStore { let backend: Arc = Arc::new(InMemoryBackend::new()); @@ -517,9 +520,7 @@ mod tests { table_def.schema_version = 2; store.put_version(&table_id, &table_def).unwrap(); - let versions = store - 
.scan_namespace_with_versions(table_id.namespace_id()) - .unwrap(); + let versions = store.scan_namespace_with_versions(table_id.namespace_id()).unwrap(); assert_eq!(versions.len(), 2); assert_eq!(versions[0].1.schema_version, 1); diff --git a/backend/crates/kalamdb-system/src/providers/topic_offsets/models.rs b/backend/crates/kalamdb-system/src/providers/topic_offsets/models.rs index 9329b6748..97bbf6e02 100644 --- a/backend/crates/kalamdb-system/src/providers/topic_offsets/models.rs +++ b/backend/crates/kalamdb-system/src/providers/topic_offsets/models.rs @@ -1,7 +1,9 @@ //! Topic offset models - entity definitions -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{ConsumerGroupId, TopicId}; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{ConsumerGroupId, TopicId}, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-system/src/providers/topic_offsets/topic_offsets_provider.rs b/backend/crates/kalamdb-system/src/providers/topic_offsets/topic_offsets_provider.rs index a2ef59568..4cde417d1 100644 --- a/backend/crates/kalamdb-system/src/providers/topic_offsets/topic_offsets_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/topic_offsets/topic_offsets_provider.rs @@ -1,24 +1,26 @@ //! System.topic_offsets table provider //! -//! This module provides a DataFusion TableProvider implementation for the system.topic_offsets table. -//! Uses `IndexedEntityStore` with composite primary key (topic_id, group_id, partition_id). +//! This module provides a DataFusion TableProvider implementation for the system.topic_offsets +//! table. Uses `IndexedEntityStore` with composite primary key (topic_id, group_id, partition_id). 
-use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{ - extract_filter_value, system_rows_to_batch, SimpleProviderDefinition, -}; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::models::{ConsumerGroupId, TopicId}; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::{ + arrow::{array::RecordBatch, datatypes::SchemaRef}, + logical_expr::Expr, +}; +use kalamdb_commons::{ + models::{rows::SystemTableRow, ConsumerGroupId, TopicId}, + SystemTable, +}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + use super::models::TopicOffset; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::base::{extract_filter_value, system_rows_to_batch, SimpleProviderDefinition}, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; /// Composite key for topic offsets as a String: "topic_id:group_id:partition_id" pub type TopicOffsetKey = String; @@ -269,9 +271,10 @@ crate::impl_simple_system_table_provider!( #[cfg(test)] mod tests { - use super::*; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + #[test] fn test_topic_offsets_provider_creation() { let backend = Arc::new(InMemoryBackend::new()); diff --git a/backend/crates/kalamdb-system/src/providers/topics/models/topic.rs b/backend/crates/kalamdb-system/src/providers/topics/models/topic.rs index 12c46f31c..5efdab674 100644 --- a/backend/crates/kalamdb-system/src/providers/topics/models/topic.rs +++ b/backend/crates/kalamdb-system/src/providers/topics/models/topic.rs @@ -2,8 +2,7 @@ //! //! Represents a durable pub/sub topic backed by RocksDB. 
-use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::TopicId; +use kalamdb_commons::{datatypes::KalamDataType, models::TopicId}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; @@ -212,9 +211,10 @@ impl Topic { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableId, TableName, TopicOp}; + use super::*; + #[test] fn test_topic_creation() { let topic = Topic::new(TopicId::new("topic_123"), "app.notifications".to_string()); diff --git a/backend/crates/kalamdb-system/src/providers/topics/models/topic_route.rs b/backend/crates/kalamdb-system/src/providers/topics/models/topic_route.rs index 003642280..cb8c6353e 100644 --- a/backend/crates/kalamdb-system/src/providers/topics/models/topic_route.rs +++ b/backend/crates/kalamdb-system/src/providers/topics/models/topic_route.rs @@ -108,9 +108,10 @@ impl TopicRoute { #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + #[test] fn test_topic_route_builder() { let route = TopicRoute::new( diff --git a/backend/crates/kalamdb-system/src/providers/topics/topics_provider.rs b/backend/crates/kalamdb-system/src/providers/topics/topics_provider.rs index 0c0ac421a..4da6a316a 100644 --- a/backend/crates/kalamdb-system/src/providers/topics/topics_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/topics/topics_provider.rs @@ -3,19 +3,21 @@ //! This module provides a DataFusion TableProvider implementation for the system.topics table. //! Uses `IndexedEntityStore` for automatic secondary index management. 
-use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{system_rows_to_batch, IndexedProviderDefinition}; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::models::TopicId; -use kalamdb_commons::SystemTable; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use kalamdb_commons::{ + models::{rows::SystemTableRow, TopicId}, + SystemTable, +}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + use super::models::Topic; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::base::{system_rows_to_batch, IndexedProviderDefinition}, + system_row_mapper::{model_to_system_row, system_row_to_model}, +}; /// Type alias for the indexed topics store pub type TopicsStore = IndexedEntityStore; @@ -180,9 +182,10 @@ crate::impl_indexed_system_table_provider!( #[cfg(test)] mod tests { - use super::*; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + #[test] fn test_topics_provider_creation() { let backend = Arc::new(InMemoryBackend::new()); diff --git a/backend/crates/kalamdb-system/src/providers/users/models/mod.rs b/backend/crates/kalamdb-system/src/providers/users/models/mod.rs index bd421f222..f8af87120 100644 --- a/backend/crates/kalamdb-system/src/providers/users/models/mod.rs +++ b/backend/crates/kalamdb-system/src/providers/users/models/mod.rs @@ -4,7 +4,6 @@ mod auth_data; mod user; pub use auth_data::AuthData; -pub use user::{User, DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS}; - // Re-export from kalamdb-commons for convenience pub use kalamdb_commons::models::{AuthType, OAuthProvider, Role}; +pub use user::{User, 
DEFAULT_LOCKOUT_DURATION_MINUTES, DEFAULT_MAX_FAILED_ATTEMPTS}; diff --git a/backend/crates/kalamdb-system/src/providers/users/models/user.rs b/backend/crates/kalamdb-system/src/providers/users/models/user.rs index fae0f6182..36ed37cf1 100644 --- a/backend/crates/kalamdb-system/src/providers/users/models/user.rs +++ b/backend/crates/kalamdb-system/src/providers/users/models/user.rs @@ -2,13 +2,15 @@ //! //! Represents a database user with authentication and authorization information. -use crate::providers::storages::models::StorageMode; -use crate::providers::users::models::auth_data::AuthData; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{ids::UserId, AuthType, Role, StorageId}; +use kalamdb_commons::{ + datatypes::KalamDataType, + models::{ids::UserId, AuthType, Role, StorageId}, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; +use crate::providers::{storages::models::StorageMode, users::models::auth_data::AuthData}; + /// Default maximum failed login attempts before lockout pub const DEFAULT_MAX_FAILED_ATTEMPTS: i32 = 5; @@ -43,25 +45,25 @@ pub const DEFAULT_LOCKOUT_DURATION_MINUTES: i64 = 15; /// ## Example /// /// ```rust +/// use kalamdb_commons::{AuthType, Role, StorageId, StorageMode, UserId}; /// use kalamdb_system::User; -/// use kalamdb_commons::{UserId, Role, AuthType, StorageMode, StorageId}; /// /// let user = User { -/// user_id: UserId::new("u_123456"), -/// password_hash: "$2b$12$...".to_string(), -/// role: Role::User, -/// email: Some("alice@example.com".to_string()), -/// auth_type: AuthType::Password, -/// auth_data: None, -/// storage_mode: StorageMode::Table, -/// storage_id: Some(StorageId::new("storage_1")), +/// user_id: UserId::new("u_123456"), +/// password_hash: "$2b$12$...".to_string(), +/// role: Role::User, +/// email: Some("alice@example.com".to_string()), +/// auth_type: AuthType::Password, +/// auth_data: None, +/// storage_mode: StorageMode::Table, +/// storage_id: 
Some(StorageId::new("storage_1")), /// failed_login_attempts: 0, -/// locked_until: None, -/// last_login_at: None, -/// created_at: 1730000000000, -/// updated_at: 1730000000000, -/// last_seen: None, -/// deleted_at: None, +/// locked_until: None, +/// last_login_at: None, +/// created_at: 1730000000000, +/// updated_at: 1730000000000, +/// last_seen: None, +/// deleted_at: None, /// }; /// ``` /// User struct with fields ordered for optimal memory alignment. diff --git a/backend/crates/kalamdb-system/src/providers/users/users_indexes.rs b/backend/crates/kalamdb-system/src/providers/users/users_indexes.rs index 360fa5414..25ee8d54c 100644 --- a/backend/crates/kalamdb-system/src/providers/users/users_indexes.rs +++ b/backend/crates/kalamdb-system/src/providers/users/users_indexes.rs @@ -2,15 +2,15 @@ //! //! This module defines secondary indexes for the system.users table. -use crate::providers::users::models::User; -use crate::system_row_mapper::system_row_to_model; -use crate::StoragePartition; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::UserId; -use kalamdb_store::IndexDefinition; use std::sync::Arc; +use kalamdb_commons::{models::rows::SystemTableRow, storage::Partition, UserId}; +use kalamdb_store::IndexDefinition; + +use crate::{ + providers::users::models::User, system_row_mapper::system_row_to_model, StoragePartition, +}; + /// Index for querying users by role. /// /// Key format: `{role}:{user_id}` @@ -54,9 +54,10 @@ pub fn create_users_indexes() -> Vec User { User { diff --git a/backend/crates/kalamdb-system/src/providers/users/users_provider.rs b/backend/crates/kalamdb-system/src/providers/users/users_provider.rs index c4b02b3f0..8488384ca 100644 --- a/backend/crates/kalamdb-system/src/providers/users/users_provider.rs +++ b/backend/crates/kalamdb-system/src/providers/users/users_provider.rs @@ -11,20 +11,23 @@ //! - Key: `{role}:{user_id}` //! 
- Enables: "All users with role 'admin'" -use super::users_indexes::create_users_indexes; -use crate::error::{SystemError, SystemResultExt}; -use crate::providers::base::{system_rows_to_batch, IndexedProviderDefinition}; -use crate::providers::users::models::User; -use crate::system_row_mapper::{model_to_system_row, system_row_to_model}; -use crate::SystemTable; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::UserId; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; use std::sync::{Arc, OnceLock}; +use datafusion::arrow::{array::RecordBatch, datatypes::SchemaRef}; +use kalamdb_commons::{models::rows::SystemTableRow, UserId}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + +use super::users_indexes::create_users_indexes; +use crate::{ + error::{SystemError, SystemResultExt}, + providers::{ + base::{system_rows_to_batch, IndexedProviderDefinition}, + users::models::User, + }, + system_row_mapper::{model_to_system_row, system_row_to_model}, + SystemTable, +}; + /// Type alias for the indexed users store pub type UsersStore = IndexedEntityStore; @@ -176,11 +179,12 @@ crate::impl_indexed_system_table_provider!( #[cfg(test)] mod tests { - use super::*; use datafusion::datasource::TableProvider; use kalamdb_commons::{AuthType, Role, StorageId}; use kalamdb_store::test_utils::InMemoryBackend; + use super::*; + fn create_test_provider() -> UsersTableProvider { let backend: Arc = Arc::new(InMemoryBackend::new()); UsersTableProvider::new(backend) diff --git a/backend/crates/kalamdb-system/src/registry.rs b/backend/crates/kalamdb-system/src/registry.rs index 54bfb06a2..5acec9891 100644 --- a/backend/crates/kalamdb-system/src/registry.rs +++ b/backend/crates/kalamdb-system/src/registry.rs @@ -6,30 +6,27 @@ //! 
**Phase 5 Completion**: Consolidates all 10 system table providers into //! a single struct for cleaner AppContext API. -use super::providers::job_nodes::models::JobNode; -use super::providers::jobs::models::Job; -use super::providers::manifest::manifest_table_definition; -use super::providers::namespaces::models::Namespace; -use super::providers::storages::models::Storage; -use super::providers::tables::schemas_table_definition; -use super::providers::topic_offsets::models::TopicOffset; -use super::providers::topics::models::Topic; -use super::providers::users::models::User; -use super::providers::{ - AuditLogEntry, AuditLogsTableProvider, JobNodesTableProvider, JobsTableProvider, - ManifestTableProvider, NamespacesTableProvider, SchemasTableProvider, StoragesTableProvider, - TopicOffsetsTableProvider, TopicsTableProvider, UsersTableProvider, -}; +use std::{collections::HashSet, sync::Arc}; + // SchemaRegistry will be passed as Arc parameter from kalamdb-core use datafusion::datasource::TableProvider; -use kalamdb_commons::schemas::{TableDefinition, TableType}; -use kalamdb_commons::SystemTable; +use kalamdb_commons::{ + schemas::{TableDefinition, TableType}, + SystemTable, +}; use kalamdb_session_datafusion::secure_provider; use kalamdb_store::StorageBackend; use once_cell::sync::OnceCell; use parking_lot::RwLock; -use std::collections::HashSet; -use std::sync::Arc; + +use super::providers::{ + job_nodes::models::JobNode, jobs::models::Job, manifest::manifest_table_definition, + namespaces::models::Namespace, storages::models::Storage, tables::schemas_table_definition, + topic_offsets::models::TopicOffset, topics::models::Topic, users::models::User, AuditLogEntry, + AuditLogsTableProvider, JobNodesTableProvider, JobsTableProvider, ManifestTableProvider, + NamespacesTableProvider, SchemasTableProvider, StoragesTableProvider, + TopicOffsetsTableProvider, TopicsTableProvider, UsersTableProvider, +}; /// Registry of all system table providers /// @@ -37,7 +34,8 @@ 
use std::sync::Arc; /// Used by AppContext to eliminate 10 individual provider fields. /// /// Note: information_schema.tables and information_schema.columns are provided -/// by DataFusion's built-in information_schema support (enabled via .with_information_schema(true)). +/// by DataFusion's built-in information_schema support (enabled via +/// .with_information_schema(true)). #[derive(Debug)] pub struct SystemTablesRegistry { // ===== system.* tables (EntityStore-based) ===== @@ -76,8 +74,9 @@ impl SystemTablesRegistry { /// /// # Example /// ```no_run - /// use kalamdb_core::tables::system::SystemTablesRegistry; /// use std::sync::Arc; + /// + /// use kalamdb_core::tables::system::SystemTablesRegistry; /// # use kalamdb_store::StorageBackend; /// /// # let backend: Arc = unimplemented!(); diff --git a/backend/crates/kalamdb-system/src/services/system_columns_service.rs b/backend/crates/kalamdb-system/src/services/system_columns_service.rs index 541178782..cec5139b4 100644 --- a/backend/crates/kalamdb-system/src/services/system_columns_service.rs +++ b/backend/crates/kalamdb-system/src/services/system_columns_service.rs @@ -10,22 +10,28 @@ //! - Apply deletion filters to queries //! //! ## MVCC Architecture Changes -//! - **Removed**: `_id` (replaced by user-defined PK), `_updated` (replaced by _seq.timestamp_millis()) +//! - **Removed**: `_id` (replaced by user-defined PK), `_updated` (replaced by +//! _seq.timestamp_millis()) //! - **Added**: `_seq: SeqId` - Snowflake ID for version tracking with embedded timestamp //! - **Kept**: `_deleted: bool` - Soft delete flag //! //! ## Architecture -//! - **SnowflakeGenerator**: Generates time-ordered unique IDs (41-bit timestamp + 10-bit worker + 12-bit sequence) +//! - **SnowflakeGenerator**: Generates time-ordered unique IDs (41-bit timestamp + 10-bit worker + +//! 12-bit sequence) //! - **SeqId Wrapper**: Wraps Snowflake ID with timestamp extraction methods -//! 
- **Soft Deletes**: Records marked `_deleted=true` are filtered from queries unless explicitly requested +//! - **Soft Deletes**: Records marked `_deleted=true` are filtered from queries unless explicitly +//! requested -use crate::error::SystemError; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::snowflake::SnowflakeGenerator; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::schemas::{ColumnDefault, ColumnDefinition, TableDefinition}; use std::sync::Arc; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::{snowflake::SnowflakeGenerator, SeqId}, + models::schemas::{ColumnDefault, ColumnDefinition, TableDefinition}, +}; + +use crate::error::SystemError; + /// System Columns Service /// /// **MVCC Architecture**: Manages system columns `_seq` and `_deleted`. @@ -251,8 +257,10 @@ mod tests { #[test] fn test_add_system_columns() { - use kalamdb_commons::models::schemas::{TableOptions, TableType}; - use kalamdb_commons::{NamespaceId, TableName}; + use kalamdb_commons::{ + models::schemas::{TableOptions, TableType}, + NamespaceId, TableName, + }; let svc = SystemColumnsService::new(1); let mut table_def = TableDefinition::new( diff --git a/backend/crates/kalamdb-system/src/system_row_mapper.rs b/backend/crates/kalamdb-system/src/system_row_mapper.rs index a7d56429e..a4e156c78 100644 --- a/backend/crates/kalamdb-system/src/system_row_mapper.rs +++ b/backend/crates/kalamdb-system/src/system_row_mapper.rs @@ -1,8 +1,9 @@ -use kalamdb_commons::conversions::{row_to_serde_model, serde_model_to_row}; -use kalamdb_commons::models::rows::SystemTableRow; -use kalamdb_commons::schemas::TableDefinition; -use serde::de::DeserializeOwned; -use serde::Serialize; +use kalamdb_commons::{ + conversions::{row_to_serde_model, serde_model_to_row}, + models::rows::SystemTableRow, + schemas::TableDefinition, +}; +use serde::{de::DeserializeOwned, Serialize}; use crate::error::SystemError; @@ -23,9 +24,9 @@ pub fn system_row_to_model( 
#[cfg(test)] mod tests { - use kalamdb_commons::models::rows::SystemTableRow; - use kalamdb_commons::schemas::TableDefinition; - use kalamdb_commons::{NamespaceId, TableName}; + use kalamdb_commons::{ + models::rows::SystemTableRow, schemas::TableDefinition, NamespaceId, TableName, + }; use serde::{Deserialize, Serialize}; use super::{model_to_system_row, system_row_to_model}; diff --git a/backend/crates/kalamdb-system/tests/filter_pushdown_contract.rs b/backend/crates/kalamdb-system/tests/filter_pushdown_contract.rs index cbc5fcb9a..f45fba158 100644 --- a/backend/crates/kalamdb-system/tests/filter_pushdown_contract.rs +++ b/backend/crates/kalamdb-system/tests/filter_pushdown_contract.rs @@ -1,9 +1,10 @@ use std::sync::Arc; -use datafusion::datasource::TableProvider; -use datafusion::logical_expr::{col, lit, Expr, TableProviderFilterPushDown}; -use kalamdb_store::test_utils::InMemoryBackend; -use kalamdb_store::StorageBackend; +use datafusion::{ + datasource::TableProvider, + logical_expr::{col, lit, Expr, TableProviderFilterPushDown}, +}; +use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; use kalamdb_system::UsersTableProvider; #[test] @@ -14,9 +15,7 @@ fn users_provider_reports_exact_filter_pushdown_for_representative_predicates() let filters = vec![ col("user_id").eq(lit("filter-user")), col("role").eq(lit("user")), - col("created_at") - .eq(lit(1_i64)) - .and(col("updated_at").eq(lit(1_i64))), + col("created_at").eq(lit(1_i64)).and(col("updated_at").eq(lit(1_i64))), ]; let filter_refs: Vec<&Expr> = filters.iter().collect(); @@ -32,4 +31,4 @@ fn users_provider_reports_exact_filter_pushdown_for_representative_predicates() TableProviderFilterPushDown::Exact, ] ); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-system/tests/system_provider_exec_models.rs b/backend/crates/kalamdb-system/tests/system_provider_exec_models.rs index f915ef315..065fa20b9 100644 --- a/backend/crates/kalamdb-system/tests/system_provider_exec_models.rs 
+++ b/backend/crates/kalamdb-system/tests/system_provider_exec_models.rs @@ -1,14 +1,14 @@ use std::sync::Arc; -use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::collect; +use datafusion::{ + datasource::TableProvider, execution::context::SessionContext, physical_plan::collect, +}; use kalamdb_commons::{StorageId, UserId}; use kalamdb_datafusion_sources::exec::DeferredBatchExec; -use kalamdb_store::test_utils::InMemoryBackend; -use kalamdb_store::StorageBackend; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::{AuthType, Role, User, UsersTableProvider}; +use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; +use kalamdb_system::{ + providers::storages::models::StorageMode, AuthType, Role, User, UsersTableProvider, +}; fn total_rows(batches: &[datafusion::arrow::record_batch::RecordBatch]) -> usize { batches.iter().map(|batch| batch.num_rows()).sum() @@ -41,13 +41,10 @@ async fn users_provider_scan_uses_deferred_batch_exec_and_returns_rows() { let ctx = SessionContext::new(); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build users plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build users plan"); assert!(plan.as_any().is::()); let batches = collect(plan, state.task_ctx()).await.expect("collect users plan"); assert_eq!(total_rows(&batches), 1); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-tables/src/common.rs b/backend/crates/kalamdb-tables/src/common.rs index 7268a8573..7d75b894f 100644 --- a/backend/crates/kalamdb-tables/src/common.rs +++ b/backend/crates/kalamdb-tables/src/common.rs @@ -1,9 +1,8 @@ -use kalamdb_commons::KSerializable; -use kalamdb_commons::StorageKey; -use kalamdb_commons::TableId; -use kalamdb_store::{IndexDefinition, IndexedEntityStore, Partition, StorageBackend}; use std::sync::Arc; +use 
kalamdb_commons::{KSerializable, StorageKey, TableId}; +use kalamdb_store::{IndexDefinition, IndexedEntityStore, Partition, StorageBackend}; + /// Build the canonical RocksDB partition name for a table scope. /// /// Format: `{prefix}{namespace}:{table}` (e.g., "user_default:messages") diff --git a/backend/crates/kalamdb-tables/src/lib.rs b/backend/crates/kalamdb-tables/src/lib.rs index 199dc5551..7b28943c0 100644 --- a/backend/crates/kalamdb-tables/src/lib.rs +++ b/backend/crates/kalamdb-tables/src/lib.rs @@ -50,9 +50,7 @@ pub mod user_tables; pub mod utils; // Re-export commonly used types -pub use error::KalamDbError; -pub use error::{Result, TableError}; - +pub use error::{KalamDbError, Result, TableError}; // Re-export table stores pub use kalamdb_commons::models::StreamTableRow; pub use kalamdb_commons::models::UserTableRow; @@ -61,21 +59,26 @@ pub use kalamdb_vector::{ normalize_vector_column_name, SharedVectorHotOpId, SharedVectorHotStore, UserVectorHotOpId, UserVectorHotStore, VectorHotOp, VectorHotOpType, }; -pub use shared_tables::pk_index::{create_shared_table_pk_index, SharedTablePkIndex}; -pub use shared_tables::shared_table_store::{ - new_indexed_shared_table_store, new_shared_table_store, SharedTableIndexedStore, - SharedTableRow, SharedTableStore, +pub use shared_tables::{ + pk_index::{create_shared_table_pk_index, SharedTablePkIndex}, + shared_table_store::{ + new_indexed_shared_table_store, new_shared_table_store, SharedTableIndexedStore, + SharedTableRow, SharedTableStore, + }, }; pub use stream_tables::stream_table_store::{ new_stream_table_store, StreamTableStorageMode, StreamTableStore, StreamTableStoreConfig, }; -pub use topics::topic_message_models::{TopicMessage, TopicMessageId}; -pub use topics::topic_message_store::TopicMessageStore; -pub use user_tables::pk_index::{create_user_table_pk_index, UserTablePkIndex}; -pub use user_tables::user_table_store::{ - new_indexed_user_table_store, new_user_table_store, UserTableIndexedStore, 
UserTableStore, +pub use topics::{ + topic_message_models::{TopicMessage, TopicMessageId}, + topic_message_store::TopicMessageStore, +}; +pub use user_tables::{ + pk_index::{create_user_table_pk_index, UserTablePkIndex}, + user_table_store::{ + new_indexed_user_table_store, new_user_table_store, UserTableIndexedStore, UserTableStore, + }, }; - // Re-export providers for core integration pub use utils::{ BaseTableProvider, KalamTableProvider, SharedTableProvider, StreamTableProvider, diff --git a/backend/crates/kalamdb-tables/src/manifest/manifest_helpers.rs b/backend/crates/kalamdb-tables/src/manifest/manifest_helpers.rs index 021427eed..06ba826da 100644 --- a/backend/crates/kalamdb-tables/src/manifest/manifest_helpers.rs +++ b/backend/crates/kalamdb-tables/src/manifest/manifest_helpers.rs @@ -1,14 +1,22 @@ -use crate::error::KalamDbError; -use crate::utils::core::TableProviderCore; -use crate::utils::parquet::scan_parquet_files_as_batch_async; -use crate::utils::version_resolution::{parquet_batch_to_rows, ParquetRowData}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use datafusion::prelude::{col, lit}; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::schemas::TableType; -use kalamdb_commons::models::UserId; +use datafusion::{ + arrow::datatypes::SchemaRef, + logical_expr::Expr, + prelude::{col, lit}, +}; +use kalamdb_commons::{ + constants::SystemColumnNames, + ids::SeqId, + models::{schemas::TableType, UserId}, +}; + +use crate::{ + error::KalamDbError, + utils::{ + core::TableProviderCore, + parquet::scan_parquet_files_as_batch_async, + version_resolution::{parquet_batch_to_rows, ParquetRowData}, + }, +}; /// Ensure manifest.json exists (and is cached) for the current scope before hot writes. 
pub fn ensure_manifest_ready( diff --git a/backend/crates/kalamdb-tables/src/manifest/planner.rs b/backend/crates/kalamdb-tables/src/manifest/planner.rs index 6c5dd044b..1d8e40fe9 100644 --- a/backend/crates/kalamdb-tables/src/manifest/planner.rs +++ b/backend/crates/kalamdb-tables/src/manifest/planner.rs @@ -3,22 +3,16 @@ //! Provides utilities to translate `Manifest` metadata into //! concrete file/row-group selections for efficient reads. -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use datafusion::arrow::compute::cast; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use futures_util::future::try_join_all; -use futures_util::TryStreamExt; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::UserId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::TableId; -use kalamdb_filestore::StorageCached; -use kalamdb_system::Manifest; -use kalamdb_system::SchemaRegistry as SchemaRegistryTrait; use std::sync::Arc; +use datafusion::arrow::{compute::cast, datatypes::SchemaRef, record_batch::RecordBatch}; +use futures_util::{future::try_join_all, TryStreamExt}; +use kalamdb_commons::{ids::SeqId, models::UserId, schemas::TableType, TableId}; +use kalamdb_filestore::StorageCached; +use kalamdb_system::{Manifest, SchemaRegistry as SchemaRegistryTrait}; + +use crate::{error::KalamDbError, error_extensions::KalamDbResultExt}; + /// Planned selection for a single Parquet file #[derive(Debug, Clone, PartialEq, Eq)] pub struct RowGroupSelection { @@ -273,9 +267,9 @@ impl ManifestAccessPlanner { fn project_batch_to_current_schema( &self, batch: RecordBatch, - old_schema_version: u32, + _old_schema_version: u32, current_schema: &SchemaRef, - table_id: &TableId, + _table_id: &TableId, _schema_registry: &dyn SchemaRegistryTrait, ) -> Result { let batch_schema = batch.schema(); @@ -291,7 +285,8 @@ impl ManifestAccessPlanner { // table_id // ); - // Build projection: for each 
field in current_schema, find it in old_schema or create NULL array + // Build projection: for each field in current_schema, find it in old_schema or create NULL + // array let mut projected_columns: Vec> = Vec::new(); for current_field in current_schema.fields() { @@ -415,10 +410,11 @@ impl ManifestAccessPlanner { mod tests { use std::collections::HashMap; - use super::*; use kalamdb_commons::models::rows::StoredScalarValue; use kalamdb_system::{ColumnStats, SegmentMetadata}; + use super::*; + fn numeric_stats(min: i64, max: i64) -> ColumnStats { ColumnStats::new( Some(StoredScalarValue::Int64(Some(min.to_string()))), diff --git a/backend/crates/kalamdb-tables/src/shared_tables/mod.rs b/backend/crates/kalamdb-tables/src/shared_tables/mod.rs index db5443552..e06dbba3a 100644 --- a/backend/crates/kalamdb-tables/src/shared_tables/mod.rs +++ b/backend/crates/kalamdb-tables/src/shared_tables/mod.rs @@ -13,12 +13,11 @@ pub mod pk_index; pub mod shared_table_provider; pub mod shared_table_store; +// Re-export SharedTableRowId from commons for convenience +pub use kalamdb_commons::ids::SharedTableRowId; pub use pk_index::{create_shared_table_pk_index, SharedTablePkIndex}; pub use shared_table_provider::SharedTableProvider; pub use shared_table_store::{ new_indexed_shared_table_store, new_shared_table_store, SharedTableIndexedStore, SharedTableRow, SharedTableStore, }; - -// Re-export SharedTableRowId from commons for convenience -pub use kalamdb_commons::ids::SharedTableRowId; diff --git a/backend/crates/kalamdb-tables/src/shared_tables/pk_index.rs b/backend/crates/kalamdb-tables/src/shared_tables/pk_index.rs index 328b95190..9d9333278 100644 --- a/backend/crates/kalamdb-tables/src/shared_tables/pk_index.rs +++ b/backend/crates/kalamdb-tables/src/shared_tables/pk_index.rs @@ -18,10 +18,12 @@ //! 3. 
Results are ordered by seq (storekey preserves numeric ordering) use datafusion::scalar::ScalarValue; -use kalamdb_commons::conversions::scalar_value_to_bytes; -use kalamdb_commons::ids::SharedTableRowId; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::storage_key::{encode_key, encode_prefix}; +use kalamdb_commons::{ + conversions::scalar_value_to_bytes, + ids::SharedTableRowId, + storage::Partition, + storage_key::{encode_key, encode_prefix}, +}; use kalamdb_store::IndexDefinition; use super::SharedTableRow; @@ -55,7 +57,6 @@ impl SharedTablePkIndex { } /// Build a prefix for scanning all versions of a PK. - /// pub fn build_prefix_for_pk(&self, pk_value: &ScalarValue) -> Vec { let pk_bytes = scalar_value_to_bytes(pk_value); encode_prefix(&(pk_bytes,)) @@ -95,8 +96,7 @@ impl IndexDefinition for SharedTablePkIndex { } fn filter_to_prefix(&self, filter: &datafusion::logical_expr::Expr) -> Option> { - use kalamdb_store::extract_i64_equality; - use kalamdb_store::extract_string_equality; + use kalamdb_store::{extract_i64_equality, extract_string_equality}; // Try to extract equality filter on PK column if let Some((col, val)) = extract_string_equality(filter) { @@ -133,12 +133,13 @@ pub fn create_shared_table_pk_index( #[cfg(test)] mod tests { - use super::*; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::rows::Row; use std::collections::BTreeMap; + use datafusion::scalar::ScalarValue; + use kalamdb_commons::{ids::SeqId, models::rows::Row}; + + use super::*; + fn create_test_row(seq: i64, id_value: i64) -> (SharedTableRowId, SharedTableRow) { let mut values = BTreeMap::new(); values.insert("id".to_string(), ScalarValue::Int64(Some(id_value))); diff --git a/backend/crates/kalamdb-tables/src/shared_tables/shared_table_provider.rs b/backend/crates/kalamdb-tables/src/shared_tables/shared_table_provider.rs index f89f1418d..d16ebc529 100644 --- 
a/backend/crates/kalamdb-tables/src/shared_tables/shared_table_provider.rs +++ b/backend/crates/kalamdb-tables/src/shared_tables/shared_table_provider.rs @@ -1,7 +1,7 @@ //! Shared table provider implementation without RLS //! -//! This module provides SharedTableProvider implementing BaseTableProvider -//! for cross-user shared tables (no Row-Level Security). +//! This module provides SharedTableProvider implementing BaseTableProvider for cross-user shared tables (no Row-Level Security). //! //! **Key Features**: //! - Direct fields (no wrapper layer) @@ -11,35 +11,29 @@ //! - SessionState NOT extracted in scan_rows() (scans all rows) //! - PK Index: Uses SharedTableIndexedStore for efficient O(1) lookups by PK value -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::manifest::manifest_helpers::{ensure_manifest_ready, load_row_from_parquet_by_seq}; -use crate::shared_tables::{SharedTableIndexedStore, SharedTablePkIndex, SharedTableRow}; -use crate::utils::base::{ - self, BaseTableProvider, DeferredMvccScanProvider, TableProviderCore, +use std::{ + any::Any, + collections::{HashMap, HashSet}, + sync::Arc, }; -use crate::utils::row_utils::extract_full_user_context; -use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::datasource::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::dml::InsertOp; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::scalar::ScalarValue; - -use kalamdb_commons::conversions::arrow_json_conversion::{coerce_rows, coerce_updates}; -use kalamdb_commons::ids::SharedTableRowId; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::OperationKind; -use 
kalamdb_commons::models::UserId; -use kalamdb_commons::websocket::ChangeNotification; -use kalamdb_commons::NotLeaderError; -use kalamdb_commons::TableType; +use async_trait::async_trait; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + catalog::Session, + datasource::TableProvider, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown}, + physical_plan::ExecutionPlan, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + conversions::arrow_json_conversion::{coerce_rows, coerce_updates}, + ids::SharedTableRowId, + models::{datatypes::KalamDataType, rows::Row, OperationKind, UserId}, + websocket::ChangeNotification, + NotLeaderError, TableType, +}; use kalamdb_datafusion_sources::provider::{ merged_projection_scan_descriptor, mvcc_filter_capability, FilterCapability, ScanDescriptor, SourceProvider, @@ -52,17 +46,26 @@ use kalamdb_transactions::{extract_transaction_query_context, StagedMutation}; use kalamdb_vector::{ new_indexed_shared_vector_hot_store, SharedVectorHotOpId, SharedVectorHotStore, }; -use std::any::Any; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use tracing::Instrument; +use crate::{ + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::manifest_helpers::{ensure_manifest_ready, load_row_from_parquet_by_seq}, + shared_tables::{SharedTableIndexedStore, SharedTablePkIndex, SharedTableRow}, + utils::{ + base::{self, BaseTableProvider, DeferredMvccScanProvider, TableProviderCore}, + row_utils::extract_full_user_context, + }, +}; + /// Shared table provider without RLS /// /// **Architecture**: /// - Stateless provider (user context passed but ignored) /// - Direct fields (no wrapper layer) -/// - Shared core via Arc (holds schema, pk_name, column_defaults, non_null_columns, table_def) +/// - Shared core via Arc (holds schema, pk_name, column_defaults, +/// non_null_columns, table_def) /// - NO RLS - user_id parameter 
ignored in all operations /// - Uses SharedTableIndexedStore for efficient PK lookups #[derive(Clone)] @@ -180,7 +183,8 @@ impl SharedTableProvider { })? } - /// Build a complete Row for live query/topic notifications including system columns (_seq, _deleted) + /// Build a complete Row for live query/topic notifications including system columns (_seq, + /// _deleted) /// /// This ensures notifications include all columns, not just user-defined fields. fn build_notification_row(entity: &SharedTableRow) -> Row { @@ -298,8 +302,8 @@ impl SharedTableProvider { /// Scan Parquet files from cold storage for shared table /// - /// Lists all *.parquet files in the table's storage directory and merges them into a single RecordBatch. - /// Returns an empty batch if no Parquet files exist. + /// Lists all *.parquet files in the table's storage directory and merges them into a single + /// RecordBatch. Returns an empty batch if no Parquet files exist. /// /// **Difference from user tables**: Shared tables have NO user_id partitioning, /// so all Parquet files are in the same directory (no subdirectories per user). @@ -308,8 +312,8 @@ impl SharedTableProvider { /// Logs cache hits/misses and updates last_accessed timestamp. Full query optimization /// (batch file pruning based on manifest metadata) implemented in Phase 5 (US2, T119-T123). /// - /// **Manifest-Driven Pruning**: Uses ManifestAccessPlanner to select files based on filter predicates, - /// enabling row-group level pruning when row_group metadata is available. + /// **Manifest-Driven Pruning**: Uses ManifestAccessPlanner to select files based on filter + /// predicates, enabling row-group level pruning when row_group metadata is available. 
async fn scan_parquet_files_as_batch_async( &self, filter: Option<&Expr>, @@ -515,14 +519,17 @@ fn finalize_primary_key_group( #[cfg(test)] mod tests { + use std::collections::BTreeMap; + + use datafusion::scalar::ScalarValue; + use kalamdb_commons::{ + ids::SeqId, + models::{NamespaceId, TableId, TableName}, + }; + use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend}; + use super::*; use crate::shared_tables::{new_indexed_shared_table_store, SharedTableRow}; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::{NamespaceId, TableId, TableName}; - use kalamdb_store::test_utils::InMemoryBackend; - use kalamdb_store::StorageBackend; - use std::collections::BTreeMap; fn create_store(pk_field_name: &str) -> Arc { let backend: Arc = Arc::new(InMemoryBackend::new()); @@ -669,8 +676,7 @@ impl DeferredMvccScanProvider for SharedTableP &self, scan_context: &Self::ScanContext, ) -> Result { - self.count_resolved_rows_async(scan_context.snapshot_commit_seq) - .await + self.count_resolved_rows_async(scan_context.snapshot_commit_seq).await } async fn scan_kvs_with_context( @@ -1151,7 +1157,8 @@ impl BaseTableProvider for SharedTableProvider .map_err(KalamDbError::InvalidOperation)?; // Find latest resolved row for this PK - // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet scan) + // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet + // scan) let latest_row = if let Some((_key, row)) = self.find_by_pk(&pk_value_scalar).await? { row } else if self.pk_tombstoned_in_hot(&pk_value_scalar).await? 
{ @@ -1254,8 +1261,7 @@ impl BaseTableProvider for SharedTableProvider limit: Option, ) -> Result { let scan_context = self.build_scan_context(state)?; - self.scan_rows_with_context(&scan_context, projection, filter, limit) - .await + self.scan_rows_with_context(&scan_context, projection, filter, limit).await } async fn scan_with_version_resolution_to_kvs_async( @@ -1311,10 +1317,7 @@ impl BaseTableProvider for SharedTableProvider ) .await?; - log::trace!( - "[SharedProvider] RocksDB scan returned {} rows", - resolved.hot_rows_scanned - ); + log::trace!("[SharedProvider] RocksDB scan returned {} rows", resolved.hot_rows_scanned); log::trace!( "[SharedProvider] Cold scan returned {} Parquet rows", resolved.cold_rows_scanned @@ -1376,7 +1379,8 @@ impl SharedTableProvider { // Cold storage: project only the PK + MVCC metadata needed for counting. let cold_columns = base::compute_metadata_only_cold_columns(&pk_name); - let cold_future = self.scan_parquet_files_as_batch_async(None, Some(cold_columns.as_slice())); + let cold_future = + self.scan_parquet_files_as_batch_async(None, Some(cold_columns.as_slice())); let hot_future = async { hot_future.await.map_err(|e| { @@ -1512,7 +1516,8 @@ impl SharedTableProvider { crate::utils::unified_dml::extract_user_pk_value(row_data, pk_name)?; if !seen_batch_pks.insert(pk_str.clone()) { return Err(KalamDbError::AlreadyExists(format!( - "Primary key violation: value '{}' appears multiple times in the insert batch for column '{}'", + "Primary key violation: value '{}' appears multiple times in the \ + insert batch for column '{}'", pk_str, pk_name ))); } @@ -2257,6 +2262,13 @@ impl TableProvider for SharedTableProvider { &row, &assignments, )?; + if crate::utils::datafusion_dml::update_assignments_noop( + &schema, + &row, + &evaluated_updates, + )? 
{ + continue; + } if let Some(staged_mutations) = staged_mutations.as_mut() { staged_mutations.push(StagedMutation::new( diff --git a/backend/crates/kalamdb-tables/src/shared_tables/shared_table_store.rs b/backend/crates/kalamdb-tables/src/shared_tables/shared_table_store.rs index de8937f8c..8c2b5ad42 100644 --- a/backend/crates/kalamdb-tables/src/shared_tables/shared_table_store.rs +++ b/backend/crates/kalamdb-tables/src/shared_tables/shared_table_store.rs @@ -15,22 +15,27 @@ //! - Enables O(1) row lookup by PK value instead of full scan //! - Used by UPDATE/DELETE to find target rows -use crate::common::{ensure_partition, new_indexed_store_with_pk, partition_name}; -use kalamdb_commons::ids::{SeqId, SharedTableRowId}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::{KSerializable, TableId}; +use std::sync::Arc; + +use kalamdb_commons::{ + ids::{SeqId, SharedTableRowId}, + models::rows::Row, + storage::Partition, + KSerializable, TableId, +}; use kalamdb_store::{EntityStore, IndexedEntityStore, StorageBackend}; use serde::{Deserialize, Serialize}; -use std::sync::Arc; use super::pk_index::create_shared_table_pk_index; +use crate::common::{ensure_partition, new_indexed_store_with_pk, partition_name}; /// Shared table row data /// /// **MVCC Architecture (Phase 12, User Story 5)**: -/// - Removed: row_id (redundant with _seq), _updated (timestamp embedded in _seq Snowflake ID), access_level (moved to schema definition) -/// - Kept: _seq (version identifier with embedded timestamp), `_commit_seq` (commit-order visibility), _deleted (tombstone), fields (all shared table columns including PK) +/// - Removed: row_id (redundant with _seq), _updated (timestamp embedded in _seq Snowflake ID), +/// access_level (moved to schema definition) +/// - Kept: _seq (version identifier with embedded timestamp), `_commit_seq` (commit-order +/// visibility), _deleted (tombstone), fields (all shared table columns including PK) 
/// /// **Note on System Column Naming**: /// The underscore prefix (`_seq`, `_deleted`) follows SQL convention for system-managed columns. @@ -166,12 +171,16 @@ pub fn new_indexed_shared_table_store( #[cfg(test)] mod tests { + use std::collections::BTreeMap; + + use datafusion::scalar::ScalarValue; + use kalamdb_commons::{ + models::{NamespaceId, TableId, TableName}, + StorageKey, + }; + use super::*; use crate::utils::test_backend::RecordingBackend; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::models::{NamespaceId, TableId, TableName}; - use kalamdb_commons::StorageKey; - use std::collections::BTreeMap; fn create_test_store() -> SharedTableStore { let backend: Arc = Arc::new(RecordingBackend::new()); diff --git a/backend/crates/kalamdb-tables/src/stream_tables/mod.rs b/backend/crates/kalamdb-tables/src/stream_tables/mod.rs index c400e1192..1e878fd7c 100644 --- a/backend/crates/kalamdb-tables/src/stream_tables/mod.rs +++ b/backend/crates/kalamdb-tables/src/stream_tables/mod.rs @@ -8,11 +8,9 @@ pub mod stream_table_provider; pub mod stream_table_store; -pub use kalamdb_commons::models::StreamTableRow; +// Re-export StreamTableRowId from kalamdb_commons for convenience +pub use kalamdb_commons::{ids::StreamTableRowId, models::StreamTableRow}; pub use stream_table_provider::StreamTableProvider; pub use stream_table_store::{ new_stream_table_store, StreamTableStorageMode, StreamTableStore, StreamTableStoreConfig, }; - -// Re-export StreamTableRowId from kalamdb_commons for convenience -pub use kalamdb_commons::ids::StreamTableRowId; diff --git a/backend/crates/kalamdb-tables/src/stream_tables/stream_table_provider.rs b/backend/crates/kalamdb-tables/src/stream_tables/stream_table_provider.rs index a422621ff..718ea8582 100644 --- a/backend/crates/kalamdb-tables/src/stream_tables/stream_table_provider.rs +++ b/backend/crates/kalamdb-tables/src/stream_tables/stream_table_provider.rs @@ -1,7 +1,7 @@ //! 
Stream table provider implementation with RLS + TTL //! -//! This module provides StreamTableProvider implementing BaseTableProvider -//! for ephemeral event streams with Row-Level Security and TTL-based eviction. +//! This module provides StreamTableProvider implementing BaseTableProvider for ephemeral event streams with Row-Level Security and TTL-based eviction. //! //! **Key Features**: //! - Direct fields (no wrapper layer) @@ -11,49 +11,58 @@ //! - Commit log-backed storage (append-only, no Parquet) //! - TTL-based eviction in scan operations -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::stream_tables::{StreamTableRow, StreamTableStore}; -use crate::utils::base::{extract_seq_bounds_from_filter, BaseTableProvider, TableProviderCore}; -use crate::utils::row_utils::extract_user_context; -use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::common::DFSchema; -use datafusion::datasource::TableProvider; -use datafusion::error::DataFusionError; -use datafusion::error::Result as DataFusionResult; -use datafusion::logical_expr::dml::InsertOp; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::ids::{SeqId, StreamTableRowId}; -use kalamdb_commons::models::UserId; -use kalamdb_session_datafusion::{check_user_table_write_access, session_error_to_datafusion}; -use kalamdb_datafusion_sources::exec::{ - finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource, -}; -use kalamdb_datafusion_sources::provider::{ - combined_filter, merged_projection_scan_descriptor, pushdown_results_for_filters, - remap_projection_indices, FilterCapability, ScanDescriptor, SourceProvider, +use std::{ + any::Any, + collections::HashSet, + sync::Arc, + 
time::{SystemTime, UNIX_EPOCH}, }; -use std::any::Any; -use std::collections::HashSet; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; +use async_trait::async_trait; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + catalog::Session, + common::DFSchema, + datasource::TableProvider, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown}, + physical_expr::PhysicalExpr, + physical_plan::ExecutionPlan, + scalar::ScalarValue, +}; // Arrow <-> JSON helpers use kalamdb_commons::models::rows::Row; -use kalamdb_commons::websocket::ChangeNotification; +use kalamdb_commons::{ + ids::{SeqId, StreamTableRowId}, + models::UserId, + websocket::ChangeNotification, +}; +use kalamdb_datafusion_sources::{ + exec::{finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource}, + provider::{ + combined_filter, merged_projection_scan_descriptor, pushdown_results_for_filters, + remap_projection_indices, FilterCapability, ScanDescriptor, SourceProvider, + }, +}; +use kalamdb_session_datafusion::{check_user_table_write_access, session_error_to_datafusion}; + +use crate::{ + error::KalamDbError, + error_extensions::KalamDbResultExt, + stream_tables::{StreamTableRow, StreamTableStore}, + utils::{ + base::{extract_seq_bounds_from_filter, BaseTableProvider, TableProviderCore}, + row_utils::extract_user_context, + }, +}; /// Stream table provider with RLS and TTL filtering /// /// **Architecture**: /// - Stateless provider (user context passed per-operation) /// - Direct fields (no wrapper layer) -/// - Shared core via Arc (holds schema, pk_name, column_defaults, non_null_columns) +/// - Shared core via Arc (holds schema, pk_name, column_defaults, +/// non_null_columns) /// - RLS enforced via user_id parameter /// - HOT-ONLY storage (ephemeral data, no Parquet) /// - TTL-based eviction @@ -251,7 +260,7 @@ impl BaseTableProvider for StreamTableProvider user_id: &UserId, row_data: 
Row, ) -> Result { - let table_id = self.core.table_id(); + let _table_id = self.core.table_id(); // Call SystemColumnsService to generate SeqId let sys_cols = self.core.services.system_columns.clone(); @@ -289,7 +298,7 @@ impl BaseTableProvider for StreamTableProvider let has_topics = self.core.has_topic_routes(&table_id); let has_live_subs = manager.has_subscribers(Some(&user_id), &table_id); if has_topics || has_live_subs { - let table_name = table_id.full_name(); + let _table_name = table_id.full_name(); // Build complete row including system column (_seq) let row = Self::build_notification_row(&entity); @@ -482,7 +491,7 @@ impl BaseTableProvider for StreamTableProvider _cold_columns: Option<&[String]>, _snapshot_commit_seq: Option, ) -> Result, KalamDbError> { - let table_id = self.core.table_id(); + let _table_id = self.core.table_id(); // since_seq is exclusive, so start at seq + 1 let start_seq = since_seq.map(|seq| SeqId::from_i64(seq.as_i64().saturating_add(1))); @@ -589,7 +598,9 @@ impl TableProvider for StreamTableProvider { }; let output_projection = if !filters.is_empty() { - projection.map(|indices| remap_projection_indices(&descriptor.schema, &merged_schema, indices)) + projection.map(|indices| { + remap_projection_indices(&descriptor.schema, &merged_schema, indices) + }) } else { None }; diff --git a/backend/crates/kalamdb-tables/src/stream_tables/stream_table_store.rs b/backend/crates/kalamdb-tables/src/stream_tables/stream_table_store.rs index 6529712da..41779867b 100644 --- a/backend/crates/kalamdb-tables/src/stream_tables/stream_table_store.rs +++ b/backend/crates/kalamdb-tables/src/stream_tables/stream_table_store.rs @@ -1,26 +1,28 @@ -//! Stream table store implementation backed by in-memory storage. +//! Stream table store implementation backed by append-only stream log storage. //! -//! Stream tables now use in-memory BTreeMap storage for fast ephemeral access. -//! 
In the future, we will support specifying persistence mode when creating a stream table. +//! Stream tables use file-backed logs in production and an in-memory backend for tests or +//! explicitly ephemeral tables. //! //! **MVCC Architecture (Phase 13.2)**: //! - StreamTableRowId: Composite struct with user_id and _seq fields //! - StreamTableRow: Minimal structure with user_id, _seq, fields (JSON) //! - Ordering: (user_id, _seq) for deterministic scans -use crate::common::partition_name; -use kalamdb_commons::ids::{SeqId, StreamTableRowId}; -use kalamdb_commons::models::{StreamTableRow, UserId}; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::TableId; +use std::{collections::HashMap, path::PathBuf, sync::Arc}; + +use kalamdb_commons::{ + ids::{SeqId, StreamTableRowId}, + models::{StreamTableRow, UserId}, + storage::Partition, + TableId, +}; use kalamdb_sharding::ShardRouter; use kalamdb_store::storage_trait::{Result, StorageError}; use kalamdb_streams::{ bucket_for_ttl, FileStreamLogStore, MemoryStreamLogStore, StreamLogConfig, StreamLogStore, }; -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::Arc; + +use crate::common::partition_name; const MAX_SCAN_LIMIT: usize = 100000; @@ -60,18 +62,19 @@ enum StreamLogStoreBackend { } impl StreamLogStoreBackend { - fn append_rows( + fn append_row( &self, table_id: &TableId, user_id: &UserId, - rows: HashMap, + key: &StreamTableRowId, + row: &StreamTableRow, ) -> Result<()> { match self { Self::Memory(store) => { - store.append_rows(table_id, user_id, rows).map_err(map_stream_error) + store.append_row(table_id, user_id, key, row).map_err(map_stream_error) }, Self::File(store) => { - store.append_rows(table_id, user_id, rows).map_err(map_stream_error) + store.append_row(table_id, user_id, key, row).map_err(map_stream_error) }, } } @@ -154,7 +157,7 @@ fn map_stream_error(error: kalamdb_streams::StreamLogError) -> StorageError { StorageError::IoError(error.to_string()) } -/// Store for 
stream tables (in-memory BTreeMap storage for fast ephemeral access). +/// Store for stream tables. #[derive(Clone)] pub struct StreamTableStore { table_id: TableId, @@ -163,7 +166,7 @@ pub struct StreamTableStore { } impl StreamTableStore { - /// Create a new stream table store (in-memory backed) + /// Create a new stream table store. pub fn new( table_id: TableId, partition: impl Into, @@ -200,9 +203,7 @@ impl StreamTableStore { /// Append a row. pub fn put(&self, key: &StreamTableRowId, entity: &StreamTableRow) -> Result<()> { - let mut rows = HashMap::new(); - rows.insert(key.clone(), entity.clone()); - self.log_store.append_rows(&self.table_id, key.user_id(), rows) + self.log_store.append_row(&self.table_id, key.user_id(), key, entity) } /// Flush all buffered segment writers to the OS page cache. @@ -337,13 +338,21 @@ impl StreamTableStore { return Ok(Vec::new()); } - let start_time = start_seq.map(|seq| seq.timestamp_millis()).unwrap_or(0); + let mut start_time = start_seq.map(|seq| seq.timestamp_millis()).unwrap_or(0); + if let Some(ttl) = ttl_ms { + start_time = start_time.max(now_ms.saturating_sub(ttl).saturating_add(1)); + } + let read_limit = if start_seq.is_some() { + MAX_SCAN_LIMIT + } else { + limit + }; let rows = self.log_store.read_in_time_range( &self.table_id, user_id, start_time, u64::MAX, - MAX_SCAN_LIMIT, + read_limit, )?; // Filter by start_seq, apply TTL, and collect with early termination @@ -399,7 +408,6 @@ impl StreamTableStore { .await .map_err(|e| StorageError::Other(format!("spawn_blocking join error: {}", e)))? } - } /// Helper function to create a new stream table store (in-memory backed). 
@@ -423,11 +431,13 @@ pub fn new_stream_table_store( #[cfg(test)] mod tests { - use super::*; + use std::collections::BTreeMap; + use datafusion::scalar::ScalarValue; use kalamdb_commons::models::{rows::Row, NamespaceId, TableName}; use kalamdb_sharding::ShardRouter; - use std::collections::BTreeMap; + + use super::*; fn create_test_store(_base_dir: &std::path::Path) -> StreamTableStore { let table_id = TableId::new(NamespaceId::new("test_ns"), TableName::new("test_stream")); diff --git a/backend/crates/kalamdb-tables/src/topics/topic_message_models.rs b/backend/crates/kalamdb-tables/src/topics/topic_message_models.rs index 0f82fc157..764dbed98 100644 --- a/backend/crates/kalamdb-tables/src/topics/topic_message_models.rs +++ b/backend/crates/kalamdb-tables/src/topics/topic_message_models.rs @@ -4,9 +4,12 @@ //! - TopicMessageId: Composite key for message identification //! - TopicMessage: Message envelope with payload and metadata -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::models::{TopicId, TopicOp, UserId}; -use kalamdb_commons::{decode_key, encode_key, encode_prefix, KSerializable, StorageKey}; +use kalamdb_commons::{ + datatypes::KalamDataType, + decode_key, encode_key, encode_prefix, + models::{TopicId, TopicOp, UserId}, + KSerializable, StorageKey, +}; use kalamdb_macros::table; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-tables/src/topics/topic_message_schema.rs b/backend/crates/kalamdb-tables/src/topics/topic_message_schema.rs index 9bda07590..dcb1cd7bc 100644 --- a/backend/crates/kalamdb-tables/src/topics/topic_message_schema.rs +++ b/backend/crates/kalamdb-tables/src/topics/topic_message_schema.rs @@ -4,10 +4,11 @@ //! The schema is derived from `TopicMessage::definition()` so the model and //! Arrow projection stay in sync. 
-use datafusion::arrow::datatypes::SchemaRef; +use std::sync::OnceLock; + #[cfg(test)] use datafusion::arrow::datatypes::DataType; -use std::sync::OnceLock; +use datafusion::arrow::datatypes::SchemaRef; use super::topic_message_models::TopicMessage; diff --git a/backend/crates/kalamdb-tables/src/topics/topic_message_store.rs b/backend/crates/kalamdb-tables/src/topics/topic_message_store.rs index 8d73b62f2..a142f2cb5 100644 --- a/backend/crates/kalamdb-tables/src/topics/topic_message_store.rs +++ b/backend/crates/kalamdb-tables/src/topics/topic_message_store.rs @@ -9,12 +9,13 @@ //! - Storage key format: composite encoding for efficient filtering //! - Efficient range scans for message fetching -use crate::topics::topic_message_models::{TopicMessage, TopicMessageId}; -use kalamdb_commons::models::TopicId; -use kalamdb_commons::storage::Partition; -use kalamdb_store::{EntityStore, StorageBackend}; use std::sync::Arc; +use kalamdb_commons::{models::TopicId, storage::Partition}; +use kalamdb_store::{EntityStore, StorageBackend}; + +use crate::topics::topic_message_models::{TopicMessage, TopicMessageId}; + /// Store for topic messages (append-only message log) /// /// Uses composite TopicMessageId keys for efficient range queries. 
@@ -190,9 +191,10 @@ impl EntityStore for TopicMessageStore { #[cfg(test)] mod tests { + use kalamdb_commons::StorageKey; + use super::*; use crate::utils::test_backend::RecordingBackend; - use kalamdb_commons::StorageKey; fn setup_test_store() -> TopicMessageStore { let backend = Arc::new(RecordingBackend::new()); diff --git a/backend/crates/kalamdb-tables/src/user_tables/mod.rs b/backend/crates/kalamdb-tables/src/user_tables/mod.rs index 1a3582a2b..745eb920e 100644 --- a/backend/crates/kalamdb-tables/src/user_tables/mod.rs +++ b/backend/crates/kalamdb-tables/src/user_tables/mod.rs @@ -14,12 +14,10 @@ pub mod pk_index; pub mod user_table_provider; pub mod user_table_store; +// Re-export UserTableRowId and UserTableRow from commons for convenience +pub use kalamdb_commons::{ids::UserTableRowId, models::rows::UserTableRow}; pub use pk_index::{create_user_table_pk_index, UserTablePkIndex}; pub use user_table_provider::UserTableProvider; pub use user_table_store::{ new_indexed_user_table_store, new_user_table_store, UserTableIndexedStore, UserTableStore, }; - -// Re-export UserTableRowId and UserTableRow from commons for convenience -pub use kalamdb_commons::ids::UserTableRowId; -pub use kalamdb_commons::models::rows::UserTableRow; diff --git a/backend/crates/kalamdb-tables/src/user_tables/pk_index.rs b/backend/crates/kalamdb-tables/src/user_tables/pk_index.rs index b399b6ce2..ed0d1ad5f 100644 --- a/backend/crates/kalamdb-tables/src/user_tables/pk_index.rs +++ b/backend/crates/kalamdb-tables/src/user_tables/pk_index.rs @@ -19,11 +19,13 @@ //! 3. 
Results are ordered by seq (storekey preserves numeric ordering) use datafusion::scalar::ScalarValue; -use kalamdb_commons::conversions::scalar_value_to_bytes; -use kalamdb_commons::ids::UserTableRowId; -use kalamdb_commons::models::rows::UserTableRow; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::storage_key::{encode_key, encode_prefix}; +use kalamdb_commons::{ + conversions::scalar_value_to_bytes, + ids::UserTableRowId, + models::rows::UserTableRow, + storage::Partition, + storage_key::{encode_key, encode_prefix}, +}; use kalamdb_store::IndexDefinition; /// Index for querying user table rows by primary key value. @@ -92,8 +94,7 @@ impl IndexDefinition for UserTablePkIndex { } fn filter_to_prefix(&self, filter: &datafusion::logical_expr::Expr) -> Option> { - use kalamdb_store::extract_i64_equality; - use kalamdb_store::extract_string_equality; + use kalamdb_store::{extract_i64_equality, extract_string_equality}; // Try to extract equality filter on PK column // Note: User-scoped indexes require user_id to build a valid storekey prefix. @@ -131,13 +132,16 @@ pub fn create_user_table_pk_index( #[cfg(test)] mod tests { - use super::*; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::UserId; use std::collections::BTreeMap; + use datafusion::scalar::ScalarValue; + use kalamdb_commons::{ + ids::SeqId, + models::{rows::Row, UserId}, + }; + + use super::*; + fn create_test_row( user_id: &UserId, seq: i64, diff --git a/backend/crates/kalamdb-tables/src/user_tables/user_table_provider.rs b/backend/crates/kalamdb-tables/src/user_tables/user_table_provider.rs index b57f7bee6..ca16cb8c5 100644 --- a/backend/crates/kalamdb-tables/src/user_tables/user_table_provider.rs +++ b/backend/crates/kalamdb-tables/src/user_tables/user_table_provider.rs @@ -1,7 +1,7 @@ //! User table provider implementation with RLS //! -//! 
This module provides UserTableProvider implementing BaseTableProvider -//! with Row-Level Security (RLS) enforced via user_id parameter. +//! This module provides UserTableProvider implementing BaseTableProvider with Row-Level Security (RLS) enforced via user_id parameter. //! //! **Key Features**: //! - Direct fields (no UserTableShared wrapper) @@ -11,62 +11,63 @@ //! - SessionState extraction for scan_rows() //! - PK Index for efficient row lookup (Phase 14) -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::manifest::manifest_helpers::{ensure_manifest_ready, load_row_from_parquet_by_seq}; -use crate::user_tables::{UserTableIndexedStore, UserTablePkIndex, UserTableRow}; -use crate::utils::base::{ - self, BaseTableProvider, DeferredMvccScanProvider, TableProviderCore, +use std::{ + any::Any, + collections::{HashMap, HashSet}, + sync::Arc, }; -use crate::utils::row_utils::extract_user_context; -use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::datasource::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::dml::InsertOp; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_plan::{ExecutionPlan, Statistics}; -use datafusion::scalar::ScalarValue; - -use kalamdb_commons::conversions::arrow_json_conversion::{coerce_rows, coerce_updates}; -use kalamdb_commons::ids::{SeqId, UserTableRowId}; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::OperationKind; -use kalamdb_commons::models::UserId; -use kalamdb_commons::StorageKey; -use kalamdb_commons::TableType; +use async_trait::async_trait; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + catalog::Session, + datasource::TableProvider, + error::{DataFusionError, Result as 
DataFusionResult}, + logical_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown}, + physical_plan::{ExecutionPlan, Statistics}, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + conversions::arrow_json_conversion::{coerce_rows, coerce_updates}, + ids::{SeqId, UserTableRowId}, + models::{datatypes::KalamDataType, rows::Row, OperationKind, UserId}, + websocket::ChangeNotification, + StorageKey, TableType, +}; +use kalamdb_datafusion_sources::{ + exec::{resolve_latest_kvs_from_cold_batch, VersionedRow}, + provider::{ + merged_projection_scan_descriptor, mvcc_filter_capability, FilterCapability, + ScanDescriptor, SourceProvider, + }, +}; use kalamdb_session::can_read_all_users; use kalamdb_session_datafusion::{ check_user_table_access, check_user_table_write_access, session_error_to_datafusion, }; -use kalamdb_datafusion_sources::exec::{ - resolve_latest_kvs_from_cold_batch, VersionedRow, -}; -use kalamdb_datafusion_sources::provider::{ - merged_projection_scan_descriptor, mvcc_filter_capability, FilterCapability, ScanDescriptor, - SourceProvider, -}; use kalamdb_store::EntityStore; use kalamdb_transactions::{extract_transaction_query_context, StagedMutation}; use kalamdb_vector::{new_indexed_user_vector_hot_store, UserVectorHotOpId, UserVectorHotStore}; -use std::any::Any; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use tracing::Instrument; -use kalamdb_commons::models::rows::Row; - -use kalamdb_commons::websocket::ChangeNotification; +use crate::{ + error::KalamDbError, + error_extensions::KalamDbResultExt, + manifest::manifest_helpers::{ensure_manifest_ready, load_row_from_parquet_by_seq}, + user_tables::{UserTableIndexedStore, UserTablePkIndex, UserTableRow}, + utils::{ + base::{self, BaseTableProvider, DeferredMvccScanProvider, TableProviderCore}, + row_utils::extract_user_context, + }, +}; /// User table provider with RLS /// /// **Architecture**: /// - Stateless provider (user context passed per-operation) /// - Direct fields (no 
wrapper layer) -/// - Shared core via Arc (holds schema, pk_name, column_defaults, non_null_columns) +/// - Shared core via Arc (holds schema, pk_name, column_defaults, +/// non_null_columns) /// - RLS enforced via user_id parameter /// - PK Index for efficient row lookup (Phase 14) #[derive(Clone)] @@ -306,7 +307,8 @@ impl UserTableProvider { crate::utils::unified_dml::extract_user_pk_value(row_data, pk_name)?; if !seen_batch_pks.insert(pk_str.clone()) { return Err(KalamDbError::AlreadyExists(format!( - "Primary key violation: value '{}' appears multiple times in the insert batch for column '{}'", + "Primary key violation: value '{}' appears multiple times in the \ + insert batch for column '{}'", pk_str, pk_name ))); } @@ -600,8 +602,8 @@ impl UserTableProvider { /// Scan Parquet files from cold storage for a specific user (async version). /// - /// Lists all *.parquet files in the user's storage directory and merges them into a single RecordBatch. - /// Returns an empty batch if no Parquet files exist. + /// Lists all *.parquet files in the user's storage directory and merges them into a single + /// RecordBatch. Returns an empty batch if no Parquet files exist. /// /// **Phase 4 (US6, T082-T084)**: Integrated with ManifestService for manifest caching. /// Logs cache hits/misses and updates last_accessed timestamp. Full query optimization @@ -787,11 +789,8 @@ impl DeferredMvccScanProvider for UserTableProvide &self, scan_context: &Self::ScanContext, ) -> Result { - self.count_resolved_rows_async( - &scan_context.user_id, - scan_context.snapshot_commit_seq, - ) - .await + self.count_resolved_rows_async(&scan_context.user_id, scan_context.snapshot_commit_seq) + .await } async fn scan_kvs_with_context( @@ -871,8 +870,9 @@ impl BaseTableProvider for UserTableProvider { /// For hot storage (RocksDB), uses fast existence check. If not found in hot storage, /// falls back to checking cold storage using manifest-based pruning. 
/// - /// OPTIMIZED: Uses `pk_exists_in_hot` for fast hot-path check (single index lookup + 1 entity fetch max). - /// OPTIMIZED: Uses `pk_exists_in_cold` with manifest-based segment pruning for cold storage. + /// OPTIMIZED: Uses `pk_exists_in_hot` for fast hot-path check (single index lookup + 1 entity + /// fetch max). OPTIMIZED: Uses `pk_exists_in_cold` with manifest-based segment pruning for + /// cold storage. async fn find_row_key_by_id_field( &self, user_id: &UserId, @@ -1086,7 +1086,8 @@ impl BaseTableProvider for UserTableProvider { KalamDbError::InvalidOperation(format!("Prior row missing PK {}", pk_name)) })?; - // Validate PK update (check if new PK value already exists) — only needed when updating by key + // Validate PK update (check if new PK value already exists) — only needed when updating by + // key base::validate_pk_update(self, Some(user_id), &updates, &pk_value_scalar).await?; // Delegate to the canonical implementation @@ -1125,7 +1126,8 @@ impl BaseTableProvider for UserTableProvider { let pk_value_scalar = parse_string_as_scalar(pk_value, pk_column_type) .map_err(|e| KalamDbError::InvalidOperation(e))?; // Find latest resolved row for this PK under same user - // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet scan) + // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet + // scan) let (_latest_key, latest_row) = if let Some(result) = self.find_by_pk(user_id, &pk_value_scalar).await? 
{ result @@ -1138,7 +1140,8 @@ impl BaseTableProvider for UserTableProvider { // Not in hot storage, check cold storage if log::log_enabled!(log::Level::Debug) { log::debug!( - "[UPDATE] PK {} not found in hot storage, querying cold storage for user={}, pk={}", + "[UPDATE] PK {} not found in hot storage, querying cold storage for \ + user={}, pk={}", pk_name, user_id.as_str(), pk_value @@ -1325,7 +1328,8 @@ impl BaseTableProvider for UserTableProvider { .map_err(KalamDbError::InvalidOperation)?; // Find latest resolved row for this PK under same user - // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet scan) + // First try hot storage (O(1) via PK index), then fall back to cold storage (Parquet + // scan) let latest_row = if let Some((_key, row)) = self.find_by_pk(user_id, &pk_value_scalar).await? { row @@ -1439,8 +1443,7 @@ impl BaseTableProvider for UserTableProvider { limit: Option, ) -> Result { let scan_context = self.build_scan_context(state)?; - self.scan_rows_with_context(&scan_context, projection, filter, limit) - .await + self.scan_rows_with_context(&scan_context, projection, filter, limit).await } async fn scan_with_version_resolution_to_kvs_async( @@ -1524,15 +1527,13 @@ impl BaseTableProvider for UserTableProvider { // ); // } - let result: Vec<(UserTableRowId, UserTableRow)> = resolved - .rows - .into_iter() - .map(|(row_id, row)| (row_id, row.0)) - .collect(); + let result: Vec<(UserTableRowId, UserTableRow)> = + resolved.rows.into_iter().map(|(row_id, row)| (row_id, row.0)).collect(); if log::log_enabled!(log::Level::Trace) { log::trace!( - "[UserProvider] Final version-resolved (post-tombstone): {} rows (table={}; user={})", + "[UserProvider] Final version-resolved (post-tombstone): {} rows (table={}; \ + user={})", result.len(), table_id, user_id.as_str() @@ -2293,6 +2294,13 @@ impl TableProvider for UserTableProvider { &row, &assignments, )?; + if crate::utils::datafusion_dml::update_assignments_noop( + &schema, 
+ &row, + &evaluated_updates, + )? { + continue; + } if let Some(staged_mutations) = staged_mutations.as_mut() { staged_mutations.push(StagedMutation::new( diff --git a/backend/crates/kalamdb-tables/src/user_tables/user_table_store.rs b/backend/crates/kalamdb-tables/src/user_tables/user_table_store.rs index a58e86835..bede4e555 100644 --- a/backend/crates/kalamdb-tables/src/user_tables/user_table_store.rs +++ b/backend/crates/kalamdb-tables/src/user_tables/user_table_store.rs @@ -13,15 +13,15 @@ //! - IndexedEntityStore variant maintains a secondary index on the PK field //! - Enables O(1) lookup of row by PK value instead of O(n) scan +use std::sync::Arc; + +use kalamdb_commons::{ + ids::UserTableRowId, models::rows::UserTableRow, storage::Partition, TableId, +}; +use kalamdb_store::{entity_store::EntityStore, IndexedEntityStore, StorageBackend}; + use super::pk_index::create_user_table_pk_index; use crate::common::{ensure_partition, new_indexed_store_with_pk, partition_name}; -use kalamdb_commons::ids::UserTableRowId; -use kalamdb_commons::models::rows::UserTableRow; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::TableId; -use kalamdb_store::entity_store::EntityStore; -use kalamdb_store::{IndexedEntityStore, StorageBackend}; -use std::sync::Arc; // KSerializable for UserTableRow is implemented in kalamdb-store // impl KSerializable for UserTableRow {} @@ -111,15 +111,17 @@ pub fn new_indexed_user_table_store( #[cfg(test)] mod tests { - use super::*; - use crate::utils::test_backend::RecordingBackend; + use std::collections::BTreeMap; + use datafusion::scalar::ScalarValue; use kalamdb_commons::{ ids::SeqId, models::{rows::Row, NamespaceId, TableId, TableName}, StorageKey, UserId, }; - use std::collections::BTreeMap; + + use super::*; + use crate::utils::test_backend::RecordingBackend; fn create_test_store() -> UserTableStore { let backend: Arc = Arc::new(RecordingBackend::new()); diff --git a/backend/crates/kalamdb-tables/src/utils/base.rs 
b/backend/crates/kalamdb-tables/src/utils/base.rs index 7465cb010..8fb7c4183 100644 --- a/backend/crates/kalamdb-tables/src/utils/base.rs +++ b/backend/crates/kalamdb-tables/src/utils/base.rs @@ -36,8 +36,8 @@ //! - Are append-only (no updates, no version resolution needed) //! - Use TTL-based eviction instead of tombstones //! - Can return rows as they're scanned with early termination on LIMIT -//! - They now use the provider-family-specific exec-backed path in -//! `stream_table_provider.rs` instead of sharing the MVCC-oriented flow below +//! - They now use the provider-family-specific exec-backed path in `stream_table_provider.rs` +//! instead of sharing the MVCC-oriented flow below //! //! ## Architecture //! @@ -65,59 +65,67 @@ //! `stream_table_provider.rs` so the hot-store scan runs at execute time. //! ``` -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; +use std::{ + collections::{HashMap, HashSet}, + future::Future, + sync::Arc, +}; -use crate::manifest::ManifestAccessPlanner; -use crate::utils::unified_dml; use async_trait::async_trait; -use datafusion::arrow::array::{Array, BooleanArray, Float32Array, Int64Array, UInt64Array}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::common::DFSchema; -use datafusion::datasource::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{utils::expr_to_columns, Expr, TableProviderFilterPushDown}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::{ExecutionPlan, Statistics}; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::conversions::arrow_json_conversion::coerce_rows; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{NamespaceId, TableName, UserId}; -use 
kalamdb_commons::serialization::row_codec::RowMetadata; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::NotLeaderError; -use kalamdb_commons::{StorageKey, TableId}; -use kalamdb_datafusion_sources::exec::{ - count_resolved_from_metadata, finalize_deferred_batch, resolve_latest_kvs_from_cold_batch, - DeferredBatchExec, DeferredBatchSource, ParquetRowData, VersionedRow, +use datafusion::{ + arrow::{ + array::{Array, BooleanArray, Float32Array, Int64Array, UInt64Array}, + datatypes::SchemaRef, + record_batch::RecordBatch, + }, + catalog::Session, + common::DFSchema, + datasource::TableProvider, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{utils::expr_to_columns, Expr, TableProviderFilterPushDown}, + physical_expr::PhysicalExpr, + physical_plan::{ExecutionPlan, Statistics}, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + constants::SystemColumnNames, + conversions::arrow_json_conversion::coerce_rows, + ids::SeqId, + models::{rows::Row, NamespaceId, TableName, UserId}, + schemas::TableType, + serialization::row_codec::RowMetadata, + NotLeaderError, StorageKey, TableId, }; -use kalamdb_datafusion_sources::pruning::mvcc_filter_evaluation; -use kalamdb_datafusion_sources::provider::{ - combined_filter, pushdown_results_for_filters, remap_projection_indices, SourceProvider, +use kalamdb_datafusion_sources::{ + exec::{ + count_resolved_from_metadata, finalize_deferred_batch, resolve_latest_kvs_from_cold_batch, + DeferredBatchExec, DeferredBatchSource, ParquetRowData, VersionedRow, + }, + provider::{ + combined_filter, pushdown_results_for_filters, remap_projection_indices, SourceProvider, + }, + pruning::mvcc_filter_evaluation, }; use kalamdb_filestore::registry::ListResult; -use kalamdb_system::ClusterCoordinator as ClusterCoordinatorTrait; -use kalamdb_system::Manifest; -use kalamdb_system::SchemaRegistry as SchemaRegistryTrait; +use kalamdb_system::{ + ClusterCoordinator as ClusterCoordinatorTrait, Manifest, SchemaRegistry 
as SchemaRegistryTrait, +}; use kalamdb_transactions::{ extract_transaction_query_context, TransactionAccessError, TransactionOverlay, TransactionOverlayExec, }; -use std::collections::{HashMap, HashSet}; -use std::future::Future; -use std::sync::Arc; // Re-export types moved to submodules pub use crate::utils::core::TableProviderCore; pub(crate) use crate::utils::parquet::scan_parquet_files_as_batch_async; pub use crate::utils::row_utils::{ - extract_full_user_context, extract_seq_bounds_from_filter, resolve_user_scope, system_user_id, + extract_full_user_context, extract_seq_bounds_from_filter, inject_system_columns, + resolve_user_scope, rows_to_arrow_batch, system_user_id, ScanRow, +}; +use crate::{ + error::KalamDbError, error_extensions::KalamDbResultExt, manifest::ManifestAccessPlanner, + utils::unified_dml, }; -pub use crate::utils::row_utils::{inject_system_columns, rows_to_arrow_batch, ScanRow}; #[async_trait] pub trait DeferredMvccScanProvider: @@ -184,11 +192,8 @@ where ) -> Result { let schema = self.schema_ref(); let pk_name = self.primary_key_field_name(); - let scope_label = self.scan_scope_label(scan_context); - let subject_user = self - .scan_cold_scope(scan_context) - .map(UserId::as_str) - .unwrap_or("-"); + let _scope_label = self.scan_scope_label(scan_context); + let _subject_user = self.scan_cold_scope(scan_context).map(UserId::as_str).unwrap_or("-"); if self.allow_pk_fast_path(scan_context) { if let Some(pk_scalar) = typed_pk_literal_from_filter(&schema, filter, pk_name) { @@ -201,13 +206,18 @@ where // scope_label, // subject_user // ); - return rows_to_arrow_batch(&schema, vec![(row_id, row)], projection, |_, _| {}); + return rows_to_arrow_batch( + &schema, + vec![(row_id, row)], + projection, + |_, _| {}, + ); } if self.hot_pk_tombstoned(scan_context, &pk_scalar).await? 
{ // log::debug!( - // "[MvccScan] PK fast-path tombstone for {}={} (table={}; scope={}; subject={})", - // pk_name, + // "[MvccScan] PK fast-path tombstone for {}={} (table={}; scope={}; + // subject={})", pk_name, // pk_scalar, // self.table_id(), // scope_label, @@ -221,19 +231,27 @@ where ); } - let cold_found = - find_row_by_pk(self, self.scan_cold_scope(scan_context), &pk_scalar.to_string()) - .await?; + let cold_found = find_row_by_pk( + self, + self.scan_cold_scope(scan_context), + &pk_scalar.to_string(), + ) + .await?; if let Some((row_id, row)) = cold_found { // log::debug!( - // "[MvccScan] PK fast-path cold hit for {}={} (table={}; scope={}; subject={})", - // pk_name, + // "[MvccScan] PK fast-path cold hit for {}={} (table={}; scope={}; + // subject={})", pk_name, // pk_scalar, // self.table_id(), // scope_label, // subject_user // ); - return rows_to_arrow_batch(&schema, vec![(row_id, row)], projection, |_, _| {}); + return rows_to_arrow_batch( + &schema, + vec![(row_id, row)], + projection, + |_, _| {}, + ); } // log::debug!( @@ -343,11 +361,7 @@ where ) .await .map_err(|error| { - DataFusionError::Execution(format!( - "{} failed: {}", - self.source_name(), - error - )) + DataFusionError::Execution(format!("{} failed: {}", self.source_name(), error)) })?; finalize_deferred_batch( @@ -657,9 +671,7 @@ pub trait BaseTableProvider: Send + Sync + TableProvider { where Self: SourceProvider, { - Ok(pushdown_results_for_filters(filters, |filter| { - self.filter_capability(filter) - })) + Ok(pushdown_results_for_filters(filters, |filter| self.filter_capability(filter))) } /// Default implementation for statistics @@ -691,11 +703,8 @@ pub trait BaseTableProvider: Send + Sync + TableProvider { let _ = pruning.limit.limit; let source_filter = combined_filter(filter_evaluation.inexact.filters.as_ref()); let exact_filter = combined_filter(filter_evaluation.exact.filters.as_ref()); - let effective_projection = pruning - .projection - .columns - .as_ref() - 
.map(|indices| indices.as_ref().to_vec()); + let effective_projection = + pruning.projection.columns.as_ref().map(|indices| indices.as_ref().to_vec()); let merged_schema = match effective_projection.as_ref() { Some(indices) => descriptor @@ -708,8 +717,9 @@ pub trait BaseTableProvider: Send + Sync + TableProvider { let output_projection = if pruning.filters.filters.is_empty() { None } else { - projection - .map(|indices| remap_projection_indices(&descriptor.schema, &merged_schema, indices)) + projection.map(|indices| { + remap_projection_indices(&descriptor.schema, &merged_schema, indices) + }) }; let output_schema = match projection { Some(indices) => descriptor @@ -765,12 +775,7 @@ pub trait BaseTableProvider: Send + Sync + TableProvider { self.primary_key_field_name(), )?; let base_plan = self - .base_scan( - state, - overlay_projection.effective_projection.as_ref(), - filters, - limit, - ) + .base_scan(state, overlay_projection.effective_projection.as_ref(), filters, limit) .await?; Ok(Arc::new(TransactionOverlayExec::try_new( @@ -1027,9 +1032,10 @@ where P: BaseTableProvider, K: StorageKey, { - use crate::utils::version_resolution::{parquet_batch_to_rows, ParquetRowData}; use datafusion::prelude::{col, lit}; + use crate::utils::version_resolution::{parquet_batch_to_rows, ParquetRowData}; + let pk_name = provider.primary_key_field_name(); let user_scope = resolve_user_scope(scope); @@ -1275,8 +1281,8 @@ pub async fn pk_exists_in_cold( let pruned_paths = planner.plan_by_pk_value(m, pk_column_id, pk_value); if pruned_paths.is_empty() { // log::trace!( - // "[pk_exists_in_cold] Manifest pruning returned no candidate segments for PK {} on {}.{} {} - PK not in cold", - // pk_value, + // "[pk_exists_in_cold] Manifest pruning returned no candidate segments for PK {} on + // {}.{} {} - PK not in cold", pk_value, // namespace.as_str(), // table.as_str(), // scope_label @@ -1284,8 +1290,8 @@ pub async fn pk_exists_in_cold( return Ok(false); } else { // log::trace!( - // 
"[pk_exists_in_cold] Manifest pruning: {} of {} segments may contain PK {} for {}.{} {}", - // pruned_paths.len(), + // "[pk_exists_in_cold] Manifest pruning: {} of {} segments may contain PK {} for + // {}.{} {}", pruned_paths.len(), // m.segments.len(), // pk_value, // namespace.as_str(), @@ -1459,8 +1465,8 @@ pub async fn pk_exists_batch_in_cold( if let Some(ref m) = manifest { if m.segments.is_empty() { // log::trace!( - // "[pk_exists_batch_in_cold] Manifest has no segments for {}.{} {} - PK not in cold", - // namespace.as_str(), + // "[pk_exists_batch_in_cold] Manifest has no segments for {}.{} {} - PK not in + // cold", namespace.as_str(), // table.as_str(), // scope_label // ); @@ -1479,7 +1485,8 @@ pub async fn pk_exists_batch_in_cold( } if relevant_files.is_empty() { log::trace!( - "[pk_exists_batch_in_cold] Manifest pruning returned no candidate segments for {}.{} {} - PKs not in cold", + "[pk_exists_batch_in_cold] Manifest pruning returned no candidate segments for \ + {}.{} {} - PKs not in cold", namespace.as_str(), table.as_str(), scope_label @@ -1487,7 +1494,8 @@ pub async fn pk_exists_batch_in_cold( return Ok(None); } else { log::trace!( - "[pk_exists_batch_in_cold] Manifest pruning: {} of {} segments may contain {} PKs for {}.{} {}", + "[pk_exists_batch_in_cold] Manifest pruning: {} of {} segments may contain {} PKs \ + for {}.{} {}", relevant_files.len(), m.segments.len(), pk_values.len(), @@ -1650,7 +1658,8 @@ async fn pk_exists_batch_in_parquet_via_storage_cache( Ok(None) } -/// Check if a PK value exists in a single Parquet file via streaming (async, with MVCC version resolution). +/// Check if a PK value exists in a single Parquet file via streaming (async, with MVCC version +/// resolution). /// /// Uses column-projected streaming — only reads pk/_seq/_deleted column chunks, /// never loads the entire file into memory. 
@@ -1787,7 +1796,7 @@ where let pk_str = unified_dml::extract_user_pk_value(row_data, pk_name)?; let user_scope = resolve_user_scope(scope); - //Step 1: Check hot storage (RocksDB) - fast PK index lookup + // Step 1: Check hot storage (RocksDB) - fast PK index lookup if provider.find_row_key_by_id_field(user_scope, &pk_str).await?.is_some() { return Err(KalamDbError::AlreadyExists(format!( "Primary key violation: value '{}' already exists in column '{}' (hot storage)", @@ -1813,7 +1822,8 @@ where if let crate::utils::pk::PkCheckResult::FoundInCold { segment_path } = check_result { return Err(KalamDbError::AlreadyExists(format!( - "Primary key violation: value '{}' already exists in column '{}' (cold storage: {})", + "Primary key violation: value '{}' already exists in column '{}' (cold \ + storage: {})", pk_str, pk_name, segment_path ))); } @@ -1840,8 +1850,8 @@ pub fn warn_if_unfiltered_scan( ) { // if filter.is_none() && limit.is_none() { // log::warn!( - // "⚠️ [UNFILTERED SCAN] table={} type={} | No filter or limit provided - scanning ALL rows. \ - // This may cause performance issues for large tables.", + // "⚠️ [UNFILTERED SCAN] table={} type={} | No filter or limit provided - scanning ALL + // rows. 
\ This may cause performance issues for large tables.", // table_id, // table_type.as_str() // ); @@ -1944,7 +1954,8 @@ where if provider.find_row_key_by_id_field(user_scope, &new_pk_str).await?.is_some() { return Err(KalamDbError::AlreadyExists(format!( - "Primary key violation: value '{}' already exists in column '{}' (UPDATE would create duplicate)", + "Primary key violation: value '{}' already exists in column '{}' (UPDATE would create \ + duplicate)", new_pk_str, pk_name ))); } @@ -2072,9 +2083,10 @@ pub fn build_count_only_batch(count: usize) -> Result #[cfg(test)] mod tests { - use super::*; use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use super::*; + #[test] fn compute_metadata_only_cold_columns_returns_pk_and_mvcc_columns() { let columns = compute_metadata_only_cold_columns("id"); @@ -2104,15 +2116,7 @@ mod tests { assert!(columns.iter().any(|column| column == "id")); assert!(columns.iter().any(|column| column == "name")); assert!(columns.iter().any(|column| column == SystemColumnNames::SEQ)); - assert!( - columns - .iter() - .any(|column| column == SystemColumnNames::COMMIT_SEQ) - ); - assert!( - columns - .iter() - .any(|column| column == SystemColumnNames::DELETED) - ); + assert!(columns.iter().any(|column| column == SystemColumnNames::COMMIT_SEQ)); + assert!(columns.iter().any(|column| column == SystemColumnNames::DELETED)); } } diff --git a/backend/crates/kalamdb-tables/src/utils/core.rs b/backend/crates/kalamdb-tables/src/utils/core.rs index 24067a7e7..024b7756c 100644 --- a/backend/crates/kalamdb-tables/src/utils/core.rs +++ b/backend/crates/kalamdb-tables/src/utils/core.rs @@ -1,9 +1,14 @@ -use crate::error::KalamDbError; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::logical_expr::Expr; -use kalamdb_commons::schemas::{TableDefinition, TableType}; -use kalamdb_commons::websocket::ChangeNotification; -use kalamdb_commons::TableId; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; + +use 
datafusion::{arrow::datatypes::SchemaRef, logical_expr::Expr}; +use kalamdb_commons::{ + schemas::{TableDefinition, TableType}, + websocket::ChangeNotification, + TableId, +}; use kalamdb_filestore::StorageRegistry; use kalamdb_system::{ ClusterCoordinator as ClusterCoordinatorTrait, ManifestService as ManifestServiceTrait, @@ -11,8 +16,8 @@ use kalamdb_system::{ TopicPublisher as TopicPublisherTrait, }; use kalamdb_transactions::CommitSequenceSource; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; + +use crate::error::KalamDbError; /// Combined services struct shared across all table providers. /// @@ -41,7 +46,8 @@ pub struct TableServices { /// Commit-sequence source for stamping direct DML writes that bypass the applier fast path. pub commit_sequence_source: Arc, - /// Topic publisher for synchronous CDC publishing (optional, None when topics are not configured) + /// Topic publisher for synchronous CDC publishing (optional, None when topics are not + /// configured) pub topic_publisher: Option>, } @@ -117,8 +123,7 @@ impl TableProviderCore { schema: SchemaRef, column_defaults: HashMap, ) -> Self { - use kalamdb_commons::constants::SystemColumnNames; - use kalamdb_commons::schemas::ColumnDefault; + use kalamdb_commons::{constants::SystemColumnNames, schemas::ColumnDefault}; // Precompute non-nullable columns from the schema. // Exclude system columns (_seq, _deleted) because they are auto-generated @@ -300,9 +305,9 @@ impl TableProviderCore { /// Publish a row change to matching topics synchronously. /// - /// Only publishes if the current node is the leader (in cluster mode). - /// This ensures topic messages are persisted before the write is acknowledged, - /// replacing the previous async notification-queue approach that dropped events. + /// In cluster mode, publishing is tied to local apply plus the local topic route cache. 
+ /// That lets the topic-owning node materialize CDC events for data groups led by + /// other nodes without requiring a separate async notification queue. pub async fn publish_to_topics( &self, table_id: &TableId, @@ -315,16 +320,6 @@ impl TableProviderCore { _ => return, }; - // Leadership check: only leader publishes to avoid duplicates in cluster mode. - // In standalone mode, is_leader_for_* always returns true. - let is_leader = match user_id { - Some(uid) => self.services.cluster_coordinator.is_leader_for_user(uid).await, - None => self.services.cluster_coordinator.is_leader_for_shared().await, - }; - if !is_leader { - return; - } - if let Err(e) = topic_pub.publish_for_table(table_id, op, row, user_id) { log::warn!("Topic publish failed for table {}: {}", table_id, e); } @@ -351,15 +346,6 @@ impl TableProviderCore { _ => return, }; - // Leadership check: only leader publishes to avoid duplicates in cluster mode. - let is_leader = match user_id { - Some(uid) => self.services.cluster_coordinator.is_leader_for_user(uid).await, - None => self.services.cluster_coordinator.is_leader_for_shared().await, - }; - if !is_leader { - return; - } - if let Err(e) = topic_pub.publish_batch_for_table(table_id, op, rows, user_id) { log::warn!("Topic batch publish failed for table {}: {}", table_id, e); } diff --git a/backend/crates/kalamdb-tables/src/utils/datafusion_dml.rs b/backend/crates/kalamdb-tables/src/utils/datafusion_dml.rs index 1c02943b0..8a4c875cb 100644 --- a/backend/crates/kalamdb-tables/src/utils/datafusion_dml.rs +++ b/backend/crates/kalamdb-tables/src/utils/datafusion_dml.rs @@ -1,32 +1,36 @@ +use std::{ + collections::{BTreeMap, HashSet}, + sync::Arc, +}; + use async_trait::async_trait; -use datafusion::arrow::array::{ArrayRef, UInt64Array}; -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::Session; -use datafusion::common::DFSchema; -use 
datafusion::datasource::source::DataSourceExec; -use datafusion::datasource::TableProvider; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{utils::expr_to_columns, Expr}; -use datafusion::physical_plan::filter::FilterExec; -use datafusion::physical_plan::projection::ProjectionExec; -use datafusion::physical_plan::{collect, ExecutionPlan}; -use datafusion::scalar::ScalarValue; +use datafusion::{ + arrow::{ + array::{ArrayRef, UInt64Array}, + datatypes::{DataType, Field, Schema, SchemaRef}, + record_batch::RecordBatch, + }, + catalog::Session, + common::DFSchema, + datasource::{source::DataSourceExec, TableProvider}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{utils::expr_to_columns, Expr}, + physical_plan::{collect, filter::FilterExec, projection::ProjectionExec, ExecutionPlan}, + scalar::ScalarValue, +}; use datafusion_datasource::memory::MemorySourceConfig; -use kalamdb_datafusion_sources::exec::{DeferredBatchExec, DeferredBatchSource}; -use kalamdb_commons::conversions::arrow_json_conversion::{ - arrow_value_to_scalar, json_rows_to_arrow_batch, +use kalamdb_commons::{ + conversions::{ + arrow_json_conversion::{arrow_value_to_scalar, coerce_updates, json_rows_to_arrow_batch}, + scalar_to_pk_string, + }, + models::{rows::Row, UserId}, + NotLeaderError, TableId, TableType, }; -use kalamdb_commons::conversions::scalar_to_pk_string; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::NotLeaderError; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_datafusion_sources::exec::{DeferredBatchExec, DeferredBatchSource}; use kalamdb_transactions::{ build_insert_staged_mutations, StagedMutation, TransactionAccessError, TransactionQueryContext, }; -use std::collections::{BTreeMap, HashSet}; -use std::sync::Arc; pub struct OverlayScanProjection { pub effective_projection: Option>, @@ -68,9 +72,7 @@ pub async fn rows_affected_plan( 
vec![Arc::new(UInt64Array::from(vec![rows_affected])) as ArrayRef], )?; - Ok(Arc::new(DeferredBatchExec::new(Arc::new( - RowsAffectedSource { batch }, - )))) + Ok(Arc::new(DeferredBatchExec::new(Arc::new(RowsAffectedSource { batch })))) } pub fn prepare_overlay_scan_projection( @@ -299,6 +301,7 @@ pub fn dml_scan_projection( for (_, expr) in assignments { collect_expr_columns(expr, &mut referenced_columns)?; } + referenced_columns.extend(assignments.iter().map(|(column, _)| column.clone())); referenced_columns.extend(required_columns.iter().copied().map(str::to_owned)); let projection: Vec = schema @@ -402,6 +405,23 @@ pub fn evaluate_assignment_values( Ok(Row::new(values)) } +pub fn update_assignments_noop( + schema: &SchemaRef, + row: &Row, + updates: &Row, +) -> DataFusionResult { + let updates = coerce_updates(updates.clone(), schema).map_err(DataFusionError::Execution)?; + + for (column, value) in updates.values { + match row.values.get(&column) { + Some(existing) if existing == &value => {}, + _ => return Ok(false), + } + } + + Ok(true) +} + fn build_row_batch(schema: &SchemaRef, row: &Row) -> DataFusionResult { json_rows_to_arrow_batch(schema, vec![row.clone()]).map_err(DataFusionError::Execution) } @@ -505,14 +525,16 @@ pub fn validate_not_null_constraints(schema: &SchemaRef, rows: &[Row]) -> DataFu match row.values.get(column_name) { None => { return Err(DataFusionError::Execution(format!( - "NOT NULL constraint violation: column '{}' is missing in row {} (row index {})", + "NOT NULL constraint violation: column '{}' is missing in row {} (row \ + index {})", column_name, row_idx + 1, row_idx ))); }, Some(value) if value.is_null() => { - // Use is_null() to catch both ScalarValue::Null and typed NULLs like Utf8(None), Int32(None), etc. + // Use is_null() to catch both ScalarValue::Null and typed NULLs like + // Utf8(None), Int32(None), etc. 
return Err(DataFusionError::Execution(format!( "NOT NULL constraint violation: column '{}' cannot be NULL (row {})", column_name, @@ -548,7 +570,8 @@ pub fn validate_not_null_with_set( match row.values.get(column_name) { None => { return Err(DataFusionError::Execution(format!( - "NOT NULL constraint violation: column '{}' is missing in row {} (row index {})", + "NOT NULL constraint violation: column '{}' is missing in row {} (row \ + index {})", column_name, row_idx + 1, row_idx @@ -571,14 +594,18 @@ pub fn validate_not_null_with_set( #[cfg(test)] mod tests { - use super::{dml_scan_projection, evaluate_assignment_values, row_matches_filters}; - use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion::logical_expr::{col, lit}; - use datafusion::prelude::SessionContext; - use datafusion::scalar::ScalarValue; - use kalamdb_commons::models::rows::Row; use std::sync::Arc; + use datafusion::{ + arrow::datatypes::{DataType, Field, Schema, SchemaRef}, + logical_expr::{col, lit}, + prelude::SessionContext, + scalar::ScalarValue, + }; + use kalamdb_commons::models::rows::Row; + + use super::{dml_scan_projection, evaluate_assignment_values, row_matches_filters}; + fn test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("id", DataType::Utf8, false), @@ -634,7 +661,7 @@ mod tests { } #[test] - fn dml_scan_projection_includes_assignment_source_columns_but_not_targets() { + fn dml_scan_projection_includes_assignment_source_and_target_columns() { let schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Utf8, false), Field::new("value", DataType::Int64, false), @@ -646,6 +673,6 @@ mod tests { let projection = dml_scan_projection(&schema, &filters, &assignments, &["id"]).unwrap(); - assert_eq!(projection, Some(vec![0, 1, 2])); + assert_eq!(projection, None); } } diff --git a/backend/crates/kalamdb-tables/src/utils/dml_provider.rs b/backend/crates/kalamdb-tables/src/utils/dml_provider.rs index ea3fc1dc6..adc072de1 100644 --- 
a/backend/crates/kalamdb-tables/src/utils/dml_provider.rs +++ b/backend/crates/kalamdb-tables/src/utils/dml_provider.rs @@ -9,10 +9,8 @@ //! User/Shared/Stream providers override it with actual batch-insert logic. use async_trait::async_trait; -use datafusion::datasource::TableProvider; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; +use datafusion::{datasource::TableProvider, scalar::ScalarValue}; +use kalamdb_commons::models::{rows::Row, UserId}; use crate::error::KalamDbError; diff --git a/backend/crates/kalamdb-tables/src/utils/mod.rs b/backend/crates/kalamdb-tables/src/utils/mod.rs index 9dcff8742..9bf584047 100644 --- a/backend/crates/kalamdb-tables/src/utils/mod.rs +++ b/backend/crates/kalamdb-tables/src/utils/mod.rs @@ -34,17 +34,17 @@ pub mod streams { } // Re-export key types for convenience -pub use base::{BaseTableProvider, TableProviderCore}; pub use core::TableServices; + +pub use base::{BaseTableProvider, TableProviderCore}; pub use dml_provider::KalamTableProvider; pub use shared::SharedTableProvider; pub use streams::StreamTableProvider; -pub use users::UserTableProvider; - // Re-export unified DML functions pub use unified_dml::{ append_version, append_version_sync, extract_user_pk_value, validate_primary_key, }; +pub use users::UserTableProvider; /// Provider consolidation summary /// diff --git a/backend/crates/kalamdb-tables/src/utils/parquet.rs b/backend/crates/kalamdb-tables/src/utils/parquet.rs index fd734d9cf..6b30ae5ad 100644 --- a/backend/crates/kalamdb-tables/src/utils/parquet.rs +++ b/backend/crates/kalamdb-tables/src/utils/parquet.rs @@ -1,12 +1,13 @@ -use crate::error::KalamDbError; -use crate::manifest::ManifestAccessPlanner; -use crate::utils::core::TableProviderCore; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::logical_expr::Expr; -use kalamdb_commons::models::schemas::TableType; -use 
kalamdb_commons::models::UserId; -use kalamdb_commons::TableId; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + logical_expr::Expr, +}; +use kalamdb_commons::{ + models::{schemas::TableType, UserId}, + TableId, +}; + +use crate::{error::KalamDbError, manifest::ManifestAccessPlanner, utils::core::TableProviderCore}; /// Async helper for loading Parquet batches via ManifestAccessPlanner. /// @@ -108,7 +109,8 @@ pub(crate) async fn scan_parquet_files_as_batch_async( }, Ok(None) => { log::trace!( - "[PARQUET_SCAN_ASYNC] Manifest cache MISS | table={} | {} | fallback=directory_scan", + "[PARQUET_SCAN_ASYNC] Manifest cache MISS | table={} | {} | \ + fallback=directory_scan", table_id, scope_label ); @@ -163,7 +165,8 @@ pub(crate) async fn scan_parquet_files_as_batch_async( .await?; log::trace!( - "[PARQUET_SCAN_ASYNC] Scan complete: table={} {} total_batches={} skipped={} scanned={} rows={} use_degraded_mode={}", + "[PARQUET_SCAN_ASYNC] Scan complete: table={} {} total_batches={} skipped={} scanned={} \ + rows={} use_degraded_mode={}", table_id, scope_label, total_batches, diff --git a/backend/crates/kalamdb-tables/src/utils/pk/existence_checker.rs b/backend/crates/kalamdb-tables/src/utils/pk/existence_checker.rs index 307b5711c..c4db7e718 100644 --- a/backend/crates/kalamdb-tables/src/utils/pk/existence_checker.rs +++ b/backend/crates/kalamdb-tables/src/utils/pk/existence_checker.rs @@ -2,18 +2,17 @@ //! //! Provides optimized PK uniqueness validation using manifest-based segment pruning. 
-use kalamdb_commons::models::TableId; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::UserId; -use kalamdb_system::Manifest; use std::sync::Arc; -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use crate::manifest::ManifestAccessPlanner; +use kalamdb_commons::{models::TableId, schemas::TableType, UserId}; use kalamdb_filestore::StorageRegistry; -use kalamdb_system::ManifestService as ManifestServiceTrait; -use kalamdb_system::SchemaRegistry as SchemaRegistryTrait; +use kalamdb_system::{ + Manifest, ManifestService as ManifestServiceTrait, SchemaRegistry as SchemaRegistryTrait, +}; + +use crate::{ + error::KalamDbError, error_extensions::KalamDbResultExt, manifest::ManifestAccessPlanner, +}; /// Result of a primary key existence check #[derive(Debug, Clone, PartialEq, Eq)] @@ -117,8 +116,7 @@ impl PkExistenceChecker { let pk_column_id = core.primary_key_column_id(); // Step 2-4: Use the optimized cold storage check - self - .check_cold_storage(table_id, table_type, user_id, pk_column, pk_column_id, pk_value) + self.check_cold_storage(table_id, table_type, user_id, pk_column, pk_column_id, pk_value) .await } @@ -244,7 +242,8 @@ impl PkExistenceChecker { let pruned_paths = planner.plan_by_pk_value(m, pk_column_id, pk_value); if pruned_paths.is_empty() { log::trace!( - "[PkExistenceChecker] Manifest pruning returned no candidate segments for PK {} on {}.{} {} - PK not in cold", + "[PkExistenceChecker] Manifest pruning returned no candidate segments for PK \ + {} on {}.{} {} - PK not in cold", pk_value, namespace.as_str(), table.as_str(), @@ -253,7 +252,8 @@ impl PkExistenceChecker { return Ok(PkCheckResult::PrunedByManifest); } else { log::trace!( - "[PkExistenceChecker] Manifest pruning: {} of {} segments may contain PK {} for {}.{} {}", + "[PkExistenceChecker] Manifest pruning: {} of {} segments may contain PK {} \ + for {}.{} {}", pruned_paths.len(), m.segments.len(), pk_value, @@ -441,29 +441,29 @@ impl 
PkExistenceChecker { #[cfg(test)] mod tests { - use super::*; - use std::collections::HashMap; - use std::fs; - use std::path::Path; + use std::{collections::HashMap, fs, path::Path}; use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::datatypes::KalamDataType; - use kalamdb_commons::models::rows::StoredScalarValue; - use kalamdb_commons::models::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableType, + use kalamdb_commons::{ + ids::SeqId, + models::{ + datatypes::KalamDataType, + rows::StoredScalarValue, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableType}, + }, + NamespaceId, StorageId, TableId, TableName, UserId, }; - use kalamdb_commons::{NamespaceId, StorageId, TableId, TableName, UserId}; use kalamdb_filestore::StorageRegistry; - use kalamdb_store::test_utils::InMemoryBackend; - use kalamdb_store::{StorageBackend, StorageError}; + use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend, StorageError}; use kalamdb_system::{ ManifestCacheEntry, ManifestService, SegmentMetadata, Storage, StorageType, StoragesTableProvider, SyncState, }; use tempfile::TempDir; + use super::*; + #[derive(Debug, Clone)] struct TestSchemaRegistry { table_id: TableId, @@ -605,11 +605,7 @@ mod tests { }) .expect("seed local storage"); - Arc::new(StorageRegistry::new( - storages_provider, - base_directory, - Default::default(), - )) + Arc::new(StorageRegistry::new(storages_provider, base_directory, Default::default())) } fn numeric_stats(min: i64, max: i64) -> kalamdb_system::ColumnStats { @@ -666,7 +662,7 @@ mod tests { assert!(!PkCheckResult::PrunedByManifest.exists()); assert!(PkCheckResult::FoundInHot.exists()); assert!(PkCheckResult::FoundInCold { - segment_path: "batch-0.parquet".to_string() + segment_path: "batch-0.parquet".to_string(), } .exists()); } @@ -692,9 +688,7 @@ mod tests { "batch-0.parquet", ); fs::create_dir_all( - 
Path::new(&parquet_path.full_path) - .parent() - .expect("parquet parent exists"), + Path::new(&parquet_path.full_path).parent().expect("parquet parent exists"), ) .expect("create parquet dir"); fs::write(&parquet_path.full_path, b"not a parquet file") diff --git a/backend/crates/kalamdb-tables/src/utils/row_utils.rs b/backend/crates/kalamdb-tables/src/utils/row_utils.rs index ce9ad93b8..bcdf8fce4 100644 --- a/backend/crates/kalamdb-tables/src/utils/row_utils.rs +++ b/backend/crates/kalamdb-tables/src/utils/row_utils.rs @@ -1,22 +1,27 @@ -use crate::error::KalamDbError; -use crate::error_extensions::KalamDbResultExt; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::{RecordBatch, RecordBatchOptions}; -use datafusion::catalog::Session; -use datafusion::logical_expr::{Expr, Operator}; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::constants::SystemColumnNames; -use kalamdb_commons::conversions::arrow_json_conversion::json_rows_to_arrow_batch; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{ReadContext, Role, UserId}; +use std::{collections::BTreeMap, sync::Arc}; + +use datafusion::{ + arrow::{ + datatypes::SchemaRef, + record_batch::{RecordBatch, RecordBatchOptions}, + }, + catalog::Session, + logical_expr::{Expr, Operator}, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + constants::SystemColumnNames, + conversions::arrow_json_conversion::json_rows_to_arrow_batch, + ids::SeqId, + models::{rows::Row, ReadContext, Role, UserId}, +}; use kalamdb_session_datafusion::{ extract_full_user_context as extract_full_user_context_session, extract_user_context as extract_user_context_session, }; use once_cell::sync::Lazy; -use std::collections::BTreeMap; -use std::sync::Arc; + +use crate::{error::KalamDbError, error_extensions::KalamDbResultExt}; static SYSTEM_USER_ID: Lazy = Lazy::new(|| UserId::from("_system")); @@ -139,7 +144,8 @@ pub fn extract_user_context(state: 
&dyn Session) -> Result<(&UserId, Role), Kala }) } -/// Extract full session context (user_id, role, read_context) from DataFusion SessionState extensions. +/// Extract full session context (user_id, role, read_context) from DataFusion SessionState +/// extensions. /// /// Use this when you need to check read routing (leader-only reads in Raft cluster mode). pub fn extract_full_user_context( diff --git a/backend/crates/kalamdb-tables/src/utils/test_backend.rs b/backend/crates/kalamdb-tables/src/utils/test_backend.rs index 91f3a7df1..29070c4b9 100644 --- a/backend/crates/kalamdb-tables/src/utils/test_backend.rs +++ b/backend/crates/kalamdb-tables/src/utils/test_backend.rs @@ -1,10 +1,15 @@ //! Test backend wrapper that records scan parameters. +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Mutex, +}; + use kalamdb_commons::storage::KvIterator; -use kalamdb_store::storage_trait::{Operation, Partition, StorageBackend}; -use kalamdb_store::test_utils::InMemoryBackend; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Mutex; +use kalamdb_store::{ + storage_trait::{Operation, Partition, StorageBackend}, + test_utils::InMemoryBackend, +}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ScanArgs { diff --git a/backend/crates/kalamdb-tables/src/utils/unified_dml/append.rs b/backend/crates/kalamdb-tables/src/utils/unified_dml/append.rs index f8f06d1c0..2711e6151 100644 --- a/backend/crates/kalamdb-tables/src/utils/unified_dml/append.rs +++ b/backend/crates/kalamdb-tables/src/utils/unified_dml/append.rs @@ -3,15 +3,17 @@ //! This module implements append_version(), the core function used by INSERT/UPDATE/DELETE //! to create new versions in the hot storage layer. 
-use crate::error::KalamDbError; -use crate::{SharedTableRow, SharedTableStore, UserTableRow, UserTableStore}; -use kalamdb_commons::conversions::arrow_json_conversion::json_to_row; -use kalamdb_commons::ids::{SeqId, UserTableRowId}; -use kalamdb_commons::models::schemas::TableType; -use kalamdb_commons::models::{TableId, UserId}; +use std::sync::Arc; + +use kalamdb_commons::{ + conversions::arrow_json_conversion::json_to_row, + ids::{SeqId, UserTableRowId}, + models::{schemas::TableType, TableId, UserId}, +}; use kalamdb_store::EntityStore; use kalamdb_system::SystemColumnsService; -use std::sync::Arc; + +use crate::{error::KalamDbError, SharedTableRow, SharedTableStore, UserTableRow, UserTableStore}; /// Append a new version to the table's hot storage (synchronous) /// diff --git a/backend/crates/kalamdb-tables/src/utils/unified_dml/validate.rs b/backend/crates/kalamdb-tables/src/utils/unified_dml/validate.rs index 20d19ba34..a6721ac56 100644 --- a/backend/crates/kalamdb-tables/src/utils/unified_dml/validate.rs +++ b/backend/crates/kalamdb-tables/src/utils/unified_dml/validate.rs @@ -3,10 +3,11 @@ //! This module implements `validate_primary_key()` and `extract_user_pk_value()` //! using the internal `Row` representation (`BTreeMap`). 
-use crate::error::KalamDbError; use datafusion::scalar::ScalarValue; use kalamdb_commons::models::rows::Row; +use crate::error::KalamDbError; + /// Extract primary key value from a `Row` pub fn extract_user_pk_value(fields: &Row, pk_column: &str) -> Result { let pk_value = fields.get(pk_column).ok_or_else(|| { @@ -60,9 +61,10 @@ fn scalar_pk_to_string(value: &ScalarValue, column: &str) -> Result( table_id: &TableId, primary_key_field_name: &str, @@ -114,12 +114,14 @@ where #[cfg(test)] mod tests { - use super::{build_vector_delete_ops, build_vector_upsert_batch_ops}; use datafusion::scalar::ScalarValue; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{NamespaceId, TableName}; - use kalamdb_commons::TableId; + use kalamdb_commons::{ + ids::SeqId, + models::{rows::Row, NamespaceId, TableName}, + TableId, + }; + + use super::{build_vector_delete_ops, build_vector_upsert_batch_ops}; fn table_id() -> TableId { TableId::new(NamespaceId::new("app"), TableName::new("items")) diff --git a/backend/crates/kalamdb-tables/src/utils/version_resolution.rs b/backend/crates/kalamdb-tables/src/utils/version_resolution.rs index 6d1037305..c2801bba2 100644 --- a/backend/crates/kalamdb-tables/src/utils/version_resolution.rs +++ b/backend/crates/kalamdb-tables/src/utils/version_resolution.rs @@ -1,26 +1,29 @@ //! Table-layer MVCC helpers that remain after moving shared winner-selection //! and Parquet decoding logic into `kalamdb-datafusion-sources`. 
-use crate::error::KalamDbError; -use crate::SharedTableRow; -use datafusion::arrow::array::RecordBatch; -use datafusion::error::DataFusionError; -use datafusion::scalar::ScalarValue; +use datafusion::{arrow::array::RecordBatch, error::DataFusionError, scalar::ScalarValue}; +use kalamdb_commons::{ids::SeqId, serialization::row_codec::RowMetadata}; use kalamdb_datafusion_sources::exec::{ parquet_batch_to_metadata as shared_parquet_batch_to_metadata, parquet_batch_to_rows as shared_parquet_batch_to_rows, VersionedRow, }; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::serialization::row_codec::RowMetadata; + +use crate::{error::KalamDbError, SharedTableRow}; #[cfg(test)] mod tests { - use super::*; - use datafusion::arrow::array::{BooleanArray, Int64Array, StringArray, UInt64Array}; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }; + + use datafusion::arrow::{ + array::{BooleanArray, Int64Array, StringArray, UInt64Array}, + datatypes::{DataType, Field, Schema}, + }; use kalamdb_commons::constants::SystemColumnNames; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; + + use super::*; #[derive(Debug, Clone)] struct TestVersionedRow { @@ -186,10 +189,9 @@ mod tests { } } -pub use kalamdb_datafusion_sources::exec::ParquetRowData; pub use kalamdb_datafusion_sources::exec::{ count_merged_rows, count_resolved_from_metadata, merge_versioned_rows, - resolve_latest_kvs_from_cold_batch, + resolve_latest_kvs_from_cold_batch, ParquetRowData, }; fn shared_decoder_error(error: DataFusionError) -> KalamDbError { @@ -238,4 +240,3 @@ pub fn parquet_batch_to_metadata( .map(|rows| rows.into_iter().map(|metadata| (metadata.seq, metadata)).collect()) .map_err(shared_decoder_error) } - diff --git a/backend/crates/kalamdb-tables/tests/filter_pushdown_contract.rs b/backend/crates/kalamdb-tables/tests/filter_pushdown_contract.rs index 1333b9533..3341a3ee1 100644 --- 
a/backend/crates/kalamdb-tables/tests/filter_pushdown_contract.rs +++ b/backend/crates/kalamdb-tables/tests/filter_pushdown_contract.rs @@ -1,12 +1,12 @@ use datafusion::logical_expr::{col, lit, Expr, TableProviderFilterPushDown}; -use kalamdb_datafusion_sources::provider::{mvcc_filter_capability, pushdown_results_for_filters}; -use kalamdb_datafusion_sources::pruning::mvcc_filter_evaluation; +use kalamdb_datafusion_sources::{ + provider::{mvcc_filter_capability, pushdown_results_for_filters}, + pruning::mvcc_filter_evaluation, +}; fn classify_mvcc_filters(filters: Vec) -> Vec { let filter_refs: Vec<&Expr> = filters.iter().collect(); - pushdown_results_for_filters(&filter_refs, |filter| { - mvcc_filter_capability(filter, "id") - }) + pushdown_results_for_filters(&filter_refs, |filter| mvcc_filter_capability(filter, "id")) } #[test] @@ -40,10 +40,7 @@ fn mvcc_filter_evaluation_preserves_inexact_source_pruning_subset() { ]; let evaluation = mvcc_filter_evaluation(&filters, "id"); - assert_eq!( - evaluation.exact.filters.as_ref(), - filters.as_slice(), - ); + assert_eq!(evaluation.exact.filters.as_ref(), filters.as_slice(),); assert_eq!( evaluation.inexact.filters.as_ref(), vec![ @@ -54,4 +51,4 @@ fn mvcc_filter_evaluation_preserves_inexact_source_pruning_subset() { ] .as_slice(), ); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-tables/tests/provider_source_models.rs b/backend/crates/kalamdb-tables/tests/provider_source_models.rs index b66f7c5cf..55fae9171 100644 --- a/backend/crates/kalamdb-tables/tests/provider_source_models.rs +++ b/backend/crates/kalamdb-tables/tests/provider_source_models.rs @@ -1,42 +1,49 @@ -use std::collections::HashMap; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, +}; use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use 
datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::collect; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; -use kalamdb_commons::models::{NamespaceId, ReadContext, Role, StorageId, TableId, TableName}; -use kalamdb_commons::schemas::ColumnDefault; -use kalamdb_commons::websocket::ChangeNotification; -use kalamdb_commons::{OperationKind, TableAccess, TableType, TransactionId, UserId}; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + datasource::TableProvider, + execution::context::SessionContext, + physical_plan::collect, + scalar::ScalarValue, +}; +use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + rows::Row, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + NamespaceId, ReadContext, Role, StorageId, TableId, TableName, + }, + schemas::ColumnDefault, + websocket::ChangeNotification, + OperationKind, TableAccess, TableType, TransactionId, UserId, +}; use kalamdb_datafusion_sources::exec::DeferredBatchExec; use kalamdb_filestore::StorageRegistry; use kalamdb_sharding::ShardRouter; -use kalamdb_store::test_utils::InMemoryBackend; -use kalamdb_store::{StorageBackend, StorageError}; +use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend, StorageError}; use kalamdb_system::{ ClusterCoordinator, Manifest, ManifestCacheEntry, ManifestService, NotificationService, SchemaRegistry, SessionUserContext, Storage, StorageType, StoragesTableProvider, SystemColumnsService, }; -use kalamdb_tables::utils::TableServices; use kalamdb_tables::{ new_indexed_shared_table_store, new_indexed_user_table_store, new_stream_table_store, - BaseTableProvider, SharedTableProvider, SharedTableRow, StreamTableProvider, - StreamTableStorageMode, StreamTableStoreConfig, 
TableProviderCore, UserTableProvider, - UserTableRow, + utils::TableServices, BaseTableProvider, SharedTableProvider, SharedTableRow, + StreamTableProvider, StreamTableStorageMode, StreamTableStoreConfig, TableProviderCore, + UserTableProvider, UserTableRow, }; use kalamdb_transactions::{ - CommitSequenceSource, TransactionAccessError, TransactionAccessValidator, TransactionMutationSink, - TransactionOverlay, TransactionOverlayEntry, TransactionOverlayExec, TransactionQueryContext, - TransactionQueryExtension, + CommitSequenceSource, TransactionAccessError, TransactionAccessValidator, + TransactionMutationSink, TransactionOverlay, TransactionOverlayEntry, TransactionOverlayExec, + TransactionQueryContext, TransactionQueryExtension, }; use tempfile::TempDir; @@ -45,12 +52,7 @@ fn total_rows(batches: &[RecordBatch]) -> usize { } fn row(values: Vec<(&str, ScalarValue)>) -> Row { - Row::from_vec( - values - .into_iter() - .map(|(name, value)| (name.to_string(), value)) - .collect(), - ) + Row::from_vec(values.into_iter().map(|(name, value)| (name.to_string(), value)).collect()) } #[derive(Debug, Clone)] @@ -77,8 +79,7 @@ impl SchemaRegistry for TestSchemaRegistry { if &TableId::from_strings( self.table_def.namespace_id.as_str(), self.table_def.table_name.as_str(), - ) - == table_id + ) == table_id { Ok(Arc::clone(&self.schema)) } else { @@ -93,8 +94,7 @@ impl SchemaRegistry for TestSchemaRegistry { if &TableId::from_strings( self.table_def.namespace_id.as_str(), self.table_def.table_name.as_str(), - ) - == table_id + ) == table_id { Ok(Some(Arc::clone(&self.table_def))) } else { @@ -339,14 +339,13 @@ fn build_storage_registry( }) .expect("seed local storage"); - Arc::new(StorageRegistry::new( - storages_provider, - base_directory, - Default::default(), - )) + Arc::new(StorageRegistry::new(storages_provider, base_directory, Default::default())) } -fn build_services(table_def: Arc, backend: Arc) -> OwnedServices { +fn build_services( + table_def: Arc, + backend: Arc, +) 
-> OwnedServices { let schema = table_def.to_arrow_schema().expect("build arrow schema"); let temp_dir = tempfile::tempdir().expect("create temp dir"); let storage_registry = build_storage_registry(backend, &temp_dir); @@ -536,10 +535,7 @@ async fn stream_provider_scan_uses_deferred_batch_exec_and_returns_rows() { let ctx = session_with_user(&user_id); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build stream plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build stream plan"); assert!(plan.as_any().is::()); @@ -585,10 +581,7 @@ async fn user_provider_scan_uses_deferred_batch_exec_and_returns_rows() { let ctx = session_with_user(&user_id); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build user plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build user plan"); assert!(plan.as_any().is::()); @@ -646,10 +639,7 @@ async fn user_provider_scan_with_overlay_uses_transaction_overlay_exec() { let ctx = session_with_transaction(&user_id, tx_context); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build user plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build user plan"); assert!(plan.as_any().is::()); let child = plan.children().into_iter().next().expect("overlay child plan"); @@ -696,10 +686,7 @@ async fn shared_provider_scan_uses_deferred_batch_exec_and_returns_rows() { let user_id = UserId::new("shared-reader"); let ctx = session_with_user(&user_id); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build shared plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build shared plan"); assert!(plan.as_any().is::()); @@ -756,10 +743,7 @@ async fn shared_provider_scan_with_overlay_uses_transaction_overlay_exec() { let ctx = session_with_transaction(&user_id, 
tx_context); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build shared plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build shared plan"); assert!(plan.as_any().is::()); let child = plan.children().into_iter().next().expect("overlay child plan"); @@ -767,4 +751,4 @@ async fn shared_provider_scan_with_overlay_uses_transaction_overlay_exec() { let batches = collect(plan, state.task_ctx()).await.expect("collect shared plan"); assert_eq!(total_rows(&batches), 2); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-tables/tests/stream_provider_lightweight_scan.rs b/backend/crates/kalamdb-tables/tests/stream_provider_lightweight_scan.rs index 9d265c994..c441841df 100644 --- a/backend/crates/kalamdb-tables/tests/stream_provider_lightweight_scan.rs +++ b/backend/crates/kalamdb-tables/tests/stream_provider_lightweight_scan.rs @@ -1,43 +1,38 @@ -use std::collections::HashMap; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::collect; -use datafusion::scalar::ScalarValue; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableOptions}; -use kalamdb_commons::models::{NamespaceId, ReadContext, Role, StorageId, TableId, TableName}; -use kalamdb_commons::schemas::ColumnDefault; -use kalamdb_commons::websocket::ChangeNotification; +use datafusion::{ + arrow::datatypes::SchemaRef, datasource::TableProvider, execution::context::SessionContext, + physical_plan::collect, scalar::ScalarValue, +}; +use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + rows::Row, + schemas::{ColumnDefinition, TableDefinition, TableOptions}, + NamespaceId, 
ReadContext, Role, StorageId, TableId, TableName, + }, + schemas::ColumnDefault, + websocket::ChangeNotification, +}; use kalamdb_datafusion_sources::exec::DeferredBatchExec; use kalamdb_filestore::StorageRegistry; use kalamdb_sharding::ShardRouter; -use kalamdb_store::test_utils::InMemoryBackend; -use kalamdb_store::{StorageBackend, StorageError}; +use kalamdb_store::{test_utils::InMemoryBackend, StorageBackend, StorageError}; use kalamdb_system::{ ClusterCoordinator, Manifest, ManifestCacheEntry, ManifestService, NotificationService, SchemaRegistry, SessionUserContext, Storage, StorageType, StoragesTableProvider, SystemColumnsService, }; -use kalamdb_tables::utils::TableServices; use kalamdb_tables::{ - new_stream_table_store, BaseTableProvider, StreamTableProvider, StreamTableStorageMode, - StreamTableStoreConfig, TableProviderCore, + new_stream_table_store, utils::TableServices, BaseTableProvider, StreamTableProvider, + StreamTableStorageMode, StreamTableStoreConfig, TableProviderCore, }; use kalamdb_transactions::CommitSequenceSource; use tempfile::TempDir; fn row(values: Vec<(&str, ScalarValue)>) -> Row { - Row::from_vec( - values - .into_iter() - .map(|(name, value)| (name.to_string(), value)) - .collect(), - ) + Row::from_vec(values.into_iter().map(|(name, value)| (name.to_string(), value)).collect()) } #[derive(Debug, Clone)] @@ -166,7 +161,10 @@ impl ManifestService for NoopManifestService { panic!("stage_before_flush is unused in stream planning tests") } - fn get_manifest_user_ids(&self, _table_id: &TableId) -> Result, StorageError> { + fn get_manifest_user_ids( + &self, + _table_id: &TableId, + ) -> Result, StorageError> { Ok(Vec::new()) } } @@ -177,7 +175,11 @@ struct NoopNotificationService; impl NotificationService for NoopNotificationService { type Notification = ChangeNotification; - fn has_subscribers(&self, _user_id: Option<&kalamdb_commons::UserId>, _table_id: &TableId) -> bool { + fn has_subscribers( + &self, + _user_id: 
Option<&kalamdb_commons::UserId>, + _table_id: &TableId, + ) -> bool { false } @@ -275,14 +277,13 @@ fn build_storage_registry( }) .expect("seed local storage"); - Arc::new(StorageRegistry::new( - storages_provider, - base_directory, - Default::default(), - )) + Arc::new(StorageRegistry::new(storages_provider, base_directory, Default::default())) } -fn build_services(table_def: Arc, backend: Arc) -> OwnedServices { +fn build_services( + table_def: Arc, + backend: Arc, +) -> OwnedServices { let schema = table_def.to_arrow_schema().expect("build arrow schema"); let temp_dir = tempfile::tempdir().expect("create temp dir"); let storage_registry = build_storage_registry(backend, &temp_dir); @@ -369,10 +370,7 @@ async fn stream_provider_planning_stays_lightweight_until_execution() { let user_id = kalamdb_commons::UserId::new("stream-owner"); let ctx = session_with_user(&user_id); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build stream plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build stream plan"); assert!(plan.as_any().is::()); @@ -387,10 +385,8 @@ async fn stream_provider_planning_stays_lightweight_until_execution() { .await .expect("insert row after planning"); - let batches = collect(plan, state.task_ctx()) - .await - .expect("collect stream plan after insert"); + let batches = collect(plan, state.task_ctx()).await.expect("collect stream plan after insert"); let total_rows: usize = batches.iter().map(|batch| batch.num_rows()).sum(); assert_eq!(total_rows, 1, "execution should see rows inserted after planning"); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-transactions/Cargo.toml b/backend/crates/kalamdb-transactions/Cargo.toml index 3802c7c04..f0bdbd3fc 100644 --- a/backend/crates/kalamdb-transactions/Cargo.toml +++ b/backend/crates/kalamdb-transactions/Cargo.toml @@ -16,7 +16,6 @@ kalamdb-datafusion-sources = { workspace = true } datafusion = { workspace = 
true } futures-util = { workspace = true } serde = { workspace = true } -serde_json = { workspace = true } [dev-dependencies] tokio = { workspace = true } diff --git a/backend/crates/kalamdb-transactions/src/access.rs b/backend/crates/kalamdb-transactions/src/access.rs index 02f9ea69c..c5eb65caf 100644 --- a/backend/crates/kalamdb-transactions/src/access.rs +++ b/backend/crates/kalamdb-transactions/src/access.rs @@ -1,7 +1,9 @@ use std::fmt; -use kalamdb_commons::models::{TableId, TransactionId, UserId}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + models::{TableId, TransactionId, UserId}, + TableType, +}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum TransactionAccessError { diff --git a/backend/crates/kalamdb-transactions/src/overlay.rs b/backend/crates/kalamdb-transactions/src/overlay.rs index 512b5981d..f202aa6ea 100644 --- a/backend/crates/kalamdb-transactions/src/overlay.rs +++ b/backend/crates/kalamdb-transactions/src/overlay.rs @@ -1,8 +1,9 @@ use std::collections::{BTreeMap, HashMap, HashSet}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TableId, TransactionId, UserId}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + models::{rows::Row, OperationKind, TableId, TransactionId, UserId}, + TableType, +}; use crate::query_context::TransactionOverlayView; @@ -65,18 +66,20 @@ impl TransactionOverlay { let entry_key = scoped_entry_key(user_id.as_ref(), primary_key.as_str()); let effective_entry = self.merge_visible_entry(&table_id, user_id.as_ref(), primary_key.as_str(), entry); + let is_deleted = effective_entry.is_deleted(); + let operation_kind = effective_entry.operation_kind; self.entries_by_table .entry(table_id.clone()) .or_default() - .insert(entry_key.clone(), effective_entry.clone()); + .insert(entry_key.clone(), effective_entry); self.clear_key_membership(&table_id, entry_key.as_str()); - let target_map = if effective_entry.is_deleted() { + let target_map = if is_deleted { &mut 
self.deleted_keys } else { - match effective_entry.operation_kind { + match operation_kind { OperationKind::Insert => &mut self.inserted_keys, OperationKind::Update => &mut self.updated_keys, OperationKind::Delete => &mut self.deleted_keys, @@ -208,12 +211,13 @@ impl TransactionOverlayView for TransactionOverlay { #[cfg(test)] mod tests { - use datafusion::scalar::ScalarValue; use std::collections::BTreeMap; - use super::*; + use datafusion::scalar::ScalarValue; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + fn row(values: &[(&str, ScalarValue)]) -> Row { let mut fields = BTreeMap::new(); for (name, value) in values { diff --git a/backend/crates/kalamdb-transactions/src/overlay_exec.rs b/backend/crates/kalamdb-transactions/src/overlay_exec.rs index a651f634e..27b5001f6 100644 --- a/backend/crates/kalamdb-transactions/src/overlay_exec.rs +++ b/backend/crates/kalamdb-transactions/src/overlay_exec.rs @@ -1,26 +1,28 @@ -use std::any::Any; -use std::collections::BTreeMap; -use std::collections::HashMap; -use std::sync::Arc; - -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::execution::{SendableRecordBatchStream, TaskContext}; -use datafusion::physical_expr::EquivalenceProperties; -use datafusion::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, - PlanProperties, +use std::{ + any::Any, + collections::{BTreeMap, HashMap}, + sync::Arc, }; -use datafusion::scalar::ScalarValue; -use datafusion::{common::Result as DataFusionResult, error::DataFusionError}; -use futures_util::TryStreamExt; -use kalamdb_commons::conversions::arrow_json_conversion::json_rows_to_arrow_batch; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::TableId; -use kalamdb_datafusion_sources::exec::projected_schema; -use kalamdb_datafusion_sources::stream::one_shot_batch_stream; +use datafusion::{ + 
arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + common::Result as DataFusionResult, + error::DataFusionError, + execution::{SendableRecordBatchStream, TaskContext}, + physical_expr::EquivalenceProperties, + physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + PlanProperties, + }, + scalar::ScalarValue, +}; +use futures_util::TryStreamExt; +use kalamdb_commons::{ + conversions::arrow_json_conversion::json_rows_to_arrow_batch, + models::{rows::Row, UserId}, + TableId, +}; +use kalamdb_datafusion_sources::{exec::projected_schema, stream::one_shot_batch_stream}; use crate::overlay::TransactionOverlay; @@ -343,13 +345,17 @@ fn merge_row(base: &mut Row, overlay: &Row) { #[cfg(test)] mod tests { - use datafusion::arrow::array::{Int64Array, StringArray}; - use datafusion::arrow::datatypes::{DataType, Field, Schema}; + use datafusion::arrow::{ + array::{Int64Array, StringArray}, + datatypes::{DataType, Field, Schema}, + }; + use kalamdb_commons::{ + models::{NamespaceId, OperationKind, TableName, TransactionId}, + TableType, + }; use super::*; use crate::overlay::TransactionOverlayEntry; - use kalamdb_commons::models::{NamespaceId, OperationKind, TableName, TransactionId}; - use kalamdb_commons::TableType; fn row(values: &[(&str, ScalarValue)]) -> Row { let mut fields = BTreeMap::new(); @@ -526,8 +532,7 @@ mod tests { #[tokio::test] async fn overlay_exec_applies_overlay_once_across_multiple_input_partitions() { - use datafusion::execution::context::SessionContext; - use datafusion::physical_plan::collect; + use datafusion::{execution::context::SessionContext, physical_plan::collect}; use kalamdb_datafusion_sources::stream::one_shot_batch_stream; #[derive(Debug)] @@ -601,9 +606,7 @@ mod tests { } let batch = RecordBatch::new_empty(Arc::clone(&self.schema)); - Ok(one_shot_batch_stream(Arc::clone(&self.schema), async move { - Ok(batch) - })) + Ok(one_shot_batch_stream(Arc::clone(&self.schema), async move { 
Ok(batch) })) } } diff --git a/backend/crates/kalamdb-transactions/src/query_context.rs b/backend/crates/kalamdb-transactions/src/query_context.rs index 6b18aae45..2fb3aa5a5 100644 --- a/backend/crates/kalamdb-transactions/src/query_context.rs +++ b/backend/crates/kalamdb-transactions/src/query_context.rs @@ -1,12 +1,15 @@ use std::sync::Arc; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TableId, TransactionId, UserId}; -use kalamdb_commons::TableType; +use kalamdb_commons::{ + models::{rows::Row, OperationKind, TableId, TransactionId, UserId}, + TableType, +}; -use crate::access::{TransactionAccessError, TransactionAccessValidator}; -use crate::overlay::TransactionOverlay; -use crate::staged_mutation::StagedMutation; +use crate::{ + access::{TransactionAccessError, TransactionAccessValidator}, + overlay::TransactionOverlay, + staged_mutation::StagedMutation, +}; /// Lightweight view trait exposed to query providers for transaction-local reads. pub trait TransactionOverlayView: std::fmt::Debug + Send + Sync { diff --git a/backend/crates/kalamdb-transactions/src/query_extension.rs b/backend/crates/kalamdb-transactions/src/query_extension.rs index a1ce9f02b..987ab9156 100644 --- a/backend/crates/kalamdb-transactions/src/query_extension.rs +++ b/backend/crates/kalamdb-transactions/src/query_extension.rs @@ -1,8 +1,10 @@ use std::any::Any; -use datafusion::catalog::Session; -use datafusion::common::config::{ConfigEntry, ConfigExtension, ExtensionOptions}; -use datafusion::execution::context::SessionState; +use datafusion::{ + catalog::Session, + common::config::{ConfigEntry, ConfigExtension, ExtensionOptions}, + execution::context::SessionState, +}; use crate::query_context::TransactionQueryContext; diff --git a/backend/crates/kalamdb-transactions/src/staged_mutation.rs b/backend/crates/kalamdb-transactions/src/staged_mutation.rs index 81d2341a8..26ec0e0b0 100644 --- a/backend/crates/kalamdb-transactions/src/staged_mutation.rs 
+++ b/backend/crates/kalamdb-transactions/src/staged_mutation.rs @@ -1,8 +1,10 @@ use std::fmt; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{OperationKind, TableId, TransactionId, UserId}; -use kalamdb_commons::TableType; +use datafusion::{arrow::array::Array, scalar::ScalarValue}; +use kalamdb_commons::{ + models::{rows::Row, OperationKind, TableId, TransactionId, UserId}, + TableType, +}; use serde::{Deserialize, Serialize}; use crate::overlay::TransactionOverlayEntry; @@ -24,6 +26,46 @@ impl fmt::Display for StagedInsertBuildError { impl std::error::Error for StagedInsertBuildError {} +#[inline] +fn approximate_row_size_bytes(row: &Row) -> usize { + row.values + .iter() + .map(|(column_name, value)| column_name.len() + approximate_scalar_value_size_bytes(value)) + .sum() +} + +#[inline] +fn approximate_scalar_value_size_bytes(value: &ScalarValue) -> usize { + let base_size = std::mem::size_of::(); + + match value { + ScalarValue::Utf8(Some(value)) + | ScalarValue::Utf8View(Some(value)) + | ScalarValue::LargeUtf8(Some(value)) => base_size + value.len(), + ScalarValue::Binary(Some(value)) + | ScalarValue::BinaryView(Some(value)) + | ScalarValue::FixedSizeBinary(_, Some(value)) + | ScalarValue::LargeBinary(Some(value)) => base_size + value.len(), + ScalarValue::FixedSizeList(array) => base_size + array.get_array_memory_size(), + ScalarValue::List(array) => base_size + array.get_array_memory_size(), + ScalarValue::LargeList(array) => base_size + array.get_array_memory_size(), + ScalarValue::Struct(array) => base_size + array.get_array_memory_size(), + ScalarValue::Map(array) => base_size + array.get_array_memory_size(), + ScalarValue::TimestampSecond(_, Some(timezone)) + | ScalarValue::TimestampMillisecond(_, Some(timezone)) + | ScalarValue::TimestampMicrosecond(_, Some(timezone)) + | ScalarValue::TimestampNanosecond(_, Some(timezone)) => base_size + timezone.len(), + ScalarValue::Union(Some((_, nested_value)), _, _) => { + base_size + 
approximate_scalar_value_size_bytes(nested_value) + }, + ScalarValue::Dictionary(_, nested_value) + | ScalarValue::RunEndEncoded(_, _, nested_value) => { + base_size + approximate_scalar_value_size_bytes(nested_value) + }, + _ => base_size, + } +} + /// Shared logical DML mutation buffered inside an explicit transaction. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct StagedMutation { @@ -64,11 +106,10 @@ impl StagedMutation { #[inline] pub fn approximate_size_bytes(&self) -> usize { - let payload_bytes = serde_json::to_vec(&self.payload).map(|bytes| bytes.len()).unwrap_or(0); self.primary_key.len() + self.table_id.full_name().len() + self.user_id.as_ref().map(|user_id| user_id.as_str().len()).unwrap_or(0) - + payload_bytes + + approximate_row_size_bytes(&self.payload) + std::mem::size_of::() } @@ -120,11 +161,13 @@ pub fn build_insert_staged_mutations( #[cfg(test)] mod tests { - use super::{build_insert_staged_mutations, StagedInsertBuildError}; use datafusion::scalar::ScalarValue; - use kalamdb_commons::models::rows::Row; - use kalamdb_commons::models::{TableName, TransactionId, UserId}; - use kalamdb_commons::{NamespaceId, TableId, TableType}; + use kalamdb_commons::{ + models::{rows::Row, TableName, TransactionId, UserId}, + NamespaceId, TableId, TableType, + }; + + use super::{build_insert_staged_mutations, StagedInsertBuildError}; fn test_table_id() -> TableId { TableId::new(NamespaceId::new("app"), TableName::new("items")) diff --git a/backend/crates/kalamdb-transactions/tests/current_api_surface.rs b/backend/crates/kalamdb-transactions/tests/current_api_surface.rs index a35cd3724..dffd461e1 100644 --- a/backend/crates/kalamdb-transactions/tests/current_api_surface.rs +++ b/backend/crates/kalamdb-transactions/tests/current_api_surface.rs @@ -7,8 +7,7 @@ fn uses_current_execution_plan_surface() { #[allow(dead_code)] fn uses_current_record_batch_stream_surface() { - use datafusion::execution::RecordBatchStream; - use 
datafusion::physical_plan::SendableRecordBatchStream; + use datafusion::{execution::RecordBatchStream, physical_plan::SendableRecordBatchStream}; fn _assert_trait() {} fn _accept(_stream: SendableRecordBatchStream) {} } diff --git a/backend/crates/kalamdb-vector/src/flush.rs b/backend/crates/kalamdb-vector/src/flush.rs index 31b29d1da..af1bbd6f0 100644 --- a/backend/crates/kalamdb-vector/src/flush.rs +++ b/backend/crates/kalamdb-vector/src/flush.rs @@ -1,21 +1,29 @@ -use crate::hot_staging::{ - new_indexed_shared_vector_hot_store, new_indexed_user_vector_hot_store, - normalize_vector_column_name, SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp, - VectorHotOpType, +use std::{ + collections::HashMap, + fmt::{Display, Formatter}, + sync::Arc, }; -use crate::snapshot_codec::{decode_snapshot, encode_snapshot, VixSnapshotEntry, VixSnapshotFile}; -use crate::usearch_engine::{add_vector, create_index, export_vector, load_index, serialize_index}; + use bytes::Bytes; use datafusion::arrow::datatypes::{DataType, SchemaRef}; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; +use kalamdb_commons::{ + ids::SeqId, + models::{TableId, UserId}, + schemas::TableType, +}; use kalamdb_filestore::{FilestoreError, StorageCached}; use kalamdb_store::{EntityStore, StorageBackend}; use kalamdb_system::{Manifest, VectorEngine, VectorMetric}; -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; -use std::sync::Arc; + +use crate::{ + hot_staging::{ + new_indexed_shared_vector_hot_store, new_indexed_user_vector_hot_store, + normalize_vector_column_name, SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp, + VectorHotOpType, + }, + snapshot_codec::{decode_snapshot, encode_snapshot, VixSnapshotEntry, VixSnapshotFile}, + usearch_engine::{add_vector, create_index, export_vector, load_index, serialize_index}, +}; const VECTOR_SCAN_LIMIT: usize = 100_000; diff --git 
a/backend/crates/kalamdb-vector/src/hot_query_cache.rs b/backend/crates/kalamdb-vector/src/hot_query_cache.rs index 28ac62e5f..9be1c8692 100644 --- a/backend/crates/kalamdb-vector/src/hot_query_cache.rs +++ b/backend/crates/kalamdb-vector/src/hot_query_cache.rs @@ -1,21 +1,29 @@ -use crate::hot_staging::{ - new_indexed_shared_vector_hot_store, new_indexed_user_vector_hot_store, SharedVectorHotOpId, - UserVectorHotOpId, VectorHotOp, VectorHotOpType, +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, }; -use crate::usearch_engine::{add_vector, create_index, search_index}; + use dashmap::DashMap; use datafusion::common::DataFusionError; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::StorageKey; +use kalamdb_commons::{ + ids::SeqId, + models::{TableId, UserId}, + schemas::TableType, + StorageKey, +}; use kalamdb_store::{EntityStore, StorageBackend}; use kalamdb_system::VectorMetric; use once_cell::sync::Lazy; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; use usearch::Index; +use crate::{ + hot_staging::{ + new_indexed_shared_vector_hot_store, new_indexed_user_vector_hot_store, + SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp, VectorHotOpType, + }, + usearch_engine::{add_vector, create_index, search_index}, +}; + const HOT_INCREMENTAL_SCAN_LIMIT: usize = 100_000; const HOT_RESERVE_STEP: usize = 1024; @@ -308,13 +316,14 @@ pub(crate) fn clear_hot_query_cache_for_tests() { #[cfg(test)] mod tests { + use kalamdb_commons::ids::SeqId; + use kalamdb_store::test_utils::TestDb; + use super::*; use crate::hot_staging::{ new_indexed_shared_vector_hot_store, new_indexed_user_vector_hot_store, SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp, VectorHotOpType, }; - use kalamdb_commons::ids::SeqId; - use kalamdb_store::test_utils::TestDb; fn shared_op( table_id: &TableId, diff --git a/backend/crates/kalamdb-vector/src/hot_staging/models.rs 
b/backend/crates/kalamdb-vector/src/hot_staging/models.rs index 3585fd16c..73a4e2d91 100644 --- a/backend/crates/kalamdb-vector/src/hot_staging/models.rs +++ b/backend/crates/kalamdb-vector/src/hot_staging/models.rs @@ -1,7 +1,9 @@ -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::storage_key::{decode_key, encode_key, encode_prefix}; -use kalamdb_commons::{KSerializable, StorageKey}; +use kalamdb_commons::{ + ids::SeqId, + models::{TableId, UserId}, + storage_key::{decode_key, encode_key, encode_prefix}, + KSerializable, StorageKey, +}; use kalamdb_system::VectorMetric; use serde::{Deserialize, Serialize}; diff --git a/backend/crates/kalamdb-vector/src/hot_staging/pk_index.rs b/backend/crates/kalamdb-vector/src/hot_staging/pk_index.rs index 39c617cf9..900550927 100644 --- a/backend/crates/kalamdb-vector/src/hot_staging/pk_index.rs +++ b/backend/crates/kalamdb-vector/src/hot_staging/pk_index.rs @@ -1,12 +1,17 @@ -use super::models::{SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp}; -use super::vector_hot_store::{ - shared_vector_pk_index_partition_name, user_vector_pk_index_partition_name, +use kalamdb_commons::{ + models::{TableId, UserId}, + storage::Partition, + storage_key::{encode_key, encode_prefix}, }; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::storage::Partition; -use kalamdb_commons::storage_key::{encode_key, encode_prefix}; use kalamdb_store::IndexDefinition; +use super::{ + models::{SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp}, + vector_hot_store::{ + shared_vector_pk_index_partition_name, user_vector_pk_index_partition_name, + }, +}; + /// Secondary index for user-scoped vector ops by (user_id, pk, seq). 
pub struct UserVectorPkIndex { partition: Partition, @@ -83,9 +88,12 @@ impl IndexDefinition for SharedVectorPkIndex { #[cfg(test)] mod tests { + use kalamdb_commons::{ + ids::SeqId, + models::{NamespaceId, TableName}, + }; + use super::*; - use kalamdb_commons::ids::SeqId; - use kalamdb_commons::models::{NamespaceId, TableName}; #[test] fn test_user_vector_pk_index_partition_name() { diff --git a/backend/crates/kalamdb-vector/src/hot_staging/vector_hot_store.rs b/backend/crates/kalamdb-vector/src/hot_staging/vector_hot_store.rs index 7b2cf0dcb..03f593d74 100644 --- a/backend/crates/kalamdb-vector/src/hot_staging/vector_hot_store.rs +++ b/backend/crates/kalamdb-vector/src/hot_staging/vector_hot_store.rs @@ -1,8 +1,12 @@ -use super::models::{SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp}; -use super::pk_index::{SharedVectorPkIndex, UserVectorPkIndex}; +use std::sync::Arc; + use kalamdb_commons::models::TableId; use kalamdb_store::{IndexedEntityStore, StorageBackend}; -use std::sync::Arc; + +use super::{ + models::{SharedVectorHotOpId, UserVectorHotOpId, VectorHotOp}, + pk_index::{SharedVectorPkIndex, UserVectorPkIndex}, +}; /// Indexed store alias for user-scoped vector hot ops. 
pub type UserVectorHotStore = IndexedEntityStore; @@ -62,9 +66,10 @@ pub fn new_indexed_shared_vector_hot_store( #[cfg(test)] mod tests { - use super::*; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + #[test] fn test_normalize_vector_column_name() { assert_eq!(normalize_vector_column_name("embedding"), "embedding"); diff --git a/backend/crates/kalamdb-vector/src/lib.rs b/backend/crates/kalamdb-vector/src/lib.rs index 56d293191..fdcce8f23 100644 --- a/backend/crates/kalamdb-vector/src/lib.rs +++ b/backend/crates/kalamdb-vector/src/lib.rs @@ -16,7 +16,6 @@ pub use hot_staging::{ SharedVectorPkIndex, UserVectorHotOpId, UserVectorHotStore, UserVectorPkIndex, VectorHotOp, VectorHotOpType, }; - pub use sql::{ CosineDistanceFunction, UnavailableVectorSearchRuntime, VectorSearchRuntime, VectorSearchScope, VectorSearchTableFunction, diff --git a/backend/crates/kalamdb-vector/src/sql/cosine_distance.rs b/backend/crates/kalamdb-vector/src/sql/cosine_distance.rs index 009792411..dc426db67 100644 --- a/backend/crates/kalamdb-vector/src/sql/cosine_distance.rs +++ b/backend/crates/kalamdb-vector/src/sql/cosine_distance.rs @@ -1,16 +1,18 @@ -use datafusion::arrow::array::{ - Array, ArrayRef, FixedSizeListArray, Float32Array, Float64Array, Int32Array, Int64Array, - LargeListArray, ListArray, UInt32Array, UInt64Array, -}; -use datafusion::arrow::datatypes::DataType; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{ - ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, +use std::{any::Any, sync::Arc}; + +use datafusion::{ + arrow::{ + array::{ + Array, ArrayRef, FixedSizeListArray, Float32Array, Float64Array, Int32Array, + Int64Array, LargeListArray, ListArray, UInt32Array, UInt64Array, + }, + datatypes::DataType, + }, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}, + 
scalar::ScalarValue, }; -use datafusion::scalar::ScalarValue; use kalamdb_commons::arrow_utils::{arrow_float32, ArrowDataType}; -use std::any::Any; -use std::sync::Arc; #[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] pub struct CosineDistanceFunction; @@ -306,9 +308,9 @@ fn parse_numeric_array(values: &dyn Array) -> DataFusionResult> { #[cfg(test)] mod tests { + use datafusion::{arrow::datatypes::Field, logical_expr::ScalarUDF}; + use super::*; - use datafusion::arrow::datatypes::Field; - use datafusion::logical_expr::ScalarUDF; #[test] fn test_cosine_distance_function_creation() { diff --git a/backend/crates/kalamdb-vector/src/sql/vector_search.rs b/backend/crates/kalamdb-vector/src/sql/vector_search.rs index 061842b46..b3b00ee87 100644 --- a/backend/crates/kalamdb-vector/src/sql/vector_search.rs +++ b/backend/crates/kalamdb-vector/src/sql/vector_search.rs @@ -1,35 +1,42 @@ -use crate::hot_query_cache::search_hot_candidates; -use crate::snapshot_codec::decode_snapshot; -use crate::usearch_engine::{load_index, search_index}; +use std::{any::Any, collections::HashMap, fmt::Debug, sync::Arc}; + use async_trait::async_trait; -use datafusion::arrow::array::{ - Array, ArrayRef, Float32Array, Float64Array, Int32Array, Int64Array, StringArray, UInt32Array, - UInt64Array, +use datafusion::{ + arrow::{ + array::{ + Array, ArrayRef, Float32Array, Float64Array, Int32Array, Int64Array, StringArray, + UInt32Array, UInt64Array, + }, + datatypes::{DataType, Field, Schema, SchemaRef}, + record_batch::RecordBatch, + }, + catalog::{Session, TableFunctionImpl}, + common::{DFSchema, DataFusionError, Result}, + datasource::TableProvider, + logical_expr::{Expr, TableProviderFilterPushDown, TableType as DataFusionTableType}, + physical_expr::PhysicalExpr, + physical_plan::ExecutionPlan, + scalar::ScalarValue, }; -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::catalog::{Session, 
TableFunctionImpl}; -use datafusion::common::{DFSchema, DataFusionError, Result}; -use datafusion::datasource::TableProvider; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown, TableType as DataFusionTableType}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::ExecutionPlan; -use datafusion::scalar::ScalarValue; -use kalamdb_datafusion_sources::exec::{ - finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource, +use kalamdb_commons::{ + ids::SeqId, + models::{TableId, UserId}, + schemas::TableType, +}; +use kalamdb_datafusion_sources::{ + exec::{finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource}, + provider::{combined_filter, FilterCapability}, }; -use kalamdb_datafusion_sources::provider::{combined_filter, FilterCapability}; -use kalamdb_commons::ids::SeqId; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; use kalamdb_filestore::{FilestoreError, StorageCached}; use kalamdb_session_datafusion::extract_user_id; use kalamdb_store::StorageBackend; use kalamdb_system::VectorMetric; -use std::any::Any; -use std::collections::HashMap; -use std::fmt::Debug; -use std::sync::Arc; + +use crate::{ + hot_query_cache::search_hot_candidates, + snapshot_codec::decode_snapshot, + usearch_engine::{load_index, search_index}, +}; const DEFAULT_TOP_K: usize = 10; const CANDIDATE_MULTIPLIER: usize = 4; @@ -209,7 +216,8 @@ impl DeferredBatchSource for VectorSearchScanSource { })?; if parsed.dimensions as usize != self.args.query_vector.len() { return Err(DataFusionError::Execution(format!( - "vector_search query vector dimensions mismatch: query has {}, index has {}", + "vector_search query vector dimensions mismatch: query has {}, \ + index has {}", self.args.query_vector.len(), parsed.dimensions ))); @@ -238,9 +246,7 @@ impl DeferredBatchSource for VectorSearchScanSource { } } - let candidate_limit = search_limit - .saturating_mul(CANDIDATE_MULTIPLIER) - .max(search_limit); + 
let candidate_limit = search_limit.saturating_mul(CANDIDATE_MULTIPLIER).max(search_limit); let hot_search = search_hot_candidates( Arc::clone(&scope.backend), @@ -259,13 +265,14 @@ impl DeferredBatchSource for VectorSearchScanSource { .max(candidate_limit); if let Some(index) = &base_index { - let raw = search_index(index, &self.args.query_vector, cold_candidate_limit) - .map_err(|error| { + let raw = search_index(index, &self.args.query_vector, cold_candidate_limit).map_err( + |error| { DataFusionError::Execution(format!( "Failed to search base vector index: {}", error )) - })?; + }, + )?; for (key, distance) in raw { let Some(pk) = base_snapshot_key_to_pk.get(&key) else { continue; @@ -476,7 +483,9 @@ fn parse_query_vector(value: &ScalarValue) -> Result> { fn parse_args(args: &[Expr]) -> Result { if args.len() < 3 || args.len() > 4 { return Err(DataFusionError::Plan( - "vector_search(table_id, column_name, query_vector[, top_k]) expects 3 or 4 literal arguments".to_string(), + "vector_search(table_id, column_name, query_vector[, top_k]) expects 3 or 4 literal \ + arguments" + .to_string(), )); } @@ -576,18 +585,16 @@ impl TableProvider for VectorSearchTableProvider { None }; - Ok(Arc::new(DeferredBatchExec::new(Arc::new( - VectorSearchScanSource { - runtime: Arc::clone(&self.runtime), - args: self.args.clone(), - session_user: extract_user_id(state), - physical_filter, - projection: projection.cloned(), - limit, - base_schema, - output_schema, - }, - )))) + Ok(Arc::new(DeferredBatchExec::new(Arc::new(VectorSearchScanSource { + runtime: Arc::clone(&self.runtime), + args: self.args.clone(), + session_user: extract_user_id(state), + physical_filter, + projection: projection.cloned(), + limit, + base_schema, + output_schema, + })))) } fn supports_filters_pushdown( diff --git a/backend/crates/kalamdb-vector/tests/vector_provider_exec_models.rs b/backend/crates/kalamdb-vector/tests/vector_provider_exec_models.rs index a325a4c14..7574d5533 100644 --- 
a/backend/crates/kalamdb-vector/tests/vector_provider_exec_models.rs +++ b/backend/crates/kalamdb-vector/tests/vector_provider_exec_models.rs @@ -1,10 +1,9 @@ use std::sync::Arc; -use datafusion::catalog::TableFunctionImpl; -use datafusion::execution::context::SessionContext; -use datafusion::logical_expr::Expr; -use datafusion::physical_plan::collect; -use datafusion::scalar::ScalarValue; +use datafusion::{ + catalog::TableFunctionImpl, execution::context::SessionContext, logical_expr::Expr, + physical_plan::collect, scalar::ScalarValue, +}; use kalamdb_commons::models::{ReadContext, Role, UserId}; use kalamdb_datafusion_sources::exec::DeferredBatchExec; use kalamdb_session_datafusion::SessionUserContext; @@ -51,13 +50,10 @@ async fn vector_search_scan_uses_deferred_batch_exec_and_keeps_empty_results_sta let ctx = session_with_user("vector-exec-model"); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build vector plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build vector plan"); assert!(plan.as_any().is::()); let batches = collect(plan, state.task_ctx()).await.expect("collect vector plan"); assert_eq!(total_rows(&batches), 0); -} \ No newline at end of file +} diff --git a/backend/crates/kalamdb-views/src/cluster.rs b/backend/crates/kalamdb-views/src/cluster.rs index 9fb964664..22cd1e8d6 100644 --- a/backend/crates/kalamdb-views/src/cluster.rs +++ b/backend/crates/kalamdb-views/src/cluster.rs @@ -21,31 +21,34 @@ //! //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::error::RegistryError; -use crate::view_base::VirtualView; +use std::sync::{Arc, OnceLock}; + use async_trait::async_trait; -use datafusion::arrow::array::{ - ArrayRef, BooleanArray, Float32Array, Int16Array, Int32Array, Int64Array, StringArray, -}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::DFSchema; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_expr::PhysicalExpr; -use kalamdb_datafusion_sources::exec::{ - finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource, +use datafusion::{ + arrow::{ + array::{ + ArrayRef, BooleanArray, Float32Array, Int16Array, Int32Array, Int64Array, StringArray, + }, + datatypes::SchemaRef, + record_batch::RecordBatch, + }, + common::DFSchema, + logical_expr::{Expr, TableProviderFilterPushDown}, + physical_expr::PhysicalExpr, }; -use kalamdb_datafusion_sources::provider::{ - combined_filter, pushdown_results_for_filters, FilterCapability, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_datafusion_sources::{ + exec::{finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource}, + provider::{combined_filter, pushdown_results_for_filters, FilterCapability}, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_raft::{ClusterInfo, CommandExecutor, RaftExecutor, ServerStateExt}; use kalamdb_system::SystemTable; -use std::sync::{Arc, OnceLock}; + +use crate::{error::RegistryError, view_base::VirtualView}; /// Get the cluster schema (memoized) fn cluster_schema() -> SchemaRef { @@ -611,10 +614,9 @@ impl 
datafusion::datasource::TableProvider for ClusterTableProvider { ) -> datafusion::error::Result> { let base_schema = self.view.schema(); let output_schema = match projection { - Some(indices) => base_schema - .project(indices) - .map(Arc::new) - .map_err(|error| datafusion::error::DataFusionError::ArrowError(Box::new(error), None))?, + Some(indices) => base_schema.project(indices).map(Arc::new).map_err(|error| { + datafusion::error::DataFusionError::ArrowError(Box::new(error), None) + })?, None => Arc::clone(&base_schema), }; let physical_filter = if let Some(filter) = combined_filter(filters) { diff --git a/backend/crates/kalamdb-views/src/cluster_groups.rs b/backend/crates/kalamdb-views/src/cluster_groups.rs index 10073352c..7d3c1eece 100644 --- a/backend/crates/kalamdb-views/src/cluster_groups.rs +++ b/backend/crates/kalamdb-views/src/cluster_groups.rs @@ -5,19 +5,25 @@ //! Provides per-Raft-group OpenRaft metrics directly from RaftMetrics. //! Each row represents one Raft group's current state on this node. 
-use crate::error::RegistryError; -use crate::view_base::{ViewTableProvider, VirtualView}; -use datafusion::arrow::array::{ArrayRef, Int64Array, StringArray}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, Int64Array, StringArray}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_raft::{CommandExecutor, GroupId}; use kalamdb_system::SystemTable; -use std::sync::{Arc, OnceLock}; + +use crate::{ + error::RegistryError, + view_base::{ViewTableProvider, VirtualView}, +}; fn cluster_groups_schema() -> SchemaRef { static SCHEMA: OnceLock = OnceLock::new(); @@ -239,8 +245,8 @@ impl VirtualView for ClusterGroupsView { } // log::info!( - // "cluster_groups: Building view with {} user_shards, {} shared_shards = {} total groups", - // info.user_shards, info.shared_shards, group_ids.len() + // "cluster_groups: Building view with {} user_shards, {} shared_shards = {} total + // groups", info.user_shards, info.shared_shards, group_ids.len() // ); // Pre-allocate vectors for each column @@ -285,8 +291,8 @@ impl VirtualView for ClusterGroupsView { last_applieds.push(metrics.last_applied.map(|log_id| log_id.index as i64)); snapshots.push(metrics.snapshot.map(|log_id| log_id.index as i64)); purgeds.push(metrics.purged.map(|log_id| log_id.index as i64)); - // Note: RaftMetrics doesn't directly expose committed; we'd need to get it from storage/log - // For now, use None + // Note: RaftMetrics doesn't directly expose committed; we'd need to get it from + // storage/log 
For now, use None committeds.push(None); states.push(Some(format!("{:?}", metrics.state))); current_leaders.push(metrics.current_leader.map(|id| id as i64)); diff --git a/backend/crates/kalamdb-views/src/columns_view.rs b/backend/crates/kalamdb-views/src/columns_view.rs index 2e9000d0d..ed787b586 100644 --- a/backend/crates/kalamdb-views/src/columns_view.rs +++ b/backend/crates/kalamdb-views/src/columns_view.rs @@ -10,18 +10,21 @@ //! //! **DataFusion Pattern**: Implements VirtualView trait for consistent view behavior -use crate::error::RegistryError; -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ArrayRef, BooleanBuilder, Int64Builder, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, BooleanBuilder, Int64Builder, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; use kalamdb_system::SystemTablesRegistry; -use std::sync::{Arc, OnceLock}; + +use crate::{error::RegistryError, view_base::VirtualView}; /// Get the columns view schema (memoized) fn columns_schema() -> SchemaRef { diff --git a/backend/crates/kalamdb-views/src/datatypes.rs b/backend/crates/kalamdb-views/src/datatypes.rs index 5efd9fded..56705d8be 100644 --- a/backend/crates/kalamdb-views/src/datatypes.rs +++ b/backend/crates/kalamdb-views/src/datatypes.rs @@ -15,17 +15,21 @@ //! **Schema Caching**: Memoized via `OnceLock` //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ArrayRef, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_system::SystemTable; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Get or initialize the datatypes schema (memoized) fn datatypes_schema() -> SchemaRef { @@ -283,8 +287,7 @@ mod tests { fn test_table_provider() { let view = Arc::new(DatatypesView::new()); let provider = DatatypesTableProvider::new(view); - use datafusion::datasource::TableProvider; - use datafusion::datasource::TableType; + use datafusion::datasource::{TableProvider, TableType}; assert_eq!(provider.table_type(), TableType::View); assert_eq!(provider.schema().fields().len(), 4); diff --git a/backend/crates/kalamdb-views/src/describe.rs b/backend/crates/kalamdb-views/src/describe.rs index 71eead4ea..cf07451e7 100644 --- a/backend/crates/kalamdb-views/src/describe.rs +++ b/backend/crates/kalamdb-views/src/describe.rs @@ -12,19 +12,20 @@ //! //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::error::RegistryError; -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ - ArrayRef, BooleanArray, Int32Array, Int64Array, RecordBatch, StringBuilder, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, BooleanArray, Int32Array, Int64Array, RecordBatch, StringBuilder}, + datatypes::SchemaRef, }; -use datafusion::arrow::datatypes::SchemaRef; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableId, TableName, }; -use kalamdb_commons::{NamespaceId, TableId, TableName}; use kalamdb_system::SystemTable; -use std::sync::{Arc, OnceLock}; + +use crate::{error::RegistryError, view_base::VirtualView}; /// Get the describe schema (memoized) fn describe_schema() -> SchemaRef { diff --git a/backend/crates/kalamdb-views/src/error.rs b/backend/crates/kalamdb-views/src/error.rs index eee7b906d..2a5e13b3f 100644 --- a/backend/crates/kalamdb-views/src/error.rs +++ b/backend/crates/kalamdb-views/src/error.rs @@ -1,8 +1,9 @@ //! Error types for KalamDB views and registry -use kalamdb_commons::models::StorageId; use std::fmt; +use kalamdb_commons::models::StorageId; + /// Registry-specific errors #[derive(Debug, Clone)] pub enum RegistryError { diff --git a/backend/crates/kalamdb-views/src/live.rs b/backend/crates/kalamdb-views/src/live.rs index d0b0e4c07..7e6fe8532 100644 --- a/backend/crates/kalamdb-views/src/live.rs +++ b/backend/crates/kalamdb-views/src/live.rs @@ -5,17 +5,18 @@ //! Provides the current set of active subscriptions from the in-memory //! connection registry. 
-use crate::view_base::VirtualView; -use datafusion::arrow::array::{ - ArrayRef, Int64Builder, StringBuilder, TimestampMicrosecondBuilder, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, Int64Builder, StringBuilder, TimestampMicrosecondBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, }; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::schemas::TableDefinition; -use kalamdb_commons::SystemTable; +use kalamdb_commons::{schemas::TableDefinition, SystemTable}; use kalamdb_system::LiveQuery; use parking_lot::RwLock; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Live-query snapshot callback type. pub type LiveSnapshotCallback = Arc Vec + Send + Sync>; @@ -161,11 +162,14 @@ pub type LiveTableProvider = crate::view_base::ViewTableProvider; #[cfg(test)] mod tests { - use super::*; - use kalamdb_commons::models::{ConnectionId, LiveQueryId, NamespaceId, UserId}; - use kalamdb_commons::{NodeId, TableName}; + use kalamdb_commons::{ + models::{ConnectionId, LiveQueryId, NamespaceId, UserId}, + NodeId, TableName, + }; use kalamdb_system::LiveQueryStatus; + use super::*; + fn sample_live_query() -> LiveQuery { let user_id = UserId::new("u_live"); let connection_id = ConnectionId::new("conn_live"); diff --git a/backend/crates/kalamdb-views/src/server_logs.rs b/backend/crates/kalamdb-views/src/server_logs.rs index cedfc0bb3..282983608 100644 --- a/backend/crates/kalamdb-views/src/server_logs.rs +++ b/backend/crates/kalamdb-views/src/server_logs.rs @@ -13,19 +13,27 @@ //! **Schema Caching**: Memoized via `OnceLock` //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::error::RegistryError; -use crate::view_base::{ViewTableProvider, VirtualView}; -use datafusion::arrow::array::{ArrayRef, Int64Builder, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::{ + path::PathBuf, + sync::{Arc, OnceLock}, +}; + +use datafusion::arrow::{ + array::{ArrayRef, Int64Builder, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_system::SystemTable; -use std::path::PathBuf; -use std::sync::{Arc, OnceLock}; + +use crate::{ + error::RegistryError, + view_base::{ViewTableProvider, VirtualView}, +}; /// Get or initialize the server_logs schema (memoized) fn server_logs_schema() -> SchemaRef { @@ -306,10 +314,12 @@ pub fn create_server_logs_provider(logs_path: impl Into) -> ServerLogsT #[cfg(test)] mod tests { - use super::*; use std::io::Write; + use tempfile::tempdir; + use super::*; + #[test] fn test_schema() { let schema = server_logs_schema(); diff --git a/backend/crates/kalamdb-views/src/sessions.rs b/backend/crates/kalamdb-views/src/sessions.rs index 75fa1c285..0f6746385 100644 --- a/backend/crates/kalamdb-views/src/sessions.rs +++ b/backend/crates/kalamdb-views/src/sessions.rs @@ -5,19 +5,21 @@ //! Provides the current set of active PostgreSQL gRPC sessions tracked by the //! server-side pg session registry. 
-use crate::view_base::VirtualView; -use datafusion::arrow::array::{ - ArrayRef, BooleanBuilder, Int64Builder, StringBuilder, TimestampMicrosecondBuilder, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, BooleanBuilder, Int64Builder, StringBuilder, TimestampMicrosecondBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, }; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; use parking_lot::RwLock; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Serializable snapshot of a live PostgreSQL gRPC session. #[derive(Debug, Clone, PartialEq, Eq)] @@ -108,7 +110,8 @@ impl SessionsView { false, ColumnDefault::None, Some( - "Parsed PostgreSQL backend PID when session_id follows pg- or pg--" + "Parsed PostgreSQL backend PID when session_id follows pg- or \ + pg--" .to_string(), ), ), diff --git a/backend/crates/kalamdb-views/src/settings.rs b/backend/crates/kalamdb-views/src/settings.rs index 9efd14781..80ae16a02 100644 --- a/backend/crates/kalamdb-views/src/settings.rs +++ b/backend/crates/kalamdb-views/src/settings.rs @@ -13,19 +13,23 @@ //! //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ArrayRef, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_configs::ServerConfig; use kalamdb_system::SystemTable; use parking_lot::RwLock; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Get the settings schema (memoized) fn settings_schema() -> SchemaRef { @@ -143,7 +147,6 @@ impl SettingsView { config: Arc::new(RwLock::new(Some(config))), } } - } impl Default for SettingsView { @@ -208,10 +211,27 @@ impl VirtualView for SettingsView { // Cluster Settings (optional) if let Some(cluster) = &config.cluster { - add_settings!(names, values, descriptions, categories, [ - ("storage.data_path", config.storage.data_path, "Base data directory (auto-creates rocksdb/, storage/, snapshots/ subdirs)", "storage"), - ("cluster.cluster_id", cluster.cluster_id, "Unique cluster identifier", "cluster"), - ]); + add_settings!( + names, + values, + descriptions, + categories, + [ + ( + "storage.data_path", + config.storage.data_path, + "Base data directory (auto-creates rocksdb/, storage/, snapshots/ \ + subdirs)", + "storage" + ), + ( + "cluster.cluster_id", + cluster.cluster_id, + "Unique cluster identifier", + "cluster" + ), + ] + ); } // Storage Settings @@ -398,7 +418,8 @@ impl VirtualView for SettingsView { ( 
"flush.check_interval_seconds", config.flush.check_interval_seconds, - "How often the background scheduler checks for pending flushes (seconds, 0 = disabled)", + "How often the background scheduler checks for pending flushes (seconds, \ + 0 = disabled)", "flush" ), ] @@ -518,7 +539,8 @@ impl VirtualView for SettingsView { ( "rate_limit.max_auth_requests_per_ip_per_sec", config.rate_limit.max_auth_requests_per_ip_per_sec, - "Maximum auth requests per second per IP (applies to /auth/login, /auth/refresh, /setup)", + "Maximum auth requests per second per IP (applies to /auth/login, \ + /auth/refresh, /setup)", "rate_limit" ), ( @@ -530,7 +552,8 @@ impl VirtualView for SettingsView { ( "rate_limit.max_requests_per_ip_per_sec", config.rate_limit.max_requests_per_ip_per_sec, - "Maximum requests per second per IP BEFORE authentication (if exceeded → IP BAN)", + "Maximum requests per second per IP BEFORE authentication (if exceeded → \ + IP BAN)", "rate_limit" ), ( diff --git a/backend/crates/kalamdb-views/src/stats.rs b/backend/crates/kalamdb-views/src/stats.rs index a5a4115ca..78a6c7f4c 100644 --- a/backend/crates/kalamdb-views/src/stats.rs +++ b/backend/crates/kalamdb-views/src/stats.rs @@ -12,18 +12,22 @@ //! //! 
**Schema**: TableDefinition provides consistent metadata for views -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ArrayRef, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, TableName, }; -use kalamdb_commons::{NamespaceId, TableName}; use kalamdb_system::SystemTable; use parking_lot::RwLock; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Metrics provider callback type /// Returns a vector of (metric_name, metric_value) tuples @@ -224,8 +228,7 @@ mod tests { fn test_table_provider() { let view = Arc::new(StatsView::new()); let provider = StatsTableProvider::new(view); - use datafusion::datasource::TableProvider; - use datafusion::datasource::TableType; + use datafusion::datasource::{TableProvider, TableType}; assert_eq!(provider.table_type(), TableType::View); assert_eq!(provider.schema().fields().len(), 2); diff --git a/backend/crates/kalamdb-views/src/tables_view.rs b/backend/crates/kalamdb-views/src/tables_view.rs index 00362a5e9..bafd1402f 100644 --- a/backend/crates/kalamdb-views/src/tables_view.rs +++ b/backend/crates/kalamdb-views/src/tables_view.rs @@ -5,24 +5,26 @@ //! Provides a simplified view of table metadata similar to information_schema.tables. //! This replaces the persisted system.tables (now renamed to system.schemas). //! -//! **Schema**: namespace_id, table_name, table_type, storage_id, version, options, comment, updated, created +//! 
**Schema**: namespace_id, table_name, table_type, storage_id, version, options, comment, +//! updated, created //! //! **DataFusion Pattern**: Implements VirtualView trait for consistent view behavior -use crate::error::RegistryError; -use crate::view_base::VirtualView; -use datafusion::arrow::array::{ - ArrayRef, Int64Builder, StringBuilder, TimestampMicrosecondBuilder, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, Int64Builder, StringBuilder, TimestampMicrosecondBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, }; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; use kalamdb_system::SystemTablesRegistry; -use std::sync::{Arc, OnceLock}; + +use crate::{error::RegistryError, view_base::VirtualView}; /// Get the tables view schema (memoized) fn tables_schema() -> SchemaRef { diff --git a/backend/crates/kalamdb-views/src/transactions.rs b/backend/crates/kalamdb-views/src/transactions.rs index 53bae783c..2451f4664 100644 --- a/backend/crates/kalamdb-views/src/transactions.rs +++ b/backend/crates/kalamdb-views/src/transactions.rs @@ -5,17 +5,21 @@ //! Provides the current set of active explicit transactions tracked by the //! in-memory transaction coordinator. 
-use crate::view_base::VirtualView; -use datafusion::arrow::array::{ArrayRef, Int64Builder, StringBuilder}; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use kalamdb_commons::datatypes::KalamDataType; -use kalamdb_commons::schemas::{ - ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType, +use std::sync::{Arc, OnceLock}; + +use datafusion::arrow::{ + array::{ArrayRef, Int64Builder, StringBuilder}, + datatypes::SchemaRef, + record_batch::RecordBatch, +}; +use kalamdb_commons::{ + datatypes::KalamDataType, + schemas::{ColumnDefault, ColumnDefinition, TableDefinition, TableOptions, TableType}, + NamespaceId, SystemTable, TableName, }; -use kalamdb_commons::{NamespaceId, SystemTable, TableName}; use parking_lot::RwLock; -use std::sync::{Arc, OnceLock}; + +use crate::view_base::VirtualView; /// Serializable snapshot of an active explicit transaction. #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/backend/crates/kalamdb-views/src/view_base.rs b/backend/crates/kalamdb-views/src/view_base.rs index 2c15fd99e..75c772c8e 100644 --- a/backend/crates/kalamdb-views/src/view_base.rs +++ b/backend/crates/kalamdb-views/src/view_base.rs @@ -8,25 +8,25 @@ //! Views memoize their Arrow schema using a `static OnceLock`. //! Each view's schema is computed once and shared across all uses. 
-use crate::error::RegistryError; +use std::{any::Any, sync::Arc}; + use async_trait::async_trait; -use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::common::DFSchema; -use datafusion::datasource::{TableProvider, TableType}; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; -use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; -use datafusion::physical_expr::PhysicalExpr; -use datafusion::physical_plan::ExecutionPlan; -use kalamdb_datafusion_sources::exec::{ - finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource, +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + common::DFSchema, + datasource::{TableProvider, TableType}, + error::{DataFusionError, Result as DataFusionResult}, + logical_expr::{Expr, TableProviderFilterPushDown}, + physical_expr::PhysicalExpr, + physical_plan::ExecutionPlan, }; -use kalamdb_datafusion_sources::provider::{ - combined_filter, pushdown_results_for_filters, FilterCapability, +use kalamdb_datafusion_sources::{ + exec::{finalize_deferred_batch, DeferredBatchExec, DeferredBatchSource}, + provider::{combined_filter, pushdown_results_for_filters, FilterCapability}, }; use kalamdb_system::SystemTable; -use std::any::Any; -use std::sync::Arc; + +use crate::error::RegistryError; /// VirtualView trait defines the core behavior for virtual tables (views) /// diff --git a/backend/crates/kalamdb-views/tests/view_provider_exec_models.rs b/backend/crates/kalamdb-views/tests/view_provider_exec_models.rs index bd10980fd..3fae8d712 100644 --- a/backend/crates/kalamdb-views/tests/view_provider_exec_models.rs +++ b/backend/crates/kalamdb-views/tests/view_provider_exec_models.rs @@ -1,6 +1,6 @@ -use datafusion::datasource::TableProvider; -use datafusion::execution::context::SessionContext; -use datafusion::physical_plan::collect; +use datafusion::{ + datasource::TableProvider, execution::context::SessionContext, 
physical_plan::collect, +}; use kalamdb_datafusion_sources::exec::DeferredBatchExec; use kalamdb_views::create_datatypes_provider; @@ -13,15 +13,10 @@ async fn datatypes_view_scan_uses_deferred_batch_exec_and_returns_rows() { let provider = create_datatypes_provider(); let ctx = SessionContext::new(); let state = ctx.state(); - let plan = provider - .scan(&state, None, &[], None) - .await - .expect("build datatypes plan"); + let plan = provider.scan(&state, None, &[], None).await.expect("build datatypes plan"); assert!(plan.as_any().is::()); - let batches = collect(plan, state.task_ctx()) - .await - .expect("collect datatypes plan"); + let batches = collect(plan, state.task_ctx()).await.expect("collect datatypes plan"); assert!(total_rows(&batches) > 0); -} \ No newline at end of file +} diff --git a/backend/src/connection_guard.rs b/backend/src/connection_guard.rs index bf7a9224c..c98287308 100644 --- a/backend/src/connection_guard.rs +++ b/backend/src/connection_guard.rs @@ -1,12 +1,17 @@ //! IP-based connection guard for DoS protection. +use std::{ + net::IpAddr, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + use kalamdb_configs::RateLimitSettings; use moka::sync::Cache; use parking_lot::Mutex; -use std::net::IpAddr; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; #[derive(Debug)] struct TokenBucket { @@ -328,9 +333,10 @@ pub struct ConnectionGuardStats { #[cfg(test)] mod tests { - use super::*; use std::thread; + use super::*; + fn test_config( max_conn: u32, max_req: u32, diff --git a/backend/src/http_runtime.rs b/backend/src/http_runtime.rs new file mode 100644 index 000000000..78e2c14ed --- /dev/null +++ b/backend/src/http_runtime.rs @@ -0,0 +1,84 @@ +//! Lightweight HTTP runtime state shared by Actix workers. 
+ +use std::sync::Arc; + +use actix_web::web; +use anyhow::Result; +use kalamdb_api::{limiter::RateLimiter, ui::UiRuntimeConfig}; +use kalamdb_auth::UserRepository; +use kalamdb_configs::{AuthSettings, CorsSettings, ServerConfig}; +use kalamdb_core::{ + app_context::AppContext, + sql::{datafusion_session::DataFusionSessionFactory, executor::SqlExecutor}, +}; +use kalamdb_live::{ConnectionsManager, LiveQueryManager}; + +use crate::{ + lifecycle::ApplicationComponents, middleware::ConnectionProtection, + startup::configure_auth_runtime, +}; + +#[derive(Clone, Copy)] +pub enum AuthRuntimeMode { + Configure, + AlreadyConfigured, +} + +#[derive(Clone)] +pub struct HttpRuntimeState { + pub app_context: web::Data>, + pub session_factory: web::Data>, + pub sql_executor: web::Data>, + pub rate_limiter: web::Data>, + pub live_query_manager: web::Data>, + pub user_repo: web::Data>, + pub connection_registry: web::Data>, + pub auth_settings: web::Data, + pub connection_protection: ConnectionProtection, + pub cors_settings: Arc, + pub ui_path: Option, + pub ui_runtime_config: UiRuntimeConfig, + ui_status: &'static str, +} + +impl HttpRuntimeState { + pub fn new( + config: &ServerConfig, + components: &ApplicationComponents, + app_context: Arc, + auth_runtime_mode: AuthRuntimeMode, + ) -> Result { + if matches!(auth_runtime_mode, AuthRuntimeMode::Configure) { + configure_auth_runtime(config)?; + } + + let ui_path = config.server.ui_path.clone(); + let ui_status = if kalamdb_api::routes::is_embedded_ui_available() { + "embedded in binary" + } else if ui_path.is_some() { + "filesystem" + } else { + "disabled" + }; + + Ok(Self { + app_context: web::Data::new(app_context), + session_factory: web::Data::new(components.session_factory.clone()), + sql_executor: web::Data::new(components.sql_executor.clone()), + rate_limiter: web::Data::new(components.rate_limiter.clone()), + live_query_manager: web::Data::new(components.live_query_manager.clone()), + user_repo: 
web::Data::new(components.user_repo.clone()), + connection_registry: web::Data::new(components.connection_registry.clone()), + auth_settings: web::Data::new(config.auth.clone()), + connection_protection: ConnectionProtection::from_server_config(config), + cors_settings: Arc::new(config.security.cors.clone()), + ui_path, + ui_runtime_config: UiRuntimeConfig::new(config.server.configured_public_origin()), + ui_status, + }) + } + + pub fn ui_status(&self) -> &'static str { + self.ui_status + } +} diff --git a/backend/src/lib.rs b/backend/src/lib.rs index 39da654b5..5538e74ee 100644 --- a/backend/src/lib.rs +++ b/backend/src/lib.rs @@ -3,6 +3,8 @@ //! This library exposes server modules for integration testing. pub mod connection_guard; +pub mod http_runtime; pub mod lifecycle; pub mod middleware; pub mod routes; +pub mod startup; diff --git a/backend/src/lifecycle.rs b/backend/src/lifecycle.rs index 8229dbbbe..05112b08f 100644 --- a/backend/src/lifecycle.rs +++ b/backend/src/lifecycle.rs @@ -4,27 +4,36 @@ //! in `main.rs`: bootstrapping databases and services, wiring the HTTP //! server, and coordinating graceful shutdown. 
-use crate::{middleware, routes}; -use actix_web::{web, App, HttpServer}; +use std::{ + net::{SocketAddr, TcpListener}, + sync::Arc, +}; + +use actix_web::{App, HttpServer}; use anyhow::Result; use kalamdb_api::limiter::RateLimiter; use kalamdb_auth::CachedUsersRepo; use kalamdb_commons::{AuthType, Role, StorageId, UserId}; use kalamdb_configs::ServerConfig; -use kalamdb_core::sql::datafusion_session::DataFusionSessionFactory; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::executor::SqlExecutor; -use kalamdb_dba::{initialize_dba_namespace, start_stats_recorder}; +use kalamdb_core::sql::{ + datafusion_session::DataFusionSessionFactory, + executor::{handler_registry::HandlerRegistry, SqlExecutor}, +}; +use kalamdb_dba::{ + initialize_dba_namespace, start_startup_stats_snapshot, start_stats_recorder, +}; use kalamdb_jobs::AppContextJobsExt; use kalamdb_live::{ConnectionsManager, LiveQueryManager}; use kalamdb_store::open_storage_backend; use kalamdb_system::providers::storages::models::StorageMode; -use log::debug; -use log::{info, warn}; -use std::net::{SocketAddr, TcpListener}; -use std::sync::Arc; +use log::{debug, info, warn}; use tracing_actix_web::{RootSpanBuilder, TracingLogger}; +use crate::{ + http_runtime::{AuthRuntimeMode, HttpRuntimeState}, + middleware, routes, startup, +}; + /// Resolve the effective number of actix-web worker threads. /// /// Precedence: `KALAMDB_SERVER_WORKERS` env var > server.toml `workers` > auto. 
@@ -159,11 +168,14 @@ pub async fn prepare_components( ) .await?; if config.retention.enable_dba_stats { - if let Err(error) = start_stats_recorder(app_context.clone()).await { + if let Err(error) = start_stats_recorder(app_context.clone()) { log::error!("Failed to start DBA stats recorder: {}", error); } } else { - log::info!("DBA stats recorder disabled via config (retention.enable_dba_stats = false)"); + start_startup_stats_snapshot(app_context.clone()); + log::info!( + "DBA periodic stats recorder disabled via config (retention.enable_dba_stats = false); recording startup snapshot only" + ); } Ok(ApplicationComponents { @@ -229,6 +241,14 @@ pub async fn bootstrap( if is_cluster_mode { // Multi-node cluster mode let cluster_config = config.cluster.as_ref().unwrap(); + info!( + "Starting cluster node {} in cluster '{}' (rpc={}, api={}, peers={})", + cluster_config.node_id, + cluster_config.cluster_id, + cluster_config.rpc_addr, + cluster_config.api_addr, + cluster_config.peers.len() + ); debug!("╔═══════════════════════════════════════════════════════════════════╗"); debug!("║ Multi-Node Cluster Mode ║"); debug!("╚═══════════════════════════════════════════════════════════════════╝"); @@ -263,7 +283,11 @@ pub async fn bootstrap( if should_bootstrap { if !cluster_config.peers.is_empty() { - info!("Node {} is bootstrap node - initializing cluster", cluster_config.node_id); + info!( + "Node {} is the bootstrap node; initializing cluster membership and admitting \ + configured peers", + cluster_config.node_id + ); } app_context .executor() @@ -271,10 +295,17 @@ pub async fn bootstrap( .await .map_err(|e| anyhow::anyhow!("Failed to initialize cluster: {}", e))?; } else { - info!("Node {} waiting for bootstrap node (node_id=1)", cluster_config.node_id); + info!( + "Node {} is ready and waiting for bootstrap node 1 to admit it to the cluster", + cluster_config.node_id + ); } - info!("✓ Raft cluster started ({:.2}ms)", phase_start.elapsed().as_secs_f64() * 1000.0); + 
info!( + "Cluster node {} started Raft services in {:.2}ms", + cluster_config.node_id, + phase_start.elapsed().as_secs_f64() * 1000.0 + ); } else { // Single-node mode (lightweight Raft) debug!("Single-node mode - initializing lightweight Raft"); @@ -298,7 +329,8 @@ pub async fn bootstrap( // Ensure Raft appliers are registered after Raft has started. // Some Raft initialization flows may recreate state machines; re-wiring here keeps - // metadata/data replication applying into local providers (system tables, schema registry, etc.). + // metadata/data replication applying into local providers (system tables, schema registry, + // etc.). app_context.wire_raft_appliers(); // NOTE: restore_raft_state_machines() is called LATER after system tables, storages, @@ -311,32 +343,7 @@ pub async fn bootstrap( // Seed default storage if necessary (using SystemTablesRegistry) let phase_start = std::time::Instant::now(); - let storages_provider = app_context.system_tables().storages(); - let existing_storages = storages_provider.scan_all_storages()?; - let storage_count = existing_storages.num_rows(); - - //TODO: Extract as a separate function create_default_storage_if_needed - if storage_count == 0 { - info!("No storages found, creating default 'local' storage"); - let now = chrono::Utc::now().timestamp_millis(); - let default_storage = kalamdb_system::Storage { - storage_id: StorageId::from("local"), - storage_name: "Local Filesystem".to_string(), - description: Some("Default local filesystem storage".to_string()), - storage_type: kalamdb_system::providers::storages::models::StorageType::Filesystem, - base_directory: config.storage.storage_dir().to_string_lossy().into_owned(), - credentials: None, - config_json: None, - shared_tables_template: config.storage.shared_tables_template.clone(), // Need clone for Storage struct - user_tables_template: config.storage.user_tables_template.clone(), // Need clone for Storage struct - created_at: now, - updated_at: now, - }; - 
storages_provider.insert_storage(default_storage)?; - info!("Default 'local' storage created successfully"); - } else { - debug!("Found {} existing storage(s)", storage_count); - } + startup::create_default_storage_if_needed(config, &app_context)?; debug!( "Storage initialization completed ({:.2}ms)", phase_start.elapsed().as_secs_f64() * 1000.0 @@ -402,25 +409,7 @@ async fn bootstrap_isolated_inner( // apply commands that interact with these providers. // Seed default storage if necessary - let storages_provider = app_context.system_tables().storages(); - let existing_storages = storages_provider.scan_all_storages()?; - if existing_storages.num_rows() == 0 { - let now = chrono::Utc::now().timestamp_millis(); - let default_storage = kalamdb_system::Storage { - storage_id: StorageId::from("local"), - storage_name: "Local Filesystem".to_string(), - description: Some("Default local filesystem storage".to_string()), - storage_type: kalamdb_system::providers::storages::models::StorageType::Filesystem, - base_directory: config.storage.storage_dir().to_string_lossy().into_owned(), - credentials: None, - config_json: None, - shared_tables_template: config.storage.shared_tables_template.clone(), - user_tables_template: config.storage.user_tables_template.clone(), - created_at: now, - updated_at: now, - }; - storages_provider.insert_storage(default_storage)?; - } + startup::create_default_storage_if_needed(config, &app_context)?; let components = prepare_components(config, app_context.clone(), false).await?; @@ -470,7 +459,8 @@ pub async fn run( // Log server configuration for debugging debug!( - "Server config: workers={}, max_connections={}, backlog={}, blocking_threads={}, body_limit={}MB", + "Server config: workers={}, max_connections={}, backlog={}, blocking_threads={}, \ + body_limit={}MB", effective_workers(config.server.workers), config.performance.max_connections, config.performance.backlog, @@ -480,7 +470,8 @@ pub async fn run( if 
config.rate_limit.enable_connection_protection { debug!( - "Connection protection: max_conn_per_ip={}, max_req_per_ip_per_sec={}, ban_duration={}s", + "Connection protection: max_conn_per_ip={}, max_req_per_ip_per_sec={}, \ + ban_duration={}s", config.rate_limit.max_connections_per_ip, config.rate_limit.max_requests_per_ip_per_sec, config.rate_limit.ban_duration_seconds @@ -490,7 +481,7 @@ pub async fn run( } if config.security.cors.allowed_origins.is_empty() - || config.security.cors.allowed_origins.contains(&"*".to_string()) + || config.security.cors.allowed_origins.iter().any(|origin| origin == "*") { debug!("CORS: allowing any origin"); } else { @@ -500,72 +491,47 @@ pub async fn run( // Get JobsManager for graceful shutdown let job_manager_shutdown = app_context.job_manager(); let shutdown_timeout_secs = config.shutdown.flush.timeout; + let connection_registry_shutdown = components.connection_registry.clone(); - let session_factory = components.session_factory.clone(); - let sql_executor = components.sql_executor.clone(); - let rate_limiter = components.rate_limiter.clone(); - let live_query_manager = components.live_query_manager.clone(); - let user_repo = components.user_repo.clone(); - let connection_registry = components.connection_registry.clone(); - - // Create connection protection middleware from config - let connection_protection = middleware::ConnectionProtection::from_server_config(config); - - // Build CORS middleware from config (uses actix-cors) - let cors_config = config.clone(); - - let app_context_for_handler = app_context.clone(); - let connection_registry_for_handler = connection_registry.clone(); - - // Initialize shared JWT configuration for kalamdb-auth - kalamdb_auth::services::unified::init_auth_config(&config.auth, &config.oauth); - kalamdb_auth::init_trusted_proxy_ranges(&config.security.trusted_proxy_ranges)?; - - // Share auth settings with HTTP handlers - let auth_settings = config.auth.clone(); - let ui_path = 
config.server.ui_path.clone(); - let ui_runtime_config = - kalamdb_api::ui::UiRuntimeConfig::new(config.server.configured_public_origin()); - - // Log UI serving status - let ui_status = if kalamdb_api::routes::is_embedded_ui_available() { - "embedded in binary" - } else if let Some(ref _path) = ui_path { - "filesystem" - } else { - "disabled" - }; + let http_runtime = HttpRuntimeState::new( + config, + &components, + app_context.clone(), + AuthRuntimeMode::AlreadyConfigured, + )?; + let ui_status = http_runtime.ui_status(); debug!("Admin UI: {} (at /ui)", ui_status); let server = HttpServer::new(move || { + let runtime = http_runtime.clone(); let mut app = App::new() // Connection protection (first middleware - drops bad requests early) - .wrap(connection_protection.clone()) + .wrap(runtime.connection_protection.clone()) // Tracing middleware (creates a root span per HTTP request) // Uses KalamDbRootSpanBuilder to force `parent: None` on each request, // preventing cross-request span contamination in OTel/Jaeger. 
.wrap(TracingLogger::::new()) - .wrap(middleware::build_cors_from_config(&cors_config)) - .app_data(web::Data::new(app_context_for_handler.clone())) - .app_data(web::Data::new(session_factory.clone())) - .app_data(web::Data::new(sql_executor.clone())) - .app_data(web::Data::new(rate_limiter.clone())) - .app_data(web::Data::new(live_query_manager.clone())) - .app_data(web::Data::new(user_repo.clone())) - .app_data(web::Data::new(connection_registry_for_handler.clone())) - .app_data(web::Data::new(auth_settings.clone())) + .wrap(middleware::build_cors_from_settings(runtime.cors_settings.as_ref())) + .app_data(runtime.app_context.clone()) + .app_data(runtime.session_factory.clone()) + .app_data(runtime.sql_executor.clone()) + .app_data(runtime.rate_limiter.clone()) + .app_data(runtime.live_query_manager.clone()) + .app_data(runtime.user_repo.clone()) + .app_data(runtime.connection_registry.clone()) + .app_data(runtime.auth_settings.clone()) .configure(routes::configure); // Add UI routes - prefer embedded, fallback to filesystem path #[cfg(feature = "embedded-ui")] if kalamdb_api::routes::is_embedded_ui_available() { - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_embedded_ui_routes(cfg, runtime_config.clone()); }); - } else if let Some(ref path) = ui_path { + } else if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_ui_routes( cfg, @@ -576,9 +542,9 @@ pub async fn run( } #[cfg(not(feature = "embedded-ui"))] - if let Some(ref path) = ui_path { + if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move 
|cfg| { kalamdb_api::routes::configure_ui_routes( cfg, @@ -647,7 +613,7 @@ pub async fn run( // Gracefully shutdown WebSocket connections info!("Shutting down WebSocket connections..."); - connection_registry.shutdown(std::time::Duration::from_secs(5)).await; + connection_registry_shutdown.shutdown(std::time::Duration::from_secs(5)).await; info!( "Waiting up to {}s for active jobs to complete...", @@ -766,59 +732,47 @@ pub async fn run_for_tests( let listener = TcpListener::bind((bind_ip, 0))?; let bind_addr = listener.local_addr()?; - let session_factory = components.session_factory.clone(); - let sql_executor = components.sql_executor.clone(); - let rate_limiter = components.rate_limiter.clone(); - let live_query_manager = components.live_query_manager.clone(); - let user_repo = components.user_repo.clone(); - let connection_registry = components.connection_registry.clone(); - - let connection_protection = middleware::ConnectionProtection::from_server_config(config); - let cors_config = config.clone(); - - let app_context_for_handler = app_context.clone(); - let connection_registry_for_handler = connection_registry.clone(); - - kalamdb_auth::services::unified::init_auth_config(&config.auth, &config.oauth); - kalamdb_auth::init_trusted_proxy_ranges(&config.security.trusted_proxy_ranges)?; - let auth_settings = config.auth.clone(); - let ui_path = config.server.ui_path.clone(); - let ui_runtime_config = - kalamdb_api::ui::UiRuntimeConfig::new(config.server.configured_public_origin()); + let http_runtime = HttpRuntimeState::new( + config, + &components, + app_context.clone(), + AuthRuntimeMode::Configure, + )?; let server = HttpServer::new(move || { + let runtime = http_runtime.clone(); let mut app = App::new() - .wrap(connection_protection.clone()) + .wrap(runtime.connection_protection.clone()) .wrap(TracingLogger::::new()) - .wrap(middleware::build_cors_from_config(&cors_config)) - .app_data(web::Data::new(app_context_for_handler.clone())) - 
.app_data(web::Data::new(session_factory.clone())) - .app_data(web::Data::new(sql_executor.clone())) - .app_data(web::Data::new(rate_limiter.clone())) - .app_data(web::Data::new(live_query_manager.clone())) - .app_data(web::Data::new(user_repo.clone())) - .app_data(web::Data::new(connection_registry_for_handler.clone())) - .app_data(web::Data::new(auth_settings.clone())) + .wrap(middleware::build_cors_from_settings(runtime.cors_settings.as_ref())) + .app_data(runtime.app_context.clone()) + .app_data(runtime.session_factory.clone()) + .app_data(runtime.sql_executor.clone()) + .app_data(runtime.rate_limiter.clone()) + .app_data(runtime.live_query_manager.clone()) + .app_data(runtime.user_repo.clone()) + .app_data(runtime.connection_registry.clone()) + .app_data(runtime.auth_settings.clone()) .configure(routes::configure); #[cfg(feature = "embedded-ui")] if kalamdb_api::routes::is_embedded_ui_available() { - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_embedded_ui_routes(cfg, runtime_config.clone()); }); - } else if let Some(ref path) = ui_path { + } else if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_ui_routes(cfg, &path, runtime_config.clone()); }); } #[cfg(not(feature = "embedded-ui"))] - if let Some(ref path) = ui_path { + if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_ui_routes(cfg, &path, runtime_config.clone()); }); @@ -863,59 +817,47 @@ pub async fn run_detached( ) -> Result { let bind_addr = format!("{}:{}", config.server.host, 
config.server.port); - let session_factory = components.session_factory.clone(); - let sql_executor = components.sql_executor.clone(); - let rate_limiter = components.rate_limiter.clone(); - let live_query_manager = components.live_query_manager.clone(); - let user_repo = components.user_repo.clone(); - let connection_registry = components.connection_registry.clone(); - - let connection_protection = middleware::ConnectionProtection::from_server_config(config); - let cors_config = config.clone(); - - let app_context_for_handler = app_context.clone(); - let connection_registry_for_handler = connection_registry.clone(); - - kalamdb_auth::services::unified::init_auth_config(&config.auth, &config.oauth); - kalamdb_auth::init_trusted_proxy_ranges(&config.security.trusted_proxy_ranges)?; - let auth_settings = config.auth.clone(); - let ui_path = config.server.ui_path.clone(); - let ui_runtime_config = - kalamdb_api::ui::UiRuntimeConfig::new(config.server.configured_public_origin()); + let http_runtime = HttpRuntimeState::new( + config, + &components, + app_context.clone(), + AuthRuntimeMode::Configure, + )?; let server = HttpServer::new(move || { + let runtime = http_runtime.clone(); let mut app = App::new() - .wrap(connection_protection.clone()) + .wrap(runtime.connection_protection.clone()) .wrap(TracingLogger::::new()) - .wrap(middleware::build_cors_from_config(&cors_config)) - .app_data(web::Data::new(app_context_for_handler.clone())) - .app_data(web::Data::new(session_factory.clone())) - .app_data(web::Data::new(sql_executor.clone())) - .app_data(web::Data::new(rate_limiter.clone())) - .app_data(web::Data::new(live_query_manager.clone())) - .app_data(web::Data::new(user_repo.clone())) - .app_data(web::Data::new(connection_registry_for_handler.clone())) - .app_data(web::Data::new(auth_settings.clone())) + .wrap(middleware::build_cors_from_settings(runtime.cors_settings.as_ref())) + .app_data(runtime.app_context.clone()) + .app_data(runtime.session_factory.clone()) + 
.app_data(runtime.sql_executor.clone()) + .app_data(runtime.rate_limiter.clone()) + .app_data(runtime.live_query_manager.clone()) + .app_data(runtime.user_repo.clone()) + .app_data(runtime.connection_registry.clone()) + .app_data(runtime.auth_settings.clone()) .configure(routes::configure); #[cfg(feature = "embedded-ui")] if kalamdb_api::routes::is_embedded_ui_available() { - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_embedded_ui_routes(cfg, runtime_config.clone()); }); - } else if let Some(ref path) = ui_path { + } else if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_ui_routes(cfg, &path, runtime_config.clone()); }); } #[cfg(not(feature = "embedded-ui"))] - if let Some(ref path) = ui_path { + if let Some(ref path) = runtime.ui_path { let path: String = path.clone(); - let runtime_config = ui_runtime_config.clone(); + let runtime_config = runtime.ui_runtime_config.clone(); app = app.configure(move |cfg| { kalamdb_api::routes::configure_ui_routes(cfg, &path, runtime_config.clone()); }); @@ -1006,13 +948,14 @@ async fn create_default_system_user( let created_at = chrono::Utc::now().timestamp_millis(); // Check for root password from environment variable or config file. 
- // Priority: KALAMDB_ROOT_PASSWORD env var > config auth.root_password > empty (localhost-only) + // Priority: KALAMDB_ROOT_PASSWORD env var > config auth.root_password > empty + // (localhost-only) let root_password_from_env = if use_root_password_env { std::env::var("KALAMDB_ROOT_PASSWORD").ok().filter(|p| !p.is_empty()) } else { None }; - let root_password_from_config = config_root_password.clone().filter(|p| !p.is_empty()); + let root_password_from_config = config_root_password.filter(|p| !p.is_empty()); let root_password = root_password_from_env.or(root_password_from_config); let password_hash = match root_password { @@ -1088,24 +1031,24 @@ async fn create_default_system_user( // users_provider.get_user_by_username(AuthConstants::DEFAULT_SYSTEM_USERNAME) // { // if user.password_hash.is_empty() { -// // Root user has no password - this is secure for localhost-only but warn about limitations -// warn!("╔═══════════════════════════════════════════════════════════════════╗"); -// warn!("║ ⚠️ SECURITY NOTICE ⚠️ ║"); -// warn!("╠═══════════════════════════════════════════════════════════════════╣"); -// warn!("║ ║"); -// warn!("║ Root user has NO PASSWORD (localhost-only access enabled) ║"); -// warn!("║ ║"); -// warn!("║ SECURITY ENFORCEMENT: ║"); -// warn!("║ • Remote authentication is BLOCKED for users with no password ║"); -// warn!("║ • Root can only connect from localhost (127.0.0.1) ║"); -// warn!("║ • This configuration is secure by design ║"); -// warn!("║ ║"); -// warn!("║ TO ENABLE REMOTE ACCESS: ║"); -// warn!("║ Set a strong password for the root user: ║"); -// warn!("║ ALTER USER root SET PASSWORD 'strong-password-here'; ║"); -// warn!("║ ║"); -// warn!( -// "║ Note: allow_remote_access config is currently: {} ║", +// // Root user has no password - this is secure for localhost-only but warn about +// limitations +// warn!("╔═══════════════════════════════════════════════════════════════════╗"); +// warn!("║ ⚠️ SECURITY NOTICE ⚠️ ║"); +// 
warn!("╠═══════════════════════════════════════════════════════════════════╣"); +// warn!("║ ║"); +// warn!("║ Root user has NO PASSWORD (localhost-only access enabled) ║"); +// warn!("║ ║"); +// warn!("║ SECURITY ENFORCEMENT: ║"); +// warn!("║ • Remote authentication is BLOCKED for users with no password ║"); +// warn!("║ • Root can only connect from localhost (127.0.0.1) ║"); +// warn!("║ • This configuration is secure by design ║"); +// warn!("║ ║"); +// warn!("║ TO ENABLE REMOTE ACCESS: ║"); +// warn!("║ Set a strong password for the root user: ║"); +// warn!("║ ALTER USER root SET PASSWORD 'strong-password-here'; ║"); +// warn!("║ ║"); +// warn!( "║ Note: allow_remote_access config is currently: {} ║", // if config.auth.allow_remote_access { // "ENABLED " // } else { diff --git a/backend/src/logging.rs b/backend/src/logging.rs index 9492aa73b..ed46a490d 100644 --- a/backend/src/logging.rs +++ b/backend/src/logging.rs @@ -5,13 +5,15 @@ // `log::*` macro calls and routes them through the tracing subscriber so // span context is preserved end-to-end. 
-use std::collections::HashMap; -use std::fs::{self, OpenOptions}; -use std::path::Path; #[cfg(feature = "otel")] use std::sync::{Mutex, OnceLock}; #[cfg(feature = "otel")] use std::time::Duration; +use std::{ + collections::HashMap, + fs::{self, OpenOptions}, + path::Path, +}; use kalamdb_configs::config::types::OtlpSettings; #[cfg(feature = "otel")] @@ -24,10 +26,9 @@ use opentelemetry_sdk::trace::SdkTracerProvider; use opentelemetry_sdk::Resource; #[cfg(feature = "otel")] use tracing_subscriber::filter::filter_fn; -use tracing_subscriber::fmt::format::FmtSpan; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::util::SubscriberInitExt; -use tracing_subscriber::{EnvFilter, Layer}; +use tracing_subscriber::{ + fmt::format::FmtSpan, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer, +}; #[cfg(feature = "otel")] static OTEL_TRACER_PROVIDER: OnceLock>> = OnceLock::new(); diff --git a/backend/src/main.rs b/backend/src/main.rs index 82b0f7629..194164701 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -1,5 +1,4 @@ // KalamDB Server entrypoint -//! //! The heavy lifting (initialization, middleware wiring, graceful shutdown) //! lives in dedicated modules so this file remains a thin orchestrator. 
@@ -7,13 +6,19 @@ use kalamdb_core::metrics::{BUILD_DATE, SERVER_VERSION}; mod logging; +use std::{ + collections::HashSet, + net::{SocketAddr, TcpListener, ToSocketAddrs}, + path::{Path, PathBuf}, +}; + use anyhow::{anyhow, Result}; use kalamdb_configs::ServerConfig; -use kalamdb_server::lifecycle::{bootstrap, run}; +use kalamdb_server::{ + lifecycle::{bootstrap, run}, + startup::configure_auth_runtime, +}; use log::info; -use std::collections::HashSet; -use std::net::{SocketAddr, TcpListener, ToSocketAddrs}; -use std::path::{Path, PathBuf}; fn resolve_bind_addrs(addr: &str, label: &str) -> Result> { let addrs: Vec = addr @@ -80,7 +85,8 @@ fn validate_startup_ports(config: &ServerConfig) -> Result<()> { if !http_addrs.is_disjoint(&rpc_addrs) { return Err(anyhow!( - "Invalid configuration: HTTP '{}' and Raft RPC '{}' resolve to at least one identical socket address. Configure distinct ports.", + "Invalid configuration: HTTP '{}' and Raft RPC '{}' resolve to at least one \ + identical socket address. Configure distinct ports.", http_addr, cluster.rpc_addr )); @@ -185,10 +191,12 @@ fn load_server_config(config_path: &Path) -> ServerConfig { if config.should_warn_on_non_local_http_wildcard_cors() { eprintln!( - "⚠️ SECURITY WARNING: Non-localhost HTTP exposure is using security.cors.allowed_origins = [\"*\"]" + "⚠️ SECURITY WARNING: Non-localhost HTTP exposure is using \ + security.cors.allowed_origins = [\"*\"]" ); eprintln!( - "⚠️ Any browser origin can reach this server. Replace '*' with an explicit origin list before production use." + "⚠️ Any browser origin can reach this server. Replace '*' with an explicit origin \ + list before production use." ); } @@ -240,8 +248,7 @@ async fn async_main(config: ServerConfig) -> Result<()> { // JWT CONFIG INITIALIZATION // ======================================================================== // Initialize auth JWT config from server.toml (after env overrides are applied). 
- kalamdb_auth::services::unified::init_auth_config(&config.auth, &config.oauth); - kalamdb_auth::init_trusted_proxy_ranges(&config.security.trusted_proxy_ranges)?; + configure_auth_runtime(&config)?; // ======================================================================== // Security: Validate critical configuration at startup @@ -341,9 +348,7 @@ async fn async_main(config: ServerConfig) -> Result<()> { #[cfg(test)] mod tests { - use std::alloc::Layout; - use std::hint::black_box; - use std::time::Instant; + use std::{alloc::Layout, hint::black_box, time::Instant}; use kalamdb_observability::{collect_runtime_metrics, force_allocator_collection}; @@ -478,7 +483,8 @@ mod tests { assert!( after.memory_bytes.unwrap_or_default() <= before.memory_bytes.unwrap_or_default() + allowed_growth, - "runtime metrics collection retained too much process memory: before={} after={} source={}", + "runtime metrics collection retained too much process memory: before={} after={} \ + source={}", before.memory_bytes.unwrap_or_default(), after.memory_bytes.unwrap_or_default(), after.memory_usage_source, diff --git a/backend/src/middleware.rs b/backend/src/middleware.rs index dffb8a744..cfc2fa962 100644 --- a/backend/src/middleware.rs +++ b/backend/src/middleware.rs @@ -16,32 +16,40 @@ //! - Request body size limits //! 
- Automatic IP banning for persistent abusers -use crate::connection_guard::{ConnectionGuard, ConnectionGuardResult}; +use std::{ + future::{ready, Ready}, + net::IpAddr, + sync::Arc, +}; + use actix_cors::Cors; -use actix_web::body::{BoxBody, EitherBody}; -use actix_web::dev::{forward_ready, Service, ServiceRequest, ServiceResponse, Transform}; -use actix_web::http::{header::HeaderName, Method, StatusCode}; -use actix_web::{Error, HttpResponse}; +use actix_web::{ + body::{BoxBody, EitherBody}, + dev::{forward_ready, Service, ServiceRequest, ServiceResponse, Transform}, + http::{header::HeaderName, Method, StatusCode}, + Error, HttpResponse, +}; use futures_util::future::LocalBoxFuture; use kalamdb_auth::extract_client_ip_addr_secure; -use kalamdb_configs::{RateLimitSettings, ServerConfig}; +use kalamdb_configs::{CorsSettings, RateLimitSettings, ServerConfig}; use log::warn; -use std::future::{ready, Ready}; -use std::net::IpAddr; -use std::sync::Arc; + +use crate::connection_guard::{ConnectionGuard, ConnectionGuardResult}; /// Build CORS middleware from server configuration using actix-cors. /// /// Maps all CorsSettings options to actix-cors builder methods. 
/// See: https://docs.rs/actix-cors/latest/actix_cors/struct.Cors.html pub fn build_cors_from_config(config: &ServerConfig) -> Cors { - let cors_config = &config.security.cors; + build_cors_from_settings(&config.security.cors) +} +pub fn build_cors_from_settings(cors_config: &CorsSettings) -> Cors { let mut cors = Cors::default(); // Configure allowed origins if cors_config.allowed_origins.is_empty() - || cors_config.allowed_origins.contains(&"*".to_string()) + || cors_config.allowed_origins.iter().any(|origin| origin == "*") { cors = cors.allow_any_origin(); } else { @@ -58,7 +66,7 @@ pub fn build_cors_from_config(config: &ServerConfig) -> Cors { } // Configure allowed headers - if cors_config.allowed_headers.contains(&"*".to_string()) { + if cors_config.allowed_headers.iter().any(|header| header == "*") { cors = cors.allow_any_header(); } else { let headers: Vec = @@ -88,9 +96,12 @@ pub fn build_cors_from_config(config: &ServerConfig) -> Cors { #[cfg(test)] mod tests { + use actix_web::{ + http::{header, Method}, + test, web, App, HttpResponse, + }; + use super::*; - use actix_web::http::{header, Method}; - use actix_web::{test, web, App, HttpResponse}; #[actix_web::test] async fn preflight_login_request_allows_vite_chat_origin() { diff --git a/backend/src/startup.rs b/backend/src/startup.rs new file mode 100644 index 000000000..a0dd69d5e --- /dev/null +++ b/backend/src/startup.rs @@ -0,0 +1,51 @@ +//! Startup helpers shared by the production binary and test server wiring. + +use std::sync::Arc; + +use anyhow::Result; +use kalamdb_commons::StorageId; +use kalamdb_configs::ServerConfig; +use kalamdb_core::app_context::AppContext; +use kalamdb_system::{providers::storages::models::StorageType, Storage}; +use log::{debug, info}; + +/// Initialize global auth/proxy runtime state from the already-finalized config. 
+pub fn configure_auth_runtime(config: &ServerConfig) -> Result<()> { + kalamdb_auth::services::unified::init_auth_config(&config.auth, &config.oauth); + kalamdb_auth::init_trusted_proxy_ranges(&config.security.trusted_proxy_ranges)?; + Ok(()) +} + +/// Ensure the default local storage row exists without materializing an Arrow batch. +pub fn create_default_storage_if_needed( + config: &ServerConfig, + app_context: &Arc, +) -> Result { + let storages_provider = app_context.system_tables().storages(); + let storage_count = storages_provider.list_storages()?.len(); + + if storage_count > 0 { + debug!("Found {} existing storage(s)", storage_count); + return Ok(storage_count); + } + + info!("No storages found, creating default 'local' storage"); + let now = chrono::Utc::now().timestamp_millis(); + let default_storage = Storage { + storage_id: StorageId::from("local"), + storage_name: "Local Filesystem".to_string(), + description: Some("Default local filesystem storage".to_string()), + storage_type: StorageType::Filesystem, + base_directory: config.storage.storage_dir().to_string_lossy().into_owned(), + credentials: None, + config_json: None, + shared_tables_template: config.storage.shared_tables_template.clone(), + user_tables_template: config.storage.user_tables_template.clone(), + created_at: now, + updated_at: now, + }; + storages_provider.insert_storage(default_storage)?; + info!("Default 'local' storage created successfully"); + + Ok(0) +} diff --git a/backend/tests/common/testserver/auth_helper.rs b/backend/tests/common/testserver/auth_helper.rs index 71e8a6da3..6dc5dc47c 100644 --- a/backend/tests/common/testserver/auth_helper.rs +++ b/backend/tests/common/testserver/auth_helper.rs @@ -9,14 +9,11 @@ use jsonwebtoken::{encode, Algorithm, EncodingKey, Header}; use kalamdb_commons::{AuthType, Role, StorageId, UserId}; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::context::ExecutionContext; -use 
kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::User; +use kalamdb_core::{error::KalamDbError, sql::context::ExecutionContext}; +use kalamdb_system::{providers::storages::models::StorageMode, User}; use serde::{Deserialize, Serialize}; -use super::consolidated_helpers::ensure_user_exists; -use super::http_server::HttpTestServer; +use super::{consolidated_helpers::ensure_user_exists, http_server::HttpTestServer}; /// Create a test user with password authentication /// @@ -69,8 +66,7 @@ pub async fn create_test_user( let users_provider = server.app_context.system_tables().users(); if let Ok(Some(mut existing)) = users_provider.get_user_by_id(&user_id) { - existing.password_hash = - bcrypt::hash(password, 4).expect("Failed to hash password"); + existing.password_hash = bcrypt::hash(password, 4).expect("Failed to hash password"); existing.role = role; existing.email = Some(format!("{}@example.com", username)); existing.auth_type = AuthType::Password; @@ -89,8 +85,8 @@ pub async fn create_test_user( if let Err(e) = &result { if matches!(e, KalamDbError::AlreadyExists(_)) { if let Ok(Some(mut existing)) = users_provider.get_user_by_id(&user_id) { - existing.password_hash = bcrypt::hash(password, 4) - .expect("Failed to hash password"); + existing.password_hash = + bcrypt::hash(password, 4).expect("Failed to hash password"); existing.role = role; existing.email = Some(format!("{}@example.com", username)); existing.auth_type = AuthType::Password; diff --git a/backend/tests/common/testserver/cluster.rs b/backend/tests/common/testserver/cluster.rs index 3fd941902..ea50605cf 100644 --- a/backend/tests/common/testserver/cluster.rs +++ b/backend/tests/common/testserver/cluster.rs @@ -1,10 +1,11 @@ //! 
Cluster test server for testing multiple node scenarios -use super::http_server::HttpTestServer; use anyhow::Result; use kalam_client::models::QueryResponse; use rand::RngExt; use tokio::sync::Mutex; +use super::http_server::HttpTestServer; + /// A test cluster with 3 nodes for testing replication and consistency. pub struct ClusterTestServer { /// Three independent server instances that form a cluster @@ -149,7 +150,8 @@ impl ClusterTestServer { if first_maps.len() != result_maps.len() { eprintln!( - "❌ Node consistency failed: Node 0 result set {} has {} rows, Node {} has {}", + "❌ Node consistency failed: Node 0 result set {} has {} rows, Node {} \ + has {}", j, first_maps.len(), i, diff --git a/backend/tests/common/testserver/consolidated_helpers.rs b/backend/tests/common/testserver/consolidated_helpers.rs index 9070cf674..b712456fc 100644 --- a/backend/tests/common/testserver/consolidated_helpers.rs +++ b/backend/tests/common/testserver/consolidated_helpers.rs @@ -13,13 +13,18 @@ //! - **Subscription Helpers**: WebSocket subscription utilities //! - **Parallel Testing**: Multi-user and concurrency helpers +use std::{ + collections::{HashMap, HashSet}, + path::{Path, PathBuf}, + time::Duration, +}; + use anyhow::Result; -use kalam_client::models::{ChangeEvent, QueryResponse, ResponseStatus}; -use kalam_client::{KalamCellValue, SubscriptionManager}; +use kalam_client::{ + models::{ChangeEvent, QueryResponse, ResponseStatus}, + KalamCellValue, SubscriptionManager, +}; use kalamdb_commons::Role; -use std::collections::{HashMap, HashSet}; -use std::path::{Path, PathBuf}; -use std::time::Duration; use tokio::time::{timeout, Instant}; use super::http_server::HttpTestServer; diff --git a/backend/tests/common/testserver/fixtures.rs b/backend/tests/common/testserver/fixtures.rs index 31ac824d2..f46fe05e5 100644 --- a/backend/tests/common/testserver/fixtures.rs +++ b/backend/tests/common/testserver/fixtures.rs @@ -15,27 +15,31 @@ //! #[actix_web::test] //! 
async fn test_example() { //! let server = TestServer::new_shared().await; -//! +//! //! // Create namespace //! fixtures::create_namespace(&server, "app").await; -//! +//! //! // Create user table with sample data //! fixtures::create_messages_table(&server, "app").await; //! fixtures::insert_sample_messages(&server, "app", "user123", 10).await; -//! +//! //! // Run your test... //! } //! ``` -use super::TestServer; +use std::{ + sync::atomic::{AtomicUsize, Ordering}, + time::{Duration, Instant}, +}; + use anyhow::Result; use kalam_client::models::{QueryResponse, QueryResult, ResponseStatus}; use kalamdb_commons::models::NamespaceId; use serde_json::json; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::time::{Duration, Instant}; use tokio::time::sleep; +use super::TestServer; + static UNIQUE_NS_COUNTER: AtomicUsize = AtomicUsize::new(0); fn unique_namespace(prefix: &str) -> String { @@ -59,7 +63,9 @@ fn unique_namespace(prefix: &str) -> String { /// # Example /// /// ```no_run -/// fixtures::execute_sql(&server, "CREATE TABLE test.messages (...)", "user1").await.unwrap(); +/// fixtures::execute_sql(&server, "CREATE TABLE test.messages (...)", "user1") +/// .await +/// .unwrap(); /// ``` pub async fn execute_sql(server: &TestServer, sql: &str, user_id: &str) -> Result { Ok(server.execute_sql_as_user(sql, user_id).await) @@ -565,7 +571,8 @@ mod tests { let response = create_messages_table(&server, "app", Some("user123")).await; if response.status != ResponseStatus::Success { - // In shared TestServer runs, provider may already be registered; accept idempotent already-exists + // In shared TestServer runs, provider may already be registered; accept idempotent + // already-exists let msg = response.error.as_ref().map(|e| e.message.clone()).unwrap_or_default(); assert!( msg.contains("already exists"), diff --git a/backend/tests/common/testserver/flush.rs b/backend/tests/common/testserver/flush.rs index 0223e9b1d..a60f4bd24 100644 --- 
a/backend/tests/common/testserver/flush.rs +++ b/backend/tests/common/testserver/flush.rs @@ -1,10 +1,15 @@ +use std::path::{Path, PathBuf}; + use anyhow::Result; use kalam_client::models::{QueryResponse, ResponseStatus}; use kalamdb_commons::{NamespaceId, TableId, TableName}; -use kalamdb_jobs::executors::flush::{FlushExecutor, FlushParams}; -use kalamdb_jobs::executors::{JobContext, JobExecutor}; -use kalamdb_jobs::AppContextJobsExt; -use std::path::{Path, PathBuf}; +use kalamdb_jobs::{ + executors::{ + flush::{FlushExecutor, FlushParams}, + JobContext, JobExecutor, + }, + AppContextJobsExt, +}; use tokio::time::{sleep, Duration, Instant}; use super::http_server::HttpTestServer; @@ -68,8 +73,7 @@ async fn wait_for_flush_job_by_id( loop { let resp = server .execute_sql(&format!( - "SELECT status, message FROM system.jobs \ - WHERE job_id = '{}' LIMIT 1", + "SELECT status, message FROM system.jobs WHERE job_id = '{}' LIMIT 1", escaped_job_id )) .await?; @@ -111,9 +115,8 @@ async fn wait_for_flush_job_by_idempotency_key( loop { let resp = server .execute_sql(&format!( - "SELECT job_id, status, message FROM system.jobs \ - WHERE job_type = 'flush' AND idempotency_key = '{}' \ - ORDER BY created_at DESC LIMIT 1", + "SELECT job_id, status, message FROM system.jobs WHERE job_type = 'flush' AND \ + idempotency_key = '{}' ORDER BY created_at DESC LIMIT 1", escaped_key )) .await?; @@ -161,9 +164,8 @@ pub async fn wait_for_flush_jobs_settled( loop { let resp = server .execute_sql( - "SELECT status, parameters \ - FROM system.jobs WHERE job_type = 'flush' \ - ORDER BY created_at DESC LIMIT 500", + "SELECT status, parameters FROM system.jobs WHERE job_type = 'flush' ORDER BY \ + created_at DESC LIMIT 500", ) .await?; @@ -219,17 +221,16 @@ pub async fn wait_for_flush_jobs_settled( if Instant::now() >= deadline { if matching_count > 0 { println!( - "Timed out waiting for flush jobs to settle for {}.{} (matching_count={}, statuses={:?}) - proceeding", - ns, - table, - 
matching_count, - status_samples + "Timed out waiting for flush jobs to settle for {}.{} (matching_count={}, \ + statuses={:?}) - proceeding", + ns, table, matching_count, status_samples ); return Ok(()); } anyhow::bail!( - "Timed out waiting for flush jobs to settle for {}.{} (matching_count={}, statuses={:?})", + "Timed out waiting for flush jobs to settle for {}.{} (matching_count={}, \ + statuses={:?})", ns, table, matching_count, diff --git a/backend/tests/common/testserver/flush_helpers.rs b/backend/tests/common/testserver/flush_helpers.rs index ae7aa9a9f..383a0ffef 100644 --- a/backend/tests/common/testserver/flush_helpers.rs +++ b/backend/tests/common/testserver/flush_helpers.rs @@ -7,16 +7,20 @@ //! - Checking Parquet file existence //! - Verifying job completion metrics -use super::TestServer; +use std::{ + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; + use kalam_client::models::ResponseStatus; use kalamdb_commons::models::{NamespaceId, StorageId, TableId, TableName}; use kalamdb_core::manifest::{FlushJobResult, SharedTableFlushJob, TableFlush, UserTableFlushJob}; use kalamdb_tables::new_indexed_user_table_store; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; use tokio::time::sleep; +use super::TestServer; + /// Execute a flush job synchronously for testing /// /// Calls flush_job.execute() directly to get immediate results. 
@@ -72,7 +76,8 @@ pub async fn execute_flush_synchronously( .map(|f| f.name().clone()) .unwrap_or_else(|| "id".to_string()); - // Construct a per-table UserTableIndexedStore directly (avoids reaching into provider internals) + // Construct a per-table UserTableIndexedStore directly (avoids reaching into provider + // internals) let user_table_store = Arc::new(new_indexed_user_table_store( server.app_context.storage_backend(), &table_id, @@ -186,7 +191,10 @@ pub async fn wait_for_flush_job_completion( if response.status != ResponseStatus::Success { // system.jobs might not be accessible in some test setups // Just wait the full duration and return success - println!(" ℹ Cannot query system.jobs (not an error in test env), waiting for job to execute..."); + println!( + " ℹ Cannot query system.jobs (not an error in test env), waiting for job to \ + execute..." + ); sleep(max_wait).await; return Ok("Job executed (system.jobs not queryable in test)".to_string()); } @@ -229,7 +237,9 @@ pub async fn wait_for_flush_job_completion( let duration_ms = end - start; if duration_ms == 0 { return Err(format!( - "Job {} completed but duration_ms = 0 (started_at: {}, finished_at: {}), which indicates a failure or instant completion bug", + "Job {} completed but duration_ms = 0 (started_at: {}, \ + finished_at: {}), which indicates a failure or instant \ + completion bug", job_id, start, end )); } diff --git a/backend/tests/common/testserver/http_server.rs b/backend/tests/common/testserver/http_server.rs index 82e299a4f..ed4ab9e84 100644 --- a/backend/tests/common/testserver/http_server.rs +++ b/backend/tests/common/testserver/http_server.rs @@ -1,22 +1,27 @@ #![allow(unused_imports)] -use super::cluster::ClusterTestServer; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, + sync::{mpsc, Arc}, + thread, +}; + use anyhow::{Context, Result}; -use kalam_client::models::{QueryResponse, ResponseStatus}; -use kalam_client::{AuthProvider, KalamLinkClient, KalamLinkTimeouts}; 
+use kalam_client::{ + models::{QueryResponse, ResponseStatus}, + AuthProvider, KalamLinkClient, KalamLinkTimeouts, +}; use kalamdb_commons::{NamespaceId, Role, UserId}; use kalamdb_core::app_context::AppContext; use once_cell::sync::{Lazy, OnceCell as SyncOnceCell}; use serde_json::Value as JsonValue; -use std::collections::HashMap; -use std::path::Path; -use std::path::PathBuf; -use std::sync::mpsc; -use std::sync::Arc; -use std::thread; -use tokio::sync::Mutex; -use tokio::sync::OnceCell; -use tokio::time::{sleep, Duration, Instant}; +use tokio::{ + sync::{Mutex, OnceCell}, + time::{sleep, Duration, Instant}, +}; + +use super::cluster::ClusterTestServer; static GLOBAL_HTTP_TEST_RUNTIME: SyncOnceCell> = SyncOnceCell::new(); @@ -331,7 +336,11 @@ impl HttpTestServer { password: &str, role: &Role, ) -> Result { - let check_sql = format!("SELECT user_id, COUNT(*) AS user_count FROM system.users WHERE user_id = '{}' GROUP BY user_id", username); + let check_sql = format!( + "SELECT user_id, COUNT(*) AS user_count FROM system.users WHERE user_id = '{}' GROUP \ + BY user_id", + username + ); let resp = self.execute_sql(&check_sql).await?; // Check if user exists and get their user_id @@ -436,13 +445,16 @@ impl HttpTestServer { }; // Try to get cached user_id, fall back to username - let user_id = self.get_cached_user_id(username) - .unwrap_or_else(|| { - if username != "root" { - eprintln!("WARNING: link_client('{}') called without cached user_id. USER table RLS may not work correctly.", username); - } - username.to_string() - }); + let user_id = self.get_cached_user_id(username).unwrap_or_else(|| { + if username != "root" { + eprintln!( + "WARNING: link_client('{}') called without cached user_id. 
USER table RLS may \ + not work correctly.", + username + ); + } + username.to_string() + }); self.link_client_with_id(&user_id, username, &role) } @@ -470,7 +482,8 @@ impl HttpTestServer { self.execute_sql_with_auth_and_params(sql, auth_header, Vec::new()).await } - /// Execute SQL (optionally parameterized) via the real HTTP API using an explicit `Authorization` header. + /// Execute SQL (optionally parameterized) via the real HTTP API using an explicit + /// `Authorization` header. pub async fn execute_sql_with_auth_and_params( &self, sql: &str, @@ -644,8 +657,10 @@ impl HttpTestServer { let probe = format!("SELECT 1 AS ok FROM {}.{} LIMIT 1", namespace_id.as_str(), table_name); let mut last_error: Option = None; let system_probe = format!( - "SELECT COUNT(*) AS cnt FROM system.schemas WHERE namespace_id='{}' AND table_name='{}'", - namespace_id.as_str(), table_name + "SELECT COUNT(*) AS cnt FROM system.schemas WHERE namespace_id='{}' AND \ + table_name='{}'", + namespace_id.as_str(), + table_name ); let mut last_system_cnt: Option = None; @@ -705,7 +720,8 @@ impl HttpTestServer { if Instant::now() >= deadline { return Err(anyhow::anyhow!( - "CREATE TABLE did not become queryable in time ({}.{}): last_error={:?} system.schemas_cnt={:?}", + "CREATE TABLE did not become queryable in time ({}.{}): last_error={:?} \ + system.schemas_cnt={:?}", namespace_id.as_str(), table_name, last_error, @@ -909,7 +925,8 @@ async fn wait_for_cluster_ready(nodes: &[HttpTestServer]) -> Result<()> { if Instant::now() >= deadline { return Err(anyhow::anyhow!( - "Timed out waiting for 3-node cluster to converge (meta_leaders={}, shared_leaders={})", + "Timed out waiting for 3-node cluster to converge (meta_leaders={}, \ + shared_leaders={})", meta_leader_count, shared_leader_count )); @@ -1063,9 +1080,11 @@ async fn start_cluster_server() -> Result { // All other tests should use get_global_server() for better performance. 
// ============================================================================ -/// Run a test closure against a freshly started HTTP test server (with config override), then shut it down. +/// Run a test closure against a freshly started HTTP test server (with config override), then shut +/// it down. /// -/// **DEPRECATED**: Only use this if you need a config override. Otherwise use `get_global_server()`. +/// **DEPRECATED**: Only use this if you need a config override. Otherwise use +/// `get_global_server()`. #[allow(dead_code)] pub async fn with_http_test_server_config( override_config: impl FnOnce(&mut kalamdb_configs::ServerConfig), diff --git a/backend/tests/common/testserver/query_helpers.rs b/backend/tests/common/testserver/query_helpers.rs index 3aed58b78..f6a453666 100644 --- a/backend/tests/common/testserver/query_helpers.rs +++ b/backend/tests/common/testserver/query_helpers.rs @@ -5,12 +5,15 @@ //! possible, and adds test-specific utilities for common patterns. //! //! # Core Principle -//! - Use `QueryResponse` built-in methods: `rows_as_maps()`, `first_row_as_map()`, `get_i64()`, `get_string()` +//! - Use `QueryResponse` built-in methods: `rows_as_maps()`, `first_row_as_map()`, `get_i64()`, +//! `get_string()` //! - Add test-specific helpers here for common assertions and patterns //! - Keep all query helpers in this single file -use kalam_client::models::{QueryResponse, ResponseStatus}; -use kalam_client::KalamCellValue; +use kalam_client::{ + models::{QueryResponse, ResponseStatus}, + KalamCellValue, +}; use serde_json::Value as JsonValue; /// Get a count value from a COUNT(*) query response safely. 
diff --git a/backend/tests/common/testserver/query_result_ext.rs b/backend/tests/common/testserver/query_result_ext.rs index 9e1a67666..a105b1434 100644 --- a/backend/tests/common/testserver/query_result_ext.rs +++ b/backend/tests/common/testserver/query_result_ext.rs @@ -1,7 +1,7 @@ -use kalam_client::models::QueryResult; -use kalam_client::KalamCellValue; use std::collections::HashMap; +use kalam_client::{models::QueryResult, KalamCellValue}; + /// Extension trait for `QueryResult` to provide test-friendly row access. #[allow(dead_code)] pub trait QueryResultTestExt { diff --git a/backend/tests/common/testserver/test_server.rs b/backend/tests/common/testserver/test_server.rs index d47b49626..21975962b 100644 --- a/backend/tests/common/testserver/test_server.rs +++ b/backend/tests/common/testserver/test_server.rs @@ -3,16 +3,16 @@ //! Provides a TestServer API similar to the legacy in-process test server, //! but backed by the shared HttpTestServer instance. -use super::http_server::{self, HttpTestServer}; +use std::sync::Arc; + use datafusion::prelude::SessionContext; use kalam_client::models::{ErrorDetail, QueryResponse, ResponseStatus}; use kalamdb_auth::{CoreUsersRepo, UserRepository}; -use kalamdb_commons::constants::AuthConstants; -use kalamdb_commons::{AuthType, Role, StorageId, UserId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::sql::executor::SqlExecutor; +use kalamdb_commons::{constants::AuthConstants, AuthType, Role, StorageId, UserId}; +use kalamdb_core::{app_context::AppContext, sql::executor::SqlExecutor}; use kalamdb_system::providers::storages::models::StorageMode; -use std::sync::Arc; + +use super::http_server::{self, HttpTestServer}; /// Test server instance backed by the shared HTTP server. 
#[derive(Clone)] @@ -49,8 +49,7 @@ impl TestServer { app_context.system_tables().users().get_user_by_id(&system_user_id) { if user.password_hash.is_empty() { - user.password_hash = - bcrypt::hash("admin", 4).unwrap_or_default(); + user.password_hash = bcrypt::hash("admin", 4).unwrap_or_default(); user.updated_at = chrono::Utc::now().timestamp_millis(); let _ = app_context.system_tables().users().update_user(user); } @@ -80,8 +79,7 @@ impl TestServer { app_context.system_tables().users().get_user_by_id(&system_user_id) { if user.password_hash.is_empty() { - user.password_hash = - bcrypt::hash("admin", 4).unwrap_or_default(); + user.password_hash = bcrypt::hash("admin", 4).unwrap_or_default(); user.updated_at = chrono::Utc::now().timestamp_millis(); let _ = app_context.system_tables().users().update_user(user); } @@ -144,8 +142,7 @@ impl TestServer { } user.role } else { - let password_hash = - bcrypt::hash("test123", 4).unwrap_or_else(|_| String::new()); + let password_hash = bcrypt::hash("test123", 4).unwrap_or_else(|_| String::new()); let user = kalamdb_system::User { user_id: user_id_obj.clone(), password_hash, @@ -188,8 +185,7 @@ impl TestServer { /// Helper to create a test user with explicit role and password. 
pub async fn create_user(&self, username: &str, password: &str, role: Role) -> UserId { let user_id = UserId::new(username); - let password_hash = - bcrypt::hash(password, 4).expect("Failed to hash password"); + let password_hash = bcrypt::hash(password, 4).expect("Failed to hash password"); let users_provider = self.app_context.system_tables().users(); if let Ok(Some(mut existing)) = users_provider.get_user_by_id(&user_id) { diff --git a/backend/tests/endurance_test.rs b/backend/tests/endurance_test.rs index b60f47920..0f462a6b8 100644 --- a/backend/tests/endurance_test.rs +++ b/backend/tests/endurance_test.rs @@ -10,22 +10,26 @@ #[path = "common/testserver/mod.rs"] mod test_support; +use std::{ + collections::VecDeque, + env, + sync::{ + atomic::{AtomicI64, AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + use anyhow::{Context, Result}; -use kalam_client::models::ChangeEvent; -use kalam_client::{KalamLinkClient, SubscriptionManager}; +use kalam_client::{models::ChangeEvent, KalamLinkClient, SubscriptionManager}; use kalamdb_commons::Role; -use rand::rngs::StdRng; -use rand::{RngExt, SeedableRng}; -use std::collections::VecDeque; -use std::env; -use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; +use rand::{rngs::StdRng, RngExt, SeedableRng}; +use test_support::{ + consolidated_helpers::{create_user_and_client, unique_namespace, unique_table}, + http_server::get_global_server, +}; use tokio::time::sleep; -use test_support::consolidated_helpers::{create_user_and_client, unique_namespace, unique_table}; -use test_support::http_server::get_global_server; - const DEFAULT_DURATION_SECS: u64 = 60 * 60; const DEFAULT_USER_COUNT: usize = 100; const DEFAULT_SUBSCRIBER_COUNT: usize = 24; @@ -86,7 +90,8 @@ async fn run_subscriber( ) -> Result<()> { let mut subscription = client .subscribe(&format!( - "SELECT id, conversation_id, role_id, content FROM {}.messages WHERE conversation_id = {}", + 
"SELECT id, conversation_id, role_id, content FROM {}.messages WHERE conversation_id \ + = {}", namespace, conversation_id )) .await @@ -154,7 +159,8 @@ async fn run_chat_user( username, conversation_id, message_id, op_counter ); let sql = format!( - "INSERT INTO {}.messages (id, conversation_id, role_id, content, created_at_ms, edited_at_ms) VALUES ({}, {}, 'user', '{}', {}, 0)", + "INSERT INTO {}.messages (id, conversation_id, role_id, content, created_at_ms, \ + edited_at_ms) VALUES ({}, {}, 'user', '{}', {}, 0)", namespace, message_id, conversation_id, @@ -172,7 +178,8 @@ async fn run_chat_user( } else if operation < 72 { if let Some(message_id) = own_messages.back().copied() { let sql = format!( - "UPDATE {}.messages SET content = 'edited:{}:{}', edited_at_ms = {} WHERE id = {}", + "UPDATE {}.messages SET content = 'edited:{}:{}', edited_at_ms = {} WHERE id \ + = {}", namespace, sql_quote(&username), op_counter, @@ -194,7 +201,8 @@ async fn run_chat_user( } else if operation < 92 { let event_id = next_message_id.fetch_add(1, Ordering::Relaxed); let sql = format!( - "INSERT INTO {}.typing_events (id, conversation_id, event_type, created_at_ms) VALUES ({}, {}, 'typing', {})", + "INSERT INTO {}.typing_events (id, conversation_id, event_type, created_at_ms) \ + VALUES ({}, {}, 'typing', {})", namespace, event_id, conversation_id, @@ -323,7 +331,8 @@ async fn test_chat_app_endurance_with_100_parallel_users() -> Result<()> { for (_, client) in &user_clients { for conversation_id in 1..=conversation_count { let sql = format!( - "INSERT INTO {}.conversations (id, title, created_at_ms) VALUES ({}, 'Conversation {}', {})", + "INSERT INTO {}.conversations (id, title, created_at_ms) VALUES ({}, \ + 'Conversation {}', {})", namespace, conversation_id, conversation_id, @@ -358,7 +367,9 @@ async fn test_chat_app_endurance_with_100_parallel_users() -> Result<()> { let conversation_id = rng.random_range(1..=(conversation_count as i64)); let message_id = 
assistant_message_ids.fetch_add(1, Ordering::Relaxed); let sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.messages (id, conversation_id, role_id, content, created_at_ms, edited_at_ms) VALUES ({}, {}, 'assistant', 'assistant-reply:{}:{}', {}, 0))", + "EXECUTE AS USER '{}' (INSERT INTO {}.messages (id, conversation_id, role_id, \ + content, created_at_ms, edited_at_ms) VALUES ({}, {}, 'assistant', \ + 'assistant-reply:{}:{}', {}, 0))", sql_quote(target_user), assistant_namespace, message_id, @@ -490,7 +501,8 @@ async fn test_chat_app_endurance_with_100_parallel_users() -> Result<()> { let duplicate_check = server .execute_sql(&format!( - "EXECUTE AS USER '{}' (SELECT COUNT(*) AS total_rows, COUNT(DISTINCT id) AS distinct_rows FROM {}.messages)", + "EXECUTE AS USER '{}' (SELECT COUNT(*) AS total_rows, COUNT(DISTINCT id) AS \ + distinct_rows FROM {}.messages)", sql_quote(&user_clients[0].0), namespace )) @@ -519,7 +531,9 @@ async fn test_chat_app_endurance_with_100_parallel_users() -> Result<()> { let error_count = errors.load(Ordering::Relaxed); eprintln!( - "Endurance workload complete: duration_secs={} users={} subscribers={} conversations={} inserts={} updates={} deletes={} typing_events={} queries={} subscriber_events={} health_checks={} final_rows={}", + "Endurance workload complete: duration_secs={} users={} subscribers={} conversations={} \ + inserts={} updates={} deletes={} typing_events={} queries={} subscriber_events={} \ + health_checks={} final_rows={}", duration.as_secs(), user_count, subscriber_count, diff --git a/backend/tests/integration_tests/topic_pubsub.rs b/backend/tests/integration_tests/topic_pubsub.rs index 8f08614f3..afe023728 100644 --- a/backend/tests/integration_tests/topic_pubsub.rs +++ b/backend/tests/integration_tests/topic_pubsub.rs @@ -13,12 +13,23 @@ //! 2. Async notification verification helpers //! 3. 
Extended timeout handling for CDC workflows -use crate::test_support::*; -use kalam_client::models::ResponseStatus; -use kalamdb_commons::Role; +use std::sync::Arc; + +use kalam_client::{ + models::{QueryResponse, ResponseStatus}, + parse_i64, +}; +use kalamdb_commons::{ + models::{ConsumerGroupId, TopicId}, + Role, +}; +use kalamdb_core::app_context::AppContext; use reqwest::StatusCode; use serde::Deserialize; use serde_json::{json, Value}; +use serial_test::serial; + +use crate::test_support::*; #[derive(Debug, Clone, Deserialize)] struct HttpConsumeMessage { @@ -40,6 +51,20 @@ struct HttpAckResponse { acknowledged_offset: u64, } +struct TopicPublisherCacheGuard { + app_context: Arc, +} + +impl Drop for TopicPublisherCacheGuard { + fn drop(&mut self) { + if let Ok(topics) = self.app_context.system_tables().topics().list_topics() { + let topic_publisher = self.app_context.topic_publisher(); + topic_publisher.refresh_topics_cache(topics); + topic_publisher.restore_offset_counters(); + } + } +} + async fn post_topics_consume( server: &http_server::HttpTestServer, auth_header: &str, @@ -130,7 +155,8 @@ async fn wait_until_group_reads_at_least( if tokio::time::Instant::now() >= deadline { panic!( - "Timed out waiting for at least {} messages (got {}) for topic='{}' group='{}' start='{}' limit={}", + "Timed out waiting for at least {} messages (got {}) for topic='{}' group='{}' \ + start='{}' limit={}", min_messages, aggregated_messages.len(), topic_id, @@ -155,6 +181,205 @@ fn json_string(value: &Value) -> Option { .map(|s| s.to_string()) } +async fn setup_topic_source_fixture(server: &TestServer, fixture_name: &str) -> (String, String) { + let http_server = http_server::get_global_server().await; + let namespace = consolidated_helpers::unique_namespace(fixture_name); + let table = consolidated_helpers::unique_table("events"); + let topic_table = consolidated_helpers::unique_table("topic"); + let topic = format!("{}.{}", namespace, topic_table); + let source_table = 
format!("{}.{}", namespace, table); + + let create_namespace = server.execute_sql(&format!("CREATE NAMESPACE {}", namespace)).await; + assert_eq!( + create_namespace.status, + ResponseStatus::Success, + "CREATE NAMESPACE failed: {:?}", + create_namespace.error + ); + + let create_table = server + .execute_sql(&format!("CREATE TABLE {} (id INT PRIMARY KEY, payload TEXT)", source_table)) + .await; + assert_eq!( + create_table.status, + ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + create_table.error + ); + + let create_topic = server.execute_sql(&format!("CREATE TOPIC {} PARTITIONS 1", topic)).await; + assert_eq!( + create_topic.status, + ResponseStatus::Success, + "CREATE TOPIC failed: {:?}", + create_topic.error + ); + + let add_source = server + .execute_sql(&format!( + "ALTER TOPIC {} ADD SOURCE {} ON INSERT WITH (payload = 'full')", + topic, source_table + )) + .await; + assert_eq!( + add_source.status, + ResponseStatus::Success, + "ALTER TOPIC ADD SOURCE failed: {:?}", + add_source.error + ); + + wait_for_topic_routes(http_server, &topic, 1).await; + + (topic, source_table) +} + +async fn wait_until_sql_consume_row_count_at_least( + server: &TestServer, + sql: &str, + min_rows: usize, +) -> QueryResponse { + let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(20); + + loop { + let response = server.execute_sql(sql).await; + if response.status == ResponseStatus::Success && response.row_count() >= min_rows { + return response; + } + + if tokio::time::Instant::now() >= deadline { + panic!( + "Timed out waiting for SQL consume to return at least {} row(s): sql='{}' \ + response={:?}", + min_rows, sql, response.error + ); + } + + tokio::time::sleep(tokio::time::Duration::from_millis(120)).await; + } +} + +fn first_row_offset(response: &QueryResponse) -> i64 { + response + .rows_as_maps() + .first() + .and_then(|row| row.get("offset")) + .map(parse_i64) + .expect("Expected consume response to include an offset column") +} + +fn 
row_offsets(response: &QueryResponse) -> Vec { + response + .rows_as_maps() + .iter() + .map(|row| { + row.get("offset") + .map(parse_i64) + .expect("Expected consume response row to include an offset column") + }) + .collect() +} + +async fn assert_topic_offset_state( + server: &TestServer, + topic: &str, + group: &str, + expected_last_acked: Option, +) -> Option { + let topic_id = TopicId::new(topic); + let group_id = ConsumerGroupId::new(group); + let provider = server.app_context.system_tables().topic_offsets(); + let provider_offsets = provider + .get_group_offsets(&topic_id, &group_id) + .expect("Failed to read topic offsets from provider"); + + let sql = format!( + "SELECT topic_id, group_id, partition_id, last_acked_offset, updated_at FROM \ + system.topic_offsets WHERE topic_id = '{}' AND group_id = '{}' ORDER BY partition_id", + topic, group + ); + let response = server.execute_sql(&sql).await; + assert_eq!( + response.status, + ResponseStatus::Success, + "system.topic_offsets query failed: {:?}", + response.error + ); + let rows = response.rows_as_maps(); + + match expected_last_acked { + Some(expected) => { + assert_eq!(provider_offsets.len(), 1, "Expected exactly one provider offset row"); + let provider_offset = &provider_offsets[0]; + assert_eq!(provider_offset.topic_id, topic_id); + assert_eq!(provider_offset.group_id, group_id); + assert_eq!(provider_offset.partition_id, 0); + assert_eq!(provider_offset.last_acked_offset, expected); + assert!(provider_offset.updated_at > 0, "updated_at should be populated"); + + assert_eq!(rows.len(), 1, "Expected exactly one SQL offset row"); + let row = &rows[0]; + assert_eq!( + row.get("topic_id").and_then(|value| json_string(value.inner())).as_deref(), + Some(topic) + ); + assert_eq!( + row.get("group_id").and_then(|value| json_string(value.inner())).as_deref(), + Some(group) + ); + assert_eq!(row.get("partition_id").map(parse_i64), Some(0)); + assert_eq!(row.get("last_acked_offset").map(parse_i64), 
Some(expected as i64)); + + let sql_updated_at = row + .get("updated_at") + .map(parse_i64) + .expect("Expected updated_at column in system.topic_offsets row"); + assert_eq!(provider_offset.updated_at, sql_updated_at); + + Some(sql_updated_at) + }, + None => { + assert!(provider_offsets.is_empty(), "Expected no provider offset rows"); + assert!(rows.is_empty(), "Expected no SQL offset rows"); + None + }, + } +} + +async fn assert_topic_offset_count(server: &TestServer, topic: &str, expected_count: usize) { + let topic_id = TopicId::new(topic); + let provider_offsets = server + .app_context + .system_tables() + .topic_offsets() + .get_topic_offsets(&topic_id) + .expect("Failed to read topic offsets for topic"); + assert_eq!( + provider_offsets.len(), + expected_count, + "Unexpected provider topic offset count for topic '{}'", + topic + ); + + let response = server + .execute_sql(&format!( + "SELECT COUNT(*) AS count FROM system.topic_offsets WHERE topic_id = '{}'", + topic + )) + .await; + assert_eq!( + response.status, + ResponseStatus::Success, + "COUNT query on system.topic_offsets failed: {:?}", + response.error + ); + assert_eq!( + response.get_i64("count"), + Some(expected_count as i64), + "Unexpected SQL topic offset count for topic '{}'", + topic + ); +} + async fn wait_for_topic_routes( server: &http_server::HttpTestServer, topic_id: &str, @@ -204,7 +429,7 @@ async fn test_create_topic_basic() { let server = TestServer::new_shared().await; let sql = "CREATE TOPIC default.user_events_topic PARTITIONS 1"; - let result = server.execute_sql(sql).await; + let result = server.execute_sql(&sql).await; // Basic smoke test - verify command executes (or already exists) assert!( @@ -272,28 +497,40 @@ async fn test_consume_from_topic() { async fn test_ack_offset() { let server = TestServer::new_shared().await; - // Setup: need a namespace for the topic - server.execute_sql("CREATE NAMESPACE test_ack_ns").await; + let namespace = 
consolidated_helpers::unique_namespace("tp_ack"); + let topic_table = consolidated_helpers::unique_table("topic"); + let topic = format!("{}.{}", namespace, topic_table); + let group = format!("ack-{}", consolidated_helpers::unique_table("group")); - // Setup topic (namespace-qualified) - server.execute_sql("CREATE TOPIC test_ack_ns.test_ack_tp PARTITIONS 1").await; + let create_namespace = server.execute_sql(&format!("CREATE NAMESPACE {}", namespace)).await; + assert_eq!(create_namespace.status, ResponseStatus::Success); - // Consume first to create offset record - server - .execute_sql( - "CONSUME FROM test_ack_ns.test_ack_tp GROUP 'test_ack_group' START EARLIEST LIMIT 10", - ) + let create_topic = server.execute_sql(&format!("CREATE TOPIC {} PARTITIONS 1", topic)).await; + assert_eq!(create_topic.status, ResponseStatus::Success); + + assert_topic_offset_state(&server, &topic, &group, None).await; + + // Empty consume should not persist an auto-ack row. + let consume = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 10", topic, group)) .await; + assert_eq!(consume.status, ResponseStatus::Success); + assert_topic_offset_state(&server, &topic, &group, None).await; // ACK offset (should succeed) - let sql = "ACK test_ack_ns.test_ack_tp GROUP 'test_ack_group' UPTO OFFSET 0"; - let result = server.execute_sql(sql).await; + let sql = format!("ACK {} GROUP '{}' UPTO OFFSET 0", topic, group); + let result = server.execute_sql(&sql).await; assert!( result.status == ResponseStatus::Success, "ACK should succeed: {:?}", result.error ); + + let updated_at = assert_topic_offset_state(&server, &topic, &group, Some(0)) + .await + .expect("Expected ACK to create a persisted topic offset row"); + assert!(updated_at > 0, "ACK should populate updated_at"); } /// Test DROP TOPIC @@ -457,8 +694,8 @@ async fn test_cdc_insert_to_consume_workflow() { .await; // 3. 
Insert data (should trigger CDC → topic) - let insert_1 = - "INSERT INTO test_cdc_ns.events (id, event_type, data) VALUES ('evt1', 'user_signup', 'John Doe')"; + let insert_1 = "INSERT INTO test_cdc_ns.events (id, event_type, data) VALUES ('evt1', \ + 'user_signup', 'John Doe')"; let result_1 = server.execute_sql(insert_1).await; assert!( result_1.status == ResponseStatus::Success, @@ -466,8 +703,8 @@ async fn test_cdc_insert_to_consume_workflow() { result_1.error ); - let insert_2 = - "INSERT INTO test_cdc_ns.events (id, event_type, data) VALUES ('evt2', 'user_login', 'Jane Smith')"; + let insert_2 = "INSERT INTO test_cdc_ns.events (id, event_type, data) VALUES ('evt2', \ + 'user_login', 'Jane Smith')"; let result_2 = server.execute_sql(insert_2).await; assert!( result_2.status == ResponseStatus::Success, @@ -502,7 +739,8 @@ async fn test_cdc_insert_to_consume_workflow() { assert_eq!( first_batch.schema.len(), 8, - "Should have 8 schema fields (topic_id, partition_id, offset, key, payload, timestamp_ms, user_id, op)" + "Should have 8 schema fields (topic_id, partition_id, offset, key, payload, \ + timestamp_ms, user_id, op)" ); // Verify column names match schema @@ -522,7 +760,9 @@ async fn test_cdc_insert_to_consume_workflow() { } let http_server = http_server::get_global_server().await; - let auth_header = http_server.bearer_auth_header("root").expect("Failed to create root auth header"); + let auth_header = http_server + .bearer_auth_header("root") + .expect("Failed to create root auth header"); let group = format!("cdc-key-check-{}", consolidated_helpers::unique_table("group")); let http_consume = wait_until_group_reads_at_least( http_server, @@ -535,11 +775,8 @@ async fn test_cdc_insert_to_consume_workflow() { ) .await; - let consumed_keys: std::collections::HashSet = http_consume - .messages - .iter() - .filter_map(|message| message.key.clone()) - .collect(); + let consumed_keys: std::collections::HashSet = + 
http_consume.messages.iter().filter_map(|message| message.key.clone()).collect(); assert!(consumed_keys.contains("evt1"), "Expected consumed key set to contain evt1"); assert!(consumed_keys.contains("evt2"), "Expected consumed key set to contain evt2"); } @@ -592,6 +829,361 @@ async fn test_consume_schema_structure() { } } +#[tokio::test] +#[ntest::timeout(30000)] +#[serial] +async fn test_sql_group_consume_resumes_from_committed_offsets_after_cache_clear() { + let server = TestServer::new_shared().await; + let _cache_guard = TopicPublisherCacheGuard { + app_context: server.app_context.clone(), + }; + + let (topic, source_table) = setup_topic_source_fixture(&server, "tp_sql_resume").await; + let group = format!("sql-resume-{}", consolidated_helpers::unique_table("group")); + + for id in 1..=2 { + let insert = server + .execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let readiness_sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 10", topic); + let ready = wait_until_sql_consume_row_count_at_least(&server, &readiness_sql, 2).await; + assert_eq!(ready.row_count(), 2, "Expected stateless consume to observe both rows"); + + assert_topic_offset_state(&server, &topic, &group, None).await; + + let first_consume = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 1", topic, group)) + .await; + assert_eq!(first_consume.status, ResponseStatus::Success); + assert_eq!( + first_consume.results.first().map(|batch| batch.row_count).unwrap_or(0), + 1, + "Initial consume should return the first message" + ); + let first_updated_at = assert_topic_offset_state(&server, &topic, &group, Some(0)) + .await + .expect("Expected first grouped consume to auto-ack offset 0"); + + server.app_context.topic_publisher().clear_cache(); + + let second_consume = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST 
LIMIT 10", topic, group)) + .await; + assert_eq!(second_consume.status, ResponseStatus::Success); + assert_eq!( + second_consume.results.first().map(|batch| batch.row_count).unwrap_or(0), + 1, + "After clearing in-memory claims, SQL consume should resume from the committed offset" + ); + let second_updated_at = assert_topic_offset_state(&server, &topic, &group, Some(1)) + .await + .expect("Expected second grouped consume to advance auto-ack to offset 1"); + assert!( + second_updated_at >= first_updated_at, + "Offset update timestamp should move forward or stay equal across rapid commits" + ); + + let lower_ack = server + .execute_sql(&format!("ACK {} GROUP '{}' UPTO OFFSET 0", topic, group)) + .await; + assert_eq!(lower_ack.status, ResponseStatus::Success); + let after_lower_ack = assert_topic_offset_state(&server, &topic, &group, Some(1)) + .await + .expect("Expected lower ACK to leave committed offset intact"); + assert_eq!( + after_lower_ack, second_updated_at, + "Lower ACK should not regress or rewrite the committed topic offset row" + ); + + server.app_context.topic_publisher().clear_cache(); + + let third_consume = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 10", topic, group)) + .await; + assert_eq!(third_consume.status, ResponseStatus::Success); + assert_eq!( + third_consume.results.first().map(|batch| batch.row_count).unwrap_or(0), + 0, + "Committed offsets should prevent replay once the group has consumed the backlog" + ); + let final_updated_at = assert_topic_offset_state(&server, &topic, &group, Some(1)) + .await + .expect("Expected committed topic offset row to remain after empty replay check"); + assert_eq!(final_updated_at, second_updated_at); +} + +#[tokio::test] +#[ntest::timeout(45000)] +#[serial] +async fn test_sql_group_offsets_are_isolated_per_group() { + let server = TestServer::new_shared().await; + let _cache_guard = TopicPublisherCacheGuard { + app_context: server.app_context.clone(), + }; + + let (topic, 
source_table) = setup_topic_source_fixture(&server, "tp_sql_groups").await; + for id in 1..=10 { + let insert = server + .execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let readiness_sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 20", topic); + let ready = wait_until_sql_consume_row_count_at_least(&server, &readiness_sql, 10).await; + assert_eq!(ready.row_count(), 10, "Expected stateless consume to observe all inserted rows"); + + let group_specs = vec![ + (format!("group-a-{}", consolidated_helpers::unique_table("tp")), 1usize, 0u64), + (format!("group-b-{}", consolidated_helpers::unique_table("tp")), 2usize, 1u64), + (format!("group-c-{}", consolidated_helpers::unique_table("tp")), 3usize, 2u64), + (format!("group-d-{}", consolidated_helpers::unique_table("tp")), 4usize, 3u64), + (format!("group-e-{}", consolidated_helpers::unique_table("tp")), 5usize, 4u64), + ]; + + for (group, limit, expected_last_acked) in &group_specs { + let consume_sql = + format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT {}", topic, group, limit); + let response = + wait_until_sql_consume_row_count_at_least(&server, &consume_sql, *limit).await; + assert_eq!( + response.row_count(), + *limit, + "Group '{}' should receive exactly {} row(s) on first consume", + group, + limit + ); + assert_eq!( + first_row_offset(&response), + 0, + "New group '{}' should start from the beginning when consuming FROM EARLIEST", + group + ); + assert_topic_offset_state(&server, &topic, group, Some(*expected_last_acked)).await; + } + + assert_topic_offset_count(&server, &topic, group_specs.len()).await; + + let topic_id = TopicId::new(&topic); + let mut provider_snapshot: Vec<(String, u64)> = server + .app_context + .system_tables() + .topic_offsets() + .get_topic_offsets(&topic_id) + .expect("Failed to list topic offsets for group isolation test") + .into_iter() + 
.map(|offset| (offset.group_id.as_str().to_string(), offset.last_acked_offset)) + .collect(); + provider_snapshot.sort(); + + let mut expected_snapshot: Vec<(String, u64)> = group_specs + .iter() + .map(|(group, _, expected_last_acked)| (group.clone(), *expected_last_acked)) + .collect(); + expected_snapshot.sort(); + assert_eq!( + provider_snapshot, expected_snapshot, + "Each group should persist its own last_acked_offset without leaking another group's \ + cursor" + ); + + server.app_context.topic_publisher().clear_cache(); + + for (group, _, expected_last_acked) in &group_specs { + let resume_sql = format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 1", topic, group); + let resumed = wait_until_sql_consume_row_count_at_least(&server, &resume_sql, 1).await; + let expected_next_offset = *expected_last_acked as i64 + 1; + assert_eq!( + first_row_offset(&resumed), + expected_next_offset, + "Group '{}' should resume from its own committed cursor after cache clear", + group + ); + assert_topic_offset_state(&server, &topic, group, Some(*expected_last_acked + 1)).await; + } + + let mut final_provider_snapshot: Vec<(String, u64)> = server + .app_context + .system_tables() + .topic_offsets() + .get_topic_offsets(&topic_id) + .expect("Failed to list final topic offsets for group isolation test") + .into_iter() + .map(|offset| (offset.group_id.as_str().to_string(), offset.last_acked_offset)) + .collect(); + final_provider_snapshot.sort(); + + let mut final_expected_snapshot: Vec<(String, u64)> = group_specs + .iter() + .map(|(group, _, expected_last_acked)| (group.clone(), *expected_last_acked + 1)) + .collect(); + final_expected_snapshot.sort(); + assert_eq!( + final_provider_snapshot, final_expected_snapshot, + "Each group should keep its own independent cursor after resuming again" + ); +} + +#[tokio::test] +#[ntest::timeout(45000)] +#[serial] +async fn test_sql_group_from_latest_tails_new_messages_and_then_persists_offset() { + let server = 
TestServer::new_shared().await; + let _cache_guard = TopicPublisherCacheGuard { + app_context: server.app_context.clone(), + }; + + let (topic, source_table) = setup_topic_source_fixture(&server, "tp_sql_latest").await; + let group = format!("latest-{}", consolidated_helpers::unique_table("group")); + + for id in 1..=5 { + let insert = server + .execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let readiness_sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 10", topic); + let ready = wait_until_sql_consume_row_count_at_least(&server, &readiness_sql, 5).await; + assert_eq!(ready.row_count(), 5, "Expected stateless consume to observe the backlog"); + + let latest_consume = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM LATEST LIMIT 10", topic, group)) + .await; + assert_eq!(latest_consume.status, ResponseStatus::Success); + assert_eq!(latest_consume.row_count(), 0, "New latest group should not replay backlog"); + assert_topic_offset_state(&server, &topic, &group, None).await; + + for id in 100..=101 { + let insert = server + .execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'live_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let tail_consume = wait_until_sql_consume_row_count_at_least( + &server, + &format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 10", topic, group), + 2, + ) + .await; + assert_eq!(row_offsets(&tail_consume), vec![5, 6]); + assert_topic_offset_state(&server, &topic, &group, Some(6)).await; + + server.app_context.topic_publisher().clear_cache(); + + let resumed = server + .execute_sql(&format!("CONSUME FROM {} GROUP '{}' FROM 0 LIMIT 10", topic, group)) + .await; + assert_eq!(resumed.status, ResponseStatus::Success); + assert_eq!( + resumed.row_count(), + 0, + "Committed latest-group cursor should prevent replay after 
cache clear" + ); + assert_topic_offset_state(&server, &topic, &group, Some(6)).await; +} + +#[tokio::test] +#[ntest::timeout(45000)] +#[serial] +async fn test_sql_group_from_offset_starts_at_requested_offset_and_persists_resume() { + let server = TestServer::new_shared().await; + let _cache_guard = TopicPublisherCacheGuard { + app_context: server.app_context.clone(), + }; + + let (topic, source_table) = setup_topic_source_fixture(&server, "tp_sql_offset").await; + let group = format!("offset-{}", consolidated_helpers::unique_table("group")); + + for id in 1..=6 { + let insert = server + .execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let readiness_sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 10", topic); + let ready = wait_until_sql_consume_row_count_at_least(&server, &readiness_sql, 6).await; + assert_eq!(ready.row_count(), 6, "Expected stateless consume to observe all inserted rows"); + + let offset_consume = wait_until_sql_consume_row_count_at_least( + &server, + &format!("CONSUME FROM {} GROUP '{}' FROM 3 LIMIT 2", topic, group), + 2, + ) + .await; + assert_eq!(row_offsets(&offset_consume), vec![3, 4]); + assert_topic_offset_state(&server, &topic, &group, Some(4)).await; + + server.app_context.topic_publisher().clear_cache(); + + let resumed = wait_until_sql_consume_row_count_at_least( + &server, + &format!("CONSUME FROM {} GROUP '{}' FROM EARLIEST LIMIT 10", topic, group), + 1, + ) + .await; + assert_eq!(row_offsets(&resumed), vec![5]); + assert_topic_offset_state(&server, &topic, &group, Some(5)).await; +} + +#[tokio::test] +#[ntest::timeout(30000)] +async fn test_sql_consume_without_group_is_stateless_and_does_not_persist_offsets() { + let server = TestServer::new_shared().await; + let (topic, source_table) = setup_topic_source_fixture(&server, "tp_sql_stateless").await; + + for id in 1..=2 { + let insert = server + 
.execute_sql(&format!( + "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", + source_table, id, id + )) + .await; + assert_eq!(insert.status, ResponseStatus::Success); + } + + let sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 1", topic); + let first_consume = wait_until_sql_consume_row_count_at_least(&server, &sql, 1).await; + let first_offset = first_row_offset(&first_consume); + assert_eq!(first_offset, 0, "First stateless consume should start at offset 0"); + + let second_consume = server.execute_sql(&sql).await; + assert_eq!( + second_consume.status, + ResponseStatus::Success, + "Second stateless consume should succeed: {:?}", + second_consume.error + ); + assert_eq!(second_consume.row_count(), 1, "Expected another single-row stateless consume"); + let second_offset = first_row_offset(&second_consume); + assert_eq!( + second_offset, first_offset, + "Without GROUP, repeated consumes should replay the same earliest row" + ); + + assert_topic_offset_count(&server, &topic, 0).await; +} + /// HTTP API integration: consume/ack option combinations and offset progression. /// /// Covers the consumer options from SDK usage: @@ -1012,7 +1604,10 @@ async fn test_clear_topic() { // Verify messages are cleared by consuming again let verify_result = server - .execute_sql("CONSUME FROM test_clear_ns.messages_topic GROUP 'verify_group' START EARLIEST LIMIT 10") + .execute_sql( + "CONSUME FROM test_clear_ns.messages_topic GROUP 'verify_group' START EARLIEST LIMIT \ + 10", + ) .await; assert_eq!( verify_result.status, diff --git a/backend/tests/misc/auth/test_as_user_impersonation.rs b/backend/tests/misc/auth/test_as_user_impersonation.rs index bdcecfebf..e3ac28efd 100644 --- a/backend/tests/misc/auth/test_as_user_impersonation.rs +++ b/backend/tests/misc/auth/test_as_user_impersonation.rs @@ -7,12 +7,13 @@ //! - Audit logging: Both actor and subject logged //! 
- Performance: Permission checks complete in <10ms -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::models::{AuthType, Role, UserId}; use kalamdb_system::providers::storages::models::StorageMode; use uuid::Uuid; +use super::test_support::TestServer; + async fn insert_user(server: &TestServer, username: &str, role: Role) -> UserId { let user_id = UserId::new(username); @@ -83,7 +84,8 @@ async fn test_as_user_blocked_for_regular_user() { // Create namespace and USER table (using default/system for DDL) server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let create_table = format!( - "CREATE TABLE {}.items (item_id VARCHAR PRIMARY KEY, name VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.items (item_id VARCHAR PRIMARY KEY, name VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); server.execute_sql(&create_table).await; // Use default system user for table creation @@ -124,14 +126,16 @@ async fn test_as_user_with_service_role() { // Create namespace and USER table as DBA server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let create_table = format!( - "CREATE TABLE {}.orders (order_id VARCHAR PRIMARY KEY, amount VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.orders (order_id VARCHAR PRIMARY KEY, amount VARCHAR) WITH (TYPE = \ + 'USER', STORAGE_ID = 'local')", ns ); server.execute_sql_as_user(&create_table, admin_user.as_str()).await; // INSERT AS USER target_user (should succeed) let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.orders (order_id, amount) VALUES ('ORD-123', '99.99'))", + "EXECUTE AS USER '{}' (INSERT INTO {}.orders (order_id, amount) VALUES ('ORD-123', \ + '99.99'))", target_user.as_str(), ns ); @@ -170,7 +174,8 @@ async fn test_as_user_success_is_audited() { assert_eq!(ns_resp.status, ResponseStatus::Success, "CREATE NAMESPACE failed"); let create_table = format!( - "CREATE TABLE {}.audit_items (id 
VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.audit_items (id VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); let create_resp = server.execute_sql_as_user(&create_table, "root").await; @@ -227,7 +232,8 @@ async fn test_as_user_with_dba_role() { ); let create_table = format!( - "CREATE TABLE {}.logs (log_id VARCHAR PRIMARY KEY, message VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.logs (log_id VARCHAR PRIMARY KEY, message VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); let table_resp = server.execute_sql_as_user(&create_table, dba_user.as_str()).await; @@ -240,7 +246,8 @@ async fn test_as_user_with_dba_role() { // INSERT AS USER (should succeed) let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.logs (log_id, message) VALUES ('LOG-1', 'Test message'))", + "EXECUTE AS USER '{}' (INSERT INTO {}.logs (log_id, message) VALUES ('LOG-1', 'Test \ + message'))", target_user.as_str(), ns ); @@ -263,14 +270,16 @@ async fn test_insert_as_user_ownership() { // Create namespace and USER table server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let create_table = format!( - "CREATE TABLE {}.messages (msg_id VARCHAR PRIMARY KEY, content VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.messages (msg_id VARCHAR PRIMARY KEY, content VARCHAR) WITH (TYPE = \ + 'USER', STORAGE_ID = 'local')", ns ); server.execute_sql_as_user(&create_table, admin_user.as_str()).await; // INSERT AS USER alice let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.messages (msg_id, content) VALUES ('MSG-1', 'Hello from Alice'))", + "EXECUTE AS USER '{}' (INSERT INTO {}.messages (msg_id, content) VALUES ('MSG-1', 'Hello \ + from Alice'))", user_alice.as_str(), ns ); @@ -304,11 +313,16 @@ async fn test_update_as_user() { // Create namespace, USER table, and insert record as charlie 
server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; - let create_table = format!("CREATE TABLE {}.profiles (profile_id VARCHAR PRIMARY KEY, status VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", ns); + let create_table = format!( + "CREATE TABLE {}.profiles (profile_id VARCHAR PRIMARY KEY, status VARCHAR) WITH (TYPE = \ + 'USER', STORAGE_ID = 'local')", + ns + ); server.execute_sql_as_user(&create_table, admin_user.as_str()).await; let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.profiles (profile_id, status) VALUES ('PROF-1', 'active'))", + "EXECUTE AS USER '{}' (INSERT INTO {}.profiles (profile_id, status) VALUES ('PROF-1', \ + 'active'))", user_charlie.as_str(), ns ); @@ -316,7 +330,8 @@ async fn test_update_as_user() { // UPDATE AS USER charlie let update_sql = format!( - "EXECUTE AS USER '{}' (UPDATE {}.profiles SET status = 'inactive' WHERE profile_id = 'PROF-1')", + "EXECUTE AS USER '{}' (UPDATE {}.profiles SET status = 'inactive' WHERE profile_id = \ + 'PROF-1')", user_charlie.as_str(), ns ); @@ -348,11 +363,16 @@ async fn test_delete_as_user() { // Create namespace, USER table, and insert record as dave server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; - let create_table = format!("CREATE TABLE {}.sessions (session_id VARCHAR PRIMARY KEY, active BOOLEAN) WITH (TYPE = 'USER', STORAGE_ID = 'local')", ns); + let create_table = format!( + "CREATE TABLE {}.sessions (session_id VARCHAR PRIMARY KEY, active BOOLEAN) WITH (TYPE = \ + 'USER', STORAGE_ID = 'local')", + ns + ); server.execute_sql_as_user(&create_table, admin_user.as_str()).await; let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.sessions (session_id, active) VALUES ('SESS-1', true))", + "EXECUTE AS USER '{}' (INSERT INTO {}.sessions (session_id, active) VALUES ('SESS-1', \ + true))", user_dave.as_str(), ns ); @@ -389,7 +409,8 @@ async fn test_select_as_user_scopes_reads() { server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let 
create_table = format!( - "CREATE TABLE {}.items (id VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.items (id VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); server.execute_sql(&create_table).await; @@ -434,7 +455,8 @@ async fn test_stream_table_isolation_with_select_as_user() { server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let create_table = format!( - "CREATE TABLE {}.events (id VARCHAR PRIMARY KEY, payload VARCHAR) WITH (TYPE = 'STREAM', TTL_SECONDS = 3600)", + "CREATE TABLE {}.events (id VARCHAR PRIMARY KEY, payload VARCHAR) WITH (TYPE = 'STREAM', \ + TTL_SECONDS = 3600)", ns ); server.execute_sql(&create_table).await; @@ -486,7 +508,8 @@ async fn test_as_user_on_shared_table_rejected() { ); let create_table = format!( - "CREATE TABLE {}.global_config (config_key VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE = 'SHARED')", + "CREATE TABLE {}.global_config (config_key VARCHAR PRIMARY KEY, value VARCHAR) WITH (TYPE \ + = 'SHARED')", ns ); let create_resp = server.execute_sql_as_user(&create_table, admin_user.as_str()).await; @@ -499,7 +522,8 @@ async fn test_as_user_on_shared_table_rejected() { // INSERT AS USER on SHARED table (should fail) let insert_sql = format!( - "EXECUTE AS USER '{}' (INSERT INTO {}.global_config (config_key, value) VALUES ('setting1', 'value1'))", + "EXECUTE AS USER '{}' (INSERT INTO {}.global_config (config_key, value) VALUES \ + ('setting1', 'value1'))", user_eve.as_str(), ns ); @@ -531,14 +555,16 @@ async fn test_as_user_nonexistent_user() { // Create namespace and USER table server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await; let create_table = format!( - "CREATE TABLE {}.logs (log_id VARCHAR PRIMARY KEY, message VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.logs (log_id VARCHAR PRIMARY KEY, message VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); 
server.execute_sql_as_user(&create_table, admin_user.as_str()).await; // INSERT AS USER with non-existent user let insert_sql = format!( - "EXECUTE AS USER 'nonexistent_user_12345' (INSERT INTO {}.logs (log_id, message) VALUES ('LOG-1', 'Test'))", + "EXECUTE AS USER 'nonexistent_user_12345' (INSERT INTO {}.logs (log_id, message) VALUES \ + ('LOG-1', 'Test'))", ns ); let resp = server.execute_sql_as_user(&insert_sql, admin_user.as_str()).await; @@ -572,7 +598,8 @@ async fn test_as_user_performance() { ); let create_table = format!( - "CREATE TABLE {}.perf_test (id VARCHAR PRIMARY KEY, data VARCHAR) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {}.perf_test (id VARCHAR PRIMARY KEY, data VARCHAR) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", ns ); let table_resp = server.execute_sql_as_user(&create_table, admin_user.as_str()).await; diff --git a/backend/tests/misc/auth/test_auth_performance.rs b/backend/tests/misc/auth/test_auth_performance.rs index 32eeef77c..98f04e284 100644 --- a/backend/tests/misc/auth/test_auth_performance.rs +++ b/backend/tests/misc/auth/test_auth_performance.rs @@ -12,10 +12,12 @@ //! - p99 latency: <200ms for cached auth, <500ms for uncached //! 
- Cache hit rate: >95% for repeated authentications -use super::test_support::{auth_helper, TestServer}; +use std::time::{Duration, Instant}; + use actix_web::{test, web, App}; use kalamdb_commons::{models::ConnectionInfo, Role}; -use std::time::{Duration, Instant}; + +use super::test_support::{auth_helper, TestServer}; /// Performance benchmark for Bearer token authentication /// @@ -272,9 +274,10 @@ async fn test_auth_cache_effectiveness() { /// Test concurrent authentication load #[tokio::test] async fn test_concurrent_auth_load() { - use kalamdb_auth::{authenticate, AuthRequest, CachedUsersRepo, UserRepository}; use std::sync::Arc; + use kalamdb_auth::{authenticate, AuthRequest, CachedUsersRepo, UserRepository}; + let server = TestServer::new_shared().await; // Create multiple test users diff --git a/backend/tests/misc/auth/test_basic_auth.rs b/backend/tests/misc/auth/test_basic_auth.rs index 113bf638d..8f75dedaa 100644 --- a/backend/tests/misc/auth/test_basic_auth.rs +++ b/backend/tests/misc/auth/test_basic_auth.rs @@ -9,11 +9,13 @@ //! **Test Philosophy**: Follow TDD - these tests verify the unified authentication //! flow that is used by both HTTP and WebSocket handlers. -use super::test_support::{auth_helper, TestServer}; +use std::sync::Arc; + use base64::Engine as _; use kalamdb_auth::AuthError; use kalamdb_commons::{models::ConnectionInfo, Role}; -use std::sync::Arc; + +use super::test_support::{auth_helper, TestServer}; /// Test successful Bearer auth with valid token #[tokio::test] diff --git a/backend/tests/misc/auth/test_cli_auth.rs b/backend/tests/misc/auth/test_cli_auth.rs index 58649391a..eeb630b5b 100644 --- a/backend/tests/misc/auth/test_cli_auth.rs +++ b/backend/tests/misc/auth/test_cli_auth.rs @@ -8,9 +8,9 @@ //! - System user can authenticate from localhost //! 
- System user credentials are generated securely +use kalamdb_commons::{constants::AuthConstants, AuthType, Role, UserId}; + use super::test_support::TestServer; -use kalamdb_commons::constants::AuthConstants; -use kalamdb_commons::{AuthType, Role, UserId}; #[tokio::test] async fn test_init_creates_system_user() { diff --git a/backend/tests/misc/auth/test_e2e_auth_flow.rs b/backend/tests/misc/auth/test_e2e_auth_flow.rs index ad10ea9bb..9034269b2 100644 --- a/backend/tests/misc/auth/test_e2e_auth_flow.rs +++ b/backend/tests/misc/auth/test_e2e_auth_flow.rs @@ -12,10 +12,11 @@ //! This test ensures the authentication system works end-to-end //! and validates user lifecycle management. -use super::test_support::{auth_helper, TestServer}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; +use super::test_support::{auth_helper, TestServer}; + /// End-to-end authentication flow test #[actix_web::test] async fn test_e2e_auth_flow() { diff --git a/backend/tests/misc/auth/test_jwt_auth.rs b/backend/tests/misc/auth/test_jwt_auth.rs index 63f4f88b7..6f91237b4 100644 --- a/backend/tests/misc/auth/test_jwt_auth.rs +++ b/backend/tests/misc/auth/test_jwt_auth.rs @@ -13,14 +13,18 @@ //! **Phase 4 - User Story 2**: Token-Based Authentication //! 
Task IDs: T059-T064 (Integration tests for JWT auth) -use super::test_support::{auth_helper, TestServer}; +use std::sync::Arc; + use actix_web::{test, web, App}; use jsonwebtoken::{encode, Algorithm, EncodingKey, Header}; -use kalamdb_auth::providers::jwt_auth::{JwtClaims as AuthJwtClaims, KALAMDB_ISSUER}; -use kalamdb_auth::{CoreUsersRepo, UserRepository}; +use kalamdb_auth::{ + providers::jwt_auth::{JwtClaims as AuthJwtClaims, KALAMDB_ISSUER}, + CoreUsersRepo, UserRepository, +}; use kalamdb_commons::{Role, UserId}; use serde::Serialize; -use std::sync::Arc; + +use super::test_support::{auth_helper, TestServer}; fn jwt_secret_for_tests() -> String { kalamdb_configs::ServerConfig::default().auth.jwt_secret diff --git a/backend/tests/misc/auth/test_last_seen.rs b/backend/tests/misc/auth/test_last_seen.rs index 6984de6e4..c2b344ec3 100644 --- a/backend/tests/misc/auth/test_last_seen.rs +++ b/backend/tests/misc/auth/test_last_seen.rs @@ -7,13 +7,14 @@ //! These tests verify token authentication behavior and are placeholders for //! future last_seen implementation at the HTTP/WebSocket handler level. -use super::test_support::TestServer; use kalamdb_auth::{authenticate, AuthRequest}; use kalamdb_commons::{ models::{ConnectionInfo, UserId}, Role, }; +use super::test_support::TestServer; + fn bearer_auth_header(username: &str, user_id: &str, role: Role) -> String { let secret = kalamdb_configs::defaults::default_auth_jwt_secret(); let email = format!("{}@example.com", username); diff --git a/backend/tests/misc/auth/test_live_queries_auth_expiry.rs b/backend/tests/misc/auth/test_live_queries_auth_expiry.rs index 08975baa3..95545851f 100644 --- a/backend/tests/misc/auth/test_live_queries_auth_expiry.rs +++ b/backend/tests/misc/auth/test_live_queries_auth_expiry.rs @@ -2,10 +2,12 @@ //! //! Verifies that auth expiry events correctly terminate WebSocket connections. 
-use super::test_support::TestServer; -use kalamdb_commons::models::{ConnectionId, ConnectionInfo, Role, UserId}; use std::time::Duration; +use kalamdb_commons::models::{ConnectionId, ConnectionInfo, Role, UserId}; + +use super::test_support::TestServer; + #[tokio::test] async fn test_live_query_auth_expiry() { let server = TestServer::new_shared().await; diff --git a/backend/tests/misc/auth/test_oauth.rs b/backend/tests/misc/auth/test_oauth.rs index 9323c27c2..68c50237c 100644 --- a/backend/tests/misc/auth/test_oauth.rs +++ b/backend/tests/misc/auth/test_oauth.rs @@ -7,12 +7,13 @@ //! - OAuth subject matching //! - Auto-provisioning disabled by default -use super::test_support::TestServer; -use kalam_client::models::ResponseStatus; -use kalamdb_commons::models::ConnectionInfo; -use kalamdb_commons::{AuthType, OAuthProvider, Role, UserId}; use std::sync::atomic::{AtomicUsize, Ordering}; +use kalam_client::models::ResponseStatus; +use kalamdb_commons::{models::ConnectionInfo, AuthType, OAuthProvider, Role, UserId}; + +use super::test_support::TestServer; + static UNIQUE_USER_COUNTER: AtomicUsize = AtomicUsize::new(0); fn unique_username(prefix: &str) -> String { @@ -32,9 +33,9 @@ async fn test_oauth_google_success() { // Create OAuth user with Google provider let create_sql = format!( - "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_123456\"}}' ROLE user EMAIL '{}@gmail.com'", - oauth_username, - oauth_username + "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_123456\"}}' \ + ROLE user EMAIL '{}@gmail.com'", + oauth_username, oauth_username ); let result = server.execute_sql_as_user(&create_sql, admin_id_str).await; @@ -73,7 +74,8 @@ async fn test_oauth_user_password_rejected() { // Create OAuth user let create_sql = format!( - "CREATE USER {} WITH OAUTH '{{\"provider\": \"github\", \"subject\": \"github_789\"}}' ROLE user", + "CREATE USER {} WITH OAUTH '{{\"provider\": \"github\", \"subject\": 
\"github_789\"}}' \ + ROLE user", oauth_username ); let res = server.execute_sql_as_user(&create_sql, admin_id_str).await; @@ -119,11 +121,13 @@ async fn test_oauth_subject_matching() { // Create two OAuth users with different subjects let create_sql1 = format!( - "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_111\"}}' ROLE user", + "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_111\"}}' \ + ROLE user", user1_name ); let create_sql2 = format!( - "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_222\"}}' ROLE user", + "CREATE USER {} WITH OAUTH '{{\"provider\": \"google\", \"subject\": \"google_222\"}}' \ + ROLE user", user2_name ); @@ -218,9 +222,9 @@ async fn test_oauth_azure_provider() { // Create OAuth user with Azure provider let create_sql = format!( - "CREATE USER {} WITH OAUTH '{{\"provider\": \"azure\", \"subject\": \"azure_tenant_user\"}}' ROLE service EMAIL '{}@microsoft.com'", - oauth_username, - oauth_username + "CREATE USER {} WITH OAUTH '{{\"provider\": \"azure\", \"subject\": \ + \"azure_tenant_user\"}}' ROLE service EMAIL '{}@microsoft.com'", + oauth_username, oauth_username ); let result = server.execute_sql_as_user(&create_sql, admin_id_str).await; diff --git a/backend/tests/misc/auth/test_password_complexity.rs b/backend/tests/misc/auth/test_password_complexity.rs index e0fa89829..bb52f4444 100644 --- a/backend/tests/misc/auth/test_password_complexity.rs +++ b/backend/tests/misc/auth/test_password_complexity.rs @@ -1,13 +1,18 @@ //! Tests for password complexity enforcement. 
-use super::test_support::TestServer; +use std::sync::Arc; + use kalamdb_commons::{AuthType, Role, StorageId, UserId}; -use kalamdb_core::app_context::AppContext; -use kalamdb_core::error::KalamDbError; -use kalamdb_core::sql::executor::handler_registry::HandlerRegistry; -use kalamdb_core::sql::{ExecutionContext, ExecutionResult, SqlExecutor}; +use kalamdb_core::{ + app_context::AppContext, + error::KalamDbError, + sql::{ + executor::handler_registry::HandlerRegistry, ExecutionContext, ExecutionResult, SqlExecutor, + }, +}; use kalamdb_system::providers::storages::models::StorageMode; -use std::sync::Arc; + +use super::test_support::TestServer; async fn setup_executor( enforce_complexity: bool, diff --git a/backend/tests/misc/auth/test_rbac.rs b/backend/tests/misc/auth/test_rbac.rs index fd3eb8ed2..7488e12b2 100644 --- a/backend/tests/misc/auth/test_rbac.rs +++ b/backend/tests/misc/auth/test_rbac.rs @@ -2,10 +2,11 @@ //! //! Verifies role-based access control behavior using SQL executor paths. -use super::test_support::{fixtures, TestServer}; use kalam_client::models::ResponseStatus; use kalamdb_commons::models::{Role, UserId}; +use super::test_support::{fixtures, TestServer}; + async fn insert_user(server: &TestServer, username: &str, role: Role) -> UserId { server.create_user(username, "TestPass123!", role).await } diff --git a/backend/tests/misc/auth/test_soft_delete.rs b/backend/tests/misc/auth/test_soft_delete.rs index 01947c0ab..57c9f4fe9 100644 --- a/backend/tests/misc/auth/test_soft_delete.rs +++ b/backend/tests/misc/auth/test_soft_delete.rs @@ -6,9 +6,10 @@ //! - Deleted data can be recovered //! 
- _deleted field is accessible when explicitly selected -use super::test_support::{consolidated_helpers::unique_namespace, fixtures, TestServer}; use kalam_client::models::ResponseStatus; +use super::test_support::{consolidated_helpers::unique_namespace, fixtures, TestServer}; + #[actix_web::test] async fn test_soft_delete_hides_rows() { let server = TestServer::new_shared().await; @@ -40,7 +41,8 @@ async fn test_soft_delete_hides_rows() { server .execute_sql_as_user( &format!( - "INSERT INTO {}.tasks (id, title, completed) VALUES ('task2', 'Second task', false)", + "INSERT INTO {}.tasks (id, title, completed) VALUES ('task2', 'Second task', \ + false)", namespace ), "user1", @@ -112,7 +114,8 @@ async fn test_soft_delete_preserves_data() { server .execute_sql_as_user( &format!( - "INSERT INTO {}.tasks (id, title, completed) VALUES ('task1', 'Important task', false)", + "INSERT INTO {}.tasks (id, title, completed) VALUES ('task1', 'Important task', \ + false)", namespace ), "user1", @@ -157,7 +160,9 @@ async fn test_deleted_field_default_false() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n title TEXT\n ) WITH (\n TYPE = 'USER',\n STORAGE_ID = 'local'\n )", + "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n \ + title TEXT\n ) WITH (\n TYPE = 'USER',\n \ + STORAGE_ID = 'local'\n )", namespace ), "user1", @@ -200,7 +205,9 @@ async fn test_multiple_deletes() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n title TEXT\n ) WITH (\n TYPE = 'USER',\n STORAGE_ID = 'local'\n )", + "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n \ + title TEXT\n ) WITH (\n TYPE = 'USER',\n \ + STORAGE_ID = 'local'\n )", namespace ), "user1", @@ -261,7 +268,9 @@ async fn test_delete_with_where_clause() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n title TEXT,\n priority INT\n ) WITH (\n TYPE = 'USER',\n STORAGE_ID = 'local'\n )", + "CREATE TABLE {}.tasks 
(\n id TEXT PRIMARY KEY,\n \ + title TEXT,\n priority INT\n ) WITH (\n \ + TYPE = 'USER',\n STORAGE_ID = 'local'\n )", namespace ), "user1", @@ -332,7 +341,9 @@ async fn test_count_excludes_deleted_rows() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n title TEXT\n ) WITH (\n TYPE = 'USER',\n STORAGE_ID = 'local'\n )", + "CREATE TABLE {}.tasks (\n id TEXT PRIMARY KEY,\n \ + title TEXT\n ) WITH (\n TYPE = 'USER',\n \ + STORAGE_ID = 'local'\n )", namespace ), "user1", diff --git a/backend/tests/misc/production/test_mvcc_phase2.rs b/backend/tests/misc/production/test_mvcc_phase2.rs index 47b500b6d..9392904e8 100644 --- a/backend/tests/misc/production/test_mvcc_phase2.rs +++ b/backend/tests/misc/production/test_mvcc_phase2.rs @@ -12,9 +12,9 @@ //! - T063: RocksDB prefix scan `{user_id}:` → efficiently returns only that user's rows //! - T064: RocksDB range scan `_seq > threshold` → efficiently skips older versions +use kalam_client::{models::ResponseStatus, parse_i64}; + use super::test_support::{consolidated_helpers, fixtures, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; /// T051: CREATE TABLE without PK should be rejected #[actix_web::test] diff --git a/backend/tests/misc/production/test_production_concurrency.rs b/backend/tests/misc/production/test_production_concurrency.rs index 1d40ca59c..063a4db36 100644 --- a/backend/tests/misc/production/test_production_concurrency.rs +++ b/backend/tests/misc/production/test_production_concurrency.rs @@ -3,11 +3,11 @@ //! Tests concurrent access patterns, contention handling, and race conditions. //! These tests ensure KalamDB handles multiple simultaneous operations safely. 
-use super::test_support::{consolidated_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; +use kalam_client::{models::ResponseStatus, parse_i64}; use kalamdb_commons::Role; +use super::test_support::{consolidated_helpers, TestServer}; + /// Verify concurrent inserts to same user table work correctly #[tokio::test] async fn concurrent_inserts_same_user_table() { diff --git a/backend/tests/misc/production/test_production_validation.rs b/backend/tests/misc/production/test_production_validation.rs index cd6ff6d7a..c95c730a7 100644 --- a/backend/tests/misc/production/test_production_validation.rs +++ b/backend/tests/misc/production/test_production_validation.rs @@ -3,10 +3,11 @@ //! Tests error message clarity, validation logic, and graceful failure modes. //! These tests ensure users get helpful feedback when things go wrong. -use super::test_support::{consolidated_helpers, TestServer}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; +use super::test_support::{consolidated_helpers, TestServer}; + /// Verify syntax errors return clear, helpful messages #[tokio::test] async fn syntax_error_messages_are_clear() { diff --git a/backend/tests/misc/schema/test_alter_table.rs b/backend/tests/misc/schema/test_alter_table.rs index a7670bebf..7ca576f79 100644 --- a/backend/tests/misc/schema/test_alter_table.rs +++ b/backend/tests/misc/schema/test_alter_table.rs @@ -8,9 +8,10 @@ //! - Schema versioning //! 
- Cache invalidation after ALTER -use super::test_support::{fixtures, TestServer}; use kalam_client::models::ResponseStatus; +use super::test_support::{fixtures, TestServer}; + /// T022a: ALTER TABLE ADD COLUMN #[actix_web::test] async fn test_alter_table_add_column() { diff --git a/backend/tests/misc/schema/test_alter_table_after_flush.rs b/backend/tests/misc/schema/test_alter_table_after_flush.rs index aa562b8ab..9aa1924d4 100644 --- a/backend/tests/misc/schema/test_alter_table_after_flush.rs +++ b/backend/tests/misc/schema/test_alter_table_after_flush.rs @@ -8,9 +8,9 @@ //! This is a critical scenario because it tests schema compatibility //! between hot storage (RocksDB) and cold storage (Parquet files). +use kalam_client::{models::ResponseStatus, parse_i64}; + use super::test_support::{consolidated_helpers, fixtures, flush_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; /// Test ALTER TABLE ADD COLUMN after flushing data to cold storage /// @@ -231,7 +231,8 @@ async fn test_multiple_alter_operations_with_flushes() { server .execute_sql_as_user( &format!( - "INSERT INTO {}.events (id, event_type, user_id) VALUES ('e2', 'logout', 'user123')", + "INSERT INTO {}.events (id, event_type, user_id) VALUES ('e2', 'logout', \ + 'user123')", ns ), "user1", diff --git a/backend/tests/misc/schema/test_column_id_stability.rs b/backend/tests/misc/schema/test_column_id_stability.rs index 0125ba9d3..ca21cf166 100644 --- a/backend/tests/misc/schema/test_column_id_stability.rs +++ b/backend/tests/misc/schema/test_column_id_stability.rs @@ -6,10 +6,12 @@ //! 3. RENAME COLUMN - column_id stays the same //! 4. 
Flushed data - column statistics use column_id keys -use super::test_support::{consolidated_helpers, fixtures, flush_helpers, TestServer}; +use std::collections::HashMap; + use kalam_client::models::ResponseStatus; use serde_json::Value; -use std::collections::HashMap; + +use super::test_support::{consolidated_helpers, fixtures, flush_helpers, TestServer}; /// Test that column_id is stable across ADD, DROP, RENAME operations #[actix_web::test] @@ -464,7 +466,8 @@ async fn get_table_schema( table_name: &str, ) -> Vec> { let query = format!( - "SELECT columns FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}' AND is_latest = true", + "SELECT columns FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}' AND \ + is_latest = true", namespace, table_name ); diff --git a/backend/tests/misc/schema/test_column_ordering.rs b/backend/tests/misc/schema/test_column_ordering.rs index a77558cc4..e48235b77 100644 --- a/backend/tests/misc/schema/test_column_ordering.rs +++ b/backend/tests/misc/schema/test_column_ordering.rs @@ -2,10 +2,13 @@ //! //! Tests that SELECT * returns columns in ordinal_position order +use kalamdb_commons::models::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, TableDefinition, TableType}, + NamespaceId, TableId, TableName, +}; + use super::test_support::TestServer; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::schemas::{ColumnDefinition, TableDefinition, TableType}; -use kalamdb_commons::models::{NamespaceId, TableId, TableName}; fn unique_namespace(prefix: &str) -> String { let run_id = std::time::SystemTime::now() diff --git a/backend/tests/misc/sql/test_combined_data_integrity.rs b/backend/tests/misc/sql/test_combined_data_integrity.rs index 2545449c3..c2308fdbb 100644 --- a/backend/tests/misc/sql/test_combined_data_integrity.rs +++ b/backend/tests/misc/sql/test_combined_data_integrity.rs @@ -16,11 +16,11 @@ //! - Query and verify ALL rows are returned correctly //! 
- Test various SQL operations (COUNT, SUM, AVG, WHERE, ORDER BY, etc.) +use std::{collections::HashMap, path::PathBuf}; + +use kalam_client::{models::ResponseStatus, parse_i64}; + use super::test_support::{fixtures, flush_helpers, query_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; -use std::collections::HashMap; -use std::path::PathBuf; // ============================================================================ // Test 1: Basic Combined Query - Count and Simple Select @@ -457,7 +457,8 @@ async fn test_03_combined_data_filtering() { let compound_response = server .execute_sql_as_user( &format!( - "SELECT product_id, name, price FROM {}.{} WHERE category = 'books' AND price < 35.0 ORDER BY product_id", + "SELECT product_id, name, price FROM {}.{} WHERE category = 'books' AND price < \ + 35.0 ORDER BY product_id", namespace, table_name ), user_id, diff --git a/backend/tests/misc/sql/test_datafusion_commands.rs b/backend/tests/misc/sql/test_datafusion_commands.rs index 96e11070c..610e1ab65 100644 --- a/backend/tests/misc/sql/test_datafusion_commands.rs +++ b/backend/tests/misc/sql/test_datafusion_commands.rs @@ -3,10 +3,11 @@ //! Verifies that DataFusion built-in commands (EXPLAIN, SET, SHOW, DESCRIBE) //! work correctly for admin users and are blocked for non-admin users. 
-use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::models::{Role, UserId}; +use super::test_support::TestServer; + async fn insert_user(server: &TestServer, username: &str, role: Role) -> UserId { server.create_user(username, "TestPass123!", role).await } @@ -141,7 +142,8 @@ async fn test_show_columns_admin_allowed() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_cols_test").await; - let create_sql = "CREATE TABLE df_cols_test.test_table (id INT PRIMARY KEY, name TEXT, age INT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_cols_test.test_table (id INT PRIMARY KEY, name TEXT, age \ + INT) WITH (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // SHOW COLUMNS should work for admin @@ -163,7 +165,8 @@ async fn test_show_columns_user_denied() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_cols_test2").await; - let create_sql = "CREATE TABLE df_cols_test2.test_table (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_cols_test2.test_table (id INT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // SHOW COLUMNS should be denied for regular user @@ -190,7 +193,8 @@ async fn test_describe_datafusion_style_admin_allowed() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_desc_test").await; - let create_sql = "CREATE TABLE df_desc_test.test_table (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_desc_test.test_table (id INT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // DESCRIBE (DataFusion style without TABLE keyword) should work for admin @@ -212,7 +216,8 @@ async fn test_describe_datafusion_style_user_denied() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_desc_test2").await; - let create_sql = "CREATE TABLE df_desc_test2.test_table 
(id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_desc_test2.test_table (id INT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // DESCRIBE should be denied for regular user @@ -235,7 +240,8 @@ async fn test_system_role_datafusion_commands() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_sys_test").await; - let create_sql = "CREATE TABLE df_sys_test.test_table (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_sys_test.test_table (id INT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // System role should also be able to use DataFusion commands @@ -267,7 +273,8 @@ async fn test_service_role_datafusion_commands_denied() { // Create namespace and table server.execute_sql("CREATE NAMESPACE df_svc_test").await; - let create_sql = "CREATE TABLE df_svc_test.test_table (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED')"; + let create_sql = "CREATE TABLE df_svc_test.test_table (id INT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'SHARED')"; server.execute_sql(create_sql).await; // Service role should NOT be able to use DataFusion commands (not admin) diff --git a/backend/tests/misc/sql/test_dml_complex.rs b/backend/tests/misc/sql/test_dml_complex.rs index fba7a62de..4dfac997a 100644 --- a/backend/tests/misc/sql/test_dml_complex.rs +++ b/backend/tests/misc/sql/test_dml_complex.rs @@ -6,9 +6,10 @@ //! - UPDATE/DELETE across flushed and unflushed data //! 
- Multi-row operations -use super::test_support::{fixtures, flush_helpers, TestServer}; use kalam_client::models::ResponseStatus; +use super::test_support::{fixtures, flush_helpers, TestServer}; + /// T017a: UPDATE with simple multi-column predicate (single equality) /// /// Note: Complex AND/OR predicates require full DataFusion SELECT-to-UPDATE conversion diff --git a/backend/tests/misc/sql/test_edge_cases.rs b/backend/tests/misc/sql/test_edge_cases.rs index 8aefd5ca7..c7a240441 100644 --- a/backend/tests/misc/sql/test_edge_cases.rs +++ b/backend/tests/misc/sql/test_edge_cases.rs @@ -9,13 +9,15 @@ //! - Maximum password length //! - Shared table default access levels -use super::test_support::TestServer; +use std::time::{SystemTime, UNIX_EPOCH}; + use kalamdb_auth::{authenticate, AuthRequest}; use kalamdb_commons::{ models::{ConnectionInfo, UserId}, Role, }; -use std::time::{SystemTime, UNIX_EPOCH}; + +use super::test_support::TestServer; fn bearer_auth_header(username: &str, user_id: &str, role: Role) -> String { let secret = kalamdb_configs::defaults::default_auth_jwt_secret(); diff --git a/backend/tests/misc/sql/test_explain_index_usage.rs b/backend/tests/misc/sql/test_explain_index_usage.rs index 9fbc27cdf..04bbae8b5 100644 --- a/backend/tests/misc/sql/test_explain_index_usage.rs +++ b/backend/tests/misc/sql/test_explain_index_usage.rs @@ -5,9 +5,10 @@ //! 2. Verifies that EXPLAIN works without errors //! 3. 
The actual query results may vary depending on server bootstrap state -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; +use super::test_support::TestServer; + #[actix_web::test] async fn test_explain_user_id_equality() { let server: TestServer = TestServer::new_shared().await; diff --git a/backend/tests/misc/sql/test_pk_index_efficiency.rs b/backend/tests/misc/sql/test_pk_index_efficiency.rs index ed74ff43c..370fdaba4 100644 --- a/backend/tests/misc/sql/test_pk_index_efficiency.rs +++ b/backend/tests/misc/sql/test_pk_index_efficiency.rs @@ -8,13 +8,18 @@ //! - Shared table PK index: INSERT 100 rows → UPDATE by PK → verify O(1) lookup //! - Performance comparison: Update with many rows vs few rows should have similar latency -use super::test_support::flush_helpers::{ - execute_flush_synchronously, execute_shared_flush_synchronously, +use std::{ + cmp::max, + time::{Duration, Instant}, }; -use super::test_support::{consolidated_helpers, fixtures, TestServer}; + use kalam_client::models::ResponseStatus; -use std::cmp::max; -use std::time::{Duration, Instant}; + +use super::test_support::{ + consolidated_helpers, fixtures, + flush_helpers::{execute_flush_synchronously, execute_shared_flush_synchronously}, + TestServer, +}; fn median_duration(samples: &mut [Duration]) -> Duration { samples.sort(); @@ -29,7 +34,7 @@ fn median_duration(samples: &mut [Duration]) -> Duration { /// Strategy: /// 1. Insert 100 rows /// 2. Measure UPDATE latency for a specific row -/// 3. Insert 1000 more rows +/// 3. Insert 1000 more rows /// 4. Measure UPDATE latency again /// 5. Verify latency doesn't scale linearly with row count (O(1) not O(n)) #[actix_web::test] @@ -191,9 +196,8 @@ async fn test_user_table_pk_index_update() { // The latency should be sub-linear even if not O(1) due to other factors assert!( latency_1000_rows <= max_allowed, - "UPDATE latency scaled too much with row count. \ - With 100 rows: {:?}, with 1000 rows: {:?}. 
\ - Expected sub-linear scaling with PK index.", + "UPDATE latency scaled too much with row count. With 100 rows: {:?}, with 1000 rows: \ + {:?}. Expected sub-linear scaling with PK index.", latency_100_rows, latency_1000_rows ); @@ -331,9 +335,8 @@ async fn test_shared_table_pk_index_update() { assert!( latency_1000_rows <= max_allowed, - "UPDATE latency scaled too much with row count. \ - With 100 rows: {:?}, with 1000 rows: {:?}. \ - Expected sub-linear scaling with PK index.", + "UPDATE latency scaled too much with row count. With 100 rows: {:?}, with 1000 rows: \ + {:?}. Expected sub-linear scaling with PK index.", latency_100_rows, latency_1000_rows ); @@ -450,9 +453,8 @@ async fn test_user_table_pk_index_select() { assert!( latency_2500_rows <= max_allowed, - "SELECT latency scaled too much with row count. \ - With 500 rows: {:?}, with 2500 rows: {:?}. \ - This suggests the PK index may not be used for efficient lookup.", + "SELECT latency scaled too much with row count. With 500 rows: {:?}, with 2500 rows: \ + {:?}. This suggests the PK index may not be used for efficient lookup.", latency_500_rows, latency_2500_rows ); @@ -579,9 +581,8 @@ async fn test_user_table_pk_index_delete() { assert!( latency_1500_rows <= max_allowed, - "DELETE latency scaled too much with row count. \ - With 300 rows: {:?}, with 1500 rows: {:?}. \ - This suggests the PK index may not be used for efficient lookup.", + "DELETE latency scaled too much with row count. With 300 rows: {:?}, with 1500 rows: \ + {:?}. 
This suggests the PK index may not be used for efficient lookup.", latency_300_rows, latency_1500_rows ); @@ -709,7 +710,8 @@ async fn test_user_table_pk_index_update_after_flush() { let all_with_deleted = server .execute_sql_as_user( &format!( - "SELECT id, value, _seq, _deleted FROM {}.user_items WHERE _deleted = true OR _deleted = false", + "SELECT id, value, _seq, _deleted FROM {}.user_items WHERE _deleted = true OR \ + _deleted = false", ns ), "flush_user", diff --git a/backend/tests/misc/sql/test_row_count_behavior.rs b/backend/tests/misc/sql/test_row_count_behavior.rs index 7f148590f..cc4091aa3 100644 --- a/backend/tests/misc/sql/test_row_count_behavior.rs +++ b/backend/tests/misc/sql/test_row_count_behavior.rs @@ -5,9 +5,10 @@ //! - DELETE returns count of rows that were soft-deleted //! - Row counts are accurate and match expectations -use super::test_support::{consolidated_helpers, fixtures, TestServer}; use kalam_client::models::{QueryResponse, ResponseStatus}; +use super::test_support::{consolidated_helpers, fixtures, TestServer}; + fn assert_row_count(response: &QueryResponse, expected: usize, verbs: &[&str]) { assert_eq!( response.status, @@ -49,8 +50,7 @@ async fn test_update_returns_correct_row_count() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.users (\ - id TEXT PRIMARY KEY, + "CREATE TABLE {}.users (id TEXT PRIMARY KEY, name TEXT, email TEXT ) WITH ( @@ -67,7 +67,8 @@ async fn test_update_returns_correct_row_count() { server .execute_sql_as_user( &format!( - "INSERT INTO {}.users (id, name, email) VALUES ('user1', 'Alice', 'alice@example.com')", + "INSERT INTO {}.users (id, name, email) VALUES ('user1', 'Alice', \ + 'alice@example.com')", ns ), "user1", @@ -117,8 +118,7 @@ async fn test_update_same_values_returns_zero() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.users (\ - id TEXT PRIMARY KEY, + "CREATE TABLE {}.users (id TEXT PRIMARY KEY, name TEXT ) WITH ( TYPE = 'USER', @@ -161,8 +161,7 @@ async fn 
test_delete_returns_correct_row_count() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\ - id TEXT PRIMARY KEY, + "CREATE TABLE {}.tasks (id TEXT PRIMARY KEY, title TEXT ) WITH ( TYPE = 'USER', @@ -221,8 +220,7 @@ async fn test_delete_already_deleted_returns_zero() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\ - id TEXT PRIMARY KEY, + "CREATE TABLE {}.tasks (id TEXT PRIMARY KEY, title TEXT ) WITH ( TYPE = 'USER', @@ -313,8 +311,7 @@ async fn test_delete_multiple_rows_count() { server .execute_sql_as_user( &format!( - "CREATE TABLE {}.tasks (\ - id TEXT PRIMARY KEY, + "CREATE TABLE {}.tasks (id TEXT PRIMARY KEY, priority INT ) WITH ( TYPE = 'USER', diff --git a/backend/tests/misc/sql/test_shared_access.rs b/backend/tests/misc/sql/test_shared_access.rs index 179a3d530..bb005d61b 100644 --- a/backend/tests/misc/sql/test_shared_access.rs +++ b/backend/tests/misc/sql/test_shared_access.rs @@ -7,10 +7,11 @@ //! - T088: Access level modification authorization (only service/dba/system can modify) //! - T089: Read-only enforcement for regular users on public tables -use super::test_support::{consolidated_helpers, TestServer}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; +use super::test_support::{consolidated_helpers, TestServer}; + #[tokio::test] async fn test_public_table_read_only_for_users() { let server = TestServer::new_shared().await; diff --git a/backend/tests/misc/sql/test_sql_error_redaction.rs b/backend/tests/misc/sql/test_sql_error_redaction.rs index 2fbf57850..a6493606d 100644 --- a/backend/tests/misc/sql/test_sql_error_redaction.rs +++ b/backend/tests/misc/sql/test_sql_error_redaction.rs @@ -1,10 +1,11 @@ //! Integration tests for public SQL error redaction. 
-use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use uuid::Uuid; +use super::test_support::TestServer; + #[actix_web::test] #[ntest::timeout(45000)] async fn test_non_admin_sql_errors_redact_table_details() { diff --git a/backend/tests/misc/sql/test_system_table_index_usage.rs b/backend/tests/misc/sql/test_system_table_index_usage.rs index 506601b16..4aec3743f 100644 --- a/backend/tests/misc/sql/test_system_table_index_usage.rs +++ b/backend/tests/misc/sql/test_system_table_index_usage.rs @@ -14,16 +14,18 @@ //! 3. Verify correct results are returned //! 4. Measure performance to ensure O(1) lookup behavior -use super::test_support::{consolidated_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; -use kalamdb_commons::models::{ConnectionId, ConnectionInfo}; -use kalamdb_commons::websocket::{SubscriptionOptions, SubscriptionRequest}; -use kalamdb_commons::{AuthType, JobId, NodeId, Role, StorageId, UserId}; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::{Job, JobStatus, JobType, User}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use kalam_client::{models::ResponseStatus, parse_i64}; +use kalamdb_commons::{ + models::{ConnectionId, ConnectionInfo}, + websocket::{SubscriptionOptions, SubscriptionRequest}, + AuthType, JobId, NodeId, Role, StorageId, UserId, +}; +use kalamdb_system::{providers::storages::models::StorageMode, Job, JobStatus, JobType, User}; + +use super::test_support::{consolidated_helpers, TestServer}; + /// Test: system.users uses user_id for WHERE user_id = '...' queries /// /// This test verifies that queries filtering by user_id work correctly. 
@@ -196,7 +198,8 @@ async fn test_system_jobs_status_index() { // Check what statuses actually exist let status_query = format!( - "SELECT status, COUNT(*) AS count FROM system.jobs WHERE job_id LIKE '{}%' GROUP BY status ORDER BY status", + "SELECT status, COUNT(*) AS count FROM system.jobs WHERE job_id LIKE '{}%' GROUP BY \ + status ORDER BY status", job_prefix ); let status_response = server.execute_sql(&status_query).await; @@ -217,7 +220,8 @@ async fn test_system_jobs_status_index() { // Test 1: Query for Running jobs (should use status index) let query_running = format!( - "SELECT COUNT(*) AS job_count FROM system.jobs WHERE status = 'running' AND job_id LIKE '{}%'", + "SELECT COUNT(*) AS job_count FROM system.jobs WHERE status = 'running' AND job_id LIKE \ + '{}%'", job_prefix ); let start = Instant::now(); @@ -248,7 +252,8 @@ async fn test_system_jobs_status_index() { // Test 2: Query for completed jobs let query_completed = format!( - "SELECT job_id, status FROM system.jobs WHERE status = 'completed' AND job_id LIKE '{}%' LIMIT 5", + "SELECT job_id, status FROM system.jobs WHERE status = 'completed' AND job_id LIKE '{}%' \ + LIMIT 5", job_prefix ); let response2 = server.execute_sql(&query_completed).await; @@ -264,7 +269,8 @@ async fn test_system_jobs_status_index() { // Test 3: Query for failed jobs let query_failed = format!( - "SELECT COUNT(*) AS failed_count FROM system.jobs WHERE status = 'failed' AND job_id LIKE '{}%'", + "SELECT COUNT(*) AS failed_count FROM system.jobs WHERE status = 'failed' AND job_id LIKE \ + '{}%'", job_prefix ); let response3 = server.execute_sql(&query_failed).await; @@ -505,7 +511,8 @@ async fn test_index_performance_scaling() { assert!( ratio < 6.0, - "Query time scaled too much ({}x). Expected O(1) with index, got O(n) behavior. This suggests the index is NOT being used!", + "Query time scaled too much ({}x). Expected O(1) with index, got O(n) behavior. 
This \ + suggests the index is NOT being used!", ratio ); diff --git a/backend/tests/misc/sql/test_update_delete_version_resolution.rs b/backend/tests/misc/sql/test_update_delete_version_resolution.rs index 551b16e17..b766d909f 100644 --- a/backend/tests/misc/sql/test_update_delete_version_resolution.rs +++ b/backend/tests/misc/sql/test_update_delete_version_resolution.rs @@ -11,11 +11,13 @@ //! - T067: Nanosecond collision test → verify +1ns increment //! - T068: Performance regression test → query latency with multiple versions -use super::test_support::{consolidated_helpers, fixtures, flush_helpers, TestServer}; -use kalam_client::models::ResponseStatus; use std::sync::Arc; + +use kalam_client::models::ResponseStatus; use tokio::task::JoinSet; +use super::test_support::{consolidated_helpers, fixtures, flush_helpers, TestServer}; + /// T060: Unit test UPDATE in fast storage #[actix_web::test] #[ntest::timeout(60000)] @@ -393,7 +395,8 @@ async fn test_delete_excludes_record() { println!("✅ T064: DELETE sets _deleted=true and query excludes record"); } -/// T065: Integration test - DELETE record in Parquet → new version with _deleted = true in fast storage +/// T065: Integration test - DELETE record in Parquet → new version with _deleted = true in fast +/// storage #[actix_web::test] #[ntest::timeout(60000)] async fn test_delete_in_parquet() { diff --git a/backend/tests/misc/storage/test_cold_storage_manifest.rs b/backend/tests/misc/storage/test_cold_storage_manifest.rs index 57bf0ee16..b0d8b3c70 100644 --- a/backend/tests/misc/storage/test_cold_storage_manifest.rs +++ b/backend/tests/misc/storage/test_cold_storage_manifest.rs @@ -14,9 +14,9 @@ //! - Manifest-based file pruning by seq range //! 
- Fallback to directory scan when manifest missing +use kalam_client::{models::ResponseStatus, parse_i64}; + use super::test_support::{fixtures, flush_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalam_client::parse_i64; /// Test: User table cold storage query uses manifest cache /// @@ -265,8 +265,10 @@ async fn test_manifest_tracks_multiple_flush_segments() { server .execute_sql_as_user( &format!( - "INSERT INTO multi_flush_ns.events (id, event_type, timestamp) VALUES ({}, 'batch1', {})", - i, 1000 + i + "INSERT INTO multi_flush_ns.events (id, event_type, timestamp) VALUES ({}, \ + 'batch1', {})", + i, + 1000 + i ), "multi_user", ) @@ -283,8 +285,10 @@ async fn test_manifest_tracks_multiple_flush_segments() { server .execute_sql_as_user( &format!( - "INSERT INTO multi_flush_ns.events (id, event_type, timestamp) VALUES ({}, 'batch2', {})", - i, 2000 + i + "INSERT INTO multi_flush_ns.events (id, event_type, timestamp) VALUES ({}, \ + 'batch2', {})", + i, + 2000 + i ), "multi_user", ) @@ -368,8 +372,10 @@ async fn test_cold_storage_version_resolution_after_update() { server .execute_sql_as_user( &format!( - "INSERT INTO version_res_ns.records (id, status, count) VALUES ({}, 'initial', {})", - i, i * 5 + "INSERT INTO version_res_ns.records (id, status, count) VALUES ({}, \ + 'initial', {})", + i, + i * 5 ), "version_user", ) diff --git a/backend/tests/misc/storage/test_helpers.rs b/backend/tests/misc/storage/test_helpers.rs index cf8c0f2ce..aea8850f7 100644 --- a/backend/tests/misc/storage/test_helpers.rs +++ b/backend/tests/misc/storage/test_helpers.rs @@ -9,5 +9,8 @@ #[allow(dead_code)] pub fn test_app_context() { - panic!("test_app_context is not available in server integration tests - this test should be #[ignore]"); + panic!( + "test_app_context is not available in server integration tests - this test should be \ + #[ignore]" + ); } diff --git a/backend/tests/misc/storage/test_storage_compact.rs 
b/backend/tests/misc/storage/test_storage_compact.rs index 61cee9699..633704639 100644 --- a/backend/tests/misc/storage/test_storage_compact.rs +++ b/backend/tests/misc/storage/test_storage_compact.rs @@ -5,15 +5,19 @@ //! - STORAGE COMPACT ALL IN creates jobs for user/shared tables only //! - Unsupported table types return validation errors -use super::test_support::{fixtures, TestServer}; +use std::{ + path::{Path, PathBuf}, + sync::atomic::{AtomicU64, Ordering}, + time::{Duration, SystemTime}, +}; + use anyhow::Result; use kalam_client::models::ResponseStatus; use kalamdb_system::Manifest; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::{Duration, SystemTime}; use tokio::time::{sleep, Instant}; +use super::test_support::{fixtures, TestServer}; + fn unique_name(prefix: &str) -> String { static COUNTER: AtomicU64 = AtomicU64::new(0); let id = COUNTER.fetch_add(1, Ordering::SeqCst); @@ -101,8 +105,8 @@ async fn wait_for_compact_jobs( loop { let resp = server .execute_sql( - "SELECT job_id, status, parameters, message FROM system.jobs \ - WHERE job_type = 'compact' ORDER BY created_at DESC LIMIT 50", + "SELECT job_id, status, parameters, message FROM system.jobs WHERE job_type = \ + 'compact' ORDER BY created_at DESC LIMIT 50", ) .await; @@ -308,7 +312,8 @@ async fn test_storage_compact_rejects_stream_and_empty_namespace() -> Result<()> fixtures::create_namespace(&server, &namespace).await; let create_stream = format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, value TEXT) WITH (TYPE = 'STREAM', TTL_SECONDS = 3600)", + "CREATE TABLE {}.{} (id INT PRIMARY KEY, value TEXT) WITH (TYPE = 'STREAM', TTL_SECONDS = \ + 3600)", namespace, stream_table ); let resp = server.execute_sql(&create_stream).await; @@ -343,7 +348,8 @@ async fn test_storage_compact_cleans_empty_shared_segments_and_parquet_files() - fixtures::create_namespace(&server, &namespace).await; let create_shared = format!( - "CREATE TABLE {}.{} (id INT PRIMARY 
KEY, value TEXT) WITH (TYPE = 'SHARED', FLUSH_POLICY = 'rows:5')", + "CREATE TABLE {}.{} (id INT PRIMARY KEY, value TEXT) WITH (TYPE = 'SHARED', FLUSH_POLICY \ + = 'rows:5')", namespace, table ); let resp = server.execute_sql(&create_shared).await; @@ -418,7 +424,8 @@ async fn test_storage_compact_cleans_empty_shared_segments_and_parquet_files() - if Instant::now() >= cleanup_deadline { anyhow::bail!( - "Expected empty manifest and no parquet files after compaction cleanup for {}.{} (segments={}, files={:?})", + "Expected empty manifest and no parquet files after compaction cleanup for {}.{} \ + (segments={}, files={:?})", namespace, table, manifest.segments.len(), diff --git a/backend/tests/misc/system/test_audit_logging.rs b/backend/tests/misc/system/test_audit_logging.rs index f74978433..44b2ca1de 100644 --- a/backend/tests/misc/system/test_audit_logging.rs +++ b/backend/tests/misc/system/test_audit_logging.rs @@ -2,7 +2,6 @@ //! //! Verifies that privileged operations write entries to `system.audit_log`. -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::models::{AuthType, Role, UserId}; use kalamdb_system::providers::storages::models::StorageMode; @@ -10,6 +9,8 @@ use reqwest::StatusCode; use serde_json::json; use uuid::Uuid; +use super::test_support::TestServer; + async fn create_system_user(server: &TestServer, username: &str) -> UserId { let user_id = UserId::new(username); let now = chrono::Utc::now().timestamp_millis(); @@ -163,8 +164,8 @@ async fn test_audit_log_for_table_access_change() { // Let's try to create a shared table with a unique name let table_name = format!("analytics.events_{}", chrono::Utc::now().timestamp_millis()); - // Note: If this fails due to missing column families, we might need to skip this part of the test - // or update TestServer to support shared tables. 
+ // Note: If this fails due to missing column families, we might need to skip this part of the + // test or update TestServer to support shared tables. // For now, let's try to proceed and see if unique name helps (unlikely if it's a CF issue). let sql = diff --git a/backend/tests/misc/system/test_dba_init.rs b/backend/tests/misc/system/test_dba_init.rs index ec79f41cc..18034db46 100644 --- a/backend/tests/misc/system/test_dba_init.rs +++ b/backend/tests/misc/system/test_dba_init.rs @@ -1,6 +1,5 @@ //! Integration tests for bootstrap-managed dba.* tables. -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::models::UserId; use kalamdb_dba::{ @@ -9,6 +8,9 @@ use kalamdb_dba::{ }; use ntest::timeout; +use super::test_support::TestServer; + +#[ignore = "Requires dba bootstrap to be enabled, which is not the default in tests. Remove this once we enable dba bootstrap by default in tests"] #[tokio::test] #[timeout(30000)] async fn test_dba_namespace_and_tables_created_on_startup() { @@ -31,7 +33,8 @@ async fn test_dba_namespace_and_tables_created_on_startup() { let tables_response = server .execute_sql( - "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = 'dba' AND is_latest = true ORDER BY table_name", + "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = 'dba' AND \ + is_latest = true ORDER BY table_name", ) .await; assert_eq!( @@ -96,7 +99,8 @@ async fn test_dba_repositories_write_through_normal_table_paths() { let statistics_response = server .execute_sql( - "SELECT metric_name, metric_value FROM dba.stats WHERE metric_name = 'memory_usage_mb' AND node_id = 'node-1'" + "SELECT metric_name, metric_value FROM dba.stats WHERE metric_name = \ + 'memory_usage_mb' AND node_id = 'node-1'", ) .await; assert_eq!(statistics_response.status, ResponseStatus::Success); diff --git a/backend/tests/misc/system/test_helpers.rs b/backend/tests/misc/system/test_helpers.rs index 
652e20abf..87f804550 100644 --- a/backend/tests/misc/system/test_helpers.rs +++ b/backend/tests/misc/system/test_helpers.rs @@ -5,5 +5,8 @@ #[allow(dead_code)] pub fn test_app_context() { - panic!("test_app_context is not available in server integration tests - this test should be #[ignore]"); + panic!( + "test_app_context is not available in server integration tests - this test should be \ + #[ignore]" + ); } diff --git a/backend/tests/misc/system/test_live_queries_metadata.rs b/backend/tests/misc/system/test_live_queries_metadata.rs index 5eb46d6cf..6b8fa067f 100644 --- a/backend/tests/misc/system/test_live_queries_metadata.rs +++ b/backend/tests/misc/system/test_live_queries_metadata.rs @@ -1,11 +1,14 @@ -use super::test_support::{consolidated_helpers, TestServer}; -use kalam_client::models::ResponseStatus; -use kalamdb_api::limiter::RateLimiter; -use kalamdb_api::ws::events::cleanup::cleanup_connection; -use kalamdb_commons::models::{ConnectionId, ConnectionInfo, Role, UserId}; -use kalamdb_commons::websocket::{SubscriptionOptions, SubscriptionRequest}; use std::sync::Arc; +use kalam_client::models::ResponseStatus; +use kalamdb_api::{limiter::RateLimiter, ws::events::cleanup::cleanup_connection}; +use kalamdb_commons::{ + models::{ConnectionId, ConnectionInfo, Role, UserId}, + websocket::{SubscriptionOptions, SubscriptionRequest}, +}; + +use super::test_support::{consolidated_helpers, TestServer}; + #[tokio::test(flavor = "multi_thread")] async fn test_system_live_metadata() { let server = TestServer::new_shared().await; diff --git a/backend/tests/misc/system/test_runtime_metrics.rs b/backend/tests/misc/system/test_runtime_metrics.rs index 709a3e78c..e752db2b3 100644 --- a/backend/tests/misc/system/test_runtime_metrics.rs +++ b/backend/tests/misc/system/test_runtime_metrics.rs @@ -1,7 +1,8 @@ -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use serial_test::serial; +use super::test_support::TestServer; + #[tokio::test] 
#[ntest::timeout(10000)] // local runs are fast, but shared-server startup can spike under CI load #[serial(memory_metrics)] @@ -10,15 +11,9 @@ async fn test_system_stats_expose_memory_breakdown_and_allocator_metrics() { let response = server .execute_sql( - "SELECT metric_name, metric_value FROM system.stats \ - WHERE metric_name IN (\ - 'memory_usage_mb',\ - 'memory_usage_source',\ - 'memory_rss_mb',\ - 'memory_virtual_mb',\ - 'memory_rss_gap_mb',\ - 'allocator_name'\ - ) ORDER BY metric_name", + "SELECT metric_name, metric_value FROM system.stats WHERE metric_name IN \ + ('memory_usage_mb','memory_usage_source','memory_rss_mb','memory_virtual_mb','\ + memory_rss_gap_mb','allocator_name') ORDER BY metric_name", ) .await; diff --git a/backend/tests/misc/system/test_system_user_init.rs b/backend/tests/misc/system/test_system_user_init.rs index a55fb3e0e..8845bdaf1 100644 --- a/backend/tests/misc/system/test_system_user_init.rs +++ b/backend/tests/misc/system/test_system_user_init.rs @@ -3,10 +3,11 @@ //! Tests that the database creates a default system user on first startup //! with appropriate credentials and security settings. -use super::test_support::TestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::constants::AuthConstants; +use super::test_support::TestServer; + #[tokio::test] async fn test_system_user_created_on_init() { let server = TestServer::new_shared().await; diff --git a/backend/tests/misc/system/test_system_users.rs b/backend/tests/misc/system/test_system_users.rs index bd3c73318..5f4997374 100644 --- a/backend/tests/misc/system/test_system_users.rs +++ b/backend/tests/misc/system/test_system_users.rs @@ -12,19 +12,19 @@ //! - Remote-enabled system users MUST have a password set //! 
- Localhost connections can skip password for internal users -use super::test_support::{auth_helper, TestServer}; +use std::{net::SocketAddr, sync::Arc}; + use actix_web::{test, web, App}; use kalamdb_auth::{CoreUsersRepo, UserRepository}; use kalamdb_commons::{AuthType, Role, StorageId, UserId}; -use kalamdb_system::providers::storages::models::StorageMode; -use kalamdb_system::User; -use std::net::SocketAddr; -use std::sync::Arc; +use kalamdb_system::{providers::storages::models::StorageMode, User}; + +use super::test_support::{auth_helper, TestServer}; /// Helper function to create a system user with specific settings async fn create_system_user( server: &TestServer, - username: &str, //TODO: Use UserName type + username: &str, // TODO: Use UserName type password_hash: String, _allow_remote: bool, ) -> User { diff --git a/backend/tests/scenarios/helpers.rs b/backend/tests/scenarios/helpers.rs index d3ea7b6ab..4672de01a 100644 --- a/backend/tests/scenarios/helpers.rs +++ b/backend/tests/scenarios/helpers.rs @@ -7,16 +7,12 @@ //! - Flush/storage artifact validation //! 
- Parallel test utilities -use anyhow::Result; -use kalam_client::models::{ChangeEvent, ResponseStatus}; -use kalam_client::{KalamCellValue, SubscriptionManager}; -use std::collections::{HashMap, HashSet}; -use std::time::Duration; -use tokio::time::{sleep, timeout, Instant}; - -use crate::test_support::consolidated_helpers as helpers; -use crate::test_support::http_server::HttpTestServer; +use std::{ + collections::{HashMap, HashSet}, + time::Duration, +}; +use anyhow::Result; // Re-export commonly used consolidated helpers pub use helpers::{ assert_error_contains, assert_manifest_exists, assert_min_row_count, assert_no_duplicates, @@ -27,6 +23,13 @@ pub use helpers::{ get_string_value, json_to_i64, run_parallel_users, unique_namespace, unique_table, wait_for_ack, }; +use kalam_client::{ + models::{ChangeEvent, ResponseStatus}, + KalamCellValue, SubscriptionManager, +}; +use tokio::time::{sleep, timeout, Instant}; + +use crate::test_support::{consolidated_helpers as helpers, http_server::HttpTestServer}; // ============================================================================= // Scenario-Specific Helpers diff --git a/backend/tests/scenarios/scenario_01_chat_app.rs b/backend/tests/scenarios/scenario_01_chat_app.rs index fdd1f6a83..a3259bf02 100644 --- a/backend/tests/scenarios/scenario_01_chat_app.rs +++ b/backend/tests/scenarios/scenario_01_chat_app.rs @@ -19,13 +19,14 @@ //! - [x] Storage artifacts exist and parquet is non-empty //! 
- [x] No duplicates by primary key -use super::helpers::*; +use std::time::Duration; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; -use std::time::Duration; use tokio::time::{sleep, Instant}; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(60); /// Main chat app scenario test @@ -103,7 +104,8 @@ async fn test_scenario_01_chat_app_core() -> anyhow::Result<()> { // Step 3: Create users and get clients // ========================================================= // Use unique usernames based on namespace to avoid interference when tests run in parallel. - // USER tables partition by user_id, so shared usernames between tests can cause data collisions. + // USER tables partition by user_id, so shared usernames between tests can cause data + // collisions. let u1_name = format!("{}_u1", ns); let u2_name = format!("{}_u2", ns); let u1_client = create_user_and_client(server, &u1_name, &Role::User).await?; @@ -127,9 +129,10 @@ async fn test_scenario_01_chat_app_core() -> anyhow::Result<()> { let conv_id = if i <= 25 { 1 } else { 2 }; let role = if i % 2 == 0 { "user" } else { "assistant" }; let sql = format!( - "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, {}, '{}', 'Message {} from u1')", - ns, i, conv_id, role, i - ); + "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, {}, \ + '{}', 'Message {} from u1')", + ns, i, conv_id, role, i + ); let resp = u1_client.execute_query(&sql, None, None, None).await?; assert!(resp.success(), "u1 insert message {}", i); } @@ -141,9 +144,10 @@ async fn test_scenario_01_chat_app_core() -> anyhow::Result<()> { for i in 101..=120 { let sql = format!( - "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, 100, 'user', 'Message {} from u2')", - ns, i, i - ); + "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, 100, \ + 'user', 'Message {} from u2')", + ns, i, i + ); let resp = 
u2_client.execute_query(&sql, None, None, None).await?; assert!(resp.success(), "u2 insert message {}", i); } @@ -222,9 +226,10 @@ async fn test_scenario_01_chat_app_core() -> anyhow::Result<()> { let mut attempt = 0; loop { let sql = format!( - "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, 1, 'user', 'New message during flush {}')", - ns, i, i - ); + "INSERT INTO {}.messages (id, conversation_id, role_id, content) VALUES ({}, 1, \ + 'user', 'New message during flush {}')", + ns, i, i + ); let resp = u1_client.execute_query(&sql, None, None, None).await?; if resp.success() { break; @@ -359,12 +364,17 @@ async fn test_scenario_01_service_writes_as_user() -> anyhow::Result<()> { // Insert as service (this goes to service's own partition by default) let _resp = service_client - .execute_query( - &format!("INSERT INTO {}.messages (id, role_id, content) VALUES (2, 'assistant', 'AI Response')", ns), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.messages (id, role_id, content) VALUES (2, 'assistant', 'AI \ + Response')", + ns + ), + None, + None, + None, + ) + .await?; // This might succeed or fail depending on AS USER support // Verify u1 can see their own message diff --git a/backend/tests/scenarios/scenario_02_offline_sync.rs b/backend/tests/scenarios/scenario_02_offline_sync.rs index 8b6a81725..326deb7e7 100644 --- a/backend/tests/scenarios/scenario_02_offline_sync.rs +++ b/backend/tests/scenarios/scenario_02_offline_sync.rs @@ -14,17 +14,22 @@ //! - [x] No duplicates across batches //! 
- [x] Live changes during snapshot are not lost -use super::helpers::*; +use std::{ + collections::HashSet, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; use futures_util::StreamExt; use kalam_client::models::ChangeEvent; use kalamdb_commons::Role; -use std::collections::HashSet; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::Duration; use tokio::time::sleep; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(120); /// Main offline sync scenario - tests 10 parallel users syncing @@ -82,9 +87,16 @@ async fn test_scenario_02_offline_sync_parallel() -> anyhow::Result<()> { _ => "message", }; let sql = format!( - "INSERT INTO {}.items (id, kind, title, body, priority, device_id) VALUES ({}, '{}', 'Item {}', 'Body {}', {}, 'device_{}')", - ns, item_id, kind, i, i, i % 5, user_idx - ); + "INSERT INTO {}.items (id, kind, title, body, priority, device_id) VALUES ({}, \ + '{}', 'Item {}', 'Body {}', {}, 'device_{}')", + ns, + item_id, + kind, + i, + i, + i % 5, + user_idx + ); let resp = client.execute_query(&sql, None, None, None).await?; if !resp.success() { // Log but continue - some inserts might fail due to concurrent access diff --git a/backend/tests/scenarios/scenario_03_shopping_cart.rs b/backend/tests/scenarios/scenario_03_shopping_cart.rs index 41e658a57..df9a63e1b 100644 --- a/backend/tests/scenarios/scenario_03_shopping_cart.rs +++ b/backend/tests/scenarios/scenario_03_shopping_cart.rs @@ -16,14 +16,19 @@ //! - [x] Partial flush affects only intended partitions //! 
- [x] Post-flush reads correct -use super::helpers::*; +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; use futures_util::StreamExt; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::Duration; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(90); @@ -99,106 +104,116 @@ async fn test_scenario_03_shopping_cart_parallel() -> anyhow::Result<()> { } let handles: Vec<_> = clients - .into_iter() - .map(|(user_idx, client)| { - let ns = ns.clone(); - let success = Arc::clone(&success_count); - - tokio::spawn(async move { - - // Create a cart - let cart_id = user_idx * 1000 + 1; - let resp = client - .execute_query( - &format!( - "INSERT INTO {}.carts (id, name) VALUES ({}, 'Cart for user {}')", - ns, cart_id, user_idx - ), None, - None, - None, - ) - .await?; - if !resp.success() { - return Err(anyhow::anyhow!("Failed to create cart")); - } - - // Insert 50 cart items - for i in 0..50 { - let item_id = user_idx * 10000 + i; - let resp = client - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity, price) VALUES ({}, {}, 'Product {}', {}, {})", - ns, item_id, cart_id, i, (i % 5) + 1, (i as f64) * 9.99 - ), None, - None, - None, - ) - .await?; - if !resp.success() { - return Err(anyhow::anyhow!("Failed to insert item {}", i)); - } - } - - // Update 20 quantities - for i in 0..20 { - let item_id = user_idx * 10000 + i; - let resp = client - .execute_query( - &format!( - "UPDATE {}.cart_items SET quantity = quantity + 1 WHERE id = {}", - ns, item_id - ), None, - None, - None, - ) - .await?; - if !resp.success() { - eprintln!("Warning: Update failed for item {}", item_id); - } - } - - // Delete 5 items - for i in 45..50 { - let item_id = user_idx * 10000 + i; - let resp = client - .execute_query( - &format!("DELETE FROM {}.cart_items WHERE id = {}", ns, item_id), 
None, - None, - None, - ) - .await?; - if !resp.success() { - eprintln!("Warning: Delete failed for item {}", item_id); - } - } - - // Verify final count (should be 45) - let resp = client - .execute_query( - &format!( - "SELECT COUNT(*) as cnt FROM {}.cart_items WHERE cart_id = {}", - ns, cart_id - ), None, - None, - None, - ) - .await?; - let count: i64 = resp.get_i64("cnt").unwrap_or(0); - - if count != 45 { - return Err(anyhow::anyhow!( - "User {} expected 45 items, got {}", - user_idx, - count - )); - } - - success.fetch_add(1, Ordering::SeqCst); - Ok::<(), anyhow::Error>(()) - }) - }) - .collect(); + .into_iter() + .map(|(user_idx, client)| { + let ns = ns.clone(); + let success = Arc::clone(&success_count); + + tokio::spawn(async move { + // Create a cart + let cart_id = user_idx * 1000 + 1; + let resp = client + .execute_query( + &format!( + "INSERT INTO {}.carts (id, name) VALUES ({}, 'Cart for user {}')", + ns, cart_id, user_idx + ), + None, + None, + None, + ) + .await?; + if !resp.success() { + return Err(anyhow::anyhow!("Failed to create cart")); + } + + // Insert 50 cart items + for i in 0..50 { + let item_id = user_idx * 10000 + i; + let resp = client + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity, \ + price) VALUES ({}, {}, 'Product {}', {}, {})", + ns, + item_id, + cart_id, + i, + (i % 5) + 1, + (i as f64) * 9.99 + ), + None, + None, + None, + ) + .await?; + if !resp.success() { + return Err(anyhow::anyhow!("Failed to insert item {}", i)); + } + } + + // Update 20 quantities + for i in 0..20 { + let item_id = user_idx * 10000 + i; + let resp = client + .execute_query( + &format!( + "UPDATE {}.cart_items SET quantity = quantity + 1 WHERE id = {}", + ns, item_id + ), + None, + None, + None, + ) + .await?; + if !resp.success() { + eprintln!("Warning: Update failed for item {}", item_id); + } + } + + // Delete 5 items + for i in 45..50 { + let item_id = user_idx * 10000 + i; + let resp = client + 
.execute_query( + &format!("DELETE FROM {}.cart_items WHERE id = {}", ns, item_id), + None, + None, + None, + ) + .await?; + if !resp.success() { + eprintln!("Warning: Delete failed for item {}", item_id); + } + } + + // Verify final count (should be 45) + let resp = client + .execute_query( + &format!( + "SELECT COUNT(*) as cnt FROM {}.cart_items WHERE cart_id = {}", + ns, cart_id + ), + None, + None, + None, + ) + .await?; + let count: i64 = resp.get_i64("cnt").unwrap_or(0); + + if count != 45 { + return Err(anyhow::anyhow!( + "User {} expected 45 items, got {}", + user_idx, + count + )); + } + + success.fetch_add(1, Ordering::SeqCst); + Ok::<(), anyhow::Error>(()) + }) + }) + .collect(); // Wait for all users for handle in handles { @@ -284,29 +299,33 @@ async fn test_scenario_03_filtered_subscription() -> anyhow::Result<()> { // Insert items for two carts for i in 1..=5 { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES ({}, 1, 'Cart1 Product {}', 1)", - ns, i, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES ({}, \ + 1, 'Cart1 Product {}', 1)", + ns, i, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert cart 1 item {}", i); } for i in 6..=10 { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES ({}, 2, 'Cart2 Product {}', 1)", - ns, i, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES ({}, \ + 2, 'Cart2 Product {}', 1)", + ns, i, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert cart 2 item {}", i); } @@ -321,28 +340,32 @@ async fn test_scenario_03_filtered_subscription() -> anyhow::Result<()> { // Insert to cart 2 (should NOT appear in subscription) let client2 = 
client.clone(); let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES (11, 2, 'Cart2 New', 1)", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES (11, 2, \ + 'Cart2 New', 1)", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert to cart 2"); // Insert to cart 1 (should appear in subscription) let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES (12, 1, 'Cart1 New', 1)", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name, quantity) VALUES (12, 1, \ + 'Cart1 New', 1)", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert to cart 1"); // Wait for insert event (should only get cart 1 insert) @@ -386,29 +409,33 @@ async fn test_scenario_03_partial_flush() -> anyhow::Result<()> { for i in 1..=20 { let resp = u1_client - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name) VALUES ({}, 1, 'U1 Product {}')", - ns, i, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name) VALUES ({}, 1, 'U1 \ + Product {}')", + ns, i, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "u1 insert {}", i); } for i in 101..=120 { let resp = u2_client - .execute_query( - &format!( - "INSERT INTO {}.cart_items (id, cart_id, product_name) VALUES ({}, 2, 'U2 Product {}')", - ns, i, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.cart_items (id, cart_id, product_name) VALUES ({}, 2, 'U2 \ + Product {}')", + ns, i, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "u2 insert {}", i); } diff --git 
a/backend/tests/scenarios/scenario_04_iot_telemetry.rs b/backend/tests/scenarios/scenario_04_iot_telemetry.rs index f8eae6636..7b39b30a9 100644 --- a/backend/tests/scenarios/scenario_04_iot_telemetry.rs +++ b/backend/tests/scenarios/scenario_04_iot_telemetry.rs @@ -12,11 +12,12 @@ //! - [x] Flush does not change query results //! - [x] Cold artifacts valid and non-empty -use super::helpers::*; +use std::time::Duration; use futures_util::StreamExt; use kalam_client::models::ResponseStatus; -use std::time::Duration; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(180); const ROW_COUNT: usize = 5000; @@ -77,10 +78,20 @@ async fn test_scenario_04_iot_telemetry_5k_rows() -> anyhow::Result<()> { let firmware = format!("v1.{}.{}", row_num / 1000, row_num % 100); let sql = format!( - "INSERT INTO {}.telemetry (id, device_id, temp, humidity, pressure, battery, is_charging, firmware, payload) \ - VALUES ({}, '{}', {}, {}, {}, {}, {}, '{}', 'payload_data_{}')", - ns, row_num, device_id, temp, humidity, pressure, battery, is_charging, firmware, row_num - ); + "INSERT INTO {}.telemetry (id, device_id, temp, humidity, pressure, battery, \ + is_charging, firmware, payload) VALUES ({}, '{}', {}, {}, {}, {}, {}, '{}', \ + 'payload_data_{}')", + ns, + row_num, + device_id, + temp, + humidity, + pressure, + battery, + is_charging, + firmware, + row_num + ); let resp = client.execute_query(&sql, None, None, None).await?; if !resp.success() { @@ -195,8 +206,8 @@ async fn test_scenario_04_iot_telemetry_5k_rows() -> anyhow::Result<()> { } /// Test anomaly subscription for IoT data -/// NOTE: This test is ignored because SHARED table subscriptions are not supported (FR-128, FR-129). -/// The subscription infrastructure only supports USER tables for per-user real-time sync. +/// NOTE: This test is ignored because SHARED table subscriptions are not supported (FR-128, +/// FR-129). 
The subscription infrastructure only supports USER tables for per-user real-time sync. #[tokio::test] #[ignore = "SHARED table subscriptions not supported by design (FR-128, FR-129)"] async fn test_scenario_04_anomaly_subscription() -> anyhow::Result<()> { @@ -225,15 +236,17 @@ async fn test_scenario_04_anomaly_subscription() -> anyhow::Result<()> { // Insert some normal data for i in 1..=10 { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES ({}, 'device_1', 25.0, 80.0)", - ns, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES ({}, \ + 'device_1', 25.0, 80.0)", + ns, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert normal data {}", i); } @@ -253,28 +266,32 @@ async fn test_scenario_04_anomaly_subscription() -> anyhow::Result<()> { // High temperature anomaly let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES (100, 'device_2', 75.0, 50.0)", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES (100, 'device_2', \ + 75.0, 50.0)", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert high temp anomaly"); // Low battery anomaly let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES (101, 'device_3', 25.0, 5.0)", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.telemetry (id, device_id, temp, battery) VALUES (101, 'device_3', \ + 25.0, 5.0)", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert low battery anomaly"); // Wait for insert events @@ -321,10 +338,20 @@ async fn test_scenario_04_wide_column_scan() -> anyhow::Result<()> { // Insert 500 rows for i in 0..500 { let sql = 
format!( - "INSERT INTO {}.telemetry (id, device_id, temp, humidity, pressure, battery, is_charging, firmware, payload) \ - VALUES ({}, 'device_{}', {}, {}, {}, {}, {}, 'v1.0.{}', 'payload_{}')", - ns, i, i % 10, 20.0 + (i % 30) as f64, 50.0, 1000.0, 80.0, i % 2 == 0, i, i - ); + "INSERT INTO {}.telemetry (id, device_id, temp, humidity, pressure, battery, \ + is_charging, firmware, payload) VALUES ({}, 'device_{}', {}, {}, {}, {}, {}, \ + 'v1.0.{}', 'payload_{}')", + ns, + i, + i % 10, + 20.0 + (i % 30) as f64, + 50.0, + 1000.0, + 80.0, + i % 2 == 0, + i, + i + ); let resp = client.execute_query(&sql, None, None, None).await?; if !resp.success() { eprintln!("Insert {} failed", i); diff --git a/backend/tests/scenarios/scenario_05_dashboards.rs b/backend/tests/scenarios/scenario_05_dashboards.rs index 7c4ff36ce..89157b750 100644 --- a/backend/tests/scenarios/scenario_05_dashboards.rs +++ b/backend/tests/scenarios/scenario_05_dashboards.rs @@ -14,11 +14,12 @@ //! - [x] Post-flush joins still correct //! 
- [x] No cross-user leakage -use super::helpers::*; +use std::time::Duration; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; -use std::time::Duration; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(60); @@ -54,15 +55,17 @@ async fn test_scenario_05_dashboards_shared_reference() -> anyhow::Result<()> { let client = server.link_client("root"); for (id, name, price) in [(1, "Free", 0.0), (2, "Pro", 9.99), (3, "Enterprise", 99.99)] { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.plans (id, name, price, features) VALUES ({}, '{}', {}, 'features for {}')", - ns, id, name, price, name - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.plans (id, name, price, features) VALUES ({}, '{}', {}, \ + 'features for {}')", + ns, id, name, price, name + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert plan {}", name); } @@ -197,15 +200,17 @@ async fn test_scenario_05_dashboards_shared_reference() -> anyhow::Result<()> { if resp.status == ResponseStatus::Success { // Insert new row with device_type let resp = user1_client - .execute_query( - &format!( - "INSERT INTO {}.activity (id, plan_id, action, device_type) VALUES (1000, 2, 'mobile_action', 'mobile')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.activity (id, plan_id, action, device_type) VALUES (1000, 2, \ + 'mobile_action', 'mobile')", + ns + ), + None, + None, + None, + ) + .await?; // This might succeed or fail depending on implementation if resp.success() { // Verify old rows still readable @@ -347,15 +352,17 @@ async fn test_scenario_05_schema_evolution() -> anyhow::Result<()> { if alter_resp.status == ResponseStatus::Success { // Insert with new column let _resp = client - .execute_query( - &format!( - "INSERT INTO {}.events (id, event_name, metadata) VALUES (100, 'new_event', 'some metadata')", - ns - ), None, - None, - None, - ) - 
.await?; + .execute_query( + &format!( + "INSERT INTO {}.events (id, event_name, metadata) VALUES (100, 'new_event', \ + 'some metadata')", + ns + ), + None, + None, + None, + ) + .await?; // Accept either success or error (column might not be immediately available) // Old data should still be readable diff --git a/backend/tests/scenarios/scenario_06_jobs.rs b/backend/tests/scenarios/scenario_06_jobs.rs index 75d9bc014..2cd56240d 100644 --- a/backend/tests/scenarios/scenario_06_jobs.rs +++ b/backend/tests/scenarios/scenario_06_jobs.rs @@ -9,13 +9,14 @@ //! - [x] Cold artifacts are valid //! - [x] Data not duplicated -use super::helpers::*; +use std::time::Duration; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; -use std::time::Duration; use tokio::time::sleep; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(60); /// Main jobs scenario test @@ -79,8 +80,11 @@ async fn test_scenario_06_jobs_lifecycle() -> anyhow::Result<()> { sleep(Duration::from_millis(500)).await; let resp = server - .execute_sql("SELECT job_id, job_type, status, parameters FROM system.jobs WHERE job_type = 'flush' ORDER BY created_at DESC LIMIT 5") - .await?; + .execute_sql( + "SELECT job_id, job_type, status, parameters FROM system.jobs WHERE job_type = \ + 'flush' ORDER BY created_at DESC LIMIT 5", + ) + .await?; if resp.status == ResponseStatus::Success { let rows = get_rows(&resp); @@ -268,11 +272,12 @@ async fn test_scenario_06_job_status_transitions() -> anyhow::Result<()> { while tokio::time::Instant::now() < deadline { let resp = server - .execute_sql(&format!( - "SELECT status FROM system.jobs WHERE job_type = 'flush' AND parameters LIKE '%{}%' ORDER BY created_at DESC LIMIT 1", - ns - )) - .await?; + .execute_sql(&format!( + "SELECT status FROM system.jobs WHERE job_type = 'flush' AND parameters LIKE \ + '%{}%' ORDER BY created_at DESC LIMIT 1", + ns + )) + .await?; if resp.status == ResponseStatus::Success { if let Some(row) = 
get_rows(&resp).first() { diff --git a/backend/tests/scenarios/scenario_07_collaborative.rs b/backend/tests/scenarios/scenario_07_collaborative.rs index dae65981a..6049df8d5 100644 --- a/backend/tests/scenarios/scenario_07_collaborative.rs +++ b/backend/tests/scenarios/scenario_07_collaborative.rs @@ -13,17 +13,18 @@ //! - [x] Subscriptions deliver only matching doc_id //! - [x] No cross-user leakage in USER edits -use super::helpers::*; +use std::time::Duration; use kalamdb_commons::Role; -use std::time::Duration; use tokio::time::sleep; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(60); /// Main collaborative editing scenario test -/// NOTE: This test is ignored because SHARED table subscriptions are not supported (FR-128, FR-129). -/// The subscription infrastructure only supports USER tables for per-user real-time sync. +/// NOTE: This test is ignored because SHARED table subscriptions are not supported (FR-128, +/// FR-129). The subscription infrastructure only supports USER tables for per-user real-time sync. 
#[tokio::test] #[ignore = "SHARED table subscriptions not supported by design (FR-128, FR-129)"] async fn test_scenario_07_collaborative_editing() -> anyhow::Result<()> { @@ -86,15 +87,17 @@ async fn test_scenario_07_collaborative_editing() -> anyhow::Result<()> { // ========================================================= let admin_client = server.link_client("root"); let resp = admin_client - .execute_query( - &format!( - "INSERT INTO {}.documents (id, title, content) VALUES (1, 'Shared Doc', 'Initial content')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.documents (id, title, content) VALUES (1, 'Shared Doc', 'Initial \ + content')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Create shared document"); // ========================================================= @@ -160,28 +163,32 @@ async fn test_scenario_07_collaborative_editing() -> anyhow::Result<()> { // User 1 sets presence let resp = admin_client - .execute_query( - &format!( - "INSERT INTO {}.presence (id, doc_id, user_id, cursor_pos, status) VALUES (1, 1, 'collab_user1', 100, 'typing')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.presence (id, doc_id, user_id, cursor_pos, status) VALUES (1, 1, \ + 'collab_user1', 100, 'typing')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "User1 presence"); // User 2 sets presence let resp = admin_client - .execute_query( - &format!( - "INSERT INTO {}.presence (id, doc_id, user_id, cursor_pos, status) VALUES (2, 1, 'collab_user2', 50, 'viewing')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.presence (id, doc_id, user_id, cursor_pos, status) VALUES (2, 1, \ + 'collab_user2', 50, 'viewing')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "User2 presence"); // Verify presence is visible immediately @@ -220,28 +227,32 @@ async fn 
test_scenario_07_collaborative_editing() -> anyhow::Result<()> { // User 1 creates private edit history let resp = user1_client - .execute_query( - &format!( - "INSERT INTO {}.user_edits (id, doc_id, edit_type, edit_data) VALUES (1, 1, 'insert', 'private data')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.user_edits (id, doc_id, edit_type, edit_data) VALUES (1, 1, \ + 'insert', 'private data')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "User1 private edit"); // User 2 creates their own edit history let resp = user2_client - .execute_query( - &format!( - "INSERT INTO {}.user_edits (id, doc_id, edit_type, edit_data) VALUES (2, 1, 'delete', 'other private data')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.user_edits (id, doc_id, edit_type, edit_data) VALUES (2, 1, \ + 'delete', 'other private data')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "User2 private edit"); // User 1 should only see their own edits @@ -309,28 +320,32 @@ async fn test_scenario_07_presence_subscription() -> anyhow::Result<()> { // Insert presence for doc_id = 1 (should appear in subscription) let client2 = server.link_client("root"); let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.presence (id, doc_id, user_id, status) VALUES (1, 1, 'user1', 'active')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.presence (id, doc_id, user_id, status) VALUES (1, 1, 'user1', \ + 'active')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert presence for doc 1"); // Insert presence for doc_id = 2 (should NOT appear in subscription) let resp = client2 - .execute_query( - &format!( - "INSERT INTO {}.presence (id, doc_id, user_id, status) VALUES (2, 2, 'user2', 'active')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + 
"INSERT INTO {}.presence (id, doc_id, user_id, status) VALUES (2, 2, 'user2', \ + 'active')", + ns + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert presence for doc 2"); // Wait for insert event (should only get doc 1) diff --git a/backend/tests/scenarios/scenario_08_burst.rs b/backend/tests/scenarios/scenario_08_burst.rs index 4a91ba833..a01dfb46d 100644 --- a/backend/tests/scenarios/scenario_08_burst.rs +++ b/backend/tests/scenarios/scenario_08_burst.rs @@ -7,16 +7,21 @@ //! - [x] No missed events beyond accepted semantics //! - [x] Final counts match expected -use super::helpers::*; +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; use futures_util::StreamExt; use kalam_client::models::ChangeEvent; use kalamdb_commons::Role; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::Duration; use tokio::time::sleep; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(90); /// Main burst test - high-rate writes with active subscription @@ -67,32 +72,34 @@ async fn test_scenario_08_burst_writes() -> anyhow::Result<()> { let insert_count = Arc::new(AtomicUsize::new(0)); let base_client = client.clone(); let handles: Vec<_> = (0..writer_count) - .map(|writer_idx| { - let ns = ns.clone(); - let count = Arc::clone(&insert_count); - let client = base_client.clone(); - - tokio::spawn(async move { - for i in 0..writes_per_writer { - let id = writer_idx * writes_per_writer + i; - let resp = client - .execute_query( - &format!( - "INSERT INTO {}.events (id, event_type, payload) VALUES ({}, 'burst', 'data_{}')", - ns, id, id - ), None, - None, - None, - ) - .await?; - if resp.success() { - count.fetch_add(1, Ordering::SeqCst); - } - } - Ok::<(), anyhow::Error>(()) - }) - }) - .collect(); + .map(|writer_idx| { + let ns = ns.clone(); + let count = Arc::clone(&insert_count); + let client = base_client.clone(); + + tokio::spawn(async move { + for i in 
0..writes_per_writer { + let id = writer_idx * writes_per_writer + i; + let resp = client + .execute_query( + &format!( + "INSERT INTO {}.events (id, event_type, payload) VALUES ({}, \ + 'burst', 'data_{}')", + ns, id, id + ), + None, + None, + None, + ) + .await?; + if resp.success() { + count.fetch_add(1, Ordering::SeqCst); + } + } + Ok::<(), anyhow::Error>(()) + }) + }) + .collect(); // ========================================================= // Step 4: Collect events while writes happen diff --git a/backend/tests/scenarios/scenario_09_ddl_while_active.rs b/backend/tests/scenarios/scenario_09_ddl_while_active.rs index c24a95622..ce8ad349b 100644 --- a/backend/tests/scenarios/scenario_09_ddl_while_active.rs +++ b/backend/tests/scenarios/scenario_09_ddl_while_active.rs @@ -7,15 +7,15 @@ //! - [x] Reads remain correct //! - [x] Subscription continues (or fails gracefully with clear error) -use super::helpers::*; +use std::time::Duration; use futures_util::StreamExt; -use kalam_client::models::ChangeEvent; -use kalam_client::models::ResponseStatus; +use kalam_client::models::{ChangeEvent, ResponseStatus}; use kalamdb_commons::Role; -use std::time::Duration; use tokio::time::sleep; +use super::helpers::*; + const TEST_TIMEOUT: Duration = Duration::from_secs(60); /// Main DDL while active scenario test @@ -93,15 +93,17 @@ async fn test_scenario_09_ddl_while_active() -> anyhow::Result<()> { // Step 5: Insert new rows with new column // ========================================================= let resp = client - .execute_query( - &format!( - "INSERT INTO {}.data (id, name, value, description) VALUES (100, 'new_item', 1000, 'has description')", - ns - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.data (id, name, value, description) VALUES (100, 'new_item', \ + 1000, 'has description')", + ns + ), + None, + None, + None, + ) + .await?; if resp.success() { println!("Insert with new column succeeded"); @@ -221,15 +223,21 @@ async 
fn test_scenario_09_drop_column() -> anyhow::Result<()> { // Insert data with old_column for i in 1..=5 { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.data (id, name, old_column, value) VALUES ({}, 'item_{}', 'old_value_{}', {})", - ns, i, i, i, i * 10 - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.data (id, name, old_column, value) VALUES ({}, 'item_{}', \ + 'old_value_{}', {})", + ns, + i, + i, + i, + i * 10 + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert {}", i); } @@ -257,7 +265,8 @@ async fn test_scenario_09_drop_column() -> anyhow::Result<()> { let resp = client .execute_query( &format!( - "INSERT INTO {}.data (id, name, old_column, value) VALUES (100, 'new', 'should_fail', 100)", + "INSERT INTO {}.data (id, name, old_column, value) VALUES (100, 'new', \ + 'should_fail', 100)", ns ), None, diff --git a/backend/tests/scenarios/scenario_10_multi_tenant.rs b/backend/tests/scenarios/scenario_10_multi_tenant.rs index 5d76f1b85..7f5476219 100644 --- a/backend/tests/scenarios/scenario_10_multi_tenant.rs +++ b/backend/tests/scenarios/scenario_10_multi_tenant.rs @@ -7,10 +7,11 @@ //! - [x] Shared table access correct //! 
- [x] No subscription data leakage -use super::helpers::*; +use std::time::Duration; use kalamdb_commons::Role; -use std::time::Duration; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(60); @@ -84,15 +85,17 @@ async fn test_scenario_10_multi_tenant_isolation() -> anyhow::Result<()> { ("analytics", true), ] { let resp = admin_client - .execute_query( - &format!( - "INSERT INTO {}.feature_flags (flag_name, enabled, description) VALUES ('{}', {}, 'Flag: {}')", - global, flag, enabled, flag - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.feature_flags (flag_name, enabled, description) VALUES ('{}', \ + {}, 'Flag: {}')", + global, flag, enabled, flag + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert flag {}", flag); } @@ -104,15 +107,20 @@ async fn test_scenario_10_multi_tenant_isolation() -> anyhow::Result<()> { let tenant_a_client = create_user_and_client(server, &tenant_a_user, &Role::User).await?; for i in 1..=5 { let resp = tenant_a_client - .execute_query( - &format!( - "INSERT INTO {}.orders (id, customer_name, amount) VALUES ({}, 'A Customer {}', {})", - tenant_a, i, i, (i as f64) * 100.0 - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.orders (id, customer_name, amount) VALUES ({}, 'A Customer \ + {}', {})", + tenant_a, + i, + i, + (i as f64) * 100.0 + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Tenant A insert order {}", i); } @@ -122,15 +130,20 @@ async fn test_scenario_10_multi_tenant_isolation() -> anyhow::Result<()> { let tenant_b_client = create_user_and_client(server, &tenant_b_user, &Role::User).await?; for i in 101..=105 { let resp = tenant_b_client - .execute_query( - &format!( - "INSERT INTO {}.orders (id, customer_name, amount) VALUES ({}, 'B Customer {}', {})", - tenant_b, i, i, (i as f64) * 50.0 - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO 
{}.orders (id, customer_name, amount) VALUES ({}, 'B Customer \ + {}', {})", + tenant_b, + i, + i, + (i as f64) * 50.0 + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Tenant B insert order {}", i); } diff --git a/backend/tests/scenarios/scenario_11_multi_storage.rs b/backend/tests/scenarios/scenario_11_multi_storage.rs index 75cf9eb79..7eaaa4076 100644 --- a/backend/tests/scenarios/scenario_11_multi_storage.rs +++ b/backend/tests/scenarios/scenario_11_multi_storage.rs @@ -8,10 +8,11 @@ //! - [x] Flush writes to correct destination //! - [x] Verify artifacts in expected storage folder -use super::helpers::*; +use std::time::Duration; use kalamdb_commons::Role; -use std::time::Duration; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(60); diff --git a/backend/tests/scenarios/scenario_12_performance.rs b/backend/tests/scenarios/scenario_12_performance.rs index 45f19408a..8f613f1fe 100644 --- a/backend/tests/scenarios/scenario_12_performance.rs +++ b/backend/tests/scenarios/scenario_12_performance.rs @@ -9,10 +9,11 @@ //! - [x] Insert time per batch //! 
- [x] Subscription snapshot timing -use super::helpers::*; +use std::time::{Duration, Instant}; use kalamdb_commons::Role; -use std::time::{Duration, Instant}; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(120); @@ -56,15 +57,21 @@ async fn test_scenario_12_insert_performance() -> anyhow::Result<()> { for _ in 0..batch_size { id_counter += 1; let resp = client - .execute_query( - &format!( - "INSERT INTO {}.metrics (id, timestamp, value, label) VALUES ({}, {}, {}, 'batch_{}')", - ns, id_counter, id_counter * 1000, id_counter as f64 * 1.5, batch_size - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.metrics (id, timestamp, value, label) VALUES ({}, {}, {}, \ + 'batch_{}')", + ns, + id_counter, + id_counter * 1000, + id_counter as f64 * 1.5, + batch_size + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert id {}", id_counter); } @@ -237,15 +244,17 @@ async fn test_scenario_12_subscription_snapshot_timing() -> anyhow::Result<()> { // Insert data for i in 1..=(data_size as i64) { let resp = client - .execute_query( - &format!( - "INSERT INTO {}.documents (id, title, content) VALUES ({}, 'Doc {}', 'Content for document number {}')", - ns, i, i, i - ), None, - None, - None, - ) - .await?; + .execute_query( + &format!( + "INSERT INTO {}.documents (id, title, content) VALUES ({}, 'Doc {}', \ + 'Content for document number {}')", + ns, i, i, i + ), + None, + None, + None, + ) + .await?; assert!(resp.success(), "Insert doc {}", i); } diff --git a/backend/tests/scenarios/scenario_13_soak_test.rs b/backend/tests/scenarios/scenario_13_soak_test.rs index d2ce618dd..5e6d6e02a 100644 --- a/backend/tests/scenarios/scenario_13_soak_test.rs +++ b/backend/tests/scenarios/scenario_13_soak_test.rs @@ -11,12 +11,17 @@ //! - [x] Subscription stability under load //! 
- [x] Error rate and latency tracking -use super::helpers::*; +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; use kalamdb_commons::Role; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; + +use super::helpers::*; const TEST_TIMEOUT: Duration = Duration::from_secs(180); @@ -108,15 +113,20 @@ async fn test_scenario_13_mixed_workload_soak() -> anyhow::Result<()> { 0 | 1 | 2 => { // Insert (60% of operations) let resp = client - .execute_query( - &format!( - "INSERT INTO {}.orders (id, customer_id, amount, status) VALUES ({}, {}, {}, 'pending')", - ns_clone, local_id, user_idx, local_id as f64 * 10.5 - ), None, - None, - None, - ) - .await; + .execute_query( + &format!( + "INSERT INTO {}.orders (id, customer_id, amount, status) \ + VALUES ({}, {}, {}, 'pending')", + ns_clone, + local_id, + user_idx, + local_id as f64 * 10.5 + ), + None, + None, + None, + ) + .await; match resp { Ok(r) if r.success() => { insert_count_clone.fetch_add(1, Ordering::Relaxed); @@ -138,15 +148,18 @@ async fn test_scenario_13_mixed_workload_soak() -> anyhow::Result<()> { 3 => { // Update (20% of operations) let resp = client - .execute_query( - &format!( - "UPDATE {}.orders SET status = 'completed' WHERE id = {} AND status = 'pending'", - ns_clone, local_id - 1 - ), None, - None, - None, - ) - .await; + .execute_query( + &format!( + "UPDATE {}.orders SET status = 'completed' WHERE id = {} AND \ + status = 'pending'", + ns_clone, + local_id - 1 + ), + None, + None, + None, + ) + .await; match resp { Ok(r) if r.success() => { update_count_clone.fetch_add(1, Ordering::Relaxed); @@ -173,15 +186,17 @@ async fn test_scenario_13_mixed_workload_soak() -> anyhow::Result<()> { _ => { // Query (20% of operations) let resp = client - .execute_query( - &format!( - "SELECT COUNT(*) as cnt FROM {}.orders WHERE status = 'pending'", - ns_clone - ), None, - None, - None, - ) - .await; + .execute_query( + 
&format!( + "SELECT COUNT(*) as cnt FROM {}.orders WHERE status = \ + 'pending'", + ns_clone + ), + None, + None, + None, + ) + .await; match resp { Ok(r) if r.success() => { query_count_clone.fetch_add(1, Ordering::Relaxed); @@ -463,7 +478,8 @@ async fn test_scenario_13_concurrent_read_write() -> anyhow::Result<()> { let write_count_clone = Arc::clone(&write_count); let handle = tokio::spawn(async move { - // First, seed this user's own 10 counters (USER table RLS means each user needs their own data) + // First, seed this user's own 10 counters (USER table RLS means each user needs their + // own data) for i in 1..=10 { let _ = client .execute_query( diff --git a/backend/tests/scenarios/scenario_14_vector_rag.rs b/backend/tests/scenarios/scenario_14_vector_rag.rs index 4b3575190..7169e5e35 100644 --- a/backend/tests/scenarios/scenario_14_vector_rag.rs +++ b/backend/tests/scenarios/scenario_14_vector_rag.rs @@ -6,16 +6,19 @@ //! - Vector indexes + flush artifacts in cold storage //! - Similarity queries joined back to document rows -use super::helpers::*; use kalam_client::KalamCellValue; use kalamdb_api::http::sql::models::{ResponseStatus as ApiResponseStatus, SqlResponse}; -use kalamdb_commons::models::{TableId, UserId}; -use kalamdb_commons::schemas::TableType; -use kalamdb_commons::Role; +use kalamdb_commons::{ + models::{TableId, UserId}, + schemas::TableType, + Role, +}; use kalamdb_system::FileRef; use reqwest::multipart; use serde_json::Value as JsonValue; +use super::helpers::*; + fn parse_file_ref(value: &JsonValue) -> anyhow::Result { if let Some(raw) = value.as_str() { return Ok(FileRef::from_json(raw)?); @@ -70,25 +73,17 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res assert_success(&resp, "CREATE NAMESPACE"); let create_files_table_sql = format!( - "CREATE TABLE {}.{} (\ - id BIGINT PRIMARY KEY, \ - title TEXT NOT NULL, \ - body TEXT NOT NULL, \ - attachment_a FILE, \ - attachment_b FILE\ - ) WITH (TYPE='USER')", + 
"CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, title TEXT NOT NULL, body TEXT NOT NULL, \ + attachment_a FILE, attachment_b FILE) WITH (TYPE='USER')", ns, files_table ); let resp = server.execute_sql(&create_files_table_sql).await?; assert_success(&resp, "CREATE documents file table"); let create_vectors_table_sql = format!( - "CREATE TABLE {}.{} (\ - id BIGINT PRIMARY KEY, \ - doc_embedding EMBEDDING(3), \ - attachment_a_embedding EMBEDDING(3), \ - attachment_b_embedding EMBEDDING(3)\ - ) WITH (TYPE='USER')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, doc_embedding EMBEDDING(3), \ + attachment_a_embedding EMBEDDING(3), attachment_b_embedding EMBEDDING(3)) WITH \ + (TYPE='USER')", ns, vectors_table ); let resp = server.execute_sql(&create_vectors_table_sql).await?; @@ -146,8 +141,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res for (id, title, body, doc_vec, file_a_vec, file_b_vec, file_a_name, file_b_name) in docs { let insert_files_sql = format!( - "INSERT INTO {}.{} (id, title, body, attachment_a, attachment_b) \ - VALUES ({}, '{}', '{}', FILE(\"file_a\"), FILE(\"file_b\"))", + "INSERT INTO {}.{} (id, title, body, attachment_a, attachment_b) VALUES ({}, '{}', \ + '{}', FILE(\"file_a\"), FILE(\"file_b\"))", ns, files_table, id, title, body ); let upload = execute_sql_multipart( @@ -179,8 +174,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res ); let insert_vectors_sql = format!( - "INSERT INTO {}.{} (id, doc_embedding, attachment_a_embedding, attachment_b_embedding) \ - VALUES ({}, '{}', '{}', '{}')", + "INSERT INTO {}.{} (id, doc_embedding, attachment_a_embedding, \ + attachment_b_embedding) VALUES ({}, '{}', '{}', '{}')", ns, vectors_table, id, doc_vec, file_a_vec, file_b_vec ); let insert_vectors_resp = @@ -277,7 +272,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res let doc_query_resp = user_client .execute_query( &format!( - "SELECT id FROM {}.{} ORDER 
BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') LIMIT 2", + "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') \ + LIMIT 2", ns, vectors_table ), None, @@ -296,7 +292,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res let attachment_query_resp = user_client .execute_query( &format!( - "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(attachment_b_embedding, '[0.0,1.0,0.0]') LIMIT 2", + "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(attachment_b_embedding, \ + '[0.0,1.0,0.0]') LIMIT 2", ns, vectors_table ), None, @@ -316,8 +313,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res ); let insert_files_hot_sql = format!( - "INSERT INTO {}.{} (id, title, body, attachment_a, attachment_b) \ - VALUES (4, 'Realtime Notes', 'Hot row before flush', FILE(\"file_a\"), FILE(\"file_b\"))", + "INSERT INTO {}.{} (id, title, body, attachment_a, attachment_b) VALUES (4, 'Realtime \ + Notes', 'Hot row before flush', FILE(\"file_a\"), FILE(\"file_b\"))", ns, files_table ); let upload_hot = execute_sql_multipart( @@ -340,8 +337,9 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res let insert_hot_vector_resp = user_client .execute_query( &format!( - "INSERT INTO {}.{} (id, doc_embedding, attachment_a_embedding, attachment_b_embedding) \ - VALUES (4, '[0.999,0.001,0.0]', '[0.98,0.02,0.0]', '[0.97,0.03,0.0]')", + "INSERT INTO {}.{} (id, doc_embedding, attachment_a_embedding, \ + attachment_b_embedding) VALUES (4, '[0.999,0.001,0.0]', '[0.98,0.02,0.0]', \ + '[0.97,0.03,0.0]')", ns, vectors_table ), None, @@ -358,7 +356,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res let mixed_tier_resp = user_client .execute_query( &format!( - "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') \ + LIMIT 3", ns, vectors_table 
), None, @@ -388,7 +387,8 @@ async fn test_scenario_14_rag_docs_with_files_and_vector_search() -> anyhow::Res let after_delete_resp = user_client .execute_query( &format!( - "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') LIMIT 3", + "SELECT id FROM {}.{} ORDER BY COSINE_DISTANCE(doc_embedding, '[1.0,0.0,0.0]') \ + LIMIT 3", ns, vectors_table ), None, diff --git a/backend/tests/testserver/cluster/test_cluster_commands_http.rs b/backend/tests/testserver/cluster/test_cluster_commands_http.rs index 029f6a97a..00a7b076d 100644 --- a/backend/tests/testserver/cluster/test_cluster_commands_http.rs +++ b/backend/tests/testserver/cluster/test_cluster_commands_http.rs @@ -10,9 +10,16 @@ async fn test_cluster_commands_over_http() -> Result<()> { let result = async { let resp = server.execute_sql("CLUSTER LIST").await?; + anyhow::ensure!( + resp.status == ResponseStatus::Error, + "CLUSTER LIST should be rejected as a CLI-only command, got {:?}", + resp.status + ); + + let resp = server.execute_sql("SELECT cluster_id, node_id FROM system.cluster").await?; anyhow::ensure!( resp.status == ResponseStatus::Success, - "CLUSTER LIST failed: {:?}", + "system.cluster query failed: {:?}", resp.error ); @@ -22,6 +29,29 @@ async fn test_cluster_commands_over_http() -> Result<()> { "CLUSTER SNAPSHOT failed: {:?}", resp.error ); + let snapshot_result = resp + .results + .first() + .ok_or_else(|| anyhow::anyhow!("CLUSTER SNAPSHOT returned no result batch"))?; + anyhow::ensure!( + snapshot_result.schema.iter().any(|field| field.name == "action"), + "CLUSTER SNAPSHOT result missing action column" + ); + anyhow::ensure!( + snapshot_result.schema.iter().any(|field| field.name == "group_id"), + "CLUSTER SNAPSHOT result missing group_id column" + ); + anyhow::ensure!( + snapshot_result.row_count > 0, + "CLUSTER SNAPSHOT should return at least one row" + ); + + let resp = server.execute_sql("CLUSTER REBALANCE").await?; + anyhow::ensure!( + resp.status == 
ResponseStatus::Success, + "CLUSTER REBALANCE failed: {:?}", + resp.error + ); let resp = server.execute_sql("CLUSTER CLEAR").await?; anyhow::ensure!( @@ -29,13 +59,23 @@ async fn test_cluster_commands_over_http() -> Result<()> { "CLUSTER CLEAR failed: {:?}", resp.error ); + let clear_result = resp + .results + .first() + .ok_or_else(|| anyhow::anyhow!("CLUSTER CLEAR returned no result batch"))?; + anyhow::ensure!( + clear_result.schema.iter().any(|field| field.name == "snapshots_dir"), + "CLUSTER CLEAR result missing snapshots_dir column" + ); + anyhow::ensure!( + clear_result.schema.iter().any(|field| field.name == "snapshots_cleared"), + "CLUSTER CLEAR result missing snapshots_cleared column" + ); - // CLUSTER JOIN and CLUSTER LEAVE were removed from the command set. - // Verify they fail gracefully with an informative error rather than panicking. let resp = server.execute_sql("CLUSTER JOIN 127.0.0.1:9001").await?; anyhow::ensure!( resp.status == ResponseStatus::Error, - "CLUSTER JOIN should return Error (command removed), got {:?}", + "CLUSTER JOIN should return Error for malformed input, got {:?}", resp.status ); diff --git a/backend/tests/testserver/cluster/test_cluster_snapshots_http.rs b/backend/tests/testserver/cluster/test_cluster_snapshots_http.rs index eec4fa4de..85af1ed5e 100644 --- a/backend/tests/testserver/cluster/test_cluster_snapshots_http.rs +++ b/backend/tests/testserver/cluster/test_cluster_snapshots_http.rs @@ -1,6 +1,7 @@ +use std::path::{Path, PathBuf}; + use anyhow::Result; use kalam_client::models::ResponseStatus; -use std::path::{Path, PathBuf}; use tokio::time::{sleep, Duration, Instant}; use super::test_support::http_server::start_http_test_server_with_config; @@ -99,18 +100,25 @@ async fn test_cluster_snapshot_creation_and_reuse() -> Result<()> { let ns = "snap_ns"; let table = "snap_table"; - let resp = server - .execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)) - .await?; - anyhow::ensure!(resp.status == 
ResponseStatus::Success, "CREATE NAMESPACE failed: {:?}", resp.error); + let resp = server.execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)).await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE NAMESPACE failed: {:?}", + resp.error + ); let resp = server .execute_sql(&format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', STORAGE_ID='local')", + "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local')", ns, table )) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); let resp = server .execute_sql(&format!("INSERT INTO {}.{} (id, v) VALUES (1, 'a')", ns, table)) @@ -118,7 +126,11 @@ async fn test_cluster_snapshot_creation_and_reuse() -> Result<()> { anyhow::ensure!(resp.status == ResponseStatus::Success, "INSERT failed: {:?}", resp.error); let resp = server.execute_sql("CLUSTER SNAPSHOT").await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CLUSTER SNAPSHOT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CLUSTER SNAPSHOT failed: {:?}", + resp.error + ); let snapshots_dir = data_path.join("snapshots").join("meta"); let _ = wait_for_snapshots(&snapshots_dir, 1).await?; diff --git a/backend/tests/testserver/cluster/test_cluster_transactions_http.rs b/backend/tests/testserver/cluster/test_cluster_transactions_http.rs index 7b97ca597..0e570321a 100644 --- a/backend/tests/testserver/cluster/test_cluster_transactions_http.rs +++ b/backend/tests/testserver/cluster/test_cluster_transactions_http.rs @@ -3,8 +3,10 @@ use kalam_client::models::{QueryResult, ResponseStatus}; use kalamdb_raft::GroupId; use tokio::time::{sleep, Duration, Instant}; -use super::test_support::cluster::ClusterTestServer; -use 
super::test_support::consolidated_helpers::{get_count_value, unique_namespace}; +use super::test_support::{ + cluster::ClusterTestServer, + consolidated_helpers::{get_count_value, unique_namespace}, +}; fn result_i64(result: &QueryResult, column: &str) -> Result { let row = result @@ -118,7 +120,8 @@ async fn test_sql_transaction_forwarded_from_follower_preserves_atomic_staging() let create_table = meta_leader .execute_sql(&format!( - "CREATE TABLE {}.items (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', STORAGE_ID='local')", + "CREATE TABLE {}.items (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local')", namespace )) .await?; @@ -131,12 +134,10 @@ async fn test_sql_transaction_forwarded_from_follower_preserves_atomic_staging() wait_for_table_visible(shared_leader, &namespace).await?; let batch = format!( - "/* strict */ BEGIN; \ - INSERT INTO {}.items (id, name) VALUES (4101, 'alpha'); \ - INSERT INTO {}.items (id, name) VALUES (4102, 'beta'); \ - SELECT COUNT(*) AS visible_rows FROM {}.items; \ - SELECT write_count AS staged_writes, touched_tables_count AS staged_tables FROM system.transactions WHERE origin = 'SqlBatch'; \ - COMMIT;", + "/* strict */ BEGIN; INSERT INTO {}.items (id, name) VALUES (4101, 'alpha'); INSERT INTO \ + {}.items (id, name) VALUES (4102, 'beta'); SELECT COUNT(*) AS visible_rows FROM \ + {}.items; SELECT write_count AS staged_writes, touched_tables_count AS staged_tables \ + FROM system.transactions WHERE origin = 'SqlBatch'; COMMIT;", namespace, namespace, namespace ); diff --git a/backend/tests/testserver/files/test_file_permissions_http.rs b/backend/tests/testserver/files/test_file_permissions_http.rs index 6f6983746..7db2836fd 100644 --- a/backend/tests/testserver/files/test_file_permissions_http.rs +++ b/backend/tests/testserver/files/test_file_permissions_http.rs @@ -1,7 +1,7 @@ //! File permission tests over HTTP. 
-use super::test_support::auth_helper::create_user_auth_header_with_id; -use super::test_support::http_server::start_http_test_server; +use std::path::{Path, PathBuf}; + use kalam_client::models::ResponseStatus as LinkResponseStatus; use kalamdb_api::http::sql::models::{ResponseStatus, SqlResponse}; use kalamdb_commons::Role; @@ -9,9 +9,12 @@ use kalamdb_system::FileRef; use reqwest::multipart; use serde_json::Value as JsonValue; use serial_test::serial; -use std::path::{Path, PathBuf}; use uuid::Uuid; +use super::test_support::{ + auth_helper::create_user_auth_header_with_id, http_server::start_http_test_server, +}; + fn unique_suffix() -> String { Uuid::new_v4().simple().to_string() } @@ -267,7 +270,8 @@ async fn test_failed_insert_cleans_up_files() -> anyhow::Result<()> { assert_eq!(resp.status, LinkResponseStatus::Success, "CREATE NAMESPACE failed"); let create_table_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT NOT NULL, doc FILE) WITH (TYPE='USER')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT NOT NULL, doc FILE) WITH \ + (TYPE='USER')", namespace, table_name ); let resp = server.execute_sql(&create_table_sql).await?; @@ -294,11 +298,7 @@ async fn test_failed_insert_cleans_up_files() -> anyhow::Result<()> { let table_path = table_path_for_user(&server.storage_root(), &namespace, &table_name, &alice_id); let leaked = find_files_in_subfolders(&table_path, "f"); - assert!( - leaked.is_empty(), - "Failed insert should cleanup staged files: {:?}", - leaked - ); + assert!(leaked.is_empty(), "Failed insert should cleanup staged files: {:?}", leaked); let _ = server .execute_sql(&format!("DROP TABLE IF EXISTS {}.{}", namespace, table_name)) diff --git a/backend/tests/testserver/flush/test_flush_jobs_http.rs b/backend/tests/testserver/flush/test_flush_jobs_http.rs index d111b22fd..a35acc346 100644 --- a/backend/tests/testserver/flush/test_flush_jobs_http.rs +++ b/backend/tests/testserver/flush/test_flush_jobs_http.rs @@ -1,9 +1,10 
@@ //! Flush-related SQL tests over the real HTTP SQL API. -use super::test_support::consolidated_helpers::unique_namespace; use kalam_client::models::ResponseStatus; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::consolidated_helpers::unique_namespace; + #[tokio::test] #[ntest::timeout(120000)] // 120 seconds - allow for server startup + job persistence async fn test_flush_table_persists_job_over_http() -> anyhow::Result<()> { diff --git a/backend/tests/testserver/flush/test_flush_policy_verification_http.rs b/backend/tests/testserver/flush/test_flush_policy_verification_http.rs index 1f205de0d..1b650a278 100644 --- a/backend/tests/testserver/flush/test_flush_policy_verification_http.rs +++ b/backend/tests/testserver/flush/test_flush_policy_verification_http.rs @@ -6,21 +6,21 @@ //! - tests/integration/flush/test_automatic_flushing_comprehensive.rs //! - tests/integration/flush/test_flush_operations.rs -use super::test_support::auth_helper::create_user_auth_header_with_id; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::flush::{ - count_parquet_files_for_table, flush_table_and_wait, wait_for_parquet_files_for_table, - wait_for_parquet_files_for_user_table, -}; -use super::test_support::jobs::{ - extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent, -}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; -use kalamdb_jobs::AppContextJobsExt; -use kalamdb_jobs::FlushScheduler; +use kalamdb_jobs::{AppContextJobsExt, FlushScheduler}; use tokio::time::Duration; +use super::test_support::{ + auth_helper::create_user_auth_header_with_id, + consolidated_helpers::{unique_namespace, unique_table}, + flush::{ + count_parquet_files_for_table, flush_table_and_wait, wait_for_parquet_files_for_table, + wait_for_parquet_files_for_user_table, + }, + jobs::{extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent}, +}; + async fn 
count_matching_flush_jobs( server: &super::test_support::http_server::HttpTestServer, ns: &str, @@ -28,7 +28,8 @@ async fn count_matching_flush_jobs( ) -> anyhow::Result { let resp = server .execute_sql( - "SELECT parameters FROM system.jobs WHERE job_type = 'flush' ORDER BY created_at DESC LIMIT 500", + "SELECT parameters FROM system.jobs WHERE job_type = 'flush' ORDER BY created_at DESC \ + LIMIT 500", ) .await?; anyhow::ensure!( @@ -100,229 +101,272 @@ async fn query_count_with_auth( #[ntest::timeout(180000)] // 3 minutes max for comprehensive flush policy test async fn test_flush_policy_and_parquet_output_over_http() { (async { - let _guard = super::test_support::http_server::acquire_test_lock().await; - let server = super::test_support::http_server::get_global_server().await; - let ns = unique_namespace("flush_policy"); + let _guard = super::test_support::http_server::acquire_test_lock().await; + let server = super::test_support::http_server::get_global_server().await; + let ns = unique_namespace("flush_policy"); - let resp = server - .execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - - let user_a = unique_table("alice"); - let user_b = unique_table("bob"); - let (auth_a, user_a_id) = - create_user_auth_header_with_id(server, &user_a, "UserPass123!", &Role::User).await?; - let (auth_b, user_b_id) = - create_user_auth_header_with_id(server, &user_b, "UserPass123!", &Role::User).await?; - - // ----------------------------------------------------------------- - // USER table: manual flush creates parquet + respects row threshold - // ----------------------------------------------------------------- - { - let table = "messages"; + let resp = server.execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)).await?; + anyhow::ensure!(resp.status == ResponseStatus::Success); + + let user_a = unique_table("alice"); + let user_b = unique_table("bob"); + let (auth_a, user_a_id) = + 
create_user_auth_header_with_id(server, &user_a, "UserPass123!", &Role::User).await?; + let (auth_b, user_b_id) = + create_user_auth_header_with_id(server, &user_b, "UserPass123!", &Role::User).await?; + + // ----------------------------------------------------------------- + // USER table: manual flush creates parquet + respects row threshold + // ----------------------------------------------------------------- + { + let table = "messages"; + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, content TEXT) WITH \ + (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:25')", + ns, table + ), + &auth_a, + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); + + for i in 0..25 { let resp = server .execute_sql_with_auth( &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, content TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:25')", - ns, table + "INSERT INTO {}.{} (id, content) VALUES ({}, 'msg-{}')", + ns, table, i, i ), &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "insert failed: {:?}", + resp.error + ); + } - for i in 0..25 { + flush_table_and_wait(server, &ns, table).await?; + let _ = wait_for_parquet_files_for_user_table( + server, + &ns, + table, + &user_a_id, + 1, + Duration::from_secs(40), + ) + .await?; + } + + // ----------------------------------------------------------------- + // USER table: multiple flush batches produce additional parquet + // ----------------------------------------------------------------- + { + let table = "events"; + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH \ + (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:100,interval:30')", + ns, table + ), + &auth_a, + ) + .await?; + 
anyhow::ensure!(resp.status == ResponseStatus::Success); + + for batch in 0..3 { + for i in 0..10 { + let id = batch * 100 + i; let resp = server .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, content) VALUES ({}, 'msg-{}')", ns, table, i, i), + &format!( + "INSERT INTO {}.{} (id, payload) VALUES ({}, 'p-{}')", + ns, table, id, id + ), &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "insert failed: {:?}", resp.error); + anyhow::ensure!(resp.status == ResponseStatus::Success); } flush_table_and_wait(server, &ns, table).await?; - let _ = wait_for_parquet_files_for_user_table(server, &ns, table, &user_a_id, 1, Duration::from_secs(40)).await?; + let _ = wait_for_parquet_files_for_table( + server, + &ns, + table, + 1, + Duration::from_secs(60), + ) + .await?; } + } - // ----------------------------------------------------------------- - // USER table: multiple flush batches produce additional parquet - // ----------------------------------------------------------------- - { - let table = "events"; + // ----------------------------------------------------------------- + // USER table: multi-user partitions produce per-user parquet output + // ----------------------------------------------------------------- + { + let table = "inbox"; + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:20')", + ns, table + ), + &auth_a, + ) + .await?; + anyhow::ensure!(resp.status == ResponseStatus::Success); + + for i in 0..15 { let resp = server .execute_sql_with_auth( &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:100,interval:30')", - ns, table + "INSERT INTO {}.{} (id, body) VALUES ({}, '{}-msg-{}')", + ns, table, i, user_a, i ), &auth_a, ) .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); - - for batch in 0..3 { - for i in 
0..10 { - let id = batch * 100 + i; - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, payload) VALUES ({}, 'p-{}')", ns, table, id, id), - &auth_a, - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - } - - flush_table_and_wait(server, &ns, table).await?; - let _ = wait_for_parquet_files_for_table( - server, - &ns, - table, - 1, - Duration::from_secs(60), - ) - .await?; - } } - - // ----------------------------------------------------------------- - // USER table: multi-user partitions produce per-user parquet output - // ----------------------------------------------------------------- - { - let table = "inbox"; + for i in 100..115 { let resp = server .execute_sql_with_auth( &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:20')", - ns, table + "INSERT INTO {}.{} (id, body) VALUES ({}, '{}-msg-{}')", + ns, table, i, user_b, i ), - &auth_a, + &auth_b, ) .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); + } - for i in 0..15 { - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, body) VALUES ({}, '{}-msg-{}')", ns, table, i, user_a, i), - &auth_a, - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - } - for i in 100..115 { - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, body) VALUES ({}, '{}-msg-{}')", ns, table, i, user_b, i), - &auth_b, - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - } + flush_table_and_wait(server, &ns, table).await?; - flush_table_and_wait(server, &ns, table).await?; + let _ = wait_for_parquet_files_for_user_table( + server, + &ns, + table, + &user_a_id, + 1, + Duration::from_secs(40), + ) + .await?; + let _ = wait_for_parquet_files_for_user_table( + server, + &ns, + table, + &user_b_id, + 1, + Duration::from_secs(40), + ) + .await?; + } - let _ = wait_for_parquet_files_for_user_table(server, 
&ns, table, &user_a_id, 1, Duration::from_secs(40)).await?; - let _ = wait_for_parquet_files_for_user_table(server, &ns, table, &user_b_id, 1, Duration::from_secs(40)).await?; - } + // ----------------------------------------------------------------- + // SHARED table: manual flush creates parquet + // ----------------------------------------------------------------- + { + let table = "audit_events"; + let resp = server + .execute_sql(&format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, entry TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local', FLUSH_POLICY='rows:5')", + ns, table + )) + .await?; + anyhow::ensure!(resp.status == ResponseStatus::Success); - // ----------------------------------------------------------------- - // SHARED table: manual flush creates parquet - // ----------------------------------------------------------------- - { - let table = "audit_events"; + for i in 0..8 { let resp = server .execute_sql(&format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, entry TEXT) WITH (TYPE='SHARED', STORAGE_ID='local', FLUSH_POLICY='rows:5')", - ns, table + "INSERT INTO {}.{} (id, entry) VALUES ({}, 'entry-{}')", + ns, table, i, i )) .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); + } - for i in 0..8 { - let resp = server - .execute_sql(&format!( - "INSERT INTO {}.{} (id, entry) VALUES ({}, 'entry-{}')", - ns, table, i, i - )) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - } + flush_table_and_wait(server, &ns, table).await?; + let _ = + wait_for_parquet_files_for_table(server, &ns, table, 1, Duration::from_secs(40)) + .await?; + } - flush_table_and_wait(server, &ns, table).await?; - let _ = wait_for_parquet_files_for_table(server, &ns, table, 1, Duration::from_secs(40)).await?; - } + // ----------------------------------------------------------------- + // DROP TABLE: wait for cleanup job + parquet removal (smoke) + // ----------------------------------------------------------------- + { + let table = 
"drop_cleanup"; + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:2')", + ns, table + ), + &auth_a, + ) + .await?; + anyhow::ensure!(resp.status == ResponseStatus::Success); - // ----------------------------------------------------------------- - // DROP TABLE: wait for cleanup job + parquet removal (smoke) - // ----------------------------------------------------------------- - { - let table = "drop_cleanup"; + for i in 0..2 { let resp = server .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:2')", - ns, table - ), + &format!("INSERT INTO {}.{} (id, body) VALUES ({}, 'x')", ns, table, i), &auth_a, ) .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); + } - for i in 0..2 { - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, body) VALUES ({}, 'x')", ns, table, i), - &auth_a, - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success); - } - - flush_table_and_wait(server, &ns, table).await?; - let parquet = wait_for_parquet_files_for_user_table( - server, - &ns, - table, - &user_a_id, - 1, - Duration::from_secs(90), - ) - .await?; - let parquet_dir = parquet - .first() - .and_then(|p| p.parent()) - .map(|p| p.to_path_buf()) - .ok_or_else(|| anyhow::anyhow!("missing parquet parent dir"))?; - - let drop_resp = server - .execute_sql(&format!("DROP TABLE {}.{}", ns, table)) - .await?; - anyhow::ensure!(drop_resp.status == ResponseStatus::Success); + flush_table_and_wait(server, &ns, table).await?; + let parquet = wait_for_parquet_files_for_user_table( + server, + &ns, + table, + &user_a_id, + 1, + Duration::from_secs(90), + ) + .await?; + let parquet_dir = parquet + .first() + .and_then(|p| p.parent()) + .map(|p| p.to_path_buf()) + .ok_or_else(|| anyhow::anyhow!("missing parquet parent 
dir"))?; - let msg = drop_resp - .results - .first() - .and_then(|r| r.message.as_deref()) - .unwrap_or(""); + let drop_resp = server.execute_sql(&format!("DROP TABLE {}.{}", ns, table)).await?; + anyhow::ensure!(drop_resp.status == ResponseStatus::Success); - if let Some(job_id) = extract_cleanup_job_id(msg) { - let _ = wait_for_job_completion(server, &job_id, Duration::from_secs(15)).await?; - } + let msg = drop_resp.results.first().and_then(|r| r.message.as_deref()).unwrap_or(""); - // Allow async filestore cleanup to finish. - anyhow::ensure!( - wait_for_path_absent(&parquet_dir, Duration::from_secs(5)).await, - "expected parquet dir removed after drop: {}", - parquet_dir.display() - ); + if let Some(job_id) = extract_cleanup_job_id(msg) { + let _ = wait_for_job_completion(server, &job_id, Duration::from_secs(15)).await?; } - Ok(()) + // Allow async filestore cleanup to finish. + anyhow::ensure!( + wait_for_path_absent(&parquet_dir, Duration::from_secs(5)).await, + "expected parquet dir removed after drop: {}", + parquet_dir.display() + ); + } + + Ok(()) }) - .await - .expect("test_flush_policy_and_parquet_output_over_http"); + .await + .expect("test_flush_policy_and_parquet_output_over_http"); } #[tokio::test] @@ -336,18 +380,21 @@ async fn test_automatic_flush_waits_for_row_limit_before_writing_parquet_over_ht let ns = unique_namespace("flush_row_limit"); let table = "threshold_guard"; - let resp = server - .execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)) - .await?; + let resp = server.execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)).await?; anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE NAMESPACE failed"); let resp = server .execute_sql(&format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH (TYPE='SHARED', STORAGE_ID='local', FLUSH_POLICY='rows:5')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local', FLUSH_POLICY='rows:5')", ns, table )) 
.await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); let storage_root = server.storage_root(); anyhow::ensure!(count_parquet_files_for_table(&storage_root, &ns, table) == 0); @@ -359,7 +406,11 @@ async fn test_automatic_flush_waits_for_row_limit_before_writing_parquet_over_ht ns, table, i, i )) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "INSERT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "INSERT failed: {:?}", + resp.error + ); } FlushScheduler::check_and_schedule(&app_context, jobs_manager.as_ref()).await?; @@ -374,12 +425,13 @@ async fn test_automatic_flush_waits_for_row_limit_before_writing_parquet_over_ht ); let resp = server - .execute_sql(&format!( - "INSERT INTO {}.{} (id, payload) VALUES (4, 'row-4')", - ns, table - )) + .execute_sql(&format!("INSERT INTO {}.{} (id, payload) VALUES (4, 'row-4')", ns, table)) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "threshold INSERT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "threshold INSERT failed: {:?}", + resp.error + ); FlushScheduler::check_and_schedule(&app_context, jobs_manager.as_ref()).await?; @@ -403,7 +455,9 @@ async fn test_automatic_flush_waits_for_row_limit_before_writing_parquet_over_ht let _ = jobs_manager.run_once_for_tests().await?; if tokio::time::Instant::now() >= flush_deadline { - anyhow::bail!("expected parquet files to be written after the threshold flush job executed"); + anyhow::bail!( + "expected parquet files to be written after the threshold flush job executed" + ); } tokio::time::sleep(Duration::from_millis(50)).await; @@ -444,28 +498,29 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file let user_a = unique_table("threshold_alice"); let user_b = 
unique_table("threshold_bob"); - let resp = server - .execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)) - .await?; + let resp = server.execute_sql(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)).await?; anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE NAMESPACE failed"); let (auth_a, user_a_id) = - create_user_auth_header_with_id(server, &user_a, "UserPass123!", &Role::User) - .await?; + create_user_auth_header_with_id(server, &user_a, "UserPass123!", &Role::User).await?; let (auth_b, user_b_id) = - create_user_auth_header_with_id(server, &user_b, "UserPass123!", &Role::User) - .await?; + create_user_auth_header_with_id(server, &user_b, "UserPass123!", &Role::User).await?; let resp = server .execute_sql_with_auth( &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:5')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, payload TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:5')", ns, table ), &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); let storage_root = server.storage_root(); let initial_job_count = count_matching_flush_jobs(server, &ns, table).await?; @@ -482,7 +537,11 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "INSERT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "INSERT failed: {:?}", + resp.error + ); } FlushScheduler::check_and_schedule(&app_context, jobs_manager.as_ref()).await?; @@ -504,14 +563,15 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file let resp = server .execute_sql_with_auth( - &format!( - "INSERT INTO {}.{} (id, payload) VALUES (4, 'user-a-4')", 
- ns, table - ), + &format!("INSERT INTO {}.{} (id, payload) VALUES (4, 'user-a-4')", ns, table), &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "threshold INSERT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "threshold INSERT failed: {:?}", + resp.error + ); FlushScheduler::check_and_schedule(&app_context, jobs_manager.as_ref()).await?; @@ -535,7 +595,9 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file let _ = jobs_manager.run_once_for_tests().await?; if tokio::time::Instant::now() >= flush_deadline { - anyhow::bail!("expected user-table parquet files after the threshold flush job executed"); + anyhow::bail!( + "expected user-table parquet files after the threshold flush job executed" + ); } tokio::time::sleep(Duration::from_millis(50)).await; @@ -565,7 +627,8 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file anyhow::ensure!(query_count_with_auth(server, &auth_a, &ns, table).await? == 5); anyhow::ensure!(query_count_with_auth(server, &auth_b, &ns, table).await? 
== 0); - let files_after_first_flush = count_parquet_files_for_user(&storage_root, &ns, table, &user_a_id); + let files_after_first_flush = + count_parquet_files_for_user(&storage_root, &ns, table, &user_a_id); let jobs_after_first_flush = count_matching_flush_jobs(server, &ns, table).await?; for i in 100..104 { @@ -578,7 +641,11 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file &auth_a, ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "second-batch INSERT failed: {:?}", resp.error); + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "second-batch INSERT failed: {:?}", + resp.error + ); } FlushScheduler::check_and_schedule(&app_context, jobs_manager.as_ref()).await?; @@ -588,7 +655,8 @@ async fn test_automatic_user_flush_waits_for_row_limit_and_writes_only_user_file "row threshold should reset after a flush and not schedule a new job at 4 rows" ); anyhow::ensure!( - count_parquet_files_for_user(&storage_root, &ns, table, &user_a_id) == files_after_first_flush, + count_parquet_files_for_user(&storage_root, &ns, table, &user_a_id) + == files_after_first_flush, "no additional parquet files should be created for the next batch below threshold" ); anyhow::ensure!(query_count_with_auth(server, &auth_a, &ns, table).await? == 9); diff --git a/backend/tests/testserver/flush/test_flush_unregistered_suite_http.rs b/backend/tests/testserver/flush/test_flush_unregistered_suite_http.rs index ba4c274bb..b63312ffa 100644 --- a/backend/tests/testserver/flush/test_flush_unregistered_suite_http.rs +++ b/backend/tests/testserver/flush/test_flush_unregistered_suite_http.rs @@ -4,13 +4,16 @@ //! registered in `backend/Cargo.toml`, so they never ran. This suite migrates //! them to the near-production HTTP harness. 
-use super::test_support::auth_helper::create_user_auth_header; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::http_server::HttpTestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::{ + auth_helper::create_user_auth_header, + consolidated_helpers::{unique_namespace, unique_table}, + http_server::HttpTestServer, +}; + async fn wait_for_flush_jobs_settled( server: &HttpTestServer, ns: &str, diff --git a/backend/tests/testserver/flush/test_pk_uniqueness_hot_cold_http.rs b/backend/tests/testserver/flush/test_pk_uniqueness_hot_cold_http.rs index eb2a63e1d..26e724dea 100644 --- a/backend/tests/testserver/flush/test_pk_uniqueness_hot_cold_http.rs +++ b/backend/tests/testserver/flush/test_pk_uniqueness_hot_cold_http.rs @@ -1,11 +1,14 @@ //! Primary key uniqueness checks in hot storage and after flush (cold Parquet), over HTTP. -use super::test_support::auth_helper::create_user_auth_header_default; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::flush::{flush_table_and_wait, wait_for_parquet_files_for_table}; use kalam_client::models::ResponseStatus; use tokio::time::Duration; +use super::test_support::{ + auth_helper::create_user_auth_header_default, + consolidated_helpers::{unique_namespace, unique_table}, + flush::{flush_table_and_wait, wait_for_parquet_files_for_table}, +}; + async fn count_rows( server: &super::test_support::http_server::HttpTestServer, auth: &str, @@ -59,232 +62,250 @@ async fn get_name_for_id( #[ntest::timeout(180000)] // 3 minutes max for comprehensive PK uniqueness test async fn test_pk_uniqueness_hot_and_cold_over_http() { (async { - let server = super::test_support::http_server::get_global_server().await; - let ns = unique_namespace("pk"); - let table_user = "items_user"; - let table_shared = "items_shared"; + let server = 
super::test_support::http_server::get_global_server().await; + let ns = unique_namespace("pk"); + let table_user = "items_user"; + let table_shared = "items_shared"; + + let resp = server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await?; + assert_eq!(resp.status, ResponseStatus::Success); + + let auth_a = create_user_auth_header_default(server, &unique_table("user_a")).await?; + // ------------------------- + // USER table: hot duplicate + // ------------------------- + { let resp = server - .execute_sql(&format!("CREATE NAMESPACE {}", ns)) + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:100')", + ns, table_user + ), + &auth_a, + ) .await?; assert_eq!(resp.status, ResponseStatus::Success); - let auth_a = create_user_auth_header_default(server, &unique_table("user_a")).await?; - - // ------------------------- - // USER table: hot duplicate - // ------------------------- - { - let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:100')", - ns, table_user - ), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (1, 'first')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (1, 'dup')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Error); + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (1, 'first')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (2, 'second')", 
ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - } + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (1, 'dup')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Error); - // ------------------------- - // USER table: cold duplicate - // ------------------------- - { - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (10, 'cold')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (2, 'second')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); + } - flush_table_and_wait(server, &ns, table_user).await?; - let _ = wait_for_parquet_files_for_table(server, &ns, table_user, 1, Duration::from_secs(20)).await?; + // ------------------------- + // USER table: cold duplicate + // ------------------------- + { + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (10, 'cold')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (10, 'dup_cold')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Error); + flush_table_and_wait(server, &ns, table_user).await?; + let _ = wait_for_parquet_files_for_table( + server, + &ns, + table_user, + 1, + Duration::from_secs(20), + ) + .await?; - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (11, 'ok')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - } + let resp = server + .execute_sql_with_auth( + &format!( + "INSERT INTO {}.{} (id, name) VALUES (10, 'dup_cold')", + ns, table_user + ), 
+ &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Error); - // ------------------------- - // USER table: across segments - // ------------------------- - { - // Insert + flush two separate segments; ensure duplicate is rejected. - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (20, 'seg1')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - flush_table_and_wait(server, &ns, table_user).await?; + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (11, 'ok')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); + } - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (30, 'seg2')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - flush_table_and_wait(server, &ns, table_user).await?; + // ------------------------- + // USER table: across segments + // ------------------------- + { + // Insert + flush two separate segments; ensure duplicate is rejected. 
+ let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (20, 'seg1')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); + flush_table_and_wait(server, &ns, table_user).await?; - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (20, 'dup_seg')", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Error); - } + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (30, 'seg2')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); + flush_table_and_wait(server, &ns, table_user).await?; - // ------------------------- - // Shared table: hot+cold dup - // ------------------------- - { - let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', STORAGE_ID='local', FLUSH_POLICY='rows:100')", - ns, table_shared - )) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (20, 'dup_seg')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Error); + } - let resp = server - .execute_sql(&format!( - "INSERT INTO {}.{} (id, name) VALUES (100, 'first')", - ns, table_shared - )) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); + // ------------------------- + // Shared table: hot+cold dup + // ------------------------- + { + let resp = server + .execute_sql(&format!( + "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local', FLUSH_POLICY='rows:100')", + ns, table_shared + )) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - let resp = server - .execute_sql(&format!( - "INSERT INTO {}.{} (id, name) VALUES (100, 'dup')", - ns, table_shared - )) - .await?; - anyhow::ensure!( - 
matches!(resp.status, ResponseStatus::Success | ResponseStatus::Error), - "Unexpected response status: {:?}", - resp.status - ); + let resp = server + .execute_sql(&format!( + "INSERT INTO {}.{} (id, name) VALUES (100, 'first')", + ns, table_shared + )) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - // Regardless of immediate response status, the shared table should not allow - // a visible overwrite of the existing PK. - let name = get_name_for_id(server, &ns, table_shared, 100).await?; - anyhow::ensure!(name == "first", "expected name='first', got '{}'", name); + let resp = server + .execute_sql(&format!( + "INSERT INTO {}.{} (id, name) VALUES (100, 'dup')", + ns, table_shared + )) + .await?; + anyhow::ensure!( + matches!(resp.status, ResponseStatus::Success | ResponseStatus::Error), + "Unexpected response status: {:?}", + resp.status + ); + + // Regardless of immediate response status, the shared table should not allow + // a visible overwrite of the existing PK. + let name = get_name_for_id(server, &ns, table_shared, 100).await?; + anyhow::ensure!(name == "first", "expected name='first', got '{}'", name); + + flush_table_and_wait(server, &ns, table_shared).await?; + let _ = wait_for_parquet_files_for_table( + server, + &ns, + table_shared, + 1, + Duration::from_secs(20), + ) + .await?; - flush_table_and_wait(server, &ns, table_shared).await?; - let _ = wait_for_parquet_files_for_table(server, &ns, table_shared, 1, Duration::from_secs(20)).await?; + let resp = server + .execute_sql(&format!( + "INSERT INTO {}.{} (id, name) VALUES (100, 'dup_cold')", + ns, table_shared + )) + .await?; + anyhow::ensure!( + matches!(resp.status, ResponseStatus::Success | ResponseStatus::Error), + "Unexpected response status: {:?}", + resp.status + ); + let name = get_name_for_id(server, &ns, table_shared, 100).await?; + anyhow::ensure!(name == "first", "expected name='first', got '{}'", name); + } + + // ------------------------- + // UPDATE changing PK to 
duplicate should error + // ------------------------- + { + // Make sure ids 1 and 2 exist. + let resp = server + .execute_sql_with_auth( + &format!("INSERT INTO {}.{} (id, name) VALUES (40, 'x')", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - let resp = server - .execute_sql(&format!( - "INSERT INTO {}.{} (id, name) VALUES (100, 'dup_cold')", - ns, table_shared - )) - .await?; - anyhow::ensure!( - matches!(resp.status, ResponseStatus::Success | ResponseStatus::Error), - "Unexpected response status: {:?}", - resp.status - ); - let name = get_name_for_id(server, &ns, table_shared, 100).await?; - anyhow::ensure!(name == "first", "expected name='first', got '{}'", name); - } + // Try to change 40 -> 2 (duplicate) + let resp = server + .execute_sql_with_auth( + &format!("UPDATE {}.{} SET id = 2 WHERE id = 40", ns, table_user), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Error); + } + + // ------------------------- + // AUTO_INCREMENT PK should allow inserts without id conflicts + // ------------------------- + { + let table_auto = "items_auto"; + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, name TEXT) \ + WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:100')", + ns, table_auto + ), + &auth_a, + ) + .await?; + assert_eq!(resp.status, ResponseStatus::Success); - // ------------------------- - // UPDATE changing PK to duplicate should error - // ------------------------- - { - // Make sure ids 1 and 2 exist. 
+ for _ in 0..3 { let resp = server .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (id, name) VALUES (40, 'x')", ns, table_user), + &format!("INSERT INTO {}.{} (name) VALUES ('n')", ns, table_auto), &auth_a, ) .await?; assert_eq!(resp.status, ResponseStatus::Success); - - // Try to change 40 -> 2 (duplicate) - let resp = server - .execute_sql_with_auth( - &format!("UPDATE {}.{} SET id = 2 WHERE id = 40", ns, table_user), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Error); } - // ------------------------- - // AUTO_INCREMENT PK should allow inserts without id conflicts - // ------------------------- - { - let table_auto = "items_auto"; - let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, name TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:100')", - ns, table_auto - ), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - - for _ in 0..3 { - let resp = server - .execute_sql_with_auth( - &format!("INSERT INTO {}.{} (name) VALUES ('n')", ns, table_auto), - &auth_a, - ) - .await?; - assert_eq!(resp.status, ResponseStatus::Success); - } - - let cnt = count_rows(server, &auth_a, &ns, table_auto).await?; - anyhow::ensure!(cnt == 3, "expected 3 rows in auto table, got {}", cnt); - } + let cnt = count_rows(server, &auth_a, &ns, table_auto).await?; + anyhow::ensure!(cnt == 3, "expected 3 rows in auto table, got {}", cnt); + } - Ok(()) + Ok(()) }) - .await - .expect("test_pk_uniqueness_hot_and_cold_over_http"); + .await + .expect("test_pk_uniqueness_hot_and_cold_over_http"); } diff --git a/backend/tests/testserver/manifest/test_manifest_flush_http_v2.rs b/backend/tests/testserver/manifest/test_manifest_flush_http_v2.rs index b6105612a..f87633c2c 100644 --- a/backend/tests/testserver/manifest/test_manifest_flush_http_v2.rs +++ b/backend/tests/testserver/manifest/test_manifest_flush_http_v2.rs @@ -1,10 +1,11 @@ //! 
Manifest flush behavior over the real HTTP SQL API. -use super::test_support::consolidated_helpers::unique_namespace; use kalam_client::models::ResponseStatus; use kalamdb_system::Manifest; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::consolidated_helpers::unique_namespace; + fn find_manifest_files(root: &std::path::Path) -> Vec { fn recurse(dir: &std::path::Path, out: &mut Vec) { let Ok(entries) = std::fs::read_dir(dir) else { @@ -91,11 +92,12 @@ async fn test_shared_flush_creates_manifest_json_over_http() -> anyhow::Result<( assert_eq!(resp.status, ResponseStatus::Success); let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', FLUSH_POLICY = 'rows:5')", - namespace, table - )) - .await?; + .execute_sql(&format!( + "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', \ + FLUSH_POLICY = 'rows:5')", + namespace, table + )) + .await?; assert_eq!(resp.status, ResponseStatus::Success); for i in 1..=7 { @@ -165,7 +167,8 @@ async fn test_shared_flush_cleans_empty_segments_and_parquet_files() -> anyhow:: let resp = server .execute_sql(&format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', FLUSH_POLICY = 'rows:5')", + "CREATE TABLE {}.{} (id INT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', \ + FLUSH_POLICY = 'rows:5')", namespace, table )) .await?; @@ -246,7 +249,8 @@ async fn test_shared_flush_cleans_empty_segments_and_parquet_files() -> anyhow:: if Instant::now() >= cleanup_deadline { anyhow::bail!( - "Expected empty manifest and no parquet files after cleanup for {}.{} (segments={}, files={:?})", + "Expected empty manifest and no parquet files after cleanup for {}.{} \ + (segments={}, files={:?})", namespace, table, manifest.segments.len(), diff --git a/backend/tests/testserver/manifest/test_manifest_persistence_http.rs b/backend/tests/testserver/manifest/test_manifest_persistence_http.rs index 134d6dddf..54a6571c8 100644 
--- a/backend/tests/testserver/manifest/test_manifest_persistence_http.rs +++ b/backend/tests/testserver/manifest/test_manifest_persistence_http.rs @@ -1,12 +1,15 @@ //! Manifest persistence behavior over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::flush::flush_table_and_wait; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::{ + auth_helper::create_user_auth_header, + consolidated_helpers::{unique_namespace, unique_table}, + flush::flush_table_and_wait, +}; + fn find_manifest_files(root: &std::path::Path) -> Vec { fn recurse(dir: &std::path::Path, out: &mut Vec) { let Ok(entries) = std::fs::read_dir(dir) else { @@ -44,14 +47,15 @@ async fn test_user_table_manifest_persistence_over_http() -> anyhow::Result<()> assert_eq!(resp.status, ResponseStatus::Success, "resp.error={:?}", resp.error); let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, event_type TEXT, ts INT) WITH (TYPE = 'USER', STORAGE_ID = 'local')", - ns, table - ), - &user_auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, event_type TEXT, ts INT) WITH (TYPE \ + = 'USER', STORAGE_ID = 'local')", + ns, table + ), + &user_auth, + ) + .await?; assert_eq!(resp.status, ResponseStatus::Success); let resp = server @@ -132,14 +136,15 @@ async fn test_user_table_manifest_persistence_over_http() -> anyhow::Result<()> assert_eq!(resp.status, ResponseStatus::Success, "resp.error={:?}", resp.error); let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, metric_name TEXT, value DOUBLE) WITH (TYPE = 'USER', STORAGE_ID = 'local')", - ns, table - ), - &user_auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id 
TEXT PRIMARY KEY, metric_name TEXT, value DOUBLE) \ + WITH (TYPE = 'USER', STORAGE_ID = 'local')", + ns, table + ), + &user_auth, + ) + .await?; assert_eq!(resp.status, ResponseStatus::Success, "resp.error={:?}", resp.error); let resp = server diff --git a/backend/tests/testserver/observability/test_production_observability_http.rs b/backend/tests/testserver/observability/test_production_observability_http.rs index a339beb53..ca865ce6d 100644 --- a/backend/tests/testserver/observability/test_production_observability_http.rs +++ b/backend/tests/testserver/observability/test_production_observability_http.rs @@ -1,11 +1,14 @@ //! Production-readiness observability checks over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::{ + auth_helper::create_user_auth_header, + consolidated_helpers::{unique_namespace, unique_table}, +}; + #[tokio::test] #[ntest::timeout(60000)] // 60 seconds - observability test with job polling async fn test_observability_system_tables_and_jobs_over_http() -> anyhow::Result<()> { @@ -19,19 +22,21 @@ async fn test_observability_system_tables_and_jobs_over_http() -> anyhow::Result assert_eq!(resp.status, ResponseStatus::Success); let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.messages (id TEXT PRIMARY KEY, content TEXT NOT NULL, timestamp BIGINT) WITH (TYPE = 'USER')", - ns_tables - )) - .await?; + .execute_sql(&format!( + "CREATE TABLE {}.messages (id TEXT PRIMARY KEY, content TEXT NOT NULL, timestamp \ + BIGINT) WITH (TYPE = 'USER')", + ns_tables + )) + .await?; assert_eq!(resp.status, ResponseStatus::Success); let resp = server - .execute_sql(&format!( - "SELECT namespace_id, table_name, table_type FROM system.schemas WHERE namespace_id = '{}' AND table_name = 'messages' 
AND is_latest = true", - ns_tables - )) - .await?; + .execute_sql(&format!( + "SELECT namespace_id, table_name, table_type FROM system.schemas WHERE namespace_id = \ + '{}' AND table_name = 'messages' AND is_latest = true", + ns_tables + )) + .await?; assert_eq!(resp.status, ResponseStatus::Success); let rows = resp.rows_as_maps(); assert_eq!(rows.len(), 1); diff --git a/backend/tests/testserver/sql/test_dml_parameters_http.rs b/backend/tests/testserver/sql/test_dml_parameters_http.rs index 9ade5396b..7a0d2b222 100644 --- a/backend/tests/testserver/sql/test_dml_parameters_http.rs +++ b/backend/tests/testserver/sql/test_dml_parameters_http.rs @@ -5,14 +5,17 @@ //! - Parameter validation (max 50 params, 512KB each) //! - Params not allowed with multi-statement batches -use super::test_support::auth_helper::create_user_auth_header; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::http_server::HttpTestServer; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use serde_json::json; use tokio::time::{sleep, Duration, Instant}; +use super::test_support::{ + auth_helper::create_user_auth_header, + consolidated_helpers::{unique_namespace, unique_table}, + http_server::HttpTestServer, +}; + async fn count_rows( server: &HttpTestServer, auth: &str, @@ -43,244 +46,258 @@ async fn count_rows( #[ntest::timeout(90000)] // 90 seconds - parameterized DML test async fn test_parameterized_dml_over_http() { (async { - let server = super::test_support::http_server::get_global_server().await; - let ns = unique_namespace("params"); - let table = "items"; + let server = super::test_support::http_server::get_global_server().await; + let ns = unique_namespace("params"); + let table = "items"; - let resp = server - .execute_sql(&format!("CREATE NAMESPACE {}", ns)) + let resp = server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await?; + anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE NAMESPACE 
failed"); + + let auth = create_user_auth_header( + server, + &unique_table("user_params"), + "UserPass123!", + &Role::Dba, + ) .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE NAMESPACE failed"); - let auth = - create_user_auth_header(server, &unique_table("user_params"), "UserPass123!", &Role::Dba) + let resp = server + .execute_sql(&format!( + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT, age INT) WITH \ + (TYPE='SHARED', STORAGE_ID='local')", + ns, table + )) .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); - let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT, age INT) WITH (TYPE='SHARED', STORAGE_ID='local')", - ns, table - )) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", resp.error); - - // Near-production servers may accept DDL before it is immediately queryable - // (e.g. metadata propagation/registration). Wait briefly for visibility. - { - let deadline = Instant::now() + Duration::from_secs(5); - loop { - let probe = server - .execute_sql(&format!( - "SELECT COUNT(*) AS cnt FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", - ns, table - )) - .await; + // Near-production servers may accept DDL before it is immediately queryable + // (e.g. metadata propagation/registration). Wait briefly for visibility. 
+ { + let deadline = Instant::now() + Duration::from_secs(5); + loop { + let probe = server + .execute_sql(&format!( + "SELECT COUNT(*) AS cnt FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}'", + ns, table + )) + .await; - if let Ok(resp) = &probe { - if resp.status == ResponseStatus::Success { - let cnt = resp - .results - .first() - .and_then(|r| r.row_as_map(0)) - .and_then(|m| m.get("cnt").cloned()) - .and_then(|v| { - v.as_i64() - .or_else(|| v.as_u64().map(|u| u as i64)) - .or_else(|| v.as_str().and_then(|s| s.parse::().ok())) - }) - .unwrap_or(0); - if cnt >= 1 { - break; + if let Ok(resp) = &probe { + if resp.status == ResponseStatus::Success { + let cnt = resp + .results + .first() + .and_then(|r| r.row_as_map(0)) + .and_then(|m| m.get("cnt").cloned()) + .and_then(|v| { + v.as_i64() + .or_else(|| v.as_u64().map(|u| u as i64)) + .or_else(|| v.as_str().and_then(|s| s.parse::().ok())) + }) + .unwrap_or(0); + if cnt >= 1 { + break; + } } } - } - if Instant::now() >= deadline { - let listing = server - .execute_sql(&format!( - "SELECT namespace_id, table_name FROM system.schemas WHERE table_name = '{}'", - table - )) - .await - .ok() - .and_then(|r| r.results.first().map(|qr| qr.rows_as_maps())) - .unwrap_or_default(); + if Instant::now() >= deadline { + let listing = server + .execute_sql(&format!( + "SELECT namespace_id, table_name FROM system.schemas WHERE table_name \ + = '{}'", + table + )) + .await + .ok() + .and_then(|r| r.results.first().map(|qr| qr.rows_as_maps())) + .unwrap_or_default(); + + anyhow::bail!( + "Table {}.{} not visible in system.schemas after CREATE TABLE \ + (last_probe={:?}, listing={:?})", + ns, + table, + probe, + listing + ); + } - anyhow::bail!( - "Table {}.{} not visible in system.schemas after CREATE TABLE (last_probe={:?}, listing={:?})", - ns, - table, - probe - , - listing - ); + sleep(Duration::from_millis(50)).await; } + } - sleep(Duration::from_millis(50)).await; + // INSERT with parameters + { + let 
resp = server + .execute_sql_with_auth_and_params( + &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, $2, $3)", ns, table), + &auth, + vec![json!(1), json!("Alice"), json!(30)], + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "INSERT params failed: {:?}", + resp.error + ); + let cnt = count_rows(server, &auth, &ns, table).await?; + anyhow::ensure!(cnt == 1, "expected 1 row after insert, got {}", cnt); } - } - // INSERT with parameters - { - let resp = server - .execute_sql_with_auth_and_params( - &format!( - "INSERT INTO {}.{} (id, name, age) VALUES ($1, $2, $3)", - ns, table - ), - &auth, - vec![json!(1), json!("Alice"), json!(30)], - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "INSERT params failed: {:?}", resp.error); - let cnt = count_rows(server, &auth, &ns, table).await?; - anyhow::ensure!(cnt == 1, "expected 1 row after insert, got {}", cnt); - } + // UPDATE with parameters + { + let resp = server + .execute_sql_with_auth_and_params( + &format!("UPDATE {}.{} SET name = $1, age = $2 WHERE id = $3", ns, table), + &auth, + vec![json!("Alice Updated"), json!(31), json!(1)], + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "UPDATE params failed: {:?}", + resp.error + ); - // UPDATE with parameters - { - let resp = server - .execute_sql_with_auth_and_params( - &format!( - "UPDATE {}.{} SET name = $1, age = $2 WHERE id = $3", - ns, table - ), - &auth, - vec![json!("Alice Updated"), json!(31), json!(1)], - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "UPDATE params failed: {:?}", resp.error); + let resp = server + .execute_sql_with_auth( + &format!("SELECT name, age FROM {}.{} WHERE id = 1", ns, table), + &auth, + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "SELECT failed: {:?}", + resp.error + ); + let row = resp + .results + .first() + .and_then(|r| r.row_as_map(0)) + .ok_or_else(|| anyhow::anyhow!("Missing row"))?; 
+ anyhow::ensure!(row.get("name").and_then(|v| v.as_str()) == Some("Alice Updated")); + anyhow::ensure!(row.get("age").and_then(|v| v.as_i64()) == Some(31)); + } - let resp = server - .execute_sql_with_auth( - &format!("SELECT name, age FROM {}.{} WHERE id = 1", ns, table), - &auth, - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "SELECT failed: {:?}", resp.error); - let row = resp - .results - .first() - .and_then(|r| r.row_as_map(0)) - .ok_or_else(|| anyhow::anyhow!("Missing row"))?; - anyhow::ensure!(row.get("name").and_then(|v| v.as_str()) == Some("Alice Updated")); - anyhow::ensure!(row.get("age").and_then(|v| v.as_i64()) == Some(31)); - } + // DELETE with parameters + { + let resp = server + .execute_sql_with_auth_and_params( + &format!("DELETE FROM {}.{} WHERE id = $1", ns, table), + &auth, + vec![json!(1)], + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "DELETE params failed: {:?}", + resp.error + ); + let cnt = count_rows(server, &auth, &ns, table).await?; + anyhow::ensure!(cnt == 0, "expected 0 rows after delete, got {}", cnt); + } - // DELETE with parameters - { - let resp = server - .execute_sql_with_auth_and_params( - &format!("DELETE FROM {}.{} WHERE id = $1", ns, table), - &auth, - vec![json!(1)], - ) - .await?; - anyhow::ensure!(resp.status == ResponseStatus::Success, "DELETE params failed: {:?}", resp.error); - let cnt = count_rows(server, &auth, &ns, table).await?; - anyhow::ensure!(cnt == 0, "expected 0 rows after delete, got {}", cnt); + // Parameter count validation (max 50) + { + let mut params = Vec::new(); + for i in 1..=51 { + params.push(json!(i)); } - // Parameter count validation (max 50) - { - let mut params = Vec::new(); - for i in 1..=51 { - params.push(json!(i)); - } + let resp = server + .execute_sql_with_auth_and_params( + &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, 'x', 0)", ns, table), + &auth, + params, + ) + .await; - let resp = server - 
.execute_sql_with_auth_and_params( - &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, 'x', 0)", ns, table), - &auth, - params, - ) - .await; - - // Should fail with parameter count error - match resp { - Err(e) => { - let err_msg = e.to_string(); - // Error might be wrapped, just ensure it failed - assert!(!err_msg.is_empty(), "Should have error message"); - } - Ok(resp) if resp.status == ResponseStatus::Error => { - let msg = resp - .error - .as_ref() - .map(|e| e.message.as_str()) - .unwrap_or(""); - let msg_lower = msg.to_lowercase(); - anyhow::ensure!( - msg_lower.contains("parameter") - && (msg_lower.contains("limit") - || msg_lower.contains("maximum") - || msg_lower.contains("exceeded")), - "unexpected error message: {}", - msg - ); - } - Ok(_) => anyhow::bail!("expected params count error"), - } + // Should fail with parameter count error + match resp { + Err(e) => { + let err_msg = e.to_string(); + // Error might be wrapped, just ensure it failed + assert!(!err_msg.is_empty(), "Should have error message"); + }, + Ok(resp) if resp.status == ResponseStatus::Error => { + let msg = resp.error.as_ref().map(|e| e.message.as_str()).unwrap_or(""); + let msg_lower = msg.to_lowercase(); + anyhow::ensure!( + msg_lower.contains("parameter") + && (msg_lower.contains("limit") + || msg_lower.contains("maximum") + || msg_lower.contains("exceeded")), + "unexpected error message: {}", + msg + ); + }, + Ok(_) => anyhow::bail!("expected params count error"), } + } - // Parameter size validation (512KB) - { - let large_string = "a".repeat(600_000); - let resp = server - .execute_sql_with_auth_and_params( - &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, $2, 0)", ns, table), - &auth, - vec![json!(2), json!(large_string)], - ) - .await; - - // Should fail with parameter size error - match resp { - Err(e) => { - let err_msg = e.to_string(); - // Error might be wrapped, just ensure it failed - assert!(!err_msg.is_empty(), "Should have error message"); - } - Ok(resp) 
if resp.status == ResponseStatus::Error => { - let msg = resp - .error - .as_ref() - .map(|e| e.message.as_str()) - .unwrap_or(""); - anyhow::ensure!( - msg.to_lowercase().contains("size") || msg.to_lowercase().contains("512"), - "unexpected error message: {}", - msg - ); - } - Ok(_) => anyhow::bail!("expected params size error"), - } + // Parameter size validation (512KB) + { + let large_string = "a".repeat(600_000); + let resp = server + .execute_sql_with_auth_and_params( + &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, $2, 0)", ns, table), + &auth, + vec![json!(2), json!(large_string)], + ) + .await; + + // Should fail with parameter size error + match resp { + Err(e) => { + let err_msg = e.to_string(); + // Error might be wrapped, just ensure it failed + assert!(!err_msg.is_empty(), "Should have error message"); + }, + Ok(resp) if resp.status == ResponseStatus::Error => { + let msg = resp.error.as_ref().map(|e| e.message.as_str()).unwrap_or(""); + anyhow::ensure!( + msg.to_lowercase().contains("size") || msg.to_lowercase().contains("512"), + "unexpected error message: {}", + msg + ); + }, + Ok(_) => anyhow::bail!("expected params size error"), } + } - // Multi-statement batches with params should be rejected - { - let resp = server - .execute_sql_with_auth_and_params( - &format!("INSERT INTO {}.{} (id, name, age) VALUES ($1, 'x', 0); SELECT 1", ns, table), - &auth, - vec![json!(123)], - ) - .await; - - // Should fail - multi-statement batches with parameters are not allowed - match resp { - Err(_e) => { - // Expected - operation should be rejected - } - Ok(resp) if resp.status == ResponseStatus::Error => { - // Also acceptable if server returns error response - } - Ok(_) => anyhow::bail!("expected params-with-batch error"), + // Multi-statement batches with params should be rejected + { + let resp = server + .execute_sql_with_auth_and_params( + &format!( + "INSERT INTO {}.{} (id, name, age) VALUES ($1, 'x', 0); SELECT 1", + ns, table + ), + &auth, + 
vec![json!(123)], + ) + .await; + + // Should fail - multi-statement batches with parameters are not allowed + match resp { + Err(_e) => { + // Expected - operation should be rejected + }, + Ok(resp) if resp.status == ResponseStatus::Error => { + // Also acceptable if server returns error response + }, + Ok(_) => anyhow::bail!("expected params-with-batch error"), + } } - } - Ok(()) + Ok(()) }) - .await - .expect("test_parameterized_dml_over_http"); + .await + .expect("test_parameterized_dml_over_http"); } diff --git a/backend/tests/testserver/sql/test_namespace_validation_http.rs b/backend/tests/testserver/sql/test_namespace_validation_http.rs index 837a55ec9..d340c1ed5 100644 --- a/backend/tests/testserver/sql/test_namespace_validation_http.rs +++ b/backend/tests/testserver/sql/test_namespace_validation_http.rs @@ -1,11 +1,14 @@ //! Namespace validation tests over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; use kalam_client::models::ResponseStatus; use kalamdb_commons::Role; use tokio::time::Duration; +use super::test_support::{ + auth_helper::create_user_auth_header, + consolidated_helpers::{unique_namespace, unique_table}, +}; + #[tokio::test] #[ntest::timeout(60000)] // 60 seconds - namespace validation test async fn test_namespace_validation_over_http() -> anyhow::Result<()> { diff --git a/backend/tests/testserver/sql/test_naming_validation_http.rs b/backend/tests/testserver/sql/test_naming_validation_http.rs index ed34f72d9..c3caf839b 100644 --- a/backend/tests/testserver/sql/test_naming_validation_http.rs +++ b/backend/tests/testserver/sql/test_naming_validation_http.rs @@ -1,8 +1,9 @@ //! Naming validation tests over the real HTTP SQL API. 
-use super::test_support::consolidated_helpers::unique_namespace; use kalam_client::models::ResponseStatus; +use super::test_support::consolidated_helpers::unique_namespace; + #[tokio::test] #[ntest::timeout(60000)] // 60 seconds - naming validation test async fn test_naming_validation_over_http() -> anyhow::Result<()> { diff --git a/backend/tests/testserver/sql/test_quickstart_http.rs b/backend/tests/testserver/sql/test_quickstart_http.rs index 05e0e0b45..f777e871a 100644 --- a/backend/tests/testserver/sql/test_quickstart_http.rs +++ b/backend/tests/testserver/sql/test_quickstart_http.rs @@ -1,9 +1,12 @@ //! Quickstart end-to-end smoke over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header_default; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; use kalam_client::models::ResponseStatus; +use super::test_support::{ + auth_helper::create_user_auth_header_default, + consolidated_helpers::{unique_namespace, unique_table}, +}; + #[tokio::test] #[ntest::timeout(60000)] // 60 seconds - comprehensive quickstart test async fn test_quickstart_workflow_over_http() -> anyhow::Result<()> { @@ -18,14 +21,15 @@ async fn test_quickstart_workflow_over_http() -> anyhow::Result<()> { // USER table: messages { let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.messages (id BIGINT PRIMARY KEY, content TEXT, created_at BIGINT) WITH (TYPE='USER', STORAGE_ID='local')", - ns - ), - &auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.messages (id BIGINT PRIMARY KEY, content TEXT, created_at \ + BIGINT) WITH (TYPE='USER', STORAGE_ID='local')", + ns + ), + &auth, + ) + .await?; anyhow::ensure!( resp.status == ResponseStatus::Success, "CREATE USER table failed: {:?}", @@ -34,17 +38,18 @@ async fn test_quickstart_workflow_over_http() -> anyhow::Result<()> { for i in 0..5 { let resp = server - .execute_sql_with_auth( - &format!( - "INSERT INTO {}.messages (id, content, 
created_at) VALUES ({}, 'msg-{}', {})", - ns, - i, - i, - 1000 + i - ), - &auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "INSERT INTO {}.messages (id, content, created_at) VALUES ({}, 'msg-{}', \ + {})", + ns, + i, + i, + 1000 + i + ), + &auth, + ) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); } @@ -78,11 +83,12 @@ async fn test_quickstart_workflow_over_http() -> anyhow::Result<()> { // SHARED table: config { let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.config (name TEXT PRIMARY KEY, value TEXT) WITH (TYPE='SHARED', STORAGE_ID='local')", - ns - )) - .await?; + .execute_sql(&format!( + "CREATE TABLE {}.config (name TEXT PRIMARY KEY, value TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local')", + ns + )) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let resp = server @@ -109,14 +115,15 @@ async fn test_quickstart_workflow_over_http() -> anyhow::Result<()> { // Note: STREAM table type is not yet implemented. { let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.events (id BIGINT PRIMARY KEY, kind TEXT, created_at BIGINT) WITH (TYPE='USER', STORAGE_ID='local')", - ns - ), - &auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.events (id BIGINT PRIMARY KEY, kind TEXT, created_at BIGINT) \ + WITH (TYPE='USER', STORAGE_ID='local')", + ns + ), + &auth, + ) + .await?; anyhow::ensure!( resp.status == ResponseStatus::Success, "CREATE events table failed: {:?}", diff --git a/backend/tests/testserver/sql/test_transaction_quickstart_http.rs b/backend/tests/testserver/sql/test_transaction_quickstart_http.rs index 70a6d3df5..7fa31a239 100644 --- a/backend/tests/testserver/sql/test_transaction_quickstart_http.rs +++ b/backend/tests/testserver/sql/test_transaction_quickstart_http.rs @@ -2,10 +2,11 @@ use std::time::Duration; -use super::test_support::consolidated_helpers::{get_count_value, unique_namespace}; use kalam_client::models::ResponseStatus; use 
tokio::time::{sleep, Instant}; +use super::test_support::consolidated_helpers::{get_count_value, unique_namespace}; + async fn create_messages_table( server: &super::test_support::http_server::HttpTestServer, namespace: &str, @@ -17,7 +18,8 @@ async fn create_messages_table( let resp = server .execute_sql(&format!( - "CREATE TABLE {}.messages (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', STORAGE_ID='local')", + "CREATE TABLE {}.messages (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local')", namespace )) .await?; @@ -41,7 +43,8 @@ async fn create_typing_events_table( let resp = server .execute_sql(&format!( - "CREATE TABLE {}.typing_events (id BIGINT PRIMARY KEY, conversation_id BIGINT, event_type TEXT, created_at_ms BIGINT) WITH (TYPE='STREAM', TTL_SECONDS=3600)", + "CREATE TABLE {}.typing_events (id BIGINT PRIMARY KEY, conversation_id BIGINT, \ + event_type TEXT, created_at_ms BIGINT) WITH (TYPE='STREAM', TTL_SECONDS=3600)", namespace )) .await?; @@ -121,7 +124,8 @@ async fn test_sql_transaction_commit_and_sequential_blocks_over_http() -> anyhow let resp = server .execute_sql(&format!( - "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3001, 'rest'); INSERT INTO {}.messages (id, name) VALUES (3002, 'rest-2'); COMMIT;", + "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3001, 'rest'); INSERT INTO \ + {}.messages (id, name) VALUES (3002, 'rest-2'); COMMIT;", namespace, namespace )) .await?; @@ -136,7 +140,8 @@ async fn test_sql_transaction_commit_and_sequential_blocks_over_http() -> anyhow let resp = server .execute_sql(&format!( - "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3010, 'kept'); COMMIT; BEGIN; INSERT INTO {}.messages (id, name) VALUES (3011, 'dropped'); ROLLBACK;", + "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3010, 'kept'); COMMIT; BEGIN; \ + INSERT INTO {}.messages (id, name) VALUES (3011, 'dropped'); ROLLBACK;", namespace, namespace )) .await?; @@ -164,7 +169,8 @@ async fn 
test_sql_transaction_statement_failure_rolls_back_over_http() -> anyhow let resp = server .execute_sql(&format!( - "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3003, 'ok'); INSERT INTO {}.messages (id, missing_col) VALUES (3004, 'bad'); COMMIT;", + "BEGIN; INSERT INTO {}.messages (id, name) VALUES (3003, 'ok'); INSERT INTO \ + {}.messages (id, missing_col) VALUES (3004, 'bad'); COMMIT;", namespace, namespace )) .await?; @@ -209,7 +215,8 @@ async fn test_sql_transaction_rejects_stream_table_writes_over_http() -> anyhow: let resp = server .execute_sql(&format!( - "BEGIN; INSERT INTO {}.typing_events (id, conversation_id, event_type, created_at_ms) VALUES (9001, 7, 'typing', 1700000000000); COMMIT;", + "BEGIN; INSERT INTO {}.typing_events (id, conversation_id, event_type, created_at_ms) \ + VALUES (9001, 7, 'typing', 1700000000000); COMMIT;", namespace )) .await?; diff --git a/backend/tests/testserver/sql/test_user_sql_commands_http.rs b/backend/tests/testserver/sql/test_user_sql_commands_http.rs index 214776337..10b540850 100644 --- a/backend/tests/testserver/sql/test_user_sql_commands_http.rs +++ b/backend/tests/testserver/sql/test_user_sql_commands_http.rs @@ -7,7 +7,8 @@ use kalam_client::models::ResponseStatus; // Once we support per-test config overrides for the global server, this can be migrated. #[tokio::test] #[ntest::timeout(300000)] // 300 seconds - user SQL commands test (fresh server + bcrypt is slow in debug) -#[ignore = "Hangs in debug mode: fresh server startup + bcrypt cost 12 exceeds timeout. Run with --release."] +#[ignore = "Hangs in debug mode: fresh server startup + bcrypt cost 12 exceeds timeout. 
Run with \ + --release."] async fn test_user_sql_commands_over_http() { super::test_support::http_server::with_http_test_server_config( |cfg| { diff --git a/backend/tests/testserver/storage/test_storage_abstraction_http.rs b/backend/tests/testserver/storage/test_storage_abstraction_http.rs index 2942899c7..611d47517 100644 --- a/backend/tests/testserver/storage/test_storage_abstraction_http.rs +++ b/backend/tests/testserver/storage/test_storage_abstraction_http.rs @@ -3,9 +3,10 @@ //! These tests intentionally go through the real HTTP API (`/v1/api/sql`) using the //! near-production server wiring from `tests/testserver/commons`. -use kalam_client::models::ResponseStatus; use std::path::PathBuf; +use kalam_client::models::ResponseStatus; + fn file_uri(path: PathBuf) -> String { #[cfg(unix)] { @@ -56,10 +57,11 @@ async fn test_storage_abstraction_over_http() -> anyhow::Result<()> { ); let response = server - .execute_sql( - "CREATE TABLE test_rocksdb_trait.test_table (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'USER')", - ) - .await?; + .execute_sql( + "CREATE TABLE test_rocksdb_trait.test_table (id BIGINT PRIMARY KEY, name TEXT) WITH \ + (TYPE = 'USER')", + ) + .await?; assert_eq!( response.status, ResponseStatus::Success, @@ -115,10 +117,20 @@ async fn test_storage_abstraction_over_http() -> anyhow::Result<()> { response.error ); - let response = server.execute_sql("CREATE TABLE test_partition_ns.table1 (id BIGINT PRIMARY KEY, data TEXT) WITH (TYPE = 'USER')").await?; + let response = server + .execute_sql( + "CREATE TABLE test_partition_ns.table1 (id BIGINT PRIMARY KEY, data TEXT) WITH (TYPE \ + = 'USER')", + ) + .await?; assert_eq!(response.status, ResponseStatus::Success); - let response = server.execute_sql("CREATE TABLE test_partition_ns.table2 (id BIGINT PRIMARY KEY, value BIGINT) WITH (TYPE = 'USER')").await?; + let response = server + .execute_sql( + "CREATE TABLE test_partition_ns.table2 (id BIGINT PRIMARY KEY, value BIGINT) WITH \ + (TYPE = 'USER')", 
+ ) + .await?; assert_eq!(response.status, ResponseStatus::Success); let _ = server diff --git a/backend/tests/testserver/storage/test_storage_management_http.rs b/backend/tests/testserver/storage/test_storage_management_http.rs index 85eb9358b..1bf4f7e30 100644 --- a/backend/tests/testserver/storage/test_storage_management_http.rs +++ b/backend/tests/testserver/storage/test_storage_management_http.rs @@ -6,9 +6,10 @@ //! - Template validation //! - Prevent dropping in-use storage -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; use kalam_client::models::ResponseStatus; +use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; + #[tokio::test] async fn test_storage_management_over_http() -> anyhow::Result<()> { let server = super::test_support::http_server::start_http_test_server().await?; @@ -55,16 +56,15 @@ async fn test_storage_management_over_http() -> anyhow::Result<()> { // Verify system.storages row let resp = server .execute_sql(&format!( - "SELECT storage_id, storage_type, storage_name FROM system.storages WHERE storage_id = '{}'", + "SELECT storage_id, storage_type, storage_name FROM system.storages WHERE \ + storage_id = '{}'", storage_id )) .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let rows = resp.rows_as_maps(); anyhow::ensure!(rows.len() == 1); - anyhow::ensure!( - rows[0].get("storage_type").and_then(|v| v.as_str()) == Some("filesystem") - ); + anyhow::ensure!(rows[0].get("storage_type").and_then(|v| v.as_str()) == Some("filesystem")); // Duplicate storage_id should error let resp = server @@ -135,14 +135,18 @@ async fn test_storage_management_over_http() -> anyhow::Result<()> { let resp = server .execute_sql(&format!( - "CREATE TABLE {}.t (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', STORAGE_ID='{}')", + "CREATE TABLE {}.t (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='{}')", ns, storage_id )) .await?; anyhow::ensure!(resp.status == 
ResponseStatus::Success); let resp = server.execute_sql(&format!("DROP STORAGE {}", storage_id)).await?; - anyhow::ensure!(resp.status == ResponseStatus::Error, "expected DROP STORAGE in-use to error"); + anyhow::ensure!( + resp.status == ResponseStatus::Error, + "expected DROP STORAGE in-use to error" + ); // After dropping table, dropping storage should succeed let resp = server.execute_sql(&format!("DROP TABLE {}.t", ns)).await?; diff --git a/backend/tests/testserver/stress/test_stress_and_memory_http.rs b/backend/tests/testserver/stress/test_stress_and_memory_http.rs index e078fbcaa..a99e97e69 100644 --- a/backend/tests/testserver/stress/test_stress_and_memory_http.rs +++ b/backend/tests/testserver/stress/test_stress_and_memory_http.rs @@ -3,12 +3,15 @@ //! These are intentionally short (seconds, not minutes) and run through the real //! HTTP surface to cover business logic without flaking CI. -use super::test_support::auth_helper::create_user_auth_header_default; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; use futures_util::future::try_join_all; use kalam_client::models::ResponseStatus; use serial_test::serial; +use super::test_support::{ + auth_helper::create_user_auth_header_default, + consolidated_helpers::{unique_namespace, unique_table}, +}; + async fn count_rows( server: &super::test_support::http_server::HttpTestServer, auth: &str, @@ -40,80 +43,77 @@ async fn count_rows( #[serial] async fn test_stress_smoke_over_http() { (async { - if std::env::var("KALAMDB_RUN_STRESS_TESTS").as_deref() != Ok("1") { - eprintln!( - "Skipping stress smoke test. Set KALAMDB_RUN_STRESS_TESTS=1 to enable." - ); - return Ok(()); - } + if std::env::var("KALAMDB_RUN_STRESS_TESTS").as_deref() != Ok("1") { + eprintln!("Skipping stress smoke test. 
Set KALAMDB_RUN_STRESS_TESTS=1 to enable."); + return Ok(()); + } - let server = super::test_support::http_server::get_global_server().await; - let ns = unique_namespace("stress"); + let server = super::test_support::http_server::get_global_server().await; + let ns = unique_namespace("stress"); - let resp = server - .execute_sql(&format!("CREATE NAMESPACE {}", ns)) - .await?; - anyhow::ensure!( - resp.status == ResponseStatus::Success, - "CREATE NAMESPACE failed: {:?}", - resp.error - ); + let resp = server.execute_sql(&format!("CREATE NAMESPACE {}", ns)).await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE NAMESPACE failed: {:?}", + resp.error + ); - let user = unique_table("stress_user"); - let auth = create_user_auth_header_default(server, &user).await?; + let user = unique_table("stress_user"); + let auth = create_user_auth_header_default(server, &user).await?; - let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.stress_data (id INT PRIMARY KEY, value TEXT) WITH (TYPE='USER', STORAGE_ID='local')", - ns - ), - &auth, - ) - .await?; - anyhow::ensure!( - resp.status == ResponseStatus::Success, - "CREATE TABLE failed: {:?}", - resp.error - ); + let resp = server + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.stress_data (id INT PRIMARY KEY, value TEXT) WITH \ + (TYPE='USER', STORAGE_ID='local')", + ns + ), + &auth, + ) + .await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "CREATE TABLE failed: {:?}", + resp.error + ); - // Concurrent writers (small, deterministic). 
- let writer_futures = (0..3).map(|writer| { - let ns = ns.clone(); - let auth = auth.clone(); - async move { - for j in 0..8 { - let id = writer * 100 + j; - let sql = format!( - "INSERT INTO {}.stress_data (id, value) VALUES ({}, 'w{}-{}')", - ns, id, writer, j - ); - let resp = server.execute_sql_with_auth(&sql, &auth).await?; - anyhow::ensure!( - resp.status == ResponseStatus::Success, - "insert failed: {:?}", - resp.error - ); + // Concurrent writers (small, deterministic). + let writer_futures = (0..3).map(|writer| { + let ns = ns.clone(); + let auth = auth.clone(); + async move { + for j in 0..8 { + let id = writer * 100 + j; + let sql = format!( + "INSERT INTO {}.stress_data (id, value) VALUES ({}, 'w{}-{}')", + ns, id, writer, j + ); + let resp = server.execute_sql_with_auth(&sql, &auth).await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "insert failed: {:?}", + resp.error + ); + } + anyhow::Ok(()) } - anyhow::Ok(()) - } - }); - try_join_all(writer_futures).await?; + }); + try_join_all(writer_futures).await?; - let cnt = count_rows(server, &auth, &ns, "stress_data").await?; - anyhow::ensure!(cnt == 24, "expected 24 rows, got {}", cnt); + let cnt = count_rows(server, &auth, &ns, "stress_data").await?; + anyhow::ensure!(cnt == 24, "expected 24 rows, got {}", cnt); - // Basic cleanup: DROP TABLE should succeed. - // Note: DROP TABLE currently requires admin privileges. - let resp = server.execute_sql(&format!("DROP TABLE {}.stress_data", ns)).await?; - anyhow::ensure!( - resp.status == ResponseStatus::Success, - "DROP TABLE failed: {:?}", - resp.error - ); + // Basic cleanup: DROP TABLE should succeed. + // Note: DROP TABLE currently requires admin privileges. 
+ let resp = server.execute_sql(&format!("DROP TABLE {}.stress_data", ns)).await?; + anyhow::ensure!( + resp.status == ResponseStatus::Success, + "DROP TABLE failed: {:?}", + resp.error + ); - Ok(()) + Ok(()) }) - .await - .expect("test_stress_smoke_over_http"); + .await + .expect("test_stress_smoke_over_http"); } diff --git a/backend/tests/testserver/subscription/test_live_query_deletes.rs b/backend/tests/testserver/subscription/test_live_query_deletes.rs index 74e2ab352..60075fdbe 100644 --- a/backend/tests/testserver/subscription/test_live_query_deletes.rs +++ b/backend/tests/testserver/subscription/test_live_query_deletes.rs @@ -1,10 +1,10 @@ //! Integration test for Live Query DELETE detection via WebSocket -use super::test_support::consolidated_helpers::unique_namespace; -use kalam_client::models::ChangeEvent; -use kalam_client::models::ResponseStatus; +use kalam_client::models::{ChangeEvent, ResponseStatus}; use tokio::time::Duration; +use super::test_support::consolidated_helpers::unique_namespace; + /// Test DELETE detection #[tokio::test] async fn test_live_query_detects_deletes() -> anyhow::Result<()> { diff --git a/backend/tests/testserver/subscription/test_live_query_inserts.rs b/backend/tests/testserver/subscription/test_live_query_inserts.rs index fb634e93b..1a03ed7c9 100644 --- a/backend/tests/testserver/subscription/test_live_query_inserts.rs +++ b/backend/tests/testserver/subscription/test_live_query_inserts.rs @@ -1,11 +1,11 @@ //! 
Integration test for Live Query INSERT detection via WebSocket -use super::test_support::consolidated_helpers::unique_namespace; use futures_util::StreamExt; -use kalam_client::models::ChangeEvent; -use kalam_client::models::ResponseStatus; +use kalam_client::models::{ChangeEvent, ResponseStatus}; use tokio::time::Duration; +use super::test_support::consolidated_helpers::unique_namespace; + /// Test basic INSERT detection via live query subscription #[tokio::test] async fn test_live_query_detects_inserts() -> anyhow::Result<()> { @@ -43,13 +43,17 @@ async fn test_live_query_detects_inserts() -> anyhow::Result<()> { // Insert 10 rows for i in 0..10 { let resp = server - .execute_sql( - &format!( - "INSERT INTO {}.{} (id, content, priority, created_at) VALUES ('msg{}', 'Content {}', {}, {})", - ns, table, i, i, i % 3, 1000 + i - ) - ) - .await?; + .execute_sql(&format!( + "INSERT INTO {}.{} (id, content, priority, created_at) VALUES ('msg{}', 'Content \ + {}', {}, {})", + ns, + table, + i, + i, + i % 3, + 1000 + i + )) + .await?; assert_eq!(resp.status, ResponseStatus::Success); } diff --git a/backend/tests/testserver/subscription/test_live_query_updates.rs b/backend/tests/testserver/subscription/test_live_query_updates.rs index 70eeee486..71d020774 100644 --- a/backend/tests/testserver/subscription/test_live_query_updates.rs +++ b/backend/tests/testserver/subscription/test_live_query_updates.rs @@ -1,10 +1,10 @@ //! 
Integration test for Live Query UPDATE detection via WebSocket -use super::test_support::consolidated_helpers::unique_namespace; -use kalam_client::models::ChangeEvent; -use kalam_client::models::ResponseStatus; +use kalam_client::models::{ChangeEvent, ResponseStatus}; use tokio::time::Duration; +use super::test_support::consolidated_helpers::unique_namespace; + /// Test UPDATE detection with old/new values #[tokio::test] async fn test_live_query_detects_updates() -> anyhow::Result<()> { @@ -34,13 +34,12 @@ async fn test_live_query_detects_updates() -> anyhow::Result<()> { // Insert initial row let resp = server - .execute_sql( - &format!( - "INSERT INTO {}.{} (id, title, status, updated_at) VALUES ('task1', 'Test Task', 'pending', 1000)", - ns, table - ) - ) - .await?; + .execute_sql(&format!( + "INSERT INTO {}.{} (id, title, status, updated_at) VALUES ('task1', 'Test Task', \ + 'pending', 1000)", + ns, table + )) + .await?; assert_eq!(resp.status, ResponseStatus::Success); // Connect using the kalam-client SDK @@ -156,7 +155,8 @@ async fn test_live_query_detects_updates_with_like_filter() -> anyhow::Result<() let resp = server .execute_sql(&format!( - "INSERT INTO {}.{} (id, metric_name, metric_value, updated_at) VALUES ('metric1', 'open_files_other', 10, 1000)", + "INSERT INTO {}.{} (id, metric_name, metric_value, updated_at) VALUES ('metric1', \ + 'open_files_other', 10, 1000)", ns, table )) .await?; diff --git a/backend/tests/testserver/subscription/test_stream_ttl_eviction_sql.rs b/backend/tests/testserver/subscription/test_stream_ttl_eviction_sql.rs index 3ceb654e3..dc8cbf38f 100644 --- a/backend/tests/testserver/subscription/test_stream_ttl_eviction_sql.rs +++ b/backend/tests/testserver/subscription/test_stream_ttl_eviction_sql.rs @@ -3,10 +3,11 @@ //! This test executes the SQL script from test_stream_ttl.sql to validate //! that stream tables with TTL properly evict old events. 
-use super::test_support::consolidated_helpers::unique_namespace; use kalam_client::models::ResponseStatus; use tokio::time::{sleep, Duration}; +use super::test_support::consolidated_helpers::unique_namespace; + /// Test stream table TTL eviction using the SQL script approach #[tokio::test] async fn test_stream_ttl_eviction_from_sql_script() -> anyhow::Result<()> { diff --git a/backend/tests/testserver/system/test_system_tables_http.rs b/backend/tests/testserver/system/test_system_tables_http.rs index 33d4ea39a..9fc58997b 100644 --- a/backend/tests/testserver/system/test_system_tables_http.rs +++ b/backend/tests/testserver/system/test_system_tables_http.rs @@ -1,10 +1,13 @@ //! System tables smoke coverage over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header_default; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::flush::flush_table_and_wait; use kalam_client::models::ResponseStatus; +use super::test_support::{ + auth_helper::create_user_auth_header_default, + consolidated_helpers::{unique_namespace, unique_table}, + flush::flush_table_and_wait, +}; + #[tokio::test] async fn test_system_tables_queryable_over_http() -> anyhow::Result<()> { let server = super::test_support::http_server::get_global_server().await; @@ -19,22 +22,24 @@ async fn test_system_tables_queryable_over_http() -> anyhow::Result<()> { let auth = create_user_auth_header_default(server, &user).await?; let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:5')", - ns, table_user - ), - &auth, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:5')", + ns, table_user + ), + &auth, + ) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let resp = server - 
.execute_sql(&format!( - "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', STORAGE_ID='local', FLUSH_POLICY='rows:5')", - ns, table_shared - )) - .await?; + .execute_sql(&format!( + "CREATE TABLE {}.{} (id INT PRIMARY KEY, v TEXT) WITH (TYPE='SHARED', \ + STORAGE_ID='local', FLUSH_POLICY='rows:5')", + ns, table_shared + )) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let resp = server @@ -64,11 +69,12 @@ async fn test_system_tables_queryable_over_http() -> anyhow::Result<()> { // system.schemas let resp = server - .execute_sql(&format!( - "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY table_name", - ns - )) - .await?; + .execute_sql(&format!( + "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY \ + table_name", + ns + )) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let rows = resp.rows_as_maps(); anyhow::ensure!(rows.len() >= 2); diff --git a/backend/tests/testserver/tables/test_shared_tables_http.rs b/backend/tests/testserver/tables/test_shared_tables_http.rs index 62ed9cbfa..ebe184f5c 100644 --- a/backend/tests/testserver/tables/test_shared_tables_http.rs +++ b/backend/tests/testserver/tables/test_shared_tables_http.rs @@ -1,13 +1,14 @@ //! Shared table lifecycle tests over the real HTTP SQL API. 
-use super::test_support::consolidated_helpers::unique_namespace; -use super::test_support::flush::{flush_table_and_wait, wait_for_parquet_files_for_table}; -use super::test_support::jobs::{ - extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent, -}; use kalam_client::models::ResponseStatus; use tokio::time::Duration; +use super::test_support::{ + consolidated_helpers::unique_namespace, + flush::{flush_table_and_wait, wait_for_parquet_files_for_table}, + jobs::{extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent}, +}; + #[tokio::test] async fn test_shared_tables_lifecycle_over_http() -> anyhow::Result<()> { let _guard = super::test_support::http_server::acquire_test_lock().await; @@ -21,11 +22,12 @@ async fn test_shared_tables_lifecycle_over_http() -> anyhow::Result<()> { let _ = server.execute_sql(&format!("DROP TABLE {}.{}", ns, table)).await; let resp = server - .execute_sql(&format!( - "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, entry TEXT, level INT) WITH (TYPE='SHARED', STORAGE_ID='local', FLUSH_POLICY='rows:50')", - ns, table - )) - .await?; + .execute_sql(&format!( + "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, entry TEXT, level INT) WITH (TYPE='SHARED', \ + STORAGE_ID='local', FLUSH_POLICY='rows:50')", + ns, table + )) + .await?; anyhow::ensure!( resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", diff --git a/backend/tests/testserver/tables/test_stream_tables_http.rs b/backend/tests/testserver/tables/test_stream_tables_http.rs index 1577208bb..718309344 100644 --- a/backend/tests/testserver/tables/test_stream_tables_http.rs +++ b/backend/tests/testserver/tables/test_stream_tables_http.rs @@ -1,8 +1,9 @@ //! Stream table DML checks over the real HTTP SQL API. 
-use super::test_support::consolidated_helpers::unique_namespace; use kalam_client::models::ResponseStatus; +use super::test_support::consolidated_helpers::unique_namespace; + async fn create_stream_table( server: &super::test_support::http_server::HttpTestServer, namespace: &str, diff --git a/backend/tests/testserver/tables/test_user_tables_http.rs b/backend/tests/testserver/tables/test_user_tables_http.rs index 3f70d6772..81ab18f0d 100644 --- a/backend/tests/testserver/tables/test_user_tables_http.rs +++ b/backend/tests/testserver/tables/test_user_tables_http.rs @@ -1,15 +1,16 @@ //! User table lifecycle + isolation tests over the real HTTP SQL API. -use super::test_support::auth_helper::create_user_auth_header_default; -use super::test_support::consolidated_helpers::{unique_namespace, unique_table}; -use super::test_support::flush::{flush_table_and_wait, wait_for_parquet_files_for_user_table}; -use super::test_support::http_server::HttpTestServer; -use super::test_support::jobs::{ - extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent, -}; use kalam_client::models::ResponseStatus; use tokio::time::Duration; +use super::test_support::{ + auth_helper::create_user_auth_header_default, + consolidated_helpers::{unique_namespace, unique_table}, + flush::{flush_table_and_wait, wait_for_parquet_files_for_user_table}, + http_server::HttpTestServer, + jobs::{extract_cleanup_job_id, wait_for_job_completion, wait_for_path_absent}, +}; + async fn lookup_user_id(server: &HttpTestServer, username: &str) -> anyhow::Result { let resp = server .execute_sql(&format!("SELECT user_id FROM system.users WHERE user_id='{}'", username)) @@ -53,14 +54,15 @@ async fn test_user_tables_lifecycle_and_isolation_over_http() -> anyhow::Result< // ----------------------------------------------------------------- { let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.notes (id TEXT PRIMARY KEY, content TEXT, priority INT) WITH (TYPE='USER', 
STORAGE_ID='local', FLUSH_POLICY='rows:50')", - ns - ), - &auth1, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.notes (id TEXT PRIMARY KEY, content TEXT, priority INT) WITH \ + (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:50')", + ns + ), + &auth1, + ) + .await?; anyhow::ensure!( resp.status == ResponseStatus::Success, "CREATE TABLE failed: {:?}", @@ -165,14 +167,15 @@ async fn test_user_tables_lifecycle_and_isolation_over_http() -> anyhow::Result< let _ = server.execute_sql(&format!("DROP TABLE {}.{}", ns, table)).await; let resp = server - .execute_sql_with_auth( - &format!( - "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, content TEXT) WITH (TYPE='USER', STORAGE_ID='local', FLUSH_POLICY='rows:2')", - ns, table - ), - &auth1, - ) - .await?; + .execute_sql_with_auth( + &format!( + "CREATE TABLE {}.{} (id TEXT PRIMARY KEY, content TEXT) WITH (TYPE='USER', \ + STORAGE_ID='local', FLUSH_POLICY='rows:2')", + ns, table + ), + &auth1, + ) + .await?; anyhow::ensure!(resp.status == ResponseStatus::Success); let resp = server diff --git a/benchv2/Cargo.lock b/benchv2/Cargo.lock index 19b741ec2..4d0d068a8 100644 --- a/benchv2/Cargo.lock +++ b/benchv2/Cargo.lock @@ -206,9 +206,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -946,7 +946,7 @@ dependencies = [ [[package]] name = 
"kalam-client" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "link-common", ] @@ -969,7 +969,7 @@ dependencies = [ [[package]] name = "kalamdb-commons" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "dashmap", "hex", @@ -997,7 +997,7 @@ checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "link-common" -version = "0.4.2-rc.3" +version = "0.4.3-rc.4" dependencies = [ "aws-lc-rs", "base64", @@ -1847,9 +1847,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.51.1" +version = "1.52.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" +checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" dependencies = [ "bytes", "libc", diff --git a/benchv2/Cargo.toml b/benchv2/Cargo.toml index 591195626..294b781a2 100644 --- a/benchv2/Cargo.toml +++ b/benchv2/Cargo.toml @@ -13,11 +13,11 @@ path = "src/main.rs" kalam-client = { path = "../link/kalam-client", default-features = false, features = ["native-bench"] } # benchv2 is excluded from the root workspace, so this must stay aligned with # the Tokio version inherited by linked workspace crates like `link-common`. 
-tokio = { version = "1.51.1", features = ["rt-multi-thread", "macros", "sync", "time"] } +tokio = { version = "1.52.1", features = ["rt-multi-thread", "macros", "sync", "time"] } serde = { version = "1", features = ["derive"] } serde_json = "1" chrono = { version = "0.4", features = ["serde"] } -clap = { version = "4.6.0", features = ["derive", "env"] } +clap = { version = "4.6.1", features = ["derive", "env"] } rand = "0.10.1" sysinfo = "0.38.4" hostname = "0.4" diff --git a/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.html b/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.html new file mode 100644 index 000000000..a08973c4c --- /dev/null +++ b/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.html @@ -0,0 +1,671 @@ + + + + + +KalamDB Benchmark Report + + + + +
+ +
+

KalamDB Benchmark Report

+
Performance analysis — v0.4.2-rc.3
+
+ http://127.0.0.1:8080 + 2026-04-26T18:40:14.807395+00:00 + 100 iters + 5 warmup + 10 concurrency + 100.0K max subs +
+
+ +
+
+
38
+
Total Benchmarks
+
+
+
38
+
Passed
+
+
+
0
+
Failed
+
+
Verdict Mix
32Excellent
5Acceptable
1Slow
successful benchmarks only
+
Compared To Previous
4Faster
5Same
29Slower
against 2026-04-21T17:18:42.418760+00:00
+
+
252.149s
+
Measured Benchmark Time
+
+
+
288.719s
+
Wall Clock Duration
+
+
+ +
+
System Information
+
+
Hostname
Jamals-MacBook-Pro.local
+
Machine Model
MacBookPro18,3
+
CPU Model
Apple M1 Pro
+
CPU Cores
10 logical / 10 physical
+
Total Memory
16.00 GiB
+
Available Memory
10.45 GiB
+
Used Memory
5.55 GiB
+
Memory Usage
34.7%
+
OS
Darwin macOS 26.5
+
Kernel
25.5.0
+
Architecture
aarch64
+
+
+ +
+
+

Latency by Operation (µs)

+ +
+
+

Throughput (ops/sec)

+ +
+
+

Avg Latency by Category (µs)

+ +
+
+ +
Detailed Results
+
+
+ + + + + + + + + + + + + + + + + + + + + + + +
StatusBenchmarkCategoryDescriptionItersMeanP50P95P99MinMaxOps/secTotalVerdictvs Prev
PASScreate_tableDDL
CREATE TABLE with 3 columns
100531µs471µs881µs1.19ms364µs1.44ms1.9K53.1ms🟢 Excellent↑99% faster
PASSdrop_tableDDL
DROP TABLE on a previously created table
1004.28ms4.32ms5.46ms5.59ms2.69ms5.67ms234428.1ms🟢 Excellent↑96% faster
PASSsingle_insertInsert
INSERT a single row into a table
100384µs335µs634µs1.05ms257µs1.06ms2.6K38.4ms🟢 Excellent↓15% slower
PASSbulk_insertInsert
One INSERT statement with 50 rows (statement-scoped transaction)
1001.29ms1.25ms1.78ms1.91ms907µs2.09ms777128.7ms🟢 Excellent↑6% faster
PASStransaction_multi_insertInsert
Explicit BEGIN/COMMIT with 50 single-row INSERT statements
1004.22ms4.13ms5.28ms5.74ms3.12ms6.03ms237421.5ms🟡 Acceptable↓46% slower
PASSselect_allSelect
SELECT * from a 200-row table
1001.53ms1.22ms3.56ms4.95ms844µs6.89ms654152.9ms🟢 Excellent↓93% slower
PASSselect_by_filterSelect
SELECT with WHERE clause on a 200-row table
100647µs568µs1.24ms1.38ms382µs1.65ms1.5K64.7ms🟢 Excellent↓51% slower
PASSselect_countSelect
SELECT COUNT(*) on a 200-row table
100998µs972µs1.41ms1.51ms667µs1.56ms1.0K99.8ms🟢 Excellent↓52% slower
PASSselect_order_by_limitSelect
SELECT with ORDER BY + LIMIT 10 on a 200-row table
1001.21ms1.21ms1.58ms1.75ms810µs1.85ms823121.5ms🟢 Excellent↓55% slower
PASSsingle_updateUpdate
UPDATE a single row by filter condition
1001.54ms1.56ms1.98ms2.09ms1.06ms2.14ms647154.4ms🟢 Excellent↓298% slower
PASSsingle_deleteDelete
DELETE a single row by filter condition
1002.13ms2.04ms2.66ms2.83ms1.57ms3.02ms470212.6ms🟡 Acceptable↓600% slower
PASSconcurrent_insertConcurrent
N concurrent INSERT operations in parallel (N = concurrency setting)
1001.69ms1.53ms2.37ms2.55ms1.17ms5.86ms591169.2ms🟢 Excellent↓45% slower
PASSconcurrent_selectConcurrent
N concurrent SELECT operations in parallel (N = concurrency setting)
1001.39ms1.31ms2.02ms2.56ms903µs2.85ms719139.0ms🟢 Excellent↓19% slower
PASSpoint_lookupSelect
SELECT by primary key from a 10K-row table (single row lookup)
Baselinephase-0 performanceQuery Classprimary-key lookupDataset10000 seeded rowsQuery ShapeSELECT * FROM <ns>.point_lookup WHERE id = ?
100591µs521µs1.11ms1.19ms355µs1.32ms1.7K59.1ms🟢 Excellent↓55% slower
PASSaggregate_querySelect
GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)
10034.9ms34.9ms36.8ms37.1ms32.3ms37.2ms293.487s🔴 Slow↓28% slower
PASSmulti_table_joinSelect
SELECT with subquery across two tables (200 customers, 1000 orders)
1004.44ms4.45ms4.99ms5.26ms3.34ms5.40ms225443.6ms🟢 Excellent↓48% slower
PASSlarge_payload_insertInsert
INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)
1002.08ms2.03ms2.59ms2.70ms1.59ms2.85ms480208.3ms🟢 Excellent↓71% slower
PASSwide_column_insertInsert
INSERT into a 20-column table (wide schema overhead)
1001.15ms1.11ms1.66ms1.73ms838µs1.80ms867115.3ms🟢 Excellent↑31% faster
PASSbulk_deleteDelete
DELETE 100 rows at once with a range filter (bulk deletion)
10081.5ms81.8ms88.8ms92.3ms71.5ms126.3ms128.150s🟡 Acceptable↓19% slower
PASSsequential_crudDML
INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration
1003.63ms3.59ms4.81ms5.22ms2.04ms12.8ms275363.1ms🟢 Excellent↓138% slower
PASSalter_tableDDL
ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)
1001.94ms1.88ms2.75ms2.99ms1.13ms3.39ms515194.2ms🟢 Excellent↓63% slower
PASSconcurrent_updateConcurrent
N concurrent UPDATE operations on the same table (write contention test)
1004.27ms4.11ms5.86ms6.84ms2.23ms8.81ms234427.0ms🟢 Excellent↓202% slower
PASSconcurrent_mixed_dmlConcurrent
Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)
1004.17ms4.21ms5.11ms5.93ms2.57ms6.07ms240417.2ms🟢 Excellent↓49% slower
PASSnamespace_isolationConcurrent
Concurrent queries across 5 different namespaces (isolation test)
1002.77ms2.27ms3.49ms14.3ms1.77ms16.3ms361277.3ms🟢 Excellent↓57% slower
PASSsubscribe_initial_loadSubscribe
Subscribe to a 1000-row user table and receive the full initial data batch
1008.73ms8.74ms9.26ms9.88ms7.62ms10.1ms115873.0ms🟢 Excellent↓102% slower
PASSsubscribe_change_latencySubscribe
Latency from INSERT to subscriber receiving the change notification
10049.6ms49.6ms53.5ms56.6ms42.5ms58.4ms204.957s🟡 Acceptable~ 49414µs prior
PASSreconnect_subscribeSubscribe
Disconnect and re-subscribe to a user table (reconnection overhead)
10011.0ms11.1ms11.9ms12.1ms9.35ms12.4ms911.105s🟢 Excellent↓14% slower
PASSflushed_parquet_queryStorage
SELECT from a shared table with 20 flushed Parquet files (200K rows)
100189.1ms223.9ms318.5ms329.6ms28.3ms346.7ms518.909s🟡 Acceptable↓24% slower
PASSconcurrent_subscribersLoad
N WebSocket live-query subscribers receiving changes from concurrent writes
1001.567s1.575s1.599s1.607s1.523s1.612s12.61m🟢 Excellent~ 1546624µs prior
PASSconcurrent_publishersLoad
N concurrent INSERTs into a topic-sourced table (measures publish overhead)
1001.68ms1.61ms2.12ms2.45ms1.33ms4.00ms595168.1ms🟢 Excellent↓36% slower
PASSconcurrent_consumersLoad
N concurrent topic CONSUME calls pulling messages in parallel
100662µs656µs762µs858µs556µs898µs1.5K66.2ms🟢 Excellent~ 646µs prior
PASSsql_1k_concurrentLoad
1000 concurrent SQL SELECT queries at once (RPS degradation test)
Baselinephase-0 performanceQuery Classconcurrent read burstDataset500 seeded rowsBurst1000 concurrent SQL queriesQuery Mixpk lookup, count, selective order-by limit, narrow projection
100171.6ms171.8ms177.0ms179.4ms164.7ms184.4ms617.163s🟢 Excellent↓11% slower
PASScreate_userLoad
CREATE USER (auth subsystem stress test)
1001.39ms1.38ms1.60ms1.78ms1.21ms1.87ms722138.6ms🟢 Excellent↓10% slower
PASSdrop_userLoad
DROP USER (auth subsystem teardown stress test)
100280µs268µs405µs445µs194µs495µs3.6K28.0ms🟢 Excellent~ 286µs prior
PASSconnection_stormLoad
N simultaneous login + SQL + cycles (connection setup overhead)
100318.9ms316.3ms338.2ms370.7ms304.7ms378.9ms331.890s🟢 Excellent↓13% slower
PASSmixed_read_writeLoad
50/50 concurrent reads + writes on same table (contention test)
Baselinephase-0 performanceQuery Classmixed concurrent read/writeDataset200 seeded rowsMix5 reads / 5 writesRead Shapesrange filter, count-star, order-by desc limit
1002.41ms2.30ms3.32ms6.26ms1.51ms6.76ms415241.2ms🟢 Excellent↓6% slower
PASSwide_fanout_queryLoad
N concurrent large-result-set SELECTs (serialization + memory pressure)
1007.06ms6.54ms11.8ms14.0ms5.81ms15.1ms142705.9ms🟢 Excellent↓15% slower
PASSsubscriber_scaleScale
Progressive live-query subscriber scale and insert fanout verification up to 100.0K
Max100.0KTiers10 checkpoints to 100.0KBatch/Wave1.0K / 500Pause/Timeout0ms / 30.0sShared WS1.0K conns @ 100 subs/ws across 1 targetDelivery Checksall tiers to 10.0K + 25.0K/50.0K/100.0K
12.836s2.836s2.836s2.836s2.836s2.836s02.836s🟢 Excellent~ 2857025µs prior
Whole Bench Totals252.149sWall clock 288.719s
+ + + + +
+ KalamDB v0.4.2-rc.3 — Generated 2026-04-26T18:40:14.807395+00:00 +
+ + + + + + diff --git a/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.json b/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.json new file mode 100644 index 000000000..49eac70d9 --- /dev/null +++ b/benchv2/results/bench-2026-04-26-184014-0-4-2-rc-3.json @@ -0,0 +1,842 @@ +{ + "version": "0.4.2-rc.3", + "server_url": "http://127.0.0.1:8080", + "timestamp": "2026-04-26T18:40:14.806927+00:00", + "config": { + "iterations": 100, + "warmup": 5, + "concurrency": 10, + "namespace": "bench_183525", + "max_subscribers": 100000 + }, + "system": { + "hostname": "Jamals-MacBook-Pro.local", + "machine_model": "MacBookPro18,3", + "os_name": "Darwin", + "os_version": "macOS 26.5", + "kernel_version": "25.5.0", + "architecture": "aarch64", + "cpu_model": "Apple M1 Pro", + "cpu_logical_cores": 10, + "cpu_physical_cores": 10, + "total_memory_bytes": 17179869184, + "available_memory_bytes": 11218485248, + "used_memory_bytes": 5961383936, + "used_memory_percent": 34.69982147216797 + }, + "results": [ + { + "name": "create_table", + "category": "DDL", + "description": "CREATE TABLE with 3 columns", + "full_description": "CREATE TABLE with 3 columns", + "details": [], + "iterations": 100, + "total_us": 53142, + "mean_us": 531.42, + "median_us": 471.0, + "p95_us": 880.8999999999996, + "p99_us": 1194.5000000000014, + "min_us": 364.0, + "max_us": 1442.0, + "stddev_us": 189.49114102050402, + "ops_per_sec": 1881.750780926574, + "success": true, + "error": null + }, + { + "name": "drop_table", + "category": "DDL", + "description": "DROP TABLE on a previously created table", + "full_description": "DROP TABLE on a previously created table", + "details": [], + "iterations": 100, + "total_us": 428086, + "mean_us": 4280.86, + "median_us": 4316.5, + "p95_us": 5462.05, + "p99_us": 5594.77, + "min_us": 2693.0, + "max_us": 5671.0, + "stddev_us": 710.0008965707813, + "ops_per_sec": 233.59792191288665, + "success": true, + "error": null + }, + { + "name": "single_insert", + "category": 
"Insert", + "description": "INSERT a single row into a table", + "full_description": "INSERT a single row into a table", + "details": [], + "iterations": 100, + "total_us": 38433, + "mean_us": 384.33, + "median_us": 335.0, + "p95_us": 634.3999999999997, + "p99_us": 1051.11, + "min_us": 257.0, + "max_us": 1062.0, + "stddev_us": 147.75697047257137, + "ops_per_sec": 2601.9306325293364, + "success": true, + "error": null + }, + { + "name": "bulk_insert", + "category": "Insert", + "description": "One INSERT statement with 50 rows (statement-scoped transaction)", + "full_description": "One INSERT statement with 50 rows (statement-scoped transaction)", + "details": [], + "iterations": 100, + "total_us": 128695, + "mean_us": 1286.95, + "median_us": 1249.0, + "p95_us": 1775.6499999999999, + "p99_us": 1911.800000000001, + "min_us": 907.0, + "max_us": 2090.0, + "stddev_us": 242.25824667244683, + "ops_per_sec": 777.0309646839427, + "success": true, + "error": null + }, + { + "name": "transaction_multi_insert", + "category": "Insert", + "description": "Explicit BEGIN/COMMIT with 50 single-row INSERT statements", + "full_description": "Explicit BEGIN/COMMIT with 50 single-row INSERT statements", + "details": [], + "iterations": 100, + "total_us": 421511, + "mean_us": 4215.11, + "median_us": 4134.0, + "p95_us": 5279.2, + "p99_us": 5737.980000000001, + "min_us": 3123.0, + "max_us": 6033.0, + "stddev_us": 565.2787976661684, + "ops_per_sec": 237.24173271871908, + "success": true, + "error": null + }, + { + "name": "select_all", + "category": "Select", + "description": "SELECT * from a 200-row table", + "full_description": "SELECT * from a 200-row table", + "details": [], + "iterations": 100, + "total_us": 152926, + "mean_us": 1529.26, + "median_us": 1224.5, + "p95_us": 3563.199999999998, + "p99_us": 4951.62000000001, + "min_us": 844.0, + "max_us": 6894.0, + "stddev_us": 991.530992705195, + "ops_per_sec": 653.9110419418543, + "success": true, + "error": null + }, + { + "name": 
"select_by_filter", + "category": "Select", + "description": "SELECT with WHERE clause on a 200-row table", + "full_description": "SELECT with WHERE clause on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 64667, + "mean_us": 646.67, + "median_us": 568.5, + "p95_us": 1235.9499999999998, + "p99_us": 1376.7800000000013, + "min_us": 382.0, + "max_us": 1652.0, + "stddev_us": 234.0405755635142, + "ops_per_sec": 1546.3837815268994, + "success": true, + "error": null + }, + { + "name": "select_count", + "category": "Select", + "description": "SELECT COUNT(*) on a 200-row table", + "full_description": "SELECT COUNT(*) on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 99840, + "mean_us": 998.4, + "median_us": 972.0, + "p95_us": 1407.85, + "p99_us": 1513.46, + "min_us": 667.0, + "max_us": 1559.0, + "stddev_us": 204.17347545851317, + "ops_per_sec": 1001.6025641025641, + "success": true, + "error": null + }, + { + "name": "select_order_by_limit", + "category": "Select", + "description": "SELECT with ORDER BY + LIMIT 10 on a 200-row table", + "full_description": "SELECT with ORDER BY + LIMIT 10 on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 121494, + "mean_us": 1214.94, + "median_us": 1206.0, + "p95_us": 1583.9499999999998, + "p99_us": 1748.9800000000005, + "min_us": 810.0, + "max_us": 1846.0, + "stddev_us": 207.37049360061235, + "ops_per_sec": 823.0859137076728, + "success": true, + "error": null + }, + { + "name": "single_update", + "category": "Update", + "description": "UPDATE a single row by filter condition", + "full_description": "UPDATE a single row by filter condition", + "details": [], + "iterations": 100, + "total_us": 154450, + "mean_us": 1544.5, + "median_us": 1562.5, + "p95_us": 1978.05, + "p99_us": 2094.4200000000005, + "min_us": 1056.0, + "max_us": 2136.0, + "stddev_us": 246.72446087885083, + "ops_per_sec": 647.4587245063127, + "success": true, + "error": null + }, + { + "name": 
"single_delete", + "category": "Delete", + "description": "DELETE a single row by filter condition", + "full_description": "DELETE a single row by filter condition", + "details": [], + "iterations": 100, + "total_us": 212642, + "mean_us": 2126.42, + "median_us": 2042.0, + "p95_us": 2662.1499999999996, + "p99_us": 2831.920000000001, + "min_us": 1575.0, + "max_us": 3022.0, + "stddev_us": 306.4199330225359, + "ops_per_sec": 470.2739816216928, + "success": true, + "error": null + }, + { + "name": "concurrent_insert", + "category": "Concurrent", + "description": "N concurrent INSERT operations in parallel (N = concurrency setting)", + "full_description": "N concurrent INSERT operations in parallel (N = concurrency setting)", + "details": [], + "iterations": 100, + "total_us": 169160, + "mean_us": 1691.6, + "median_us": 1526.5, + "p95_us": 2369.85, + "p99_us": 2546.4600000000173, + "min_us": 1170.0, + "max_us": 5859.0, + "stddev_us": 544.881582909479, + "ops_per_sec": 591.1563017261764, + "success": true, + "error": null + }, + { + "name": "concurrent_select", + "category": "Concurrent", + "description": "N concurrent SELECT operations in parallel (N = concurrency setting)", + "full_description": "N concurrent SELECT operations in parallel (N = concurrency setting)", + "details": [], + "iterations": 100, + "total_us": 138997, + "mean_us": 1389.97, + "median_us": 1306.5, + "p95_us": 2015.75, + "p99_us": 2556.9400000000014, + "min_us": 903.0, + "max_us": 2848.0, + "stddev_us": 332.25861459064834, + "ops_per_sec": 719.4399879134081, + "success": true, + "error": null + }, + { + "name": "point_lookup", + "category": "Select", + "description": "SELECT by primary key from a 10K-row table (single row lookup)", + "full_description": "SELECT by primary key from a 10K-row table (single row lookup)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "primary-key lookup" + }, + { + "label": "Dataset", + "value": 
"10000 seeded rows" + }, + { + "label": "Query Shape", + "value": "SELECT * FROM .point_lookup WHERE id = ?" + } + ], + "iterations": 100, + "total_us": 59057, + "mean_us": 590.57, + "median_us": 521.0, + "p95_us": 1114.3, + "p99_us": 1191.2600000000007, + "min_us": 355.0, + "max_us": 1316.0, + "stddev_us": 200.36276469131553, + "ops_per_sec": 1693.2793741639434, + "success": true, + "error": null + }, + { + "name": "aggregate_query", + "category": "Select", + "description": "GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)", + "full_description": "GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)", + "details": [], + "iterations": 100, + "total_us": 3487159, + "mean_us": 34871.59, + "median_us": 34868.5, + "p95_us": 36772.15, + "p99_us": 37148.59, + "min_us": 32347.0, + "max_us": 37207.0, + "stddev_us": 1136.8016812642616, + "ops_per_sec": 28.67663906348979, + "success": true, + "error": null + }, + { + "name": "multi_table_join", + "category": "Select", + "description": "SELECT with subquery across two tables (200 customers, 1000 orders)", + "full_description": "SELECT with subquery across two tables (200 customers, 1000 orders)", + "details": [], + "iterations": 100, + "total_us": 443620, + "mean_us": 4436.2, + "median_us": 4446.5, + "p95_us": 4986.400000000001, + "p99_us": 5262.420000000001, + "min_us": 3337.0, + "max_us": 5403.0, + "stddev_us": 381.17677968855526, + "ops_per_sec": 225.4181506694919, + "success": true, + "error": null + }, + { + "name": "large_payload_insert", + "category": "Insert", + "description": "INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)", + "full_description": "INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)", + "details": [], + "iterations": 100, + "total_us": 208259, + "mean_us": 2082.59, + "median_us": 2034.0, + "p95_us": 2592.0, + "p99_us": 2701.520000000001, + "min_us": 1586.0, + "max_us": 2852.0, + "stddev_us": 
277.1162136335329, + "ops_per_sec": 480.17132512880596, + "success": true, + "error": null + }, + { + "name": "wide_column_insert", + "category": "Insert", + "description": "INSERT into a 20-column table (wide schema overhead)", + "full_description": "INSERT into a 20-column table (wide schema overhead)", + "details": [], + "iterations": 100, + "total_us": 115289, + "mean_us": 1152.89, + "median_us": 1113.0, + "p95_us": 1656.2, + "p99_us": 1731.7300000000005, + "min_us": 838.0, + "max_us": 1804.0, + "stddev_us": 224.929254646788, + "ops_per_sec": 867.385440068003, + "success": true, + "error": null + }, + { + "name": "bulk_delete", + "category": "Delete", + "description": "DELETE 100 rows at once with a range filter (bulk deletion)", + "full_description": "DELETE 100 rows at once with a range filter (bulk deletion)", + "details": [], + "iterations": 100, + "total_us": 8149810, + "mean_us": 81498.1, + "median_us": 81768.0, + "p95_us": 88798.55, + "p99_us": 92343.77000000016, + "min_us": 71528.0, + "max_us": 126278.0, + "stddev_us": 6796.565395978926, + "ops_per_sec": 12.270224704625015, + "success": true, + "error": null + }, + { + "name": "sequential_crud", + "category": "DML", + "description": "INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration", + "full_description": "INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration", + "details": [], + "iterations": 100, + "total_us": 363123, + "mean_us": 3631.23, + "median_us": 3589.5, + "p95_us": 4810.6, + "p99_us": 5222.870000000039, + "min_us": 2041.0, + "max_us": 12833.0, + "stddev_us": 1177.6901193746712, + "ops_per_sec": 275.38878011032074, + "success": true, + "error": null + }, + { + "name": "alter_table", + "category": "DDL", + "description": "ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)", + "full_description": "ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)", + "details": [], + "iterations": 100, + "total_us": 194249, + "mean_us": 1942.49, + 
"median_us": 1875.0, + "p95_us": 2748.85, + "p99_us": 2987.0600000000018, + "min_us": 1134.0, + "max_us": 3389.0, + "stddev_us": 492.0571843050995, + "ops_per_sec": 514.8031650098585, + "success": true, + "error": null + }, + { + "name": "concurrent_update", + "category": "Concurrent", + "description": "N concurrent UPDATE operations on the same table (write contention test)", + "full_description": "N concurrent UPDATE operations on the same table (write contention test)", + "details": [], + "iterations": 100, + "total_us": 427038, + "mean_us": 4270.38, + "median_us": 4112.0, + "p95_us": 5857.799999999999, + "p99_us": 6838.91000000001, + "min_us": 2235.0, + "max_us": 8810.0, + "stddev_us": 1090.1130863401747, + "ops_per_sec": 234.17119787934564, + "success": true, + "error": null + }, + { + "name": "concurrent_mixed_dml", + "category": "Concurrent", + "description": "Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)", + "full_description": "Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)", + "details": [], + "iterations": 100, + "total_us": 417237, + "mean_us": 4172.37, + "median_us": 4212.0, + "p95_us": 5108.5, + "p99_us": 5928.460000000001, + "min_us": 2574.0, + "max_us": 6073.0, + "stddev_us": 695.3460101549326, + "ops_per_sec": 239.67193705256244, + "success": true, + "error": null + }, + { + "name": "namespace_isolation", + "category": "Concurrent", + "description": "Concurrent queries across 5 different namespaces (isolation test)", + "full_description": "Concurrent queries across 5 different namespaces (isolation test)", + "details": [], + "iterations": 100, + "total_us": 277252, + "mean_us": 2772.52, + "median_us": 2268.0, + "p95_us": 3488.4499999999916, + "p99_us": 14260.89000000001, + "min_us": 1766.0, + "max_us": 16329.0, + "stddev_us": 2136.860074828003, + "ops_per_sec": 360.68270021496687, + "success": true, + "error": null + }, + { + "name": "subscribe_initial_load", + "category": "Subscribe", + 
"description": "Subscribe to a 1000-row user table and receive the full initial data batch", + "full_description": "Subscribe to a 1000-row user table and receive the full initial data batch", + "details": [], + "iterations": 100, + "total_us": 872953, + "mean_us": 8729.53, + "median_us": 8741.5, + "p95_us": 9261.849999999999, + "p99_us": 9875.45, + "min_us": 7618.0, + "max_us": 10118.0, + "stddev_us": 380.42672624357516, + "ops_per_sec": 114.55370449497282, + "success": true, + "error": null + }, + { + "name": "subscribe_change_latency", + "category": "Subscribe", + "description": "Latency from INSERT to subscriber receiving the change notification", + "full_description": "Latency from INSERT to subscriber receiving the change notification", + "details": [], + "iterations": 100, + "total_us": 4957437, + "mean_us": 49574.37, + "median_us": 49573.5, + "p95_us": 53503.799999999996, + "p99_us": 56566.95000000001, + "min_us": 42509.0, + "max_us": 58443.0, + "stddev_us": 2717.385391652733, + "ops_per_sec": 20.171713730300556, + "success": true, + "error": null + }, + { + "name": "reconnect_subscribe", + "category": "Subscribe", + "description": "Disconnect and re-subscribe to a user table (reconnection overhead)", + "full_description": "Disconnect and re-subscribe to a user table (reconnection overhead)", + "details": [], + "iterations": 100, + "total_us": 1104744, + "mean_us": 11047.44, + "median_us": 11082.0, + "p95_us": 11934.3, + "p99_us": 12129.240000000002, + "min_us": 9350.0, + "max_us": 12351.0, + "stddev_us": 609.8030866143959, + "ops_per_sec": 90.51870840665349, + "success": true, + "error": null + }, + { + "name": "flushed_parquet_query", + "category": "Storage", + "description": "SELECT from a shared table with 20 flushed Parquet files (200K rows)", + "full_description": "SELECT from a shared table with 20 flushed Parquet files (200K rows)", + "details": [], + "iterations": 100, + "total_us": 18909321, + "mean_us": 189093.21, + "median_us": 223948.0, + 
"p95_us": 318457.55, + "p99_us": 329644.6000000001, + "min_us": 28265.0, + "max_us": 346732.0, + "stddev_us": 115827.68683619707, + "ops_per_sec": 5.288397187820758, + "success": true, + "error": null + }, + { + "name": "concurrent_subscribers", + "category": "Load", + "description": "N WebSocket live-query subscribers receiving changes from concurrent writes", + "full_description": "N WebSocket live-query subscribers receiving changes from concurrent writes", + "details": [], + "iterations": 100, + "total_us": 156741622, + "mean_us": 1567416.22, + "median_us": 1574578.5, + "p95_us": 1598600.4000000001, + "p99_us": 1606932.4300000002, + "min_us": 1523005.0, + "max_us": 1612321.0, + "stddev_us": 24542.618879949725, + "ops_per_sec": 0.6379926322314056, + "success": true, + "error": null + }, + { + "name": "concurrent_publishers", + "category": "Load", + "description": "N concurrent INSERTs into a topic-sourced table (measures publish overhead)", + "full_description": "N concurrent INSERTs into a topic-sourced table (measures publish overhead)", + "details": [], + "iterations": 100, + "total_us": 168089, + "mean_us": 1680.89, + "median_us": 1613.5, + "p95_us": 2119.9, + "p99_us": 2445.730000000008, + "min_us": 1330.0, + "max_us": 4003.0, + "stddev_us": 324.8427714473821, + "ops_per_sec": 594.9229277347121, + "success": true, + "error": null + }, + { + "name": "concurrent_consumers", + "category": "Load", + "description": "N concurrent topic CONSUME calls pulling messages in parallel", + "full_description": "N concurrent topic CONSUME calls pulling messages in parallel", + "details": [], + "iterations": 100, + "total_us": 66198, + "mean_us": 661.98, + "median_us": 656.5, + "p95_us": 761.85, + "p99_us": 858.4000000000002, + "min_us": 556.0, + "max_us": 898.0, + "stddev_us": 60.05266712047707, + "ops_per_sec": 1510.619656182966, + "success": true, + "error": null + }, + { + "name": "sql_1k_concurrent", + "category": "Load", + "description": "1000 concurrent SQL SELECT 
queries at once (RPS degradation test)", + "full_description": "1000 concurrent SQL SELECT queries at once (RPS degradation test)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "concurrent read burst" + }, + { + "label": "Dataset", + "value": "500 seeded rows" + }, + { + "label": "Burst", + "value": "1000 concurrent SQL queries" + }, + { + "label": "Query Mix", + "value": "pk lookup, count, selective order-by limit, narrow projection" + } + ], + "iterations": 100, + "total_us": 17163128, + "mean_us": 171631.28, + "median_us": 171814.0, + "p95_us": 177018.5, + "p99_us": 179406.94000000003, + "min_us": 164726.0, + "max_us": 184351.0, + "stddev_us": 3390.9952684583727, + "ops_per_sec": 5.826443757804522, + "success": true, + "error": null + }, + { + "name": "create_user", + "category": "Load", + "description": "CREATE USER (auth subsystem stress test)", + "full_description": "CREATE USER (auth subsystem stress test)", + "details": [], + "iterations": 100, + "total_us": 138568, + "mean_us": 1385.68, + "median_us": 1380.0, + "p95_us": 1605.0, + "p99_us": 1775.9500000000005, + "min_us": 1210.0, + "max_us": 1870.0, + "stddev_us": 125.64250069064202, + "ops_per_sec": 721.6673402228508, + "success": true, + "error": null + }, + { + "name": "drop_user", + "category": "Load", + "description": "DROP USER (auth subsystem teardown stress test)", + "full_description": "DROP USER (auth subsystem teardown stress test)", + "details": [], + "iterations": 100, + "total_us": 28024, + "mean_us": 280.24, + "median_us": 268.0, + "p95_us": 405.0999999999999, + "p99_us": 444.5100000000003, + "min_us": 194.0, + "max_us": 495.0, + "stddev_us": 66.71454644281638, + "ops_per_sec": 3568.3699685983443, + "success": true, + "error": null + }, + { + "name": "connection_storm", + "category": "Load", + "description": "N simultaneous login + SQL + cycles (connection setup overhead)", + "full_description": "N simultaneous login 
+ SQL + cycles (connection setup overhead)", + "details": [], + "iterations": 100, + "total_us": 31889750, + "mean_us": 318897.5, + "median_us": 316274.5, + "p95_us": 338173.75, + "p99_us": 370746.25000000006, + "min_us": 304688.0, + "max_us": 378889.0, + "stddev_us": 11583.347391750789, + "ops_per_sec": 3.1358038241127635, + "success": true, + "error": null + }, + { + "name": "mixed_read_write", + "category": "Load", + "description": "50/50 concurrent reads + writes on same table (contention test)", + "full_description": "50/50 concurrent reads + writes on same table (contention test)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "mixed concurrent read/write" + }, + { + "label": "Dataset", + "value": "200 seeded rows" + }, + { + "label": "Mix", + "value": "5 reads / 5 writes" + }, + { + "label": "Read Shapes", + "value": "range filter, count-star, order-by desc limit" + } + ], + "iterations": 100, + "total_us": 241249, + "mean_us": 2412.49, + "median_us": 2304.0, + "p95_us": 3323.25, + "p99_us": 6259.020000000003, + "min_us": 1510.0, + "max_us": 6756.0, + "stddev_us": 732.8376629943567, + "ops_per_sec": 414.50949019477804, + "success": true, + "error": null + }, + { + "name": "wide_fanout_query", + "category": "Load", + "description": "N concurrent large-result-set SELECTs (serialization + memory pressure)", + "full_description": "N concurrent large-result-set SELECTs (serialization + memory pressure)", + "details": [], + "iterations": 100, + "total_us": 705920, + "mean_us": 7059.2, + "median_us": 6542.0, + "p95_us": 11755.549999999997, + "p99_us": 14016.220000000005, + "min_us": 5810.0, + "max_us": 15127.0, + "stddev_us": 1798.6512679889852, + "ops_per_sec": 141.6591115140526, + "success": true, + "error": null + }, + { + "name": "subscriber_scale", + "category": "Scale", + "description": "Progressive live-query subscriber scale and insert fanout verification up to 100.0K", + 
"full_description": "Progressively ramps cumulative live-query subscribers across tiers 10 -> 100 -> 500 -> 1.0K -> 2.0K -> 5.0K -> 10.0K -> 25.0K -> 50.0K -> 100.0K to verify connection establishment, subscription completion, and INSERT fanout delivery. Delivery probes run at 10 -> 100 -> 500 -> 1.0K -> 2.0K -> 5.0K -> 10.0K -> 25.0K -> 50.0K -> 100.0K. This run is configured with connect_batch=1.0K, wave_size=500, wave_pause=0ms, connect_timeout=30.0s, 1 shared WebSocket target, 100 subscriptions per shared WebSocket connection, and a pooled shared-connection budget of 1.0K.", + "details": [ + { + "label": "Max", + "value": "100.0K" + }, + { + "label": "Tiers", + "value": "10 checkpoints to 100.0K" + }, + { + "label": "Batch/Wave", + "value": "1.0K / 500" + }, + { + "label": "Pause/Timeout", + "value": "0ms / 30.0s" + }, + { + "label": "Shared WS", + "value": "1.0K conns @ 100 subs/ws across 1 target" + }, + { + "label": "Delivery Checks", + "value": "all tiers to 10.0K + 25.0K/50.0K/100.0K" + } + ], + "iterations": 1, + "total_us": 2836207, + "mean_us": 2836207.0, + "median_us": 2836207.0, + "p95_us": 2836207.0, + "p99_us": 2836207.0, + "min_us": 2836207.0, + "max_us": 2836207.0, + "stddev_us": 0.0, + "ops_per_sec": 0.3525835737659487, + "success": true, + "error": null + } + ], + "summary": { + "total_benchmarks": 38, + "passed": 38, + "failed": 0, + "total_duration_ms": 288719.468625, + "measured_duration_ms": 252149.34600000002 + } +} \ No newline at end of file diff --git a/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.html b/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.html new file mode 100644 index 000000000..1c70e21e9 --- /dev/null +++ b/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.html @@ -0,0 +1,671 @@ + + + + + +KalamDB Benchmark Report + + + + +
+ +
+

KalamDB Benchmark Report

+
Performance analysis — v0.4.3-rc.4
+
+ http://127.0.0.1:8080 + 2026-04-29T19:14:33.905989+00:00 + 100 iters + 5 warmup + 10 concurrency + 100.0K max subs +
+
+ +
+
+
38
+
Total Benchmarks
+
+
+
38
+
Passed
+
+
+
0
+
Failed
+
+
Verdict Mix
33Excellent
4Acceptable
1Slow
successful benchmarks only
+
Compared To Previous
33Faster
3Same
2Slower
against 2026-04-26T18:40:14.806927+00:00
+
+
237.400s
+
Measured Benchmark Time
+
+
+
270.288s
+
Wall Clock Duration
+
+
+ +
+
System Information
+
+
Hostname
Jamals-MacBook-Pro.local
+
Machine Model
MacBookPro18,3
+
CPU Model
Apple M1 Pro
+
CPU Cores
10 logical / 10 physical
+
Total Memory
16.00 GiB
+
Available Memory
11.64 GiB
+
Used Memory
4.36 GiB
+
Memory Usage
27.3%
+
OS
Darwin macOS 26.5
+
Kernel
25.5.0
+
Architecture
aarch64
+
+
+ +
+
+

Latency by Operation (µs)

+ +
+
+

Throughput (ops/sec)

+ +
+
+

Avg Latency by Category (µs)

+ +
+
+ +
Detailed Results
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
StatusBenchmarkCategoryDescriptionItersMeanP50P95P99MinMaxOps/secTotalVerdictvs Prev
PASScreate_tableDDL
CREATE TABLE with 3 columns
100406µs382µs514µs611µs347µs691µs2.5K40.6ms🟢 Excellent↑24% faster
PASSdrop_tableDDL
DROP TABLE on a previously created table
1004.62ms4.59ms5.71ms5.74ms3.34ms5.75ms216461.9ms🟢 Excellent↓8% slower
PASSsingle_insertInsert
INSERT a single row into a table
100263µs268µs313µs375µs211µs375µs3.8K26.3ms🟢 Excellent↑32% faster
PASSbulk_insertInsert
One INSERT statement with 50 rows (statement-scoped transaction)
100992µs974µs1.16ms1.24ms801µs1.41ms1.0K99.2ms🟢 Excellent↑23% faster
PASStransaction_multi_insertInsert
Explicit BEGIN/COMMIT with 50 single-row INSERT statements
1003.13ms3.06ms3.59ms3.90ms2.80ms3.92ms320312.6ms🟡 Acceptable↑26% faster
PASSselect_allSelect
SELECT * from a 200-row table
100782µs776µs848µs876µs747µs881µs1.3K78.2ms🟢 Excellent↑49% faster
PASSselect_by_filterSelect
SELECT with WHERE clause on a 200-row table
100397µs388µs470µs505µs312µs521µs2.5K39.7ms🟢 Excellent↑39% faster
PASSselect_countSelect
SELECT COUNT(*) on a 200-row table
100608µs602µs663µs690µs576µs762µs1.6K60.8ms🟢 Excellent↑39% faster
PASSselect_order_by_limitSelect
SELECT with ORDER BY + LIMIT 10 on a 200-row table
100831µs809µs992µs1.02ms762µs1.05ms1.2K83.1ms🟢 Excellent↑32% faster
PASSsingle_updateUpdate
UPDATE a single row by filter condition
100869µs873µs985µs1.06ms764µs1.07ms1.2K86.9ms🟢 Excellent↑44% faster
PASSsingle_deleteDelete
DELETE a single row by filter condition
1001.27ms1.25ms1.35ms1.49ms1.19ms1.61ms790126.5ms🟢 Excellent↑40% faster
PASSconcurrent_insertConcurrent
N concurrent INSERT operations in parallel (N = concurrency setting)
1001.34ms1.26ms1.58ms1.77ms1.14ms4.74ms744134.4ms🟢 Excellent↑21% faster
PASSconcurrent_selectConcurrent
N concurrent SELECT operations in parallel (N = concurrency setting)
1001.16ms1.20ms1.33ms1.43ms783µs1.46ms866115.5ms🟢 Excellent↑17% faster
PASSpoint_lookupSelect
SELECT by primary key from a 10K-row table (single row lookup)
Baselinephase-0 performanceQuery Classprimary-key lookupDataset10000 seeded rowsQuery ShapeSELECT * FROM <ns>.point_lookup WHERE id = ?
100386µs380µs448µs502µs324µs520µs2.6K38.6ms🟢 Excellent↑35% faster
PASSaggregate_querySelect
GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)
10027.5ms27.6ms28.2ms29.1ms26.6ms29.3ms362.752s🔴 Slow↑21% faster
PASSmulti_table_joinSelect
SELECT with subquery across two tables (200 customers, 1000 orders)
1003.08ms3.06ms3.19ms3.35ms2.97ms3.63ms325307.6ms🟢 Excellent↑31% faster
PASSlarge_payload_insertInsert
INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)
1001.66ms1.64ms1.88ms1.94ms1.46ms2.10ms604165.6ms🟢 Excellent↑20% faster
PASSwide_column_insertInsert
INSERT into a 20-column table (wide schema overhead)
100903µs880µs1.02ms1.07ms833µs1.08ms1.1K90.3ms🟢 Excellent↑22% faster
PASSbulk_deleteDelete
DELETE 100 rows at once with a range filter (bulk deletion)
10067.6ms66.9ms74.3ms75.3ms59.2ms85.5ms156.755s🟡 Acceptable↑17% faster
PASSsequential_crudDML
INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration
1002.16ms2.01ms2.50ms3.88ms1.51ms9.38ms463215.9ms🟢 Excellent↑41% faster
PASSalter_tableDDL
ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)
1001.19ms1.11ms1.56ms1.79ms1.04ms2.57ms839119.3ms🟢 Excellent↑39% faster
PASSconcurrent_updateConcurrent
N concurrent UPDATE operations on the same table (write contention test)
1003.37ms3.15ms4.38ms6.71ms2.11ms12.9ms297337.0ms🟢 Excellent↑21% faster
PASSconcurrent_mixed_dmlConcurrent
Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)
1003.24ms3.18ms4.01ms4.34ms2.21ms7.30ms308324.4ms🟢 Excellent↑22% faster
PASSnamespace_isolationConcurrent
Concurrent queries across 5 different namespaces (isolation test)
1001.82ms1.82ms2.02ms2.19ms1.50ms2.36ms549182.2ms🟢 Excellent↑34% faster
PASSsubscribe_initial_loadSubscribe
Subscribe to a 1000-row user table and receive the full initial data batch
1006.33ms6.31ms6.41ms6.60ms6.26ms6.80ms158632.6ms🟢 Excellent↑28% faster
PASSsubscribe_change_latencySubscribe
Latency from INSERT to subscriber receiving the change notification
10051.4ms51.7ms54.1ms57.3ms42.5ms60.6ms195.138s🟡 Acceptable↓4% slower
PASSreconnect_subscribeSubscribe
Disconnect and re-subscribe to a user table (reconnection overhead)
10010.3ms10.5ms11.1ms11.2ms9.03ms11.2ms971.034s🟢 Excellent↑6% faster
PASSflushed_parquet_queryStorage
SELECT from a shared table with 20 flushed Parquet files (200K rows)
100152.1ms183.7ms244.8ms267.9ms23.9ms283.0ms715.215s🟡 Acceptable↑20% faster
PASSconcurrent_subscribersLoad
N WebSocket live-query subscribers receiving changes from concurrent writes
1001.556s1.556s1.583s1.585s1.520s1.591s12.59m🟢 Excellent~ 1567416µs prior
PASSconcurrent_publishersLoad
N concurrent INSERTs into a topic-sourced table (measures publish overhead)
1001.49ms1.43ms1.82ms1.94ms1.28ms2.51ms671149.1ms🟢 Excellent↑11% faster
PASSconcurrent_consumersLoad
N concurrent topic CONSUME calls pulling messages in parallel
100618µs619µs681µs706µs526µs781µs1.6K61.8ms🟢 Excellent↑7% faster
PASSsql_1k_concurrentLoad
1000 concurrent SQL SELECT queries at once (RPS degradation test)
Baselinephase-0 performanceQuery Classconcurrent read burstDataset500 seeded rowsBurst1000 concurrent SQL queriesQuery Mixpk lookup, count, selective order-by limit, narrow projection
100152.3ms152.1ms156.1ms157.6ms146.9ms159.6ms715.233s🟢 Excellent↑11% faster
PASScreate_userLoad
CREATE USER (auth subsystem stress test)
1001.22ms1.21ms1.34ms1.43ms1.16ms1.45ms819122.0ms🟢 Excellent↑12% faster
PASSdrop_userLoad
DROP USER (auth subsystem teardown stress test)
100273µs256µs363µs411µs233µs449µs3.7K27.3ms🟢 Excellent~ 280µs prior
PASSconnection_stormLoad
N simultaneous login + SQL + cycles (connection setup overhead)
100275.5ms272.5ms294.4ms320.4ms265.7ms320.6ms427.546s🟢 Excellent↑14% faster
PASSmixed_read_writeLoad
50/50 concurrent reads + writes on same table (contention test)
Baselinephase-0 performanceQuery Classmixed concurrent read/writeDataset200 seeded rowsMix5 reads / 5 writesRead Shapesrange filter, count-star, order-by desc limit
1002.11ms2.10ms2.61ms2.69ms1.37ms2.75ms475210.6ms🟢 Excellent↑13% faster
PASSwide_fanout_queryLoad
N concurrent large-result-set SELECTs (serialization + memory pressure)
1005.99ms5.86ms6.59ms8.62ms5.19ms9.16ms167599.1ms🟢 Excellent↑15% faster
PASSsubscriber_scaleScale
Progressive live-query subscriber scale and insert fanout verification up to 100.0K
Max100.0KTiers10 checkpoints to 100.0KBatch/Wave1.0K / 500Pause/Timeout0ms / 30.0sShared WS1.0K conns @ 100 subs/ws across 1 targetDelivery Checksall tiers to 10.0K + 25.0K/50.0K/100.0K
12.855s2.855s2.855s2.855s2.855s2.855s02.855s🟢 Excellent~ 2836207µs prior
Whole Bench Totals237.400sWall clock 270.288s
+
+ + + +
+ KalamDB v0.4.3-rc.4 — Generated 2026-04-29T19:14:33.905989+00:00 +
+ +
+ + + + diff --git a/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.json b/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.json new file mode 100644 index 000000000..13c816fd9 --- /dev/null +++ b/benchv2/results/bench-2026-04-29-191433-0-4-3-rc-4.json @@ -0,0 +1,842 @@ +{ + "version": "0.4.3-rc.4", + "server_url": "http://127.0.0.1:8080", + "timestamp": "2026-04-29T19:14:33.905313+00:00", + "config": { + "iterations": 100, + "warmup": 5, + "concurrency": 10, + "namespace": "bench_191002", + "max_subscribers": 100000 + }, + "system": { + "hostname": "Jamals-MacBook-Pro.local", + "machine_model": "MacBookPro18,3", + "os_name": "Darwin", + "os_version": "macOS 26.5", + "kernel_version": "25.5.0", + "architecture": "aarch64", + "cpu_model": "Apple M1 Pro", + "cpu_logical_cores": 10, + "cpu_physical_cores": 10, + "total_memory_bytes": 17179869184, + "available_memory_bytes": 12496584704, + "used_memory_bytes": 4683284480, + "used_memory_percent": 27.260303497314453 + }, + "results": [ + { + "name": "create_table", + "category": "DDL", + "description": "CREATE TABLE with 3 columns", + "full_description": "CREATE TABLE with 3 columns", + "details": [], + "iterations": 100, + "total_us": 40582, + "mean_us": 405.82, + "median_us": 382.5, + "p95_us": 513.9499999999999, + "p99_us": 610.8100000000004, + "min_us": 347.0, + "max_us": 691.0, + "stddev_us": 59.461791483814594, + "ops_per_sec": 2464.1466660095607, + "success": true, + "error": null + }, + { + "name": "drop_table", + "category": "DDL", + "description": "DROP TABLE on a previously created table", + "full_description": "DROP TABLE on a previously created table", + "details": [], + "iterations": 100, + "total_us": 461932, + "mean_us": 4619.32, + "median_us": 4588.5, + "p95_us": 5706.7, + "p99_us": 5742.13, + "min_us": 3339.0, + "max_us": 5755.0, + "stddev_us": 698.2425684849388, + "ops_per_sec": 216.4820796134496, + "success": true, + "error": null + }, + { + "name": "single_insert", + "category": "Insert", + 
"description": "INSERT a single row into a table", + "full_description": "INSERT a single row into a table", + "details": [], + "iterations": 100, + "total_us": 26283, + "mean_us": 262.83, + "median_us": 268.0, + "p95_us": 313.04999999999995, + "p99_us": 375.0, + "min_us": 211.0, + "max_us": 375.0, + "stddev_us": 30.681503327976717, + "ops_per_sec": 3804.7407069208234, + "success": true, + "error": null + }, + { + "name": "bulk_insert", + "category": "Insert", + "description": "One INSERT statement with 50 rows (statement-scoped transaction)", + "full_description": "One INSERT statement with 50 rows (statement-scoped transaction)", + "details": [], + "iterations": 100, + "total_us": 99225, + "mean_us": 992.25, + "median_us": 974.5, + "p95_us": 1157.5, + "p99_us": 1241.7000000000007, + "min_us": 801.0, + "max_us": 1410.0, + "stddev_us": 94.75021654515379, + "ops_per_sec": 1007.8105316200555, + "success": true, + "error": null + }, + { + "name": "transaction_multi_insert", + "category": "Insert", + "description": "Explicit BEGIN/COMMIT with 50 single-row INSERT statements", + "full_description": "Explicit BEGIN/COMMIT with 50 single-row INSERT statements", + "details": [], + "iterations": 100, + "total_us": 312567, + "mean_us": 3125.67, + "median_us": 3062.5, + "p95_us": 3586.0999999999995, + "p99_us": 3896.23, + "min_us": 2800.0, + "max_us": 3919.0, + "stddev_us": 235.1665240641279, + "ops_per_sec": 319.9314067064022, + "success": true, + "error": null + }, + { + "name": "select_all", + "category": "Select", + "description": "SELECT * from a 200-row table", + "full_description": "SELECT * from a 200-row table", + "details": [], + "iterations": 100, + "total_us": 78171, + "mean_us": 781.71, + "median_us": 775.5, + "p95_us": 848.5, + "p99_us": 876.05, + "min_us": 747.0, + "max_us": 881.0, + "stddev_us": 27.572968390410654, + "ops_per_sec": 1279.2467794962326, + "success": true, + "error": null + }, + { + "name": "select_by_filter", + "category": "Select", + 
"description": "SELECT with WHERE clause on a 200-row table", + "full_description": "SELECT with WHERE clause on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 39733, + "mean_us": 397.33, + "median_us": 387.5, + "p95_us": 469.54999999999995, + "p99_us": 505.1600000000001, + "min_us": 312.0, + "max_us": 521.0, + "stddev_us": 35.48083989986708, + "ops_per_sec": 2516.7996375808525, + "success": true, + "error": null + }, + { + "name": "select_count", + "category": "Select", + "description": "SELECT COUNT(*) on a 200-row table", + "full_description": "SELECT COUNT(*) on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 60839, + "mean_us": 608.39, + "median_us": 602.0, + "p95_us": 663.0999999999999, + "p99_us": 689.7300000000004, + "min_us": 576.0, + "max_us": 762.0, + "stddev_us": 27.351027162954722, + "ops_per_sec": 1643.6825062870857, + "success": true, + "error": null + }, + { + "name": "select_order_by_limit", + "category": "Select", + "description": "SELECT with ORDER BY + LIMIT 10 on a 200-row table", + "full_description": "SELECT with ORDER BY + LIMIT 10 on a 200-row table", + "details": [], + "iterations": 100, + "total_us": 83143, + "mean_us": 831.43, + "median_us": 809.0, + "p95_us": 992.2, + "p99_us": 1020.2900000000001, + "min_us": 762.0, + "max_us": 1049.0, + "stddev_us": 69.14722326202612, + "ops_per_sec": 1202.7470743177419, + "success": true, + "error": null + }, + { + "name": "single_update", + "category": "Update", + "description": "UPDATE a single row by filter condition", + "full_description": "UPDATE a single row by filter condition", + "details": [], + "iterations": 100, + "total_us": 86928, + "mean_us": 869.28, + "median_us": 873.0, + "p95_us": 985.0, + "p99_us": 1064.1000000000001, + "min_us": 764.0, + "max_us": 1074.0, + "stddev_us": 62.75897134950894, + "ops_per_sec": 1150.377323762194, + "success": true, + "error": null + }, + { + "name": "single_delete", + "category": "Delete", + "description": 
"DELETE a single row by filter condition", + "full_description": "DELETE a single row by filter condition", + "details": [], + "iterations": 100, + "total_us": 126545, + "mean_us": 1265.45, + "median_us": 1253.0, + "p95_us": 1354.2, + "p99_us": 1488.2300000000007, + "min_us": 1192.0, + "max_us": 1610.0, + "stddev_us": 57.596888068293374, + "ops_per_sec": 790.2327235370817, + "success": true, + "error": null + }, + { + "name": "concurrent_insert", + "category": "Concurrent", + "description": "N concurrent INSERT operations in parallel (N = concurrency setting)", + "full_description": "N concurrent INSERT operations in parallel (N = concurrency setting)", + "details": [], + "iterations": 100, + "total_us": 134406, + "mean_us": 1344.06, + "median_us": 1257.5, + "p95_us": 1578.75, + "p99_us": 1768.0000000000152, + "min_us": 1138.0, + "max_us": 4738.0, + "stddev_us": 366.5446028228841, + "ops_per_sec": 744.0144041188638, + "success": true, + "error": null + }, + { + "name": "concurrent_select", + "category": "Concurrent", + "description": "N concurrent SELECT operations in parallel (N = concurrency setting)", + "full_description": "N concurrent SELECT operations in parallel (N = concurrency setting)", + "details": [], + "iterations": 100, + "total_us": 115535, + "mean_us": 1155.35, + "median_us": 1202.0, + "p95_us": 1328.15, + "p99_us": 1434.24, + "min_us": 783.0, + "max_us": 1458.0, + "stddev_us": 155.12864488754636, + "ops_per_sec": 865.5385813822651, + "success": true, + "error": null + }, + { + "name": "point_lookup", + "category": "Select", + "description": "SELECT by primary key from a 10K-row table (single row lookup)", + "full_description": "SELECT by primary key from a 10K-row table (single row lookup)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "primary-key lookup" + }, + { + "label": "Dataset", + "value": "10000 seeded rows" + }, + { + "label": "Query Shape", + "value": "SELECT * 
FROM .point_lookup WHERE id = ?" + } + ], + "iterations": 100, + "total_us": 38646, + "mean_us": 386.46, + "median_us": 380.5, + "p95_us": 448.15, + "p99_us": 502.18000000000006, + "min_us": 324.0, + "max_us": 520.0, + "stddev_us": 35.56766776018622, + "ops_per_sec": 2587.5899187496766, + "success": true, + "error": null + }, + { + "name": "aggregate_query", + "category": "Select", + "description": "GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)", + "full_description": "GROUP BY + SUM/AVG/COUNT on a 10K-row table (analytical query performance)", + "details": [], + "iterations": 100, + "total_us": 2751782, + "mean_us": 27517.82, + "median_us": 27563.5, + "p95_us": 28156.35, + "p99_us": 29137.63, + "min_us": 26566.0, + "max_us": 29299.0, + "stddev_us": 510.55112586420984, + "ops_per_sec": 36.340087986621036, + "success": true, + "error": null + }, + { + "name": "multi_table_join", + "category": "Select", + "description": "SELECT with subquery across two tables (200 customers, 1000 orders)", + "full_description": "SELECT with subquery across two tables (200 customers, 1000 orders)", + "details": [], + "iterations": 100, + "total_us": 307609, + "mean_us": 3076.09, + "median_us": 3063.0, + "p95_us": 3187.25, + "p99_us": 3354.8300000000013, + "min_us": 2974.0, + "max_us": 3635.0, + "stddev_us": 84.03812206133573, + "ops_per_sec": 325.08801758076, + "success": true, + "error": null + }, + { + "name": "large_payload_insert", + "category": "Insert", + "description": "INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)", + "full_description": "INSERT rows with ~4KB TEXT payloads (serialization + storage throughput)", + "details": [], + "iterations": 100, + "total_us": 165648, + "mean_us": 1656.48, + "median_us": 1639.0, + "p95_us": 1878.2, + "p99_us": 1944.620000000001, + "min_us": 1458.0, + "max_us": 2105.0, + "stddev_us": 121.73986838975783, + "ops_per_sec": 603.6897517627741, + "success": true, + "error": null + }, + { + 
"name": "wide_column_insert", + "category": "Insert", + "description": "INSERT into a 20-column table (wide schema overhead)", + "full_description": "INSERT into a 20-column table (wide schema overhead)", + "details": [], + "iterations": 100, + "total_us": 90263, + "mean_us": 902.63, + "median_us": 879.5, + "p95_us": 1023.15, + "p99_us": 1074.05, + "min_us": 833.0, + "max_us": 1079.0, + "stddev_us": 60.790525411432164, + "ops_per_sec": 1107.8736580880318, + "success": true, + "error": null + }, + { + "name": "bulk_delete", + "category": "Delete", + "description": "DELETE 100 rows at once with a range filter (bulk deletion)", + "full_description": "DELETE 100 rows at once with a range filter (bulk deletion)", + "details": [], + "iterations": 100, + "total_us": 6755271, + "mean_us": 67552.71, + "median_us": 66936.5, + "p95_us": 74322.45, + "p99_us": 75306.52000000005, + "min_us": 59193.0, + "max_us": 85456.0, + "stddev_us": 4976.8661010497135, + "ops_per_sec": 14.80325511737427, + "success": true, + "error": null + }, + { + "name": "sequential_crud", + "category": "DML", + "description": "INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration", + "full_description": "INSERT → UPDATE → SELECT → DELETE full DML lifecycle per iteration", + "details": [], + "iterations": 100, + "total_us": 215860, + "mean_us": 2158.6, + "median_us": 2007.0, + "p95_us": 2499.25, + "p99_us": 3880.5800000000286, + "min_us": 1509.0, + "max_us": 9383.0, + "stddev_us": 794.2802853635307, + "ops_per_sec": 463.263226165107, + "success": true, + "error": null + }, + { + "name": "alter_table", + "category": "DDL", + "description": "ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)", + "full_description": "ALTER TABLE ADD COLUMN + DROP COLUMN (schema evolution latency)", + "details": [], + "iterations": 100, + "total_us": 119260, + "mean_us": 1192.6, + "median_us": 1115.0, + "p95_us": 1557.6999999999998, + "p99_us": 1791.900000000004, + "min_us": 1040.0, + "max_us": 
2574.0, + "stddev_us": 206.60600282673397, + "ops_per_sec": 838.5041086701325, + "success": true, + "error": null + }, + { + "name": "concurrent_update", + "category": "Concurrent", + "description": "N concurrent UPDATE operations on the same table (write contention test)", + "full_description": "N concurrent UPDATE operations on the same table (write contention test)", + "details": [], + "iterations": 100, + "total_us": 337042, + "mean_us": 3370.42, + "median_us": 3151.5, + "p95_us": 4384.549999999999, + "p99_us": 6707.250000000032, + "min_us": 2111.0, + "max_us": 12870.0, + "stddev_us": 1255.6554260986209, + "ops_per_sec": 296.6989277300752, + "success": true, + "error": null + }, + { + "name": "concurrent_mixed_dml", + "category": "Concurrent", + "description": "Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)", + "full_description": "Concurrent INSERT + UPDATE + DELETE on the same table (multi-op contention)", + "details": [], + "iterations": 100, + "total_us": 324367, + "mean_us": 3243.67, + "median_us": 3179.0, + "p95_us": 4005.0499999999997, + "p99_us": 4339.910000000015, + "min_us": 2207.0, + "max_us": 7301.0, + "stddev_us": 641.438677403138, + "ops_per_sec": 308.29276714339005, + "success": true, + "error": null + }, + { + "name": "namespace_isolation", + "category": "Concurrent", + "description": "Concurrent queries across 5 different namespaces (isolation test)", + "full_description": "Concurrent queries across 5 different namespaces (isolation test)", + "details": [], + "iterations": 100, + "total_us": 182177, + "mean_us": 1821.77, + "median_us": 1820.5, + "p95_us": 2015.5, + "p99_us": 2187.770000000001, + "min_us": 1497.0, + "max_us": 2363.0, + "stddev_us": 143.87963930334482, + "ops_per_sec": 548.9167128671567, + "success": true, + "error": null + }, + { + "name": "subscribe_initial_load", + "category": "Subscribe", + "description": "Subscribe to a 1000-row user table and receive the full initial data batch", + 
"full_description": "Subscribe to a 1000-row user table and receive the full initial data batch", + "details": [], + "iterations": 100, + "total_us": 632585, + "mean_us": 6325.85, + "median_us": 6312.5, + "p95_us": 6407.549999999999, + "p99_us": 6596.110000000001, + "min_us": 6257.0, + "max_us": 6805.0, + "stddev_us": 69.75460232453875, + "ops_per_sec": 158.0815226412261, + "success": true, + "error": null + }, + { + "name": "subscribe_change_latency", + "category": "Subscribe", + "description": "Latency from INSERT to subscriber receiving the change notification", + "full_description": "Latency from INSERT to subscriber receiving the change notification", + "details": [], + "iterations": 100, + "total_us": 5137899, + "mean_us": 51378.99, + "median_us": 51701.0, + "p95_us": 54147.200000000004, + "p99_us": 57307.110000000015, + "min_us": 42527.0, + "max_us": 60585.0, + "stddev_us": 2642.5098625428745, + "ops_per_sec": 19.46320859946838, + "success": true, + "error": null + }, + { + "name": "reconnect_subscribe", + "category": "Subscribe", + "description": "Disconnect and re-subscribe to a user table (reconnection overhead)", + "full_description": "Disconnect and re-subscribe to a user table (reconnection overhead)", + "details": [], + "iterations": 100, + "total_us": 1034207, + "mean_us": 10342.07, + "median_us": 10492.5, + "p95_us": 11144.050000000001, + "p99_us": 11229.130000000001, + "min_us": 9025.0, + "max_us": 11242.0, + "stddev_us": 525.6253467551912, + "ops_per_sec": 96.69244164852877, + "success": true, + "error": null + }, + { + "name": "flushed_parquet_query", + "category": "Storage", + "description": "SELECT from a shared table with 20 flushed Parquet files (200K rows)", + "full_description": "SELECT from a shared table with 20 flushed Parquet files (200K rows)", + "details": [], + "iterations": 100, + "total_us": 15214505, + "mean_us": 152145.05, + "median_us": 183704.5, + "p95_us": 244843.55, + "p99_us": 267923.76000000007, + "min_us": 23910.0, + 
"max_us": 283047.0, + "stddev_us": 89397.92124934756, + "ops_per_sec": 6.572675220127109, + "success": true, + "error": null + }, + { + "name": "concurrent_subscribers", + "category": "Load", + "description": "N WebSocket live-query subscribers receiving changes from concurrent writes", + "full_description": "N WebSocket live-query subscribers receiving changes from concurrent writes", + "details": [], + "iterations": 100, + "total_us": 155623147, + "mean_us": 1556231.47, + "median_us": 1555796.5, + "p95_us": 1582518.0, + "p99_us": 1585197.4500000002, + "min_us": 1520014.0, + "max_us": 1590885.0, + "stddev_us": 18758.422280502065, + "ops_per_sec": 0.6425779321889693, + "success": true, + "error": null + }, + { + "name": "concurrent_publishers", + "category": "Load", + "description": "N concurrent INSERTs into a topic-sourced table (measures publish overhead)", + "full_description": "N concurrent INSERTs into a topic-sourced table (measures publish overhead)", + "details": [], + "iterations": 100, + "total_us": 149093, + "mean_us": 1490.93, + "median_us": 1431.5, + "p95_us": 1819.4499999999998, + "p99_us": 1939.770000000003, + "min_us": 1277.0, + "max_us": 2511.0, + "stddev_us": 185.8750669268219, + "ops_per_sec": 670.7223008457808, + "success": true, + "error": null + }, + { + "name": "concurrent_consumers", + "category": "Load", + "description": "N concurrent topic CONSUME calls pulling messages in parallel", + "full_description": "N concurrent topic CONSUME calls pulling messages in parallel", + "details": [], + "iterations": 100, + "total_us": 61772, + "mean_us": 617.72, + "median_us": 619.0, + "p95_us": 681.0500000000001, + "p99_us": 705.7600000000004, + "min_us": 526.0, + "max_us": 781.0, + "stddev_us": 43.10196508210958, + "ops_per_sec": 1618.8564398109177, + "success": true, + "error": null + }, + { + "name": "sql_1k_concurrent", + "category": "Load", + "description": "1000 concurrent SQL SELECT queries at once (RPS degradation test)", + "full_description": 
"1000 concurrent SQL SELECT queries at once (RPS degradation test)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "concurrent read burst" + }, + { + "label": "Dataset", + "value": "500 seeded rows" + }, + { + "label": "Burst", + "value": "1000 concurrent SQL queries" + }, + { + "label": "Query Mix", + "value": "pk lookup, count, selective order-by limit, narrow projection" + } + ], + "iterations": 100, + "total_us": 15232537, + "mean_us": 152325.37, + "median_us": 152111.5, + "p95_us": 156070.65, + "p99_us": 157593.05000000002, + "min_us": 146943.0, + "max_us": 159578.0, + "stddev_us": 2406.880678199671, + "ops_per_sec": 6.564894606853737, + "success": true, + "error": null + }, + { + "name": "create_user", + "category": "Load", + "description": "CREATE USER (auth subsystem stress test)", + "full_description": "CREATE USER (auth subsystem stress test)", + "details": [], + "iterations": 100, + "total_us": 122039, + "mean_us": 1220.39, + "median_us": 1207.0, + "p95_us": 1343.6, + "p99_us": 1427.21, + "min_us": 1156.0, + "max_us": 1448.0, + "stddev_us": 55.92182404071244, + "ops_per_sec": 819.4101885462844, + "success": true, + "error": null + }, + { + "name": "drop_user", + "category": "Load", + "description": "DROP USER (auth subsystem teardown stress test)", + "full_description": "DROP USER (auth subsystem teardown stress test)", + "details": [], + "iterations": 100, + "total_us": 27312, + "mean_us": 273.12, + "median_us": 256.5, + "p95_us": 363.05, + "p99_us": 411.38000000000017, + "min_us": 233.0, + "max_us": 449.0, + "stddev_us": 42.0851181739718, + "ops_per_sec": 3661.394258933802, + "success": true, + "error": null + }, + { + "name": "connection_storm", + "category": "Load", + "description": "N simultaneous login + SQL + cycles (connection setup overhead)", + "full_description": "N simultaneous login + SQL + cycles (connection setup overhead)", + "details": [], + "iterations": 100, + 
"total_us": 27546238, + "mean_us": 275462.38, + "median_us": 272512.5, + "p95_us": 294437.39999999997, + "p99_us": 320377.9, + "min_us": 265746.0, + "max_us": 320566.0, + "stddev_us": 9845.375760952114, + "ops_per_sec": 3.630259783568268, + "success": true, + "error": null + }, + { + "name": "mixed_read_write", + "category": "Load", + "description": "50/50 concurrent reads + writes on same table (contention test)", + "full_description": "50/50 concurrent reads + writes on same table (contention test)", + "details": [ + { + "label": "Baseline", + "value": "phase-0 performance" + }, + { + "label": "Query Class", + "value": "mixed concurrent read/write" + }, + { + "label": "Dataset", + "value": "200 seeded rows" + }, + { + "label": "Mix", + "value": "5 reads / 5 writes" + }, + { + "label": "Read Shapes", + "value": "range filter, count-star, order-by desc limit" + } + ], + "iterations": 100, + "total_us": 210629, + "mean_us": 2106.29, + "median_us": 2099.5, + "p95_us": 2606.0, + "p99_us": 2694.5800000000004, + "min_us": 1371.0, + "max_us": 2752.0, + "stddev_us": 333.84443528340705, + "ops_per_sec": 474.76843169743955, + "success": true, + "error": null + }, + { + "name": "wide_fanout_query", + "category": "Load", + "description": "N concurrent large-result-set SELECTs (serialization + memory pressure)", + "full_description": "N concurrent large-result-set SELECTs (serialization + memory pressure)", + "details": [], + "iterations": 100, + "total_us": 599055, + "mean_us": 5990.55, + "median_us": 5864.0, + "p95_us": 6585.55, + "p99_us": 8616.520000000002, + "min_us": 5192.0, + "max_us": 9163.0, + "stddev_us": 565.8518986555769, + "ops_per_sec": 166.92958075635792, + "success": true, + "error": null + }, + { + "name": "subscriber_scale", + "category": "Scale", + "description": "Progressive live-query subscriber scale and insert fanout verification up to 100.0K", + "full_description": "Progressively ramps cumulative live-query subscribers across tiers 10 -> 100 -> 500 -> 
1.0K -> 2.0K -> 5.0K -> 10.0K -> 25.0K -> 50.0K -> 100.0K to verify connection establishment, subscription completion, and INSERT fanout delivery. Delivery probes run at 10 -> 100 -> 500 -> 1.0K -> 2.0K -> 5.0K -> 10.0K -> 25.0K -> 50.0K -> 100.0K. This run is configured with connect_batch=1.0K, wave_size=500, wave_pause=0ms, connect_timeout=30.0s, 1 shared WebSocket target, 100 subscriptions per shared WebSocket connection, and a pooled shared-connection budget of 1.0K.", + "details": [ + { + "label": "Max", + "value": "100.0K" + }, + { + "label": "Tiers", + "value": "10 checkpoints to 100.0K" + }, + { + "label": "Batch/Wave", + "value": "1.0K / 500" + }, + { + "label": "Pause/Timeout", + "value": "0ms / 30.0s" + }, + { + "label": "Shared WS", + "value": "1.0K conns @ 100 subs/ws across 1 target" + }, + { + "label": "Delivery Checks", + "value": "all tiers to 10.0K + 25.0K/50.0K/100.0K" + } + ], + "iterations": 1, + "total_us": 2855095, + "mean_us": 2855095.0, + "median_us": 2855095.0, + "p95_us": 2855095.0, + "p99_us": 2855095.0, + "min_us": 2855095.0, + "max_us": 2855095.0, + "stddev_us": 0.0, + "ops_per_sec": 0.35025104243466504, + "success": true, + "error": null + } + ], + "summary": { + "total_benchmarks": 38, + "passed": 38, + "failed": 0, + "total_duration_ms": 270287.988292, + "measured_duration_ms": 237399.927 + } +} \ No newline at end of file diff --git a/benchv2/run-benchmarks.sh b/benchv2/run-benchmarks.sh index 9bed13361..f212a4ecc 100755 --- a/benchv2/run-benchmarks.sh +++ b/benchv2/run-benchmarks.sh @@ -106,7 +106,9 @@ derive_cluster_rpc_port() { } ensure_release_server_bin() { - if [[ -x "$BENCH_SERVER_BIN" ]]; then + if [[ -x "$BENCH_SERVER_BIN" ]] \ + && ! 
find "$BACKEND_DIR" "$REPO_DIR/Cargo.toml" "$REPO_DIR/Cargo.lock" \ + -newer "$BENCH_SERVER_BIN" -print -quit 2>/dev/null | grep -q .; then return fi diff --git a/benchv2/run-chat-realtime.sh b/benchv2/run-chat-realtime.sh new file mode 100755 index 000000000..5f1f86cec --- /dev/null +++ b/benchv2/run-chat-realtime.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Run the opt-in realtime chat workload benchmark. +# +# Usage: +# ./run-chat-realtime.sh +# ./run-chat-realtime.sh --url http://127.0.0.1:8080 --minutes 10 --users 1000 --realtime-convs 150 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +URL="${KALAMDB_URL:-http://127.0.0.1:8080}" +MINUTES="${KALAMDB_BENCH_CHAT_MINUTES:-5}" +USER_COUNT="${KALAMDB_BENCH_CHAT_USERS:-1000}" +REALTIME_CONVS="${KALAMDB_BENCH_CHAT_REALTIME_CONVS:-100}" +BENCH_USER="${KALAMDB_USER:-}" +BENCH_PASSWORD="${KALAMDB_PASSWORD:-}" +EXTRA_ARGS=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --url) + URL="$2" + shift 2 + ;; + --minutes) + MINUTES="$2" + shift 2 + ;; + --users) + USER_COUNT="$2" + shift 2 + ;; + --realtime-convs) + REALTIME_CONVS="$2" + shift 2 + ;; + --user) + BENCH_USER="$2" + shift 2 + ;; + --password) + BENCH_PASSWORD="$2" + shift 2 + ;; + *) + EXTRA_ARGS+=("$1") + shift + ;; + esac +done + +export KALAMDB_BENCH_CHAT_MINUTES="$MINUTES" +export KALAMDB_BENCH_CHAT_USERS="$USER_COUNT" +export KALAMDB_BENCH_CHAT_REALTIME_CONVS="$REALTIME_CONVS" + +echo "▸ Running chat_realtime benchmark" +echo "▸ URL: $URL" +echo "▸ Minutes: $MINUTES" +echo "▸ Seeded users: $USER_COUNT" +echo "▸ Active conversations: $REALTIME_CONVS" + +CMD=(./run-benchmarks.sh --urls "$URL" --bench chat_realtime --iterations 1 --warmup 0) + +if [[ -n "$BENCH_USER" ]]; then + CMD+=(--user "$BENCH_USER") +fi + +if [[ -n "$BENCH_PASSWORD" ]]; then + CMD+=(--password "$BENCH_PASSWORD") +fi + +if (( ${#EXTRA_ARGS[@]} > 0 )); then + CMD+=("${EXTRA_ARGS[@]}") +fi + +exec "${CMD[@]}" \ No newline at end of file diff --git 
a/benchv2/src/benchmarks/chat_realtime_bench.rs b/benchv2/src/benchmarks/chat_realtime_bench.rs new file mode 100644 index 000000000..a3f2a6d5d --- /dev/null +++ b/benchv2/src/benchmarks/chat_realtime_bench.rs @@ -0,0 +1,2472 @@ +use std::collections::HashMap; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use kalam_client::{ + AutoOffsetReset, ChangeEvent, KalamCellValue, KalamLinkClient, SubscriptionConfig, + SubscriptionManager, TopicOp, +}; +use serde_json::Value as JsonValue; +use sysinfo::{MemoryRefreshKind, Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; +use tokio::sync::{Mutex as AsyncMutex, Semaphore, watch}; +use tokio::task::JoinSet; +use tokio::task::JoinHandle; +use tokio::time::{sleep, timeout, Instant}; + +use crate::benchmarks::Benchmark; +use crate::client::{KalamClient, SqlResponse}; +use crate::config::Config; +use crate::metrics::BenchmarkDetail; + +pub struct ChatRealtimeBench; + +const DEFAULT_CHAT_MINUTES: u64 = 5; +const DEFAULT_CHAT_USERS: u32 = 1_000; +const DEFAULT_CHAT_REALTIME_CONVS: u32 = 100; +const CHAT_USER_PASSWORD: &str = "BenchChatP@ss123"; +const CHAT_TYPING_INTERVAL: Duration = Duration::from_secs(2); +const CHAT_TYPING_BURSTS: u64 = 3; +const CHAT_WORKER_START_STAGGER_MS: u64 = 5; +const CHAT_LOGIN_MAX_IN_FLIGHT: usize = 64; +const CHAT_LOGIN_RETRY_ATTEMPTS: u32 = 5; +const CHAT_LOGIN_RETRY_BASE_DELAY: Duration = Duration::from_millis(200); +const CHAT_SUBSCRIBE_RETRY_ATTEMPTS: u32 = 2; +const CHAT_SUBSCRIBE_RETRY_BASE_DELAY: Duration = Duration::from_millis(200); +const SQL_RETRY_ATTEMPTS: u32 = 8; +const CHAT_SQL_RETRY_BASE_DELAY: Duration = Duration::from_millis(200); +const CHAT_SQL_RETRY_MAX_DELAY: Duration = Duration::from_secs(5); +const SUBSCRIBE_TIMEOUT: Duration = Duration::from_secs(30); +const SUBSCRIPTION_SHUTDOWN_TIMEOUT: Duration = 
Duration::from_secs(10); +const CHAT_FORWARDER_POLL_TIMEOUT: Duration = Duration::from_secs(1); +const CHAT_MESSAGE_FORWARDER_MAX_IN_FLIGHT: usize = 16; +const CHAT_TYPING_FORWARDER_MAX_IN_FLIGHT: usize = 32; +const CHAT_DELIVERY_TIMEOUT_LOG_LIMIT: u64 = 5; +const CHAT_MIRROR_WAIT_MIN_TIMEOUT_SECS: u64 = 15; +const CHAT_MIRROR_WAIT_MAX_TIMEOUT_SECS: u64 = 120; +const CHAT_MEMORY_SAMPLE_INTERVAL: Duration = Duration::from_secs(1); +const CHAT_CONVERSATION_TOPIC_SUFFIX: &str = "chat_conversation_events"; +const CHAT_MESSAGE_TOPIC_SUFFIX: &str = "chat_message_events"; +const CHAT_TYPING_TOPIC_SUFFIX: &str = "chat_typing_events"; + +#[derive(Clone, Copy)] +struct ChatWorkloadSettings { + minutes: u64, + user_count: u32, + realtime_conversations: u32, +} + +impl ChatWorkloadSettings { + fn from_env() -> Result { + let minutes = parse_u64_env("KALAMDB_BENCH_CHAT_MINUTES", DEFAULT_CHAT_MINUTES)?; + let user_count = parse_u32_env("KALAMDB_BENCH_CHAT_USERS", DEFAULT_CHAT_USERS)?; + let realtime_conversations = parse_u32_env( + "KALAMDB_BENCH_CHAT_REALTIME_CONVS", + DEFAULT_CHAT_REALTIME_CONVS, + )?; + + if minutes == 0 { + return Err("KALAMDB_BENCH_CHAT_MINUTES must be greater than zero".to_string()); + } + if user_count < 2 { + return Err("KALAMDB_BENCH_CHAT_USERS must be at least 2".to_string()); + } + if realtime_conversations == 0 { + return Err( + "KALAMDB_BENCH_CHAT_REALTIME_CONVS must be greater than zero".to_string(), + ); + } + + Ok(Self { + minutes, + user_count, + realtime_conversations, + }) + } + + fn for_report() -> Self { + Self::from_env().unwrap_or(Self { + minutes: DEFAULT_CHAT_MINUTES, + user_count: DEFAULT_CHAT_USERS, + realtime_conversations: DEFAULT_CHAT_REALTIME_CONVS, + }) + } + + fn duration(&self) -> Duration { + Duration::from_secs(self.minutes.saturating_mul(60)) + } +} + +#[derive(Default)] +struct ChatWorkloadStats { + sessions_started: AtomicU64, + sessions_completed: AtomicU64, + delivery_wait_timeouts: AtomicU64, + active_sessions: 
AtomicU64, + peak_active_sessions: AtomicU64, + logged_in_users: AtomicU64, + selects: AtomicU64, + inserts: AtomicU64, + updates: AtomicU64, + select_latency_us: AtomicU64, + insert_latency_us: AtomicU64, + update_latency_us: AtomicU64, + select_max_us: AtomicU64, + insert_max_us: AtomicU64, + update_max_us: AtomicU64, + messages_sent: AtomicU64, + typing_events_sent: AtomicU64, + conversations_forwarded: AtomicU64, + messages_forwarded: AtomicU64, + typing_events_forwarded: AtomicU64, + conversation_topic_records: AtomicU64, + message_topic_records: AtomicU64, + typing_topic_records: AtomicU64, + skipped_topic_records: AtomicU64, + subscriptions_opened: AtomicU64, + subscriptions_closed: AtomicU64, + active_subscriptions: AtomicU64, + peak_active_subscriptions: AtomicU64, + subscription_open_latency_us: AtomicU64, + subscription_open_max_us: AtomicU64, + subscription_events: AtomicU64, + subscription_connect_timings: TimingSeries, + create_conversation_timings: TimingSeries, + insert_message_timings: TimingSeries, + insert_typing_event_timings: TimingSeries, +} + +impl Benchmark for ChatRealtimeBench { + fn name(&self) -> &str { + "chat_realtime" + } + + fn category(&self) -> &str { + "Load" + } + + fn description(&self) -> &str { + "Timed realtime chat workload with real auth users, USER tables, a Rust topic forwarder, and stream typing events" + } + + fn report_description(&self, _config: &Config) -> String { + let settings = ChatWorkloadSettings::for_report(); + format!( + "Realtime chat scenario for {}m with {} regular users and {} active conversations", + settings.minutes, settings.user_count, settings.realtime_conversations + ) + } + + fn report_full_description(&self, _config: &Config) -> String { + let settings = ChatWorkloadSettings::for_report(); + format!( + "Creates {} regular KalamDB users, then runs {} concurrent chat-session workers for {} minute(s). 
Each worker logs in as real users, opens a USER-table conversation row for user A, relies on a Rust topic consumer to mirror that conversation into user B's USER table, sends a user A message that the consumer forwards to user B, emits three typing events over six seconds from user B via a STREAM table, sends a user B reply that the consumer forwards back to user A, and keeps user A on live SQL subscriptions for message and typing delivery.", + settings.user_count, + settings.realtime_conversations, + settings.minutes, + ) + } + + fn report_details(&self, _config: &Config) -> Vec { + let settings = ChatWorkloadSettings::for_report(); + vec![ + BenchmarkDetail::new("Runtime", format!("{} minute(s)", settings.minutes)), + BenchmarkDetail::new("Regular Users", settings.user_count.to_string()), + BenchmarkDetail::new( + "Active Conversations", + settings.realtime_conversations.to_string(), + ), + BenchmarkDetail::new( + "Responder Typing Burst", + format!( + "{} events every {}s", + CHAT_TYPING_BURSTS, + CHAT_TYPING_INTERVAL.as_secs() + ), + ), + BenchmarkDetail::new( + "Tables", + "USER conversations, USER messages, STREAM typing_events, TOPIC conversation/message forwarders", + ), + ] + } + + fn single_run(&self) -> bool { + true + } + + fn setup<'a>( + &'a self, + client: &'a KalamClient, + config: &'a Config, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let settings = ChatWorkloadSettings::from_env()?; + let usernames = build_chat_users(&config.namespace, settings.user_count); + + client + .sql_ok(&format!("CREATE NAMESPACE IF NOT EXISTS {}", config.namespace)) + .await?; + + let conversation_topic = conversation_topic_name(&config.namespace); + let message_topic = message_topic_name(&config.namespace); + + let _ = client + .sql(&format!("DROP TOPIC IF EXISTS {}", message_topic)) + .await; + let _ = client + .sql(&format!("DROP TOPIC IF EXISTS {}", conversation_topic)) + .await; + let _ = client + .sql(&format!("DROP TOPIC IF EXISTS {}", 
typing_topic_name(&config.namespace))) + .await; + let _ = client + .sql(&format!("DROP STREAM TABLE IF EXISTS {}.typing_events", config.namespace)) + .await; + let _ = client + .sql(&format!("DROP USER TABLE IF EXISTS {}.messages", config.namespace)) + .await; + let _ = client + .sql(&format!("DROP USER TABLE IF EXISTS {}.conversations", config.namespace)) + .await; + + for username in &usernames { + let _ = client + .sql(&format!("DROP USER IF EXISTS {}", sql_literal(username))) + .await; + } + + run_sql_with_retry( + client, + &format!( + "CREATE USER TABLE IF NOT EXISTS {}.conversations (id BIGINT PRIMARY KEY, peer_user TEXT NOT NULL, opened_by TEXT NOT NULL, direction TEXT NOT NULL, needs_forward BOOLEAN NOT NULL, state TEXT NOT NULL, created_at_ms BIGINT NOT NULL)", + config.namespace + ), + ) + .await?; + + run_sql_with_retry( + client, + &format!( + "CREATE USER TABLE IF NOT EXISTS {}.messages (id BIGINT PRIMARY KEY, conversation_id BIGINT NOT NULL, sender_user TEXT NOT NULL, recipient_user TEXT NOT NULL, direction TEXT NOT NULL, needs_forward BOOLEAN NOT NULL, body TEXT NOT NULL, created_at_ms BIGINT NOT NULL)", + config.namespace + ), + ) + .await?; + + run_sql_with_retry( + client, + &format!( + "CREATE STREAM TABLE IF NOT EXISTS {}.typing_events (id BIGINT PRIMARY KEY, conversation_id BIGINT NOT NULL, sender_user TEXT NOT NULL, recipient_user TEXT NOT NULL, phase TEXT NOT NULL, needs_forward BOOLEAN NOT NULL, created_at_ms BIGINT NOT NULL) WITH (TTL_SECONDS = 30)", + config.namespace + ), + ) + .await?; + + run_sql_with_retry(client, &format!("CREATE TOPIC {}", conversation_topic)).await?; + run_sql_with_retry( + client, + &format!( + "ALTER TOPIC {} ADD SOURCE {}.conversations ON INSERT", + conversation_topic, config.namespace + ), + ) + .await?; + + run_sql_with_retry(client, &format!("CREATE TOPIC {}", message_topic)).await?; + run_sql_with_retry( + client, + &format!( + "ALTER TOPIC {} ADD SOURCE {}.messages ON INSERT", + message_topic, 
config.namespace + ), + ) + .await?; + + let typing_topic = typing_topic_name(&config.namespace); + run_sql_with_retry(client, &format!("CREATE TOPIC {}", typing_topic)).await?; + run_sql_with_retry( + client, + &format!( + "ALTER TOPIC {} ADD SOURCE {}.typing_events ON INSERT", + typing_topic, config.namespace + ), + ) + .await?; + + wait_for_topic_ready(client, &conversation_topic).await?; + wait_for_topic_ready(client, &message_topic).await?; + wait_for_topic_ready(client, &typing_topic).await?; + + for username in &usernames { + run_sql_with_retry( + client, + &format!( + "CREATE USER {} WITH PASSWORD {} ROLE user", + sql_literal(username), + sql_literal(CHAT_USER_PASSWORD), + ), + ) + .await?; + } + + Ok(()) + }) + } + + fn run<'a>( + &'a self, + client: &'a KalamClient, + config: &'a Config, + iteration: u32, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let settings = ChatWorkloadSettings::from_env()?; + let users = Arc::new(build_chat_users(&config.namespace, settings.user_count)); + let target_active_user_count = + target_active_chat_user_count(users.len(), settings.realtime_conversations); + let minimum_active_user_count = + minimum_active_chat_user_count(users.len(), settings.realtime_conversations); + let stats = Arc::new(ChatWorkloadStats::default()); + let user_pool = Arc::new(UserClientPool::new( + config.urls.clone(), + CHAT_USER_PASSWORD, + stats.clone(), + )); + let global_stop = Arc::new(AtomicBool::new(false)); + let memory_probe = ChatManagedServerMemoryProbe::start(); + let forwarders = ChatTopicForwarder::start( + client.clone(), + config.namespace.clone(), + stats.clone(), + global_stop.clone(), + iteration, + ); + let conversation_ids = Arc::new(AtomicU64::new( + 40_000_000_000 + u64::from(iteration) * 1_000_000, + )); + let message_ids = Arc::new(AtomicU64::new( + 50_000_000_000 + u64::from(iteration) * 10_000_000, + )); + let typing_ids = Arc::new(AtomicU64::new( + 60_000_000_000 + u64::from(iteration) * 10_000_000, + )); + + 
println!( + " Chat workload settings: duration={}m, regular_users={}, target_active_chat_users={}, active_conversations={}, typing_burst={}x{}s", + settings.minutes, + settings.user_count, + target_active_user_count, + settings.realtime_conversations, + CHAT_TYPING_BURSTS, + CHAT_TYPING_INTERVAL.as_secs(), + ); + + let prewarmed_active_users = prewarm_user_clients( + user_pool.clone(), + users.clone(), + target_active_user_count, + minimum_active_user_count, + ) + .await?; + let active_users = Arc::new(prewarmed_active_users.usernames); + + if prewarmed_active_users.failed_attempts > 0 { + println!( + " Active user prewarm: warmed={} target={} failed_login_attempts={}", + active_users.len(), + target_active_user_count, + prewarmed_active_users.failed_attempts, + ); + } + + let scenario_started = Instant::now(); + let run_deadline = scenario_started + settings.duration(); + let delivery_timeout = chat_delivery_wait_timeout(settings.realtime_conversations); + let mut handles = Vec::with_capacity(settings.realtime_conversations as usize); + + for worker_id in 0..settings.realtime_conversations { + let namespace = config.namespace.clone(); + let worker_stats = stats.clone(); + let worker_pool = user_pool.clone(); + let worker_users = active_users.clone(); + let worker_stop = global_stop.clone(); + let worker_conversations = conversation_ids.clone(); + let worker_messages = message_ids.clone(); + let worker_typing = typing_ids.clone(); + let worker_start_delay = chat_worker_start_delay( + worker_id, + ); + let worker_deadline = run_deadline + worker_start_delay; + + handles.push(tokio::spawn(async move { + if !worker_start_delay.is_zero() { + sleep(worker_start_delay).await; + } + + run_chat_worker( + worker_id, + namespace, + worker_deadline, + worker_stats, + worker_pool, + worker_users, + worker_conversations, + worker_messages, + worker_typing, + delivery_timeout, + worker_stop.clone(), + ) + .await + })); + } + + let mut errors = Vec::new(); + for handle in handles 
{ + match handle.await { + Ok(Ok(())) => {} + Ok(Err(error)) => errors.push(error), + Err(error) => errors.push(format!("worker join error: {}", error)), + } + } + + global_stop.store(true, Ordering::Relaxed); + + if let Err(error) = forwarders.shutdown().await { + errors.push(error); + } + + let memory_summary = memory_probe.finish().await; + print_chat_summary(&stats, settings, scenario_started.elapsed(), &memory_summary); + + if errors.is_empty() { + Ok(()) + } else { + Err(format_chat_errors(&errors)) + } + }) + } + + fn teardown<'a>( + &'a self, + client: &'a KalamClient, + config: &'a Config, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let settings = ChatWorkloadSettings::from_env()?; + let users = build_chat_users(&config.namespace, settings.user_count); + + let _ = client + .sql(&format!("DROP TOPIC IF EXISTS {}", message_topic_name(&config.namespace))) + .await; + let _ = client + .sql(&format!( + "DROP TOPIC IF EXISTS {}", + conversation_topic_name(&config.namespace) + )) + .await; + let _ = client + .sql(&format!( + "DROP TOPIC IF EXISTS {}", + typing_topic_name(&config.namespace) + )) + .await; + let _ = client + .sql(&format!("DROP STREAM TABLE IF EXISTS {}.typing_events", config.namespace)) + .await; + let _ = client + .sql(&format!("DROP USER TABLE IF EXISTS {}.messages", config.namespace)) + .await; + let _ = client + .sql(&format!("DROP USER TABLE IF EXISTS {}.conversations", config.namespace)) + .await; + + for username in users { + let _ = client + .sql(&format!("DROP USER IF EXISTS {}", sql_literal(&username))) + .await; + } + + Ok(()) + }) + } +} + +#[allow(clippy::too_many_arguments)] +async fn run_chat_worker( + worker_id: u32, + namespace: String, + run_deadline: Instant, + stats: Arc, + user_pool: Arc, + users: Arc>, + conversation_ids: Arc, + message_ids: Arc, + typing_ids: Arc, + delivery_timeout: Duration, + global_stop: Arc, +) -> Result<(), String> { + let mut rng = SimpleRng::seeded(0xC0FFEE_u64 + u64::from(worker_id) * 
7_919); + let initiator_index = random_index(&mut rng, users.len()); + let mut responder_index = random_index(&mut rng, users.len()); + while responder_index == initiator_index { + responder_index = random_index(&mut rng, users.len()); + } + + let initiator_user = users[initiator_index].clone(); + let responder_user = users[responder_index].clone(); + let conversation_id = conversation_ids.fetch_add(1, Ordering::Relaxed); + let created_at_ms = epoch_ms(); + let _session_guard = SessionActivityGuard::new(stats.clone()); + + let initiator_client = user_pool.client_for(&initiator_user).await?; + let responder_client = user_pool.client_for(&responder_user).await?; + let delivery_tracker = Arc::new(SessionDeliveryTracker::new(responder_user.clone())); + let (session_stop_tx, _) = watch::channel(false); + let subscription_handles = start_initiator_subscriptions( + &initiator_client.link(), + &namespace, + conversation_id, + &initiator_user, + &delivery_tracker, + &session_stop_tx, + stats.clone(), + ) + .await?; + + let worker_result = async { + let create_conversation_started = Instant::now(); + run_tracked_sql( + &initiator_client, + &format!( + "INSERT INTO {}.conversations (id, peer_user, opened_by, direction, needs_forward, state, created_at_ms) VALUES ({}, {}, {}, 'outbound', TRUE, 'open', {})", + namespace, + conversation_id, + sql_literal(&responder_user), + sql_literal(&initiator_user), + created_at_ms, + ), + ChatSqlKind::Insert, + &stats, + ) + .await?; + stats + .create_conversation_timings + .record(create_conversation_started.elapsed()); + + let mut cycle_ordinal = 0_u64; + while Instant::now() < run_deadline && !global_stop.load(Ordering::Relaxed) { + stats.sessions_started.fetch_add(1, Ordering::Relaxed); + cycle_ordinal += 1; + + let typing_before = delivery_tracker.typing_events.load(Ordering::Relaxed); + let peer_messages_before = delivery_tracker.peer_messages.load(Ordering::Relaxed); + + let _first_message_id = emit_message( + &initiator_client, + 
&namespace, + conversation_id, + &initiator_user, + &responder_user, + &format!("msg_{}_{}_a_{}", conversation_id, worker_id, cycle_ordinal), + &stats, + &message_ids, + ) + .await?; + + let mut emitted_typing_events = 0_u64; + for _ in 0..CHAT_TYPING_BURSTS { + emit_typing_event( + &responder_client, + &namespace, + conversation_id, + &responder_user, + &initiator_user, + &typing_ids, + &stats, + ) + .await?; + emitted_typing_events += 1; + + if global_stop.load(Ordering::Relaxed) { + break; + } + + sleep(CHAT_TYPING_INTERVAL).await; + } + + let _reply_message_id = emit_message( + &responder_client, + &namespace, + conversation_id, + &responder_user, + &initiator_user, + &format!("msg_{}_{}_b_{}", conversation_id, worker_id, cycle_ordinal), + &stats, + &message_ids, + ) + .await?; + + if let Err(error) = wait_for_delivery( + &delivery_tracker, + typing_before + emitted_typing_events, + peer_messages_before + 1, + delivery_timeout, + ) + .await + { + let timeout_count = + stats.delivery_wait_timeouts.fetch_add(1, Ordering::Relaxed) + 1; + if timeout_count <= CHAT_DELIVERY_TIMEOUT_LOG_LIMIT { + let delivered_typing = delivery_tracker.typing_events.load(Ordering::Relaxed); + let delivered_peer_messages = + delivery_tracker.peer_messages.load(Ordering::Relaxed); + let messages_sent = stats.messages_sent.load(Ordering::Relaxed); + let messages_forwarded = stats.messages_forwarded.load(Ordering::Relaxed); + let typing_sent = stats.typing_events_sent.load(Ordering::Relaxed); + let typing_forwarded = stats.typing_events_forwarded.load(Ordering::Relaxed); + println!( + " Live delivery timeout sample #{}: worker={} conversation_id={} cycle={} | typing={}/{} | peer_messages={}/{} | message_forward_gap={} | typing_forward_gap={} | active_subscriptions={}", + timeout_count, + worker_id, + conversation_id, + cycle_ordinal, + delivered_typing, + typing_before + emitted_typing_events, + delivered_peer_messages, + peer_messages_before + 1, + 
messages_sent.saturating_sub(messages_forwarded), + typing_sent.saturating_sub(typing_forwarded), + stats.active_subscriptions.load(Ordering::Relaxed), + ); + } + return Err(error); + } + + stats.sessions_completed.fetch_add(1, Ordering::Relaxed); + } + + Ok(()) + } + .await; + + let _ = session_stop_tx.send(true); + let shutdown_result = shutdown_subscription_tasks(subscription_handles).await; + + match (worker_result, shutdown_result) { + (Ok(()), Ok(())) => Ok(()), + (Err(error), _) => Err(error), + (Ok(()), Err(error)) => Err(error), + } +} + +async fn emit_typing_event( + client: &KalamClient, + namespace: &str, + conversation_id: u64, + sender_user: &str, + recipient_user: &str, + typing_ids: &Arc, + stats: &Arc, +) -> Result<(), String> { + let event_id = typing_ids.fetch_add(1, Ordering::Relaxed); + let created_at_ms = epoch_ms(); + let insert_typing_started = Instant::now(); + + run_tracked_sql( + client, + &format!( + "INSERT INTO {}.typing_events (id, conversation_id, sender_user, recipient_user, phase, needs_forward, created_at_ms) VALUES ({}, {}, {}, {}, 'typing', TRUE, {})", + namespace, + event_id, + conversation_id, + sql_literal(sender_user), + sql_literal(recipient_user), + created_at_ms, + ), + ChatSqlKind::Insert, + stats, + ) + .await?; + + stats + .insert_typing_event_timings + .record(insert_typing_started.elapsed()); + stats.typing_events_sent.fetch_add(1, Ordering::Relaxed); + Ok(()) +} + +async fn emit_message( + client: &KalamClient, + namespace: &str, + conversation_id: u64, + sender_user: &str, + recipient_user: &str, + body: &str, + stats: &Arc, + message_ids: &Arc, +) -> Result { + let message_id = message_ids.fetch_add(1, Ordering::Relaxed); + let created_at_ms = epoch_ms(); + let insert_message_started = Instant::now(); + + run_tracked_sql( + client, + &format!( + "INSERT INTO {}.messages (id, conversation_id, sender_user, recipient_user, direction, needs_forward, body, created_at_ms) VALUES ({}, {}, {}, {}, 'outbound', TRUE, {}, 
{})", + namespace, + message_id, + conversation_id, + sql_literal(sender_user), + sql_literal(recipient_user), + sql_literal(body), + created_at_ms, + ), + ChatSqlKind::Insert, + stats, + ) + .await?; + + stats + .insert_message_timings + .record(insert_message_started.elapsed()); + stats.messages_sent.fetch_add(1, Ordering::Relaxed); + Ok(message_id) +} + +async fn start_initiator_subscriptions( + link: &KalamLinkClient, + namespace: &str, + conversation_id: u64, + initiator_user: &str, + tracker: &Arc, + session_stop_tx: &watch::Sender, + stats: Arc, +) -> Result>>, String> { + let message_open_started = Instant::now(); + let message_subscription = create_subscription( + link, + format!("chat_messages_{}_{}", initiator_user, conversation_id), + format!( + "SELECT id, conversation_id, sender_user, recipient_user, direction, body, created_at_ms FROM {}.messages WHERE conversation_id = {}", + namespace, conversation_id + ), + ) + .await?; + record_subscription_open(&stats, message_open_started.elapsed()); + + let typing_open_started = Instant::now(); + let typing_subscription = match create_subscription( + link, + format!("chat_typing_{}_{}", initiator_user, conversation_id), + format!( + "SELECT id, conversation_id, sender_user, recipient_user, phase, created_at_ms FROM {}.typing_events WHERE conversation_id = {} AND recipient_user = {}", + namespace, + conversation_id, + sql_literal(initiator_user), + ), + ) + .await + { + Ok(subscription) => subscription, + Err(error) => { + let mut message_subscription = message_subscription; + let _ = message_subscription.close().await; + return Err(error); + } + }; + record_subscription_open(&stats, typing_open_started.elapsed()); + + stats.subscriptions_opened.fetch_add(2, Ordering::Relaxed); + let active_subscriptions = stats.active_subscriptions.fetch_add(2, Ordering::Relaxed) + 2; + update_peak(&stats.peak_active_subscriptions, active_subscriptions); + + Ok(vec![ + spawn_subscription_drain( + message_subscription, + 
session_stop_tx.subscribe(), + stats.clone(), + tracker.clone(), + SubscriptionKind::Message, + ), + spawn_subscription_drain( + typing_subscription, + session_stop_tx.subscribe(), + stats, + tracker.clone(), + SubscriptionKind::Typing, + ), + ]) +} + +async fn create_subscription( + link: &KalamLinkClient, + subscription_id: String, + sql: String, +) -> Result { + let mut delay = CHAT_SUBSCRIBE_RETRY_BASE_DELAY; + let mut attempts = 0_u32; + + loop { + let config = SubscriptionConfig::without_initial_data( + subscription_id.clone(), + sql.clone(), + ); + + match timeout(SUBSCRIBE_TIMEOUT, link.subscribe_with_config(config)).await { + Ok(Ok(subscription)) => return Ok(subscription), + Ok(Err(error)) + if attempts < CHAT_SUBSCRIBE_RETRY_ATTEMPTS + && is_transient_chat_error(&error.to_string()) => + { + attempts += 1; + sleep(delay).await; + delay = (delay * 2).min(Duration::from_secs(5)); + } + Ok(Err(error)) => return Err(format!("subscribe error: {}", error)), + Err(_) if attempts < CHAT_SUBSCRIBE_RETRY_ATTEMPTS => { + attempts += 1; + sleep(delay).await; + delay = (delay * 2).min(Duration::from_secs(5)); + } + Err(_) => return Err("subscription timed out before becoming ready".to_string()), + } + } +} + +#[derive(Clone, Copy)] +enum SubscriptionKind { + Message, + Typing, +} + +fn spawn_subscription_drain( + mut subscription: SubscriptionManager, + mut stop_rx: watch::Receiver, + stats: Arc, + tracker: Arc, + kind: SubscriptionKind, +) -> JoinHandle> { + tokio::spawn(async move { + loop { + tokio::select! { + changed = stop_rx.changed() => { + if changed.is_ok() || changed.is_err() { + let close_result = subscription.close().await; + stats.subscriptions_closed.fetch_add(1, Ordering::Relaxed); + stats.active_subscriptions.fetch_sub(1, Ordering::Relaxed); + return close_result.map_err(|error| format!("failed to close subscription: {}", error)); + } + } + event = subscription.next() => { + match event { + Some(Ok(ChangeEvent::Ack { .. 
})) => {} + Some(Ok(ChangeEvent::InitialDataBatch { rows, .. })) => { + stats.subscription_events.fetch_add(rows.len() as u64, Ordering::Relaxed); + tracker.record_rows(&rows, kind); + } + Some(Ok(ChangeEvent::Insert { rows, .. })) => { + stats.subscription_events.fetch_add(rows.len() as u64, Ordering::Relaxed); + tracker.record_rows(&rows, kind); + } + Some(Ok(ChangeEvent::Update { rows, .. })) => { + stats.subscription_events.fetch_add(rows.len() as u64, Ordering::Relaxed); + tracker.record_rows(&rows, kind); + } + Some(Ok(ChangeEvent::Delete { .. })) => {} + Some(Ok(ChangeEvent::Error { message, .. })) => { + let _ = subscription.close().await; + stats.subscriptions_closed.fetch_add(1, Ordering::Relaxed); + stats.active_subscriptions.fetch_sub(1, Ordering::Relaxed); + return Err(format!("subscription server error: {}", message)); + } + Some(Ok(ChangeEvent::Unknown { .. })) => {} + Some(Err(error)) => { + let _ = subscription.close().await; + stats.subscriptions_closed.fetch_add(1, Ordering::Relaxed); + stats.active_subscriptions.fetch_sub(1, Ordering::Relaxed); + return Err(format!("subscription stream error: {}", error)); + } + None => { + stats.subscriptions_closed.fetch_add(1, Ordering::Relaxed); + stats.active_subscriptions.fetch_sub(1, Ordering::Relaxed); + return Ok(()); + } + } + } + } + } + }) +} + +async fn shutdown_subscription_tasks( + handles: Vec>>, +) -> Result<(), String> { + let mut errors = Vec::new(); + + for mut handle in handles { + match timeout(SUBSCRIPTION_SHUTDOWN_TIMEOUT, &mut handle).await { + Ok(Ok(Ok(()))) => {} + Ok(Ok(Err(error))) => errors.push(error), + Ok(Err(error)) => errors.push(format!("subscription join error: {}", error)), + Err(_) => { + handle.abort(); + errors.push("timed out waiting for subscription shutdown".to_string()); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(format_chat_errors(&errors)) + } +} + +struct ChatTopicForwarder { + handles: Vec>>, +} + +impl ChatTopicForwarder { + fn start( + 
admin_client: KalamClient, + namespace: String, + stats: Arc, + global_stop: Arc, + iteration: u32, + ) -> Self { + let conversation_handle = tokio::spawn(run_conversation_forwarder( + admin_client.clone(), + namespace.clone(), + stats.clone(), + global_stop.clone(), + iteration, + )); + let message_handle = tokio::spawn(run_message_forwarder( + admin_client.clone(), + namespace.clone(), + stats.clone(), + global_stop.clone(), + iteration, + )); + let typing_handle = tokio::spawn(run_typing_forwarder( + admin_client, + namespace, + stats, + global_stop, + iteration, + )); + + Self { + handles: vec![conversation_handle, message_handle, typing_handle], + } + } + + async fn shutdown(self) -> Result<(), String> { + let mut errors = Vec::new(); + + for mut handle in self.handles { + match timeout(SUBSCRIPTION_SHUTDOWN_TIMEOUT, &mut handle).await { + Ok(Ok(Ok(()))) => {} + Ok(Ok(Err(error))) => errors.push(error), + Ok(Err(error)) => errors.push(format!("forwarder join error: {}", error)), + Err(_) => { + handle.abort(); + errors.push("timed out waiting for topic forwarder shutdown".to_string()); + } + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(format_chat_errors(&errors)) + } + } +} + +async fn run_conversation_forwarder( + admin_client: KalamClient, + namespace: String, + stats: Arc, + global_stop: Arc, + iteration: u32, +) -> Result<(), String> { + let topic = conversation_topic_name(&namespace); + let mut consumer = admin_client + .link() + .clone() + .consumer() + .topic(&topic) + .group_id(&format!("chat_rt_conv_forward_{}", iteration)) + .auto_offset_reset(AutoOffsetReset::Earliest) + .max_poll_records(128) + .poll_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .build() + .map_err(|error| format!("conversation forwarder build: {}", error))?; + + while !global_stop.load(Ordering::Relaxed) { + let records = consumer + .poll_with_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .await + .map_err(|error| format!("conversation forwarder poll: {}", error))?; + + if 
records.is_empty() { + continue; + } + + for record in records { + stats.conversation_topic_records.fetch_add(1, Ordering::Relaxed); + + if record.op != TopicOp::Insert { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + consumer.mark_processed(&record); + continue; + } + + let forwarded = match parse_conversation_forward_record(&record.payload) { + Ok(row) if row.needs_forward => { + forward_conversation(&admin_client, &namespace, &row, &stats).await? + } + Ok(_) => { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + false + } + Err(error) => { + let _ = consumer.close().await; + return Err(format!("conversation topic payload decode: {}", error)); + } + }; + + if forwarded { + stats.conversations_forwarded.fetch_add(1, Ordering::Relaxed); + } + + consumer.mark_processed(&record); + } + + consumer + .commit_sync() + .await + .map_err(|error| format!("conversation forwarder commit: {}", error))?; + } + + consumer + .close() + .await + .map_err(|error| format!("conversation forwarder close: {}", error)) +} + +async fn run_message_forwarder( + admin_client: KalamClient, + namespace: String, + stats: Arc, + global_stop: Arc, + iteration: u32, +) -> Result<(), String> { + let topic = message_topic_name(&namespace); + let mut consumer = admin_client + .link() + .clone() + .consumer() + .topic(&topic) + .group_id(&format!("chat_rt_msg_forward_{}", iteration)) + .auto_offset_reset(AutoOffsetReset::Earliest) + .max_poll_records(128) + .poll_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .build() + .map_err(|error| format!("message forwarder build: {}", error))?; + + while !global_stop.load(Ordering::Relaxed) { + let records = consumer + .poll_with_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .await + .map_err(|error| format!("message forwarder poll: {}", error))?; + + if records.is_empty() { + continue; + } + + let mut records_to_commit = Vec::new(); + let mut rows_to_forward = Vec::new(); + + for record in records { + 
stats.message_topic_records.fetch_add(1, Ordering::Relaxed); + + if record.op != TopicOp::Insert { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + consumer.mark_processed(&record); + continue; + } + + match parse_message_forward_record(&record.payload) { + Ok(row) if row.needs_forward => { + rows_to_forward.push(row); + records_to_commit.push(record); + } + Ok(_) => { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + consumer.mark_processed(&record); + } + Err(error) => { + let _ = consumer.close().await; + return Err(format!("message topic payload decode: {}", error)); + } + } + } + + if !rows_to_forward.is_empty() { + let forwarded_count = forward_message_rows( + &admin_client, + &namespace, + rows_to_forward, + &stats, + ) + .await?; + stats + .messages_forwarded + .fetch_add(forwarded_count, Ordering::Relaxed); + + for record in &records_to_commit { + consumer.mark_processed(record); + } + } + + consumer + .commit_sync() + .await + .map_err(|error| format!("message forwarder commit: {}", error))?; + } + + consumer + .close() + .await + .map_err(|error| format!("message forwarder close: {}", error)) +} + +async fn run_typing_forwarder( + admin_client: KalamClient, + namespace: String, + stats: Arc, + global_stop: Arc, + iteration: u32, +) -> Result<(), String> { + let topic = typing_topic_name(&namespace); + let mut consumer = admin_client + .link() + .clone() + .consumer() + .topic(&topic) + .group_id(&format!("chat_rt_typing_forward_{}", iteration)) + .auto_offset_reset(AutoOffsetReset::Earliest) + .max_poll_records(128) + .poll_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .build() + .map_err(|error| format!("typing forwarder build: {}", error))?; + + while !global_stop.load(Ordering::Relaxed) { + let records = consumer + .poll_with_timeout(CHAT_FORWARDER_POLL_TIMEOUT) + .await + .map_err(|error| format!("typing forwarder poll: {}", error))?; + + if records.is_empty() { + continue; + } + + let mut records_to_commit = Vec::new(); + let 
mut rows_to_forward = Vec::new(); + + for record in records { + stats.typing_topic_records.fetch_add(1, Ordering::Relaxed); + + if record.op != TopicOp::Insert { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + consumer.mark_processed(&record); + continue; + } + + match parse_typing_forward_record(&record.payload) { + Ok(row) if row.needs_forward => { + rows_to_forward.push(row); + records_to_commit.push(record); + } + Ok(_) => { + stats.skipped_topic_records.fetch_add(1, Ordering::Relaxed); + consumer.mark_processed(&record); + } + Err(error) => { + let _ = consumer.close().await; + return Err(format!("typing topic payload decode: {}", error)); + } + } + } + + if !rows_to_forward.is_empty() { + let forwarded_count = forward_typing_rows( + &admin_client, + &namespace, + rows_to_forward, + &stats, + ) + .await?; + stats + .typing_events_forwarded + .fetch_add(forwarded_count, Ordering::Relaxed); + + for record in &records_to_commit { + consumer.mark_processed(record); + } + } + + consumer + .commit_sync() + .await + .map_err(|error| format!("typing forwarder commit: {}", error))?; + } + + consumer + .close() + .await + .map_err(|error| format!("typing forwarder close: {}", error)) +} + +#[derive(Debug, Clone)] +struct ConversationForwardRecord { + id: u64, + peer_user: String, + opened_by: String, + needs_forward: bool, + state: String, + created_at_ms: u64, +} + +#[derive(Debug, Clone)] +struct MessageForwardRecord { + id: u64, + conversation_id: u64, + sender_user: String, + recipient_user: String, + needs_forward: bool, + body: String, + created_at_ms: u64, +} + +#[derive(Debug, Clone)] +struct TypingForwardRecord { + id: u64, + conversation_id: u64, + sender_user: String, + recipient_user: String, + phase: String, + needs_forward: bool, + created_at_ms: u64, +} + +async fn forward_conversation( + admin_client: &KalamClient, + namespace: &str, + row: &ConversationForwardRecord, + stats: &Arc, +) -> Result { + let statement = format!( + "INSERT 
INTO {}.conversations (id, peer_user, opened_by, direction, needs_forward, state, created_at_ms) VALUES ({}, {}, {}, 'inbound', FALSE, {}, {})", + namespace, + row.id, + sql_literal(&row.opened_by), + sql_literal(&row.opened_by), + sql_literal(&row.state), + row.created_at_ms, + ); + + match run_tracked_sql( + admin_client, + &execute_as_user_sql(&row.peer_user, &statement), + ChatSqlKind::Insert, + stats, + ) + .await + { + Ok(_) => Ok(true), + Err(error) if is_duplicate_chat_error(&error) => Ok(false), + Err(error) => Err(error), + } +} + +async fn forward_message( + admin_client: &KalamClient, + namespace: &str, + row: &MessageForwardRecord, + stats: &Arc, +) -> Result { + let statement = format!( + "INSERT INTO {}.messages (id, conversation_id, sender_user, recipient_user, direction, needs_forward, body, created_at_ms) VALUES ({}, {}, {}, {}, 'inbound', FALSE, {}, {})", + namespace, + row.id, + row.conversation_id, + sql_literal(&row.sender_user), + sql_literal(&row.recipient_user), + sql_literal(&row.body), + row.created_at_ms, + ); + + match run_tracked_sql( + admin_client, + &execute_as_user_sql(&row.recipient_user, &statement), + ChatSqlKind::Insert, + stats, + ) + .await + { + Ok(_) => Ok(true), + Err(error) if is_duplicate_chat_error(&error) => Ok(false), + Err(error) => Err(error), + } +} + +async fn forward_message_rows( + admin_client: &KalamClient, + namespace: &str, + rows: Vec, + stats: &Arc, +) -> Result { + let mut join_set = JoinSet::new(); + let mut rows = rows.into_iter(); + let mut in_flight = 0_usize; + let mut forwarded_count = 0_u64; + + loop { + while in_flight < CHAT_MESSAGE_FORWARDER_MAX_IN_FLIGHT { + let Some(row) = rows.next() else { + break; + }; + + let forward_client = admin_client.clone(); + let forward_namespace = namespace.to_string(); + let forward_stats = stats.clone(); + join_set.spawn(async move { + forward_message(&forward_client, &forward_namespace, &row, &forward_stats).await + }); + in_flight += 1; + } + + if in_flight == 
0 { + break; + } + + match join_set.join_next().await { + Some(Ok(Ok(forwarded))) => { + if forwarded { + forwarded_count += 1; + } + } + Some(Ok(Err(error))) => { + join_set.abort_all(); + while join_set.join_next().await.is_some() {} + return Err(error); + } + Some(Err(error)) => { + join_set.abort_all(); + while join_set.join_next().await.is_some() {} + return Err(format!("message forward task join error: {}", error)); + } + None => break, + } + + in_flight = in_flight.saturating_sub(1); + } + + Ok(forwarded_count) +} + +async fn forward_typing_event( + admin_client: &KalamClient, + namespace: &str, + row: &TypingForwardRecord, + stats: &Arc, +) -> Result { + let statement = format!( + "INSERT INTO {}.typing_events (id, conversation_id, sender_user, recipient_user, phase, needs_forward, created_at_ms) VALUES ({}, {}, {}, {}, {}, FALSE, {})", + namespace, + row.id, + row.conversation_id, + sql_literal(&row.sender_user), + sql_literal(&row.recipient_user), + sql_literal(&row.phase), + row.created_at_ms, + ); + + run_tracked_sql( + admin_client, + &execute_as_user_sql(&row.recipient_user, &statement), + ChatSqlKind::Insert, + stats, + ) + .await + .map(|_| true) +} + +async fn forward_typing_rows( + admin_client: &KalamClient, + namespace: &str, + rows: Vec, + stats: &Arc, +) -> Result { + let mut join_set = JoinSet::new(); + let mut rows = rows.into_iter(); + let mut in_flight = 0_usize; + let mut forwarded_count = 0_u64; + + loop { + while in_flight < CHAT_TYPING_FORWARDER_MAX_IN_FLIGHT { + let Some(row) = rows.next() else { + break; + }; + + let forward_client = admin_client.clone(); + let forward_namespace = namespace.to_string(); + let forward_stats = stats.clone(); + join_set.spawn(async move { + forward_typing_event(&forward_client, &forward_namespace, &row, &forward_stats) + .await + }); + in_flight += 1; + } + + if in_flight == 0 { + break; + } + + match join_set.join_next().await { + Some(Ok(Ok(forwarded))) => { + if forwarded { + forwarded_count += 1; + 
} + } + Some(Ok(Err(error))) => { + join_set.abort_all(); + while join_set.join_next().await.is_some() {} + return Err(error); + } + Some(Err(error)) => { + join_set.abort_all(); + while join_set.join_next().await.is_some() {} + return Err(format!("typing forward task join error: {}", error)); + } + None => break, + } + + in_flight = in_flight.saturating_sub(1); + } + + Ok(forwarded_count) +} + +fn parse_conversation_forward_record(payload_bytes: &[u8]) -> Result { + let payload: JsonValue = serde_json::from_slice(payload_bytes) + .map_err(|error| format!("invalid topic payload json: {}", error))?; + let row = payload.get("row").unwrap_or(&payload); + + Ok(ConversationForwardRecord { + id: json_u64_field(row, "id")?, + peer_user: json_string_field(row, "peer_user")?, + opened_by: json_string_field(row, "opened_by")?, + needs_forward: json_bool_field(row, "needs_forward")?, + state: json_string_field(row, "state")?, + created_at_ms: json_u64_field(row, "created_at_ms")?, + }) +} + +fn parse_message_forward_record(payload_bytes: &[u8]) -> Result { + let payload: JsonValue = serde_json::from_slice(payload_bytes) + .map_err(|error| format!("invalid topic payload json: {}", error))?; + let row = payload.get("row").unwrap_or(&payload); + + Ok(MessageForwardRecord { + id: json_u64_field(row, "id")?, + conversation_id: json_u64_field(row, "conversation_id")?, + sender_user: json_string_field(row, "sender_user")?, + recipient_user: json_string_field(row, "recipient_user")?, + needs_forward: json_bool_field(row, "needs_forward")?, + body: json_string_field(row, "body")?, + created_at_ms: json_u64_field(row, "created_at_ms")?, + }) +} + +fn parse_typing_forward_record(payload_bytes: &[u8]) -> Result { + let payload: JsonValue = serde_json::from_slice(payload_bytes) + .map_err(|error| format!("invalid topic payload json: {}", error))?; + let row = payload.get("row").unwrap_or(&payload); + + Ok(TypingForwardRecord { + id: json_u64_field(row, "id")?, + conversation_id: 
json_u64_field(row, "conversation_id")?, + sender_user: json_string_field(row, "sender_user")?, + recipient_user: json_string_field(row, "recipient_user")?, + phase: json_string_field(row, "phase")?, + needs_forward: json_bool_field(row, "needs_forward")?, + created_at_ms: json_u64_field(row, "created_at_ms")?, + }) +} + +struct UserClientPool { + urls: Arc>, + password: Arc, + clients: AsyncMutex>, + login_permits: Arc, + stats: Arc, +} + +impl UserClientPool { + fn new(urls: Vec, password: &str, stats: Arc) -> Self { + Self { + urls: Arc::new(urls), + password: Arc::new(password.to_string()), + clients: AsyncMutex::new(HashMap::new()), + login_permits: Arc::new(Semaphore::new(CHAT_LOGIN_MAX_IN_FLIGHT)), + stats, + } + } + + async fn client_for(&self, username: &str) -> Result { + { + let clients = self.clients.lock().await; + if let Some(client) = clients.get(username) { + return Ok(client.clone()); + } + } + + let _login_permit = self + .login_permits + .clone() + .acquire_owned() + .await + .map_err(|error| format!("failed to acquire login permit: {}", error))?; + + { + let clients = self.clients.lock().await; + if let Some(client) = clients.get(username) { + return Ok(client.clone()); + } + } + + let fresh_client = login_user_with_retry( + self.urls.as_ref(), + username, + self.password.as_ref(), + ) + .await + .map_err(|error| format!("login failed for {}: {}", username, error))?; + + let mut clients = self.clients.lock().await; + if let Some(client) = clients.get(username) { + return Ok(client.clone()); + } + + clients.insert(username.to_string(), fresh_client.clone()); + self.stats.logged_in_users.fetch_add(1, Ordering::Relaxed); + Ok(fresh_client) + } +} + +async fn login_user_with_retry( + urls: &[String], + username: &str, + password: &str, +) -> Result { + let mut delay = CHAT_LOGIN_RETRY_BASE_DELAY; + let mut attempts = 0_u32; + + loop { + match KalamClient::login_steady_state(urls, username, password).await { + Ok(client) => return Ok(client), + 
Err(error) + if attempts < CHAT_LOGIN_RETRY_ATTEMPTS + && is_transient_chat_error(&error) => + { + attempts += 1; + sleep(delay).await; + delay = (delay * 2).min(Duration::from_secs(5)); + } + Err(error) => return Err(error), + } + } +} + +struct SessionDeliveryTracker { + peer_sender: String, + typing_events: AtomicU64, + peer_messages: AtomicU64, +} + +impl SessionDeliveryTracker { + fn new(peer_sender: String) -> Self { + Self { + peer_sender, + typing_events: AtomicU64::new(0), + peer_messages: AtomicU64::new(0), + } + } + + fn record_rows(&self, rows: &[HashMap], kind: SubscriptionKind) { + match kind { + SubscriptionKind::Message => { + for row in rows { + if row + .get("sender_user") + .and_then(KalamCellValue::as_text) + .map(|value| value == self.peer_sender.as_str()) + .unwrap_or(false) + { + self.peer_messages.fetch_add(1, Ordering::Relaxed); + } + } + } + SubscriptionKind::Typing => { + self.typing_events + .fetch_add(rows.len() as u64, Ordering::Relaxed); + } + } + } +} + +async fn wait_for_delivery( + tracker: &SessionDeliveryTracker, + required_typing_events: u64, + required_peer_messages: u64, + timeout_duration: Duration, +) -> Result<(), String> { + let deadline = Instant::now() + timeout_duration; + + loop { + if tracker.typing_events.load(Ordering::Relaxed) >= required_typing_events + && tracker.peer_messages.load(Ordering::Relaxed) >= required_peer_messages + { + return Ok(()); + } + + if Instant::now() >= deadline { + return Err(format!( + "timed out waiting for live delivery (typing={}, peer_messages={})", + tracker.typing_events.load(Ordering::Relaxed), + tracker.peer_messages.load(Ordering::Relaxed), + )); + } + + sleep(Duration::from_millis(100)).await; + } +} + +fn build_chat_users(namespace: &str, user_count: u32) -> Vec { + let prefix = sanitize_identifier_fragment(namespace); + let mut users = Vec::with_capacity(user_count as usize); + for ordinal in 1..=user_count { + users.push(format!("benchchat_{}_u_{:06}", prefix, ordinal)); + } + 
users +} + +fn target_active_chat_user_count(total_user_count: usize, realtime_conversations: u32) -> usize { + total_user_count.min((realtime_conversations as usize).max(2)) +} + +fn minimum_active_chat_user_count(total_user_count: usize, realtime_conversations: u32) -> usize { + total_user_count.min(((realtime_conversations as usize) / 2).max(2)) +} + +struct PrewarmedActiveUsers { + usernames: Vec, + failed_attempts: u64, +} + +async fn prewarm_user_clients( + user_pool: Arc, + users: Arc>, + target_active_user_count: usize, + minimum_active_user_count: usize, +) -> Result { + let mut warmed_usernames = Vec::with_capacity(target_active_user_count); + let mut failure_samples = Vec::new(); + let mut failed_attempts = 0_u64; + let mut join_set = JoinSet::new(); + let mut user_candidates = users.iter().cloned(); + let mut in_flight = 0_usize; + + while in_flight < CHAT_LOGIN_MAX_IN_FLIGHT { + let Some(username) = user_candidates.next() else { + break; + }; + let pool = user_pool.clone(); + join_set.spawn(async move { + pool.client_for(&username).await.map(|_| username) + }); + in_flight += 1; + } + + while in_flight > 0 && warmed_usernames.len() < target_active_user_count { + let Some(result) = join_set.join_next().await else { + break; + }; + in_flight = in_flight.saturating_sub(1); + + match result { + Ok(Ok(username)) => warmed_usernames.push(username), + Ok(Err(error)) => { + failed_attempts += 1; + if failure_samples.len() < 5 { + failure_samples.push(error); + } + } + Err(error) => { + failed_attempts += 1; + if failure_samples.len() < 5 { + failure_samples.push(format!("active-user prewarm join error: {}", error)); + } + } + } + + while in_flight < CHAT_LOGIN_MAX_IN_FLIGHT + && warmed_usernames.len() + in_flight < target_active_user_count + { + let Some(username) = user_candidates.next() else { + break; + }; + let pool = user_pool.clone(); + join_set.spawn(async move { + pool.client_for(&username).await.map(|_| username) + }); + in_flight += 1; + } + } + + 
join_set.abort_all(); + + while join_set.join_next().await.is_some() {} + + if warmed_usernames.len() < minimum_active_user_count { + let mut error = format!( + "active-user prewarm only warmed {}/{} required users (target={}, failed_login_attempts={})", + warmed_usernames.len(), + minimum_active_user_count, + target_active_user_count, + failed_attempts, + ); + + for sample in failure_samples.iter().take(5) { + error.push_str("\n - "); + error.push_str(sample); + } + + return Err(error); + } + + Ok(PrewarmedActiveUsers { + usernames: warmed_usernames, + failed_attempts, + }) +} + +fn sanitize_identifier_fragment(value: &str) -> String { + let mut out = String::with_capacity(value.len()); + for ch in value.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else { + out.push('_'); + } + } + if out.is_empty() { + "bench".to_string() + } else { + out + } +} + +fn conversation_topic_name(namespace: &str) -> String { + format!("{}.{}", namespace, CHAT_CONVERSATION_TOPIC_SUFFIX) +} + +fn message_topic_name(namespace: &str) -> String { + format!("{}.{}", namespace, CHAT_MESSAGE_TOPIC_SUFFIX) +} + +fn typing_topic_name(namespace: &str) -> String { + format!("{}.{}", namespace, CHAT_TYPING_TOPIC_SUFFIX) +} + +async fn wait_for_topic_ready(client: &KalamClient, topic_name: &str) -> Result<(), String> { + let deadline = Instant::now() + Duration::from_secs(10); + let sql = format!( + "SELECT topic_id FROM system.topics WHERE topic_id = {} LIMIT 1", + sql_literal(topic_name) + ); + + loop { + let response = run_sql_with_retry(client, &sql).await?; + if sql_response_has_rows(&response) { + return Ok(()); + } + + if Instant::now() >= deadline { + return Err(format!("timed out waiting for topic {} to become ready", topic_name)); + } + + sleep(Duration::from_millis(100)).await; + } +} + +fn execute_as_user_sql(user: &str, inner_sql: &str) -> String { + format!("EXECUTE AS USER {} ({})", sql_literal(user), inner_sql) +} + +fn sql_literal(value: 
&str) -> String { + format!("'{}'", value.replace('\'', "''")) +} + +fn json_string_field(row: &JsonValue, key: &str) -> Result { + match row.get(key) { + Some(JsonValue::String(value)) => Ok(value.clone()), + Some(other) => Err(format!("expected string field {} but found {}", key, other)), + None => Err(format!("missing field {}", key)), + } +} + +fn json_u64_field(row: &JsonValue, key: &str) -> Result { + match row.get(key) { + Some(JsonValue::Number(value)) => value + .as_u64() + .ok_or_else(|| format!("field {} is not a u64", key)), + Some(JsonValue::String(value)) => value + .parse::() + .map_err(|error| format!("field {} is not a valid u64: {}", key, error)), + Some(other) => Err(format!("expected numeric field {} but found {}", key, other)), + None => Err(format!("missing field {}", key)), + } +} + +fn json_bool_field(row: &JsonValue, key: &str) -> Result { + match row.get(key) { + Some(JsonValue::Bool(value)) => Ok(*value), + Some(JsonValue::String(value)) => value + .parse::() + .map_err(|error| format!("field {} is not a valid bool: {}", key, error)), + Some(other) => Err(format!("expected bool field {} but found {}", key, other)), + None => Err(format!("missing field {}", key)), + } +} + +fn sql_response_has_rows(response: &SqlResponse) -> bool { + response.results.iter().any(|result| { + result + .rows + .as_ref() + .map(|rows| !rows.is_empty()) + .unwrap_or(false) + || result.row_count.unwrap_or(0) > 0 + }) +} + +fn format_chat_errors(errors: &[String]) -> String { + let mut summary = format!("{} error(s)", errors.len()); + for error in errors.iter().take(5) { + summary.push_str("\n - "); + summary.push_str(error); + } + summary +} + +fn parse_u64_env(name: &str, default: u64) -> Result { + match std::env::var(name) { + Ok(value) => value + .parse::() + .map_err(|error| format!("{} must be an integer: {}", name, error)), + Err(_) => Ok(default), + } +} + +fn parse_u32_env(name: &str, default: u32) -> Result { + match std::env::var(name) { + Ok(value) 
=> value + .parse::() + .map_err(|error| format!("{} must be an integer: {}", name, error)), + Err(_) => Ok(default), + } +} + +#[derive(Clone, Copy)] +enum ChatSqlKind { + Insert, +} + +async fn run_tracked_sql( + client: &KalamClient, + sql: &str, + kind: ChatSqlKind, + stats: &Arc, +) -> Result { + let started = Instant::now(); + let result = run_sql_with_retry(client, sql).await; + + if result.is_ok() { + record_sql_metric(stats, kind, started.elapsed()); + } + + result +} + +fn record_sql_metric(stats: &ChatWorkloadStats, kind: ChatSqlKind, elapsed: Duration) { + let elapsed_us = duration_to_us(elapsed); + match kind { + ChatSqlKind::Insert => { + stats.inserts.fetch_add(1, Ordering::Relaxed); + stats.insert_latency_us.fetch_add(elapsed_us, Ordering::Relaxed); + update_peak(&stats.insert_max_us, elapsed_us); + } + } +} + +fn record_subscription_open(stats: &ChatWorkloadStats, elapsed: Duration) { + let elapsed_us = duration_to_us(elapsed); + stats + .subscription_open_latency_us + .fetch_add(elapsed_us, Ordering::Relaxed); + update_peak(&stats.subscription_open_max_us, elapsed_us); + stats.subscription_connect_timings.record(elapsed); +} + +#[derive(Default)] +struct TimingSeries { + samples_us: Mutex>, +} + +impl TimingSeries { + fn record(&self, elapsed: Duration) { + let elapsed_us = duration_to_us(elapsed); + let mut samples = lock_unpoisoned(&self.samples_us); + samples.push(elapsed_us); + } + + fn snapshot(&self) -> TimingStatsSnapshot { + let mut samples = lock_unpoisoned(&self.samples_us).clone(); + if samples.is_empty() { + return TimingStatsSnapshot::default(); + } + + samples.sort_unstable(); + let count = samples.len() as u64; + let total_us = samples.iter().copied().sum::(); + + TimingStatsSnapshot { + count, + mean_us: total_us as f64 / count as f64, + p50_us: percentile_us(&samples, 50.0), + p90_us: percentile_us(&samples, 90.0), + p95_us: percentile_us(&samples, 95.0), + p99_us: percentile_us(&samples, 99.0), + } + } +} + +#[derive(Default)] 
+struct TimingStatsSnapshot { + count: u64, + mean_us: f64, + p50_us: f64, + p90_us: f64, + p95_us: f64, + p99_us: f64, +} + +impl TimingStatsSnapshot { + fn display_ms(&self, label: &str) -> String { + if self.count == 0 { + return format!(" {}: n/a", label); + } + + format!( + " {}: n={} mean={:.2}ms p50={:.2}ms p90={:.2}ms p95={:.2}ms p99={:.2}ms", + label, + self.count, + self.mean_us / 1000.0, + self.p50_us / 1000.0, + self.p90_us / 1000.0, + self.p95_us / 1000.0, + self.p99_us / 1000.0, + ) + } +} + +struct SessionActivityGuard { + stats: Arc, +} + +impl SessionActivityGuard { + fn new(stats: Arc) -> Self { + let active_sessions = stats.active_sessions.fetch_add(1, Ordering::Relaxed) + 1; + update_peak(&stats.peak_active_sessions, active_sessions); + Self { stats } + } +} + +impl Drop for SessionActivityGuard { + fn drop(&mut self) { + self.stats.active_sessions.fetch_sub(1, Ordering::Relaxed); + } +} + +#[derive(Default)] +struct ChatManagedServerMemorySummary { + start_rss: Option, + peak_rss: Option, + end_rss: Option, +} + +struct ChatManagedServerMemoryProbe { + stop_tx: Option>, + handle: Option>, + fallback: ChatManagedServerMemorySummary, +} + +impl ChatManagedServerMemoryProbe { + fn start() -> Self { + let Some(mut tracker) = ManagedServerMemoryTracker::from_env() else { + return Self { + stop_tx: None, + handle: None, + fallback: ChatManagedServerMemorySummary::default(), + }; + }; + + let start_rss = tracker.sample_rss_bytes(); + let fallback = ChatManagedServerMemorySummary { + start_rss, + peak_rss: start_rss, + end_rss: start_rss, + }; + let (stop_tx, mut stop_rx) = watch::channel(false); + + let handle = tokio::spawn(async move { + let mut peak_rss = start_rss; + let mut end_rss = start_rss; + + loop { + tokio::select! 
{ + changed = stop_rx.changed() => { + if changed.is_ok() || changed.is_err() { + if let Some(current_rss) = tracker.sample_rss_bytes() { + end_rss = Some(current_rss); + peak_rss = max_option_u64(peak_rss, Some(current_rss)); + } + return ChatManagedServerMemorySummary { + start_rss, + peak_rss, + end_rss, + }; + } + } + _ = sleep(CHAT_MEMORY_SAMPLE_INTERVAL) => { + if let Some(current_rss) = tracker.sample_rss_bytes() { + end_rss = Some(current_rss); + peak_rss = max_option_u64(peak_rss, Some(current_rss)); + } + } + } + } + }); + + Self { + stop_tx: Some(stop_tx), + handle: Some(handle), + fallback, + } + } + + async fn finish(mut self) -> ChatManagedServerMemorySummary { + if let Some(stop_tx) = self.stop_tx.take() { + let _ = stop_tx.send(true); + } + + match self.handle.take() { + Some(handle) => match handle.await { + Ok(summary) => summary, + Err(_) => self.fallback, + }, + None => self.fallback, + } + } +} + +struct ManagedServerMemoryTracker { + pid: Pid, + system: System, +} + +impl ManagedServerMemoryTracker { + fn from_env() -> Option { + if std::env::var("KALAMDB_BENCH_MANAGED_SERVER").ok().as_deref() != Some("1") { + return None; + } + + let pid = std::env::var("KALAMDB_BENCH_SERVER_PID") + .ok() + .and_then(|value| value.parse::().ok())?; + + Some(Self { + pid: Pid::from_u32(pid), + system: System::new_with_specifics( + RefreshKind::nothing().with_memory(MemoryRefreshKind::everything()), + ), + }) + } + + fn sample_rss_bytes(&mut self) -> Option { + let process_refresh = ProcessRefreshKind::nothing().with_memory(); + self.system.refresh_processes_specifics( + ProcessesToUpdate::Some(&[self.pid]), + false, + process_refresh, + ); + self.system.process(self.pid).map(|process| process.memory()) + } +} + +fn print_chat_summary( + stats: &Arc, + settings: ChatWorkloadSettings, + scenario_elapsed: Duration, + memory: &ChatManagedServerMemorySummary, +) { + let subscription_connect_stats = stats.subscription_connect_timings.snapshot(); + let 
create_conversation_stats = stats.create_conversation_timings.snapshot(); + let insert_message_stats = stats.insert_message_timings.snapshot(); + let insert_typing_stats = stats.insert_typing_event_timings.snapshot(); + let sessions_started = stats.sessions_started.load(Ordering::Relaxed); + let sessions_completed = stats.sessions_completed.load(Ordering::Relaxed); + let delivery_wait_timeouts = stats.delivery_wait_timeouts.load(Ordering::Relaxed); + let peak_active_sessions = stats.peak_active_sessions.load(Ordering::Relaxed); + let logged_in_users = stats.logged_in_users.load(Ordering::Relaxed); + let selects = stats.selects.load(Ordering::Relaxed); + let inserts = stats.inserts.load(Ordering::Relaxed); + let updates = stats.updates.load(Ordering::Relaxed); + let select_latency_us = stats.select_latency_us.load(Ordering::Relaxed); + let insert_latency_us = stats.insert_latency_us.load(Ordering::Relaxed); + let update_latency_us = stats.update_latency_us.load(Ordering::Relaxed); + let select_max_us = stats.select_max_us.load(Ordering::Relaxed); + let insert_max_us = stats.insert_max_us.load(Ordering::Relaxed); + let update_max_us = stats.update_max_us.load(Ordering::Relaxed); + let messages_sent = stats.messages_sent.load(Ordering::Relaxed); + let typing_events_sent = stats.typing_events_sent.load(Ordering::Relaxed); + let conversations_forwarded = stats.conversations_forwarded.load(Ordering::Relaxed); + let messages_forwarded = stats.messages_forwarded.load(Ordering::Relaxed); + let typing_events_forwarded = stats.typing_events_forwarded.load(Ordering::Relaxed); + let conversation_topic_records = stats.conversation_topic_records.load(Ordering::Relaxed); + let message_topic_records = stats.message_topic_records.load(Ordering::Relaxed); + let typing_topic_records = stats.typing_topic_records.load(Ordering::Relaxed); + let skipped_topic_records = stats.skipped_topic_records.load(Ordering::Relaxed); + let subscriptions_opened = 
stats.subscriptions_opened.load(Ordering::Relaxed); + let subscriptions_closed = stats.subscriptions_closed.load(Ordering::Relaxed); + let peak_active_subscriptions = stats.peak_active_subscriptions.load(Ordering::Relaxed); + let subscription_open_latency_us = stats.subscription_open_latency_us.load(Ordering::Relaxed); + let subscription_open_max_us = stats.subscription_open_max_us.load(Ordering::Relaxed); + let subscription_events = stats.subscription_events.load(Ordering::Relaxed); + let total_ops = selects + inserts + updates; + let duration_secs = scenario_elapsed.as_secs_f64(); + + println!( + " Chat pressure: elapsed={:.1}s | peak_active_sessions={} | logged_in_users={} | peak_live_subscriptions={}", + duration_secs, + peak_active_sessions, + logged_in_users, + peak_active_subscriptions, + ); + println!( + " Chat rates: sessions={:.1}/s | user_messages={:.1}/s | typing={:.1}/s | total_sql={:.1}/s | delivered_live_events={:.1}/s", + per_sec(sessions_completed, duration_secs), + per_sec(messages_sent, duration_secs), + per_sec(typing_events_sent, duration_secs), + per_sec(total_ops, duration_secs), + per_sec(subscription_events, duration_secs), + ); + println!( + " SQL breakdown: select={:.1}/s avg={:.2}ms max={:.2}ms | insert={:.1}/s avg={:.2}ms max={:.2}ms | update={:.1}/s avg={:.2}ms max={:.2}ms", + per_sec(selects, duration_secs), + avg_latency_ms(select_latency_us, selects), + us_to_ms(select_max_us), + per_sec(inserts, duration_secs), + avg_latency_ms(insert_latency_us, inserts), + us_to_ms(insert_max_us), + per_sec(updates, duration_secs), + avg_latency_ms(update_latency_us, updates), + us_to_ms(update_max_us), + ); + println!( + " Forwarder: mirrored_conversations={:.1}/s ({}) | mirrored_messages={:.1}/s ({}) | mirrored_typing={:.1}/s ({}) | conversation_topic_records={} | message_topic_records={} | typing_topic_records={} | skipped_topic_records={}", + per_sec(conversations_forwarded, duration_secs), + conversations_forwarded, + 
per_sec(messages_forwarded, duration_secs), + messages_forwarded, + per_sec(typing_events_forwarded, duration_secs), + typing_events_forwarded, + conversation_topic_records, + message_topic_records, + typing_topic_records, + skipped_topic_records, + ); + println!( + " Delivery diagnostics: timed_out_waits={} | message_forward_gap={} | typing_forward_gap={}", + delivery_wait_timeouts, + messages_sent.saturating_sub(messages_forwarded), + typing_events_sent.saturating_sub(typing_events_forwarded), + ); + println!( + " Session shape: avg_messages/session={:.2} | avg_typing/session={:.2} | subscriptions_opened={} | subscriptions_closed={} | avg_subscribe_open={:.2}ms max={:.2}ms", + ratio(messages_sent, sessions_completed), + ratio(typing_events_sent, sessions_completed), + subscriptions_opened, + subscriptions_closed, + avg_latency_ms(subscription_open_latency_us, subscriptions_opened), + us_to_ms(subscription_open_max_us), + ); + println!(" Timing percentiles:"); + println!("{}", subscription_connect_stats.display_ms("subscription_connect")); + println!("{}", create_conversation_stats.display_ms("create_conversation")); + println!("{}", insert_message_stats.display_ms("insert_message")); + println!("{}", insert_typing_stats.display_ms("insert_typing_event")); + println!( + " Managed server RSS: {}", + format_memory_summary(memory, logged_in_users.max(u64::from(settings.realtime_conversations)), peak_active_subscriptions), + ); + println!( + " Backend note: conversations and messages are USER tables, typing_events is a STREAM table, and Rust topic consumers mirror conversation, message, and typing INSERTs back into recipient user scopes with EXECUTE AS USER.", + ); + println!( + " Scenario note: each session creates one conversation, sends one initiator message, emits {} typing events from the responder, and forwards one responder reply back to the subscribed initiator.", + CHAT_TYPING_BURSTS, + ); + println!( + " Coverage: configured_users={} | sessions_started={} | 
sessions_completed={}", + settings.user_count, + sessions_started, + sessions_completed, + ); +} + +async fn run_sql_with_retry(client: &KalamClient, sql: &str) -> Result { + let mut delay = CHAT_SQL_RETRY_BASE_DELAY; + + for attempt in 0..SQL_RETRY_ATTEMPTS { + match client.sql_ok(sql).await { + Ok(response) => return Ok(response), + Err(error) if attempt + 1 < SQL_RETRY_ATTEMPTS && is_transient_chat_error(&error) => { + sleep(delay).await; + delay = (delay * 2).min(CHAT_SQL_RETRY_MAX_DELAY); + } + Err(error) => return Err(error), + } + } + + Err("sql retry loop exhausted unexpectedly".to_string()) +} + +fn is_transient_chat_error(error: &str) -> bool { + let lower = error.to_ascii_lowercase(); + lower.contains("timeout") + || lower.contains("temporarily unavailable") + || lower.contains("connection failed") + || lower.contains("connection reset") + || lower.contains("connection refused") + || lower.contains("broken pipe") + || lower.contains("error sending request") + || lower.contains("transport") + || lower.contains("too many open files") + || lower.contains("503") +} + +fn is_duplicate_chat_error(error: &str) -> bool { + let lower = error.to_ascii_lowercase(); + lower.contains("duplicate") + || lower.contains("already exists") + || lower.contains("primary key") + || lower.contains("unique") +} + +fn per_sec(count: u64, duration_secs: f64) -> f64 { + if duration_secs > 0.0 { + count as f64 / duration_secs + } else { + 0.0 + } +} + +fn ratio(numerator: u64, denominator: u64) -> f64 { + if denominator == 0 { + 0.0 + } else { + numerator as f64 / denominator as f64 + } +} + +fn avg_latency_ms(total_us: u64, count: u64) -> f64 { + if count == 0 { + 0.0 + } else { + us_to_ms(total_us) / count as f64 + } +} + +fn us_to_ms(value_us: u64) -> f64 { + value_us as f64 / 1000.0 +} + +fn duration_to_us(elapsed: Duration) -> u64 { + elapsed.as_micros().min(u64::MAX as u128) as u64 +} + +fn percentile_us(sorted_samples: &[u64], percentile: f64) -> f64 { + if 
sorted_samples.is_empty() { + return 0.0; + } + + if sorted_samples.len() == 1 { + return sorted_samples[0] as f64; + } + + let idx = (percentile / 100.0) * (sorted_samples.len() - 1) as f64; + let lower = idx.floor() as usize; + let upper = idx.ceil() as usize; + let fraction = idx - lower as f64; + + sorted_samples[lower] as f64 * (1.0 - fraction) + + sorted_samples[upper] as f64 * fraction +} + +fn lock_unpoisoned(mutex: &Mutex) -> std::sync::MutexGuard<'_, T> { + match mutex.lock() { + Ok(guard) => guard, + Err(poisoned) => poisoned.into_inner(), + } +} + +fn update_peak(peak: &AtomicU64, candidate: u64) { + let mut current = peak.load(Ordering::Relaxed); + while candidate > current { + match peak.compare_exchange_weak( + current, + candidate, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => break, + Err(observed) => current = observed, + } + } +} + +fn max_option_u64(lhs: Option, rhs: Option) -> Option { + match (lhs, rhs) { + (Some(left), Some(right)) => Some(left.max(right)), + (Some(left), None) => Some(left), + (None, Some(right)) => Some(right), + (None, None) => None, + } +} + +fn format_memory_summary( + memory: &ChatManagedServerMemorySummary, + active_users: u64, + peak_active_subscriptions: u64, +) -> String { + if memory.start_rss.is_none() && memory.peak_rss.is_none() && memory.end_rss.is_none() { + return "n/a (external server or RSS tracking unavailable)".to_string(); + } + + let peak_delta = memory + .peak_rss + .zip(memory.start_rss) + .map(|(peak, start)| peak.saturating_sub(start)); + let per_user = peak_delta.and_then(|delta| { + if active_users > 0 { + Some(delta / active_users) + } else { + None + } + }); + let per_subscription = peak_delta.and_then(|delta| { + if peak_active_subscriptions > 0 { + Some(delta / peak_active_subscriptions) + } else { + None + } + }); + + format!( + "start={} | peak={} | end={} | peak_delta={} | approx/user={} | approx/peak_sub={}", + format_memory(memory.start_rss), + format_memory(memory.peak_rss), 
+ format_memory(memory.end_rss), + format_memory(peak_delta), + format_memory(per_user), + format_memory(per_subscription), + ) +} + +fn format_memory(memory_bytes: Option) -> String { + match memory_bytes { + Some(bytes) if bytes >= 1024 * 1024 * 1024 => { + format!("{:.2} GiB", bytes as f64 / (1024.0 * 1024.0 * 1024.0)) + } + Some(bytes) if bytes >= 1024 * 1024 => { + format!("{:.1} MiB", bytes as f64 / (1024.0 * 1024.0)) + } + Some(bytes) if bytes >= 1024 => format!("{:.1} KiB", bytes as f64 / 1024.0), + Some(bytes) => format!("{} B", bytes), + None => "n/a".to_string(), + } +} + +fn epoch_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() + .min(u64::MAX as u128) as u64 +} + +struct SimpleRng { + state: u64, +} + +impl SimpleRng { + fn seeded(seed: u64) -> Self { + Self { state: seed.max(1) } + } + + fn next_u64(&mut self) -> u64 { + let mut x = self.state; + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + self.state = x; + x.wrapping_mul(0x2545F4914F6CDD1D) + } +} + +fn random_index(rng: &mut SimpleRng, len: usize) -> usize { + if len <= 1 { + 0 + } else { + (rng.next_u64() % len as u64) as usize + } +} + +fn chat_worker_start_delay(worker_id: u32) -> Duration { + Duration::from_millis(u64::from(worker_id) * CHAT_WORKER_START_STAGGER_MS) +} + +fn chat_delivery_wait_timeout(realtime_conversations: u32) -> Duration { + let timeout_secs = (u64::from(realtime_conversations) / 100) + .clamp(CHAT_MIRROR_WAIT_MIN_TIMEOUT_SECS, CHAT_MIRROR_WAIT_MAX_TIMEOUT_SECS); + Duration::from_secs(timeout_secs) +} diff --git a/benchv2/src/benchmarks/mod.rs b/benchv2/src/benchmarks/mod.rs index 1fd9f0c05..c059649a1 100644 --- a/benchv2/src/benchmarks/mod.rs +++ b/benchv2/src/benchmarks/mod.rs @@ -1,4 +1,5 @@ pub mod bulk_insert_bench; +pub mod chat_realtime_bench; pub mod concurrent_bench; pub mod connection_scale_bench; pub mod ddl_bench; @@ -102,7 +103,7 @@ pub trait Benchmark: Send + Sync { /// `connection_scale` remains opt-in 
because single-host runs often need extra /// loopback aliases or explicit override flags to avoid macOS ephemeral-port limits. pub fn enabled_in_default_suite(name: &str) -> bool { - !matches!(name, "connection_scale") + !matches!(name, "connection_scale" | "chat_realtime") } /// Returns all registered benchmarks. Add new benchmarks here. @@ -160,6 +161,7 @@ pub fn all_benchmarks() -> Vec> { Box::new(load_connection_storm_bench::ConnectionStormBench), Box::new(load_mixed_rw_bench::MixedReadWriteBench), Box::new(load_wide_fanout_bench::WideFanoutQueryBench), + Box::new(chat_realtime_bench::ChatRealtimeBench), // --- Scale tests (run with --iterations 1 --warmup 0 --filter subscriber_scale) --- Box::new(connection_scale_bench::ConnectionScaleBench), Box::new(subscriber_scale_bench::SubscriberScaleBench::default()), diff --git a/benchv2/src/client.rs b/benchv2/src/client.rs index 85ee673ae..42dc8a84e 100644 --- a/benchv2/src/client.rs +++ b/benchv2/src/client.rs @@ -59,6 +59,15 @@ impl KalamClient { Self::login_with_options(urls, user, password, true).await } + /// Create a multi-endpoint client when server setup is already complete. 
+ pub async fn login_steady_state( + urls: &[String], + user: &str, + password: &str, + ) -> Result { + Self::login_with_options(urls, user, password, false).await + } + async fn login_with_options( urls: &[String], user: &str, @@ -144,7 +153,10 @@ impl KalamClient { // Build an unauthenticated client first — needed for setup + login let unauthed = KalamLinkClient::builder() .base_url(base_url) + .http_pool_max_idle_per_host(BENCH_HTTP_POOL_MAX_IDLE_PER_HOST) + .max_retries(BENCH_HTTP_MAX_RETRIES) .timeout(Duration::from_secs(60)) + .timeouts(timeouts.clone()) .build() .map_err(|e| format!("failed to build kalam-link client: {}", e))?; diff --git a/benchv2/src/preflight.rs b/benchv2/src/preflight.rs index 2542a6e50..f0d3f03a0 100644 --- a/benchv2/src/preflight.rs +++ b/benchv2/src/preflight.rs @@ -77,6 +77,7 @@ pub async fn run_preflight_checks(client: &KalamClient, config: &Config) -> bool // 2b. scale benchmark capacity feasibility checks.push(check_subscriber_scale_target_capacity(config)); checks.push(check_connection_scale_target_capacity(config)); + checks.push(check_chat_realtime_target_capacity(config)); // 3. 
SQL connectivity checks.push(check_sql_connectivity(client).await); @@ -257,6 +258,72 @@ fn will_run_connection_scale(config: &Config) -> bool { will_run_named_benchmark(config, "connection_scale", "scale") } +fn check_chat_realtime_target_capacity(config: &Config) -> CheckResult { + if !will_run_chat_realtime(config) { + return CheckResult::pass("chat_realtime target", "Skipped (chat_realtime not selected)"); + } + + let ws_targets = resolve_ws_targets(&config.urls); + if ws_targets.len() != 1 { + return CheckResult::pass( + "chat_realtime target", + format!( + "{} endpoint(s); single-host client socket ceiling warning not applicable", + ws_targets.len() + ), + ); + } + + let Some(single_target_socket_limit) = detected_single_target_ws_limit() else { + return CheckResult::pass( + "chat_realtime target", + "Host-specific single-target socket ceiling unavailable", + ); + }; + + let user_count = benchmark_chat_env_usize("KALAMDB_BENCH_CHAT_USERS", 1_000); + let realtime_conversations = benchmark_chat_env_usize("KALAMDB_BENCH_CHAT_REALTIME_CONVS", 100); + let target_active_users = user_count.min(realtime_conversations.max(2)); + let estimated_client_sockets = target_active_users.saturating_mul(2); + let socket_limit_label = human_count(single_target_socket_limit); + + if estimated_client_sockets > single_target_socket_limit { + return CheckResult::warn( + "chat_realtime target", + format!( + "Single target ({}) with target_active_chat_users={} implies about {} long-lived client sockets from this host (HTTP + WebSocket), which can hit the local ephemeral-port ceiling near {}. Resulting connect failures may be host-side rather than a backend bottleneck. 
Use multiple endpoints or loopback aliases for realistic same-host runs.", + ws_targets[0], + target_active_users, + estimated_client_sockets, + socket_limit_label, + ), + ); + } + + CheckResult::pass( + "chat_realtime target", + format!( + "Single target {}, estimated active users={} -> about {} long-lived client sockets (host ceiling near {})", + ws_targets[0], + target_active_users, + estimated_client_sockets, + socket_limit_label, + ), + ) +} + +fn will_run_chat_realtime(config: &Config) -> bool { + will_run_named_benchmark(config, "chat_realtime", "load") +} + +fn benchmark_chat_env_usize(name: &str, default: usize) -> usize { + std::env::var(name) + .ok() + .and_then(|value| value.parse::().ok()) + .filter(|value| *value > 0) + .unwrap_or(default) +} + fn will_run_named_benchmark(config: &Config, name: &str, category: &str) -> bool { if !config.bench.is_empty() { return config.bench.iter().any(|benchmark| benchmark == name); diff --git a/benchv2/src/verdict.rs b/benchv2/src/verdict.rs index 34d7afcec..380f769ca 100644 --- a/benchv2/src/verdict.rs +++ b/benchv2/src/verdict.rs @@ -137,6 +137,7 @@ fn threshold_for(name: &str, category: &str) -> Threshold { "sql_1k_concurrent" | "sql_1k_users" => Threshold::new(5000.0, 20000.0), "subscriber_scale" => Threshold::new(60000.0, 300000.0), + "chat_realtime" => Threshold::new(3_600_000.0, 7_200_000.0), // Fallback: use category _ => threshold_for_category(category), diff --git a/cli/README.md b/cli/README.md index 51cc09e40..4caacc002 100644 --- a/cli/README.md +++ b/cli/README.md @@ -219,17 +219,7 @@ Special commands starting with backslash (`\`): | `\show-credentials` / `\credentials` | Show stored credentials | | `\update-credentials

` | Update stored credentials | | `\delete-credentials` | Delete stored credentials | -| `\cluster snapshot` | Trigger cluster snapshot | -| `\cluster purge --upto ` | Purge cluster logs up to index | -| `\cluster trigger-election` | Trigger cluster election | -| `\cluster transfer-leader ` | Transfer cluster leadership | -| `\cluster stepdown` | Request leader stepdown | -| `\cluster clear` | Clear old snapshots | -| `\cluster list` / `\cluster ls` | List cluster nodes | -| `\cluster list groups` | List cluster groups | -| `\cluster status` | Cluster status | -| `\cluster join ` | Join a node (not yet implemented) | -| `\cluster leave` | Leave cluster (not yet implemented) | +| `\cluster ...` | Cluster inspection and admin commands. See [docs/getting-started/cli.md](../docs/getting-started/cli.md#cluster-meta-commands). | ### Using the CLI (Interactive Mode) diff --git a/cli/build.rs b/cli/build.rs index c693771eb..5b4d6d828 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -2,8 +2,7 @@ // Sets environment variables for use in the binary at compile time // Falls back to version.toml if git is not available (e.g., Docker builds) -use std::fs; -use std::process::Command; +use std::{fs, process::Command}; fn main() { // Try to read from version.toml first (for Docker/CI builds) diff --git a/cli/run-tests.sh b/cli/run-tests.sh index b09701cb2..2653ee623 100755 --- a/cli/run-tests.sh +++ b/cli/run-tests.sh @@ -280,46 +280,115 @@ validate_cluster_health() { local health_url="$1" local summary - if ! command -v curl >/dev/null 2>&1 || ! command -v python3 >/dev/null 2>&1; then + if ! 
command -v python3 >/dev/null 2>&1; then return 0 fi - summary="$(curl -fsS --max-time 3 "${health_url%/}/v1/api/cluster/health" 2>/dev/null | python3 -c ' + summary="$(python3 - "$health_url" "${CLUSTER_URLS:-}" <<'PY' import json import sys +from urllib.error import URLError +from urllib.request import urlopen + +target_url = sys.argv[1].rstrip("/") +cluster_urls_arg = sys.argv[2].strip() if len(sys.argv) > 2 else "" + + +def fetch_health(base_url): + with urlopen(f"{base_url.rstrip('/')}/v1/api/cluster/health", timeout=3) as response: + return json.load(response) + try: - payload = json.load(sys.stdin) + target_payload = fetch_health(target_url) except Exception: - raise SystemExit(1) + print("ok") + raise SystemExit(0) -is_cluster = bool(payload.get("is_cluster_mode")) -groups_leading = payload.get("groups_leading") -total_groups = payload.get("total_groups") +if not target_payload.get("is_cluster_mode"): + print("ok") + raise SystemExit(0) -if groups_leading is None or total_groups is None: - nodes = payload.get("nodes") or [] - if nodes: - groups_leading = max((node.get("groups_leading") or 0) for node in nodes) - total_groups = max((node.get("total_groups") or 0) for node in nodes) +cluster_urls = [url.strip() for url in cluster_urls_arg.split(",") if url.strip()] +if not cluster_urls: + seen = set() + for node in target_payload.get("nodes") or []: + api_addr = str(node.get("api_addr") or "").strip() + if api_addr and api_addr not in seen: + seen.add(api_addr) + cluster_urls.append(api_addr) + +if not cluster_urls: + cluster_urls = [target_url] + +payloads = [] +failed_urls = [] +for url in cluster_urls: + try: + payload = fetch_health(url) + except (OSError, URLError, TimeoutError, json.JSONDecodeError): + failed_urls.append(url) + continue + except Exception: + failed_urls.append(url) + continue + if payload.get("is_cluster_mode"): + payloads.append((url, payload)) + +if failed_urls: + print(f"unreachable {','.join(failed_urls)}") + raise SystemExit(0) -if 
not is_cluster or groups_leading is None or total_groups is None: +if not payloads: print("ok") raise SystemExit(0) -if int(groups_leading) < int(total_groups): +total_groups = max(int(payload.get("total_groups") or 0) for _, payload in payloads) +if total_groups <= 0: + print("ok") + raise SystemExit(0) + +groups_leading = 0 +seen_nodes = set() +for url, payload in payloads: + node_id = payload.get("node_id") or url + if node_id in seen_nodes: + continue + seen_nodes.add(node_id) + groups_leading += int(payload.get("groups_leading") or 0) + +if len(payloads) == 1: + node_counts = [ + int(node.get("groups_leading") or 0) + for node in (payloads[0][1].get("nodes") or []) + if node.get("groups_leading") is not None + ] + if node_counts: + groups_leading = sum(node_counts) + +if groups_leading != total_groups: print(f"degraded {groups_leading} {total_groups}") else: print("ok") -')" || true +PY +)" || true case "$summary" in ok|"") return 0 ;; + unreachable\ *) + set -- $summary + echo "Error: configured cluster node(s) are unreachable: ${2}" + echo "CLI e2e tests require every configured cluster node to be reachable." + echo "" + echo "Check: ${health_url%/}/v1/api/cluster/health" + echo "Fix the running cluster state, then rerun ./run-tests.sh." + exit 1 + ;; degraded\ *) set -- $summary - echo "Error: target server reports incomplete cluster leadership (${2}/${3} groups leading)." + echo "Error: cluster reports incomplete Raft group leadership (${2}/${3} groups leading across configured nodes)." echo "This usually means stale or mismatched local Raft state, and CLI e2e tests will fail nondeterministically." 
echo "" echo "Check: ${health_url%/}/v1/api/cluster/health" diff --git a/cli/src/args.rs b/cli/src/args.rs index 1969626c8..88231562b 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -1,6 +1,7 @@ +use std::path::PathBuf; + use clap::Parser; use kalam_cli::OutputFormat; -use std::path::PathBuf; // Build information - Create a static version string at compile time diff --git a/cli/src/commands/credentials.rs b/cli/src/commands/credentials.rs index 8653a7521..0556ac78d 100644 --- a/cli/src/commands/credentials.rs +++ b/cli/src/commands/credentials.rs @@ -1,9 +1,15 @@ -use crate::args::Cli; +use std::{ + io::{self, Write}, + time::Duration, +}; + use kalam_cli::{CLIError, FileCredentialStore, Result}; -use kalam_client::credentials::{CredentialStore, Credentials}; -use kalam_client::KalamLinkClient; -use std::io::{self, Write}; -use std::time::Duration; +use kalam_client::{ + credentials::{CredentialStore, Credentials}, + KalamLinkClient, +}; + +use crate::args::Cli; pub fn handle_credentials(cli: &Cli, credential_store: &mut FileCredentialStore) -> Result { if cli.list_instances { diff --git a/cli/src/commands/init.rs b/cli/src/commands/init.rs index bab010c1d..d3217a6f7 100644 --- a/cli/src/commands/init.rs +++ b/cli/src/commands/init.rs @@ -1,16 +1,19 @@ -use crate::args::Cli; +use std::{ + env, fs, + io::{self, IsTerminal, Write}, + path::{Component, Path, PathBuf}, +}; + use kalam_cli::{CLIError, Result}; -use std::env; -use std::fs; -use std::io::{self, IsTerminal, Write}; -use std::path::{Component, Path, PathBuf}; + +use crate::args::Cli; const DEFAULT_TABLE_ID: &str = "blog.blogs"; const DEFAULT_ID_COLUMN: &str = "blog_id"; const DEFAULT_INPUT_COLUMN: &str = "content"; const DEFAULT_OUTPUT_COLUMN: &str = "summary"; -const DEFAULT_SYSTEM_PROMPT: &str = - "Write one concise sentence summarizing the content. Preserve key facts and avoid hallucinations."; +const DEFAULT_SYSTEM_PROMPT: &str = "Write one concise sentence summarizing the content. 
Preserve \ + key facts and avoid hallucinations."; const FALLBACK_LOCAL_SDK_PATH: &str = "../../link/sdks/typescript/client"; #[derive(Debug, Clone)] @@ -67,7 +70,9 @@ fn build_non_interactive_config(cli: &Cli) -> Result { fn build_interactive_config(cli: &Cli) -> Result { if !io::stdin().is_terminal() { return Err(CLIError::ConfigurationError( - "Interactive init requires a terminal. Use --init-agent-non-interactive to run in scripts.".into(), + "Interactive init requires a terminal. Use --init-agent-non-interactive to run in \ + scripts." + .into(), )); } @@ -555,7 +560,16 @@ fn render_setup_sql(config: &AgentScaffoldConfig, failure_table: &str) -> String let failure = parse_table_id(failure_table).expect("validated failure table id"); format!( - "CREATE NAMESPACE IF NOT EXISTS {};\n\nCREATE SHARED TABLE IF NOT EXISTS {} (\n {} BIGINT PRIMARY KEY DEFAULT SNOWFLAKE_ID(),\n {} TEXT NOT NULL,\n {} TEXT,\n created TIMESTAMP NOT NULL DEFAULT NOW(),\n updated TIMESTAMP NOT NULL DEFAULT NOW()\n);\n\nCREATE SHARED TABLE IF NOT EXISTS {} (\n run_key TEXT PRIMARY KEY,\n row_id TEXT NOT NULL,\n error TEXT NOT NULL,\n created TIMESTAMP NOT NULL DEFAULT NOW(),\n updated TIMESTAMP NOT NULL DEFAULT NOW()\n);\n\nCREATE TOPIC {};\nALTER TOPIC {} ADD SOURCE {} ON INSERT WITH (payload = 'full');\nALTER TOPIC {} ADD SOURCE {} ON UPDATE WITH (payload = 'full');\n\nINSERT INTO {} ({}, {})\nVALUES (\n 'KalamDB topics let tiny agents consume table changes and write enriched data back with minimal boilerplate.',\n NULL\n);\n", + "CREATE NAMESPACE IF NOT EXISTS {};\n\nCREATE SHARED TABLE IF NOT EXISTS {} (\n {} \ + BIGINT PRIMARY KEY DEFAULT SNOWFLAKE_ID(),\n {} TEXT NOT NULL,\n {} TEXT,\n \ + created TIMESTAMP NOT NULL DEFAULT NOW(),\n updated TIMESTAMP NOT NULL DEFAULT \ + NOW()\n);\n\nCREATE SHARED TABLE IF NOT EXISTS {} (\n run_key TEXT PRIMARY KEY,\n \ + row_id TEXT NOT NULL,\n error TEXT NOT NULL,\n created TIMESTAMP NOT NULL DEFAULT \ + NOW(),\n updated TIMESTAMP NOT NULL DEFAULT 
NOW()\n);\n\nCREATE TOPIC {};\nALTER \ + TOPIC {} ADD SOURCE {} ON INSERT WITH (payload = 'full');\nALTER TOPIC {} ADD SOURCE {} \ + ON UPDATE WITH (payload = 'full');\n\nINSERT INTO {} ({}, {})\nVALUES (\n 'KalamDB \ + topics let tiny agents consume table changes and write enriched data back with minimal \ + boilerplate.',\n NULL\n);\n", table.namespace, config.table_id, config.id_column, @@ -1061,7 +1075,14 @@ main().catch((error) => {{ fn render_readme(config: &AgentScaffoldConfig) -> String { format!( - "# {}\n\nGenerated by `kalam --init-agent`.\n\n## What it does\n\n1. Consumes topic events from `{}`.\n2. Reads rows from `{}`.\n3. Writes summarized output back to `{}`.\n4. Retries failures and records exhausted runs in a failure table.\n\n## Run\n\n1. Start KalamDB server:\n - `cd backend && cargo run`\n2. Bootstrap schema/topic/sample row:\n - `./setup.sh`\n3. Install dependencies:\n - `npm install`\n4. Start the agent:\n - `npm run start`\n\n## Environment\n\nCopy `.env.example` to `.env.local` and adjust values as needed.\nSet `OPENAI_API_KEY` to enable LangChain-backed summarization.\n", + "# {}\n\nGenerated by `kalam --init-agent`.\n\n## What it does\n\n1. Consumes topic \ + events from `{}`.\n2. Reads rows from `{}`.\n3. Writes summarized output back to \ + `{}`.\n4. Retries failures and records exhausted runs in a failure table.\n\n## \ + Run\n\n1. Start KalamDB server:\n - `cd backend && cargo run`\n2. Bootstrap \ + schema/topic/sample row:\n - `./setup.sh`\n3. Install dependencies:\n - `npm \ + install`\n4. 
Start the agent:\n - `npm run start`\n\n## Environment\n\nCopy \ + `.env.example` to `.env.local` and adjust values as needed.\nSet `OPENAI_API_KEY` to \ + enable LangChain-backed summarization.\n", config.project_name, config.topic_id, config.table_id, config.output_column, ) } diff --git a/cli/src/commands/subscriptions.rs b/cli/src/commands/subscriptions.rs index c2fc8dad4..b19ed9068 100644 --- a/cli/src/commands/subscriptions.rs +++ b/cli/src/commands/subscriptions.rs @@ -1,8 +1,9 @@ -use crate::args::Cli; -use crate::connect::create_session; -use kalam_cli::{CLIConfiguration, FileCredentialStore, Result}; use std::time::Duration; +use kalam_cli::{CLIConfiguration, FileCredentialStore, Result}; + +use crate::{args::Cli, connect::create_session}; + fn print_list_subscriptions() { println!("Subscription management:"); println!(" • Subscriptions run in blocking mode per CLI session"); diff --git a/cli/src/completer.rs b/cli/src/completer.rs index e83f8ddc0..fc42b7668 100644 --- a/cli/src/completer.rs +++ b/cli/src/completer.rs @@ -7,9 +7,10 @@ //! Provides intelligent autocompletion for SQL keywords, table names, column names, //! and backslash commands with context-aware suggestions and beautiful styling. 
+use std::collections::HashMap; + use colored::*; use rustyline::completion::{Completer, Pair}; -use std::collections::HashMap; pub(crate) const SQL_KEYWORDS: &[&str] = &[ // DML @@ -277,7 +278,8 @@ impl AutoCompleter { before_dot.rfind(|c: char| c.is_whitespace() || c == '(' || c == ',') { let table_name = before_dot[word_start + 1..].trim().to_string(); - // If the token before dot matches a known namespace, assume namespace.table completion + // If the token before dot matches a known namespace, assume namespace.table + // completion let ns_upper = table_name.to_ascii_uppercase(); if self.namespaces.iter().any(|ns| ns.to_ascii_uppercase() == ns_upper) { return CompletionContext::NamespaceTable(table_name); diff --git a/cli/src/config.rs b/cli/src/config.rs index e043fa44e..3e5bf8d8a 100644 --- a/cli/src/config.rs +++ b/cli/src/config.rs @@ -25,12 +25,15 @@ //! timestamp_format = "iso8601" # iso8601, iso8601-date, iso8601-datetime, unix-ms, unix-sec, relative, rfc2822, rfc3339 //! ``` +use std::path::{Path, PathBuf}; + use kalam_client::{ConnectionOptions, HttpVersion, TimestampFormat}; use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; -use crate::error::{CLIError, Result}; -use crate::history::get_cli_home_dir; +use crate::{ + error::{CLIError, Result}, + history::get_cli_home_dir, +}; /// CLI configuration loaded from TOML file #[derive(Debug, Clone, Serialize, Deserialize)] @@ -429,6 +432,7 @@ mod tests { #[test] fn test_config_file_auto_creation() { use std::fs; + use tempfile::TempDir; // Create a temporary directory @@ -459,6 +463,7 @@ mod tests { #[test] fn test_config_file_auto_creation_with_nested_dirs() { use std::fs; + use tempfile::TempDir; // Create a temporary directory diff --git a/cli/src/connect.rs b/cli/src/connect.rs index 1fca0f2af..762474adb 100644 --- a/cli/src/connect.rs +++ b/cli/src/connect.rs @@ -1,18 +1,22 @@ -use crate::args::Cli; +use std::{ + io::{self, IsTerminal, Write}, + net::IpAddr, + time::Duration, +}; + 
use colored::Colorize; use kalam_cli::{ CLIConfiguration, CLIError, CLISession, FileCredentialStore, OutputFormat, Result, }; -use kalam_client::credentials::{CredentialStore, Credentials}; use kalam_client::{ + credentials::{CredentialStore, Credentials}, AuthProvider, KalamLinkClient, KalamLinkError, KalamLinkTimeouts, LoginResponse, ServerSetupRequest, }; -use std::io::{self, IsTerminal, Write}; -use std::net::IpAddr; -use std::time::Duration; use url::Url; +use crate::args::Cli; + /// Build timeouts configuration from CLI arguments fn build_timeouts(cli: &Cli) -> KalamLinkTimeouts { // Check for preset flags first @@ -136,10 +140,7 @@ pub async fn create_session( let server_url = normalize_and_validate_server_url(&server_url)?; if cli.verbose { - eprintln!( - "Resolved server URL for instance '{}': {}", - cli.instance, server_url - ); + eprintln!("Resolved server URL for instance '{}': {}", cli.instance, server_url); } fn simplify_login_error(err: &KalamLinkError) -> String { @@ -198,12 +199,8 @@ pub async fn create_session( if verbose { eprintln!("Warning: Login failed: {}", e); } - if matches!(&e, KalamLinkError::NetworkError(_) | KalamLinkError::TimeoutError(_)) - { - LoginResult::ConnectivityFailed(build_connectivity_diagnostics( - server_url, - &e, - )) + if matches!(&e, KalamLinkError::NetworkError(_) | KalamLinkError::TimeoutError(_)) { + LoginResult::ConnectivityFailed(build_connectivity_diagnostics(server_url, &e)) } else { LoginResult::Failed(simplify_login_error(&e)) } @@ -411,7 +408,8 @@ pub async fn create_session( if is_localhost_url(server_url) { println!("This server is already configured, so setup is not available here."); println!( - "If you started it with scripts/cluster.sh, sign in as 'root' with the configured root password" + "If you started it with scripts/cluster.sh, sign in as 'root' with the configured \ + root password" ); println!("(default cluster password: kalamdb123)."); } @@ -477,10 +475,9 @@ pub async fn create_session( 
LoginResult::SetupRequired => { setup_and_login(server_url, verbose, instance, credential_store, true).await }, - LoginResult::Failed(error) => Err(CLIError::ConfigurationError(format!( - "Login failed: {}", - error - ))), + LoginResult::Failed(error) => { + Err(CLIError::ConfigurationError(format!("Login failed: {}", error))) + }, LoginResult::ConnectivityFailed(error) => { Err(CLIError::LinkError(KalamLinkError::NetworkError(error))) }, @@ -598,10 +595,7 @@ pub async fn create_session( .await? }, LoginResult::Failed(error) => { - return Err(CLIError::ConfigurationError(format!( - "Login failed: {}", - error - ))); + return Err(CLIError::ConfigurationError(format!("Login failed: {}", error))); }, LoginResult::ConnectivityFailed(error) => { return Err(CLIError::LinkError(KalamLinkError::NetworkError(error))); @@ -852,8 +846,10 @@ pub async fn create_session( }, _ => { return Err(CLIError::SetupRequired( - "Setup completed but login failed. Please try logging in manually.".to_string() - )); + "Setup completed but login failed. Please try logging in \ + manually." + .to_string(), + )); }, } }, @@ -872,7 +868,9 @@ pub async fn create_session( } else { // Non-interactive mode and not localhost - no auth available return Err(CLIError::ConfigurationError( - "No authentication credentials available. Use --user and --password, or run interactively.".to_string() + "No authentication credentials available. Use --user and --password, or run \ + interactively." + .to_string(), )); } }; diff --git a/cli/src/credentials.rs b/cli/src/credentials.rs index e0fcd9074..6ea88ebec 100644 --- a/cli/src/credentials.rs +++ b/cli/src/credentials.rs @@ -30,15 +30,19 @@ //! server_url = "https://db.example.com" //! 
``` -use crate::history::get_kalam_config_dir; -use kalam_client::credentials::{CredentialStore, Credentials}; -use kalam_client::Result; -use kalam_client::UserId; +use std::{ + collections::HashMap, + env, fs, + path::{Path, PathBuf}, +}; + +use kalam_client::{ + credentials::{CredentialStore, Credentials}, + Result, UserId, +}; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::env; -use std::fs; -use std::path::{Path, PathBuf}; + +use crate::history::get_kalam_config_dir; /// File-based credential storage /// @@ -122,18 +126,10 @@ impl FileCredentialStore { let contents = fs::read_to_string(&self.file_path).map_err(|e| { let msg = format!( - "\n╭─ Cannot Read Credentials File\n\ - │\n\ - │ 📁 Location: {}\n\ - │ ⚠️ Problem: {}\n\ - │\n\ - ╰─ How to Fix:\n\ - \n\ - Option 1: Check file permissions\n\ - Option 2: Delete and re-authenticate\n\ - ───────────────────────────────────\n\ - del \"{}\"\n\ - kalamcli connect\n", + "\n╭─ Cannot Read Credentials File\n│\n│ 📁 Location: {}\n│ ⚠️ Problem: \ + {}\n│\n╰─ How to Fix:\n\nOption 1: Check file permissions\nOption 2: Delete and \ + re-authenticate\n───────────────────────────────────\ndel \"{}\"\nkalamcli \ + connect\n", self.file_path.display(), e, self.file_path.display() @@ -155,20 +151,10 @@ impl FileCredentialStore { }; let msg = format!( - "\n╭─ Corrupted Credentials File\n\ - │\n\ - │ 📁 Location: {}\n\ - │ ⚠️ Problem: {}\n\ - │\n\ - ╰─ How to Fix:\n\ - \n\ - Step 1: Delete the corrupted file\n\ - ───────────────────────────────\n\ - del \"{}\"\n\ - \n\ - Step 2: Re-authenticate\n\ - ───────────────────────\n\ - kalamcli connect\n", + "\n╭─ Corrupted Credentials File\n│\n│ 📁 Location: {}\n│ ⚠️ Problem: \ + {}\n│\n╰─ How to Fix:\n\nStep 1: Delete the corrupted \ + file\n───────────────────────────────\ndel \"{}\"\n\nStep 2: \ + Re-authenticate\n───────────────────────\nkalamcli connect\n", self.file_path.display(), simple_error, self.file_path.display() @@ -287,9 +273,10 @@ impl 
CredentialStore for FileCredentialStore { #[cfg(test)] mod tests { - use super::*; use tempfile::TempDir; + use super::*; + fn create_temp_store() -> (FileCredentialStore, TempDir) { let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("credentials.toml"); diff --git a/cli/src/error.rs b/cli/src/error.rs index aa5faa6ec..b5ed21363 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -4,9 +4,10 @@ //! //! Provides user-friendly error messages and context for common CLI failures. -use kalam_client::KalamLinkError; use std::fmt; +use kalam_client::KalamLinkError; + /// Result type for CLI operations pub type Result = std::result::Result; diff --git a/cli/src/formatter.rs b/cli/src/formatter.rs index 52ffa703c..197662959 100644 --- a/cli/src/formatter.rs +++ b/cli/src/formatter.rs @@ -519,9 +519,10 @@ impl OutputFormatter { #[cfg(test)] mod tests { - use super::*; use kalam_client::TimestampFormat; + use super::*; + #[test] fn test_format_json_value() { let formatter = OutputFormatter::new( diff --git a/cli/src/history.rs b/cli/src/history.rs index f839fc1e7..8514501eb 100644 --- a/cli/src/history.rs +++ b/cli/src/history.rs @@ -4,8 +4,10 @@ //! //! Maintains command history across sessions for better user experience. 
-use std::env; -use std::path::{Path, PathBuf}; +use std::{ + env, + path::{Path, PathBuf}, +}; use crate::error::{CLIError, Result}; @@ -304,10 +306,10 @@ fn parse_history_entries(contents: &str) -> Vec { #[cfg(test)] mod tests { - use super::*; - use tempfile::tempdir; + use super::*; + #[test] fn test_history_persistence() { let dir = tempdir().unwrap(); @@ -399,7 +401,8 @@ mod tests { let history = CommandHistory::with_path(&path, 100); // Test command with special characters and newlines - let special_cmd = "INSERT INTO messages (content)\nVALUES ('Hello\nWorld!'),\n ('Test\tmessage\nwith\\special chars');"; + let special_cmd = "INSERT INTO messages (content)\nVALUES ('Hello\nWorld!'),\n \ + ('Test\tmessage\nwith\\special chars');"; history.append(special_cmd).unwrap(); diff --git a/cli/src/main.rs b/cli/src/main.rs index d455215dc..b3115ad72 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -15,9 +15,9 @@ //! kalam-cli -u http://localhost:3000 --json -c "SELECT * FROM users" //! ``` -use clap::Parser; use std::io::IsTerminal; +use clap::Parser; use kalam_cli::{CLIConfiguration, CLIError, FileCredentialStore, Result}; mod args; @@ -25,9 +25,11 @@ mod commands; mod connect; use args::Cli; -use commands::credentials::{handle_credentials, login_and_store_credentials}; -use commands::init::handle_init_agent; -use commands::subscriptions::handle_subscriptions; +use commands::{ + credentials::{handle_credentials, login_and_store_credentials}, + init::handle_init_agent, + subscriptions::handle_subscriptions, +}; use connect::create_session; #[tokio::main] @@ -49,7 +51,8 @@ async fn run() -> Result<()> { } // If the password is explicitly set to an empty string, only prompt in interactive mode. - // In non-interactive modes (--command/--file), an empty password may be valid (e.g. default root). + // In non-interactive modes (--command/--file), an empty password may be valid (e.g. default + // root). 
let is_interactive_mode = cli.command.is_none() && cli.file.is_none(); if cli.password.as_deref() == Some("") && is_interactive_mode && std::io::stdin().is_terminal() { @@ -119,7 +122,7 @@ async fn run() -> Result<()> { // Execute single command (None, Some(command), false) => { - session.execute(&command).await?; + session.execute_input(&command).await?; }, // Interactive mode diff --git a/cli/src/parser.rs b/cli/src/parser.rs index 37a561b9f..93af863c3 100644 --- a/cli/src/parser.rs +++ b/cli/src/parser.rs @@ -28,9 +28,12 @@ pub enum Command { ClusterClear, ClusterList, ClusterListGroups, - ClusterStatus, - ClusterJoin(String), // node address to join - ClusterLeave, + ClusterJoin { + node_id: u64, + rpc_addr: String, + api_addr: String, + }, + ClusterRebalance, Health, Pause, Continue, @@ -108,7 +111,9 @@ impl CommandParser { "\\cluster" => { if args.is_empty() { Err(CLIError::ParseError( - "\\cluster requires: snapshot, purge, trigger-election, transfer-leader, stepdown, clear, list, status, join, or leave".into(), + "\\cluster requires: snapshot, purge, trigger-election, transfer-leader, \ + rebalance, stepdown, clear, list, or join" + .into(), )) } else { let sub = args[0].to_ascii_lowercase(); @@ -172,6 +177,7 @@ impl CommandParser { )) } }, + "rebalance" => Ok(Command::ClusterRebalance), "stepdown" | "step-down" => Ok(Command::ClusterStepdown), "clear" => Ok(Command::ClusterClear), "list" | "ls" => { @@ -181,17 +187,25 @@ impl CommandParser { Ok(Command::ClusterList) } }, - "status" => Ok(Command::ClusterStatus), "join" => { - if args.len() < 2 { + if args.len() < 4 { Err(CLIError::ParseError( - "\\cluster join requires a node address".into(), + "\\cluster join requires " + .into(), )) } else { - Ok(Command::ClusterJoin(args[1].to_string())) + let node_id = args[1].parse::().map_err(|_| { + CLIError::ParseError( + "\\cluster join requires a numeric node id".into(), + ) + })?; + Ok(Command::ClusterJoin { + node_id, + rpc_addr: args[2].to_string(), + 
api_addr: args[3].to_string(), + }) } }, - "leave" => Ok(Command::ClusterLeave), _ => Err(CLIError::ParseError(format!( "Unknown cluster subcommand: {}", args[0] @@ -245,7 +259,9 @@ impl CommandParser { "\\consume" => { if args.is_empty() { return Err(CLIError::ParseError( - "\\consume requires a topic name. Usage: \\consume [--group NAME] [--from earliest|latest|OFFSET] [--limit N] [--timeout SECONDS]".into(), + "\\consume requires a topic name. Usage: \\consume [--group NAME] \ + [--from earliest|latest|OFFSET] [--limit N] [--timeout SECONDS]" + .into(), )); } let topic = args[0].to_string(); @@ -398,4 +414,30 @@ mod tests { assert!(parser.parse("").is_err()); assert!(parser.parse(" ").is_err()); } + + #[test] + fn test_parse_cluster_join() { + let parser = CommandParser::new(); + let cmd = parser.parse("\\cluster join 2 10.0.0.2:9188 http://10.0.0.2:8080").unwrap(); + assert_eq!( + cmd, + Command::ClusterJoin { + node_id: 2, + rpc_addr: "10.0.0.2:9188".to_string(), + api_addr: "http://10.0.0.2:8080".to_string(), + } + ); + } + + #[test] + fn test_parse_cluster_rebalance() { + let parser = CommandParser::new(); + assert_eq!(parser.parse("\\cluster rebalance").unwrap(), Command::ClusterRebalance); + } + + #[test] + fn test_parse_cluster_leave_is_rejected() { + let parser = CommandParser::new(); + assert!(parser.parse("\\cluster leave").is_err()); + } } diff --git a/cli/src/session.rs b/cli/src/session.rs index ccbb4dc62..3982e76cc 100644 --- a/cli/src/session.rs +++ b/cli/src/session.rs @@ -7,8 +7,17 @@ //! Manages the connection to KalamDB server and execution state throughout //! the CLI session lifetime. 
-use crate::history_menu::{HistoryMenu, HistoryMenuResult}; -use crate::CLI_VERSION; +#[cfg(unix)] +use std::io::IsTerminal; +use std::{ + borrow::Cow, + collections::{HashMap, HashSet}, + fs, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + time::{Duration, Instant}, +}; + use clap::ValueEnum; use colored::*; use indicatif::{ProgressBar, ProgressStyle}; @@ -18,26 +27,19 @@ use kalam_client::{ KalamLinkClient, KalamLinkError, KalamLinkTimeouts, SubscriptionConfig, SubscriptionOptions, TimestampFormatter, UploadProgress, UploadProgressCallback, }; -use rustyline::completion::Completer; -use rustyline::error::ReadlineError; -use rustyline::highlight::Highlighter; -use rustyline::hint::Hinter; -use rustyline::history::DefaultHistory; -use rustyline::validate::Validator; -use rustyline::{Cmd, CompletionType, Config, EditMode, Editor, Helper, KeyEvent}; -use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; -use std::fs; -use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; - -#[cfg(unix)] -use std::io::IsTerminal; - +use rustyline::{ + completion::Completer, error::ReadlineError, highlight::Highlighter, hint::Hinter, + history::DefaultHistory, validate::Validator, Cmd, CompletionType, Config, EditMode, Editor, + Helper, KeyEvent, +}; #[cfg(unix)] use tokio::io::AsyncReadExt; +use crate::{ + history_menu::{HistoryMenu, HistoryMenuResult}, + CLI_VERSION, +}; + // Fallback system tables for autocomplete when the server does not return them const SYSTEM_TABLES: &[&str] = &[ "users", @@ -110,6 +112,7 @@ use crate::{ parser::{Command, CommandParser}, }; +mod cluster; mod commands; mod info; @@ -454,9 +457,16 @@ impl CLISession { /// **Implements T092**: Execute SQL via kalam-client /// **Implements T114a**: Show loading indicator for queries > threshold /// **Enhanced**: Colored output and styled timing - pub async fn execute(&mut self, sql: &str) -> Result<()> { - let start = Instant::now(); + pub async fn 
execute_input(&mut self, input: &str) -> Result<()> { + let command = self.parser.parse(input)?; + self.execute_command(command).await + } + /// Execute a SQL query and return the raw response. + pub(super) async fn execute_query_response( + &mut self, + sql: &str, + ) -> Result { let (sql_to_send, mut upload_parts) = Self::extract_file_uploads(sql)?; // Increment query counter @@ -537,8 +547,15 @@ impl CLISession { pb.finish_and_clear(); } + result.map_err(Into::into) + } + + pub async fn execute(&mut self, sql: &str) -> Result<()> { + let start = Instant::now(); let elapsed = start.elapsed(); + let result = self.execute_query_response(sql).await; + match result { Ok(response) => { if let Some((config, server_message)) = @@ -1410,11 +1427,14 @@ impl CLISession { } } - // Also fetch system/information_schema tables from information_schema.tables for autocomplete + // Also fetch system/information_schema tables from information_schema.tables for + // autocomplete let sys_tables_res = self .client .execute_query( - "SELECT table_schema, table_name FROM information_schema.tables WHERE table_schema IN ('system', 'information_schema') ORDER BY table_schema, table_name", + "SELECT table_schema, table_name FROM information_schema.tables WHERE \ + table_schema IN ('system', 'information_schema') ORDER BY table_schema, \ + table_name", None, None, None, @@ -1491,7 +1511,8 @@ impl CLISession { let resp = self .client .execute_query( - "SELECT table_name, column_name FROM information_schema.columns ORDER BY table_name, ordinal_position", + "SELECT table_name, column_name FROM information_schema.columns ORDER BY \ + table_name, ordinal_position", None, None, None, @@ -1502,7 +1523,8 @@ impl CLISession { } else { self.client .execute_query( - "SELECT table_name, column_name FROM information_schema.columns ORDER BY table_name, ordinal_position", + "SELECT table_name, column_name FROM information_schema.columns ORDER BY \ + table_name, ordinal_position", None, None, None, @@ 
-1838,7 +1860,10 @@ impl CLISession { ) .await; if let Err(_) = close_res { - eprintln!("Warning: Timed out while closing subscription; exiting anyway"); + eprintln!( + "Warning: Timed out while closing subscription; exiting \ + anyway" + ); } else if let Ok(Err(e)) = close_res { eprintln!( "Warning: Failed to close subscription cleanly: {}", @@ -2059,7 +2084,8 @@ impl CLISession { } => { if self.color { println!( - "\x1b[36m[{}] ✓ SUBSCRIBED\x1b[0m [{}] {} total rows, batch {} {}, {} columns", + "\x1b[36m[{}] ✓ SUBSCRIBED\x1b[0m [{}] {} total rows, batch {} {}, {} \ + columns", timestamp, subscription_id, total_rows, @@ -2407,7 +2433,9 @@ impl CLISession { let result = self .client .execute_query( - "SELECT cluster_id, node_id, role, status, api_addr, is_self, is_leader, hostname, memory_usage_mb, cpu_usage_percent, uptime_human FROM system.cluster ORDER BY is_leader DESC, node_id ASC", + "SELECT cluster_id, node_id, role, status, api_addr, is_self, is_leader, \ + hostname, memory_usage_mb, cpu_usage_percent, uptime_human FROM system.cluster \ + ORDER BY is_leader DESC, node_id ASC", None, None, None, @@ -3079,17 +3107,20 @@ impl Helper for CLIHelper {} #[cfg(test)] mod tests { - use super::*; - use crate::credentials::FileCredentialStore; + use std::{collections::HashMap, sync::Arc}; + use kalam_client::credentials::{CredentialStore, Credentials}; use ntest::timeout; use serde_json::json; - use std::collections::HashMap; - use std::sync::Arc; use tempfile::TempDir; - use tokio::io::{AsyncReadExt, AsyncWriteExt}; - use tokio::net::{TcpListener, TcpStream}; - use tokio::sync::Mutex as AsyncMutex; + use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + net::{TcpListener, TcpStream}, + sync::Mutex as AsyncMutex, + }; + + use super::*; + use crate::credentials::FileCredentialStore; #[derive(Debug, Default)] struct TestServerState { @@ -3226,7 +3257,8 @@ mod tests { }; let response = format!( - "{status_line}\r\ncontent-type: application/json\r\ncontent-length: 
{}\r\nconnection: close\r\n\r\n{}", + "{status_line}\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: \ + close\r\n\r\n{}", body.len(), body ); diff --git a/cli/src/session/cluster/actions.rs b/cli/src/session/cluster/actions.rs new file mode 100644 index 000000000..988ce5f60 --- /dev/null +++ b/cli/src/session/cluster/actions.rs @@ -0,0 +1,332 @@ +use std::collections::HashMap; + +use kalam_client::KalamCellValue; + +use super::{cell_bool, cell_text, cell_u64, CLISession}; +use crate::{CLIError, Result}; + +#[derive(Debug, Clone, PartialEq)] +struct ClusterGroupActionRow { + action: String, + group_id: Option, + success: Option, + error: Option, + snapshot_index: Option, + target_node_id: Option, + upto: Option, + snapshots_dir: Option, +} + +#[derive(Debug, Clone, PartialEq)] +struct ClusterJoinRow { + node_id: u64, + rpc_addr: String, + api_addr: String, + rebalance_requested: bool, +} + +#[derive(Debug, Clone, PartialEq)] +struct ClusterClearRow { + snapshots_dir: String, + snapshots_dir_exists: bool, + total_snapshots_found: u64, + total_size_bytes: u64, + snapshots_cleared: u64, + cleared_size_bytes: u64, + error_count: u64, + errors: Vec, +} + +impl CLISession { + pub(in crate::session) async fn show_cluster_group_action(&mut self, sql: &str) -> Result<()> { + let response = self.execute_query_response(sql).await?; + let rows = parse_group_action_rows(&response)?; + println!("{}", render_cluster_group_action_text(&rows)); + Ok(()) + } + + pub(in crate::session) async fn show_cluster_join( + &mut self, + node_id: u64, + rpc_addr: &str, + api_addr: &str, + ) -> Result<()> { + let sql = format!("CLUSTER JOIN {} {} {}", node_id, rpc_addr, api_addr); + let response = self.execute_query_response(&sql).await?; + let row = parse_cluster_join_row(&response)?; + println!("{}", render_cluster_join_text(&row)); + Ok(()) + } + + pub(in crate::session) async fn show_cluster_clear(&mut self) -> Result<()> { + let response = 
self.execute_query_response("CLUSTER CLEAR").await?; + let row = parse_cluster_clear_row(&response)?; + println!("{}", render_cluster_clear_text(&row)); + Ok(()) + } +} + +fn parse_group_action_rows( + response: &kalam_client::QueryResponse, +) -> Result> { + let rows = response.rows_as_maps(); + if rows.is_empty() { + return Err(CLIError::FormatError("cluster command returned no rows".to_string())); + } + + Ok(rows + .into_iter() + .map(|row| ClusterGroupActionRow { + action: cell_text(&row, "action").unwrap_or_else(|| "cluster".to_string()), + group_id: cell_text(&row, "group_id"), + success: cell_bool(&row, "success"), + error: cell_text(&row, "error"), + snapshot_index: cell_u64(&row, "snapshot_index"), + target_node_id: cell_u64(&row, "target_node_id"), + upto: cell_u64(&row, "upto"), + snapshots_dir: cell_text(&row, "snapshots_dir"), + }) + .collect()) +} + +fn parse_cluster_join_row(response: &kalam_client::QueryResponse) -> Result { + let rows = response.rows_as_maps(); + let Some(row) = rows.first() else { + return Err(CLIError::FormatError("cluster join returned no rows".to_string())); + }; + + Ok(ClusterJoinRow { + node_id: cell_u64(row, "node_id").unwrap_or(0), + rpc_addr: cell_text(row, "rpc_addr").unwrap_or_else(|| "-".to_string()), + api_addr: cell_text(row, "api_addr").unwrap_or_else(|| "-".to_string()), + rebalance_requested: cell_bool(row, "rebalance_requested").unwrap_or(false), + }) +} + +fn parse_cluster_clear_row(response: &kalam_client::QueryResponse) -> Result { + let rows = response.rows_as_maps(); + let Some(first_row) = rows.first() else { + return Err(CLIError::FormatError("cluster clear returned no rows".to_string())); + }; + + let errors = rows.iter().filter_map(|row| cell_text(row, "error")).collect::>(); + + Ok(ClusterClearRow { + snapshots_dir: cell_text(first_row, "snapshots_dir").unwrap_or_else(|| "-".to_string()), + snapshots_dir_exists: cell_bool(first_row, "snapshots_dir_exists").unwrap_or(false), + total_snapshots_found: 
cell_u64(first_row, "total_snapshots_found").unwrap_or(0), + total_size_bytes: cell_u64(first_row, "total_size_bytes").unwrap_or(0), + snapshots_cleared: cell_u64(first_row, "snapshots_cleared").unwrap_or(0), + cleared_size_bytes: cell_u64(first_row, "cleared_size_bytes").unwrap_or(0), + error_count: cell_u64(first_row, "error_count").unwrap_or(0), + errors, + }) +} + +fn render_cluster_group_action_text(rows: &[ClusterGroupActionRow]) -> String { + let action = rows.first().map(|row| row.action.as_str()).unwrap_or("cluster"); + let target_node_id = rows.iter().find_map(|row| row.target_node_id); + let upto = rows.iter().find_map(|row| row.upto); + let snapshots_dir = rows.iter().find_map(|row| row.snapshots_dir.clone()); + let group_rows = rows.iter().filter(|row| row.group_id.is_some()).collect::>(); + let success_count = group_rows.iter().filter(|row| row.success == Some(true)).count(); + let failed_rows = group_rows + .iter() + .filter(|row| row.success == Some(false)) + .copied() + .collect::>(); + + let mut lines = Vec::new(); + if group_rows.is_empty() { + lines.push(format!("{} returned no raft-group rows", action_heading(action))); + return lines.join("\n"); + } + + lines.push(format!( + "{}: {}/{} groups succeeded", + action_heading(action), + success_count, + group_rows.len() + )); + + if let Some(upto) = upto { + lines.push(format!("Upto index: {}", upto)); + } + if let Some(target_node_id) = target_node_id { + lines.push(format!("Target node: {}", target_node_id)); + } + if let Some(snapshots_dir) = snapshots_dir { + lines.push(format!("Snapshots directory: {}", snapshots_dir)); + } + + let snapshot_rows = group_rows + .iter() + .filter_map(|row| { + row.snapshot_index + .map(|snapshot_index| (row.group_id.as_deref().unwrap_or("-"), snapshot_index)) + }) + .take(5) + .collect::>(); + if !snapshot_rows.is_empty() { + lines.push(String::new()); + lines.push("Snapshot indices:".to_string()); + for (group_id, snapshot_index) in snapshot_rows { + 
lines.push(format!(" - {}: index {}", group_id, snapshot_index)); + } + } + + if !failed_rows.is_empty() { + lines.push(String::new()); + lines.push("Failed groups:".to_string()); + for row in failed_rows.iter().take(10) { + lines.push(format!( + " - {}: {}", + row.group_id.as_deref().unwrap_or("-"), + row.error.as_deref().unwrap_or("unknown error") + )); + } + if failed_rows.len() > 10 { + lines.push(format!(" ... and {} more groups", failed_rows.len() - 10)); + } + } + + lines.join("\n") +} + +fn render_cluster_join_text(row: &ClusterJoinRow) -> String { + format!( + "Cluster join completed for node {}\nRPC address: {}\nAPI address: {}\nData leader rebalance requested: {}", + row.node_id, + row.rpc_addr, + row.api_addr, + if row.rebalance_requested { "yes" } else { "no" } + ) +} + +fn render_cluster_clear_text(row: &ClusterClearRow) -> String { + if !row.snapshots_dir_exists { + return format!( + "No snapshots directory found at: {}\nNothing to clear.", + row.snapshots_dir + ); + } + + let mut lines = vec![ + "Cluster clear completed".to_string(), + format!("Snapshots directory: {}", row.snapshots_dir), + format!( + "Total snapshots found: {} ({:.2} MB)", + row.total_snapshots_found, + bytes_to_mb(row.total_size_bytes) + ), + format!( + "Snapshots cleared: {} ({:.2} MB freed)", + row.snapshots_cleared, + bytes_to_mb(row.cleared_size_bytes) + ), + ]; + + if row.error_count > 0 { + lines.push(String::new()); + lines.push(format!("Errors ({}):", row.error_count)); + for error in row.errors.iter().take(5) { + lines.push(format!(" - {}", error)); + } + if row.errors.len() > 5 { + lines.push(format!(" ... 
and {} more errors", row.errors.len() - 5)); + } + } + + lines.join("\n") +} + +fn action_heading(action: &str) -> &'static str { + match action { + "snapshot" => "Cluster snapshot completed", + "purge" => "Cluster purge completed", + "trigger-election" => "Cluster trigger election completed", + "transfer-leader" => "Cluster transfer-leader completed", + "rebalance" => "Cluster rebalance completed", + "stepdown" => "Cluster stepdown completed", + _ => "Cluster command completed", + } +} + +fn bytes_to_mb(bytes: u64) -> f64 { + bytes as f64 / 1024.0 / 1024.0 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn renders_group_action_summary_from_rows() { + let text = render_cluster_group_action_text(&[ + ClusterGroupActionRow { + action: "snapshot".to_string(), + group_id: Some("meta".to_string()), + success: Some(true), + error: None, + snapshot_index: Some(42), + target_node_id: None, + upto: None, + snapshots_dir: Some("/tmp/snaps".to_string()), + }, + ClusterGroupActionRow { + action: "snapshot".to_string(), + group_id: Some("data:user:0".to_string()), + success: Some(false), + error: Some("not leader".to_string()), + snapshot_index: None, + target_node_id: None, + upto: None, + snapshots_dir: Some("/tmp/snaps".to_string()), + }, + ]); + + assert!(text.contains("Cluster snapshot completed: 1/2 groups succeeded")); + assert!(text.contains("Snapshots directory: /tmp/snaps")); + assert!(text.contains("meta: index 42")); + assert!(text.contains("data:user:0: not leader")); + } + + #[test] + fn renders_join_summary() { + let text = render_cluster_join_text(&ClusterJoinRow { + node_id: 2, + rpc_addr: "10.0.0.2:9188".to_string(), + api_addr: "http://10.0.0.2:8080".to_string(), + rebalance_requested: true, + }); + + assert!(text.contains("Cluster join completed for node 2")); + assert!(text.contains("Data leader rebalance requested: yes")); + } + + #[test] + fn renders_clear_missing_directory_message() { + let text = render_cluster_clear_text(&ClusterClearRow 
{ + snapshots_dir: "/tmp/missing".to_string(), + snapshots_dir_exists: false, + total_snapshots_found: 0, + total_size_bytes: 0, + snapshots_cleared: 0, + cleared_size_bytes: 0, + error_count: 0, + errors: Vec::new(), + }); + + assert_eq!(text, "No snapshots directory found at: /tmp/missing\nNothing to clear."); + } + + #[test] + fn cell_helpers_read_cluster_action_columns() { + let mut row = HashMap::::new(); + row.insert("action".to_string(), KalamCellValue::text("rebalance")); + row.insert("success".to_string(), KalamCellValue::boolean(true)); + + assert_eq!(cell_text(&row, "action").as_deref(), Some("rebalance")); + assert_eq!(cell_bool(&row, "success"), Some(true)); + } +} diff --git a/cli/src/session/cluster/groups.rs b/cli/src/session/cluster/groups.rs new file mode 100644 index 000000000..5d60b1570 --- /dev/null +++ b/cli/src/session/cluster/groups.rs @@ -0,0 +1,124 @@ +use std::fmt::Write as _; + +use super::{display_number, group_display_name, ClusterRenderData}; +use crate::Result; + +use super::super::CLISession; + +impl CLISession { + pub(in crate::session) async fn show_cluster_list_groups(&mut self) -> Result<()> { + let data = self.fetch_cluster_render_data().await?; + println!("{}", render_cluster_groups_text(&data)); + Ok(()) + } +} + +fn render_cluster_groups_text(data: &ClusterRenderData) -> String { + let mut output = String::new(); + + writeln!(&mut output, "CLUSTER GROUPS").ok(); + writeln!(&mut output, "--------------").ok(); + writeln!(&mut output, "Cluster ID: {}", data.cluster_id).ok(); + if let Some(node) = data.nodes.iter().find(|node| node.is_self) { + writeln!(&mut output, "Connected Node: {}", node.node_id).ok(); + } + if data.groups.is_empty() { + writeln!(&mut output, "No cluster groups available.").ok(); + return output.trim_end().to_string(); + } + + writeln!(&mut output).ok(); + writeln!( + &mut output, + "{:<6} {:<11} {:<10} {:<6} {:<8} {:<10} {:<6}", + "Group", "Type", "State", "Leader", "Snapshot", "Applied", "Term" + ) + 
.ok(); + for group in &data.groups { + writeln!( + &mut output, + "{:<6} {:<11} {:<10} {:<6} {:<8} {:<10} {:<6}", + group_display_name(group), + group.group_type, + group.state.as_deref().unwrap_or("-"), + display_number(group.current_leader), + display_number(group.snapshot), + display_number(group.last_applied), + display_number(group.current_term) + ) + .ok(); + } + + output.trim_end().to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::session::cluster::{ClusterGroupDisplay, ClusterListNode, ClusterRenderData}; + + #[test] + fn renders_group_labels_for_all_group_types() { + let data = ClusterRenderData { + cluster_id: "local-cluster".to_string(), + is_cluster_mode: true, + nodes: vec![ClusterListNode { + cluster_id: "local-cluster".to_string(), + node_id: 1, + role: "leader".to_string(), + status: "active".to_string(), + rpc_addr: "127.0.0.1:9188".to_string(), + api_addr: "http://127.0.0.1:8080".to_string(), + is_self: true, + is_leader: true, + groups_leading: 2, + total_groups: 3, + current_term: None, + last_applied_log: None, + leader_last_log_index: None, + snapshot_index: None, + catchup_progress_pct: None, + replication_lag: None, + hostname: None, + memory_usage_mb: None, + cpu_usage_percent: None, + uptime_human: None, + }], + groups: vec![ + ClusterGroupDisplay { + group_id: 10, + group_type: "meta".to_string(), + current_term: Some(3), + last_applied: Some(40), + snapshot: Some(20), + state: Some("Leader".to_string()), + current_leader: Some(1), + }, + ClusterGroupDisplay { + group_id: 100, + group_type: "user_data".to_string(), + current_term: Some(3), + last_applied: Some(39), + snapshot: Some(20), + state: Some("Follower".to_string()), + current_leader: Some(1), + }, + ClusterGroupDisplay { + group_id: 200, + group_type: "shared_data".to_string(), + current_term: Some(3), + last_applied: Some(38), + snapshot: Some(20), + state: Some("Follower".to_string()), + current_leader: Some(1), + }, + ], + }; + + let rendered = 
render_cluster_groups_text(&data); + + assert!(rendered.contains("Meta")); + assert!(rendered.contains("U0")); + assert!(rendered.contains("S0")); + } +} diff --git a/cli/src/session/cluster/list.rs b/cli/src/session/cluster/list.rs new file mode 100644 index 000000000..733bed4c5 --- /dev/null +++ b/cli/src/session/cluster/list.rs @@ -0,0 +1,211 @@ +use std::fmt::Write as _; + +use super::{ + display_number, group_display_name, summarize_group_states, ClusterGroupDisplay, + ClusterRenderData, +}; +use crate::Result; + +use super::super::CLISession; + +impl CLISession { + pub(in crate::session) async fn show_cluster_list(&mut self) -> Result<()> { + let data = self.fetch_cluster_render_data().await?; + println!("{}", render_cluster_list_text(&data)); + Ok(()) + } +} + +fn render_cluster_list_text(data: &ClusterRenderData) -> String { + let mut output = String::new(); + let current_node_id = data + .nodes + .iter() + .find(|node| node.is_self) + .map(|node| node.node_id) + .unwrap_or(0); + let user_shards = data.groups.iter().filter(|group| group.group_type == "user_data").count(); + let shared_shards = + data.groups.iter().filter(|group| group.group_type == "shared_data").count(); + let total_groups = data.groups.len(); + + writeln!(&mut output, "CLUSTER OVERVIEW").ok(); + writeln!(&mut output, "----------------").ok(); + writeln!(&mut output, "Cluster ID: {}", data.cluster_id).ok(); + writeln!( + &mut output, + "Mode: {}", + if data.is_cluster_mode { + "Cluster" + } else { + "Standalone" + } + ) + .ok(); + if current_node_id > 0 { + writeln!(&mut output, "Current Node: {}", current_node_id).ok(); + } + if data.is_cluster_mode { + writeln!(&mut output, "Total Groups: {}", total_groups).ok(); + writeln!(&mut output, " Meta: 1").ok(); + writeln!(&mut output, " User Shards: {}", user_shards).ok(); + writeln!(&mut output, " Shared Shards: {}", shared_shards).ok(); + } + writeln!(&mut output).ok(); + + writeln!(&mut output, "NODES").ok(); + writeln!(&mut output, 
"-----").ok(); + for node in &data.nodes { + let self_marker = if node.is_self { " (connected)" } else { "" }; + let leader_marker = if node.is_leader { " [LEADER]" } else { "" }; + writeln!(&mut output, "Node {}{}{}", node.node_id, self_marker, leader_marker).ok(); + writeln!(&mut output, " Role: {}", node.role).ok(); + writeln!(&mut output, " Status: {}", node.status).ok(); + writeln!(&mut output, " API: {}", node.api_addr).ok(); + writeln!(&mut output, " RPC: {}", node.rpc_addr).ok(); + writeln!(&mut output, " Groups Leading: {}/{}", node.groups_leading, node.total_groups) + .ok(); + + let hostname = node.hostname.as_deref().unwrap_or("-"); + writeln!( + &mut output, + " Host: {} | {} | {} | {}", + hostname, + CLISession::format_cluster_memory(node.memory_usage_mb), + CLISession::format_cluster_cpu(node.cpu_usage_percent), + CLISession::format_cluster_uptime(node.uptime_human.as_deref()) + ) + .ok(); + + if let Some(term) = node.current_term { + writeln!(&mut output, " Term: {}", term).ok(); + } + if let Some(applied) = node.last_applied_log { + writeln!(&mut output, " Last Applied: {}", applied).ok(); + } + if let Some(leader_log) = node.leader_last_log_index { + writeln!(&mut output, " Leader Log: {}", leader_log).ok(); + } + if let Some(snapshot) = node.snapshot_index { + writeln!(&mut output, " Snapshot: {}", snapshot).ok(); + } + if let Some(lag) = node.replication_lag { + writeln!(&mut output, " Replication Lag: {} entries", lag).ok(); + } + if let Some(progress) = node.catchup_progress_pct { + writeln!(&mut output, " Catchup Progress: {}%", progress).ok(); + } + writeln!(&mut output).ok(); + } + + if data.is_cluster_mode { + let (leading, following, unknown) = summarize_group_states(&data.groups); + writeln!(&mut output, "GROUP STATUS SUMMARY").ok(); + writeln!(&mut output, "--------------------").ok(); + writeln!(&mut output, "Leading: {}", leading).ok(); + writeln!(&mut output, "Following: {}", following).ok(); + if unknown > 0 { + writeln!(&mut output, 
"Unknown/Pending: {}", unknown).ok(); + } + writeln!(&mut output).ok(); + + writeln!(&mut output, "GROUP SAMPLE").ok(); + writeln!(&mut output, "------------").ok(); + writeln!( + &mut output, + "{:<6} {:<11} {:<10} {:<6} {:<8} {:<10}", + "Group", "Type", "State", "Leader", "Snapshot", "Applied" + ) + .ok(); + + for group in sample_groups(&data.groups) { + writeln!( + &mut output, + "{:<6} {:<11} {:<10} {:<6} {:<8} {:<10}", + group_display_name(group), + group.group_type, + group.state.as_deref().unwrap_or("-"), + display_number(group.current_leader), + display_number(group.snapshot), + display_number(group.last_applied) + ) + .ok(); + } + } + + output.trim_end().to_string() +} + +fn sample_groups(groups: &[ClusterGroupDisplay]) -> Vec<&ClusterGroupDisplay> { + let mut sample = Vec::new(); + if let Some(meta) = groups.iter().find(|group| group.group_type == "meta") { + sample.push(meta); + } + sample.extend(groups.iter().filter(|group| group.group_type == "user_data").take(3)); + sample.extend(groups.iter().filter(|group| group.group_type == "shared_data").take(2)); + sample +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::session::cluster::{ClusterGroupDisplay, ClusterListNode, ClusterRenderData}; + + #[test] + fn renders_cluster_list_sections_from_system_views() { + let data = ClusterRenderData { + cluster_id: "local-cluster".to_string(), + is_cluster_mode: true, + nodes: vec![ClusterListNode { + cluster_id: "local-cluster".to_string(), + node_id: 1, + role: "leader".to_string(), + status: "active".to_string(), + rpc_addr: "127.0.0.1:9188".to_string(), + api_addr: "http://127.0.0.1:8080".to_string(), + is_self: true, + is_leader: true, + groups_leading: 4, + total_groups: 6, + current_term: Some(7), + last_applied_log: Some(120), + leader_last_log_index: Some(125), + snapshot_index: Some(90), + catchup_progress_pct: None, + replication_lag: None, + hostname: Some("node-1".to_string()), + memory_usage_mb: Some(32), + cpu_usage_percent: Some(1.5), + 
uptime_human: Some("2m".to_string()), + }], + groups: vec![ + ClusterGroupDisplay { + group_id: 10, + group_type: "meta".to_string(), + current_term: Some(7), + last_applied: Some(120), + snapshot: Some(90), + state: Some("Leader".to_string()), + current_leader: Some(1), + }, + ClusterGroupDisplay { + group_id: 100, + group_type: "user_data".to_string(), + current_term: Some(7), + last_applied: Some(111), + snapshot: Some(90), + state: Some("Follower".to_string()), + current_leader: Some(2), + }, + ], + }; + + let rendered = render_cluster_list_text(&data); + + assert!(rendered.contains("CLUSTER OVERVIEW")); + assert!(rendered.contains("NODES")); + assert!(rendered.contains("GROUP STATUS SUMMARY")); + assert!(rendered.contains("Meta")); + assert!(rendered.contains("U0")); + } +} diff --git a/cli/src/session/cluster/mod.rs b/cli/src/session/cluster/mod.rs new file mode 100644 index 000000000..52cd6700a --- /dev/null +++ b/cli/src/session/cluster/mod.rs @@ -0,0 +1,239 @@ +use std::collections::HashMap; + +use kalam_client::KalamCellValue; + +use super::CLISession; +use crate::{CLIError, Result}; + +mod actions; +mod groups; +mod list; + +const CLUSTER_LIST_SQL: &str = " +SELECT + cluster_id, + node_id, + role, + status, + rpc_addr, + api_addr, + is_self, + is_leader, + groups_leading, + total_groups, + current_term, + last_applied_log, + leader_last_log_index, + snapshot_index, + catchup_progress_pct, + replication_lag, + hostname, + memory_usage_mb, + cpu_usage_percent, + uptime_human +FROM system.cluster +ORDER BY is_leader DESC, node_id ASC +"; + +const CLUSTER_GROUPS_SQL: &str = " +SELECT + group_id, + group_type, + current_term, + last_applied, + snapshot, + state, + current_leader +FROM system.cluster_groups +ORDER BY group_id ASC +"; + +#[derive(Debug, Clone, PartialEq)] +struct ClusterListNode { + cluster_id: String, + node_id: u64, + role: String, + status: String, + rpc_addr: String, + api_addr: String, + is_self: bool, + is_leader: bool, + groups_leading: 
u64, + total_groups: u64, + current_term: Option, + last_applied_log: Option, + leader_last_log_index: Option, + snapshot_index: Option, + catchup_progress_pct: Option, + replication_lag: Option, + hostname: Option, + memory_usage_mb: Option, + cpu_usage_percent: Option, + uptime_human: Option, +} + +#[derive(Debug, Clone, PartialEq)] +struct ClusterGroupDisplay { + group_id: i64, + group_type: String, + current_term: Option, + last_applied: Option, + snapshot: Option, + state: Option, + current_leader: Option, +} + +#[derive(Debug, Clone, PartialEq)] +struct ClusterRenderData { + cluster_id: String, + is_cluster_mode: bool, + nodes: Vec, + groups: Vec, +} + +impl CLISession { + async fn fetch_cluster_render_data(&mut self) -> Result { + let response = self.execute_query_response(CLUSTER_LIST_SQL).await?; + let mut nodes = parse_cluster_nodes(&response)?; + if nodes.is_empty() { + return Err(CLIError::FormatError("system.cluster returned no rows".to_string())); + } + + let cluster_id = nodes + .first() + .map(|node| node.cluster_id.clone()) + .unwrap_or_else(|| "standalone".to_string()); + let is_cluster_mode = nodes.iter().any(|node| is_cluster_role(&node.role)); + + let groups = if is_cluster_mode { + let response = self.execute_query_response(CLUSTER_GROUPS_SQL).await?; + parse_cluster_groups(&response)? 
+ } else { + Vec::new() + }; + + nodes.shrink_to_fit(); + + Ok(ClusterRenderData { + cluster_id, + is_cluster_mode, + nodes, + groups, + }) + } +} + +fn parse_cluster_nodes(response: &kalam_client::QueryResponse) -> Result> { + let rows = response.rows_as_maps(); + let mut nodes = Vec::with_capacity(rows.len()); + + for row in rows { + nodes.push(ClusterListNode { + cluster_id: cell_text(&row, "cluster_id").unwrap_or_else(|| "standalone".to_string()), + node_id: cell_u64(&row, "node_id").unwrap_or(0), + role: cell_text(&row, "role").unwrap_or_else(|| "unknown".to_string()), + status: cell_text(&row, "status").unwrap_or_else(|| "unknown".to_string()), + rpc_addr: cell_text(&row, "rpc_addr").unwrap_or_else(|| "-".to_string()), + api_addr: cell_text(&row, "api_addr").unwrap_or_else(|| "-".to_string()), + is_self: cell_bool(&row, "is_self").unwrap_or(false), + is_leader: cell_bool(&row, "is_leader").unwrap_or(false), + groups_leading: cell_u64(&row, "groups_leading").unwrap_or(0), + total_groups: cell_u64(&row, "total_groups").unwrap_or(0), + current_term: cell_i64(&row, "current_term"), + last_applied_log: cell_i64(&row, "last_applied_log"), + leader_last_log_index: cell_i64(&row, "leader_last_log_index"), + snapshot_index: cell_i64(&row, "snapshot_index"), + catchup_progress_pct: cell_i64(&row, "catchup_progress_pct"), + replication_lag: cell_i64(&row, "replication_lag"), + hostname: cell_text(&row, "hostname"), + memory_usage_mb: cell_u64(&row, "memory_usage_mb"), + cpu_usage_percent: cell_f32(&row, "cpu_usage_percent"), + uptime_human: cell_text(&row, "uptime_human"), + }); + } + + Ok(nodes) +} + +fn parse_cluster_groups( + response: &kalam_client::QueryResponse, +) -> Result> { + let rows = response.rows_as_maps(); + let mut groups = Vec::with_capacity(rows.len()); + + for row in rows { + let Some(group_id) = cell_i64(&row, "group_id") else { + return Err(CLIError::FormatError( + "system.cluster_groups row missing group_id".to_string(), + )); + }; + + 
groups.push(ClusterGroupDisplay { + group_id, + group_type: cell_text(&row, "group_type").unwrap_or_else(|| "unknown".to_string()), + current_term: cell_i64(&row, "current_term"), + last_applied: cell_i64(&row, "last_applied"), + snapshot: cell_i64(&row, "snapshot"), + state: cell_text(&row, "state"), + current_leader: cell_i64(&row, "current_leader"), + }); + } + + Ok(groups) +} + +fn cell_text(row: &HashMap, key: &str) -> Option { + row.get(key).and_then(|value| value.as_text().map(ToString::to_string)) +} + +fn cell_bool(row: &HashMap, key: &str) -> Option { + row.get(key).and_then(KalamCellValue::as_boolean) +} + +fn cell_i64(row: &HashMap, key: &str) -> Option { + row.get(key).and_then(KalamCellValue::as_big_int) +} + +fn cell_u64(row: &HashMap, key: &str) -> Option { + cell_i64(row, key).and_then(|value| u64::try_from(value).ok()) +} + +fn cell_f32(row: &HashMap, key: &str) -> Option { + row.get(key).and_then(KalamCellValue::as_float) +} + +fn is_cluster_role(role: &str) -> bool { + matches!(role, "leader" | "follower" | "learner" | "candidate") +} + +fn group_display_name(group: &ClusterGroupDisplay) -> String { + match group.group_type.as_str() { + "meta" => "Meta".to_string(), + "user_data" => format!("U{}", group.group_id - 100), + "shared_data" => format!("S{}", group.group_id - 200), + _ => group.group_id.to_string(), + } +} + +fn display_number(value: Option) -> String { + value.map_or_else(|| "-".to_string(), |num| num.to_string()) +} + +fn summarize_group_states(groups: &[ClusterGroupDisplay]) -> (usize, usize, usize) { + let mut leading = 0; + let mut following = 0; + let mut unknown = 0; + + for group in groups { + let state = group.state.as_deref().unwrap_or("unknown"); + if state.eq_ignore_ascii_case("leader") { + leading += 1; + } else if group.current_leader.is_some() { + following += 1; + } else { + unknown += 1; + } + } + + (leading, following, unknown) +} diff --git a/cli/src/session/commands.rs b/cli/src/session/commands.rs index 
d4cbbe5ea..f0e18e5f9 100644 --- a/cli/src/session/commands.rs +++ b/cli/src/session/commands.rs @@ -1,9 +1,13 @@ -use super::{CLISession, OutputFormat}; -use crate::error::{CLIError, Result}; -use crate::parser::Command; +use std::time::Instant; + use colored::Colorize; use kalam_client::SubscriptionConfig; -use std::time::Instant; + +use super::{CLISession, OutputFormat}; +use crate::{ + error::{CLIError, Result}, + parser::Command, +}; impl CLISession { /// Execute a parsed command @@ -29,73 +33,40 @@ impl CLISession { } }, Command::ClusterSnapshot => { - println!("Triggering cluster snapshots..."); - match self.execute("CLUSTER SNAPSHOT").await { - Ok(_) => println!("Snapshot triggered"), - Err(e) => eprintln!("Snapshot failed: {}", e), - } + self.show_cluster_group_action("CLUSTER SNAPSHOT").await?; }, Command::ClusterPurge { upto } => { - println!("Purging cluster logs up to {}...", upto); - match self.execute(&format!("CLUSTER PURGE --UPTO {}", upto)).await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster purge failed: {}", e), - } + self.show_cluster_group_action(&format!("CLUSTER PURGE --UPTO {}", upto)) + .await?; }, Command::ClusterTriggerElection => { - println!("Triggering cluster election..."); - match self.execute("CLUSTER TRIGGER ELECTION").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster trigger-election failed: {}", e), - } + self.show_cluster_group_action("CLUSTER TRIGGER ELECTION").await?; }, Command::ClusterTransferLeader { node_id } => { - println!("Transferring cluster leadership to node {}...", node_id); - match self.execute(&format!("CLUSTER TRANSFER-LEADER {}", node_id)).await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster transfer-leader failed: {}", e), - } + self.show_cluster_group_action(&format!("CLUSTER TRANSFER-LEADER {}", node_id)) + .await?; + }, + Command::ClusterRebalance => { + self.show_cluster_group_action("CLUSTER REBALANCE").await?; }, Command::ClusterStepdown => { - println!("Requesting cluster leader stepdown..."); - 
match self.execute("CLUSTER STEPDOWN").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster stepdown failed: {}", e), - } + self.show_cluster_group_action("CLUSTER STEPDOWN").await?; }, Command::ClusterClear => { - println!("Clearing old cluster snapshots..."); - match self.execute("CLUSTER CLEAR").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster clear failed: {}", e), - } + self.show_cluster_clear().await?; }, - Command::ClusterList => match self.execute("SELECT * FROM system.cluster").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster list failed: {}", e), + Command::ClusterList => { + self.show_cluster_list().await?; }, Command::ClusterListGroups => { - match self.execute("SELECT * FROM system.cluster_groups").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster list groups failed: {}", e), - } + self.show_cluster_list_groups().await?; }, - Command::ClusterStatus => match self.execute("SELECT * FROM system.cluster").await { - Ok(_) => {}, - Err(e) => eprintln!("Cluster status failed: {}", e), - }, - Command::ClusterJoin(addr) => { - println!("{} CLUSTER JOIN is not implemented yet", "⚠️".yellow()); - println!("Would join node at: {}", addr); - println!( - "\nTo add a node to the cluster, configure it in server.toml and restart." - ); - }, - Command::ClusterLeave => { - println!("{} CLUSTER LEAVE is not implemented yet", "⚠️".yellow()); - println!( - "\nTo remove this node from the cluster, gracefully shut down the server." 
- ); + Command::ClusterJoin { + node_id, + rpc_addr, + api_addr, + } => { + self.show_cluster_join(node_id, &rpc_addr, &api_addr).await?; }, Command::Health => match self.health_check().await { Ok(_) => {}, @@ -117,13 +88,15 @@ impl CLISession { }, Command::ListTables => { self.execute( - "SELECT namespace_id AS namespace, table_name, table_type FROM system.tables ORDER BY namespace_id, table_name", + "SELECT namespace_id AS namespace, table_name, table_type FROM system.tables \ + ORDER BY namespace_id, table_name", ) .await?; }, Command::Describe(table) => { let query = format!( - "SELECT * FROM information_schema.columns WHERE table_name = '{}' ORDER BY ordinal_position", + "SELECT * FROM information_schema.columns WHERE table_name = '{}' ORDER BY \ + ordinal_position", table ); self.execute(&query).await?; @@ -185,7 +158,8 @@ impl CLISession { }, Command::Stats => { self.execute( - "SELECT metric_name, metric_value FROM system.stats ORDER BY metric_name LIMIT 5000", + "SELECT metric_name, metric_value FROM system.stats ORDER BY metric_name \ + LIMIT 5000", ) .await?; }, @@ -296,35 +270,20 @@ impl CLISession { ); println!("{}", "║ Cluster Commands".bright_blue().bold()); println!("║ {:<48} Trigger snapshot", "\\cluster snapshot".cyan()); - println!( - "║ {:<48} Purge logs up to index", - "\\cluster purge --upto ".cyan() - ); - println!( - "║ {:<48} Trigger cluster election", - "\\cluster trigger-election".cyan() - ); + println!("║ {:<48} Purge logs up to index", "\\cluster purge --upto ".cyan()); + println!("║ {:<48} Trigger cluster election", "\\cluster trigger-election".cyan()); println!( "║ {:<48} Transfer cluster leadership", "\\cluster transfer-leader ".cyan() ); + println!("║ {:<48} Rebalance data leaders", "\\cluster rebalance".cyan()); println!("║ {:<48} Leader stepdown", "\\cluster stepdown".cyan()); println!("║ {:<48} Clear old snapshots", "\\cluster clear".cyan()); println!("║ {:<48} List cluster nodes", "\\cluster list".cyan()); + println!("║ {:<48} 
List all raft groups", "\\cluster list groups".cyan()); println!( - "║ {:<48} List all raft groups", - "\\cluster list groups".cyan() - ); - println!("║ {:<48} Cluster status", "\\cluster status".cyan()); - println!( - "║ {:<48} Join node {}", - "\\cluster join ".cyan(), - "(not implemented)".yellow() - ); - println!( - "║ {:<48} Leave cluster {}", - "\\cluster leave".cyan(), - "(not implemented)".yellow() + "║ {:<48} Join node at runtime", + "\\cluster join ".cyan() ); println!( "║ {:<48} Live per-node stats", @@ -343,14 +302,8 @@ impl CLISession { "║ {:<32} Show stored credentials", "\\show-credentials, \\credentials".cyan() ); - println!( - "║ {:<32} Update credentials", - "\\update-credentials

".cyan() - ); - println!( - "║ {:<32} Delete stored credentials", - "\\delete-credentials".cyan() - ); + println!("║ {:<32} Update credentials", "\\update-credentials

".cyan()); + println!("║ {:<32} Delete stored credentials", "\\delete-credentials".cyan()); // Topic Consumption println!( @@ -384,7 +337,7 @@ impl CLISession { println!("{}", "║ Examples".bright_blue().bold()); println!("║ {}", "SELECT * FROM system.tables LIMIT 5;".green()); println!("║ {}", "SELECT name FROM system.namespaces;".green()); - println!("║ {}", "\\cluster status".green()); + println!("║ {}", "\\cluster list".green()); println!( "{}", @@ -405,8 +358,10 @@ impl CLISession { timeout: Option, ) -> Result<()> { use kalam_client::consumer::AutoOffsetReset; - use tokio::signal; - use tokio::time::{sleep, Duration}; + use tokio::{ + signal, + time::{sleep, Duration}, + }; // Warn if no consumer group specified if group.is_none() { @@ -435,7 +390,8 @@ impl CLISession { AutoOffsetReset::Offset(offset) } else { return Err(CLIError::ParseError(format!( - "Invalid --from value: {}. Use 'earliest', 'latest', or a numeric offset", + "Invalid --from value: {}. Use 'earliest', 'latest', or a numeric \ + offset", from_str ))); } @@ -520,7 +476,9 @@ impl CLISession { format!( "❌ Topic '{}' not found or consume endpoint not available.\n {}", topic, - "Create the topic with: CREATE TOPIC SOURCE TABLE .".dimmed() + "Create the topic with: CREATE TOPIC SOURCE TABLE \ + .
" + .dimmed() ) } else if error_msg.contains("401") || error_msg.contains("403") { format!( diff --git a/cli/src/session/info.rs b/cli/src/session/info.rs index 93c5d6a27..5385c3fb1 100644 --- a/cli/src/session/info.rs +++ b/cli/src/session/info.rs @@ -1,9 +1,9 @@ -use super::CLISession; -use crate::history::CommandHistory; -use crate::CLI_VERSION; use colored::Colorize; use kalam_client::KalamLinkError; +use super::CLISession; +use crate::{history::CommandHistory, CLI_VERSION}; + impl CLISession { pub(super) fn normalize_server_field(value: String) -> Option { let trimmed = value.trim(); @@ -72,7 +72,8 @@ impl CLISession { ); if let Some(ref err) = health_status { if self.connected { - // Server is reachable but health detail could not be retrieved (e.g. localhost-only restriction) + // Server is reachable but health detail could not be retrieved (e.g. localhost-only + // restriction) println!(" Health check: {}", format!("Note ({})", err).yellow()); } else { println!(" Last check: {}", format!("Failed ({})", err).red()); diff --git a/cli/tests/auth/test_auth.rs b/cli/tests/auth/test_auth.rs index 2145e2615..6f1acfafc 100644 --- a/cli/tests/auth/test_auth.rs +++ b/cli/tests/auth/test_auth.rs @@ -10,9 +10,10 @@ //! - Credential rotation and deletion //! - Admin operations with proper authentication -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Test configuration constants const TEST_TIMEOUT: Duration = Duration::from_secs(10); diff --git a/cli/tests/auth/test_keycloak_auth.rs b/cli/tests/auth/test_keycloak_auth.rs index 2c103f2bc..97c9815a6 100644 --- a/cli/tests/auth/test_keycloak_auth.rs +++ b/cli/tests/auth/test_keycloak_auth.rs @@ -38,10 +38,12 @@ //! | `KEYCLOAK_TEST_USER` | `kalamdb-user` | //! 
| `KEYCLOAK_TEST_PASSWORD` | `kalamdb123` | -use crate::common::*; +use std::time::Duration; + use reqwest::Client; use serde_json::json; -use std::time::Duration; + +use crate::common::*; // --------------------------------------------------------------------------- // Configuration helpers @@ -159,8 +161,7 @@ async fn get_keycloak_token() -> Result Option { - use base64::engine::general_purpose::URL_SAFE_NO_PAD; - use base64::Engine as _; + use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; let payload_b64 = token.splitn(3, '.').nth(1)?; let payload_bytes = URL_SAFE_NO_PAD.decode(payload_b64).ok()?; @@ -231,8 +232,7 @@ fn test_keycloak_realm_configured() { // Verify Keycloak uses asymmetric algorithm (not HS256) if let Some(access_token) = body.get("access_token").and_then(|v| v.as_str()) { - use base64::engine::general_purpose::URL_SAFE_NO_PAD; - use base64::Engine as _; + use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; if let Some(header_b64) = access_token.splitn(3, '.').next() { if let Ok(hdr_bytes) = URL_SAFE_NO_PAD.decode(header_b64) { if let Ok(hdr) = serde_json::from_slice::(&hdr_bytes) { @@ -252,9 +252,8 @@ fn test_keycloak_realm_configured() { }, Err(e) => { panic!( - "Failed to get token from Keycloak. \ - Ensure realm '{}' has client '{}' with Direct Access Grants enabled \ - and user '{}' exists: {}", + "Failed to get token from Keycloak. Ensure realm '{}' has client '{}' with Direct \ + Access Grants enabled and user '{}' exists: {}", keycloak_realm(), keycloak_client_id(), keycloak_test_user(), @@ -270,8 +269,8 @@ fn test_keycloak_realm_configured() { /// 1. Get a real RS256 token from Keycloak (signed with Keycloak's RSA private key). /// 2. Pre-create an OAuth user whose `user_id` exactly matches the token `sub`. /// 3. Send it to KalamDB as a Bearer token. -/// 4. KalamDB reads `iss`, fetches Keycloak's JWKS, verifies the RS256 signature -/// using Keycloak's *public* key. 
Only Keycloak can produce a valid signature. +/// 4. KalamDB reads `iss`, fetches Keycloak's JWKS, verifies the RS256 signature using Keycloak's +/// *public* key. Only Keycloak can produce a valid signature. /// 5. First request: resolve the pre-created OAuth user directly by canonical `sub`. /// 6. Second request: reuse the same canonical user via the user_id index. /// @@ -335,10 +334,8 @@ fn test_preprovisioned_oauth_user_via_bearer() { || err.contains("invalid_credentials") { eprintln!( - "Server not configured for Keycloak OIDC. Skipping.\n\ - Start server with:\n \ - KALAMDB_JWT_TRUSTED_ISSUERS=\"kalamdb,{}\" \\\n \ - cargo run", + "Server not configured for Keycloak OIDC. Skipping.\nStart server with:\n \ + KALAMDB_JWT_TRUSTED_ISSUERS=\"kalamdb,{}\" \\\n cargo run", keycloak_issuer() ); return; @@ -440,10 +437,9 @@ fn test_hs256_with_external_issuer_rejected() { assert!( status.as_u16() == 401 || status.as_u16() == 403, - "HS256 token with Keycloak issuer MUST be rejected (401/403), got {}.\n\ - If this passed, it means the server accepted a forged HS256 token — \n\ - anyone with the JWT secret can impersonate any OIDC provider!\n\ - body={:?}", + "HS256 token with Keycloak issuer MUST be rejected (401/403), got {}.\nIf this passed, it \ + means the server accepted a forged HS256 token — \nanyone with the JWT secret can \ + impersonate any OIDC provider!\nbody={:?}", status, body ); diff --git a/cli/tests/auth_retry_test.rs b/cli/tests/auth_retry_test.rs index d3f5797c8..49ba601ba 100644 --- a/cli/tests/auth_retry_test.rs +++ b/cli/tests/auth_retry_test.rs @@ -3,8 +3,10 @@ //! These tests verify that the CLI properly handles authentication failures //! and prompts for credentials when stored credentials become invalid. 
-use std::path::PathBuf; -use std::process::{Command, Stdio}; +use std::{ + path::PathBuf, + process::{Command, Stdio}, +}; /// Helper to get the CLI binary path fn get_cli_binary() -> PathBuf { @@ -131,7 +133,10 @@ fn test_expired_token_flow() { println!("\nSteps to test expired token handling:"); println!("1. Login and save credentials:"); println!(" cargo run --release -- --user testuser --password testpass --save-credentials"); - println!("\n2. Wait for the access token to expire (or manually edit the expiry in credentials file)"); + println!( + "\n2. Wait for the access token to expire (or manually edit the expiry in credentials \ + file)" + ); println!("\n3. Run CLI without arguments:"); println!(" cargo run --release"); println!("\n4. Expected behavior:"); diff --git a/cli/tests/cli/test_cli.rs b/cli/tests/cli/test_cli.rs index 178bfa6d1..dcce5eb79 100644 --- a/cli/tests/cli/test_cli.rs +++ b/cli/tests/cli/test_cli.rs @@ -10,10 +10,11 @@ //! - Session timeout and command history //! - Tab completion and verbose output -use crate::common::*; +use std::fs; use predicates::prelude::*; -use std::fs; + +use crate::common::*; /// T036: Test CLI connection and prompt display #[test] diff --git a/cli/tests/cli/test_cli_auth.rs b/cli/tests/cli/test_cli_auth.rs index 1bb71bc7e..0c527ea01 100644 --- a/cli/tests/cli/test_cli_auth.rs +++ b/cli/tests/cli/test_cli_auth.rs @@ -9,10 +9,10 @@ //! - Credential rotation and updates //! 
- JWT token storage (never user/password) -use crate::common::*; - use std::fs; +use crate::common::*; + // ============================================================================ // UNIT TESTS - FileCredentialStore (no server needed) // ============================================================================ @@ -354,7 +354,10 @@ fn test_cli_save_credentials_creates_file() { assert!(contents.contains(&instance), "Should contain instance name"); println!("✓ Credentials file created at: {:?}", creds_path); } else { - eprintln!("⚠️ No JWT token in credentials file (root password may not be set). Skipping test."); + eprintln!( + "⚠️ No JWT token in credentials file (root password may not be set). Skipping \ + test." + ); } } else { eprintln!( diff --git a/cli/tests/cli/test_cli_auth_admin.rs b/cli/tests/cli/test_cli_auth_admin.rs index f889e7be1..b8e93ff65 100644 --- a/cli/tests/cli/test_cli_auth_admin.rs +++ b/cli/tests/cli/test_cli_auth_admin.rs @@ -11,12 +11,14 @@ //! # Run tests in another terminal //! cargo test --test test_cli_auth_admin -- --test-threads=1 //! 
``` -//TODO: Remove this since we have most of the tests covered by the integration tests +// TODO: Remove this since we have most of the tests covered by the integration tests #![allow(unused_imports)] -use crate::common::*; -use assert_cmd::Command; use std::time::Duration; +use assert_cmd::Command; + +use crate::common::*; + /// Test that root user can create namespaces #[tokio::test] async fn test_root_can_create_namespace() { @@ -109,7 +111,8 @@ async fn test_root_can_create_drop_tables() { // Create table as root let result = execute_sql_via_http_as_root(&format!( - "CREATE TABLE {}.test_table (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {}.test_table (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", namespace_name )) .await @@ -269,7 +272,8 @@ async fn test_cli_admin_operations() { // Step 2: Create table let _ = execute_sql_via_http_as_root(&format!( - "CREATE TABLE IF NOT EXISTS {}.users (id TEXT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE IF NOT EXISTS {}.users (id TEXT PRIMARY KEY, name VARCHAR) WITH \ + (TYPE='USER', FLUSH_POLICY='rows:10')", namespace_name )) .await; @@ -339,7 +343,8 @@ async fn test_cli_flush_table() { // Create a USER table with flush policy (SHARED tables cannot be flushed) let result = execute_sql_via_http_as_root(&format!( - "CREATE TABLE {}.metrics (timestamp BIGINT PRIMARY KEY, value DOUBLE) WITH (TYPE='USER', FLUSH_POLICY='rows:5')", + "CREATE TABLE {}.metrics (timestamp BIGINT PRIMARY KEY, value DOUBLE) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:5')", namespace_name )) .await @@ -384,16 +389,16 @@ async fn test_cli_flush_table() { // Note: system.jobs stores namespace/table info inside the JSON `parameters` column. 
let jobs_query = if let Some(ref job_id) = job_id { format!( - "SELECT job_id, job_type, status, parameters, message FROM system.jobs \ - WHERE job_id = '{}' LIMIT 1", + "SELECT job_id, job_type, status, parameters, message FROM system.jobs WHERE job_id = \ + '{}' LIMIT 1", job_id ) } else { // Fallback to querying by type and table name (from `parameters` JSON) - "SELECT job_id, job_type, status, parameters, message FROM system.jobs \ - WHERE job_type = 'flush' AND parameters LIKE '%\"table_name\":\"metrics\"%' \ - ORDER BY created_at DESC LIMIT 1" - .to_string() + "SELECT job_id, job_type, status, parameters, message FROM system.jobs WHERE job_type = \ + 'flush' AND parameters LIKE '%\"table_name\":\"metrics\"%' ORDER BY created_at DESC LIMIT \ + 1" + .to_string() }; // In cluster mode, job creation/visibility can lag due to Raft replication. @@ -549,13 +554,17 @@ async fn test_cli_flush_all_tables() { let _ = execute_sql_via_http_as_root(&format!("CREATE NAMESPACE {}", namespace_name)).await; // Create multiple USER tables (SHARED tables cannot be flushed) - let _ = execute_sql_via_http_as_root( - &format!("CREATE TABLE {}.table1 (id INT PRIMARY KEY, data VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", namespace_name), - ) + let _ = execute_sql_via_http_as_root(&format!( + "CREATE TABLE {}.table1 (id INT PRIMARY KEY, data VARCHAR) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", + namespace_name + )) .await; - let _ = execute_sql_via_http_as_root( - &format!("CREATE TABLE {}.table2 (id INT PRIMARY KEY, value DOUBLE) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", namespace_name), - ) + let _ = execute_sql_via_http_as_root(&format!( + "CREATE TABLE {}.table2 (id INT PRIMARY KEY, value DOUBLE) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", + namespace_name + )) .await; // Insert some data @@ -614,17 +623,15 @@ async fn test_cli_flush_all_tables() { let job_id_list = job_ids.iter().map(|id| format!("'{}'", id)).collect::>().join(", "); format!( - "SELECT 
job_id, job_type, status, parameters, message FROM system.jobs \ - WHERE job_id IN ({}) \ - ORDER BY created_at DESC", + "SELECT job_id, job_type, status, parameters, message FROM system.jobs WHERE job_id \ + IN ({}) ORDER BY created_at DESC", job_id_list ) } else { // Fallback to querying by namespace format!( - "SELECT job_id, job_type, status, parameters, message FROM system.jobs \ - WHERE job_type = 'flush' AND parameters LIKE '%\"namespace_id\":\"{}\"%' \ - ORDER BY created_at DESC", + "SELECT job_id, job_type, status, parameters, message FROM system.jobs WHERE job_type \ + = 'flush' AND parameters LIKE '%\"namespace_id\":\"{}\"%' ORDER BY created_at DESC", namespace_name ) }; diff --git a/cli/tests/cli/test_cli_doc_matrix.rs b/cli/tests/cli/test_cli_doc_matrix.rs index 7d22fd3ed..e61d894da 100644 --- a/cli/tests/cli/test_cli_doc_matrix.rs +++ b/cli/tests/cli/test_cli_doc_matrix.rs @@ -5,10 +5,12 @@ //! 2) non-server clap parsing tests for previously uncovered flags //! 3) server-backed runtime tests for subscribe/consume/timeout-related flags -use crate::common::*; -use clap::Parser; use std::time::{Duration, Instant}; +use clap::Parser; + +use crate::common::*; + #[path = "../../src/args.rs"] mod cli_args; @@ -435,6 +437,10 @@ fn test_docs_matrix_has_execution_tests_for_documented_flags_and_commands() { item: "\\cluster transfer leader", tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], }, + Coverage { + item: "\\cluster rebalance", + tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], + }, Coverage { item: "\\cluster stepdown", tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], @@ -459,18 +465,10 @@ fn test_docs_matrix_has_execution_tests_for_documented_flags_and_commands() { item: "\\cluster list groups", tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], }, - Coverage { - item: "\\cluster status", - tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], - }, Coverage { item: "\\cluster join", tests: 
&["test_cli_meta_commands_doc_smoke_non_interactive"], }, - Coverage { - item: "\\cluster leave", - tests: &["test_cli_meta_commands_doc_smoke_non_interactive"], - }, ]; let test_sources = concat!( diff --git a/cli/tests/cluster.rs b/cli/tests/cluster.rs index 09ae6b608..b601dbc4c 100644 --- a/cli/tests/cluster.rs +++ b/cli/tests/cluster.rs @@ -17,11 +17,16 @@ mod common; /// Cluster-specific common utilities mod cluster_common { - use crate::common::*; + use std::{ + collections::HashMap, + sync::{Mutex, OnceLock}, + time::Duration, + }; + use kalam_client::{KalamCellValue, KalamLinkTimeouts, QueryResponse}; use serde_json::Value; - use std::sync::OnceLock; - use std::time::Duration; + + use crate::common::*; /// Get cluster node URLs from environment or use defaults pub fn cluster_urls() -> Vec { @@ -45,8 +50,27 @@ mod cluster_common { }) } - /// Create a client connected to a specific cluster node - pub fn create_cluster_client(base_url: &str) -> KalamLinkClient { + /// Per-URL client cache for cluster helpers. + /// + /// Reusing a `KalamLinkClient` per URL avoids spawning a fresh reqwest connection pool + /// on every `execute_on_node` call. Each `KalamLinkClient` owns an `Arc`; + /// cloning is cheap and all clones share the same pool. 
+ fn cached_cluster_client(base_url: &str) -> KalamLinkClient { + static CLIENT_CACHE: OnceLock>> = OnceLock::new(); + let cache = CLIENT_CACHE.get_or_init(|| Mutex::new(HashMap::new())); + if let Ok(mut guard) = cache.lock() { + if let Some(client) = guard.get(base_url) { + return client.clone(); + } + let client = build_cluster_client(base_url); + guard.insert(base_url.to_string(), client.clone()); + return client; + } + // Fallback if lock poisoned + build_cluster_client(base_url) + } + + fn build_cluster_client(base_url: &str) -> KalamLinkClient { client_for_user_on_url_with_timeouts( base_url, default_username(), @@ -63,8 +87,7 @@ mod cluster_common { .expect("Failed to build cluster client") } - /// Create a client connected to a specific cluster node with custom credentials - pub fn create_cluster_client_with_auth( + fn build_cluster_client_with_auth( base_url: &str, username: &str, password: &str, @@ -85,8 +108,24 @@ mod cluster_common { .expect("Failed to build cluster client") } + /// Create a client connected to a specific cluster node + pub fn create_cluster_client(base_url: &str) -> KalamLinkClient { + cached_cluster_client(base_url) + } + + /// Create a client connected to a specific cluster node with custom credentials + pub fn create_cluster_client_with_auth( + base_url: &str, + username: &str, + password: &str, + ) -> KalamLinkClient { + // Custom-auth clients are not pooled (credentials may differ per call). 
+ build_cluster_client_with_auth(base_url, username, password) + } + /// Execute a query on a specific cluster node and return the count - /// Note: With leader-only reads (Spec 021), this will automatically use the leader node for client reads + /// Note: With leader-only reads (Spec 021), this will automatically use the leader node for + /// client reads pub fn query_count_on_url(base_url: &str, sql: &str) -> i64 { // Try the specified URL first, but if we get NOT_LEADER error, retry on leader let result = query_count_on_url_internal(base_url, sql); @@ -361,7 +400,8 @@ mod cluster_common { execute_on_node_response_internal(base_url, sql, true) } - /// Execute SQL on a specific cluster node and return the structured response without leader routing + /// Execute SQL on a specific cluster node and return the structured response without leader + /// routing #[allow(dead_code)] pub fn execute_on_node_response_raw( base_url: &str, @@ -501,7 +541,8 @@ mod cluster_common { execute_on_node_as_user_response_internal(base_url, username, password, sql, true) } - /// Execute SQL on a specific cluster node as a custom user and return the response without leader routing + /// Execute SQL on a specific cluster node as a custom user and return the response without + /// leader routing #[allow(dead_code)] pub fn execute_on_node_as_user_response_raw( base_url: &str, @@ -615,15 +656,28 @@ mod cluster_common { /// Require cluster to be running (skip test if not available) pub fn require_cluster_running() -> bool { + let cluster_requested = std::env::var("KALAMDB_SERVER_TYPE") + .map(|value| value.trim().eq_ignore_ascii_case("cluster")) + .unwrap_or(false); + if !crate::common::is_cluster_mode() { + if cluster_requested { + panic!( + "Cluster tests were requested, but the harness resolved single-node mode. \ + Check KALAMDB_CLUSTER_URLS and cluster reachability." 
+ ); + } println!( "\n Skipping: single-node server detected (cluster tests require multi-node)\n" ); return false; } - let urls = cluster_urls(); + let urls = cluster_urls_config_order(); if urls.is_empty() { + if cluster_requested { + panic!("Cluster tests were requested, but no cluster URLs are configured."); + } println!("\n Skipping: no cluster URLs configured (set KALAMDB_CLUSTER_URLS)\n"); return false; } @@ -631,6 +685,13 @@ mod cluster_common { // Check if at least one node is reachable let any_healthy = urls.iter().any(|url| is_node_healthy(url)); if !any_healthy { + if cluster_requested { + panic!( + "Cluster tests were requested, but no configured cluster node is reachable: \ + {:?}", + urls + ); + } println!( "\n Skipping: no cluster nodes are reachable. Expected nodes at: {:?}\n", urls @@ -661,6 +722,7 @@ mod cluster_common { if all_visible { return true; } + std::thread::sleep(Duration::from_millis(50)); } false @@ -685,6 +747,7 @@ mod cluster_common { if all_visible { return true; } + std::thread::sleep(Duration::from_millis(50)); } false @@ -711,6 +774,7 @@ mod cluster_common { if all_match { return true; } + std::thread::sleep(Duration::from_millis(50)); } false @@ -744,6 +808,7 @@ mod cluster_common { } } } + std::thread::sleep(Duration::from_millis(100)); } } @@ -779,6 +844,7 @@ mod cluster_common { } } } + std::thread::sleep(Duration::from_millis(100)); } false diff --git a/cli/tests/cluster/cluster_test_cluster_list.rs b/cli/tests/cluster/cluster_test_cluster_list.rs index 25ac25529..fdade3592 100644 --- a/cli/tests/cluster/cluster_test_cluster_list.rs +++ b/cli/tests/cluster/cluster_test_cluster_list.rs @@ -1,26 +1,26 @@ -//! Cluster view tests: CLUSTER LIST/STATUS output +//! 
Cluster view tests: system.cluster fallback and CLUSTER LIST deprecation use crate::cluster_common; #[ntest::timeout(60_000)] #[test] -fn cluster_list_output_contains_overview() { +fn cluster_list_sql_is_rejected_with_guidance() { if !cluster_common::require_cluster_running() { return; } let urls = cluster_common::cluster_urls(); for url in &urls { - let output = - cluster_common::execute_on_node(url, "CLUSTER LIST").expect("CLUSTER LIST failed"); + let output = cluster_common::execute_on_node(url, "CLUSTER LIST") + .expect_err("CLUSTER LIST should now be rejected"); assert!( - output.contains("CLUSTER OVERVIEW"), - "missing overview in CLUSTER LIST output for {}", + output.contains("CLI-only command"), + "missing CLI-only guidance in CLUSTER LIST error for {}", url ); assert!( - output.contains("NODES"), - "missing NODES section in CLUSTER LIST output for {}", + output.contains("system.cluster"), + "missing system.cluster guidance in CLUSTER LIST error for {}", url ); } @@ -28,7 +28,7 @@ fn cluster_list_output_contains_overview() { #[ntest::timeout(60_000)] #[test] -fn cluster_status_aliases_render() { +fn system_cluster_view_still_returns_cluster_rows() { if !cluster_common::require_cluster_running() { return; } @@ -38,9 +38,12 @@ fn cluster_status_aliases_render() { return; }; - for cmd in ["CLUSTER STATUS", "CLUSTER LS"] { - let output = - cluster_common::execute_on_node(url, cmd).expect("cluster status alias failed"); - assert!(output.contains("CLUSTER OVERVIEW"), "{} output missing overview", cmd); - } + let output = cluster_common::execute_on_node( + url, + "SELECT cluster_id, node_id, role FROM system.cluster ORDER BY node_id", + ) + .expect("system.cluster query failed"); + + assert!(output.contains("cluster_id"), "missing cluster_id column header"); + assert!(output.contains("node_id"), "missing node_id column header"); } diff --git a/cli/tests/cluster/cluster_test_consistency.rs b/cli/tests/cluster/cluster_test_consistency.rs index 6713ae90e..d149b6c5a 
100644 --- a/cli/tests/cluster/cluster_test_consistency.rs +++ b/cli/tests/cluster/cluster_test_consistency.rs @@ -2,10 +2,10 @@ //! //! Tests that verify data consistency across cluster nodes -use crate::cluster_common::*; -use crate::common::*; use std::time::Duration; +use crate::{cluster_common::*, common::*}; + /// Test: System table counts are consistent across all cluster nodes #[test] fn cluster_test_system_table_consistency() { @@ -131,7 +131,8 @@ fn cluster_test_table_replication() { ( "stream_tbl", format!( - "CREATE STREAM TABLE {}.stream_tbl (id BIGINT PRIMARY KEY, event STRING) WITH (TTL_SECONDS = 3600)", + "CREATE STREAM TABLE {}.stream_tbl (id BIGINT PRIMARY KEY, event STRING) WITH \ + (TTL_SECONDS = 3600)", namespace ), ), @@ -150,7 +151,8 @@ fn cluster_test_table_replication() { let result = execute_on_node( url, &format!( - "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}'", namespace, name ), ) diff --git a/cli/tests/cluster/cluster_test_data_digest.rs b/cli/tests/cluster/cluster_test_data_digest.rs index 91e3b4f59..889079c75 100644 --- a/cli/tests/cluster/cluster_test_data_digest.rs +++ b/cli/tests/cluster/cluster_test_data_digest.rs @@ -2,10 +2,10 @@ //! //! Ensures all nodes return identical data, not just row counts. -use crate::cluster_common::*; -use crate::common::*; use kalam_client::KalamCellValue; +use crate::{cluster_common::*, common::*}; + fn normalize_rows(rows: &[Vec]) -> Vec { rows.iter() .map(|row| { diff --git a/cli/tests/cluster/cluster_test_failover.rs b/cli/tests/cluster/cluster_test_failover.rs index 5bcf00870..1183d5a58 100644 --- a/cli/tests/cluster/cluster_test_failover.rs +++ b/cli/tests/cluster/cluster_test_failover.rs @@ -2,8 +2,7 @@ //! //! 
Tests that verify cluster behavior during node failures -use crate::cluster_common::*; -use crate::common::*; +use crate::{cluster_common::*, common::*}; /// Test: Cluster remains operational when one node fails /// @@ -40,6 +39,42 @@ fn cluster_test_node_health_detection() { println!("\n ✅ Cluster health detection working\n"); } +#[test] +fn cluster_test_all_configured_nodes_are_queryable() { + if !require_cluster_running() { + return; + } + + println!("\n=== TEST: All Configured Nodes Queryable ===\n"); + + let configured_urls = crate::cluster_common::cluster_urls_config_order(); + assert!( + !configured_urls.is_empty(), + "Expected configured cluster URLs when running cluster tests" + ); + + for (i, url) in configured_urls.iter().enumerate() { + assert!(is_node_healthy(url), "Configured cluster node {} is unhealthy: {}", i, url); + + let result = execute_on_node( + url, + "SELECT node_id, role, is_leader FROM system.cluster WHERE is_self = true", + ) + .unwrap_or_else(|err| panic!("Configured cluster node {} failed self-query: {}", i, err)); + + assert!( + result.contains("leader") || result.contains("follower") || result.contains("true"), + "Configured cluster node {} returned unexpected self-query output: {}", + i, + result + ); + + println!(" ✓ Configured node {} is healthy and queryable: {}", i, url); + } + + println!("\n ✅ All configured cluster nodes are healthy and queryable\n"); +} + /// Test: Leader election after leader disconnection /// /// This test checks the system.cluster table for leader information. diff --git a/cli/tests/cluster/cluster_test_final_consistency.rs b/cli/tests/cluster/cluster_test_final_consistency.rs index 16a41ec47..72b2ad716 100644 --- a/cli/tests/cluster/cluster_test_final_consistency.rs +++ b/cli/tests/cluster/cluster_test_final_consistency.rs @@ -4,10 +4,9 @@ //! These are "snapshot" tests that ensure the cluster has converged to //! a consistent state after a workload completes. 
-use crate::cluster_common::*; -use crate::common::*; -use std::collections::HashSet; -use std::time::Duration; +use std::{collections::HashSet, time::Duration}; + +use crate::{cluster_common::*, common::*}; /// Helper: Get row count from a node with retries fn get_row_count(url: &str, table: &str) -> i64 { @@ -166,7 +165,8 @@ fn cluster_test_final_metadata_consistency() { // Verify namespace counts let ns_query = format!( - "SELECT namespace_id FROM system.namespaces WHERE namespace_id LIKE '{}%' ORDER BY namespace_id", + "SELECT namespace_id FROM system.namespaces WHERE namespace_id LIKE '{}%' ORDER BY \ + namespace_id", base_ns ); @@ -284,12 +284,14 @@ fn cluster_test_final_mixed_workload_consistency() { } println!(" Phase 2: Updates (even IDs)..."); - // Update even IDs - use individual PK updates since SHARED tables don't support predicate updates + // Update even IDs - use individual PK updates since SHARED tables don't support predicate + // updates for i in (0..200).step_by(2) { execute_on_node( &urls[0], &format!( - "UPDATE {}.workload_data SET status = 'updated', counter = 1, updated_at = 'phase2' WHERE id = {}", + "UPDATE {}.workload_data SET status = 'updated', counter = 1, updated_at = \ + 'phase2' WHERE id = {}", namespace, i ), ) @@ -320,7 +322,8 @@ fn cluster_test_final_mixed_workload_consistency() { execute_on_node( &urls[0], &format!( - "INSERT INTO {}.workload_data (id, status, counter, updated_at) VALUES ({}, 'new', 0, 'phase5')", + "INSERT INTO {}.workload_data (id, status, counter, updated_at) VALUES ({}, \ + 'new', 0, 'phase5')", namespace, i ), ) diff --git a/cli/tests/cluster/cluster_test_flush.rs b/cli/tests/cluster/cluster_test_flush.rs index 88f657eeb..79853b73f 100644 --- a/cli/tests/cluster/cluster_test_flush.rs +++ b/cli/tests/cluster/cluster_test_flush.rs @@ -5,10 +5,10 @@ //! - Data is readable after flush on all nodes //! 
- Flush metadata (manifest) is consistent across nodes -use crate::cluster_common::*; -use crate::common::*; use std::time::Duration; +use crate::{cluster_common::*, common::*}; + /// Test: Flush table in cluster and verify data on all nodes /// /// 1. Create a namespace and shared table on node 0 (leader) @@ -118,7 +118,8 @@ fn cluster_test_flush_data_consistency() { // Step 7: Verify manifest exists (check system.schemas for latest version) println!(" 8. Verifying flush metadata..."); let metadata_sql = format!( - "SELECT table_name, schema_version FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT table_name, schema_version FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}'", namespace, table_name ); diff --git a/cli/tests/cluster/cluster_test_leader_jobs.rs b/cli/tests/cluster/cluster_test_leader_jobs.rs index cef668810..010bb6c75 100644 --- a/cli/tests/cluster/cluster_test_leader_jobs.rs +++ b/cli/tests/cluster/cluster_test_leader_jobs.rs @@ -8,10 +8,12 @@ //! - Job status is replicated across all nodes //! - System.jobs table shows node_id of the executor -use crate::cluster_common::*; -use crate::common::*; -use std::thread; -use std::time::{Duration, Instant}; +use std::{ + thread, + time::{Duration, Instant}, +}; + +use crate::{cluster_common::*, common::*}; /// Test: Only leader executes flush jobs /// @@ -60,23 +62,86 @@ fn cluster_test_leader_only_flush_jobs() { // Small delay for replication thread::sleep(Duration::from_millis(50)); - // Step 3: Trigger flush from a follower node (should still execute on leader) + // Step 3: Trigger flush from a follower node. Use execute_on_node_response to capture + // the response body which contains the job ID in its success message, e.g.: + // "Storage flush started for table '...'. Job ID: FL-XXXXX" + // This avoids querying system.jobs by latest created_at (which could race with background + // scheduler jobs from other concurrent tests). 
let follower_url = urls.iter().find(|u| *u != &leader_url).expect("Need at least 2 nodes"); let flush_sql = format!("STORAGE FLUSH TABLE {}.{}", namespace, table_name); println!(" → Triggering FLUSH from follower: {}", follower_url); - let result = execute_on_node(follower_url, &flush_sql); - // Flush should succeed regardless of which node receives the command - if result.is_err() { - println!(" ⚠ Flush failed (may be expected if follower rejects): {:?}", result); + let flush_response = execute_on_node_response(follower_url, &flush_sql); + + // Extract job ID from the response message ("... Job ID: FL-XXXXX") + let job_id_opt: Option = flush_response + .as_ref() + .ok() + .and_then(|resp| resp.results.first()) + .and_then(|r| r.message.as_deref()) + .and_then(|msg| { + msg.find("Job ID: ").map(|pos| msg[pos + "Job ID: ".len()..].trim().to_string()) + }) + .filter(|id| !id.is_empty()); + + if flush_response.is_err() { + println!(" ⚠ Flush failed (may be expected if follower rejects): {:?}", flush_response); } else { println!(" ✓ Flush command accepted"); } - // Step 4: Wait for job to complete - let job_id = wait_for_latest_job_id_by_type(&leader_url, "Flush", Duration::from_secs(3)) - .expect("Failed to find flush job id"); + // Step 4: Resolve the specific job ID for this table. + // Prefer the ID extracted from the flush response; fall back to a table-scoped DB query. + // Both approaches avoid the global "latest flush job" anti-pattern that can race with + // concurrent tests' flush jobs. 
+ let job_id: String = if let Some(id) = job_id_opt { + println!(" → Job ID from response: {}", id); + id + } else { + // Fallback: search system.jobs filtered by this table's parameters + let table_filter = format!("{}.{}", namespace, table_name); + let sql = format!( + "SELECT job_id FROM system.jobs WHERE job_type = 'flush' AND parameters LIKE \ + '%{}%' ORDER BY created_at DESC LIMIT 1", + table_filter + ); + let deadline = Instant::now() + Duration::from_secs(15); + let mut found: Option = None; + while Instant::now() < deadline { + if let Ok(resp) = execute_on_node_response(&leader_url, &sql) { + if let Some(result) = resp.results.first() { + if let Some(rows) = &result.rows { + if let Some(row) = rows.first() { + if let Some(value) = row.first() { + let extracted = extract_typed_value(value); + if let Some(id) = extracted.as_str() { + found = Some(id.to_string()); + break; + } + } + } + } + } + } + thread::sleep(Duration::from_millis(100)); + } + match found { + Some(id) => { + println!(" → Job ID from DB query: {}", id); + id + }, + None => { + // If flush was skipped (no pending writes), the test is effectively a no-op — + // no job was created. This is valid. Skip the status check. 
+ println!(" ⚠ No flush job found for table (possibly skipped — no pending writes)"); + let drop_sql = format!("DROP TABLE IF EXISTS {}.{}", namespace, table_name); + let _ = execute_on_node(&leader_url, &drop_sql); + return; + }, + } + }; + assert!( wait_for_job_status(&leader_url, &job_id, "Completed", Duration::from_secs(30)), "Flush job did not complete in time" @@ -170,7 +235,8 @@ fn cluster_test_jobs_table_consistency() { } // Query recent jobs and verify node_id field is populated - let recent_jobs_sql = "SELECT job_id, job_type, node_id, status FROM system.jobs ORDER BY created_at DESC LIMIT 3"; + let recent_jobs_sql = "SELECT job_id, job_type, node_id, status FROM system.jobs ORDER BY \ + created_at DESC LIMIT 3"; println!("\n Recent jobs (from leader):"); let leader_url = find_leader_url(&urls); diff --git a/cli/tests/cluster/cluster_test_multi_node_smoke.rs b/cli/tests/cluster/cluster_test_multi_node_smoke.rs index f15afef52..08e918282 100644 --- a/cli/tests/cluster/cluster_test_multi_node_smoke.rs +++ b/cli/tests/cluster/cluster_test_multi_node_smoke.rs @@ -4,10 +4,10 @@ //! when executed against ANY node in the cluster (not just the leader). //! This ensures the cluster behaves identically from any entry point. 
-use crate::cluster_common::*; -use crate::common::*; use serde_json::Value; +use crate::{cluster_common::*, common::*}; + /// Test: Basic CRUD operations work from any node #[test] fn cluster_test_smoke_crud_any_node() { @@ -166,7 +166,8 @@ fn cluster_test_smoke_table_types_any_node() { execute_on_node( &urls[0], &format!( - "CREATE STREAM TABLE {}.stream_tbl (id BIGINT PRIMARY KEY, data STRING) WITH (TTL_SECONDS = 3600)", + "CREATE STREAM TABLE {}.stream_tbl (id BIGINT PRIMARY KEY, data STRING) WITH \ + (TTL_SECONDS = 3600)", namespace ), ) @@ -190,7 +191,8 @@ fn cluster_test_smoke_table_types_any_node() { let tables = vec!["user_tbl", "shared_tbl", "stream_tbl"]; for table_name in &tables { let query = format!( - "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = \ + '{}'", namespace, table_name ); @@ -296,7 +298,8 @@ fn cluster_test_smoke_complex_queries_any_node() { execute_on_node( &urls[0], &format!( - "CREATE SHARED TABLE {}.products (id BIGINT PRIMARY KEY, name STRING, price DOUBLE, category STRING)", + "CREATE SHARED TABLE {}.products (id BIGINT PRIMARY KEY, name STRING, price DOUBLE, \ + category STRING)", namespace ), ) @@ -335,9 +338,22 @@ fn cluster_test_smoke_complex_queries_any_node() { // Test complex queries from each node let complex_queries = vec![ - ("Aggregation", format!("SELECT category, count(*) as cnt FROM {}.products GROUP BY category ORDER BY category", namespace)), - ("Filter", format!("SELECT name FROM {}.products WHERE price > 15 ORDER BY price", namespace)), - ("OrderBy", format!("SELECT name, price FROM {}.products ORDER BY price DESC LIMIT 3", namespace)), + ( + "Aggregation", + format!( + "SELECT category, count(*) as cnt FROM {}.products GROUP BY category ORDER BY \ + category", + namespace + ), + ), + ( + "Filter", + format!("SELECT name FROM {}.products WHERE price > 15 ORDER BY price", namespace), + ), + ( 
+ "OrderBy", + format!("SELECT name, price FROM {}.products ORDER BY price DESC LIMIT 3", namespace), + ), ("Count", format!("SELECT count(*) as total FROM {}.products", namespace)), ]; diff --git a/cli/tests/cluster/cluster_test_node_rejoin.rs b/cli/tests/cluster/cluster_test_node_rejoin.rs index 0b823ae66..dececb08e 100644 --- a/cli/tests/cluster/cluster_test_node_rejoin.rs +++ b/cli/tests/cluster/cluster_test_node_rejoin.rs @@ -10,10 +10,9 @@ //! be applied before the corresponding CREATE TABLE if the data group catches up //! faster than the metadata group. This is being tracked for improvement. -use crate::cluster_common::*; -use crate::common::*; -use std::process::Command; -use std::time::Duration; +use std::{process::Command, time::Duration}; + +use crate::{cluster_common::*, common::*}; /// Check if Docker is available and cluster is running in Docker mode fn is_docker_cluster() -> bool { @@ -170,8 +169,13 @@ fn cluster_test_node_rejoin_system_metadata() { // Insert some data execute_on_node( leader_url, - &format!("INSERT INTO {}.metadata_test (id, data) VALUES (1, 'test1'), (2, 'test2'), (3, 'test3')", namespace), - ).expect("Failed to insert data"); + &format!( + "INSERT INTO {}.metadata_test (id, data) VALUES (1, 'test1'), (2, 'test2'), (3, \ + 'test3')", + namespace + ), + ) + .expect("Failed to insert data"); println!(" ✓ Inserted 3 rows into metadata_test"); // Wait for replication to node2 (verify cluster is working) @@ -298,8 +302,13 @@ fn cluster_test_node_rejoin_dml_operations() { // Insert initial data execute_on_node( leader_url, - &format!("INSERT INTO {}.dml_test (id, name, counter) VALUES (1, 'Alice', 100), (2, 'Bob', 200), (3, 'Charlie', 300)", namespace), - ).expect("Failed to insert initial data"); + &format!( + "INSERT INTO {}.dml_test (id, name, counter) VALUES (1, 'Alice', 100), (2, 'Bob', \ + 200), (3, 'Charlie', 300)", + namespace + ), + ) + .expect("Failed to insert initial data"); // Wait for initial sync 
std::thread::sleep(Duration::from_secs(2)); diff --git a/cli/tests/cluster/cluster_test_replication.rs b/cli/tests/cluster/cluster_test_replication.rs index 13c901a25..ebc4fa94d 100644 --- a/cli/tests/cluster/cluster_test_replication.rs +++ b/cli/tests/cluster/cluster_test_replication.rs @@ -2,11 +2,11 @@ //! //! Tests that verify Raft log replication behavior -use crate::cluster_common::*; -use crate::common::*; +use std::time::{Duration, Instant}; + use serde_json::Value; -use std::time::Duration; -use std::time::Instant; + +use crate::{cluster_common::*, common::*}; fn parse_count(response: &kalam_client::QueryResponse) -> Result { let result = response diff --git a/cli/tests/cluster/cluster_test_snapshot.rs b/cli/tests/cluster/cluster_test_snapshot.rs index 9ad6ceb26..7e62efa4d 100644 --- a/cli/tests/cluster/cluster_test_snapshot.rs +++ b/cli/tests/cluster/cluster_test_snapshot.rs @@ -2,11 +2,12 @@ //! //! Tests Raft snapshot creation and installation in cluster mode. -use crate::cluster_common::*; -use crate::common::*; -use kalam_client::QueryResponse; use std::time::Duration; +use kalam_client::QueryResponse; + +use crate::{cluster_common::*, common::*}; + async fn execute_query_with_retry( client: &kalam_client::KalamLinkClient, sql: &str, @@ -148,16 +149,13 @@ fn test_snapshot_with_high_write_load() { .expect("create namespace"); execute_query_with_retry( &client, - &format!( - "CREATE TABLE {}.{} (id INT, data TEXT, PRIMARY KEY (id))", - namespace, table - ), + &format!("CREATE TABLE {}.{} (id INT, data TEXT, PRIMARY KEY (id))", namespace, table), ) .await .expect("create table"); - + println!("📝 High-load write test (1500 inserts)..."); - + // Insert 1500 rows with larger data for i in 0..1500 { let large_value = format!("data_{}_", i).repeat(10); // ~70 bytes per row @@ -170,28 +168,29 @@ fn test_snapshot_with_high_write_load() { ) .await .expect("insert row"); - + if i % 100 == 0 { println!(" Inserted {} rows", i); } } - + println!("✅ Inserted 1500 
rows"); - + // Wait for snapshot tokio::time::sleep(tokio::time::Duration::from_secs(3)).await; - + // Check all nodes for snapshot status let result = execute_query_with_retry( &client, - "SELECT node_id, role, snapshot_index, last_applied_log FROM system.cluster ORDER BY node_id", + "SELECT node_id, role, snapshot_index, last_applied_log FROM system.cluster ORDER BY \ + node_id", ) .await .expect("query cluster"); - + println!("📊 Cluster snapshot status:"); println!("{:?}", result); - + // Verify data integrity after potential snapshot let count_result = execute_query_with_retry( &client, @@ -199,14 +198,13 @@ fn test_snapshot_with_high_write_load() { ) .await .expect("count rows"); - + println!("Count result: {:?}", count_result); println!("✅ Data integrity verified after snapshot"); - + // Cleanup - let _ = - execute_query_with_retry(&client, &format!("DROP NAMESPACE {} CASCADE", namespace)) - .await; + let _ = execute_query_with_retry(&client, &format!("DROP NAMESPACE {} CASCADE", namespace)) + .await; }); } diff --git a/cli/tests/cluster/cluster_test_subscription_nodes.rs b/cli/tests/cluster/cluster_test_subscription_nodes.rs index 134156b87..8fa16b5b0 100644 --- a/cli/tests/cluster/cluster_test_subscription_nodes.rs +++ b/cli/tests/cluster/cluster_test_subscription_nodes.rs @@ -7,13 +7,15 @@ //! 3. Multiple subscriptions across different nodes should all receive the same events //! 4. 
Initial data should be consistent regardless of which node serves the subscription -use crate::cluster_common::*; -use crate::common::*; +use std::{ + sync::{atomic::Ordering, Arc}, + time::Duration, +}; + use kalam_client::{ChangeEvent, KalamLinkTimeouts, SubscriptionManager}; use serde_json::Value; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::time::Duration; + +use crate::{cluster_common::*, common::*}; /// Parse cluster nodes to get leader and follower URLs fn get_leader_and_followers() -> (String, Vec) { @@ -159,7 +161,6 @@ async fn execute_query_with_retry( } /// Test: Subscription on leader receives changes from leader writes -/// #[test] fn cluster_test_subscription_leader_to_leader() { if !require_cluster_running() { @@ -243,7 +244,6 @@ fn cluster_test_subscription_leader_to_leader() { } /// Test: Subscription on follower receives changes from leader writes -/// #[test] fn cluster_test_subscription_follower_to_leader() { if !require_cluster_running() { @@ -335,7 +335,6 @@ fn cluster_test_subscription_follower_to_leader() { } /// Test: Multiple subscriptions across nodes receive identical events -/// #[test] fn cluster_test_subscription_multi_node_identical() { if !require_cluster_running() { @@ -434,7 +433,6 @@ fn cluster_test_subscription_multi_node_identical() { } /// Test: Initial data is identical when subscribing to any node -/// #[test] fn cluster_test_subscription_initial_data_consistency() { if !require_cluster_running() { @@ -495,7 +493,10 @@ fn cluster_test_subscription_initial_data_consistency() { } if !data_replicated { - panic!("Data did not replicate to all nodes within timeout (USER table replication may be slow under load)"); + panic!( + "Data did not replicate to all nodes within timeout (USER table replication may be \ + slow under load)" + ); } println!(" ✓ Data replicated to all nodes"); diff --git a/cli/tests/cluster/cluster_test_system_tables_replication.rs 
b/cli/tests/cluster/cluster_test_system_tables_replication.rs index e083cb997..dfd05f85a 100644 --- a/cli/tests/cluster/cluster_test_system_tables_replication.rs +++ b/cli/tests/cluster/cluster_test_system_tables_replication.rs @@ -8,10 +8,9 @@ //! 2. CRUD operations on system tables propagate to all nodes //! 3. System table queries return consistent results from any node -use crate::cluster_common::*; -use crate::common::*; -use std::collections::HashSet; -use std::time::Duration; +use std::{collections::HashSet, time::Duration}; + +use crate::{cluster_common::*, common::*}; /// Helper to extract rows from query response as a set of normalized strings fn extract_row_set(base_url: &str, sql: &str) -> Result, String> { @@ -67,8 +66,13 @@ fn cluster_test_system_tables_replication() { execute_on_node( &urls[0], - &format!("CREATE STREAM TABLE {}.events (id BIGINT PRIMARY KEY, payload STRING) WITH (TTL_SECONDS = 3600)", namespace), - ).expect("Failed to create stream table"); + &format!( + "CREATE STREAM TABLE {}.events (id BIGINT PRIMARY KEY, payload STRING) WITH \ + (TTL_SECONDS = 3600)", + namespace + ), + ) + .expect("Failed to create stream table"); // Wait for all tables to replicate let tables_to_check = vec!["users_data", "config_data", "events"]; @@ -81,7 +85,8 @@ fn cluster_test_system_tables_replication() { // Verify system.schemas is identical on all nodes let query = format!( - "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY table_name", + "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY \ + table_name", namespace ); @@ -155,7 +160,8 @@ fn cluster_test_system_namespaces_replication() { // Verify all namespaces exist on all nodes let query = format!( - "SELECT namespace_id FROM system.namespaces WHERE namespace_id LIKE '{}%' ORDER BY namespace_id", + "SELECT namespace_id FROM system.namespaces WHERE namespace_id LIKE '{}%' ORDER BY \ + namespace_id", base_ns ); @@ -419,7 +425,8 @@ 
fn cluster_test_drop_replication() { let mut found = false; for _ in 0..10 { let query = format!( - "SELECT table_name FROM system.schemas WHERE table_name = 'drop_test' AND namespace_id = '{}'", + "SELECT table_name FROM system.schemas WHERE table_name = 'drop_test' AND \ + namespace_id = '{}'", namespace ); match execute_on_node(url, &query) { @@ -446,7 +453,8 @@ fn cluster_test_drop_replication() { let mut gone = false; for _ in 0..10 { let query = format!( - "SELECT table_name FROM system.schemas WHERE table_name = 'drop_test' AND namespace_id = '{}'", + "SELECT table_name FROM system.schemas WHERE table_name = 'drop_test' AND \ + namespace_id = '{}'", namespace ); match execute_on_node(url, &query) { diff --git a/cli/tests/cluster/cluster_test_table_crud_consistency.rs b/cli/tests/cluster/cluster_test_table_crud_consistency.rs index 9206b1e47..5d565ce85 100644 --- a/cli/tests/cluster/cluster_test_table_crud_consistency.rs +++ b/cli/tests/cluster/cluster_test_table_crud_consistency.rs @@ -2,10 +2,10 @@ //! //! Verifies insert/update/delete results are identical across all nodes. -use crate::cluster_common::*; -use crate::common::*; use std::time::Duration; +use crate::{cluster_common::*, common::*}; + fn assert_rows_on_all_nodes(urls: &[String], sql: &str, expected: &[String]) { let mut expected_rows = expected.to_vec(); expected_rows.sort(); @@ -132,7 +132,8 @@ fn cluster_test_table_crud_consistency() { execute_on_node( &urls[0], &format!( - "CREATE STREAM TABLE {}.stream_crud (id BIGINT PRIMARY KEY, value STRING) WITH (TTL_SECONDS = 3600)", + "CREATE STREAM TABLE {}.stream_crud (id BIGINT PRIMARY KEY, value STRING) WITH \ + (TTL_SECONDS = 3600)", namespace ), ) diff --git a/cli/tests/cluster/cluster_test_table_identity.rs b/cli/tests/cluster/cluster_test_table_identity.rs index 24a349570..e693d577d 100644 --- a/cli/tests/cluster/cluster_test_table_identity.rs +++ b/cli/tests/cluster/cluster_test_table_identity.rs @@ -7,10 +7,9 @@ //! 3. 
Updates and deletes are properly propagated //! 4. Complex data types maintain fidelity across replication -use crate::cluster_common::*; -use crate::common::*; -use std::collections::HashSet; -use std::time::Duration; +use std::{collections::HashSet, time::Duration}; + +use crate::{cluster_common::*, common::*}; fn query_with_verification_limit(sql: &str, expected_rows: usize) -> String { let trimmed = sql.trim().trim_end_matches(';'); @@ -170,7 +169,8 @@ fn cluster_test_table_identity_updates() { execute_on_node( &urls[0], &format!( - "CREATE SHARED TABLE {}.update_test (id BIGINT PRIMARY KEY, value STRING, counter BIGINT)", + "CREATE SHARED TABLE {}.update_test (id BIGINT PRIMARY KEY, value STRING, counter \ + BIGINT)", namespace ), ) @@ -193,7 +193,8 @@ fn cluster_test_table_identity_updates() { .expect("Failed to insert"); } - // Update first 5 rows using individual PK-based updates (KalamDB doesn't support predicate-based updates on SHARED tables) + // Update first 5 rows using individual PK-based updates (KalamDB doesn't support + // predicate-based updates on SHARED tables) for i in 0..5 { execute_on_node( &urls[0], @@ -336,7 +337,8 @@ fn cluster_test_table_identity_mixed_operations() { execute_on_node( &urls[0], &format!( - "CREATE SHARED TABLE {}.mixed_test (id BIGINT PRIMARY KEY, status STRING, version BIGINT)", + "CREATE SHARED TABLE {}.mixed_test (id BIGINT PRIMARY KEY, status STRING, version \ + BIGINT)", namespace ), ) @@ -360,7 +362,8 @@ fn cluster_test_table_identity_mixed_operations() { .expect("Failed to insert"); } - // Phase 2: Update first 25 rows using individual PK-based updates (KalamDB doesn't support predicate-based updates on SHARED tables) + // Phase 2: Update first 25 rows using individual PK-based updates (KalamDB doesn't support + // predicate-based updates on SHARED tables) println!(" Phase 2: Updating rows with id 0-24..."); for i in 0..25 { execute_on_node( diff --git a/cli/tests/cluster/cluster_test_ws_follower.rs 
b/cli/tests/cluster/cluster_test_ws_follower.rs index bb8116bbf..010e6d9ec 100644 --- a/cli/tests/cluster/cluster_test_ws_follower.rs +++ b/cli/tests/cluster/cluster_test_ws_follower.rs @@ -2,11 +2,12 @@ //! //! Verifies followers can deliver live query updates from leader writes. -use crate::cluster_common::*; -use crate::common::*; +use std::time::Duration; + use kalam_client::{ChangeEvent, KalamLinkTimeouts, SubscriptionManager}; use serde_json::Value; -use std::time::Duration; + +use crate::{cluster_common::*, common::*}; fn parse_cluster_nodes() -> (String, String) { let urls = cluster_urls(); diff --git a/cli/tests/common/mod.rs b/cli/tests/common/mod.rs index 94eb2be2e..765a67e60 100644 --- a/cli/tests/common/mod.rs +++ b/cli/tests/common/mod.rs @@ -1,29 +1,30 @@ #![allow(dead_code, unused_imports)] extern crate kalam_cli; +#[cfg(unix)] +use std::os::unix::io::AsRawFd; +use std::{ + collections::{HashMap, HashSet, VecDeque}, + fs::OpenOptions, + io::{BufRead, BufReader}, + net::TcpListener, + path::{Path, PathBuf}, + process::{Child, Command, Stdio}, + sync::{mpsc as std_mpsc, Mutex, OnceLock}, + thread, + time::{Duration, Instant}, +}; + #[cfg(unix)] use libc::{flock, LOCK_EX, LOCK_UN}; use rand::{distr::Alphanumeric, RngExt}; use reqwest::Client; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use std::collections::{HashMap, HashSet, VecDeque}; -use std::fs::OpenOptions; -use std::io::{BufRead, BufReader}; -use std::net::TcpListener; -#[cfg(unix)] -use std::os::unix::io::AsRawFd; -use std::path::Path; -use std::path::PathBuf; -use std::process::Command; -use std::process::{Child, Stdio}; -use std::sync::mpsc as std_mpsc; -use std::sync::{Mutex, OnceLock}; -use std::thread; -use std::time::Duration; -use std::time::Instant; use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System}; -use tokio::runtime::{Handle, Runtime}; -use tokio::sync::Mutex as TokioMutex; +use tokio::{ + runtime::{Handle, Runtime}, + sync::Mutex 
as TokioMutex, +}; // Load environment variables from .env file at test startup fn load_env_file() { @@ -61,20 +62,22 @@ fn shared_token_cache_lock_path() -> PathBuf { } // Re-export commonly used types for credential tests -pub use kalam_cli::FileCredentialStore; -pub use kalam_client::client::KalamLinkClientBuilder; -pub use kalam_client::credentials::{CredentialStore, Credentials}; -pub use kalam_client::{AuthProvider, KalamLinkClient, KalamLinkTimeouts}; -pub use tempfile::TempDir; - #[cfg(unix)] pub use std::os::unix::fs::PermissionsExt; +pub use kalam_cli::FileCredentialStore; +pub use kalam_client::{ + client::KalamLinkClientBuilder, + credentials::{CredentialStore, Credentials}, + AuthProvider, KalamLinkClient, KalamLinkTimeouts, +}; +pub use tempfile::TempDir; + static SERVER_URL: OnceLock = OnceLock::new(); static ROOT_PASSWORD: OnceLock = OnceLock::new(); static ADMIN_PASSWORD: OnceLock = OnceLock::new(); static TEST_CONTEXT: OnceLock = OnceLock::new(); -static LAST_LEADER_URL: OnceLock>> = OnceLock::new(); +static LAST_LEADER_URL: OnceLock>> = OnceLock::new(); static AUTO_TEST_SERVER: OnceLock>> = OnceLock::new(); static AUTO_TEST_SERVER_STATE_MUTEX: OnceLock> = OnceLock::new(); static AUTO_TEST_RUNTIME: OnceLock<&'static Runtime> = OnceLock::new(); @@ -85,6 +88,28 @@ static LOGIN_MUTEX: OnceLock> = OnceLock::new(); static TOKEN_FILE_MUTEX: OnceLock> = OnceLock::new(); static TEST_CLI_HOME_DIR: OnceLock = OnceLock::new(); static TEST_CLI_CREDENTIALS_PATH: OnceLock = OnceLock::new(); +static SHARED_HTTP_CLIENT: OnceLock = OnceLock::new(); + +const LEADER_CACHE_TTL: Duration = Duration::from_secs(5); + +pub fn shared_http_client() -> Client { + SHARED_HTTP_CLIENT + .get_or_init(|| { + Client::builder() + .pool_max_idle_per_host(512) + .pool_idle_timeout(Duration::from_secs(90)) + .tcp_nodelay(true) + .build() + .expect("failed to build shared test HTTP client") + }) + .clone() +} + +#[derive(Clone, Debug)] +struct CachedLeaderUrl { + url: String, + 
confirmed_at: Instant, +} static AUTO_TEST_SERVER_EXIT_CLEANUP_REGISTERED: OnceLock<()> = OnceLock::new(); struct TestAuthManager { @@ -179,7 +204,7 @@ impl TestAuthManager { &self, base_url: &str, ) -> Result<(), Box> { - let client = Client::new(); + let client = shared_http_client(); let status_response = client.get(format!("{}/v1/api/auth/status", base_url)).send().await; let Ok(status_response) = status_response else { @@ -223,7 +248,7 @@ impl TestAuthManager { username: &str, password: &str, ) -> Result> { - let client = Client::new(); + let client = shared_http_client(); let mut attempt: u32 = 0; let max_attempts: u32 = 40; let retry_delay = Duration::from_millis(250); @@ -317,7 +342,7 @@ impl TestAuthManager { invalidate_cached_token_for_credentials(base_url, admin_username(), admin_password()); let root_token = self.token_for_url_cached(base_url, "root", root_password).await?; - let client = Client::new(); + let client = shared_http_client(); let exists_response = client .post(format!("{}/v1/api/sql", base_url)) .bearer_auth(&root_token) @@ -553,6 +578,7 @@ impl TestAuthManager { return KalamLinkClient::builder() .base_url(base_url) .auth(AuthProvider::none()) + .http_pool_max_idle_per_host(256) .timeouts( KalamLinkTimeouts::builder() .connection_timeout_secs(5) @@ -571,6 +597,7 @@ impl TestAuthManager { KalamLinkClient::builder() .base_url(base_url) .auth(auth) + .http_pool_max_idle_per_host(256) .timeouts( KalamLinkTimeouts::builder() .connection_timeout_secs(5) @@ -725,8 +752,7 @@ fn with_auto_test_server_state_lock( #[cfg(unix)] { let lock_path = auto_test_server_state_lock_path(); - let lock_file = - OpenOptions::new().create(true).read(true).write(true).open(&lock_path)?; + let lock_file = OpenOptions::new().create(true).read(true).write(true).open(&lock_path)?; unsafe { if flock(lock_file.as_raw_fd(), LOCK_EX) != 0 { @@ -756,9 +782,8 @@ fn read_auto_test_server_state_locked( return Ok(None); } - let state = 
serde_json::from_str::(&std::fs::read_to_string( - &state_path, - )?)?; + let state = + serde_json::from_str::(&std::fs::read_to_string(&state_path)?)?; Ok(Some(state)) } @@ -766,10 +791,7 @@ fn write_auto_test_server_state_locked( state: &SharedAutoTestServerState, ) -> Result<(), Box> { std::fs::create_dir_all(auto_test_server_state_root())?; - std::fs::write( - auto_test_server_state_file_path(), - serde_json::to_vec(state)?, - )?; + std::fs::write(auto_test_server_state_file_path(), serde_json::to_vec(state)?)?; Ok(()) } @@ -791,8 +813,7 @@ fn pid_is_alive(pid: u32) -> bool { guard.process(pid).is_some() } -fn remove_stale_auto_test_server_leases_locked( -) -> Result, Box> { +fn remove_stale_auto_test_server_leases_locked() -> Result, Box> { let leases_dir = auto_test_server_leases_dir(); std::fs::create_dir_all(&leases_dir)?; @@ -830,10 +851,7 @@ fn register_auto_test_server_lease_locked(pid: u32) -> Result<(), Box Option { let server_mutex = AUTO_TEST_SERVER.get()?; let mut guard = server_mutex.lock().ok()?; - if guard - .as_ref() - .is_some_and(|server| server.pid == pid && server.child.is_some()) - { + if guard.as_ref().is_some_and(|server| server.pid == pid && server.child.is_some()) { guard.take() } else { None @@ -868,9 +886,7 @@ fn terminate_auto_test_server_process(pid: u32) { #[cfg(windows)] { - let _ = Command::new("taskkill") - .args(["/PID", &pid.to_string(), "/T", "/F"]) - .status(); + let _ = Command::new("taskkill").args(["/PID", &pid.to_string(), "/T", "/F"]).status(); } } @@ -1014,6 +1030,49 @@ fn wait_for_url_reachable(url: &str, timeout: Duration) -> bool { url_reachable(url) } +fn reachable_cluster_urls(urls: &[String]) -> Vec { + urls.iter().filter(|url| url_reachable(url)).cloned().collect() +} + +fn wait_for_reachable_cluster_urls(urls: &[String], timeout: Duration) -> Vec { + let start = Instant::now(); + loop { + let reachable = reachable_cluster_urls(urls); + if !reachable.is_empty() || start.elapsed() >= timeout { + return reachable; 
+ } + std::thread::sleep(Duration::from_millis(100)); + } +} + +fn cluster_node_ready_for_sql(url: &str) -> bool { + if !url_reachable(url) { + return false; + } + + let auth_ready = server_requires_auth_for_url(url).is_some(); + if !auth_ready { + return false; + } + + get_access_token_for_url_sync(url, default_username(), default_password()).is_some() +} + +fn wait_for_sql_ready_cluster_urls(urls: &[String], timeout: Duration) -> Vec { + let start = Instant::now(); + + loop { + let ready: Vec = + urls.iter().filter(|url| cluster_node_ready_for_sql(url)).cloned().collect(); + + if !ready.is_empty() || start.elapsed() >= timeout { + return ready; + } + + std::thread::sleep(Duration::from_millis(150)); + } +} + fn ensure_auto_test_server() -> Option<(String, PathBuf)> { let server_mutex = AUTO_TEST_SERVER.get_or_init(|| Mutex::new(None)); { @@ -1056,9 +1115,10 @@ fn ensure_auto_test_server() -> Option<(String, PathBuf)> { let (tx, rx) = std::sync::mpsc::channel(); std::thread::spawn(move || { let runtime = AUTO_TEST_RUNTIME.get_or_init(|| { - Box::leak(Box::new(Runtime::new().expect( - "Failed to create auto test server runtime", - ))) + Box::leak(Box::new( + Runtime::new() + .expect("Failed to create auto test server runtime"), + )) }); let result = (*runtime) .block_on(start_local_test_server()) @@ -1148,7 +1208,8 @@ fn kalamdb_server_bin() -> Result> { Ok(path) } else { Err(format!( - "kalamdb-server binary not found at {}. Build it with `cargo build -p kalamdb-server --bin kalamdb-server`.", + "kalamdb-server binary not found at {}. 
Build it with `cargo build -p kalamdb-server \ + --bin kalamdb-server`.", path.display() ) .into()) @@ -1259,32 +1320,79 @@ fn parse_test_urls() -> Option> { } } +fn configured_cluster_urls_from_env() -> Vec { + load_env_file(); + + parse_test_urls() + .or_else(|| { + std::env::var("KALAMDB_CLUSTER_URLS").ok().map(|raw| { + raw.split(',') + .map(|url| url.trim().to_string()) + .filter(|url| !url.is_empty()) + .collect() + }) + }) + .filter(|urls: &Vec| !urls.is_empty()) + .unwrap_or_else(|| { + vec![ + "http://127.0.0.1:8081".to_string(), + "http://127.0.0.1:8082".to_string(), + "http://127.0.0.1:8083".to_string(), + ] + }) +} + fn ensure_server_ready_sync(base_url: &str) { let server_type = server_type_from_env(); - - if !url_reachable(base_url) { - // If explicitly configured to use a running/cluster server, fail loudly - if matches!(server_type, Some(ServerType::Running) | Some(ServerType::Cluster)) { + let ready_url = if matches!(server_type, Some(ServerType::Cluster)) { + let cluster_urls = configured_cluster_urls_from_env(); + let reachable = wait_for_reachable_cluster_urls(&cluster_urls, Duration::from_secs(5)); + if reachable.is_empty() { panic!( "\n\n\ ╔══════════════════════════════════════════════════════════════════╗\n\ - ║ SERVER NOT REACHABLE ║\n\ + ║ CLUSTER NODES NOT REACHABLE ║\n\ ╠══════════════════════════════════════════════════════════════════╣\n\ - ║ KALAMDB_SERVER_TYPE={:?} but server is NOT reachable ║\n\ - ║ at: {}\n\ + ║ KALAMDB_SERVER_TYPE=Cluster but no configured node is ║\n\ + ║ reachable within 5s. 
║\n\ + ║ ║\n\ + ║ Configured cluster URLs: ║\n\ + ║ {:?} ║\n\ ║ ║\n\ - ║ Start the server: cd backend && cargo run --release ║\n\ - ║ Or set KALAMDB_SERVER_TYPE=fresh to auto-start a test server ║\n\ + ║ Start/check the cluster: ./scripts/cluster.sh status ║\n\ ╚══════════════════════════════════════════════════════════════════╝\n", - server_type.unwrap(), - base_url + cluster_urls ); } - return; - } + + reachable[0].clone() + } else { + if !wait_for_url_reachable(base_url, Duration::from_secs(2)) { + // If explicitly configured to use a running server, fail loudly. + if matches!(server_type, Some(ServerType::Running)) { + panic!( + "\n\n\ + ╔══════════════════════════════════════════════════════════════════╗\n\ + ║ SERVER NOT REACHABLE ║\n\ + ╠══════════════════════════════════════════════════════════════════╣\n\ + ║ KALAMDB_SERVER_TYPE={:?} but server is NOT reachable ║\n\ + ║ at: {}\n\ + ║ ║\n\ + ║ Start the server: cd backend && cargo run --release ║\n\ + ║ Or set KALAMDB_SERVER_TYPE=fresh to auto-start a test server ║\n\ + ╚══════════════════════════════════════════════════════════════════╝\n", + server_type.unwrap(), + base_url + ); + } + return; + } + + base_url.to_string() + }; if tokio::runtime::Handle::try_current().is_ok() { - let base_url_owned = base_url.to_string(); + let base_url_owned = ready_url.clone(); let (tx, rx) = std::sync::mpsc::channel(); std::thread::spawn(move || { let runtime = match Runtime::new() { @@ -1294,9 +1402,12 @@ fn ensure_server_ready_sync(base_url: &str) { return; }, }; - let result = runtime - .block_on(test_auth_manager().ensure_ready(&base_url_owned)) - .map_err(|err| err.to_string()); + let result = runtime.block_on(async { + test_auth_manager() + .ensure_ready(&base_url_owned) + .await + .map_err(|err| err.to_string()) + }); let _ = tx.send(result); }); match rx.recv_timeout(Duration::from_secs(30)) { @@ -1343,20 +1454,35 @@ fn sql_body_is_self_leader(body: &serde_json::Value) -> Option { 
Some(json_value_is_true(&extracted)) } -fn leader_cache() -> &'static Mutex> { +fn leader_cache() -> &'static Mutex> { LAST_LEADER_URL.get_or_init(|| Mutex::new(None)) } fn cache_leader_url(url: &str) { if let Ok(mut guard) = leader_cache().lock() { - *guard = Some(url.to_string()); + *guard = Some(CachedLeaderUrl { + url: url.to_string(), + confirmed_at: Instant::now(), + }); + } +} + +fn clear_cached_leader_url() { + if let Ok(mut guard) = leader_cache().lock() { + *guard = None; } } -fn cached_leader_url() -> Option { +fn cached_leader_url() -> Option { leader_cache().lock().ok().and_then(|guard| guard.clone()) } +fn trace_leader_cache(message: impl AsRef) { + if std::env::var_os("KALAMDB_TEST_CLIENT_TRACE").is_some() { + eprintln!("[TEST_LEADER] {}", message.as_ref()); + } +} + fn extract_leader_url(message: &str) -> Option { let start = message.find("http://").or_else(|| message.find("https://"))?; let rest = &message[start..]; @@ -1391,7 +1517,7 @@ fn detect_leader_url(urls: &[String], username: &str, password: &str) -> Option< }; let leader = runtime.block_on(async move { - let client = Client::new(); + let client = shared_http_client(); let deadline = tokio::time::Instant::now() + Duration::from_secs(10); for _ in 0..15 { @@ -1535,13 +1661,13 @@ pub fn test_context() -> &'static TestContext { eprintln!("✅ [TEST] Auto-started fresh server at {}", server_url); } else { panic!( - "\n\n\ - ╔══════════════════════════════════════════════════════════════════╗\n\ - ║ FAILED TO START FRESH TEST SERVER ║\n\ - ╠══════════════════════════════════════════════════════════════════╣\n\ - ║ KALAMDB_SERVER_TYPE=fresh but could not auto-start server. 
║\n\ - ║ Build the server: cd backend && cargo build ║\n\ - ╚══════════════════════════════════════════════════════════════════╝\n" + "\n\n╔══════════════════════════════════════════════════════════════════╗\\ + \ + n║ FAILED TO START FRESH TEST SERVER \ + ║\n╠══════════════════════════════════════════════════════════════════╣\\ + n║ KALAMDB_SERVER_TYPE=fresh but could not auto-start server. ║\n║ \ + Build the server: cd backend && cargo build \ + ║\n╚══════════════════════════════════════════════════════════════════╝\n" ); } }, @@ -1630,12 +1756,12 @@ pub fn test_context() -> &'static TestContext { }) }); - let (is_cluster, mut cluster_urls) = match server_type { + let (is_cluster, mut cluster_urls, cluster_urls_raw) = match server_type { Some(ServerType::Cluster) => { // Cluster mode: probe cluster URLs to find healthy nodes let cluster_default = "http://127.0.0.1:8081,http://127.0.0.1:8082,http://127.0.0.1:8083"; - let cluster_urls: Vec = + let configured_cluster_urls: Vec = explicit_cluster_urls.clone().unwrap_or_else(|| { cluster_default .split(',') @@ -1643,66 +1769,70 @@ pub fn test_context() -> &'static TestContext { .filter(|url| !url.is_empty()) .collect() }); - let healthy: Vec = cluster_urls - .iter() - .filter(|url| { - let host_port = url - .trim_start_matches("http://") - .trim_start_matches("https://") - .split('/') - .next() - .unwrap_or("127.0.0.1:8081"); - host_port_reachable(host_port) - }) - .cloned() - .collect(); - if let Some(explicit_urls) = explicit_cluster_urls { - if healthy.is_empty() { - if let Some((auto_url, storage_dir)) = ensure_auto_test_server() { - std::env::set_var("KALAMDB_SERVER_URL", &auto_url); - if std::env::var("KALAMDB_ROOT_PASSWORD").is_err() { - std::env::set_var( - "KALAMDB_ROOT_PASSWORD", - root_password_from_env(), - ); - } - std::env::set_var( - "KALAMDB_STORAGE_DIR", - storage_dir.to_string_lossy().to_string(), - ); - server_url = auto_url.clone(); - eprintln!( - "[TEST] Cluster URLs configured but unreachable; 
falling back to fresh auto-started server at {}", - server_url - ); - (false, vec![auto_url]) - } else { - eprintln!( - "[TEST] Cluster URLs configured but unreachable; falling back to single-node URL {}", - server_url - ); - (false, vec![server_url.clone()]) - } - } else { - (explicit_urls.len() > 1, explicit_urls) - } - } else if !healthy.is_empty() { - (true, healthy) - } else { - (false, vec![server_url.clone()]) + let reachable_cluster_urls = wait_for_reachable_cluster_urls( + &configured_cluster_urls, + Duration::from_secs(15), + ); + + if reachable_cluster_urls.is_empty() { + panic!( + "\n\n\ + ╔══════════════════════════════════════════════════════════════════╗\n\ + ║ CLUSTER NODES NOT REACHABLE ║\n\ + ╠══════════════════════════════════════════════════════════════════╣\n\ + ║ Cluster mode was requested, but no configured cluster node ║\n\ + ║ became reachable within 15s. ║\n\ + ║ ║\n\ + ║ Configured cluster URLs: ║\n\ + ║ {:?} ║\n\ + ║ ║\n\ + ║ Start the cluster first: ./scripts/cluster.sh start ║\n\ + ╚══════════════════════════════════════════════════════════════════╝\n\n", + configured_cluster_urls + ); + } + + let sql_ready_cluster_urls = wait_for_sql_ready_cluster_urls( + &reachable_cluster_urls, + Duration::from_secs(20), + ); + + if sql_ready_cluster_urls.is_empty() { + panic!( + "\n\n\ + ╔══════════════════════════════════════════════════════════════════╗\n\ + ║ CLUSTER NODES NOT READY FOR SQL/AUTH ║\n\ + ╠══════════════════════════════════════════════════════════════════╣\n\ + ║ Cluster nodes responded on TCP/HTTP, but none completed ║\n\ + ║ SQL/auth readiness within 20s. 
║\n\ + ║ ║\n\ + ║ Reachable cluster URLs: ║\n\ + ║ {:?} ║\n\ + ║ ║\n\ + ║ Check node logs: ./scripts/cluster.sh logs 1 ║\n\ + ╚══════════════════════════════════════════════════════════════════╝\n\n", + reachable_cluster_urls + ); } + + (true, sql_ready_cluster_urls, configured_cluster_urls) }, Some(ServerType::Fresh) | Some(ServerType::Running) | None => { // Single-node: no cluster probing, instant startup if let Some(explicit_urls) = explicit_cluster_urls { - (explicit_urls.len() > 1, explicit_urls) + let reachable_urls = reachable_cluster_urls(&explicit_urls); + let effective_urls = if explicit_urls.len() > 1 && !reachable_urls.is_empty() { + reachable_urls + } else { + explicit_urls.clone() + }; + (explicit_urls.len() > 1, effective_urls, explicit_urls) } else { - (false, vec![server_url.clone()]) + (false, vec![server_url.clone()], vec![server_url.clone()]) } }, }; - let cluster_urls_raw = cluster_urls.clone(); if is_cluster { cluster_urls = reorder_cluster_urls_by_leader(cluster_urls, &username, &password); } @@ -1767,20 +1897,54 @@ pub fn leader_url() -> Option { return Some(ctx.server_url.clone()); } - // Check cached leader first + // Check cached leader first. Reachability alone is not enough here: after a + // leader change the old leader is still healthy, but writes and some plans + // must route to the new leader. + // + // Performance: do NOT issue a synchronous TCP probe per SQL call on the + // hot path. The cache TTL already bounds staleness; an unreachable leader + // will surface as a request error and clear the cache via + // `report_leader_error`. Adding a fresh `TcpStream::connect()` here was + // creating a short-lived ephemeral socket per SQL call in cluster mode, + // which dominated runtime in high-concurrency tests and contributed to + // localhost port exhaustion on macOS. 
if let Some(cached) = cached_leader_url() { - eprintln!("DEBUG leader_url: Returning cached URL: {}", cached); - return Some(cached); + if ctx.cluster_urls.iter().any(|url| url == &cached.url) { + if cached.confirmed_at.elapsed() <= LEADER_CACHE_TTL { + trace_leader_cache(format!("using cached leader {}", cached.url)); + return Some(cached.url); + } + + if detect_leader_url(std::slice::from_ref(&cached.url), &ctx.username, &ctx.password) + .as_deref() + == Some(cached.url.as_str()) + { + trace_leader_cache(format!("revalidated cached leader {}", cached.url)); + cache_leader_url(&cached.url); + return Some(cached.url); + } + } + clear_cached_leader_url(); } - if let Some(leader) = detect_leader_url(&ctx.cluster_urls_raw, &ctx.username, &ctx.password) { - eprintln!("DEBUG leader_url: Detected leader URL: {}", leader); + let leader_probe_urls = if ctx.cluster_urls.is_empty() { + &ctx.cluster_urls_raw + } else { + &ctx.cluster_urls + }; + + if let Some(leader) = detect_leader_url(leader_probe_urls, &ctx.username, &ctx.password) { + trace_leader_cache(format!("detected leader {}", leader)); cache_leader_url(&leader); return Some(leader); } - let fallback = ctx.cluster_urls.first().cloned(); - eprintln!("DEBUG leader_url: Using fallback URL: {:?}", fallback); + let fallback = ctx + .cluster_urls + .first() + .cloned() + .or_else(|| ctx.cluster_urls_raw.first().cloned()); + trace_leader_cache(format!("using fallback leader {:?}", fallback)); fallback } @@ -1917,7 +2081,7 @@ pub async fn execute_sql_via_http_as( // Get access token first let token = get_access_token(username, password).await?; - let client = Client::new(); + let client = shared_http_client(); let mut last_parsed: Option = None; for attempt in 0..5 { @@ -2045,8 +2209,8 @@ pub fn extract_arrow_value(value: &serde_json::Value) -> Option Option Option>> { - use serde_json::Value; use std::collections::HashMap; + use serde_json::Value; + // Extract schema - array of {name, data_type, index} let schema = 
result.get("schema")?.as_array()?; @@ -2137,10 +2302,11 @@ pub fn is_leader_error(message: &str) -> bool { if is_leader { if let Some(url) = extract_leader_url(message) { - eprintln!("DEBUG is_leader_error: Extracted and caching leader URL: {}", url); + trace_leader_cache(format!("leader error reported {}", url)); cache_leader_url(&url); } else { - eprintln!("DEBUG is_leader_error: No URL extracted from message"); + trace_leader_cache("leader error without URL; clearing cached leader"); + clear_cached_leader_url(); } } @@ -2165,7 +2331,11 @@ fn is_network_error(message: &str) -> bool { || lower.contains("broken pipe") || lower.contains("connection error") || lower.contains("timed out") - || lower.contains("timeout") + || lower.contains("request timeout") + || lower.contains("connection timeout") + || lower.contains("read timeout") + || lower.contains("write timeout") + || lower.contains("deadline has elapsed") } fn is_transient_missing_relation(sql: &str, message: &str) -> bool { @@ -2187,6 +2357,20 @@ fn is_transient_missing_relation(sql: &str, message: &str) -> bool { || lower_sql.starts_with("alter table") } +fn is_redacted_cluster_sql_error(sql: &str, message: &str) -> bool { + let lower_msg = message.to_ascii_lowercase(); + if !lower_msg.contains("sql_execution_error") || !lower_msg.contains("sql statement failed") { + return false; + } + + let lower_sql = sql.trim_start().to_ascii_lowercase(); + lower_sql.starts_with("select ") + || lower_sql.starts_with("insert ") + || lower_sql.starts_with("update ") + || lower_sql.starts_with("delete ") + || lower_sql.starts_with("execute as user ") +} + fn is_rate_limited_error(message: &str) -> bool { let lower = message.to_ascii_lowercase(); lower.contains("rate_limit_exceeded") @@ -2199,6 +2383,7 @@ pub fn is_retryable_cluster_error_for_sql(sql: &str, message: &str) -> bool { is_leader_error(message) || is_network_error(message) || is_transient_missing_relation(sql, message) + || is_redacted_cluster_sql_error(sql, 
message) } fn cli_output_error(stdout: &str) -> Option { @@ -2219,7 +2404,36 @@ fn cli_output_error(stdout: &str) -> Option { /// Returns false only when no explicit server type is set and server is down. pub fn is_server_running() -> bool { // test_context() has already validated server reachability based on KALAMDB_SERVER_TYPE - let _ctx = test_context(); + let ctx = test_context(); + + if ctx.is_cluster { + let urls = if ctx.cluster_urls.is_empty() { + ctx.cluster_urls_raw.clone() + } else { + ctx.cluster_urls.clone() + }; + let reachable = wait_for_reachable_cluster_urls(&urls, Duration::from_secs(5)); + if !reachable.is_empty() { + return true; + } + + panic!( + "\n\n\ + ╔══════════════════════════════════════════════════════════════════╗\n\ + ║ CLUSTER NODES NOT REACHABLE ║\n\ + ╠══════════════════════════════════════════════════════════════════╣\n\ + ║ Tests require at least one configured cluster node to be ║\n\ + ║ reachable, but none responded within 5s. ║\n\ + ║ ║\n\ + ║ Configured cluster URLs: ║\n\ + ║ {:?} ║\n\ + ║ ║\n\ + ║ Check the cluster: ./scripts/cluster.sh status ║\n\ + ╚══════════════════════════════════════════════════════════════════╝\n\n", + urls + ); + } + let url = server_url(); if is_server_reachable() { @@ -2327,7 +2541,7 @@ fn server_requires_auth_for_url(url: &str) -> Option { for _attempt in 0..3 { let request_url = url.clone(); let request = async move { - Client::new() + shared_http_client() .post(format!("{}/v1/api/sql", request_url)) .json(&json!({ "sql": "SELECT 1" })) .timeout(Duration::from_millis(2000)) @@ -2392,7 +2606,15 @@ fn server_requires_auth_for_url(url: &str) -> Option { pub fn get_available_server_urls() -> Vec { let ctx = test_context(); if ctx.is_cluster { - let mut urls = ctx.cluster_urls_raw.clone(); + let seed_urls = if ctx.cluster_urls.is_empty() { + ctx.cluster_urls_raw.clone() + } else { + ctx.cluster_urls.clone() + }; + let mut urls = wait_for_sql_ready_cluster_urls(&seed_urls, Duration::from_secs(2)); + if 
urls.is_empty() { + urls = seed_urls; + } if let Some(leader) = leader_url() { urls.retain(|url| url != &leader); urls.insert(0, leader); @@ -2473,7 +2695,7 @@ fn wait_for_namespace_on_all_nodes(namespace: &str, timeout: Duration) -> bool { if !is_cluster_mode() { return true; } - let urls = test_context().cluster_urls_raw.clone(); + let urls = get_available_server_urls(); let sql = format!( "SELECT namespace_id FROM system.namespaces WHERE namespace_id = '{}'", namespace @@ -2486,6 +2708,7 @@ fn wait_for_namespace_on_all_nodes(namespace: &str, timeout: Duration) -> bool { if all_visible { return true; } + std::thread::sleep(Duration::from_millis(50)); } false } @@ -2494,7 +2717,7 @@ fn wait_for_table_on_all_nodes(namespace: &str, table: &str, timeout: Duration) if !is_cluster_mode() { return true; } - let urls = test_context().cluster_urls_raw.clone(); + let urls = get_available_server_urls(); let sql = format!( "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", namespace, table @@ -2507,6 +2730,7 @@ fn wait_for_table_on_all_nodes(namespace: &str, table: &str, timeout: Duration) if all_visible { return true; } + std::thread::sleep(Duration::from_millis(50)); } false } @@ -2516,6 +2740,17 @@ fn wait_for_cluster_after_sql(sql: &str) { return; } + let strict_visibility = std::env::var("KALAMDB_TEST_STRICT_CLUSTER_VISIBILITY") + .map(|value| { + let value = value.trim(); + value == "1" || value.eq_ignore_ascii_case("true") + }) + .unwrap_or(false); + + if !strict_visibility { + return; + } + if let Some(namespace) = parse_create_namespace(sql) { let _ = wait_for_namespace_on_all_nodes(&namespace, Duration::from_secs(20)); return; @@ -2580,6 +2815,71 @@ pub fn websocket_url() -> String { format!("{}/v1/ws", base) } +fn retryable_embedded_cluster_error( + sql: &str, + response: &kalam_client::QueryResponse, +) -> Option { + for result in &response.results { + if let Some(message) = &result.message { + if 
is_retryable_cluster_error_for_sql(sql, message) { + return Some(message.clone()); + } + } + + let is_explain = sql.trim_start().to_ascii_uppercase().starts_with("EXPLAIN"); + if let Some(rows) = &result.rows { + for row in rows { + for value in row { + if let Some(message) = retryable_embedded_value(sql, value.inner(), is_explain) + { + return Some(message); + } + } + } + } + + if let Some(named_rows) = &result.named_rows { + for row in named_rows { + for value in row.values() { + if let Some(message) = retryable_embedded_value(sql, value.inner(), is_explain) + { + return Some(message); + } + } + } + } + } + + None +} + +fn retryable_embedded_value( + sql: &str, + value: &serde_json::Value, + is_explain: bool, +) -> Option { + match value { + serde_json::Value::String(message) => { + let lower = message.to_ascii_lowercase(); + let looks_like_plan_error = lower.contains("physical_plan_error"); + if (is_explain || looks_like_plan_error) + && is_retryable_cluster_error_for_sql(sql, message) + { + Some(message.clone()) + } else { + None + } + }, + serde_json::Value::Array(values) => { + values.iter().find_map(|value| retryable_embedded_value(sql, value, is_explain)) + }, + serde_json::Value::Object(values) => values + .values() + .find_map(|value| retryable_embedded_value(sql, value, is_explain)), + _ => None, + } +} + pub fn storage_base_dir() -> std::path::PathBuf { if let Ok(path) = std::env::var("KALAMDB_STORAGE_DIR") { return std::path::PathBuf::from(path); @@ -2611,18 +2911,35 @@ where { let start = Instant::now(); let poll_interval = Duration::from_millis(200); + let mut last_error = None; + let mut last_output = None; loop { - let output = execute(sql)?; - if output.contains(expected) { - return Ok(output); + match execute(sql) { + Ok(output) => { + if output.contains(expected) { + return Ok(output); + } + last_output = Some(output); + }, + Err(err) if is_cluster_mode() => { + last_error = Some(err.to_string()); + }, + Err(err) => return Err(err), } + if 
start.elapsed() > timeout { - return Err(format!( - "Timeout waiting for query to contain '{}'. Last output: {}", - expected, output - ) - .into()); + let detail = match (last_output, last_error) { + (Some(output), Some(error)) => { + format!("Last output: {}. Last error: {}", output, error) + }, + (Some(output), None) => format!("Last output: {}", output), + (None, Some(error)) => format!("Last error: {}", error), + (None, None) => "No output observed".to_string(), + }; + return Err( + format!("Timeout waiting for query to contain '{}'. {}", expected, detail).into() + ); } thread::sleep(poll_interval); } @@ -2721,35 +3038,19 @@ pub fn execute_sql_via_cli_as_with_timing( use std::time::Instant; let start = Instant::now(); - let output = Command::new(env!("CARGO_BIN_EXE_kalam")) - .arg("-u") - .arg(server_url()) - .arg("--user") - .arg(username) - .arg("--password") - .arg(password) - .arg("--command") - .arg(sql) - .output()?; + let output = execute_sql_via_cli_as(username, password, sql)?; let total_time_ms = start.elapsed().as_millis(); - if output.status.success() { - let output_str = String::from_utf8_lossy(&output.stdout).to_string(); - - // Extract server time from output (looks for "Took: XXX.XXX ms") - let server_time_ms = output_str.lines().find(|l| l.starts_with("Took:")).and_then(|line| { - // Parse "Took: 123.456 ms" - line.split_whitespace().nth(1).and_then(|s| s.parse::().ok()) - }); + let server_time_ms = output + .lines() + .find(|l| l.starts_with("Took:")) + .and_then(|line| line.split_whitespace().nth(1).and_then(|s| s.parse::().ok())); - Ok(CliTiming { - output: output_str, - total_time_ms, - server_time_ms, - }) - } else { - Err(format!("CLI command failed: {}", String::from_utf8_lossy(&output.stderr)).into()) - } + Ok(CliTiming { + output, + total_time_ms, + server_time_ms, + }) } /// Helper to execute SQL via CLI with authentication @@ -2834,6 +3135,7 @@ fn execute_sql_via_cli_as_with_args_and_urls( urls_override: Option>, ) -> Result> { use 
std::time::Instant; + use wait_timeout::ChildExt; // test_context() already ensures server is started/reachable @@ -3076,6 +3378,137 @@ pub fn execute_sql_as_root_via_cli_json(sql: &str) -> Result Result> { + static SETUP_DONE: std::sync::OnceLock<()> = std::sync::OnceLock::new(); + SETUP_DONE.get_or_init(|| { + ensure_cli_server_setup().expect("Failed to prepare CLI server setup"); + }); + + let file_arg = file_path + .to_str() + .ok_or_else(|| format!("Non-UTF8 SQL file path: {:?}", file_path))? + .to_string(); + + let max_attempts = if is_cluster_mode() { 6 } else { 3 }; + let mut last_err: Option = None; + + for attempt in 0..max_attempts { + let urls = if is_cluster_mode() { + get_available_server_urls() + } else { + vec![server_url().to_string()] + }; + let mut retry_after_attempt = false; + + for (idx, url) in urls.iter().enumerate() { + let creds_dir = TempDir::new().map_err(|err| err.to_string())?; + let creds_path = creds_dir.path().join("credentials.toml"); + let token = get_access_token_for_url_sync(url, admin_username(), admin_password()); + + let mut child = Command::new(env!("CARGO_BIN_EXE_kalam")); + child.arg("-u").arg(url); + + if let Some(token) = token { + child.arg("--token").arg(token); + } else { + child + .arg("--user") + .arg(admin_username()) + .arg("--password") + .arg(admin_password()); + } + + child + .env("KALAMDB_CREDENTIALS_PATH", &creds_path) + .env("NO_PROXY", "127.0.0.1,localhost,::1") + .env("no_proxy", "127.0.0.1,localhost,::1") + .env_remove("HTTP_PROXY") + .env_remove("http_proxy") + .env_remove("HTTPS_PROXY") + .env_remove("https_proxy") + .env_remove("ALL_PROXY") + .env_remove("all_proxy") + .arg("--no-spinner") + .arg("--file") + .arg(&file_arg) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()); + + let mut child = child.spawn()?; + use wait_timeout::ChildExt; + + match child.wait_timeout(Duration::from_secs(60))? 
{ + Some(status) => { + let mut stdout = String::new(); + let mut stderr = String::new(); + if let Some(ref mut out) = child.stdout { + use std::io::Read; + out.read_to_string(&mut stdout)?; + } + if let Some(ref mut err) = child.stderr { + use std::io::Read; + err.read_to_string(&mut stderr)?; + } + + if status.success() { + if let Some(output_err) = cli_output_error(&stdout) { + let err_msg = format!("CLI file output error: {}", output_err); + if is_leader_error(&err_msg) || is_network_error(&err_msg) { + last_err = Some(err_msg); + if idx + 1 < urls.len() { + continue; + } + retry_after_attempt = true; + break; + } + return Err(err_msg.into()); + } + return Ok(stdout); + } + + let err_msg = format!("CLI file command failed: {}", stderr); + if is_refreshable_token_error(&err_msg) { + invalidate_auth_caches_for_credentials( + url, + admin_username(), + admin_password(), + ); + last_err = Some(err_msg); + retry_after_attempt = true; + break; + } + if is_leader_error(&err_msg) || is_network_error(&err_msg) { + last_err = Some(err_msg); + if idx + 1 < urls.len() { + continue; + } + retry_after_attempt = true; + break; + } + return Err(err_msg.into()); + }, + None => { + let _ = child.kill(); + let _ = child.wait(); + last_err = Some("CLI file command timed out after 60s".to_string()); + retry_after_attempt = true; + }, + } + } + + if retry_after_attempt { + let delay_ms = 100 + attempt * 100; + std::thread::sleep(Duration::from_millis(delay_ms as u64)); + } + } + + Err(last_err + .unwrap_or_else(|| "CLI file command failed on all cluster nodes".to_string()) + .into()) +} + /// Wait for a SQL query to return output containing an expected substring. /// /// Useful for eventual consistency scenarios where data may not be immediately visible. 
@@ -3171,6 +3604,7 @@ fn build_client_for_url_with_timeouts( KalamLinkClient::builder() .base_url(base_url) .auth(auth) + .http_pool_max_idle_per_host(256) .timeouts(timeouts) .build() .map_err(|err| Box::new(err) as Box) @@ -3192,6 +3626,7 @@ pub fn client_for_url_no_auth( KalamLinkClient::builder() .base_url(base_url) .auth(AuthProvider::none()) + .http_pool_max_idle_per_host(256) .timeouts(timeouts) .build() .map_err(|err| Box::new(err) as Box) @@ -3371,6 +3806,16 @@ fn execute_sql_via_client_internal( } return Err(err_msg.into()); } + if let Some(err_msg) = + retryable_embedded_cluster_error(&sql, &response) + { + last_err = Some(err_msg.into()); + if idx + 1 < urls.len() { + continue; + } + retry_after_attempt = true; + break; + } return Ok(response); }, Err(e) => { @@ -3710,8 +4155,10 @@ pub fn extract_typed_value(value: &serde_json::Value) -> serde_json::Value { /// Helper to generate unique namespace name pub fn generate_unique_namespace(base_name: &str) -> String { - use std::sync::atomic::{AtomicU64, Ordering}; - use std::time::{SystemTime, UNIX_EPOCH}; + use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::{SystemTime, UNIX_EPOCH}, + }; static COUNTER: AtomicU64 = AtomicU64::new(0); let count = COUNTER.fetch_add(1, Ordering::SeqCst); @@ -3730,8 +4177,10 @@ pub fn generate_unique_namespace(base_name: &str) -> String { /// Helper to generate unique table name pub fn generate_unique_table(base_name: &str) -> String { - use std::sync::atomic::{AtomicU64, Ordering}; - use std::time::{SystemTime, UNIX_EPOCH}; + use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::{SystemTime, UNIX_EPOCH}, + }; static COUNTER: AtomicU64 = AtomicU64::new(0); let count = COUNTER.fetch_add(1, Ordering::SeqCst); @@ -4003,7 +4452,8 @@ pub fn parse_job_id_from_flush_output(output: &str) -> Result Result> { @@ -4742,11 +5192,9 @@ pub fn assert_flush_storage_files_exist( if result.is_valid() { println!( - "✅ [{}] Verified flush storage: manifest.json ({} bytes), {} parquet 
file(s) ({} bytes total)", - context, - result.manifest_size, - result.parquet_file_count, - result.parquet_total_size + "✅ [{}] Verified flush storage: manifest.json ({} bytes), {} parquet file(s) ({} \ + bytes total)", + context, result.manifest_size, result.parquet_file_count, result.parquet_total_size ); return; } diff --git a/cli/tests/connection.rs b/cli/tests/connection.rs index 561a04b56..8e16b7d02 100644 --- a/cli/tests/connection.rs +++ b/cli/tests/connection.rs @@ -10,7 +10,8 @@ // cargo test --test connection timeout // cargo test --test connection concurrent_ws // -// Note: subscription_options_tests and live_connection_tests have been moved to the subscription category +// Note: subscription_options_tests and live_connection_tests have been moved to the subscription +// category // // Unit tests (no server required): mod common; diff --git a/cli/tests/connection/concurrent_ws_tests.rs b/cli/tests/connection/concurrent_ws_tests.rs index 7f3eb6d6f..fcee42c8f 100644 --- a/cli/tests/connection/concurrent_ws_tests.rs +++ b/cli/tests/connection/concurrent_ws_tests.rs @@ -8,11 +8,17 @@ //! Run with: //! cargo test --test connection concurrent_ws -- --test-threads=1 -use crate::common::*; +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + use kalam_client::{KalamLinkTimeouts, SubscriptionConfig}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; + +use crate::common::*; /// Test: Open many concurrent WebSocket connections, each subscribing to the same table. /// Verify all receive an initial ack and at least one notification without heartbeat timeout. 
@@ -182,12 +188,8 @@ fn test_concurrent_websocket_subscriptions() { let total_timeouts = timeouts.load(Ordering::Relaxed); eprintln!( - "\n=== Concurrent WS Test Results ===\n\ - Connections attempted: {}\n\ - Successfully subscribed: {}\n\ - Received notification: {}\n\ - Errors: {}\n\ - Heartbeat timeouts: {}\n", + "\n=== Concurrent WS Test Results ===\nConnections attempted: {}\nSuccessfully \ + subscribed: {}\nReceived notification: {}\nErrors: {}\nHeartbeat timeouts: {}\n", num_connections, total_subscribed, total_notified, total_errors, total_timeouts ); @@ -210,7 +212,7 @@ fn test_concurrent_websocket_subscriptions() { assert_eq!( total_timeouts, 0, "Expected zero heartbeat timeouts, got {} (this indicates the server heartbeat \ - checker is too aggressive or connection handling is too slow)", + checker is too aggressive or connection handling is too slow)", total_timeouts ); }); // rt.block_on @@ -303,11 +305,8 @@ fn test_rapid_connect_disconnect() { let elapsed = start.elapsed(); eprintln!( - "\n=== Rapid Connect/Disconnect Results ===\n\ - Cycles: {}\n\ - Successful: {}\n\ - Elapsed: {:.2}s\n\ - Avg per cycle: {:.1}ms\n", + "\n=== Rapid Connect/Disconnect Results ===\nCycles: {}\nSuccessful: {}\nElapsed: \ + {:.2}s\nAvg per cycle: {:.1}ms\n", cycles, success_count, elapsed.as_secs_f64(), diff --git a/cli/tests/connection/reconnection_tests.rs b/cli/tests/connection/reconnection_tests.rs index 1e1847ba4..e4ad11aee 100644 --- a/cli/tests/connection/reconnection_tests.rs +++ b/cli/tests/connection/reconnection_tests.rs @@ -7,9 +7,10 @@ //! //! NOTE: These are unit tests that don't require a running server. 
-use kalam_client::{ConnectionOptions, SeqId, SubscriptionOptions}; use std::collections::HashMap; +use kalam_client::{ConnectionOptions, SeqId, SubscriptionOptions}; + /// Simulates the subscription state tracking done by the WASM client #[allow(dead_code)] struct MockSubscriptionState { diff --git a/cli/tests/connection/timeout_tests.rs b/cli/tests/connection/timeout_tests.rs index 4253e98f0..47d5a7ac9 100644 --- a/cli/tests/connection/timeout_tests.rs +++ b/cli/tests/connection/timeout_tests.rs @@ -7,9 +7,10 @@ //! NOTE: These are integration tests that test actual network behavior. //! They do NOT require a running server (they test timeout behavior). -use crate::common::{client_for_url_no_auth, KalamLinkTimeouts}; use std::time::{Duration, Instant}; +use crate::common::{client_for_url_no_auth, KalamLinkTimeouts}; + /// Test that connection timeout is properly enforced when server is unreachable /// /// This test connects to a non-existent server and verifies that: diff --git a/cli/tests/flushing/test_flush.rs b/cli/tests/flushing/test_flush.rs index 30f088423..d7ca54c7f 100644 --- a/cli/tests/flushing/test_flush.rs +++ b/cli/tests/flushing/test_flush.rs @@ -7,9 +7,10 @@ //! - Data persistence after flush operations //! 
- Flush command error handling -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// T059: Test explicit flush command #[test] fn test_cli_explicit_flush() { diff --git a/cli/tests/performance/test_server_memory_regression.rs b/cli/tests/performance/test_server_memory_regression.rs index 78d43cdf6..1dd944519 100644 --- a/cli/tests/performance/test_server_memory_regression.rs +++ b/cli/tests/performance/test_server_memory_regression.rs @@ -1,7 +1,9 @@ -use crate::common::*; +use std::{ + thread, + time::{Duration, Instant}, +}; -use std::thread; -use std::time::{Duration, Instant}; +use crate::common::*; const DEFAULT_ROWS: usize = 4_000; const DEFAULT_BATCH_SIZE: usize = 200; @@ -102,7 +104,8 @@ fn create_memory_test_table(namespace: &str, table: &str) -> String { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE {}", namespace)) .expect("create namespace should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, tenant TEXT NOT NULL, payload TEXT NOT NULL, seq BIGINT NOT NULL) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:500')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, tenant TEXT NOT NULL, payload TEXT NOT NULL, seq \ + BIGINT NOT NULL) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:500')", full_table )) .expect("create table should succeed"); @@ -232,7 +235,9 @@ fn smoke_test_server_memory_regression() { Duration::from_millis(env_u64("KALAMDB_MEM_TEST_SAMPLE_DELAY_MS", DEFAULT_SAMPLE_DELAY_MS)); println!( - "[config] rows={} batch={} payload_bytes={} query_loops={} peak_budget={}MB recovery_budget={}MB settle={}s warmup={} warmup_rows={} warmup_query_loops={} warmup_settle={}s", + "[config] rows={} batch={} payload_bytes={} query_loops={} peak_budget={}MB \ + recovery_budget={}MB settle={}s warmup={} warmup_rows={} warmup_query_loops={} \ + warmup_settle={}s", total_rows, batch_size, payload_bytes, @@ -308,7 +313,8 @@ fn smoke_test_server_memory_regression() { let recovery_delta_mb = 
settled_mb.saturating_sub(baseline_mb); println!( - "[summary] insert_time={:.2}s read_time={:.2}s baseline={}MB peak={}MB recovered={}MB peak_delta={}MB recovery_delta={}MB", + "[summary] insert_time={:.2}s read_time={:.2}s baseline={}MB peak={}MB recovered={}MB \ + peak_delta={}MB recovery_delta={}MB", insert_elapsed.as_secs_f64(), read_elapsed.as_secs_f64(), baseline_mb, @@ -320,7 +326,8 @@ fn smoke_test_server_memory_regression() { assert!( peak_delta_mb <= peak_budget_mb, - "server memory spiked too far above baseline: baseline={}MB peak={}MB delta={}MB budget={}MB", + "server memory spiked too far above baseline: baseline={}MB peak={}MB delta={}MB \ + budget={}MB", baseline_mb, peak_mb, peak_delta_mb, @@ -328,7 +335,8 @@ fn smoke_test_server_memory_regression() { ); assert!( recovery_delta_mb <= recovery_budget_mb, - "server memory did not recover near baseline after flush + cleanup: baseline={}MB recovered={}MB delta={}MB budget={}MB", + "server memory did not recover near baseline after flush + cleanup: baseline={}MB \ + recovered={}MB delta={}MB budget={}MB", baseline_mb, settled_mb, recovery_delta_mb, diff --git a/cli/tests/repro_issue.rs b/cli/tests/repro_issue.rs index c54d2bfc4..450ae052f 100644 --- a/cli/tests/repro_issue.rs +++ b/cli/tests/repro_issue.rs @@ -1,7 +1,8 @@ mod common; -use common::*; use std::time::Duration; +use common::*; + async fn execute_sql(sql: &str) -> Result { execute_sql_as_root_via_client(sql).map_err(|e| e.to_string()) } diff --git a/cli/tests/smoke.rs b/cli/tests/smoke.rs index 1e7dbdf89..68e3200c2 100644 --- a/cli/tests/smoke.rs +++ b/cli/tests/smoke.rs @@ -79,6 +79,8 @@ mod smoke_test_topic_consumption; mod smoke_test_topic_high_load; #[path = "smoke/topics/smoke_test_topic_throughput.rs"] mod smoke_test_topic_throughput; +#[path = "smoke/topics/topic_test_support.rs"] +mod topic_test_support; // CLI tests #[path = "smoke/cli/smoke_test_cli_commands.rs"] @@ -141,16 +143,16 @@ mod leader_only_reads; // Query tests #[path = 
"smoke/query/smoke_test_00_parallel_query_burst.rs"] mod smoke_test_00_parallel_query_burst; -#[path = "smoke/query/smoke_test_json_operators.rs"] -mod smoke_test_json_operators; #[path = "smoke/query/smoke_test_filter_pushdown.rs"] mod smoke_test_filter_pushdown; +#[path = "smoke/query/smoke_test_json_operators.rs"] +mod smoke_test_json_operators; #[path = "smoke/query/smoke_test_provider_exec_models.rs"] mod smoke_test_provider_exec_models; -#[path = "smoke/query/smoke_test_stream_explain_planning.rs"] -mod smoke_test_stream_explain_planning; #[path = "smoke/query/smoke_test_queries_benchmark.rs"] mod smoke_test_queries_benchmark; +#[path = "smoke/query/smoke_test_stream_explain_planning.rs"] +mod smoke_test_stream_explain_planning; // System tests #[path = "smoke/system/smoke_test_all_system_tables_schemas.rs"] diff --git a/cli/tests/smoke/cli/smoke_test_cli_commands.rs b/cli/tests/smoke/cli/smoke_test_cli_commands.rs index e589266c8..79ad083d6 100644 --- a/cli/tests/smoke/cli/smoke_test_cli_commands.rs +++ b/cli/tests/smoke/cli/smoke_test_cli_commands.rs @@ -10,9 +10,10 @@ //! - \help / \? - Help text //! - SQL execution -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Smoke Test: \stats command works correctly #[ntest::timeout(60000)] #[test] @@ -65,7 +66,8 @@ fn smoke_cli_list_tables_command() { // Query system.tables with a narrow filter (this is what \dt uses internally). 
let result = execute_sql_as_root_via_client(&format!( - "SELECT namespace_id AS namespace, table_name, table_type FROM system.tables WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT namespace_id AS namespace, table_name, table_type FROM system.tables WHERE \ + namespace_id = '{}' AND table_name = '{}'", namespace, table )) .expect("Failed to list tables"); @@ -114,9 +116,11 @@ fn smoke_cli_describe_table_command() { // Query information_schema.columns (this is what \describe does) let result = execute_sql_as_root_via_client(&format!( - "SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_schema = '{}' AND table_name = '{}' ORDER BY ordinal_position", + "SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE \ + table_schema = '{}' AND table_name = '{}' ORDER BY ordinal_position", namespace, table - )).expect("Failed to describe table"); + )) + .expect("Failed to describe table"); // Verify columns are shown assert!(result.contains("id"), "Should show id column: {}", result); @@ -397,8 +401,8 @@ fn smoke_cli_user_management() { let result = execute_sql_as_root_via_client(&format!("DROP USER IF EXISTS {}", user_id)); assert!(result.is_ok(), "DROP USER should succeed: {:?}", result); - // Note: Users are soft-deleted (deleted_at timestamp set), so they may still appear in system.users - // Verify user is soft-deleted by checking deleted_at is not null + // Note: Users are soft-deleted (deleted_at timestamp set), so they may still appear in + // system.users Verify user is soft-deleted by checking deleted_at is not null let _result = execute_sql_as_root_via_client(&format!( "SELECT deleted_at FROM system.users WHERE user_id = '{}'", user_id @@ -504,7 +508,8 @@ fn smoke_cli_alter_table() { // Verify column added wait_for_sql_output_contains( &format!( - "SELECT column_name FROM information_schema.columns WHERE table_schema = '{}' AND table_name = '{}'", + "SELECT column_name FROM 
information_schema.columns WHERE table_schema = '{}' AND \ + table_name = '{}'", namespace, table ), "email", diff --git a/cli/tests/smoke/cli/smoke_test_cluster_operations.rs b/cli/tests/smoke/cli/smoke_test_cluster_operations.rs index 1e2139618..8e2e21dc3 100644 --- a/cli/tests/smoke/cli/smoke_test_cluster_operations.rs +++ b/cli/tests/smoke/cli/smoke_test_cluster_operations.rs @@ -13,10 +13,11 @@ //! Note: Full multi-node cluster tests require multiple server instances. //! These smoke tests focus on single-node behavior that must work in both modes. -use crate::common::*; +use std::{sync::OnceLock, time::Duration}; + use kalam_client::KalamLinkTimeouts; -use std::sync::OnceLock; -use std::time::Duration; + +use crate::common::*; fn cluster_runtime() -> &'static tokio::runtime::Runtime { static RUNTIME: OnceLock = OnceLock::new(); @@ -430,9 +431,11 @@ fn smoke_test_cluster_shared_table_consistency() { // Insert config as root execute_sql_as_root_via_client(&format!( - "INSERT INTO {}.global_config (config_key, config_value, updated_by) VALUES ('app_version', '1.0.0', 'root')", + "INSERT INTO {}.global_config (config_key, config_value, updated_by) VALUES \ + ('app_version', '1.0.0', 'root')", namespace - )).expect("Failed to insert config"); + )) + .expect("Failed to insert config"); println!(" ✓ Config inserted by root"); // Create a test user @@ -670,8 +673,10 @@ fn smoke_test_cluster_job_tracking() { // Check system.jobs for flush jobs let result = execute_sql_as_root_via_client( - "SELECT job_id, job_type, status FROM system.jobs WHERE job_type = 'flush' ORDER BY created_at DESC LIMIT 5" - ).expect("Failed to query system.jobs"); + "SELECT job_id, job_type, status FROM system.jobs WHERE job_type = 'flush' ORDER BY \ + created_at DESC LIMIT 5", + ) + .expect("Failed to query system.jobs"); println!(" Recent flush jobs:\n{}", result); @@ -737,9 +742,11 @@ fn smoke_test_cluster_storage_operations() { // Verify table references the storage let result = 
execute_sql_as_root_via_client(&format!( - "SELECT options FROM system.schemas WHERE namespace_id = '{}' AND table_name = 'stored_data'", + "SELECT options FROM system.schemas WHERE namespace_id = '{}' AND table_name = \ + 'stored_data'", namespace - )).expect("Failed to query table options"); + )) + .expect("Failed to query table options"); assert!( result.contains("local") || result.contains("STORAGE_ID"), diff --git a/cli/tests/smoke/ddl/smoke_test_alter_with_data.rs b/cli/tests/smoke/ddl/smoke_test_alter_with_data.rs index f5366b275..86fe6ab45 100644 --- a/cli/tests/smoke/ddl/smoke_test_alter_with_data.rs +++ b/cli/tests/smoke/ddl/smoke_test_alter_with_data.rs @@ -9,9 +9,10 @@ //! Note: Default values for new columns are only applied to newly inserted rows, //! not retroactively to existing rows (standard SQL behavior). -use crate::common::*; use std::time::{Duration, Instant}; +use crate::common::*; + /// Test ALTER TABLE ADD/DROP COLUMN with actual data verification /// /// Verifies that schema changes are immediately reflected in query results, diff --git a/cli/tests/smoke/ddl/smoke_test_backup_restore.rs b/cli/tests/smoke/ddl/smoke_test_backup_restore.rs index c615ffe76..a7c12496a 100644 --- a/cli/tests/smoke/ddl/smoke_test_backup_restore.rs +++ b/cli/tests/smoke/ddl/smoke_test_backup_restore.rs @@ -1,22 +1,22 @@ //! Smoke tests for BACKUP DATABASE / RESTORE DATABASE SQL commands. //! //! ## What is tested -//! - `BACKUP DATABASE TO ''` creates a job that completes and writes the expected -//! backup directory layout (`rocksdb/`, `storage/`, `snapshots/`, `streams/`). +//! - `BACKUP DATABASE TO ''` creates a job that completes and writes the expected backup +//! directory layout (`rocksdb/`, `storage/`, `snapshots/`, `streams/`). //! - `RESTORE DATABASE FROM ''` creates a restore job that reaches a terminal state. //! - Non-DBA users receive an authorization error when attempting either command. -//! 
- `RESTORE DATABASE FROM ''` returns a clear error immediately -//! (no job created). +//! - `RESTORE DATABASE FROM ''` returns a clear error immediately (no job +//! created). //! //! ## Notes -//! - Backup/restore paths must be reachable on the **server's** filesystem. -//! For the auto-started test server this is the local machine's temp directory. -//! - Restore requires a server restart to reload data — these tests verify only -//! that the restore job itself completes, not post-restart data consistency. +//! - Backup/restore paths must be reachable on the **server's** filesystem. For the auto-started +//! test server this is the local machine's temp directory. +//! - Restore requires a server restart to reload data — these tests verify only that the restore +//! job itself completes, not post-restart data consistency. + +use std::{path::PathBuf, time::Duration}; use crate::common::*; -use std::path::PathBuf; -use std::time::Duration; /// Timeout for a backup job (RocksDB BackupEngine + file copies can be slow). const BACKUP_JOB_TIMEOUT: Duration = Duration::from_secs(120); diff --git a/cli/tests/smoke/ddl/smoke_test_datatype_preservation.rs b/cli/tests/smoke/ddl/smoke_test_datatype_preservation.rs index 51f33cc13..82ee70ae1 100644 --- a/cli/tests/smoke/ddl/smoke_test_datatype_preservation.rs +++ b/cli/tests/smoke/ddl/smoke_test_datatype_preservation.rs @@ -9,12 +9,13 @@ //! //! 
Run with: cargo test --test smoke smoke_test_datatype_preservation +use reqwest::Client; +use serde_json::Value; + use crate::common::{ force_auto_test_server_url_async, generate_unique_namespace, get_access_token_for_url, test_context, }; -use reqwest::Client; -use serde_json::Value; /// Test that all KalamDataTypes are preserved correctly in query results #[tokio::test] @@ -81,7 +82,8 @@ async fn test_all_kalam_datatypes_are_preserved() { let data_type_str = match data_type_val { Value::String(s) => s.clone(), Value::Object(obj) => { - // Handle complex types like {"Embedding": 384} or {"Decimal": {"precision": 10, "scale": 2}} + // Handle complex types like {"Embedding": 384} or {"Decimal": {"precision": 10, + // "scale": 2}} if let Some((key, _)) = obj.iter().next() { key.clone() } else { diff --git a/cli/tests/smoke/ddl/smoke_test_ddl_alter.rs b/cli/tests/smoke/ddl/smoke_test_ddl_alter.rs index d12817b65..6d1daf792 100644 --- a/cli/tests/smoke/ddl/smoke_test_ddl_alter.rs +++ b/cli/tests/smoke/ddl/smoke_test_ddl_alter.rs @@ -2,7 +2,7 @@ //! //! Tests schema evolution and table modification: //! - ALTER TABLE ADD COLUMN -//! - ALTER TABLE DROP COLUMN +//! - ALTER TABLE DROP COLUMN //! - ALTER TABLE MODIFY COLUMN //! - ALTER TABLE SET TBLPROPERTIES (for SHARED tables) //! 
- Error cases (adding NOT NULL without DEFAULT, modifying system columns) @@ -83,7 +83,10 @@ fn smoke_test_alter_table_add_column() { let output_result = execute_sql_as_root_via_client_json(&select_sql); if output_result.is_err() { - println!("⚠️ Column 'age' not found in schema - ALTER TABLE may have succeeded syntactically but schema wasn't updated"); + println!( + "⚠️ Column 'age' not found in schema - ALTER TABLE may have succeeded syntactically \ + but schema wasn't updated" + ); println!(" This indicates ALTER TABLE ADD COLUMN needs further implementation"); return; } diff --git a/cli/tests/smoke/ddl/smoke_test_export_user_data.rs b/cli/tests/smoke/ddl/smoke_test_export_user_data.rs index 5ffa27c62..74aaaf2a6 100644 --- a/cli/tests/smoke/ddl/smoke_test_export_user_data.rs +++ b/cli/tests/smoke/ddl/smoke_test_export_user_data.rs @@ -2,22 +2,23 @@ //! `GET /v1/exports/{user_id}/{export_id}` download endpoint. //! //! ## What is tested -//! 1. `EXPORT USER DATA` triggers a `UserExport` job that completes, flushes all -//! user tables first, and writes a `.zip` file under the exports directory. +//! 1. `EXPORT USER DATA` triggers a `UserExport` job that completes, flushes all user tables first, +//! and writes a `.zip` file under the exports directory. //! 2. `SHOW EXPORT` returns the job status and a download URL once the job is done. //! 3. The download endpoint serves a valid ZIP file to the owning user. -//! 4. The download endpoint returns 403 Forbidden when a different user tries to -//! download another user's export. +//! 4. The download endpoint returns 403 Forbidden when a different user tries to download another +//! user's export. //! //! ## Design notes -//! - Each test creates its own isolated user so idempotency keys never collide -//! across parallel or repeated test runs. -//! - The export executor flushes **all** user tables before copying Parquet files, -//! so the export job timeout is generous (10 min) to accommodate CI slowness. +//! 
- Each test creates its own isolated user so idempotency keys never collide across parallel or +//! repeated test runs. +//! - The export executor flushes **all** user tables before copying Parquet files, so the export +//! job timeout is generous (10 min) to accommodate CI slowness. -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Timeout for an export job (flush user's data + copy Parquet + zip). /// With the optimized executor that only flushes tables with user data, /// this should complete in well under 60 seconds. @@ -65,7 +66,7 @@ fn http_get_with_token( let rt = tokio::runtime::Builder::new_current_thread().enable_all().build()?; let result = rt.block_on(async move { - let response = reqwest::Client::new().get(&url).bearer_auth(&token).send().await?; + let response = shared_http_client().get(&url).bearer_auth(&token).send().await?; let status = response.status().as_u16(); let ct = response @@ -313,7 +314,8 @@ fn smoke_export_download_zip_is_valid() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE {}", namespace)) .expect("CREATE NAMESPACE failed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT AUTO_INCREMENT PRIMARY KEY, data TEXT) WITH (TYPE='USER', FLUSH_POLICY='rows:5')", + "CREATE TABLE {} (id BIGINT AUTO_INCREMENT PRIMARY KEY, data TEXT) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:5')", full_table )) .expect("CREATE TABLE failed"); diff --git a/cli/tests/smoke/dml/smoke_test_dml_extended.rs b/cli/tests/smoke/dml/smoke_test_dml_extended.rs index 1755b9a91..29ae9ee8c 100644 --- a/cli/tests/smoke/dml/smoke_test_dml_extended.rs +++ b/cli/tests/smoke/dml/smoke_test_dml_extended.rs @@ -9,9 +9,10 @@ //! //! 
Reference: docs/SQL.md DML section lines 471-584 -use crate::common::*; use std::time::{Duration, Instant}; +use crate::common::*; + /// Test multi-row INSERT with batch VALUES /// /// Verifies: @@ -391,7 +392,10 @@ fn smoke_test_hard_delete_stream_table() { // Issue: delete_by_pk_value returns Ok(false) instead of actually deleting rows // See: backend/crates/kalamdb-tables/src/stream_tables/stream_table_provider.rs:291 if all_output.contains("click") { - println!("⚠️ WARNING: STREAM table DELETE not working - rows still present (known backend limitation)"); + println!( + "⚠️ WARNING: STREAM table DELETE not working - rows still present (known backend \ + limitation)" + ); println!("⚠️ TODO: Implement delete_by_pk_value for STREAM tables"); // TODO: Uncomment when backend fix is implemented: // panic!("Expected click events to be physically removed from STREAM table"); diff --git a/cli/tests/smoke/dml/smoke_test_dml_watermark_optimization.rs b/cli/tests/smoke/dml/smoke_test_dml_watermark_optimization.rs index d7ead6c73..be33f956f 100644 --- a/cli/tests/smoke/dml/smoke_test_dml_watermark_optimization.rs +++ b/cli/tests/smoke/dml/smoke_test_dml_watermark_optimization.rs @@ -6,9 +6,10 @@ // The optimization removes unnecessary Meta group synchronization for DML operations // where the table schema has already been validated. 
-use crate::common::*; use std::time::Instant; +use crate::common::*; + /// Test that INSERT operations complete in acceptable time (no Meta waiting) #[ntest::timeout(120000)] #[test] @@ -110,7 +111,8 @@ fn smoke_test_watermark_dml_update() { .expect("CREATE NAMESPACE should succeed"); let create_table_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, status TEXT, counter INT) WITH (TYPE = 'USER', STORAGE_ID = 'local')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, status TEXT, counter INT) WITH (TYPE = 'USER', \ + STORAGE_ID = 'local')", full_table_name ); execute_sql_as_root_via_client(&create_table_sql).expect("CREATE TABLE should succeed"); diff --git a/cli/tests/smoke/dml/smoke_test_dml_wide_columns.rs b/cli/tests/smoke/dml/smoke_test_dml_wide_columns.rs index c1753f35d..1bf6922ca 100644 --- a/cli/tests/smoke/dml/smoke_test_dml_wide_columns.rs +++ b/cli/tests/smoke/dml/smoke_test_dml_wide_columns.rs @@ -2,9 +2,10 @@ // Covers: insert -> check -> update -> check -> multi-column update -> check -> delete -> check // Also verifies subscription delivers UPDATE and DELETE events -use crate::common::*; use std::time::Duration; +use crate::common::*; + fn create_namespace(ns: &str) { let _ = execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)); } @@ -22,7 +23,8 @@ fn extract_first_id_from_json(json_output: &str) -> Option { fn run_dml_sequence(full: &str, _is_shared: bool) { // insert row 1 let ins1 = format!( - "INSERT INTO {} (name, age, active, score, balance, note) VALUES ('alpha', 25, true, 12.5, 1000, 'note-a')", + "INSERT INTO {} (name, age, active, score, balance, note) VALUES ('alpha', 25, true, \ + 12.5, 1000, 'note-a')", full ); let out1 = execute_sql_as_root_via_client(&ins1).expect("insert 1 should succeed"); @@ -30,7 +32,8 @@ fn run_dml_sequence(full: &str, _is_shared: bool) { // insert row 2 let ins2 = format!( - "INSERT INTO {} (name, age, active, score, balance, note) VALUES ('beta', 30, false, 99.9, 2000, 
'note-b')", + "INSERT INTO {} (name, age, active, score, balance, note) VALUES ('beta', 30, false, \ + 99.9, 2000, 'note-b')", full ); let out2 = execute_sql_as_root_via_client(&ins2).expect("insert 2 should succeed"); @@ -65,7 +68,8 @@ fn run_dml_sequence(full: &str, _is_shared: bool) { // multi-column update on alpha let upd2 = format!( - "UPDATE {} SET name='alpha2', age=42, active=false, score=88.75, balance=4321, note='note-upd' WHERE id = {}", + "UPDATE {} SET name='alpha2', age=42, active=false, score=88.75, balance=4321, \ + note='note-upd' WHERE id = {}", full, id_alpha ); let out_upd2 = execute_sql_as_root_via_client(&upd2).expect("update 2 should succeed"); @@ -192,14 +196,16 @@ fn smoke_subscription_update_delete_notifications() { create_namespace(&namespace); let create_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR, updated_at TIMESTAMP, is_deleted BOOLEAN, note VARCHAR) WITH (TYPE = 'USER')", + "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR, updated_at TIMESTAMP, is_deleted \ + BOOLEAN, note VARCHAR) WITH (TYPE = 'USER')", full ); execute_sql_as_root_via_client(&create_sql).expect("create user table should succeed"); // Insert initial row BEFORE subscribing let _ = execute_sql_as_root_via_client(&format!( - "INSERT INTO {} (id, name, updated_at, is_deleted, note) VALUES (1, 'one', 1730497770045, false, 'n1')", + "INSERT INTO {} (id, name, updated_at, is_deleted, note) VALUES (1, 'one', 1730497770045, \ + false, 'n1')", full )); diff --git a/cli/tests/smoke/dml/smoke_test_insert_throughput.rs b/cli/tests/smoke/dml/smoke_test_insert_throughput.rs index 9e82a20a9..91742d256 100644 --- a/cli/tests/smoke/dml/smoke_test_insert_throughput.rs +++ b/cli/tests/smoke/dml/smoke_test_insert_throughput.rs @@ -4,10 +4,15 @@ // - Batched inserts (multi-row VALUES) // - Parallel inserts +use std::{ + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Instant, +}; + use crate::common::*; -use std::sync::atomic::{AtomicUsize, 
Ordering}; -use std::sync::Arc; -use std::time::Instant; // Test configuration const SINGLE_INSERT_COUNT: usize = 100; // Single-row inserts to test @@ -38,7 +43,8 @@ fn smoke_test_insert_throughput_single() { .expect("CREATE NAMESPACE should succeed"); let create_table_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, value TEXT NOT NULL, created_at TIMESTAMP DEFAULT NOW())", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, value TEXT NOT NULL, created_at TIMESTAMP \ + DEFAULT NOW())", full_table_name ); execute_sql_as_root_via_client(&create_table_sql).expect("CREATE TABLE should succeed"); @@ -218,7 +224,8 @@ fn smoke_test_insert_throughput_parallel() { // Use AUTO_INCREMENT to avoid PK conflicts between parallel workers let create_table_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY AUTO_INCREMENT, worker_id INT, seq INT, value TEXT)", + "CREATE TABLE {} (id BIGINT PRIMARY KEY AUTO_INCREMENT, worker_id INT, seq INT, value \ + TEXT)", full_table_name ); execute_sql_as_root_via_client(&create_table_sql).expect("CREATE TABLE should succeed"); diff --git a/cli/tests/smoke/flushing/smoke_test_flush_manifest.rs b/cli/tests/smoke/flushing/smoke_test_flush_manifest.rs index 09c66be0e..c453f84fb 100644 --- a/cli/tests/smoke/flushing/smoke_test_flush_manifest.rs +++ b/cli/tests/smoke/flushing/smoke_test_flush_manifest.rs @@ -7,9 +7,10 @@ //! //! Reference: README.md lines 58-67, docs/SQL.md flush section -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Test manifest.json creation after flushing USER table /// /// Verifies: diff --git a/cli/tests/smoke/flushing/smoke_test_flush_operations.rs b/cli/tests/smoke/flushing/smoke_test_flush_operations.rs index b131ef1f6..2cf8bc5f4 100644 --- a/cli/tests/smoke/flushing/smoke_test_flush_operations.rs +++ b/cli/tests/smoke/flushing/smoke_test_flush_operations.rs @@ -13,9 +13,10 @@ //! 4. Verify job completes successfully //! 5. 
Query all data and verify correct row count from both sources -use crate::common::*; use std::time::Duration; +use crate::common::*; + const JOB_TIMEOUT: Duration = Duration::from_secs(30); const FLUSH_POLICY_ROWS: usize = 50; const INSERT_ROWS: usize = 200; diff --git a/cli/tests/smoke/flushing/smoke_test_flush_pk_integrity.rs b/cli/tests/smoke/flushing/smoke_test_flush_pk_integrity.rs index ceb078d3a..6ac076230 100644 --- a/cli/tests/smoke/flushing/smoke_test_flush_pk_integrity.rs +++ b/cli/tests/smoke/flushing/smoke_test_flush_pk_integrity.rs @@ -8,12 +8,14 @@ //! - INSERT duplicate PK fails (both hot and cold) //! - UPDATE works correctly post-flush -use crate::common::*; use std::time::Duration; +use crate::common::*; + const JOB_TIMEOUT: Duration = Duration::from_secs(90); -/// Test flush with PK integrity: insert, update, flush, re-query, duplicate PK check, post-flush update +/// Test flush with PK integrity: insert, update, flush, re-query, duplicate PK check, post-flush +/// update #[ntest::timeout(180_000)] #[test] fn smoke_test_flush_pk_integrity_user_table() { @@ -332,7 +334,10 @@ fn smoke_test_flush_pk_integrity_shared_table() { )); if duplicate_result.is_ok() { - println!("⚠️ WARNING: Duplicate PK insert succeeded for SHARED table (known backend limitation)"); + println!( + "⚠️ WARNING: Duplicate PK insert succeeded for SHARED table (known backend \ + limitation)" + ); println!("⚠️ TODO: Backend must implement PK uniqueness validation against cold storage"); } else { println!("✅ Duplicate PK correctly rejected"); diff --git a/cli/tests/smoke/impersonating/smoke_test_as_user_authorization.rs b/cli/tests/smoke/impersonating/smoke_test_as_user_authorization.rs index 342950efb..874671580 100644 --- a/cli/tests/smoke/impersonating/smoke_test_as_user_authorization.rs +++ b/cli/tests/smoke/impersonating/smoke_test_as_user_authorization.rs @@ -37,7 +37,8 @@ fn expect_unauthorized(result: Result>, conte fn 
smoke_security_regular_user_cannot_impersonate_privileged_users_in_batch() { if !is_server_running() { eprintln!( - "Skipping smoke_security_regular_user_cannot_impersonate_privileged_users_in_batch: server not running at {}", + "Skipping smoke_security_regular_user_cannot_impersonate_privileged_users_in_batch: \ + server not running at {}", server_url() ); return; diff --git a/cli/tests/smoke/impersonating/smoke_test_as_user_chat_impersonation.rs b/cli/tests/smoke/impersonating/smoke_test_as_user_chat_impersonation.rs index e46d42940..05b6133da 100644 --- a/cli/tests/smoke/impersonating/smoke_test_as_user_chat_impersonation.rs +++ b/cli/tests/smoke/impersonating/smoke_test_as_user_chat_impersonation.rs @@ -1,8 +1,12 @@ -use crate::common::*; +use std::{ + sync::mpsc as std_mpsc, + thread, + time::{Duration, Instant}, +}; + use kalam_client::KalamLinkTimeouts; -use std::sync::mpsc as std_mpsc; -use std::thread; -use std::time::{Duration, Instant}; + +use crate::common::*; struct AuthSubscriptionListener { event_receiver: std_mpsc::Receiver, @@ -248,13 +252,15 @@ fn setup_chat_fixture(suffix: &str) -> ChatFixture { .expect("Failed to create conversations table"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, conversation_id BIGINT, sender TEXT, role TEXT, content TEXT, status TEXT) WITH (TYPE='USER')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, conversation_id BIGINT, sender TEXT, role TEXT, \ + content TEXT, status TEXT) WITH (TYPE='USER')", messages_table )) .expect("Failed to create messages table"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, conversation_id BIGINT, user_name TEXT, is_typing BOOLEAN, state TEXT) WITH (TYPE='STREAM', TTL_SECONDS=3600)", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, conversation_id BIGINT, user_name TEXT, \ + is_typing BOOLEAN, state TEXT) WITH (TYPE='STREAM', TTL_SECONDS=3600)", typing_table )) .expect("Failed to create typing table"); @@ -305,7 +311,8 @@ 
fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { &fixture.regular_user, &fixture.password, &format!( - "INSERT INTO {} (id, conversation_id, sender, role, content, status) VALUES ({}, {}, '{}', 'user', 'Hello from regular user', 'sent')", + "INSERT INTO {} (id, conversation_id, sender, role, content, status) VALUES ({}, {}, \ + '{}', 'user', 'Hello from regular user', 'sent')", fixture.messages_table, user_message_id, conversation_id, fixture.regular_user ), ) @@ -325,9 +332,9 @@ fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, user_name, is_typing, state) VALUES (3001, {}, 'AI Assistant', true, 'thinking'))", - fixture.regular_user, - fixture.typing_table, conversation_id + "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, user_name, is_typing, \ + state) VALUES (3001, {}, 'AI Assistant', true, 'thinking'))", + fixture.regular_user, fixture.typing_table, conversation_id ), ) .expect("Service should insert typing event AS USER"); @@ -336,9 +343,9 @@ fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, user_name, is_typing, state) VALUES (3002, {}, 'AI Assistant', true, 'typing'))", - fixture.regular_user, - fixture.typing_table, conversation_id + "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, user_name, is_typing, \ + state) VALUES (3002, {}, 'AI Assistant', true, 'typing'))", + fixture.regular_user, fixture.typing_table, conversation_id ), ) .expect("Service should insert second typing event AS USER"); @@ -368,9 +375,9 @@ fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { }) .unwrap_or_else(|fallback_error| { panic!( - "Regular user should receive typing signal via subscription or persisted rows: subscription 
error: {}; fallback error: {}", - error, - fallback_error + "Regular user should receive typing signal via subscription or persisted rows: \ + subscription error: {}; fallback error: {}", + error, fallback_error ) }); assert!( @@ -418,7 +425,8 @@ fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, sender, role, content, status) VALUES ({}, {}, 'AI Assistant', 'assistant', '{}', 'sent'))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, conversation_id, sender, role, content, \ + status) VALUES ({}, {}, 'AI Assistant', 'assistant', '{}', 'sent'))", fixture.regular_user_id, fixture.messages_table, assistant_message_id, @@ -465,7 +473,8 @@ fn run_base_chat_flow_with_impersonation(fixture: &ChatFixture) -> BaseFlow { assert!( fallback.contains(&assistant_message_id.to_string()) || fallback.to_lowercase().contains("service response via as user"), - "Regular user should receive inserted assistant message in subscription: {}. Fallback result: {}", + "Regular user should receive inserted assistant message in subscription: {}. 
Fallback \ + result: {}", error, fallback ); @@ -532,9 +541,9 @@ fn smoke_as_user_chat_insert_and_select_flow() { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER BY id)", - fixture.regular_user_id, - fixture.messages_table, flow.conversation_id + "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER \ + BY id)", + fixture.regular_user_id, fixture.messages_table, flow.conversation_id ), ) .expect("Service SELECT AS USER should succeed"); @@ -566,9 +575,9 @@ fn smoke_as_user_chat_select_scope_for_different_user() { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER BY id)", - fixture.other_user_id, - fixture.messages_table, flow.conversation_id + "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER \ + BY id)", + fixture.other_user_id, fixture.messages_table, flow.conversation_id ), ) .expect("Service SELECT AS USER for other user should succeed"); @@ -636,7 +645,8 @@ fn smoke_as_user_chat_update_flow() { assert!( fallback.contains(&flow.assistant_message_id.to_string()) || fallback.contains("service response via as user"), - "Regular user update listener should receive initial message snapshot before update: {}. Fallback result: {}", + "Regular user update listener should receive initial message snapshot before update: \ + {}. 
Fallback result: {}", error, fallback ); @@ -646,9 +656,9 @@ fn smoke_as_user_chat_update_flow() { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (UPDATE {} SET content = 'Service response updated', status = 'delivered' WHERE id = {})", - fixture.regular_user_id, - fixture.messages_table, flow.assistant_message_id + "EXECUTE AS USER '{}' (UPDATE {} SET content = 'Service response updated', status = \ + 'delivered' WHERE id = {})", + fixture.regular_user_id, fixture.messages_table, flow.assistant_message_id ), ) .expect("Service UPDATE AS USER should succeed"); @@ -738,9 +748,9 @@ fn smoke_as_user_chat_delete_flow() { &fixture.service_user, &fixture.password, &format!( - "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER BY id)", - fixture.regular_user_id, - fixture.messages_table, flow.conversation_id + "EXECUTE AS USER '{}' (SELECT role, content FROM {} WHERE conversation_id = {} ORDER \ + BY id)", + fixture.regular_user_id, fixture.messages_table, flow.conversation_id ), ) .expect("Service SELECT AS USER after delete should succeed"); diff --git a/cli/tests/smoke/impersonating/smoke_test_as_user_impersonation.rs b/cli/tests/smoke/impersonating/smoke_test_as_user_impersonation.rs index 30fc79756..603f08773 100644 --- a/cli/tests/smoke/impersonating/smoke_test_as_user_impersonation.rs +++ b/cli/tests/smoke/impersonating/smoke_test_as_user_impersonation.rs @@ -8,9 +8,10 @@ //! - DELETE AS USER removes records as impersonated user //! 
- AS USER rejected on SHARED tables -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Helper to create a unique namespace for this test fn create_test_namespace(suffix: &str) -> String { generate_unique_namespace(&format!("smoke_as_user_{}", suffix)) @@ -62,6 +63,34 @@ fn get_user_id(user_id: &str) -> Option { None } +fn wait_for_query_success_with( + sql: &str, + timeout: Duration, + execute: F, +) -> Result> +where + F: Fn(&str) -> Result>, +{ + let start = std::time::Instant::now(); + let mut last_error = None; + + while start.elapsed() < timeout { + match execute(sql) { + Ok(output) => return Ok(output), + Err(err) => { + last_error = Some(err.to_string()); + std::thread::sleep(Duration::from_millis(120)); + }, + } + } + + Err(format!( + "Timed out waiting for query to succeed. Last error: {}", + last_error.unwrap_or_else(|| "".to_string()) + ) + .into()) +} + /// Smoke Test: Regular user CANNOT use AS USER (authorization check) #[ntest::timeout(120000)] #[test] @@ -369,7 +398,8 @@ fn smoke_as_user_rejected_on_shared_table() { // Create SHARED table (not USER table) execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (config_key VARCHAR PRIMARY KEY, config_value VARCHAR) WITH (TYPE='SHARED')", + "CREATE TABLE {} (config_key VARCHAR PRIMARY KEY, config_value VARCHAR) WITH \ + (TYPE='SHARED')", full_table )) .expect("Failed to create SHARED table"); @@ -450,7 +480,8 @@ fn smoke_as_user_full_workflow() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (1, 'Alice Task 1', false))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (1, 'Alice Task 1', \ + false))", alice_user_id, full_table ), ) @@ -460,7 +491,8 @@ fn smoke_as_user_full_workflow() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (2, 'Alice Task 2', false))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (2, 'Alice Task 2', \ + 
false))", alice_user_id, full_table ), ) @@ -471,7 +503,8 @@ fn smoke_as_user_full_workflow() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (10, 'Bob Task 1', false))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, title, done) VALUES (10, 'Bob Task 1', \ + false))", bob_user_id, full_table ), ) @@ -681,10 +714,12 @@ fn smoke_as_user_stream_table_isolation() { .expect("Failed to create namespace"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, payload VARCHAR) WITH (TYPE='STREAM', TTL_SECONDS=3600)", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, payload VARCHAR) WITH (TYPE='STREAM', \ + TTL_SECONDS=3600)", full_table )) .expect("Failed to create stream table"); + wait_for_table_ready(&full_table, Duration::from_secs(10)).expect("stream table not ready"); create_user_with_retry(&service_user, password, "service"); create_user_with_retry(&user1, password, "user"); @@ -703,9 +738,12 @@ fn smoke_as_user_stream_table_isolation() { ) .expect("Failed to insert stream row for user1"); - let user2_direct = - execute_sql_via_client_as(&user2, password, &format!("SELECT * FROM {}", full_table)) - .expect("User2 stream select failed"); + let user2_direct = wait_for_query_success_with( + &format!("SELECT * FROM {}", full_table), + Duration::from_secs(30), + |sql| execute_sql_via_client_as(&user2, password, sql), + ) + .expect("User2 stream select failed"); assert!(!user2_direct.contains("stream-u1")); let user1_select_sql = format!("EXECUTE AS USER '{}' (SELECT * FROM {})", user1_id, full_table); @@ -718,10 +756,10 @@ fn smoke_as_user_stream_table_isolation() { .expect("Service SELECT AS USER user1 on stream failed"); assert!(service_as_user1.contains("stream-u1")); - let service_as_user2 = execute_sql_via_client_as( - &service_user, - password, + let service_as_user2 = wait_for_query_success_with( &format!("EXECUTE AS USER '{}' (SELECT * FROM {})", user2_id, full_table), + 
Duration::from_secs(30), + |sql| execute_sql_via_client_as(&service_user, password, sql), ) .expect("Service SELECT AS USER user2 on stream failed"); assert!(!service_as_user2.contains("stream-u1")); diff --git a/cli/tests/smoke/leader_only_reads.rs b/cli/tests/smoke/leader_only_reads.rs index 31d5b8e45..d000f4656 100644 --- a/cli/tests/smoke/leader_only_reads.rs +++ b/cli/tests/smoke/leader_only_reads.rs @@ -9,9 +9,10 @@ // Note: Testing actual NOT_LEADER errors on follower nodes requires a multi-node cluster. // These tests focus on verifying the implementation is wired correctly on a single node. -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Test that basic SELECT queries work on the leader node /// This verifies the leader check doesn't break normal operation #[ntest::timeout(120000)] diff --git a/cli/tests/smoke/query/smoke_test_00_parallel_query_burst.rs b/cli/tests/smoke/query/smoke_test_00_parallel_query_burst.rs index 370277fb2..fd93feb2c 100644 --- a/cli/tests/smoke/query/smoke_test_00_parallel_query_burst.rs +++ b/cli/tests/smoke/query/smoke_test_00_parallel_query_burst.rs @@ -6,10 +6,14 @@ // The parallelism is handled within a single process using std threads that share // a single tokio runtime. -use crate::common::*; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + use serde_json::Value; -use std::sync::Arc; -use std::time::{Duration, Instant}; + +use crate::common::*; // Keep enough parallelism to exercise concurrent reads without poisoning later smoke // tests on externally managed servers that enforce stricter request/IP guards. 
@@ -27,17 +31,9 @@ fn print_phase0_explain_baseline(label: &str, query: &str) { let analyze_sql = format!("EXPLAIN ANALYZE {}", query); let analyze_output = execute_sql_as_root_via_client(&analyze_sql).unwrap_or_else(|error| { - panic!( - "phase-0 EXPLAIN ANALYZE failed for '{}': {}", - analyze_sql, - error - ) + panic!("phase-0 EXPLAIN ANALYZE failed for '{}': {}", analyze_sql, error) }); - println!( - "Phase-0 EXPLAIN ANALYZE baseline [{}]:\n{}", - label, - analyze_output - ); + println!("Phase-0 EXPLAIN ANALYZE baseline [{}]:\n{}", label, analyze_output); } #[ntest::timeout(180000)] @@ -64,7 +60,8 @@ fn smoke_test_00_parallel_query_burst() { // Create the user table with simple schema let create_table_sql = format!( - "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, value VARCHAR NOT NULL) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:1000')", + "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, value VARCHAR NOT NULL) WITH (TYPE = \ + 'USER', FLUSH_POLICY = 'rows:1000')", full_table_name ); execute_sql_as_root_via_client(&create_table_sql).expect("CREATE TABLE should succeed"); @@ -242,7 +239,8 @@ fn extract_scalar(json_output: &str, field: &str) -> i64 { let field_value = extract_typed_value(field_value); - // Handle both number and string representations (large ints are serialized as strings for JS safety) + // Handle both number and string representations (large ints are serialized as strings for JS + // safety) match &field_value { Value::Number(n) => n .as_i64() diff --git a/cli/tests/smoke/query/smoke_test_filter_pushdown.rs b/cli/tests/smoke/query/smoke_test_filter_pushdown.rs index 8fb1f74bd..033c87bae 100644 --- a/cli/tests/smoke/query/smoke_test_filter_pushdown.rs +++ b/cli/tests/smoke/query/smoke_test_filter_pushdown.rs @@ -1,8 +1,9 @@ use std::collections::HashMap; -use crate::common::*; use serde_json::Value; +use crate::common::*; + struct CleanupGuard { namespace: String, user: String, @@ -27,9 +28,10 @@ fn query_rows(sql: &str) -> Vec> { } fn 
scalar_string(row: &HashMap, column: &str) -> String { - let value = extract_typed_value(row.get(column).unwrap_or_else(|| { - panic!("expected column '{}' in row {:?}", column, row) - })); + let value = extract_typed_value( + row.get(column) + .unwrap_or_else(|| panic!("expected column '{}' in row {:?}", column, row)), + ); match value { Value::String(text) => text, other => panic!("expected '{}' to be string-like, got {:?}", column, other), @@ -40,10 +42,7 @@ fn scalar_string(row: &HashMap, column: &str) -> String { #[test] fn smoke_test_filter_pushdown() { if !is_server_running() { - println!( - "Skipping smoke_test_filter_pushdown: server not running at {}", - server_url() - ); + println!("Skipping smoke_test_filter_pushdown: server not running at {}", server_url()); return; } @@ -68,12 +67,14 @@ fn smoke_test_filter_pushdown() { )) .expect("CREATE USER should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', ACCESS_LEVEL = 'PUBLIC')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', ACCESS_LEVEL = \ + 'PUBLIC')", full_shared_table )) .expect("CREATE SHARED TABLE should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', TTL_SECONDS = 60)", + "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', \ + TTL_SECONDS = 60)", full_stream_table )) .expect("CREATE STREAM TABLE should succeed"); @@ -86,30 +87,19 @@ fn smoke_test_filter_pushdown() { "INSERT INTO {} (event_id, payload) VALUES ('evt-1', 'alpha')", full_stream_table ), - format!( - "INSERT INTO {} (event_id, payload) VALUES ('evt-2', 'beta')", - full_stream_table - ), - format!( - "INSERT INTO {} (event_id, payload) VALUES ('evt-3', 'beta')", - full_stream_table - ), + format!("INSERT INTO {} (event_id, payload) VALUES ('evt-2', 'beta')", full_stream_table), + format!("INSERT INTO {} 
(event_id, payload) VALUES ('evt-3', 'beta')", full_stream_table), ] { execute_sql_as_root_via_client(&sql) .unwrap_or_else(|error| panic!("seed statement failed for '{}': {}", sql, error)); } - let system_rows = query_rows(&format!( - "SELECT user_id FROM system.users WHERE user_id = '{}'", - user - )); + let system_rows = + query_rows(&format!("SELECT user_id FROM system.users WHERE user_id = '{}'", user)); assert_eq!(system_rows.len(), 1, "system exact filter should find one user"); assert_eq!(scalar_string(&system_rows[0], "user_id"), user); - let shared_pk_rows = query_rows(&format!( - "SELECT id FROM {} WHERE id = 2", - full_shared_table - )); + let shared_pk_rows = query_rows(&format!("SELECT id FROM {} WHERE id = 2", full_shared_table)); assert_eq!(shared_pk_rows.len(), 1, "shared PK filter should find one row"); assert_eq!(scalar_string(&shared_pk_rows[0], "id"), "2"); @@ -117,16 +107,12 @@ fn smoke_test_filter_pushdown() { "SELECT id FROM {} WHERE name = 'beta' ORDER BY id", full_shared_table )); - let shared_name_ids: Vec = shared_name_rows - .iter() - .map(|row| scalar_string(row, "id")) - .collect(); + let shared_name_ids: Vec = + shared_name_rows.iter().map(|row| scalar_string(row, "id")).collect(); assert_eq!(shared_name_ids, vec!["2".to_string(), "3".to_string()]); - let stream_rows = query_rows(&format!( - "SELECT event_id FROM {} WHERE event_id = 'evt-2'", - full_stream_table - )); + let stream_rows = + query_rows(&format!("SELECT event_id FROM {} WHERE event_id = 'evt-2'", full_stream_table)); assert_eq!(stream_rows.len(), 1, "stream exact filter should find one row"); assert_eq!(scalar_string(&stream_rows[0], "event_id"), "evt-2"); -} \ No newline at end of file +} diff --git a/cli/tests/smoke/query/smoke_test_json_operators.rs b/cli/tests/smoke/query/smoke_test_json_operators.rs index bb2ca568a..aa02a10ed 100644 --- a/cli/tests/smoke/query/smoke_test_json_operators.rs +++ b/cli/tests/smoke/query/smoke_test_json_operators.rs @@ -13,18 +13,19 @@ 
//! - WHERE clause filtering on JSON fields //! - Multiple JSON columns in one table -use crate::common::*; use std::time::{Duration, Instant}; -/// Helper: create a shared table with a JSON column, insert data, and return (namespace, full_table). +use crate::common::*; + +/// Helper: create a shared table with a JSON column, insert data, and return (namespace, +/// full_table). fn setup_json_table(prefix: &str, extra_cols: &str) -> (String, String) { let namespace = generate_unique_namespace(&format!("json_{prefix}")); let table = generate_unique_table(&format!("{prefix}_tbl")); let full_table = format!("{namespace}.{table}"); - let _ = execute_sql_as_root_via_client(&format!( - "DROP NAMESPACE IF EXISTS {namespace} CASCADE" - )); + let _ = + execute_sql_as_root_via_client(&format!("DROP NAMESPACE IF EXISTS {namespace} CASCADE")); execute_sql_as_root_via_client(&format!("CREATE NAMESPACE {namespace}")) .expect("create namespace"); @@ -43,9 +44,8 @@ fn setup_json_table(prefix: &str, extra_cols: &str) -> (String, String) { } fn cleanup(namespace: &str) { - let _ = execute_sql_as_root_via_client(&format!( - "DROP NAMESPACE IF EXISTS {namespace} CASCADE" - )); + let _ = + execute_sql_as_root_via_client(&format!("DROP NAMESPACE IF EXISTS {namespace} CASCADE")); } // ── JSON round-trip: objects, arrays, nested, primitives ────────────────── @@ -236,9 +236,7 @@ fn smoke_json_exists_operator_and_contains_function() { .expect("insert exists payload"); let output = wait_for_query_contains_with( - &format!( - "SELECT doc->>'customer_id' AS customer_id FROM {tbl} WHERE doc ? 'customer_id'" - ), + &format!("SELECT doc->>'customer_id' AS customer_id FROM {tbl} WHERE doc ? 
'customer_id'"), "cust_123", Duration::from_secs(5), execute_sql_as_root_via_client, @@ -252,7 +250,8 @@ fn smoke_json_exists_operator_and_contains_function() { let output = wait_for_query_contains_with( &format!( - "SELECT json_as_text(doc, 'status') AS status FROM {tbl} WHERE json_contains(doc, 'customer_id')" + "SELECT json_as_text(doc, 'status') AS status FROM {tbl} WHERE json_contains(doc, \ + 'customer_id')" ), "paid", Duration::from_secs(5), @@ -308,8 +307,10 @@ fn smoke_json_where_filter() { assert!(output.contains("1"), "should contain priority 1: {output}"); assert!(output.contains("3"), "should contain priority 3: {output}"); // Row with status=inactive should not appear - assert!(!output.contains("priority") || !output.contains("2") || output.contains("1"), - "should filter out inactive row"); + assert!( + !output.contains("priority") || !output.contains("2") || output.contains("1"), + "should filter out inactive row" + ); cleanup(&ns); println!("✅ smoke_json_where_filter passed"); @@ -334,7 +335,8 @@ fn smoke_json_helper_functions() { let output = wait_for_query_contains_with( &format!( - "SELECT json_length(doc, 'items') AS item_count, json_get_int(doc, 'count') AS count_value, json_get_bool(doc, 'flag') AS flag_value FROM {tbl}" + "SELECT json_length(doc, 'items') AS item_count, json_get_int(doc, 'count') AS \ + count_value, json_get_bool(doc, 'flag') AS flag_value FROM {tbl}" ), "7", Duration::from_secs(5), @@ -389,10 +391,7 @@ fn smoke_json_big_payload() { } let big_json = format!("{{{}}}", entries.join(",")); let payload_size = big_json.len(); - assert!( - payload_size > 64_000, - "payload should be >64KB, got {payload_size}" - ); + assert!(payload_size > 64_000, "payload should be >64KB, got {payload_size}"); println!(" Big JSON payload size: {payload_size} bytes"); // Insert the big JSON @@ -413,18 +412,17 @@ fn smoke_json_big_payload() { println!(" Big JSON insert+query took {:.2}s", elapsed.as_secs_f64()); // Also verify last key - let output 
= execute_sql_as_root_via_client(&format!( - "SELECT doc->>'key_499' AS k499 FROM {tbl}" - )) - .expect("select big json key_499"); + let output = + execute_sql_as_root_via_client(&format!("SELECT doc->>'key_499' AS k499 FROM {tbl}")) + .expect("select big json key_499"); - assert!( - output.contains("value_499"), - "should extract key_499: {output}" - ); + assert!(output.contains("value_499"), "should extract key_499: {output}"); cleanup(&ns); - println!("✅ smoke_json_big_payload passed ({payload_size} bytes, {:.2}s)", elapsed.as_secs_f64()); + println!( + "✅ smoke_json_big_payload passed ({payload_size} bytes, {:.2}s)", + elapsed.as_secs_f64() + ); } // ── Multiple JSON columns ──────────────────────────────────────────────── diff --git a/cli/tests/smoke/query/smoke_test_provider_exec_models.rs b/cli/tests/smoke/query/smoke_test_provider_exec_models.rs index 69420a356..80328b254 100644 --- a/cli/tests/smoke/query/smoke_test_provider_exec_models.rs +++ b/cli/tests/smoke/query/smoke_test_provider_exec_models.rs @@ -20,12 +20,14 @@ fn smoke_test_provider_exec_models() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)) .expect("CREATE NAMESPACE should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', ACCESS_LEVEL = 'PUBLIC')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'SHARED', ACCESS_LEVEL = \ + 'PUBLIC')", full_shared_table )) .expect("CREATE SHARED TABLE should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', TTL_SECONDS = 60)", + "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', \ + TTL_SECONDS = 60)", full_stream_table )) .expect("CREATE STREAM TABLE should succeed"); @@ -84,4 +86,4 @@ fn smoke_test_provider_exec_models() { } let _ = execute_sql_as_root_via_client(&format!("DROP NAMESPACE {} CASCADE", 
namespace)); -} \ No newline at end of file +} diff --git a/cli/tests/smoke/query/smoke_test_queries_benchmark.rs b/cli/tests/smoke/query/smoke_test_queries_benchmark.rs index 0474a8f2d..29396d39f 100644 --- a/cli/tests/smoke/query/smoke_test_queries_benchmark.rs +++ b/cli/tests/smoke/query/smoke_test_queries_benchmark.rs @@ -3,9 +3,10 @@ // - Inserts X rows and measures rows/sec // - Paginates SELECT 10 rows/page and measures pages/sec -use crate::common::*; use std::time::Instant; +use crate::common::*; + // Global rows to insert (can be overridden via KBENCH_ROWS env) // Reduced from 1000 to 200 for faster smoke execution while still exercising pagination. // This ensures the test completes within timeout even under load. @@ -19,17 +20,9 @@ fn print_phase0_explain_baseline(label: &str, query: &str) { let analyze_sql = format!("EXPLAIN ANALYZE {}", query); let analyze_output = execute_sql_as_root_via_client(&analyze_sql).unwrap_or_else(|error| { - panic!( - "phase-0 EXPLAIN ANALYZE failed for '{}': {}", - analyze_sql, - error - ) + panic!("phase-0 EXPLAIN ANALYZE failed for '{}': {}", analyze_sql, error) }); - println!( - "Phase-0 EXPLAIN ANALYZE baseline [{}]:\n{}", - label, - analyze_output - ); + println!("Phase-0 EXPLAIN ANALYZE baseline [{}]:\n{}", label, analyze_output); } fn rows_to_insert() -> usize { @@ -93,7 +86,8 @@ fn smoke_queries_benchmark() { let mut inserted = 0usize; let insert_deadline = start_insert + std::time::Duration::from_secs(40); // hard timeout guard - // Use a high-offset base id derived from current time to avoid rare PK collisions if residual rows survived a failed DROP. + // Use a high-offset base id derived from current time to avoid rare PK collisions if residual + // rows survived a failed DROP. 
while inserted < total { let remain = total - inserted; let n = remain.min(batch_size); @@ -125,7 +119,8 @@ fn smoke_queries_benchmark() { } let insert_sql = format!( - "INSERT INTO {} (customer_id, sku, status, quantity, price, created_at, updated_at, paid, notes) VALUES {}", + "INSERT INTO {} (customer_id, sku, status, quantity, price, created_at, updated_at, \ + paid, notes) VALUES {}", full, values ); // Retry on transient errors (e.g., timeout) up to 3 times @@ -153,16 +148,15 @@ fn smoke_queries_benchmark() { "Benchmark INSERT: inserted {} rows in {:.3}s → {:.1} rows/sec", inserted, insert_elapsed, rows_per_sec ); - println!( - "Phase-0 baseline metric [query_insert_rows_per_sec]={:.1}", - rows_per_sec - ); + println!("Phase-0 baseline metric [query_insert_rows_per_sec]={:.1}", rows_per_sec); print_phase0_explain_baseline( "query_benchmark_paged_select", &format!( - "SELECT order_id, customer_id, sku, status, quantity, price, created_at, updated_at, paid, notes FROM {} WHERE order_id > 0 ORDER BY order_id LIMIT {}", - full, page_size_from_baseline() + "SELECT order_id, customer_id, sku, status, quantity, price, created_at, updated_at, \ + paid, notes FROM {} WHERE order_id > 0 ORDER BY order_id LIMIT {}", + full, + page_size_from_baseline() ), ); @@ -177,7 +171,8 @@ fn smoke_queries_benchmark() { let expected_pages = inserted.div_ceil(page_size); for _ in 0..expected_pages { let select_sql = format!( - "SELECT order_id, customer_id, sku, status, quantity, price, created_at, updated_at, paid, notes FROM {} WHERE order_id > {} ORDER BY order_id LIMIT {}", + "SELECT order_id, customer_id, sku, status, quantity, price, created_at, updated_at, \ + paid, notes FROM {} WHERE order_id > {} ORDER BY order_id LIMIT {}", full, last_id, page_size ); let _ = execute_sql_as_root_via_client(&select_sql).expect("select page (cursor)"); @@ -194,10 +189,7 @@ fn smoke_queries_benchmark() { "Benchmark SELECT: fetched {} pages ({} rows/page) in {:.3}s → {:.1} pages/sec", pages, 
page_size, select_elapsed, pages_per_sec ); - println!( - "Phase-0 baseline metric [query_select_pages_per_sec]={:.1}", - pages_per_sec - ); + println!("Phase-0 baseline metric [query_select_pages_per_sec]={:.1}", pages_per_sec); // Best-effort cleanup to keep the namespace tidy between runs let _ = execute_sql_as_root_via_client(&format!("DROP TABLE IF EXISTS {}", full)); diff --git a/cli/tests/smoke/query/smoke_test_stream_explain_planning.rs b/cli/tests/smoke/query/smoke_test_stream_explain_planning.rs index 9cd58e814..9b2f26cce 100644 --- a/cli/tests/smoke/query/smoke_test_stream_explain_planning.rs +++ b/cli/tests/smoke/query/smoke_test_stream_explain_planning.rs @@ -18,7 +18,8 @@ fn smoke_test_stream_explain_planning() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)) .expect("CREATE NAMESPACE should succeed"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', TTL_SECONDS = 60)", + "CREATE TABLE {} (event_id TEXT PRIMARY KEY, payload TEXT) WITH (TYPE = 'STREAM', \ + TTL_SECONDS = 60)", full_stream_table )) .expect("CREATE STREAM TABLE should succeed"); @@ -61,4 +62,4 @@ fn smoke_test_stream_explain_planning() { ); let _ = execute_sql_as_root_via_client(&format!("DROP NAMESPACE {} CASCADE", namespace)); -} \ No newline at end of file +} diff --git a/cli/tests/smoke/security/smoke_test_rpc_auth.rs b/cli/tests/smoke/security/smoke_test_rpc_auth.rs index fd0ef9bb4..774bfd1d6 100644 --- a/cli/tests/smoke/security/smoke_test_rpc_auth.rs +++ b/cli/tests/smoke/security/smoke_test_rpc_auth.rs @@ -10,11 +10,13 @@ //! Run with: //! 
cargo nextest run --test smoke smoke_security_rpc_auth -use crate::common::*; +use std::time::Duration; + use base64::{engine::general_purpose, Engine as _}; use reqwest::Client; use serde_json::json; -use std::time::Duration; + +use crate::common::*; // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -118,8 +120,8 @@ fn smoke_rpc_sql_forged_jwt_alg_none_returns_401() { } // header={"alg":"none"} payload={"sub":"attacker","role":"system","exp":9999999999} - let forged = "eyJhbGciOiJub25lIn0.\ - eyJzdWIiOiJhdHRhY2tlciIsInJvbGUiOiJzeXN0ZW0iLCJleHAiOjk5OTk5OTk5OTl9."; + let forged = + "eyJhbGciOiJub25lIn0.eyJzdWIiOiJhdHRhY2tlciIsInJvbGUiOiJzeXN0ZW0iLCJleHAiOjk5OTk5OTk5OTl9."; let status = block(async { http_client() @@ -266,7 +268,8 @@ fn smoke_rpc_health_is_public() { fn smoke_rpc_login_wrong_password_returns_401_generic_message() { if !is_server_running() { eprintln!( - "Skipping smoke_rpc_login_wrong_password_returns_401_generic_message: server not running" + "Skipping smoke_rpc_login_wrong_password_returns_401_generic_message: server not \ + running" ); return; } @@ -315,7 +318,8 @@ fn smoke_rpc_login_wrong_password_returns_401_generic_message() { fn smoke_rpc_login_nonexistent_user_matches_wrong_password_response() { if !is_server_running() { eprintln!( - "Skipping smoke_rpc_login_nonexistent_user_matches_wrong_password_response: server not running" + "Skipping smoke_rpc_login_nonexistent_user_matches_wrong_password_response: server \ + not running" ); return; } @@ -375,8 +379,8 @@ fn smoke_rpc_login_nonexistent_user_matches_wrong_password_response() { ); assert_eq!( msg_real_user, msg_fake_user, - "Error messages for wrong-password vs non-existent user must be identical \ - to prevent user enumeration. real_user='{}', fake_user='{}'", + "Error messages for wrong-password vs non-existent user must be identical to prevent user \ + enumeration. 
real_user='{}', fake_user='{}'", msg_real_user, msg_fake_user ); } diff --git a/cli/tests/smoke/security/smoke_test_security_access.rs b/cli/tests/smoke/security/smoke_test_security_access.rs index 9bc90f21d..a07966789 100644 --- a/cli/tests/smoke/security/smoke_test_security_access.rs +++ b/cli/tests/smoke/security/smoke_test_security_access.rs @@ -1,8 +1,9 @@ -use crate::common::*; -use kalam_client::models::ChangeEvent; -use kalam_client::KalamLinkTimeouts; use std::time::Duration; +use kalam_client::{models::ChangeEvent, KalamLinkTimeouts}; + +use crate::common::*; + const MAX_SQL_QUERY_LENGTH: usize = 1024 * 1024; fn expect_unauthorized(result: Result>, context: &str) { @@ -144,7 +145,8 @@ fn smoke_security_system_tables_blocked_in_batch() { "SELECT 1; SELECT * FROM system.users", "SELECT 1; SELECT * FROM system.schemas", "SELECT 1; SELECT * FROM (SELECT * FROM system.users) AS u", - "SELECT 1; SELECT u.user_id FROM system.users u JOIN (SELECT user_id FROM system.users) s ON u.user_id = s.user_id", + "SELECT 1; SELECT u.user_id FROM system.users u JOIN (SELECT user_id FROM system.users) s \ + ON u.user_id = s.user_id", "SELECT 1; SELECT * FROM system.users WHERE user_id IN (SELECT user_id FROM system.users)", "WITH u AS (SELECT * FROM system.users) SELECT * FROM u", "SELECT 1; EXPLAIN SELECT * FROM system.users", @@ -172,7 +174,8 @@ fn smoke_security_system_tables_blocked_in_batch() { fn smoke_security_private_shared_table_blocked_in_batch() { if !is_server_running() { eprintln!( - "Skipping smoke_security_private_shared_table_blocked_in_batch: server not running at {}", + "Skipping smoke_security_private_shared_table_blocked_in_batch: server not running at \ + {}", server_url() ); return; @@ -189,7 +192,8 @@ fn smoke_security_private_shared_table_blocked_in_batch() { .expect("Failed to create namespace"); let create_table_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', ACCESS_LEVEL='PRIVATE')", + "CREATE TABLE {} (id 
BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + ACCESS_LEVEL='PRIVATE')", full_table ); execute_sql_as_root_via_client(&create_table_sql).expect("Failed to create shared table"); @@ -207,7 +211,8 @@ fn smoke_security_private_shared_table_blocked_in_batch() { .expect("Failed to create regular user"); let batch_sql = format!( - "SELECT * FROM {table}; INSERT INTO {table} (id, name) VALUES (2, 'x'); UPDATE {table} SET name = 'y' WHERE id = 1; DELETE FROM {table} WHERE id = 1;", + "SELECT * FROM {table}; INSERT INTO {table} (id, name) VALUES (2, 'x'); UPDATE {table} \ + SET name = 'y' WHERE id = 1; DELETE FROM {table} WHERE id = 1;", table = full_table ); let batch_result = execute_sql_via_client_as(®ular_user, password, &batch_sql); @@ -246,7 +251,8 @@ fn smoke_security_private_shared_table_blocked_in_batch() { fn smoke_security_subscription_blocked_for_system_and_private_shared() { if !is_server_running() { eprintln!( - "Skipping smoke_security_subscription_blocked_for_system_and_private_shared: server not running at {}", + "Skipping smoke_security_subscription_blocked_for_system_and_private_shared: server \ + not running at {}", server_url() ); return; @@ -263,7 +269,8 @@ fn smoke_security_subscription_blocked_for_system_and_private_shared() { .expect("Failed to create namespace"); let create_private_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', ACCESS_LEVEL='PRIVATE')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + ACCESS_LEVEL='PRIVATE')", full_table ); execute_sql_as_root_via_client(&create_private_sql).expect("Failed to create shared table"); @@ -271,7 +278,8 @@ fn smoke_security_subscription_blocked_for_system_and_private_shared() { let public_table = generate_unique_table("smoke_sub_public_tbl"); let full_public = format!("{}.{}", namespace, public_table); let create_public_sql = format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', 
ACCESS_LEVEL='PUBLIC')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + ACCESS_LEVEL='PUBLIC')", full_public ); execute_sql_as_root_via_client(&create_public_sql) @@ -368,7 +376,9 @@ fn smoke_security_system_table_write_blocked() { format!("CREATE USER {} WITH PASSWORD '{}' ROLE 'user'", user_name, user_pass); execute_sql_as_root_via_client(&create_user_sql).expect("Failed to create user"); - let batch_sql = "INSERT INTO system.users (user_id) VALUES ('hacker'); UPDATE system.users SET user_id='x' WHERE user_id='root'; DELETE FROM system.users WHERE user_id='root';"; + let batch_sql = "INSERT INTO system.users (user_id) VALUES ('hacker'); UPDATE system.users \ + SET user_id='x' WHERE user_id='root'; DELETE FROM system.users WHERE \ + user_id='root';"; let result = execute_sql_via_client_as(&user_name, user_pass, batch_sql); expect_rejected(result, "system table write batch"); diff --git a/cli/tests/smoke/storage/smoke_test_show_storages.rs b/cli/tests/smoke/storage/smoke_test_show_storages.rs index 7f9a46231..3324ad6e0 100644 --- a/cli/tests/smoke/storage/smoke_test_show_storages.rs +++ b/cli/tests/smoke/storage/smoke_test_show_storages.rs @@ -3,9 +3,10 @@ // - Validates all expected columns are present // - Checks data types and non-empty values for required fields -use crate::common::*; use serde_json::Value as JsonValue; +use crate::common::*; + fn arrow_value_as_string(value: &JsonValue) -> Option { extract_arrow_value(value) .unwrap_or_else(|| value.clone()) @@ -78,7 +79,8 @@ fn smoke_show_storages_basic() { fn smoke_show_storages_cli_timestamps_are_not_epoch_shifted() { if !is_server_running() { println!( - "Skipping smoke_show_storages_cli_timestamps_are_not_epoch_shifted: server not running at {}", + "Skipping smoke_show_storages_cli_timestamps_are_not_epoch_shifted: server not \ + running at {}", server_url() ); return; diff --git a/cli/tests/smoke/storage/smoke_test_storage_compact.rs 
b/cli/tests/smoke/storage/smoke_test_storage_compact.rs index d55009915..ea8cd9d39 100644 --- a/cli/tests/smoke/storage/smoke_test_storage_compact.rs +++ b/cli/tests/smoke/storage/smoke_test_storage_compact.rs @@ -1,9 +1,13 @@ //! Smoke test: STORAGE COMPACT TABLE triggers job completion and updates RocksDB files -use crate::common::*; +use std::{ + path::{Path, PathBuf}, + time::{Duration, SystemTime}, +}; + use serde_json::Value as JsonValue; -use std::path::{Path, PathBuf}; -use std::time::{Duration, SystemTime}; + +use crate::common::*; fn rocksdb_dir() -> PathBuf { let storage_dir = storage_base_dir(); diff --git a/cli/tests/smoke/storage/smoke_test_storage_health.rs b/cli/tests/smoke/storage/smoke_test_storage_health.rs index 9d18bd0c6..d117d9cde 100644 --- a/cli/tests/smoke/storage/smoke_test_storage_health.rs +++ b/cli/tests/smoke/storage/smoke_test_storage_health.rs @@ -5,12 +5,16 @@ // - Verifies authorization (DBA+ only) // - Tests non-existent storage returns proper error -use crate::common::*; +use std::{ + collections::HashMap, + thread, + time::{Duration, Instant}, +}; + use chrono::{DateTime, Datelike, Utc}; use serde_json::Value as JsonValue; -use std::collections::HashMap; -use std::thread; -use std::time::{Duration, Instant}; + +use crate::common::*; fn arrow_value_as_string(value: &JsonValue) -> Option { extract_arrow_value(value) @@ -143,7 +147,8 @@ fn smoke_storage_check_local_basic() { ); assert!( age_seconds <= 60, - "tested_at should be within 60 seconds of now, but was {} seconds ago (tested_at: {}, now: {})", + "tested_at should be within 60 seconds of now, but was {} seconds ago (tested_at: {}, \ + now: {})", age_seconds, tested_at, now diff --git a/cli/tests/smoke/storage/smoke_test_storage_templates.rs b/cli/tests/smoke/storage/smoke_test_storage_templates.rs index b2211f039..a755cb681 100644 --- a/cli/tests/smoke/storage/smoke_test_storage_templates.rs +++ b/cli/tests/smoke/storage/smoke_test_storage_templates.rs @@ -4,12 +4,16 @@ // 
- Inserts rows, triggers flush, and verifies parquet output directories match templates // - Drops tables and asserts directories are removed -use crate::common::*; +use std::{ + fs, + path::{Path, PathBuf}, + thread, + time::{Duration, Instant}, +}; + use serde_json::Value as JsonValue; -use std::fs; -use std::path::{Path, PathBuf}; -use std::thread; -use std::time::{Duration, Instant}; + +use crate::common::*; struct CleanupActions { actions: Vec>, @@ -215,9 +219,10 @@ fn smoke_storage_custom_templates() { } if collected.is_empty() { panic!( - "Expected parquet files under {} (direct or recursive) but none were found", - expected_user_dir.display() - ); + "Expected parquet files under {} (direct or recursive) but none were \ + found", + expected_user_dir.display() + ); } collected }, @@ -303,9 +308,10 @@ fn smoke_storage_custom_templates() { } if collected.is_empty() { panic!( - "Expected parquet files under {} (direct or recursive) but none were found", - expected_shared_dir.display() - ); + "Expected parquet files under {} (direct or recursive) but none were \ + found", + expected_shared_dir.display() + ); } collected }, @@ -462,7 +468,8 @@ fn assert_storage_registered(storage_id: &str, expected_base_dir: &str) { fn assert_table_storage(namespace: &str, table_name: &str, expected_storage_id: &str) { let sql = format!( - "SELECT table_name, namespace_id, options FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT table_name, namespace_id, options FROM system.schemas WHERE namespace_id = '{}' \ + AND table_name = '{}'", namespace, table_name ); let rows = query_rows(&sql); diff --git a/cli/tests/smoke/subscription/smoke_test_shared_table_subscription.rs b/cli/tests/smoke/subscription/smoke_test_shared_table_subscription.rs index 9c7b3239c..6e34f7883 100644 --- a/cli/tests/smoke/subscription/smoke_test_shared_table_subscription.rs +++ b/cli/tests/smoke/subscription/smoke_test_shared_table_subscription.rs @@ -2,9 +2,10 @@ // Covers: 
shared table creation, inserts, subscription receiving snapshot + change events // Uses kalam-client directly instead of CLI to avoid macOS TCP subprocess limits -use crate::common::*; use kalam_client::models::ChangeEvent; +use crate::common::*; + /// Attempt to subscribe to a query as a given user and wait for ACK or error. /// Returns Ok(()) if subscription was accepted, Err(msg) if it was denied or failed. fn try_subscribe_as_user(username: &str, password: &str, query: &str) -> Result<(), String> { @@ -216,7 +217,8 @@ fn smoke_shared_table_subscription_private_denied() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)) .expect("create namespace"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', ACCESS_LEVEL='PRIVATE')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + ACCESS_LEVEL='PRIVATE')", full )) .expect("create private shared table"); diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_advanced.rs b/cli/tests/smoke/subscription/smoke_test_subscription_advanced.rs index 3132c7978..c13d3d0d6 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_advanced.rs +++ b/cli/tests/smoke/subscription/smoke_test_subscription_advanced.rs @@ -2,12 +2,13 @@ // Covers: multi-batch initial data, seq_id resumption, high-volume changes // Tests subscription reliability under various edge cases -use crate::common::*; use std::time::Duration; // Re-import subscription-related types for advanced tests use kalam_client::{SubscriptionConfig, SubscriptionOptions}; +use crate::common::*; + fn is_ephemeral_port_error(message: &str) -> bool { message.contains("Can't assign requested address") || message.contains("os error 49") } @@ -241,7 +242,8 @@ fn smoke_subscription_multi_batch_initial_data() { // Create table let create_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, data VARCHAR, created_at TIMESTAMP) WITH 
(TYPE = 'USER')", + "CREATE TABLE {} (id INT PRIMARY KEY, data VARCHAR, created_at TIMESTAMP) WITH (TYPE = \ + 'USER')", full ); execute_sql_as_root_via_client(&create_sql).expect("create user table should succeed"); @@ -389,7 +391,8 @@ fn smoke_subscription_resume_from_seq_id() { Ok(Some(line)) => { println!("[FIRST_SUB] Event: {}...", &line[..std::cmp::min(200, line.len())]); if line.contains(&test_value) || line.contains("Insert") { - // Extract seq_id from the event (format: "_seq": Object {"Int64": String("123456789")}) + // Extract seq_id from the event (format: "_seq": Object {"Int64": + // String("123456789")}) if let Some(start) = line.find("\"_seq\"") { if let Some(seq_start) = line[start..].find("String(\"") { let seq_portion = &line[start + seq_start + 8..]; @@ -505,7 +508,8 @@ fn smoke_subscription_high_volume_changes() { // Create table let create_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, counter INT, updated_at TIMESTAMP) WITH (TYPE = 'USER')", + "CREATE TABLE {} (id INT PRIMARY KEY, counter INT, updated_at TIMESTAMP) WITH (TYPE = \ + 'USER')", full ); execute_sql_as_root_via_client(&create_sql).expect("create user table should succeed"); @@ -800,7 +804,8 @@ fn smoke_subscription_column_projection() { // Insert a row with all columns populated let test_username = format!("user_{}", std::process::id()); let insert_sql = format!( - "INSERT INTO {} (id, username, email, age, status, bio, created_at) VALUES (1, '{}', 'test@example.com', 25, 'active', 'A long bio text here', 1730497770045)", + "INSERT INTO {} (id, username, email, age, status, bio, created_at) VALUES (1, '{}', \ + 'test@example.com', 25, 'active', 'A long bio text here', 1730497770045)", full, test_username ); execute_sql_as_root_via_client(&insert_sql).expect("insert should succeed"); @@ -888,7 +893,8 @@ fn smoke_subscription_column_projection() { ); // Should NOT contain email, age, status, bio (non-selected columns) - // Note: We check for the actual values to avoid false 
positives from field names in debug output + // Note: We check for the actual values to avoid false positives from field names in debug + // output assert!( !initial_str.contains("test@example.com"), "Initial data should NOT contain email value. Events: {}", @@ -904,7 +910,8 @@ fn smoke_subscription_column_projection() { // Now perform an UPDATE and verify the change event also respects projection let updated_username = format!("updated_{}", std::process::id()); let update_sql = format!( - "UPDATE {} SET username = '{}', email = 'newemail@example.com', status = 'inactive' WHERE id = 1", + "UPDATE {} SET username = '{}', email = 'newemail@example.com', status = 'inactive' WHERE \ + id = 1", full, updated_username ); execute_sql_as_root_via_client(&update_sql).expect("update should succeed"); @@ -978,7 +985,10 @@ fn smoke_subscription_column_projection() { ); listener.stop().ok(); - println!("[TEST] Column projection test passed! Only selected columns returned in subscription events."); + println!( + "[TEST] Column projection test passed! Only selected columns returned in subscription \ + events." + ); // Cleanup let _ = execute_sql_as_root_via_client(&format!("DROP NAMESPACE {} CASCADE", namespace)); diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_close.rs b/cli/tests/smoke/subscription/smoke_test_subscription_close.rs index e2a178052..2494ef843 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_close.rs +++ b/cli/tests/smoke/subscription/smoke_test_subscription_close.rs @@ -1,11 +1,10 @@ // Smoke Tests: WebSocket Subscription Close & Cleanup // // Verifies that: -// 1. Explicitly calling `close()` on a SubscriptionManager sends an Unsubscribe -// frame and removes the subscription from `system.live`. -// 2. Dropping a SubscriptionManager without calling `close()` triggers the -// Drop impl, which spawns a background cleanup task that also removes -// the entry from `system.live`. +// 1. 
Explicitly calling `close()` on a SubscriptionManager sends an Unsubscribe frame and removes +// the subscription from `system.live`. +// 2. Dropping a SubscriptionManager without calling `close()` triggers the Drop impl, which +// spawns a background cleanup task that also removes the entry from `system.live`. // 3. `SubscriptionManager::is_closed()` returns the correct state. // // These tests use short `#[ntest::timeout]` values; individual WebSocket @@ -14,10 +13,12 @@ // Run with: // cargo test --test smoke smoke_test_subscription_close -use crate::common::*; -use kalam_client::{ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; use std::time::Duration; +use kalam_client::{ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; + +use crate::common::*; + // ── helpers ─────────────────────────────────────────────────────────────────── /// Build a kalam-client with fast timeouts (≤3s per operation). diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_delta_updates.rs b/cli/tests/smoke/subscription/smoke_test_subscription_delta_updates.rs index f10bc4b63..85f03cd05 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_delta_updates.rs +++ b/cli/tests/smoke/subscription/smoke_test_subscription_delta_updates.rs @@ -9,9 +9,10 @@ // This verifies the end-to-end delta update pipeline: // Backend (notification.rs compute_json_update_delta) → WebSocket → kalam-client SDK -use crate::common::*; use std::time::{Duration, Instant}; +use crate::common::*; + #[ntest::timeout(120000)] #[test] fn smoke_subscription_update_sends_delta_only() { @@ -156,7 +157,8 @@ fn smoke_subscription_update_sends_delta_only() { // Both email and age should be represented in the update payload assert!( update2_joined.contains("email") && update2_joined.contains("age"), - "Multi-column UPDATE should include both 'email' and 'age' in the update payload; got: {}", + "Multi-column UPDATE should include both 'email' and 'age' in the update 
payload; \ + got: {}", update2_joined ); diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_listing.rs b/cli/tests/smoke/subscription/smoke_test_subscription_listing.rs index 50824f6ac..69f6e714e 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_listing.rs +++ b/cli/tests/smoke/subscription/smoke_test_subscription_listing.rs @@ -9,10 +9,12 @@ // Run with: // cargo test --test smoke smoke_test_subscription_listing -use crate::common::*; -use kalam_client::{ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; use std::time::Duration; +use kalam_client::{ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; + +use crate::common::*; + // ── helpers ─────────────────────────────────────────────────────────────────── /// Build a kalam-client with fast timeouts. diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_multi_reconnect.rs b/cli/tests/smoke/subscription/smoke_test_subscription_multi_reconnect.rs index fdb821bed..c7194ba5a 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_multi_reconnect.rs +++ b/cli/tests/smoke/subscription/smoke_test_subscription_multi_reconnect.rs @@ -12,10 +12,12 @@ // Run with: // cargo test --test smoke smoke_subscription_multi_reconnect -use crate::common::*; -use kalam_client::{models::ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; use std::time::Duration; +use kalam_client::{models::ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SubscriptionConfig}; + +use crate::common::*; + // ── helpers ────────────────────────────────────────────────────────────────── fn build_reconnect_client() -> Result> { diff --git a/cli/tests/smoke/subscription/smoke_test_subscription_reconnect_resume.rs b/cli/tests/smoke/subscription/smoke_test_subscription_reconnect_resume.rs index 177b7af18..abee3ec72 100644 --- a/cli/tests/smoke/subscription/smoke_test_subscription_reconnect_resume.rs +++ 
b/cli/tests/smoke/subscription/smoke_test_subscription_reconnect_resume.rs @@ -11,25 +11,26 @@ // left off with no missed or replayed rows. // // Tests: -// 1. smoke_subscription_reconnect_basic_resume -// Subscribe → insert → disconnect → insert-while-disconnected → reconnect -// → verify gap rows appear in the new subscription's initial snapshot. +// 1. smoke_subscription_reconnect_basic_resume Subscribe → insert → disconnect → +// insert-while-disconnected → reconnect → verify gap rows appear in the new subscription's +// initial snapshot. // -// 2. smoke_subscription_resume_from_seq_id -// Subscribe → record last seq_id from the server Ack → disconnect → -// insert gap rows → reconnect → re-subscribe WITH from_seq_id → verify -// only post-seq_id rows arrive and pre-disconnect rows are not replayed. +// 2. smoke_subscription_resume_from_seq_id Subscribe → record last seq_id from the server Ack → +// disconnect → insert gap rows → reconnect → re-subscribe WITH from_seq_id → verify only +// post-seq_id rows arrive and pre-disconnect rows are not replayed. // // Run with: // cargo test --test smoke smoke_subscription_reconnect // cargo test --test smoke smoke_subscription_resume_from_seq_id -use crate::common::*; +use std::time::Duration; + use kalam_client::{ models::ChangeEvent, KalamLinkClient, KalamLinkTimeouts, SeqId, SubscriptionConfig, SubscriptionOptions, }; -use std::time::Duration; + +use crate::common::*; // ── helpers ────────────────────────────────────────────────────────────────── @@ -50,6 +51,14 @@ fn reconnect_client() -> Result Duration { + if is_cluster_mode() { + Duration::from_secs(base_secs + 5) + } else { + Duration::from_secs(base_secs) + } +} + /// Collect events from a subscription until either `predicate` is satisfied /// or `timeout` elapses. Returns all collected events. 
async fn collect_until( @@ -142,7 +151,7 @@ fn smoke_subscription_reconnect_basic_resume() { .expect("subscribe"); // Wait for ACK so the server has registered the subscription. - let _ = collect_until(&mut sub, Duration::from_secs(8), |evs| { + let _ = collect_until(&mut sub, event_timeout(8), |evs| { evs.iter().any(|e| matches!(e, ChangeEvent::Ack { .. })) }) .await; @@ -156,8 +165,7 @@ fn smoke_subscription_reconnect_basic_resume() { .expect("insert pre-disconnect row"); let pre_events = - collect_until(&mut sub, Duration::from_secs(10), |evs| contains_value(evs, &pre_val)) - .await; + collect_until(&mut sub, event_timeout(10), |evs| contains_value(evs, &pre_val)).await; assert!( contains_value(&pre_events, &pre_val), "pre-disconnect event should arrive; got {:?}", @@ -193,7 +201,7 @@ fn smoke_subscription_reconnect_basic_resume() { .expect("re-subscribe after reconnect"); // Gap rows should appear in the initial snapshot. - let snap_events = collect_until(&mut sub2, Duration::from_secs(15), |evs| { + let snap_events = collect_until(&mut sub2, event_timeout(15), |evs| { contains_value(evs, &gap1) && contains_value(evs, &gap2) }) .await; @@ -217,8 +225,7 @@ fn smoke_subscription_reconnect_basic_resume() { .expect("insert post-reconnect row"); let post_events = - collect_until(&mut sub2, Duration::from_secs(10), |evs| contains_value(evs, &post_val)) - .await; + collect_until(&mut sub2, event_timeout(10), |evs| contains_value(evs, &post_val)).await; assert!( contains_value(&post_events, &post_val), "post-reconnect Insert should arrive as live event; got {:?}", @@ -279,7 +286,7 @@ fn smoke_subscription_resume_from_seq_id() { .expect("subscribe"); // Collect the Ack — it carries the snapshot boundary seq_id. - let ack_events = collect_until(&mut sub, Duration::from_secs(8), |evs| { + let ack_events = collect_until(&mut sub, event_timeout(8), |evs| { evs.iter().any(|e| matches!(e, ChangeEvent::Ack { .. 
})) }) .await; @@ -303,8 +310,7 @@ fn smoke_subscription_resume_from_seq_id() { .expect("insert pre-disconnect row"); let change_events = - collect_until(&mut sub, Duration::from_secs(10), |evs| contains_value(evs, &pre_val)) - .await; + collect_until(&mut sub, event_timeout(10), |evs| contains_value(evs, &pre_val)).await; assert!( contains_value(&change_events, &pre_val), "pre-disconnect Insert should arrive; got {:?}", @@ -353,7 +359,7 @@ fn smoke_subscription_resume_from_seq_id() { client.subscribe_with_config(cfg2).await.expect("re-subscribe with from_seq_id"); // Gap rows must arrive (as catch-up initial data or change events). - let resume_events = collect_until(&mut sub2, Duration::from_secs(15), |evs| { + let resume_events = collect_until(&mut sub2, event_timeout(15), |evs| { contains_value(evs, &gap1) && contains_value(evs, &gap2) }) .await; @@ -399,8 +405,7 @@ fn smoke_subscription_resume_from_seq_id() { .expect("insert post-reconnect row"); let post_events = - collect_until(&mut sub2, Duration::from_secs(10), |evs| contains_value(evs, &post_val)) - .await; + collect_until(&mut sub2, event_timeout(10), |evs| contains_value(evs, &post_val)).await; assert!( contains_value(&post_events, &post_val), "post-reconnect Insert should arrive as live event; got {:?}", diff --git a/cli/tests/smoke/subscription/smoke_test_user_table_subscription.rs b/cli/tests/smoke/subscription/smoke_test_user_table_subscription.rs index c8f532760..8a8f09011 100644 --- a/cli/tests/smoke/subscription/smoke_test_user_table_subscription.rs +++ b/cli/tests/smoke/subscription/smoke_test_user_table_subscription.rs @@ -1,6 +1,6 @@ // Smoke Test 1 (revised): User table with subscription lifecycle -// Covers: namespace creation, user table creation, inserts, subscription receiving events, flush job visibility -// Uses kalam-client directly instead of CLI to avoid macOS TCP subprocess limits +// Covers: namespace creation, user table creation, inserts, subscription receiving events, flush +// 
job visibility Uses kalam-client directly instead of CLI to avoid macOS TCP subprocess limits use crate::common::*; @@ -55,7 +55,8 @@ fn smoke_user_table_subscription_lifecycle() { let query = format!("SELECT * FROM {}", full); let mut listener = SubscriptionListener::start(&query).expect("subscription should start"); - // 4a) Collect snapshot rows with extended timeout; if none captured, fallback to direct SELECT snapshot + // 4a) Collect snapshot rows with extended timeout; if none captured, fallback to direct SELECT + // snapshot let mut snapshot_lines: Vec = Vec::new(); let snapshot_deadline = std::time::Instant::now() + std::time::Duration::from_secs(6); while std::time::Instant::now() < snapshot_deadline { diff --git a/cli/tests/smoke/system/smoke_test_all_system_tables_schemas.rs b/cli/tests/smoke/system/smoke_test_all_system_tables_schemas.rs index c6ca7c34b..58c3ca6a4 100644 --- a/cli/tests/smoke/system/smoke_test_all_system_tables_schemas.rs +++ b/cli/tests/smoke/system/smoke_test_all_system_tables_schemas.rs @@ -4,10 +4,10 @@ //! can be queried without schema mismatches or errors. //! //! Covers: -//! - Persisted tables: users, namespaces, schemas, storages, -//! jobs, job_nodes, audit_log, manifest, topics, topic_offsets -//! - Virtual views: live, stats, settings, server_logs, cluster, -//! cluster_groups, datatypes, tables, columns +//! - Persisted tables: users, namespaces, schemas, storages, jobs, job_nodes, audit_log, manifest, +//! topics, topic_offsets +//! - Virtual views: live, stats, settings, server_logs, cluster, cluster_groups, datatypes, tables, +//! columns //! //! This test prevents schema definition bugs like the topic_offsets //! updated_at column mismatch (BigInt vs Timestamp). 
@@ -178,7 +178,8 @@ fn smoke_test_topic_offsets_schema_and_operations() { println!("🧪 Testing system.topic_offsets schema and timestamp operations"); // Query topic_offsets (may be empty, but schema should work) - let query = "SELECT topic_id, group_id, partition_id, last_acked_offset, updated_at FROM system.topic_offsets LIMIT 5"; + let query = "SELECT topic_id, group_id, partition_id, last_acked_offset, updated_at FROM \ + system.topic_offsets LIMIT 5"; let output = execute_sql_as_root_via_client(query).expect("Failed to query system.topic_offsets"); @@ -196,8 +197,8 @@ fn smoke_test_topic_offsets_schema_and_operations() { ); // Test timestamp filtering (should not error even if no rows) - let filter_query = - "SELECT COUNT(*) FROM system.topic_offsets WHERE updated_at > TIMESTAMP '1970-01-01 00:00:00'"; + let filter_query = "SELECT COUNT(*) FROM system.topic_offsets WHERE updated_at > TIMESTAMP \ + '1970-01-01 00:00:00'"; let filter_output = execute_sql_as_root_via_client(filter_query) .expect("Failed to filter topic_offsets by updated_at timestamp"); diff --git a/cli/tests/smoke/system/smoke_test_cleanup_job.rs b/cli/tests/smoke/system/smoke_test_cleanup_job.rs index 0850884be..1bbfc7616 100644 --- a/cli/tests/smoke/system/smoke_test_cleanup_job.rs +++ b/cli/tests/smoke/system/smoke_test_cleanup_job.rs @@ -1,6 +1,7 @@ -use crate::common::*; use std::time::Duration; +use crate::common::*; + fn wait_for_table_absent( namespace: &str, table: &str, diff --git a/cli/tests/smoke/system/smoke_test_system_and_users.rs b/cli/tests/smoke/system/smoke_test_system_and_users.rs index afa560db6..056f5ddde 100644 --- a/cli/tests/smoke/system/smoke_test_system_and_users.rs +++ b/cli/tests/smoke/system/smoke_test_system_and_users.rs @@ -70,7 +70,8 @@ fn smoke_system_tables_and_user_lifecycle() { let unique_tbl = generate_unique_table("test_flush_table"); let test_table = format!("{}.{}", test_ns, unique_tbl); let create_table_sql = format!( - "CREATE TABLE {} (id INT PRIMARY 
KEY, value VARCHAR) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:100')", + "CREATE TABLE {} (id INT PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', FLUSH_POLICY = \ + 'rows:100')", test_table ); execute_sql_as_root_via_client(&create_table_sql).expect("create test table should succeed"); diff --git a/cli/tests/smoke/system/smoke_test_system_tables_extended.rs b/cli/tests/smoke/system/smoke_test_system_tables_extended.rs index 9fb571e03..3a719a65c 100644 --- a/cli/tests/smoke/system/smoke_test_system_tables_extended.rs +++ b/cli/tests/smoke/system/smoke_test_system_tables_extended.rs @@ -85,7 +85,8 @@ fn smoke_test_system_tables_options_column() { // Query system.schemas for our namespace let query_sql = format!( - "SELECT table_name, table_type, options FROM system.schemas WHERE namespace_id = '{}' ORDER BY table_name", + "SELECT table_name, table_type, options FROM system.schemas WHERE namespace_id = '{}' \ + ORDER BY table_name", namespace ); let output = @@ -296,7 +297,8 @@ fn smoke_test_dt_meta_command() { // Create two tables for table in &[&table1, &table2] { let create_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:1000')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE = 'USER', \ + FLUSH_POLICY = 'rows:1000')", namespace, table ); execute_sql_as_root_via_client(&create_sql).expect("Failed to create table"); @@ -306,7 +308,8 @@ fn smoke_test_dt_meta_command() { // Query system.schemas directly (equivalent to \dt for our namespace) let query_sql = format!( - "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY table_name", + "SELECT table_name, table_type FROM system.schemas WHERE namespace_id = '{}' ORDER BY \ + table_name", namespace ); let output = @@ -365,7 +368,8 @@ fn smoke_test_describe_table_meta_command() { // Query system.schemas for schema (equivalent to \d
) let query_sql = format!( - "SELECT table_name, table_type, options FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT table_name, table_type, options FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}'", namespace, table ); let output = execute_sql_as_root_via_client(&query_sql).expect("Failed to describe table"); diff --git a/cli/tests/smoke/tables/smoke_test_shared_table_crud.rs b/cli/tests/smoke/tables/smoke_test_shared_table_crud.rs index 808ef691d..5e832a05b 100644 --- a/cli/tests/smoke/tables/smoke_test_shared_table_crud.rs +++ b/cli/tests/smoke/tables/smoke_test_shared_table_crud.rs @@ -1,9 +1,11 @@ // Smoke Test 2: Shared table CRUD -// Covers: namespace creation, shared table creation, insert/select, delete/update, final select, drop table +// Covers: namespace creation, shared table creation, insert/select, delete/update, final select, +// drop table -use crate::common::*; use std::time::Duration; +use crate::common::*; + #[ntest::timeout(180000)] #[test] fn smoke_shared_table_crud() { @@ -49,7 +51,8 @@ fn smoke_shared_table_crud() { assert!(out.contains("alpha"), "expected 'alpha' in results: {}", out); assert!(out.contains("beta"), "expected 'beta' in results: {}", out); - // 4) Retrieve ids for rows we will mutate (backend requires primary key equality for UPDATE/DELETE) + // 4) Retrieve ids for rows we will mutate (backend requires primary key equality for + // UPDATE/DELETE) // Use JSON output for reliable parsing let id_sel = format!("SELECT id, name FROM {} WHERE name IN ('alpha','beta') ORDER BY name", full); diff --git a/cli/tests/smoke/tables/smoke_test_user_table_rls.rs b/cli/tests/smoke/tables/smoke_test_user_table_rls.rs index cd0d28318..acab15bee 100644 --- a/cli/tests/smoke/tables/smoke_test_user_table_rls.rs +++ b/cli/tests/smoke/tables/smoke_test_user_table_rls.rs @@ -8,7 +8,8 @@ use crate::common::*; // 3) Create a new regular user // 4) Login via CLI as the regular user // 5) As regular 
user: insert multiple rows, update one, delete one, then SELECT all -// 6) Verify: (a) regular user can insert, (b) login succeeds, (c) SELECT shows only this user's rows (no root rows) +// 6) Verify: (a) regular user can insert, (b) login succeeds, (c) SELECT shows only this user's +// rows (no root rows) #[ntest::timeout(180000)] #[test] fn smoke_user_table_rls_isolation() { diff --git a/cli/tests/smoke/topics/smoke_test_topic_consumption.rs b/cli/tests/smoke/topics/smoke_test_topic_consumption.rs index 9b82393cb..60c2dadf7 100644 --- a/cli/tests/smoke/topics/smoke_test_topic_consumption.rs +++ b/cli/tests/smoke/topics/smoke_test_topic_consumption.rs @@ -7,15 +7,16 @@ //! //! **Requirements**: Running KalamDB server with Topics feature enabled -use crate::common; -use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; -use base64::Engine; +use std::{ + collections::{HashMap, HashSet}, + time::Duration, +}; + +use base64::{engine::general_purpose::STANDARD as BASE64_STANDARD, Engine}; use kalam_client::consumer::{AutoOffsetReset, ConsumerRecord, TopicOp}; -use kalam_client::KalamLinkTimeouts; -use reqwest::Client; use serde::Deserialize; -use std::collections::{HashMap, HashSet}; -use std::time::Duration; + +use crate::{common, topic_test_support}; #[derive(Debug, Deserialize)] struct HttpTopicConsumeResponse { @@ -43,59 +44,17 @@ impl HttpTopicMessage { /// Create a test client using common infrastructure async fn create_test_client() -> kalam_client::KalamLinkClient { - let base_url = common::leader_or_server_url(); - common::client_for_user_on_url_with_timeouts( - &base_url, - common::default_username(), - common::default_password(), - KalamLinkTimeouts::builder() - .connection_timeout_secs(5) - .receive_timeout_secs(120) - .send_timeout_secs(30) - .subscribe_timeout_secs(10) - .auth_timeout_secs(10) - .initial_data_timeout(Duration::from_secs(120)) - .build(), - ) - .expect("Failed to build test client") + 
topic_test_support::create_test_client_with_timeouts(topic_test_support::long_topic_timeouts()) + .await } /// Execute SQL via HTTP helper async fn execute_sql(sql: &str) { - common::execute_sql_via_http_as_root(sql).await.expect("Failed to execute SQL"); + topic_test_support::execute_sql(sql).await.expect("Failed to execute SQL"); } async fn wait_for_topic_ready(topic: &str, expected_routes: usize) { - let sql = format!("SELECT routes FROM system.topics WHERE topic_id = '{}'", topic); - let deadline = std::time::Instant::now() + Duration::from_secs(20); - - while std::time::Instant::now() < deadline { - if let Ok(response) = common::execute_sql_via_http_as_root(&sql).await { - if let Some(rows) = common::get_rows_as_hashmaps(&response) { - if let Some(row) = rows.first() { - if let Some(routes_value) = row.get("routes") { - let routes_untyped = common::extract_typed_value(routes_value); - if let Some(routes_json) = routes_untyped - .as_str() - .and_then(|raw| serde_json::from_str::(raw).ok()) - { - let route_count = - routes_json.as_array().map(|routes| routes.len()).unwrap_or(0); - if route_count >= expected_routes { - return; - } - } - } - } - } - } - tokio::time::sleep(Duration::from_millis(100)).await; - } - - panic!( - "Timed out waiting for topic '{}' to have at least {} route(s)", - topic, expected_routes - ); + topic_test_support::wait_for_topic_ready(topic, expected_routes).await; } async fn create_topic_with_sources(topic: &str, table: &str, operations: &[&str]) { @@ -104,6 +63,9 @@ async fn create_topic_with_sources(topic: &str, table: &str, operations: &[&str] execute_sql(&format!("ALTER TOPIC {} ADD SOURCE {} ON {}", topic, table, op)).await; } wait_for_topic_ready(topic, operations.len()).await; + if common::is_cluster_mode() { + tokio::time::sleep(Duration::from_millis(750)).await; + } } fn response_is_success(response: &serde_json::Value) -> bool { @@ -185,11 +147,7 @@ async fn get_user_id(user_id: &str) -> String { )) .await .expect("Failed to 
query system.users"); - assert!( - response_is_success(&response), - "user lookup should succeed: {}", - response - ); + assert!(response_is_success(&response), "user lookup should succeed: {}", response); let rows = common::get_rows_as_hashmaps(&response).expect("user lookup should return rows"); let first_row = rows.first().expect("user lookup should return one row"); @@ -213,7 +171,7 @@ async fn poll_topic_messages_http_until( ) .await .expect("Failed to get root token for topic consume"); - let client = Client::new(); + let client = common::shared_http_client(); let deadline = std::time::Instant::now() + timeout; let mut last_count = 0usize; @@ -249,9 +207,42 @@ async fn poll_topic_messages_http_until( panic!( "Timed out waiting for at least {} topic messages on {} (last_count={})", - min_messages, - topic, - last_count + min_messages, topic, last_count + ); +} + +async fn poll_sql_consume_until( + sql: &str, + min_rows: usize, + timeout: Duration, +) -> serde_json::Value { + let deadline = std::time::Instant::now() + timeout; + let mut last_response = serde_json::Value::Null; + let mut last_count = 0usize; + + while std::time::Instant::now() < deadline { + let response = common::execute_sql_via_http_as_root(sql) + .await + .expect("SQL CONSUME should return a response"); + + assert!( + response_is_success(&response), + "SQL CONSUME should succeed while polling: {}", + response + ); + + last_count = common::get_rows_as_hashmaps(&response).map(|rows| rows.len()).unwrap_or(0); + if last_count >= min_rows { + return response; + } + + last_response = response; + tokio::time::sleep(Duration::from_millis(100)).await; + } + + panic!( + "Timed out waiting for SQL CONSUME to return at least {} rows (last_count={}, last_response={})", + min_rows, last_count, last_response ); } @@ -1152,7 +1143,8 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, body, status, 
version) VALUES (1, 'alice message v1', 'draft', 1))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, body, status, version) VALUES (1, 'alice \ + message v1', 'draft', 1))", alice_user_id, full_table ), ) @@ -1162,7 +1154,8 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (UPDATE {} SET body = 'alice message v2', status = 'sent', version = 2 WHERE id = 1)", + "EXECUTE AS USER '{}' (UPDATE {} SET body = 'alice message v2', status = 'sent', \ + version = 2 WHERE id = 1)", alice_user_id, full_table ), ) @@ -1172,7 +1165,8 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { &service_user, password, &format!( - "EXECUTE AS USER '{}' (INSERT INTO {} (id, body, status, version) VALUES (2, 'bob message v1', 'queued', 1))", + "EXECUTE AS USER '{}' (INSERT INTO {} (id, body, status, version) VALUES (2, 'bob \ + message v1', 'queued', 1))", bob_user_id, full_table ), ) @@ -1181,10 +1175,7 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { execute_sql_as( &service_user, password, - &format!( - "EXECUTE AS USER '{}' (DELETE FROM {} WHERE id = 2)", - bob_user_id, full_table - ), + &format!("EXECUTE AS USER '{}' (DELETE FROM {} WHERE id = 2)", bob_user_id, full_table), ) .await; @@ -1198,7 +1189,9 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { "Single-partition topic should preserve sequential offsets" ); assert!( - messages.iter().all(|message| message.topic_id == topic && message.partition_id == 0), + messages + .iter() + .all(|message| message.topic_id == topic && message.partition_id == 0), "All consumed messages should belong to the requested topic partition" ); assert!( @@ -1279,7 +1272,8 @@ async fn test_topic_http_consume_preserves_impersonated_user_and_payloads() { } let _ = common::execute_sql_via_http_as_root(&format!("DROP TOPIC {}", topic)).await; - let _ = 
common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)).await; + let _ = common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)) + .await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", service_user)).await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", alice_user)).await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", bob_user)).await; @@ -1295,7 +1289,8 @@ async fn test_topic_sql_consume_docs_getting_started_flow() { execute_sql(&format!("CREATE NAMESPACE {}", namespace)).await; execute_sql(&format!( - "CREATE USER TABLE {} (id TEXT PRIMARY KEY DEFAULT ULID(), author TEXT NOT NULL, msg TEXT NOT NULL, attachment FILE, created TIMESTAMP NOT NULL DEFAULT NOW())", + "CREATE USER TABLE {} (id TEXT PRIMARY KEY DEFAULT ULID(), author TEXT NOT NULL, msg TEXT \ + NOT NULL, attachment FILE, created TIMESTAMP NOT NULL DEFAULT NOW())", full_table )) .await; @@ -1305,19 +1300,20 @@ async fn test_topic_sql_consume_docs_getting_started_flow() { topic, full_table )) .await; + wait_for_topic_ready(&topic, 1).await; + if common::is_cluster_mode() { + tokio::time::sleep(Duration::from_millis(750)).await; + } execute_sql(&format!( - "INSERT INTO {} (author, msg) VALUES ('user', 'Write a short summary of this support ticket.')", + "INSERT INTO {} (author, msg) VALUES ('user', 'Write a short summary of this support \ + ticket.')", full_table )) .await; - let consume_response = common::execute_sql_via_http_as_root(&format!( - "CONSUME FROM {} FROM EARLIEST LIMIT 1", - topic - )) - .await - .expect("SQL CONSUME should return a response"); + let consume_sql = format!("CONSUME FROM {} FROM EARLIEST LIMIT 1", topic); + let consume_response = poll_sql_consume_until(&consume_sql, 1, Duration::from_secs(20)).await; assert!( response_is_success(&consume_response), @@ -1351,11 +1347,15 @@ async fn test_topic_sql_consume_docs_getting_started_flow() { 
parse_string_field(&payload, "msg").as_deref(), Some("Write a short summary of this support ticket.") ); - assert!(parse_string_field(&payload, "id").is_some(), "payload should include generated ULID"); + assert!( + parse_string_field(&payload, "id").is_some(), + "payload should include generated ULID" + ); assert!(payload.get("created").is_some(), "payload should include created timestamp"); let _ = common::execute_sql_via_http_as_root(&format!("DROP TOPIC {}", topic)).await; - let _ = common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)).await; + let _ = common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)) + .await; } #[tokio::test] @@ -1413,10 +1413,7 @@ async fn test_topic_http_consume_direct_multi_user_publishers_no_missing_changes execute_sql_as( &user_a_name, &password_a, - &format!( - "UPDATE {} SET body = 'user-a-v2', version = 2 WHERE id = 101", - full_table_a - ), + &format!("UPDATE {} SET body = 'user-a-v2', version = 2 WHERE id = 101", full_table_a), ) .await; execute_sql_as( @@ -1440,10 +1437,7 @@ async fn test_topic_http_consume_direct_multi_user_publishers_no_missing_changes execute_sql_as( &user_b_name, &password_b, - &format!( - "UPDATE {} SET body = 'user-b-v2', version = 2 WHERE id = 202", - full_table_b - ), + &format!("UPDATE {} SET body = 'user-b-v2', version = 2 WHERE id = 202", full_table_b), ) .await; execute_sql_as( @@ -1467,10 +1461,7 @@ async fn test_topic_http_consume_direct_multi_user_publishers_no_missing_changes execute_sql_as( &user_c_name, &password_c, - &format!( - "UPDATE {} SET body = 'user-c-v2', version = 2 WHERE id = 303", - full_table_c - ), + &format!("UPDATE {} SET body = 'user-c-v2', version = 2 WHERE id = 303", full_table_c), ) .await; execute_sql_as( @@ -1495,7 +1486,9 @@ async fn test_topic_http_consume_direct_multi_user_publishers_no_missing_changes "Direct multi-user publishing should not leave gaps in offsets" ); assert!( - 
messages.iter().all(|message| message.topic_id == topic && message.partition_id == 0), + messages + .iter() + .all(|message| message.topic_id == topic && message.partition_id == 0), "All consumed messages should belong to the requested topic partition" ); @@ -1529,10 +1522,14 @@ async fn test_topic_http_consume_direct_multi_user_publishers_no_missing_changes .into_iter() .collect(); - assert_eq!(observed, expected, "Topic consume should return every direct user change exactly once"); + assert_eq!( + observed, expected, + "Topic consume should return every direct user change exactly once" + ); let _ = common::execute_sql_via_http_as_root(&format!("DROP TOPIC {}", topic)).await; - let _ = common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)).await; + let _ = common::execute_sql_via_http_as_root(&format!("DROP NAMESPACE {} CASCADE", namespace)) + .await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", user_a)).await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", user_b)).await; let _ = common::execute_sql_via_http_as_root(&format!("DROP USER {}", user_c)).await; diff --git a/cli/tests/smoke/topics/smoke_test_topic_high_load.rs b/cli/tests/smoke/topics/smoke_test_topic_high_load.rs index 610a5d0bf..50f34b655 100644 --- a/cli/tests/smoke/topics/smoke_test_topic_high_load.rs +++ b/cli/tests/smoke/topics/smoke_test_topic_high_load.rs @@ -10,138 +10,56 @@ //! //! 
**Requirements**: Running KalamDB server with Topics feature enabled -use crate::common; +use std::{ + collections::{HashMap, HashSet}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::Duration, +}; + use kalam_client::consumer::{AutoOffsetReset, ConsumerRecord, TopicOp}; -use kalam_client::KalamLinkTimeouts; use kalamdb_configs::config::defaults::default_topic_visibility_timeout_secs; -use std::collections::{HashMap, HashSet}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::Duration; use tokio::sync::Mutex as TokioMutex; +use crate::{common, topic_test_support}; + /// Create a test client using common infrastructure async fn create_test_client() -> kalam_client::KalamLinkClient { - let base_url = common::leader_or_server_url(); - common::client_for_user_on_url_with_timeouts( - &base_url, - common::default_username(), - common::default_password(), - KalamLinkTimeouts::builder() - .connection_timeout_secs(10) - .receive_timeout_secs(15) - .send_timeout_secs(30) - .subscribe_timeout_secs(15) - .auth_timeout_secs(10) - .initial_data_timeout(Duration::from_secs(60)) - .build(), - ) - .expect("Failed to build test client") + topic_test_support::create_test_client().await } /// Execute SQL via HTTP helper with error handling async fn execute_sql(sql: &str) -> Result<(), String> { - let response = common::execute_sql_via_http_as_root(sql).await.map_err(|e| e.to_string())?; - let status = response.get("status").and_then(|s| s.as_str()).unwrap_or(""); - if status.eq_ignore_ascii_case("success") { - Ok(()) - } else { - let err_msg = response - .get("error") - .and_then(|e| e.get("message")) - .and_then(|m| m.as_str()) - .unwrap_or("Unknown error"); - Err(format!("SQL failed: {}", err_msg)) - } + topic_test_support::execute_sql(sql).await } fn is_retryable_consumer_poll_error(message: &str) -> bool { - let normalized = message.to_ascii_lowercase(); - normalized.contains("error decoding") || normalized.contains("network") + 
topic_test_support::is_retryable_consumer_poll_error(message) } -fn using_fresh_test_server() -> bool { - if let Ok(value) = std::env::var("KALAMDB_SERVER_TYPE") { - match value.trim().to_ascii_lowercase().as_str() { - "fresh" => return true, - "running" | "cluster" => return false, - _ => {}, - } - } - - if let Ok(value) = std::env::var("KALAMDB_AUTO_START_TEST_SERVER") { - match value.trim() { - "1" => return true, - "0" => return false, - _ => {}, +fn configured_topic_visibility_timeout_secs() -> u64 { + for env_key in [ + "KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS", + "KALAMDB_VISIBILITY_TIMEOUT_SECS", + ] { + if let Some(value) = std::env::var(env_key).ok().and_then(|raw| raw.parse().ok()) { + return value; } } - if std::env::var_os("KALAMDB_STORAGE_DIR").is_some() { - return true; - } - - std::env::var_os("KALAMDB_SERVER_URL").is_none() - && std::env::var_os("KALAMDB_CLUSTER_URLS").is_none() + default_topic_visibility_timeout_secs() } -fn configured_topic_visibility_timeout_secs() -> u64 { - if let Some(value) = std::env::var("KALAMDB_VISIBILITY_TIMEOUT_SECS") - .ok() - .and_then(|raw| raw.parse().ok()) - { - return value; - } - - if !using_fresh_test_server() { - return default_topic_visibility_timeout_secs(); - } - - let mut config_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); - config_path.pop(); - config_path.push("backend"); - config_path.push("server.toml"); - - std::fs::read_to_string(config_path) - .ok() - .and_then(|raw| toml::from_str::(&raw).ok()) - .and_then(|config| config.get("topics")?.get("visibility_timeout_secs")?.as_integer()) - .and_then(|value| (value >= 0).then_some(value as u64)) - .unwrap_or_else(default_topic_visibility_timeout_secs) +fn topic_recovery_deadline() -> Duration { + let configured = configured_topic_visibility_timeout_secs(); + let fallback = default_topic_visibility_timeout_secs(); + Duration::from_secs(configured.max(fallback) + 30) } async fn wait_for_topic_ready(topic: &str, expected_routes: usize) { - let 
sql = format!("SELECT routes FROM system.topics WHERE topic_id = '{}'", topic); - let deadline = std::time::Instant::now() + Duration::from_secs(30); - - while std::time::Instant::now() < deadline { - if let Ok(response) = common::execute_sql_via_http_as_root(&sql).await { - if let Some(rows) = common::get_rows_as_hashmaps(&response) { - if let Some(row) = rows.first() { - if let Some(routes_value) = row.get("routes") { - let routes_untyped = common::extract_typed_value(routes_value); - if let Some(routes_json) = routes_untyped - .as_str() - .and_then(|raw| serde_json::from_str::(raw).ok()) - { - let route_count = - routes_json.as_array().map(|routes| routes.len()).unwrap_or(0); - if route_count >= expected_routes { - tokio::time::sleep(Duration::from_millis(100)).await; - return; - } - } - } - } - } - } - tokio::time::sleep(Duration::from_millis(100)).await; - } - - panic!( - "Timed out waiting for topic '{}' to have at least {} route(s)", - topic, expected_routes - ); + topic_test_support::wait_for_topic_ready(topic, expected_routes).await; } /// Helper to parse JSON payload from binary @@ -191,7 +109,8 @@ async fn test_topic_high_load_concurrent_publishers() { // Create multiple tables with different types and schemas let shared_table = format!("{}.shared_metrics", namespace); execute_sql(&format!( - "CREATE SHARED TABLE {} (id BIGINT PRIMARY KEY, name TEXT, value DOUBLE, active BOOLEAN, counter INT, timestamp BIGINT)", + "CREATE SHARED TABLE {} (id BIGINT PRIMARY KEY, name TEXT, value DOUBLE, active BOOLEAN, \ + counter INT, timestamp BIGINT)", shared_table )) .await @@ -199,7 +118,8 @@ async fn test_topic_high_load_concurrent_publishers() { let user_table = format!("{}.user_profiles", namespace); execute_sql(&format!( - "CREATE USER TABLE {} (id INT PRIMARY KEY, username TEXT, score DOUBLE, level INT, verified BOOLEAN)", + "CREATE USER TABLE {} (id INT PRIMARY KEY, username TEXT, score DOUBLE, level INT, \ + verified BOOLEAN)", user_table )) .await @@ -207,7 
+127,8 @@ async fn test_topic_high_load_concurrent_publishers() { let stream_table = format!("{}.event_stream", namespace); execute_sql(&format!( - "CREATE STREAM TABLE {} (event_id BIGINT, event_type TEXT, payload TEXT, value INT, success BOOLEAN) WITH (TTL_SECONDS = 3600)", + "CREATE STREAM TABLE {} (event_id BIGINT, event_type TEXT, payload TEXT, value INT, \ + success BOOLEAN) WITH (TTL_SECONDS = 3600)", stream_table )) .await @@ -215,7 +136,8 @@ async fn test_topic_high_load_concurrent_publishers() { let product_table = format!("{}.products", namespace); execute_sql(&format!( - "CREATE SHARED TABLE {} (product_id INT PRIMARY KEY, product_name TEXT, price DOUBLE, stock INT, available BOOLEAN)", + "CREATE SHARED TABLE {} (product_id INT PRIMARY KEY, product_name TEXT, price DOUBLE, \ + stock INT, available BOOLEAN)", product_table )) .await @@ -223,7 +145,8 @@ async fn test_topic_high_load_concurrent_publishers() { let session_table = format!("{}.user_sessions", namespace); execute_sql(&format!( - "CREATE USER TABLE {} (session_id BIGINT PRIMARY KEY, user_id INT, duration INT, active BOOLEAN, score DOUBLE)", + "CREATE USER TABLE {} (session_id BIGINT PRIMARY KEY, user_id INT, duration INT, active \ + BOOLEAN, score DOUBLE)", session_table )) .await @@ -360,7 +283,8 @@ async fn test_topic_high_load_concurrent_publishers() { || last_new_record_time.elapsed() > Duration::from_secs(3)) { eprintln!( - "[CONSUMER] No new records, stopping (unique: {}, time_since_new: {}s)", + "[CONSUMER] No new records, stopping (unique: {}, \ + time_since_new: {}s)", seen_offsets.len(), last_new_record_time.elapsed().as_secs() ); @@ -429,7 +353,8 @@ async fn test_topic_high_load_concurrent_publishers() { 0 => { // Shared metrics: INSERT then UPDATE let insert_sql = format!( - "INSERT INTO {} (id, name, value, active, counter, timestamp) VALUES ({}, 'metric_{}', {}, {}, {}, {})", + "INSERT INTO {} (id, name, value, active, counter, timestamp) VALUES \ + ({}, 'metric_{}', {}, {}, {}, 
{})", shared_table, record_id, record_id, @@ -476,7 +401,8 @@ async fn test_topic_high_load_concurrent_publishers() { 1 => { // User profiles: INSERT then UPDATE let insert_sql = format!( - "INSERT INTO {} (id, username, score, level, verified) VALUES ({}, 'user_{}', {}, {}, {})", + "INSERT INTO {} (id, username, score, level, verified) VALUES ({}, \ + 'user_{}', {}, {}, {})", user_table, record_id, record_id, @@ -522,7 +448,8 @@ async fn test_topic_high_load_concurrent_publishers() { 2 => { // Stream events: INSERT only (2 records per iteration) let insert_sql = format!( - "INSERT INTO {} (event_id, event_type, payload, value, success) VALUES ({}, 'type_{}', 'payload_{}', {}, {})", + "INSERT INTO {} (event_id, event_type, payload, value, success) \ + VALUES ({}, 'type_{}', 'payload_{}', {}, {})", stream_table, record_id, record_id % 10, @@ -548,7 +475,8 @@ async fn test_topic_high_load_concurrent_publishers() { // Another INSERT for stream let record_id2 = record_id + 100000; let insert_sql2 = format!( - "INSERT INTO {} (event_id, event_type, payload, value, success) VALUES ({}, 'type_{}', 'payload_{}', {}, {})", + "INSERT INTO {} (event_id, event_type, payload, value, success) \ + VALUES ({}, 'type_{}', 'payload_{}', {}, {})", stream_table, record_id2, record_id2 % 10, @@ -572,7 +500,8 @@ async fn test_topic_high_load_concurrent_publishers() { 3 => { // Products: INSERT then UPDATE let insert_sql = format!( - "INSERT INTO {} (product_id, product_name, price, stock, available) VALUES ({}, 'product_{}', {}, {}, {})", + "INSERT INTO {} (product_id, product_name, price, stock, available) \ + VALUES ({}, 'product_{}', {}, {}, {})", product_table, record_id, record_id, @@ -618,7 +547,8 @@ async fn test_topic_high_load_concurrent_publishers() { 4 => { // User sessions: INSERT then UPDATE let insert_sql = format!( - "INSERT INTO {} (session_id, user_id, duration, active, score) VALUES ({}, {}, {}, {}, {})", + "INSERT INTO {} (session_id, user_id, duration, active, score) 
VALUES \ + ({}, {}, {}, {}, {})", session_table, record_id as i64, record_id % 10000, @@ -769,8 +699,9 @@ async fn test_topic_high_load_concurrent_publishers() { assert!( unique_coverage >= min_unique_coverage, - "Expected at least {}% unique event coverage, got {:.1}% ({}/{}) - Synchronous publishing should capture all events.\n\ - Check for table creation failures or write errors that prevent events from being published.", + "Expected at least {}% unique event coverage, got {:.1}% ({}/{}) - Synchronous publishing \ + should capture all events.\nCheck for table creation failures or write errors that \ + prevent events from being published.", min_unique_coverage, unique_coverage, received_events.len(), @@ -872,50 +803,22 @@ async fn test_topic_high_load_two_consumers_same_group_single_delivery() { let topic = topic.clone(); let group_id = group_id.clone(); tokio::spawn(async move { - let client = create_test_client().await; - let mut consumer = client - .consumer() - .topic(&topic) - .group_id(&group_id) - .auto_offset_reset(AutoOffsetReset::Earliest) - .max_poll_records(200) - .build() - .expect("Failed to build consumer"); - - let mut seen_offsets = HashSet::<(u32, u64)>::new(); - let deadline = std::time::Instant::now() + Duration::from_secs(150); - let mut idle_loops: u32 = 0; - - while std::time::Instant::now() < deadline { - match consumer.poll().await { - Ok(batch) if batch.is_empty() => { - idle_loops += 1; - if publishers_done.load(Ordering::Relaxed) && idle_loops >= 40 { - break; - } - tokio::time::sleep(Duration::from_millis(100)).await; - }, - Ok(batch) => { - idle_loops = 0; - for record in &batch { - seen_offsets.insert((record.partition_id, record.offset)); - consumer.mark_processed(record); - } - - let _ = consumer.commit_sync().await; - }, - Err(err) => { - let message = err.to_string(); - if is_retryable_consumer_poll_error(&message) { - tokio::time::sleep(Duration::from_millis(100)).await; - continue; - } - panic!("{} poll error: {}", 
consumer_name, message); - }, - } - } - - let _ = consumer.commit_sync().await; + let mut consumer = + topic_test_support::build_test_consumer(&topic, &group_id, 200, false).await; + let seen_offsets = topic_test_support::poll_unique_offsets_until( + &mut consumer, + topic_test_support::UniqueOffsetPollConfig { + expected_messages: None, + publishers_done: Some(publishers_done), + deadline: Duration::from_secs(150), + idle_break_after: 40, + idle_sleep: Duration::from_millis(100), + per_record_delay: Duration::ZERO, + commit_each_batch: true, + }, + ) + .await; + eprintln!("[TEST] {} received {} offsets", consumer_name, seen_offsets.len()); seen_offsets }) }; @@ -926,27 +829,14 @@ async fn test_topic_high_load_two_consumers_same_group_single_delivery() { tokio::time::sleep(Duration::from_millis(300)).await; let publisher_parallelism = 24; - let per_publisher = expected_messages / publisher_parallelism; - let mut publish_handles = Vec::with_capacity(publisher_parallelism); - - for publisher in 0..publisher_parallelism { - let table = table.clone(); - publish_handles.push(tokio::spawn(async move { - for idx in 0..per_publisher { - let id = (publisher * per_publisher + idx) as i64; - execute_sql(&format!( - "INSERT INTO {} (id, payload) VALUES ({}, 'event_{}')", - table, id, id - )) - .await - .expect("Insert failed"); - } - })); - } - - for handle in publish_handles { - handle.await.expect("Publisher task failed"); - } + topic_test_support::publish_numbered_rows( + &table, + "payload", + "event", + expected_messages, + publisher_parallelism, + ) + .await; publishers_done.store(true, Ordering::Relaxed); let consumer_a_offsets = consumer_a_handle.await.expect("consumer-a failed"); @@ -1020,48 +910,22 @@ async fn test_topic_fan_out_different_groups_receive_all() { |group_id: String, publishers_done: Arc, label: &'static str| { let topic = topic.clone(); tokio::spawn(async move { - let client = create_test_client().await; - let mut consumer = client - .consumer() - 
.topic(&topic) - .group_id(&group_id) - .auto_offset_reset(AutoOffsetReset::Earliest) - .max_poll_records(200) - .build() - .expect("build consumer"); - - let mut seen = HashSet::<(u32, u64)>::new(); - let deadline = std::time::Instant::now() + Duration::from_secs(150); - let mut idle: u32 = 0; - - while std::time::Instant::now() < deadline { - match consumer.poll().await { - Ok(batch) if batch.is_empty() => { - idle += 1; - if publishers_done.load(Ordering::Relaxed) && idle >= 40 { - break; - } - tokio::time::sleep(Duration::from_millis(100)).await; - }, - Ok(batch) => { - idle = 0; - for rec in &batch { - seen.insert((rec.partition_id, rec.offset)); - consumer.mark_processed(rec); - } - let _ = consumer.commit_sync().await; - }, - Err(e) => { - let msg = e.to_string(); - if is_retryable_consumer_poll_error(&msg) { - tokio::time::sleep(Duration::from_millis(100)).await; - continue; - } - panic!("{} poll error: {}", label, msg); - }, - } - } - let _ = consumer.commit_sync().await; + let mut consumer = + topic_test_support::build_test_consumer(&topic, &group_id, 200, false).await; + let seen = topic_test_support::poll_unique_offsets_until( + &mut consumer, + topic_test_support::UniqueOffsetPollConfig { + expected_messages: None, + publishers_done: Some(publishers_done), + deadline: Duration::from_secs(150), + idle_break_after: 40, + idle_sleep: Duration::from_millis(100), + per_record_delay: Duration::ZERO, + commit_each_batch: true, + }, + ) + .await; + eprintln!("[TEST] {} received {} offsets", label, seen.len()); seen }) }; @@ -1071,27 +935,7 @@ async fn test_topic_fan_out_different_groups_receive_all() { tokio::time::sleep(Duration::from_millis(300)).await; - // Publish - let parallelism = 10; - let per = expected_messages / parallelism; - let mut pubs = Vec::new(); - for p in 0..parallelism { - let tbl = table.clone(); - pubs.push(tokio::spawn(async move { - for i in 0..per { - let id = (p * per + i) as i64; - execute_sql(&format!( - "INSERT INTO {} (id, data) 
VALUES ({}, 'val_{}')", - tbl, id, id - )) - .await - .expect("insert"); - } - })); - } - for h in pubs { - h.await.expect("pub task"); - } + topic_test_support::publish_numbered_rows(&table, "data", "val", expected_messages, 10).await; publishers_done.store(true, Ordering::Relaxed); let offsets_a = handle_a.await.expect("group-a consumer"); @@ -1159,75 +1003,28 @@ async fn test_topic_four_consumers_same_group_no_duplicates() { let label = format!("consumer-{}", idx); consumer_handles.push(tokio::spawn(async move { - let client = create_test_client().await; - let mut consumer = client - .consumer() - .topic(&topic) - .group_id(&group_id) - .auto_offset_reset(AutoOffsetReset::Earliest) - .max_poll_records(100) - .build() - .expect("build consumer"); - - let mut seen = HashSet::<(u32, u64)>::new(); - let deadline = std::time::Instant::now() + Duration::from_secs(180); - let mut idle: u32 = 0; - - while std::time::Instant::now() < deadline { - match consumer.poll().await { - Ok(batch) if batch.is_empty() => { - idle += 1; - if done.load(Ordering::Relaxed) && idle >= 45 { - break; - } - tokio::time::sleep(Duration::from_millis(80)).await; - }, - Ok(batch) => { - idle = 0; - for rec in &batch { - seen.insert((rec.partition_id, rec.offset)); - consumer.mark_processed(rec); - } - let _ = consumer.commit_sync().await; - }, - Err(e) => { - let msg = e.to_string(); - if is_retryable_consumer_poll_error(&msg) { - tokio::time::sleep(Duration::from_millis(80)).await; - continue; - } - panic!("{} poll error: {}", label, msg); - }, - } - } - let _ = consumer.commit_sync().await; + let mut consumer = + topic_test_support::build_test_consumer(&topic, &group_id, 100, false).await; + let seen = topic_test_support::poll_unique_offsets_until( + &mut consumer, + topic_test_support::UniqueOffsetPollConfig { + expected_messages: None, + publishers_done: Some(done), + deadline: Duration::from_secs(180), + idle_break_after: 45, + idle_sleep: Duration::from_millis(80), + per_record_delay: 
Duration::ZERO, + commit_each_batch: true, + }, + ) + .await; (label, seen) })); } tokio::time::sleep(Duration::from_millis(300)).await; - // Publish with high parallelism - let publisher_parallelism = 24; - let per_publisher = expected_messages / publisher_parallelism; - let mut pub_handles = Vec::with_capacity(publisher_parallelism); - for p in 0..publisher_parallelism { - let tbl = table.clone(); - pub_handles.push(tokio::spawn(async move { - for i in 0..per_publisher { - let id = (p * per_publisher + i) as i64; - execute_sql(&format!( - "INSERT INTO {} (id, value) VALUES ({}, 'item_{}')", - tbl, id, id - )) - .await - .expect("insert"); - } - })); - } - for h in pub_handles { - h.await.expect("pub task"); - } + topic_test_support::publish_numbered_rows(&table, "value", "item", expected_messages, 24).await; publishers_done.store(true, Ordering::Relaxed); // Collect results @@ -1288,8 +1085,8 @@ async fn test_topic_four_consumers_same_group_no_duplicates() { /// High-load recovery test: /// 1. Consumer A claims a range and never commits (simulated ack failure/crash). -/// 2. After visibility timeout, Consumer B (same group) must recover and process -/// the entire stream without offset gaps, even with per-message processing latency. +/// 2. After visibility timeout, Consumer B (same group) must recover and process the entire stream +/// without offset gaps, even with per-message processing latency. 
#[tokio::test] #[ntest::timeout(180000)] async fn test_topic_ack_failure_recovery_no_message_loss_with_latency() { @@ -1311,42 +1108,14 @@ async fn test_topic_ack_failure_recovery_no_message_loss_with_latency() { wait_for_topic_ready(&topic, 1).await; let expected_messages: usize = 480; - let publisher_parallelism = 12; - let per_publisher = expected_messages / publisher_parallelism; - let mut publisher_handles = Vec::with_capacity(publisher_parallelism); - - for p in 0..publisher_parallelism { - let tbl = table.clone(); - publisher_handles.push(tokio::spawn(async move { - for i in 0..per_publisher { - let id = (p * per_publisher + i) as i64; - execute_sql(&format!( - "INSERT INTO {} (id, payload) VALUES ({}, 'payload_{}')", - tbl, id, id - )) - .await - .expect("insert"); - } - })); - } - - for handle in publisher_handles { - handle.await.expect("publisher task"); - } + topic_test_support::publish_numbered_rows(&table, "payload", "payload", expected_messages, 12) + .await; let consumer_a_claim_target = 160usize; let mut claimed_by_a = HashSet::<(u32, u64)>::new(); { - let client = create_test_client().await; - let mut consumer_a = client - .consumer() - .topic(&topic) - .group_id(&group_id) - .auto_offset_reset(AutoOffsetReset::Earliest) - .enable_auto_commit(false) - .max_poll_records(80) - .build() - .expect("build consumer-a"); + let mut consumer_a = + topic_test_support::build_test_consumer(&topic, &group_id, 80, false).await; let deadline = std::time::Instant::now() + Duration::from_secs(35); while std::time::Instant::now() < deadline && claimed_by_a.len() < consumer_a_claim_target { @@ -1378,26 +1147,11 @@ async fn test_topic_ack_failure_recovery_no_message_loss_with_latency() { claimed_by_a.len() ); - // Sleep long enough for the server's topic visibility timeout to expire so - // Consumer B can recover the range claimed by Consumer A. Fresh-mode CLI - // tests start the backend with backend/server.toml, which overrides the - // library default here. 
- let visibility_timeout_secs = configured_topic_visibility_timeout_secs(); - tokio::time::sleep(Duration::from_secs(visibility_timeout_secs + 5)).await; - - let client = create_test_client().await; - let mut consumer_b = client - .consumer() - .topic(&topic) - .group_id(&group_id) - .auto_offset_reset(AutoOffsetReset::Earliest) - .enable_auto_commit(false) - .max_poll_records(120) - .build() - .expect("build consumer-b"); + let mut consumer_b = + topic_test_support::build_test_consumer(&topic, &group_id, 120, false).await; let mut recovered_offsets = HashSet::<(u32, u64)>::new(); - let deadline = std::time::Instant::now() + Duration::from_secs(80); + let deadline = std::time::Instant::now() + topic_recovery_deadline(); let mut idle_loops = 0u32; while std::time::Instant::now() < deadline && recovered_offsets.len() < expected_messages { diff --git a/cli/tests/smoke/topics/smoke_test_topic_throughput.rs b/cli/tests/smoke/topics/smoke_test_topic_throughput.rs index 7cd3d5782..15d3eecf9 100644 --- a/cli/tests/smoke/topics/smoke_test_topic_throughput.rs +++ b/cli/tests/smoke/topics/smoke_test_topic_throughput.rs @@ -1,13 +1,18 @@ // Topic throughput benchmark smoke test // Measures messages per second under various publisher/consumer configurations -use crate::common; +use std::{ + collections::HashSet, + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + use kalam_client::consumer::AutoOffsetReset; -use kalam_client::KalamLinkTimeouts; -use std::collections::HashSet; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; + +use crate::{common, topic_test_support}; // Baseline thresholds (90% of current measured performance) // Single-publisher is bottlenecked by sequential HTTP round-trips (~385 inserts/s), @@ -18,70 +23,16 @@ const THRESHOLD_MULTI_PUB_MULTI_CONSUMER: f64 = 220.0; // Stable floor across lo /// Create a test client using common 
infrastructure async fn create_test_client() -> kalam_client::KalamLinkClient { - let base_url = common::leader_or_server_url(); - common::client_for_user_on_url_with_timeouts( - &base_url, - common::default_username(), - common::default_password(), - KalamLinkTimeouts::builder() - .connection_timeout_secs(10) - .receive_timeout_secs(15) - .send_timeout_secs(30) - .subscribe_timeout_secs(15) - .auth_timeout_secs(10) - .initial_data_timeout(Duration::from_secs(60)) - .build(), - ) - .expect("Failed to build test client") + topic_test_support::create_test_client().await } /// Execute SQL via HTTP helper with error handling async fn execute_sql(sql: &str) -> Result<(), String> { - let response = common::execute_sql_via_http_as_root(sql).await.map_err(|e| e.to_string())?; - let status = response.get("status").and_then(|s| s.as_str()).unwrap_or(""); - if status.eq_ignore_ascii_case("success") { - Ok(()) - } else { - let err_msg = response - .get("error") - .and_then(|e| e.get("message")) - .and_then(|m| m.as_str()) - .unwrap_or("Unknown error"); - Err(format!("SQL failed: {}", err_msg)) - } + topic_test_support::execute_sql(sql).await } async fn wait_for_topic_ready(topic: &str, expected_routes: usize) { - let sql = format!("SELECT routes FROM system.topics WHERE topic_id = '{}'", topic); - let deadline = Instant::now() + Duration::from_secs(30); - - while Instant::now() < deadline { - if let Ok(response) = common::execute_sql_via_http_as_root(&sql).await { - if let Some(rows) = common::get_rows_as_hashmaps(&response) { - if let Some(row) = rows.first() { - if let Some(routes_value) = row.get("routes") { - let routes_untyped = common::extract_typed_value(routes_value); - if let Some(routes_json) = routes_untyped - .as_str() - .and_then(|raw| serde_json::from_str::(raw).ok()) - { - let route_count = - routes_json.as_array().map(|routes| routes.len()).unwrap_or(0); - if route_count >= expected_routes { - return; - } - } - } - } - } - } - 
tokio::time::sleep(Duration::from_millis(100)).await; - } - - panic!( - "Timed out waiting for topic '{}' to have at least {} route(s)", - topic, expected_routes - ); + topic_test_support::wait_for_topic_ready(topic, expected_routes).await; } /// Test: Single publisher, single consumer @@ -506,7 +457,8 @@ async fn smoke_test_topic_throughput_benchmark() { if !(pass1 && pass2 && pass3) { println!( - "⚠️ Throughput below advisory baseline under current load (1P→1C: {:.1}/{:.1}, 24P→1C: {:.1}/{:.1}, 40P→4C: {:.1}/{:.1})", + "⚠️ Throughput below advisory baseline under current load (1P→1C: {:.1}/{:.1}, \ + 24P→1C: {:.1}/{:.1}, 40P→4C: {:.1}/{:.1})", rate1, threshold1, rate2, threshold2, rate3, threshold3 ); } diff --git a/cli/tests/smoke/topics/topic_test_support.rs b/cli/tests/smoke/topics/topic_test_support.rs new file mode 100644 index 000000000..2a3f244e9 --- /dev/null +++ b/cli/tests/smoke/topics/topic_test_support.rs @@ -0,0 +1,230 @@ +use std::{ + collections::HashSet, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + +use kalam_client::{ + consumer::{AutoOffsetReset, TopicConsumer}, + KalamLinkClient, KalamLinkTimeouts, +}; + +use crate::common; + +pub fn default_topic_timeouts() -> KalamLinkTimeouts { + KalamLinkTimeouts::builder() + .connection_timeout_secs(10) + .receive_timeout_secs(15) + .send_timeout_secs(30) + .subscribe_timeout_secs(15) + .auth_timeout_secs(10) + .initial_data_timeout(Duration::from_secs(60)) + .build() +} + +pub fn long_topic_timeouts() -> KalamLinkTimeouts { + KalamLinkTimeouts::builder() + .connection_timeout_secs(5) + .receive_timeout_secs(120) + .send_timeout_secs(30) + .subscribe_timeout_secs(10) + .auth_timeout_secs(10) + .initial_data_timeout(Duration::from_secs(120)) + .build() +} + +pub async fn create_test_client_with_timeouts(timeouts: KalamLinkTimeouts) -> KalamLinkClient { + let base_url = common::leader_or_server_url(); + common::client_for_user_on_url_with_timeouts( + &base_url, + 
common::default_username(), + common::default_password(), + timeouts, + ) + .expect("failed to build topic test client") +} + +pub async fn create_test_client() -> KalamLinkClient { + create_test_client_with_timeouts(default_topic_timeouts()).await +} + +pub async fn execute_sql(sql: &str) -> Result<(), String> { + let response = common::execute_sql_via_http_as_root(sql).await.map_err(|e| e.to_string())?; + let status = response.get("status").and_then(|s| s.as_str()).unwrap_or(""); + if status.eq_ignore_ascii_case("success") { + Ok(()) + } else { + let err_msg = response + .get("error") + .and_then(|e| e.get("message")) + .and_then(|m| m.as_str()) + .unwrap_or("Unknown error"); + Err(format!("SQL failed: {}", err_msg)) + } +} + +pub async fn wait_for_topic_ready(topic: &str, expected_routes: usize) { + let sql = format!("SELECT routes FROM system.topics WHERE topic_id = '{}'", topic); + let deadline = Instant::now() + Duration::from_secs(30); + + while Instant::now() < deadline { + if let Ok(response) = common::execute_sql_via_http_as_root(&sql).await { + if let Some(rows) = common::get_rows_as_hashmaps(&response) { + if let Some(row) = rows.first() { + if let Some(routes_value) = row.get("routes") { + let routes_untyped = common::extract_typed_value(routes_value); + if let Some(routes_json) = routes_untyped + .as_str() + .and_then(|raw| serde_json::from_str::(raw).ok()) + { + let route_count = + routes_json.as_array().map(|routes| routes.len()).unwrap_or(0); + if route_count >= expected_routes { + tokio::time::sleep(Duration::from_millis(100)).await; + return; + } + } + } + } + } + } + tokio::time::sleep(Duration::from_millis(100)).await; + } + + panic!( + "Timed out waiting for topic '{}' to have at least {} route(s)", + topic, expected_routes + ); +} + +pub fn is_retryable_consumer_poll_error(message: &str) -> bool { + let normalized = message.to_ascii_lowercase(); + normalized.contains("error decoding") + || normalized.contains("network") + || 
normalized.contains("invalid_credentials") + || normalized.contains("invalid credentials") + || normalized.contains("invalid token") + || normalized.contains("token expired") + || normalized.contains("unauthorized") + || normalized.contains("401") +} + +pub async fn build_test_consumer( + topic: &str, + group_id: &str, + max_poll_records: u32, + enable_auto_commit: bool, +) -> TopicConsumer { + let client = create_test_client().await; + client + .consumer() + .topic(topic) + .group_id(group_id) + .auto_offset_reset(AutoOffsetReset::Earliest) + .enable_auto_commit(enable_auto_commit) + .max_poll_records(max_poll_records) + .build() + .expect("failed to build topic test consumer") +} + +pub struct UniqueOffsetPollConfig { + pub expected_messages: Option, + pub publishers_done: Option>, + pub deadline: Duration, + pub idle_break_after: u32, + pub idle_sleep: Duration, + pub per_record_delay: Duration, + pub commit_each_batch: bool, +} + +pub async fn poll_unique_offsets_until( + consumer: &mut TopicConsumer, + config: UniqueOffsetPollConfig, +) -> HashSet<(u32, u64)> { + let mut seen = HashSet::<(u32, u64)>::new(); + let deadline = Instant::now() + config.deadline; + let mut idle_loops = 0u32; + + while Instant::now() < deadline + && config.expected_messages.map(|expected| seen.len() < expected).unwrap_or(true) + { + match consumer.poll().await { + Ok(batch) if batch.is_empty() => { + idle_loops += 1; + if config + .publishers_done + .as_ref() + .map(|done| done.load(Ordering::Relaxed)) + .unwrap_or(false) + && idle_loops >= config.idle_break_after + { + break; + } + tokio::time::sleep(config.idle_sleep).await; + }, + Ok(batch) => { + idle_loops = 0; + for record in &batch { + if !config.per_record_delay.is_zero() { + tokio::time::sleep(config.per_record_delay).await; + } + seen.insert((record.partition_id, record.offset)); + consumer.mark_processed(record); + } + + if config.commit_each_batch { + let _ = consumer.commit_sync().await; + } + }, + Err(err) => { + let 
message = err.to_string(); + if is_retryable_consumer_poll_error(&message) { + tokio::time::sleep(config.idle_sleep).await; + continue; + } + panic!("topic consumer poll error: {}", message); + }, + } + } + + seen +} + +pub async fn publish_numbered_rows( + table: &str, + value_column: &str, + value_prefix: &str, + expected_messages: usize, + publisher_parallelism: usize, +) { + let mut publish_handles = Vec::with_capacity(publisher_parallelism); + + for publisher in 0..publisher_parallelism { + let base_count = expected_messages / publisher_parallelism; + let extra = usize::from(publisher < expected_messages % publisher_parallelism); + let count = base_count + extra; + let start_id = + publisher * base_count + publisher.min(expected_messages % publisher_parallelism); + let table = table.to_string(); + let value_column = value_column.to_string(); + let value_prefix = value_prefix.to_string(); + publish_handles.push(tokio::spawn(async move { + for idx in 0..count { + let id = (start_id + idx) as i64; + execute_sql(&format!( + "INSERT INTO {} (id, {}) VALUES ({}, '{}_{}')", + table, value_column, id, value_prefix, id + )) + .await + .expect("topic test insert failed"); + } + })); + } + + for handle in publish_handles { + handle.await.expect("publisher task failed"); + } +} diff --git a/cli/tests/smoke/usecases/chat_ai_example_smoke.rs b/cli/tests/smoke/usecases/chat_ai_example_smoke.rs index 0651d5383..0f92cebbf 100644 --- a/cli/tests/smoke/usecases/chat_ai_example_smoke.rs +++ b/cli/tests/smoke/usecases/chat_ai_example_smoke.rs @@ -2,9 +2,10 @@ // Covers: current README schema (USER messages + STREAM agent_events), // topic wiring, insert/query operations, and live subscription to streamed agent events -use crate::common::*; use std::time::Duration; +use crate::common::*; + fn sql_literal(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) } @@ -36,7 +37,8 @@ fn smoke_chat_ai_example_from_readme() { .as_micros() ); let assistant_reply = format!( - 
"AI reply: KalamDB stored \"{}\" in {}, streamed the drafting state through {}, and committed the final assistant row.", + "AI reply: KalamDB stored \"{}\" in {}, streamed the drafting state through {}, and \ + committed the final assistant row.", user_message, messages_table, agent_events_table ); let typing_preview = assistant_reply.chars().take(48).collect::(); @@ -89,7 +91,8 @@ fn smoke_chat_ai_example_from_readme() { // 2. Insert a user message, as the browser app would. let insert_user_message_sql = format!( - "INSERT INTO {} (room, role, author, sender_username, content) VALUES ({}, 'user', {}, {}, {})", + "INSERT INTO {} (room, role, author, sender_username, content) VALUES ({}, 'user', {}, \ + {}, {})", messages_table, sql_literal(room), sql_literal(sender_username), @@ -133,7 +136,8 @@ fn smoke_chat_ai_example_from_readme() { for (stage, preview, message) in &events { let insert_event_sql = format!( - "INSERT INTO {} (response_id, room, sender_username, stage, preview, message) VALUES ({}, {}, {}, {}, {}, {})", + "INSERT INTO {} (response_id, room, sender_username, stage, preview, message) VALUES \ + ({}, {}, {}, {}, {}, {})", agent_events_table, sql_literal(&response_id), sql_literal(room), @@ -147,7 +151,8 @@ fn smoke_chat_ai_example_from_readme() { } let insert_assistant_message_sql = format!( - "INSERT INTO {} (room, role, author, sender_username, content) VALUES ({}, 'assistant', 'KalamDB Copilot', {}, {})", + "INSERT INTO {} (room, role, author, sender_username, content) VALUES ({}, 'assistant', \ + 'KalamDB Copilot', {}, {})", messages_table, sql_literal(room), sql_literal(sender_username), @@ -178,7 +183,8 @@ fn smoke_chat_ai_example_from_readme() { } let retry_event_sql = format!( - "INSERT INTO {} (response_id, room, sender_username, stage, preview, message) VALUES ({}, {}, {}, 'typing', {}, {})", + "INSERT INTO {} (response_id, room, sender_username, stage, preview, message) VALUES \ + ({}, {}, {}, 'typing', {}, {})", agent_events_table, 
sql_literal(&format!("{}-retry-{}", response_id, retry_count)), sql_literal(room), @@ -192,7 +198,8 @@ fn smoke_chat_ai_example_from_readme() { if !received_event { eprintln!( - "⚠️ Did not receive a live agent event in the README smoke test window; continuing with persisted stream verification" + "⚠️ Did not receive a live agent event in the README smoke test window; continuing \ + with persisted stream verification" ); } diff --git a/cli/tests/smoke/usecases/smoke_test_all_datatypes.rs b/cli/tests/smoke/usecases/smoke_test_all_datatypes.rs index dd46ae118..5c87f7f31 100644 --- a/cli/tests/smoke/usecases/smoke_test_all_datatypes.rs +++ b/cli/tests/smoke/usecases/smoke_test_all_datatypes.rs @@ -1,12 +1,13 @@ // Smoke Test: All datatypes coverage across USER, SHARED, and STREAM tables -// Creates user & shared tables enumerating every KalamDataType, performs CRUD (create/insert/update/delete/select) -// and creates a stream table with insert/select verification. +// Creates user & shared tables enumerating every KalamDataType, performs CRUD +// (create/insert/update/delete/select) and creates a stream table with insert/select verification. // // This validates parser + executor acceptance of full type list in DDL plus basic DML paths. -use crate::common::*; use std::time::Duration; +use crate::common::*; + fn execute_sql_as_root_via_http_json(sql: &str) -> Result> { let runtime = tokio::runtime::Runtime::new()?; let response = runtime.block_on(execute_sql_via_http_as("root", root_password(), sql))?; @@ -86,11 +87,13 @@ fn smoke_all_datatypes_user_shared_stream() { // Sample values (omit embedding_col to avoid complex literal syntax; it will remain NULL) // BYTES literal: use simple text (backend may coerce) or hex; choose text for simplicity. 
let insert_values_row1 = format!( - "INSERT INTO {} (bool_col, int_col, big_int_col, text_col) VALUES (true, 123, 1234567890123, 'hello')", + "INSERT INTO {} (bool_col, int_col, big_int_col, text_col) VALUES (true, 123, \ + 1234567890123, 'hello')", user_full ); let insert_values_row2 = format!( - "INSERT INTO {} (bool_col, int_col, big_int_col, text_col) VALUES (false, -321, 987654321, 'world')", + "INSERT INTO {} (bool_col, int_col, big_int_col, text_col) VALUES (false, -321, \ + 987654321, 'world')", user_full ); execute_sql_as_root_via_http(&insert_values_row1).expect("insert user row1 should succeed"); diff --git a/cli/tests/smoke/usecases/smoke_test_batch_control.rs b/cli/tests/smoke/usecases/smoke_test_batch_control.rs index ea56153ed..ea9a1dc3e 100644 --- a/cli/tests/smoke/usecases/smoke_test_batch_control.rs +++ b/cli/tests/smoke/usecases/smoke_test_batch_control.rs @@ -2,11 +2,12 @@ // Covers: batch_num tracking, has_more flag, status transitions, data ordering // Tests subscription batch control correctness for initial data loading -use crate::common::*; use std::time::Duration; use kalam_client::{SubscriptionConfig, SubscriptionOptions}; +use crate::common::*; + /// Helper to create a subscription listener with custom configuration fn start_subscription_with_config( query: &str, @@ -47,7 +48,8 @@ pub enum ParsedEvent { impl ParsedEvent { /// Parse a debug-formatted ChangeEvent string into a structured event fn parse(event_str: &str) -> Self { - // Parse Ack event: "Ack { subscription_id: "...", total_rows: N, batch_control: BatchControl { batch_num: N, has_more: bool, status: Status, ... } }" + // Parse Ack event: "Ack { subscription_id: "...", total_rows: N, batch_control: + // BatchControl { batch_num: N, has_more: bool, status: Status, ... 
} }" if event_str.contains("Ack {") { if let Some(sub_id) = Self::extract_subscription_id(event_str) { if let Some(bc) = Self::extract_batch_control(event_str) { @@ -508,7 +510,8 @@ fn smoke_batch_control_multi_batch() { // Verify status transitions: first batch=loading, middle=loading_batch, last=ready if let Some(ParsedEvent::InitialDataBatch { batch_control, .. }) = batch_events.first() { - // First batch should have status=loading (or ready if single batch which shouldn't happen here) + // First batch should have status=loading (or ready if single batch which shouldn't happen + // here) assert!( batch_control.status == "loading" || batch_control.status == "ready", "First batch should have status=loading or ready" diff --git a/cli/tests/smoke/usecases/smoke_test_core_operations.rs b/cli/tests/smoke/usecases/smoke_test_core_operations.rs index 15ced16dd..cf7a002aa 100644 --- a/cli/tests/smoke/usecases/smoke_test_core_operations.rs +++ b/cli/tests/smoke/usecases/smoke_test_core_operations.rs @@ -1,9 +1,10 @@ // Comprehensive smoke test for core KalamDB operations // Tests system tables, namespaces, users, storage, and flush operations -use crate::common::*; use std::time::Duration; +use crate::common::*; + #[ntest::timeout(180_000)] #[test] fn smoke_test_core_operations() { @@ -271,7 +272,8 @@ fn test_flush_operations() { .expect("CREATE NAMESPACE should succeed"); let create_table_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', FLUSH_POLICY = 'rows:100')", + "CREATE TABLE {} (id INT PRIMARY KEY, value VARCHAR) WITH (TYPE = 'USER', FLUSH_POLICY = \ + 'rows:100')", full_table_name ); execute_sql_as_root_via_client(&create_table_sql).expect("CREATE TABLE should succeed"); diff --git a/cli/tests/smoke/usecases/smoke_test_custom_functions.rs b/cli/tests/smoke/usecases/smoke_test_custom_functions.rs index ecb00b800..04a777f0e 100644 --- a/cli/tests/smoke/usecases/smoke_test_custom_functions.rs +++ 
b/cli/tests/smoke/usecases/smoke_test_custom_functions.rs @@ -71,7 +71,8 @@ fn smoke_test_snowflake_id_default() { println!("Query output:\n{}", output); // Parse JSON to extract IDs (simple string parsing for smoke test) - // Expected format: {"columns":["id","content"],"rows":[[123,"Message 1"],[124,"Message 2"],...]} + // Expected format: {"columns":["id","content"],"rows":[[123,"Message 1"],[124,"Message + // 2"],...]} assert!(output.contains("\"rows\""), "Expected JSON rows in output"); assert!( @@ -221,7 +222,8 @@ fn smoke_test_ulid_default() { println!("📝 Inserting 3 events without specifying event_id..."); for i in 1..=3 { let insert_sql = format!( - "INSERT INTO {} (event_type, user_id, payload) VALUES ('user_action', 'user_{}', '{{\"action\":\"click\"}}')", + "INSERT INTO {} (event_type, user_id, payload) VALUES ('user_action', 'user_{}', \ + '{{\"action\":\"click\"}}')", full_table, i ); execute_sql_as_root_via_client(&insert_sql) diff --git a/cli/tests/smoke/usecases/smoke_test_file_datatype.rs b/cli/tests/smoke/usecases/smoke_test_file_datatype.rs index 2bd49beb1..d45474820 100644 --- a/cli/tests/smoke/usecases/smoke_test_file_datatype.rs +++ b/cli/tests/smoke/usecases/smoke_test_file_datatype.rs @@ -9,18 +9,20 @@ //! //! 
Run with: cargo test --test smoke smoke_test_file_datatype +use reqwest::Client; +use serde_json::Value; + use crate::common::{ force_auto_test_server_url_async, generate_unique_namespace, get_access_token_for_url, - test_context, + get_available_server_urls, is_cluster_mode, is_leader_error, + is_retryable_cluster_error_for_sql, shared_http_client, test_context, }; -use reqwest::Client; -use serde_json::Value; #[tokio::test] #[ntest::timeout(60000)] async fn test_file_datatype_upload_and_download() { let ctx = test_context(); - let client = Client::new(); + let client = shared_http_client(); let base_url = force_auto_test_server_url_async().await; let ns = generate_unique_namespace("file_test"); let table = "documents"; @@ -45,7 +47,8 @@ async fn test_file_datatype_upload_and_download() { // 3. Upload file via multipart endpoint let test_content = b"This is the file content for testing FILE datatype!"; let sql = format!( - "INSERT INTO {}.{} (id, name, attachment) VALUES ('doc1', 'My Document', FILE(\"myfile.txt\"))", + "INSERT INTO {}.{} (id, name, attachment) VALUES ('doc1', 'My Document', \ + FILE(\"myfile.txt\"))", ns, table ); let boundary = "kalamdb-boundary"; @@ -59,7 +62,8 @@ async fn test_file_datatype_upload_and_download() { push_line(&mut body, &format!("--{}", boundary)); push_line( &mut body, - "Content-Disposition: form-data; name=\"file:myfile.txt\"; filename=\"test-attachment.txt\"", + "Content-Disposition: form-data; name=\"file:myfile.txt\"; \ + filename=\"test-attachment.txt\"", ); push_line(&mut body, "Content-Type: text/plain"); push_line(&mut body, ""); @@ -67,14 +71,10 @@ async fn test_file_datatype_upload_and_download() { body.extend_from_slice(b"\r\n"); push_line(&mut body, &format!("--{}--", boundary)); - let request = client - .post(format!("{}/v1/api/sql", &base_url)) - .bearer_auth(&token) - .header("Accept", "application/json") - .header("Content-Type", format!("multipart/form-data; boundary={}", boundary)) - .body(body) - .build() - 
.expect("Failed to build multipart request"); + let content_type = format!("multipart/form-data; boundary={}", boundary); + let request = + build_multipart_sql_request(&client, &base_url, &token, &content_type, body.clone()) + .expect("Failed to build multipart request"); let content_type_header = request .headers() .get(reqwest::header::CONTENT_TYPE) @@ -86,31 +86,17 @@ async fn test_file_datatype_upload_and_download() { content_type_header ); - let response = client.execute(request).await.expect("Failed to send multipart request"); - - let status = response.status(); - let response_content_type = response - .headers() - .get(reqwest::header::CONTENT_TYPE) - .and_then(|v: &reqwest::header::HeaderValue| v.to_str().ok()) - .unwrap_or("") - .to_string(); - if !status.is_success() { - let body_text = response.text().await.expect("Failed to read upload error body"); - panic!( - "File upload failed: status={}, content-type={}, body={}", - status, response_content_type, body_text - ); - } - - let body_text = response.text().await.expect("Failed to read upload response body"); + let (upload_base_url, upload_status, body_text) = + execute_multipart_sql_with_cluster_retry(&client, &base_url, &token, &content_type, &body) + .await + .expect("Failed to send multipart request"); let body: Value = serde_json::from_str(&body_text) .unwrap_or_else(|e| panic!("Failed to parse response: {} (body: {})", e, body_text)); assert!( body["status"] == "success", "File upload failed: status={}, body={}", - status, + upload_status, body ); @@ -150,7 +136,7 @@ async fn test_file_datatype_upload_and_download() { // 5. 
Download the file let download_url = - format!("{}/v1/files/{}/{}/{}/{}", &base_url, ns, table, subfolder, stored_name); + format!("{}/v1/files/{}/{}/{}/{}", upload_base_url, ns, table, subfolder, stored_name); let download_response = client .get(&download_url) @@ -240,16 +226,143 @@ async fn execute_sql_via_http_as_for_url( token: &str, sql: &str, ) -> Result> { - let response = client + let max_attempts = if is_cluster_mode() { 5 } else { 1 }; + let mut last_error = None; + + for attempt in 0..max_attempts { + let urls = if is_cluster_mode() { + get_available_server_urls() + } else { + vec![base_url.to_string()] + }; + + for url in urls { + let response = client + .post(format!("{}/v1/api/sql", url)) + .bearer_auth(token) + .json(&serde_json::json!({ "sql": sql })) + .send() + .await?; + + let status = response.status(); + let body_text = response.text().await?; + let parsed: Value = serde_json::from_str(&body_text)?; + let response_status = parsed.get("status").and_then(Value::as_str).unwrap_or(""); + + if status.is_success() && response_status.eq_ignore_ascii_case("success") { + return Ok(parsed); + } + + let message = json_error_message(&parsed) + .unwrap_or_else(|| format!("SQL failed: status={}, body={}", status, body_text)); + if is_retryable_cluster_error_for_sql(sql, &message) { + last_error = Some(message); + continue; + } + + return Err(message.into()); + } + + if attempt + 1 < max_attempts { + tokio::time::sleep(std::time::Duration::from_millis(200 + attempt as u64 * 150)).await; + } + } + + Err(last_error + .unwrap_or_else(|| "SQL request failed on all cluster nodes".to_string()) + .into()) +} + +fn json_error_message(parsed: &Value) -> Option { + let error = parsed.get("error")?; + let message = error.get("message")?.as_str().unwrap_or(""); + let details = error.get("details").and_then(Value::as_str); + if let Some(details) = details { + return Some(format!("{} ({})", message, details)); + } + Some(message.to_string()) +} + +fn 
build_multipart_sql_request( + client: &Client, + base_url: &str, + token: &str, + content_type: &str, + body: Vec, +) -> Result { + client .post(format!("{}/v1/api/sql", base_url)) .bearer_auth(token) - .json(&serde_json::json!({ "sql": sql })) - .send() - .await?; + .header("Accept", "application/json") + .header("Content-Type", content_type) + .body(body) + .build() +} + +async fn execute_multipart_sql_with_cluster_retry( + client: &Client, + initial_base_url: &str, + token: &str, + content_type: &str, + body: &[u8], +) -> Result<(String, reqwest::StatusCode, String), Box> { + let max_attempts = if is_cluster_mode() { 5 } else { 1 }; + let mut last_error = None; + + for attempt in 0..max_attempts { + let mut urls = if is_cluster_mode() { + get_available_server_urls() + } else { + vec![initial_base_url.to_string()] + }; + if !urls.iter().any(|url| url == initial_base_url) { + urls.push(initial_base_url.to_string()); + } + + for base_url in urls { + let response = client + .execute(build_multipart_sql_request( + client, + &base_url, + token, + content_type, + body.to_vec(), + )?) 
+ .await?; + + let status = response.status(); + let response_content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v: &reqwest::header::HeaderValue| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let body_text = response.text().await?; + + if status.is_success() { + return Ok((base_url, status, body_text)); + } + + let message = format!( + "File upload failed: status={}, content-type={}, body={}", + status, response_content_type, body_text + ); + if is_cluster_mode() && is_leader_error(&message) { + last_error = Some(message); + break; + } + + return Err(message.into()); + } + + if attempt + 1 < max_attempts { + tokio::time::sleep(std::time::Duration::from_millis(250 + attempt as u64 * 150)).await; + } + } - let body = response.text().await?; - let parsed: Value = serde_json::from_str(&body)?; - Ok(parsed) + Err(last_error + .unwrap_or_else(|| "File upload failed on all cluster nodes".to_string()) + .into()) } fn push_line(body: &mut Vec, line: &str) { diff --git a/cli/tests/smoke/usecases/smoke_test_schema_history.rs b/cli/tests/smoke/usecases/smoke_test_schema_history.rs index 9f9947c68..288128bbe 100644 --- a/cli/tests/smoke/usecases/smoke_test_schema_history.rs +++ b/cli/tests/smoke/usecases/smoke_test_schema_history.rs @@ -54,7 +54,8 @@ fn smoke_test_schema_history_in_system_tables() { // Check system.schemas - should have 1 row for this table let query_v1 = format!( - "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}' ORDER BY schema_version", + "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}' ORDER BY schema_version", namespace, table ); let output_v1 = execute_sql_as_root_via_client_json(&query_v1) @@ -102,7 +103,8 @@ fn smoke_test_schema_history_in_system_tables() { // Check system.schemas - should now have 2 rows let query_v2 = format!( - "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id 
= '{}' AND table_name = '{}' ORDER BY schema_version", + "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}' ORDER BY schema_version", namespace, table ); let output_v2 = execute_sql_as_root_via_client_json(&query_v2) @@ -142,7 +144,8 @@ fn smoke_test_schema_history_in_system_tables() { println!("✅ Added column 'age' (should create version 3)"); let query_v3 = format!( - "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}' ORDER BY schema_version", + "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}' ORDER BY schema_version", namespace, table ); let output_v3 = execute_sql_as_root_via_client_json(&query_v3) @@ -174,7 +177,8 @@ fn smoke_test_schema_history_in_system_tables() { println!("✅ Added {} more columns", num_additional_alters); let query_final = format!( - "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}' ORDER BY schema_version", + "SELECT schema_version, is_latest FROM system.schemas WHERE namespace_id = '{}' AND \ + table_name = '{}' ORDER BY schema_version", namespace, table ); let output_final = execute_sql_as_root_via_client_json(&query_final) @@ -188,7 +192,8 @@ fn smoke_test_schema_history_in_system_tables() { assert_eq!( final_rows.len(), expected_rows, - "Expected {} rows (1 CREATE + 2 + {} ALTERs), got {}. Schema history is NOT being preserved!", + "Expected {} rows (1 CREATE + 2 + {} ALTERs), got {}. 
Schema history is NOT being \ + preserved!", expected_rows, num_additional_alters, final_rows.len() @@ -229,8 +234,8 @@ fn smoke_test_schema_history_in_system_tables() { // ============================================================================ /// Extract rows from CLI JSON response -/// The response format is: {"status": "success", "results": [{"schema": [...], "rows": [[...], ...]}]} -/// Returns rows as objects with column names as keys +/// The response format is: {"status": "success", "results": [{"schema": [...], "rows": [[...], +/// ...]}]} Returns rows as objects with column names as keys fn extract_rows_from_response(json_str: &str) -> Vec { let json: serde_json::Value = serde_json::from_str(json_str).expect("Failed to parse JSON"); @@ -341,7 +346,8 @@ fn smoke_test_drop_table_removes_schema_history() { // Verify we have multiple versions let query_before = format!( - "SELECT COUNT(*) as cnt FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT COUNT(*) as cnt FROM system.schemas WHERE namespace_id = '{}' AND table_name = \ + '{}'", namespace, table ); let before_output = execute_sql_as_root_via_client_json(&query_before) @@ -355,7 +361,8 @@ fn smoke_test_drop_table_removes_schema_history() { // Verify all versions removed let query_after = format!( - "SELECT COUNT(*) as cnt FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + "SELECT COUNT(*) as cnt FROM system.schemas WHERE namespace_id = '{}' AND table_name = \ + '{}'", namespace, table ); let after_output = execute_sql_as_root_via_client_json(&query_after) diff --git a/cli/tests/smoke/usecases/smoke_test_timing_output.rs b/cli/tests/smoke/usecases/smoke_test_timing_output.rs index eac280f08..753323fc6 100644 --- a/cli/tests/smoke/usecases/smoke_test_timing_output.rs +++ b/cli/tests/smoke/usecases/smoke_test_timing_output.rs @@ -5,10 +5,12 @@ //! //! 
Reference: docs/CLI.md lines 146-152 -use crate::common::*; -use regex::Regex; use std::time::Duration; +use regex::Regex; + +use crate::common::*; + /// Parse timing from CLI output like "Took: 1.234 ms" /// Returns timing in milliseconds or None if not found fn parse_timing_ms(output: &str) -> Option { @@ -36,7 +38,8 @@ fn smoke_test_timing_output_format() { // Create simple table execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, value TEXT) WITH (TYPE='USER', FLUSH_POLICY='rows:1000')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, value TEXT) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:1000')", full )) .expect("create table"); @@ -212,7 +215,8 @@ fn smoke_test_timing_aggregation_query() { // Execute aggregation query let output = execute_sql_as_root_via_cli(&format!( - "SELECT category, COUNT(*) as count, SUM(amount) as total FROM {} GROUP BY category ORDER BY category", + "SELECT category, COUNT(*) as count, SUM(amount) as total FROM {} GROUP BY category ORDER \ + BY category", full )) .expect("aggregation query"); @@ -249,7 +253,8 @@ fn smoke_test_timing_join_query() { // Create users table execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', ACCESS_LEVEL='PUBLIC')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, name TEXT) WITH (TYPE='SHARED', \ + ACCESS_LEVEL='PUBLIC')", full1 )) .expect("create users table"); @@ -267,7 +272,8 @@ fn smoke_test_timing_join_query() { // Create orders table execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, user_id BIGINT, total DOUBLE) WITH (TYPE='SHARED', ACCESS_LEVEL='PUBLIC')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, user_id BIGINT, total DOUBLE) WITH \ + (TYPE='SHARED', ACCESS_LEVEL='PUBLIC')", full2 )) .expect("create orders table"); @@ -380,7 +386,8 @@ fn smoke_test_timing_flush_operation() { // Create table with low flush threshold execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY 
KEY, data TEXT) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, data TEXT) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", full )) .expect("create table"); diff --git a/cli/tests/smoke/usecases/smoke_test_websocket_capacity.rs b/cli/tests/smoke/usecases/smoke_test_websocket_capacity.rs index 77ecc27a6..cb2917c57 100644 --- a/cli/tests/smoke/usecases/smoke_test_websocket_capacity.rs +++ b/cli/tests/smoke/usecases/smoke_test_websocket_capacity.rs @@ -1,20 +1,27 @@ // Smoke test to stress WebSocket connection capacity and ensure HTTP API stays responsive -use crate::common::*; +use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + use futures_util::{SinkExt, StreamExt}; use serde_json::json; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::time::{Duration, Instant}; -use tokio::net::TcpStream; -use tokio::runtime::Runtime; -use tokio::sync::mpsc; -use tokio_tungstenite::tungstenite::{ - client::IntoClientRequest, - http::header::{HeaderValue, AUTHORIZATION, USER_AGENT}, - protocol::Message, +use tokio::{net::TcpStream, runtime::Runtime, sync::mpsc}; +use tokio_tungstenite::{ + connect_async, + tungstenite::{ + client::IntoClientRequest, + http::header::{HeaderValue, AUTHORIZATION, USER_AGENT}, + protocol::Message, + }, + MaybeTlsStream, WebSocketStream, }; -use tokio_tungstenite::{connect_async, MaybeTlsStream, WebSocketStream}; + +use crate::common::*; const AUTH_USERNAME: &str = "admin"; // Use conservative count to avoid overwhelming server during testing @@ -58,7 +65,7 @@ fn smoke_test_websocket_capacity() { let token = get_access_token(AUTH_USERNAME, default_password()) .await .unwrap_or_else(|e| panic!("Failed to get access token: {}", e)); - + // We need to keep connections alive while running SQL queries // The server sends ping frames every 5s, and we need to respond with pong // To do this, we split connections into read/write halves and 
spawn tasks @@ -72,28 +79,22 @@ fn smoke_test_websocket_capacity() { // Timeout each connection attempt to avoid infinite hangs let stream = match tokio::time::timeout( CONNECTION_TIMEOUT, - open_authenticated_connection( - idx, - &token, - &table_for_rt, - &subscription_id, - ), + open_authenticated_connection(idx, &token, &table_for_rt, &subscription_id), ) .await { Ok(s) => s, - Err(_) => panic!( - "Timeout opening websocket #{} after {:?}", - idx, CONNECTION_TIMEOUT - ), + Err(_) => { + panic!("Timeout opening websocket #{} after {:?}", idx, CONNECTION_TIMEOUT) + }, }; - + // Split the stream and spawn a background task to handle incoming messages // This ensures we respond to ping frames and don't timeout let (write, mut read) = stream.split(); let (close_tx, mut close_rx) = mpsc::channel::<()>(1); close_senders.push(close_tx); - + let stop = Arc::clone(&stop_flag); let handle = tokio::spawn(async move { loop { @@ -136,15 +137,13 @@ fn smoke_test_websocket_capacity() { } println!( - "Opened {} authenticated WebSocket connections with keepalive. Verifying SQL responsiveness...", + "Opened {} authenticated WebSocket connections with keepalive. 
Verifying SQL \ + responsiveness...", close_senders.len() ); let sql_duration = run_simple_sql().await; - println!( - "SELECT 1 completed in {:?} while websockets were open", - sql_duration - ); + println!("SELECT 1 completed in {:?} while websockets were open", sql_duration); assert!( sql_duration <= SQL_RESPONSIVENESS_BUDGET, "SQL request took {:?}, exceeding {:?} budget while websockets were open", @@ -153,12 +152,10 @@ fn smoke_test_websocket_capacity() { ); let live_queries_snapshot = fetch_live_queries_snapshot().await; - println!( - "system.live snapshot while connections active:\n{}", - live_queries_snapshot - ); + println!("system.live snapshot while connections active:\n{}", live_queries_snapshot); - let active_subscription_count = count_live_query_subscriptions(subscription_prefix_for_rt).await; + let active_subscription_count = + count_live_query_subscriptions(subscription_prefix_for_rt).await; assert!( active_subscription_count >= close_senders.len(), "Expected at least {} live query rows, found {}", @@ -171,17 +168,17 @@ fn smoke_test_websocket_capacity() { for close_tx in close_senders { let _ = close_tx.send(()).await; } - + // Wait for all reader tasks to complete and collect write halves for handle in reader_handles { match handle.await { Ok((_idx, _write)) => { // Reunite not possible after split, just drop the write half // The connection will close when both halves are dropped - } + }, Err(e) => { eprintln!("Reader task panicked: {}", e); - } + }, } } @@ -191,7 +188,8 @@ fn smoke_test_websocket_capacity() { tokio::time::sleep(Duration::from_millis(100)).await; // Verify system.live entries are cleaned up after closing - let post_close_count = count_live_query_subscriptions(subscription_prefix_for_cleanup.clone()).await; + let post_close_count = + count_live_query_subscriptions(subscription_prefix_for_cleanup.clone()).await; println!( "Live queries count after closing connections: {} (should be 0)", post_close_count @@ -432,9 +430,10 @@ async fn 
wait_for_subscription_ack( "initial_data_batch" => continue, other => { return Err(format!( - "Websocket #{} received unexpected message type '{}' while awaiting ack: {}", - idx, other, payload - )); + "Websocket #{} received unexpected message type '{}' while \ + awaiting ack: {}", + idx, other, payload + )); }, } } @@ -447,7 +446,8 @@ async fn wait_for_subscription_ack( }, other => { return Err(format!( - "Websocket #{} received unexpected message while awaiting subscription ack: {:?}", + "Websocket #{} received unexpected message while awaiting subscription ack: \ + {:?}", idx, other )); }, @@ -460,7 +460,8 @@ fn setup_test_table(namespace: &str, full_table_name: &str) { .expect("CREATE NAMESPACE should succeed for websocket capacity test"); let create_sql = format!( - "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, value VARCHAR NOT NULL) WITH (TYPE = 'USER')", + "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, value VARCHAR NOT NULL) WITH (TYPE = \ + 'USER')", full_table_name ); execute_sql_as_root_via_client(&create_sql) diff --git a/cli/tests/storage/minio/test_minio_storage.rs b/cli/tests/storage/minio/test_minio_storage.rs index 7df512d9b..40564709f 100644 --- a/cli/tests/storage/minio/test_minio_storage.rs +++ b/cli/tests/storage/minio/test_minio_storage.rs @@ -3,20 +3,18 @@ //! This test verifies KalamDB can write flushed data to MinIO and that //! manifest.json + Parquet data files are created for both USER and SHARED tables. 
-use crate::common::*; +use std::{borrow::Cow, env, sync::Arc, time::Duration}; + use futures_util::StreamExt; -use object_store::aws::AmazonS3Builder; -use object_store::path::Path as ObjectPath; -use object_store::prefix::PrefixStore; -use object_store::ObjectStore; -use object_store::ObjectStoreExt; +use object_store::{ + aws::AmazonS3Builder, path::Path as ObjectPath, prefix::PrefixStore, ObjectStore, + ObjectStoreExt, +}; use serde_json::{json, Value as JsonValue}; -use std::borrow::Cow; -use std::env; -use std::sync::Arc; -use std::time::Duration; use tokio::runtime::Runtime; +use crate::common::*; + const MINIO_ENDPOINT: &str = "http://127.0.0.1:9120"; const MINIO_ACCESS_KEY: &str = "minioadmin"; const MINIO_SECRET_KEY: &str = "minioadmin"; @@ -47,7 +45,7 @@ fn test_minio_storage_end_to_end() { if let Err(err) = minio_bucket_reachable(&runtime, &probe_store) { eprintln!("❌ MinIO auth/connectivity check failed: {}", err); eprintln!(" Verify endpoint, access key, secret key, and bucket."); - //fail the test early + // fail the test early panic!("MinIO auth/connectivity check failed"); } println!("✅ MinIO auth/connectivity check passed"); @@ -85,13 +83,10 @@ fn test_minio_storage_end_to_end() { println!(" Base directory: {}", base_directory); let create_storage_sql = format!( - "CREATE STORAGE {storage_id} \ - TYPE s3 \ - NAME 'MinIO Test Storage' \ - BASE_DIRECTORY '{base_directory}' \ - CONFIG '{config_json}' \ - SHARED_TABLES_TEMPLATE 'ns_{{namespace}}/shared_{{tableName}}' \ - USER_TABLES_TEMPLATE 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", + "CREATE STORAGE {storage_id} TYPE s3 NAME 'MinIO Test Storage' BASE_DIRECTORY \ + '{base_directory}' CONFIG '{config_json}' SHARED_TABLES_TEMPLATE \ + 'ns_{{namespace}}/shared_{{tableName}}' USER_TABLES_TEMPLATE \ + 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", ); match execute_sql_as_root_via_cli(&create_storage_sql) { @@ -114,8 +109,8 @@ fn test_minio_storage_end_to_end() { println!("\n📊 Step 3: 
Creating USER table '{}.{}'...", namespace, user_table); let create_user_table_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name VARCHAR NOT NULL) \ - WITH (TYPE='USER', STORAGE_ID='{}', FLUSH_POLICY='rows:2')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, name VARCHAR NOT NULL) WITH (TYPE='USER', \ + STORAGE_ID='{}', FLUSH_POLICY='rows:2')", namespace, user_table, storage_id ); execute_sql_as_root_via_cli(&create_user_table_sql).expect("user table creation"); @@ -123,8 +118,8 @@ fn test_minio_storage_end_to_end() { println!("\n📊 Step 4: Creating SHARED table '{}.{}'...", namespace, shared_table); let create_shared_table_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT NOT NULL) \ - WITH (TYPE='SHARED', STORAGE_ID='{}', FLUSH_POLICY='rows:2')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY, body TEXT NOT NULL) WITH (TYPE='SHARED', \ + STORAGE_ID='{}', FLUSH_POLICY='rows:2')", namespace, shared_table, storage_id ); execute_sql_as_root_via_cli(&create_shared_table_sql).expect("shared table creation"); @@ -262,13 +257,10 @@ fn test_minio_storage_check() { println!("\n🗄️ Step 1: Creating MinIO storage '{}' for check...", storage_id); let create_storage_sql = format!( - "CREATE STORAGE {storage_id} \ - TYPE s3 \ - NAME 'MinIO Check Storage' \ - BASE_DIRECTORY '{base_directory}' \ - CONFIG '{config_json}' \ - SHARED_TABLES_TEMPLATE 'ns_{{namespace}}/shared_{{tableName}}' \ - USER_TABLES_TEMPLATE 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", + "CREATE STORAGE {storage_id} TYPE s3 NAME 'MinIO Check Storage' BASE_DIRECTORY \ + '{base_directory}' CONFIG '{config_json}' SHARED_TABLES_TEMPLATE \ + 'ns_{{namespace}}/shared_{{tableName}}' USER_TABLES_TEMPLATE \ + 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", ); execute_sql_as_root_via_cli(&create_storage_sql).expect("storage creation"); @@ -397,8 +389,8 @@ struct StorageMeta { fn fetch_storage_metadata(storage_id: &str) -> StorageMeta { let sql = format!( - "SELECT 
base_directory, shared_tables_template, user_tables_template \ - FROM system.storages WHERE storage_id = '{}'", + "SELECT base_directory, shared_tables_template, user_tables_template FROM system.storages \ + WHERE storage_id = '{}'", storage_id ); let output = execute_sql_as_root_via_client_json(&sql).expect("storage metadata query"); @@ -571,8 +563,9 @@ fn wait_for_storage_check_healthy(storage_id: &str, timeout: Duration) -> Result } Err(format!( - "MinIO storage unhealthy after {:?}. Last check: {}. \n\ -Set MINIO_ENDPOINT/MINIO_ACCESS_KEY/MINIO_SECRET_KEY/MINIO_BUCKET/MINIO_REGION if server cannot reach MinIO.", + "MinIO storage unhealthy after {:?}. Last check: {}. \nSet \ + MINIO_ENDPOINT/MINIO_ACCESS_KEY/MINIO_SECRET_KEY/MINIO_BUCKET/MINIO_REGION if server \ + cannot reach MinIO.", timeout, last_error )) } diff --git a/cli/tests/storage/test_hot_cold_storage.rs b/cli/tests/storage/test_hot_cold_storage.rs index e3c803241..a853b1201 100644 --- a/cli/tests/storage/test_hot_cold_storage.rs +++ b/cli/tests/storage/test_hot_cold_storage.rs @@ -46,7 +46,8 @@ fn test_hot_cold_storage_data_integrity() { // === Phase 1: INSERT into hot storage === execute_sql(&format!( - "INSERT INTO {} (id, name, value) VALUES (1, 'Alice', 100), (2, 'Bob', 200), (3, 'Charlie', 300)", + "INSERT INTO {} (id, name, value) VALUES (1, 'Alice', 100), (2, 'Bob', 200), (3, \ + 'Charlie', 300)", full_table_name )) .expect("Initial INSERT failed"); @@ -296,7 +297,8 @@ fn test_update_operations_hot_and_cold() { // === Insert test data === execute_sql(&format!( - "INSERT INTO {} (id, status, count) VALUES (1, 'active', 10), (2, 'inactive', 20), (3, 'pending', 30)", + "INSERT INTO {} (id, status, count) VALUES (1, 'active', 10), (2, 'inactive', 20), (3, \ + 'pending', 30)", full_table_name )) .expect("INSERT failed"); diff --git a/cli/tests/storage/test_storage_lifecycle.rs b/cli/tests/storage/test_storage_lifecycle.rs index 7e0c20ec0..bdaacd7b2 100644 --- 
a/cli/tests/storage/test_storage_lifecycle.rs +++ b/cli/tests/storage/test_storage_lifecycle.rs @@ -3,10 +3,11 @@ //! Covers CREATE STORAGE / DROP STORAGE flows to ensure tables block deletion //! until referencing tables are removed. -use crate::common::*; +use std::{fs, path::PathBuf}; + use serde_json::Value as JsonValue; -use std::fs; -use std::path::PathBuf; + +use crate::common::*; /// Ensure DROP STORAGE fails while tables still reference the storage #[test] @@ -35,12 +36,9 @@ fn test_storage_drop_requires_detached_tables() { .expect("namespace creation"); let create_storage_sql = format!( - "CREATE STORAGE {storage_id} \ - TYPE filesystem \ - NAME 'CLI Storage Test' \ - PATH '{base_dir}' \ - SHARED_TABLES_TEMPLATE 'ns_{{namespace}}/shared_{{tableName}}' \ - USER_TABLES_TEMPLATE 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", + "CREATE STORAGE {storage_id} TYPE filesystem NAME 'CLI Storage Test' PATH '{base_dir}' \ + SHARED_TABLES_TEMPLATE 'ns_{{namespace}}/shared_{{tableName}}' USER_TABLES_TEMPLATE \ + 'ns_{{namespace}}/user_{{tableName}}/user_{{userId}}'", base_dir = base_dir_sql ); execute_sql_as_root_via_cli(&create_storage_sql).expect("storage creation"); @@ -61,11 +59,13 @@ fn test_storage_drop_requires_detached_tables() { ); let create_user_table_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, body TEXT) WITH (TYPE='USER', STORAGE_ID='{}', FLUSH_POLICY='rows:5')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, body TEXT) WITH (TYPE='USER', \ + STORAGE_ID='{}', FLUSH_POLICY='rows:5')", namespace, user_table, storage_id ); execute_sql_as_root_via_cli(&create_user_table_sql).expect("user table creation"); - // Insert a row to ensure per-user directory is created (some backends may lazy-create user folder) + // Insert a row to ensure per-user directory is created (some backends may lazy-create user + // folder) let _ = execute_sql_as_root_via_cli(&format!( "INSERT INTO {}.{} (body) VALUES ('init')", 
namespace, user_table @@ -97,7 +97,8 @@ fn test_storage_drop_requires_detached_tables() { } let create_shared_table_sql = format!( - "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, body TEXT) WITH (TYPE='SHARED', STORAGE_ID='{}', FLUSH_POLICY='rows:5')", + "CREATE TABLE {}.{} (id BIGINT PRIMARY KEY AUTO_INCREMENT, body TEXT) WITH \ + (TYPE='SHARED', STORAGE_ID='{}', FLUSH_POLICY='rows:5')", namespace, shared_table, storage_id ); execute_sql_as_root_via_cli(&create_shared_table_sql).expect("shared table creation"); diff --git a/cli/tests/subscription/live_connection_tests.rs b/cli/tests/subscription/live_connection_tests.rs index c95eec736..34c573ac5 100644 --- a/cli/tests/subscription/live_connection_tests.rs +++ b/cli/tests/subscription/live_connection_tests.rs @@ -9,10 +9,12 @@ //! Run with: //! cargo test --test connection live_connection_tests -- --test-threads=1 -use crate::common::*; -use kalam_client::{ConnectionOptions, HttpVersion, KalamLinkTimeouts, SubscriptionOptions}; use std::time::Duration; +use kalam_client::{ConnectionOptions, HttpVersion, KalamLinkTimeouts, SubscriptionOptions}; + +use crate::common::*; + fn start_subscription_with_retry(query: &str) -> SubscriptionListener { for attempt in 0..2 { let mut listener = SubscriptionListener::start(query) @@ -438,7 +440,6 @@ fn test_live_multiple_subscriptions() { } /// Test: Connection timeout with unreachable server (client-side option validation) -#[ntest::timeout(30000)] #[test] fn test_connection_timeout_option() { // This test validates that ConnectionOptions timeout settings work diff --git a/cli/tests/subscription/slow_subscriber.rs b/cli/tests/subscription/slow_subscriber.rs index 31e9e655f..61887ea39 100644 --- a/cli/tests/subscription/slow_subscriber.rs +++ b/cli/tests/subscription/slow_subscriber.rs @@ -17,10 +17,14 @@ // Run with: // cargo test --test subscription slow_subscriber -use crate::common::*; +use std::{ + sync::{Arc, Barrier}, + time::Duration, +}; + use 
kalam_client::{KalamLinkTimeouts, SubscriptionConfig, SubscriptionOptions}; -use std::sync::{Arc, Barrier}; -use std::time::Duration; + +use crate::common::*; const DRAIN_IDLE_GRACE: Duration = Duration::from_secs(3); @@ -582,7 +586,8 @@ fn subscription_multiple_concurrent_slow_subscribers() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", ns)) .expect("create namespace"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, val VARCHAR) WITH (TYPE = 'SHARED', ACCESS_LEVEL = 'PUBLIC')", + "CREATE TABLE {} (id INT PRIMARY KEY, val VARCHAR) WITH (TYPE = 'SHARED', ACCESS_LEVEL = \ + 'PUBLIC')", full )) .expect("create shared table"); diff --git a/cli/tests/subscription/test_link_subscription_initial_data.rs b/cli/tests/subscription/test_link_subscription_initial_data.rs index 57b290da6..ee01b9f35 100644 --- a/cli/tests/subscription/test_link_subscription_initial_data.rs +++ b/cli/tests/subscription/test_link_subscription_initial_data.rs @@ -6,9 +6,10 @@ //! //! This matches the behavior expected by both CLI and UI clients. -use crate::common::*; use std::time::Duration; +use crate::common::*; + /// Test that subscription receives initial data batch for existing rows /// and then receives live INSERT events for new rows. 
#[test] @@ -27,7 +28,8 @@ fn test_link_subscription_initial_batch_then_inserts() { // Create user table let create_result = execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, content VARCHAR, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", + "CREATE TABLE {} (id INT PRIMARY KEY, content VARCHAR, created_at TIMESTAMP DEFAULT \ + CURRENT_TIMESTAMP) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", table_full )); assert!(create_result.is_ok(), "Failed to create table: {:?}", create_result); @@ -125,7 +127,8 @@ fn test_link_subscription_empty_table_then_inserts() { // Create user table (empty) let create_result = execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, event_type VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", + "CREATE TABLE {} (id INT PRIMARY KEY, event_type VARCHAR) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:100')", table_full )); assert!(create_result.is_ok(), "Failed to create table: {:?}", create_result); @@ -199,7 +202,8 @@ fn test_link_subscription_batch_status_transition() { let _ = execute_sql_as_root_via_client(&format!("CREATE NAMESPACE {}", namespace)); let _ = execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", + "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:100')", table_full )); diff --git a/cli/tests/subscription/test_subscribe.rs b/cli/tests/subscription/test_subscribe.rs index 3f9d47a3e..ae52febc3 100644 --- a/cli/tests/subscription/test_subscribe.rs +++ b/cli/tests/subscription/test_subscribe.rs @@ -10,8 +10,6 @@ //! - Initial data in subscriptions //! 
- CRUD operations with live updates -use crate::common::*; - use std::{ io::{BufRead, BufReader, Read}, process::{Child, Command, Stdio}, @@ -19,6 +17,8 @@ use std::{ time::{Duration, Instant}, }; +use crate::common::*; + fn forward_process_output( reader: R, stream_name: &'static str, @@ -237,12 +237,11 @@ fn test_cli_live_query_with_filter() { let table_name = generate_unique_table("live_query_filter"); let table = format!("{}.{}", namespace_name, table_name); - let _ = execute_sql_as_root_via_cli(&format!( - "CREATE NAMESPACE IF NOT EXISTS {}", - namespace_name - )); + let _ = + execute_sql_as_root_via_cli(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace_name)); execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, content VARCHAR NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {} (id INT PRIMARY KEY, content VARCHAR NOT NULL, created_at TIMESTAMP \ + DEFAULT CURRENT_TIMESTAMP) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", table )) .unwrap(); @@ -264,17 +263,10 @@ fn test_cli_live_query_with_filter() { } else { format!("high_{}", i) }; - let insert_sql = format!( - "INSERT INTO {} (id, content) VALUES ({}, '{}')", - table, i, marker - ); + let insert_sql = + format!("INSERT INTO {} (id, content) VALUES ({}, '{}')", table, i, marker); let insert_result = execute_sql_as_root_via_cli(&insert_sql); - assert!( - insert_result.is_ok(), - "Insert {} should succeed: {:?}", - i, - insert_result.err() - ); + assert!(insert_result.is_ok(), "Insert {} should succeed: {:?}", i, insert_result.err()); } let mut lines = Vec::new(); @@ -341,7 +333,8 @@ fn test_cli_subscription_with_initial_data() { let _ = execute_sql_as_root_via_cli(&format!("CREATE NAMESPACE {}", namespace_name)); let create_table_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, event_type VARCHAR, timestamp BIGINT) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {} (id INT PRIMARY KEY, 
event_type VARCHAR, timestamp BIGINT) WITH \ + (TYPE='USER', FLUSH_POLICY='rows:10')", table_name ); let _ = execute_sql_as_root_via_cli(&create_table_sql); @@ -410,12 +403,15 @@ fn test_cli_binary_projected_subscription_receives_live_changes() { execute_sql_as_root_via_client(&format!("CREATE NAMESPACE {}", namespace_name)) .expect("namespace should be created"); execute_sql_as_root_via_client(&format!( - "CREATE TABLE {} (id BIGINT PRIMARY KEY, role TEXT NOT NULL, author TEXT NOT NULL, content TEXT NOT NULL, created_at TIMESTAMP NOT NULL DEFAULT NOW()) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {} (id BIGINT PRIMARY KEY, role TEXT NOT NULL, author TEXT NOT NULL, \ + content TEXT NOT NULL, created_at TIMESTAMP NOT NULL DEFAULT NOW()) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", table_name )) .expect("table should be created"); execute_sql_as_root_via_client(&format!( - "INSERT INTO {} (id, role, author, content) VALUES (1, 'assistant', 'KalamDB Copilot', '{}')", + "INSERT INTO {} (id, role, author, content) VALUES (1, 'assistant', 'KalamDB Copilot', \ + '{}')", table_name, initial_content )) .expect("initial row should be inserted"); @@ -426,10 +422,7 @@ fn test_cli_binary_projected_subscription_receives_live_changes() { ) .expect("initial row should be queryable before subscribing"); - let query = format!( - "SELECT id, role, author, content, created_at FROM {}", - table_name - ); + let query = format!("SELECT id, role, author, content, created_at FROM {}", table_name); let (child, rx, _cli_home) = spawn_cli_subscription_process(&query).expect("CLI subscription should spawn"); let _child = ChildProcessGuard::new(child); @@ -482,7 +475,8 @@ fn test_cli_subscription_comprehensive_crud() { let _ = execute_sql_as_root_via_cli(&format!("CREATE NAMESPACE {}", namespace_name)); let create_table_sql = format!( - "CREATE TABLE {} (id INT PRIMARY KEY, event_type VARCHAR, data VARCHAR, timestamp BIGINT) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + 
"CREATE TABLE {} (id INT PRIMARY KEY, event_type VARCHAR, data VARCHAR, timestamp BIGINT) \ + WITH (TYPE='USER', FLUSH_POLICY='rows:10')", table_name ); let _ = execute_sql_as_root_via_cli(&create_table_sql); @@ -506,7 +500,11 @@ fn test_cli_subscription_comprehensive_crud() { ); // Test 2: Insert initial data via CLI - let insert_sql = format!("INSERT INTO {} (id, event_type, data, timestamp) VALUES (1, 'create', 'initial_data', 1000)", table_name); + let insert_sql = format!( + "INSERT INTO {} (id, event_type, data, timestamp) VALUES (1, 'create', 'initial_data', \ + 1000)", + table_name + ); let _ = execute_sql_as_root_via_cli(&insert_sql); // Test 3: Verify data was inserted correctly via CLI diff --git a/cli/tests/subscription/test_subscription_e2e.rs b/cli/tests/subscription/test_subscription_e2e.rs index 5d1c172cd..d0e066ce8 100644 --- a/cli/tests/subscription/test_subscription_e2e.rs +++ b/cli/tests/subscription/test_subscription_e2e.rs @@ -1,8 +1,9 @@ //! End-to-end subscription test: verifies initial snapshot + change events -use crate::common::*; use std::time::Duration; +use crate::common::*; + #[test] fn test_cli_subscription_initial_and_changes() { if !is_server_running() { @@ -19,7 +20,8 @@ fn test_cli_subscription_initial_and_changes() { // Create user table let _ = execute_sql_as_root_via_cli(&format!( - "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', FLUSH_POLICY='rows:10')", + "CREATE TABLE {} (id INT PRIMARY KEY, name VARCHAR) WITH (TYPE='USER', \ + FLUSH_POLICY='rows:10')", table_full )); diff --git a/cli/tests/subscription/test_subscription_manual.rs b/cli/tests/subscription/test_subscription_manual.rs index 83e569638..188dd9fe5 100644 --- a/cli/tests/subscription/test_subscription_manual.rs +++ b/cli/tests/subscription/test_subscription_manual.rs @@ -1,9 +1,9 @@ //! 
Test to verify subscription listener works correctly -use crate::common::*; - use std::time::Duration; +use crate::common::*; + #[test] fn test_subscription_listener_functionality() { if cfg!(windows) { diff --git a/cli/tests/tables/test_user_tables.rs b/cli/tests/tables/test_user_tables.rs index 6975e31a3..41fae98a3 100644 --- a/cli/tests/tables/test_user_tables.rs +++ b/cli/tests/tables/test_user_tables.rs @@ -1,6 +1,7 @@ //! Integration tests for user table operations //! -//! **Implements T037-T040, T064-T068**: User table CRUD operations, output formatting, and query features +//! **Implements T037-T040, T064-T068**: User table CRUD operations, output formatting, and query +//! features //! //! These tests validate: //! - Basic query execution on user tables @@ -9,8 +10,9 @@ //! - Empty queries and result pagination //! - Query result display and formatting -use crate::common; -use crate::common::*; +use std::time::Duration; + +use crate::{common, common::*}; /// T037: Test basic query execution #[test] @@ -93,31 +95,23 @@ fn test_cli_table_output_formatting() { full_table_name )) .expect("CREATE TABLE failed"); + wait_for_table_ready(&full_table_name, Duration::from_secs(15)).expect("table should be ready"); let _ = execute_sql_as_root_via_cli(&format!( "INSERT INTO {} (content) VALUES ('Hello World'), ('Test Message')", full_table_name )); - // Query with table format (default) - let mut cmd = create_cli_command(); - cmd.arg("-u") - .arg(server_url()) - .arg("--user") - .arg(default_username()) - .arg("--password") - .arg(root_password()) - .arg("--command") - .arg(format!("SELECT * FROM {}", full_table_name)); - - let output = cmd.output().unwrap(); - let stdout = String::from_utf8_lossy(&output.stdout); + let stdout = wait_for_sql_output_contains( + &format!("SELECT * FROM {}", full_table_name), + "Test Message", + Duration::from_secs(20), + ) + .expect("table query should return inserted rows"); // Verify table formatting and content assert!( - 
stdout.contains("Hello World") - && stdout.contains("Test Message") - && output.status.success(), + stdout.contains("Hello World") && stdout.contains("Test Message"), "Output should contain both messages: {}", stdout ); @@ -148,6 +142,7 @@ fn test_cli_json_output_format() { ) WITH (TYPE='USER', FLUSH_POLICY='rows:10')"#, full_table_name )); + wait_for_table_ready(&full_table_name, Duration::from_secs(15)).expect("table should be ready"); let _ = execute_sql_as_root_via_cli(&format!( "INSERT INTO {} (content) VALUES ('JSON Test')", @@ -372,25 +367,15 @@ fn test_cli_result_pagination() { .expect("INSERT failed"); } - // Query all rows via CLI - let mut cmd = create_cli_command(); - cmd.arg("-u") - .arg(server_url()) - .arg("--user") - .arg(default_username()) - .arg("--password") - .arg(root_password()) - .arg("--command") - .arg(format!("SELECT * FROM {}", full_table_name)); - - let output = cmd.output().unwrap(); - let stdout = String::from_utf8_lossy(&output.stdout); + let stdout = wait_for_sql_output_contains( + &format!("SELECT * FROM {}", full_table_name), + "Message", + Duration::from_secs(20), + ) + .expect("pagination query should return inserted rows"); // Should display results (pagination in interactive mode) - assert!( - stdout.contains("Message") || output.status.success(), - "Should handle result display" - ); + assert!(stdout.contains("Message"), "Should handle result display"); // Cleanup let _ = execute_sql_as_root_via_cli(&format!("DROP TABLE IF EXISTS {}", full_table_name)); diff --git a/cli/tests/usecases/test_batch_streaming.rs b/cli/tests/usecases/test_batch_streaming.rs index 9deaf959c..6036346b5 100644 --- a/cli/tests/usecases/test_batch_streaming.rs +++ b/cli/tests/usecases/test_batch_streaming.rs @@ -7,10 +7,10 @@ //! - Batch control metadata is properly communicated //! 
- Row count integrity is maintained across batches -use crate::common::*; - use std::time::Duration; +use crate::common::*; + const TOTAL_ROWS: usize = 200; const BATCH_SIZE: usize = 50; @@ -80,15 +80,16 @@ fn test_websocket_batch_streaming_rows() { // Create substantial data (~300 bytes per row) to exceed batch size let long_data = format!( - "Row {} with substantial text content that ensures each record is large enough \ - to force multiple batches during WebSocket streaming. This padding text helps \ - test the batch control mechanism by creating a dataset that cannot fit in a \ - single 8KB transmission. Additional padding to reach ~300 bytes total.", + "Row {} with substantial text content that ensures each record is large enough to \ + force multiple batches during WebSocket streaming. This padding text helps test \ + the batch control mechanism by creating a dataset that cannot fit in a single \ + 8KB transmission. Additional padding to reach ~300 bytes total.", row_id ); let description = format!( - "Detailed description and metadata for record number {} in the batch streaming test", + "Detailed description and metadata for record number {} in the batch streaming \ + test", row_id ); diff --git a/cli/tests/usecases/test_chat_simulation.rs b/cli/tests/usecases/test_chat_simulation.rs index a04fb38e0..9a77abf72 100644 --- a/cli/tests/usecases/test_chat_simulation.rs +++ b/cli/tests/usecases/test_chat_simulation.rs @@ -9,14 +9,14 @@ //! 4. User subscribes to messages and stream events, and sends messages. //! 5. Validates system stability under load. 
-use crate::common::*; - use std::{ sync::{Arc, Barrier}, thread, time::{Duration, Instant}, }; +use crate::common::*; + const DEFAULT_NUM_USERS: usize = 4; const DEFAULT_MESSAGES_PER_USER: usize = 5; const DEFAULT_MESSAGES_PER_AI: usize = 5; @@ -106,7 +106,8 @@ fn test_chat_simulation_memory_leak() { // Create Conversation (AI task) let create_conv_sql = format!( - "INSERT INTO {}.conversations (id, title, created_by) VALUES ('{}', 'Chat {}', '{}')", + "INSERT INTO {}.conversations (id, title, created_by) VALUES ('{}', 'Chat {}', \ + '{}')", ns_ref, conv_id_ref, user_idx, username ); if let Err(e) = execute_sql_via_client_as(&ai_creds.0, &ai_creds.1, &create_conv_sql) { @@ -119,16 +120,25 @@ fn test_chat_simulation_memory_leak() { // AI sends message let msg_id = format!("msg_ai_{}_{}", m, random_string(5)); let msg_sql = format!( - "INSERT INTO {}.messages (id, conversation_id, sender, content, timestamp) VALUES ('{}', '{}', 'AI_AGENT', 'AI Message {}', {})", - ns_ref, msg_id, conv_id_ref, m, chrono::Utc::now().timestamp_millis() + "INSERT INTO {}.messages (id, conversation_id, sender, content, \ + timestamp) VALUES ('{}', '{}', 'AI_AGENT', 'AI Message {}', {})", + ns_ref, + msg_id, + conv_id_ref, + m, + chrono::Utc::now().timestamp_millis() ); let _ = execute_sql_via_client_as(&ai_creds.0, &ai_creds.1, &msg_sql); // AI sends stream event (typing) let event_id = format!("evt_ai_{}_{}", m, random_string(5)); let event_sql = format!( - "INSERT INTO {}.stream_events (id, conversation_id, event_type, payload, timestamp) VALUES ('{}', '{}', 'typing', 'AI is typing...', {})", - ns_ref, event_id, conv_id_ref, chrono::Utc::now().timestamp_millis() + "INSERT INTO {}.stream_events (id, conversation_id, event_type, payload, \ + timestamp) VALUES ('{}', '{}', 'typing', 'AI is typing...', {})", + ns_ref, + event_id, + conv_id_ref, + chrono::Utc::now().timestamp_millis() ); let _ = execute_sql_via_client_as(&ai_creds.0, &ai_creds.1, &event_sql); @@ -168,12 +178,19 @@ fn 
test_chat_simulation_memory_leak() { for m in 0..messages_per_user { let msg_id = format!("msg_usr_{}_{}", m, random_string(5)); let msg_sql = format!( - "INSERT INTO {}.messages (id, conversation_id, sender, content, timestamp) VALUES ('{}', '{}', '{}', 'User Message {}', {})", - namespace, msg_id, conversation_id, user_creds.0, m, chrono::Utc::now().timestamp_millis() + "INSERT INTO {}.messages (id, conversation_id, sender, content, \ + timestamp) VALUES ('{}', '{}', '{}', 'User Message {}', {})", + namespace, + msg_id, + conversation_id, + user_creds.0, + m, + chrono::Utc::now().timestamp_millis() ); let _ = execute_sql_via_client_as(&user_creds.0, &user_creds.1, &msg_sql); - // Read from subscriptions to prevent buffer filling (and simulate active listening) + // Read from subscriptions to prevent buffer filling (and simulate active + // listening) let _ = msg_sub.try_read_line(Duration::from_millis(5)); let _ = stream_sub.try_read_line(Duration::from_millis(5)); diff --git a/cli/tests/usecases/test_datatypes_json.rs b/cli/tests/usecases/test_datatypes_json.rs index dddb295ea..2ccfa66d5 100644 --- a/cli/tests/usecases/test_datatypes_json.rs +++ b/cli/tests/usecases/test_datatypes_json.rs @@ -1,10 +1,11 @@ #![allow(unused_imports, dead_code)] -use crate::common; -use crate::common::*; +use std::time::Duration; + // (apply_patch sanity check) use serde_json::Value; -use std::time::Duration; + +use crate::{common, common::*}; #[test] fn test_datatypes_json_preservation() { @@ -42,7 +43,8 @@ fn test_datatypes_json_preservation() { // Note: Using a fixed timestamp for easier verification let timestamp_str = "2023-01-01 12:00:00"; let insert_sql = format!( - "INSERT INTO {}.{} (col_string, col_int, col_float, col_bool, col_timestamp) VALUES ('test_string', 123, 45.67, true, '{}')", + "INSERT INTO {}.{} (col_string, col_int, col_float, col_bool, col_timestamp) VALUES \ + ('test_string', 123, 45.67, true, '{}')", namespace, table_name, timestamp_str ); let result = 
common::execute_sql_as_root_via_cli(&insert_sql); diff --git a/cli/tests/usecases/test_update_all_types.rs b/cli/tests/usecases/test_update_all_types.rs index f830a01a0..b55b8ddf1 100644 --- a/cli/tests/usecases/test_update_all_types.rs +++ b/cli/tests/usecases/test_update_all_types.rs @@ -1,9 +1,9 @@ -use crate::common::*; +use std::{collections::HashMap, thread, time::Duration}; + use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use serde_json::Value; -use std::collections::HashMap; -use std::thread; -use std::time::Duration; + +use crate::common::*; fn extract_first_row_from_cli_json(output: &str) -> Value { let json: Value = parse_json_from_cli_output(output) @@ -367,7 +367,8 @@ fn test_update_all_types_user_table() { assert!(output.contains("456"), "Updated int not found"); let row = extract_first_row_from_cli_json(&output); assert_decimal_column_eq(&row, "col_decimal", 200.75, &output); - // Note: JSON formatting might vary (whitespace), so we might need loose check or just check presence of "updated" + // Note: JSON formatting might vary (whitespace), so we might need loose check or just check + // presence of "updated" assert!(output.contains("updated"), "Updated JSON content not found"); // Flush table diff --git a/cli/tests/usecases/test_update_null_values.rs b/cli/tests/usecases/test_update_null_values.rs index 46c4b04d7..e39386dba 100644 --- a/cli/tests/usecases/test_update_null_values.rs +++ b/cli/tests/usecases/test_update_null_values.rs @@ -1,6 +1,6 @@ +use std::{thread, time::Duration}; + use crate::common::*; -use std::thread; -use std::time::Duration; /// Test UPDATE on rows with NULL values in non-PK columns (hot storage) #[test] @@ -40,10 +40,12 @@ fn test_update_row_with_null_columns_hot() { "Table creation failed: {}", output ); + wait_for_table_ready(&full_table_name, Duration::from_secs(15)).unwrap(); // Insert row with NULL client_id let insert_sql = format!( - "INSERT INTO {} (id, client_id, conversation_id, sender, role, 
content, status) VALUES (12345, NULL, 999, 'AI Assistant', 'assistant', 'Test message', 'sent')", + "INSERT INTO {} (id, client_id, conversation_id, sender, role, content, status) VALUES \ + (12345, NULL, 999, 'AI Assistant', 'assistant', 'Test message', 'sent')", full_table_name ); @@ -56,7 +58,8 @@ fn test_update_row_with_null_columns_hot() { // Verify row exists with NULL client_id let select_sql = format!("SELECT * FROM {} WHERE id = 12345", full_table_name); - let output = execute_sql_as_root_via_cli(&select_sql).unwrap(); + let output = + wait_for_sql_output_contains(&select_sql, "12345", Duration::from_secs(15)).unwrap(); assert!(output.contains("12345"), "Row not found after insert: {}", output); assert!( output.contains("(1 row)") || output.contains("1 row"), @@ -81,8 +84,15 @@ fn test_update_row_with_null_columns_hot() { // Verify update took effect by checking the updated column value let verify_sql = format!("SELECT id, sender FROM {} WHERE id = 12345", full_table_name); - let output = execute_sql_as_root_via_cli(&verify_sql).unwrap(); + let output = + wait_for_sql_output_contains(&verify_sql, "Updated Assistant", Duration::from_secs(15)) + .unwrap(); assert!(output.contains("12345"), "Row disappeared after UPDATE: {}", output); + assert!( + output.contains("Updated Assistant"), + "Updated sender value not visible after UPDATE: {}", + output + ); // Verify we still get 1 row (row exists) assert!( output.contains("(1 row)") || output.contains("1 row"), @@ -134,7 +144,8 @@ fn test_update_row_with_null_columns_cold() { // Insert row with NULL client_id let insert_sql = format!( - "INSERT INTO {} (id, client_id, conversation_id, sender, content) VALUES (98765, NULL, 888, 'Test Sender', 'Test content')", + "INSERT INTO {} (id, client_id, conversation_id, sender, content) VALUES (98765, NULL, \ + 888, 'Test Sender', 'Test content')", full_table_name ); @@ -294,7 +305,8 @@ fn test_update_multiple_rows_with_nulls() { // Create table let create_sql = format!( - 
"CREATE USER TABLE {} (id BIGINT NOT NULL PRIMARY KEY, optional_field TEXT, required_field TEXT NOT NULL)", + "CREATE USER TABLE {} (id BIGINT NOT NULL PRIMARY KEY, optional_field TEXT, \ + required_field TEXT NOT NULL)", full_table_name ); diff --git a/cli/tests/users/test_admin.rs b/cli/tests/users/test_admin.rs index 8efd32e33..2cfa35b0f 100644 --- a/cli/tests/users/test_admin.rs +++ b/cli/tests/users/test_admin.rs @@ -9,8 +9,9 @@ //! - Administrative SQL operations //! - Namespace and table management +use std::{path::Path, time::Duration}; + use crate::common::*; -use std::time::Duration; /// Test configuration constants const TEST_TIMEOUT: Duration = Duration::from_secs(10); @@ -24,7 +25,11 @@ fn test_cli_list_tables() { } let table_name = generate_unique_table("messages_list_tables"); - let namespace = "test_cli"; + let namespace = generate_unique_namespace("test_cli"); + let full_table_name = format!("{}.{}", namespace, table_name); + + let _ = + execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)); // Create test table let create_sql = format!( @@ -41,23 +46,23 @@ fn test_cli_list_tables() { eprintln!("⚠️ Failed to create test table, skipping test"); return; } + wait_for_table_ready(&full_table_name, Duration::from_secs(15)).unwrap(); // Query system tables - let query_sql = "SELECT table_name FROM system.schemas WHERE namespace_id = 'test_cli'"; - let result = execute_sql_as_root_via_cli(query_sql); + let query_sql = format!( + "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + namespace, table_name + ); + let result = wait_for_sql_output_contains(&query_sql, &table_name, Duration::from_secs(15)); // Should list tables assert!(result.is_ok(), "Should list tables: {:?}", result.err()); let output = result.unwrap(); - assert!( - output.contains("messages") || output.contains("row"), - "Should contain table info: {}", - output - ); + assert!(output.contains(&table_name), "Should 
contain table info: {}", output); // Cleanup - let drop_sql = format!("DROP TABLE IF EXISTS {}.{}", namespace, table_name); - let _ = execute_sql_as_root_via_cli(&drop_sql); + let _ = + execute_sql_as_root_via_client(&format!("DROP NAMESPACE IF EXISTS {} CASCADE", namespace)); } /// T042: Test describe table command (\d table) @@ -69,7 +74,11 @@ fn test_cli_describe_table() { } let table_name = generate_unique_table("messages_describe"); - let namespace = "test_cli"; + let namespace = generate_unique_namespace("test_cli"); + let full_table_name = format!("{}.{}", namespace, table_name); + + let _ = + execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)); // Create test table let create_sql = format!( @@ -86,20 +95,22 @@ fn test_cli_describe_table() { eprintln!("⚠️ Failed to create test table, skipping test"); return; } + wait_for_table_ready(&full_table_name, Duration::from_secs(15)).unwrap(); - // Query table info - let table_full_name = format!("{}.{}", namespace, table_name); - let query_sql = format!("SELECT '{}' as table_info", table_full_name); - let result = execute_sql_as_root_via_cli(&query_sql); + let query_sql = format!( + "SELECT table_name FROM system.schemas WHERE namespace_id = '{}' AND table_name = '{}'", + namespace, table_name + ); + let result = wait_for_sql_output_contains(&query_sql, &table_name, Duration::from_secs(15)); // Should execute successfully and show table info assert!(result.is_ok(), "Should describe table: {:?}", result.err()); let output = result.unwrap(); - assert!(output.contains("messages"), "Should contain table name: {}", output); + assert!(output.contains(&table_name), "Should contain table name: {}", output); // Cleanup - let drop_sql = format!("DROP TABLE IF EXISTS {}.{}", namespace, table_name); - let _ = execute_sql_as_root_via_cli(&drop_sql); + let _ = + execute_sql_as_root_via_client(&format!("DROP NAMESPACE IF EXISTS {} CASCADE", namespace)); } /// T055: Test batch file execution @@ 
-138,48 +149,14 @@ SELECT * FROM {};"#, ) .unwrap(); - // Execute batch file - let target_url = leader_url().unwrap_or_else(|| server_url().to_string()); - let mut cmd = create_cli_command(); - cmd.arg("-u") - .arg(target_url) - .arg("--user") - .arg(default_username()) - .arg("--password") - .arg(root_password()) - .arg("--file") - .arg(sql_file.to_str().unwrap()) - .timeout(TEST_TIMEOUT); - - let mut output = cmd.output().unwrap(); - let mut stdout = String::from_utf8_lossy(&output.stdout).to_string(); - let mut stderr = String::from_utf8_lossy(&output.stderr).to_string(); - - if !output.status.success() && is_leader_error(&stderr) { - if let Some(leader) = leader_url() { - let mut retry_cmd = create_cli_command(); - retry_cmd - .arg("-u") - .arg(leader) - .arg("--user") - .arg(default_username()) - .arg("--password") - .arg(root_password()) - .arg("--file") - .arg(sql_file.to_str().unwrap()) - .timeout(TEST_TIMEOUT); - - output = retry_cmd.output().unwrap(); - stdout = String::from_utf8_lossy(&output.stdout).to_string(); - stderr = String::from_utf8_lossy(&output.stderr).to_string(); - } - } + let stdout = execute_batch_file(&sql_file) + .unwrap_or_else(|err| panic!("Batch execution should succeed. 
Error: {}", err)); // Verify execution - should show Query OK messages and final result assert!( - (stdout.contains("Item One") || stdout.contains("Query OK")) && output.status.success(), - "Batch execution should succeed with proper messages.\nstdout: {}\nstderr: {}\nstatus: {:?}", - stdout, stderr, output.status + stdout.contains("Item One") || stdout.contains("Query OK"), + "Batch execution should succeed with proper messages.\nstdout: {}", + stdout ); // Cleanup @@ -243,7 +220,11 @@ fn test_cli_health_check() { } // Test server health via SQL query - let result = execute_sql_as_root_via_cli("SELECT 1 as health_check"); + let result = wait_for_sql_output_contains( + "SELECT 1 as health_check", + "health_check", + Duration::from_secs(15), + ); assert!(result.is_ok(), "Server should respond to SQL queries: {:?}", result.err()); @@ -254,3 +235,7 @@ fn test_cli_health_check() { output ); } + +fn execute_batch_file(sql_file: &Path) -> Result> { + execute_sql_file_as_root_via_cli(sql_file) +} diff --git a/cli/tests/users/test_concurrent_users.rs b/cli/tests/users/test_concurrent_users.rs index 38a5b1872..662c5a3ac 100644 --- a/cli/tests/users/test_concurrent_users.rs +++ b/cli/tests/users/test_concurrent_users.rs @@ -2,13 +2,13 @@ //! //! 
Validates user isolation, parallel operations, and live query notifications -use crate::common::*; - use std::{ thread, time::{Duration, Instant}, }; +use crate::common::*; + const NUM_USERS: usize = 5; const ROWS_PER_USER: usize = 2000; const INSERT_BATCH_SIZE: usize = 250; @@ -27,42 +27,32 @@ fn test_concurrent_users_isolation() { let test_start = Instant::now(); // Generate unique user suffix to avoid conflicts - //Instead of timestamp add random str from 5 characters + // Instead of timestamp add random str from 5 characters let user_suffix = random_string(5); // Setup namespace and table - let namespace = "concurrent"; - let table_name = "user_data"; + let namespace = generate_unique_namespace("concurrent"); + let table_name = generate_unique_table("user_data"); let full_table = format!("{}.{}", namespace, table_name); let setup_start = Instant::now(); - //Drop the current table if it exists - let drop_sql = format!("DROP TABLE IF EXISTS {}", full_table); - let _ = execute_sql_as_root_via_cli(&drop_sql); - - //Check if the table was dropped successfully - let check_sql = format!("SELECT * FROM {}", full_table); - let check_result = execute_sql_as_root_via_cli(&check_sql); - if check_result.is_ok() { - eprintln!("⚠️ Failed to drop existing table. Skipping test."); - return; - } - if let Err(e) = - execute_sql_as_root_via_cli(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)) + execute_sql_as_root_via_client(&format!("CREATE NAMESPACE IF NOT EXISTS {}", namespace)) { eprintln!("⚠️ Failed to create namespace: {}. 
Skipping test.", e); return; } let create_table_sql = format!( - "CREATE TABLE {} (id INTEGER, message TEXT, timestamp BIGINT, current_user_id TEXT DEFAULT CURRENT_USER()) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", + "CREATE TABLE {} (id INTEGER, message TEXT, timestamp BIGINT, current_user_id TEXT \ + DEFAULT CURRENT_USER()) WITH (TYPE='USER', FLUSH_POLICY='rows:100')", full_table ); - if let Err(e) = execute_sql_as_root_via_cli(&create_table_sql) { + if let Err(e) = execute_sql_as_root_via_client(&create_table_sql) { eprintln!("⚠️ Failed to create table: {}. Skipping test.", e); return; } + wait_for_table_ready(&full_table, Duration::from_secs(15)).expect("table should become ready"); println!("✅ Setup complete ({:.2?})", setup_start.elapsed()); // Create test users with unique timestamp suffix @@ -73,12 +63,12 @@ fn test_concurrent_users_isolation() { let username = format!("user{}_{}", num, user_suffix); let password = format!("password{}", num); - if let Err(e) = execute_sql_as_root_via_cli(&format!( + if let Err(e) = execute_sql_as_root_via_client(&format!( "CREATE USER {} WITH PASSWORD '{}' ROLE 'user'", username, password )) { eprintln!("⚠️ Failed to create user {}: {}. 
Skipping test.", username, e); - cleanup(namespace, &user_credentials); + cleanup(&namespace, &user_credentials); return; } @@ -211,40 +201,38 @@ fn test_concurrent_users_isolation() { let mut verify_cli_times = Vec::new(); for (username, password) in user_credentials.iter() { - let timing = match execute_sql_via_cli_as_with_timing( - username, - password, - &format!("SELECT * FROM {}", full_table), - ) { - Ok(t) => t, - Err(e) => { - eprintln!("⚠️ SELECT failed for {}: {}", username, e); - cleanup(namespace, &user_credentials); - return; - }, + let count_sql = format!("SELECT count(*) as count FROM {}", full_table); + let verify_deadline = Instant::now() + Duration::from_secs(20); + let timing = loop { + match execute_sql_via_cli_as_with_timing(username, password, &count_sql) { + Ok(timing) if timing.output.contains(&ROWS_PER_USER.to_string()) => break timing, + Ok(_) if Instant::now() < verify_deadline => { + thread::sleep(Duration::from_millis(200)); + }, + Ok(timing) => { + eprintln!( + "❌ User {} got unexpected count output after waiting: {}", + username, timing.output + ); + cleanup(&namespace, &user_credentials); + panic!("Row count mismatch for {}", username); + }, + Err(e) if Instant::now() < verify_deadline => { + eprintln!("⚠️ COUNT failed for {}: {}", username, e); + thread::sleep(Duration::from_millis(200)); + }, + Err(e) => { + eprintln!("⚠️ COUNT failed for {}: {}", username, e); + cleanup(&namespace, &user_credentials); + return; + }, + } }; if let Some(s) = timing.server_time_ms { verify_server_times.push(s); } verify_cli_times.push(timing.total_time_ms); - - // Count data rows (skip header/separator lines) - let row_count = timing - .output - .lines() - .filter(|l| { - l.contains("│") && l.contains(|c: char| c.is_ascii_digit()) && !l.contains("─") - }) - .count(); - - // Verify count - if row_count != ROWS_PER_USER { - eprintln!("❌ User {} got {} rows, expected {}", username, row_count, ROWS_PER_USER); - eprintln!("Output:\n{}", timing.output); - 
cleanup(namespace, &user_credentials); - panic!("Row count mismatch for {}", username); - } } let verify_elapsed = verify_start.elapsed(); @@ -267,7 +255,7 @@ fn test_concurrent_users_isolation() { ); // Cleanup - cleanup(namespace, &user_credentials); + cleanup(&namespace, &user_credentials); let total_time = test_start.elapsed(); println!("\n🎉 Test PASSED - All {} users correctly isolated", NUM_USERS); @@ -297,7 +285,8 @@ fn test_concurrent_users_isolation() { fn cleanup(namespace: &str, creds: &[(String, String)]) { for (username, _) in creds { - let _ = execute_sql_as_root_via_cli(&format!("DROP USER IF EXISTS {}", username)); + let _ = execute_sql_as_root_via_client(&format!("DROP USER IF EXISTS {}", username)); } - let _ = execute_sql_as_root_via_cli(&format!("DROP NAMESPACE IF EXISTS {} CASCADE", namespace)); + let _ = + execute_sql_as_root_via_client(&format!("DROP NAMESPACE IF EXISTS {} CASCADE", namespace)); } diff --git a/docs/api/websocket-protocol.md b/docs/api/websocket-protocol.md index 8cb8f2373..41a51ba98 100644 --- a/docs/api/websocket-protocol.md +++ b/docs/api/websocket-protocol.md @@ -171,8 +171,7 @@ No unsubscribe acknowledgement message is currently emitted. "batch_num": 0, "has_more": true, "status": "loading", - "last_seq_id": 12345, - "snapshot_end_seq": 13000 + "last_seq_id": 12345 }, "schema": [ {"name": "id", "data_type": "BigInt", "index": 0}, @@ -200,12 +199,14 @@ Notes: "batch_num": 0, "has_more": true, "status": "loading", - "last_seq_id": 12346, - "snapshot_end_seq": 13000 + "last_seq_id": 12346 } } ``` +The client resume cursor is always `last_seq_id`; snapshot and commit +boundaries are backend-owned and are not part of the WebSocket contract. 
+ `batch_control.status` values: - `loading` (first batch, more pending) diff --git a/docs/architecture/decisions/adr-017-websocket-subscription-memory.md b/docs/architecture/decisions/adr-017-websocket-subscription-memory.md index bcea50eda..da235bf18 100644 --- a/docs/architecture/decisions/adr-017-websocket-subscription-memory.md +++ b/docs/architecture/decisions/adr-017-websocket-subscription-memory.md @@ -124,6 +124,8 @@ pub struct SubscriptionHandle { | last_heartbeat | 16 bytes | Instant | | notification_tx | 8 bytes | Arc pointer | | event_tx | 24 bytes | Channel sender | +| event channel slots | 1 pending event | One close reason is enough; full channel triggers force-unregister | +| notification channel slots | 64 pending notifications | Bounded live backlog per connection | | subscriptions (DashMap base) | ~64 bytes | Per-shard overhead | | **Base Connection Overhead** | **~300 bytes** | Without subscriptions | diff --git a/docs/architecture/manifest.md b/docs/architecture/manifest.md index d0d1db35b..8e8c07714 100644 --- a/docs/architecture/manifest.md +++ b/docs/architecture/manifest.md @@ -48,10 +48,11 @@ KalamDB employs a tiered architecture to balance sub-millisecond latency with lo ### 2. Flush Operation (Commit) When the MemTable fills up or a checkpoint is triggered: -1. **Write Parquet**: In-memory rows are sorted and written to a new `batch-N.parquet` file. -2. **Compute Stats**: `FlushManifestHelper` calculates min/max values for the primary key, `_seq`, and indexed columns *during* the write process. -3. **Update Hot Store**: A new `SegmentMetadata` entry is added to the `ManifestService` (updating L1 RAM and L2 RocksDB). -4. **Persist Cold Store**: The `ManifestService` serializes the updated state and performs an atomic write (write-tmp-rename) to `manifest.json`. +1. **Resolve Versions**: Hot rows are scanned in bounded batches and reduced to the latest `_seq` per primary key. 
User tables process one user scope at a time because hot keys are ordered by `(user_id, seq)`; shared tables resolve the shared scope as a whole to preserve the `_seq` fallback for rows with missing/null primary keys. +2. **Write Parquet**: The common flush scope writer writes the resolved rows to a temp `batch-N.parquet.tmp` Parquet file for either a shared scope or `Some(user_id)` scope. +3. **Compute Stats**: `FlushManifestHelper` calculates min/max values for the primary key, `_seq`, and indexed columns before the batch is moved into the Parquet writer. +4. **Commit File + Manifest**: The temp file is renamed to `batch-N.parquet`, then a new `SegmentMetadata` entry is added to the `ManifestService` (updating L1 RAM and L2 RocksDB) and persisted through the cold `manifest.json` atomic write. +5. **Clean Hot Store**: Flushed hot keys are removed in bounded indexed-store delete batches so cleanup does not build unbounded RocksDB operation vectors. ### 3. Query Execution (Read Path) 1. **Plan**: The `ManifestAccessPlanner` requests the Manifest from the **Hot Store**. diff --git a/docs/architecture/pg-extension-grpc-connectivity.md b/docs/architecture/pg-extension-grpc-connectivity.md index b641e6a1d..3f6ff7ad1 100644 --- a/docs/architecture/pg-extension-grpc-connectivity.md +++ b/docs/architecture/pg-extension-grpc-connectivity.md @@ -86,7 +86,7 @@ For the same PostgreSQL backend process and the same foreign-server config: - the same `RemoteExtensionState` is reused - the same session id is reused - the same `RemoteKalamClient` is reused -- each RPC constructs a fresh tonic client over `self.channel.clone()` +- each RPC goes through the shared client call helper in `RemoteKalamClient`, which clones the reusable tonic channel, applies the correct request metadata boundary, and maps gRPC status errors consistently The extension test suite explicitly verifies that the same config reuses the same state and only opens one remote session. 
diff --git a/docs/architecture/raft-replication.md b/docs/architecture/raft-replication.md index a0a4206e0..e0cea18c5 100644 --- a/docs/architecture/raft-replication.md +++ b/docs/architecture/raft-replication.md @@ -4,11 +4,11 @@ KalamDB uses a multi-Raft topology (OpenRaft 0.9) to replicate metadata, jobs, user data, and shared data across nodes. The Raft layer lives in [backend/crates/kalamdb-raft](backend/crates/kalamdb-raft/src/lib.rs) and is accessed through the `CommandExecutor` abstraction so handlers do not branch on cluster vs standalone mode. -- **Multi-group layout**: 1 unified metadata group, 32 user-data shards, 1 shared-data shard by default. Group identity and sharding helpers live in [backend/crates/kalamdb-sharding/src](backend/crates/kalamdb-sharding/src/lib.rs). +- **Multi-group layout**: 1 unified metadata group, 32 user-data shards, 1 shared-data shard by default. Group identity and sharding helpers live in [backend/crates/kalamdb-sharding/src](backend/crates/kalamdb-sharding/src/lib.rs), and group ID decoding accepts the configured user/shared shard ranges. - **Command path**: Handler → `CommandExecutor` (`DirectExecutor` in standalone or `RaftExecutor` in cluster) → `RaftManager` → `RaftGroup` → OpenRaft log → State machine → Applier → storage/provider. - **Replication modes**: `Quorum` (fast, default) or `All` (wait for every member to apply) configured via `ReplicationMode` in [backend/crates/kalamdb-raft/src/manager/config.rs](backend/crates/kalamdb-raft/src/manager/config.rs). - **Transport**: gRPC service in [backend/crates/kalamdb-raft/src/network/service.rs](backend/crates/kalamdb-raft/src/network/service.rs) handles Raft RPCs and follower→leader proposal forwarding. -- **Storage**: Combined in-memory log + state machine storage in [backend/crates/kalamdb-raft/src/storage/raft_store.rs](backend/crates/kalamdb-raft/src/storage/raft_store.rs); snapshots are used for compaction. 
+- **Storage**: Combined in-memory log + state machine storage in [backend/crates/kalamdb-raft/src/storage/raft_store.rs](backend/crates/kalamdb-raft/src/storage/raft_store.rs); snapshots are used for compaction. Real table writes are persisted by appliers after Raft apply. ## Topology & Sharding @@ -22,13 +22,31 @@ KalamDB uses a multi-Raft topology (OpenRaft 0.9) to replicate metadata, jobs, u 2. `RaftManager` proposes to the correct `RaftGroup`: - `propose_*` APIs forward to the leader if the local node is a follower via `propose_with_forward`. - In `ReplicationMode::All`, leaders wait for every member to apply the committed log before returning. -3. `RaftGroup` uses OpenRaft to append/replicate and applies entries through its `KalamRaftStorage`, which drives the state machine. If the caller was a follower, the gRPC service forwards the proposal to the leader and relays the result. -4. State machines deserialize the command, enforce idempotency via `last_applied_index`, and invoke an injected *applier* to persist to real providers (RocksDB/Parquet/etc.). Followers run the same apply path so state converges. +3. SQL write endpoints inspect the statement target and, on followers, forward directly to the target data-group leader instead of routing every write through the meta leader first. The group is chosen from the table definition and authenticated user shard. +4. `RaftGroup` uses OpenRaft to append/replicate and applies entries through its `KalamRaftStorage`, which drives the state machine. If the caller was a follower, the gRPC service forwards the proposal to the leader and relays the result. +5. State machines deserialize the command, enforce idempotency via `last_applied_index`, and invoke an injected *applier* to persist to real providers (RocksDB/Parquet/etc.). Followers run the same apply path so state converges. 
+ +## SQL DML & Commit Ordering + +- SQL autocommit DML for user/shared tables is staged through the transaction mutation path and committed through the appropriate data Raft group. Direct local SQL provider writes are no longer the normal HTTP SQL write path in cluster mode. +- State machines derive `_commit_seq` deterministically from `(group_id, log_index)` via `commit_seq_from_log_position`. Appliers receive that value and stamp inserted, updated, deleted, and transaction-batch rows with the replicated commit order. +- `CommitSequenceTracker::observe_committed` advances the local high-watermark after persisted apply, so every node observes the same commit sequence for the same Raft entry. +- `_seq` remains the only client-visible live-query resume cursor. `_commit_seq` is internal state-machine metadata used for deterministic visibility and follower-side snapshot gating. + +## Live/WebSocket Followers + +- Live subscriptions are node-local: each leader or follower serves its own WebSocket clients after applying replicated data locally. +- Before taking an initial snapshot for a subscription, the live layer calls a local apply barrier. In cluster mode this maps the table to its Raft group and waits until the local state machine has applied every log entry already known on that node. +- Snapshot boundaries are computed from the local materialized `MAX(_seq)` instead of a wall-clock Snowflake upper bound. This avoids creating a boundary that can include leader commits not yet applied on a follower. +- WebSocket batch control carries only `last_seq_id`. Clients reconnect or request the next batch with that single `SeqId`; the backend recomputes snapshot boundaries per connection and keeps commit/snapshot markers out of the wire protocol. +- Buffered notifications are gated by backend-owned `_commit_seq` when present, falling back to `_seq` for legacy tables or stream/system paths. 
## Network Layer - Service surface: `RaftRpc` (vote, append_entries, install_snapshot) and `ClientProposal` (follower→leader proposal forwarding) defined in [backend/crates/kalamdb-raft/src/network/service.rs](backend/crates/kalamdb-raft/src/network/service.rs). -- Client side: `RaftNetworkFactory` caches tonic channels and produces `RaftNetwork` clients per group ([backend/crates/kalamdb-raft/src/network/network.rs](backend/crates/kalamdb-raft/src/network/network.rs)). +- Client side: each `RaftNetworkFactory` produces `RaftNetwork` clients per group, but all groups in a `RaftManager` share one node-level tonic channel pool keyed by peer `NodeId` ([backend/crates/kalamdb-raft/src/network/network.rs](backend/crates/kalamdb-raft/src/network/network.rs)). This keeps the multi-Raft topology from opening one persistent HTTP/2 transport per `(group, peer)` pair. +- RPC call construction is centralized in the network layer: OpenRaft RPCs use a typed `RaftRpcKind` plus shared encode/send/decode helpers, follower proposal forwarding uses `RaftNetworkFactory::send_client_proposal`, and non-Raft cluster messages use `ClusterClient` shared request/metadata/error handling. This keeps channel reuse, auth metadata, and serde boundaries consistent across inter-node calls. +- Serialization boundaries are intentionally layered: tonic/prost frames the gRPC messages, MessagePack encodes OpenRaft request/response payloads, FlexBuffers encodes committed Raft commands and apply responses, and follower SQL forwarding returns already-serialized HTTP JSON bytes from the leader so followers do not deserialize and reserialize result bodies. - Server startup: `start_rpc_server` binds to the advertised RPC port and serves the Raft gRPC server; invoked by `RaftExecutor::start` before starting groups. - Forwarding: followers call `ClientProposal` with the group ID and command bytes; the leader applies and returns the response. 
If leadership changed, the leader hint is returned so callers can retry. @@ -36,7 +54,7 @@ KalamDB uses a multi-Raft topology (OpenRaft 0.9) to replicate metadata, jobs, u Implemented in [backend/crates/kalamdb-raft/src/manager/raft_manager.rs](backend/crates/kalamdb-raft/src/manager/raft_manager.rs). -- **Construction**: Creates 3 meta groups + N user shards + M shared shards (defaults 32/1). Each group is a `RaftGroup` wrapping its own storage and network factory. +- **Construction**: Creates 1 meta group + N user shards + M shared shards (defaults 32/1). Each group is a `RaftGroup` wrapping its own storage and network factory; the manager injects a shared channel pool into every factory so peer transports are reused across groups. - **Start**: Registers configured peers with every group, starts the RPC server, then starts all Raft groups with OpenRaft configs (heartbeat/election timeouts from `RaftManagerConfig`). - **Bootstrap (first node)**: `initialize_cluster` seeds every group with this node as the sole voter, then optionally adds peers as learners and promotes them. - **Adding nodes**: `add_node` registers a learner in every group, waits for catch-up per group, then promotes to voter. @@ -65,10 +83,8 @@ Implemented by `KalamRaftStorage` in [backend/crates/kalamdb-raft/src/storage/ra All state machines live under [backend/crates/kalamdb-raft/src/state_machine](backend/crates/kalamdb-raft/src/state_machine). -- **SystemStateMachine**: namespaces, tables, storages; caches snapshot data and drives `SystemApplier` for provider persistence ([system.rs](backend/crates/kalamdb-raft/src/state_machine/system.rs)). -- **UsersStateMachine**: user CRUD, login/lock bookkeeping; drives `UsersApplier` ([users.rs](backend/crates/kalamdb-raft/src/state_machine/users.rs)). -- **JobsStateMachine**: job lifecycle and schedules; leader-only execution is enforced by Claim/Release semantics ([jobs.rs](backend/crates/kalamdb-raft/src/state_machine/jobs.rs)). 
-- **UserDataStateMachine**: per-user tables and live-query bookkeeping; routes persistence through `UserDataApplier` ([user_data.rs](backend/crates/kalamdb-raft/src/state_machine/user_data.rs)). +- **MetaStateMachine**: namespaces, tables, storages, users, and jobs; caches snapshot data and drives metadata appliers. +- **UserDataStateMachine**: per-user tables; routes persistence through `UserDataApplier` ([user_data.rs](backend/crates/kalamdb-raft/src/state_machine/user_data.rs)). - **SharedDataStateMachine**: shared tables; uses `SharedDataApplier` ([shared_data.rs](backend/crates/kalamdb-raft/src/state_machine/shared_data.rs)). - Every state machine tracks `last_applied_index` for idempotency and produces snapshots capturing its cached state; row data persistence is delegated to appliers so followers apply real writes too. - Applier traits and no-op defaults live in [backend/crates/kalamdb-raft/src/applier](backend/crates/kalamdb-raft/src/applier/mod.rs). @@ -76,9 +92,7 @@ All state machines live under [backend/crates/kalamdb-raft/src/state_machine](ba ## Command Types Defined in [backend/crates/kalamdb-raft/src/commands](backend/crates/kalamdb-raft/src/commands/mod.rs) and split per group: -- System: namespaces/tables/storage metadata. -- Users: user accounts, login events, lock/unlock. -- Jobs: job creation, claim/release, status, schedules. +- Meta: namespaces/tables/storage metadata, user accounts, login events, locks, and jobs. - UserData: per-user DML plus live-query registrations. - SharedData: shared-table DML. Responses are serialized FlexBuffers payloads returned after apply. @@ -86,28 +100,31 @@ Responses are serialized FlexBuffers payloads returned after apply. ## Membership & Routing Details - Peer registry: `RaftManager::register_peer` plumbs peer addresses into every `RaftGroup` so OpenRaft can dial peers via the network factory. -- Leader discovery: `current_leader` is per-group; followers respond to forwarded proposals with leader hints. 
`RaftExecutor::get_cluster_info` aggregates OpenRaft metrics for UI/diagnostics. +- Leader discovery: `current_leader` is per-group; followers respond to forwarded proposals with leader hints. HTTP SQL write forwarding uses the target data group directly when possible. `RaftExecutor::get_cluster_info` aggregates OpenRaft metrics for UI/diagnostics. - Shard mapping: user shard via user_id hash; shared shard fixed to 0. Default counts exposed as `DEFAULT_USER_DATA_SHARDS` and `DEFAULT_SHARED_DATA_SHARDS` in [backend/crates/kalamdb-raft/src/manager/config.rs](backend/crates/kalamdb-raft/src/manager/config.rs). ## Operational Notes & Limitations - Leadership transfer is best-effort only during shutdown; OpenRaft re-elects leaders after a departing leader stops. +- Automatic leader balancing across user shards is not implemented yet. OpenRaft 0.9 does not expose a direct leadership-transfer API in the current wrapper, so balancing requires either a newer OpenRaft surface or coordinated restart/election controls. - Strong replication (`All`) waits for every configured node to apply; cluster size drives the wait set. Misconfigured peer counts can block proposals. - WAL/log durability is in-memory; ensure appliers persist real data paths (RocksDB/Parquet) for crash recovery. - Snapshot size is driven by state machine caches; large snapshots can impact install_snapshot traffic. Purge thresholds (`max_in_snapshot_log_to_keep`, `purge_batch_size`) are set in `RaftGroup::start`. +- Storage still creates per-table or per-shard lower-level storage structures in several providers. Consolidating RocksDB column families into a prefixed-key layout is a storage redesign and is not completed by the Raft routing changes. ## Proposed Reliability & Efficiency Improvements - **Persist Raft log/state**: Move `KalamRaftStorage` from in-memory BTreeMap to a disk-backed store (RocksDB or a filestore partition) so Raft itself survives process restarts without relying solely on appliers. 
Retain snapshots for compaction but anchor durability on disk. - **Snapshot tuning**: Enable streaming or chunked snapshot sends and compress snapshot payloads to reduce memory spikes; lower `max_in_snapshot_log_to_keep` and tune snapshot trigger thresholds per group size to minimize in-memory log growth. - **Stronger failover hygiene**: Add pre-vote and staggered election timeouts per group to reduce split votes; expose health probes and replication-lag metrics for each group via `RaftManager` so the API tier can drain traffic from lagging nodes. -- **Leader stickiness for metadata**: Default metadata groups (`MetaSystem`, `MetaUsers`, `MetaJobs`) to `ReplicationMode::All` and favor longer election timeouts there to reduce churn; allow data shards to stay on quorum for throughput. +- **Leader balancing across data shards**: Add shard-leader placement goals and metrics first, then implement a safe transfer or drain mechanism when the OpenRaft API surface supports it. Keep the meta group stable and allow user shards to spread leaders for write throughput. - **Proposal backpressure**: Add queue depth/lag-based backpressure in `RaftExecutor` before proposing, and surface `NotLeader` + `leader_hint` telemetry to callers to cut retry storms during elections. - **Membership safety rails**: Validate peer counts against `replication_mode` (e.g., prevent `All` with missing voters), and add a periodic reconciler that re-registers peers into all `RaftGroup` network factories to heal stale channel caches. -- **Job fencing**: For `MetaJobs` commands, include the acting leader term/ID in claims and reject stale claims on followers to avoid double-execution during leadership flaps. +- **Job fencing**: For job commands in the meta group, include the acting leader term/ID in claims and reject stale claims on followers to avoid double-execution during leadership flaps. 
- **Memory caps for state machines**: Bound live-query maps and recent-op buffers in `UserDataStateMachine` and `SharedDataStateMachine`; evict or summarize metrics past a fixed window to keep snapshot payloads small. - **Adaptive networking**: Batch and pipeline append_entries per target where possible; reuse tonic channels aggressively (already cached) and add exponential backoff jitter for client-proposal forwarding to reduce thundering herds. - **Observability hooks**: Export per-group gauges for `last_applied`, `replication_lag`, snapshot send/receive sizes, and propose latency; wire them into `ClusterInfo` so operators can alert on lag before it impacts HA. +- **Column-family consolidation**: Replace per-shard/per-table RocksDB column families with a small fixed set of column families and prefix keys by table/user/shard. This should be done behind storage abstractions so Raft group count and storage layout remain independently tunable. ## How to Run Cluster Mode (today) diff --git a/docs/architecture/topic-consumption.md b/docs/architecture/topic-consumption.md new file mode 100644 index 000000000..78a4edc1c --- /dev/null +++ b/docs/architecture/topic-consumption.md @@ -0,0 +1,62 @@ +# Topic Consumption Architecture + +## Overview + +KalamDB topics are durable append-only message streams backed by the +`topic_messages` storage partition. Table providers publish CDC messages through +`TopicPublisherService` during the write path, and consumers read them through +the HTTP topic consume/ack endpoints or SQL stream handlers. + +## Publish Path + +- Topic routes are cached in memory as `TableId -> routes` for fast write-path + checks. +- Matching row changes are serialized once per route and written to + `TopicMessageStore`. +- Per-topic-partition write locks serialize offset allocation and message writes + so persisted offsets remain gap-free and ordered within a partition. 
+- Batch publishing groups rows by partition and writes each group through one + storage batch. + +## Consumer Group Claims + +Consumer group state is tracked per `(topic_id, group_id, partition_id)`. +The in-memory state stores: + +- `cursor`: the next offset range to hand out. +- `pending`: unacked claimed ranges with a visibility deadline. + +Fetching uses optimistic claim reservation: + +1. Briefly lock the group state, expire stale pending claims, and read the + effective cursor. +2. Release the group state before scanning and deserializing topic messages. +3. Re-lock the group state and claim the fetched range only if the cursor is + unchanged. +4. If another consumer advanced the cursor first, retry from the new cursor. + +This keeps same-group consumers from blocking behind another consumer's storage +scan while still preventing overlapping offset delivery. If an older claim +expires before a newer claim, the cursor skips still-pending ranges so only the +expired range is redelivered. + +## Ack And Recovery + +Ack commits are persisted in `system.topic_offsets` and are monotonic: a lower +or equal ack never regresses the committed offset. Acking also clears pending +claims covered by the acknowledged offset. + +If a consumer claims messages and does not ack before +`topics.visibility_timeout_secs`, the next fetch expires that stale claim and +resets the group cursor to the earliest expired offset for redelivery. + +The visibility timeout can be configured in `server.toml`: + +```toml +[topics] +visibility_timeout_secs = 10 +``` + +It can also be overridden with `KALAMDB_TOPIC_VISIBILITY_TIMEOUT_SECS`. +`KALAMDB_VISIBILITY_TIMEOUT_SECS` remains accepted as a compatibility alias for +existing smoke-test and local scripts. 
diff --git a/docs/architecture/transactions.md b/docs/architecture/transactions.md index fbdcaece3..b54d54629 100644 --- a/docs/architecture/transactions.md +++ b/docs/architecture/transactions.md @@ -134,6 +134,11 @@ The durable apply path is: -> RocksDB-backed table storage +Topic publisher side effects are emitted from the local apply path on nodes that +have matching topic routes in their local publisher cache. They are not gated on +the node being the data-shard leader, so a topic-owning node can materialize CDC +events for writes whose Raft group is led by another node. + Parquet is not the commit target for explicit transactions. RocksDB-backed hot storage is the durable write path, and Parquet remains the later flush / cold-storage path. ### 5. Rollback diff --git a/docs/architecture/websocket-server.md b/docs/architecture/websocket-server.md index 6344f483c..8bd19b51b 100644 --- a/docs/architecture/websocket-server.md +++ b/docs/architecture/websocket-server.md @@ -16,13 +16,15 @@ Client Server ├── GET /v1/ws ───────────────►│ HTTP upgrade │ │ 1. Validate Origin header (if configured) │ │ 2. Check server not shutting down - │ │ 3. Check max_connections not exceeded (default 25,000) + │ │ 3. Atomically reserve max_connections slot │ │ 4. Generate ConnectionId (UUID) │ │ 5. Register in ConnectionsManager │ │ 6. Spawn per-connection Tokio task │◄──────────────── 101 Switch ─┤ │ │ - │ ┌─────┤ Auth timeout starts (default 10s) + │ ┌─────┤ Auth timeout starts (default 3s) + │ │ │ If Authorization: Bearer was supplied: + │ │ │ validate token after upgrade and send AuthSuccess/AuthError │── Authenticate{JWT} ──►│ │ │ │ │ 7. Rate limit check │ │ │ 8. Validate JWT token @@ -54,7 +56,7 @@ Client Server ## Core Components ### 1. `websocket_handler` (kalamdb-api) -HTTP endpoint (`GET /v1/ws`) that upgrades to WebSocket. Validates origin, generates connection ID, registers with ConnectionsManager, then spawns a per-connection `handle_websocket` task. 
+HTTP endpoint (`GET /v1/ws`) that upgrades to WebSocket. Validates origin, parses optional `Authorization: Bearer` metadata without validating it on the HTTP upgrade path, generates connection ID, registers with ConnectionsManager, then spawns a per-connection `handle_websocket` task. ### 2. `handle_websocket` (kalamdb-api) Per-connection `tokio::select!` loop with **biased** priority ordering: @@ -67,7 +69,8 @@ Shared singleton managing ALL connections: - **Primary storage**: `DashMap` - **Subscription indices**: `(UserId, TableId) → DashMap` for O(1) notification routing - **Background heartbeat checker**: Single Tokio task, ticks every `heartbeat_interval` (default 5s), iterates ALL connections -- **Channel-based communication**: Each connection gets bounded `event_tx/rx` (cap 64) and `notification_tx/rx` (cap 1000) +- **Channel-based communication**: Each connection gets bounded `event_tx/rx` (cap 1) and `notification_tx/rx` (cap 64) +- **Capacity enforcement**: Connection slots are reserved with atomic compare-and-swap before allocation, so concurrent handshakes cannot overshoot `max_connections` ### 4. `LiveQueryManager` (kalamdb-core) Handles subscription registration/unregistration: @@ -148,8 +151,8 @@ Per-connection select! loop | Channel | Capacity | Purpose | |---------|----------|---------| -| `event_tx/rx` | 16 | Control events (ping, timeout, shutdown) | -| `notification_tx/rx` | 1000 | Live query notifications per connection | +| `event_tx/rx` | 1 | Control close event (auth timeout, heartbeat timeout, shutdown) | +| `notification_tx/rx` | 64 | Live query notifications per connection | --- diff --git a/docs/getting-started/cli.md b/docs/getting-started/cli.md index f64821a39..2e4742c4f 100644 --- a/docs/getting-started/cli.md +++ b/docs/getting-started/cli.md @@ -134,19 +134,47 @@ In interactive mode, meta-commands start with `\`: | `\update-credentials

` | Update stored credentials | | `\delete-credentials` | Delete stored credentials | -**Cluster meta-commands**: +### Cluster meta-commands - `\cluster snapshot` - `\cluster purge --upto <index>` (or `\cluster purge <index>`) - `\cluster trigger-election` - `\cluster transfer-leader <node-id>` +- `\cluster rebalance` - `\cluster stepdown` - `\cluster clear` - `\cluster list` (alias: `\cluster ls`) - `\cluster list groups` -- `\cluster status` -- `\cluster join <address>` (not yet implemented) -- `\cluster leave` (not yet implemented) +- `\cluster join <address>` + +### How follower writes are forwarded + +KalamDB uses Multi-Raft groups. A request does not have to land on the leader node first. + +Example: + +```bash +kalam --url http://node-2:8080 --command "INSERT INTO app.messages (id, body) VALUES (101, 'hello')" +``` + +Assume the authenticated or effective user for that request is `user-42`, and `user-42` hashes to user data group `DataUserShard(7)`. + +1. The request can hit any node, including a follower for that group. +2. The SQL layer prepares and classifies the statement once, then derives the target Raft group from the table type and current `user_id`. +3. For user and stream tables, KalamDB hashes `user_id` into one of `cluster.user_shards` groups. +4. If the receiving node is not the leader for that target group, it forwards the original SQL, params, auth header, and request id over gRPC to the current leader for that group. +5. The group leader executes the write, appends it to that Raft log, replicates it to followers, commits it, and returns the result. +6. The follower relays that leader-built response back to the client. + +This keeps writes local to the correct group leader even when clients connect to follower nodes. + +### Multi-Raft routing today + +- KalamDB runs one metadata Raft group plus multiple user data groups.
+- User and stream data are routed by `user_id`, so all rows for the same user go through the same user-data Raft group leader at a given time instead of scattering one user's working set across many leaders. +- That locality reduces cross-group coordination and improves cache and write-path behavior. +- Shared tables are different today: they currently route to a single shared group. +- Shared-table sharding is still a work in progress. The planned direction is partition-by-key so each shared table can define how a row is partitioned and where it should be placed. ### Output formats diff --git a/docs/sdk/sdk.md b/docs/sdk/sdk.md index 22c2e355d..8bcf3a6f2 100644 --- a/docs/sdk/sdk.md +++ b/docs/sdk/sdk.md @@ -332,7 +332,6 @@ interface BatchControl { has_more: boolean; status: 'loading' | 'loading_batch' | 'ready'; last_seq_id?: string; - snapshot_end_seq?: string; } ``` diff --git a/examples/chat-with-ai/package-lock.json b/examples/chat-with-ai/package-lock.json index 8e7e8f4a2..839f6d68d 100644 --- a/examples/chat-with-ai/package-lock.json +++ b/examples/chat-with-ai/package-lock.json @@ -32,7 +32,7 @@ }, "../../link/sdks/typescript/client": { "name": "@kalamdb/client", - "version": "0.4.2-rc.2.1", + "version": "0.4.2-rc.3", "license": "Apache-2.0", "dependencies": { "ws": "^8.20.0" @@ -47,7 +47,7 @@ }, "../../link/sdks/typescript/consumer": { "name": "@kalamdb/consumer", - "version": "0.4.2-rc.2.1", + "version": "0.4.2-rc.3", "license": "Apache-2.0", "devDependencies": { "@types/node": "^25.5.0", diff --git a/examples/chat-with-ai/setup.sh b/examples/chat-with-ai/setup.sh index 45a6a01a3..4f2e4b9e4 100755 --- a/examples/chat-with-ai/setup.sh +++ b/examples/chat-with-ai/setup.sh @@ -7,6 +7,8 @@ ROOT_PASSWORD="${KALAMDB_ROOT_PASSWORD:-kalamdb123}" SQL_FILE="$SCRIPT_DIR/chat-app.sql" ENV_FILE="$SCRIPT_DIR/.env.local" ACCESS_TOKEN="" +ADMIN_USER="admin" +ADMIN_PASSWORD="kalamdb123" log() { echo "[setup] $*" @@ -53,12 +55,76 @@ done require_cmd curl require_cmd jq 
+try_login() { + local user="$1" + local password="$2" + local response + local http_code + local body + local payload + + payload="$(jq -cn --arg user "$user" --arg password "$password" '{user: $user, password: $password}')" + response="$(curl -sS -w "\n%{http_code}" -X POST "$KALAMDB_URL/v1/api/auth/login" \ + -H "Content-Type: application/json" \ + -d "$payload")" + + http_code="$(echo "$response" | tail -1)" + body="$(echo "$response" | sed '$d')" + + if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then + return 1 + fi + + ACCESS_TOKEN="$(echo "$body" | jq -r '.access_token // empty')" + [[ -n "$ACCESS_TOKEN" ]] +} + +server_needs_setup() { + local response + response="$(curl -fsS "$KALAMDB_URL/v1/api/auth/status")" + [[ "$(echo "$response" | jq -r '.needs_setup // false')" == "true" ]] +} + +run_initial_setup() { + local payload + local response + local http_code + local body + + log "Server requires initial setup - creating bootstrap DBA user '$ADMIN_USER'" + + payload="$(jq -cn \ + --arg user "$ADMIN_USER" \ + --arg password "$ADMIN_PASSWORD" \ + --arg root_password "$ROOT_PASSWORD" \ + '{user: $user, password: $password, root_password: $root_password}')" + response="$(curl -sS -w "\n%{http_code}" -X POST "$KALAMDB_URL/v1/api/auth/setup" \ + -H "Content-Type: application/json" \ + -d "$payload")" + + http_code="$(echo "$response" | tail -1)" + body="$(echo "$response" | sed '$d')" + + if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then + fail "Initial server setup failed: $body" + fi +} + +ensure_access_token() { + if server_needs_setup; then + run_initial_setup + fi + + if try_login "$ADMIN_USER" "$ADMIN_PASSWORD"; then + return 0 + fi + + try_login root "$ROOT_PASSWORD" || fail "Failed to obtain admin or root access token" +} + curl -fsS "$KALAMDB_URL/health" >/dev/null || fail "KalamDB is not reachable at $KALAMDB_URL" -ACCESS_TOKEN="$(curl -fsS -X POST "$KALAMDB_URL/v1/api/auth/login" \ - -H "Content-Type: application/json" \ - -d 
"{\"user\":\"root\",\"password\":\"$ROOT_PASSWORD\"}" | jq -r '.access_token // empty')" -[[ -n "$ACCESS_TOKEN" ]] || fail "Failed to obtain root access token" +ensure_access_token execute_sql() { local sql="$1" @@ -129,7 +195,7 @@ execute_sql_allow_exists "CREATE TOPIC chat_demo.ai_inbox" execute_sql_allow_exists "ALTER TOPIC chat_demo.ai_inbox ADD SOURCE chat_demo.messages ON INSERT" log "Ensuring demo admin user exists" -execute_sql_allow_exists "CREATE USER 'admin' WITH PASSWORD 'kalamdb123' ROLE dba" +execute_sql_allow_exists "CREATE USER '$ADMIN_USER' WITH PASSWORD '$ADMIN_PASSWORD' ROLE dba" count_result="$(curl -fsS -X POST "$KALAMDB_URL/v1/api/sql" \ -H "Content-Type: application/json" \ @@ -143,12 +209,12 @@ fi cat > "$ENV_FILE" </dev/null || fail "KalamDB is not reachable at $KALAMDB_URL" -log "Logging in as root" -ACCESS_TOKEN="$(curl -fsS -X POST "$KALAMDB_URL/v1/api/auth/login" \ - -H "Content-Type: application/json" \ - -d "{\"user\":\"root\",\"password\":\"$ROOT_PASSWORD\"}" | jq -r '.access_token // empty')" -[[ -n "$ACCESS_TOKEN" ]] || fail "Failed to obtain root access token" +ensure_access_token execute_root_sql() { local sql="$1" diff --git a/examples/summarizer-agent/package-lock.json b/examples/summarizer-agent/package-lock.json index a8be6243a..b20ec1841 100644 --- a/examples/summarizer-agent/package-lock.json +++ b/examples/summarizer-agent/package-lock.json @@ -25,7 +25,7 @@ }, "../../link/sdks/typescript/client": { "name": "@kalamdb/client", - "version": "0.4.2-rc.2.1", + "version": "0.4.2-rc.3", "license": "Apache-2.0", "dependencies": { "ws": "^8.20.0" @@ -40,7 +40,7 @@ }, "../../link/sdks/typescript/consumer": { "name": "@kalamdb/consumer", - "version": "0.4.2-rc.2.1", + "version": "0.4.2-rc.3", "license": "Apache-2.0", "devDependencies": { "@types/node": "^25.5.0", diff --git a/examples/summarizer-agent/setup.sh b/examples/summarizer-agent/setup.sh index 92cebda75..bb39977e3 100755 --- a/examples/summarizer-agent/setup.sh +++ 
b/examples/summarizer-agent/setup.sh @@ -3,12 +3,19 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" KALAMDB_URL="${KALAMDB_URL:-http://localhost:8080}" -ROOT_PASSWORD="${KALAMDB_ROOT_PASSWORD:-kalamdb123}" +ROOT_PASSWORD="${KALAMDB_ROOT_PASSWORD-kalamdb123}" SQL_FILE="$SCRIPT_DIR/setup.sql" ENV_FILE="$SCRIPT_DIR/.env.local" ACCESS_TOKEN="" SAMPLE_BLOG_ID="" FORCE_ENV_WRITE=0 +ROOT_PASSWORD_EXPLICIT=0 +ADMIN_USER="admin" +ADMIN_PASSWORD="kalamdb123" + +if [[ -n "${KALAMDB_ROOT_PASSWORD+x}" ]]; then + ROOT_PASSWORD_EXPLICIT=1 +fi log_info() { echo "[setup] $*" @@ -42,18 +49,84 @@ check_server() { fi } -login_root() { - log_info "Logging in as root" +try_login() { + local user="$1" + local password="$2" local response - response="$(curl -fsS -X POST "$KALAMDB_URL/v1/api/auth/login" \ + local http_code + local body + local payload + + payload="$(jq -cn --arg user "$user" --arg password "$password" '{user: $user, password: $password}')" + response="$(curl -sS -w "\n%{http_code}" -X POST "$KALAMDB_URL/v1/api/auth/login" \ -H "Content-Type: application/json" \ - -d "{\"user\":\"root\",\"password\":\"$ROOT_PASSWORD\"}")" + -d "$payload")" + + http_code="$(echo "$response" | tail -1)" + body="$(echo "$response" | sed '$d')" + + if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then + return 1 + fi - ACCESS_TOKEN="$(echo "$response" | jq -r '.access_token // empty')" - if [[ -z "$ACCESS_TOKEN" ]]; then - log_error "Could not get access token. Check KALAMDB_ROOT_PASSWORD." + ACCESS_TOKEN="$(echo "$body" | jq -r '.access_token // empty')" + [[ -n "$ACCESS_TOKEN" ]] +} + +server_needs_setup() { + local response + response="$(curl -fsS "$KALAMDB_URL/v1/api/auth/status")" + [[ "$(echo "$response" | jq -r '.needs_setup // false')" == "true" ]] +} + +run_initial_setup() { + local payload + local response + local http_code + local body + + if [[ -z "$ROOT_PASSWORD" ]]; then + log_error "Initial server setup requires a non-empty root password. 
Set KALAMDB_ROOT_PASSWORD or pass --password." exit 1 fi + + log_info "Server requires initial setup - creating bootstrap DBA user '$ADMIN_USER'" + + payload="$(jq -cn \ + --arg user "$ADMIN_USER" \ + --arg password "$ADMIN_PASSWORD" \ + --arg root_password "$ROOT_PASSWORD" \ + '{user: $user, password: $password, root_password: $root_password}')" + response="$(curl -sS -w "\n%{http_code}" -X POST "$KALAMDB_URL/v1/api/auth/setup" \ + -H "Content-Type: application/json" \ + -d "$payload")" + + http_code="$(echo "$response" | tail -1)" + body="$(echo "$response" | sed '$d')" + + if [[ "$http_code" -lt 200 || "$http_code" -ge 300 ]]; then + log_error "Initial server setup failed" + log_error "$body" + exit 1 + fi +} + +ensure_access_token() { + if server_needs_setup; then + run_initial_setup + fi + + if try_login "$ADMIN_USER" "$ADMIN_PASSWORD"; then + return 0 + fi + + log_info "Logging in as root" + if try_login root "$ROOT_PASSWORD"; then + return 0 + fi + + log_error "Could not get access token. Check KALAMDB_ROOT_PASSWORD or initialize the server via /v1/api/auth/setup." + exit 1 } execute_sql_raw() { @@ -163,8 +236,8 @@ generate_env_file() { cat > "$ENV_FILE" < { + const config = readConfig(); const client = createConsumerClient({ - url: KALAMDB_URL, - authProvider: async () => Auth.basic(KALAMDB_USER, KALAMDB_PASSWORD), + url: config.url, + authProvider: async () => Auth.basic(config.user, config.password), }); - const groupId = options.groupId ?? GROUP; + const groupId = options.groupId ?? config.group; const start = options.start ?? 
'latest'; - console.log(`summarizer-agent ready (topic=${TOPIC}, group=${groupId})`); + console.log(`summarizer-agent ready (topic=${config.topic}, group=${groupId})`); try { await runAgent>({ client, name: 'summarizer-agent', - topic: TOPIC, + topic: config.topic, groupId, start, stopSignal: options.stopSignal, diff --git a/examples/summarizer-agent/tests/summarizer.integration.test.ts b/examples/summarizer-agent/tests/summarizer.integration.test.ts index 6e35c9e29..2a52c103d 100644 --- a/examples/summarizer-agent/tests/summarizer.integration.test.ts +++ b/examples/summarizer-agent/tests/summarizer.integration.test.ts @@ -3,11 +3,22 @@ import path from 'node:path'; import test from 'node:test'; import assert from 'node:assert/strict'; import { setTimeout as sleep } from 'node:timers/promises'; +import { config as loadEnv } from 'dotenv'; import { Auth, createClient } from '@kalamdb/client'; import { buildSummary, startSummarizerAgent } from '../src/agent.js'; const exampleRoot = path.resolve(process.cwd()); +function readRuntimeConfig() { + loadEnv({ path: path.join(exampleRoot, '.env.local'), quiet: true }); + + return { + serverUrl: process.env.KALAMDB_URL ?? 'http://127.0.0.1:8080', + user: process.env.KALAMDB_USER ?? 'root', + password: process.env.KALAMDB_PASSWORD ?? 'kalamdb123', + }; +} + test('agent writes summaries back into blog.blogs', async () => { execFileSync('./setup.sh', [], { cwd: exampleRoot, @@ -18,6 +29,8 @@ test('agent writes summaries back into blog.blogs', async () => { }, }); + const { serverUrl, user, password } = readRuntimeConfig(); + const controller = new AbortController(); const agentRun = startSummarizerAgent({ stopSignal: controller.signal, @@ -27,8 +40,8 @@ test('agent writes summaries back into blog.blogs', async () => { await sleep(750); const client = createClient({ - url: process.env.KALAMDB_URL ?? 'http://127.0.0.1:8080', - authProvider: async () => Auth.basic('root', process.env.KALAMDB_PASSWORD ?? 
'kalamdb123'), + url: serverUrl, + authProvider: async () => Auth.basic(user, password), }); const content = `KalamDB topics wake lightweight workers immediately after a row changes ${Date.now()}. The worker can enrich the row without polling.`; diff --git a/link/kalam-client/tests/common/mod.rs b/link/kalam-client/tests/common/mod.rs index 23934c6d1..e0d619cdf 100644 --- a/link/kalam-client/tests/common/mod.rs +++ b/link/kalam-client/tests/common/mod.rs @@ -2,18 +2,23 @@ pub mod tcp_proxy; +use std::{ + collections::HashMap, + path::PathBuf, + sync::{Mutex, OnceLock}, + time::Duration, +}; + use kalamdb_configs::ServerConfig; use kalamdb_server::lifecycle::RunningTestHttpServer; use reqwest::Client; use serde_json::json; -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::{Mutex, OnceLock}; -use std::time::Duration; use tempfile::TempDir; -use tokio::runtime::{Handle, Runtime}; -use tokio::sync::Mutex as TokioMutex; -use tokio::time::{sleep, Instant}; +use tokio::{ + runtime::{Handle, Runtime}, + sync::Mutex as TokioMutex, + time::{sleep, Instant}, +}; static SERVER_URL: OnceLock = OnceLock::new(); static ISOLATED_SERVER_URL: OnceLock = OnceLock::new(); diff --git a/link/kalam-client/tests/common/tcp_proxy.rs b/link/kalam-client/tests/common/tcp_proxy.rs index a72e4c655..115aa988f 100644 --- a/link/kalam-client/tests/common/tcp_proxy.rs +++ b/link/kalam-client/tests/common/tcp_proxy.rs @@ -1,13 +1,20 @@ -use std::collections::HashMap; -use std::io::ErrorKind; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::net::{TcpListener, TcpStream}; -use tokio::sync::Mutex as TokioMutex; -use tokio::task::JoinHandle; -use tokio::time::sleep; +use std::{ + collections::HashMap, + io::ErrorKind, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, + }, + time::Duration, +}; + +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + 
net::{TcpListener, TcpStream}, + sync::Mutex as TokioMutex, + task::JoinHandle, + time::sleep, +}; /// A TCP proxy that sits between a client and a real server, allowing tests to /// simulate network failures by pausing new connections and/or forcibly dropping diff --git a/link/kalam-client/tests/integration_tests.rs b/link/kalam-client/tests/integration_tests.rs index f827a1157..1ba0303c7 100644 --- a/link/kalam-client/tests/integration_tests.rs +++ b/link/kalam-client/tests/integration_tests.rs @@ -14,15 +14,22 @@ //! cd link && cargo test --test integration_tests //! ``` -use kalam_client::models::{BatchControl, BatchStatus, KalamDataType, ResponseStatus, SchemaField}; +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, OnceLock, + }, + time::Duration, +}; + use kalam_client::{ + models::{BatchControl, BatchStatus, KalamDataType, ResponseStatus, SchemaField}, AuthProvider, ChangeEvent, KalamLinkClient, KalamLinkError, SubscriptionConfig, }; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::{Arc, OnceLock}; -use std::time::Duration; -use tokio::sync::{OwnedSemaphorePermit, Semaphore}; -use tokio::time::{sleep, timeout}; +use tokio::{ + sync::{OwnedSemaphorePermit, Semaphore}, + time::{sleep, timeout}, +}; mod common; @@ -371,7 +378,6 @@ fn sample_batch_control() -> BatchControl { has_more: false, status: BatchStatus::Ready, last_seq_id: None, - snapshot_end_seq: None, } } diff --git a/link/kalam-client/tests/proxied/ack_before_first_batch.rs b/link/kalam-client/tests/proxied/ack_before_first_batch.rs index 36b313226..d60494f96 100644 --- a/link/kalam-client/tests/proxied/ack_before_first_batch.rs +++ b/link/kalam-client/tests/proxied/ack_before_first_batch.rs @@ -1,11 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + use kalam_client::{models::BatchStatus, ChangeEvent, SubscriptionConfig, SubscriptionOptions}; -use 
std::collections::HashSet; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Disconnect after the server acknowledges the subscription but before the /// client finishes receiving the initial data batches. The subscription should /// still replay the full snapshot after reconnect and then continue with live rows. diff --git a/link/kalam-client/tests/proxied/blackhole_during_subscribe.rs b/link/kalam-client/tests/proxied/blackhole_during_subscribe.rs index 2cc8d3140..885a239a0 100644 --- a/link/kalam-client/tests/proxied/blackhole_during_subscribe.rs +++ b/link/kalam-client/tests/proxied/blackhole_during_subscribe.rs @@ -1,9 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; -use kalam_client::SubscriptionConfig; use std::time::Duration; + +use kalam_client::SubscriptionConfig; use tokio::time::timeout; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Blackhole the proxy right as the client sends its subscribe request. /// The TCP socket stays open but no data flows. The client should detect a /// dead link (pong timeout), reconnect, re-subscribe, and ultimately deliver diff --git a/link/kalam-client/tests/proxied/double_outage.rs b/link/kalam-client/tests/proxied/double_outage.rs index 42ab3edde..48dd165c1 100644 --- a/link/kalam-client/tests/proxied/double_outage.rs +++ b/link/kalam-client/tests/proxied/double_outage.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Server goes down again while the client is in the process of reconnecting. 
/// After the second outage clears, the client should still recover. #[tokio::test] diff --git a/link/kalam-client/tests/proxied/event_counter_integrity.rs b/link/kalam-client/tests/proxied/event_counter_integrity.rs index 41e02ab42..fe3c76652 100644 --- a/link/kalam-client/tests/proxied/event_counter_integrity.rs +++ b/link/kalam-client/tests/proxied/event_counter_integrity.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::timeout; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// After a complex outage+recovery sequence, verify that connect and disconnect /// event counters are consistent: each reconnect fires exactly one on_connect, /// each forcible drop fires exactly one on_disconnect, and the final state after diff --git a/link/kalam-client/tests/proxied/gradual_degradation.rs b/link/kalam-client/tests/proxied/gradual_degradation.rs index 64b3eb5a7..485c396f1 100644 --- a/link/kalam-client/tests/proxied/gradual_degradation.rs +++ b/link/kalam-client/tests/proxied/gradual_degradation.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Latency increases in steps until it exceeds the pong timeout, forcing the /// client to reconnect. Once latency drops back to zero the client should /// resume and deliver any rows queued on the server during the degraded window. 
diff --git a/link/kalam-client/tests/proxied/heavy_write_burst_recovery.rs b/link/kalam-client/tests/proxied/heavy_write_burst_recovery.rs index da2eea953..7b552aaf9 100644 --- a/link/kalam-client/tests/proxied/heavy_write_burst_recovery.rs +++ b/link/kalam-client/tests/proxied/heavy_write_burst_recovery.rs @@ -1,11 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::collections::HashSet; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// A large burst of writes (50 rows) is performed while the client is /// disconnected. After recovery, every single row must arrive exactly once /// with no duplicates and no gaps. This stress-tests the resume/replay diff --git a/link/kalam-client/tests/proxied/helpers.rs b/link/kalam-client/tests/proxied/helpers.rs index e2f494789..08066068b 100644 --- a/link/kalam-client/tests/proxied/helpers.rs +++ b/link/kalam-client/tests/proxied/helpers.rs @@ -1,18 +1,24 @@ #![allow(dead_code)] -use crate::common; -use kalam_client::auth::AuthProvider; -use kalam_client::seq_tracking::{extract_max_seq, row_seq}; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, + time::Duration, +}; + use kalam_client::{ + auth::AuthProvider, + seq_tracking::{extract_max_seq, row_seq}, ChangeEvent, ConnectionOptions, EventHandlers, KalamCellValue, KalamLinkClient, KalamLinkTimeouts, SeqId, }; -use std::collections::HashMap; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::Arc; -use std::time::Duration; use tokio::time::{sleep, Instant}; +use crate::common; + pub const TEST_TIMEOUT: Duration = Duration::from_secs(10); pub const RECONNECT_WAIT_TIMEOUT: Duration = Duration::from_secs(30); @@ -240,12 +246,9 @@ pub async fn 
wait_for_reconnect( if Instant::now() >= deadline { panic!( - "{}: reconnect did not complete within {:?} (connect_count={}, expected_connects={}, connected={})", - context, - RECONNECT_WAIT_TIMEOUT, - current_connects, - expected_connects, - connected + "{}: reconnect did not complete within {:?} (connect_count={}, \ + expected_connects={}, connected={})", + context, RECONNECT_WAIT_TIMEOUT, current_connects, expected_connects, connected ); } diff --git a/link/kalam-client/tests/proxied/large_snapshot_repeated_outages.rs b/link/kalam-client/tests/proxied/large_snapshot_repeated_outages.rs index 300ac9603..41eca0fe3 100644 --- a/link/kalam-client/tests/proxied/large_snapshot_repeated_outages.rs +++ b/link/kalam-client/tests/proxied/large_snapshot_repeated_outages.rs @@ -1,11 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + use kalam_client::{SubscriptionConfig, SubscriptionOptions}; -use std::collections::HashSet; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// A large initial snapshot should still complete when the connection drops /// more than once before the client reaches steady-state live delivery. 
#[tokio::test] diff --git a/link/kalam-client/tests/proxied/latency_during_snapshot.rs b/link/kalam-client/tests/proxied/latency_during_snapshot.rs index 8590be2e7..7549e5671 100644 --- a/link/kalam-client/tests/proxied/latency_during_snapshot.rs +++ b/link/kalam-client/tests/proxied/latency_during_snapshot.rs @@ -1,9 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; -use kalam_client::SubscriptionConfig; use std::time::Duration; + +use kalam_client::SubscriptionConfig; use tokio::time::timeout; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Inject high latency while the initial snapshot is being loaded. /// If latency exceeds `initial_data_timeout` the subscription should fail /// gracefully; if below the timeout, the snapshot should complete normally. diff --git a/link/kalam-client/tests/proxied/live_updates_resume.rs b/link/kalam-client/tests/proxied/live_updates_resume.rs index 1a6b6e5cf..7c7992dcb 100644 --- a/link/kalam-client/tests/proxied/live_updates_resume.rs +++ b/link/kalam-client/tests/proxied/live_updates_resume.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Server goes down while the client is receiving live updates. /// After reconnect the subscription should resume from where it left off /// and NOT replay rows seen before the drop. 
diff --git a/link/kalam-client/tests/proxied/loading_resume_with_live_writes.rs b/link/kalam-client/tests/proxied/loading_resume_with_live_writes.rs index 4707ec1fb..800e9d3fb 100644 --- a/link/kalam-client/tests/proxied/loading_resume_with_live_writes.rs +++ b/link/kalam-client/tests/proxied/loading_resume_with_live_writes.rs @@ -1,12 +1,13 @@ +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + +use kalam_client::{ + models::BatchStatus, seq_tracking::row_seq, ChangeEvent, SeqId, SubscriptionConfig, + SubscriptionOptions, +}; +use tokio::time::{sleep, timeout, Instant}; + use super::helpers::*; use crate::common::tcp_proxy::TcpDisconnectProxy; -use kalam_client::models::BatchStatus; -use kalam_client::seq_tracking::row_seq; -use kalam_client::{ChangeEvent, SeqId, SubscriptionConfig, SubscriptionOptions}; -use std::collections::HashSet; -use std::sync::atomic::Ordering; -use std::time::Duration; -use tokio::time::{sleep, timeout, Instant}; fn observe_loading_event( event: &ChangeEvent, @@ -159,9 +160,8 @@ async fn test_loading_snapshot_with_live_writes_resumes_without_duplicate_rows() writer .execute_query( &format!( - "INSERT INTO {} (id, value) VALUES \ - ('loading-live-0', 'during-loading-0'), \ - ('loading-live-1', 'during-loading-1')", + "INSERT INTO {} (id, value) VALUES ('loading-live-0', \ + 'during-loading-0'), ('loading-live-1', 'during-loading-1')", table ), None, diff --git a/link/kalam-client/tests/proxied/mixed_stage_recovery.rs b/link/kalam-client/tests/proxied/mixed_stage_recovery.rs index 904cac826..16ca6af67 100644 --- a/link/kalam-client/tests/proxied/mixed_stage_recovery.rs +++ b/link/kalam-client/tests/proxied/mixed_stage_recovery.rs @@ -1,11 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::collections::HashSet; -use std::sync::atomic::Ordering; -use 
std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// One subscription is already in steady-state live mode while a second /// subscription is still loading a large initial snapshot when the network /// drops. After reconnect the live subscription must resume from its durable diff --git a/link/kalam-client/tests/proxied/multi_sub_bounce.rs b/link/kalam-client/tests/proxied/multi_sub_bounce.rs index 4d5ed1545..71e37edd9 100644 --- a/link/kalam-client/tests/proxied/multi_sub_bounce.rs +++ b/link/kalam-client/tests/proxied/multi_sub_bounce.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout, Instant}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Three active subscriptions on different tables experience a server bounce /// (down then up). After recovery ALL three should resume from their respective /// last-seen seq_ids and only deliver newer rows. diff --git a/link/kalam-client/tests/proxied/rapid_flap.rs b/link/kalam-client/tests/proxied/rapid_flap.rs index 16862e722..2fd5d2c86 100644 --- a/link/kalam-client/tests/proxied/rapid_flap.rs +++ b/link/kalam-client/tests/proxied/rapid_flap.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Rapid connection flapping: bring the proxy up and down in quick succession /// (sub-second cycles). 
The client must survive without panicking, must not /// get stuck in a reconnect loop, and must eventually stabilise and resume diff --git a/link/kalam-client/tests/proxied/server_down_connecting.rs b/link/kalam-client/tests/proxied/server_down_connecting.rs index 57614a4c6..27c529e89 100644 --- a/link/kalam-client/tests/proxied/server_down_connecting.rs +++ b/link/kalam-client/tests/proxied/server_down_connecting.rs @@ -1,8 +1,9 @@ +use std::{sync::atomic::Ordering, time::Duration}; + +use tokio::time::sleep; + use super::helpers::*; use crate::common::tcp_proxy::TcpDisconnectProxy; -use std::sync::atomic::Ordering; -use std::time::Duration; -use tokio::time::sleep; /// Server goes down before the client finishes connecting. /// The client should fail to connect (or auto-reconnect once the proxy resumes). diff --git a/link/kalam-client/tests/proxied/server_down_initial_load.rs b/link/kalam-client/tests/proxied/server_down_initial_load.rs index 8f8c64a20..1711f0d79 100644 --- a/link/kalam-client/tests/proxied/server_down_initial_load.rs +++ b/link/kalam-client/tests/proxied/server_down_initial_load.rs @@ -1,11 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{collections::HashSet, sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::collections::HashSet; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Server goes down while the client is receiving the initial data batch. /// After the proxy resumes, the subscription should reconnect and deliver the data. 
#[tokio::test] diff --git a/link/kalam-client/tests/proxied/socket_drop_resume.rs b/link/kalam-client/tests/proxied/socket_drop_resume.rs index dc03be699..701d0a6c6 100644 --- a/link/kalam-client/tests/proxied/socket_drop_resume.rs +++ b/link/kalam-client/tests/proxied/socket_drop_resume.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout, Instant}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Simulate a real network/socket drop by routing the client through a local /// TCP proxy, force-closing the active socket, then allowing the shared /// connection to auto-reconnect and resume the same subscription. @@ -104,7 +105,8 @@ async fn test_shared_connection_auto_reconnects_after_socket_drop_and_resumes() assert_eq!( observed_seq, Some(resume_from), - "pre-drop live insert should expose _seq to the client before checking shared state; last event: {:?}", + "pre-drop live insert should expose _seq to the client before checking shared state; last \ + event: {:?}", last_pre_event ); let subs = client.subscriptions().await; diff --git a/link/kalam-client/tests/proxied/staggered_outages.rs b/link/kalam-client/tests/proxied/staggered_outages.rs index ebe86cc64..92bf58238 100644 --- a/link/kalam-client/tests/proxied/staggered_outages.rs +++ b/link/kalam-client/tests/proxied/staggered_outages.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Repeated outages with different downtime windows should preserve /// 
forward-only resume semantics on a single shared subscription. #[tokio::test] diff --git a/link/kalam-client/tests/proxied/subscribe_during_reconnect.rs b/link/kalam-client/tests/proxied/subscribe_during_reconnect.rs index b44e71aa6..40e2368fa 100644 --- a/link/kalam-client/tests/proxied/subscribe_during_reconnect.rs +++ b/link/kalam-client/tests/proxied/subscribe_during_reconnect.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Add a new subscription while the client is actively reconnecting after an /// outage. The new subscription must eventually be established and deliver its /// data once the connection stabilises. diff --git a/link/kalam-client/tests/proxied/transport_impairments.rs b/link/kalam-client/tests/proxied/transport_impairments.rs index 5290b733a..eac3406fd 100644 --- a/link/kalam-client/tests/proxied/transport_impairments.rs +++ b/link/kalam-client/tests/proxied/transport_impairments.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + async fn wait_for_row_after_checkpoint( sub: &mut kalam_client::SubscriptionManager, checkpoint: kalam_client::SeqId, diff --git a/link/kalam-client/tests/proxied/unsubscribe_during_outage.rs b/link/kalam-client/tests/proxied/unsubscribe_during_outage.rs index a15cebc9e..bfc22dd17 100644 --- a/link/kalam-client/tests/proxied/unsubscribe_during_outage.rs +++ 
b/link/kalam-client/tests/proxied/unsubscribe_during_outage.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::SubscriptionConfig; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Close (unsubscribe from) a subscription while the client is disconnected. /// On reconnect the shared connection must NOT re-subscribe the dropped query, /// but must still re-subscribe any remaining active ones. diff --git a/link/kalam-client/tests/proxied/update_delete_resume.rs b/link/kalam-client/tests/proxied/update_delete_resume.rs index 98b06e488..f39ea1167 100644 --- a/link/kalam-client/tests/proxied/update_delete_resume.rs +++ b/link/kalam-client/tests/proxied/update_delete_resume.rs @@ -1,10 +1,11 @@ -use super::helpers::*; -use crate::common::tcp_proxy::TcpDisconnectProxy; +use std::{sync::atomic::Ordering, time::Duration}; + use kalam_client::{models::BatchStatus, ChangeEvent, SubscriptionConfig}; -use std::sync::atomic::Ordering; -use std::time::Duration; use tokio::time::{sleep, timeout}; +use super::helpers::*; +use crate::common::tcp_proxy::TcpDisconnectProxy; + /// Resume logic should work for update and delete events, not just inserts. /// Rows updated or deleted before the drop must not replay, while changes made /// during the outage and after reconnect must still arrive in order. diff --git a/link/kalam-client/tests/test_consumer.rs b/link/kalam-client/tests/test_consumer.rs index 401ed4502..b81a88d4e 100644 --- a/link/kalam-client/tests/test_consumer.rs +++ b/link/kalam-client/tests/test_consumer.rs @@ -5,12 +5,12 @@ //! //! 
Run with: cargo test --test test_consumer -- --nocapture +use std::{collections::HashMap, time::Duration}; + use kalam_client::{ consumer::{AutoOffsetReset, TopicConsumer}, AuthProvider, KalamLinkClient, }; -use std::collections::HashMap; -use std::time::Duration; fn get_server_url() -> String { std::env::var("KALAMDB_SERVER_URL").unwrap_or_else(|_| "http://localhost:3000".to_string()) @@ -50,7 +50,8 @@ async fn setup_topic_and_table(client: &KalamLinkClient, topic_name: &str, table // Create table let create_table = format!( - "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, message TEXT, created_at TIMESTAMP DEFAULT NOW())", + "CREATE TABLE IF NOT EXISTS {} (id INT PRIMARY KEY, message TEXT, created_at TIMESTAMP \ + DEFAULT NOW())", table_name ); let _ = client.execute_query(&create_table, None, None, None).await; diff --git a/link/kalam-client/tests/test_shared_connection.rs b/link/kalam-client/tests/test_shared_connection.rs index 68ea426c3..b84108139 100644 --- a/link/kalam-client/tests/test_shared_connection.rs +++ b/link/kalam-client/tests/test_shared_connection.rs @@ -8,17 +8,22 @@ //! //! **IMPORTANT**: These tests require a running KalamDB server (auto-started). 
-use kalam_client::auth::AuthProvider; -use kalam_client::seq_tracking::{extract_max_seq, row_seq}; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, + time::Duration, +}; + use kalam_client::{ + auth::AuthProvider, + seq_tracking::{extract_max_seq, row_seq}, ChangeEvent, ConnectionOptions, EventHandlers, KalamCellValue, KalamLinkClient, KalamLinkTimeouts, LiveRowsConfig, LiveRowsEvent, SeqId, SubscriptionConfig, SubscriptionOptions, }; -use std::collections::HashMap; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::Arc; -use std::time::Duration; use tokio::time::{sleep, timeout, Instant}; mod common; diff --git a/link/kalam-client/tests/test_subscription_cleanup.rs b/link/kalam-client/tests/test_subscription_cleanup.rs index fba5dff54..addba416e 100644 --- a/link/kalam-client/tests/test_subscription_cleanup.rs +++ b/link/kalam-client/tests/test_subscription_cleanup.rs @@ -4,8 +4,8 @@ //! - `close()` is idempotent and marks the subscription as closed. //! - `is_closed()` reflects the expected state. //! - Explicitly closing a subscription removes it from `system.live`. -//! - Dropping without an explicit `close()` also sends a cleanup frame via -//! the `Drop` impl, removing the subscription server-side. +//! - Dropping without an explicit `close()` also sends a cleanup frame via the `Drop` impl, +//! removing the subscription server-side. //! //! All per-operation timeouts are short so the suite runs well within 30s/test. //! @@ -19,12 +19,15 @@ //! cd link && cargo test --features e2e-tests --test test_subscription_cleanup -- --nocapture //! 
``` +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::Duration, +}; + use kalam_client::{ AuthProvider, ChangeEvent, KalamLinkClient, KalamLinkError, KalamLinkTimeouts, SubscriptionConfig, SubscriptionManager, }; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; use tokio::time::{sleep, timeout}; mod common; @@ -222,8 +225,8 @@ async fn test_explicit_close_removes_from_live() { if !appeared { // Some server configs may not expose this; soft-fail with a note eprintln!( - "WARN: subscription did not appear in system.live within 5s — \ - server may not expose this table; skipping assertion" + "WARN: subscription did not appear in system.live within 5s — server may not expose \ + this table; skipping assertion" ); } diff --git a/link/kalam-client/tests/test_user_table_subscriptions.rs b/link/kalam-client/tests/test_user_table_subscriptions.rs index 9bfd66652..17ed292a6 100644 --- a/link/kalam-client/tests/test_user_table_subscriptions.rs +++ b/link/kalam-client/tests/test_user_table_subscriptions.rs @@ -15,12 +15,14 @@ //! cargo test --test test_user_table_subscriptions -- --nocapture //! 
``` -use kalam_client::auth::AuthProvider; -use kalam_client::models::{BatchStatus, ResponseStatus}; -use kalam_client::subscription::SubscriptionManager; -use kalam_client::{ChangeEvent, KalamLinkClient, QueryResponse, SubscriptionConfig}; -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; + +use kalam_client::{ + auth::AuthProvider, + models::{BatchStatus, ResponseStatus}, + subscription::SubscriptionManager, + ChangeEvent, KalamLinkClient, QueryResponse, SubscriptionConfig, +}; use tokio::time::{sleep, timeout}; mod common; @@ -448,14 +450,19 @@ async fn test_multiple_filtered_subscriptions() { let type_str = extract_string_value(type_obj); println!("📊 Extracted type value: {:?}", type_str); - // CRITICAL: Verify filtering - 'thinking' subscription should ONLY receive 'thinking' rows + // CRITICAL: Verify filtering - 'thinking' subscription should ONLY receive + // 'thinking' rows assert_eq!( - type_str.as_deref(), + type_str.as_deref(), Some("thinking"), - "FILTERING CHECK FAILED: 'thinking' subscription received row with type={:?}, expected 'thinking'", + "FILTERING CHECK FAILED: 'thinking' subscription received row with type={:?}, \ + expected 'thinking'", type_str ); - println!("✅ FILTERING WORKS: 'thinking' subscription correctly received ONLY 'thinking' type row"); + println!( + "✅ FILTERING WORKS: 'thinking' subscription correctly received ONLY \ + 'thinking' type row" + ); } else { panic!("Row doesn't have 'type' field. 
Full row: {:?}", row); } @@ -482,14 +489,19 @@ async fn test_multiple_filtered_subscriptions() { if let Some(type_obj) = row.get("type") { let type_str = extract_string_value(type_obj); - // CRITICAL: Verify filtering - 'typing' subscription should ONLY receive 'typing' rows + // CRITICAL: Verify filtering - 'typing' subscription should ONLY receive 'typing' + // rows assert_eq!( type_str.as_deref(), Some("typing"), - "FILTERING CHECK FAILED: 'typing' subscription received row with type={:?}, expected 'typing'", + "FILTERING CHECK FAILED: 'typing' subscription received row with type={:?}, \ + expected 'typing'", type_str ); - println!("✅ FILTERING WORKS: 'typing' subscription correctly received ONLY 'typing' type row"); + println!( + "✅ FILTERING WORKS: 'typing' subscription correctly received ONLY 'typing' \ + type row" + ); } } } @@ -530,8 +542,13 @@ async fn test_multiple_filtered_subscriptions() { rows, old_rows, } => { - println!("📥 'thinking' sub received Update: subscription_id={}, rows={}, old_rows={}", - subscription_id, rows.len(), old_rows.len()); + println!( + "📥 'thinking' sub received Update: subscription_id={}, rows={}, \ + old_rows={}", + subscription_id, + rows.len(), + old_rows.len() + ); // Verify the update came through if let Some(row) = rows.first() { @@ -589,7 +606,8 @@ async fn test_multiple_filtered_subscriptions() { Err(e) => println!("⚠️ Error during unsubscribe (may be OK): {}", e), } - // === Step 7: Insert another 'typing' row and verify 'thinking' subscription does NOT receive it === + // === Step 7: Insert another 'typing' row and verify 'thinking' subscription does NOT receive + // it === println!("\n🔄 Step 7: Verifying filtered subscriptions don't receive unmatched inserts..."); sleep(Duration::from_millis(100)).await; @@ -628,10 +646,17 @@ async fn test_multiple_filtered_subscriptions() { let type_str = extract_string_value(type_obj); if type_str.as_deref() == Some("typing") { received_wrong_type = true; - println!("❌ FILTERING 
FAILED: 'thinking' subscription received 'typing' row!"); + println!( + "❌ FILTERING FAILED: 'thinking' subscription received \ + 'typing' row!" + ); println!("📊 Unexpected row: {:?}", row); } else { - println!("📥 'thinking' received insert with type={:?} (unexpected but not 'typing')", type_str); + println!( + "📥 'thinking' received insert with type={:?} (unexpected but \ + not 'typing')", + type_str + ); } } } @@ -654,7 +679,10 @@ async fn test_multiple_filtered_subscriptions() { if Instant::now() >= check_deadline && !received_wrong_type { // Timeout is EXPECTED if filtering works correctly - println!("✅ FILTERING VERIFIED: 'thinking' subscription correctly did NOT receive 'typing' insert (timeout)"); + println!( + "✅ FILTERING VERIFIED: 'thinking' subscription correctly did NOT receive 'typing' \ + insert (timeout)" + ); } assert!( diff --git a/link/kalam-client/tests/test_websocket_integration.rs b/link/kalam-client/tests/test_websocket_integration.rs index c8c57b69a..37cc17b99 100644 --- a/link/kalam-client/tests/test_websocket_integration.rs +++ b/link/kalam-client/tests/test_websocket_integration.rs @@ -18,11 +18,15 @@ //! //! Tests will be skipped if the server is not running. -use kalam_client::auth::AuthProvider; -use kalam_client::models::ResponseStatus; -use kalam_client::{ChangeEvent, KalamLinkClient, QueryResponse, SubscriptionConfig}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; +use std::{ + sync::atomic::{AtomicU64, Ordering}, + time::Duration, +}; + +use kalam_client::{ + auth::AuthProvider, models::ResponseStatus, ChangeEvent, KalamLinkClient, QueryResponse, + SubscriptionConfig, +}; use tokio::time::{sleep, timeout}; mod common; @@ -410,7 +414,8 @@ async fn test_websocket_initial_data_snapshot() { if let Ok(resp) = check { if resp.status == ResponseStatus::Success { eprintln!( - "⚠️ InitialData snapshot not received within timeout; data is queryable. Skipping strict assertion." 
+ "⚠️ InitialData snapshot not received within timeout; data is queryable. \ + Skipping strict assertion." ); cleanup_test_data(&table).await.ok(); return; @@ -473,7 +478,8 @@ async fn test_websocket_insert_notification() { // Insert new data that should trigger notification execute_sql(&format!( - "INSERT INTO {} (event_id, event_type, data) VALUES ('{}', 'realtime', 'insert_test')", + "INSERT INTO {} (event_id, event_type, data) VALUES ('{}', 'realtime', \ + 'insert_test')", table, unique_event_id() )) @@ -958,11 +964,9 @@ async fn test_error_connection_refused() { async fn test_server_running_check() { if !common::is_server_running().await { eprintln!( - "\n⚠️ Server is not running at {}\n\n\ - To run these tests:\n\ - 1. Terminal 1: cd backend && cargo run --bin kalamdb-server\n\ - 2. Terminal 2: cd cli && cargo test --test test_websocket_integration\n\n\ - Tests will be skipped if server is not running.\n", + "\n⚠️ Server is not running at {}\n\nTo run these tests:\n1. Terminal 1: cd backend \ + && cargo run --bin kalamdb-server\n2. 
Terminal 2: cd cli && cargo test --test \ + test_websocket_integration\n\nTests will be skipped if server is not running.\n", common::server_url() ); // Don't panic - just skip diff --git a/link/kalam-client/tests/wasm_integration.rs b/link/kalam-client/tests/wasm_integration.rs index 713105956..055e2159f 100644 --- a/link/kalam-client/tests/wasm_integration.rs +++ b/link/kalam-client/tests/wasm_integration.rs @@ -89,10 +89,7 @@ async fn test_insert_rejects_invalid_table_name() { let client = create_test_client(); let result = client - .insert( - "bad table name".to_string(), - r#"{"title":"x"}"#.to_string(), - ) + .insert("bad table name".to_string(), r#"{"title":"x"}"#.to_string()) .await; let err = js_error_text(result.expect_err("invalid table name should fail")); @@ -107,15 +104,10 @@ async fn test_insert_rejects_invalid_table_name() { async fn test_insert_rejects_invalid_json_payload() { let client = create_test_client(); - let result = client - .insert("todos".to_string(), "{not-json}".to_string()) - .await; + let result = client.insert("todos".to_string(), "{not-json}".to_string()).await; let err = js_error_text(result.expect_err("invalid JSON should fail")); - assert!( - err.contains("Invalid JSON data"), - "unexpected invalid JSON error: {err}" - ); + assert!(err.contains("Invalid JSON data"), "unexpected invalid JSON error: {err}"); } // T063W: Test insert() rejects empty JSON objects @@ -159,9 +151,7 @@ async fn test_unsubscribe_not_connected() { #[wasm_bindgen_test] async fn test_delete_rejects_invalid_row_id() { let client = create_test_client(); - let result = client - .delete("todos".to_string(), "bad' OR 1=1 --".to_string()) - .await; + let result = client.delete("todos".to_string(), "bad' OR 1=1 --".to_string()).await; let err = js_error_text(result.expect_err("invalid row id should fail")); assert!( diff --git a/link/kalam-consumer-wasm/src/client.rs b/link/kalam-consumer-wasm/src/client.rs index c62eaf1e7..58cea89e7 100644 --- 
a/link/kalam-consumer-wasm/src/client.rs +++ b/link/kalam-consumer-wasm/src/client.rs @@ -1,7 +1,6 @@ use base64::Engine; -use wasm_bindgen::prelude::*; - use link_common::models::{AckResponse, ConsumeMessage, ConsumeResponse, RowData, UserId}; +use wasm_bindgen::prelude::*; use crate::helpers::{ fetch_json_response, js_value_to_json_string, response_text, serialize_json_to_js_value, @@ -142,7 +141,9 @@ fn decode_consume_message( .get("key") .and_then(serde_json::Value::as_str) .map(ToOwned::to_owned) - .or_else(|| raw.get("message_id").and_then(serde_json::Value::as_str).map(ToOwned::to_owned)), + .or_else(|| { + raw.get("message_id").and_then(serde_json::Value::as_str).map(ToOwned::to_owned) + }), op: raw.get("op").and_then(serde_json::Value::as_str).map(ToOwned::to_owned), timestamp_ms: raw .get("timestamp_ms") diff --git a/link/kalam-consumer/src/lib.rs b/link/kalam-consumer/src/lib.rs index 0b9952bfb..f99e067ca 100644 --- a/link/kalam-consumer/src/lib.rs +++ b/link/kalam-consumer/src/lib.rs @@ -8,11 +8,10 @@ pub mod models { #[cfg(feature = "native-sdk")] pub use kalam_client::consumer::ConsumerBuilder; -pub use kalam_client::models::UserId; #[cfg(feature = "native-sdk")] pub use kalam_client::TopicConsumer; pub use kalam_client::{ - AckResponse, AutoOffsetReset, CommitMode, CommitResult, ConsumeMessage, ConsumeRequest, - ConsumeResponse, ConsumerConfig, ConsumerOffsets, ConsumerRecord, PayloadMode, RowData, - TopicOp, + models::UserId, AckResponse, AutoOffsetReset, CommitMode, CommitResult, ConsumeMessage, + ConsumeRequest, ConsumeResponse, ConsumerConfig, ConsumerOffsets, ConsumerRecord, PayloadMode, + RowData, TopicOp, }; diff --git a/link/kalam-link-dart/src/api.rs b/link/kalam-link-dart/src/api.rs index 4af71f3bb..4f09ac87b 100644 --- a/link/kalam-link-dart/src/api.rs +++ b/link/kalam-link-dart/src/api.rs @@ -18,19 +18,26 @@ //! `true` in [`dart_create_client`], events are queued internally and //! retrieved via [`dart_next_connection_event`]. 
+use std::{ + collections::VecDeque, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; + +use flutter_rust_bridge::frb; +use tokio::sync::{Mutex, Notify}; + use crate::models::{ DartAuthProvider, DartChangeEvent, DartConnectionError, DartConnectionEvent, DartDisconnectReason, DartLiveRowsConfig, DartLiveRowsEvent, DartLoginResponse, DartQueryResponse, DartSubscriptionConfig, }; -use flutter_rust_bridge::frb; -use std::collections::VecDeque; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use tokio::sync::{Mutex, Notify}; const DART_INITIAL_RECONNECT_DELAY_MS: u64 = 200; const DART_MAX_RECONNECT_DELAY_MS: u64 = 2_000; +const DART_CONNECTION_EVENT_QUEUE_CAPACITY: usize = 256; // --------------------------------------------------------------------------- // Client wrapper @@ -63,18 +70,18 @@ pub struct DartKalamClient { /// * `auth` — authentication method (basic, JWT, or none). /// * `timeout_ms` — optional HTTP request timeout in milliseconds (default 30 000). /// * `max_retries` — optional retry count for idempotent queries (default 3). -/// * `enable_connection_events` — when `true`, connection lifecycle events -/// (connect, disconnect, error, receive, send) are queued internally and -/// can be retrieved via [`dart_next_connection_event`]. -/// * `disable_compression` — when `true`, the WebSocket URL includes -/// `?compress=false` so the server sends plain-text JSON frames instead of -/// gzip-compressed binary frames. Useful during development. -/// * `keepalive_interval_ms` — optional WebSocket keep-alive ping interval -/// in milliseconds (default 10 000). Set to 0 to disable keep-alive pings. -/// * `ws_lazy_connect` — controls when the WebSocket connection is established. -/// When `true` (the default), the connection is deferred until the first -/// `subscribe()` call. When `false`, the connection is established eagerly. -/// Authentication uses the same provider configured for HTTP queries. 
+/// * `enable_connection_events` — when `true`, connection lifecycle events (connect, disconnect, +/// error, receive, send) are queued internally and can be retrieved via +/// [`dart_next_connection_event`]. +/// * `disable_compression` — when `true`, the WebSocket URL includes `?compress=false` so the +/// server sends plain-text JSON frames instead of gzip-compressed binary frames. Useful during +/// development. +/// * `keepalive_interval_ms` — optional WebSocket keep-alive ping interval in milliseconds (default +/// 10 000). Set to 0 to disable keep-alive pings. +/// * `ws_lazy_connect` — controls when the WebSocket connection is established. When `true` (the +/// default), the connection is deferred until the first `subscribe()` call. When `false`, the +/// connection is established eagerly. Authentication uses the same provider configured for HTTP +/// queries. /// /// **Note:** This function intentionally omits `#[frb(sync)]` so that FRB /// dispatches it to a worker thread via `executeNormal`. 
The client @@ -206,9 +213,17 @@ pub fn dart_update_auth(client: &DartKalamClient, auth: DartAuthProvider) -> any #[cfg(test)] mod tests { - use super::build_dart_connection_options; - use super::{DART_INITIAL_RECONNECT_DELAY_MS, DART_MAX_RECONNECT_DELAY_MS}; + use std::{collections::VecDeque, sync::Arc}; + use kalam_client::models::SerializationType; + use tokio::sync::Notify; + + use super::{ + build_dart_connection_options, push_connection_event, push_debug_connection_event, + DART_CONNECTION_EVENT_QUEUE_CAPACITY, DART_INITIAL_RECONNECT_DELAY_MS, + DART_MAX_RECONNECT_DELAY_MS, + }; + use crate::models::DartConnectionEvent; #[test] fn dart_connection_options_default_to_msgpack() { @@ -231,6 +246,44 @@ mod tests { assert_eq!(options.reconnect_delay_ms, DART_INITIAL_RECONNECT_DELAY_MS); assert_eq!(options.max_reconnect_delay_ms, DART_MAX_RECONNECT_DELAY_MS); } + + #[test] + fn dart_debug_connection_events_are_bounded() { + let queue = Arc::new(std::sync::Mutex::new(VecDeque::new())); + let notify = Arc::new(Notify::new()); + + for index in 0..(DART_CONNECTION_EVENT_QUEUE_CAPACITY + 10) { + push_debug_connection_event(&queue, ¬ify, || DartConnectionEvent::Receive { + message: index.to_string(), + }); + } + + assert_eq!(queue.lock().unwrap().len(), DART_CONNECTION_EVENT_QUEUE_CAPACITY); + } + + #[test] + fn dart_lifecycle_connection_events_keep_latest_when_full() { + let queue = Arc::new(std::sync::Mutex::new(VecDeque::new())); + let notify = Arc::new(Notify::new()); + + for _ in 0..DART_CONNECTION_EVENT_QUEUE_CAPACITY { + push_connection_event(&queue, ¬ify, DartConnectionEvent::Connect); + } + push_connection_event( + &queue, + ¬ify, + DartConnectionEvent::Error { + error: crate::models::DartConnectionError { + message: "latest".to_string(), + recoverable: true, + }, + }, + ); + + let guard = queue.lock().unwrap(); + assert_eq!(guard.len(), DART_CONNECTION_EVENT_QUEUE_CAPACITY); + assert!(matches!(guard.back(), Some(DartConnectionEvent::Error { .. 
}))); + } } /// Build [`EventHandlers`](kalam_client::EventHandlers) that push events into @@ -246,10 +299,7 @@ fn build_event_handlers( let q = queue.clone(); let n = notify.clone(); handlers = handlers.on_connect(move || { - if let Ok(mut guard) = q.lock() { - guard.push_back(DartConnectionEvent::Connect); - } - n.notify_one(); + push_connection_event(&q, &n, DartConnectionEvent::Connect); }); } @@ -264,10 +314,7 @@ fn build_event_handlers( code: reason.code.map(|c| c as i32), }, }; - if let Ok(mut guard) = q.lock() { - guard.push_back(event); - } - n.notify_one(); + push_connection_event(&q, &n, event); }); } @@ -282,10 +329,7 @@ fn build_event_handlers( recoverable: error.recoverable, }, }; - if let Ok(mut guard) = q.lock() { - guard.push_back(event); - } - n.notify_one(); + push_connection_event(&q, &n, event); }); } @@ -294,13 +338,9 @@ fn build_event_handlers( let q = queue.clone(); let n = notify.clone(); handlers = handlers.on_receive(move |msg: &str| { - let event = DartConnectionEvent::Receive { + push_debug_connection_event(&q, &n, || DartConnectionEvent::Receive { message: msg.to_owned(), - }; - if let Ok(mut guard) = q.lock() { - guard.push_back(event); - } - n.notify_one(); + }); }); } @@ -309,27 +349,51 @@ fn build_event_handlers( let q = queue; let n = notify; handlers = handlers.on_send(move |msg: &str| { - let event = DartConnectionEvent::Send { + push_debug_connection_event(&q, &n, || DartConnectionEvent::Send { message: msg.to_owned(), - }; - if let Ok(mut guard) = q.lock() { - guard.push_back(event); - } - n.notify_one(); + }); }); } handlers } +fn push_connection_event( + queue: &Arc>>, + notify: &Arc, + event: DartConnectionEvent, +) { + if let Ok(mut guard) = queue.lock() { + if guard.len() >= DART_CONNECTION_EVENT_QUEUE_CAPACITY { + guard.pop_front(); + } + guard.push_back(event); + notify.notify_one(); + } +} + +fn push_debug_connection_event( + queue: &Arc>>, + notify: &Arc, + event: impl FnOnce() -> DartConnectionEvent, +) { + if let 
Ok(mut guard) = queue.lock() { + if guard.len() >= DART_CONNECTION_EVENT_QUEUE_CAPACITY { + return; + } + guard.push_back(event()); + notify.notify_one(); + } +} + // --------------------------------------------------------------------------- // Query // --------------------------------------------------------------------------- /// Execute a SQL query, optionally with parameters and namespace. /// -/// * `params_json` — JSON-encoded array of parameter values, e.g. `'["val1", 42]'`. -/// Pass `null` for no parameters. +/// * `params_json` — JSON-encoded array of parameter values, e.g. `'["val1", 42]'`. Pass `null` for +/// no parameters. /// * `namespace` — optional namespace context for unqualified table names. pub async fn dart_execute_query( client: &DartKalamClient, @@ -500,8 +564,8 @@ pub async fn dart_is_connected(client: &DartKalamClient) -> anyhow::Result /// This sends an explicit unsubscribe command that: /// 1. Removes the subscription from the client-side map /// 2. Sends an unsubscribe message to the server -/// 3. Drops the event channel, causing any blocking -/// [`dart_subscription_next`] call to return `None` +/// 3. 
Drops the event channel, causing any blocking [`dart_subscription_next`] call to return +/// `None` /// /// Unlike [`dart_subscription_close`], this does **not** require the /// `DartSubscription` mutex, so it can be called safely even while diff --git a/link/kalam-link-dart/src/frb_generated.rs b/link/kalam-link-dart/src/frb_generated.rs index 79f2857d9..22bce0d5b 100644 --- a/link/kalam-link-dart/src/frb_generated.rs +++ b/link/kalam-link-dart/src/frb_generated.rs @@ -26,10 +26,15 @@ // Section: imports +use flutter_rust_bridge::{ + for_generated::{ + byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}, + transform_result_dco, Lifetimeable, Lockable, + }, + Handler, IntoIntoDart, +}; + use crate::api::*; -use flutter_rust_bridge::for_generated::byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; -use flutter_rust_bridge::for_generated::{transform_result_dco, Lifetimeable, Lockable}; -use flutter_rust_bridge::{Handler, IntoIntoDart}; // Section: boilerplate @@ -3111,13 +3116,16 @@ mod io { // Section: imports + use flutter_rust_bridge::{ + for_generated::{ + byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}, + transform_result_dco, Lifetimeable, Lockable, + }, + Handler, IntoIntoDart, + }; + use super::*; use crate::api::*; - use flutter_rust_bridge::for_generated::byteorder::{ - NativeEndian, ReadBytesExt, WriteBytesExt, - }; - use flutter_rust_bridge::for_generated::{transform_result_dco, Lifetimeable, Lockable}; - use flutter_rust_bridge::{Handler, IntoIntoDart}; // Section: boilerplate @@ -3176,15 +3184,18 @@ mod web { // Section: imports + use flutter_rust_bridge::{ + for_generated::{ + byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}, + transform_result_dco, wasm_bindgen, + wasm_bindgen::prelude::*, + Lifetimeable, Lockable, + }, + Handler, IntoIntoDart, + }; + use super::*; use crate::api::*; - use flutter_rust_bridge::for_generated::byteorder::{ - NativeEndian, ReadBytesExt, WriteBytesExt, - }; - use 
flutter_rust_bridge::for_generated::wasm_bindgen; - use flutter_rust_bridge::for_generated::wasm_bindgen::prelude::*; - use flutter_rust_bridge::for_generated::{transform_result_dco, Lifetimeable, Lockable}; - use flutter_rust_bridge::{Handler, IntoIntoDart}; // Section: boilerplate diff --git a/link/kalam-link-dart/src/lib.rs b/link/kalam-link-dart/src/lib.rs index 885906dde..95901a976 100644 --- a/link/kalam-link-dart/src/lib.rs +++ b/link/kalam-link-dart/src/lib.rs @@ -1,5 +1,6 @@ pub mod api; -mod frb_generated; /* AUTO INJECTED BY flutter_rust_bridge. This line may not be accurate, and you can change it according to your needs. */ +mod frb_generated; // AUTO INJECTED BY flutter_rust_bridge. This line may not be accurate, and + // you can change it according to your needs. pub mod models; mod subscription; diff --git a/link/kalam-link-dart/src/models.rs b/link/kalam-link-dart/src/models.rs index 2d13ecbef..47729d270 100644 --- a/link/kalam-link-dart/src/models.rs +++ b/link/kalam-link-dart/src/models.rs @@ -3,11 +3,13 @@ //! All types here are simple structs/enums with only primitive fields //! and `Vec`/`Option` wrappers — fully compatible with flutter_rust_bridge codegen. -use kalam_client::models::{ - BatchStatus, ChangeEvent, ErrorDetail, LoginResponse, LoginUserInfo, QueryResponse, - QueryResult, ResponseStatus, SchemaField, +use kalam_client::{ + models::{ + BatchStatus, ChangeEvent, ErrorDetail, LoginResponse, LoginUserInfo, QueryResponse, + QueryResult, ResponseStatus, SchemaField, + }, + LiveRowsConfig, LiveRowsEvent, Role, }; -use kalam_client::{LiveRowsConfig, LiveRowsEvent, Role}; // --------------------------------------------------------------------------- // Connection lifecycle events (mirrors kalam_client::event_handlers) @@ -33,7 +35,8 @@ pub struct DartConnectionError { pub recoverable: bool, } -/// A connection lifecycle event pulled via [`dart_next_connection_event`](crate::api::dart_next_connection_event). 
+/// A connection lifecycle event pulled via +/// [`dart_next_connection_event`](crate::api::dart_next_connection_event). /// /// Follows the same async-pull model used for subscription events. /// On the Dart side, poll in a loop (or wrap in a `Stream`): @@ -478,7 +481,6 @@ impl DartSubscriptionConfig { batch_size: self.batch_size.map(|v| v as usize), last_rows: self.last_rows.map(|v| v as u32), from: self.from.map(kalam_client::SeqId::new), - snapshot_end_seq: None, }), ws_url: None, } diff --git a/link/kalam-link-dart/src/tests.rs b/link/kalam-link-dart/src/tests.rs index 32f29f3fc..6292de08e 100644 --- a/link/kalam-link-dart/src/tests.rs +++ b/link/kalam-link-dart/src/tests.rs @@ -1,13 +1,17 @@ #[cfg(test)] mod tests { - use crate::models::*; - use kalam_client::models::{ - BatchControl, BatchStatus, ChangeEvent, ErrorDetail, LoginResponse, LoginUserInfo, - QueryResponse, QueryResult, ResponseStatus, SchemaField, - }; - use kalam_client::{FieldFlag, KalamDataType}; use std::collections::BTreeSet; + use kalam_client::{ + models::{ + BatchControl, BatchStatus, ChangeEvent, ErrorDetail, LoginResponse, LoginUserInfo, + QueryResponse, QueryResult, ResponseStatus, SchemaField, + }, + FieldFlag, KalamDataType, + }; + + use crate::models::*; + // ----------------------------------------------------------------------- // Auth provider conversion // ----------------------------------------------------------------------- @@ -230,7 +234,6 @@ mod tests { has_more, status, last_seq_id: None, - snapshot_end_seq: None, } } @@ -471,8 +474,7 @@ mod tests { #[test] fn subscription_info_from_native() { - use kalam_client::models::SubscriptionInfo; - use kalam_client::SeqId; + use kalam_client::{models::SubscriptionInfo, SeqId}; let native = SubscriptionInfo { id: "sub-42".to_string(), diff --git a/link/link-common/src/auth/provider.rs b/link/link-common/src/auth/provider.rs index 6b7ef6259..a45f9a1c1 100644 --- a/link/link-common/src/auth/provider.rs +++ 
b/link/link-common/src/auth/provider.rs @@ -11,15 +11,18 @@ //! - Automatic refresh-token rotation //! //! ```rust,no_run +//! use std::{future::Future, pin::Pin, sync::Arc}; +//! //! use kalam_client::{AuthProvider, DynamicAuthProvider}; -//! use std::sync::Arc; -//! use std::pin::Pin; -//! use std::future::Future; //! -//! struct MyTokenStore { /* ... */ } +//! struct MyTokenStore { +//! // ... +//! } //! //! impl DynamicAuthProvider for MyTokenStore { -//! fn get_auth(&self) -> Pin> + Send + '_>> { +//! fn get_auth( +//! &self, +//! ) -> Pin> + Send + '_>> { //! Box::pin(async { //! // fetch / refresh token here //! Ok(AuthProvider::jwt_token("fresh-token".into())) @@ -31,10 +34,9 @@ //! // .auth_provider(Arc::new(MyTokenStore { ... })) //! ``` +use std::{future::Future, pin::Pin, sync::Arc}; + use crate::error::{KalamLinkError, Result}; -use std::future::Future; -use std::pin::Pin; -use std::sync::Arc; /// Authentication credentials for KalamDB server. /// @@ -102,7 +104,9 @@ impl AuthProvider { ) -> Result { match self { Self::BasicAuth(_, _) => Err(KalamLinkError::AuthenticationError( - "User/password credentials can only be used with /v1/api/auth/login; exchange them for a JWT before sending authenticated requests.".to_string(), + "User/password credentials can only be used with /v1/api/auth/login; exchange \ + them for a JWT before sending authenticated requests." + .to_string(), )), Self::JwtToken(token) => Ok(request.bearer_auth(token)), Self::None => Ok(request), diff --git a/link/link-common/src/client.rs b/link/link-common/src/client.rs index 1e8a8b98a..835d067ee 100644 --- a/link/link-common/src/client.rs +++ b/link/link-common/src/client.rs @@ -3,6 +3,15 @@ //! Provides the primary interface for connecting to KalamDB servers //! and executing operations. 
+#[cfg(feature = "healthcheck")] +use std::time::Instant; +use std::{ + sync::{Arc, RwLock}, + time::Duration, +}; + +use tokio::sync::Mutex; + #[cfg(feature = "healthcheck")] use crate::models::HealthCheckResponse; use crate::{ @@ -13,13 +22,6 @@ use crate::{ query::{AuthRefreshCallback, QueryExecutor}, timeouts::KalamLinkTimeouts, }; -#[cfg(feature = "healthcheck")] -use std::time::Instant; -use std::{ - sync::{Arc, RwLock}, - time::Duration, -}; -use tokio::sync::Mutex; mod builder; mod endpoints; diff --git a/link/link-common/src/client/builder.rs b/link/link-common/src/client/builder.rs index b233b047e..6274540c8 100644 --- a/link/link-common/src/client/builder.rs +++ b/link/link-common/src/client/builder.rs @@ -1,3 +1,11 @@ +use std::{ + sync::{Arc, RwLock}, + time::Duration, +}; + +#[cfg(feature = "healthcheck")] +use tokio::sync::Mutex; + #[cfg(feature = "healthcheck")] use super::HealthCheckCache; use super::{KalamLinkClient, KalamLinkClientBuilder}; @@ -8,12 +16,6 @@ use crate::{ query::AuthRefreshCallback, timeouts::KalamLinkTimeouts, }; -use std::{ - sync::{Arc, RwLock}, - time::Duration, -}; -#[cfg(feature = "healthcheck")] -use tokio::sync::Mutex; impl KalamLinkClientBuilder { pub(crate) fn new() -> Self { @@ -132,7 +134,10 @@ impl KalamLinkClientBuilder { }, #[cfg(not(feature = "http2"))] HttpVersion::Http2 => { - log::warn!("[CLIENT] HTTP/2 requested but 'http2' feature is not enabled; falling back to HTTP/1.1"); + log::warn!( + "[CLIENT] HTTP/2 requested but 'http2' feature is not enabled; falling back \ + to HTTP/1.1" + ); client_builder.http1_only() }, HttpVersion::Auto => { diff --git a/link/link-common/src/client/endpoints.rs b/link/link-common/src/client/endpoints.rs index e24890042..0dde0f935 100644 --- a/link/link-common/src/client/endpoints.rs +++ b/link/link-common/src/client/endpoints.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "healthcheck")] +use std::time::Instant; + use super::KalamLinkClient; #[cfg(feature = "healthcheck")] use 
super::HEALTH_CHECK_TTL; @@ -6,8 +9,6 @@ use crate::error::{KalamLinkError, Result}; use crate::models::ClusterHealthResponse; #[cfg(feature = "healthcheck")] use crate::models::HealthCheckResponse; -#[cfg(feature = "healthcheck")] -use std::time::Instant; impl KalamLinkClient { /// Check server health and get server information diff --git a/link/link-common/src/client/runtime.rs b/link/link-common/src/client/runtime.rs index d8f3df158..1a029dd2d 100644 --- a/link/link-common/src/client/runtime.rs +++ b/link/link-common/src/client/runtime.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use super::{KalamLinkClient, KalamLinkClientBuilder}; #[cfg(feature = "consumer")] use crate::consumer::ConsumerBuilder; @@ -10,7 +12,6 @@ use crate::{ subscription::{LiveRowsConfig, LiveRowsSubscription, SubscriptionManager}, timeouts::KalamLinkTimeouts, }; -use std::sync::Arc; impl KalamLinkClient { /// Create a new builder for configuring the client @@ -76,13 +77,7 @@ impl KalamLinkClient { }); self.query_executor - .execute_with_progress( - sql, - files_owned, - params, - namespace_id.map(|s| s.to_string()), - progress, - ) + .execute_with_progress_ref(sql, files_owned, params, namespace_id, progress) .await } @@ -154,9 +149,8 @@ impl KalamLinkClient { if let Some(ref conn) = *conn_guard { let (event_rx, result_rx) = conn.subscribe_send(config.id.clone(), config.sql, config.options).await?; - let unsub_tx = conn.unsubscribe_tx(); - let progress_tx = conn.progress_tx(); - Some((event_rx, result_rx, unsub_tx, progress_tx)) + let shared_control = conn.subscription_control(); + Some((event_rx, result_rx, shared_control)) } else { None } @@ -164,7 +158,7 @@ impl KalamLinkClient { // Lock released here ↑ // Phase 2: Wait for the server Ready ack without the lock held. 
- if let Some((event_rx, result_rx, unsub_tx, progress_tx)) = pending { + if let Some((event_rx, result_rx, shared_control)) = pending { let (generation, resume_from) = result_rx.await.map_err(|_| { KalamLinkError::WebSocketError( "Connection task died before confirming subscribe".to_string(), @@ -174,8 +168,7 @@ impl KalamLinkClient { return Ok(SubscriptionManager::from_shared( config.id, event_rx, - unsub_tx, - progress_tx, + shared_control, generation, resume_from, &self.timeouts, diff --git a/link/link-common/src/client/tests.rs b/link/link-common/src/client/tests.rs index c5be4063c..8d54258ed 100644 --- a/link/link-common/src/client/tests.rs +++ b/link/link-common/src/client/tests.rs @@ -1,11 +1,14 @@ +use std::{collections::HashMap, sync::Arc}; + +use serde_json::json; +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + net::{TcpListener, TcpStream}, + sync::Mutex, +}; + use super::*; use crate::error::KalamLinkError; -use serde_json::json; -use std::collections::HashMap; -use std::sync::Arc; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::net::{TcpListener, TcpStream}; -use tokio::sync::Mutex; #[test] fn test_builder_pattern() { @@ -139,7 +142,8 @@ async fn handle_test_request( }; let response = format!( - "{status_line}\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: close\r\n\r\n{}", + "{status_line}\r\ncontent-type: application/json\r\ncontent-length: {}\r\nconnection: \ + close\r\n\r\n{}", body.len(), body ); diff --git a/link/link-common/src/compression.rs b/link/link-common/src/compression.rs index 754dbbc51..e605dd39b 100644 --- a/link/link-common/src/compression.rs +++ b/link/link-common/src/compression.rs @@ -200,7 +200,7 @@ mod tests { err, DecompressError::OutputTooLarge { advertised: 1024, - limit: 16 + limit: 16, } )); } diff --git a/link/link-common/src/connection/mod.rs b/link/link-common/src/connection/mod.rs index 94eb29fb5..9917111cc 100644 --- a/link/link-common/src/connection/mod.rs +++ 
b/link/link-common/src/connection/mod.rs @@ -2,8 +2,8 @@ //! //! This module contains: //! - [`models`]: Connection-level data models (always available) -//! - [`websocket`]: Low-level WebSocket helpers (URL resolution, auth headers, -//! message parsing, keepalive jitter, local bind addresses, decompression) +//! - [`websocket`]: Low-level WebSocket helpers (URL resolution, auth headers, message parsing, +//! keepalive jitter, local bind addresses, decompression) //! - [`shared`]: Shared multiplexed WebSocket connection with auto-reconnect pub mod models; @@ -15,7 +15,7 @@ pub mod websocket; // Re-export the shared connection type for crate-internal use. #[cfg(feature = "tokio-runtime")] -pub(crate) use shared::SharedConnection; +pub(crate) use shared::{SharedConnection, SharedSubscriptionControl}; #[cfg(feature = "tokio-runtime")] pub(crate) use websocket::{ apply_ws_auth_headers, authenticate_ws, connect_with_optional_local_bind, decode_ws_payload, @@ -25,7 +25,7 @@ pub(crate) use websocket::{ #[cfg(feature = "tokio-runtime")] /// Default capacity for subscription event channels. -pub(crate) const DEFAULT_EVENT_CHANNEL_CAPACITY: usize = 8192; +pub(crate) const DEFAULT_EVENT_CHANNEL_CAPACITY: usize = 1024; #[cfg(feature = "tokio-runtime")] /// Maximum text message size (64 MiB). 
diff --git a/link/link-common/src/connection/models/client_message.rs b/link/link-common/src/connection/models/client_message.rs index 6a743d7ad..6a8c1b414 100644 --- a/link/link-common/src/connection/models/client_message.rs +++ b/link/link-common/src/connection/models/client_message.rs @@ -1,10 +1,9 @@ use serde::{Deserialize, Serialize}; -use crate::auth::models::WsAuthCredentials; -use crate::seq_id::SeqId; -use crate::subscription::models::SubscriptionRequest; - use super::ProtocolOptions; +use crate::{ + auth::models::WsAuthCredentials, seq_id::SeqId, subscription::models::SubscriptionRequest, +}; /// Client-to-server request messages #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/link/link-common/src/connection/models/connection_options.rs b/link/link-common/src/connection/models/connection_options.rs index 95a3a2a82..21790958c 100644 --- a/link/link-common/src/connection/models/connection_options.rs +++ b/link/link-common/src/connection/models/connection_options.rs @@ -14,8 +14,7 @@ use crate::timestamp::{TimestampFormat, TimestampFormatter}; /// ```rust /// use kalam_client::{ConnectionOptions, HttpVersion}; /// -/// let options = ConnectionOptions::new() -/// .with_http_version(HttpVersion::Http2); +/// let options = ConnectionOptions::new().with_http_version(HttpVersion::Http2); /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "lowercase")] diff --git a/link/link-common/src/connection/models/server_message.rs b/link/link-common/src/connection/models/server_message.rs index f803f24cc..5b5a752a1 100644 --- a/link/link-common/src/connection/models/server_message.rs +++ b/link/link-common/src/connection/models/server_message.rs @@ -1,13 +1,13 @@ -use kalamdb_commons::{Role, UserId}; -use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use crate::models::KalamCellValue; -use crate::models::SchemaField; -use crate::subscription::models::BatchControl; -use 
crate::subscription::models::ChangeTypeRaw; +use kalamdb_commons::{Role, UserId}; +use serde::{Deserialize, Serialize}; use super::ProtocolOptions; +use crate::{ + models::{KalamCellValue, SchemaField}, + subscription::models::{BatchControl, ChangeTypeRaw}, +}; /// WebSocket message types sent from server to client #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/link/link-common/src/connection/shared.rs b/link/link-common/src/connection/shared.rs index c0905d650..9e276e099 100644 --- a/link/link-common/src/connection/shared.rs +++ b/link/link-common/src/connection/shared.rs @@ -4,6 +4,19 @@ //! subscriptions. Handles a shared connection handle, subscription registry, //! event routing, and reconnect behavior. +use std::{ + sync::{ + atomic::{AtomicBool, AtomicU32, Ordering}, + Arc, RwLock, + }, + time::Duration, +}; + +use tokio::{ + sync::{mpsc, oneshot}, + task::JoinHandle, +}; + use crate::{ auth::ResolvedAuth, error::{KalamLinkError, Result}, @@ -12,15 +25,6 @@ use crate::{ seq_id::SeqId, timeouts::KalamLinkTimeouts, }; -use std::{ - sync::{ - atomic::{AtomicBool, AtomicU32, Ordering}, - Arc, RwLock, - }, - time::Duration, -}; -use tokio::sync::{mpsc, oneshot}; -use tokio::task::JoinHandle; mod reconnect; mod registry; @@ -31,13 +35,57 @@ use registry::ConnCmd; pub(crate) struct SharedConnection { cmd_tx: mpsc::Sender, - unsub_tx: mpsc::Sender<(String, u64)>, - progress_tx: mpsc::Sender<(String, u64, SeqId, bool)>, connected: Arc, _reconnect_attempts: Arc, _task: JoinHandle<()>, - _unsub_bridge: JoinHandle<()>, - _progress_bridge: JoinHandle<()>, +} + +#[derive(Clone)] +pub(crate) struct SharedSubscriptionControl { + cmd_tx: mpsc::Sender, +} + +impl SharedSubscriptionControl { + pub(crate) async fn unsubscribe(&self, id: String, generation: u64) { + let _ = self + .cmd_tx + .send(ConnCmd::Unsubscribe { + id, + generation: Some(generation), + }) + .await; + } + + pub(crate) fn try_unsubscribe(&self, id: String, generation: u64) { + let _ = 
self.cmd_tx.try_send(ConnCmd::Unsubscribe { + id, + generation: Some(generation), + }); + } + + pub(crate) async fn progress( + &self, + id: String, + generation: u64, + seq_id: SeqId, + advance_resume: bool, + ) { + let _ = self + .cmd_tx + .send(ConnCmd::Progress { + id, + generation, + seq_id, + advance_resume, + }) + .await; + } + + #[cfg(test)] + pub(crate) fn test_control() -> Self { + let (cmd_tx, _cmd_rx) = mpsc::channel::(1); + Self { cmd_tx } + } } impl SharedConnection { @@ -81,43 +129,11 @@ impl SharedConnection { }, } - let (unsub_tx, mut unsub_rx) = mpsc::channel::<(String, u64)>(256); - let cmd_tx_bridge = cmd_tx.clone(); - let unsub_bridge = tokio::spawn(async move { - while let Some((id, generation)) = unsub_rx.recv().await { - let _ = cmd_tx_bridge - .send(ConnCmd::Unsubscribe { - id, - generation: Some(generation), - }) - .await; - } - }); - - let (progress_tx, mut progress_rx) = mpsc::channel::<(String, u64, SeqId, bool)>(256); - let cmd_tx_progress = cmd_tx.clone(); - let progress_bridge = tokio::spawn(async move { - while let Some((id, generation, seq_id, advance_resume)) = progress_rx.recv().await { - let _ = cmd_tx_progress - .send(ConnCmd::Progress { - id, - generation, - seq_id, - advance_resume, - }) - .await; - } - }); - Ok(Self { cmd_tx, - unsub_tx, - progress_tx, connected, _reconnect_attempts: reconnect_attempts, _task: task, - _unsub_bridge: unsub_bridge, - _progress_bridge: progress_bridge, }) } @@ -194,12 +210,10 @@ impl SharedConnection { self.connected.load(Ordering::Relaxed) } - pub(crate) fn unsubscribe_tx(&self) -> mpsc::Sender<(String, u64)> { - self.unsub_tx.clone() - } - - pub(crate) fn progress_tx(&self) -> mpsc::Sender<(String, u64, SeqId, bool)> { - self.progress_tx.clone() + pub(crate) fn subscription_control(&self) -> SharedSubscriptionControl { + SharedSubscriptionControl { + cmd_tx: self.cmd_tx.clone(), + } } } @@ -211,13 +225,18 @@ impl Drop for SharedConnection { #[cfg(test)] mod tests { - use super::registry::{ - 
clear_startup_deadline, reset_startup_deadline, resume_startup_deadline, - startup_deadline, SubEntry, + use tokio::{ + sync::{mpsc, oneshot}, + time::Instant as TokioInstant, + }; + + use super::{ + registry::{ + clear_startup_deadline, reset_startup_deadline, resume_startup_deadline, + startup_deadline, SubEntry, + }, + *, }; - use super::*; - use tokio::sync::{mpsc, oneshot}; - use tokio::time::Instant as TokioInstant; #[test] fn startup_deadline_disabled_when_initial_timeout_is_zero() { @@ -238,7 +257,6 @@ mod tests { last_seq_id: None, consumed_seq_id: None, batch_seq_id: None, - snapshot_end_seq: None, is_loading: true, generation: 1, created_at_ms: 0, diff --git a/link/link-common/src/connection/shared/reconnect.rs b/link/link-common/src/connection/shared/reconnect.rs index 334bfefae..2d9d18317 100644 --- a/link/link-common/src/connection/shared/reconnect.rs +++ b/link/link-common/src/connection/shared/reconnect.rs @@ -1,10 +1,30 @@ -use super::registry::{ - advance_entry_progress, cache_entry_seq, clear_startup_deadline, effective_entry_seq, - merge_resume_from, next_startup_deadline, register_subscription_entry, - remove_subscription_entry, reset_startup_deadline, should_send_subscription_options, - snapshot_subscriptions, ConnCmd, SubEntry, +use std::{ + borrow::Cow, + collections::HashMap, + sync::{ + atomic::{AtomicBool, AtomicU32, Ordering}, + Arc, RwLock, + }, + time::Duration, +}; + +use bytes::Bytes; +use futures_util::{SinkExt, StreamExt}; +use tokio::{ + sync::{mpsc, oneshot}, + time::Instant as TokioInstant, +}; +use tokio_tungstenite::tungstenite::{client::IntoClientRequest, protocol::Message}; + +use super::{ + registry::{ + advance_entry_progress, cache_entry_seq, clear_startup_deadline, effective_entry_seq, + merge_resume_from, next_startup_deadline, register_subscription_entry, + remove_subscription_entry, reset_startup_deadline, should_send_subscription_options, + snapshot_subscriptions, ConnCmd, SubEntry, + }, + routing::{route_event, 
send_subscribe, send_unsubscribe}, }; -use super::routing::{route_event, send_subscribe, send_unsubscribe}; use crate::{ auth::{AuthProvider, ResolvedAuth}, connection::{ @@ -17,19 +37,6 @@ use crate::{ models::{CompressionType, ConnectionOptions, SerializationType}, timeouts::KalamLinkTimeouts, }; -use bytes::Bytes; -use futures_util::{SinkExt, StreamExt}; -use std::{ - collections::HashMap, - sync::{ - atomic::{AtomicBool, AtomicU32, Ordering}, - Arc, RwLock, - }, - time::Duration, -}; -use tokio::sync::{mpsc, oneshot}; -use tokio::time::Instant as TokioInstant; -use tokio_tungstenite::tungstenite::{client::IntoClientRequest, protocol::Message}; fn all_resumes_ready(subs: &HashMap) -> bool { subs.values().all(|entry| !entry.reconnect_resubscribe_pending) @@ -173,7 +180,6 @@ async fn resubscribe_all( ); for (id, entry) in subs.iter_mut() { let mut options = entry.options.clone(); - let was_loading = entry.is_loading; entry.batch_seq_id = None; entry.is_loading = true; reset_startup_deadline(entry, timeouts, true); @@ -181,17 +187,11 @@ async fn resubscribe_all( options.from = Some(seq_id); entry.options.from = Some(seq_id); } - options.snapshot_end_seq = if was_loading { - entry.snapshot_end_seq - } else { - None - }; log::info!( - "[kalam-sdk] Re-subscribing '{}' with from={:?}, snapshot_end={:?}", + "[kalam-sdk] Re-subscribing '{}' with from={:?}", id, entry.options.from.map(|seq| seq.to_string()), - options.snapshot_end_seq.map(|seq| seq.to_string()) ); let send_options = should_send_subscription_options(entry.request_initial_data, &options) @@ -206,6 +206,23 @@ async fn resubscribe_all( } } +#[allow(clippy::too_many_arguments)] +async fn route_event_and_refresh_connection( + event: crate::models::ChangeEvent, + ws: &mut WebSocketStream, + subs: &mut HashMap, + seq_id_cache: &mut HashMap, + timeouts: &KalamLinkTimeouts, + serialization: SerializationType, + connected: &Arc, + event_handlers: &EventHandlers, +) { + route_event(event, ws, subs, seq_id_cache, 
timeouts, serialization).await; + if all_resumes_ready(subs) { + mark_connected(connected, event_handlers); + } +} + async fn handle_startup_timeouts( subs: &mut HashMap, seq_id_cache: &mut HashMap, @@ -237,7 +254,7 @@ async fn handle_startup_timeouts( ); log::warn!("[kalam-sdk] {}", message); clear_startup_deadline(&mut entry); - cache_entry_seq(seq_id_cache, id, &entry); + cache_entry_seq(seq_id_cache, id.as_str(), &entry); if let Some(result_tx) = entry.pending_result_tx.take() { let _ = result_tx.send(Err(KalamLinkError::TimeoutError(message))); } @@ -387,7 +404,12 @@ pub(super) async fn connection_task( log::debug!("[kalam-sdk] Replacing existing subscription '{}'", id); let _ = send_unsubscribe(ws, &id, negotiated_ser).await; if let Some(mut old_entry) = - remove_subscription_entry(&mut subs, &mut seq_id_cache, &id, None) + remove_subscription_entry( + &mut subs, + &mut seq_id_cache, + &id, + None, + ) { if let Some(old_tx) = old_entry.pending_result_tx.take() { let _ = old_tx.send(Err(KalamLinkError::Cancelled)); @@ -422,7 +444,12 @@ pub(super) async fn connection_task( }, Some(ConnCmd::Unsubscribe { id, generation }) => { if let Some(mut entry) = - remove_subscription_entry(&mut subs, &mut seq_id_cache, &id, generation) + remove_subscription_entry( + &mut subs, + &mut seq_id_cache, + &id, + generation, + ) { if let Some(result_tx) = entry.pending_result_tx.take() { let _ = result_tx.send(Err(KalamLinkError::Cancelled)); @@ -436,9 +463,19 @@ pub(super) async fn connection_task( ); } }, - Some(ConnCmd::Progress { id, generation, seq_id, advance_resume }) => { + Some(ConnCmd::Progress { + id, + generation, + seq_id, + advance_resume, + }) => { if let Some(entry) = subs.get_mut(&id) { - advance_entry_progress(entry, generation, seq_id, advance_resume); + advance_entry_progress( + entry, + generation, + seq_id, + advance_resume, + ); } }, Some(ConnCmd::ListSubscriptions { result_tx }) => { @@ -487,18 +524,17 @@ pub(super) async fn connection_task( 
event_handlers.emit_receive(&text); match parse_message(&text) { Ok(Some(event)) => { - route_event( + route_event_and_refresh_connection( event, ws, &mut subs, &mut seq_id_cache, &timeouts, negotiated_ser, + &connected, + &event_handlers, ) .await; - if all_resumes_ready(&subs) { - mark_connected(&connected, &event_handlers); - } }, Ok(None) => {}, Err(error) => log::warn!("Failed to parse WS message: {}", error), @@ -512,29 +548,28 @@ pub(super) async fn connection_task( &data, crate::connection::MAX_WS_DECOMPRESSED_MESSAGE_BYTES, ) { - Ok(decoded) => decoded, + Ok(decoded) => Cow::Owned(decoded), Err(error) => { log::warn!("Failed to decompress msgpack: {}", error); continue; }, } } else { - data.to_vec() + Cow::Borrowed(data.as_ref()) }; - match parse_message_msgpack(&raw) { + match parse_message_msgpack(raw.as_ref()) { Ok(Some(event)) => { - route_event( + route_event_and_refresh_connection( event, ws, &mut subs, &mut seq_id_cache, &timeouts, negotiated_ser, + &connected, + &event_handlers, ) .await; - if all_resumes_ready(&subs) { - mark_connected(&connected, &event_handlers); - } }, Ok(None) => {}, Err(error) => log::warn!("Failed to parse msgpack message: {}", error), @@ -546,13 +581,15 @@ pub(super) async fn connection_task( event_handlers.emit_receive(&text); match parse_message(&text) { Ok(Some(event)) => { - route_event( + route_event_and_refresh_connection( event, ws, &mut subs, &mut seq_id_cache, &timeouts, negotiated_ser, + &connected, + &event_handlers, ) .await; }, @@ -657,7 +694,7 @@ pub(super) async fn connection_task( )); let error_message = "Max reconnection attempts reached".to_string(); for (id, mut entry) in subs.drain() { - cache_entry_seq(&mut seq_id_cache, id, &entry); + cache_entry_seq(&mut seq_id_cache, id.as_str(), &entry); if let Some(result_tx) = entry.pending_result_tx.take() { let _ = result_tx .send(Err(KalamLinkError::WebSocketError(error_message.clone()))); @@ -727,7 +764,12 @@ pub(super) async fn connection_task( 
Some(ConnCmd::Subscribe { id, sql, options, request_initial_data, event_tx, result_tx }) => { if subs.contains_key(&id) { if let Some(mut old_entry) = - remove_subscription_entry(&mut subs, &mut seq_id_cache, &id, None) + remove_subscription_entry( + &mut subs, + &mut seq_id_cache, + &id, + None, + ) { if let Some(old_tx) = old_entry.pending_result_tx.take() { let _ = old_tx.send(Err(KalamLinkError::Cancelled)); @@ -749,16 +791,31 @@ pub(super) async fn connection_task( }, Some(ConnCmd::Unsubscribe { id, generation }) => { if let Some(mut entry) = - remove_subscription_entry(&mut subs, &mut seq_id_cache, &id, generation) + remove_subscription_entry( + &mut subs, + &mut seq_id_cache, + &id, + generation, + ) { if let Some(result_tx) = entry.pending_result_tx.take() { let _ = result_tx.send(Err(KalamLinkError::Cancelled)); } } }, - Some(ConnCmd::Progress { id, generation, seq_id, advance_resume }) => { + Some(ConnCmd::Progress { + id, + generation, + seq_id, + advance_resume, + }) => { if let Some(entry) = subs.get_mut(&id) { - advance_entry_progress(entry, generation, seq_id, advance_resume); + advance_entry_progress( + entry, + generation, + seq_id, + advance_resume, + ); } }, Some(ConnCmd::ListSubscriptions { result_tx }) => { diff --git a/link/link-common/src/connection/shared/registry.rs b/link/link-common/src/connection/shared/registry.rs index 75fdf4d42..f0134769b 100644 --- a/link/link-common/src/connection/shared/registry.rs +++ b/link/link-common/src/connection/shared/registry.rs @@ -1,3 +1,13 @@ +use std::{ + collections::HashMap, + time::{SystemTime, UNIX_EPOCH}, +}; + +use tokio::{ + sync::{mpsc, oneshot}, + time::Instant as TokioInstant, +}; + use crate::{ connection::FAR_FUTURE, error::Result, @@ -7,12 +17,6 @@ use crate::{ subscription::final_resume_seq, timeouts::KalamLinkTimeouts, }; -use std::{ - collections::HashMap, - time::{SystemTime, UNIX_EPOCH}, -}; -use tokio::sync::{mpsc, oneshot}; -use tokio::time::Instant as TokioInstant; #[inline] 
pub(super) fn now_ms() -> u64 { @@ -85,7 +89,6 @@ pub(super) fn should_send_subscription_options( || options.batch_size.is_some() || options.last_rows.is_some() || options.from.is_some() - || options.snapshot_end_seq.is_some() } #[allow(clippy::too_many_arguments)] @@ -115,7 +118,6 @@ pub(super) fn register_subscription_entry( last_seq_id: effective_from, consumed_seq_id: effective_from, batch_seq_id: None, - snapshot_end_seq: None, is_loading: true, generation, created_at_ms: now_ms(), @@ -145,8 +147,9 @@ pub(super) fn remove_subscription_entry( return None; } - subs.remove(id) - .inspect(|entry| cache_entry_seq(seq_id_cache, id.to_string(), entry)) + subs.remove(id).inspect(|entry| { + cache_entry_seq(seq_id_cache, id.to_string(), entry); + }) } pub(super) fn advance_entry_progress( @@ -219,14 +222,31 @@ pub(super) fn next_startup_deadline(subs: &HashMap) -> TokioIn .unwrap_or_else(|| TokioInstant::now() + FAR_FUTURE) } +pub(super) enum SubscriptionKeyMatch { + Direct, + Fallback(String), +} + +impl SubscriptionKeyMatch { + #[inline] + pub(super) fn as_str<'a>(&'a self, incoming_sub_id: &'a str) -> &'a str { + match self { + Self::Direct => incoming_sub_id, + Self::Fallback(key) => key.as_str(), + } + } +} + pub(super) fn resolve_subscription_key( sub_id: &str, subs: &HashMap, -) -> Option { +) -> Option { if subs.contains_key(sub_id) { - Some(sub_id.to_string()) + Some(SubscriptionKeyMatch::Direct) } else { - subs.keys().find(|client_id| sub_id.ends_with(client_id.as_str())).cloned() + subs.keys() + .find(|client_id| sub_id.ends_with(client_id.as_str())) + .map(|client_id| SubscriptionKeyMatch::Fallback(client_id.clone())) } } @@ -263,7 +283,6 @@ pub(super) struct SubEntry { pub(super) last_seq_id: Option, pub(super) consumed_seq_id: Option, pub(super) batch_seq_id: Option, - pub(super) snapshot_end_seq: Option, pub(super) is_loading: bool, pub(super) generation: u64, pub(super) created_at_ms: u64, diff --git a/link/link-common/src/connection/shared/routing.rs 
b/link/link-common/src/connection/shared/routing.rs index 9b83c2f71..f949cd733 100644 --- a/link/link-common/src/connection/shared/routing.rs +++ b/link/link-common/src/connection/shared/routing.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use super::registry::{ cache_entry_seq, clear_startup_deadline, effective_entry_seq, now_ms, refresh_startup_deadline, resolve_subscription_key, SubEntry, @@ -12,7 +14,6 @@ use crate::{ subscription::{batch_envelope, filter_replayed_event, subscription_start_ready}, timeouts::KalamLinkTimeouts, }; -use std::collections::HashMap; pub(super) async fn send_subscribe( ws: &mut WebSocketStream, @@ -57,25 +58,23 @@ pub(super) async fn route_event( let matched_key = resolve_subscription_key(&incoming_sub_id, subs); let resume_from = matched_key .as_ref() - .and_then(|key| subs.get(key.as_str())) + .and_then(|key| subs.get(key.as_str(&incoming_sub_id))) .and_then(effective_entry_seq); let Some(event) = filter_replayed_event(event, resume_from) else { return; }; + let event_time_ms = now_ms(); let auto_request_next_batch = matches!(event, ChangeEvent::InitialDataBatch { .. 
}); if let Some(batch) = batch_envelope(&event) { if let Some(key) = matched_key.as_ref() { - if let Some(entry) = subs.get_mut(key) { + if let Some(entry) = subs.get_mut(key.as_str(&incoming_sub_id)) { if let Some(seq_id) = batch.last_seq_id { entry.batch_seq_id = Some(seq_id); } - if let Some(snapshot_end_seq) = batch.snapshot_end_seq { - entry.snapshot_end_seq = Some(snapshot_end_seq); - } entry.is_loading = batch.status != crate::models::BatchStatus::Ready; - entry.last_event_time_ms = Some(now_ms()); + entry.last_event_time_ms = Some(event_time_ms); if entry.is_loading { refresh_startup_deadline(entry, timeouts); } @@ -84,7 +83,7 @@ pub(super) async fn route_event( if auto_request_next_batch && batch.has_more { let last_seq = matched_key .as_ref() - .and_then(|key| subs.get(key)) + .and_then(|key| subs.get(key.as_str(&incoming_sub_id))) .and_then(|entry| entry.batch_seq_id.or(entry.last_seq_id)); if let Err(error) = send_next_batch_request_with_format(ws, &incoming_sub_id, last_seq, serialization) @@ -95,26 +94,16 @@ pub(super) async fn route_event( } } - match &event { - ChangeEvent::Insert { .. } | ChangeEvent::Update { .. } | ChangeEvent::Delete { .. } => { - if let Some(key) = matched_key.as_ref() { - if let Some(entry) = subs.get_mut(key) { - entry.last_event_time_ms = Some(now_ms()); - } - } - }, - ChangeEvent::InitialDataBatch { .. 
} => {}, - _ => {}, - } - if let Some(key) = matched_key { let mut remove_after_send = false; + let key_str = key.as_str(&incoming_sub_id); - if let Some(entry) = subs.get_mut(&key) { - entry.last_event_time_ms = Some(now_ms()); + if let Some(entry) = subs.get_mut(key_str) { + entry.last_event_time_ms = Some(event_time_ms); + let is_start_ready = subscription_start_ready(&event); match &event { - _ if subscription_start_ready(&event) => { + _ if is_start_ready => { clear_startup_deadline(entry); if let Some(result_tx) = entry.pending_result_tx.take() { let _ = result_tx.send(Ok((entry.generation, entry.options.from))); @@ -133,7 +122,7 @@ pub(super) async fn route_event( _ => {}, } - if !subscription_start_ready(&event) { + if !is_start_ready { if entry.is_loading { refresh_startup_deadline(entry, timeouts); } else if entry.reconnect_resubscribe_pending { @@ -147,8 +136,8 @@ pub(super) async fn route_event( } if remove_after_send { - if let Some(entry) = subs.remove(&key) { - cache_entry_seq(seq_id_cache, key, &entry); + if let Some(entry) = subs.remove(key_str) { + cache_entry_seq(seq_id_cache, key_str, &entry); } } } else { diff --git a/link/link-common/src/connection/websocket.rs b/link/link-common/src/connection/websocket.rs index 97f556a14..2f0b5fa4c 100644 --- a/link/link-common/src/connection/websocket.rs +++ b/link/link-common/src/connection/websocket.rs @@ -4,25 +4,24 @@ //! with optional local bind addresses, message parsing, keepalive jitter, //! decompression, and protocol message helpers. 
-use crate::{ - auth::AuthProvider, - error::{KalamLinkError, Result}, - models::{ChangeEvent, ClientMessage, ServerMessage, WsAuthCredentials}, +use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + io::{Error as IoError, ErrorKind}, + net::{IpAddr, SocketAddr}, + time::Duration, }; + use futures_util::{SinkExt, StreamExt}; use reqwest::Url; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use std::io::{Error as IoError, ErrorKind}; -use std::net::{IpAddr, SocketAddr}; -use std::time::Duration; -use tokio::net::{lookup_host, TcpSocket, TcpStream}; -use tokio::time::Instant as TokioInstant; +use tokio::{ + net::{lookup_host, TcpSocket, TcpStream}, + time::Instant as TokioInstant, +}; use tokio_tungstenite::{ client_async_tls_with_config, connect_async, tungstenite::{ - error::Error as WsError, - error::UrlError, + error::{Error as WsError, UrlError}, handshake::client::Response as WsResponse, http::header::{HeaderValue, AUTHORIZATION}, protocol::Message, @@ -30,6 +29,11 @@ use tokio_tungstenite::{ }; use super::{MAX_WS_BINARY_MESSAGE_BYTES, MAX_WS_DECOMPRESSED_MESSAGE_BYTES}; +use crate::{ + auth::AuthProvider, + error::{KalamLinkError, Result}, + models::{ChangeEvent, ClientMessage, ServerMessage, WsAuthCredentials}, +}; /// The concrete WebSocket stream type used throughout the shared SDK transport layer. pub(crate) type WebSocketStream = @@ -280,7 +284,9 @@ pub(crate) fn apply_ws_auth_headers( ) -> Result<()> { match auth { AuthProvider::BasicAuth(_, _) => Err(KalamLinkError::AuthenticationError( - "WebSocket authentication requires a JWT token. Use AuthProvider::jwt_token or login first.".to_string(), + "WebSocket authentication requires a JWT token. Use AuthProvider::jwt_token or login \ + first." 
+ .to_string(), )), AuthProvider::JwtToken(token) => { let value = format!("Bearer {}", token); @@ -333,7 +339,9 @@ async fn send_authenticate_message( let credentials = match auth { AuthProvider::BasicAuth(_, _) => { return Err(KalamLinkError::AuthenticationError( - "WebSocket authentication requires a JWT token. Use AuthProvider::jwt_token or login first.".to_string(), + "WebSocket authentication requires a JWT token. Use AuthProvider::jwt_token or \ + login first." + .to_string(), )); }, AuthProvider::JwtToken(token) => WsAuthCredentials::Jwt { @@ -557,11 +565,11 @@ pub(crate) async fn send_client_message( #[cfg(test)] mod tests { - use super::*; - use crate::auth::AuthProvider; - use crate::error::KalamLinkError; use tokio_tungstenite::tungstenite::{client::IntoClientRequest, http::header::AUTHORIZATION}; + use super::*; + use crate::{auth::AuthProvider, error::KalamLinkError}; + #[test] fn test_ws_url_conversion() { assert_eq!( @@ -684,9 +692,10 @@ mod tests { #[test] fn test_parse_message_msgpack_server_message() { - use crate::models::{ProtocolOptions, SerializationType, ServerMessage}; use kalamdb_commons::{Role, UserId}; + use crate::models::{ProtocolOptions, SerializationType, ServerMessage}; + let msg = ServerMessage::AuthSuccess { user: UserId::from("user-1"), role: Role::Dba, diff --git a/link/link-common/src/consumer/core/poller.rs b/link/link-common/src/consumer/core/poller.rs index 1bfe0b31c..4b965f1cf 100644 --- a/link/link-common/src/consumer/core/poller.rs +++ b/link/link-common/src/consumer/core/poller.rs @@ -1,16 +1,21 @@ -use crate::auth::AuthProvider; -use crate::consumer::models::consumer_record::ConsumerRecordWire; -use crate::consumer::models::AckResponse; -use crate::consumer::models::AutoOffsetReset; -use crate::consumer::models::CommitResult; -use crate::consumer::utils::backoff::jittered_exponential_backoff; -use crate::error::{KalamLinkError, Result}; -use crate::models::LoginResponse; +use std::{ + sync::{Arc, Mutex}, + 
time::Duration, +}; + use log::{debug, warn}; use serde::{Deserialize, Serialize}; use serde_json; -use std::sync::{Arc, Mutex}; -use std::time::Duration; + +use crate::{ + auth::AuthProvider, + consumer::{ + models::{consumer_record::ConsumerRecordWire, AckResponse, AutoOffsetReset, CommitResult}, + utils::backoff::jittered_exponential_backoff, + }, + error::{KalamLinkError, Result}, + models::LoginResponse, +}; #[derive(Clone)] pub struct ConsumerPoller { @@ -159,7 +164,8 @@ impl ConsumerPoller { Duration::from_secs(10), ); warn!( - "[LINK_CONSUMER] Retriable consume error: status={} delay_ms={} duration_ms={}", + "[LINK_CONSUMER] Retriable consume error: status={} delay_ms={} \ + duration_ms={}", status, delay.as_millis(), attempt_start.elapsed().as_millis() diff --git a/link/link-common/src/consumer/core/topic_consumer.rs b/link/link-common/src/consumer/core/topic_consumer.rs index c38e89342..12d2f34d6 100644 --- a/link/link-common/src/consumer/core/topic_consumer.rs +++ b/link/link-common/src/consumer/core/topic_consumer.rs @@ -1,14 +1,20 @@ use std::time::{Duration, Instant}; -use crate::auth::AuthProvider; -use crate::client::KalamLinkClientBuilder; -use crate::consumer::core::offset_manager::OffsetManager; -use crate::consumer::core::poller::{AckRequest, ConsumeRequest, ConsumeResponse, ConsumerPoller}; -use crate::consumer::models::{AutoOffsetReset, CommitResult, ConsumerConfig, ConsumerRecord}; -use crate::error::{KalamLinkError, Result}; -use crate::models::ConnectionOptions; -use crate::timeouts::KalamLinkTimeouts; -use crate::KalamLinkClient; +use crate::{ + auth::AuthProvider, + client::KalamLinkClientBuilder, + consumer::{ + core::{ + offset_manager::OffsetManager, + poller::{AckRequest, ConsumeRequest, ConsumeResponse, ConsumerPoller}, + }, + models::{AutoOffsetReset, CommitResult, ConsumerConfig, ConsumerRecord}, + }, + error::{KalamLinkError, Result}, + models::ConnectionOptions, + timeouts::KalamLinkTimeouts, + KalamLinkClient, +}; pub struct 
TopicConsumer { #[allow(dead_code)] // retained for lifetime — owns the reqwest::Client @@ -422,8 +428,7 @@ impl ConsumerBuilder { #[cfg(test)] mod tests { use super::*; - use crate::consumer::core::poller::ConsumerPoller; - use crate::consumer::models::ConsumerConfig; + use crate::consumer::{core::poller::ConsumerPoller, models::ConsumerConfig}; /// Build a minimal `TopicConsumer` without a real server so we can test /// state-flag behaviour purely in-memory. diff --git a/link/link-common/src/consumer/mod.rs b/link/link-common/src/consumer/mod.rs index 0fd37c8c6..9fc997298 100644 --- a/link/link-common/src/consumer/mod.rs +++ b/link/link-common/src/consumer/mod.rs @@ -10,6 +10,7 @@ pub mod utils; #[cfg(all(feature = "tokio-runtime", feature = "consumer"))] pub use core::{ConsumerBuilder, TopicConsumer}; + pub use models::{ AckResponse, AutoOffsetReset, CommitMode, CommitResult, ConsumeMessage, ConsumeRequest, ConsumeResponse, ConsumerConfig, ConsumerOffsets, ConsumerRecord, PayloadMode, TopicOp, diff --git a/link/link-common/src/consumer/models/consume_message.rs b/link/link-common/src/consumer/models/consume_message.rs index a5221ebc3..e8c1a974d 100644 --- a/link/link-common/src/consumer/models/consume_message.rs +++ b/link/link-common/src/consumer/models/consume_message.rs @@ -1,7 +1,7 @@ +use kalamdb_commons::UserId; use serde::{Deserialize, Serialize}; use crate::models::RowData; -use kalamdb_commons::UserId; /// A single consumed message from a topic. 
/// diff --git a/link/link-common/src/consumer/models/consumer_config.rs b/link/link-common/src/consumer/models/consumer_config.rs index eff9cad8e..08eb7a567 100644 --- a/link/link-common/src/consumer/models/consumer_config.rs +++ b/link/link-common/src/consumer/models/consumer_config.rs @@ -1,8 +1,7 @@ use std::{collections::HashMap, time::Duration}; -use crate::error::{KalamLinkError, Result}; - use super::AutoOffsetReset; +use crate::error::{KalamLinkError, Result}; #[derive(Debug, Clone)] pub struct ConsumerConfig { diff --git a/link/link-common/src/consumer/models/consumer_record.rs b/link/link-common/src/consumer/models/consumer_record.rs index 5376db3a7..0e7982d95 100644 --- a/link/link-common/src/consumer/models/consumer_record.rs +++ b/link/link-common/src/consumer/models/consumer_record.rs @@ -56,8 +56,7 @@ impl ConsumerRecordWire { #[cfg(feature = "tokio-runtime")] mod base64_bytes { - use base64::engine::general_purpose::STANDARD; - use base64::Engine; + use base64::{engine::general_purpose::STANDARD, Engine}; use serde::{de::Error, Deserialize, Deserializer, Serializer}; #[allow(dead_code)] diff --git a/link/link-common/src/consumer/models/enums.rs b/link/link-common/src/consumer/models/enums.rs index e3910b76f..2d5196c7a 100644 --- a/link/link-common/src/consumer/models/enums.rs +++ b/link/link-common/src/consumer/models/enums.rs @@ -1,6 +1,7 @@ -use serde::{de::Visitor, ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; +use serde::{de::Visitor, ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; + #[derive(Debug, Clone, PartialEq, Eq)] pub enum AutoOffsetReset { Earliest, diff --git a/link/link-common/src/credentials.rs b/link/link-common/src/credentials.rs index 671f31853..16a01e779 100644 --- a/link/link-common/src/credentials.rs +++ b/link/link-common/src/credentials.rs @@ -15,10 +15,11 @@ //! - No plaintext passwords stored on disk //! 
- Tokens can have limited scopes -use crate::error::Result; use kalamdb_commons::UserId; use serde::{Deserialize, Serialize}; +use crate::error::Result; + /// Stored credentials for a KalamDB instance. /// /// Contains a JWT token that can be persisted and reused across sessions. @@ -239,10 +240,7 @@ pub trait CredentialStore { /// use kalam_client::credentials::{CredentialStore, Credentials, MemoryCredentialStore}; /// /// let mut store = MemoryCredentialStore::new(); -/// let creds = Credentials::new( -/// "local".to_string(), -/// "jwt.token.value".to_string(), -/// ); +/// let creds = Credentials::new("local".to_string(), "jwt.token.value".to_string()); /// /// store.set_credentials(&creds).unwrap(); /// let retrieved = store.get_credentials("local").unwrap(); diff --git a/link/link-common/src/error.rs b/link/link-common/src/error.rs index 96fdef9e0..6f5cde36c 100644 --- a/link/link-common/src/error.rs +++ b/link/link-common/src/error.rs @@ -16,15 +16,13 @@ pub type Result = std::result::Result; /// use kalam_client::{KalamLinkClient, KalamLinkError}; /// /// # async fn example() -> kalam_client::Result<()> { -/// let client = KalamLinkClient::builder() -/// .base_url("http://invalid-host:9999") -/// .build()?; +/// let client = KalamLinkClient::builder().base_url("http://invalid-host:9999").build()?; /// /// match client.execute_query("SELECT 1", None, None, None).await { /// Ok(response) => println!("Success: {:?}", response), /// Err(KalamLinkError::NetworkError(msg)) => { /// eprintln!("Connection failed: {}", msg); -/// } +/// }, /// Err(e) => eprintln!("Other error: {}", e), /// } /// # Ok(()) diff --git a/link/link-common/src/event_handlers.rs b/link/link-common/src/event_handlers.rs index 44f4cde33..13a921dab 100644 --- a/link/link-common/src/event_handlers.rs +++ b/link/link-common/src/event_handlers.rs @@ -11,7 +11,7 @@ //! # Example //! //! ```rust,no_run -//! use kalam_client::{KalamLinkClient, EventHandlers}; +//! 
use kalam_client::{EventHandlers, KalamLinkClient}; //! //! # async fn example() -> Result<(), Box> { //! let handlers = EventHandlers::new() @@ -33,8 +33,7 @@ //! # } //! ``` -use std::fmt; -use std::sync::Arc; +use std::{fmt, sync::Arc}; /// Reason for a disconnect event. #[derive(Debug, Clone)] @@ -185,8 +184,7 @@ impl EventHandlers { /// ```rust /// use kalam_client::EventHandlers; /// - /// let handlers = EventHandlers::new() - /// .on_connect(|| println!("Connected!")); + /// let handlers = EventHandlers::new().on_connect(|| println!("Connected!")); /// ``` pub fn on_connect(mut self, f: impl Fn() + Send + Sync + 'static) -> Self { self.on_connect = Some(Arc::new(f)); @@ -202,8 +200,8 @@ impl EventHandlers { /// ```rust /// use kalam_client::EventHandlers; /// - /// let handlers = EventHandlers::new() - /// .on_disconnect(|reason| println!("Disconnected: {}", reason)); + /// let handlers = + /// EventHandlers::new().on_disconnect(|reason| println!("Disconnected: {}", reason)); /// ``` pub fn on_disconnect(mut self, f: impl Fn(DisconnectReason) + Send + Sync + 'static) -> Self { self.on_disconnect = Some(Arc::new(f)); @@ -237,8 +235,7 @@ impl EventHandlers { /// ```rust /// use kalam_client::EventHandlers; /// - /// let handlers = EventHandlers::new() - /// .on_receive(|msg| println!("[RECV] {}", msg)); + /// let handlers = EventHandlers::new().on_receive(|msg| println!("[RECV] {}", msg)); /// ``` pub fn on_receive(mut self, f: impl Fn(&str) + Send + Sync + 'static) -> Self { self.on_receive = Some(Arc::new(f)); @@ -254,8 +251,7 @@ impl EventHandlers { /// ```rust /// use kalam_client::EventHandlers; /// - /// let handlers = EventHandlers::new() - /// .on_send(|msg| println!("[SEND] {}", msg)); + /// let handlers = EventHandlers::new().on_send(|msg| println!("[SEND] {}", msg)); /// ``` pub fn on_send(mut self, f: impl Fn(&str) + Send + Sync + 'static) -> Self { self.on_send = Some(Arc::new(f)); diff --git a/link/link-common/src/lib.rs 
b/link/link-common/src/lib.rs index 2168ba499..017e22a18 100644 --- a/link/link-common/src/lib.rs +++ b/link/link-common/src/lib.rs @@ -34,7 +34,6 @@ pub use consumer::{ }; #[cfg(all(feature = "tokio-runtime", feature = "consumer"))] pub use consumer::{ConsumerBuilder, TopicConsumer}; - pub use credentials::{CredentialStore, Credentials, MemoryCredentialStore}; pub use error::{KalamLinkError, Result}; pub use event_handlers::{ConnectionError, DisconnectReason, EventHandlers, MessageDirection}; @@ -49,20 +48,19 @@ pub use models::{ }; #[cfg(feature = "consumer")] pub use models::{AckResponse, ConsumeMessage, ConsumeRequest, ConsumeResponse}; -pub use seq_id::SeqId; -pub use timeouts::{KalamLinkTimeouts, KalamLinkTimeoutsBuilder}; -pub use timestamp::{now, parse_iso8601, TimestampFormat, TimestampFormatter}; - #[cfg(feature = "tokio-runtime")] pub use query::AuthRefreshCallback; #[cfg(feature = "tokio-runtime")] pub use query::QueryExecutor; #[cfg(feature = "tokio-runtime")] pub use query::UploadProgressCallback; +pub use seq_id::SeqId; #[cfg(feature = "tokio-runtime")] pub use subscription::LiveRowsSubscription; #[cfg(feature = "tokio-runtime")] pub use subscription::SubscriptionManager; pub use subscription::{LiveRowsConfig, LiveRowsEvent, LiveRowsMaterializer}; +pub use timeouts::{KalamLinkTimeouts, KalamLinkTimeoutsBuilder}; +pub use timestamp::{now, parse_iso8601, TimestampFormat, TimestampFormatter}; pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/link/link-common/src/models/kalam_cell_value.rs b/link/link-common/src/models/kalam_cell_value.rs index e7fd238a7..b94fcde1a 100644 --- a/link/link-common/src/models/kalam_cell_value.rs +++ b/link/link-common/src/models/kalam_cell_value.rs @@ -47,10 +47,10 @@ //! {"id":"..."} // File column (JSON object) //! 
``` +use std::{fmt, ops::Deref}; + use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; -use std::fmt; -use std::ops::Deref; /// A single cell value in a query result row or subscription notification. /// diff --git a/link/link-common/src/models/mod.rs b/link/link-common/src/models/mod.rs index c925d17f0..c52c3c4d7 100644 --- a/link/link-common/src/models/mod.rs +++ b/link/link-common/src/models/mod.rs @@ -30,22 +30,18 @@ pub use crate::auth::models::{ LoginRequest, LoginResponse, LoginUserInfo, ServerSetupRequest, ServerSetupResponse, SetupStatusResponse, SetupUserInfo, WsAuthCredentials, }; - // ── Connection models ──────────────────────────────────────────────────────── pub use crate::connection::models::{ ClientMessage, ClusterHealthResponse, ClusterNodeHealth, CompressionType, ConnectionOptions, HealthCheckResponse, HttpVersion, ProtocolOptions, SerializationType, ServerMessage, }; - // ── Consumer models ────────────────────────────────────────────────────────── #[cfg(feature = "consumer")] pub use crate::consumer::models::{AckResponse, ConsumeMessage, ConsumeRequest, ConsumeResponse}; - // ── Query models ───────────────────────────────────────────────────────────── pub use crate::query::models::{ ErrorDetail, QueryRequest, QueryResponse, QueryResult, ResponseStatus, UploadProgress, }; - // ── Subscription models ────────────────────────────────────────────────────── pub use crate::subscription::models::{ BatchControl, BatchStatus, ChangeEvent, ChangeTypeRaw, SubscriptionConfig, SubscriptionInfo, diff --git a/link/link-common/src/models/schema_field.rs b/link/link-common/src/models/schema_field.rs index 948ab5789..811cd8dfb 100644 --- a/link/link-common/src/models/schema_field.rs +++ b/link/link-common/src/models/schema_field.rs @@ -1,6 +1,7 @@ -use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; +use serde::{Deserialize, Serialize}; + use super::kalam_data_type::KalamDataType; pub type FieldFlags = BTreeSet; diff --git 
a/link/link-common/src/models/tests.rs b/link/link-common/src/models/tests.rs index b5b7ab4c7..db0f9782d 100644 --- a/link/link-common/src/models/tests.rs +++ b/link/link-common/src/models/tests.rs @@ -1,8 +1,9 @@ -use crate::seq_id::SeqId; -use serde_json::json; use std::collections::BTreeSet; +use serde_json::json; + use super::*; +use crate::seq_id::SeqId; // ==================== ConnectionOptions Tests ==================== @@ -357,7 +358,6 @@ fn test_batch_control_with_seq_id() { has_more: true, status: BatchStatus::Loading, last_seq_id: Some(seq_id), - snapshot_end_seq: Some(SeqId::from(1000i64)), }; let json = serde_json::to_string(&batch_control).unwrap(); @@ -365,7 +365,6 @@ fn test_batch_control_with_seq_id() { assert!(json.contains("\"has_more\":true")); assert!(json.contains("\"status\":\"loading\"")); assert!(json.contains("last_seq_id")); - assert!(json.contains("snapshot_end_seq")); } #[test] @@ -375,7 +374,6 @@ fn test_batch_control_ready_status() { has_more: false, status: BatchStatus::Ready, last_seq_id: Some(SeqId::from(1000i64)), - snapshot_end_seq: Some(SeqId::from(1000i64)), }; let json = serde_json::to_string(&batch_control).unwrap(); @@ -544,7 +542,6 @@ fn test_change_event_helpers() { has_more: false, status: BatchStatus::Ready, last_seq_id: None, - snapshot_end_seq: None, }, schema: vec![SchemaField { name: "id".to_string(), diff --git a/link/link-common/src/models/utils.rs b/link/link-common/src/models/utils.rs index 0abc79b40..cece7ab85 100644 --- a/link/link-common/src/models/utils.rs +++ b/link/link-common/src/models/utils.rs @@ -8,8 +8,7 @@ use super::kalam_cell_value::KalamCellValue; /// # Example /// /// ```rust -/// use kalam_client::models::KalamCellValue; -/// use kalam_client::parse_i64; +/// use kalam_client::{models::KalamCellValue, parse_i64}; /// /// let num_value = KalamCellValue::int(42); /// let str_value = KalamCellValue::text("42"); diff --git a/link/link-common/src/query/executor.rs 
b/link/link-common/src/query/executor.rs index a6147864a..b2a42a7ee 100644 --- a/link/link-common/src/query/executor.rs +++ b/link/link-common/src/query/executor.rs @@ -1,10 +1,12 @@ //! SQL query execution via HTTP. -use crate::{ - auth::AuthProvider, - error::{KalamLinkError, Result}, - models::{QueryRequest, QueryResponse, UploadProgress}, +use std::{ + future::Future, + pin::Pin, + sync::{Arc, Mutex}, + time::Instant, }; + #[cfg(feature = "file-uploads")] use bytes::Bytes; #[cfg(feature = "file-uploads")] @@ -14,38 +16,20 @@ use http_body_util::StreamBody; use log::{debug, warn}; #[cfg(feature = "file-uploads")] use reqwest::multipart::{Form, Part}; -use std::{ - future::Future, - pin::Pin, - sync::{Arc, Mutex}, - time::Instant, -}; +use serde::Serialize; -/// Async callback that resolves fresh [`AuthProvider`] credentials. -/// -/// Called by the executor when a query requires a login exchange or returns -/// `TOKEN_EXPIRED`. -/// Implementations should obtain a fresh JWT (e.g. via login or dynamic -/// auth provider) and return it. -pub type AuthRefreshCallback = - Arc Pin> + Send>> + Send + Sync>; - -/// Handles SQL query execution via HTTP. -#[derive(Clone)] -pub struct QueryExecutor { - sql_url: String, - http_client: reqwest::Client, - auth: Arc>, - max_retries: u32, - auth_refresher: Option, -} +use crate::{ + auth::AuthProvider, + error::{KalamLinkError, Result}, + models::{QueryResponse, UploadProgress}, +}; /// Progress callback for multipart file uploads. 
pub type UploadProgressCallback = Arc; #[cfg(feature = "file-uploads")] fn build_progress_stream( - data: Arc>, + data: Bytes, file_name: Arc, file_index: usize, total_files: usize, @@ -54,7 +38,7 @@ fn build_progress_stream( { let chunk_size = 64 * 1024; futures_util::stream::unfold(0usize, move |offset| { - let data = Arc::clone(&data); + let data = data.clone(); let progress_cb = progress_cb.clone(); let file_name = Arc::clone(&file_name); async move { @@ -63,7 +47,7 @@ fn build_progress_stream( } let end = (offset + chunk_size).min(data.len()); - let chunk = Bytes::copy_from_slice(&data[offset..end]); + let chunk = data.slice(offset..end); let total_bytes = data.len() as u64; let bytes_sent = end as u64; let percent = if total_bytes == 0 { @@ -86,6 +70,57 @@ fn build_progress_stream( }) } +#[cfg(feature = "file-uploads")] +#[derive(Clone)] +struct MultipartUploadFile { + placeholder_name: String, + filename: String, + data: Bytes, + mime_type: Option, +} + +#[cfg(feature = "file-uploads")] +impl From<(String, String, Vec, Option)> for MultipartUploadFile { + fn from( + (placeholder_name, filename, data, mime_type): (String, String, Vec, Option), + ) -> Self { + Self { + placeholder_name, + filename, + data: Bytes::from(data), + mime_type, + } + } +} + +#[derive(Serialize)] +struct BorrowedQueryRequest<'a> { + sql: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + params: Option<&'a [serde_json::Value]>, + #[serde(default, skip_serializing_if = "Option::is_none")] + namespace_id: Option<&'a str>, +} + +/// Async callback that resolves fresh [`AuthProvider`] credentials. +/// +/// Called by the executor when a query requires a login exchange or returns +/// `TOKEN_EXPIRED`. +/// Implementations should obtain a fresh JWT (e.g. via login or dynamic +/// auth provider) and return it. +pub type AuthRefreshCallback = + Arc Pin> + Send>> + Send + Sync>; + +/// Handles SQL query execution via HTTP. 
+#[derive(Clone)] +pub struct QueryExecutor { + sql_url: String, + http_client: reqwest::Client, + auth: Arc>, + max_retries: u32, + auth_refresher: Option, +} + impl QueryExecutor { pub(crate) fn new( base_url: String, @@ -113,7 +148,9 @@ impl QueryExecutor { fn validate_request_auth(auth: &AuthProvider) -> Result<()> { if matches!(auth, AuthProvider::BasicAuth(_, _)) { return Err(KalamLinkError::AuthenticationError( - "User/password credentials can only be used with /v1/api/auth/login; exchange them for a JWT before executing SQL requests.".to_string(), + "User/password credentials can only be used with /v1/api/auth/login; exchange \ + them for a JWT before executing SQL requests." + .to_string(), )); } @@ -141,13 +178,17 @@ impl QueryExecutor { } fn is_retry_safe_sql(sql: &str) -> bool { - matches!( - Self::first_keyword(sql).as_deref(), - Some("SELECT" | "SHOW" | "DESCRIBE" | "EXPLAIN") - ) + let Some(keyword) = Self::first_keyword(sql) else { + return false; + }; + + keyword.eq_ignore_ascii_case("SELECT") + || keyword.eq_ignore_ascii_case("SHOW") + || keyword.eq_ignore_ascii_case("DESCRIBE") + || keyword.eq_ignore_ascii_case("EXPLAIN") } - fn first_keyword(sql: &str) -> Option { + fn first_keyword(sql: &str) -> Option<&str> { let bytes = sql.as_bytes(); let mut i = 0; while i < bytes.len() { @@ -189,7 +230,7 @@ impl QueryExecutor { if start == i { return None; } - return Some(sql[start..i].to_ascii_uppercase()); + return Some(&sql[start..i]); } None @@ -214,89 +255,77 @@ impl QueryExecutor { params: Option>, namespace_id: Option, progress: Option, + ) -> Result { + self.execute_with_progress_ref(sql, files, params, namespace_id.as_deref(), progress) + .await + } + + pub(crate) async fn execute_with_progress_ref( + &self, + sql: &str, + files: Option, Option)>>, + params: Option>, + namespace_id: Option<&str>, + progress: Option, ) -> Result { let has_files = files.as_ref().map(|f| !f.is_empty()).unwrap_or(false); #[cfg(not(feature = "file-uploads"))] if 
has_files { return Err(KalamLinkError::ConfigurationError( - "This SDK build does not include file upload support. Rebuild with the `file-uploads` feature.".to_string(), + "This SDK build does not include file upload support. Rebuild with the \ + `file-uploads` feature." + .to_string(), )); } #[cfg(feature = "file-uploads")] if has_files { - let mut form = Form::new().text("sql", sql.to_string()); - - if let Some(p) = ¶ms { - form = form.text("params", serde_json::to_string(p)?); - } - - if let Some(ns) = &namespace_id { - form = form.text("namespace_id", ns.clone()); - } - - if let Some(files) = files { - let total_files = files.len(); - for (index, (placeholder_name, filename, data, mime_type)) in - files.into_iter().enumerate() - { - let total_bytes = data.len() as u64; - let field_name = format!("file:{}", placeholder_name); - - let part = if let Some(progress_cb) = progress.clone() { - let data = Arc::new(data); - let file_name = Arc::::from(filename.clone()); - let file_index = index + 1; - - let stream = build_progress_stream( - Arc::clone(&data), - Arc::clone(&file_name), - file_index, - total_files, - progress_cb, - ); - - let body = reqwest::Body::wrap(StreamBody::new(stream)); - Part::stream_with_length(body, total_bytes) - } else { - Part::bytes(data) - }; - - let part = part - .file_name(filename) - .mime_str(mime_type.as_deref().unwrap_or("application/octet-stream")) - .map_err(|e| { - KalamLinkError::ConfigurationError(format!("Invalid MIME type: {}", e)) - })?; - - form = form.part(field_name, part); - } - } - + let files = files + .unwrap_or_default() + .into_iter() + .map(MultipartUploadFile::from) + .collect::>(); let auth_snapshot = self.ensure_request_auth().await?; - let mut req_builder = self.http_client.post(&self.sql_url).multipart(form); - req_builder = auth_snapshot.apply_to_request(req_builder)?; - - let attempt_start = Instant::now(); - debug!("[LINK_HTTP] Sending multipart POST to {}", self.sql_url); - - let response = 
req_builder.send().await?; - let http_duration_ms = attempt_start.elapsed().as_millis(); - debug!( - "[LINK_HTTP] Response received: status={} duration_ms={}", - response.status(), - http_duration_ms - ); + let mut result = self + .execute_multipart_once( + &self.sql_url, + &auth_snapshot, + sql, + &files, + params.as_ref(), + namespace_id.as_deref(), + progress.clone(), + ) + .await; + + if let Some(leader_sql_url) = Self::multipart_leader_retry_url(&result, &self.sql_url) { + warn!( + "[LINK_HTTP] Leader redirect for multipart request - retrying against {}", + leader_sql_url + ); + result = self + .execute_multipart_once( + &leader_sql_url, + &auth_snapshot, + sql, + &files, + params.as_ref(), + namespace_id.as_deref(), + progress.clone(), + ) + .await; + } - let result = Self::handle_response(response, sql).await?; + let result = result?; - // Auto-refresh on TOKEN_EXPIRED (multipart — no retry, just report). - // Multipart uploads consume the body so we cannot replay them, but - // we still refresh the token so the *next* request succeeds. + // Auto-refresh on TOKEN_EXPIRED for subsequent requests. 
if result.is_token_expired() { if let Some(refresher) = &self.auth_refresher { - warn!("[LINK_HTTP] TOKEN_EXPIRED on multipart request — refreshing auth for subsequent requests"); + warn!( + "[LINK_HTTP] TOKEN_EXPIRED on multipart request — refreshing auth for \ + subsequent requests" + ); if let Ok(new_auth) = refresher().await { if Self::validate_request_auth(&new_auth).is_ok() { *self.auth.lock().unwrap() = new_auth; @@ -310,9 +339,9 @@ impl QueryExecutor { let _ = progress; - let request = QueryRequest { - sql: sql.to_string(), - params, + let request = BorrowedQueryRequest { + sql, + params: params.as_deref(), namespace_id, }; @@ -428,9 +457,128 @@ impl QueryExecutor { err.is_timeout() || err.is_connect() } + fn multipart_leader_retry_url( + result: &Result, + current_sql_url: &str, + ) -> Option { + let leader_url = match result { + Ok(response) => Self::leader_url_from_query_response(response), + Err(KalamLinkError::ServerError { message, .. }) => { + Self::leader_url_from_error_text(message) + }, + Err(_) => None, + }?; + + let retry_url = format!("{}/v1/api/sql", leader_url.trim_end_matches('/')); + (retry_url != current_sql_url).then_some(retry_url) + } + + fn leader_url_from_query_response(response: &QueryResponse) -> Option { + let error = response.error.as_ref()?; + Self::extract_leader_url(&error.message) + .or_else(|| error.details.as_deref().and_then(Self::extract_leader_url)) + } + + fn leader_url_from_error_text(error_text: &str) -> Option { + serde_json::from_str::(error_text) + .ok() + .and_then(|response| Self::leader_url_from_query_response(&response)) + .or_else(|| Self::extract_leader_url(error_text)) + } + + fn extract_leader_url(text: &str) -> Option { + let marker = "Leader:"; + let index = text.find(marker)?; + let mut leader = text[index + marker.len()..].trim(); + + if let Some(stripped) = leader.strip_prefix("Some(\"") { + leader = stripped; + leader = &leader[..leader.find("\")").unwrap_or(leader.len())]; + } + + leader = 
leader.trim_matches(|ch| matches!(ch, '"' | '\\' | ')' | '(' | '[' | ']')); + leader = leader.split_whitespace().next().unwrap_or(leader).trim_end_matches([',', ';']); + + if leader.starts_with("http://") || leader.starts_with("https://") { + Some(leader.to_string()) + } else { + None + } + } + + #[cfg(feature = "file-uploads")] + async fn execute_multipart_once( + &self, + sql_url: &str, + auth_snapshot: &AuthProvider, + sql: &str, + files: &[MultipartUploadFile], + params: Option<&Vec>, + namespace_id: Option<&str>, + progress: Option, + ) -> Result { + let mut form = Form::new().text("sql", sql.to_string()); + + if let Some(p) = params { + form = form.text("params", serde_json::to_string(p)?); + } + + if let Some(ns) = namespace_id { + form = form.text("namespace_id", ns.to_string()); + } + + let total_files = files.len(); + for (index, file) in files.iter().enumerate() { + let total_bytes = file.data.len() as u64; + let field_name = format!("file:{}", file.placeholder_name); + + let part = if let Some(progress_cb) = progress.clone() { + let file_name = Arc::::from(file.filename.clone()); + let file_index = index + 1; + + let stream = build_progress_stream( + file.data.clone(), + Arc::clone(&file_name), + file_index, + total_files, + progress_cb, + ); + + let body = reqwest::Body::wrap(StreamBody::new(stream)); + Part::stream_with_length(body, total_bytes) + } else { + Part::stream_with_length(reqwest::Body::from(file.data.clone()), total_bytes) + }; + + let part = part + .file_name(file.filename.clone()) + .mime_str(file.mime_type.as_deref().unwrap_or("application/octet-stream")) + .map_err(|e| { + KalamLinkError::ConfigurationError(format!("Invalid MIME type: {}", e)) + })?; + + form = form.part(field_name, part); + } + + let mut req_builder = self.http_client.post(sql_url).multipart(form); + req_builder = auth_snapshot.apply_to_request(req_builder)?; + + let attempt_start = Instant::now(); + debug!("[LINK_HTTP] Sending multipart POST to {}", sql_url); + + let 
response = req_builder.send().await?; + let http_duration_ms = attempt_start.elapsed().as_millis(); + debug!( + "[LINK_HTTP] Response received: status={} duration_ms={}", + response.status(), + http_duration_ms + ); + + Self::handle_response(response, sql).await + } + async fn handle_response(response: reqwest::Response, _sql: &str) -> Result { let status = response.status(); - if status.is_success() { let parse_start = Instant::now(); let query_response: QueryResponse = response.json().await?; @@ -479,16 +627,18 @@ impl QueryExecutor { #[cfg(test)] mod tests { #[cfg(feature = "file-uploads")] - use super::{build_progress_stream, UploadProgress, UploadProgressCallback}; + use std::sync::{Arc, Mutex}; + #[cfg(feature = "file-uploads")] use futures_util::StreamExt; + #[cfg(feature = "file-uploads")] - use std::sync::{Arc, Mutex}; + use super::{build_progress_stream, UploadProgress, UploadProgressCallback}; #[cfg(feature = "file-uploads")] #[tokio::test] async fn progress_stream_reports_completion() { - let data = Arc::new(vec![1u8; 128 * 1024]); + let data = bytes::Bytes::from(vec![1u8; 128 * 1024]); let file_name = Arc::::from("example.txt"); let last_progress = Arc::new(Mutex::new(None::)); @@ -497,8 +647,7 @@ mod tests { *last_progress_clone.lock().unwrap() = Some(progress); }); - let stream = - build_progress_stream(Arc::clone(&data), Arc::clone(&file_name), 2, 3, progress_cb); + let stream = build_progress_stream(data.clone(), Arc::clone(&file_name), 2, 3, progress_cb); futures_util::pin_mut!(stream); while let Some(frame) = stream.next().await { @@ -513,4 +662,31 @@ mod tests { assert_eq!(progress.bytes_sent, data.len() as u64); assert!((progress.percent - 100.0).abs() < f64::EPSILON); } + + #[test] + fn extract_leader_url_handles_optional_url_hint() { + let url = super::QueryExecutor::extract_leader_url( + "Statement 1 failed: Not leader for shard. 
Leader: Some(\"http://127.0.0.1:8083\")", + ) + .expect("leader hint should parse"); + + assert_eq!(url, "http://127.0.0.1:8083"); + } + + #[test] + fn leader_retry_url_reads_structured_query_error() { + let error_text = r#"{ + "status": "error", + "results": [], + "error": { + "code": "SQL_EXECUTION_ERROR", + "message": "Statement 1 failed: Not leader for shard. Leader: Some(\"http://127.0.0.1:8083\")" + } + }"#; + + let url = super::QueryExecutor::leader_url_from_error_text(error_text) + .expect("structured query error should yield leader URL"); + + assert_eq!(url, "http://127.0.0.1:8083"); + } } diff --git a/link/link-common/src/query/models/query_request.rs b/link/link-common/src/query/models/query_request.rs index 8cbea91c4..0e44acc4a 100644 --- a/link/link-common/src/query/models/query_request.rs +++ b/link/link-common/src/query/models/query_request.rs @@ -11,15 +11,15 @@ use serde_json::Value as JsonValue; /// /// // Simple query without parameters /// let request = QueryRequest { -/// sql: "SELECT * FROM users".to_string(), -/// params: None, +/// sql: "SELECT * FROM users".to_string(), +/// params: None, /// namespace_id: None, /// }; /// /// // Parametrized query /// let request = QueryRequest { -/// sql: "SELECT * FROM users WHERE id = $1".to_string(), -/// params: Some(vec![json!(42)]), +/// sql: "SELECT * FROM users WHERE id = $1".to_string(), +/// params: Some(vec![json!(42)]), /// namespace_id: None, /// }; /// ``` diff --git a/link/link-common/src/query/models/query_response.rs b/link/link-common/src/query/models/query_response.rs index 80087e95e..95c1f4636 100644 --- a/link/link-common/src/query/models/query_response.rs +++ b/link/link-common/src/query/models/query_response.rs @@ -1,9 +1,10 @@ -use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use super::error_detail::ErrorDetail; -use super::query_result::QueryResult; -use super::response_status::ResponseStatus; +use serde::{Deserialize, Serialize}; + +use super::{ + 
error_detail::ErrorDetail, query_result::QueryResult, response_status::ResponseStatus, +}; use crate::models::KalamCellValue; /// Contains query results, execution metadata, and optional error information. diff --git a/link/link-common/src/query/models/query_result.rs b/link/link-common/src/query/models/query_result.rs index 78ee84659..ea40f2ca6 100644 --- a/link/link-common/src/query/models/query_result.rs +++ b/link/link-common/src/query/models/query_result.rs @@ -1,9 +1,11 @@ -use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use crate::models::kalam_cell_value::KalamCellValue; -use crate::models::kalam_cell_value::RowData; -use crate::models::schema_field::SchemaField; +use serde::{Deserialize, Serialize}; + +use crate::models::{ + kalam_cell_value::{KalamCellValue, RowData}, + schema_field::SchemaField, +}; /// Individual query result within a SQL response. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/link/link-common/src/seq_id.rs b/link/link-common/src/seq_id.rs index de2f0b553..0704a8042 100644 --- a/link/link-common/src/seq_id.rs +++ b/link/link-common/src/seq_id.rs @@ -4,10 +4,15 @@ //! the client can serialize/deserialize `_seq` values without depending //! on the heavy `kalamdb-commons` crate. 
-use serde::de::{self, Visitor}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::fmt; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::{ + fmt, + time::{SystemTime, UNIX_EPOCH}, +}; + +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; /// Sequence ID for MVCC versioning (Snowflake layout: timestamp | worker | seq) #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] diff --git a/link/link-common/src/seq_tracking.rs b/link/link-common/src/seq_tracking.rs index 7278e1fe8..719db9410 100644 --- a/link/link-common/src/seq_tracking.rs +++ b/link/link-common/src/seq_tracking.rs @@ -10,8 +10,7 @@ use std::collections::HashMap; -use crate::models::KalamCellValue; -use crate::seq_id::SeqId; +use crate::{models::KalamCellValue, seq_id::SeqId}; /// Name of the system sequence column in every subscription row. pub const SEQ_COLUMN: &str = "_seq"; diff --git a/link/link-common/src/subscription/checkpoint.rs b/link/link-common/src/subscription/checkpoint.rs index 261fdc3b7..9a191fd8a 100644 --- a/link/link-common/src/subscription/checkpoint.rs +++ b/link/link-common/src/subscription/checkpoint.rs @@ -6,13 +6,19 @@ use crate::models::BatchStatus; #[cfg(any(feature = "tokio-runtime", feature = "wasm", test))] use crate::{models::ChangeEvent, seq_id::SeqId, seq_tracking}; +#[cfg(any(feature = "tokio-runtime", test))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct EventProgress { + pub(crate) seq_id: SeqId, + pub(crate) advance_resume: bool, +} + #[cfg(any(feature = "tokio-runtime", test))] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct BatchEnvelope { pub(crate) status: BatchStatus, pub(crate) has_more: bool, pub(crate) last_seq_id: Option, - pub(crate) snapshot_end_seq: Option, } #[cfg(any(feature = "tokio-runtime", test))] @@ -23,7 +29,6 @@ pub(crate) fn batch_envelope(event: &ChangeEvent) -> Option { status: batch_control.status, has_more: 
batch_control.has_more, last_seq_id: batch_control.last_seq_id, - snapshot_end_seq: batch_control.snapshot_end_seq, }), _ => None, } @@ -35,18 +40,28 @@ pub(crate) fn subscription_start_ready(event: &ChangeEvent) -> bool { } #[cfg(any(feature = "tokio-runtime", test))] -pub(crate) fn event_progress(event: &ChangeEvent) -> Option<(SeqId, bool)> { +pub(crate) fn event_progress(event: &ChangeEvent) -> Option { match event { ChangeEvent::InitialDataBatch { rows, .. } if subscription_start_ready(event) => { - seq_tracking::extract_max_seq(rows) - .or_else(|| batch_envelope(event).and_then(|batch| batch.last_seq_id)) - .map(|seq_id| (seq_id, false)) + let batch = batch_envelope(event); + let seq_id = seq_tracking::extract_max_seq(rows) + .or_else(|| batch.and_then(|batch| batch.last_seq_id))?; + Some(EventProgress { + seq_id, + advance_resume: false, + }) }, ChangeEvent::Insert { rows, .. } | ChangeEvent::Update { rows, .. } => { - seq_tracking::extract_max_seq(rows).map(|seq_id| (seq_id, true)) + seq_tracking::extract_max_seq(rows).map(|seq_id| EventProgress { + seq_id, + advance_resume: true, + }) }, ChangeEvent::Delete { old_rows, .. 
} => { - seq_tracking::extract_max_seq(old_rows).map(|seq_id| (seq_id, true)) + seq_tracking::extract_max_seq(old_rows).map(|seq_id| EventProgress { + seq_id, + advance_resume: true, + }) }, _ => None, } @@ -214,7 +229,6 @@ mod tests { has_more: false, status, last_seq_id: None, - snapshot_end_seq: None, } } @@ -243,7 +257,13 @@ mod tests { batch_control: batch_control(BatchStatus::Ready), }; - assert_eq!(event_progress(&event), Some((SeqId::from_i64(11), false))); + assert_eq!( + event_progress(&event), + Some(EventProgress { + seq_id: SeqId::from_i64(11), + advance_resume: false, + }) + ); } #[test] diff --git a/link/link-common/src/subscription/live_rows_materializer.rs b/link/link-common/src/subscription/live_rows_materializer.rs index 110ef4a98..c89a2002a 100644 --- a/link/link-common/src/subscription/live_rows_materializer.rs +++ b/link/link-common/src/subscription/live_rows_materializer.rs @@ -1,6 +1,5 @@ -use crate::models::{ChangeEvent, KalamCellValue, RowData}; - use super::{LiveRowsConfig, LiveRowsEvent}; +use crate::models::{ChangeEvent, KalamCellValue, RowData}; /// Stateful reducer that materializes the current row set from change events. #[derive(Debug, Clone, Default)] @@ -168,7 +167,6 @@ mod tests { has_more: false, status, last_seq_id: None, - snapshot_end_seq: None, } } diff --git a/link/link-common/src/subscription/live_rows_subscription.rs b/link/link-common/src/subscription/live_rows_subscription.rs index b135de34e..970aee898 100644 --- a/link/link-common/src/subscription/live_rows_subscription.rs +++ b/link/link-common/src/subscription/live_rows_subscription.rs @@ -1,6 +1,5 @@ -use crate::error::Result; - use super::{LiveRowsConfig, LiveRowsEvent, LiveRowsMaterializer, SubscriptionManager}; +use crate::error::Result; /// High-level subscription that yields materialized live-query row snapshots. 
pub struct LiveRowsSubscription { diff --git a/link/link-common/src/subscription/manager.rs b/link/link-common/src/subscription/manager.rs index 8a1ea7a88..5d72a980a 100644 --- a/link/link-common/src/subscription/manager.rs +++ b/link/link-common/src/subscription/manager.rs @@ -3,13 +3,19 @@ //! Receives events routed by the shared //! [`SharedConnection`](crate::connection::SharedConnection). -use crate::{ - error::Result, models::ChangeEvent, seq_id::SeqId, subscription::buffer_event, - subscription::event_progress, timeouts::KalamLinkTimeouts, -}; use std::collections::VecDeque; + use tokio::sync::mpsc; +use crate::{ + connection::SharedSubscriptionControl, + error::Result, + models::ChangeEvent, + seq_id::SeqId, + subscription::{buffer_event, event_progress}, + timeouts::KalamLinkTimeouts, +}; + /// Manages WebSocket subscriptions for real-time change notifications. /// /// # Examples @@ -18,9 +24,7 @@ use tokio::sync::mpsc; /// use kalam_client::KalamLinkClient; /// /// # async fn example() -> Result<(), Box> { -/// let client = KalamLinkClient::builder() -/// .base_url("http://localhost:3000") -/// .build()?; +/// let client = KalamLinkClient::builder().base_url("http://localhost:3000").build()?; /// /// let mut subscription = client.subscribe("SELECT * FROM messages").await?; /// @@ -37,11 +41,8 @@ pub struct SubscriptionManager { subscription_id: String, /// Receives parsed events from the shared connection task. event_rx: mpsc::Receiver>, - /// When using a shared connection, this sender lets us unsubscribe. - /// The channel carries `(subscription_id, generation)`. - shared_unsubscribe_tx: Option>, - /// Sends consumer-observed checkpoint progress back to the shared connection. - shared_progress_tx: Option>, + /// Sends unsubscribe and checkpoint progress back to the shared connection. + shared_control: Option, /// Generation tag assigned by the shared `connection_task`. 
generation: u64, /// Local event buffer for yielding batched events from a single WS message. @@ -63,8 +64,7 @@ impl SubscriptionManager { pub(crate) fn from_shared( subscription_id: String, event_rx: mpsc::Receiver>, - unsubscribe_tx: mpsc::Sender<(String, u64)>, - progress_tx: mpsc::Sender<(String, u64, SeqId, bool)>, + shared_control: SharedSubscriptionControl, generation: u64, resume_from: Option, timeouts: &KalamLinkTimeouts, @@ -72,8 +72,7 @@ impl SubscriptionManager { Self { subscription_id, event_rx, - shared_unsubscribe_tx: Some(unsubscribe_tx), - shared_progress_tx: Some(progress_tx), + shared_control: Some(shared_control), generation, event_queue: VecDeque::new(), buffered_changes: Vec::new(), @@ -85,15 +84,20 @@ impl SubscriptionManager { } async fn report_shared_progress(&self, event: &ChangeEvent) { - let Some(progress_tx) = self.shared_progress_tx.as_ref() else { + let Some(shared_control) = self.shared_control.as_ref() else { return; }; - let Some((seq_id, advance_resume)) = event_progress(event) else { + let Some(progress) = event_progress(event) else { return; }; - let _ = progress_tx - .send((self.subscription_id.clone(), self.generation, seq_id, advance_resume)) + shared_control + .progress( + self.subscription_id.clone(), + self.generation, + progress.seq_id, + progress.advance_resume, + ) .await; } @@ -159,8 +163,8 @@ impl SubscriptionManager { } self.closed = true; - if let Some(tx) = self.shared_unsubscribe_tx.take() { - let _ = tx.send((self.subscription_id.clone(), self.generation)).await; + if let Some(shared_control) = self.shared_control.take() { + shared_control.unsubscribe(self.subscription_id.clone(), self.generation).await; } Ok(()) @@ -174,10 +178,8 @@ impl SubscriptionManager { impl Drop for SubscriptionManager { fn drop(&mut self) { - if let Some(tx) = self.shared_unsubscribe_tx.take() { - let id = self.subscription_id.clone(); - let gen = self.generation; - let _ = tx.try_send((id, gen)); + if let Some(shared_control) = 
self.shared_control.take() { + shared_control.try_unsubscribe(self.subscription_id.clone(), self.generation); } } } @@ -190,15 +192,12 @@ mod tests { /// for testing state-flag logic without a network dependency. fn make_test_sub() -> SubscriptionManager { let (event_tx, event_rx) = mpsc::channel(1); - let (unsubscribe_tx, _unsubscribe_rx) = mpsc::channel(1); - let (progress_tx, _progress_rx) = mpsc::channel(1); drop(event_tx); let mut subscription = SubscriptionManager::from_shared( "unit-test-id".to_string(), event_rx, - unsubscribe_tx, - progress_tx, + SharedSubscriptionControl::test_control(), 0, None, &KalamLinkTimeouts::default(), diff --git a/link/link-common/src/subscription/models/batch.rs b/link/link-common/src/subscription/models/batch.rs index fb3cef03b..f19dd3fd8 100644 --- a/link/link-common/src/subscription/models/batch.rs +++ b/link/link-common/src/subscription/models/batch.rs @@ -1,6 +1,7 @@ -use crate::seq_id::SeqId; use serde::{Deserialize, Serialize}; +use crate::seq_id::SeqId; + /// Status of the initial data loading process #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] @@ -34,8 +35,4 @@ pub struct BatchControl { /// The SeqId of the last row in this batch (used for subsequent requests) #[serde(skip_serializing_if = "Option::is_none")] pub last_seq_id: Option, - - /// Snapshot boundary SeqId captured at subscription time - #[serde(skip_serializing_if = "Option::is_none")] - pub snapshot_end_seq: Option, } diff --git a/link/link-common/src/subscription/models/change_event.rs b/link/link-common/src/subscription/models/change_event.rs index 396270947..eac86ffd2 100644 --- a/link/link-common/src/subscription/models/change_event.rs +++ b/link/link-common/src/subscription/models/change_event.rs @@ -1,11 +1,13 @@ +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; -use std::collections::HashMap; use super::batch::BatchControl; -use 
crate::connection::models::ServerMessage; -use crate::models::KalamCellValue; -use crate::models::SchemaField; +use crate::{ + connection::models::ServerMessage, + models::{KalamCellValue, SchemaField}, +}; /// Type of change that occurred in the database #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] diff --git a/link/link-common/src/subscription/models/subscription_info.rs b/link/link-common/src/subscription/models/subscription_info.rs index c945f7b50..febf719fc 100644 --- a/link/link-common/src/subscription/models/subscription_info.rs +++ b/link/link-common/src/subscription/models/subscription_info.rs @@ -3,9 +3,10 @@ //! [`SubscriptionInfo`] provides a read-only snapshot of an active //! subscription's state — useful for debugging, tests, and UI dashboards. -use crate::seq_id::SeqId; use serde::{Deserialize, Serialize}; +use crate::seq_id::SeqId; + /// Read-only snapshot of an active subscription's metadata. /// /// Returned by [`KalamLinkClient::subscriptions()`] and the WASM diff --git a/link/link-common/src/subscription/models/subscription_options.rs b/link/link-common/src/subscription/models/subscription_options.rs index e0b39d148..5aa7e1c53 100644 --- a/link/link-common/src/subscription/models/subscription_options.rs +++ b/link/link-common/src/subscription/models/subscription_options.rs @@ -6,7 +6,7 @@ use crate::seq_id::SeqId; /// /// These options control individual subscription behavior including: /// - Initial data loading (batch_size, last_rows) -/// - Data resumption after reconnection (from) +/// - Data resumption after reconnection (`from`) /// /// Aligned with backend's SubscriptionOptions in kalamdb-commons/websocket.rs. 
/// @@ -16,14 +16,11 @@ use crate::seq_id::SeqId; /// use kalam_client::{SeqId, SubscriptionOptions}; /// /// // Fetch last 100 rows with batch size of 50 -/// let options = SubscriptionOptions::default() -/// .with_batch_size(50) -/// .with_last_rows(100); +/// let options = SubscriptionOptions::default().with_batch_size(50).with_last_rows(100); /// /// // Resume from a specific sequence ID after reconnection /// let some_seq_id = SeqId::new(123); -/// let options = SubscriptionOptions::default() -/// .with_from(some_seq_id); +/// let options = SubscriptionOptions::default().with_from(some_seq_id); /// ``` #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct SubscriptionOptions { @@ -42,11 +39,6 @@ pub struct SubscriptionOptions { /// Typically set automatically during reconnection to resume from last received event. #[serde(skip_serializing_if = "Option::is_none", alias = "from_seq_id")] pub from: Option, - - /// Preserve the original snapshot boundary across reconnects while the - /// initial load is still in progress. - #[serde(skip_serializing_if = "Option::is_none")] - pub snapshot_end_seq: Option, } impl SubscriptionOptions { @@ -74,12 +66,6 @@ impl SubscriptionOptions { self } - /// Preserve the original snapshot boundary across reconnects. - pub fn with_snapshot_end_seq(mut self, seq_id: SeqId) -> Self { - self.snapshot_end_seq = Some(seq_id); - self - } - /// Resume from a specific sequence ID. /// Deprecated alias retained for backward compatibility. 
pub fn with_from_seq_id(self, seq_id: SeqId) -> Self { diff --git a/link/link-common/src/timeouts.rs b/link/link-common/src/timeouts.rs index 122661ef6..3b0f1dfab 100644 --- a/link/link-common/src/timeouts.rs +++ b/link/link-common/src/timeouts.rs @@ -12,9 +12,10 @@ use std::time::Duration; /// # Examples /// /// ```rust -/// use kalam_client::KalamLinkTimeouts; /// use std::time::Duration; /// +/// use kalam_client::KalamLinkTimeouts; +/// /// // Use defaults (recommended for most cases) /// let timeouts = KalamLinkTimeouts::default(); /// diff --git a/link/link-common/src/timestamp.rs b/link/link-common/src/timestamp.rs index 05fe4f8b7..ea24df844 100644 --- a/link/link-common/src/timestamp.rs +++ b/link/link-common/src/timestamp.rs @@ -4,9 +4,12 @@ //! to various human-readable formats. This module is exposed via WASM bindings //! for use in all language SDKs (TypeScript, Python, etc.). +use std::{ + fmt, + time::{SystemTime, UNIX_EPOCH}, +}; + use serde::{Deserialize, Serialize}; -use std::fmt; -use std::time::{SystemTime, UNIX_EPOCH}; const MILLIS_PER_SECOND: i64 = 1_000; const MILLIS_PER_MINUTE: i64 = 60 * MILLIS_PER_SECOND; @@ -281,7 +284,7 @@ impl fmt::Display for TimestampFormat { /// # Examples /// /// ```rust -/// use kalam_client::timestamp::{TimestampFormatter, TimestampFormat}; +/// use kalam_client::timestamp::{TimestampFormat, TimestampFormatter}; /// /// let formatter = TimestampFormatter::new(TimestampFormat::Iso8601); /// let formatted = formatter.format(Some(1734211234567)); diff --git a/link/link-common/src/wasm/client.rs b/link/link-common/src/wasm/client.rs index b49a95f15..9921fa970 100644 --- a/link/link-common/src/wasm/client.rs +++ b/link/link-common/src/wasm/client.rs @@ -1,31 +1,43 @@ -use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::rc::Rc; +use std::{ + cell::{Cell, RefCell}, + collections::HashMap, + rc::Rc, +}; -use wasm_bindgen::prelude::*; -use wasm_bindgen::JsCast; +use serde::Serialize; +use 
wasm_bindgen::{prelude::*, JsCast}; use wasm_bindgen_futures::JsFuture; use web_sys::{CloseEvent, ErrorEvent, MessageEvent, WebSocket}; +use super::{ + auth::WasmAuthProvider, + helpers::{ + create_promise, decode_ws_binary_payload, decode_ws_message, send_ws_message, + serialize_json_to_js_value, subscription_hash, + }, + reconnect::{self, reconnect_internal_with_auth, resubscribe_all}, + state::{ + callback_payload, filter_subscription_event, track_subscription_checkpoint, + SubscriptionCallbackMode, SubscriptionState, WasmLiveRowsOptions, + }, + validation::{ + quote_table_name, validate_column_name, validate_row_id, validate_sql_identifier, + }, + wasm_debug_log, +}; use crate::models::{ - ClientMessage, ConnectionOptions, QueryRequest, SerializationType, ServerMessage, - SubscriptionOptions, SubscriptionRequest, + ClientMessage, ConnectionOptions, SerializationType, ServerMessage, SubscriptionOptions, + SubscriptionRequest, }; -use super::auth::WasmAuthProvider; -use super::helpers::{ - create_promise, decode_ws_binary_payload, decode_ws_message, send_ws_message, - serialize_json_to_js_value, subscription_hash, -}; -use super::reconnect::{self, reconnect_internal_with_auth, resubscribe_all}; -use super::state::{ - callback_payload, filter_subscription_event, track_subscription_checkpoint, - SubscriptionCallbackMode, SubscriptionState, WasmLiveRowsOptions, -}; -use super::validation::{ - quote_table_name, validate_column_name, validate_row_id, validate_sql_identifier, -}; -use super::wasm_debug_log; +#[derive(Serialize)] +struct BorrowedQueryRequest<'a> { + sql: &'a str, + #[serde(skip_serializing_if = "Option::is_none")] + params: Option<&'a [serde_json::Value]>, + #[serde(default, skip_serializing_if = "Option::is_none")] + namespace_id: Option<&'a str>, +} /// WASM-compatible KalamDB client with auto-reconnection support /// @@ -33,7 +45,8 @@ use super::wasm_debug_log; /// - Basic Auth: `new KalamClient(url, username, password)` /// - JWT Token: 
`KalamClient.withJwt(url, token)` /// - Anonymous: `KalamClient.anonymous(url)` -/// - Dynamic Auth: `KalamClient.anonymous(url)` + `setAuthProvider(async () => ({ jwt: { token } }))` +/// - Dynamic Auth: `KalamClient.anonymous(url)` + `setAuthProvider(async () => ({ jwt: { token } +/// }))` /// /// # Example (JavaScript) /// ```js @@ -247,7 +260,7 @@ impl SubscriptionDispatch { } } -fn subscription_id_from_server_message(event: &ServerMessage) -> Option { +fn subscription_id_from_server_message(event: &ServerMessage) -> Option<&str> { match event { ServerMessage::SubscriptionAck { subscription_id, @@ -258,7 +271,7 @@ fn subscription_id_from_server_message(event: &ServerMessage) -> Option "KalamClient: Parsed SubscriptionAck - id: {}, total_rows: {}", subscription_id, _total_rows )); - Some(subscription_id.clone()) + Some(subscription_id.as_str()) }, ServerMessage::InitialDataBatch { subscription_id, @@ -271,7 +284,7 @@ fn subscription_id_from_server_message(event: &ServerMessage) -> Option _rows.len(), _batch_control.status )); - Some(subscription_id.clone()) + Some(subscription_id.as_str()) }, ServerMessage::Change { subscription_id, @@ -285,7 +298,7 @@ fn subscription_id_from_server_message(event: &ServerMessage) -> Option _change_type, _rows.as_ref().map(|value| value.len()) )); - Some(subscription_id.clone()) + Some(subscription_id.as_str()) }, ServerMessage::Error { subscription_id, @@ -297,21 +310,21 @@ fn subscription_id_from_server_message(event: &ServerMessage) -> Option "KalamClient: Parsed Error - id: {}, code: {}, msg: {}", subscription_id, _code, _message )); - Some(subscription_id.clone()) + Some(subscription_id.as_str()) }, _ => None, } } -fn resolve_subscription_key( - subscription_id: &str, +fn resolve_subscription_key<'a>( + subscription_id: &'a str, subscriptions: &HashMap, -) -> Option { +) -> Option<&'a str> { // The server echoes the exact subscription_id the client sent, so an exact // match is always correct. 
The previous ends_with() fallback was unsafe with // multiple concurrent subscriptions because it could match the wrong entry. if subscriptions.contains_key(subscription_id) { - Some(subscription_id.to_string()) + Some(subscription_id) } else { None } @@ -348,7 +361,7 @@ fn dispatch_subscription_server_message( { let mut subs = subscriptions.borrow_mut(); - if let Some(state) = subs.get_mut(&client_id) { + if let Some(state) = subs.get_mut(client_id) { callback = Some(state.callback.clone()); if let Some(filtered_event) = filter_subscription_event(&state.options, event) { track_subscription_checkpoint(&mut state.last_seq_id, &filtered_event); @@ -381,7 +394,7 @@ fn dispatch_subscription_server_message( } if remove_state { - subs.remove(&client_id); + subs.remove(client_id); } } @@ -1015,7 +1028,6 @@ impl KalamClient { self.connection_options.borrow_mut().ws_lazy_connect = lazy; } - /// /// # Returns /// Promise that resolves when connection is established and authenticated pub async fn connect(&mut self) -> Result<(), JsValue> { @@ -1064,7 +1076,8 @@ impl KalamClient { if matches!(resolved_auth, WasmAuthProvider::Basic { .. }) { return Err(JsValue::from_str( - "WebSocket authentication requires a JWT token. Use KalamClient.withJwt, login first, or set an authProvider.", + "WebSocket authentication requires a JWT token. 
Use KalamClient.withJwt, login \ + first, or set an authProvider.", )); } @@ -1221,7 +1234,8 @@ impl KalamClient { // Set up auto-reconnect onclose handler self.setup_auto_reconnect(&ws); - // T063K: Implement WebSocket onmessage handler to parse events and invoke registered callbacks + // T063K: Implement WebSocket onmessage handler to parse events and invoke registered + // callbacks let subscriptions = Rc::clone(&self.subscription_state); let auth_resolve_clone = auth_resolve.clone(); let auth_reject_clone2 = auth_reject.clone(); @@ -1522,7 +1536,8 @@ impl KalamClient { validate_row_id(&row_id)?; // T063H: Implement using fetch API to execute DELETE statement via /v1/api/sql - // Security: Quote table name (handling namespace.table format) and use parameterized-style value + // Security: Quote table name (handling namespace.table format) and use parameterized-style + // value let sql = format!( "DELETE FROM {} WHERE id = '{}'", quote_table_name(&table_name), @@ -1759,7 +1774,8 @@ impl KalamClient { WasmAuthProvider::Basic { username, password } => (username.clone(), password.clone()), _ => { return Err(JsValue::from_str( - "login() requires user/password credentials. Create client with new KalamClient(url, user, password)", + "login() requires user/password credentials. 
Create client with new \ + KalamClient(url, user, password)", )) }, }; @@ -1854,7 +1870,8 @@ impl KalamClient { { if query_resp.is_token_expired() { wasm_debug_log!( - "KalamClient: TOKEN_EXPIRED detected — reauthenticating and retrying query", + "KalamClient: TOKEN_EXPIRED detected — reauthenticating and retrying \ + query", ); self.reauthenticate_for_http().await?; return self.execute_sql_http(sql, ¶ms).await; @@ -1870,7 +1887,8 @@ impl KalamClient { { if query_resp.is_token_expired() { wasm_debug_log!( - "KalamClient: TOKEN_EXPIRED detected in HTTP error — reauthenticating and retrying query", + "KalamClient: TOKEN_EXPIRED detected in HTTP error — \ + reauthenticating and retrying query", ); self.reauthenticate_for_http().await?; return self.execute_sql_http(sql, ¶ms).await; @@ -1889,9 +1907,9 @@ impl KalamClient { sql: &str, params: &Option>, ) -> Result { - let body = QueryRequest { - sql: sql.to_string(), - params: params.clone(), + let body = BorrowedQueryRequest { + sql, + params: params.as_deref(), namespace_id: None, }; let body_str = serde_json::to_string(&body) diff --git a/link/link-common/src/wasm/helpers.rs b/link/link-common/src/wasm/helpers.rs index 82496a1d7..7bdbc10b6 100644 --- a/link/link-common/src/wasm/helpers.rs +++ b/link/link-common/src/wasm/helpers.rs @@ -1,15 +1,15 @@ -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; +use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, +}; use serde::Serialize; -use wasm_bindgen::prelude::*; -use wasm_bindgen::JsCast; +use wasm_bindgen::{prelude::*, JsCast}; use wasm_bindgen_futures::JsFuture; use web_sys::{Headers, MessageEvent, Request, RequestInit, RequestMode, Response}; use super::wasm_debug_log; -use crate::compression; -use crate::models::SerializationType; +use crate::{compression, models::SerializationType}; #[inline] pub(crate) fn ws_url_from_http_opts( @@ -178,7 +178,8 @@ pub(crate) fn decode_ws_message(e: &MessageEvent) -> Option { let data 
= e.data(); if data.is_instance_of::() { wasm_debug_log!( - "KalamClient: Received Blob message - binary mode may be misconfigured. Attempting to read as text.", + "KalamClient: Received Blob message - binary mode may be misconfigured. Attempting to \ + read as text.", ); return data.as_string(); } diff --git a/link/link-common/src/wasm/reconnect.rs b/link/link-common/src/wasm/reconnect.rs index aeddc71c0..c5ff6721f 100644 --- a/link/link-common/src/wasm/reconnect.rs +++ b/link/link-common/src/wasm/reconnect.rs @@ -1,22 +1,27 @@ -use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::rc::Rc; +use std::{ + cell::{Cell, RefCell}, + collections::HashMap, + rc::Rc, +}; -use wasm_bindgen::prelude::{Closure, JsValue}; -use wasm_bindgen::JsCast; +use wasm_bindgen::{ + prelude::{Closure, JsValue}, + JsCast, +}; use wasm_bindgen_futures::JsFuture; use web_sys::{ErrorEvent, MessageEvent, WebSocket}; +use super::{ + auth::WasmAuthProvider, + helpers::{create_promise, send_ws_message, ws_url_from_http_opts}, + state::SubscriptionState, + wasm_debug_log, +}; use crate::models::{ ClientMessage, ConnectionOptions, ProtocolOptions, SerializationType, ServerMessage, SubscriptionRequest, }; -use super::auth::WasmAuthProvider; -use super::helpers::{create_promise, send_ws_message, ws_url_from_http_opts}; -use super::state::SubscriptionState; -use super::wasm_debug_log; - /// Resolve a `WasmAuthProvider` from an optional JS async callback. /// /// If `auth_provider_cb` is `Some`, the callback is invoked and the returned @@ -70,7 +75,8 @@ pub(crate) async fn reconnect_internal_with_auth( if matches!(resolved_auth, WasmAuthProvider::Basic { .. }) { return Err(JsValue::from_str( - "WebSocket authentication requires a JWT token. Use KalamClient.withJwt, login first, or set an authProvider.", + "WebSocket authentication requires a JWT token. 
Use KalamClient.withJwt, login first, \ + or set an authProvider.", )); } diff --git a/link/link-common/src/wasm/state.rs b/link/link-common/src/wasm/state.rs index 067d51990..19f6b5b1b 100644 --- a/link/link-common/src/wasm/state.rs +++ b/link/link-common/src/wasm/state.rs @@ -1,8 +1,9 @@ -use crate::models::SubscriptionOptions; -use crate::models::{ChangeEvent, ServerMessage}; -use crate::seq_id::SeqId; -use crate::seq_tracking; -use crate::subscription::{LiveRowsConfig, LiveRowsMaterializer}; +use crate::{ + models::{ChangeEvent, ServerMessage, SubscriptionOptions}, + seq_id::SeqId, + seq_tracking, + subscription::{LiveRowsConfig, LiveRowsMaterializer}, +}; #[derive(Clone)] pub(crate) enum SubscriptionCallbackMode { diff --git a/link/link-common/src/wasm/timestamp.rs b/link/link-common/src/wasm/timestamp.rs index f9b108610..3fa4c2ad8 100644 --- a/link/link-common/src/wasm/timestamp.rs +++ b/link/link-common/src/wasm/timestamp.rs @@ -27,7 +27,8 @@ impl WasmTimestampFormatter { /// Create a formatter with a specific format /// /// # Arguments - /// * `format` - One of: "iso8601", "iso8601-date", "iso8601-datetime", "unix-ms", "unix-sec", "relative", "rfc2822", "rfc3339" + /// * `format` - One of: "iso8601", "iso8601-date", "iso8601-datetime", "unix-ms", "unix-sec", + /// "relative", "rfc2822", "rfc3339" #[wasm_bindgen(js_name = withFormat)] pub fn with_format(format: &str) -> Result { let fmt = match format { diff --git a/link/link-common/src/wasm/validation.rs b/link/link-common/src/wasm/validation.rs index e30a14cb5..1c0e0ebc9 100644 --- a/link/link-common/src/wasm/validation.rs +++ b/link/link-common/src/wasm/validation.rs @@ -24,7 +24,8 @@ pub(crate) fn validate_sql_identifier(name: &str, context: &str) -> Result<(), J for c in name.chars() { if !c.is_ascii_alphanumeric() && c != '_' && c != '.' { return Err(JsValue::from_str(&format!( - "{} contains invalid character '{}'. 
Only letters, numbers, underscores, and dots allowed", + "{} contains invalid character '{}'. Only letters, numbers, underscores, and dots \ + allowed", context, c ))); } diff --git a/link/sdks/typescript/client/src/types.ts b/link/sdks/typescript/client/src/types.ts index 3c2f11a5e..428a90054 100644 --- a/link/sdks/typescript/client/src/types.ts +++ b/link/sdks/typescript/client/src/types.ts @@ -41,7 +41,6 @@ export interface BatchControl { has_more: boolean; status: BatchStatus; last_seq_id?: WireSeqId; - snapshot_end_seq?: WireSeqId; } export type ChangeTypeRaw = 'insert' | 'update' | 'delete'; diff --git a/nextest.toml b/nextest.toml index eccbee8be..b24a1b2ff 100644 --- a/nextest.toml +++ b/nextest.toml @@ -3,38 +3,209 @@ retries = 3 test-threads = 15 +[test-groups] +# Keep these known noisy shared-cluster/perf tests from overlapping with each +# other, while still allowing unrelated tests to use the remaining default +# concurrency budget. +stateful-heavy = { max-threads = 1 } + [[profile.default.overrides]] filter = 'test(test_scenario_08_subscription_reconnect)' slow-timeout = { period = "60s", terminate-after = 2 } [[profile.default.overrides]] filter = 'test(test_setup_complete_environment)' -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(idle_autocommit_transaction_checks_add_no_extra_allocations)' # This perf/allocation regression is stable in isolation but noisy under full workspace contention. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(autocommit_read_write_latency_regression_stays_within_five_percent)' # This perf regression compares two nearly identical code paths and should run in isolation. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_sequential_insert_100)' # This PG perf check is stable in isolation but noisy under full-suite contention. 
-threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_cross_verify_latency)' # This latency check is sensitive to concurrent heavy tests and should run in isolation. -threads-required = 15 +test-group = "stateful-heavy" [[profile.default.overrides]] filter = 'test(e2e_perf_local_memory_stays_bounded_under_batch_insert_and_scan)' # This memory-bound perf check is meaningful only without competing suite load. -threads-required = 15 +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_http_consume_direct_multi_user_publishers_no_missing_changes)' +# These topic smoke tests are stable in isolation but can contend under full workspace load. +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_http_consume_preserves_impersonated_user_and_payloads)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_consume_option_matrix_start_batch_auto_ack_modes)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_high_load_two_consumers_same_group_single_delivery)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_four_consumers_same_group_no_duplicates)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_ack_failure_recovery_no_message_loss_with_latency)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_fan_out_different_groups_receive_all)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_event_counter_integrity_through_multiple_outages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_gradual_latency_ramp_forces_reconnect_then_recovers)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_heavy_write_burst_during_outage_all_delivered)' +test-group = 
"stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_large_initial_snapshot_survives_repeated_outages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_latency_spike_during_initial_snapshot_recovers)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_proxy_server_down_during_live_updates_resumes)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_loading_snapshot_with_live_writes_resumes_without_duplicate_rows)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_proxy_three_subscriptions_resume_after_server_bounce)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_shared_connection_recovers_subscriptions_in_different_stages)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_cli_syntax_error_handling)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_connection_timeout_option)' +# Pure options test, but full-suite CLI subprocess load can make nextest assign leaked output +# handles here; keep it isolated from subprocess-heavy tests. 
+test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_rapid_connect_disconnect)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_concurrent_websocket_subscriptions)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_test_leader_read_shared_table)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_table_identity_updates)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_all_datatypes_user_shared_stream)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_table_identity_mixed_operations)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_ws_follower_receives_leader_changes)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_table_identity_user_tables)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_security_regular_user_cannot_impersonate_privileged_users_in_batch)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_as_user_chat_delete_flow)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_insert_returning_seq_multi_row)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_rpc_login_nonexistent_user_matches_wrong_password_response)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_security_private_shared_table_blocked_in_batch)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_security_system_tables_blocked_in_batch)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_storage_custom_templates)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_storage_check_dba_access)' 
+test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_storage_check_authorization)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_consume_update_events)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(test_topic_consume_offset_persistence)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(smoke_export_download_forbidden_for_other_user)' +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_leader_only_flush_jobs)' +# Uses system.jobs queries — must not race with other tests' flush jobs or background scheduler. +test-group = "stateful-heavy" + +[[profile.default.overrides]] +filter = 'test(cluster_test_jobs_table_consistency)' +# Queries global system.jobs count — must not race with concurrent tests creating jobs. +test-group = "stateful-heavy" # [[profile.default.overrides]] # filter = 'package(kalamdb-server)' diff --git a/pg/crates/kalam-pg-api/src/request.rs b/pg/crates/kalam-pg-api/src/request.rs index 5b84cf4ee..d13e2f415 100644 --- a/pg/crates/kalam-pg-api/src/request.rs +++ b/pg/crates/kalam-pg-api/src/request.rs @@ -1,8 +1,10 @@ -use crate::filter::ScanFilter; -use crate::session::{RemoteSessionContext, TenantContext}; use kalam_pg_common::KalamPgError; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{models::rows::Row, TableId, TableType}; + +use crate::{ + filter::ScanFilter, + session::{RemoteSessionContext, TenantContext}, +}; fn validate_write_scope( table_type: TableType, diff --git a/pg/crates/kalam-pg-api/src/traits.rs b/pg/crates/kalam-pg-api/src/traits.rs index 830f0d27a..4444bdd18 100644 --- a/pg/crates/kalam-pg-api/src/traits.rs +++ b/pg/crates/kalam-pg-api/src/traits.rs @@ -1,8 +1,11 @@ -use crate::request::{DeleteRequest, InsertRequest, ScanRequest, UpdateRequest}; -use crate::response::{MutationResponse, 
ScanResponse}; use async_trait::async_trait; use kalam_pg_common::KalamPgError; +use crate::{ + request::{DeleteRequest, InsertRequest, ScanRequest, UpdateRequest}, + response::{MutationResponse, ScanResponse}, +}; + /// Backend executor abstraction for remote mode. #[async_trait] pub trait KalamBackendExecutor: Send + Sync { diff --git a/pg/crates/kalam-pg-api/tests/request_validation.rs b/pg/crates/kalam-pg-api/tests/request_validation.rs index 4d85106ab..7757b06a7 100644 --- a/pg/crates/kalam-pg-api/tests/request_validation.rs +++ b/pg/crates/kalam-pg-api/tests/request_validation.rs @@ -1,8 +1,9 @@ use datafusion_common::ScalarValue; use kalam_pg_api::{InsertRequest, RemoteSessionContext, ScanRequest, TenantContext}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{NamespaceId, TableName, UserId}; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{rows::Row, NamespaceId, TableName, UserId}, + TableId, TableType, +}; #[test] fn user_table_scan_requires_user_id() { diff --git a/pg/crates/kalam-pg-client/Cargo.toml b/pg/crates/kalam-pg-client/Cargo.toml index 351fd3912..e9145d266 100644 --- a/pg/crates/kalam-pg-client/Cargo.toml +++ b/pg/crates/kalam-pg-client/Cargo.toml @@ -25,6 +25,7 @@ tokio = { workspace = true } [dev-dependencies] arrow = { workspace = true } async-trait = { workspace = true } +kalamdb-commons = { workspace = true, default-features = false } kalamdb-pg = { path = "../../../backend/crates/kalamdb-pg", features = ["server"] } ntest = { workspace = true } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/pg/crates/kalam-pg-client/src/lib.rs b/pg/crates/kalam-pg-client/src/lib.rs index 3f70a65eb..9bbb616d4 100644 --- a/pg/crates/kalam-pg-client/src/lib.rs +++ b/pg/crates/kalam-pg-client/src/lib.rs @@ -1,5 +1,4 @@ -use std::io::Cursor; -use std::time::Duration; +use std::{future::Future, io::Cursor, time::Duration}; use arrow::record_batch::RecordBatch; use 
arrow_ipc::reader::StreamReader; @@ -7,14 +6,17 @@ use kalam_pg_api::{MutationResponse, ScanResponse}; use kalam_pg_common::{KalamPgError, RemoteAuthMode, RemoteServerConfig}; use kalamdb_pg::{ BeginTransactionRequest, CloseSessionRequest, CommitTransactionRequest, DeleteRpcRequest, - ExecuteQueryRpcRequest, ExecuteSqlRpcRequest, InsertRpcRequest, OpenSessionRequest, - PgServiceClient, PingRequest, RollbackTransactionRequest, ScanFilterExpression, ScanRpcRequest, - UpdateRpcRequest, + DeleteRpcResponse, ExecuteQueryRpcRequest, ExecuteSqlRpcRequest, InsertRpcRequest, + InsertRpcResponse, OpenSessionRequest, PgServiceClient, PingRequest, + RollbackTransactionRequest, ScanFilterExpression, ScanRpcRequest, ScanRpcResponse, + UpdateRpcRequest, UpdateRpcResponse, }; #[cfg(feature = "tls")] use tonic::transport::{Certificate, ClientTlsConfig, Identity}; -use tonic::transport::{Channel, Endpoint}; -use tonic::Request; +use tonic::{ + transport::{Channel, Endpoint}, + Request, Response, Status, +}; /// Load PEM material from either an inline PEM string or a file path. 
fn load_pem(value: &str) -> Result, String> { @@ -31,14 +33,12 @@ fn build_basic_auth_metadata( password: &str, ) -> Result, KalamPgError> { use base64::Engine; - let encoded = base64::engine::general_purpose::STANDARD.encode(format!("{}:{}", user, password)); + let encoded = + base64::engine::general_purpose::STANDARD.encode(format!("{}:{}", user, password)); format!("Basic {}", encoded) .parse::>() .map_err(|error| { - KalamPgError::Validation(format!( - "failed to build Basic auth metadata: {}", - error - )) + KalamPgError::Validation(format!("failed to build Basic auth metadata: {}", error)) }) } @@ -204,8 +204,8 @@ impl RemoteKalamClient { KalamPgError::ServerUnreachable(server_addr.to_string()) } else if detail.contains("timed out") || detail.contains("deadline") { KalamPgError::Execution(format!( - "connection to KalamDB server at {server_addr} timed out – \ - check the server is running and the port is correct" + "connection to KalamDB server at {server_addr} timed out – check the server is \ + running and the port is correct" )) } else if detail.contains("certificate") || detail.contains("tls") || detail.contains("TLS") { @@ -242,7 +242,8 @@ impl RemoteKalamClient { KalamPgError::Execution(format!("not found: {msg}")) }, Code::Unauthenticated => KalamPgError::Execution( - "authentication failed – check auth_mode, auth_header, or account_login credentials in CREATE SERVER OPTIONS" + "authentication failed – check auth_mode, auth_header, or account_login \ + credentials in CREATE SERVER OPTIONS" .to_string(), ), Code::PermissionDenied => { @@ -272,6 +273,123 @@ impl RemoteKalamClient { } } + fn leader_redirect_hint(status: &tonic::Status) -> Option<&str> { + if !matches!(status.code(), tonic::Code::Internal | tonic::Code::FailedPrecondition) { + return None; + } + + let marker = "Leader: "; + let message = status.message(); + let index = message.find(marker)?; + let leader = message[index + marker.len()..].trim(); + + if leader.is_empty() || 
leader.eq_ignore_ascii_case("unknown") { + None + } else { + Some(leader) + } + } + + fn parse_leader_authority(leader_hint: &str) -> Option<(String, Option)> { + let mut leader_hint = leader_hint.trim(); + + while let Some(stripped) = + leader_hint.strip_prefix("Some(").and_then(|value| value.strip_suffix(')')) + { + leader_hint = stripped.trim(); + } + + leader_hint = leader_hint.trim_matches(|ch| ch == '"' || ch == '\''); + + let authority = leader_hint + .split_once("://") + .map(|(_, value)| value) + .unwrap_or(leader_hint) + .split('/') + .next() + .unwrap_or(leader_hint) + .rsplit('@') + .next() + .unwrap_or(leader_hint) + .trim_matches(|ch| ch == '"' || ch == '\'') + .trim(); + + if authority.is_empty() { + return None; + } + + if let Some(rest) = authority.strip_prefix('[') { + let end = rest.find(']')?; + let host = rest[..end].trim(); + if host.is_empty() { + return None; + } + let port = rest[end + 1..] + .strip_prefix(':') + .map(|value| value.trim_matches(|ch| ch == '"' || ch == '\'')) + .and_then(|value| value.parse::().ok()); + return Some((host.to_string(), port)); + } + + if let Some((host, port)) = authority.rsplit_once(':') { + if !host.contains(':') { + return Some(( + host.trim().to_string(), + port.trim_matches(|ch| ch == '"' || ch == '\'').parse::().ok(), + )); + } + } + + Some((authority.to_string(), None)) + } + + fn leader_rpc_candidate_ports(leader_api_port: Option, current_rpc_port: u16) -> Vec { + let mut ports = Vec::with_capacity(2); + + if let Some(api_port) = leader_api_port { + let mapped_port = match api_port { + 8080 => Some(9188), + _ => api_port.checked_add(1000), + }; + + if let Some(port) = mapped_port { + ports.push(port); + } + } + + if !ports.contains(¤t_rpc_port) { + ports.push(current_rpc_port); + } + + ports + } + + async fn reconnect_to_leader(&self, status: &tonic::Status, session_id: &str) -> Option { + let leader_hint = Self::leader_redirect_hint(status)?; + let (host, leader_api_port) = 
Self::parse_leader_authority(leader_hint)?; + + for port in Self::leader_rpc_candidate_ports(leader_api_port, self.config.port) { + if host == self.config.host && port == self.config.port { + continue; + } + + let mut config = self.config.clone(); + config.host = host.clone(); + config.port = port; + + let client = match Self::connect(config).await { + Ok(client) => client, + Err(_) => continue, + }; + + if client.open_session(Some(session_id), None).await.is_ok() { + return Some(client); + } + } + + None + } + /// Build a TLS configuration from the remote server config. /// Supports both inline PEM strings and file paths. #[cfg(feature = "tls")] @@ -311,14 +429,47 @@ impl RemoteKalamClient { req } - pub async fn ping(&self) -> Result<(), KalamPgError> { - // Ping uses auth metadata directly (pre-session health check). - let request = self.authenticated_request(PingRequest {}); - let mut client = PgServiceClient::new(self.channel.clone()); - client - .ping(request) + fn grpc_client(&self) -> PgServiceClient { + PgServiceClient::new(self.channel.clone()) + } + + async fn call_with_request( + &self, + request: Request, + call: F, + ) -> Result + where + F: FnOnce(PgServiceClient, Request) -> Fut, + Fut: Future, Status>>, + { + let client = self.grpc_client(); + let response = call(client, request).await?; + Ok(response.into_inner()) + } + + async fn call_plain_status(&self, payload: T, call: F) -> Result + where + F: FnOnce(PgServiceClient, Request) -> Fut, + Fut: Future, Status>>, + { + self.call_with_request(Self::plain_request(payload), call).await + } + + async fn call_authenticated(&self, payload: T, call: F) -> Result + where + F: FnOnce(PgServiceClient, Request) -> Fut, + Fut: Future, Status>>, + { + self.call_with_request(self.authenticated_request(payload), call) .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))?; + .map_err(|status| Self::grpc_err(status, &self.server_addr)) + } + + pub async fn ping(&self) -> Result<(), KalamPgError> { 
+ self.call_authenticated(PingRequest {}, |mut client, request| async move { + client.ping(request).await + }) + .await?; Ok(()) } @@ -329,23 +480,22 @@ impl RemoteKalamClient { session_id: Option<&str>, current_schema: Option<&str>, ) -> Result { - let request = self.authenticated_request(OpenSessionRequest { - session_id: session_id - .map(str::trim) - .filter(|value| !value.is_empty()) - .unwrap_or("") - .to_string(), - current_schema: current_schema - .map(str::trim) - .filter(|value| !value.is_empty()) - .map(ToOwned::to_owned), - }); - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .open_session(request) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? - .into_inner(); + let response = self + .call_authenticated( + OpenSessionRequest { + session_id: session_id + .map(str::trim) + .filter(|value| !value.is_empty()) + .unwrap_or("") + .to_string(), + current_schema: current_schema + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned), + }, + |mut client, request| async move { client.open_session(request).await }, + ) + .await?; Ok(RemoteSessionHandle { session_id: response.session_id, @@ -370,15 +520,77 @@ impl RemoteKalamClient { } async fn close_session_attempt(&self, session_id: &str) -> Result<(), tonic::Status> { - let mut client = PgServiceClient::new(self.channel.clone()); - client - .close_session(Self::plain_request(CloseSessionRequest { + self.call_plain_status( + CloseSessionRequest { session_id: session_id.trim().to_string(), - })) - .await?; + }, + |mut client, request| async move { client.close_session(request).await }, + ) + .await?; Ok(()) } + async fn scan_attempt( + &self, + payload: ScanRpcRequest, + ) -> Result { + self.call_plain_status( + payload, + |mut client, request| async move { client.scan(request).await }, + ) + .await + } + + async fn insert_attempt( + &self, + payload: InsertRpcRequest, + ) -> Result { + self.call_plain_status(payload, |mut 
client, request| async move { + client.insert(request).await + }) + .await + } + + async fn update_attempt( + &self, + payload: UpdateRpcRequest, + ) -> Result { + self.call_plain_status(payload, |mut client, request| async move { + client.update(request).await + }) + .await + } + + async fn delete_attempt( + &self, + payload: DeleteRpcRequest, + ) -> Result { + self.call_plain_status(payload, |mut client, request| async move { + client.delete(request).await + }) + .await + } + + async fn execute_sql_attempt( + &self, + payload: ExecuteSqlRpcRequest, + ) -> Result { + self.call_plain_status(payload, |mut client, request| async move { + client.execute_sql(request).await + }) + .await + } + + async fn execute_query_attempt( + &self, + payload: ExecuteQueryRpcRequest, + ) -> Result { + self.call_plain_status(payload, |mut client, request| async move { + client.execute_query(request).await + }) + .await + } + pub async fn scan( &self, namespace: &str, @@ -398,21 +610,29 @@ impl RemoteKalamClient { value, }) .collect(); - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .scan(Self::plain_request(ScanRpcRequest { - namespace: namespace.to_string(), - table_name: table_name.to_string(), - table_type: table_type.to_string(), - session_id: session_id.to_string(), - user_id: user_id.map(str::to_string), - columns, - limit, - filters: grpc_filters, - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? 
- .into_inner(); + let request = ScanRpcRequest { + namespace: namespace.to_string(), + table_name: table_name.to_string(), + table_type: table_type.to_string(), + session_id: session_id.to_string(), + user_id: user_id.map(str::to_string), + columns, + limit, + filters: grpc_filters, + }; + let response = match self.scan_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .scan_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? + } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; let batches = Self::decode_ipc_batches(&response.ipc_batches)?; Ok(ScanResponse::new(batches)) @@ -427,19 +647,27 @@ impl RemoteKalamClient { user_id: Option<&str>, rows_json: Vec, ) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .insert(Self::plain_request(InsertRpcRequest { - namespace: namespace.to_string(), - table_name: table_name.to_string(), - table_type: table_type.to_string(), - session_id: session_id.to_string(), - user_id: user_id.map(str::to_string), - rows_json, - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? - .into_inner(); + let request = InsertRpcRequest { + namespace: namespace.to_string(), + table_name: table_name.to_string(), + table_type: table_type.to_string(), + session_id: session_id.to_string(), + user_id: user_id.map(str::to_string), + rows_json, + }; + let response = match self.insert_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .insert_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? 
+ } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; Ok(MutationResponse { affected_rows: response.affected_rows, @@ -456,20 +684,28 @@ impl RemoteKalamClient { pk_value: &str, updates_json: &str, ) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .update(Self::plain_request(UpdateRpcRequest { - namespace: namespace.to_string(), - table_name: table_name.to_string(), - table_type: table_type.to_string(), - session_id: session_id.to_string(), - user_id: user_id.map(str::to_string), - pk_value: pk_value.to_string(), - updates_json: updates_json.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? - .into_inner(); + let request = UpdateRpcRequest { + namespace: namespace.to_string(), + table_name: table_name.to_string(), + table_type: table_type.to_string(), + session_id: session_id.to_string(), + user_id: user_id.map(str::to_string), + pk_value: pk_value.to_string(), + updates_json: updates_json.to_string(), + }; + let response = match self.update_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .update_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? 
+ } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; Ok(MutationResponse { affected_rows: response.affected_rows, @@ -485,19 +721,27 @@ impl RemoteKalamClient { user_id: Option<&str>, pk_value: &str, ) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .delete(Self::plain_request(DeleteRpcRequest { - namespace: namespace.to_string(), - table_name: table_name.to_string(), - table_type: table_type.to_string(), - session_id: session_id.to_string(), - user_id: user_id.map(str::to_string), - pk_value: pk_value.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? - .into_inner(); + let request = DeleteRpcRequest { + namespace: namespace.to_string(), + table_name: table_name.to_string(), + table_type: table_type.to_string(), + session_id: session_id.to_string(), + user_id: user_id.map(str::to_string), + pk_value: pk_value.to_string(), + }; + let response = match self.delete_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .delete_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? + } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; Ok(MutationResponse { affected_rows: response.affected_rows, @@ -506,14 +750,30 @@ impl RemoteKalamClient { /// Begin a new transaction within the given session. pub async fn begin_transaction(&self, session_id: &str) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .begin_transaction(Self::plain_request(BeginTransactionRequest { - session_id: session_id.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? 
- .into_inner(); + match self.begin_transaction_attempt(session_id).await { + Ok(response) => Ok(response), + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .begin_transaction_attempt(session_id) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr)) + } else { + Err(Self::grpc_err(status, &self.server_addr)) + } + }, + } + } + + async fn begin_transaction_attempt(&self, session_id: &str) -> Result { + let response = self + .call_plain_status( + BeginTransactionRequest { + session_id: session_id.to_string(), + }, + |mut client, request| async move { client.begin_transaction(request).await }, + ) + .await?; Ok(response.transaction_id) } @@ -523,15 +783,35 @@ impl RemoteKalamClient { session_id: &str, transaction_id: &str, ) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .commit_transaction(Self::plain_request(CommitTransactionRequest { - session_id: session_id.to_string(), - transaction_id: transaction_id.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? 
- .into_inner(); + match self.commit_transaction_attempt(session_id, transaction_id).await { + Ok(response) => Ok(response), + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .commit_transaction_attempt(session_id, transaction_id) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr)) + } else { + Err(Self::grpc_err(status, &self.server_addr)) + } + }, + } + } + + async fn commit_transaction_attempt( + &self, + session_id: &str, + transaction_id: &str, + ) -> Result { + let response = self + .call_plain_status( + CommitTransactionRequest { + session_id: session_id.to_string(), + transaction_id: transaction_id.to_string(), + }, + |mut client, request| async move { client.commit_transaction(request).await }, + ) + .await?; Ok(response.transaction_id) } @@ -543,6 +823,16 @@ impl RemoteKalamClient { ) -> Result { match self.rollback_transaction_attempt(session_id, transaction_id).await { Ok(response) => Ok(response), + Err(status) if Self::leader_redirect_hint(&status).is_some() => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .rollback_transaction_attempt(session_id, transaction_id) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr)) + } else { + Err(Self::grpc_err(status, &self.server_addr)) + } + }, Err(status) if Self::should_retry_cleanup_status(&status) => { let fresh_client = self.reconnect().await?; fresh_client @@ -559,28 +849,37 @@ impl RemoteKalamClient { session_id: &str, transaction_id: &str, ) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .rollback_transaction(Self::plain_request(RollbackTransactionRequest { - session_id: session_id.to_string(), - transaction_id: transaction_id.to_string(), - })) - .await? 
- .into_inner(); + let response = self + .call_plain_status( + RollbackTransactionRequest { + session_id: session_id.to_string(), + transaction_id: transaction_id.to_string(), + }, + |mut client, request| async move { client.rollback_transaction(request).await }, + ) + .await?; Ok(response.transaction_id) } /// Execute a DDL SQL statement on the KalamDB backend. pub async fn execute_sql(&self, sql: &str, session_id: &str) -> Result { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .execute_sql(Self::plain_request(ExecuteSqlRpcRequest { - sql: sql.to_string(), - session_id: session_id.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? - .into_inner(); + let request = ExecuteSqlRpcRequest { + sql: sql.to_string(), + session_id: session_id.to_string(), + }; + let response = match self.execute_sql_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .execute_sql_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? + } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; Ok(response.message) } @@ -590,15 +889,23 @@ impl RemoteKalamClient { sql: &str, session_id: &str, ) -> Result<(String, Vec), KalamPgError> { - let mut client = PgServiceClient::new(self.channel.clone()); - let response = client - .execute_query(Self::plain_request(ExecuteQueryRpcRequest { - sql: sql.to_string(), - session_id: session_id.to_string(), - })) - .await - .map_err(|status| Self::grpc_err(status, &self.server_addr))? 
- .into_inner(); + let request = ExecuteQueryRpcRequest { + sql: sql.to_string(), + session_id: session_id.to_string(), + }; + let response = match self.execute_query_attempt(request.clone()).await { + Ok(response) => response, + Err(status) => { + if let Some(leader_client) = self.reconnect_to_leader(&status, session_id).await { + leader_client + .execute_query_attempt(request) + .await + .map_err(|status| Self::grpc_err(status, &leader_client.server_addr))? + } else { + return Err(Self::grpc_err(status, &self.server_addr)); + } + }, + }; let batches = Self::decode_ipc_batches(&response.ipc_batches)?; let json_rows = Self::batches_to_json_rows(&batches); Ok((response.message, json_rows)) @@ -624,12 +931,14 @@ impl RemoteKalamClient { /// Serialize Arrow RecordBatches into a Vec of JSON object strings (one per row). fn batches_to_json_rows(batches: &[RecordBatch]) -> Vec { - use arrow::array::{ - Array, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, LargeStringArray, StringArray, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, + use arrow::{ + array::{ + Array, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, LargeStringArray, StringArray, UInt16Array, UInt32Array, + UInt64Array, UInt8Array, + }, + datatypes::DataType, }; - use arrow::datatypes::DataType; let mut rows = Vec::new(); for batch in batches { diff --git a/pg/crates/kalam-pg-client/tests/connectivity.rs b/pg/crates/kalam-pg-client/tests/connectivity.rs index 62fc403d1..08280ebc1 100644 --- a/pg/crates/kalam-pg-client/tests/connectivity.rs +++ b/pg/crates/kalam-pg-client/tests/connectivity.rs @@ -1,7 +1,8 @@ +use std::time::Duration; + use kalam_pg_client::RemoteKalamClient; use kalam_pg_common::{RemoteAuthMode, RemoteServerConfig}; use kalamdb_pg::{KalamPgService, PgServiceServer}; -use std::time::Duration; use tokio::net::TcpListener; /// Helper: start a gRPC PgService on an ephemeral port and return a connected 
client. @@ -30,18 +31,13 @@ async fn start_server_and_client() -> RemoteKalamClient { .expect("connect client") } - - #[tokio::test] #[ntest::timeout(10000)] async fn remote_client_connects_and_opens_session() { let client = start_server_and_client().await; client.ping().await.expect("ping"); - let session = client - .open_session(None, Some("tenant_app")) - .await - .expect("open session"); + let session = client.open_session(None, Some("tenant_app")).await.expect("open session"); assert!(!session.session_id.is_empty(), "server should issue a session ID"); assert_eq!(session.current_schema.as_deref(), Some("tenant_app")); @@ -88,7 +84,10 @@ async fn stale_transaction_auto_rollback_on_new_begin() { .await .expect("begin tx2 should auto-rollback stale tx1"); - client.commit_transaction(&session.session_id, &tx_id2).await.expect("commit tx2"); + client + .commit_transaction(&session.session_id, &tx_id2) + .await + .expect("commit tx2"); } /// Verify close_session removes the session from the server registry. @@ -145,9 +144,7 @@ async fn connect_with_timeout_fails_on_unreachable_server() { async fn account_login_sends_basic_auth_on_open_session() { // With gRPC-only auth, account_login sends Basic credentials as gRPC metadata // on open_session. The server validates them and issues a session handle. - let listener = TcpListener::bind("127.0.0.1:0") - .await - .expect("bind gRPC server"); + let listener = TcpListener::bind("127.0.0.1:0").await.expect("bind gRPC server"); let port = listener.local_addr().expect("gRPC local addr").port(); let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener); // No expected static header — the server should accept Basic auth. 
diff --git a/pg/crates/kalam-pg-client/tests/statement_round_trip.rs b/pg/crates/kalam-pg-client/tests/statement_round_trip.rs index 5dd3aa2ee..9390a4a92 100644 --- a/pg/crates/kalam-pg-client/tests/statement_round_trip.rs +++ b/pg/crates/kalam-pg-client/tests/statement_round_trip.rs @@ -4,21 +4,22 @@ //! a `RemoteKalamClient`, and exercises a full round-trip: //! client → gRPC → KalamPgService → OperationExecutor → response → client -use std::net::SocketAddr; -use std::sync::Arc; -use std::time::Duration; +use std::{net::SocketAddr, sync::Arc, time::Duration}; -use arrow::array::{Array, Int64Array, StringArray}; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::record_batch::RecordBatch; +use arrow::{ + array::{Array, Int64Array, StringArray}, + datatypes::{DataType, Field, Schema}, + record_batch::RecordBatch, +}; use async_trait::async_trait; use kalam_pg_client::RemoteKalamClient; use kalam_pg_common::RemoteServerConfig; +use kalamdb_commons::models::TransactionId; use kalamdb_pg::{ DeleteRequest, InsertRequest, KalamPgService, MutationResult, OperationExecutor, PgServiceServer, ScanRequest, ScanResult, UpdateRequest, }; -use tonic::Status; +use tonic::{Code, Status}; // --------------------------------------------------------------------------- // Mock executor @@ -70,14 +71,82 @@ impl OperationExecutor for MockExecutor { } } +struct NotLeaderExecutor { + leader_api_addr: String, + code: Code, +} + +impl NotLeaderExecutor { + fn status(&self) -> Status { + Status::new( + self.code, + format!( + "External error: Not leader for shard. 
Leader: Some(\"{}\")", + self.leader_api_addr + ), + ) + } +} + +#[async_trait] +impl OperationExecutor for NotLeaderExecutor { + async fn execute_scan(&self, _request: ScanRequest) -> Result { + Err(self.status()) + } + + async fn begin_transaction(&self, _session_id: &str) -> Result, Status> { + Err(self.status()) + } + + async fn commit_transaction( + &self, + _session_id: &str, + _transaction_id: &TransactionId, + ) -> Result, Status> { + Err(self.status()) + } + + async fn rollback_transaction( + &self, + _session_id: &str, + _transaction_id: &TransactionId, + ) -> Result, Status> { + Err(self.status()) + } + + async fn execute_insert(&self, _request: InsertRequest) -> Result { + Err(self.status()) + } + + async fn execute_update(&self, _request: UpdateRequest) -> Result { + Err(self.status()) + } + + async fn execute_delete(&self, _request: DeleteRequest) -> Result { + Err(self.status()) + } + + async fn execute_sql(&self, _sql: &str) -> Result { + Err(self.status()) + } + + async fn execute_query(&self, _sql: &str) -> Result<(String, Vec), Status> { + Err(self.status()) + } +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- /// Spin up a KalamPgService with the mock executor on the given port. 
async fn start_server(port: u16) { - let bind_addr: SocketAddr = format!("127.0.0.1:{port}").parse().expect("bind addr"); - let service = KalamPgService::new(false, None).with_operation_executor(Arc::new(MockExecutor)); + start_server_with_executor("127.0.0.1", port, Arc::new(MockExecutor)).await; +} + +async fn start_server_with_executor(host: &str, port: u16, executor: Arc) { + let bind_addr: SocketAddr = format!("{host}:{port}").parse().expect("bind addr"); + let service = KalamPgService::new(false, None).with_operation_executor(executor); tokio::spawn(async move { tonic::transport::Server::builder() @@ -91,8 +160,12 @@ async fn start_server(port: u16) { } async fn connect(port: u16) -> RemoteKalamClient { + connect_to("127.0.0.1", port).await +} + +async fn connect_to(host: &str, port: u16) -> RemoteKalamClient { RemoteKalamClient::connect(RemoteServerConfig { - host: "127.0.0.1".to_string(), + host: host.to_string(), port, ..Default::default() }) @@ -160,6 +233,251 @@ async fn scan_with_projection_and_limit() { assert!(!response.batches.is_empty()); } +#[tokio::test] +#[ntest::timeout(10000)] +async fn scan_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39081; + let leader_rpc_port = 39083; + let leader_api_port = 38083; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::Internal, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let client = connect_to(host, follower_rpc_port).await; + let session = client.open_session(None, Some("app")).await.expect("open session"); + + let response = client + .scan("app", "messages", "shared", &session.session_id, None, vec![], None, vec![]) + .await + .expect("scan should follow leader redirect"); + + assert_eq!(response.batches.len(), 1); + assert_eq!(response.batches[0].num_rows(), 3); +} + +#[tokio::test] 
+#[ntest::timeout(10000)] +async fn execute_sql_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39181; + let leader_rpc_port = 39183; + let leader_api_port = 38183; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::Internal, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let client = connect_to(host, follower_rpc_port).await; + let session = client.open_session(None, Some("app")).await.expect("open session"); + + let response = client + .execute_sql("ALTER TABLE app.messages ADD COLUMN name TEXT", &session.session_id) + .await + .expect("execute_sql should follow leader redirect"); + + assert_eq!(response, "executed: ALTER TABLE app.messages ADD COLUMN name TEXT"); +} + +#[tokio::test] +#[ntest::timeout(10000)] +async fn execute_query_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39281; + let leader_rpc_port = 39283; + let leader_api_port = 38283; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::Internal, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let client = connect_to(host, follower_rpc_port).await; + let session = client.open_session(None, Some("app")).await.expect("open session"); + + let (message, rows) = client + .execute_query("SELECT 1", &session.session_id) + .await + .expect("execute_query should follow leader redirect"); + + assert_eq!(message, "executed: SELECT 1"); + assert!(rows.is_empty()); +} + +#[tokio::test] +#[ntest::timeout(10000)] +async fn insert_follows_leader_redirect_hint_from_failed_precondition() { + let host = "127.0.0.1"; + let follower_rpc_port = 39381; + let leader_rpc_port = 39383; + let 
leader_api_port = 38383; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::FailedPrecondition, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let client = connect_to(host, follower_rpc_port).await; + let session = client.open_session(None, Some("app")).await.expect("open session"); + + let response = client + .insert( + "app", + "messages", + "shared", + &session.session_id, + None, + vec![r#"{"id":"msg-1"}"#.to_string()], + ) + .await + .expect("insert should follow leader redirect"); + + assert_eq!(response.affected_rows, 1); +} + +#[tokio::test] +#[ntest::timeout(10000)] +async fn begin_transaction_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39481; + let leader_rpc_port = 39483; + let leader_api_port = 38483; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::FailedPrecondition, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let client = connect_to(host, follower_rpc_port).await; + let session = client.open_session(None, Some("app")).await.expect("open session"); + + let transaction_id = client + .begin_transaction(&session.session_id) + .await + .expect("begin_transaction should follow leader redirect"); + + assert!(!transaction_id.is_empty()); +} + +#[tokio::test] +#[ntest::timeout(10000)] +async fn commit_transaction_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39581; + let leader_rpc_port = 39583; + let leader_api_port = 38583; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::FailedPrecondition, + }), + ) + 
.await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let follower_client = connect_to(host, follower_rpc_port).await; + let session = follower_client + .open_session(None, Some("app")) + .await + .expect("open follower session"); + + let leader_client = connect_to(host, leader_rpc_port).await; + leader_client + .open_session(Some(&session.session_id), Some("app")) + .await + .expect("mirror session on leader"); + let transaction_id = leader_client + .begin_transaction(&session.session_id) + .await + .expect("begin leader transaction"); + + let committed_id = follower_client + .commit_transaction(&session.session_id, &transaction_id) + .await + .expect("commit_transaction should follow leader redirect"); + + assert_eq!(committed_id, transaction_id); +} + +#[tokio::test] +#[ntest::timeout(10000)] +async fn rollback_transaction_follows_leader_redirect_hint() { + let host = "127.0.0.1"; + let follower_rpc_port = 39681; + let leader_rpc_port = 39683; + let leader_api_port = 38683; + + start_server_with_executor( + host, + follower_rpc_port, + Arc::new(NotLeaderExecutor { + leader_api_addr: format!("http://{host}:{leader_api_port}"), + code: Code::FailedPrecondition, + }), + ) + .await; + start_server_with_executor(host, leader_rpc_port, Arc::new(MockExecutor)).await; + + let follower_client = connect_to(host, follower_rpc_port).await; + let session = follower_client + .open_session(None, Some("app")) + .await + .expect("open follower session"); + + let leader_client = connect_to(host, leader_rpc_port).await; + leader_client + .open_session(Some(&session.session_id), Some("app")) + .await + .expect("mirror session on leader"); + let transaction_id = leader_client + .begin_transaction(&session.session_id) + .await + .expect("begin leader transaction"); + + let rolled_back_id = follower_client + .rollback_transaction(&session.session_id, &transaction_id) + .await + .expect("rollback_transaction should follow leader redirect"); + + 
assert_eq!(rolled_back_id, transaction_id); +} + #[tokio::test] #[ntest::timeout(10000)] async fn insert_single_row() { @@ -381,5 +699,8 @@ async fn transaction_rollback() { .await .expect("insert in tx"); - client.rollback_transaction(&session.session_id, &tx_id).await.expect("rollback tx"); + client + .rollback_transaction(&session.session_id, &tx_id) + .await + .expect("rollback tx"); } diff --git a/pg/crates/kalam-pg-common/src/config.rs b/pg/crates/kalam-pg-common/src/config.rs index e4f7caa28..cb3cfd087 100644 --- a/pg/crates/kalam-pg-common/src/config.rs +++ b/pg/crates/kalam-pg-common/src/config.rs @@ -1,16 +1,19 @@ -use crate::KalamPgError; use serde::{Deserialize, Serialize}; +use crate::KalamPgError; + /// Remote authentication mode for the PostgreSQL extension. #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, Default)] #[serde(rename_all = "snake_case")] pub enum RemoteAuthMode { - /// No application-level auth header. Use only for unsecured local development or mTLS-only deployments. + /// No application-level auth header. Use only for unsecured local development or mTLS-only + /// deployments. #[default] None, /// Send a pre-shared value in the gRPC `authorization` metadata header on `open_session`. StaticHeader, - /// Send Basic credentials (login_user/login_password) in the gRPC `authorization` metadata on `open_session`. + /// Send Basic credentials (login_user/login_password) in the gRPC `authorization` metadata on + /// `open_session`. 
AccountLogin, } @@ -98,7 +101,8 @@ impl RemoteServerConfig { RemoteAuthMode::None => { if self.login_user.is_some() || self.login_password.is_some() { return Err(KalamPgError::Validation( - "server options 'login_user' and 'login_password' require auth_mode 'account_login'" + "server options 'login_user' and 'login_password' require auth_mode \ + 'account_login'" .to_string(), )); } @@ -112,7 +116,8 @@ impl RemoteServerConfig { } if self.login_user.is_some() || self.login_password.is_some() { return Err(KalamPgError::Validation( - "server options 'login_user' and 'login_password' are only valid with auth_mode 'account_login'" + "server options 'login_user' and 'login_password' are only valid with \ + auth_mode 'account_login'" .to_string(), )); } @@ -120,7 +125,8 @@ impl RemoteServerConfig { RemoteAuthMode::AccountLogin => { if self.auth_header.is_some() { return Err(KalamPgError::Validation( - "server option 'auth_header' cannot be used when auth_mode is 'account_login'" + "server option 'auth_header' cannot be used when auth_mode is \ + 'account_login'" .to_string(), )); } @@ -130,15 +136,10 @@ impl RemoteServerConfig { .to_string(), )); } - if self - .login_password - .as_deref() - .map(str::trim) - .unwrap_or("") - .is_empty() - { + if self.login_password.as_deref().map(str::trim).unwrap_or("").is_empty() { return Err(KalamPgError::Validation( - "server option 'login_password' is required when auth_mode is 'account_login'" + "server option 'login_password' is required when auth_mode is \ + 'account_login'" .to_string(), )); } diff --git a/pg/crates/kalam-pg-common/src/error.rs b/pg/crates/kalam-pg-common/src/error.rs index 7b559d4e6..410d77541 100644 --- a/pg/crates/kalam-pg-common/src/error.rs +++ b/pg/crates/kalam-pg-common/src/error.rs @@ -11,8 +11,8 @@ pub enum KalamPgError { Unsupported(String), /// The KalamDB server could not be reached at the given address. 
#[error( - "KalamDB server is not running or unreachable at {0} – \ - start the server and verify the host/port in CREATE SERVER OPTIONS" + "KalamDB server is not running or unreachable at {0} – start the server and verify the \ + host/port in CREATE SERVER OPTIONS" )] ServerUnreachable(String), } diff --git a/pg/crates/kalam-pg-fdw/src/delete_input.rs b/pg/crates/kalam-pg-fdw/src/delete_input.rs index bf0990278..7cdaef521 100644 --- a/pg/crates/kalam-pg-fdw/src/delete_input.rs +++ b/pg/crates/kalam-pg-fdw/src/delete_input.rs @@ -1,5 +1,4 @@ -use kalamdb_commons::models::UserId; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{models::UserId, TableId, TableType}; /// Typed FDW delete input before it is converted into a backend request. #[derive(Debug, Clone)] diff --git a/pg/crates/kalam-pg-fdw/src/import_foreign_schema.rs b/pg/crates/kalam-pg-fdw/src/import_foreign_schema.rs index cd404e848..b2250e0be 100644 --- a/pg/crates/kalam-pg-fdw/src/import_foreign_schema.rs +++ b/pg/crates/kalam-pg-fdw/src/import_foreign_schema.rs @@ -1,7 +1,6 @@ use kalam_pg_common::{KalamPgError, DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; use kalam_pg_types::foreign_column_definition; -use kalamdb_commons::models::schemas::TableDefinition; -use kalamdb_commons::TableType; +use kalamdb_commons::{models::schemas::TableDefinition, TableType}; /// Build the SQL statement used by `IMPORT FOREIGN SCHEMA` for a Kalam table. 
pub fn create_foreign_table_sql( @@ -26,7 +25,8 @@ pub fn create_foreign_table_sql( columns.push(format!("\"{}\" BOOLEAN", DELETED_COLUMN)); Ok(format!( - "CREATE FOREIGN TABLE \"{}\".\"{}\" ({}) SERVER \"{}\" OPTIONS (namespace '{}', table '{}', table_type '{}')", + "CREATE FOREIGN TABLE \"{}\".\"{}\" ({}) SERVER \"{}\" OPTIONS (namespace '{}', table \ + '{}', table_type '{}')", quote_identifier(foreign_schema), quote_identifier(table_definition.table_name.as_str()), columns.join(", "), diff --git a/pg/crates/kalam-pg-fdw/src/insert_input.rs b/pg/crates/kalam-pg-fdw/src/insert_input.rs index 442ddedcd..a8bfa8036 100644 --- a/pg/crates/kalam-pg-fdw/src/insert_input.rs +++ b/pg/crates/kalam-pg-fdw/src/insert_input.rs @@ -1,6 +1,7 @@ -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{rows::Row, UserId}, + TableId, TableType, +}; /// Typed FDW insert input before it is converted into a backend request. 
#[derive(Debug, Clone)] diff --git a/pg/crates/kalam-pg-fdw/src/lib.rs b/pg/crates/kalam-pg-fdw/src/lib.rs index 0616ff746..ae66a37b9 100644 --- a/pg/crates/kalam-pg-fdw/src/lib.rs +++ b/pg/crates/kalam-pg-fdw/src/lib.rs @@ -18,6 +18,8 @@ mod import_foreign_schema; pub use delete_input::DeleteInput; pub use delete_plan::DeletePlan; +#[cfg(feature = "import-foreign-schema")] +pub use import_foreign_schema::create_foreign_table_sql; pub use insert_input::InsertInput; pub use insert_plan::InsertPlan; pub use request_planner::RequestPlanner; @@ -28,6 +30,3 @@ pub use table_options::TableOptions; pub use update_input::UpdateInput; pub use update_plan::UpdatePlan; pub use virtual_column::VirtualColumn; - -#[cfg(feature = "import-foreign-schema")] -pub use import_foreign_schema::create_foreign_table_sql; diff --git a/pg/crates/kalam-pg-fdw/src/request_planner.rs b/pg/crates/kalam-pg-fdw/src/request_planner.rs index 0678d4b30..eee038a79 100644 --- a/pg/crates/kalam-pg-fdw/src/request_planner.rs +++ b/pg/crates/kalam-pg-fdw/src/request_planner.rs @@ -1,19 +1,15 @@ -use crate::delete_input::DeleteInput; -use crate::delete_plan::DeletePlan; -use crate::insert_input::InsertInput; -use crate::insert_plan::InsertPlan; -use crate::scan_input::ScanInput; -use crate::scan_plan::ScanPlan; -use crate::update_input::UpdateInput; -use crate::update_plan::UpdatePlan; -use crate::virtual_column::VirtualColumn; use datafusion_common::ScalarValue; use kalam_pg_api::{ DeleteRequest, InsertRequest, ScanFilter, ScanRequest, TenantContext, UpdateRequest, }; use kalam_pg_common::{KalamPgError, DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; +use kalamdb_commons::models::{rows::Row, UserId}; + +use crate::{ + delete_input::DeleteInput, delete_plan::DeletePlan, insert_input::InsertInput, + insert_plan::InsertPlan, scan_input::ScanInput, scan_plan::ScanPlan, update_input::UpdateInput, + update_plan::UpdatePlan, 
virtual_column::VirtualColumn, +}; /// Backend-agnostic request planner used by the FDW layer. pub struct RequestPlanner; diff --git a/pg/crates/kalam-pg-fdw/src/scan_input.rs b/pg/crates/kalam-pg-fdw/src/scan_input.rs index bb0f9f196..c603cce2b 100644 --- a/pg/crates/kalam-pg-fdw/src/scan_input.rs +++ b/pg/crates/kalam-pg-fdw/src/scan_input.rs @@ -1,6 +1,5 @@ use kalam_pg_api::ScanFilter; -use kalamdb_commons::models::UserId; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{models::UserId, TableId, TableType}; /// Typed FDW scan input before it is converted into a backend request. #[derive(Debug, Clone)] diff --git a/pg/crates/kalam-pg-fdw/src/scan_plan.rs b/pg/crates/kalam-pg-fdw/src/scan_plan.rs index a7e54cf72..749c1ebe7 100644 --- a/pg/crates/kalam-pg-fdw/src/scan_plan.rs +++ b/pg/crates/kalam-pg-fdw/src/scan_plan.rs @@ -1,6 +1,7 @@ -use crate::virtual_column::VirtualColumn; use kalam_pg_api::ScanRequest; +use crate::virtual_column::VirtualColumn; + /// Planned FDW scan request plus FDW-only virtual-column metadata. #[derive(Debug, Clone)] pub struct ScanPlan { diff --git a/pg/crates/kalam-pg-fdw/src/server_options.rs b/pg/crates/kalam-pg-fdw/src/server_options.rs index 71b5afe94..89826fcc2 100644 --- a/pg/crates/kalam-pg-fdw/src/server_options.rs +++ b/pg/crates/kalam-pg-fdw/src/server_options.rs @@ -1,6 +1,7 @@ -use kalam_pg_common::{KalamPgError, RemoteAuthMode, RemoteServerConfig}; use std::collections::BTreeMap; +use kalam_pg_common::{KalamPgError, RemoteAuthMode, RemoteServerConfig}; + /// Parsed foreign-server options for the PostgreSQL extension. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct ServerOptions { @@ -49,7 +50,8 @@ impl ServerOptions { "static_header" => Ok(RemoteAuthMode::StaticHeader), "account_login" => Ok(RemoteAuthMode::AccountLogin), _ => Err(KalamPgError::Validation(format!( - "server option 'auth_mode' must be one of: none, static_header, account_login (got '{}')", + "server option 'auth_mode' must be one of: none, static_header, account_login \ + (got '{}')", value ))), }) diff --git a/pg/crates/kalam-pg-fdw/src/update_input.rs b/pg/crates/kalam-pg-fdw/src/update_input.rs index cf53cb3b3..6e2da438a 100644 --- a/pg/crates/kalam-pg-fdw/src/update_input.rs +++ b/pg/crates/kalam-pg-fdw/src/update_input.rs @@ -1,6 +1,7 @@ -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{rows::Row, UserId}, + TableId, TableType, +}; /// Typed FDW update input before it is converted into a backend request. #[derive(Debug, Clone)] diff --git a/pg/crates/kalam-pg-fdw/tests/import_foreign_schema.rs b/pg/crates/kalam-pg-fdw/tests/import_foreign_schema.rs index b9390ef56..72b4b7292 100644 --- a/pg/crates/kalam-pg-fdw/tests/import_foreign_schema.rs +++ b/pg/crates/kalam-pg-fdw/tests/import_foreign_schema.rs @@ -1,11 +1,13 @@ use kalam_pg_common::{DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; use kalam_pg_fdw::create_foreign_table_sql; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::schemas::{ - ColumnDefinition, SharedTableOptions, TableDefinition, TableOptions, +use kalamdb_commons::{ + models::{ + datatypes::KalamDataType, + schemas::{ColumnDefinition, SharedTableOptions, TableDefinition, TableOptions}, + NamespaceId, TableName, + }, + TableAccess, TableType, }; -use kalamdb_commons::models::{NamespaceId, TableName}; -use kalamdb_commons::{TableAccess, TableType}; #[test] fn import_sql_includes_virtual_columns_for_user_tables() { diff --git 
a/pg/crates/kalam-pg-fdw/tests/options.rs b/pg/crates/kalam-pg-fdw/tests/options.rs index 786537985..278146091 100644 --- a/pg/crates/kalam-pg-fdw/tests/options.rs +++ b/pg/crates/kalam-pg-fdw/tests/options.rs @@ -1,6 +1,7 @@ +use std::collections::BTreeMap; + use kalam_pg_common::RemoteAuthMode; use kalam_pg_fdw::ServerOptions; -use std::collections::BTreeMap; #[test] fn parses_remote_server_options() { @@ -13,10 +14,7 @@ fn parses_remote_server_options() { assert_eq!(parsed.remote.as_ref().expect("remote config").host, "127.0.0.1"); assert_eq!(parsed.remote.as_ref().expect("remote config").port, 50051); - assert_eq!( - parsed.remote.as_ref().expect("remote config").auth_mode, - RemoteAuthMode::None - ); + assert_eq!(parsed.remote.as_ref().expect("remote config").auth_mode, RemoteAuthMode::None); } #[test] @@ -150,7 +148,8 @@ fn account_login_requires_login_user() { ("login_password".to_string(), "super-secret".to_string()), ]); - let err = ServerOptions::parse(&options).expect_err("account_login without login_user should fail"); + let err = + ServerOptions::parse(&options).expect_err("account_login without login_user should fail"); assert!(err.to_string().contains("login_user")); } @@ -164,6 +163,7 @@ fn static_header_rejects_account_login_fields() { ("login_user".to_string(), "pg_dba".to_string()), ]); - let err = ServerOptions::parse(&options).expect_err("static_header with login fields should fail"); + let err = + ServerOptions::parse(&options).expect_err("static_header with login fields should fail"); assert!(err.to_string().contains("account_login")); } diff --git a/pg/crates/kalam-pg-fdw/tests/request_planner.rs b/pg/crates/kalam-pg-fdw/tests/request_planner.rs index a287416e9..43bd5ab57 100644 --- a/pg/crates/kalam-pg-fdw/tests/request_planner.rs +++ b/pg/crates/kalam-pg-fdw/tests/request_planner.rs @@ -4,9 +4,10 @@ use kalam_pg_common::{DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; use kalam_pg_fdw::{ DeleteInput, InsertInput, RequestPlanner, ScanInput, 
UpdateInput, VirtualColumn, }; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::{NamespaceId, TableName, UserId}; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{rows::Row, NamespaceId, TableName, UserId}, + TableId, TableType, +}; fn table_id() -> TableId { TableId::new(NamespaceId::new("pg_fdw"), TableName::new("messages")) diff --git a/pg/crates/kalam-pg-types/src/foreign_column_definition.rs b/pg/crates/kalam-pg-types/src/foreign_column_definition.rs index 94d01917f..a758133e4 100644 --- a/pg/crates/kalam-pg-types/src/foreign_column_definition.rs +++ b/pg/crates/kalam-pg-types/src/foreign_column_definition.rs @@ -1,7 +1,8 @@ -use crate::pg_type_name::pg_type_name_for; use kalam_pg_common::KalamPgError; use kalamdb_commons::models::schemas::ColumnDefinition; +use crate::pg_type_name::pg_type_name_for; + /// Build a PostgreSQL column definition for a foreign table column. pub fn foreign_column_definition(column: &ColumnDefinition) -> Result { let type_name = pg_type_name_for(&column.data_type)?; diff --git a/pg/crates/kalam-pg-types/tests/type_mapping.rs b/pg/crates/kalam-pg-types/tests/type_mapping.rs index b863fa34c..ab44f18bd 100644 --- a/pg/crates/kalam-pg-types/tests/type_mapping.rs +++ b/pg/crates/kalam-pg-types/tests/type_mapping.rs @@ -1,6 +1,5 @@ use kalam_pg_types::{foreign_column_definition, pg_type_name_for}; -use kalamdb_commons::models::datatypes::KalamDataType; -use kalamdb_commons::models::schemas::ColumnDefinition; +use kalamdb_commons::models::{datatypes::KalamDataType, schemas::ColumnDefinition}; #[test] fn maps_embedding_to_pgvector_type_name() { diff --git a/pg/scripts/pgrx-test-setup.sh b/pg/scripts/pgrx-test-setup.sh index ebc3ae1a4..30d45f253 100755 --- a/pg/scripts/pgrx-test-setup.sh +++ b/pg/scripts/pgrx-test-setup.sh @@ -4,7 +4,8 @@ # # Prerequisites: # 1. pgrx PG installed: cargo pgrx init --pg download -# 2. 
KalamDB server running locally with cluster mode (gRPC on :9188, HTTP on :8080) +# 2. KalamDB server running locally with the gRPC endpoint exported via +# KALAMDB_GRPC_HOST/KALAMDB_GRPC_PORT (defaults: 127.0.0.1:9188) # # Usage: # ./pg/scripts/pgrx-test-setup.sh # Full setup (start PG + install extension + create DB + server) @@ -59,9 +60,9 @@ PG_PORT="${PG_PORT:-288${PG_MAJOR}}" PG_USER="$USER" TEST_DB="kalamdb_test" -# KalamDB gRPC (local server) -KALAMDB_GRPC_HOST="127.0.0.1" -KALAMDB_GRPC_PORT="9188" +# KalamDB gRPC target consumed by the foreign server. +KALAMDB_GRPC_HOST="${KALAMDB_GRPC_HOST:-127.0.0.1}" +KALAMDB_GRPC_PORT="${KALAMDB_GRPC_PORT:-9188}" KALAMDB_LOGIN_USER="${KALAMDB_USER:-root}" KALAMDB_LOGIN_PASSWORD="${KALAMDB_PASSWORD:-${KALAMDB_ROOT_PASSWORD:-kalamdb123}}" diff --git a/pg/src/arrow_to_pg.rs b/pg/src/arrow_to_pg.rs index 705a14ca9..859e9e22b 100644 --- a/pg/src/arrow_to_pg.rs +++ b/pg/src/arrow_to_pg.rs @@ -1,15 +1,17 @@ //! Arrow record batch value → PostgreSQL Datum conversion. -use arrow::array::{ - Array, BinaryArray, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, LargeStringArray, StringArray, TimestampMicrosecondArray, - TimestampMillisecondArray, +use std::ffi::CString; + +use arrow::{ + array::{ + Array, BinaryArray, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array, + Int32Array, Int64Array, LargeStringArray, StringArray, TimestampMicrosecondArray, + TimestampMillisecondArray, + }, + datatypes::{DataType, TimeUnit}, }; -use arrow::datatypes::{DataType, TimeUnit}; use datafusion_common::ScalarValue; -use pgrx::pg_sys; -use pgrx::{rust_str_to_text_p, IntoDatum}; -use std::ffi::CString; +use pgrx::{pg_sys, rust_str_to_text_p, IntoDatum}; /// Days between Unix epoch (1970-01-01) and PostgreSQL epoch (2000-01-01). 
const UNIX_TO_PG_EPOCH_DAYS: i32 = 10_957; diff --git a/pg/src/fdw_ddl.rs b/pg/src/fdw_ddl.rs index 7972aa234..d7d5b17bd 100644 --- a/pg/src/fdw_ddl.rs +++ b/pg/src/fdw_ddl.rs @@ -1,20 +1,25 @@ //! ProcessUtility hook that propagates DDL on KalamDB-backed PostgreSQL tables. //! //! Intercepts: -//! - `CREATE TABLE ... USING kalamdb` → remote `CREATE TABLE` + internal `CREATE FOREIGN TABLE` +//! - `CREATE TABLE ... USING kalamdb` → remote `CREATE TABLE` + internal `CREATE FOREIGN +//! TABLE` //! - `CREATE FOREIGN TABLE` → `CREATE NAMESPACE IF NOT EXISTS` + `CREATE TABLE` -//! (auto-injects `_seq BIGINT` and `_userid TEXT` system columns into -//! the local PG schema; rejects explicit declarations of these columns) +//! (auto-injects `_seq BIGINT` and `_userid TEXT` system columns into the local PG schema; +//! rejects explicit declarations of these columns) //! - `ALTER FOREIGN TABLE` → `ALTER TABLE ADD/DROP COLUMN` //! - `DROP FOREIGN TABLE` → `DROP TABLE IF EXISTS` -use crate::fdw_options::parse_options; +use std::{ + collections::BTreeMap, + ffi::{CStr, CString}, + str::FromStr, +}; + use kalam_pg_common::{KalamPgError, SEQ_COLUMN, USER_ID_COLUMN}; use kalamdb_commons::TableType; use pgrx::pg_sys; -use std::collections::BTreeMap; -use std::ffi::{CStr, CString}; -use std::str::FromStr; + +use crate::fdw_options::parse_options; const DEFAULT_KALAM_SERVER: &str = "kalam_server"; @@ -246,8 +251,7 @@ fn call_prev( } unsafe fn report_sql_error(message: &str) -> ! 
{ - use pgrx::pg_sys::elog::PgLogLevel; - use pgrx::pg_sys::errcodes::PgSqlErrorCode; + use pgrx::pg_sys::{elog::PgLogLevel, errcodes::PgSqlErrorCode}; const PERCENT_S: &CStr = c"%s"; const DOMAIN: *const std::os::raw::c_char = std::ptr::null_mut(); @@ -1074,8 +1078,8 @@ fn validate_no_system_columns(statement_sql: &str) -> Result<(), KalamPgError> { if let Some(ident) = first_sql_identifier(&entry) { if matches!(ident.as_str(), "_userid" | "_seq" | "_deleted") { return Err(KalamPgError::Validation(format!( - "system column '{}' must not be declared explicitly; \ - it is auto-injected by pg_kalam", + "system column '{}' must not be declared explicitly; it is auto-injected by \ + pg_kalam", ident ))); } @@ -1514,7 +1518,8 @@ mod tests { #[test] fn split_top_level_sql_list_handles_nested_parentheses() { let entries = split_top_level_sql_list( - "id BIGINT DEFAULT SNOWFLAKE_ID(), amount NUMERIC(10, 2), created TIMESTAMP DEFAULT NOW()", + "id BIGINT DEFAULT SNOWFLAKE_ID(), amount NUMERIC(10, 2), created TIMESTAMP DEFAULT \ + NOW()", ); assert_eq!(entries.len(), 3); assert_eq!(entries[1], "amount NUMERIC(10, 2)"); @@ -1595,7 +1600,8 @@ mod tests { #[test] fn extract_with_options_from_sql_works_for_using_kalamdb() { - let sql = "CREATE TABLE t (id INT) USING kalamdb WITH (type = 'user', storage_id = 'local', flush_policy = 'rows:100,interval:60');"; + let sql = "CREATE TABLE t (id INT) USING kalamdb WITH (type = 'user', storage_id = \ + 'local', flush_policy = 'rows:100,interval:60');"; let opts = super::extract_with_options_from_sql(sql); assert_eq!(opts.get("type").unwrap(), "user"); assert_eq!(opts.get("storage_id").unwrap(), "local"); @@ -1635,7 +1641,8 @@ mod tests { #[test] fn create_statement_has_if_not_exists_detects_clause() { - let sql = "CREATE TABLE IF NOT EXISTS app.items (id BIGINT) USING kalamdb WITH (type = 'shared');"; + let sql = "CREATE TABLE IF NOT EXISTS app.items (id BIGINT) USING kalamdb WITH (type = \ + 'shared');"; 
assert!(super::create_statement_has_if_not_exists(sql).expect("parse IF NOT EXISTS")); let sql = "CREATE TABLE app.items (id BIGINT) USING kalamdb WITH (type = 'shared');"; @@ -1668,7 +1675,8 @@ mod tests { #[test] fn validate_no_system_columns_rejects_deleted() { - let sql = "CREATE FOREIGN TABLE t (id TEXT, _deleted BOOLEAN) SERVER s OPTIONS (table_type 'shared');"; + let sql = "CREATE FOREIGN TABLE t (id TEXT, _deleted BOOLEAN) SERVER s OPTIONS \ + (table_type 'shared');"; let err = validate_no_system_columns(sql).expect_err("should reject _deleted"); assert!(err.to_string().contains("_deleted")); } diff --git a/pg/src/fdw_handler.rs b/pg/src/fdw_handler.rs index 6416595b4..3bdc486c7 100644 --- a/pg/src/fdw_handler.rs +++ b/pg/src/fdw_handler.rs @@ -3,8 +3,7 @@ //! Provides the `kalam_handler` and `kalam_validator` C functions //! and registers the `pg_kalam` foreign data wrapper via extension SQL. -use pgrx::pg_guard; -use pgrx::pg_sys; +use pgrx::{pg_guard, pg_sys}; /// Build a fully populated `FdwRoutine` for the Kalam FDW. fn create_fdw_routine() -> *mut pg_sys::FdwRoutine { diff --git a/pg/src/fdw_import.rs b/pg/src/fdw_import.rs index dfd8d8f05..b63d609bb 100644 --- a/pg/src/fdw_import.rs +++ b/pg/src/fdw_import.rs @@ -1,7 +1,6 @@ //! `ImportForeignSchema` callback: generate foreign table SQL from Kalam metadata. -use pgrx::pg_guard; -use pgrx::pg_sys; +use pgrx::{pg_guard, pg_sys}; /// `ImportForeignSchema` callback: return a list of SQL statements to create foreign tables. #[pg_guard] diff --git a/pg/src/fdw_modify.rs b/pg/src/fdw_modify.rs index 171fe4a32..8210a2561 100644 --- a/pg/src/fdw_modify.rs +++ b/pg/src/fdw_modify.rs @@ -2,18 +2,17 @@ //! PlanForeignModify, BeginForeignModify, ExecForeignInsert/Update/Delete, //! EndForeignModify. 
-use crate::fdw_options::parse_options; -use crate::fdw_state::KalamModifyState; -use crate::pg_to_kalam::datum_to_scalar; -use crate::relation_table_options::resolve_table_options_for_relation; +use std::{collections::BTreeMap, ffi::CStr}; + use kalam_pg_api::{DeleteRequest, InsertRequest, TenantContext, UpdateRequest}; use kalam_pg_common::{KalamPgError, DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use pgrx::pg_guard; -use pgrx::pg_sys; -use std::collections::BTreeMap; -use std::ffi::CStr; +use kalamdb_commons::models::{rows::Row, UserId}; +use pgrx::{pg_guard, pg_sys}; + +use crate::{ + fdw_options::parse_options, fdw_state::KalamModifyState, pg_to_kalam::datum_to_scalar, + relation_table_options::resolve_table_options_for_relation, +}; /// `IsForeignRelUpdatable` callback: report supported DML operations. #[pg_guard] diff --git a/pg/src/fdw_options.rs b/pg/src/fdw_options.rs index 4f2045513..358f12abb 100644 --- a/pg/src/fdw_options.rs +++ b/pg/src/fdw_options.rs @@ -1,11 +1,10 @@ //! Parse FDW options from PostgreSQL `List` of `DefElem` nodes. +use std::{collections::BTreeMap, ffi::CStr}; + use pgrx::pg_sys; -use std::collections::BTreeMap; -use std::ffi::CStr; /// Parse FDW options from a PostgreSQL `List*` of `DefElem*` nodes into a Rust map. -/// pub fn parse_options(options: *mut pg_sys::List) -> BTreeMap { let mut result = BTreeMap::new(); if options.is_null() { diff --git a/pg/src/fdw_scan.rs b/pg/src/fdw_scan.rs index c818a697d..cffbf58ad 100644 --- a/pg/src/fdw_scan.rs +++ b/pg/src/fdw_scan.rs @@ -1,16 +1,17 @@ //! FDW scan callbacks: GetForeignRelSize, GetForeignPaths, GetForeignPlan, //! BeginForeignScan, IterateForeignScan, ReScanForeignScan, EndForeignScan. 
-use crate::arrow_to_pg::arrow_value_to_datum; -use crate::fdw_options::parse_options; -use crate::fdw_state::KalamScanState; -use crate::relation_table_options::resolve_table_options_for_relation; +use std::ffi::CStr; + use datafusion_common::ScalarValue; use kalam_pg_api::ScanFilter; use kalam_pg_common::{KalamPgError, DELETED_COLUMN, SEQ_COLUMN, USER_ID_COLUMN}; -use pgrx::pg_guard; -use pgrx::pg_sys; -use std::ffi::CStr; +use pgrx::{pg_guard, pg_sys}; + +use crate::{ + arrow_to_pg::arrow_value_to_datum, fdw_options::parse_options, fdw_state::KalamScanState, + relation_table_options::resolve_table_options_for_relation, +}; /// `GetForeignRelSize` callback: estimate relation size. #[pg_guard] diff --git a/pg/src/fdw_state.rs b/pg/src/fdw_state.rs index 805f054be..d628c953c 100644 --- a/pg/src/fdw_state.rs +++ b/pg/src/fdw_state.rs @@ -1,9 +1,10 @@ //! Scan and modify state structs stored as `fdw_state` in FDW callbacks. +use std::sync::Arc; + use arrow::record_batch::RecordBatch; use kalam_pg_api::KalamBackendExecutor; use kalam_pg_fdw::TableOptions; -use std::sync::Arc; /// State stored in `ForeignScanState::fdw_state` during scan lifecycle. pub struct KalamScanState { diff --git a/pg/src/fdw_xact.rs b/pg/src/fdw_xact.rs index 2c896d3b1..b0e7132ce 100644 --- a/pg/src/fdw_xact.rs +++ b/pg/src/fdw_xact.rs @@ -5,27 +5,26 @@ //! (commit or rollback) when the PostgreSQL transaction ends. //! //! Current contract: -//! - Native PostgreSQL work can happen before the first foreign statement. -//! No KalamDB transaction is opened until the first FDW scan/modify path -//! needs one. -//! - The canonical transaction id is the one returned by KalamDB -//! `begin_transaction`; all later foreign work in the same PostgreSQL -//! transaction appends to that same remote transaction. -//! - Foreign work inside an explicit PostgreSQL transaction is staged into the -//! matching KalamDB transaction as each FDW statement runs. -//! 
- Autocommit insert batching still flushes at PostgreSQL -//! `XACT_EVENT_PRE_COMMIT` when a transaction-scoped buffer is active, before -//! PostgreSQL reaches its final COMMIT record. -//! - On PostgreSQL abort, or on a remote PRE_COMMIT failure, buffered writes are -//! discarded and the remote transaction is rolled back. +//! - Native PostgreSQL work can happen before the first foreign statement. No KalamDB transaction +//! is opened until the first FDW scan/modify path needs one. +//! - The canonical transaction id is the one returned by KalamDB `begin_transaction`; all later +//! foreign work in the same PostgreSQL transaction appends to that same remote transaction. +//! - Foreign work inside an explicit PostgreSQL transaction is staged into the matching KalamDB +//! transaction as each FDW statement runs. +//! - Autocommit insert batching still flushes at PostgreSQL `XACT_EVENT_PRE_COMMIT` when a +//! transaction-scoped buffer is active, before PostgreSQL reaches its final COMMIT record. +//! - On PostgreSQL abort, or on a remote PRE_COMMIT failure, buffered writes are discarded and the +//! remote transaction is rolled back. //! //! This gives one native explicit-transaction flow for PostgreSQL + KalamDB //! foreign tables without a separate custom write path. It is still not full //! distributed 2PC/XA: crash-safe atomicity across PostgreSQL local storage and //! KalamDB storage is not guaranteed. 
-use std::collections::HashMap; -use std::sync::{LazyLock, Mutex}; +use std::{ + collections::HashMap, + sync::{LazyLock, Mutex}, +}; use pgrx::pg_sys; diff --git a/pg/src/lib.rs b/pg/src/lib.rs index 2256f9324..bc25bec5c 100644 --- a/pg/src/lib.rs +++ b/pg/src/lib.rs @@ -3,16 +3,19 @@ #[cfg(any(not(test), feature = "pg_test"))] mod remote_executor; #[cfg(any(not(test), feature = "pg_test"))] -mod remote_state; -#[cfg(any(not(test), feature = "pg_test"))] mod remote_server; +#[cfg(any(not(test), feature = "pg_test"))] +mod remote_state; mod session_settings; #[cfg(any(feature = "pg_test", feature = "e2e"))] pub(crate) mod test_alloc { + use std::{ + alloc::{GlobalAlloc, Layout, System}, + sync::atomic::{AtomicUsize, Ordering}, + }; + use serde::Serialize; - use std::alloc::{GlobalAlloc, Layout, System}; - use std::sync::atomic::{AtomicUsize, Ordering}; pub struct TrackingAllocator; @@ -115,9 +118,10 @@ pub(crate) mod test_alloc { #[cfg(feature = "e2e")] pub(crate) mod conversion_test_stats { - use serde::Serialize; use std::sync::atomic::{AtomicUsize, Ordering}; + use serde::Serialize; + #[derive(Clone, Copy, Debug, Default, Serialize)] pub struct ConversionTestStats { pub text_to_pg_fast_path_calls: usize, diff --git a/pg/src/pg_to_kalam.rs b/pg/src/pg_to_kalam.rs index 7da1516c7..3b4c3a025 100644 --- a/pg/src/pg_to_kalam.rs +++ b/pg/src/pg_to_kalam.rs @@ -1,10 +1,10 @@ //! PostgreSQL Datum → KalamDB ScalarValue conversion for INSERT/UPDATE. -use datafusion_common::ScalarValue; -use pgrx::datum::JsonString; -use pgrx::{pg_sys, text_to_rust_str_unchecked, FromDatum}; use std::ffi::{c_void, CStr}; +use datafusion_common::ScalarValue; +use pgrx::{datum::JsonString, pg_sys, text_to_rust_str_unchecked, FromDatum}; + /// Convert a PostgreSQL datum to a DataFusion ScalarValue based on the column's type OID. 
/// /// # Safety diff --git a/pg/src/relation_table_options.rs b/pg/src/relation_table_options.rs index 2778f3aba..53d122fb6 100644 --- a/pg/src/relation_table_options.rs +++ b/pg/src/relation_table_options.rs @@ -1,11 +1,12 @@ +use std::{collections::BTreeMap, ffi::CStr, str::FromStr}; + use kalam_pg_common::KalamPgError; use kalam_pg_fdw::TableOptions; -use kalamdb_commons::models::{NamespaceId, TableName}; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{NamespaceId, TableName}, + TableId, TableType, +}; use pgrx::pg_sys; -use std::collections::BTreeMap; -use std::ffi::CStr; -use std::str::FromStr; pub fn resolve_table_options_for_relation( relation: pg_sys::Relation, diff --git a/pg/src/remote_executor.rs b/pg/src/remote_executor.rs index 90c512cdc..282a434bc 100644 --- a/pg/src/remote_executor.rs +++ b/pg/src/remote_executor.rs @@ -1,8 +1,10 @@ use async_trait::async_trait; use datafusion_common::ScalarValue; -use kalam_pg_api::request::{DeleteRequest, InsertRequest, ScanRequest, UpdateRequest}; -use kalam_pg_api::response::{MutationResponse, ScanResponse}; -use kalam_pg_api::KalamBackendExecutor; +use kalam_pg_api::{ + request::{DeleteRequest, InsertRequest, ScanRequest, UpdateRequest}, + response::{MutationResponse, ScanResponse}, + KalamBackendExecutor, +}; use kalam_pg_client::RemoteKalamClient; use kalam_pg_common::KalamPgError; @@ -86,7 +88,7 @@ impl KalamBackendExecutor for RemoteBackendExecutor { request.validate()?; let user_id = request.tenant_context.effective_user_id().map(|u| u.as_str().to_string()); - let updates_json = serde_json::to_string(&request.updates).map_err(|error| { + let updates_json = serde_json::to_string(&[&request.updates]).map_err(|error| { KalamPgError::Execution(format!( "failed to serialize update payload for {} pk {}: {}", request.table_id.full_name(), diff --git a/pg/src/remote_server.rs b/pg/src/remote_server.rs index 827ac8f78..9b48d5f8d 100644 --- a/pg/src/remote_server.rs +++ 
b/pg/src/remote_server.rs @@ -1,10 +1,13 @@ -use crate::fdw_options::parse_options; -use crate::remote_state::{self, RemoteExtensionState}; +use std::{ffi::CString, sync::Arc}; + use kalam_pg_common::KalamPgError; use kalam_pg_fdw::ServerOptions; use pgrx::pg_sys; -use std::ffi::CString; -use std::sync::Arc; + +use crate::{ + fdw_options::parse_options, + remote_state::{self, RemoteExtensionState}, +}; unsafe fn remote_state_for_server( server: *mut pg_sys::ForeignServer, @@ -38,11 +41,8 @@ pub unsafe fn remote_state_for_server_name( let c_name = CString::new(server_name).unwrap_or_default(); let server = pg_sys::GetForeignServerByName(c_name.as_ptr(), true); if server.is_null() { - return Err(KalamPgError::Execution(format!( - "foreign server '{}' not found", - server_name - ))); + return Err(KalamPgError::Execution(format!("foreign server '{}' not found", server_name))); } remote_state_for_server(server) -} \ No newline at end of file +} diff --git a/pg/src/remote_state.rs b/pg/src/remote_state.rs index 0f671604b..2a622a814 100644 --- a/pg/src/remote_state.rs +++ b/pg/src/remote_state.rs @@ -1,7 +1,8 @@ -use std::collections::HashMap; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use std::sync::{Arc, Mutex, OnceLock}; +use std::{ + collections::{hash_map::DefaultHasher, HashMap}, + hash::{Hash, Hasher}, + sync::{Arc, Mutex, OnceLock}, +}; use kalam_pg_api::KalamBackendExecutor; use kalam_pg_client::RemoteKalamClient; @@ -126,7 +127,8 @@ fn build_remote_extension_state( let client = runtime.block_on(async { RemoteKalamClient::connect(config.clone()).await })?; let session_id = pg_backend_session_id(config, current_backend_pid()); - let session = runtime.block_on(async { client.open_session(Some(session_id.as_str()), None).await })?; + let session = + runtime.block_on(async { client.open_session(Some(session_id.as_str()), None).await })?; Ok(RemoteExtensionState { client, @@ -207,13 +209,14 @@ unsafe extern "C-unwind" fn 
on_proc_exit_close_sessions(_code: i32, _arg: pg_sys #[cfg(test)] mod tests { - use super::*; - - use std::net::SocketAddr; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::mpsc; - use std::sync::Mutex; - use std::time::Duration; + use std::{ + net::SocketAddr, + sync::{ + atomic::{AtomicUsize, Ordering}, + mpsc, Mutex, + }, + time::Duration, + }; use async_trait::async_trait; use bytes::Bytes; @@ -230,6 +233,8 @@ mod tests { use tokio_stream::wrappers::TcpListenerStream; use tonic::{Request, Response, Status}; + use super::*; + #[derive(Default)] struct CountingState { open_session_calls: AtomicUsize, diff --git a/pg/src/write_buffer.rs b/pg/src/write_buffer.rs index e6959c311..bc11dd437 100644 --- a/pg/src/write_buffer.rs +++ b/pg/src/write_buffer.rs @@ -16,14 +16,17 @@ //! - `PRE_COMMIT` xact callback → flush ALL pending writes before commit //! - `BeginForeignScan` on same table → flush for read-your-writes consistency -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; use kalam_pg_api::{InsertRequest, KalamBackendExecutor, TenantContext}; use kalam_pg_common::KalamPgError; -use kalamdb_commons::models::rows::Row; -use kalamdb_commons::models::UserId; -use kalamdb_commons::{TableId, TableType}; +use kalamdb_commons::{ + models::{rows::Row, UserId}, + TableId, TableType, +}; /// Maximum rows to buffer before auto-flushing. 
const FLUSH_THRESHOLD: usize = 256; @@ -194,16 +197,15 @@ pub fn discard_all() { #[cfg(test)] mod tests { - use super::*; - - use std::collections::BTreeMap; - use std::sync::LazyLock; + use std::{collections::BTreeMap, sync::LazyLock}; use async_trait::async_trait; use datafusion_common::ScalarValue; use kalam_pg_api::{DeleteRequest, MutationResponse, ScanRequest, ScanResponse, UpdateRequest}; use kalamdb_commons::models::{NamespaceId, TableName}; + use super::*; + static TEST_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); #[derive(Default)] diff --git a/pg/test.sh b/pg/test.sh index 98210b7b2..d7661d6a4 100755 --- a/pg/test.sh +++ b/pg/test.sh @@ -24,6 +24,8 @@ # # Environment variables (all optional): # KALAMDB_SERVER_URL KalamDB HTTP base URL (default: http://127.0.0.1:8080) +# KALAMDB_GRPC_HOST KalamDB gRPC host (default: inferred from server URL) +# KALAMDB_GRPC_PORT KalamDB gRPC port (default: inferred from server URL) # KALAMDB_PG_HOST Postgres host (default: 127.0.0.1) # KALAMDB_PG_PORT Postgres port (default: 28816) # KALAMDB_PG_USER Postgres user (default: $USER) @@ -60,14 +62,52 @@ step() { echo ""; echo "==> $*"; } ok() { echo " OK: $*"; } die() { echo ""; echo "ERROR: $*" >&2; exit 1; } +infer_kalamdb_grpc_target() { + local server_url="$1" + local authority="${server_url#*://}" + local host="" + local http_port="" + local grpc_port="9188" + + authority="${authority%%/*}" + authority="${authority##*@}" + + if [[ "$authority" =~ ^\[([^]]+)\](:(.+))?$ ]]; then + host="${BASH_REMATCH[1]}" + http_port="${BASH_REMATCH[3]:-}" + elif [[ "$authority" =~ ^([^:]+)(:([0-9]+))?$ ]]; then + host="${BASH_REMATCH[1]}" + http_port="${BASH_REMATCH[3]:-}" + fi + + if [[ -z "$host" ]]; then + host="127.0.0.1" + fi + + case "$http_port" in + 8080) grpc_port="9188" ;; + 8081) grpc_port="9081" ;; + 8082) grpc_port="9082" ;; + 8083) grpc_port="9083" ;; + esac + + printf '%s %s\n' "$host" "$grpc_port" +} + 
KALAMDB_SERVER_URL="${KALAMDB_SERVER_URL:-http://127.0.0.1:8080}" +read -r DEFAULT_KALAMDB_GRPC_HOST DEFAULT_KALAMDB_GRPC_PORT < <(infer_kalamdb_grpc_target "$KALAMDB_SERVER_URL") +KALAMDB_GRPC_HOST="${KALAMDB_GRPC_HOST:-$DEFAULT_KALAMDB_GRPC_HOST}" +KALAMDB_GRPC_PORT="${KALAMDB_GRPC_PORT:-$DEFAULT_KALAMDB_GRPC_PORT}" export KALAMDB_SERVER_URL +export KALAMDB_GRPC_HOST +export KALAMDB_GRPC_PORT # ── Print config ────────────────────────────────────────────────────────── echo "========================================================" echo " pg_kalam End-to-End Tests (local dev)" echo "========================================================" echo " KalamDB server: ${KALAMDB_SERVER_URL}" +echo " KalamDB gRPC: ${KALAMDB_GRPC_HOST}:${KALAMDB_GRPC_PORT}" echo " Postgres: ${KALAMDB_PG_HOST:-127.0.0.1}:${KALAMDB_PG_PORT:-28816} (pgrx)" echo " Nextest filter: $NEXTEST_FILTER" echo "========================================================" diff --git a/pg/tests/e2e_common/mod.rs b/pg/tests/e2e_common/mod.rs index 32dde2c2e..2c1c42b6a 100644 --- a/pg/tests/e2e_common/mod.rs +++ b/pg/tests/e2e_common/mod.rs @@ -9,17 +9,19 @@ pub mod tcp_proxy; #[path = "../support/http_client.rs"] mod http_client; -use std::hash::{Hash, Hasher}; -use std::ops::{Deref, DerefMut}; -use std::process::Command; -use std::sync::OnceLock; -use std::time::Duration; -use std::{env, fmt, future::Future}; +use std::{ + env, fmt, + future::Future, + hash::{Hash, Hasher}, + ops::{Deref, DerefMut}, + process::Command, + sync::OnceLock, + time::Duration, +}; use http_client::TestHttpClient; use serde_json::Value; -use tokio_postgres::error::SqlState; -use tokio_postgres::{Config, NoTls}; +use tokio_postgres::{error::SqlState, Config, NoTls}; pub fn unique_name(prefix: &str) -> String { use std::sync::atomic::{AtomicU64, Ordering}; @@ -184,10 +186,9 @@ async fn server_option_exists( option_name: &str, ) -> bool { pg.query_opt( - "SELECT 1 \ - FROM pg_foreign_server AS server \ - CROSS JOIN LATERAL 
pg_options_to_table(server.srvoptions) AS option_entry \ - WHERE server.srvname = $1 AND option_entry.option_name = $2", + "SELECT 1 FROM pg_foreign_server AS server CROSS JOIN LATERAL \ + pg_options_to_table(server.srvoptions) AS option_entry WHERE server.srvname = $1 AND \ + option_entry.option_name = $2", &[&server_name, &option_name], ) .await @@ -342,8 +343,18 @@ impl TestEnv { serde_json::from_str(&text).unwrap_or(Value::Null) } + pub async fn kalamdb_sql_at(&self, base_url: &str, sql: &str) -> Value { + let text = self.kalamdb_sql_text_at(base_url, sql).await; + serde_json::from_str(&text).unwrap_or(Value::Null) + } + pub async fn kalamdb_sql_text(&self, sql: &str) -> String { let base_url = kalamdb_auth_config().base_url; + self.kalamdb_sql_text_at(&base_url, sql).await + } + + pub async fn kalamdb_sql_text_at(&self, base_url: &str, sql: &str) -> String { + let base_url = base_url.trim_end_matches('/'); let url = format!("{base_url}/v1/api/sql"); let body = serde_json::json!({ "sql": sql }); let resp = self @@ -447,8 +458,7 @@ impl TestEnv { tokio::time::sleep(Duration::from_secs(1)).await; } panic!( - "KalamDB not reachable at {}\n\ - Start with: cd backend && cargo run", + "KalamDB not reachable at {}\nStart with: cd backend && cargo run", config.base_url ); } @@ -492,7 +502,8 @@ impl TestEnv { } panic!( - "Failed to authenticate KalamDB test environment ({config}). Set KALAMDB_USER/KALAMDB_PASSWORD or KALAMDB_ROOT_PASSWORD to match the running server." + "Failed to authenticate KalamDB test environment ({config}). Set \ + KALAMDB_USER/KALAMDB_PASSWORD or KALAMDB_ROOT_PASSWORD to match the running server." 
); } @@ -541,20 +552,16 @@ impl TestEnv { .expect("create kalam_server foreign server"); pg.batch_execute(&format!( - "ALTER SERVER kalam_server OPTIONS (SET host '{grpc_host}', SET port '{grpc_port}');" + "ALTER SERVER kalam_server OPTIONS (SET host '{grpc_host}', SET port \ + '{grpc_port}');" )) .await .expect("repoint kalam_server foreign server"); drop_server_option_if_present(&pg, "kalam_server", "auth_header").await; ensure_server_option(&pg, "kalam_server", "auth_mode", "account_login").await; - ensure_server_option( - &pg, - "kalam_server", - "login_user", - &auth_config.login_username, - ) - .await; + ensure_server_option(&pg, "kalam_server", "login_user", &auth_config.login_username) + .await; ensure_server_option( &pg, "kalam_server", @@ -591,8 +598,8 @@ impl TestEnv { } } panic!( - "PostgreSQL not reachable at {pg_host}:{pg_port}\n\ - Start with: ./pg/scripts/pgrx-test-setup.sh --start" + "PostgreSQL not reachable at {pg_host}:{pg_port}\nStart with: \ + ./pg/scripts/pgrx-test-setup.sh --start" ); } @@ -665,8 +672,17 @@ pub async fn create_shared_kalam_table_in_schema( table: &str, columns: &str, ) { - create_kalam_table_in_schema(client, schema, table, columns, "shared", "create shared Kalam table") - .await; + create_kalam_table_in_schema( + client, + schema, + table, + columns, + "shared", + "create shared Kalam table", + ) + .await; + + rebind_shared_table_to_leader(client, schema, table, columns).await; } pub async fn create_user_kalam_table_in_schema( @@ -721,6 +737,173 @@ fn sql_first_cell_i64(result: &Value) -> Option { }) } +fn sql_first_cell_string(result: &Value) -> Option { + result["results"] + .as_array() + .and_then(|results| results.first()) + .and_then(|entry| entry["rows"].as_array()) + .and_then(|rows| rows.first()) + .and_then(|row| row.as_array()) + .and_then(|columns| columns.first()) + .and_then(Value::as_str) + .map(ToString::to_string) +} + +fn grpc_target_from_api_addr(api_addr: &str) -> Option<(String, u16)> { + let authority = 
api_addr + .trim_start_matches("http://") + .trim_start_matches("https://") + .split('/') + .next() + .unwrap_or(api_addr); + let (host, http_port) = authority.rsplit_once(':')?; + let http_port = http_port.parse::().ok()?; + let grpc_port = match http_port { + 8080 => 9188, + _ => http_port.checked_add(1000)?, + }; + Some((host.to_string(), grpc_port)) +} + +async fn shared_leader_grpc_target_for_env(env: &TestEnv) -> (String, u16) { + let deadline = std::time::Instant::now() + Duration::from_secs(2); + let mut triggered_elections = 0; + + loop { + let result = env + .kalamdb_sql( + "SELECT cluster.api_addr FROM system.cluster_groups AS groups JOIN \ + system.cluster AS cluster ON groups.current_leader = cluster.node_id WHERE \ + groups.group_type = 'shared_data' AND groups.current_leader IS NOT NULL LIMIT 1", + ) + .await; + + if let Some(target) = + sql_first_cell_string(&result).and_then(|api_addr| grpc_target_from_api_addr(&api_addr)) + { + return target; + } + + if std::time::Instant::now() >= deadline { + return kalamdb_grpc_target(); + } + + if triggered_elections < 3 { + env.kalamdb_sql("CLUSTER TRIGGER ELECTION").await; + triggered_elections += 1; + } + + tokio::time::sleep(Duration::from_millis(50)).await; + } +} + +pub async fn shared_leader_grpc_target() -> (String, u16) { + let env = TestEnv::global().await; + shared_leader_grpc_target_for_env(env).await +} + +async fn user_shard_leader_grpc_target_for_env(env: &TestEnv, user_id: &str) -> (String, u16) { + let num_user_shards = cluster_user_shard_count(env).await; + let group_id = user_shard_group_id(user_id, num_user_shards); + let deadline = std::time::Instant::now() + Duration::from_secs(2); + let mut triggered_elections = 0; + + loop { + let result = env + .kalamdb_sql(&format!( + "SELECT cluster.api_addr FROM system.cluster_groups AS groups JOIN \ + system.cluster AS cluster ON groups.current_leader = cluster.node_id WHERE \ + groups.group_id = {group_id} AND groups.current_leader IS NOT NULL 
LIMIT 1" + )) + .await; + + if let Some(target) = + sql_first_cell_string(&result).and_then(|api_addr| grpc_target_from_api_addr(&api_addr)) + { + return target; + } + + if std::time::Instant::now() >= deadline { + return kalamdb_grpc_target(); + } + + if triggered_elections < 3 { + env.kalamdb_sql("CLUSTER TRIGGER ELECTION").await; + triggered_elections += 1; + } + + tokio::time::sleep(Duration::from_millis(50)).await; + } +} + +pub async fn user_shard_leader_grpc_target(user_id: &str) -> (String, u16) { + let env = TestEnv::global().await; + user_shard_leader_grpc_target_for_env(env, user_id).await +} + +fn shared_table_server_name(schema: &str, table: &str) -> String { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + schema.hash(&mut hasher); + table.hash(&mut hasher); + format!("kalam_shared_{:x}", hasher.finish()) +} + +fn user_table_server_name(schema: &str, table: &str) -> String { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + schema.hash(&mut hasher); + table.hash(&mut hasher); + format!("kalam_user_{:x}", hasher.finish()) +} + +async fn rebind_shared_table_to_leader( + client: &tokio_postgres::Client, + schema: &str, + table: &str, + columns: &str, +) { + let (leader_host, leader_port) = shared_leader_grpc_target().await; + let server_name = shared_table_server_name(schema, table); + let server_options = kalamdb_account_login_server_options(&leader_host, leader_port); + + client + .batch_execute(&format!( + "DROP FOREIGN TABLE IF EXISTS {schema}.{table}; DROP SERVER IF EXISTS {server_name} \ + CASCADE; CREATE SERVER {server_name} FOREIGN DATA WRAPPER pg_kalam OPTIONS \ + ({server_options}); CREATE FOREIGN TABLE {schema}.{table} ({columns}) SERVER \ + {server_name} OPTIONS (namespace '{}', \"table\" '{}', table_type 'shared');", + sql_literal(schema), + sql_literal(table), + )) + .await + .expect("rebind shared Kalam table to shared leader"); +} + +pub async fn rebind_user_table_to_user_leader( + client: 
&tokio_postgres::Client, + schema: &str, + table: &str, + columns: &str, + user_id: &str, +) { + let (leader_host, leader_port) = user_shard_leader_grpc_target(user_id).await; + let server_name = user_table_server_name(schema, table); + let server_options = kalamdb_account_login_server_options(&leader_host, leader_port); + + client + .batch_execute(&format!( + "DROP FOREIGN TABLE IF EXISTS {schema}.{table}; DROP SERVER IF EXISTS {server_name} \ + CASCADE; CREATE SERVER {server_name} FOREIGN DATA WRAPPER pg_kalam OPTIONS \ + ({server_options}); CREATE FOREIGN TABLE {schema}.{table} ({columns}) SERVER \ + {server_name} OPTIONS (namespace '{}', \"table\" '{}', table_type 'user');", + sql_literal(schema), + sql_literal(table), + )) + .await + .expect("rebind user Kalam table to user shard leader"); + + wait_for_table_queryable(client, &format!("{schema}.{table}")).await; +} + pub async fn wait_for_remote_pg_session_cleanup(backend_pid: u32, timeout: Duration) { let env = TestEnv::global().await; let deadline = std::time::Instant::now() + timeout; @@ -728,7 +911,8 @@ pub async fn wait_for_remote_pg_session_cleanup(backend_pid: u32, timeout: Durat loop { let result = env .kalamdb_sql(&format!( - "SELECT COUNT(*) AS session_count FROM system.sessions WHERE backend_pid = {backend_pid} LIMIT 1" + "SELECT COUNT(*) AS session_count FROM system.sessions WHERE backend_pid = \ + {backend_pid} LIMIT 1" )) .await; let count = sql_first_cell_i64(&result).unwrap_or_default(); @@ -738,7 +922,8 @@ pub async fn wait_for_remote_pg_session_cleanup(backend_pid: u32, timeout: Durat if std::time::Instant::now() >= deadline { panic!( - "remote pg session for backend_pid {backend_pid} remained visible in system.sessions past timeout" + "remote pg session for backend_pid {backend_pid} remained visible in \ + system.sessions past timeout" ); } @@ -774,7 +959,8 @@ pub async fn await_user_shard_leader(user_id: &str) { loop { let result = env .kalamdb_sql(&format!( - "SELECT group_id FROM 
system.cluster_groups WHERE group_id = {group_id} AND current_leader IS NOT NULL" + "SELECT group_id FROM system.cluster_groups WHERE group_id = {group_id} AND \ + current_leader IS NOT NULL" )) .await; @@ -810,7 +996,8 @@ pub async fn same_user_shard_pair(first_user_id: &str, second_prefix: &str) -> ( } panic!( - "failed to find a user id with prefix '{second_prefix}' on the same shard as '{first_user_id}'" + "failed to find a user id with prefix '{second_prefix}' on the same shard as \ + '{first_user_id}'" ); } @@ -944,8 +1131,18 @@ pub async fn timed_query( } pub fn kalamdb_pid() -> u32 { + let base_url = kalamdb_auth_config().base_url; + let port = base_url + .trim_start_matches("http://") + .trim_start_matches("https://") + .split('/') + .next() + .and_then(|authority| authority.rsplit_once(':')) + .and_then(|(_, port)| port.parse::().ok()) + .unwrap_or(8080); + let port_arg = format!("-iTCP:{port}"); let output = Command::new("lsof") - .args(["-nP", "-iTCP:8080", "-sTCP:LISTEN", "-t"]) + .args(["-nP", port_arg.as_str(), "-sTCP:LISTEN", "-t"]) .output() .expect("run lsof for KalamDB pid"); assert!( @@ -957,7 +1154,7 @@ pub fn kalamdb_pid() -> u32 { stdout .lines() .find_map(|line| line.trim().parse::().ok()) - .expect("find KalamDB pid listening on 8080") + .unwrap_or_else(|| panic!("find KalamDB pid listening on {port}")) } pub fn process_rss_kb(pid: u32) -> u64 { diff --git a/pg/tests/e2e_common/tcp_proxy.rs b/pg/tests/e2e_common/tcp_proxy.rs index 90d697426..4fddf3778 100644 --- a/pg/tests/e2e_common/tcp_proxy.rs +++ b/pg/tests/e2e_common/tcp_proxy.rs @@ -1,13 +1,19 @@ -use std::collections::HashMap; -use std::io::ErrorKind; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::Arc; - -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::net::{TcpListener, TcpStream}; -use tokio::sync::Mutex as TokioMutex; -use tokio::task::JoinHandle; -use tokio::time::{sleep, Duration}; +use std::{ + collections::HashMap, + io::ErrorKind, + 
sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, + }, +}; + +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + net::{TcpListener, TcpStream}, + sync::Mutex as TokioMutex, + task::JoinHandle, + time::{sleep, Duration}, +}; pub struct TcpDisconnectProxy { base_url: String, @@ -56,10 +62,7 @@ impl TcpDisconnectProxy { let task = tokio::spawn(async move { if let Ok(mut outbound) = TcpStream::connect(&target_addr).await { if let Ok(local_addr) = outbound.local_addr() { - backend_addrs_for_task - .lock() - .await - .insert(id, local_addr.to_string()); + backend_addrs_for_task.lock().await.insert(id, local_addr.to_string()); } let (inbound_reader, inbound_writer) = inbound.split(); diff --git a/pg/tests/e2e_ddl/common.rs b/pg/tests/e2e_ddl/common.rs index a5a33c0e0..b0382e0ba 100644 --- a/pg/tests/e2e_ddl/common.rs +++ b/pg/tests/e2e_ddl/common.rs @@ -1,5 +1,7 @@ -pub use crate::e2e_common::{ensure_schema_exists, postgres_error_text, unique_name}; -pub use crate::e2e_ddl_common::DdlTestEnv; +pub use crate::{ + e2e_common::{ensure_schema_exists, postgres_error_text, unique_name}, + e2e_ddl_common::DdlTestEnv, +}; /// Early-return from a DDL test when pgrx prerequisites are not met. 
/// Usage: `let env = require_ddl_env!();` diff --git a/pg/tests/e2e_ddl/lifecycle.rs b/pg/tests/e2e_ddl/lifecycle.rs index 727394653..973d087db 100644 --- a/pg/tests/e2e_ddl/lifecycle.rs +++ b/pg/tests/e2e_ddl/lifecycle.rs @@ -1,6 +1,7 @@ -use kalam_client::{AuthProvider, KalamLinkClient}; use std::time::{Duration, Instant}; +use kalam_client::{AuthProvider, KalamLinkClient}; + use super::common::{ensure_schema_exists, require_ddl_env, unique_name}; fn kalamdb_server_url() -> String { @@ -246,7 +247,8 @@ async fn e2e_ddl_file_column_roundtrip_via_kalamlink() { .expect("build KalamLink client"); let insert_sql = format!( - "INSERT INTO {namespace}.{table} (id, attachment) VALUES ('{row_id}', FILE(\"attachment\"))" + "INSERT INTO {namespace}.{table} (id, attachment) VALUES ('{row_id}', \ + FILE(\"attachment\"))" ); let insert_result = client .execute_with_files( @@ -347,7 +349,8 @@ async fn e2e_ddl_multiple_file_columns_roundtrip_via_kalamlink() { let client = kalamlink_client(&env.bearer_token); let insert_sql = format!( - "INSERT INTO {namespace}.{table} (id, avatar, contract) VALUES ('{row_id}', FILE(\"avatar\"), FILE(\"contract\"))" + "INSERT INTO {namespace}.{table} (id, avatar, contract) VALUES ('{row_id}', \ + FILE(\"avatar\"), FILE(\"contract\"))" ); let insert_result = client .execute_with_files( @@ -411,7 +414,8 @@ async fn e2e_ddl_file_update_via_kalamlink_is_visible_in_postgres() { let client = kalamlink_client(&env.bearer_token); let insert_sql = format!( - "INSERT INTO {namespace}.{table} (id, attachment) VALUES ('{row_id}', FILE(\"attachment\"))" + "INSERT INTO {namespace}.{table} (id, attachment) VALUES ('{row_id}', \ + FILE(\"attachment\"))" ); let insert_result = client .execute_with_files( diff --git a/pg/tests/e2e_ddl/mirroring.rs b/pg/tests/e2e_ddl/mirroring.rs index 52e23f49c..439ca9edb 100644 --- a/pg/tests/e2e_ddl/mirroring.rs +++ b/pg/tests/e2e_ddl/mirroring.rs @@ -65,7 +65,8 @@ async fn e2e_ddl_create_table_mirrors_columns_identically() { 
kalam_columns.iter().all(|name| { pg_columns.contains(name) || name == "_userid" || name == "_seq" || name == "_deleted" }), - "KalamDB should only add known internal columns beyond the PostgreSQL schema: {kalam_columns:?}" + "KalamDB should only add known internal columns beyond the PostgreSQL schema: \ + {kalam_columns:?}" ); pg.batch_execute(&format!("DROP FOREIGN TABLE IF EXISTS {ns}.{table};")) @@ -97,10 +98,8 @@ async fn e2e_ddl_preserves_primary_key_not_null_and_defaults() { let columns = env .kalamdb_sql(&format!( - "SELECT column_name, default_value, nullable, primary_key \ - FROM system.columns \ - WHERE namespace_id = '{ns}' AND table_name = '{table}' \ - ORDER BY ordinal" + "SELECT column_name, default_value, nullable, primary_key FROM system.columns WHERE \ + namespace_id = '{ns}' AND table_name = '{table}' ORDER BY ordinal" )) .await; let rows = columns["results"] diff --git a/pg/tests/e2e_ddl/options.rs b/pg/tests/e2e_ddl/options.rs index c4f7ac04e..8c7f59462 100644 --- a/pg/tests/e2e_ddl/options.rs +++ b/pg/tests/e2e_ddl/options.rs @@ -1,8 +1,10 @@ +use std::env; + +use tokio_postgres::{Config, NoTls}; + use super::common::{ ensure_schema_exists, pg_kalam_exec, require_ddl_env, unique_name, DdlTestEnv, }; -use std::env; -use tokio_postgres::{Config, NoTls}; fn contains_status(text: &str, expected_terms: &[&str]) -> bool { let normalized = text.to_ascii_lowercase(); @@ -105,9 +107,8 @@ async fn e2e_ddl_create_table_using_kalamdb_forwards_shared_options() { let metadata = env .kalamdb_sql(&format!( - "SELECT table_type, storage_id, options \ - FROM system.tables \ - WHERE namespace_id = '{ns}' AND table_name = '{table}'" + "SELECT table_type, storage_id, options FROM system.tables WHERE namespace_id = \ + '{ns}' AND table_name = '{table}'" )) .await; @@ -165,9 +166,8 @@ async fn e2e_ddl_create_table_using_kalamdb_forwards_stream_ttl() { let metadata = env .kalamdb_sql(&format!( - "SELECT table_type, options \ - FROM system.tables \ - WHERE 
namespace_id = '{ns}' AND table_name = '{table}'" + "SELECT table_type, options FROM system.tables WHERE namespace_id = '{ns}' AND \ + table_name = '{table}'" )) .await; @@ -252,7 +252,8 @@ async fn e2e_ddl_kalam_exec_passthrough_statements() { let create_table = pg_kalam_exec( &pg, &format!( - "CREATE SHARED TABLE {ns}.{table} (id BIGINT PRIMARY KEY DEFAULT SNOWFLAKE_ID(), name TEXT)" + "CREATE SHARED TABLE {ns}.{table} (id BIGINT PRIMARY KEY DEFAULT SNOWFLAKE_ID(), name \ + TEXT)" ), ) .await; @@ -291,7 +292,7 @@ async fn e2e_ddl_kalam_exec_passthrough_statements() { let drop = pg_kalam_exec(&pg, &format!("DROP SHARED TABLE IF EXISTS {ns}.{table}")).await; assert!( - contains_status(&drop, &["dropped", "ok"]), + contains_status(&drop, &["dropped", "ok", "skipped"]), "unexpected DROP TABLE response: {drop}" ); env.wait_for_kalamdb_table_absent(&ns, &table).await; @@ -328,19 +329,19 @@ async fn e2e_ddl_kalam_exec_json_text_operator() { .await .expect("insert json row through postgres"); - let result = pg_kalam_exec( - &pg, - &format!("SELECT doc->>'name' AS name FROM {ns}.{table} WHERE id = 1"), - ) - .await; + let result = + pg_kalam_exec(&pg, &format!("SELECT doc->>'name' AS name FROM {ns}.{table} WHERE id = 1")) + .await; let value: serde_json::Value = serde_json::from_str(&result).expect("parse kalam_exec json"); let rows = value.as_array().expect("kalam_exec rows array"); assert_eq!(rows.len(), 1, "unexpected kalam_exec response: {result}"); - assert_eq!(rows[0]["name"].as_str(), Some("alice"), "unexpected kalam_exec response: {result}"); + assert_eq!( + rows[0]["name"].as_str(), + Some("alice"), + "unexpected kalam_exec response: {result}" + ); - pg.batch_execute(&format!("DROP SCHEMA IF EXISTS {ns} CASCADE;")) - .await - .ok(); + pg.batch_execute(&format!("DROP SCHEMA IF EXISTS {ns} CASCADE;")).await.ok(); } #[tokio::test] @@ -374,7 +375,8 @@ async fn e2e_ddl_local_postgres_jsonb_operator_query() { let row = pg .query_one( &format!( - "SELECT doc->>'name' AS 
name, doc ? 'profile' AS has_profile FROM {ns}.{table} WHERE id = 1" + "SELECT doc->>'name' AS name, doc ? 'profile' AS has_profile FROM {ns}.{table} \ + WHERE id = 1" ), &[], ) @@ -387,9 +389,7 @@ async fn e2e_ddl_local_postgres_jsonb_operator_query() { assert_eq!(name, "alice"); assert!(has_profile, "expected profile key to exist"); - pg.batch_execute(&format!("DROP SCHEMA IF EXISTS {ns} CASCADE;")) - .await - .ok(); + pg.batch_execute(&format!("DROP SCHEMA IF EXISTS {ns} CASCADE;")).await.ok(); } #[tokio::test] diff --git a/pg/tests/e2e_ddl_common/mod.rs b/pg/tests/e2e_ddl_common/mod.rs index 99993583f..d21b208dd 100644 --- a/pg/tests/e2e_ddl_common/mod.rs +++ b/pg/tests/e2e_ddl_common/mod.rs @@ -16,10 +16,12 @@ #[path = "../support/http_client.rs"] mod http_client; -use std::ops::{Deref, DerefMut}; -use std::sync::OnceLock; -use std::time::Duration; -use std::{env, fmt}; +use std::{ + env, fmt, + ops::{Deref, DerefMut}, + sync::OnceLock, + time::Duration, +}; /// Reason DDL tests are being skipped (set once during init). static SKIP_REASON: OnceLock> = OnceLock::new(); @@ -136,10 +138,9 @@ async fn server_option_exists( option_name: &str, ) -> bool { pg.query_opt( - "SELECT 1 \ - FROM pg_foreign_server AS server \ - CROSS JOIN LATERAL pg_options_to_table(server.srvoptions) AS option_entry \ - WHERE server.srvname = $1 AND option_entry.option_name = $2", + "SELECT 1 FROM pg_foreign_server AS server CROSS JOIN LATERAL \ + pg_options_to_table(server.srvoptions) AS option_entry WHERE server.srvname = $1 AND \ + option_entry.option_name = $2", &[&server_name, &option_name], ) .await @@ -325,7 +326,8 @@ impl DdlTestEnv { } let text = resp.body; let val: Value = serde_json::from_str(&text).unwrap_or(Value::Null); - // Response format: { "results": [{ "schema": [{"name": "col1", ...}, ...], "rows": [...] }] } + // Response format: { "results": [{ "schema": [{"name": "col1", ...}, ...], "rows": [...] 
}] + // } val["results"][0]["schema"] .as_array() .map(|arr| arr.iter().filter_map(|v| v["name"].as_str().map(String::from)).collect()) @@ -360,7 +362,8 @@ impl DdlTestEnv { if std::time::Instant::now() >= deadline { panic!( - "KalamDB columns for {namespace}.{table} did not satisfy {description} within timeout: {columns:?}" + "KalamDB columns for {namespace}.{table} did not satisfy {description} within \ + timeout: {columns:?}" ); } @@ -497,20 +500,15 @@ impl DdlTestEnv { .map_err(|e| format!("create kalam_server foreign server: {e}"))?; pg.batch_execute(&format!( - "ALTER SERVER kalam_server OPTIONS (SET host '{grpc_host}', SET port '{grpc_port}');" + "ALTER SERVER kalam_server OPTIONS (SET host '{grpc_host}', SET port \ + '{grpc_port}');" )) .await .map_err(|e| format!("repoint kalam_server foreign server: {e}"))?; drop_server_option_if_present(&pg, "kalam_server", "auth_header").await; ensure_server_option(&pg, "kalam_server", "auth_mode", "account_login").await; - ensure_server_option( - &pg, - "kalam_server", - "login_user", - &auth_config.login_user, - ) - .await; + ensure_server_option(&pg, "kalam_server", "login_user", &auth_config.login_user).await; ensure_server_option( &pg, "kalam_server", @@ -546,8 +544,8 @@ impl DdlTestEnv { } } Err(format!( - "PostgreSQL not reachable at {PG_HOST}:{PG_PORT}. \ - Start with: ./pg/scripts/pgrx-test-setup.sh --start" + "PostgreSQL not reachable at {PG_HOST}:{PG_PORT}. Start with: \ + ./pg/scripts/pgrx-test-setup.sh --start" )) } @@ -604,7 +602,8 @@ impl DdlTestEnv { } Err(format!( - "Failed to authenticate KalamDB test environment ({config}). Set KALAMDB_USER/KALAMDB_PASSWORD or KALAMDB_ROOT_PASSWORD." + "Failed to authenticate KalamDB test environment ({config}). Set \ + KALAMDB_USER/KALAMDB_PASSWORD or KALAMDB_ROOT_PASSWORD." 
)) } } diff --git a/pg/tests/e2e_dml/basic.rs b/pg/tests/e2e_dml/basic.rs index e69613770..56750d21e 100644 --- a/pg/tests/e2e_dml/basic.rs +++ b/pg/tests/e2e_dml/basic.rs @@ -47,12 +47,8 @@ async fn e2e_insert_update_shared_table() { delete_all(&pg, &qualified_table, "id").await; pg.batch_execute(&format!( - "INSERT INTO {qualified_table} (id, title, value) VALUES \ - ('u1', 'Alpha', 10), \ - ('u2', 'Beta', 20), \ - ('u3', 'Gamma', 30), \ - ('u4', 'Delta', 40), \ - ('u5', 'Epsilon', 50);" + "INSERT INTO {qualified_table} (id, title, value) VALUES ('u1', 'Alpha', 10), ('u2', \ + 'Beta', 20), ('u3', 'Gamma', 30), ('u4', 'Delta', 40), ('u5', 'Epsilon', 50);" )) .await .expect("insert 5 rows"); @@ -117,14 +113,14 @@ async fn e2e_user_table_isolation() { await_user_shard_leader("user-b").await; let user_a_insert = format!( - "INSERT INTO {qualified_table} (id, name, age) VALUES \ - ('a1', 'Alice', 30), ('a2', 'Ada', 25);" + "INSERT INTO {qualified_table} (id, name, age) VALUES ('a1', 'Alice', 30), ('a2', 'Ada', \ + 25);" ); retry_transient_user_leader_error("user-a insert", || pg_a.batch_execute(&user_a_insert)).await; let user_b_insert = format!( - "INSERT INTO {qualified_table} (id, name, age) VALUES \ - ('b1', 'Bob', 40), ('b2', 'Blake', 35), ('b3', 'Bea', 28);" + "INSERT INTO {qualified_table} (id, name, age) VALUES ('b1', 'Bob', 40), ('b2', 'Blake', \ + 35), ('b3', 'Bea', 28);" ); retry_transient_user_leader_error("user-b insert", || pg_b.batch_execute(&user_b_insert)).await; diff --git a/pg/tests/e2e_dml/interop.rs b/pg/tests/e2e_dml/interop.rs index 55353ccb8..c30980442 100644 --- a/pg/tests/e2e_dml/interop.rs +++ b/pg/tests/e2e_dml/interop.rs @@ -66,7 +66,8 @@ async fn wait_for_execute_as_user_count( loop { let result = env .kalamdb_sql(&format!( - "EXECUTE AS USER '{user_id}' (SELECT COUNT(*) FROM {qualified_table} WHERE id = '{row_id}')" + "EXECUTE AS USER '{user_id}' (SELECT COUNT(*) FROM {qualified_table} WHERE id = \ + '{row_id}')" )) .await; let count = 
sql_first_cell_i64(&result).unwrap_or_default(); @@ -76,7 +77,8 @@ async fn wait_for_execute_as_user_count( if std::time::Instant::now() >= deadline { panic!( - "EXECUTE AS USER '{user_id}' expected count {expected} for {qualified_table}.{row_id}, got {count}" + "EXECUTE AS USER '{user_id}' expected count {expected} for \ + {qualified_table}.{row_id}, got {count}" ); } @@ -178,10 +180,8 @@ async fn e2e_select_filters_and_postgres_join_work() { delete_all(&pg, &format!("e2e.{table}"), "id").await; pg.batch_execute(&format!( - "INSERT INTO e2e.{table} (id, title, value) VALUES \ - ('j1', 'Alpha', 10), \ - ('j2', 'Beta', 20), \ - ('j3', 'Gamma', 30);" + "INSERT INTO e2e.{table} (id, title, value) VALUES ('j1', 'Alpha', 10), ('j2', 'Beta', \ + 20), ('j3', 'Gamma', 30);" )) .await .expect("insert join test rows"); @@ -237,18 +237,14 @@ async fn e2e_shared_tables_can_join_each_other_in_postgres() { create_shared_kalam_table(&pg, &orders_table, "id TEXT, customer_id TEXT, total INTEGER").await; pg.batch_execute(&format!( - "INSERT INTO e2e.{customers_table} (id, name) VALUES \ - ('c1', 'Alice'), \ - ('c2', 'Bob');" + "INSERT INTO e2e.{customers_table} (id, name) VALUES ('c1', 'Alice'), ('c2', 'Bob');" )) .await .expect("insert shared customers"); pg.batch_execute(&format!( - "INSERT INTO e2e.{orders_table} (id, customer_id, total) VALUES \ - ('o1', 'c1', 15), \ - ('o2', 'c1', 20), \ - ('o3', 'c2', 30);" + "INSERT INTO e2e.{orders_table} (id, customer_id, total) VALUES ('o1', 'c1', 15), ('o2', \ + 'c1', 20), ('o3', 'c2', 30);" )) .await .expect("insert shared orders"); @@ -320,7 +316,8 @@ async fn e2e_search_path_schema_mirror_works_without_namespace_option() { let result_text = serde_json::to_string(&result).unwrap_or_default(); assert!( result_text.contains("spath-1") && result_text.contains("From search_path"), - "search_path-mirrored Kalam table should write into KalamDB namespace {schema}: {result_text}" + "search_path-mirrored Kalam table should write into KalamDB 
namespace {schema}: \ + {result_text}" ); pg.batch_execute(&format!( @@ -348,9 +345,8 @@ async fn e2e_user_tables_can_join_each_other_in_postgres() { set_user_id(&pg, &user_id).await; let insert_profiles_sql = format!( - "INSERT INTO e2e.{profiles_table} (id, display_name) VALUES \ - ('u1', 'Alice'), \ - ('u2', 'Bob');" + "INSERT INTO e2e.{profiles_table} (id, display_name) VALUES ('u1', 'Alice'), ('u2', \ + 'Bob');" ); retry_transient_user_leader_error("insert user profiles", || { pg.batch_execute(&insert_profiles_sql) @@ -358,8 +354,7 @@ async fn e2e_user_tables_can_join_each_other_in_postgres() { .await; let insert_memberships_sql = format!( - "INSERT INTO e2e.{memberships_table} (id, profile_id, plan) VALUES \ - ('m1', 'u1', 'pro'), \ + "INSERT INTO e2e.{memberships_table} (id, profile_id, plan) VALUES ('m1', 'u1', 'pro'), \ ('m2', 'u2', 'team');" ); retry_transient_user_leader_error("insert user memberships", || { @@ -402,7 +397,8 @@ async fn e2e_user_table_explicit_userid_routes_to_target_user() { set_user_id(&pg, &writer.user_id).await; let insert_sql = format!( - "INSERT INTO {qualified_table} (id, body, _userid) VALUES ('{row_id}', 'routed-via-explicit-userid', '{}')", + "INSERT INTO {qualified_table} (id, body, _userid) VALUES ('{row_id}', \ + 'routed-via-explicit-userid', '{}')", target.user_id ); diff --git a/pg/tests/e2e_dml/proxy_failures.rs b/pg/tests/e2e_dml/proxy_failures.rs index 7d711e29d..a7ddb1367 100644 --- a/pg/tests/e2e_dml/proxy_failures.rs +++ b/pg/tests/e2e_dml/proxy_failures.rs @@ -1,15 +1,15 @@ -use std::time::{Duration, Instant}; use std::{ env, ops::{Deref, DerefMut}, + time::{Duration, Instant}, }; use serde_json::Value; use tokio_postgres::{Config, NoTls}; use super::common::{ - kalamdb_account_login_server_options, kalamdb_grpc_target, pg_backend_pid, - postgres_error_text, unique_name, TestEnv, + kalamdb_account_login_server_options, pg_backend_pid, postgres_error_text, + shared_leader_grpc_target, unique_name, TestEnv, }; use 
crate::e2e_common::tcp_proxy::TcpDisconnectProxy; @@ -74,6 +74,11 @@ enum TerminalAction { Rollback, } +struct ProvisionedProxy { + proxy: TcpDisconnectProxy, + leader_base_url: String, +} + impl TerminalAction { fn label(self) -> &'static str { match self { @@ -141,9 +146,8 @@ async fn create_proxy_shared_foreign_table( extra_server_options: Option<&str>, ) { let mut server_options = kalamdb_account_login_server_options(host, port); - if let Some(extra_options) = extra_server_options - .map(str::trim) - .filter(|value| !value.is_empty()) + if let Some(extra_options) = + extra_server_options.map(str::trim).filter(|value| !value.is_empty()) { server_options.push_str(", "); server_options.push_str(extra_options); @@ -155,17 +159,11 @@ async fn create_proxy_shared_foreign_table( .expect("create e2e schema"); client .batch_execute(&format!( - "DROP FOREIGN TABLE IF EXISTS e2e.{table}; \ - DROP SERVER IF EXISTS {server_name} CASCADE; \ - CREATE SERVER {server_name} \ - FOREIGN DATA WRAPPER pg_kalam \ - OPTIONS ({server_options}); \ - CREATE FOREIGN TABLE e2e.{table} ( \ - id TEXT, \ - title TEXT, \ - value INTEGER \ - ) SERVER {server_name} \ - OPTIONS (namespace 'e2e', \"table\" '{table}', table_type 'shared');" + "DROP FOREIGN TABLE IF EXISTS e2e.{table}; DROP SERVER IF EXISTS {server_name} \ + CASCADE; CREATE SERVER {server_name} FOREIGN DATA WRAPPER pg_kalam OPTIONS \ + ({server_options}); CREATE FOREIGN TABLE e2e.{table} ( id TEXT, title TEXT, value \ + INTEGER ) SERVER {server_name} OPTIONS (namespace 'e2e', \"table\" '{table}', \ + table_type 'shared');" )) .await .expect("create proxy foreign table"); @@ -173,11 +171,118 @@ async fn create_proxy_shared_foreign_table( TestEnv::global().await.wait_for_kalamdb_table_exists("e2e", table).await; } +async fn update_proxy_foreign_server( + client: &tokio_postgres::Client, + server_name: &str, + host: &str, + port: u16, +) { + client + .batch_execute(&format!( + "ALTER SERVER {server_name} OPTIONS (SET host '{}', SET 
port '{}');", + sql_escape_literal(host), + port + )) + .await + .expect("retarget proxy foreign server"); +} + +fn leader_grpc_target_from_message(message: &str) -> Option<(String, u16)> { + let marker = "Leader:"; + let leader_hint = message.split_once(marker)?.1.trim(); + let leader_hint = leader_hint + .strip_prefix("Some(") + .and_then(|value| value.strip_suffix(')')) + .unwrap_or(leader_hint) + .trim() + .trim_matches('"'); + + if leader_hint.is_empty() || leader_hint.contains("Leader unknown") { + return None; + } + + let authority = leader_hint + .trim_start_matches("http://") + .trim_start_matches("https://") + .split('/') + .next() + .unwrap_or(leader_hint); + + let (host, http_port) = authority.rsplit_once(':')?; + let http_port = http_port.parse::().ok()?; + let grpc_port = match http_port { + 8080 => 9188, + _ => http_port.checked_add(1000)?, + }; + + Some((host.to_string(), grpc_port)) +} + +fn grpc_http_base_url(host: &str, grpc_port: u16) -> String { + let http_port = match grpc_port { + 9188 => 8080, + _ => grpc_port.saturating_sub(1000), + }; + format!("http://{host}:{http_port}") +} + +async fn probe_proxy_transaction_insert( + client: &tokio_postgres::Client, + qualified_table: &str, + probe_id: &str, +) -> Result<(), String> { + client + .batch_execute("BEGIN") + .await + .expect("begin proxy leader probe transaction"); + + let insert_result = client + .execute( + &format!("INSERT INTO {qualified_table} (id, title, value) VALUES ($1, $2, $3)"), + &[&probe_id, &"proxy leader probe", &0_i32], + ) + .await; + + let _ = client.batch_execute("ROLLBACK").await; + + match insert_result { + Ok(_) => Ok(()), + Err(error) => Err(postgres_error_text(&error)), + } +} + +async fn provision_proxy_shared_foreign_table( + client: &tokio_postgres::Client, + server_name: &str, + table: &str, + extra_server_options: Option<&str>, +) -> ProvisionedProxy { + let (target_host, target_port) = shared_leader_grpc_target().await; + + let proxy = 
TcpDisconnectProxy::start(&format!("http://{target_host}:{target_port}")).await; + let (proxy_host, proxy_port) = proxy_host_port(proxy.base_url()); + create_proxy_shared_foreign_table( + client, + server_name, + table, + &proxy_host, + proxy_port, + extra_server_options, + ) + .await; + + ProvisionedProxy { + proxy, + leader_base_url: grpc_http_base_url(&target_host, target_port), + } +} + async fn cleanup_proxy_table(env: &TestEnv, table: &str, server_name: &str) { let cleanup = env.pg_connect().await; cleanup .batch_execute(&format!( - "DROP FOREIGN TABLE IF EXISTS e2e.{table}; DROP SERVER IF EXISTS {server_name} CASCADE;" + "DROP FOREIGN TABLE IF EXISTS e2e.{table}; DROP SERVER IF EXISTS {server_name} \ + CASCADE;" )) .await .ok(); @@ -232,39 +337,59 @@ async fn wait_for_row_count( } } -async fn fetch_session_rows(env: &TestEnv, client_addr: &str) -> Vec> { - let client_addr = sql_escape_literal(client_addr); +async fn fetch_active_transaction_session_rows(env: &TestEnv, base_url: &str) -> Vec> { sql_rows( - &env.kalamdb_sql(&format!( - "SELECT session_id, state, transaction_id, transaction_state \ - FROM system.sessions \ - WHERE client_addr = '{client_addr}' \ - ORDER BY last_seen_at DESC" - )) + &env.kalamdb_sql_at( + base_url, + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE transaction_id IS NOT NULL AND transaction_state = 'active' ORDER BY \ + last_seen_at DESC", + ) .await, ) } -async fn fetch_transaction_rows(env: &TestEnv, transaction_id: &str) -> Vec> { +async fn fetch_session_rows(env: &TestEnv, base_url: &str, session_id: &str) -> Vec> { + let session_id = sql_escape_literal(session_id); + sql_rows( + &env.kalamdb_sql_at( + base_url, + &format!( + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE session_id = '{session_id}' ORDER BY last_seen_at DESC" + ), + ) + .await, + ) +} + +async fn fetch_transaction_rows( + env: &TestEnv, + base_url: &str, + 
transaction_id: &str, +) -> Vec> { sql_rows( - &env.kalamdb_sql(&format!( - "SELECT transaction_id, owner_id, origin, state, write_count \ - FROM system.transactions \ + &env.kalamdb_sql_at( + base_url, + &format!( + "SELECT transaction_id, owner_id, origin, state, write_count FROM system.transactions \ WHERE transaction_id = '{transaction_id}'" - )) + ), + ) .await, ) } async fn wait_for_transaction_row( env: &TestEnv, + base_url: &str, transaction_id: &str, timeout: Duration, ) -> Vec { let deadline = Instant::now() + timeout; loop { - let rows = fetch_transaction_rows(env, transaction_id).await; + let rows = fetch_transaction_rows(env, base_url, transaction_id).await; if let Some(row) = rows.first() { return row.clone(); } @@ -275,44 +400,54 @@ async fn wait_for_transaction_row( } } -async fn wait_for_session_rows( +async fn wait_for_active_transaction_session_row( env: &TestEnv, - client_addr: &str, + base_url: &str, timeout: Duration, -) -> Vec> { +) -> Vec { let deadline = Instant::now() + timeout; loop { - let rows = fetch_session_rows(env, client_addr).await; - if !rows.is_empty() { - return rows; + let rows = fetch_active_transaction_session_rows(env, base_url).await; + if let Some(row) = rows.first() { + return row.clone(); } if Instant::now() >= deadline { - panic!("client_addr {client_addr} did not appear in system.sessions within timeout"); + panic!("active PgRpc session did not appear in system.sessions within timeout"); } tokio::time::sleep(Duration::from_millis(100)).await; } } -async fn wait_for_session_cleanup(env: &TestEnv, client_addr: &str, timeout: Duration) { +async fn wait_for_session_cleanup( + env: &TestEnv, + base_url: &str, + session_id: &str, + timeout: Duration, +) { let deadline = Instant::now() + timeout; loop { - if fetch_session_rows(env, client_addr).await.is_empty() { + if fetch_session_rows(env, base_url, session_id).await.is_empty() { return; } if Instant::now() >= deadline { - panic!("client_addr {client_addr} remained in 
system.sessions past cleanup timeout"); + panic!("session {session_id} remained in system.sessions past cleanup timeout"); } tokio::time::sleep(Duration::from_millis(100)).await; } } -async fn wait_for_transaction_cleanup(env: &TestEnv, transaction_id: &str, timeout: Duration) { +async fn wait_for_transaction_cleanup( + env: &TestEnv, + base_url: &str, + transaction_id: &str, + timeout: Duration, +) { let deadline = Instant::now() + timeout; loop { - if fetch_transaction_rows(env, transaction_id).await.is_empty() { + if fetch_transaction_rows(env, base_url, transaction_id).await.is_empty() { return; } if Instant::now() >= deadline { @@ -335,15 +470,13 @@ fn proxy_host_port(base_url: &str) -> (String, u16) { async fn run_terminal_proxy_cleanup_scenario(action: TerminalAction) { let env = TestEnv::global().await; let mut pg = OwnedPgClient::connect().await; - let (grpc_host, grpc_port) = kalamdb_grpc_target(); - let proxy = TcpDisconnectProxy::start(&format!("http://{grpc_host}:{grpc_port}")).await; let server_name = unique_name(&format!("proxy_server_{}", action.label())); let table = unique_name(&format!("proxy_{}", action.label())); let qualified_table = format!("e2e.{table}"); - let (proxy_host, proxy_port) = proxy_host_port(proxy.base_url()); - - create_proxy_shared_foreign_table(&pg, &server_name, &table, &proxy_host, proxy_port, None) - .await; + let ProvisionedProxy { + proxy, + leader_base_url, + } = provision_proxy_shared_foreign_table(&pg, &server_name, &table, None).await; let tx = pg.transaction().await.expect("begin transaction through proxy"); tx.execute( @@ -362,19 +495,21 @@ async fn run_terminal_proxy_cleanup_scenario(action: TerminalAction) { "proxy should observe the gRPC connection before transport failure" ); - let session_client_addr = proxy + let _session_client_addr = proxy .wait_for_backend_client_addr(Duration::from_secs(3)) .await .expect("proxy should expose the backend-facing client address"); - let session_rows = 
wait_for_session_rows(env, &session_client_addr, Duration::from_secs(3)).await; - assert_eq!(session_rows.len(), 1); - assert_eq!(string_cell(&session_rows[0], 1).as_deref(), Some("idle in transaction")); - assert_eq!(string_cell(&session_rows[0], 3).as_deref(), Some("active")); - let transaction_id = - string_cell(&session_rows[0], 2).expect("transaction id in system.sessions"); + let session_row = + wait_for_active_transaction_session_row(env, &leader_base_url, Duration::from_secs(3)) + .await; + let session_id = string_cell(&session_row, 0).expect("session id in system.sessions"); + assert_eq!(string_cell(&session_row, 1).as_deref(), Some("idle in transaction")); + assert_eq!(string_cell(&session_row, 3).as_deref(), Some("active")); + let transaction_id = string_cell(&session_row, 2).expect("transaction id in system.sessions"); let transaction_row = - wait_for_transaction_row(env, &transaction_id, Duration::from_secs(3)).await; + wait_for_transaction_row(env, &leader_base_url, &transaction_id, Duration::from_secs(3)) + .await; assert_eq!(string_cell(&transaction_row, 2).as_deref(), Some("PgRpc")); assert!(matches!( string_cell(&transaction_row, 3).as_deref(), @@ -390,11 +525,11 @@ async fn run_terminal_proxy_cleanup_scenario(action: TerminalAction) { let message = postgres_error_text(&terminal_error); assert_transport_or_timeout_error(&message, action.label()); - let stuck_session_rows = - wait_for_session_rows(env, &session_client_addr, Duration::from_secs(2)).await; + let stuck_session_rows = fetch_session_rows(env, &leader_base_url, &session_id).await; assert_eq!(stuck_session_rows.len(), 1); assert_eq!(string_cell(&stuck_session_rows[0], 2), Some(transaction_id.clone())); - let stuck_transaction_rows = fetch_transaction_rows(env, &transaction_id).await; + let stuck_transaction_rows = + fetch_transaction_rows(env, &leader_base_url, &transaction_id).await; assert_eq!(stuck_transaction_rows.len(), 1); }, TerminalAction::Rollback => { @@ -408,8 +543,9 @@ async 
fn run_terminal_proxy_cleanup_scenario(action: TerminalAction) { proxy.simulate_server_up(); pg.disconnect().await; - wait_for_session_cleanup(env, &session_client_addr, Duration::from_secs(5)).await; - wait_for_transaction_cleanup(env, &transaction_id, Duration::from_secs(5)).await; + wait_for_session_cleanup(env, &leader_base_url, &session_id, Duration::from_secs(5)).await; + wait_for_transaction_cleanup(env, &leader_base_url, &transaction_id, Duration::from_secs(5)) + .await; let final_rows = env .kalamdb_sql(&format!("SELECT id FROM {qualified_table} WHERE id = '{}-1'", action.label())) @@ -454,15 +590,11 @@ async fn owned_pg_client_disconnect_terminates_backend() { async fn e2e_proxy_autocommit_query_recovers_after_disconnect() { let env = TestEnv::global().await; let pg = OwnedPgClient::connect().await; - let (grpc_host, grpc_port) = kalamdb_grpc_target(); - let proxy = TcpDisconnectProxy::start(&format!("http://{grpc_host}:{grpc_port}")).await; let server_name = unique_name("proxy_recover_server"); let table = unique_name("proxy_recover_items"); let qualified_table = format!("e2e.{table}"); - let (proxy_host, proxy_port) = proxy_host_port(proxy.base_url()); - - create_proxy_shared_foreign_table(&pg, &server_name, &table, &proxy_host, proxy_port, None) - .await; + let ProvisionedProxy { proxy, .. 
} = + provision_proxy_shared_foreign_table(&pg, &server_name, &table, None).await; pg.execute( &format!("INSERT INTO {qualified_table} (id, title, value) VALUES ($1, $2, $3)"), @@ -501,16 +633,13 @@ async fn e2e_proxy_autocommit_query_recovers_after_disconnect() { async fn e2e_proxy_slow_link_keeps_connection_usable() { let env = TestEnv::global().await; let pg = OwnedPgClient::connect().await; - let (grpc_host, grpc_port) = kalamdb_grpc_target(); - let proxy = TcpDisconnectProxy::start(&format!("http://{grpc_host}:{grpc_port}")).await; let server_name = unique_name("proxy_slow_server"); let table = unique_name("proxy_slow_items"); let qualified_table = format!("e2e.{table}"); - let (proxy_host, proxy_port) = proxy_host_port(proxy.base_url()); + let ProvisionedProxy { proxy, .. } = + provision_proxy_shared_foreign_table(&pg, &server_name, &table, None).await; proxy.set_chunk_delay(Duration::from_millis(200)); - create_proxy_shared_foreign_table(&pg, &server_name, &table, &proxy_host, proxy_port, None) - .await; pg.execute( &format!("INSERT INTO {qualified_table} (id, title, value) VALUES ($1, $2, $3)"), @@ -548,22 +677,11 @@ async fn e2e_proxy_slow_link_keeps_connection_usable() { async fn e2e_proxy_blackhole_timeout_recovers_after_traffic_is_restored() { let env = TestEnv::global().await; let pg = OwnedPgClient::connect().await; - let (grpc_host, grpc_port) = kalamdb_grpc_target(); - let proxy = TcpDisconnectProxy::start(&format!("http://{grpc_host}:{grpc_port}")).await; let server_name = unique_name("proxy_blackhole_server"); let table = unique_name("proxy_blackhole_items"); let qualified_table = format!("e2e.{table}"); - let (proxy_host, proxy_port) = proxy_host_port(proxy.base_url()); - - create_proxy_shared_foreign_table( - &pg, - &server_name, - &table, - &proxy_host, - proxy_port, - None, - ) - .await; + let ProvisionedProxy { proxy, .. 
} = + provision_proxy_shared_foreign_table(&pg, &server_name, &table, None).await; pg.execute( &format!("INSERT INTO {qualified_table} (id, title, value) VALUES ($1, $2, $3)"), @@ -573,11 +691,9 @@ async fn e2e_proxy_blackhole_timeout_recovers_after_traffic_is_restored() { .expect("seed row before blackhole"); wait_for_row_count(&pg, &qualified_table, 1, Duration::from_secs(3)).await; - pg.batch_execute(&format!( - "ALTER SERVER {server_name} OPTIONS (ADD timeout '1200');" - )) - .await - .expect("set low proxy timeout before blackhole phase"); + pg.batch_execute(&format!("ALTER SERVER {server_name} OPTIONS (ADD timeout '1200');")) + .await + .expect("set low proxy timeout before blackhole phase"); proxy.blackhole(); diff --git a/pg/tests/e2e_dml/sync_stress.rs b/pg/tests/e2e_dml/sync_stress.rs index 1cf109af2..a196b4762 100644 --- a/pg/tests/e2e_dml/sync_stress.rs +++ b/pg/tests/e2e_dml/sync_stress.rs @@ -1,11 +1,15 @@ -use super::common::{count_rows, create_shared_kalam_table, unique_name, TestEnv}; +use std::{ + collections::BTreeMap, + sync::Arc, + time::{Duration, Instant}, +}; + use futures_util::future::join_all; use serde_json::Value; -use std::collections::BTreeMap; -use std::sync::Arc; -use std::time::{Duration, Instant}; use tokio::sync::Barrier; +use super::common::{count_rows, create_shared_kalam_table, unique_name, TestEnv}; + type SqlRow = BTreeMap; fn sql_result_rows(result: &Value) -> Vec { @@ -107,7 +111,10 @@ async fn wait_for_api_sql_rows(sql: &str, expected_count: usize, timeout: Durati } if Instant::now() >= deadline { - panic!("API rows for SQL did not reach expected count {expected_count}: {sql}; rows={rows:?}"); + panic!( + "API rows for SQL did not reach expected count {expected_count}: {sql}; \ + rows={rows:?}" + ); } tokio::time::sleep(Duration::from_millis(100)).await; @@ -130,7 +137,8 @@ async fn wait_for_pg_count( if Instant::now() >= deadline { panic!( - "row count for {qualified_table} did not become {expected_count}; last 
count={count}" + "row count for {qualified_table} did not become {expected_count}; last \ + count={count}" ); } @@ -149,21 +157,23 @@ async fn e2e_bidirectional_typed_roundtrip_between_pg_and_api() { create_shared_kalam_table( &pg, &table, - "id TEXT, label VARCHAR, attempts SMALLINT, qty INTEGER, total BIGINT, ratio REAL, score DOUBLE PRECISION, active BOOLEAN, notes TEXT", + "id TEXT, label VARCHAR, attempts SMALLINT, qty INTEGER, total BIGINT, ratio REAL, score \ + DOUBLE PRECISION, active BOOLEAN, notes TEXT", ) .await; pg.batch_execute(&format!( - "INSERT INTO {qualified_table} (id, label, attempts, qty, total, ratio, score, active, notes) VALUES \ - ('pg-typed-1', 'alpha', 3, 25, 7000000, 1.5, 9.875, true, 'leader''s note'), \ - ('pg-typed-2', 'beta', 0, -5, 42, -3.25, 0.125, false, NULL);" + "INSERT INTO {qualified_table} (id, label, attempts, qty, total, ratio, score, active, \ + notes) VALUES ('pg-typed-1', 'alpha', 3, 25, 7000000, 1.5, 9.875, true, 'leader''s \ + note'), ('pg-typed-2', 'beta', 0, -5, 42, -3.25, 0.125, false, NULL);" )) .await .expect("insert typed rows through PostgreSQL"); let api_rows = wait_for_api_sql_rows( &format!( - "SELECT id, label, attempts, qty, total, ratio, score, active, notes FROM {qualified_table} WHERE id IN ('pg-typed-1', 'pg-typed-2') ORDER BY id" + "SELECT id, label, attempts, qty, total, ratio, score, active, notes FROM \ + {qualified_table} WHERE id IN ('pg-typed-1', 'pg-typed-2') ORDER BY id" ), 2, Duration::from_secs(5), @@ -190,15 +200,16 @@ async fn e2e_bidirectional_typed_roundtrip_between_pg_and_api() { assert!(second.get("notes").is_some_and(Value::is_null)); env.kalamdb_sql(&format!( - "INSERT INTO {qualified_table} (id, label, attempts, qty, total, ratio, score, active, notes) VALUES \ - ('api-typed-1', 'from-api', 7, 11, 123456, 2.5, 4.25, true, 'api inserted')" + "INSERT INTO {qualified_table} (id, label, attempts, qty, total, ratio, score, active, \ + notes) VALUES ('api-typed-1', 'from-api', 7, 11, 
123456, 2.5, 4.25, true, 'api inserted')" )) .await; let api_inserted = pg .query_one( &format!( - "SELECT label, attempts, qty, total, ratio, score, active, notes FROM {qualified_table} WHERE id = $1" + "SELECT label, attempts, qty, total, ratio, score, active, notes FROM \ + {qualified_table} WHERE id = $1" ), &[&"api-typed-1"], ) @@ -214,7 +225,8 @@ async fn e2e_bidirectional_typed_roundtrip_between_pg_and_api() { assert_eq!(api_inserted.get::<_, Option>(7), Some("api inserted".to_string())); env.kalamdb_sql(&format!( - "UPDATE {qualified_table} SET label = 'updated-from-api', qty = 30, active = false, notes = 'api edit' WHERE id = 'pg-typed-1'" + "UPDATE {qualified_table} SET label = 'updated-from-api', qty = 30, active = false, notes \ + = 'api edit' WHERE id = 'pg-typed-1'" )) .await; @@ -232,9 +244,16 @@ async fn e2e_bidirectional_typed_roundtrip_between_pg_and_api() { pg.execute( &format!( - "UPDATE {qualified_table} SET label = $1, total = $2, score = $3, notes = $4 WHERE id = $5" + "UPDATE {qualified_table} SET label = $1, total = $2, score = $3, notes = $4 WHERE id \ + = $5" ), - &[&"updated-from-pg", &654_321_i64, &5.5_f64, &"pg edit", &"api-typed-1"], + &[ + &"updated-from-pg", + &654_321_i64, + &5.5_f64, + &"pg edit", + &"api-typed-1", + ], ) .await .expect("update API-created row through PostgreSQL"); @@ -295,7 +314,8 @@ async fn e2e_parallel_transactional_inserts_and_updates_stay_consistent() { coordinator .execute( &format!( - "INSERT INTO {qualified_table} (id, worker, ordinal, status, amount) VALUES ($1, $2, $3, $4, $5)" + "INSERT INTO {qualified_table} (id, worker, ordinal, status, amount) VALUES \ + ($1, $2, $3, $4, $5)" ), &[ &format!("seed-{worker}"), @@ -330,7 +350,8 @@ async fn e2e_parallel_transactional_inserts_and_updates_stay_consistent() { .collect::>() .join(", "); tx.batch_execute(&format!( - "INSERT INTO {qualified_table} (id, worker, ordinal, status, amount) VALUES {values};" + "INSERT INTO {qualified_table} (id, worker, ordinal, 
status, amount) VALUES \ + {values};" )) .await .expect("insert worker batch"); @@ -360,7 +381,10 @@ async fn e2e_parallel_transactional_inserts_and_updates_stay_consistent() { let pg_rows = coordinator .query( - &format!("SELECT id, status, amount FROM {qualified_table} WHERE id LIKE 'seed-%' ORDER BY id"), + &format!( + "SELECT id, status, amount FROM {qualified_table} WHERE id LIKE 'seed-%' ORDER BY \ + id" + ), &[], ) .await @@ -391,7 +415,11 @@ async fn e2e_parallel_transactional_inserts_and_updates_stay_consistent() { assert_eq!(row_i64(&api_inserted[0], "inserted_rows"), (WORKERS * ROWS_PER_WORKER) as i64); let api_seed_rows = wait_for_api_sql_rows( - &format!("SELECT id, status, amount FROM {qualified_table} WHERE id LIKE 'seed-%' ORDER BY id LIMIT {}", WORKERS + 1), + &format!( + "SELECT id, status, amount FROM {qualified_table} WHERE id LIKE 'seed-%' ORDER BY id \ + LIMIT {}", + WORKERS + 1 + ), WORKERS, Duration::from_secs(5), ) @@ -406,8 +434,8 @@ async fn e2e_parallel_transactional_inserts_and_updates_stay_consistent() { let api_sample_rows = wait_for_api_sql_rows( &format!( - "SELECT id, worker, ordinal, status, amount FROM {qualified_table} \ - WHERE id IN ('wrk-0-0', 'wrk-3-7', 'wrk-5-14') ORDER BY id LIMIT 4" + "SELECT id, worker, ordinal, status, amount FROM {qualified_table} WHERE id IN \ + ('wrk-0-0', 'wrk-3-7', 'wrk-5-14') ORDER BY id LIMIT 4" ), 3, Duration::from_secs(5), @@ -446,18 +474,18 @@ async fn e2e_transaction_rollback_discards_insert_update_delete_in_pg_and_api() .await; pg.batch_execute(&format!( - "INSERT INTO {qualified_table} (id, title, value, active) VALUES \ - ('keep-1', 'baseline one', 10, true), \ - ('keep-2', 'baseline two', 20, false);" + "INSERT INTO {qualified_table} (id, title, value, active) VALUES ('keep-1', 'baseline \ + one', 10, true), ('keep-2', 'baseline two', 20, false);" )) .await .expect("seed rollback test rows"); let tx = pg.transaction().await.expect("begin rollback transaction"); tx.batch_execute(&format!( 
- "INSERT INTO {qualified_table} (id, title, value, active) VALUES ('temp-insert', 'rollback me', 999, true); \ - UPDATE {qualified_table} SET title = 'changed in tx', value = 111, active = false WHERE id = 'keep-1'; \ - DELETE FROM {qualified_table} WHERE id = 'keep-2';" + "INSERT INTO {qualified_table} (id, title, value, active) VALUES ('temp-insert', \ + 'rollback me', 999, true); UPDATE {qualified_table} SET title = 'changed in tx', value = \ + 111, active = false WHERE id = 'keep-1'; DELETE FROM {qualified_table} WHERE id = \ + 'keep-2';" )) .await .expect("apply transactional insert/update/delete"); @@ -521,9 +549,8 @@ async fn e2e_disconnect_abort_discards_uncommitted_changes_in_pg_and_api() { coordinator .batch_execute(&format!( - "INSERT INTO {qualified_table} (id, title, value) VALUES \ - ('base-1', 'before disconnect', 10), \ - ('base-2', 'should survive', 20);" + "INSERT INTO {qualified_table} (id, title, value) VALUES ('base-1', 'before \ + disconnect', 10), ('base-2', 'should survive', 20);" )) .await .expect("seed disconnect-abort rows"); @@ -533,9 +560,9 @@ async fn e2e_disconnect_abort_discards_uncommitted_changes_in_pg_and_api() { .await .expect("begin SQL transaction before disconnect"); pg.batch_execute(&format!( - "INSERT INTO {qualified_table} (id, title, value) VALUES ('temp-drop', 'should vanish', 999); \ - UPDATE {qualified_table} SET title = 'mutated before disconnect', value = 77 WHERE id = 'base-1'; \ - DELETE FROM {qualified_table} WHERE id = 'base-2';" + "INSERT INTO {qualified_table} (id, title, value) VALUES ('temp-drop', 'should vanish', \ + 999); UPDATE {qualified_table} SET title = 'mutated before disconnect', value = 77 WHERE \ + id = 'base-1'; DELETE FROM {qualified_table} WHERE id = 'base-2';" )) .await .expect("apply uncommitted mutations before disconnect"); diff --git a/pg/tests/e2e_dml/transactional.rs b/pg/tests/e2e_dml/transactional.rs index 060b23f79..db15fac0b 100644 --- a/pg/tests/e2e_dml/transactional.rs +++ 
b/pg/tests/e2e_dml/transactional.rs @@ -1,21 +1,20 @@ +use std::time::{Duration, Instant}; + +use serde_json::Value; + use super::common::{ await_user_shard_leader, count_rows, create_shared_kalam_table, create_user_kalam_table, - pg_backend_pid, postgres_error_text, retry_transient_user_leader_error, - same_user_shard_pair, set_user_id, unique_name, TestEnv, + pg_backend_pid, postgres_error_text, rebind_user_table_to_user_leader, + retry_transient_user_leader_error, same_user_shard_pair, set_user_id, + shared_leader_grpc_target, unique_name, TestEnv, }; -use serde_json::Value; -use std::time::{Duration, Instant}; fn sql_rows(result: &Value) -> Vec> { result["results"] .as_array() .and_then(|results| results.first()) .and_then(|entry| entry["rows"].as_array()) - .map(|rows| { - rows.iter() - .filter_map(|row| row.as_array().cloned()) - .collect::>() - }) + .map(|rows| rows.iter().filter_map(|row| row.as_array().cloned()).collect::>()) .unwrap_or_default() } @@ -37,42 +36,71 @@ fn i64_cell(row: &[Value], index: usize) -> Option { } async fn fetch_session_rows(env: &TestEnv, backend_pid: u32) -> Vec> { + fetch_session_rows_at(env, &kalamdb_shared_leader_base_url().await, backend_pid).await +} + +async fn fetch_session_rows_at(env: &TestEnv, base_url: &str, backend_pid: u32) -> Vec> { sql_rows( - &env.kalamdb_sql(&format!( - "SELECT session_id, state, transaction_id, transaction_state \ - FROM system.sessions \ - WHERE backend_pid = {backend_pid} \ - ORDER BY last_seen_at DESC" - )) + &env.kalamdb_sql_at( + base_url, + &format!( + "SELECT session_id, state, transaction_id, transaction_state FROM system.sessions \ + WHERE backend_pid = {backend_pid} ORDER BY last_seen_at DESC" + ), + ) .await, ) } async fn fetch_transaction_rows(env: &TestEnv, transaction_id: &str) -> Vec> { + fetch_transaction_rows_at(env, &kalamdb_shared_leader_base_url().await, transaction_id).await +} + +async fn fetch_transaction_rows_at( + env: &TestEnv, + base_url: &str, + transaction_id: &str, 
+) -> Vec> { sql_rows( - &env.kalamdb_sql(&format!( - "SELECT transaction_id, owner_id, origin, state, write_count \ - FROM system.transactions \ + &env.kalamdb_sql_at( + base_url, + &format!( + "SELECT transaction_id, owner_id, origin, state, write_count FROM system.transactions \ WHERE transaction_id = '{transaction_id}'" - )) + ), + ) .await, ) } +fn grpc_http_base_url(host: &str, grpc_port: u16) -> String { + let http_port = match grpc_port { + 9188 => 8080, + _ => grpc_port.saturating_sub(1000), + }; + format!("http://{host}:{http_port}") +} + +async fn kalamdb_shared_leader_base_url() -> String { + let (host, grpc_port) = shared_leader_grpc_target().await; + grpc_http_base_url(&host, grpc_port) +} + async fn wait_for_active_pg_transaction( env: &TestEnv, + base_url: &str, backend_pid: u32, timeout: Duration, ) -> (String, Vec) { let deadline = Instant::now() + timeout; loop { - let session_rows = fetch_session_rows(env, backend_pid).await; + let session_rows = fetch_session_rows_at(env, base_url, backend_pid).await; if let Some(transaction_id) = session_rows .iter() .find_map(|row| string_cell(row, 2).filter(|value| !value.is_empty())) { - let transaction_rows = fetch_transaction_rows(env, &transaction_id).await; + let transaction_rows = fetch_transaction_rows_at(env, base_url, &transaction_id).await; if let Some(transaction_row) = transaction_rows.first() { return (transaction_id, transaction_row.clone()); } @@ -88,11 +116,16 @@ async fn wait_for_active_pg_transaction( } } -async fn wait_for_transaction_cleanup(env: &TestEnv, transaction_id: &str, timeout: Duration) { +async fn wait_for_transaction_cleanup( + env: &TestEnv, + base_url: &str, + transaction_id: &str, + timeout: Duration, +) { let deadline = Instant::now() + timeout; loop { - if fetch_transaction_rows(env, transaction_id).await.is_empty() { + if fetch_transaction_rows_at(env, base_url, transaction_id).await.is_empty() { return; } @@ -113,10 +146,12 @@ async fn 
e2e_transaction_begin_commit_persists_rows() { let mut pg = env.pg_connect().await; let table = unique_name("profiles_tx_commit"); let qualified_table = format!("e2e.{table}"); + let columns = "id TEXT, name TEXT, age INTEGER"; - create_user_kalam_table(&pg, &table, "id TEXT, name TEXT, age INTEGER").await; + create_user_kalam_table(&pg, &table, columns).await; set_user_id(&pg, "txn-commit-user").await; await_user_shard_leader("txn-commit-user").await; + rebind_user_table_to_user_leader(&pg, "e2e", &table, columns, "txn-commit-user").await; let tx = pg.transaction().await.expect("begin"); tx.execute( @@ -144,10 +179,12 @@ async fn e2e_transaction_begin_rollback_discards_rows() { let mut pg = env.pg_connect().await; let table = unique_name("profiles_tx_rollback"); let qualified_table = format!("e2e.{table}"); + let columns = "id TEXT, name TEXT, age INTEGER"; - create_user_kalam_table(&pg, &table, "id TEXT, name TEXT, age INTEGER").await; + create_user_kalam_table(&pg, &table, columns).await; set_user_id(&pg, "txn-rollback-user").await; await_user_shard_leader("txn-rollback-user").await; + rebind_user_table_to_user_leader(&pg, "e2e", &table, columns, "txn-rollback-user").await; let tx = pg.transaction().await.expect("begin"); tx.execute( @@ -169,10 +206,12 @@ async fn e2e_transaction_duplicate_primary_key_commit_fails() { let mut pg = env.pg_connect().await; let table = unique_name("profiles_tx_duplicate"); let qualified_table = format!("e2e.{table}"); + let columns = "id TEXT, name TEXT, age INTEGER"; - create_user_kalam_table(&pg, &table, "id TEXT, name TEXT, age INTEGER").await; + create_user_kalam_table(&pg, &table, columns).await; set_user_id(&pg, "txn-duplicate-user").await; await_user_shard_leader("txn-duplicate-user").await; + rebind_user_table_to_user_leader(&pg, "e2e", &table, columns, "txn-duplicate-user").await; let tx = pg.transaction().await.expect("begin"); tx.execute( @@ -229,12 +268,12 @@ async fn 
e2e_transaction_mixed_native_and_kalamdb_rows_share_one_remote_transact let mut pg = env.pg_connect().await; let backend_pid = pg_backend_pid(&pg).await; - let pre_foreign_rows = fetch_session_rows(env, backend_pid).await; + let shared_leader_base_url = kalamdb_shared_leader_base_url().await; + let pre_foreign_rows = fetch_session_rows_at(env, &shared_leader_base_url, backend_pid).await; assert!( - pre_foreign_rows - .iter() - .all(|row| string_cell(row, 2).is_none()), - "remote transaction should not exist before the first foreign statement: {pre_foreign_rows:?}" + pre_foreign_rows.iter().all(|row| string_cell(row, 2).is_none()), + "remote transaction should not exist before the first foreign statement: \ + {pre_foreign_rows:?}" ); let tx = pg.transaction().await.expect("begin mixed transaction"); @@ -252,12 +291,12 @@ async fn e2e_transaction_mixed_native_and_kalamdb_rows_share_one_remote_transact .await .expect("insert first native row"); - let still_no_remote_rows = fetch_session_rows(env, backend_pid).await; + let still_no_remote_rows = + fetch_session_rows_at(env, &shared_leader_base_url, backend_pid).await; assert!( - still_no_remote_rows - .iter() - .all(|row| string_cell(row, 2).is_none()), - "native PostgreSQL work alone should not open a KalamDB transaction: {still_no_remote_rows:?}" + still_no_remote_rows.iter().all(|row| string_cell(row, 2).is_none()), + "native PostgreSQL work alone should not open a KalamDB transaction: \ + {still_no_remote_rows:?}" ); tx.execute( @@ -267,8 +306,13 @@ async fn e2e_transaction_mixed_native_and_kalamdb_rows_share_one_remote_transact .await .expect("insert first foreign row"); - let (transaction_id, transaction_row) = - wait_for_active_pg_transaction(env, backend_pid, Duration::from_secs(3)).await; + let (transaction_id, transaction_row) = wait_for_active_pg_transaction( + env, + &shared_leader_base_url, + backend_pid, + Duration::from_secs(3), + ) + .await; assert_eq!(string_cell(&transaction_row, 2).as_deref(), 
Some("PgRpc")); assert_eq!(string_cell(&transaction_row, 3).as_deref(), Some("open_write")); assert_eq!(i64_cell(&transaction_row, 4), Some(1)); @@ -287,28 +331,33 @@ async fn e2e_transaction_mixed_native_and_kalamdb_rows_share_one_remote_transact .await .expect("insert second foreign row"); - let session_rows = fetch_session_rows(env, backend_pid).await; + let session_rows = fetch_session_rows_at(env, &shared_leader_base_url, backend_pid).await; assert_eq!(string_cell(&session_rows[0], 2).as_deref(), Some(transaction_id.as_str())); - let repeated_transaction_rows = fetch_transaction_rows(env, &transaction_id).await; + let repeated_transaction_rows = + fetch_transaction_rows_at(env, &shared_leader_base_url, &transaction_id).await; assert_eq!(repeated_transaction_rows.len(), 1); - assert_eq!(string_cell(&repeated_transaction_rows[0], 0).as_deref(), Some(transaction_id.as_str())); + assert_eq!( + string_cell(&repeated_transaction_rows[0], 0).as_deref(), + Some(transaction_id.as_str()) + ); assert_eq!(string_cell(&repeated_transaction_rows[0], 2).as_deref(), Some("PgRpc")); assert_eq!(string_cell(&repeated_transaction_rows[0], 3).as_deref(), Some("open_write")); assert_eq!(i64_cell(&repeated_transaction_rows[0], 4), Some(2)); tx.commit().await.expect("commit mixed native + foreign transaction"); - wait_for_transaction_cleanup(env, &transaction_id, Duration::from_secs(5)).await; + wait_for_transaction_cleanup( + env, + &shared_leader_base_url, + &transaction_id, + Duration::from_secs(5), + ) + .await; let pg_reader = env.pg_connect().await; let native_rows = pg_reader - .query( - &format!( - "SELECT id, note FROM {qualified_native_table} ORDER BY id" - ), - &[], - ) + .query(&format!("SELECT id, note FROM {qualified_native_table} ORDER BY id"), &[]) .await .expect("query committed native rows"); let committed_native_rows = native_rows @@ -324,10 +373,8 @@ async fn e2e_transaction_mixed_native_and_kalamdb_rows_share_one_remote_transact ); let api_rows = sql_rows( - 
&env.kalamdb_sql(&format!( - "SELECT id, name FROM {qualified_foreign_table} ORDER BY id" - )) - .await, + &env.kalamdb_sql(&format!("SELECT id, name FROM {qualified_foreign_table} ORDER BY id")) + .await, ); assert_eq!(api_rows.len(), 2); assert_eq!(string_cell(&api_rows[0], 0).as_deref(), Some("foreign-1")); @@ -356,6 +403,7 @@ async fn e2e_transaction_kalamdb_commit_failure_rolls_back_native_postgres_rows( let mut pg = env.pg_connect().await; let backend_pid = pg_backend_pid(&pg).await; + let shared_leader_base_url = kalamdb_shared_leader_base_url().await; let tx = pg.transaction().await.expect("begin mixed failure transaction"); tx.execute( @@ -383,8 +431,13 @@ async fn e2e_transaction_kalamdb_commit_failure_rolls_back_native_postgres_rows( .await .expect("insert second native row"); - let (transaction_id, transaction_row) = - wait_for_active_pg_transaction(env, backend_pid, Duration::from_secs(3)).await; + let (transaction_id, transaction_row) = wait_for_active_pg_transaction( + env, + &shared_leader_base_url, + backend_pid, + Duration::from_secs(3), + ) + .await; assert_eq!(string_cell(&transaction_row, 2).as_deref(), Some("PgRpc")); assert_eq!(string_cell(&transaction_row, 3).as_deref(), Some("open_write")); assert_eq!(i64_cell(&transaction_row, 4), Some(2)); @@ -402,7 +455,13 @@ async fn e2e_transaction_kalamdb_commit_failure_rolls_back_native_postgres_rows( "unexpected duplicate key error: {message}" ); - wait_for_transaction_cleanup(env, &transaction_id, Duration::from_secs(5)).await; + wait_for_transaction_cleanup( + env, + &shared_leader_base_url, + &transaction_id, + Duration::from_secs(5), + ) + .await; let pg_reader = env.pg_connect().await; let native_count = count_rows(&pg_reader, &qualified_native_table, None).await; @@ -429,24 +488,30 @@ async fn e2e_transaction_switching_user_id_keeps_rows_in_separate_user_scopes() let env = TestEnv::global().await; let table = unique_name("profiles_tx_user_scope"); let qualified_table = format!("e2e.{table}"); 
+ let columns = "id TEXT, name TEXT, age INTEGER"; let (first_user_id, second_user_id) = same_user_shard_pair("txn-scope-user-a", "txn-scope-user-b").await; let pg = env.pg_connect().await; - create_user_kalam_table(&pg, &table, "id TEXT, name TEXT, age INTEGER").await; + create_user_kalam_table(&pg, &table, columns).await; await_user_shard_leader(&first_user_id).await; await_user_shard_leader(&second_user_id).await; + pg.disconnect().await; let (visible_a_in_tx, visible_b_in_tx) = retry_transient_user_leader_error("multi-user transaction inflight visibility", || { let env = &env; + let table = table.clone(); let qualified_table = qualified_table.clone(); + let columns = columns; let first_user_id = first_user_id.clone(); let second_user_id = second_user_id.clone(); async move { let mut pg = env.pg_connect().await; + set_user_id(&pg, &first_user_id).await; + rebind_user_table_to_user_leader(&pg, "e2e", &table, columns, &first_user_id).await; let tx = pg.transaction().await?; tx.batch_execute(&format!("SET LOCAL kalam.user_id = '{first_user_id}'")) diff --git a/pg/tests/e2e_perf/conversions.rs b/pg/tests/e2e_perf/conversions.rs index 0b11f3196..3cb7df4d8 100644 --- a/pg/tests/e2e_perf/conversions.rs +++ b/pg/tests/e2e_perf/conversions.rs @@ -1,10 +1,9 @@ -use super::common::{TestEnv, unique_name}; use serde_json::Value; +use super::common::{unique_name, TestEnv}; + fn large_json_text_payload() -> String { - let items = (0..4096) - .map(|index| format!("item-{index:04}")) - .collect::>(); + let items = (0..4096).map(|index| format!("item-{index:04}")).collect::>(); serde_json::to_string(&items).expect("serialize large json payload") } @@ -120,4 +119,4 @@ async fn e2e_perf_jsonb_to_scalar_stays_within_bounded_rust_allocations() { ); pg.disconnect().await; -} \ No newline at end of file +} diff --git a/pg/tests/e2e_perf/payload_sizes.rs b/pg/tests/e2e_perf/payload_sizes.rs index d9ae89a9d..9058bafc0 100644 --- a/pg/tests/e2e_perf/payload_sizes.rs +++ 
b/pg/tests/e2e_perf/payload_sizes.rs @@ -1,7 +1,8 @@ -use super::common::{count_rows, create_shared_kalam_table, unique_name, TestEnv}; +use std::{future::Future, time::Instant}; + use serde_json::Value; -use std::future::Future; -use std::time::Instant; + +use super::common::{count_rows, create_shared_kalam_table, unique_name, TestEnv}; const BENCH_ITERATIONS_PER_RUN: usize = 16; const BENCH_WARMUP_RUNS: usize = 1; @@ -114,7 +115,8 @@ fn benchmark_payload(bytes: usize, seed: usize) -> String { fn log_benchmark(label: &str, payload_bytes: usize, stats: &BenchStats) { eprintln!( - "[PERF] {label}: payload={}B warmup_runs={} measured_runs={} iterations/run={} run_totals_ms=[{}] median_total={:.1}ms median_avg={:.2}ms/op range={:.1}..{:.1}ms", + "[PERF] {label}: payload={}B warmup_runs={} measured_runs={} iterations/run={} \ + run_totals_ms=[{}] median_total={:.1}ms median_avg={:.2}ms/op range={:.1}..{:.1}ms", payload_bytes, BENCH_WARMUP_RUNS, BENCH_MEASURED_RUNS, @@ -136,7 +138,12 @@ fn log_select_breakdown(label: &str, breakdown: &SelectBreakdownStats) { breakdown.pg_full_row_query_median_ms - breakdown.api_full_row_http_median_ms; eprintln!( - "[PERF] {label} breakdown: samples={} pg_id_only_query_median={:.2}ms pg_full_row_query_median={:.2}ms pg_decode_median={:.2}ms api_id_only_http_median={:.2}ms api_id_only_parse_median={:.2}ms api_full_row_http_median={:.2}ms api_full_row_parse_median={:.2}ms inferred_pg_payload_fetch={:.2}ms inferred_api_payload_http={:.2}ms inferred_pg_over_http={:.2}ms", + "[PERF] {label} breakdown: samples={} pg_id_only_query_median={:.2}ms \ + pg_full_row_query_median={:.2}ms pg_decode_median={:.2}ms \ + api_id_only_http_median={:.2}ms api_id_only_parse_median={:.2}ms \ + api_full_row_http_median={:.2}ms api_full_row_parse_median={:.2}ms \ + inferred_pg_payload_fetch={:.2}ms inferred_api_payload_http={:.2}ms \ + inferred_pg_over_http={:.2}ms", SELECT_BREAKDOWN_SAMPLES, breakdown.pg_id_only_query_median_ms, 
breakdown.pg_full_row_query_median_ms, diff --git a/pg/tests/e2e_perf/stability.rs b/pg/tests/e2e_perf/stability.rs index 86ce23bd4..ccdd604e7 100644 --- a/pg/tests/e2e_perf/stability.rs +++ b/pg/tests/e2e_perf/stability.rs @@ -1,10 +1,13 @@ +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; + use super::common::{ bulk_delete_all, count_rows, create_shared_kalam_table, kalamdb_pid, pg_backend_pid, process_group_rss_kb, process_rss_kb, sample_process_group_peak_rss_kb, sample_process_peak_rss_kb, timed_query, unique_name, TestEnv, }; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; #[tokio::test] #[ntest::timeout(39000)] @@ -55,12 +58,9 @@ async fn e2e_perf_local_memory_stays_bounded_under_batch_insert_and_scan() { let final_delta_kb = final_rss_kb.saturating_sub(baseline_rss_kb); eprintln!( - "[PERF] Memory stability {TOTAL} rows: total {elapsed_ms:.1}ms, scan {scan_ms:.1}ms, baseline {} KB, peak {} KB, final {} KB, peak delta {} KB, final delta {} KB", - baseline_rss_kb, - peak_rss_kb, - final_rss_kb, - peak_delta_kb, - final_delta_kb + "[PERF] Memory stability {TOTAL} rows: total {elapsed_ms:.1}ms, scan {scan_ms:.1}ms, \ + baseline {} KB, peak {} KB, final {} KB, peak delta {} KB, final delta {} KB", + baseline_rss_kb, peak_rss_kb, final_rss_kb, peak_delta_kb, final_delta_kb ); assert!( @@ -138,18 +138,24 @@ async fn e2e_perf_multi_session_pg_extension_memory_stays_bounded() { let rows = client .query( &format!( - "SELECT id, payload FROM {qualified_table} WHERE worker_id = $1 ORDER BY id LIMIT 5" + "SELECT id, payload FROM {qualified_table} WHERE worker_id = $1 ORDER \ + BY id LIMIT 5" ), &[&(worker as i32)], ) .await .expect("multi-session point read"); - assert!(!rows.is_empty(), "session {worker} should be able to read its inserted rows"); + assert!( + !rows.is_empty(), + "session {worker} should be able to read its inserted rows" + ); let last_index = ((batch + 1) * ROWS_PER_BATCH) - 1; client .execute( - &format!("UPDATE 
{qualified_table} SET payload = payload || '-u' WHERE id = $1"), + &format!( + "UPDATE {qualified_table} SET payload = payload || '-u' WHERE id = $1" + ), &[&format!("sess-{worker}-{last_index}")], ) .await @@ -203,7 +209,8 @@ async fn e2e_perf_multi_session_pg_extension_memory_stays_bounded() { let final_delta_kb = final_rss_kb.saturating_sub(baseline_rss_kb); eprintln!( - "[PERF] Multi-session pg_kalam workload: sessions {}, rows {}, total {:.1}ms, baseline {} KB, peak {} KB, final {} KB, peak delta {} KB, final delta {} KB", + "[PERF] Multi-session pg_kalam workload: sessions {}, rows {}, total {:.1}ms, baseline {} \ + KB, peak {} KB, final {} KB, peak delta {} KB, final delta {} KB", SESSIONS, total_expected, elapsed_ms, diff --git a/pg/tests/e2e_perf/throughput.rs b/pg/tests/e2e_perf/throughput.rs index b75390b0a..7c8fcca0c 100644 --- a/pg/tests/e2e_perf/throughput.rs +++ b/pg/tests/e2e_perf/throughput.rs @@ -71,7 +71,8 @@ async fn e2e_perf_sequential_insert_100() { let rows_per_sec = TOTAL as f64 / (insert_ms / 1000.0); eprintln!( - "[PERF] Sequential INSERT {TOTAL} rows (txn): {insert_ms:.0}ms ({rows_per_sec:.0} rows/sec)" + "[PERF] Sequential INSERT {TOTAL} rows (txn): {insert_ms:.0}ms ({rows_per_sec:.0} \ + rows/sec)" ); assert!( @@ -106,7 +107,8 @@ async fn e2e_perf_sequential_insert_1k() { let autocommit_rps = TOTAL as f64 / (autocommit_ms / 1000.0); eprintln!( - "[PERF] Sequential INSERT {TOTAL} rows (autocommit): {autocommit_ms:.0}ms ({autocommit_rps:.0} rows/sec)" + "[PERF] Sequential INSERT {TOTAL} rows (autocommit): {autocommit_ms:.0}ms \ + ({autocommit_rps:.0} rows/sec)" ); bulk_delete_all(&pg, &qualified_table, "id").await; @@ -129,7 +131,8 @@ async fn e2e_perf_sequential_insert_1k() { assert!( txn_rps > 250.0 && txn_rps > autocommit_rps * 0.6, - "Transactional INSERT only {txn_rps:.0} rows/sec ({:.1}x autocommit) — expected > 250 rows/sec and > 0.6x autocommit", + "Transactional INSERT only {txn_rps:.0} rows/sec ({:.1}x autocommit) — expected > 
250 \ + rows/sec and > 0.6x autocommit", txn_rps / autocommit_rps ); @@ -152,13 +155,15 @@ async fn e2e_perf_sequential_insert_1k() { let pipe_rps = TOTAL as f64 / (pipe_ms / 1000.0); eprintln!( - "[PERF] Sequential INSERT {TOTAL} rows (pipelined): {pipe_ms:.0}ms ({pipe_rps:.0} rows/sec)" + "[PERF] Sequential INSERT {TOTAL} rows (pipelined): {pipe_ms:.0}ms ({pipe_rps:.0} \ + rows/sec)" ); eprintln!("[PERF] Pipeline speedup vs autocommit: {:.1}x", pipe_rps / autocommit_rps); assert!( pipe_rps > 300.0 && pipe_rps > autocommit_rps * 0.8, - "Pipelined INSERT only {pipe_rps:.0} rows/sec ({:.1}x autocommit) — expected > 300 rows/sec and > 0.8x autocommit", + "Pipelined INSERT only {pipe_rps:.0} rows/sec ({:.1}x autocommit) — expected > 300 \ + rows/sec and > 0.8x autocommit", pipe_rps / autocommit_rps ); pg.disconnect().await; @@ -252,7 +257,8 @@ async fn e2e_perf_point_select() { let avg_ms = total_ms / QUERIES as f64; eprintln!( - "[PERF] Point SELECT ({QUERIES} queries over {TOTAL} rows): avg {avg_ms:.1}ms/query, total {total_ms:.0}ms" + "[PERF] Point SELECT ({QUERIES} queries over {TOTAL} rows): avg {avg_ms:.1}ms/query, \ + total {total_ms:.0}ms" ); assert!(avg_ms < 5_000.0, "Point SELECT avg {avg_ms:.0}ms — expected < 5000ms"); pg.disconnect().await; @@ -368,7 +374,8 @@ async fn e2e_perf_user_table_insert_scan() { timed_query(&pg, &format!("SELECT id, data FROM {qualified_table}")).await; eprintln!( - "[PERF] User table: INSERT {TOTAL} rows in {insert_ms:.0}ms, SCAN returned {} rows in {scan_ms:.1}ms", + "[PERF] User table: INSERT {TOTAL} rows in {insert_ms:.0}ms, SCAN returned {} rows in \ + {scan_ms:.1}ms", rows.len() ); assert_eq!(rows.len(), TOTAL, "user scan count mismatch"); @@ -424,7 +431,8 @@ async fn e2e_perf_cross_verify_latency() { let max_ms = latencies.iter().cloned().fold(0.0_f64, f64::max); eprintln!( - "[PERF] Cross-verify ({ITERATIONS} iterations): avg {avg_ms:.1}ms, min {min_ms:.1}ms, max {max_ms:.1}ms" + "[PERF] Cross-verify ({ITERATIONS} 
iterations): avg {avg_ms:.1}ms, min {min_ms:.1}ms, max \ + {max_ms:.1}ms" ); assert!(avg_ms < 10_000.0, "Cross-verify avg {avg_ms:.0}ms — expected < 10000ms"); pg.disconnect().await; diff --git a/pg/tests/e2e_scenarios/ai_app.rs b/pg/tests/e2e_scenarios/ai_app.rs index 732738bd5..84a4c2b53 100644 --- a/pg/tests/e2e_scenarios/ai_app.rs +++ b/pg/tests/e2e_scenarios/ai_app.rs @@ -23,14 +23,16 @@ async fn e2e_scenario_ai_app_rag_and_agent_run_flow() { &pg, &schema, &chunks, - "id TEXT, document_id TEXT, chunk_no INTEGER, token_count INTEGER, chunk_text TEXT, embedding_model TEXT", + "id TEXT, document_id TEXT, chunk_no INTEGER, token_count INTEGER, chunk_text TEXT, \ + embedding_model TEXT", ) .await; create_shared_kalam_table_in_schema( &pg, &schema, &runs, - "id TEXT, user_id TEXT, prompt TEXT, status TEXT, retrieved_chunks INTEGER, answer_tokens INTEGER", + "id TEXT, user_id TEXT, prompt TEXT, status TEXT, retrieved_chunks INTEGER, answer_tokens \ + INTEGER", ) .await; create_shared_kalam_table_in_schema( @@ -45,13 +47,20 @@ async fn e2e_scenario_ai_app_rag_and_agent_run_flow() { "INSERT INTO {schema}.{documents} (id, title, source_uri, owner_team) VALUES ('doc-1', 'Billing playbook', 'kb://billing/playbook', 'support-ai'), ('doc-2', 'Retention handbook', 'kb://retention/handbook', 'growth-ai'); - INSERT INTO {schema}.{chunks} (id, document_id, chunk_no, token_count, chunk_text, embedding_model) VALUES - ('chunk-1', 'doc-1', 0, 120, 'Refund escalation requires invoice verification and account review.', 'text-embed-3-large'), - ('chunk-2', 'doc-1', 1, 88, 'High-priority tickets must capture billing entity and renewal date.', 'text-embed-3-large'), - ('chunk-3', 'doc-2', 0, 95, 'Retention outreach should include discount guardrails and contract term.', 'text-embed-3-large'); - INSERT INTO {schema}.{runs} (id, user_id, prompt, status, retrieved_chunks, answer_tokens) VALUES - ('run-1', 'agent-user-42', 'How should I handle a refund request before renewal?', 'running', 
0, 0); - INSERT INTO {schema}.{steps} (id, run_id, agent_name, tool_name, duration_ms, outcome) VALUES + INSERT INTO {schema}.{chunks} (id, document_id, chunk_no, token_count, chunk_text, \ + embedding_model) VALUES + ('chunk-1', 'doc-1', 0, 120, 'Refund escalation requires invoice verification and account \ + review.', 'text-embed-3-large'), + ('chunk-2', 'doc-1', 1, 88, 'High-priority tickets must capture billing entity and \ + renewal date.', 'text-embed-3-large'), + ('chunk-3', 'doc-2', 0, 95, 'Retention outreach should include discount guardrails and \ + contract term.', 'text-embed-3-large'); + INSERT INTO {schema}.{runs} (id, user_id, prompt, status, retrieved_chunks, \ + answer_tokens) VALUES + ('run-1', 'agent-user-42', 'How should I handle a refund request before renewal?', \ + 'running', 0, 0); + INSERT INTO {schema}.{steps} (id, run_id, agent_name, tool_name, duration_ms, outcome) \ + VALUES ('step-1', 'run-1', 'retriever', 'vector_search', 42, 'matched billing playbook'), ('step-2', 'run-1', 'planner', 'policy_check', 18, 'passed retention guardrails');" )) @@ -101,7 +110,8 @@ async fn e2e_scenario_ai_app_rag_and_agent_run_flow() { "UPDATE {schema}.{runs} SET status = 'completed', retrieved_chunks = 2, answer_tokens = 186 WHERE id = 'run-1'; - INSERT INTO {schema}.{steps} (id, run_id, agent_name, tool_name, duration_ms, outcome) VALUES + INSERT INTO {schema}.{steps} (id, run_id, agent_name, tool_name, duration_ms, outcome) \ + VALUES ('step-3', 'run-1', 'writer', 'answer_synthesis', 64, 'generated final response');" )) .await @@ -133,7 +143,8 @@ async fn e2e_scenario_ai_app_rag_and_agent_run_flow() { let remote_run = env .kalamdb_sql(&format!( - "SELECT status, retrieved_chunks, answer_tokens FROM {schema}.{runs} WHERE id = 'run-1'" + "SELECT status, retrieved_chunks, answer_tokens FROM {schema}.{runs} WHERE id = \ + 'run-1'" )) .await; let remote_run_text = serde_json::to_string(&remote_run).unwrap_or_default(); diff --git 
a/pg/tests/e2e_scenarios/chat_app.rs b/pg/tests/e2e_scenarios/chat_app.rs index 5a9a0700f..05bff0550 100644 --- a/pg/tests/e2e_scenarios/chat_app.rs +++ b/pg/tests/e2e_scenarios/chat_app.rs @@ -52,26 +52,47 @@ async fn e2e_scenario_chat_app_support_workspace_flow() { pg_admin .batch_execute(&format!( - "INSERT INTO {schema}.{messages} (id, room_id, sender_id, body, sentiment, token_count) VALUES - ('m1', 'room-billing', 'cust-100', 'My invoice is wrong and I need a refund today', 'negative', 11), - ('m2', 'room-billing', 'agent-7', 'I can review the invoice and open a credit ticket', 'neutral', 12), - ('m3', 'room-billing', 'cust-100', 'Please escalate this before renewal', 'negative', 6), - ('m4', 'room-retain', 'lead-4', 'Offer annual discount when renewal risk is high', 'positive', 9);" + "INSERT INTO {schema}.{messages} (id, room_id, sender_id, body, sentiment, \ + token_count) VALUES + ('m1', 'room-billing', 'cust-100', 'My invoice is wrong and I need a refund today', \ + 'negative', 11), + ('m2', 'room-billing', 'agent-7', 'I can review the invoice and open a credit \ + ticket', 'neutral', 12), + ('m3', 'room-billing', 'cust-100', 'Please escalate this before renewal', 'negative', \ + 6), + ('m4', 'room-retain', 'lead-4', 'Offer annual discount when renewal risk is high', \ + 'positive', 9);" )) .await .expect("seed chat messages"); pg_alice .execute( - &format!("INSERT INTO {schema}.{drafts} (id, room_id, body, last_model) VALUES ($1, $2, $3, $4)"), - &[&"draft-a1", &"room-billing", &"Prepared refund explanation with invoice steps", &"gpt-5.4"], + &format!( + "INSERT INTO {schema}.{drafts} (id, room_id, body, last_model) VALUES ($1, $2, \ + $3, $4)" + ), + &[ + &"draft-a1", + &"room-billing", + &"Prepared refund explanation with invoice steps", + &"gpt-5.4", + ], ) .await .expect("insert alice draft"); pg_bob .execute( - &format!("INSERT INTO {schema}.{drafts} (id, room_id, body, last_model) VALUES ($1, $2, $3, $4)"), - &[&"draft-b1", &"room-retain", 
&"Drafted renewal outreach for risk account", &"gpt-5.4"], + &format!( + "INSERT INTO {schema}.{drafts} (id, room_id, body, last_model) VALUES ($1, $2, \ + $3, $4)" + ), + &[ + &"draft-b1", + &"room-retain", + &"Drafted renewal outreach for risk account", + &"gpt-5.4", + ], ) .await .expect("insert bob draft"); @@ -105,7 +126,8 @@ async fn e2e_scenario_chat_app_support_workspace_flow() { let escalation_feed = env .kalamdb_sql(&format!( - "SELECT sender_id, body FROM {schema}.{messages} WHERE room_id = 'room-billing' ORDER BY id" + "SELECT sender_id, body FROM {schema}.{messages} WHERE room_id = 'room-billing' ORDER \ + BY id" )) .await; let escalation_feed_text = serde_json::to_string(&escalation_feed).unwrap_or_default(); diff --git a/pg/tests/e2e_scenarios/iot_agents.rs b/pg/tests/e2e_scenarios/iot_agents.rs index afc9819a8..f695a817f 100644 --- a/pg/tests/e2e_scenarios/iot_agents.rs +++ b/pg/tests/e2e_scenarios/iot_agents.rs @@ -60,7 +60,8 @@ async fn e2e_scenario_iot_agent_fleet_incident_flow() { let handle_a = tokio::spawn(async move { ingest_a .batch_execute(&format!( - "INSERT INTO {schema_a}.{telemetry_a} (id, device_id, temperature, battery_level, health) VALUES + "INSERT INTO {schema_a}.{telemetry_a} (id, device_id, temperature, battery_level, \ + health) VALUES ('t-1', 'dev-1', 74, 58, 'healthy'), ('t-2', 'dev-1', 88, 51, 'degraded'), ('t-3', 'dev-2', 82, 17, 'critical');" @@ -74,7 +75,8 @@ async fn e2e_scenario_iot_agent_fleet_incident_flow() { let handle_b = tokio::spawn(async move { ingest_b .batch_execute(&format!( - "INSERT INTO {schema_b}.{telemetry_b} (id, device_id, temperature, battery_level, health) VALUES + "INSERT INTO {schema_b}.{telemetry_b} (id, device_id, temperature, battery_level, \ + health) VALUES ('t-4', 'dev-3', 66, 42, 'healthy'), ('t-5', 'dev-3', 91, 39, 'degraded');" )) @@ -118,15 +120,33 @@ async fn e2e_scenario_iot_agent_fleet_incident_flow() { agent_pg .execute( - &format!("INSERT INTO {schema}.{commands} (id, device_id, 
action, status, issued_by) VALUES ($1, $2, $3, $4, $5)"), - &[&format!("cmd-{device_id}"), &device_id, &action, &"queued", &"fleet-agent"], + &format!( + "INSERT INTO {schema}.{commands} (id, device_id, action, status, issued_by) \ + VALUES ($1, $2, $3, $4, $5)" + ), + &[ + &format!("cmd-{device_id}"), + &device_id, + &action, + &"queued", + &"fleet-agent", + ], ) .await .expect("insert remediation command"); agent_pg .execute( - &format!("INSERT INTO {schema}.{alerts} (id, device_id, severity, summary, created_by) VALUES ($1, $2, $3, $4, $5)"), - &[&format!("alert-{device_id}"), &device_id, &severity, &format!("temp={max_temp}, battery={min_battery}"), &"fleet-agent"], + &format!( + "INSERT INTO {schema}.{alerts} (id, device_id, severity, summary, created_by) \ + VALUES ($1, $2, $3, $4, $5)" + ), + &[ + &format!("alert-{device_id}"), + &device_id, + &severity, + &format!("temp={max_temp}, battery={min_battery}"), + &"fleet-agent", + ], ) .await .expect("insert incident alert"); diff --git a/pg/tests/support/http_client.rs b/pg/tests/support/http_client.rs index bc2fb065d..74f4e4779 100644 --- a/pg/tests/support/http_client.rs +++ b/pg/tests/support/http_client.rs @@ -2,11 +2,14 @@ use std::time::Duration; use bytes::Bytes; use http_body_util::{BodyExt, Full}; -use hyper::header::{AUTHORIZATION, CONTENT_TYPE}; -use hyper::{Method, Request, StatusCode}; -use hyper_util::client::legacy::connect::HttpConnector; -use hyper_util::client::legacy::Client; -use hyper_util::rt::TokioExecutor; +use hyper::{ + header::{AUTHORIZATION, CONTENT_TYPE}, + Method, Request, StatusCode, +}; +use hyper_util::{ + client::legacy::{connect::HttpConnector, Client}, + rt::TokioExecutor, +}; use serde_json::Value; #[derive(Clone)] diff --git a/scripts/cluster.sh b/scripts/cluster.sh index 1ffc8bcd2..41cc4c816 100755 --- a/scripts/cluster.sh +++ b/scripts/cluster.sh @@ -354,7 +354,7 @@ api_addr = \"http://127.0.0.1:$NODE3_HTTP\" [server] host = "127.0.0.1" port = $http_port -workers = 0 
+workers = 2 api_version = "v1" [storage] @@ -368,6 +368,12 @@ max_message_size = 1048576 max_query_limit = 1000 default_query_limit = 50 +[datafusion] +memory_limit = 33554432 +query_parallelism = 2 +max_partitions = 2 +batch_size = 1024 + [logging] level = "info" logs_path = "$data_dir/logs" @@ -378,7 +384,7 @@ request_timeout = 30 keepalive_timeout = 75 max_connections = 25000 backlog = 2048 -worker_max_blocking_threads = 512 +worker_max_blocking_threads = 64 client_request_timeout = 5 client_disconnect_timeout = 2 max_header_size = 16384 @@ -388,6 +394,9 @@ max_queries_per_sec = 10000 max_messages_per_sec = 1000 max_subscriptions_per_user = 1000 +[topics] +visibility_timeout_secs = 10 + [cluster] enabled = true cluster_id = "local-cluster" @@ -509,7 +518,13 @@ stop_node() { check_node_health() { local http_port=$1 - curl -sf "http://127.0.0.1:$http_port/v1/api/healthcheck" >/dev/null 2>&1 + for _attempt in 1 2 3; do + if curl -sf --max-time 1 "http://127.0.0.1:$http_port/v1/api/healthcheck" >/dev/null 2>&1; then + return 0 + fi + sleep 0.1 + done + return 1 } check_cluster_ready() { @@ -739,7 +754,7 @@ ensure_admin_user() { curl -fsS \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $access_token" \ - -d '{"sql":"SELECT username FROM system.users WHERE username = '\''admin'\'' LIMIT 1"}' \ + -d '{"sql":"SELECT user_id FROM system.users WHERE user_id = '\''admin'\'' LIMIT 1"}' \ "$base_url/v1/api/sql" ) || return 1 diff --git a/ui/src/components/sql-preview/SqlPreviewDialog.test.tsx b/ui/src/components/sql-preview/SqlPreviewDialog.test.tsx new file mode 100644 index 000000000..5f774f9bb --- /dev/null +++ b/ui/src/components/sql-preview/SqlPreviewDialog.test.tsx @@ -0,0 +1,88 @@ +// @vitest-environment jsdom + +import "@testing-library/jest-dom/vitest"; +import { fireEvent, render, screen, waitFor } from "@testing-library/react"; +import { describe, expect, it, vi } from "vitest"; +import { SqlPreviewDialog } from "./SqlPreviewDialog"; + +const 
monacoEditorState = vi.hoisted(() => ({ + setValue: vi.fn(), +})); + +vi.mock("@monaco-editor/react", () => ({ + default: ({ + value, + onChange, + onMount, + options, + }: { + value?: string; + onChange?: (value: string) => void; + onMount?: (editor: { setValue: (nextValue: string) => void }) => void; + options?: { readOnly?: boolean }; + }) => { + onMount?.({ setValue: monacoEditorState.setValue }); + + return ( +