diff --git a/Cargo.lock b/Cargo.lock index c8e6b9a3098..a697c765790 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,9 +113,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f" +checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ "proc-macro2", "quote", @@ -167,7 +167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom 0.2.7", + "getrandom 0.2.8", "instant", "pin-project-lite", "rand 0.8.5", @@ -236,7 +236,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "clap 3.2.22", + "clap 3.2.23", "env_logger", "lazy_static", "lazycell", @@ -352,9 +352,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.0" +version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "byteorder" @@ -521,9 +521,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.22" +version = "3.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" +checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" dependencies = [ "atty", "bitflags", @@ -648,7 +648,7 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "344adc371239ef32293cb1c4fe519592fcf21206c79c02854320afcdf3ab4917" dependencies = [ - "time 0.3.15", + "time 0.3.16", "version_check", ] @@ -729,7 +729,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.22", + "clap 3.2.23", "criterion-plot", "futures", "itertools", @@ -937,6 +937,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "curve25519-dalek" version = "3.2.0" @@ -952,9 +962,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f83d0ebf42c6eafb8d7c52f7e5f2d3003b89c7aa4fd2b79229209459a849af8" +checksum = "6b7d4e43b25d3c994662706a1d4fcfc32aaa6afd287502c111b237093bb23f3a" dependencies = [ "cc", "cxxbridge-flags", @@ -964,9 +974,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07d050484b55975889284352b0ffc2ecbda25c0c55978017c132b29ba0818a86" +checksum = "84f8829ddc213e2c1368e51a2564c552b65a8cb6a28f31e576270ac81d5e5827" dependencies = [ "cc", "codespan-reporting", @@ -979,15 +989,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d2199b00553eda8012dfec8d3b1c75fce747cf27c169a270b3b99e3448ab78" +checksum = "e72537424b474af1460806647c41d4b6d35d09ef7fe031c5c2fa5766047cc56a" [[package]] name = "cxxbridge-macro" -version = "1.0.79" +version = "1.0.80" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb67a6de1f602736dd7eaead0080cf3435df806c61b24b13328db128c58868f" +checksum = "309e4fb93eed90e1e14bea0da16b209f81813ba9fc7830c20ed151dd7bc0a4d7" dependencies = [ "proc-macro2", "quote", @@ -1038,7 +1048,7 @@ dependencies = [ "cfg-if 1.0.0", "hashbrown", "lock_api", - "parking_lot_core 0.9.3", + "parking_lot_core 0.9.4", ] [[package]] @@ -1263,7 +1273,7 @@ checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" [[package]] name = "dropshot" version = "0.8.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#961a6715e832c490a6040ec01ebf2c3416fc95de" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ed0b9c85e085f999013d030fada9a5d7f5ced69d" dependencies = [ "async-stream", "async-trait", @@ -1303,7 +1313,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.8.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#961a6715e832c490a6040ec01ebf2c3416fc95de" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ed0b9c85e085f999013d030fada9a5d7f5ced69d" dependencies = [ "proc-macro2", "quote", @@ -1501,14 +1511,14 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3" dependencies = [ "cfg-if 1.0.0", "libc", "redox_syscall", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1821,9 +1831,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if 1.0.0", "libc", @@ -1863,9 +1873,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca32592cf21ac7ccab1825cd87f6c9b3d9022c44d086172ed0966bec8af30be" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" dependencies = [ "bytes", "fnv", @@ -2419,9 +2429,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.135" +version = "0.2.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" +checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" [[package]] name = "libloading" @@ -2605,14 +2615,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -2683,9 +2693,9 @@ checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" [[package]] name = "newline-converter" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d6f81c2b19eebbc4249b3ca6aff70ae05bf18d6a99b7cc63cf0248774e640565" +checksum = "f05d47d2bcf073a0a8864360195ac45e76acd18cadfe632ef01757a179070b32" [[package]] name = "newtype_derive" @@ -2719,6 +2729,7 @@ dependencies = [ "db-macros", "diesel", "hex", + "internal-dns-client", "ipnetwork", "macaddr", "newtype_derive", @@ -3060,6 +3071,7 @@ dependencies = [ "oximeter-producer", "parse-display", "pq-sys", + "pretty_assertions", "rand 0.8.5", "ref-cast", "regex", @@ -3391,6 +3403,15 @@ dependencies = [ "syn", ] +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + [[package]] name = "oxide-client" version = "0.1.0" @@ -3570,7 +3591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.3", + "parking_lot_core 0.9.4", ] [[package]] @@ -3589,15 +3610,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" dependencies = [ "cfg-if 1.0.0", "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -3798,9 +3819,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "plotters" @@ -3969,6 +3990,18 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" +[[package]] +name = "pretty_assertions" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +dependencies = [ + "ctor", + "diff", + "output_vt100", + "yansi", +] + [[package]] name = "proc-macro-crate" version = "1.2.1" @@ -4016,7 +4049,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.2.1-dev" -source = "git+https://github.com/oxidecomputer/progenitor#54382d83d0dec348a56849f4cf8cc55479ffcb48" +source = "git+https://github.com/oxidecomputer/progenitor#f9708ef56c3a0b88dc88fc0a0fbf0d8885fdd3e8" dependencies = [ "anyhow", "clap 4.0.18", @@ -4031,7 +4064,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.2.1-dev" -source = "git+https://github.com/oxidecomputer/progenitor#54382d83d0dec348a56849f4cf8cc55479ffcb48" +source = "git+https://github.com/oxidecomputer/progenitor#f9708ef56c3a0b88dc88fc0a0fbf0d8885fdd3e8" dependencies = [ "bytes", "futures-core", @@ -4045,7 +4078,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.2.1-dev" -source = "git+https://github.com/oxidecomputer/progenitor#54382d83d0dec348a56849f4cf8cc55479ffcb48" +source = "git+https://github.com/oxidecomputer/progenitor#f9708ef56c3a0b88dc88fc0a0fbf0d8885fdd3e8" dependencies = [ "getopts", "heck 0.4.0", @@ -4067,7 +4100,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.2.1-dev" -source = 
"git+https://github.com/oxidecomputer/progenitor#54382d83d0dec348a56849f4cf8cc55479ffcb48" +source = "git+https://github.com/oxidecomputer/progenitor#f9708ef56c3a0b88dc88fc0a0fbf0d8885fdd3e8" dependencies = [ "openapiv3", "proc-macro2", @@ -4271,7 +4304,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.7", + "getrandom 0.2.8", ] [[package]] @@ -4340,7 +4373,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "getrandom 0.2.7", + "getrandom 0.2.8", "redox_syscall", "thiserror", ] @@ -4543,9 +4576,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.20.6" +version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" dependencies = [ "log", "ring", @@ -4657,7 +4690,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" dependencies = [ "lazy_static", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -4858,9 +4891,9 @@ dependencies = [ [[package]] name = "serde_plain" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95455e7e29fada2052e72170af226fbe368a4ca33dee847875325d9fdb133858" +checksum = "d6018081315db179d0ce57b1fe4b62a12a0028c9cf9bbef868c9cf477b3c34ae" dependencies = [ "serde", ] @@ -4912,7 +4945,7 @@ dependencies = [ "serde", "serde_json", "serde_with_macros", - "time 0.3.15", + "time 0.3.16", ] [[package]] @@ -5147,7 +5180,7 @@ dependencies = [ "hostname", "slog", "slog-json", - "time 0.3.15", + "time 0.3.16", ] [[package]] @@ -5188,7 +5221,7 @@ dependencies = [ "serde", "serde_json", "slog", - "time 0.3.15", + "time 0.3.16", ] [[package]] @@ -5223,7 +5256,7 @@ dependencies = [ "slog", "term", "thread_local", - "time 0.3.15", + "time 0.3.16", ] [[package]] @@ -5254,9 +5287,9 @@ dependencies = [ [[package]] name = "snafu" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd726aec4ebad65756394ff89a9b9598793d4e30121cd71690244c1e497b3aee" +checksum = "a152ba99b054b22972ee794cf04e5ef572da1229e33b65f3c57abbff0525a454" dependencies = [ "doc-comment", "snafu-derive", @@ -5264,9 +5297,9 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712529e9b0b014eabaa345b38e06032767e3dc393e8b017e853b1d7247094e74" +checksum = "d5e79cdebbabaebb06a9bdbaedc7f159b410461f63611d4d0e3fb0fab8fed850" dependencies = [ "heck 0.4.0", "proc-macro2", @@ -5359,7 +5392,7 @@ version = "0.1.0" source = "git+http://github.com/oxidecomputer/sprockets?rev=77df31efa5619d0767ffc837ef7468101608aee9#77df31efa5619d0767ffc837ef7468101608aee9" dependencies = [ "anyhow", - "clap 3.2.22", + "clap 3.2.23", "derive_more", "futures", "pin-project", @@ -5652,9 +5685,9 @@ checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" [[package]] name = "textwrap" -version = "0.15.1" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" @@ -5709,22 +5742,32 @@ dependencies = [ [[package]] name = "time" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d634a985c4d4238ec39cacaed2e7ae552fbd3c476b552c1deac3021b7d7eaf0c" +checksum = "0fab5c8b9980850e06d92ddbe3ab839c062c801f3927c0fb8abd6fc8e918fbca" dependencies = [ "itoa", "libc", "num_threads", "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "65bb801831d812c562ae7d2bfb531f26e66e4e1f6b17307ba4149c5064710e5b" +dependencies = [ + "time-core", +] [[package]] name = "tiny-keccak" @@ -5985,7 +6028,7 @@ dependencies = [ "radix_trie", "rand 0.8.5", "thiserror", - "time 0.3.15", + "time 0.3.16", "tokio", "tracing", "trust-dns-proto", @@ -6050,7 +6093,7 @@ dependencies = [ "futures-util", "serde", "thiserror", - "time 0.3.15", + "time 0.3.16", "tokio", "toml", "tracing", @@ -6317,7 +6360,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ - "getrandom 0.2.7", + "getrandom 0.2.8", "serde", ] @@ -6583,43 +6626,100 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", +] + +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" + [[package]] name = "winreg" version = "0.7.0" @@ -6667,6 +6767,12 @@ dependencies = [ "libc", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "zerocopy" version = "0.3.0" diff --git a/common/src/address.rs b/common/src/address.rs index 5fd69543456..6fc14964686 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -20,12 +20,12 @@ pub const SLED_PREFIX: u8 = 64; /// The amount of redundancy for DNS servers. /// /// Must be less than MAX_DNS_REDUNDANCY. -pub const DNS_REDUNDANCY: usize = 1; +pub const DNS_REDUNDANCY: u32 = 1; /// The maximum amount of redundancy for DNS servers. /// /// This determines the number of addresses which are /// reserved for DNS servers. -pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const MAX_DNS_REDUNDANCY: u32 = 5; pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; @@ -34,10 +34,12 @@ pub const SLED_AGENT_PORT: u16 = 12345; /// The port propolis-server listens on inside the propolis zone. pub const PROPOLIS_PORT: u16 = 12400; pub const COCKROACH_PORT: u16 = 32221; +pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; pub const OXIMETER_PORT: u16 = 12223; pub const DENDRITE_PORT: u16 = 12224; +pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; // Anycast is a mechanism in which a single IP address is shared by multiple @@ -85,6 +87,11 @@ pub struct DnsSubnet { } impl DnsSubnet { + /// Creates a subnet, given an address for the DNS server itself. + pub fn from_dns_address(address: Ipv6Addr) -> Self { + Self { subnet: Ipv6Subnet::new(address) } + } + /// Returns the DNS server address within the subnet. /// /// This is the first address within the subnet. @@ -180,7 +187,7 @@ mod test { // Observe the first DNS subnet within this reserved rack subnet. 
let dns_subnets = rack_subnet.get_dns_subnets(); - assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len() as u32); // The DNS address and GZ address should be only differing by one. assert_eq!( diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index b8fc1bcbc26..1670e9b3cd5 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -89,10 +89,11 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); -/* Add an index which lets us look up sleds on a rack */ +/* Add an index which lets us look up the sleds on a rack */ CREATE INDEX ON omicron.public.sled ( rack_id -) WHERE time_deleted IS NULL; +) WHERE + time_deleted IS NULL; CREATE INDEX ON omicron.public.sled ( id @@ -125,7 +126,40 @@ CREATE TABLE omicron.public.service ( /* Add an index which lets us look up the services on a sled */ CREATE INDEX ON omicron.public.service ( - sled_id + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind +); + +/* + * Additional context for services of "kind = nexus" + * This table should be treated as an optional extension + * of the service table itself. + */ +CREATE TABLE omicron.public.nexus_service ( + id UUID PRIMARY KEY, + + /* FK to the service table */ + service_id UUID NOT NULL, + /* FK to the instance_external_ip table */ + external_ip_id UUID NOT NULL, + /* FK to the nexus_certificate table */ + certificate_id UUID NOT NULL +); + +/* + * Information about x509 certificates used to serve Nexus' external interface. + * These certificates may be used by multiple instantiations of the Nexus + * service simultaneously. + */ +CREATE TABLE omicron.public.nexus_certificate ( + id UUID PRIMARY KEY, + public_cert BYTES NOT NULL, + private_key BYTES NOT NULL ); /* @@ -148,6 +182,11 @@ CREATE TABLE omicron.public.Zpool ( total_size INT NOT NULL ); +/* Create an index which allows looking up all zpools on a sled */ +CREATE INDEX on omicron.public.Zpool ( + sled_id +) WHERE time_deleted IS NULL; + CREATE TYPE omicron.public.dataset_kind AS ENUM ( 'crucible', 'cockroach', @@ -184,6 +223,11 @@ CREATE TABLE omicron.public.Dataset ( ) ); +/* Create an index which allows looking up all datasets in a pool */ +CREATE INDEX on omicron.public.Dataset ( + pool_id +) WHERE time_deleted IS NULL; + /* Create an index on the size usage for Crucible's allocation */ CREATE INDEX on omicron.public.Dataset ( size_used diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 5616e0dd5d1..85bed2753ad 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -154,11 +154,11 @@ When we deploy, we're effectively creating a number of different zones for all the components that make up Omicron (Nexus, Clickhouse, Crucible, etc). Since all these services run in different zones they cannot communicate with each other (and Sled Agent in the global zone) via `localhost`. In practice, -we'll assign addresses as per RFD 63 as well as incorporating DNS based +we assign addresses as per RFD 63 as well as incorporating DNS based service discovery. -For the purposes of local development today, we specify some hardcoded IPv6 -unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1::/64`. +For the purposes of local development today, we specify some hardcoded IP +addresses. 
If you'd like to modify these values to suit your local network, you can modify them within the https://github.com/oxidecomputer/omicron/tree/main/smf[`smf/` subdirectory]. @@ -171,15 +171,6 @@ be set as a default route for the Nexus zone. |=================================================================================================== | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. -| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` -| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` -| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` -| Oximeter | `[fd00:1122:3344:0101::4]:12223` -| Clickhouse | `[fd00:1122:3344:0101::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` -| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` | Nexus: External API | `192.168.1.20:80` | Internet Gateway | None, but can be set in `smf/sled-agent/config-rss.toml` |=================================================================================================== diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 6391f4632b4..994601d7d4c 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -125,6 +125,7 @@ nexus-test-utils-macros = { path = "test-utils-macros" } nexus-test-utils = { path = "test-utils" } omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" +pretty_assertions = "1.3" regex = "1.6.0" subprocess = "0.2.9" term = "0.7" diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 3ff0b4e02a4..49bd757e37c 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -12,6 +12,7 @@ anyhow = "1.0" chrono = { version = "0.4", features = ["serde"] } diesel = { version = "2.0.2", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } hex = "0.4.3" +internal-dns-client = { path = "../../internal-dns-client" } ipnetwork = "0.20" macaddr = { version = "1.0.1", features = [ "serde_std" ]} newtype_derive = "0.1.6" diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index d68097af274..d58cde4fb4b 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -4,11 +4,14 @@ use super::{DatasetKind, Generation, Region, SqlU16}; use crate::collection::DatastoreCollectionConfig; +use crate::ipv6; use crate::schema::{dataset, region}; use chrono::{DateTime, Utc}; use db_macros::Asset; +use internal_dns_client::names::{BackendName, ServiceName, AAAA, SRV}; +use nexus_types::identity::Asset; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Database representation of a Dataset. 
@@ -35,10 +38,10 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipnetwork::IpNetwork, + ip: ipv6::Ipv6Addr, port: SqlU16, - kind: DatasetKind, + pub kind: DatasetKind, pub size_used: Option<i64>, } @@ -46,7 +49,7 @@ impl Dataset { pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddr, + addr: SocketAddrV6, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -65,12 +68,26 @@ impl Dataset { } } - pub fn address(&self) -> SocketAddr { + pub fn address(&self) -> SocketAddrV6 { self.address_with_port(self.port.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddr { - SocketAddr::new(self.ip.ip(), port) + pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } + + pub fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + pub fn srv(&self) -> SRV { + match self.kind { + DatasetKind::Crucible => { + SRV::Backend(BackendName::Crucible, self.id()) + } + DatasetKind::Clickhouse => SRV::Service(ServiceName::Clickhouse), + DatasetKind::Cockroach => SRV::Service(ServiceName::Cockroach), + } + } } diff --git a/nexus/db-model/src/dataset_kind.rs b/nexus/db-model/src/dataset_kind.rs index e2c0510ab3d..b131e6b7a82 100644 --- a/nexus/db-model/src/dataset_kind.rs +++ b/nexus/db-model/src/dataset_kind.rs @@ -4,14 +4,15 @@ use super::impl_enum_type; use nexus_types::internal_api; +use omicron_common::address::{CLICKHOUSE_PORT, COCKROACH_PORT, CRUCIBLE_PORT}; use serde::{Deserialize, Serialize}; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "dataset_kind"))] pub struct DatasetKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = DatasetKindEnum)] pub enum DatasetKind; @@ -21,6 +22,16 @@ impl_enum_type!( Clickhouse => b"clickhouse" ); +impl DatasetKind { + pub fn port(&self) -> u16 { + match self { + DatasetKind::Crucible => CRUCIBLE_PORT, + DatasetKind::Cockroach => COCKROACH_PORT, + DatasetKind::Clickhouse => CLICKHOUSE_PORT, + } + } +} + impl From<internal_api::params::DatasetKind> for DatasetKind { fn from(k: internal_api::params::DatasetKind) -> Self { match k { diff --git a/nexus/db-model/src/ipv6.rs b/nexus/db-model/src/ipv6.rs index 2b494100825..60f7c0558c6 100644 --- a/nexus/db-model/src/ipv6.rs +++ b/nexus/db-model/src/ipv6.rs @@ -16,9 +16,19 @@ use diesel::sql_types::Inet; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; use omicron_common::api::external::Error; +use serde::{Deserialize, Serialize}; #[derive( - Clone, Copy, AsExpression, FromSqlRow, PartialEq, Ord, PartialOrd, Eq, + Clone, + Copy, + AsExpression, + FromSqlRow, + PartialEq, + Ord, + PartialOrd, + Eq, + Deserialize, + Serialize, )] #[diesel(sql_type = Inet)] pub struct Ipv6Addr(std::net::Ipv6Addr); diff --git a/nexus/db-model/src/queries/mod.rs b/nexus/db-model/src/queries/mod.rs index 20c94b8285e..fe77594fd08 100644 --- a/nexus/db-model/src/queries/mod.rs +++ b/nexus/db-model/src/queries/mod.rs @@ -5,3 +5,4 @@ //! Subqueries used in CTEs.
pub mod region_allocation; +pub mod service_provision; diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs index b150b05377b..4bd5b89239a 100644 --- a/nexus/db-model/src/queries/region_allocation.rs +++ b/nexus/db-model/src/queries/region_allocation.rs @@ -143,11 +143,12 @@ diesel::allow_tables_to_appear_in_same_query!( dataset, ); +diesel::allow_tables_to_appear_in_same_query!(candidate_regions, zpool,); + diesel::allow_tables_to_appear_in_same_query!( do_insert, candidate_regions, dataset, - zpool, ); diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,); diff --git a/nexus/db-model/src/queries/service_provision.rs b/nexus/db-model/src/queries/service_provision.rs new file mode 100644 index 00000000000..1f17c2b0418 --- /dev/null +++ b/nexus/db-model/src/queries/service_provision.rs @@ -0,0 +1,77 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Service provisioning subqueries used by CTEs. + +table! { + sled_allocation_pool { + id -> Uuid, + } +} + +table! { + previously_allocated_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::ServiceKindEnum, + } +} + +table! { + old_service_count (count) { + count -> Int8, + } +} + +table! { + new_service_count (count) { + count -> Int8, + } +} + +table! { + candidate_sleds { + id -> Uuid, + } +} + +table! { + new_internal_ips { + id -> Uuid, + last_used_address -> Inet, + } +} + +diesel::allow_tables_to_appear_in_same_query!( + candidate_sleds, + new_internal_ips, ); + +table! { + candidate_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::ServiceKindEnum, + } +} + +table! { + inserted_services { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + ip -> Inet, + kind -> crate::ServiceKindEnum, + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index df045c49284..1a88b6747f3 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -397,6 +397,23 @@ table! { } } +table! { + nexus_service (id) { + id -> Uuid, + service_id -> Uuid, + external_ip_id -> Uuid, + certificate_id -> Uuid, + } +} + +table! { + nexus_certificate (id) { + id -> Uuid, + public_cert -> Binary, + private_key -> Binary, + } +} + table! { zpool (id) { id -> Uuid, @@ -657,4 +674,5 @@ allow_tables_to_appear_in_same_query!( user_builtin, role_builtin, role_assignment, + zpool, ); diff --git a/nexus/db-model/src/service.rs b/nexus/db-model/src/service.rs index 3b9e57cfc62..5a7739416f9 100644 --- a/nexus/db-model/src/service.rs +++ b/nexus/db-model/src/service.rs @@ -4,13 +4,20 @@ use super::ServiceKind; use crate::ipv6; +use crate::schema::nexus_certificate; +use crate::schema::nexus_service; use crate::schema::service; use db_macros::Asset; -use std::net::Ipv6Addr; +use internal_dns_client::names::{ServiceName, AAAA, SRV}; +use nexus_types::identity::Asset; +use omicron_common::address::{ + DENDRITE_PORT, DNS_SERVER_PORT, NEXUS_INTERNAL_PORT, OXIMETER_PORT, +}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; /// Representation of services which may run on Sleds.
-#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = service)] pub struct Service { #[diesel(embed)] @@ -21,6 +28,25 @@ pub struct Service { pub kind: ServiceKind, } +#[derive(Queryable, Insertable, Debug, Clone, Selectable, PartialEq)] +#[diesel(table_name = nexus_service)] +pub struct NexusService { + id: Uuid, + + service_id: Uuid, + external_ip_id: Uuid, + certificate_id: Uuid, +} + +#[derive(Queryable, Insertable, Debug, Clone, Selectable, PartialEq)] +#[diesel(table_name = nexus_certificate)] +pub struct NexusCertificate { + id: Uuid, + + public_cert: Vec<u8>, + private_key: Vec<u8>, +} + impl Service { pub fn new( id: Uuid, @@ -35,4 +61,30 @@ impl Service { kind, } } + + pub fn aaaa(&self) -> AAAA { + AAAA::Zone(self.id()) + } + + pub fn srv(&self) -> SRV { + match self.kind { + ServiceKind::InternalDNS => SRV::Service(ServiceName::InternalDNS), + ServiceKind::Nexus => SRV::Service(ServiceName::Nexus), + ServiceKind::Oximeter => SRV::Service(ServiceName::Oximeter), + ServiceKind::Dendrite => SRV::Service(ServiceName::Dendrite), + ServiceKind::Tfport => SRV::Service(ServiceName::Tfport), + } + } + + pub fn address(&self) -> SocketAddrV6 { + let port = match self.kind { + ServiceKind::InternalDNS => DNS_SERVER_PORT, + ServiceKind::Nexus => NEXUS_INTERNAL_PORT, + ServiceKind::Oximeter => OXIMETER_PORT, + ServiceKind::Dendrite => DENDRITE_PORT, + // TODO: Is this okay? + ServiceKind::Tfport => 0, + }; + SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + } } diff --git a/nexus/db-model/src/service_kind.rs b/nexus/db-model/src/service_kind.rs index 9b6e08bee1c..363fd11717c 100644 --- a/nexus/db-model/src/service_kind.rs +++ b/nexus/db-model/src/service_kind.rs @@ -7,11 +7,11 @@ use nexus_types::internal_api; use serde::{Deserialize, Serialize}; impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] + #[derive(Clone, SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "service_kind"))] pub struct ServiceKindEnum; - #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] #[diesel(sql_type = ServiceKindEnum)] pub enum ServiceKind; @@ -29,7 +29,9 @@ impl From<internal_api::params::ServiceKind> for ServiceKind { internal_api::params::ServiceKind::InternalDNS => { ServiceKind::InternalDNS } - internal_api::params::ServiceKind::Nexus => ServiceKind::Nexus, + internal_api::params::ServiceKind::Nexus { .. } => { + ServiceKind::Nexus + } internal_api::params::ServiceKind::Oximeter => { ServiceKind::Oximeter } diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 73530f655f4..b845810618b 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -14,7 +14,7 @@ use std::net::SocketAddrV6; use uuid::Uuid; /// Database representation of a Sled. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = sled)] pub struct Sled { #[diesel(embed)] diff --git a/nexus/db-model/src/zpool.rs b/nexus/db-model/src/zpool.rs index bad66359131..42489b4e09f 100644 --- a/nexus/db-model/src/zpool.rs +++ b/nexus/db-model/src/zpool.rs @@ -14,7 +14,7 @@ use uuid::Uuid; /// /// A zpool represents a ZFS storage pool, allocated on a single /// physical sled.
-#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = zpool)] pub struct Zpool { #[diesel(embed)] diff --git a/nexus/src/app/background/fakes.rs b/nexus/src/app/background/fakes.rs new file mode 100644 index 00000000000..86bf04236f4 --- /dev/null +++ b/nexus/src/app/background/fakes.rs @@ -0,0 +1,184 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Test-only implementations of interfaces used by background tasks. + +use super::interfaces::{ + DnsUpdaterInterface, NexusInterface, SledClientInterface, +}; + +use crate::db::datastore::DataStore; +use async_trait::async_trait; +use internal_dns_client::{ + multiclient::{AAAARecord, DnsError}, + names::SRV, +}; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use uuid::Uuid; + +/// A fake implementation of a Sled Agent client. +/// +/// In lieu of any networked requests, stores the requested services and +/// datasets for later inspection. +pub struct FakeSledClientInner { + service_request: Option<SledAgentTypes::ServiceEnsureBody>, + dataset_requests: Vec<SledAgentTypes::DatasetEnsureBody>, +} + +#[derive(Clone)] +pub struct FakeSledClient { + inner: Arc<Mutex<FakeSledClientInner>>, +} + +impl FakeSledClient { + fn new() -> Arc<Self> { + Arc::new(Self { + inner: Arc::new(Mutex::new(FakeSledClientInner { + service_request: None, + dataset_requests: vec![], + })), + }) + } + + /// Returns the requests to create services on the sled. + pub fn service_requests(&self) -> Vec<SledAgentTypes::ServiceZoneRequest> { + self.inner + .lock() + .unwrap() + .service_request + .as_ref() + .map(|request| request.services.clone()) + .unwrap_or(vec![]) + } + + /// Returns the requests to create datasets on the sled. + pub fn dataset_requests(&self) -> Vec<SledAgentTypes::DatasetEnsureBody> { + self.inner.lock().unwrap().dataset_requests.clone() + } +} + +#[async_trait] +impl SledClientInterface for FakeSledClient { + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error> { + let old = + self.inner.lock().unwrap().service_request.replace(body.clone()); + + // NOTE: This is technically a limitation of the fake. + // + // We can relax this constraint if it's useful, but we should + // deal with conflicts of prior invocations. + assert!( + old.is_none(), + "Should only set services once (was {old:?}, inserted {body:?})" + ); + Ok(()) + } + + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error> { + self.inner.lock().unwrap().dataset_requests.push(body.clone()); + Ok(()) + } +} + +/// Provides an abstraction of Nexus which can be used by tests. +/// +/// Wraps a real datastore, but fakes out all networked requests.
+#[derive(Clone)] +pub struct FakeNexus { + datastore: Arc<DataStore>, + rack_id: Uuid, + rack_subnet: Ipv6Subnet<RACK_PREFIX>, + sleds: Arc<Mutex<HashMap<Uuid, Arc<FakeSledClient>>>>, +} + +impl FakeNexus { + pub fn new( + datastore: Arc<DataStore>, + rack_subnet: Ipv6Subnet<RACK_PREFIX>, + ) -> Arc<Self> { + Arc::new(Self { + datastore, + rack_id: Uuid::new_v4(), + rack_subnet, + sleds: Arc::new(Mutex::new(HashMap::new())), + }) + } +} + +#[async_trait] +impl NexusInterface<FakeSledClient> for FakeNexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + + fn rack_subnet(&self) -> Ipv6Subnet<RACK_PREFIX> { + self.rack_subnet + } + + fn datastore(&self) -> &Arc<DataStore> { + &self.datastore + } + + async fn sled_client( + &self, + id: &Uuid, + ) -> Result<Arc<FakeSledClient>, Error> { + let sled = self + .sleds + .lock() + .unwrap() + .entry(*id) + .or_insert_with(|| FakeSledClient::new()) + .clone(); + Ok(sled) + } +} + +/// A fake implementation of the DNS updater. +/// +/// Avoids all networking, instead storing all outgoing requests for later +/// inspection. +#[derive(Clone)] +pub struct FakeDnsUpdater { + records: Arc<Mutex<HashMap<SRV, Vec<AAAARecord>>>>, +} + +impl FakeDnsUpdater { + pub fn new() -> Self { + Self { records: Arc::new(Mutex::new(HashMap::new())) } + } + + // Get a copy of all records. + pub fn records(&self) -> HashMap<SRV, Vec<AAAARecord>> { + self.records.lock().unwrap().clone() + } +} + +#[async_trait] +impl DnsUpdaterInterface for FakeDnsUpdater { + async fn insert_dns_records( + &self, + records: &HashMap<SRV, Vec<AAAARecord>>, + ) -> Result<(), DnsError> { + let mut our_records = self.records.lock().unwrap(); + for (k, v) in records { + let old = our_records.insert(k.clone(), v.clone()); + assert!( + old.is_none(), + "Inserted key {k}, but found old value: {old:?}" + ); + } + Ok(()) + } +} diff --git a/nexus/src/app/background/interfaces.rs b/nexus/src/app/background/interfaces.rs new file mode 100644 index 00000000000..daf0e849e44 --- /dev/null +++ b/nexus/src/app/background/interfaces.rs @@ -0,0 +1,107 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interfaces which can be faked out for testing. + +use crate::db::datastore::DataStore; +use crate::Nexus; +use async_trait::async_trait; +use internal_dns_client::{ + multiclient::{AAAARecord, DnsError, Updater as DnsUpdater}, + names::SRV, +}; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use std::collections::HashMap; +use std::sync::Arc; +use uuid::Uuid; + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +pub trait SledClientInterface { + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error>; + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error>; +} + +#[async_trait] +impl SledClientInterface for sled_agent_client::Client { + async fn services_put( + &self, + body: &SledAgentTypes::ServiceEnsureBody, + ) -> Result<(), Error> { + self.services_put(body).await?; + Ok(()) + } + + async fn filesystem_put( + &self, + body: &SledAgentTypes::DatasetEnsureBody, + ) -> Result<(), Error> { + self.filesystem_put(body).await?; + Ok(()) + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible.
+#[async_trait] +pub trait NexusInterface<S: SledClientInterface> { + fn rack_id(&self) -> Uuid; + fn rack_subnet(&self) -> Ipv6Subnet<RACK_PREFIX>; + fn datastore(&self) -> &Arc<DataStore>; + async fn sled_client(&self, id: &Uuid) -> Result<Arc<S>, Error>; +} + +#[async_trait] +impl NexusInterface<sled_agent_client::Client> for Nexus { + fn rack_id(&self) -> Uuid { + self.rack_id + } + + fn rack_subnet(&self) -> Ipv6Subnet<RACK_PREFIX> { + self.rack_subnet + } + + fn datastore(&self) -> &Arc<DataStore> { + self.datastore() + } + + async fn sled_client( + &self, + id: &Uuid, + ) -> Result<Arc<sled_agent_client::Client>, Error> { + self.sled_client(id).await + } +} + +// A trait intended to aid testing. +// +// The non-test implementation should be as simple as possible. +#[async_trait] +pub trait DnsUpdaterInterface { + async fn insert_dns_records( + &self, + records: &HashMap<SRV, Vec<AAAARecord>>, + ) -> Result<(), DnsError>; +} + +#[async_trait] +impl DnsUpdaterInterface for DnsUpdater { + async fn insert_dns_records( + &self, + records: &HashMap<SRV, Vec<AAAARecord>>, + ) -> Result<(), DnsError> { + self.insert_dns_records(records).await + } +} diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs new file mode 100644 index 00000000000..d7604f849d7 --- /dev/null +++ b/nexus/src/app/background/mod.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background tasks managed by Nexus. + +#[cfg(test)] +mod fakes; +mod interfaces; +mod services; + +use crate::app::Nexus; +use internal_dns_client::multiclient::Updater as DnsUpdater; +use std::sync::Arc; +use tokio::task::{spawn, JoinHandle}; + +/// Management structure which encapsulates periodically-executing background +/// tasks. +pub struct TaskRunner { + _handle: JoinHandle<()>, +} + +impl TaskRunner { + pub fn new(nexus: Arc<Nexus>) -> Self { + let handle = spawn(async move { + let log = nexus.log.new(o!("component" => "BackgroundTaskRunner")); + + let dns_updater = DnsUpdater::new( + &nexus.az_subnet(), + log.new(o!("component" => "DNS Updater")), + ); + let service_balancer = services::ServiceBalancer::new( + log.clone(), + nexus.clone(), + dns_updater, + ); + + loop { + // TODO: We may want triggers to exist here, to invoke this task + // more frequently (e.g., on Sled failure). + let opctx = nexus.opctx_for_service_balancer(); + if let Err(e) = service_balancer.balance_services(&opctx).await + { + warn!(log, "Failed to balance services: {:?}", e); + } + + tokio::time::sleep(std::time::Duration::from_secs(30)).await; + } + }); + Self { _handle: handle } + } +} diff --git a/nexus/src/app/background/services.rs b/nexus/src/app/background/services.rs new file mode 100644 index 00000000000..cce48a43ffe --- /dev/null +++ b/nexus/src/app/background/services.rs @@ -0,0 +1,906 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Task which ensures that expected Nexus services exist.
+ +use super::interfaces::{ + DnsUpdaterInterface, NexusInterface, SledClientInterface, +}; +use crate::context::OpContext; +use crate::db::datastore::DatasetRedundancy; +use crate::db::identity::Asset; +use crate::db::model::Dataset; +use crate::db::model::DatasetKind; +use crate::db::model::Service; +use crate::db::model::ServiceKind; +use crate::db::model::Sled; +use crate::db::model::Zpool; +use futures::stream::{self, StreamExt, TryStreamExt}; +use omicron_common::address::{ + DnsSubnet, DNS_PORT, DNS_REDUNDANCY, DNS_SERVER_PORT, +}; +use omicron_common::api::external::Error; +use sled_agent_client::types as SledAgentTypes; +use slog::Logger; +use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; +use std::net::{IpAddr, Ipv6Addr, SocketAddrV6}; +use std::sync::Arc; + +// Policy for the number of services to be provisioned. +#[derive(Debug)] +enum ServiceRedundancy { + // This service must exist on at least this many sleds + // within the rack. + PerRack(u32), + + // This service must exist on all Scrimlets within the rack. + AllScrimlets, + + // This service must exist on at least this many sleds + // within the availability zone. Note that this is specific + // to the DNS service, as some expectations surrounding + // addressing are specific to that service. + DnsPerAz(u32), +} + +#[derive(Debug)] +struct ExpectedService { + kind: ServiceKind, + redundancy: ServiceRedundancy, +} + +// TODO(https://github.com/oxidecomputer/omicron/issues/1276): +// Longer-term, when we integrate multi-rack support, +// it is expected that Nexus will manage multiple racks +// within the fleet, rather than simply per-rack services. +// +// When that happens, it is likely that many of the "per-rack" +// services will become "per-fleet", such as Nexus and CRDB. +const EXPECTED_SERVICES: [ExpectedService; 4] = [ + ExpectedService { + kind: ServiceKind::InternalDNS, + redundancy: ServiceRedundancy::DnsPerAz(DNS_REDUNDANCY), + }, + ExpectedService { + kind: ServiceKind::Nexus, + redundancy: ServiceRedundancy::PerRack(1), + }, + ExpectedService { + kind: ServiceKind::Oximeter, + redundancy: ServiceRedundancy::PerRack(1), + }, + ExpectedService { + kind: ServiceKind::Dendrite, + redundancy: ServiceRedundancy::AllScrimlets, + }, +]; + +#[derive(Debug)] +struct ExpectedDataset { + kind: DatasetKind, + redundancy: DatasetRedundancy, +} + +const EXPECTED_DATASETS: [ExpectedDataset; 3] = [ + ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }, + ExpectedDataset { + kind: DatasetKind::Cockroach, + // TODO(https://github.com/oxidecomputer/omicron/issues/727): + // Update this to more than one. + redundancy: DatasetRedundancy::PerRack(1), + }, + ExpectedDataset { + kind: DatasetKind::Clickhouse, + redundancy: DatasetRedundancy::PerRack(1), + }, +]; + +/// Contains logic for balancing services within a fleet. +/// +/// This struct operates on generic parameters to easily permit +/// dependency injection via testing, but in production, practically +/// operates on the same concrete types. +pub struct ServiceBalancer<D, N, S> +where + D: DnsUpdaterInterface, + N: NexusInterface<S>, + S: SledClientInterface, +{ + log: Logger, + nexus: Arc<N>, + dns_updater: D, + phantom: PhantomData<S>, +} + +impl<D, N, S> ServiceBalancer<D, N, S> +where + D: DnsUpdaterInterface, + N: NexusInterface<S>, + S: SledClientInterface, +{ + pub fn new(log: Logger, nexus: Arc<N>, dns_updater: D) -> Self { + Self { log, nexus, dns_updater, phantom: PhantomData } + } + + // If necessary, allocates an external IP for the service.
+ async fn allocate_external_ip( + &self, + service_kind: ServiceKind, + ) -> Result<Option<IpAddr>, Error> { + match service_kind { + ServiceKind::Nexus => { + // TODO: does this need to be in a txn somewhere? + // TODO: THIS SHOULDN'T BE DOING THE ALLOCATION HERE. + // let ip_id = uuid::Uuid::new_v4(); + // IncompleteInstanceExternalIp::for_service( + // ip_id, + // pool_id, + // ); + + // TODO: NO! + Ok(Some(IpAddr::V4(std::net::Ipv4Addr::LOCALHOST))) + } + ServiceKind::InternalDNS + | ServiceKind::Oximeter + | ServiceKind::Dendrite => Ok(None), + // TODO TODO TODO + ServiceKind::Tfport => todo!(), + } + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_services( + &self, + opctx: &OpContext, + services: Vec<Service>, + ) -> Result<(), Error> { + let mut sled_ids = HashSet::new(); + for svc in &services { + sled_ids.insert(svc.sled_id); + } + + // For all sleds requiring an update, request all services be + // instantiated. + stream::iter(&sled_ids) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |sled_id| async { + // Query for all services that should be running on a Sled, + // and notify Sled Agent about all of them. + let services = self + .nexus + .datastore() + .service_list(opctx, *sled_id) + .await?; + let sled_client = self.nexus.sled_client(sled_id).await?; + + info!(self.log, "instantiate_services: {:?}", services); + + let mut service_requests = vec![]; + for service in &services { + let internal_address = Ipv6Addr::from(service.ip); + let external_address = + self.allocate_external_ip(service.kind).await?; + + let (name, service_type) = Self::get_service_name_and_type( + service.kind, + internal_address, + external_address, + ); + let gz_addresses = match &service.kind { + ServiceKind::InternalDNS => { + vec![DnsSubnet::from_dns_address(internal_address) + .gz_address() + .ip()] + } + _ => vec![], + }; + + service_requests.push(SledAgentTypes::ServiceZoneRequest { + id: service.id(), + zone_name: name, + addresses: vec![internal_address], + gz_addresses, + services: vec![service_type], + }); + } + + sled_client + .services_put(&SledAgentTypes::ServiceEnsureBody { + services: service_requests, + }) + .await?; + Ok(()) + }) + .await?; + + let mut records = HashMap::new(); + for service in &services { + records + .entry(service.srv()) + .or_insert_with(Vec::new) + .push((service.aaaa(), service.address())); + } + self.dns_updater + .insert_dns_records(&records) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; + + Ok(()) + } + + // Translates (address, db kind) to Sled Agent client types. + fn get_service_name_and_type( + kind: ServiceKind, + internal_address: Ipv6Addr, + external_address: Option<IpAddr>, + ) -> (String, SledAgentTypes::ServiceType) { + match kind { + ServiceKind::Nexus => ( + "nexus".to_string(), + SledAgentTypes::ServiceType::Nexus { + internal_ip: internal_address, + // TODO: This is wrong!
needs a separate address for Nexus + external_ip: external_address + .expect("Nexus needs an external address"), + }, + ), + ServiceKind::InternalDNS => ( + "internal-dns".to_string(), + SledAgentTypes::ServiceType::InternalDns { + server_address: SocketAddrV6::new( + internal_address, + DNS_SERVER_PORT, + 0, + 0, + ) + .to_string(), + dns_address: SocketAddrV6::new( + internal_address, + DNS_PORT, + 0, + 0, + ) + .to_string(), + }, + ), + ServiceKind::Oximeter => { + ("oximeter".to_string(), SledAgentTypes::ServiceType::Oximeter) + } + ServiceKind::Dendrite => ( + "dendrite".to_string(), + SledAgentTypes::ServiceType::Dendrite { + asic: SledAgentTypes::DendriteAsic::TofinoStub, + }, + ), + // TODO TODO TODO + ServiceKind::Tfport => todo!(), + } + } + + async fn ensure_services_provisioned( + &self, + opctx: &OpContext, + expected_services: &[ExpectedService], + ) -> Result<(), Error> { + // Provision services within the database. + let mut svcs = vec![]; + for expected_svc in expected_services { + info!(self.log, "Ensuring service {:?} exists", expected_svc); + match expected_svc.redundancy { + ServiceRedundancy::PerRack(desired_count) => { + svcs.extend_from_slice( + &self + .nexus + .datastore() + .ensure_rack_service( + opctx, + self.nexus.rack_id(), + expected_svc.kind, + desired_count, + ) + .await?, + ); + } + ServiceRedundancy::DnsPerAz(desired_count) => { + svcs.extend_from_slice( + &self + .nexus + .datastore() + .ensure_dns_service( + opctx, + self.nexus.rack_subnet(), + desired_count, + ) + .await?, + ); + } + ServiceRedundancy::AllScrimlets => { + svcs.extend_from_slice( + &self + .nexus + .datastore() + .ensure_scrimlet_service( + opctx, + self.nexus.rack_id(), + expected_svc.kind, + ) + .await?, + ); + } + } + } + + // Ensure services exist on the target sleds. + self.instantiate_services(opctx, svcs).await?; + Ok(()) + } + + async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result<(), Error> { + // Provision the datasets within the database. + let new_datasets = self + .nexus + .datastore() + .ensure_rack_dataset(opctx, self.nexus.rack_id(), kind, redundancy) + .await?; + + // Actually instantiate those datasets. + self.instantiate_datasets(new_datasets, kind).await + } + + // Reaches out to all sled agents implied in "services", and + // requests that the desired services are executing. + async fn instantiate_datasets( + &self, + datasets: Vec<(Sled, Zpool, Dataset)>, + kind: DatasetKind, + ) -> Result<(), Error> { + if datasets.is_empty() { + return Ok(()); + } + + // Ensure that there is one connection per sled. + let mut sled_clients = HashMap::new(); + for (sled, _, _) in &datasets { + if sled_clients.get(&sled.id()).is_none() { + let sled_client = self.nexus.sled_client(&sled.id()).await?; + sled_clients.insert(sled.id(), sled_client); + } + } + + // Issue all dataset instantiation requests concurrently. + stream::iter(&datasets) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |(sled, zpool, dataset)| async { + let sled_client = sled_clients.get(&sled.id()).unwrap(); + + let dataset_kind = match kind { + // TODO(https://github.com/oxidecomputer/omicron/issues/727): + // This set of "all addresses" isn't right. We'll need to + // deal with that before supporting multi-node CRDB. 
+ DatasetKind::Cockroach => { + SledAgentTypes::DatasetKind::CockroachDb(vec![]) + } + DatasetKind::Crucible => { + SledAgentTypes::DatasetKind::Crucible + } + DatasetKind::Clickhouse => { + SledAgentTypes::DatasetKind::Clickhouse + } + }; + + // Instantiate each dataset. + sled_client + .filesystem_put(&SledAgentTypes::DatasetEnsureBody { + id: dataset.id(), + zpool_id: zpool.id(), + dataset_kind, + address: dataset.address().to_string(), + }) + .await?; + Ok(()) + }) + .await?; + + // Ensure all DNS records are updated for the created datasets. + let mut records = HashMap::new(); + for (_, _, dataset) in &datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } + self.dns_updater + .insert_dns_records(&records) + .await + .map_err(|e| Error::internal_error(&e.to_string()))?; + + Ok(()) + } + + async fn ensure_datasets_provisioned( + &self, + opctx: &OpContext, + expected_datasets: &[ExpectedDataset], + ) -> Result<(), Error> { + // Provision all dataset types concurrently. + stream::iter(expected_datasets) + .map(Ok::<_, Error>) + .try_for_each_concurrent(None, |expected_dataset| async move { + info!( + self.log, + "Ensuring dataset {:?} exists", expected_dataset + ); + self.ensure_rack_dataset( + opctx, + expected_dataset.kind, + expected_dataset.redundancy, + ) + .await?; + Ok(()) + }) + .await + } + + /// Provides a single point-in-time evaluation and adjustment of + /// the services provisioned within the rack. + /// + /// May adjust the provisioned services to meet the redundancy of the + /// rack, if necessary. + // TODO: Consider using sagas to ensure the rollout of services. + // + // Not using sagas *happens* to be fine because these operations are + // re-tried periodically, but that's kind of forcing a dependency on the + // caller. + pub async fn balance_services( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + self.ensure_datasets_provisioned(opctx, &EXPECTED_DATASETS).await?; + self.ensure_services_provisioned(opctx, &EXPECTED_SERVICES).await?; + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + use crate::app::background::fakes::{FakeDnsUpdater, FakeNexus}; + use crate::db::datastore::DataStore; + use crate::{authn, authz}; + use dropshot::test_util::LogContext; + use internal_dns_client::names::{BackendName, AAAA, SRV}; + use nexus_test_utils::db::test_setup_database; + use omicron_common::address::Ipv6Subnet; + use omicron_common::api::external::ByteCount; + use omicron_test_utils::dev; + use std::sync::Arc; + use uuid::Uuid; + + struct ProvisionTest { + logctx: LogContext, + opctx: OpContext, + db: dev::db::CockroachInstance, + datastore: Arc<DataStore>, + } + + impl ProvisionTest { + // Create the logger and set up the database.
+ async fn new(name: &str) -> Self { + let logctx = dev::test_setup_log(name); + let db = test_setup_database(&logctx.log).await; + let (_, datastore) = + crate::db::datastore::datastore_test(&logctx, &db).await; + let opctx = OpContext::for_background( + logctx.log.new(o!()), + Arc::new(authz::Authz::new(&logctx.log)), + authn::Context::internal_service_balancer(), + datastore.clone(), + ); + Self { logctx, opctx, db, datastore } + } + + async fn cleanup(mut self) { + self.db.cleanup().await.unwrap(); + self.logctx.cleanup_successful(); + } + } + + async fn create_test_sled(rack_id: Uuid, datastore: &DataStore) -> Uuid { + let bogus_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, bogus_addr.clone(), is_scrimlet, rack_id); + datastore.sled_upsert(sled).await.unwrap(); + sled_id + } + + async fn create_test_zpool(datastore: &DataStore, sled_id: Uuid) -> Uuid { + let zpool_id = Uuid::new_v4(); + let zpool = Zpool::new( + zpool_id, + sled_id, + &crate::internal_api::params::ZpoolPutRequest { + size: ByteCount::from_gibibytes_u32(10), + }, + ); + datastore.zpool_upsert(zpool).await.unwrap(); + zpool_id + } + + #[tokio::test] + async fn test_provision_dataset_on_all_no_zpools() { + let test = + ProvisionTest::new("test_provision_dataset_on_all_no_zpools").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: One sled, no zpools. + let sled_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + + // Make the request to the service balancer for Crucibles on all Zpools. + // + // However, with no zpools, this is a no-op. + let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); + + // Observe that nothing was requested at the sled. + let sled = nexus.sled_client(&sled_id).await.unwrap(); + assert!(sled.service_requests().is_empty()); + assert!(sled.dataset_requests().is_empty()); + + // Observe that no DNS records were updated. + let records = dns_updater.records(); + assert!(records.is_empty()); + + test.cleanup().await; + } + + #[tokio::test] + async fn test_provision_dataset_on_all_zpools() { + let test = + ProvisionTest::new("test_provision_dataset_on_all_zpools").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: One sled, multiple zpools + let sled_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + const ZPOOL_COUNT: usize = 3; + let mut zpools = vec![]; + for _ in 0..ZPOOL_COUNT { + zpools.push(create_test_zpool(&test.datastore, sled_id).await); + } + + // Make the request to the service balancer for Crucibles on all Zpools. 
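+        // With zpools present, a Crucible dataset should be requested on each zpool.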
+ let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Crucible, + redundancy: DatasetRedundancy::OnAll, + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); + + // Observe that datasets were requested on each zpool. + let sled = nexus.sled_client(&sled_id).await.unwrap(); + assert!(sled.service_requests().is_empty()); + let dataset_requests = sled.dataset_requests(); + assert_eq!(ZPOOL_COUNT, dataset_requests.len()); + for request in &dataset_requests { + assert!( + zpools.contains(&request.zpool_id), + "Dataset request for unexpected zpool" + ); + assert!(matches!( + request.dataset_kind, + SledAgentTypes::DatasetKind::Crucible + )); + } + + // Observe that DNS records for each Crucible exist. + let records = dns_updater.records(); + assert_eq!(ZPOOL_COUNT, records.len()); + for (srv, aaaas) in records { + match srv { + SRV::Backend(BackendName::Crucible, dataset_id) => { + let expected_address = dataset_requests + .iter() + .find_map(|request| { + if request.id == dataset_id { + Some(request.address.clone()) + } else { + None + } + }) + .unwrap(); + + assert_eq!(1, aaaas.len()); + let (aaaa_name, dns_addr) = &aaaas[0]; + assert_eq!(dns_addr.to_string(), expected_address); + if let AAAA::Zone(zone_id) = aaaa_name { + assert_eq!( + *zone_id, dataset_id, + "Expected AAAA UUID to match SRV record", + ); + } else { + panic!( + "Expected AAAA record for Zone from {aaaa_name}" + ); + } + } + _ => panic!("Unexpected SRV record"), + } + } + + test.cleanup().await; + } + + // Observe that "per-rack" dataset provisions can be completed. + // + // This test uses multiple racks, and verifies that a provision occurs + // on each one. + #[tokio::test] + async fn test_provision_dataset_per_rack() { + let test = ProvisionTest::new("test_provision_dataset_per_rack").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: Create a couple sleds on the first rack, and create a third + // sled on a "different rack". + // + // Each sled gets a single zpool. + let mut zpools = vec![]; + + let sled1_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + zpools.push(create_test_zpool(&test.datastore, sled1_id).await); + + let sled2_id = create_test_sled(nexus.rack_id(), &test.datastore).await; + zpools.push(create_test_zpool(&test.datastore, sled2_id).await); + + let other_rack_id = Uuid::new_v4(); + let other_rack_sled_id = + create_test_sled(other_rack_id, &test.datastore).await; + zpools + .push(create_test_zpool(&test.datastore, other_rack_sled_id).await); + + // Ask for one dataset per rack. + let expected_datasets = [ExpectedDataset { + kind: DatasetKind::Cockroach, + redundancy: DatasetRedundancy::PerRack(1), + }]; + service_balancer + .ensure_datasets_provisioned(&test.opctx, &expected_datasets) + .await + .unwrap(); + + // Observe that the datasets were requested on each rack. + + // Rack 1: One of the two sleds should have a dataset. 
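+        // The choice of sled is arbitrary, so check both and require that exactly one saw a request.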
+ let sled = nexus.sled_client(&sled1_id).await.unwrap(); + let requests1 = sled.dataset_requests(); + if !requests1.is_empty() { + assert_eq!(1, requests1.len()); + assert_eq!(zpools[0], requests1[0].zpool_id); + } + let sled = nexus.sled_client(&sled2_id).await.unwrap(); + let requests2 = sled.dataset_requests(); + if !requests2.is_empty() { + assert_eq!(1, requests2.len()); + assert_eq!(zpools[1], requests2[0].zpool_id); + } + assert!( + requests1.is_empty() ^ requests2.is_empty(), + "One of the sleds should have a dataset, the other should not" + ); + + // Rack 2: The sled should have a dataset. + let sled = nexus.sled_client(&other_rack_sled_id).await.unwrap(); + let requests = sled.dataset_requests(); + // TODO(https://github.com/oxidecomputer/omicron/issues/1276): + // We should see a request to the "other rack" when multi-rack + // is supported. + // + // At the moment, however, all requests for service-balancing are + // "rack-local". + assert_eq!(0, requests.len()); + + // We should be able to assert this when multi-rack is supported. + // assert_eq!(zpools[2], requests[0].zpool_id); + + test.cleanup().await; + } + + #[tokio::test] + async fn test_provision_oximeter_service_per_rack() { + let test = + ProvisionTest::new("test_provision_oximeter_service_per_rack") + .await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: Create three sleds, with the goal of putting services on two + // of them. + const SLED_COUNT: u32 = 3; + const SERVICE_COUNT: u32 = 2; + let mut sleds = vec![]; + for _ in 0..SLED_COUNT { + sleds + .push(create_test_sled(nexus.rack_id(), &test.datastore).await); + } + let expected_services = [ExpectedService { + kind: ServiceKind::Oximeter, + redundancy: ServiceRedundancy::PerRack(SERVICE_COUNT), + }]; + + // Request the services + service_balancer + .ensure_services_provisioned(&test.opctx, &expected_services) + .await + .unwrap(); + + // Observe the service on SERVICE_COUNT of the SLED_COUNT sleds. + let mut observed_service_count = 0; + for sled_id in &sleds { + let sled = nexus.sled_client(&sled_id).await.unwrap(); + let requests = sled.service_requests(); + + match requests.len() { + 0 => (), // Ignore the sleds where nothing was provisioned + 1 => { + assert_eq!(requests[0].zone_name, "oximeter"); + assert!(matches!( + requests[0].services[0], + SledAgentTypes::ServiceType::Oximeter + )); + assert!(requests[0].gz_addresses.is_empty()); + observed_service_count += 1; + } + _ => { + panic!("Unexpected requests (should only see one per sled): {:#?}", requests); + } + } + } + assert_eq!(observed_service_count, SERVICE_COUNT); + + test.cleanup().await; + } + + #[tokio::test] + async fn test_provision_nexus_service_per_rack() { + let test = + ProvisionTest::new("test_provision_nexus_service_per_rack").await; + + let rack_subnet = Ipv6Subnet::new(Ipv6Addr::LOCALHOST); + let nexus = FakeNexus::new(test.datastore.clone(), rack_subnet); + let dns_updater = FakeDnsUpdater::new(); + let service_balancer = ServiceBalancer::new( + test.logctx.log.clone(), + nexus.clone(), + dns_updater.clone(), + ); + + // Setup: Create three sleds, with the goal of putting services on two + // of them. 
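+        // Unlike Oximeter, the Nexus service also carries an external address, which is checked below.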
+ const SLED_COUNT: u32 = 3; + const SERVICE_COUNT: u32 = 2; + let mut sleds = vec![]; + for _ in 0..SLED_COUNT { + sleds + .push(create_test_sled(nexus.rack_id(), &test.datastore).await); + } + let expected_services = [ExpectedService { + kind: ServiceKind::Nexus, + redundancy: ServiceRedundancy::PerRack(SERVICE_COUNT), + }]; + + // Request the services + service_balancer + .ensure_services_provisioned(&test.opctx, &expected_services) + .await + .unwrap(); + + // Observe the service on SERVICE_COUNT of the SLED_COUNT sleds. + let mut observed_service_count = 0; + for sled_id in &sleds { + let sled = nexus.sled_client(&sled_id).await.unwrap(); + let requests = sled.service_requests(); + match requests.len() { + 0 => (), // Ignore the sleds where nothing was provisioned + 1 => { + assert_eq!(requests[0].zone_name, "nexus"); + match &requests[0].services[0] { + SledAgentTypes::ServiceType::Nexus { + internal_ip, + external_ip, + } => { + // TODO: This is currently failing! We need to make + // the Nexus external IP come from an IP pool for + // external addresses. + assert_ne!(internal_ip, external_ip,); + + // TODO: check ports too, maybe? + } + _ => panic!( + "unexpected service type: {:?}", + requests[0].services[0] + ), + } + assert!(requests[0].gz_addresses.is_empty()); + observed_service_count += 1; + } + _ => { + panic!("Unexpected requests (should only see one per sled): {:#?}", requests); + } + } + } + assert_eq!(observed_service_count, SERVICE_COUNT); + + test.cleanup().await; + } + + /* + + // TODO: Check for GZ? + #[tokio::test] + async fn test_provision_dns_service_per_az() { + todo!(); + } + + // TODO: Check for value of 'asic'? + #[tokio::test] + async fn test_provision_scrimlet_service() { + todo!(); + } + + */ +} diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 172d83b9123..30936a02dec 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -15,9 +15,11 @@ use crate::populate::PopulateArgs; use crate::populate::PopulateStatus; use crate::saga_interface::SagaContext; use anyhow::anyhow; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, RACK_PREFIX}; use omicron_common::api::external::Error; use slog::Logger; use std::sync::Arc; +use tokio::sync::OnceCell; use uuid::Uuid; // The implementation of Nexus is large, and split into a number of submodules @@ -44,6 +46,9 @@ mod vpc; mod vpc_router; mod vpc_subnet; +// Background tasks exist in the "background" module. +mod background; + // Sagas are not part of the "Nexus" implementation, but they are // application logic. mod sagas; @@ -66,6 +71,9 @@ pub struct Nexus { /// uuid for this rack rack_id: Uuid, + /// subnet of this rack + rack_subnet: Ipv6Subnet, + /// general server log log: Logger, @@ -84,6 +92,9 @@ pub struct Nexus { /// Status of background task to populate database populate_status: tokio::sync::watch::Receiver, + /// Background task for Nexus. + background_task_runner: OnceCell, + /// Client to the timeseries database. timeseries_client: LazyTimeseriesClient, @@ -142,7 +153,7 @@ impl Nexus { sec_store, )); - // Connect to clickhouse - but do so lazily. + // Connect to Clickhouse - but do so lazily. // Clickhouse may not be executing when Nexus starts. 
let timeseries_client = if let Some(address) = &config.pkg.timeseries_db.address { @@ -173,12 +184,14 @@ impl Nexus { let nexus = Nexus { id: config.deployment.id, rack_id, + rack_subnet: config.deployment.subnet, log: log.new(o!()), db_datastore: Arc::clone(&db_datastore), authz: Arc::clone(&authz), sec_client: Arc::clone(&sec_client), recovery_task: std::sync::Mutex::new(None), populate_status, + background_task_runner: OnceCell::new(), timeseries_client, updates_config: config.pkg.updates.clone(), tunables: config.pkg.tunables.clone(), @@ -223,6 +236,10 @@ impl Nexus { nexus } + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet.net().ip()) + } + /// Return the tunable configuration parameters, e.g. for use in tests. pub fn tunables(&self) -> &config::Tunables { &self.tunables @@ -245,6 +262,15 @@ impl Nexus { } } + pub fn start_background_tasks( + self: &Arc, + ) -> Result<(), anyhow::Error> { + let nexus = self.clone(); + self.background_task_runner + .set(background::TaskRunner::new(nexus)) + .map_err(|error| anyhow!(error.to_string())) + } + /// Returns an [`OpContext`] used for authenticating external requests pub fn opctx_external_authn(&self) -> &OpContext { &self.opctx_external_authn diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index dcc7ce92dbc..aa4ebba7e4a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -8,7 +8,7 @@ use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::lookup::LookupPath; -use crate::internal_api::params::ServicePutRequest; +use crate::internal_api::params::RackInitializationRequest; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -57,12 +57,13 @@ impl super::Nexus { &self, opctx: &OpContext, rack_id: Uuid, - services: Vec, + request: RackInitializationRequest, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; // Convert from parameter -> DB type. - let services: Vec<_> = services + let services: Vec<_> = request + .services .into_iter() .map(|svc| { db::model::Service::new( @@ -74,10 +75,53 @@ impl super::Nexus { }) .collect(); + // TODO: If nexus, add a pool? + + let datasets: Vec<_> = request + .datasets + .into_iter() + .map(|dataset| { + db::model::Dataset::new( + dataset.dataset_id, + dataset.zpool_id, + dataset.request.address, + dataset.request.kind.into(), + ) + }) + .collect(); self.db_datastore - .rack_set_initialized(opctx, rack_id, services) + .rack_set_initialized(opctx, rack_id, services, datasets) .await?; Ok(()) } + + /// Awaits the initialization of the rack. + /// + /// This will occur by either: + /// 1. RSS invoking the internal API, handing off responsibility, or + /// 2. Re-reading a value from the DB, if the rack has already been + /// initialized. + /// + /// See RFD 278 for additional context. 
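+    /// (In practice this polls the rack record every two seconds until it is marked initialized.)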
+ pub async fn await_rack_initialization(&self, opctx: &OpContext) { + loop { + let result = self.rack_lookup(&opctx, &self.rack_id).await; + match result { + Ok(rack) => { + if rack.initialized { + return; + } + info!( + self.log, + "Still waiting for rack initialization: {:?}", rack + ); + } + Err(e) => { + warn!(self.log, "Cannot look up rack: {}", e); + } + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + } } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 3fdba98c73a..3d33e768bdf 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -23,6 +23,7 @@ pub mod disk_create; pub mod disk_delete; pub mod instance_create; pub mod instance_migrate; +pub mod service_balance; pub mod snapshot_create; pub mod volume_delete; pub mod volume_remove_rop; @@ -98,6 +99,9 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); ::register_actions( &mut registry, ); diff --git a/nexus/src/app/sagas/service_balance.rs b/nexus/src/app/sagas/service_balance.rs new file mode 100644 index 00000000000..bf5040f29f6 --- /dev/null +++ b/nexus/src/app/sagas/service_balance.rs @@ -0,0 +1,206 @@ +// This Source Ccode Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::{NexusActionContext, NexusSaga, SagaInitError}; +use crate::app::sagas::NexusAction; +use crate::authn; +//use crate::context::OpContext; +//use crate::db::identity::Resource; +use crate::db::model::ServiceKind; +//use chrono::Utc; +use lazy_static::lazy_static; +//use omicron_common::address::Ipv6Subnet; +//use omicron_common::address::RACK_PREFIX; +//use omicron_common::api::external::Error; +use serde::Deserialize; +use serde::Serialize; +use std::fmt::Debug; +use std::sync::Arc; +use steno::new_action_noop_undo; +use steno::ActionError; +use steno::ActionFunc; +use steno::Node; +use steno::{DagBuilder, SagaName}; +use uuid::Uuid; + +// service balance saga: input parameters + +/// Describes the target location where the services should +/// eventually be running. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum ServiceDestination { + Scrimlet, + Rack, +} + +/// Parameters used to balance many services. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Params { + pub serialized_authn: authn::saga::Serialized, + pub destination: ServiceDestination, + pub kind: ServiceKind, + pub rack_id: Uuid, + pub redundancy: u32, +} + +/// Parameters used to instantiate a single service. +/// +/// This is used within a sub-saga. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct ServiceParams { + which: u32, + kind: ServiceKind, + rack_id: Uuid, +} + +lazy_static! 
{ + static ref PICK_DESTINATION_SLEDS: NexusAction = new_action_noop_undo( + "service-balance.pick-destination-sleds", + pick_destination_sleds, + ); + static ref CREATE_SERVICE_RECORD: NexusAction = ActionFunc::new_action( + "service-balance.create-service-record", + create_service_record, + create_service_record_undo, + ); + static ref CREATE_INTERNAL_IP: NexusAction = ActionFunc::new_action( + "service-balance.create-internal-ip", + create_internal_ip, + create_internal_ip_undo, + ); + static ref CREATE_EXTERNAL_IP: NexusAction = ActionFunc::new_action( + "service-balance.create-external-ip", + create_external_ip, + destroy_external_ip, + ); +} + +// Helper function for appending subsagas to our parent saga. +fn subsaga_append( + node_basename: &'static str, + subsaga_builder: steno::DagBuilder, + parent_builder: &mut steno::DagBuilder, + params: S, + which: u32, +) -> Result<(), SagaInitError> { + // The "parameter" node is a constant node that goes into the outer saga. + let params_node_name = format!("{}_params{}", node_basename, which); + parent_builder.append(Node::constant( + ¶ms_node_name, + serde_json::to_value(¶ms).map_err(|e| { + SagaInitError::SerializeError(params_node_name.clone(), e) + })?, + )); + + let output_name = format!("{}{}", node_basename, which); + parent_builder.append(Node::subsaga( + output_name.as_str(), + subsaga_builder.build()?, + params_node_name, + )); + Ok(()) +} + +#[derive(Debug)] +pub struct SagaServiceBalance; +impl NexusSaga for SagaServiceBalance { + const NAME: &'static str = "service-balance"; + type Params = Params; + + fn register_actions(registry: &mut super::ActionRegistry) { + registry.register(Arc::clone(&*PICK_DESTINATION_SLEDS)); + registry.register(Arc::clone(&*CREATE_SERVICE_RECORD)); + registry.register(Arc::clone(&*CREATE_INTERNAL_IP)); + registry.register(Arc::clone(&*CREATE_EXTERNAL_IP)); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "destination_sleds", + "PickDestinationSleds", + PICK_DESTINATION_SLEDS.as_ref(), + )); + + // After selecting destination sleds for our desired number of services, + // we need to actually provision the services themselves. + // + // We do so by creating subsagas for each potential to-be-allocated + // service. + for i in 0..params.redundancy { + let repeat_params = ServiceParams { + which: i, + kind: params.kind, + rack_id: params.rack_id, + }; + let subsaga_name = SagaName::new(&format!("create-service{i}")); + let mut subsaga_builder = DagBuilder::new(subsaga_name); + subsaga_builder.append(Node::action( + "internal_ip{i}", + format!("CreateServiceIp{i}").as_str(), + CREATE_INTERNAL_IP.as_ref(), + )); + subsaga_append( + "network_interface", + subsaga_builder, + &mut builder, + repeat_params, + i, + )?; + } + + Ok(builder.build()?) 
+ } +} + +async fn create_service_record( + _sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn create_service_record_undo( + _sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn create_internal_ip( + _sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn create_internal_ip_undo( + _sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn create_external_ip( + _sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} + +async fn destroy_external_ip( + _sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // TODO + Ok(()) +} + +async fn pick_destination_sleds( + _sagactx: NexusActionContext, +) -> Result<(), ActionError> { + // TODO + Ok(()) +} diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index bdb52465586..e51f7c7af8a 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -18,7 +18,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use sled_agent_client::Client as SledAgentClient; -use std::net::{Ipv6Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -126,7 +126,7 @@ impl super::Nexus { &self, id: Uuid, zpool_id: Uuid, - address: SocketAddr, + address: SocketAddrV6, kind: DatasetKind, ) -> Result<(), Error> { info!(self.log, "upserting dataset"; "zpool_id" => zpool_id.to_string(), "dataset_id" => id.to_string(), "address" => address.to_string()); diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d622368ef1b..bda1905edea 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -52,34 +52,30 @@ pub struct TimeseriesDbConfig { pub address: Option, } -// A deserializable type that does no validation on the tunable parameters. -#[derive(Clone, Debug, Deserialize, PartialEq)] -struct UnvalidatedTunables { - max_vpc_ipv4_subnet_prefix: u8, +fn deserialize_ipv4_subnet<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let prefix = u8::deserialize(deserializer)?; + Tunables::validate_ipv4_prefix(prefix) + .map_err(|e| serde::de::Error::custom(e))?; + Ok(prefix) } /// Tunable configuration parameters, intended for use in test environments or /// other situations in which experimentation / tuning is valuable. #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(try_from = "UnvalidatedTunables")] pub struct Tunables { /// The maximum prefix size supported for VPC Subnet IPv4 subnetworks. /// /// Note that this is the maximum _prefix_ size, which sets the minimum size /// of the subnet. + #[serde(default, deserialize_with = "deserialize_ipv4_subnet")] pub max_vpc_ipv4_subnet_prefix: u8, -} - -// Convert from the unvalidated tunables, verifying each parameter as needed. -impl TryFrom for Tunables { - type Error = InvalidTunable; - fn try_from(unvalidated: UnvalidatedTunables) -> Result { - Tunables::validate_ipv4_prefix(unvalidated.max_vpc_ipv4_subnet_prefix)?; - Ok(Tunables { - max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, - }) - } + /// Identifies whether or not background tasks will be enabled. 
+ #[serde(default)] + pub enable_background_tasks: bool, } impl Tunables { @@ -121,7 +117,10 @@ pub const MAX_VPC_IPV4_SUBNET_PREFIX: u8 = 26; impl Default for Tunables { fn default() -> Self { - Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX } + Tunables { + max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX, + enable_background_tasks: true, + } } } @@ -393,7 +392,10 @@ mod test { trusted_root: PathBuf::from("/path/to/root.json"), default_base_url: "http://example.invalid/".into(), }), - tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, + tunables: Tunables { + max_vpc_ipv4_subnet_prefix: 27, + enable_background_tasks: false, + }, }, } ); diff --git a/nexus/src/db/datastore/dataset.rs b/nexus/src/db/datastore/dataset.rs index 55259e922fd..045dd6c427b 100644 --- a/nexus/src/db/datastore/dataset.rs +++ b/nexus/src/db/datastore/dataset.rs @@ -5,14 +5,25 @@ //! [`DataStore`] methods on [`Dataset`]s. use super::DataStore; +use super::RunnableQuery; +use super::REGION_REDUNDANCY_THRESHOLD; +use crate::authz; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; +use crate::db::datastore::DatasetRedundancy; +use crate::db::datastore::OpContext; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; +use crate::db::error::TransactionError; use crate::db::identity::Asset; use crate::db::model::Dataset; +use crate::db::model::DatasetKind; +use crate::db::model::Sled; use crate::db::model::Zpool; +use crate::db::pool::DbConnection; +use async_bb8_diesel::AsyncConnection; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; @@ -20,6 +31,8 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::net::SocketAddrV6; +use uuid::Uuid; impl DataStore { /// Stores a new dataset in the database. @@ -62,4 +75,219 @@ impl DataStore { } }) } + + /// Stores a new dataset in the database. + async fn dataset_upsert_on_connection( + conn: &async_bb8_diesel::Connection, + dataset: Dataset, + ) -> CreateResult { + use db::schema::dataset::dsl; + + let zpool_id = dataset.pool_id; + Zpool::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result_async(conn) + .await + .map_err(|e| match e { + AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + }, + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + pub async fn dataset_list( + &self, + opctx: &OpContext, + zpool_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(zpool_id)) + .select(Dataset::as_select()) + .load_async(self.pool_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + async fn sled_zpool_and_dataset_list_on_connection( + conn: &async_bb8_diesel::Connection, + rack_id: Uuid, + kind: DatasetKind, + ) -> Result)>, Error> { + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::sled::dsl as sled_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .filter(sled_dsl::rack_id.eq(rack_id)) + .inner_join( + db::schema::zpool::table.on(zpool_dsl::sled_id + .eq(sled_dsl::id) + .and(zpool_dsl::time_deleted.is_null())), + ) + .left_outer_join( + db::schema::dataset::table.on(dataset_dsl::pool_id + .eq(zpool_dsl::id) + .and(dataset_dsl::kind.eq(kind)) + .and(dataset_dsl::time_deleted.is_null())), + ) + .select(<(Sled, Zpool, Option)>::as_select()) + .get_results_async(conn) + .await + .map_err(|e| { + public_error_from_diesel_pool(e.into(), ErrorHandler::Server) + }) + } + + pub async fn ensure_rack_dataset( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: DatasetKind, + redundancy: DatasetRedundancy, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum DatasetError { + NotEnoughZpools, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction_async(|conn| async move { + let sleds_zpools_and_maybe_datasets = + Self::sled_zpool_and_dataset_list_on_connection( + &conn, rack_id, kind, + ) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other(e.into())) + })?; + + // Split the set of returned zpools into "those with" and "those + // without" the requested dataset. + let (zpools_with_dataset, zpools_without_dataset): ( + Vec<_>, + Vec<_>, + ) = sleds_zpools_and_maybe_datasets + .into_iter() + .partition(|(_, _, maybe_dataset)| maybe_dataset.is_some()); + let mut zpools_without_dataset = zpools_without_dataset + .into_iter() + .map(|(sled, zpool, _)| (sled, zpool)) + .peekable(); + + let mut datasets: Vec<_> = zpools_with_dataset + .into_iter() + .map(|(sled, zpool, maybe_dataset)| { + ( + sled, + zpool, + maybe_dataset.expect("Dataset should exist"), + ) + }) + .collect(); + + // Add datasets to zpools, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which zpools contain this dataset" is completely + // arbitrary. 
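+                // Stop once every zpool has the dataset (OnAll) or the per-rack count is met;
+                // the PerRack case fails if the rack runs out of zpools first.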
+ loop { + match redundancy { + DatasetRedundancy::OnAll => { + if zpools_without_dataset.peek().is_none() { + break; + } + } + DatasetRedundancy::PerRack(desired) => { + if datasets.len() >= (desired as usize) { + break; + } + } + }; + + let (sled, zpool) = + zpools_without_dataset.next().ok_or_else(|| { + TxnError::CustomError(DatasetError::NotEnoughZpools) + })?; + let dataset_id = Uuid::new_v4(); + let address = + Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other( + e.into(), + )) + }) + .map(|ip| { + SocketAddrV6::new(ip, kind.port(), 0, 0) + })?; + + let dataset = db::model::Dataset::new( + dataset_id, + zpool.id(), + address, + kind, + ); + + let dataset = + Self::dataset_upsert_on_connection(&conn, dataset) + .await + .map_err(|e| { + TxnError::CustomError(DatasetError::Other( + e.into(), + )) + })?; + datasets.push((sled, zpool, dataset)); + } + + return Ok(datasets); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(DatasetError::NotEnoughZpools) => { + Error::unavail("Not enough zpools for dataset allocation") + } + TxnError::CustomError(DatasetError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + pub(super) fn get_allocatable_datasets_query() -> impl RunnableQuery + { + use db::schema::dataset::dsl; + + dsl::dataset + // We look for valid datasets (non-deleted crucible datasets). + .filter(dsl::size_used.is_not_null()) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::kind.eq(DatasetKind::Crucible)) + .order(dsl::size_used.asc()) + .select(Dataset::as_select()) + .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()) + } } diff --git a/nexus/src/db/datastore/external_ip.rs b/nexus/src/db/datastore/external_ip.rs index 4c908461924..58dc696c975 100644 --- a/nexus/src/db/datastore/external_ip.rs +++ b/nexus/src/db/datastore/external_ip.rs @@ -115,6 +115,19 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + pub async fn allocate_service_ip_on_connection( + conn: &async_bb8_diesel::Connection, + ip_id: Uuid, + rack_id: Uuid, + ) -> CreateResult { + let (.., pool) = + Self::ip_pools_lookup_by_rack_id_on_connection(conn, rack_id) + .await?; + + let data = IncompleteExternalIp::for_service(ip_id, pool.id()); + Self::allocate_external_ip_on_connection(conn, data).await + } + async fn allocate_external_ip( &self, opctx: &OpContext, @@ -136,6 +149,25 @@ impl DataStore { }) } + async fn allocate_external_ip_on_connection( + conn: &async_bb8_diesel::Connection, + data: IncompleteExternalIp, + ) -> CreateResult { + NextExternalIp::new(data).get_result_async(conn).await.map_err(|e| { + use async_bb8_diesel::ConnectionError::Query; + use diesel::result::Error::NotFound; + match e { + Query(NotFound) => { + Error::invalid_request("No external IP addresses available") + } + _ => public_error_from_diesel_pool( + e.into(), + ErrorHandler::Server, + ), + } + }) + } + /// Deallocate the external IP address with the provided ID. 
/// /// To support idempotency, such as in saga operations, this method returns diff --git a/nexus/src/db/datastore/ip_pool.rs b/nexus/src/db/datastore/ip_pool.rs index cb229e6fce4..00b82f21b74 100644 --- a/nexus/src/db/datastore/ip_pool.rs +++ b/nexus/src/db/datastore/ip_pool.rs @@ -125,6 +125,37 @@ impl DataStore { Ok((authz_pool, pool)) } + pub async fn ip_pools_lookup_by_rack_id_on_connection( + conn: &async_bb8_diesel::Connection, + rack_id: Uuid, + ) -> LookupResult<(authz::IpPool, IpPool)> { + use db::schema::ip_pool::dsl; + + // Look up this IP pool by rack ID. + let (authz_pool, pool) = dsl::ip_pool + .filter(dsl::rack_id.eq(Some(rack_id))) + .filter(dsl::time_deleted.is_null()) + .select(IpPool::as_select()) + .get_result_async(conn) + .await + .map_err(|e| { + public_error_from_diesel_pool(e.into(), ErrorHandler::Server) + }) + .map(|ip_pool| { + ( + authz::IpPool::new( + authz::FLEET, + ip_pool.id(), + LookupType::ByCompositeId(format!( + "Rack ID: {rack_id}" + )), + ), + ip_pool, + ) + })?; + Ok((authz_pool, pool)) + } + /// Creates a new IP pool. /// /// - If `rack_id` is provided, this IP pool is used for Oxide diff --git a/nexus/src/db/datastore/mod.rs b/nexus/src/db/datastore/mod.rs index b096e9fd658..234c97b7bcb 100644 --- a/nexus/src/db/datastore/mod.rs +++ b/nexus/src/db/datastore/mod.rs @@ -101,6 +101,15 @@ impl RunnableQuery for T where { } +// Redundancy for the number of datasets to be provisioned. +#[derive(Clone, Copy, Debug)] +pub enum DatasetRedundancy { + // The dataset should exist on all zpools. + OnAll, + // The dataset should exist on at least this many zpools. + PerRack(u32), +} + pub struct DataStore { pool: Arc, } @@ -130,30 +139,38 @@ impl DataStore { Ok(self.pool.pool()) } - /// Return the next available IPv6 address for an Oxide service running on - /// the provided sled. - pub async fn next_ipv6_address( - &self, - opctx: &OpContext, + fn next_ipv6_address_query( sled_id: Uuid, - ) -> Result { + ) -> impl RunnableQuery { use db::schema::sled::dsl; - let net = diesel::update( + diesel::update( dsl::sled.find(sled_id).filter(dsl::time_deleted.is_null()), ) .set(dsl::last_used_address.eq(dsl::last_used_address + 1)) .returning(dsl::last_used_address) - .get_result_async(self.pool_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel_pool( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::Sled, - LookupType::ById(sled_id), - ), - ) - })?; + } + + pub async fn next_ipv6_address_on_connection( + conn: &(impl async_bb8_diesel::AsyncConnection + + Sync), + sled_id: Uuid, + ) -> Result + where + ConnErr: From + Send + 'static, + async_bb8_diesel::PoolError: From, + { + let net = Self::next_ipv6_address_query(sled_id) + .get_result_async(conn) + .await + .map_err(|e| { + public_error_from_diesel_pool( + async_bb8_diesel::PoolError::from(e), + ErrorHandler::NotFoundByLookup( + ResourceType::Sled, + LookupType::ById(sled_id), + ), + ) + })?; // TODO-correctness: We need to ensure that this address is actually // within the sled's underlay prefix, once that's included in the @@ -166,6 +183,17 @@ impl DataStore { } } + /// Return the next available IPv6 address for an Oxide service running on + /// the provided sled. 
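+    ///
+    /// This simply acquires the authorized connection pool and delegates to
+    /// [`Self::next_ipv6_address_on_connection`].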
+ pub async fn next_ipv6_address( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result { + let conn = self.pool_authorized(opctx).await?; + Self::next_ipv6_address_on_connection(conn, sled_id).await + } + // Test interfaces #[cfg(test)] @@ -251,15 +279,14 @@ mod test { use crate::external_api::params; use chrono::{Duration, Utc}; use nexus_test_utils::db::test_setup_database; + use omicron_common::address::{Ipv6Subnet, DNS_REDUNDANCY, RACK_PREFIX}; use omicron_common::api::external::{ ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_test_utils::dev; use ref_cast::RefCast; - use std::collections::HashSet; - use std::net::Ipv6Addr; - use std::net::SocketAddrV6; - use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + use std::collections::{HashMap, HashSet}; + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -507,8 +534,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD * 2; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -607,8 +633,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -684,8 +709,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD - 1; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -741,8 +765,7 @@ mod test { // ... and datasets within that zpool. let dataset_count = REGION_REDUNDANCY_THRESHOLD; - let bogus_addr = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let dataset_ids: Vec = (0..dataset_count).map(|_| Uuid::new_v4()).collect(); for id in &dataset_ids { @@ -999,6 +1022,508 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_rack_service() { + let logctx = dev::test_setup_log("test_ensure_rack_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. 
+ assert_eq!(1, services.len()); + assert_eq!(ServiceKind::Nexus, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = + Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + + // Ensure a service exists on the rack, with some redundancy. + const NEXUS_COUNT: u32 = 3; + let mut services = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(NEXUS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::Nexus, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test that ensuring services is idempotent. + let mut services_again = datastore + .ensure_rack_service( + &opctx, + rack_id, + ServiceKind::Nexus, + NEXUS_COUNT, + ) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + // Ask for a different service type on the rack. + let oximeter_services = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Oximeter, 1) + .await + .expect("Should have allocated service"); + + // This should only return a single service + assert_eq!(1, oximeter_services.len()); + + // The target sled should contain both the nexus and oximeter services + let observed_services = datastore + .service_list(&opctx, oximeter_services[0].sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(2, observed_services.len()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_service_not_enough_sleds() { + let logctx = + dev::test_setup_log("test_ensure_rack_service_not_enough_sleds"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. 
+ let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Try to request a redundancy which is larger than the number of sleds. + let err = datastore + .ensure_rack_service(&opctx, rack_id, ServiceKind::Nexus, 2) + .await + .expect_err("Should have failed to allocate service"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough sleds"), + "Error should have identified 'Not enough sleds' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service() { + let logctx = dev::test_setup_log("test_ensure_dns_service"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the service should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + let rack_subnet = Ipv6Subnet::::new(*sled_addr.ip()); + + // Ensure a service exists on the rack. + let services = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + + // Only a single service was allocated, with the type / address we + // expect. + assert_eq!(1, services.len()); + assert_eq!(ServiceKind::InternalDNS, services[0].kind); + assert_eq!(sled_id, services[0].sled_id); + + // Listing services only shows this one. + let observed_services = datastore + .service_list(&opctx, sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(services[0].id(), observed_services[0].id()); + + // Test that ensuring services is idempotent. + let services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, 1) + .await + .expect("Should have allocated service"); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_dns_service_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_dns_service_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. 
+ const SLED_COUNT: u32 = DNS_REDUNDANCY; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = + Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + } + let rack_subnet = Ipv6Subnet::::new(Ipv6Addr::from( + sleds.values().next().unwrap().ip, + )); + + for sled in sleds.values() { + assert_eq!( + rack_subnet, + Ipv6Subnet::::new(Ipv6Addr::from(sled.ip)), + "Test pre-condition violated: All sleds must belong to the same rack" + ); + } + + // Ensure a service exists on the rack. + const DNS_COUNT: u32 = DNS_REDUNDANCY; + let mut services = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services.sort_by(|a, b| a.id().cmp(&b.id())); + + assert_eq!(DNS_COUNT, services.len() as u32); + for svc in &services { + assert_eq!(ServiceKind::InternalDNS, svc.kind); + + // Each service should have been provisioned to a distinct sled. + let observed_services = datastore + .service_list(&opctx, svc.sled_id) + .await + .expect("Should be able to list services"); + assert_eq!(1, observed_services.len()); + assert_eq!(svc.id(), observed_services[0].id()); + } + + // Test for idempotency + let mut services_again = datastore + .ensure_dns_service(&opctx, rack_subnet, DNS_COUNT) + .await + .expect("Should have allocated service"); + services_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(services_again, services); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + let zpool_id = create_test_zpool(&datastore, sled_id).await; + + // Ensure a dataset exists on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + + // Observe that only a single dataset was allocated + assert_eq!(1, output.len()); + let (_, _, output_dataset) = &output[0]; + assert_eq!(DatasetKind::Crucible, output_dataset.kind); + assert_eq!(zpool_id, output_dataset.pool_id); + + // Listing datasets only shows this one. + let observed_datasets = datastore + .dataset_list(&opctx, zpool_id) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(output_dataset.id(), observed_datasets[0].id()); + + // Test that ensuring datasets is idempotent. 
+ let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect("Should have allocated dataset"); + let (_, _, output_dataset_again) = &output_again[0]; + assert_eq!(output_dataset_again, output_dataset); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_not_enough_zpools() { + let logctx = + dev::test_setup_log("test_ensure_rack_dataset_not_enough_zpools"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled on which the dataset should exist. + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled) + .await + .expect("Should be able to upsert sled"); + + // Attempt to allocate a dataset on a rack without zpools. + let err = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::PerRack(1), + ) + .await + .expect_err("Should not have allocated dataset"); + + assert!( + matches!(err, Error::ServiceUnavailable { .. }), + "Error should have been ServiceUnavailable: {:?}", + err + ); + assert!( + err.to_string().contains("Not enough zpools"), + "Error should have identified 'Not enough zpools' as the cause: {:?}", err + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_rack_dataset_multi_sled() { + let logctx = dev::test_setup_log("test_ensure_rack_dataset_multi_sled"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let rack_id = Uuid::new_v4(); + + // Create sleds with distinct underlay subnets. + const SLED_COUNT: usize = 3; + let mut sleds = HashMap::new(); + for i in 0..SLED_COUNT { + let sled_addr = SocketAddrV6::new( + Ipv6Addr::new(0xfd00, 0, 0, i.try_into().unwrap(), 0, 0, 0, 1), + 8080, + 0, + 0, + ); + let sled_id = Uuid::new_v4(); + let is_scrimlet = false; + let sled = + Sled::new(sled_id, sled_addr.clone(), is_scrimlet, rack_id); + datastore + .sled_upsert(sled.clone()) + .await + .expect("Should be able to upsert sled"); + sleds.insert(sled.id(), sled); + create_test_zpool(&datastore, sled_id).await; + } + + // Ensure datasets exist on the rack. + let output = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + assert_eq!(SLED_COUNT, output.len()); + for (sled, zpool, dataset) in &output { + assert_eq!(DatasetKind::Crucible, dataset.kind); + assert_eq!(zpool.id(), dataset.pool_id); + assert_eq!(sled.id(), zpool.sled_id); + + let observed_datasets = datastore + .dataset_list(&opctx, zpool.id()) + .await + .expect("Should be able to list datasets"); + assert_eq!(1, observed_datasets.len()); + assert_eq!(dataset.id(), observed_datasets[0].id()) + } + + // Test that ensuring datasets is idempotent. 
+ let output_again = datastore + .ensure_rack_dataset( + &opctx, + rack_id, + DatasetKind::Crucible, + DatasetRedundancy::OnAll, + ) + .await + .expect("Should have allocated dataset"); + + let mut output: Vec<_> = + output.into_iter().map(|(_, _, dataset)| dataset).collect(); + output.sort_by(|a, b| a.id().cmp(&b.id())); + let mut output_again: Vec<_> = + output_again.into_iter().map(|(_, _, dataset)| dataset).collect(); + output_again.sort_by(|a, b| a.id().cmp(&b.id())); + assert_eq!(output, output_again); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_rack_initialize_is_idempotent() { let logctx = dev::test_setup_log("test_rack_initialize_is_idempotent"); @@ -1018,14 +1543,14 @@ mod test { // Initialize the Rack. let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); // Re-initialize the rack (check for idempotency) let result = datastore - .rack_set_initialized(&opctx, rack.id(), vec![]) + .rack_set_initialized(&opctx, rack.id(), vec![], vec![]) .await .unwrap(); assert!(result.initialized); diff --git a/nexus/src/db/datastore/rack.rs b/nexus/src/db/datastore/rack.rs index 415e7e83623..c612c852df5 100644 --- a/nexus/src/db/datastore/rack.rs +++ b/nexus/src/db/datastore/rack.rs @@ -14,9 +14,11 @@ use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; use crate::db::identity::Asset; +use crate::db::model::Dataset; use crate::db::model::Rack; use crate::db::model::Service; use crate::db::model::Sled; +use crate::db::model::Zpool; use crate::db::pagination::paginated; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -83,19 +85,21 @@ impl DataStore { opctx: &OpContext, rack_id: Uuid, services: Vec, + datasets: Vec, ) -> UpdateResult { use db::schema::rack::dsl as rack_dsl; - use db::schema::service::dsl as service_dsl; #[derive(Debug)] enum RackInitError { ServiceInsert { err: AsyncInsertError, sled_id: Uuid, svc_id: Uuid }, + DatasetInsert { err: AsyncInsertError, zpool_id: Uuid }, RackUpdate(PoolError), } type TxnError = TransactionError; // NOTE: This operation could likely be optimized with a CTE, but given // the low-frequency of calls, this optimization has been deferred. + let log = opctx.log.clone(); self.pool_authorized(opctx) .await? .transaction_async(|conn| async move { @@ -111,25 +115,25 @@ impl DataStore { )) })?; if rack.initialized { + info!(log, "Early exit: Rack already initialized"); return Ok(rack); } - // Otherwise, insert services and set rack.initialized = true. 
+ // Otherwise, insert services and datasets for svc in services { + use db::schema::service::dsl; let sled_id = svc.sled_id; >::insert_resource( sled_id, - diesel::insert_into(service_dsl::service) + diesel::insert_into(dsl::service) .values(svc.clone()) - .on_conflict(service_dsl::id) + .on_conflict(dsl::id) .do_update() .set(( - service_dsl::time_modified.eq(Utc::now()), - service_dsl::sled_id - .eq(excluded(service_dsl::sled_id)), - service_dsl::ip.eq(excluded(service_dsl::ip)), - service_dsl::kind - .eq(excluded(service_dsl::kind)), + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), )), ) .insert_and_get_result_async(&conn) @@ -142,7 +146,37 @@ impl DataStore { }) })?; } - diesel::update(rack_dsl::rack) + info!(log, "Inserted services"); + for dataset in datasets { + use db::schema::dataset::dsl; + let zpool_id = dataset.pool_id; + >::insert_resource( + zpool_id, + diesel::insert_into(dsl::dataset) + .values(dataset.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::pool_id.eq(excluded(dsl::pool_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result_async(&conn) + .await + .map_err(|err| { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + }) + })?; + } + info!(log, "Inserted datasets"); + + // Set the rack to "initialized" once the handoff is complete + let rack = diesel::update(rack_dsl::rack) .filter(rack_dsl::id.eq(rack_id)) .set(( rack_dsl::initialized.eq(true), @@ -155,10 +189,25 @@ impl DataStore { TxnError::CustomError(RackInitError::RackUpdate( PoolError::from(e), )) - }) + })?; + Ok(rack) }) .await .map_err(|e| match e { + TxnError::CustomError(RackInitError::DatasetInsert { + err, + zpool_id, + }) => match err { + AsyncInsertError::CollectionNotFound => { + Error::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ById(zpool_id), + } + } + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }, TxnError::CustomError(RackInitError::ServiceInsert { err, sled_id, diff --git a/nexus/src/db/datastore/service.rs b/nexus/src/db/datastore/service.rs index b2665b48cdd..e782f7ee9e2 100644 --- a/nexus/src/db/datastore/service.rs +++ b/nexus/src/db/datastore/service.rs @@ -5,22 +5,34 @@ //! [`DataStore`] methods on [`Service`]s. 
use super::DataStore; +use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; +use crate::db::error::TransactionError; use crate::db::identity::Asset; use crate::db::model::Service; +use crate::db::model::ServiceKind; use crate::db::model::Sled; +use crate::db::pool::DbConnection; +use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; +use nexus_types::identity::Resource; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::ReservedRackSubnet; +use omicron_common::address::DNS_REDUNDANCY; +use omicron_common::address::RACK_PREFIX; use omicron_common::api::external::CreateResult; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use std::net::Ipv6Addr; +use uuid::Uuid; impl DataStore { /// Stores a new service in the database. @@ -63,4 +75,455 @@ impl DataStore { } }) } + + async fn service_upsert_on_connection( + conn: &async_bb8_diesel::Connection, + service: Service, + ) -> Result { + use db::schema::service::dsl; + + let sled_id = service.sled_id; + Sled::insert_resource( + sled_id, + diesel::insert_into(dsl::service) + .values(service.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::kind.eq(excluded(dsl::kind)), + )), + ) + .insert_and_get_result_async(conn) + .await + .map_err(|e| match e { + AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: LookupType::ById(sled_id), + }, + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool( + e, + ErrorHandler::Conflict( + ResourceType::Service, + &service.id().to_string(), + ), + ) + } + }) + } + + async fn sled_list_with_limit_on_connection( + conn: &async_bb8_diesel::Connection, + limit: u32, + ) -> Result, async_bb8_diesel::ConnectionError> { + use db::schema::sled::dsl; + dsl::sled + .filter(dsl::time_deleted.is_null()) + .limit(limit as i64) + .select(Sled::as_select()) + .load_async(conn) + .await + } + + pub async fn service_list( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::service::dsl; + dsl::service + .filter(dsl::sled_id.eq(sled_id)) + .select(Service::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + + // List all sleds on a rack, with info about provisioned services of a + // particular type. + async fn sled_and_service_list( + conn: &async_bb8_diesel::Connection, + rack_id: Uuid, + kind: ServiceKind, + ) -> Result)>, async_bb8_diesel::ConnectionError> + { + use db::schema::service::dsl as svc_dsl; + use db::schema::sled::dsl as sled_dsl; + + db::schema::sled::table + .filter(sled_dsl::time_deleted.is_null()) + .filter(sled_dsl::rack_id.eq(rack_id)) + .left_outer_join(db::schema::service::table.on( + svc_dsl::sled_id.eq(sled_dsl::id).and(svc_dsl::kind.eq(kind)), + )) + .select(<(Sled, Option)>::as_select()) + .get_results_async(conn) + .await + } + + /// Ensures that all Scrimlets in `rack_id` have the `kind` service + /// provisioned. 
+ /// + /// TODO: Returns what? + pub async fn ensure_scrimlet_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + type TxnError = TransactionError; + + self.pool() + .transaction_async(|conn| async move { + let sleds_and_maybe_svcs = + Self::sled_and_service_list(&conn, rack_id, kind).await?; + + // Split the set of returned sleds into "those with" and "those + // without" the requested service. + let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .into_iter() + .partition(|(_, maybe_svc)| maybe_svc.is_some()); + + // Identify sleds without services (targets for future + // allocation). + let sleds_without_svc = + sleds_without_svc.into_iter().map(|(sled, _)| sled); + + // Identify sleds with services (part of output). + let mut svcs: Vec<_> = sleds_with_svc + .into_iter() + .map(|(_, maybe_svc)| { + maybe_svc.expect( + "Should have filtered by sleds with the service", + ) + }) + .collect(); + + // Add this service to all scrimlets without it. + for sled in sleds_without_svc { + if sled.is_scrimlet() { + let svc_id = Uuid::new_v4(); + let address = Self::next_ipv6_address_on_connection( + &conn, + sled.id(), + ) + .await + .map_err(|e| TxnError::CustomError(e))?; + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + kind, + ); + + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| TxnError::CustomError(e))?; + svcs.push(svc); + } + } + + return Ok(svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(e) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + /// Ensures that `redundancy` sleds within `rack_id` have the `kind` service + /// provisioned. + /// + /// Returns all services which have been allocated within the rack. + pub async fn ensure_rack_service( + &self, + opctx: &OpContext, + rack_id: Uuid, + kind: ServiceKind, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + opctx + .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST) + .await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + Other(Error), + } + type TxnError = TransactionError; + + // NOTE: We could also make parts of this a saga? + // + // - TODO: DON'T mark/unmark as rebalancing!!!!!! + // - Use rcgen!!! It's what it's for - optimistic concurrency control. + // - Basically, create a new rcgen for Nexus to use, bail if + // somone else increments past us? *That* can be stored on the rack + // table. + // TODO: alternatively, this whole thing is happening in the DB. + // We *could* issue a CTE. + // + // + // - List sleds + services, return sleds with/without services + // - Pick sleds that are targets probably all up-front + // - FOR EACH + // - Provision IPv6 + // - Upsert service record + // - IF NEXUS + // - Provision external IP + // - Find cert + // - Upsert nexus service record + + // NOTE: It's probably possible to do this without the transaction. + // + // See: services.sql + self.pool() + .transaction_async(|conn| async move { + let sleds_and_maybe_svcs = + Self::sled_and_service_list(&conn, rack_id, kind) + .await + .map_err(|e| TxnError::Pool(e.into()))?; + + // Split the set of returned sleds into "those with" and "those + // without" the requested service. 
+ let (sleds_with_svc, sleds_without_svc): (Vec<_>, Vec<_>) = + sleds_and_maybe_svcs + .into_iter() + .partition(|(_, maybe_svc)| maybe_svc.is_some()); + + // Identify sleds without services (targets for future + // allocation). + let mut sleds_without_svc = + sleds_without_svc.into_iter().map(|(sled, _)| sled); + + // Identify sleds with services (part of output). + let mut svcs: Vec<_> = sleds_with_svc + .into_iter() + .map(|(_, maybe_svc)| { + maybe_svc.expect( + "Should have filtered by sleds with the service", + ) + }) + .collect(); + + // Add services to sleds, in-order, until we've met a + // number sufficient for our redundancy. + // + // The selection of "which sleds run this service" is completely + // arbitrary. + while svcs.len() < (redundancy as usize) { + let sled = sleds_without_svc.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + + // TODO: With some work, you can get rid of the + // "...on_connection" versions of functions. + // + // See: https://github.com/oxidecomputer/omicron/pull/1621#discussion_r949796959 + // + // TODO: I *strongly* believe this means Connection vs Pool + // error unification in async_bb8_diesel. *always* return + // the pool error; keep it simple. + + // Always allocate an internal IP address to this service. + let address = + Self::next_ipv6_address_on_connection(&conn, sled.id()) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; + + // If requested, allocate an external IP address for this + // service too. + let external_ip = if matches!(kind, ServiceKind::Nexus) { + let (.., pool) = + Self::ip_pools_lookup_by_rack_id_on_connection( + &conn, rack_id, + ) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; + + let external_ip = + Self::allocate_service_ip_on_connection( + &conn, + Uuid::new_v4(), + pool.id(), + ) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; + + Some(external_ip) + } else { + None + }; + + // TODO: We actually have to *use* the external_ip + // - Use the NexusCertificate table (look up by UUID) + // - Create a NexusService table (reference service, ip, + // certs) + + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + kind, + ); + + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other( + e.into(), + )) + })?; + svcs.push(svc); + } + + return Ok(svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + } + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + /// Ensures that `redundancy` sleds within the `rack_subnet` have a DNS + /// service provisioned. + /// + /// TODO: Returns what? + pub async fn ensure_dns_service( + &self, + opctx: &OpContext, + rack_subnet: Ipv6Subnet, + redundancy: u32, + ) -> Result, Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + #[derive(Debug)] + enum ServiceError { + NotEnoughSleds, + NotEnoughIps, + Other(Error), + } + type TxnError = TransactionError; + + self.pool() + .transaction_async(|conn| async move { + let mut svcs = Self::dns_service_list(&conn).await?; + + // Get all subnets not allocated to existing services. 
+ let mut usable_dns_subnets = ReservedRackSubnet(rack_subnet) + .get_dns_subnets() + .into_iter() + .filter(|subnet| { + // If any existing services are using this address, + // skip it. + !svcs.iter().any(|svc| { + Ipv6Addr::from(svc.ip) == subnet.dns_address().ip() + }) + }) + .collect::>() + .into_iter(); + + // Get all sleds which aren't already running DNS services. + let mut target_sleds = + Self::sled_list_with_limit_on_connection(&conn, redundancy) + .await? + .into_iter() + .filter(|sled| { + // The target sleds are only considered if they aren't already + // running a DNS service. + svcs.iter().all(|svc| svc.sled_id != sled.id()) + }) + .collect::>() + .into_iter(); + + while svcs.len() < (redundancy as usize) { + let sled = target_sleds.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughSleds) + })?; + let svc_id = Uuid::new_v4(); + let dns_subnet = + usable_dns_subnets.next().ok_or_else(|| { + TxnError::CustomError(ServiceError::NotEnoughIps) + })?; + let address = dns_subnet.dns_address().ip(); + let service = db::model::Service::new( + svc_id, + sled.id(), + address, + ServiceKind::InternalDNS, + ); + + let svc = + Self::service_upsert_on_connection(&conn, service) + .await + .map_err(|e| { + TxnError::CustomError(ServiceError::Other(e)) + })?; + + svcs.push(svc); + } + return Ok(svcs); + }) + .await + .map_err(|e| match e { + TxnError::CustomError(ServiceError::NotEnoughSleds) => { + Error::unavail("Not enough sleds for service allocation") + } + TxnError::CustomError(ServiceError::NotEnoughIps) => { + Error::unavail( + "Not enough IP addresses for service allocation", + ) + } + TxnError::CustomError(ServiceError::Other(e)) => e, + TxnError::Pool(e) => { + public_error_from_diesel_pool(e, ErrorHandler::Server) + } + }) + } + + async fn dns_service_list( + conn: &async_bb8_diesel::Connection, + ) -> Result, async_bb8_diesel::ConnectionError> { + use db::schema::service::dsl as svc; + + svc::service + .filter(svc::kind.eq(ServiceKind::InternalDNS)) + .limit(DNS_REDUNDANCY.into()) + .select(Service::as_select()) + .get_results_async(conn) + .await + } } diff --git a/nexus/src/db/fixed_data/role_assignment.rs b/nexus/src/db/fixed_data/role_assignment.rs index 43635518c9a..7d7ddffab64 100644 --- a/nexus/src/db/fixed_data/role_assignment.rs +++ b/nexus/src/db/fixed_data/role_assignment.rs @@ -25,6 +25,20 @@ lazy_static! { role_builtin::FLEET_ADMIN.role_name, ), + // The "USER_SERVICE_BALANCER" user gets the "admin" role on the + // Fleet. + // + // This is necessary as services exist as resources implied by + // "FLEET" - if they ever become more fine-grained, this scope + // could also become smaller. + RoleAssignment::new( + IdentityType::UserBuiltin, + user_builtin::USER_SERVICE_BALANCER.id, + role_builtin::FLEET_ADMIN.resource_type, + *FLEET_ID, + role_builtin::FLEET_ADMIN.role_name, + ), + // The "internal-read" user gets the "viewer" role on the sole // Fleet. 
This will grant them the ability to read various control // plane data (like the list of sleds), which is in turn used to diff --git a/nexus/src/db/queries/external_ip.rs b/nexus/src/db/queries/external_ip.rs index 2496fdbde3d..20c3449557a 100644 --- a/nexus/src/db/queries/external_ip.rs +++ b/nexus/src/db/queries/external_ip.rs @@ -303,7 +303,6 @@ impl NextExternalIp { out.push_bind_param::, Option>(&None)?; } }; - out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; out.push_sql(", "); diff --git a/nexus/src/db/queries/mod.rs b/nexus/src/db/queries/mod.rs index 077d542dbbb..4ea95d5f14e 100644 --- a/nexus/src/db/queries/mod.rs +++ b/nexus/src/db/queries/mod.rs @@ -11,5 +11,6 @@ pub mod ip_pool; mod next_item; pub mod network_interface; pub mod region_allocation; +pub mod service_provision; pub mod vpc; pub mod vpc_subnet; diff --git a/nexus/src/db/queries/service_provision.rs b/nexus/src/db/queries/service_provision.rs new file mode 100644 index 00000000000..1c7d19faa59 --- /dev/null +++ b/nexus/src/db/queries/service_provision.rs @@ -0,0 +1,558 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of queries for provisioning services. + +use crate::db::alias::ExpressionAlias; +use crate::db::model::queries::service_provision::{ + candidate_services, candidate_sleds, inserted_services, new_internal_ips, + new_service_count, old_service_count, previously_allocated_services, + sled_allocation_pool, +}; +use crate::db::model::Service; +use crate::db::model::ServiceKind; +use crate::db::pool::DbConnection; +use crate::db::schema; +use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; +use chrono::DateTime; +use chrono::Utc; +use db_macros::Subquery; +use diesel::pg::Pg; +use diesel::query_builder::AstPass; +use diesel::query_builder::Query; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; +use diesel::sql_types; +use diesel::CombineDsl; +use diesel::ExpressionMethods; +use diesel::Insertable; +use diesel::IntoSql; +use diesel::JoinOnDsl; +use diesel::NullableExpressionMethods; +use diesel::QueryDsl; +use diesel::RunQueryDsl; + +/// A subquery to find all sleds that could run services. +#[derive(Subquery)] +#[subquery(name = sled_allocation_pool)] +struct SledAllocationPool { + query: Box>, +} + +impl SledAllocationPool { + fn new(rack_id: uuid::Uuid) -> Self { + use crate::db::schema::sled::dsl; + Self { + query: Box::new( + dsl::sled + .filter(dsl::time_deleted.is_null()) + .filter(dsl::rack_id.eq(rack_id)) + .select((dsl::id,)), + ), + } + } +} + +/// A subquery to find all services of a particular type which have already been +/// allocated. +#[derive(Subquery)] +#[subquery(name = previously_allocated_services)] +struct PreviouslyAllocatedServices { + query: Box>, +} + +impl PreviouslyAllocatedServices { + fn new(allocation_pool: &SledAllocationPool, kind: ServiceKind) -> Self { + use crate::db::schema::service::dsl as service_dsl; + use sled_allocation_pool::dsl as alloc_pool_dsl; + + let select_from_pool = allocation_pool + .query_source() + .select(alloc_pool_dsl::id) + .into_boxed(); + Self { + query: Box::new( + service_dsl::service + .filter(service_dsl::kind.eq(kind)) + .filter(service_dsl::sled_id.eq_any(select_from_pool)), + ), + } + } +} + +/// A subquery to find the number of old services. 
+#[derive(Subquery)] +#[subquery(name = old_service_count)] +struct OldServiceCount { + query: Box>, +} + +impl OldServiceCount { + fn new( + previously_allocated_services: &PreviouslyAllocatedServices, + ) -> Self { + Self { + query: Box::new( + previously_allocated_services + .query_source() + .select((diesel::dsl::count_star(),)), + ), + } + } +} + +/// A subquery to find the number of additional services which should be +/// provisioned. +#[derive(Subquery)] +#[subquery(name = new_service_count)] +struct NewServiceCount { + query: Box>, +} + +diesel::sql_function!(fn greatest(a: sql_types::BigInt, b: sql_types::BigInt) -> sql_types::BigInt); + +impl NewServiceCount { + fn new(redundancy: i32, old_service_count: &OldServiceCount) -> Self { + let old_count = old_service_count + .query_source() + .select(old_service_count::dsl::count) + .single_value() + .assume_not_null(); + Self { + query: Box::new(diesel::select(ExpressionAlias::new::< + new_service_count::dsl::count, + >((greatest( + (redundancy as i64).into_sql::(), + old_count, + ) - old_count,)))), + } + } +} + +/// A subquery to find new sleds to host the proposed services. +#[derive(Subquery)] +#[subquery(name = candidate_sleds)] +struct CandidateSleds { + query: Box>, +} + +impl CandidateSleds { + fn new( + sled_allocation_pool: &SledAllocationPool, + previously_allocated_services: &PreviouslyAllocatedServices, + _new_service_count: &NewServiceCount, + ) -> Self { + let select_from_previously_allocated = previously_allocated_services + .query_source() + .select(previously_allocated_services::dsl::sled_id) + .into_boxed(); + + let mut select_stmt = sled_allocation_pool + .query_source() + .filter( + sled_allocation_pool::dsl::id + .ne_all(select_from_previously_allocated), + ) + .select((sled_allocation_pool::dsl::id,)) + .into_boxed(); + + // TODO: I'd really prefer to just pass the 'new_service_count' as the + // `.limit(...)` here, but the API cannot currently operate on an + // expression. + // + // See: https://github.com/diesel-rs/diesel/discussions/3328 for further + // discussion. + select_stmt.limit_offset = + diesel::query_builder::BoxedLimitOffsetClause { + limit: Some(Box::new(diesel::dsl::sql::( + " LIMIT SELECT * FROM new_service_count", + ))), + offset: select_stmt.limit_offset.offset, + }; + + Self { query: Box::new(select_stmt) } + } +} + +/// A subquery to provision internal IPs for all the new services. +#[derive(Subquery)] +#[subquery(name = new_internal_ips)] +struct NewInternalIps { + query: Box>, +} + +impl NewInternalIps { + fn new(candidate_sleds: &CandidateSleds) -> Self { + use crate::db::schema::sled::dsl as sled_dsl; + use candidate_sleds::dsl as candidate_sleds_dsl; + + let select_from_candidate_sleds = candidate_sleds + .query_source() + .select(candidate_sleds_dsl::id) + .into_boxed(); + Self { + query: Box::new( + diesel::update( + sled_dsl::sled.filter( + sled_dsl::id.eq_any(select_from_candidate_sleds), + ), + ) + .set( + sled_dsl::last_used_address + .eq(sled_dsl::last_used_address + 1), + ) + .returning((sled_dsl::id, sled_dsl::last_used_address)), + ), + } + } +} + +/// A subquery to create the new services which should be inserted. 
+#[derive(Subquery)] +#[subquery(name = candidate_services)] +struct CandidateServices { + query: Box>, +} + +diesel::sql_function!(fn gen_random_uuid() -> Uuid); +diesel::sql_function!(fn now() -> Timestamptz); + +impl CandidateServices { + fn new( + candidate_sleds: &CandidateSleds, + new_internal_ips: &NewInternalIps, + kind: ServiceKind, + ) -> Self { + use candidate_sleds::dsl as candidate_sleds_dsl; + use new_internal_ips::dsl as new_internal_ips_dsl; + use schema::service::dsl as service_dsl; + + let kind = kind.into_sql::(); + Self { + query: + Box::new( + candidate_sleds + .query_source() + .inner_join( + new_internal_ips + .query_source() + .on(candidate_sleds_dsl::id + .eq(new_internal_ips_dsl::id)), + ) + .select(( + ExpressionAlias::new::( + gen_random_uuid(), + ), + ExpressionAlias::new::( + now(), + ), + ExpressionAlias::new::( + now(), + ), + ExpressionAlias::new::( + candidate_sleds_dsl::id, + ), + ExpressionAlias::new::( + new_internal_ips_dsl::last_used_address, + ), + ExpressionAlias::new::(kind), + )), + ), + } + } +} + +/// A subquery to insert the new services. +#[derive(Subquery)] +#[subquery(name = inserted_services)] +struct InsertServices { + query: Box>, +} + +impl InsertServices { + fn new(candidate: &CandidateServices) -> Self { + use crate::db::schema::service; + + Self { + query: Box::new( + candidate + .query_source() + .select(candidate_services::all_columns) + .insert_into(service::table) + .returning(service::all_columns), + ), + } + } +} + +/// Provision services of a particular type within a rack. +/// +/// TODO: Document +pub struct ServiceProvision { + now: DateTime, + + cte: Cte, +} + +impl ServiceProvision { + pub fn new( + redundancy: i32, + rack_id: uuid::Uuid, + kind: ServiceKind, + ) -> Self { + let now = Utc::now(); + let sled_allocation_pool = SledAllocationPool::new(rack_id); + let previously_allocated_services = + PreviouslyAllocatedServices::new(&sled_allocation_pool, kind); + let old_service_count = + OldServiceCount::new(&previously_allocated_services); + let new_service_count = + NewServiceCount::new(redundancy, &old_service_count); + let candidate_sleds = CandidateSleds::new( + &sled_allocation_pool, + &previously_allocated_services, + &new_service_count, + ); + let new_internal_ips = NewInternalIps::new(&candidate_sleds); + let candidate_services = + CandidateServices::new(&candidate_sleds, &new_internal_ips, kind); + let inserted_services = InsertServices::new(&candidate_services); + + let final_select = Box::new( + previously_allocated_services + .query_source() + .select(previously_allocated_services::all_columns) + .union( + inserted_services + .query_source() + .select(inserted_services::all_columns), + ), + ); + + let cte = CteBuilder::new() + .add_subquery(sled_allocation_pool) + .add_subquery(previously_allocated_services) + .add_subquery(old_service_count) + .add_subquery(new_service_count) + .add_subquery(candidate_sleds) + .add_subquery(new_internal_ips) + .add_subquery(candidate_services) + .add_subquery(inserted_services) + .build(final_select); + + Self { now, cte } + } +} + +// TODO: +// We could probably make this generic over the Cte "build" method, enforce the +// type there, and auto-impl: +// - QueryId +// - QueryFragment +// - Query +// +// If we know what the SqlType is supposed to be. 
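The `new_service_count` subquery above is what keeps `ServiceProvision` idempotent: it asks for `greatest(redundancy, old) - old` new services, so a rack that already meets the redundancy target inserts nothing. A plain-Rust restatement of that arithmetic, for illustration only:

```rust
// Mirrors the SQL expression greatest(redundancy, old_count) - old_count.
fn new_service_count(redundancy: i64, old_count: i64) -> i64 {
    std::cmp::max(redundancy, old_count) - old_count
}

fn main() {
    assert_eq!(new_service_count(3, 0), 3); // fresh rack: create all three
    assert_eq!(new_service_count(3, 2), 1); // partially provisioned: top up
    assert_eq!(new_service_count(3, 3), 0); // target already met: no-op
    assert_eq!(new_service_count(3, 5), 0); // over-provisioned: never negative
}
```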
+impl QueryId for ServiceProvision { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for ServiceProvision { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + self.cte.walk_ast(out.reborrow())?; + Ok(()) + } +} + +impl Query for ServiceProvision { + type SqlType = <>::SelectExpression as diesel::Expression>::SqlType; +} + +impl RunQueryDsl for ServiceProvision {} + +#[cfg(test)] +mod tests { + use crate::context::OpContext; + use crate::db::datastore::DataStore; + use diesel::pg::Pg; + use dropshot::test_util::LogContext; + use nexus_test_utils::db::test_setup_database; + use nexus_test_utils::RACK_UUID; + use omicron_test_utils::dev; + use omicron_test_utils::dev::db::CockroachInstance; + use std::sync::Arc; + use uuid::Uuid; + + use super::ServiceProvision; + + struct TestContext { + logctx: LogContext, + opctx: OpContext, + db: CockroachInstance, + db_datastore: Arc, + } + + impl TestContext { + async fn new(test_name: &str) -> Self { + let logctx = dev::test_setup_log(test_name); + let log = logctx.log.new(o!()); + let db = test_setup_database(&log).await; + crate::db::datastore::datastore_test(&logctx, &db).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&cfg)); + let db_datastore = + Arc::new(crate::db::DataStore::new(Arc::clone(&pool))); + let opctx = + OpContext::for_tests(log.new(o!()), db_datastore.clone()); + Self { logctx, opctx, db, db_datastore } + } + + async fn success(mut self) { + self.db.cleanup().await.unwrap(); + self.logctx.cleanup_successful(); + } + } + + #[tokio::test] + async fn test_query_output() { + let context = TestContext::new("test_query_output").await; + + let redundancy = 3; + let query = ServiceProvision::new( + redundancy, + Uuid::parse_str(RACK_UUID).unwrap(), + crate::db::model::ServiceKind::Nexus, + ); + + pretty_assertions::assert_eq!( + diesel::debug_query::(&query).to_string(), + format!( + "WITH \ + \"sled_allocation_pool\" AS (\ + SELECT \ + \"sled\".\"id\" \ + FROM \"sled\" \ + WHERE (\ + (\"sled\".\"time_deleted\" IS NULL) AND \ + (\"sled\".\"rack_id\" = $1)\ + )\ + ), \ + \"previously_allocated_services\" AS (\ + SELECT \ + \"service\".\"id\", \ + \"service\".\"time_created\", \ + \"service\".\"time_modified\", \ + \"service\".\"sled_id\", \ + \"service\".\"ip\", \ + \"service\".\"kind\" \ + FROM \"service\" \ + WHERE (\ + (\"service\".\"kind\" = $2) AND \ + (\"service\".\"sled_id\" = \ + ANY(SELECT \"sled_allocation_pool\".\"id\" FROM \"sled_allocation_pool\")\ + )\ + )\ + ), \ + \"old_service_count\" AS (\ + SELECT COUNT(*) FROM \"previously_allocated_services\"\ + ), \ + \"new_service_count\" AS (\ + SELECT (\ + greatest(\ + $3, \ + (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $4)\ + ) - (SELECT \"old_service_count\".\"count\" FROM \"old_service_count\" LIMIT $5)\ + ) \ + AS count\ + ), \ + \"candidate_sleds\" AS (\ + SELECT \ + \"sled_allocation_pool\".\"id\" \ + FROM \"sled_allocation_pool\" \ + WHERE (\ + \"sled_allocation_pool\".\"id\" != ALL(\ + SELECT \ + \"previously_allocated_services\".\"sled_id\" \ + FROM \"previously_allocated_services\"\ + )\ + ) \ + LIMIT SELECT * FROM new_service_count\ + ), \ + \"new_internal_ips\" AS (\ + UPDATE \ + \"sled\" \ + SET \ + \"last_used_address\" = (\"sled\".\"last_used_address\" + $6) \ + WHERE \ + (\"sled\".\"id\" = ANY(SELECT \"candidate_sleds\".\"id\" FROM \"candidate_sleds\")) 
\ + RETURNING \ + \"sled\".\"id\", \ + \"sled\".\"last_used_address\"\ + ), \ + \"candidate_services\" AS (\ + SELECT \ + gen_random_uuid() AS id, \ + now() AS time_created, \ + now() AS time_modified, \ + \"candidate_sleds\".\"id\" AS sled_id, \ + \"new_internal_ips\".\"last_used_address\" AS ip, \ + $7 AS kind \ + FROM (\ + \"candidate_sleds\" \ + INNER JOIN \ + \"new_internal_ips\" \ + ON (\ + \"candidate_sleds\".\"id\" = \"new_internal_ips\".\"id\"\ + ))\ + ), \ + \"inserted_services\" AS (\ + INSERT INTO \"service\" \ + (\"id\", \"time_created\", \"time_modified\", \"sled_id\", \"ip\", \"kind\") \ + SELECT \ + \"candidate_services\".\"id\", \ + \"candidate_services\".\"time_created\", \ + \"candidate_services\".\"time_modified\", \ + \"candidate_services\".\"sled_id\", \ + \"candidate_services\".\"ip\", \ + \"candidate_services\".\"kind\" \ + FROM \"candidate_services\" \ + RETURNING \ + \"service\".\"id\", \ + \"service\".\"time_created\", \ + \"service\".\"time_modified\", \ + \"service\".\"sled_id\", \ + \"service\".\"ip\", \"service\".\"kind\"\ + ) \ + (\ + SELECT \ + \"previously_allocated_services\".\"id\", \ + \"previously_allocated_services\".\"time_created\", \ + \"previously_allocated_services\".\"time_modified\", \ + \"previously_allocated_services\".\"sled_id\", \ + \"previously_allocated_services\".\"ip\", \ + \"previously_allocated_services\".\"kind\" \ + FROM \"previously_allocated_services\"\ + ) UNION \ + (\ + SELECT \ + \"inserted_services\".\"id\", \ + \"inserted_services\".\"time_created\", \ + \"inserted_services\".\"time_modified\", \ + \"inserted_services\".\"sled_id\", \ + \"inserted_services\".\"ip\", \ + \"inserted_services\".\"kind\" \ + FROM \"inserted_services\"\ + ) -- binds: [{RACK_UUID}, Nexus, {redundancy}, 1, 1, 1, Nexus]", + ), + ); + + context.success().await; + } +} diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 692dd82b9b6..307a3f02941 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -7,8 +7,9 @@ use crate::context::OpContext; use crate::ServerContext; use super::params::{ - DatasetPutRequest, DatasetPutResponse, OximeterInfo, ServicePutRequest, - SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, + DatasetPutRequest, DatasetPutResponse, OximeterInfo, + RackInitializationRequest, SledAgentStartupInfo, ZpoolPutRequest, + ZpoolPutResponse, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -101,15 +102,15 @@ struct RackPathParam { async fn rack_initialization_complete( rqctx: Arc>>, path_params: Path, - info: TypedBody>, + info: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; let path = path_params.into_inner(); - let svcs = info.into_inner(); + let request = info.into_inner(); let opctx = OpContext::for_internal_api(&rqctx).await; - nexus.rack_initialize(&opctx, path.rack_id, svcs).await?; + nexus.rack_initialize(&opctx, path.rack_id, request).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index a29c02cfb14..660c16fde4f 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -65,23 +65,27 @@ pub fn run_openapi_internal() -> Result<(), String> { .map_err(|e| e.to_string()) } -/// Packages up a [`Nexus`], running both external and internal HTTP API servers -/// wired up to Nexus -pub struct Server { +/// A partially-initialized Nexus server, which exposes an internal interface, +/// but is not ready to receive external requests. 
+pub struct InternalServer<'a> { /// shared state used by API request handlers pub apictx: Arc, - /// dropshot servers for external API - pub http_servers_external: Vec>>, /// dropshot server for internal API pub http_server_internal: dropshot::HttpServer>, + + config: &'a Config, + log: Logger, } -impl Server { - /// Start a nexus server. +impl<'a> InternalServer<'a> { + /// Creates a Nexus instance with only the internal API exposed. + /// + /// This is often used as an argument when creating a [`Server`], + /// which also exposes the external API. pub async fn start( - config: &Config, + config: &'a Config, log: &Logger, - ) -> Result { + ) -> Result, String> { let log = log.new(o!("name" => config.deployment.id.to_string())); info!(log, "setting up nexus server"); @@ -91,15 +95,46 @@ impl Server { ServerContext::new(config.deployment.rack_id, ctxlog, &config) .await?; - // Launch the internal server. - let server_starter_internal = dropshot::HttpServerStarter::new( + let http_server_starter_internal = dropshot::HttpServerStarter::new( &config.deployment.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), ) .map_err(|error| format!("initializing internal server: {}", error))?; - let http_server_internal = server_starter_internal.start(); + let http_server_internal = http_server_starter_internal.start(); + + Ok(Self { apictx, http_server_internal, config, log }) + } +} + +/// Packages up a [`Nexus`], running both external and internal HTTP API servers +/// wired up to Nexus +pub struct Server { + /// shared state used by API request handlers + pub apictx: Arc, + /// dropshot servers for external API + pub http_servers_external: Vec>>, + /// dropshot server for internal API + pub http_server_internal: dropshot::HttpServer>, +} + +impl Server { + pub async fn start(internal: InternalServer<'_>) -> Result { + let apictx = internal.apictx; + let http_server_internal = internal.http_server_internal; + let log = internal.log; + let config = internal.config; + + // Wait until RSS handoff completes. + let opctx = apictx.nexus.opctx_for_service_balancer(); + apictx.nexus.await_rack_initialization(&opctx).await; + + // With the exception of integration tests environments, + // we expect background tasks to be enabled. + if config.pkg.tunables.enable_background_tasks { + apictx.nexus.start_background_tasks().map_err(|e| e.to_string())?; + } // Launch the external server(s). let http_servers_external = config @@ -178,7 +213,8 @@ pub async fn run_server(config: &Config) -> Result<(), String> { } else { debug!(log, "registered DTrace probes"); } - let server = Server::start(config, &log).await?; + let internal_server = InternalServer::start(config, &log).await?; + let server = Server::start(internal_server).await?; server.register_as_producer().await; server.wait_for_finish().await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 82b01c3a0ff..d80b31c6fea 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -111,15 +111,42 @@ pub async fn test_setup_with_config( .expect("Tests expect to set a port of Clickhouse") .set_port(clickhouse.port()); - let server = - omicron_nexus::Server::start(&config, &logctx.log).await.unwrap(); - server + // Start the Nexus internal API. 
+ let internal_server = + omicron_nexus::InternalServer::start(&config, &logctx.log) + .await + .unwrap(); + internal_server .apictx .nexus .wait_for_populate() .await .expect("Nexus never loaded users"); + // Perform the "handoff from RSS". + // + // However, RSS isn't running, so we'll do the handoff ourselves. + let opctx = internal_server.apictx.nexus.opctx_for_service_balancer(); + internal_server + .apictx + .nexus + .rack_initialize( + &opctx, + config.deployment.rack_id, + // NOTE: In the context of this test utility, we arguably do have an + // instance of CRDB and Nexus running. However, as this info isn't + // necessary for most tests, we pass no information here. + omicron_nexus::internal_api::params::RackInitializationRequest { + services: vec![], + datasets: vec![], + }, + ) + .await + .expect("Could not initialize rack"); + + // Start the Nexus external API. + let server = omicron_nexus::Server::start(internal_server).await.unwrap(); + let testctx_external = ClientTestContext::new( server.http_servers_external[0].local_addr(), logctx.log.new(o!("component" => "external client test context")), diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 59c8dc4614d..b25a78d669c 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -34,6 +34,8 @@ address = "[::1]:0" [tunables] # Allow small subnets, so we can test IP address exhaustion easily / quickly max_vpc_ipv4_subnet_prefix = 29 +# Disable background tests to help with test determinism +enable_background_tasks = false [deployment] # Identifier for this instance of Nexus. diff --git a/nexus/tests/integration_tests/datasets.rs b/nexus/tests/integration_tests/datasets.rs index 42b0d48a847..721e9f6f382 100644 --- a/nexus/tests/integration_tests/datasets.rs +++ b/nexus/tests/integration_tests/datasets.rs @@ -8,7 +8,7 @@ use omicron_common::api::external::ByteCount; use omicron_nexus::internal_api::params::{ DatasetKind, DatasetPutRequest, ZpoolPutRequest, }; -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddrV6}; use uuid::Uuid; use nexus_test_utils::{ControlPlaneTestContext, SLED_AGENT_UUID}; @@ -36,8 +36,7 @@ async fn test_dataset_put_success(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; let dataset_id = Uuid::new_v4(); @@ -69,8 +68,7 @@ async fn test_dataset_put_bad_zpool_returns_not_found( let dataset_put_url = format!("/zpools/{}/dataset/{}", zpool_id, dataset_id); - let address = - SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080); + let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); let kind = DatasetKind::Crucible; let request = DatasetPutRequest { address, kind }; diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 098059fbbba..9f738efd244 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -7,6 +7,7 @@ use omicron_common::api::external::ByteCount; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt; +use std::net::IpAddr; use std::net::Ipv6Addr; use std::net::SocketAddr; use std::net::SocketAddrV6; @@ -93,7 +94,7 @@ impl FromStr for DatasetKind { pub struct DatasetPutRequest { /// Address on which a service is responding to requests for the /// dataset. 
- pub address: SocketAddr, + pub address: SocketAddrV6, /// Type of dataset being inserted. pub kind: DatasetKind, @@ -122,10 +123,16 @@ pub struct DatasetPutResponse { #[derive( Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq, )] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "snake_case", tag = "type", content = "content")] pub enum ServiceKind { InternalDNS, - Nexus, + Nexus { + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): + // While it's true that Nexus will only run with a single address, + // we want to convey information about the available pool of addresses + // when handing off from RSS -> Nexus. + external_address: IpAddr, + }, Oximeter, Dendrite, Tfport, @@ -136,7 +143,7 @@ impl fmt::Display for ServiceKind { use ServiceKind::*; let s = match self { InternalDNS => "internal_dns", - Nexus => "nexus", + Nexus { .. } => "nexus", Oximeter => "oximeter", Dendrite => "dendrite", Tfport => "tfport", @@ -145,24 +152,6 @@ impl fmt::Display for ServiceKind { } } -impl FromStr for ServiceKind { - type Err = omicron_common::api::external::Error; - - fn from_str(s: &str) -> Result { - use ServiceKind::*; - match s { - "nexus" => Ok(Nexus), - "oximeter" => Ok(Oximeter), - "internal_dns" => Ok(InternalDNS), - "dendrite" => Ok(Dendrite), - "tfport" => Ok(Tfport), - _ => Err(Self::Err::InternalError { - internal_message: format!("Unknown service kind: {}", s), - }), - } - } -} - /// Describes a service on a sled #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct ServicePutRequest { @@ -176,6 +165,26 @@ pub struct ServicePutRequest { pub kind: ServiceKind, } +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct DatasetCreateRequest { + pub zpool_id: Uuid, + pub dataset_id: Uuid, + pub request: DatasetPutRequest, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct RackInitializationRequest { + pub services: Vec, + pub datasets: Vec, + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): + // While it's true that Nexus will only run with a single address, + // we want to convey information about the available pool of addresses + // when handing off from RSS -> Nexus. + + // TODO(https://github.com/oxidecomputer/omicron/issues/1528): + // Support passing x509 cert info. +} + /// Message used to notify Nexus that this oximeter instance is up and running. 
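Switching `ServiceKind` to `tag = "type", content = "content"` changes its wire format from a bare string to an adjacently tagged object, which is what lets the Nexus variant carry its external address. A self-contained sketch of the resulting JSON (assuming `serde` with the `derive` feature and `serde_json`); the variant spellings match the `ServiceKind` schema in the OpenAPI diff below:

```rust
use serde::{Deserialize, Serialize};
use std::net::IpAddr;

#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type", content = "content")]
enum ServiceKind {
    InternalDNS,
    Nexus { external_address: IpAddr },
    Oximeter,
}

fn main() -> Result<(), serde_json::Error> {
    // Unit variants carry only the tag...
    assert_eq!(
        serde_json::to_string(&ServiceKind::Oximeter)?,
        r#"{"type":"oximeter"}"#
    );
    // ...and snake_case renders "InternalDNS" as "internal_d_n_s", matching
    // the enum value in openapi/nexus-internal.json.
    assert_eq!(
        serde_json::to_string(&ServiceKind::InternalDNS)?,
        r#"{"type":"internal_d_n_s"}"#
    );
    // The Nexus variant now carries its external address in the payload.
    let nexus =
        ServiceKind::Nexus { external_address: "192.168.1.20".parse().unwrap() };
    assert_eq!(
        serde_json::to_string(&nexus)?,
        r#"{"type":"nexus","content":{"external_address":"192.168.1.20"}}"#
    );
    Ok(())
}
```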
#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] pub struct OximeterInfo { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index e5864ca4e83..b92bfab9fce 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -255,11 +255,7 @@ "content": { "application/json": { "schema": { - "title": "Array_of_ServicePutRequest", - "type": "array", - "items": { - "$ref": "#/components/schemas/ServicePutRequest" - } + "$ref": "#/components/schemas/RackInitializationRequest" } } }, @@ -704,6 +700,27 @@ "value" ] }, + "DatasetCreateRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "format": "uuid" + }, + "request": { + "$ref": "#/components/schemas/DatasetPutRequest" + }, + "zpool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dataset_id", + "request", + "zpool_id" + ] + }, "DatasetKind": { "description": "Describes the purpose of the dataset.", "type": "string", @@ -1800,6 +1817,27 @@ } ] }, + "RackInitializationRequest": { + "type": "object", + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetCreateRequest" + } + }, + "services": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ServicePutRequest" + } + } + }, + "required": [ + "datasets", + "services" + ] + }, "Sample": { "description": "A concrete type representing a single, timestamped measurement from a timeseries.", "type": "object", @@ -1832,13 +1870,90 @@ }, "ServiceKind": { "description": "Describes the purpose of the service.", - "type": "string", - "enum": [ - "internal_d_n_s", - "nexus", - "oximeter", - "dendrite", - "tfport" + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internal_d_n_s" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "content": { + "type": "object", + "properties": { + "external_address": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "external_address" + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "content", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "dendrite" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "tfport" + ] + } + }, + "required": [ + "type" + ] + } ] }, "ServicePutRequest": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 8b9bdc64d52..a6d8a2b6920 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -390,6 +390,33 @@ } } } + }, + "/zpools": { + "get": { + "operationId": "zpools_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Zpool", + "type": "array", + "items": { + "$ref": "#/components/schemas/Zpool" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } } }, "components": { @@ -1834,6 +1861,18 @@ "required": [ "rules" ] + }, + "Zpool": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "id" + ] } } } diff --git a/services.sql b/services.sql new file mode 100644 index 
00000000000..eeccaf53d1d --- /dev/null +++ b/services.sql @@ -0,0 +1,427 @@ +/* + * + * TAKEN FROM DBINIT.SQL + * + */ + +/* dbwipe.sql */ +CREATE DATABASE IF NOT EXISTS omicron; +CREATE USER IF NOT EXISTS omicron; +ALTER DEFAULT PRIVILEGES FOR ROLE root REVOKE ALL ON TABLES FROM omicron; +DROP DATABASE IF EXISTS omicron; +DROP USER IF EXISTS omicron; + +/* dbinit.sql */ +CREATE DATABASE IF NOT EXISTS omicron; +CREATE USER IF NOT EXISTS omicron; +ALTER DEFAULT PRIVILEGES GRANT INSERT, SELECT, UPDATE, DELETE ON TABLES to omicron; + +set disallow_full_table_scans = on; +set large_full_scan_rows = 0; + +/* + * Racks + */ +CREATE TABLE omicron.public.rack ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + + /* + * Identifies if rack management has been transferred from RSS -> Nexus. + * If "false", RSS is still managing sleds, services, and DNS records. + * + * This value is set to "true" when RSS calls the + * "rack_initialization_complete" endpoint on Nexus' internal interface. + * + * See RFD 278 for more detail. + */ + initialized BOOL NOT NULL, + + /* Used to configure the updates service URL */ + tuf_base_url STRING(512) +); + +/* + * Sleds + */ + +CREATE TABLE omicron.public.sled ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Rack table */ + rack_id UUID NOT NULL, + + /* Idenfities if this Sled is a Scrimlet */ + is_scrimlet BOOL NOT NULL, + + /* The IP address and bound port of the sled agent server. */ + ip INET NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + /* The last address allocated to an Oxide service on this sled. */ + last_used_address INET NOT NULL +); + +/* Add an index which lets us look up the sleds on a rack */ +CREATE INDEX ON omicron.public.sled ( + rack_id +) WHERE + time_deleted IS NULL; + +CREATE INDEX ON omicron.public.sled ( + id +) WHERE + time_deleted IS NULL; + +/* + * Services + */ + +CREATE TYPE omicron.public.service_kind AS ENUM ( + 'internal_dns', + 'nexus', + 'oximeter' +); + +CREATE TABLE omicron.public.service ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + + /* FK into the Sled table */ + sled_id UUID NOT NULL, + /* The IP address of the service. */ + ip INET NOT NULL, + /* Indicates the type of service. */ + kind omicron.public.service_kind NOT NULL +); + +/* Add an index which lets us look up the services on a sled */ +CREATE INDEX ON omicron.public.service ( + sled_id, + kind +); + +/* Add an index which lets us look up services of a particular kind on a sled */ +CREATE INDEX ON omicron.public.service ( + kind +); + +/* + * Additional context for services of "kind = nexus" + * This table should be treated as an optional extension + * of the service table itself. + */ +CREATE TABLE omicron.public.nexus_service ( + id UUID PRIMARY KEY, + + /* FK to the service table */ + service_id UUID NOT NULL, + /* FK to the instance_external_ip table */ + external_ip_id UUID NOT NULL, + /* FK to the nexus_certificate table */ + certificate_id UUID NOT NULL +); + +/* + * Information about x509 certificates used to serve Nexus' external interface. + * These certificates may be used by multiple instantiations of the Nexus + * service simultaneously. 
+ */ +CREATE TABLE omicron.public.nexus_certificate ( + id UUID PRIMARY KEY, + public_cert BYTES NOT NULL, + private_key BYTES NOT NULL +); + +/* + * ZPools of Storage, attached to Sleds. + * Typically these are backed by a single physical disk. + */ +CREATE TABLE omicron.public.Zpool ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Sled table */ + sled_id UUID NOT NULL, + + /* TODO: Could also store physical disk FK here */ + + total_size INT NOT NULL +); + +CREATE TYPE omicron.public.dataset_kind AS ENUM ( + 'crucible', + 'cockroach', + 'clickhouse' +); + +/* + * A dataset of allocated space within a zpool. + */ +CREATE TABLE omicron.public.Dataset ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + rcgen INT NOT NULL, + + /* FK into the Pool table */ + pool_id UUID NOT NULL, + + /* Contact information for the dataset */ + ip INET NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + kind omicron.public.dataset_kind NOT NULL, + + /* An upper bound on the amount of space that is allowed to be in-use */ + quota INT NOT NULL, + reservation INT NOT NULL, + + /* An upper bound on the amount of space that might be in-use */ + size_used INT, + + /* A quota smaller than a reservation would reserve unusable space */ + CONSTRAINT reservation_less_than_or_equal_to_quota CHECK ( + reservation <= quota + ), + + /* Crucible must make use of 'size_used'; other datasets manage their own storage */ + CONSTRAINT size_used_column_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND size_used IS NOT NULL) + ), + + /* Validate that the size usage is less than the quota */ + CONSTRAINT size_used_less_than_or_equal_to_quota CHECK ( + (size_used IS NULL) OR + (size_used IS NOT NULL AND size_used <= quota) + ) +); + +/* Create an index on the size usage for Crucible's allocation */ +CREATE INDEX on omicron.public.Dataset ( + size_used +) WHERE size_used IS NOT NULL AND time_deleted IS NULL AND kind = 'crucible'; + +/* Create an index on the size usage for any dataset */ +CREATE INDEX on omicron.public.Dataset ( + size_used +) WHERE size_used IS NOT NULL AND time_deleted IS NULL; + +-- TODO: Obviously, there's more stuff here. But this is a proxy. 
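The `Dataset` table above encodes its invariants as CHECK constraints: a reservation may not exceed the quota, Crucible datasets must track `size_used`, and recorded usage may not exceed the quota. A hypothetical plain-Rust restatement of those rules, for illustration only (this helper is not part of the change; the database enforces the constraints itself):

```rust
fn dataset_row_is_valid(
    kind: &str,
    quota: i64,
    reservation: i64,
    size_used: Option<i64>,
) -> bool {
    // reservation_less_than_or_equal_to_quota
    let reservation_ok = reservation <= quota;
    // size_used_column_set_for_crucible
    let crucible_ok = kind != "crucible" || size_used.is_some();
    // size_used_less_than_or_equal_to_quota
    let usage_ok = size_used.map_or(true, |used| used <= quota);
    reservation_ok && crucible_ok && usage_ok
}

fn main() {
    assert!(dataset_row_is_valid("crucible", 100, 50, Some(10)));
    assert!(!dataset_row_is_valid("crucible", 100, 50, None)); // Crucible must track usage
    assert!(!dataset_row_is_valid("cockroach", 100, 200, None)); // reservation > quota
    assert!(!dataset_row_is_valid("clickhouse", 100, 50, Some(150))); // usage > quota
}
```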
+CREATE TABLE omicron.public.external_ip ( + id UUID PRIMARY KEY +); + + +/* + * + * TEST DATA + * + */ + +-- Add a rack +INSERT INTO omicron.public.rack (id, time_created, time_modified, initialized, tuf_base_url) VALUES + ( + '11111111-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + TRUE, + NULL + ); + +-- Add some sleds (aaaa / bbbb are gimlets, cccc is scrimlet) +INSERT INTO omicron.public.sled (id, time_created, time_modified, time_deleted, rcgen, rack_id, is_scrimlet, ip, port, last_used_address) VALUES + ( + '22222222-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + false, + '127.0.0.1', + 0, + '127.0.0.1' + ), + ( + '22222222-bbbb-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + false, + '127.0.0.1', + 0, + '127.0.100.1' + ), + ( + '22222222-cccc-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + NULL, + 0, + '11111111-aaaa-407e-aa8d-602ed78f38be', + true, + '127.0.0.1', + 0, + '127.0.200.1' + ); + +INSERT INTO omicron.public.service (id, time_created, time_modified, sled_id, ip, kind) VALUES + ( + '33333333-aaaa-407e-aa8d-602ed78f38be', + TIMESTAMPTZ '2016-03-26', + TIMESTAMPTZ '2016-03-26', + '22222222-aaaa-407e-aa8d-602ed78f38be', + '127.0.0.1', + 'nexus' + ); + +/* + * CTE: Allocate a particular service within a rack. + * + * Inputs: Rack ID, service type, desired count + */ + +WITH + -- Find all allocation targets. + -- This includes sleds which may already be running the service, + -- and sleds which could run the service in the future. + sled_allocation_pool AS ( + SELECT + omicron.public.sled.id + FROM + omicron.public.sled + WHERE + omicron.public.sled.time_deleted IS NULL AND + -- XXX: Constraints can be user-supplied? + omicron.public.sled.rack_id = '11111111-aaaa-407e-aa8d-602ed78f38be' + ), + + -- Get all services which already have been allocated from this pool. + previously_allocated_services AS ( + SELECT + omicron.public.service.id, + omicron.public.service.time_created, + omicron.public.service.time_modified, + omicron.public.service.sled_id, + omicron.public.service.ip, + omicron.public.service.kind + FROM + omicron.public.service + WHERE + -- XXX: 'nexus' is the name of this particular service + omicron.public.service.kind = 'nexus' AND + omicron.public.service.sled_id IN (SELECT id FROM sled_allocation_pool) + ), + + -- Calculate how many services we already have + old_service_count AS ( + SELECT COUNT(1) FROM previously_allocated_services + ), + -- Calculate the number of new services we need + new_service_count AS ( + -- XXX: 3 is the user-supplied redundancy + SELECT (greatest(3, (SELECT old_service_count.count FROM old_service_count)) + - (SELECT old_service_count.count FROM old_service_count)) + ), + + -- Get allocation candidates from the pool, as long as they don't already + -- have the service. 
+ candidate_sleds AS ( + SELECT + sled_allocation_pool.id + FROM + sled_allocation_pool + WHERE + sled_allocation_pool.id NOT IN (SELECT sled_id FROM previously_allocated_services) + LIMIT (SELECT * FROM new_service_count) + ), + + -- Allocate an internal IP address for the service + new_internal_ips AS ( + UPDATE omicron.public.sled + SET + last_used_address = last_used_address + 1 + WHERE + omicron.public.sled.id in (SELECT id from candidate_sleds) + RETURNING + omicron.public.sled.id as sled_id, + omicron.public.sled.last_used_address as ip + ), + + -- TODO: External IPs??? + + -- TODO: This fails; data-modifying statements must be at a top level. +-- new_external_ips AS ( +-- WITH +-- new_ips AS ( +-- INSERT INTO omicron.public.external_ip (id) VALUES ( +-- gen_random_uuid() +-- ) +-- RETURNING * +-- ) +-- SELECT * FROM (SELECT * FROM new_ips) +-- ), + + -- Construct the services we want to insert + candidate_services AS ( + SELECT + gen_random_uuid() as id, + now() as time_created, + now() as time_modified, + candidate_sleds.id as sled_id, + new_internal_ips.ip as ip, + -- XXX service type + CAST('nexus' AS omicron.public.service_kind) as kind + FROM + candidate_sleds + INNER JOIN + new_internal_ips + ON + candidate_sleds.id = new_internal_ips.sled_id + ), + + inserted_services AS ( + INSERT INTO omicron.public.service + ( + -- XXX: "SELECT *" isn't currently possible with Diesel... + -- ... but it *COULD* be, when the source is a CTE Query! + SELECT * + FROM candidate_services + ) + RETURNING * + ) + ( + SELECT + -- XXX: Do we care about the new/not new distinction? + FALSE as new, + * + FROM previously_allocated_services + UNION + SELECT + TRUE as new, + * + FROM inserted_services + ); + +set disallow_full_table_scans = off; + +-- SELECT * FROM omicron.public.Sled; diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index ffa88dbcda3..368d5be1057 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -15,6 +15,7 @@ use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::sp::SimSpConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; use std::path::PathBuf; +use uuid::Uuid; #[derive(Debug, Parser)] #[clap( @@ -98,7 +99,9 @@ async fn do_run() -> Result<(), CmdError> { // Configure and run the Bootstrap server. let bootstrap_config = BootstrapConfig { - id: config.id, + // NOTE: The UUID of this bootstrap server is not stable across + // reboots. + id: Uuid::new_v4(), bind_address: bootstrap_address, log: config.log.clone(), rss_config, diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index d44e5e9a299..457db638e2d 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -136,7 +136,6 @@ impl Agent { ) -> Result<(Self, TrustQuorumMembership), BootstrapError> { let ba_log = log.new(o!( "component" => "BootstrapAgent", - "server" => sled_config.id.to_string(), )); // We expect this directory to exist - ensure that it does, before any @@ -238,7 +237,14 @@ impl Agent { // Server already exists, return it. 
info!(&self.log, "Sled Agent already loaded"); - if &server.address().ip() != sled_address.ip() { + if server.id() != request.id { + let err_str = format!( + "Sled Agent already running with UUID {}, but {} was requested", + server.id(), + request.id, + ); + return Err(BootstrapError::SledError(err_str)); + } else if &server.address().ip() != sled_address.ip() { let err_str = format!( "Sled Agent already running on address {}, but {} was requested", server.address().ip(), @@ -280,6 +286,7 @@ impl Agent { let server = SledServer::start( &self.sled_config, self.parent_log.clone(), + request.id, sled_address, is_scrimlet, request.clone(), @@ -326,7 +333,7 @@ impl Agent { // indicating which kind of address we're advertising). self.ddmd_client.advertise_prefix(request.subnet); - Ok(SledAgentResponse { id: self.sled_config.id }) + Ok(SledAgentResponse { id: request.id }) } /// Communicates with peers, sharing secrets, until the rack has been diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 42acba7562a..f4871ba8b5a 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -87,7 +87,6 @@ impl Server { info!(log, "detecting (real or simulated) SP"); let sp = SpHandle::detect( config.sp_config.as_ref().map(|c| &c.local_sp), - &sled_config, &log, ) .await diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index e1042f3b4bd..fe1b5c57764 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -10,13 +10,10 @@ use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; use serde::Deserialize; use std::path::{Path, PathBuf}; -use uuid::Uuid; /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { - /// Unique id for the sled - pub id: Uuid, /// Configuration for the sled agent debug log pub log: ConfigLogging, /// Optional VLAN ID to be used for tagging guest VNICs. 
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index e2e8bae0cd2..07db4b46e42 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -7,7 +7,7 @@ use crate::params::{ DatasetEnsureBody, DiskEnsureBody, InstanceEnsureBody, InstanceSerialConsoleData, InstanceSerialConsoleRequest, ServiceEnsureBody, - VpcFirewallRulesEnsureBody, + VpcFirewallRulesEnsureBody, Zpool, }; use crate::serial::ByteOffset; use dropshot::{ @@ -33,6 +33,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(services_put)?; + api.register(zpools_get)?; api.register(filesystem_put)?; api.register(instance_put)?; api.register(disk_put)?; @@ -66,6 +67,17 @@ async fn services_put( Ok(HttpResponseUpdatedNoContent()) } +#[endpoint { + method = GET, + path = "/zpools", +}] +async fn zpools_get( + rqctx: Arc>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.zpools_get().await.map_err(|e| Error::from(e))?)) +} + #[endpoint { method = PUT, path = "/filesystem", diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index e9b1eeccf72..c46a596b3d8 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -229,6 +229,11 @@ pub struct InstanceSerialConsoleData { pub last_byte_offset: u64, } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct Zpool { + pub id: Uuid, +} + // The type of networking 'ASIC' the Dendrite service is expected to manage #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Copy, Hash, diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 576344d6f9d..48f1f1ab1c9 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -12,7 +12,7 @@ use omicron_common::address::{ }; use serde::Deserialize; use serde::Serialize; -use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv6Addr}; use std::path::Path; /// Configuration for the "rack setup service", which is controlled during @@ -29,9 +29,6 @@ use std::path::Path; pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, - #[serde(default, rename = "request")] - pub requests: Vec, - /// The minimum number of sleds required to unlock the rack secret. /// /// If this value is less than 2, no rack secret will be created on startup; @@ -40,22 +37,11 @@ pub struct SetupServiceConfig { /// Internet gateway information. pub gateway: Gateway, -} - -/// A request to initialize a sled. -#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct HardcodedSledRequest { - /// Datasets to be created. - #[serde(default, rename = "dataset")] - pub datasets: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service_zone")] - pub service_zones: Vec, - /// DNS Services to be instantiated. - #[serde(default, rename = "dns_service")] - pub dns_services: Vec, + /// The address on which Nexus should serve an external interface. + // TODO(https://github.com/oxidecomputer/omicron/issues/1530): Eventually, + // this should be pulled from a pool of addresses. 
+ pub nexus_external_address: IpAddr, } impl SetupServiceConfig { @@ -90,9 +76,9 @@ mod test { fn test_subnets() { let cfg = SetupServiceConfig { rack_subnet: "fd00:1122:3344:0100::".parse().unwrap(), - requests: vec![], rack_secret_threshold: 0, gateway: Gateway { address: None, mac: macaddr::MacAddr6::nil() }, + nexus_external_address: "192.168.1.20".parse().unwrap(), }; assert_eq!( diff --git a/sled-agent/src/rack_setup/mod.rs b/sled-agent/src/rack_setup/mod.rs index e947ff99ef0..4df85a7727f 100644 --- a/sled-agent/src/rack_setup/mod.rs +++ b/sled-agent/src/rack_setup/mod.rs @@ -5,4 +5,5 @@ //! Rack Setup Service pub mod config; +mod plan; pub mod service; diff --git a/sled-agent/src/rack_setup/plan/mod.rs b/sled-agent/src/rack_setup/plan/mod.rs new file mode 100644 index 00000000000..2343a3be2e6 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/mod.rs @@ -0,0 +1,8 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Rack Setup Service plan generation + +pub mod service; +pub mod sled; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs new file mode 100644 index 00000000000..5db1be965c2 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -0,0 +1,294 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "where should services be initialized". + +use crate::params::{DatasetEnsureBody, ServiceType, ServiceZoneRequest}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + RSS_RESERVED_ADDRESSES, +}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use serde::{Deserialize, Serialize}; +use sled_agent_client::{ + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, +}; +use slog::Logger; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + +// The number of CRDB instances to create from RSS. +const CRDB_COUNT: usize = 1; + +fn rss_service_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-service-plan.toml") +} + +/// Describes errors which may occur while generating a plan for services. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Error making HTTP request to Sled Agent: {0}")] + SledApi(#[from] SledAgentError), + + #[error("Error initializing sled via sled-agent: {0}")] + SledInitialization(String), + + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. 
+ #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub services: HashMap, +} + +impl Plan { + pub async fn load(log: &Logger) -> Result, PlanError> { + // If we already created a plan for this RSS to allocate + // services to sleds, re-use that existing plan. + let rss_service_plan_path = rss_service_plan_path(); + if rss_service_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_service_plan_path) + .await + .map_err(|err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_service_plan_path:?}" + ), + err, + })?, + ) + .map_err(|err| PlanError::Toml { + path: rss_service_plan_path, + err, + })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + // Gets a zpool UUID from the sled. + async fn get_a_zpool_from_sled( + log: &Logger, + address: SocketAddrV6, + ) -> Result { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .map_err(PlanError::HttpClient)?; + let client = SledAgentClient::new_with_client( + &format!("http://{}", address), + client, + log.new(o!("SledAgentClient" => address.to_string())), + ); + + let get_zpools = || async { + let zpools: Vec = client + .zpools_get() + .await + .map(|response| { + response + .into_inner() + .into_iter() + .map(|zpool| zpool.id) + .collect() + }) + .map_err(|err| { + BackoffError::transient(PlanError::SledApi(err)) + })?; + + if zpools.is_empty() { + return Err(BackoffError::transient( + PlanError::SledInitialization( + "Awaiting zpools".to_string(), + ), + )); + } + + Ok(zpools) + }; + let log_failure = |error, _| { + warn!(log, "failed to get zpools"; "error" => ?error); + }; + let zpools = + retry_notify(internal_service_policy(), get_zpools, log_failure) + .await?; + + Ok(zpools[0]) + } + + pub async fn create( + log: &Logger, + config: &Config, + sled_addrs: &Vec, + ) -> Result { + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); + + let mut allocations = vec![]; + + for idx in 0..sled_addrs.len() { + let sled_address = sled_addrs[idx]; + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceZoneRequest { + id: Uuid::new_v4(), + zone_name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + services: vec![ServiceType::Nexus { + internal_ip: address, + external_ip: config.nexus_external_address, + }], + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. 
+ if idx < CRDB_COUNT { + let zpool_id = + Self::get_a_zpool_from_sled(log, sled_address).await?; + + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_id, + dataset_kind: crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], + }, + address, + }); + } + + // The first enumerated sleds get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); + request.dns_services.push(ServiceZoneRequest { + id: Uuid::new_v4(), + zone_name: "internal-dns".to_string(), + addresses: vec![dns_addr], + gz_addresses: vec![dns_subnet.gz_address().ip()], + services: vec![ServiceType::InternalDns { + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), + }], + }); + } + + allocations.push((sled_address, request)); + } + + let mut services = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + services.insert(addr, allocation); + } + + let plan = Self { services }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_service_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS service plan to {path:?}"), + err, + } + })?; + info!(log, "Service plan written to storage"); + + Ok(plan) + } +} + +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +// TODO: Could exist in another file? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { last_addr: sled_addr } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } +} diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs new file mode 100644 index 00000000000..cde70deb690 --- /dev/null +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -0,0 +1,245 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Plan generation for "how should sleds be initialized". 
+ +use crate::bootstrap::{ + config::BOOTSTRAP_AGENT_PORT, + params::SledAgentRequest, + trust_quorum::{RackSecret, ShareDistribution}, +}; +use crate::rack_setup::config::SetupServiceConfig as Config; +use serde::{Deserialize, Serialize}; +use slog::Logger; +use sprockets_host::Ed25519Certificate; +use std::collections::HashMap; +use std::net::{Ipv6Addr, SocketAddrV6}; +use std::path::{Path, PathBuf}; +use thiserror::Error; +use uuid::Uuid; + +fn rss_sled_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH).join("rss-sled-plan.toml") +} + +pub fn generate_rack_secret<'a>( + rack_secret_threshold: usize, + member_device_id_certs: &'a [Ed25519Certificate], + log: &Logger, +) -> Result< + Option<impl ExactSizeIterator<Item = ShareDistribution> + 'a>, + PlanError, +> { + // We do not generate a rack secret if we only have a single sled or if our + // config specifies that the threshold for unlock is only a single sled. + let total_shares = member_device_id_certs.len(); + if total_shares <= 1 { + info!(log, "Skipping rack secret creation (only one sled present)"); + return Ok(None); + } + + if rack_secret_threshold <= 1 { + warn!( + log, + concat!( + "Skipping rack secret creation due to config", + " (despite discovery of {} bootstrap agents)" + ), + total_shares, + ); + return Ok(None); + } + + let secret = RackSecret::new(); + let (shares, verifier) = secret + .split(rack_secret_threshold, total_shares) + .map_err(PlanError::SplitRackSecret)?; + + Ok(Some(shares.into_iter().map(move |share| ShareDistribution { + threshold: rack_secret_threshold, + verifier: verifier.clone(), + share, + member_device_id_certs: member_device_id_certs.to_vec(), + }))) +} + +/// Describes errors which may occur while generating a plan for sleds. +#[derive(Error, Debug)] +pub enum PlanError { + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, + + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { path: PathBuf, err: toml::de::Error }, + + #[error("Failed to split rack secret: {0:?}")] + SplitRackSecret(vsss_rs::Error), +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Plan { + pub rack_id: Uuid, + pub sleds: HashMap<SocketAddrV6, SledAgentRequest>, + // TODO: Consider putting the rack subnet here? This may be operator-driven + // in the future, so it should exist in the "plan". + // + // TL;DR: The more we decouple from "rss-config.toml", the easier it'll be to + // switch to an operator-driven interface. +} + +impl Plan { + pub async fn load(log: &Logger) -> Result<Option<Self>, PlanError> { + // If we already created a plan for this RSS to allocate + // subnets/requests to sleds, re-use that existing plan.
+ let rss_sled_plan_path = rss_sled_plan_path(); + if rss_sled_plan_path.exists() { + info!(log, "RSS plan already created, loading from file"); + + let plan: Self = toml::from_str( + &tokio::fs::read_to_string(&rss_sled_plan_path).await.map_err( + |err| PlanError::Io { + message: format!( + "Loading RSS plan {rss_sled_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| PlanError::Toml { path: rss_sled_plan_path, err })?; + Ok(Some(plan)) + } else { + Ok(None) + } + } + + pub async fn create( + log: &Logger, + config: &Config, + bootstrap_addrs: Vec<Ipv6Addr>, + ) -> Result<Self, PlanError> { + let rack_id = Uuid::new_v4(); + + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let allocations = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + info!(log, "Creating plan for the sled at {:?}", bootstrap_addr); + let bootstrap_addr = + SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + + ( + bootstrap_addr, + SledAgentRequest { + id: Uuid::new_v4(), + subnet, + gateway: config.gateway.clone(), + rack_id, + }, + ) + }); + + info!(log, "Serializing plan"); + + let mut sleds = std::collections::HashMap::new(); + for (addr, allocation) in allocations { + sleds.insert(addr, allocation); + } + + let plan = Self { rack_id, sleds }; + + // Once we've constructed a plan, write it down to durable storage. + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); + let plan_str = toml::to_string(&serialized_plan) + .expect("Cannot turn config to string"); + + info!(log, "Plan serialized as: {}", plan_str); + let path = rss_sled_plan_path(); + tokio::fs::write(&path, plan_str).await.map_err(|err| { + PlanError::Io { + message: format!("Storing RSS sled plan to {path:?}"), + err, + } + })?; + info!(log, "Sled plan written to storage"); + + Ok(plan) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use omicron_test_utils::dev::test_setup_log; + use sprockets_common::certificates::Ed25519Signature; + use sprockets_common::certificates::KeyType; + use std::collections::HashSet; + + fn dummy_certs(n: usize) -> Vec<Ed25519Certificate> { + vec![ + Ed25519Certificate { + subject_key_type: KeyType::DeviceId, + subject_public_key: sprockets_host::Ed25519PublicKey([0; 32]), + signer_key_type: KeyType::Manufacturing, + signature: Ed25519Signature([0; 64]), + }; + n + ] + } + + #[test] + fn test_generate_rack_secret() { + let logctx = test_setup_log("test_generate_rack_secret"); + + // No secret generated if we have <= 1 sled + assert!(generate_rack_secret(10, &dummy_certs(1), &logctx.log) + .unwrap() + .is_none()); + + // No secret generated if threshold <= 1 + assert!(generate_rack_secret(1, &dummy_certs(10), &logctx.log) + .unwrap() + .is_none()); + + // Secret generation fails if threshold > total sleds + assert!(matches!( + generate_rack_secret(10, &dummy_certs(5), &logctx.log), + Err(PlanError::SplitRackSecret(_)) + )); + + // Secret generation succeeds if threshold <= total shares and both are + // > 1, and the returned iterator satisfies: + // + // * total length == total shares + // * each share is distinct + for total_shares in 2..=32 { + for threshold in 2..=total_shares { + let certs = dummy_certs(total_shares); + let shares = + generate_rack_secret(threshold, &certs, &logctx.log) + .unwrap() + .unwrap(); + + assert_eq!(shares.len(), total_shares); + + // `Share` doesn't implement `Hash`, but it's a
newtype around + // `Vec` (which does). Unwrap the newtype to check that all + // shares are distinct. + let shares_set = shares + .map(|share_dist| share_dist.share.0) + .collect::>(); + assert_eq!(shares_set.len(), total_shares); + } + } + + logctx.cleanup_successful(); + } +} diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4997ce7f2fc..7f4b3135366 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,30 +4,44 @@ //! Rack Setup Service implementation -use super::config::{HardcodedSledRequest, SetupServiceConfig as Config}; -use crate::bootstrap::config::BOOTSTRAP_AGENT_PORT; -use crate::bootstrap::ddm_admin_client::{DdmAdminClient, DdmError}; -use crate::bootstrap::params::SledAgentRequest; -use crate::bootstrap::rss_handle::BootstrapAgentHandle; -use crate::bootstrap::trust_quorum::{RackSecret, ShareDistribution}; -use crate::params::{ServiceType, ServiceZoneRequest}; -use internal_dns_client::multiclient::{DnsError, Updater as DnsUpdater}; -use omicron_common::address::{ - get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, +use super::config::SetupServiceConfig as Config; +use crate::bootstrap::{ + ddm_admin_client::{DdmAdminClient, DdmError}, + rss_handle::BootstrapAgentHandle, }; +use crate::params::{DatasetEnsureBody, ServiceType, ServiceZoneRequest}; +use crate::rack_setup::plan::service::{ + Plan as ServicePlan, PlanError as ServicePlanError, +}; +use crate::rack_setup::plan::sled::{ + generate_rack_secret, Plan as SledPlan, PlanError as SledPlanError, +}; +use internal_dns_client::multiclient::{ + DnsError, Resolver as DnsResolver, Updater as DnsUpdater, +}; +use internal_dns_client::names::{ServiceName, SRV}; +use nexus_client::{ + types as NexusTypes, Client as NexusClient, Error as NexusError, +}; +use omicron_common::address::{get_sled_address, NEXUS_INTERNAL_PORT}; use omicron_common::backoff::{ - internal_service_policy, retry_notify, BackoffError, + internal_service_policy, internal_service_policy_with_max, retry_notify, + BackoffError, +}; +use sled_agent_client::{ + types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, }; -use serde::{Deserialize, Serialize}; use slog::Logger; use sprockets_host::Ed25519Certificate; use std::collections::{HashMap, HashSet}; use std::iter; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use thiserror::Error; use tokio::sync::OnceCell; -use uuid::Uuid; + +// The minimum number of sleds to initialize the rack. +const MINIMUM_SLED_COUNT: usize = 1; /// Describes errors which may occur while operating the setup service. 
#[derive(Error, Debug)] @@ -39,38 +53,37 @@ pub enum SetupServiceError { err: std::io::Error, }, + #[error("Cannot create plan for sled services: {0}")] + ServicePlan(#[from] ServicePlanError), + + #[error("Cannot create plan for sled setup: {0}")] + SledPlan(#[from] SledPlanError), + + #[error("Bad configuration for setting up rack: {0}")] + BadConfig(String), + #[error("Error initializing sled via sled-agent: {0}")] SledInitialization(String), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), + SledApi(#[from] SledAgentError), + + #[error("Error making HTTP request to Nexus: {0}")] + NexusApi(#[from] NexusError), #[error("Error contacting ddmd: {0}")] DdmError(#[from] DdmError), - #[error("Cannot deserialize TOML file at {path}: {err}")] - Toml { path: PathBuf, err: toml::de::Error }, - #[error("Failed to monitor for peers: {0}")] PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), - #[error("Failed to split rack secret: {0:?}")] - SplitRackSecret(vsss_rs::Error), - - #[error("Failed to access DNS servers: {0}")] + #[error("Failed to access DNS server: {0}")] Dns(#[from] DnsError), } -// The workload / information allocated to a single sled. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -struct SledAllocation { - initialization_request: SledAgentRequest, - services_request: HardcodedSledRequest, -} - /// The interface to the Rack Setup Service. pub struct RackSetupService { handle: tokio::task::JoinHandle>, @@ -123,14 +136,9 @@ impl RackSetupService { } } -fn rss_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan.toml") -} - -fn rss_completed_plan_path() -> std::path::PathBuf { - std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) - .join("rss-plan-completed.toml") +fn rss_completed_plan_path() -> PathBuf { + Path::new(omicron_common::OMICRON_CONFIG_PATH) + .join("rss-plan-completed.marker") } // Describes the options when awaiting for peers. 
@@ -164,20 +172,19 @@ impl ServiceInner { async fn initialize_datasets( &self, - sled_address: SocketAddr, - datasets: &Vec, + sled_address: SocketAddrV6, + datasets: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending dataset requests..."); @@ -188,14 +195,7 @@ impl ServiceInner { .filesystem_put(&dataset.clone().into()) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error< - sled_agent_client::types::Error, - >, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to create filesystem"; "error" => ?error); @@ -207,12 +207,35 @@ impl ServiceInner { ) .await?; } + + let mut records = HashMap::new(); + for dataset in datasets { + records + .entry(dataset.srv()) + .or_insert_with(Vec::new) + .push((dataset.aaaa(), dataset.address())); + } + let records_put = || async { + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; + Ok(()) } async fn initialize_services( &self, - sled_address: SocketAddr, + sled_address: SocketAddrV6, services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); @@ -221,17 +244,17 @@ impl ServiceInner { .timeout(dur) .build() .map_err(SetupServiceError::HttpClient)?; - let client = sled_agent_client::Client::new_with_client( + let client = SledAgentClient::new_with_client( &format!("http://{}", sled_address), client, - self.log.new(o!("SledAgentClient" => sled_address)), + self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); info!(self.log, "sending service requests..."); let services_put = || async { info!(self.log, "initializing sled services: {:?}", services); client - .services_put(&sled_agent_client::types::ServiceEnsureBody { + .services_put(&SledAgentTypes::ServiceEnsureBody { services: services .iter() .map(|s| s.clone().into()) @@ -239,156 +262,42 @@ impl ServiceInner { }) .await .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - sled_agent_client::Error, - >, - >(()) + Ok::<(), BackoffError>>(()) }; let log_failure = |error, _| { warn!(self.log, "failed to initialize services"; "error" => ?error); }; retry_notify(internal_service_policy(), services_put, log_failure) .await?; - Ok(()) - } - - async fn load_plan( - &self, - ) -> Result>, SetupServiceError> - { - // If we already created a plan for this RSS to allocate - // subnets/requests to sleds, re-use that existing plan. 
- let rss_plan_path = rss_plan_path(); - if rss_plan_path.exists() { - info!(self.log, "RSS plan already created, loading from file"); - - let plan: std::collections::HashMap = - toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await.map_err( - |err| SetupServiceError::Io { - message: format!( - "Loading RSS plan {rss_plan_path:?}" - ), - err, - }, - )?, - ) - .map_err(|err| SetupServiceError::Toml { - path: rss_plan_path, - err, - })?; - Ok(Some(plan)) - } else { - Ok(None) - } - } - async fn create_plan( - &self, - config: &Config, - bootstrap_addrs: Vec, - ) -> Result, SetupServiceError> { - let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); - let dns_subnets = reserved_rack_subnet.get_dns_subnets(); - - info!(self.log, "dns_subnets: {:#?}", dns_subnets); - - let requests_and_sleds = - bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. - let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - HardcodedSledRequest::default() + // Insert DNS records, if the DNS servers have been initialized + if let Some(dns_servers) = self.dns_servers.get() { + let mut records = HashMap::new(); + for zone in services { + for service in &zone.services { + if let Some(addr) = zone.address(&service) { + records + .entry(zone.srv(&service)) + .or_insert_with(Vec::new) + .push((zone.aaaa(), addr)); } - }; - - // The first enumerated sleds get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - let dns_addr = dns_subnet.dns_address().ip(); - request.dns_services.push(ServiceZoneRequest { - id: Uuid::new_v4(), - zone_name: "internal-dns".to_string(), - addresses: vec![dns_addr], - gz_addresses: vec![dns_subnet.gz_address().ip()], - services: vec![ServiceType::InternalDns { - server_address: SocketAddrV6::new( - dns_addr, - DNS_SERVER_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new( - dns_addr, DNS_PORT, 0, 0, - ), - }], - }); } - - (request, (idx, bootstrap_addr)) - }); - - let rack_id = Uuid::new_v4(); - let allocations = requests_and_sleds.map(|(request, sled)| { - let (idx, bootstrap_addr) = sled; - info!( - self.log, - "Creating plan for the sled at {:?}", bootstrap_addr - ); - let bootstrap_addr = - SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - - ( - bootstrap_addr, - SledAllocation { - initialization_request: SledAgentRequest { - id: Uuid::new_v4(), - subnet, - rack_id, - gateway: config.gateway.clone(), - }, - services_request: request, - }, - ) - }); - - info!(self.log, "Serializing plan"); - - let mut plan = std::collections::HashMap::new(); - for (addr, allocation) in allocations { - plan.insert(addr, allocation); - } - - // Once we've constructed a plan, write it down to durable storage. 
- let serialized_plan = - toml::Value::try_from(&plan).unwrap_or_else(|e| { - panic!("Cannot serialize configuration: {:#?}: {}", plan, e) - }); - let plan_str = toml::to_string(&serialized_plan) - .expect("Cannot turn config to string"); - - info!(self.log, "Plan serialized as: {}", plan_str); - let path = rss_plan_path(); - tokio::fs::write(&path, plan_str).await.map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), - err, } - })?; - info!(self.log, "Plan written to storage"); + let records_put = || async { + dns_servers + .insert_dns_records(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to set DNS records"; "error" => ?error); + }; + retry_notify(internal_service_policy(), records_put, log_failure) + .await?; + } - Ok(plan) + Ok(()) } // Waits for sufficient neighbors to exist so the initial set of requests @@ -459,6 +368,118 @@ impl ServiceInner { Ok(addrs) } + async fn handoff_to_nexus( + &self, + config: &Config, + sled_plan: &SledPlan, + service_plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + info!(self.log, "Handing off control to Nexus"); + + let resolver = DnsResolver::new(&config.az_subnet()) + .expect("Failed to create DNS resolver"); + let ip = resolver + .lookup_ip(SRV::Service(ServiceName::Nexus)) + .await + .expect("Failed to lookup IP"); + let nexus_address = SocketAddr::new(ip, NEXUS_INTERNAL_PORT); + + info!(self.log, "Nexus address: {}", nexus_address.to_string()); + + let nexus_client = NexusClient::new( + &format!("http://{}", nexus_address), + self.log.new(o!("component" => "NexusClient")), + ); + + // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". + // + // We need the ID when passing info to Nexus. + let mut id_map = HashMap::new(); + for (_, sled_request) in sled_plan.sleds.iter() { + id_map + .insert(get_sled_address(sled_request.subnet), sled_request.id); + } + + // Convert all the information we have about services and datasets into + // a format which can be processed by Nexus. + let mut services: Vec = vec![]; + let mut datasets: Vec = vec![]; + for (addr, service_request) in service_plan.services.iter() { + let sled_id = *id_map + .get(addr) + .expect("Sled address in service plan, but not sled plan"); + + for zone in service_request + .services + .iter() + .chain(service_request.dns_services.iter()) + { + for svc in &zone.services { + let kind = match svc { + ServiceType::Nexus { external_ip, internal_ip: _ } => { + NexusTypes::ServiceKind::Nexus { + external_address: *external_ip, + } + } + ServiceType::InternalDns { .. } => { + NexusTypes::ServiceKind::InternalDNS + } + ServiceType::Oximeter => { + NexusTypes::ServiceKind::Oximeter + } + ServiceType::Dendrite { .. } => { + NexusTypes::ServiceKind::Dendrite + } + // TODO TODO TODO + ServiceType::Tfport { .. } => todo!(), + }; + + services.push(NexusTypes::ServicePutRequest { + service_id: zone.id, + sled_id, + // TODO: Should this be a vec, or a single value? 
+ address: zone.addresses[0], + kind, + }) + } + } + + for dataset in service_request.datasets.iter() { + datasets.push(NexusTypes::DatasetCreateRequest { + zpool_id: dataset.zpool_id, + dataset_id: dataset.id, + request: NexusTypes::DatasetPutRequest { + address: dataset.address.to_string(), + kind: dataset.dataset_kind.clone().into(), + }, + }) + } + } + + let request = + NexusTypes::RackInitializationRequest { services, datasets }; + + let notify_nexus = || async { + nexus_client + .rack_initialization_complete(&sled_plan.rack_id, &request) + .await + .map_err(BackoffError::transient) + }; + let log_failure = |err, _| { + info!(self.log, "Failed to handoff to nexus: {err}"); + }; + + retry_notify( + internal_service_policy_with_max(std::time::Duration::from_secs(1)), + notify_nexus, + log_failure, + ) + .await?; + + info!(self.log, "Handoff to Nexus is complete"); + Ok(()) + } + // In lieu of having an operator send requests to all sleds via an // initialization service, the sled-agent configuration may allow for the // automated injection of setup requests from a sled. @@ -466,19 +487,23 @@ impl ServiceInner { // This method has a few distinct phases, identified by files in durable // storage: // - // 1. ALLOCATION PLAN CREATION. When the RSS starts up for the first time, - // it creates an allocation plan to provision subnets and services - // to an initial set of sleds. + // 1. SLED ALLOCATION PLAN CREATION. When the RSS starts up for the first + // time, it creates an allocation plan to provision subnets to an initial + // set of sleds. // - // This plan is stored at "rss_plan_path()". - // - // 2. ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making + // 2. SLED ALLOCATION PLAN EXECUTION. The RSS then carries out this plan, making // requests to the sleds enumerated within the "allocation plan". // - // 3. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the - // rack, the "rss_plan_path()" file is renamed to - // "rss_completed_plan_path()". This indicates that the plan executed - // successfully, and no work remains. + // 3. SERVICE ALLOCATION PLAN CREATION. Now that Sled Agents are executing + // on their respective subnets, they can be queried to create an + // allocation plan for services. + // + // 4. SERVICE ALLOCATION PLAN EXECUTION. RSS requests that the services + // outlined in the aforementioned step are created. + // + // 5. MARKING SETUP COMPLETE. Once the RSS has successfully initialized the + // rack, a marker file is created at "rss_completed_plan_path()". This + // indicates that the plan executed successfully, and no work remains. async fn inject_rack_setup_requests( &self, config: &Config, @@ -499,6 +524,15 @@ impl ServiceInner { self.log, "RSS configuration looks like it has already been applied", ); + + let sled_plan = SledPlan::load(&self.log) + .await?
+ .expect("Service plan should exist if completed marker exists"); + self.handoff_to_nexus(&config, &sled_plan, &service_plan).await?; + return Ok(()); } else { info!(self.log, "RSS configuration has not been fully applied yet",); @@ -507,11 +541,13 @@ impl ServiceInner { // Wait for either: // - All the peers to re-load an old plan (if one exists) // - Enough peers to create a new plan (if one does not exist) - let maybe_plan = self.load_plan().await?; - let expectation = if let Some(plan) = &maybe_plan { - PeerExpectation::LoadOldPlan(plan.keys().map(|a| *a.ip()).collect()) + let maybe_sled_plan = SledPlan::load(&self.log).await?; + let expectation = if let Some(plan) = &maybe_sled_plan { + PeerExpectation::LoadOldPlan( + plan.sleds.keys().map(|a| *a.ip()).collect(), + ) } else { - PeerExpectation::CreateNewPlan(config.requests.len()) + PeerExpectation::CreateNewPlan(MINIMUM_SLED_COUNT) }; let addrs = self .wait_for_peers(expectation, local_bootstrap_agent.our_address()) @@ -522,14 +558,14 @@ impl ServiceInner { // // NOTE: This is a "point-of-no-return" -- before sending any requests // to neighboring sleds, the plan must be recorded to durable storage. - // This way, if the RSS power-cycles, it can idempotently execute the - // same allocation plan. - let plan = if let Some(plan) = maybe_plan { + // This way, if the RSS power-cycles, it can idempotently provide the + // same subnets to the same sleds. + let plan = if let Some(plan) = maybe_sled_plan { info!(self.log, "Re-using existing allocation plan"); plan } else { info!(self.log, "Creating new allocation plan"); - self.create_plan(config, addrs).await? + SledPlan::create(&self.log, &config, addrs).await? }; // Generate our rack secret, unless we're in the single-sled case. @@ -549,7 +585,7 @@ impl ServiceInner { // addrs, which would remove the need for this assertion. assert_eq!( rack_secret_shares.len(), - plan.len(), + plan.sleds.len(), concat!( "Number of trust quorum members does not match ", "number of sleds in the plan" @@ -560,11 +596,12 @@ impl ServiceInner { // Forward the sled initialization requests to our sled-agent. local_bootstrap_agent .initialize_sleds( - plan.iter() - .map(move |(bootstrap_addr, allocation)| { + plan.sleds + .iter() + .map(move |(bootstrap_addr, initialization_request)| { ( *bootstrap_addr, - allocation.initialization_request.clone(), + initialization_request.clone(), maybe_rack_secret_shares .as_mut() .map(|shares| shares.next().unwrap()), @@ -575,22 +612,37 @@ impl ServiceInner { .await .map_err(SetupServiceError::SledInitialization)?; + let sled_addresses: Vec<_> = plan + .sleds + .iter() + .map(|(_, initialization_request)| { + get_sled_address(initialization_request.subnet) + }) + .collect(); + + // Now that sled agents have been initialized, we can create + // a service allocation plan. + let service_plan = + if let Some(plan) = ServicePlan::load(&self.log).await? { + plan + } else { + ServicePlan::create(&self.log, &config, &sled_addresses).await? + }; + // Set up internal DNS services. futures::future::join_all( - plan.iter() - .filter(|(_, allocation)| { + service_plan + .services + .iter() + .filter(|(_, service_request)| { // Only send requests to sleds that are supposed to be running // DNS services. 
- !allocation.services_request.dns_services.is_empty() + !service_request.dns_services.is_empty() }) - .map(|(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - + .map(|(sled_address, services_request)| async move { self.initialize_services( - sled_address, - &allocation.services_request.dns_services, + *sled_address, + &services_request.dns_services, ) .await?; Ok(()) @@ -607,33 +659,19 @@ impl ServiceInner { self.dns_servers .set(dns_servers) .map_err(|_| ()) - .expect("DNS servers should only be set once"); - - // Issue the dataset initialization requests to all sleds. - futures::future::join_all(plan.values().map(|allocation| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - self.initialize_datasets( - sled_address, - &allocation.services_request.datasets, - ) - .await?; - - let mut records = HashMap::new(); - for dataset in &allocation.services_request.datasets { - records - .entry(dataset.srv()) - .or_insert_with(Vec::new) - .push((dataset.aaaa(), dataset.address())); - } - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(&records) + .expect("Already set DNS servers"); + + // Issue the crdb initialization requests to all sleds. + futures::future::join_all(service_plan.services.iter().map( + |(sled_address, services_request)| async move { + self.initialize_datasets( + *sled_address, + &services_request.datasets, + ) .await?; - Ok(()) - })) + Ok(()) + }, + )) .await .into_iter() .collect::>()?; @@ -642,42 +680,32 @@ impl ServiceInner { // Issue service initialization requests. // - // Note that this must happen *after* the dataset initialization, + // NOTE: This must happen *after* the dataset initialization, // to ensure that CockroachDB has been initialized before Nexus // starts. - futures::future::join_all(plan.values().map(|allocation| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - - let all_zones = allocation - .services_request - .service_zones - .iter() - .chain(allocation.services_request.dns_services.iter()) - .map(|s| s.clone()) - .collect::>(); - - self.initialize_services(sled_address, &all_zones).await?; - - let mut records = HashMap::new(); - for zone in &all_zones { - for service in &zone.services { - if let Some(addr) = zone.address(service) { - records - .entry(zone.srv(service)) - .or_insert_with(Vec::new) - .push((zone.aaaa(), addr)) - } - } - } - self.dns_servers - .get() - .expect("DNS servers must be initialized first") - .insert_dns_records(&records) - .await?; - Ok(()) - })) + // + // If Nexus was more resilient to concurrent initialization + // of CRDB, this requirement could be relaxed. + futures::future::join_all(service_plan.services.iter().map( + |(sled_address, services_request)| async move { + // With the current implementation of "initialize_services", + // we must provide the set of *all* services that should be + // executing on a sled. + // + // This means re-requesting the DNS service, even if it is + // already running - this is fine, however, as the receiving + // sled agent doesn't modify the already-running service. 
+ let all_services = services_request + .services + .iter() + .chain(services_request.dns_services.iter()) + .map(|s| s.clone()) + .collect::>(); + + self.initialize_services(*sled_address, &all_services).await?; + Ok(()) + }, + )) .await .into_iter() .collect::, SetupServiceError>>()?; @@ -686,16 +714,18 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. - let plan_path = rss_plan_path(); - tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err( + tokio::fs::File::create(&rss_completed_plan_path).await.map_err( |err| SetupServiceError::Io { - message: format!( - "renaming {plan_path:?} to {rss_completed_plan_path:?}" - ), + message: format!("creating {rss_completed_plan_path:?}"), err, }, )?; + // At this point, even if we reboot, we must not try to manage sleds, + // services, or DNS records. + + self.handoff_to_nexus(&config, &plan, &service_plan).await?; + // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does // it get a /64? @@ -703,112 +733,3 @@ impl ServiceInner { Ok(()) } } - -fn generate_rack_secret<'a>( - rack_secret_threshold: usize, - member_device_id_certs: &'a [Ed25519Certificate], - log: &Logger, -) -> Result< - Option + 'a>, - SetupServiceError, -> { - // We do not generate a rack secret if we only have a single sled or if our - // config specifies that the threshold for unlock is only a single sled. - let total_shares = member_device_id_certs.len(); - if total_shares <= 1 { - info!(log, "Skipping rack secret creation (only one sled present)"); - return Ok(None); - } - - if rack_secret_threshold <= 1 { - warn!( - log, - concat!( - "Skipping rack secret creation due to config", - " (despite discovery of {} bootstrap agents)" - ), - total_shares, - ); - return Ok(None); - } - - let secret = RackSecret::new(); - let (shares, verifier) = secret - .split(rack_secret_threshold, total_shares) - .map_err(SetupServiceError::SplitRackSecret)?; - - Ok(Some(shares.into_iter().map(move |share| ShareDistribution { - threshold: rack_secret_threshold, - verifier: verifier.clone(), - share, - member_device_id_certs: member_device_id_certs.to_vec(), - }))) -} - -#[cfg(test)] -mod tests { - use super::*; - use omicron_test_utils::dev::test_setup_log; - use sprockets_common::certificates::Ed25519Signature; - use sprockets_common::certificates::KeyType; - - fn dummy_certs(n: usize) -> Vec { - vec![ - Ed25519Certificate { - subject_key_type: KeyType::DeviceId, - subject_public_key: sprockets_host::Ed25519PublicKey([0; 32]), - signer_key_type: KeyType::Manufacturing, - signature: Ed25519Signature([0; 64]), - }; - n - ] - } - - #[test] - fn test_generate_rack_secret() { - let logctx = test_setup_log("test_generate_rack_secret"); - - // No secret generated if we have <= 1 sled - assert!(generate_rack_secret(10, &dummy_certs(1), &logctx.log) - .unwrap() - .is_none()); - - // No secret generated if threshold <= 1 - assert!(generate_rack_secret(1, &dummy_certs(10), &logctx.log) - .unwrap() - .is_none()); - - // Secret generation fails if threshold > total sleds - assert!(matches!( - generate_rack_secret(10, &dummy_certs(5), &logctx.log), - Err(SetupServiceError::SplitRackSecret(_)) - )); - - // Secret generation succeeds if threshold <= total shares and both are - // > 1, and the returned iterator satifies: - // - // * total length == total shares - // * each share is distinct - for total_shares in 2..=32 { - for threshold in 2..=total_shares { - let certs = 
dummy_certs(total_shares); - let shares = - generate_rack_secret(threshold, &certs, &logctx.log) - .unwrap() - .unwrap(); - - assert_eq!(shares.len(), total_shares); - - // `Share` doesn't implement `Hash`, but it's a newtype around - // `Vec` (which does). Unwrap the newtype to check that all - // shares are distinct. - let shares_set = shares - .map(|share_dist| share_dist.share.0) - .collect::>(); - assert_eq!(shares_set.len(), total_shares); - } - } - - logctx.cleanup_successful(); - } -} diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index 4319f7019af..1c34b57b0c4 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -37,6 +37,7 @@ impl Server { pub async fn start( config: &Config, log: Logger, + sled_id: Uuid, addr: SocketAddrV6, is_scrimlet: bool, request: SledAgentRequest, @@ -52,6 +53,7 @@ impl Server { &config, log.clone(), lazy_nexus_client.clone(), + sled_id, addr, request, ) @@ -72,7 +74,6 @@ impl Server { .start(); let sled_address = http_server.local_addr(); - let sled_id = config.id; let nexus_notifier_handle = tokio::task::spawn(async move { // Notify the control plane that we're up, and continue trying this // until it succeeds. We retry with an randomized, capped exponential diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 5eef7654359..36a524c5fee 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -22,13 +22,11 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::nexus_config::{ self, DeploymentConfig as NexusDeploymentConfig, }; -use omicron_common::postgres_config::PostgresConfigWithUrl; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::str::FromStr; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; use uuid::Uuid; @@ -502,38 +500,45 @@ impl ServiceManager { // Nexus takes a separate config file for parameters which // cannot be known at packaging time. let deployment_config = NexusDeploymentConfig { - id: req.id, - rack_id: self.rack_id, - - // Request two dropshot servers: One for HTTP (port 80), - // one for HTTPS (port 443). - dropshot_external: vec![ - dropshot::ConfigDropshot { - bind_address: SocketAddr::new(*external_ip, 443), - request_body_max_bytes: 1048576, - tls: Some(dropshot::ConfigTls { cert_file, key_file }), - }, - dropshot::ConfigDropshot { - bind_address: SocketAddr::new(*external_ip, 80), + id: req.id, + rack_id: self.rack_id, + + // Request two dropshot servers: One for HTTP (port 80), + // one for HTTPS (port 443). + dropshot_external: vec![ + dropshot::ConfigDropshot { + bind_address: SocketAddr::new( + *external_ip, + 443, + ), + request_body_max_bytes: 1048576, + tls: Some(dropshot::ConfigTls { + cert_file, + key_file, + }), + }, + dropshot::ConfigDropshot { + bind_address: SocketAddr::new( + *external_ip, + 80, + ), + request_body_max_bytes: 1048576, + ..Default::default() + }, + ], + dropshot_internal: dropshot::ConfigDropshot { + bind_address: SocketAddr::new( + IpAddr::V6(*internal_ip), + NEXUS_INTERNAL_PORT, + ), request_body_max_bytes: 1048576, ..Default::default() }, - ], - dropshot_internal: dropshot::ConfigDropshot { - bind_address: SocketAddr::new(IpAddr::V6(*internal_ip), NEXUS_INTERNAL_PORT), - request_body_max_bytes: 1048576, - ..Default::default() - }, - subnet: Ipv6Subnet::::new( - self.underlay_address, - ), - // TODO: Switch to inferring this URL by DNS. 
- database: nexus_config::Database::FromUrl { - url: PostgresConfigWithUrl::from_str( - "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - ).unwrap(), - } - }; + subnet: Ipv6Subnet::::new( + self.underlay_address, + ), + database: nexus_config::Database::FromDns, + }; // Copy the partial config file to the expected location. let config_dir = (self.config.get_svc_config_dir)( diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 3829959dbc5..470b9e8935f 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -17,7 +17,7 @@ use crate::nexus::LazyNexusClient; use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, InstanceSerialConsoleData, - ServiceEnsureBody, VpcFirewallRule, + ServiceEnsureBody, VpcFirewallRule, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; @@ -169,11 +169,10 @@ impl SledAgent { config: &Config, log: Logger, lazy_nexus_client: LazyNexusClient, + id: Uuid, sled_address: SocketAddrV6, request: SledAgentRequest, ) -> Result { - let id = config.id; - // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. let parent_log = log.clone(); @@ -335,6 +334,11 @@ impl SledAgent { Ok(()) } + pub async fn zpools_get(&self) -> Result, Error> { + let zpools = self.storage.get_zpools().await?; + Ok(zpools) + } + /// Ensures that a filesystem type exists within the zpool. pub async fn filesystem_ensure( &self, diff --git a/sled-agent/src/sp/mod.rs b/sled-agent/src/sp/mod.rs index d5e89673dd3..b664e1ac125 100644 --- a/sled-agent/src/sp/mod.rs +++ b/sled-agent/src/sp/mod.rs @@ -4,7 +4,6 @@ //! Interface to a (simulated or real) SP / RoT. -use crate::config::Config as SledConfig; use crate::config::ConfigError; use crate::illumos; use crate::illumos::dladm::CreateVnicError; @@ -86,11 +85,10 @@ impl SpHandle { /// A return value of `Ok(None)` means no SP is available. pub async fn detect( sp_config: Option<&GimletConfig>, - sled_config: &SledConfig, log: &Logger, ) -> Result, SpError> { - let inner = if let Some(config) = sp_config { - let sim_sp = SimulatedSp::start(config, sled_config, log).await?; + let inner = if let Some(config) = sp_config.as_ref() { + let sim_sp = SimulatedSp::start(config, log).await?; Some(Inner::SimulatedSp(sim_sp)) } else { None diff --git a/sled-agent/src/sp/simulated.rs b/sled-agent/src/sp/simulated.rs index a37deabc816..3b74713a0d8 100644 --- a/sled-agent/src/sp/simulated.rs +++ b/sled-agent/src/sp/simulated.rs @@ -5,7 +5,6 @@ //! Implementation of a simulated SP / RoT. 
use super::SpError; -use crate::config::Config as SledConfig; use crate::illumos::dladm::Dladm; use crate::zone::Zones; use slog::Logger; @@ -36,7 +35,6 @@ pub(super) struct SimulatedSp { impl SimulatedSp { pub(super) async fn start( sp_config: &GimletConfig, - sled_config: &SledConfig, log: &Logger, ) -> Result { // Is our simulated SP going to bind to addresses (acting like @@ -80,7 +78,6 @@ impl SimulatedSp { info!(log, "starting simulated gimlet SP"); let sp_log = log.new(o!( "component" => "sp-sim", - "server" => sled_config.id.clone().to_string(), )); let sp = Arc::new( sp_sim::Gimlet::spawn(&sp_config, sp_log) @@ -92,7 +89,6 @@ impl SimulatedSp { info!(log, "starting simulated gimlet RoT"); let rot_log = log.new(o!( "component" => "rot-sim", - "server" => sled_config.id.clone().to_string(), )); let transport = SimRotTransport { sp: Arc::clone(&sp), responses: VecDeque::new() }; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index e9cfa6f6b4b..7f163a450b1 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -15,7 +15,7 @@ use crate::params::DatasetKind; use futures::stream::FuturesOrdered; use futures::FutureExt; use futures::StreamExt; -use nexus_client::types::{DatasetPutRequest, ZpoolPutRequest}; +use nexus_client::types::ZpoolPutRequest; use omicron_common::api::external::{ByteCount, ByteCountRangeError}; use omicron_common::backoff; use schemars::JsonSchema; @@ -241,6 +241,9 @@ impl DatasetInfo { address: SocketAddrV6, do_format: bool, ) -> Result<(), Error> { + // TODO: Related to + // https://github.com/oxidecomputer/omicron/pull/1124 , should we + // avoid importing these manifests? match self.kind { DatasetKind::CockroachDb { .. } => { info!(log, "start_zone: Loading CRDB manifest"); @@ -317,7 +320,9 @@ impl DatasetInfo { warn!(log, "cockroachdb not yet alive"); }; backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), check_health, log_failure, ) @@ -653,58 +658,14 @@ impl StorageWorker { let log_post_failure = move |_, delay| { warn!( log, - "failed to notify nexus, will retry in {:?}", delay; + "failed to notify nexus about zpool, will retry in {:?}", delay; ); }; nexus_notifications.push_back( backoff::retry_notify( - backoff::internal_service_policy(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); - } - - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `datasets` to `pool_id`. 
- fn add_datasets_notify( - &self, - nexus_notifications: &mut FuturesOrdered>>, - datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, - pool_id: Uuid, - ) { - let lazy_nexus_client = self.lazy_nexus_client.clone(); - let notify_nexus = move || { - let lazy_nexus_client = lazy_nexus_client.clone(); - let datasets = datasets.clone(); - async move { - let nexus = lazy_nexus_client.get().await.map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - - for (id, address, kind) in datasets { - let request = DatasetPutRequest { - address: address.to_string(), - kind: kind.into(), - }; - nexus.dataset_put(&pool_id, &id, &request).await.map_err( - |e| backoff::BackoffError::transient(e.to_string()), - )?; - } - Ok(()) - } - }; - let log = self.log.clone(); - let log_post_failure = move |_, delay| { - warn!( - log, - "failed to notify nexus about datasets, will retry in {:?}", delay; - ); - }; - nexus_notifications.push_back( - backoff::retry_notify( - backoff::internal_service_policy(), + backoff::internal_service_policy_with_max( + std::time::Duration::from_secs(1), + ), notify_nexus, log_post_failure, ) @@ -718,7 +679,6 @@ impl StorageWorker { // Attempts to add a dataset within a zpool, according to `request`. async fn add_dataset( &self, - nexus_notifications: &mut FuturesOrdered>>, request: &NewFilesystemRequest, ) -> Result<(), Error> { info!(self.log, "add_dataset: {:?}", request); @@ -765,12 +725,6 @@ impl StorageWorker { err, })?; - self.add_datasets_notify( - nexus_notifications, - vec![(id, dataset_info.address, dataset_info.kind)], - pool.id(), - ); - Ok(()) } @@ -864,21 +818,16 @@ impl StorageWorker { } } - // Notify Nexus of the zpool and all datasets within. + // Notify Nexus of the zpool. self.add_zpool_notify( &mut nexus_notifications, pool.id(), size, ); - self.add_datasets_notify( - &mut nexus_notifications, - datasets, - pool.id(), - ); }, Some(request) = self.new_filesystems_rx.recv() => { - let result = self.add_dataset(&mut nexus_notifications, &request).await; + let result = self.add_dataset(&request).await; let _ = request.responder.send(result); } } @@ -955,6 +904,14 @@ impl StorageManager { Ok(()) } + pub async fn get_zpools(&self) -> Result, Error> { + let pools = self.pools.lock().await; + Ok(pools + .keys() + .map(|zpool| crate::params::Zpool { id: zpool.id() }) + .collect()) + } + pub async fn upsert_filesystem( &self, zpool_id: Uuid, diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index c2ba69e5384..dd38aed438e 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -19,7 +19,3 @@ level = "info" mode = "file" path = "/dev/stdout" if_exists = "append" - -# Configuration for interacting with the timeseries database -[timeseries_db] -address = "[fd00:1122:3344:0101::5]:8123" diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index efc066260dd..18e4992068a 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -12,7 +12,6 @@ rack_subnet = "fd00:1122:3344:0100::" rack_secret_threshold = 1 [gateway] - # IP address of Internet gateway # # NOTE: In the lab, use "172.20.15.225" @@ -27,75 +26,5 @@ rack_secret_threshold = 1 # how-to-run.adoc for details on how to determine the value for your network. mac = "00:0d:b9:54:fe:e4" -[[request]] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate crucible datasets. 
-[[request.dataset]] -id = "09a9a25f-2602-4e2f-9630-31af9c492c3e" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "2713b37a-3043-4ed5-aaff-f38200e45cfb" -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "ffd16cad-e5d5-495e-9c59-4312a3857d91" -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -id = "4d08fc19-3d5f-4f6b-9c48-925f8eac7255" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::2]:32221" -dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate clickhouse datasets. -[[request.dataset]] -id = "a3505b41-a592-420b-84f2-3d76bf0e0a81" -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" - -[[request.service_zone]] -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" -zone_name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] -[[request.service_zone.services]] -type = "nexus" -internal_ip = "fd00:1122:3344:0101::3" # NOTE: In the lab, use "172.20.15.226" -external_ip = "192.168.1.20" - -# TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus -# should allocate Oximeter services. -[[request.service_zone]] -id = "1da65e5b-210c-4859-a7d7-200c1e659972" -zone_name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] -[[request.service_zone.services]] -type = "oximeter" - -[[request.service_zone]] -id = "a0fe5ebc-9261-6f77-acc1-972481755789" -zone_name = "switch" -addresses = [ "fd00:1122:3344:0101::9" ] -gz_addresses = [] -[[request.service_zone.services]] -type = "dendrite" -asic = "tofino_stub" -#[[request.service_zone.services]] -# The tfport service will not work with the tofino_stub asic, -# as there is no network traffic to multiplex or network device -# to layer over. -#type = "tfport" -#pkt_source = "tfpkt0" +nexus_external_address = "192.168.1.20" diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 9af1db6f2e2..170350afacd 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -1,7 +1,5 @@ # Sled Agent Configuration -id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" - # A file-backed zpool can be manually created with the following: # # truncate -s 10GB testpool.vdev # # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev"