From 79fb8f3787674dfc91320029e9b5945dd08ff515 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 22 Mar 2022 19:25:52 -0400 Subject: [PATCH 01/61] Add internal-dns service --- Cargo.lock | 501 ++++++++++++++++++++++++++-- Cargo.toml | 2 + internal-dns-client/Cargo.toml | 12 + internal-dns-client/src/lib.rs | 18 + internal-dns/Cargo.toml | 36 ++ internal-dns/src/bin/apigen.rs | 27 ++ internal-dns/src/bin/dns-server.rs | 54 +++ internal-dns/src/dns_data.rs | 356 ++++++++++++++++++++ internal-dns/src/dns_server.rs | 185 ++++++++++ internal-dns/src/dropshot_server.rs | 73 ++++ internal-dns/src/lib.rs | 47 +++ internal-dns/tests/basic_test.rs | 188 +++++++++++ internal-dns/tests/openapi_test.rs | 27 ++ openapi/internal-dns.json | 237 +++++++++++++ 14 files changed, 1736 insertions(+), 27 deletions(-) create mode 100644 internal-dns-client/Cargo.toml create mode 100644 internal-dns-client/src/lib.rs create mode 100644 internal-dns/Cargo.toml create mode 100644 internal-dns/src/bin/apigen.rs create mode 100644 internal-dns/src/bin/dns-server.rs create mode 100644 internal-dns/src/dns_data.rs create mode 100644 internal-dns/src/dns_server.rs create mode 100644 internal-dns/src/dropshot_server.rs create mode 100644 internal-dns/src/lib.rs create mode 100644 internal-dns/tests/basic_test.rs create mode 100644 internal-dns/tests/openapi_test.rs create mode 100644 openapi/internal-dns.json diff --git a/Cargo.lock b/Cargo.lock index 28d12a1b65a..ff90b29d91e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,12 @@ dependencies = [ "syn", ] +[[package]] +name = "arc-swap" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" + [[package]] name = "array-init" version = "2.0.0" @@ -127,7 +133,7 @@ dependencies = [ "getrandom", "instant", "pin-project", - "rand", + "rand 0.8.5", "tokio", ] @@ -562,7 +568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f83bd3bb4314701c568e340cd8cf78c975aa0ca79e03d3f6d1677d5b0c9c0c03" dependencies = [ "generic-array 0.14.5", - "rand_core", + "rand_core 0.6.3", "subtle", "zeroize", ] @@ -644,6 +650,12 @@ dependencies = [ "syn", ] +[[package]] +name = "data-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" + [[package]] name = "db-macros" version = "0.1.0" @@ -801,7 +813,45 @@ dependencies = [ "base64", "bytes", "chrono", - "dropshot_endpoint", + "dropshot_endpoint 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", + "futures", + "hostname", + "http", + "hyper", + "indexmap", + "openapiv3", + "paste", + "percent-encoding", + "proc-macro2", + "rustls", + "rustls-pemfile", + "schemars", + "serde", + "serde_json", + "serde_urlencoded", + "slog", + "slog-async", + "slog-bunyan", + "slog-json", + "slog-term", + "tokio", + "tokio-rustls", + "toml", + "usdt 0.3.2", + "uuid", +] + +[[package]] +name = "dropshot" +version = "0.6.1-dev" +source = "git+https://github.com/oxidecomputer/dropshot#da1d2db1411e1edbbe0101cc1db855606e8dabfc" +dependencies = [ + "async-stream", + "async-trait", + "base64", + "bytes", + "chrono", + "dropshot_endpoint 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot)", "futures", "hostname", "http", @@ -841,6 +891,18 @@ dependencies = [ "syn", ] +[[package]] +name = "dropshot_endpoint" +version = "0.6.1-dev" +source = "git+https://github.com/oxidecomputer/dropshot#da1d2db1411e1edbbe0101cc1db855606e8dabfc" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_tokenstream", + "syn", +] + [[package]] name = "dtrace-parser" version = "0.1.12" @@ -887,7 +949,7 @@ dependencies = [ "generic-array 0.14.5", "group", "pkcs8", - "rand_core", + "rand_core 0.6.3", "subtle", "zeroize", ] @@ -916,6 +978,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "enum-as-inner" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" +dependencies = [ + "heck 0.4.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "expectorate" version = "1.0.4" @@ -954,7 +1047,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0f40b2dcd8bc322217a5f6559ae5f9e9d1de202a2ecee2e9eafcbece7562a4f" dependencies = [ "bitvec", - "rand_core", + "rand_core 0.6.3", "subtle", ] @@ -1040,6 +1133,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da1b8f89c5b5a5b7e59405cfcf0bb9588e5ed19f0b57a4cd542bbba3f164a6d" +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "funty" version = "1.2.0" @@ -1135,6 +1244,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "gateway-client" version = "0.1.0" @@ -1226,7 +1344,7 @@ checksum = "1c363a5301b8f153d80747126a04b3c82073b9fe3130571a9d170cacdeaf7912" dependencies = [ "byteorder", "ff", - "rand_core", + "rand_core 0.6.3", "subtle", ] @@ -1408,6 +1526,12 @@ dependencies = [ "syn", ] +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.17" @@ -1527,6 +1651,61 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "internal-dns" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 3.1.6", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot)", + "expectorate", + "internal-dns-client", + "omicron-test-utils", + "openapi-lint", + "openapiv3", + "portpicker", + "pretty-hex", + "schemars", + "serde", + "serde_json", + "sled", + "slog", + "slog-async", + "slog-envlogger", + "slog-term", + "structopt", + "subprocess", + "tempdir", + "tokio", + "toml", + "trust-dns-proto", + "trust-dns-resolver", + "trust-dns-server", +] + +[[package]] +name = "internal-dns-client" +version = "0.1.0" +dependencies = [ + "progenitor", + "reqwest", + "serde", + "serde_json", + "slog", +] + +[[package]] +name = "ipconfig" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723519edce41262b05d4143ceb95050e4c614f483e78e9fd9e39a8275a84ad98" +dependencies = [ + "socket2", + "widestring", + "winapi", + "winreg 0.7.0", +] + [[package]] name = "ipnet" version = "2.4.0" @@ -1612,6 +1791,12 @@ version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + [[package]] name = "lock_api" version = "0.4.6" @@ -1630,6 +1815,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "macaddr" version = "1.0.1" @@ -1817,7 +2011,7 @@ dependencies = [ "anyhow", "bytes", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "headers", "http", "hyper", @@ -1845,6 +2039,15 @@ dependencies = [ "syn", ] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1923,7 +2126,7 @@ dependencies = [ "api_identity", "backoff", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "http", @@ -1932,7 +2135,7 @@ dependencies = [ "macaddr", "parse-display", "progenitor", - "rand", + "rand 0.8.5", "reqwest", "ring", "schemars", @@ -1956,7 +2159,7 @@ name = "omicron-gateway" version = "0.1.0" dependencies = [ "clap 3.1.6", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "gateway-messages", @@ -1997,7 +2200,7 @@ dependencies = [ "db-macros", "diesel", "diesel-dtrace", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "headers", @@ -2026,7 +2229,7 @@ dependencies = [ "oximeter-producer", "parse-display", "pq-sys", - "rand", + "rand 0.8.5", "ref-cast", "regex", "reqwest", @@ -2094,7 +2297,7 @@ dependencies = [ "cfg-if", "chrono", "crucible-agent-client", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "http", @@ -2109,7 +2312,7 @@ dependencies = [ "percent-encoding", "progenitor", "propolis-client", - "rand", + "rand 0.8.5", "reqwest", "schemars", "serde", @@ -2141,7 +2344,7 @@ name = "omicron-test-utils" version = "0.1.0" dependencies = [ "anyhow", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "libc", @@ -2322,7 +2525,7 @@ dependencies = [ name = "oximeter-collector" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "nexus-client", "omicron-common", @@ -2352,7 +2555,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "itertools", "omicron-test-utils", "oximeter", @@ -2376,7 +2579,7 @@ name = "oximeter-instruments" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "futures", "http", "oximeter", @@ -2398,7 +2601,7 @@ name = "oximeter-producer" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "nexus-client", "omicron-common", "oximeter", @@ -2709,6 +2912,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "portpicker" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "postgres-protocol" version = "0.6.3" @@ -2722,7 +2934,7 @@ dependencies = [ "hmac 0.12.1", "md-5", "memchr", - "rand", + "rand 0.8.5", "sha2 0.10.2", "stringprep", ] @@ -2792,6 +3004,12 @@ dependencies = [ "termtree", ] +[[package]] +name = "pretty-hex" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2906,6 +3124,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.16" @@ -2932,6 +3156,29 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "643f8f41a8ebc4c5dc4515c82bb8abd397b527fc20fd681b7c011c2aee5d44fb" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.8.5" @@ -2940,7 +3187,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.3", ] [[package]] @@ -2950,9 +3197,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", ] +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.6.3" @@ -2987,6 +3249,15 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "redox_syscall" version = "0.2.11" @@ -3098,7 +3369,17 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots", - "winreg", + "winreg 0.10.1", +] + +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", ] [[package]] @@ -3557,7 +3838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2807892cfa58e081aa1f1111391c7a0649d4fa127a4ffbe34bcbfb35a1171a4" dependencies = [ "digest 0.9.0", - "rand_core", + "rand_core 0.6.3", ] [[package]] @@ -3572,6 +3853,22 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" +[[package]] +name = "sled" +version = "0.34.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" +dependencies = [ + "crc32fast", + "crossbeam-epoch", + "crossbeam-utils", + "fs2", + "fxhash", + "libc", + "log", + "parking_lot 0.11.2", +] + [[package]] name = "sled-agent-client" version = "0.1.0" @@ -3629,6 +3926,21 @@ dependencies = [ "usdt 0.2.1", ] +[[package]] +name = "slog-envlogger" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "906a1a0bc43fed692df4b82a5e2fbfc3733db8dad8bb514ab27a4f23ad04f5c0" +dependencies = [ + "log", + "regex", + "slog", + "slog-async", + "slog-scope", + "slog-stdlog", + "slog-term", +] + [[package]] name = "slog-json" version = "2.6.0" @@ -3641,6 +3953,28 @@ dependencies = [ "time 0.3.7", ] +[[package]] +name = "slog-scope" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786" +dependencies = [ + "arc-swap", + "lazy_static", + "slog", +] + +[[package]] +name = "slog-stdlog" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a" +dependencies = [ + "log", + "slog", + "slog-scope", +] + [[package]] name = "slog-term" version = "2.9.0" @@ -3707,7 +4041,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "gateway-messages", "hex", "omicron-common", @@ -3726,7 +4060,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/spdm?rev=9742f6e#9742f6eae7b86cc8bc8bc2fb0feeb44f770a1fb6" dependencies = [ "bitflags", - "rand", + "rand 0.8.5", "ring", "webpki", ] @@ -3923,6 +4257,16 @@ dependencies = [ "xattr", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.3.0" @@ -4294,6 +4638,94 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "trust-dns-client" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3be5f2ead860f0d3aabc01433bc6fff0fe5e86bfbe2dd16e32b9c79959310ad" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + "futures-util", + "lazy_static", + "log", + "radix_trie", + "rand 0.8.5", + "thiserror", + "time 0.3.7", + "tokio", + "trust-dns-proto", +] + +[[package]] +name = "trust-dns-proto" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2861b3ed517888174d13909e675c4e94b3291867512068be59d76533e4d1270c" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "lazy_static", + "log", + "rand 0.8.5", + "smallvec", + "thiserror", + "tinyvec", + "tokio", + "url", +] + +[[package]] +name = "trust-dns-resolver" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9e737a252a617bd4774649e245dbf705e207275db0893b9fa824d49f074fc1c" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lazy_static", + "log", + "lru-cache", + "parking_lot 0.12.0", + "resolv-conf", + "smallvec", + "thiserror", + "tokio", + "trust-dns-proto", +] + +[[package]] +name = "trust-dns-server" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4058838790565ba870cb800008c7b3b8a3f154afaece824ad9a91a80a4b81dfb" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner", + "env_logger", + "futures-executor", + "futures-util", + "log", + "serde", + "thiserror", + "time 0.3.7", + "tokio", + "toml", + "trust-dns-client", + "trust-dns-proto", +] + [[package]] name = "try-lock" version = "0.2.3" @@ -4599,7 +5031,7 @@ dependencies = [ "ff", "group", "rand_chacha", - "rand_core", + "rand_core 0.6.3", "serde", "serde-big-array", "serde_cbor", @@ -4735,6 +5167,12 @@ dependencies = [ "webpki", ] +[[package]] +name = "widestring" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" + [[package]] name = "winapi" version = "0.3.9" @@ -4809,6 +5247,15 @@ version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 6b68be368cd..d2be41fde94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ members = [ "gateway", "gateway-client", "gateway-messages", + "internal-dns", + "internal-dns-client", "nexus", "nexus/src/db/db-macros", "nexus/test-utils", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml new file mode 100644 index 00000000000..af67e13d716 --- /dev/null +++ b/internal-dns-client/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "internal-dns-client" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" +slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs new file mode 100644 index 00000000000..49daa3d58ae --- /dev/null +++ b/internal-dns-client/src/lib.rs @@ -0,0 +1,18 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +progenitor::generate_api!( + spec = "../openapi/internal-dns.json", + inner_type = slog::Logger, + pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { + slog::debug!(log, "client request"; + "method" => %request.method(), + "uri" => %request.url(), + "body" => ?&request.body(), + ); + }), + post_hook = (|log: &slog::Logger, result: &Result<_, _>| { + slog::debug!(log, "client response"; "result" => ?result); + }), +); diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml new file mode 100644 index 00000000000..6abe17b75f0 --- /dev/null +++ b/internal-dns/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "internal-dns" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow = "1.0" +clap = { version = "3.1", features = [ "derive" ] } +dropshot = { git = "https://github.com/oxidecomputer/dropshot" } +pretty-hex = "0.2.1" +schemars = "0.8" +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" +sled = "0.34" +slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +slog-term = "2.7" +slog-async = "2.7" +slog-envlogger = "2.2" +structopt = "0.3" +tempdir = "0.3" +tokio = { version = "1.17", features = [ "full" ] } +toml = "0.5" +trust-dns-proto = "0.21" +trust-dns-server = "0.21" + +[dev-dependencies] +expectorate = "1.0.4" +internal-dns-client = { path = "../internal-dns-client" } +omicron-test-utils = { path = "../test-utils" } +openapiv3 = "1.0" +openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +portpicker = "0.1" +serde_json = "1.0" +subprocess = "0.2.8" +trust-dns-resolver = "0.21" diff --git a/internal-dns/src/bin/apigen.rs b/internal-dns/src/bin/apigen.rs new file mode 100644 index 00000000000..6f21201e4b0 --- /dev/null +++ b/internal-dns/src/bin/apigen.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{bail, Result}; +use std::fs::File; +use std::io; +use internal_dns::dropshot_server::api; + +fn usage(args: &Vec) -> String { + format!("{} [output path]", args[0]) +} + +fn main() -> Result<()> { + let args: Vec = std::env::args().collect(); + + let mut out = match args.len() { + 1 => Box::new(io::stdout()) as Box, + 2 => Box::new(File::create(args[1].clone())?) as Box, + _ => bail!(usage(&args)), + }; + + let api = api(); + let openapi = api.openapi("Internal DNS", "v0.1.0"); + openapi.write(&mut out)?; + Ok(()) +} diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs new file mode 100644 index 00000000000..12d4b4458f0 --- /dev/null +++ b/internal-dns/src/bin/dns-server.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See RFD 248 +// See https://github.com/oxidecomputer/omicron/issues/718 +// +// Milestones: +// - Dropshot server +// - Sqlite task +// - DNS task + +use anyhow::anyhow; +use anyhow::Context; +use clap::Parser; +use std::path::PathBuf; +use std::sync::Arc; + +#[derive(Parser, Debug)] +struct Args { + #[clap(long)] + config_file: PathBuf, +} + +#[tokio::main] +async fn main() -> Result<(), anyhow::Error> { + let args = Args::parse(); + let config_file = &args.config_file; + let config_file_contents = std::fs::read_to_string(config_file) + .with_context(|| format!("read config file {:?}", config_file))?; + let config: internal_dns::Config = toml::from_str(&config_file_contents) + .with_context(|| format!("parse config file {:?}", config_file))?; + eprintln!("{:?}", config); + + let log = + config.log.to_logger("internal-dns").context("failed to create logger")?; + + let db = Arc::new(sled::open(&config.data.storage_path)?); + + { + let db = db.clone(); + let log = log.clone(); + let config = config.dns.clone(); + + tokio::spawn( + async move { internal_dns::dns_server::run(log, db, config).await }, + ); + } + + let server = internal_dns::start_server(config, log, db).await?; + server + .await + .map_err(|error_message| anyhow!("server exiting: {}", error_message)) +} diff --git a/internal-dns/src/dns_data.rs b/internal-dns/src/dns_data.rs new file mode 100644 index 00000000000..0ddc2978365 --- /dev/null +++ b/internal-dns/src/dns_data.rs @@ -0,0 +1,356 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Manages DNS data (configured zone(s), records, etc.) + +use anyhow::Context; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use slog::{error, info, o, trace}; +use std::net::Ipv6Addr; +use std::sync::Arc; + +/// Configuration related to data model +#[derive(Deserialize, Debug)] +pub struct Config { + /// maximum number of channel messages to buffer + pub nmax_messages: usize, + + /// The path for the embedded kv store + pub storage_path: String, +} + +/// default maximum number of messages to buffer +const NMAX_MESSAGES_DEFAULT: usize = 16; + +impl Default for Config { + fn default() -> Self { + Config { + nmax_messages: NMAX_MESSAGES_DEFAULT, + storage_path: ".".into(), + } + } +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +#[serde(rename = "Srv")] +pub struct SRV { + pub prio: u16, + pub weight: u16, + pub port: u16, + pub target: String, +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub enum DnsRecord { + AAAA(Ipv6Addr), + SRV(SRV), +} +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct DnsRecordKey { + name: String, +} +#[derive(Debug)] +pub struct DnsResponse { + tx: tokio::sync::oneshot::Sender, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename = "DnsKv")] +pub struct DnsKV { + key: DnsRecordKey, + record: DnsRecord, +} + +// XXX some refactors to help +// - each variant should have its own struct containing the data. This way we +// can pass it to functions as a bundle without them having to consume the +// whole enum (which might in principle be a different variant) +// - each variant's data should include some generic responder so that we can +// have common functions for logging and sending the T +#[derive(Debug)] +pub enum DnsCmd { + // XXX + // MakeExist(DnsRecord, DnsResponse<()>), + // MakeGone(DnsRecordKey, DnsResponse<()>), + Get(Option, DnsResponse>), + Set(Vec, DnsResponse<()>), + Delete(Vec, DnsResponse<()>), +} + +/// Data model client +/// +/// The Dropshot server has one of these to send commands to modify and update +/// the data model. +pub struct Client { + log: slog::Logger, + sender: tokio::sync::mpsc::Sender, +} + +impl Client { + pub fn new( + log: slog::Logger, + config: &Config, + db: Arc, + ) -> Client { + let (sender, receiver) = + tokio::sync::mpsc::channel(config.nmax_messages); + let server = Server { + log: log.new(o!("component" => "DataServer")), + receiver, + db, + }; + tokio::spawn(async move { data_server(server).await }); + Client { log, sender } + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn get_records( + &self, + key: Option, + ) -> Result, anyhow::Error> { + slog::trace!(&self.log, "get_records"; "key" => ?key); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Get(key, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn set_records( + &self, + records: Vec, + ) -> Result<(), anyhow::Error> { + slog::trace!(&self.log, "set_records"; "records" => ?records); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Set(records, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn delete_records( + &self, + records: Vec, + ) -> Result<(), anyhow::Error> { + slog::trace!(&self.log, "delete_records"; "records" => ?records); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Delete(records, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } +} + +/// Runs the body of the data model server event loop +async fn data_server(mut server: Server) { + let log = &server.log; + loop { + trace!(log, "waiting for message"); + let msg = match server.receiver.recv().await { + None => { + info!(log, "exiting due to channel close"); + break; + } + Some(m) => m, + }; + + trace!(log, "rx message"; "message" => ?msg); + match msg { + DnsCmd::Get(key, response) => { + server.cmd_get_records(key, response).await; + } + DnsCmd::Set(records, response) => { + server.cmd_set_records(records, response).await; + } + DnsCmd::Delete(records, response) => { + server.cmd_delete_records(records, response).await; + } + } + } +} + +/// Data model server +pub struct Server { + log: slog::Logger, + receiver: tokio::sync::mpsc::Receiver, + db: Arc, +} + +impl Server { + async fn cmd_get_records( + &self, + key: Option, + response: DnsResponse>, + ) { + // If a key is provided search just for that key. Otherwise return all + // the db entries. + if let Some(key) = key { + let bits = match self.db.get(key.name.as_bytes()) { + Ok(Some(bits)) => bits, + _ => { + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + let record: DnsRecord = match serde_json::from_slice(bits.as_ref()) + { + Ok(r) => r, + Err(e) => { + error!(self.log, "deserialize record: {}", e); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + match response.tx.send(vec![DnsKV { key, record }]) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } else { + let mut result = Vec::new(); + let mut iter = self.db.iter(); + loop { + match iter.next() { + Some(Ok((k, v))) => { + let record: DnsRecord = + match serde_json::from_slice(v.as_ref()) { + Ok(r) => r, + Err(e) => { + error!( + self.log, + "deserialize record: {}", e + ); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!( + self.log, + "response tx: {:?}", e + ); + } + } + return; + } + }; + let key = match std::str::from_utf8(k.as_ref()) { + Ok(s) => s.to_string(), + Err(e) => { + error!(self.log, "key encoding: {}", e); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!( + self.log, + "response tx: {:?}", e + ); + } + } + return; + } + }; + result.push(DnsKV { + key: DnsRecordKey { name: key }, + record, + }); + } + Some(Err(e)) => { + error!(self.log, "db iteration error: {}", e); + break; + } + None => break, + } + } + match response.tx.send(result) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } + } + + async fn cmd_set_records( + &self, + records: Vec, + response: DnsResponse<()>, + ) { + for kv in records { + let bits = match serde_json::to_string(&kv.record) { + Ok(bits) => bits, + Err(e) => { + error!(self.log, "serialize record: {}", e); + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + match self.db.insert(kv.key.name.as_bytes(), bits.as_bytes()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "db insert: {}", e); + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + } + } + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } + + async fn cmd_delete_records( + &self, + records: Vec, + response: DnsResponse<()>, + ) { + for k in records { + match self.db.remove(k.name.as_bytes()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "db delete: {}", e); + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + } + } + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } +} diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs new file mode 100644 index 00000000000..f6f5ed5209f --- /dev/null +++ b/internal-dns/src/dns_server.rs @@ -0,0 +1,185 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::io::Result; +use std::net::SocketAddr; +use std::str::FromStr; +use std::sync::Arc; + +use crate::dns_data::DnsRecord; +use pretty_hex::*; +use serde::Deserialize; +use slog::{error, Logger}; +use tokio::net::UdpSocket; +use trust_dns_proto::op::header::Header; +use trust_dns_proto::rr::rdata::SRV; +use trust_dns_proto::rr::record_data::RData; +use trust_dns_proto::rr::record_type::RecordType; +use trust_dns_proto::rr::{Name, Record}; +use trust_dns_proto::serialize::binary::{ + BinDecodable, BinDecoder, BinEncoder, +}; +use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; + +/// Configuration related to the DNS server +#[derive(Deserialize, Debug, Clone)] +pub struct Config { + /// The address to listen for DNS requests on + pub bind_address: String, +} + +pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { + let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); + + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + + tokio::spawn( + async move { handle_req(log, db, socket, src, buf).await }, + ); + } +} + +async fn handle_req<'a, 'b, 'c>( + log: Logger, + db: Arc, + socket: Arc, + src: SocketAddr, + buf: Vec, +) { + println!("{:?}", buf.hex_dump()); + + let mut dec = BinDecoder::new(&buf); + let mr = match MessageRequest::read(&mut dec) { + Ok(mr) => mr, + Err(e) => { + error!(log, "read message: {}", e); + return; + } + }; + + println!("{:#?}", mr); + + let rb = MessageResponseBuilder::from_message_request(&mr); + let header = Header::response_from_request(mr.header()); + + let name = mr.query().original().name().clone(); + let key = name.to_string(); + let key = key.trim_end_matches('.'); + + let bits = match db.get(key.as_bytes()) { + Ok(Some(bits)) => bits, + Err(e) => { + error!(log, "db get: {}", e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + _ => { + nack(&log, &mr, &socket, &header, &src).await; + return; + } + }; + + let record: crate::dns_data::DnsRecord = + match serde_json::from_slice(bits.as_ref()) { + Ok(r) => r, + Err(e) => { + error!(log, "deserialize record: {}", e); + return; + } + }; + + match record { + DnsRecord::AAAA(addr) => { + let mut aaaa = Record::new(); + aaaa.set_name(name) + .set_rr_type(RecordType::AAAA) + .set_data(Some(RData::AAAA(addr))); + + let mresp = rb.build(header, vec![&aaaa], vec![], vec![], vec![]); + + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "send: {}", e); + } + } + } + DnsRecord::SRV(crate::dns_data::SRV { prio, weight, port, target }) => { + let mut srv = Record::new(); + let tgt = match Name::from_str(&target) { + Ok(tgt) => tgt, + Err(e) => { + error!(log, "srv target: '{}' {}", target, e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + }; + srv.set_name(name) + .set_rr_type(RecordType::SRV) + .set_data(Some(RData::SRV(SRV::new(prio, weight, port, tgt)))); + + let mresp = rb.build(header, vec![&srv], vec![], vec![], vec![]); + + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "send: {}", e); + } + } + } + }; +} + +async fn nack( + log: &Logger, + mr: &MessageRequest, + socket: &UdpSocket, + header: &Header, + src: &SocketAddr, +) { + let rb = MessageResponseBuilder::from_message_request(mr); + let mresp = rb.build_no_records(*header); + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + } + } +} diff --git a/internal-dns/src/dropshot_server.rs b/internal-dns/src/dropshot_server.rs new file mode 100644 index 00000000000..51d40e5053e --- /dev/null +++ b/internal-dns/src/dropshot_server.rs @@ -0,0 +1,73 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Dropshot server for configuring DNS namespace + +use crate::dns_data::{self, DnsKV, DnsRecordKey}; +use dropshot::endpoint; +use std::sync::Arc; + +pub struct Context { + client: dns_data::Client, +} + +impl Context { + pub fn new(client: dns_data::Client) -> Context { + Context { client } + } +} + +pub fn api() -> dropshot::ApiDescription> { + let mut api = dropshot::ApiDescription::new(); + + api.register(dns_records_get).expect("register dns_records_get"); + api.register(dns_records_set).expect("register dns_records_set"); + api.register(dns_records_delete).expect("register dns_records_delete"); + api +} + +#[endpoint( + method = GET, + path = "/get-records", +)] +async fn dns_records_get( + rqctx: Arc>>, +) -> Result>, dropshot::HttpError> { + let apictx = rqctx.context(); + // XXX record key + let records = apictx.client.get_records(None).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(records)) +} + +#[endpoint( + method = PUT, + path = "/set-records", +)] +async fn dns_records_set( + rqctx: Arc>>, + rq: dropshot::TypedBody>, +) -> Result, dropshot::HttpError> { + let apictx = rqctx.context(); + apictx.client.set_records(rq.into_inner()).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(())) +} + +#[endpoint( + method = PUT, + path = "/delete-records", +)] +async fn dns_records_delete( + rqctx: Arc>>, + rq: dropshot::TypedBody>, +) -> Result, dropshot::HttpError> { + let apictx = rqctx.context(); + apictx.client.delete_records(rq.into_inner()).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(())) +} diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs new file mode 100644 index 00000000000..d94684d75e5 --- /dev/null +++ b/internal-dns/src/lib.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(clippy::type_complexity)] +#![allow(clippy::ptr_arg)] + +use anyhow::anyhow; +use serde::Deserialize; +use std::sync::Arc; + +pub mod dns_data; +pub mod dns_server; +pub mod dropshot_server; + +#[derive(Deserialize, Debug)] +pub struct Config { + pub log: dropshot::ConfigLogging, + pub dropshot: dropshot::ConfigDropshot, + pub data: dns_data::Config, + pub dns: dns_server::Config, +} + +pub async fn start_server( + config: Config, + log: slog::Logger, + db: Arc, +) -> Result>, anyhow::Error> +{ + let data_client = dns_data::Client::new( + log.new(slog::o!("component" => "DataClient")), + &config.data, + db, + ); + + let api = dropshot_server::api(); + let api_context = Arc::new(dropshot_server::Context::new(data_client)); + + Ok(dropshot::HttpServerStarter::new( + &config.dropshot, + api, + api_context, + &log, + ) + .map_err(|e| anyhow!("{}", e))? + .start()) +} diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs new file mode 100644 index 00000000000..0363a696e6f --- /dev/null +++ b/internal-dns/tests/basic_test.rs @@ -0,0 +1,188 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use std::net::Ipv6Addr; +use internal_dns_client::{ + types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, + Client, +}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + +#[tokio::test] +pub async fn aaaa_crud() -> Result<(), anyhow::Error> { + let (client, resolver) = init_client_server().await?; + + // records should initially be empty + let records = client.dns_records_get().await?; + assert!(records.is_empty()); + + // add an aaaa record + let name = DnsRecordKey { name: "devron.system".into() }; + let addr = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); + let aaaa = DnsRecord::Aaaa(addr); + client + .dns_records_set(&vec![DnsKv { + key: name.clone(), + record: aaaa.clone(), + }]) + .await?; + + // read back the aaaa record + let records = client.dns_records_get().await?; + assert_eq!(1, records.len()); + assert_eq!(records[0].key.name, name.name); + match records[0].record { + DnsRecord::Aaaa(ra) => { + assert_eq!(ra, addr); + } + _ => { + panic!("expected aaaa record") + } + } + + // resolve the name + let response = resolver.lookup_ip(name.name + ".").await?; + let address = response.iter().next().expect("no addresses returned!"); + assert_eq!(address, addr); + + Ok(()) +} + +#[tokio::test] +pub async fn srv_crud() -> Result<(), anyhow::Error> { + let (client, resolver) = init_client_server().await?; + + // records should initially be empty + let records = client.dns_records_get().await?; + assert!(records.is_empty()); + + // add a srv record + let name = DnsRecordKey { name: "hromi.cluster".into() }; + let srv = + Srv { prio: 47, weight: 74, port: 99, target: "outpost47".into() }; + let rec = DnsRecord::Srv(srv.clone()); + client + .dns_records_set(&vec![DnsKv { + key: name.clone(), + record: rec.clone(), + }]) + .await?; + + // read back the srv record + let records = client.dns_records_get().await?; + assert_eq!(1, records.len()); + assert_eq!(records[0].key.name, name.name); + match records[0].record { + DnsRecord::Srv(ref rs) => { + assert_eq!(rs.prio, srv.prio); + assert_eq!(rs.weight, srv.weight); + assert_eq!(rs.port, srv.port); + assert_eq!(rs.target, srv.target); + } + _ => { + panic!("expected srv record") + } + } + + // resolve the srv + let response = resolver.srv_lookup(name.name).await?; + let srvr = response.iter().next().expect("no addresses returned!"); + assert_eq!(srvr.priority(), srv.prio); + assert_eq!(srvr.weight(), srv.weight); + assert_eq!(srvr.port(), srv.port); + assert_eq!(srvr.target().to_string(), srv.target + "."); + + Ok(()) +} + +async fn init_client_server( +) -> Result<(Client, TokioAsyncResolver), anyhow::Error> { + // initialize dns server config + let (config, dropshot_port, dns_port) = test_config()?; + let log = + config.log.to_logger("internal-dns").context("failed to create logger")?; + + // initialize dns server db + let db = Arc::new(sled::open(&config.data.storage_path)?); + db.clear()?; + + let client = Client::new( + &format!("http://127.0.0.1:{}", dropshot_port), + log.clone(), + ); + + let mut rc = ResolverConfig::new(); + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V4(SocketAddrV4::new( + Ipv4Addr::new(127, 0, 0, 1), + dns_port, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + + let resolver = + TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + + // launch a dns server + { + let db = db.clone(); + let log = log.clone(); + let config = config.dns.clone(); + + tokio::spawn( + async move { internal_dns::dns_server::run(log, db, config).await }, + ); + } + + // launch a dropshot server + tokio::spawn(async move { + let server = internal_dns::start_server(config, log, db).await?; + server.await.map_err(|error_message| { + anyhow!("server exiting: {}", error_message) + }) + }); + + // wait for server to start + tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; + + Ok((client, resolver)) +} + +fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { + let dropshot_port = portpicker::pick_unused_port().expect("pick port"); + let dns_port = portpicker::pick_unused_port().expect("pick port"); + let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; + let mut storage_path = tmp_dir.path().to_path_buf(); + storage_path.push("test"); + let storage_path = storage_path.to_str().unwrap().into(); + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: format!("127.0.0.1:{}", dropshot_port) + .parse() + .unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { nmax_messages: 16, storage_path }, + dns: internal_dns::dns_server::Config { + bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), + }, + }; + + Ok((config, dropshot_port, dns_port)) +} diff --git a/internal-dns/tests/openapi_test.rs b/internal-dns/tests/openapi_test.rs new file mode 100644 index 00000000000..3d6e6d56386 --- /dev/null +++ b/internal-dns/tests/openapi_test.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use expectorate::assert_contents; +use subprocess::Exec; +use omicron_test_utils::dev::test_cmds::assert_exit_code; +use omicron_test_utils::dev::test_cmds::path_to_executable; +use omicron_test_utils::dev::test_cmds::run_command; +use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; +use openapiv3::OpenAPI; + +const CMD_API_GEN: &str = env!("CARGO_BIN_EXE_apigen"); + +#[test] +fn test_internal_dns_openapi() { + let exec = Exec::cmd(path_to_executable(CMD_API_GEN)); + let (exit_status, stdout, _) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS); + + let spec: OpenAPI = serde_json::from_str(&stdout) + .expect("stdout was not valid OpenAPI"); + let errors = openapi_lint::validate(&spec); + assert!(errors.is_empty(), "{}", errors.join("\n\n")); + + assert_contents("../openapi/internal-dns.json", &stdout); +} diff --git a/openapi/internal-dns.json b/openapi/internal-dns.json new file mode 100644 index 00000000000..708983bd9cd --- /dev/null +++ b/openapi/internal-dns.json @@ -0,0 +1,237 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Internal DNS", + "version": "v0.1.0" + }, + "paths": { + "/delete-records": { + "put": { + "operationId": "dns_records_delete", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsRecordKey", + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsRecordKey" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Null", + "type": "string", + "enum": [ + null + ] + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/get-records": { + "get": { + "operationId": "dns_records_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsKv", + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsKv" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/set-records": { + "put": { + "operationId": "dns_records_set", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsKv", + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsKv" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Null", + "type": "string", + "enum": [ + null + ] + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + }, + "schemas": { + "DnsKv": { + "type": "object", + "properties": { + "key": { + "$ref": "#/components/schemas/DnsRecordKey" + }, + "record": { + "$ref": "#/components/schemas/DnsRecord" + } + }, + "required": [ + "key", + "record" + ] + }, + "DnsRecord": { + "oneOf": [ + { + "type": "object", + "properties": { + "AAAA": { + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "AAAA" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "SRV": { + "$ref": "#/components/schemas/Srv" + } + }, + "required": [ + "SRV" + ], + "additionalProperties": false + } + ] + }, + "DnsRecordKey": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + }, + "Srv": { + "type": "object", + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "port", + "prio", + "target", + "weight" + ] + } + } + } +} \ No newline at end of file From 1a15d6c5a67a96c1ac8869b81d570b43b402f9c2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 22 Mar 2022 19:26:31 -0400 Subject: [PATCH 02/61] fmt --- internal-dns/src/bin/apigen.rs | 2 +- internal-dns/src/bin/dns-server.rs | 12 +++++++----- internal-dns/tests/basic_test.rs | 19 ++++++++++++------- internal-dns/tests/openapi_test.rs | 6 +++--- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/internal-dns/src/bin/apigen.rs b/internal-dns/src/bin/apigen.rs index 6f21201e4b0..095291c9571 100644 --- a/internal-dns/src/bin/apigen.rs +++ b/internal-dns/src/bin/apigen.rs @@ -3,9 +3,9 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use anyhow::{bail, Result}; +use internal_dns::dropshot_server::api; use std::fs::File; use std::io; -use internal_dns::dropshot_server::api; fn usage(args: &Vec) -> String { format!("{} [output path]", args[0]) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 12d4b4458f0..505a42a7dc0 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -32,8 +32,10 @@ async fn main() -> Result<(), anyhow::Error> { .with_context(|| format!("parse config file {:?}", config_file))?; eprintln!("{:?}", config); - let log = - config.log.to_logger("internal-dns").context("failed to create logger")?; + let log = config + .log + .to_logger("internal-dns") + .context("failed to create logger")?; let db = Arc::new(sled::open(&config.data.storage_path)?); @@ -42,9 +44,9 @@ async fn main() -> Result<(), anyhow::Error> { let log = log.clone(); let config = config.dns.clone(); - tokio::spawn( - async move { internal_dns::dns_server::run(log, db, config).await }, - ); + tokio::spawn(async move { + internal_dns::dns_server::run(log, db, config).await + }); } let server = internal_dns::start_server(config, log, db).await?; diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 0363a696e6f..24e5b11744e 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -6,11 +6,11 @@ use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use std::net::Ipv6Addr; use internal_dns_client::{ types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, Client, }; +use std::net::Ipv6Addr; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -107,8 +107,10 @@ async fn init_client_server( ) -> Result<(Client, TokioAsyncResolver), anyhow::Error> { // initialize dns server config let (config, dropshot_port, dns_port) = test_config()?; - let log = - config.log.to_logger("internal-dns").context("failed to create logger")?; + let log = config + .log + .to_logger("internal-dns") + .context("failed to create logger")?; // initialize dns server db let db = Arc::new(sled::open(&config.data.storage_path)?); @@ -140,9 +142,9 @@ async fn init_client_server( let log = log.clone(); let config = config.dns.clone(); - tokio::spawn( - async move { internal_dns::dns_server::run(log, db, config).await }, - ); + tokio::spawn(async move { + internal_dns::dns_server::run(log, db, config).await + }); } // launch a dropshot server @@ -178,7 +180,10 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { request_body_max_bytes: 1024, ..Default::default() }, - data: internal_dns::dns_data::Config { nmax_messages: 16, storage_path }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path, + }, dns: internal_dns::dns_server::Config { bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), }, diff --git a/internal-dns/tests/openapi_test.rs b/internal-dns/tests/openapi_test.rs index 3d6e6d56386..cf4cd7ff83f 100644 --- a/internal-dns/tests/openapi_test.rs +++ b/internal-dns/tests/openapi_test.rs @@ -3,12 +3,12 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use expectorate::assert_contents; -use subprocess::Exec; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use openapiv3::OpenAPI; +use subprocess::Exec; const CMD_API_GEN: &str = env!("CARGO_BIN_EXE_apigen"); @@ -18,8 +18,8 @@ fn test_internal_dns_openapi() { let (exit_status, stdout, _) = run_command(exec); assert_exit_code(exit_status, EXIT_SUCCESS); - let spec: OpenAPI = serde_json::from_str(&stdout) - .expect("stdout was not valid OpenAPI"); + let spec: OpenAPI = + serde_json::from_str(&stdout).expect("stdout was not valid OpenAPI"); let errors = openapi_lint::validate(&spec); assert!(errors.is_empty(), "{}", errors.join("\n\n")); From 4faef91d1d8dc44a08b855878978d880304b9595 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 23 Mar 2022 16:17:40 -0400 Subject: [PATCH 03/61] wip --- package-manifest.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/package-manifest.toml b/package-manifest.toml index b16eaf4100f..8aa6c9cd253 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -55,6 +55,15 @@ to = "/opt/oxide/cockroachdb/sql" from = "smf/cockroachdb" to = "/var/svc/manifest/site/cockroachdb" +[package.internal-dns] +rust.binary_names = ["dns-server"] +rust.release = true +service_name = "internal-dns" +zone = true +[[package.internal-dns.paths]] +from = "smf/internal-dns" +to = "/var/svc/manifest/site/internal-dns" + # Packages not built within Omicron, but which must be imported. # Refer to From 8f373bd71f966f6d3218370646dbd91dbf4fc006 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 24 Mar 2022 11:02:14 -0400 Subject: [PATCH 04/61] Added dnsadm --- Cargo.lock | 7 ++ Cargo.toml | 2 + internal-dns-client/Cargo.toml | 7 ++ internal-dns-client/src/bin/dnsadm.rs | 117 ++++++++++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 internal-dns-client/src/bin/dnsadm.rs diff --git a/Cargo.lock b/Cargo.lock index 7f2568b5357..67c3d4ef061 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,11 +1701,18 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "anyhow", + "clap 3.1.6", "progenitor", "reqwest", "serde", "serde_json", "slog", + "slog-async", + "slog-envlogger", + "slog-term", + "structopt", + "tokio", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4dd7b1dbff3..9e8055be407 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ default-members = [ "gateway", "gateway-client", "gateway-messages", + "internal-dns", + "internal-dns-client", "nexus", "nexus/src/db/db-macros", "package", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index af67e13d716..70af97d5ac4 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,8 +5,15 @@ edition = "2021" license = "MPL-2.0" [dependencies] +anyhow = "1.0" +clap = { version = "3.1", features = [ "derive" ] } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +slog-term = "2.7" +slog-async = "2.7" +slog-envlogger = "2.2" +structopt = "0.3" +tokio = { version = "1.17", features = [ "full" ] } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/bin/dnsadm.rs b/internal-dns-client/src/bin/dnsadm.rs new file mode 100644 index 00000000000..1c2d9a876fa --- /dev/null +++ b/internal-dns-client/src/bin/dnsadm.rs @@ -0,0 +1,117 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::Result; +use internal_dns_client::{ + types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, + Client, +}; +use slog::{Drain, Logger}; +use std::net::Ipv6Addr; +use structopt::{clap::AppSettings::*, StructOpt}; + +#[derive(Debug, StructOpt)] +#[structopt( + name = "dnsadm", + about = "Administer DNS records", + global_setting(ColorAuto), + global_setting(ColoredHelp) +)] +struct Opt { + #[structopt(short, long)] + address: Option, + + #[structopt(short, long)] + port: Option, + + #[structopt(subcommand)] + subcommand: SubCommand, +} + +#[derive(Debug, StructOpt)] +enum SubCommand { + ListRecords, + AddAAAA(AddAAAACommand), + AddSRV(AddSRVCommand), + DeleteRecord(DeleteRecordCommand), +} + +#[derive(Debug, StructOpt)] +struct AddAAAACommand { + name: String, + addr: Ipv6Addr, +} + +#[derive(Debug, StructOpt)] +struct AddSRVCommand { + name: String, + prio: u16, + weight: u16, + port: u16, + target: String, +} + +#[derive(Debug, StructOpt)] +struct DeleteRecordCommand { + name: String, +} + +#[tokio::main] +async fn main() -> Result<()> { + let opt = Opt::from_args(); + let log = init_logger(); + + let addr = match opt.address { + Some(a) => a, + None => "localhost".into(), + }; + let port = opt.port.unwrap_or(5353); + + let endpoint = format!("http://{}:{}", addr, port); + let client = Client::new(&endpoint, log.clone()); + + let opt = Opt::from_args(); + match opt.subcommand { + SubCommand::ListRecords => { + let records = client.dns_records_get().await?; + println!("{:#?}", records); + } + SubCommand::AddAAAA(cmd) => { + client + .dns_records_set(&vec![DnsKv { + key: DnsRecordKey { name: cmd.name }, + record: DnsRecord::Aaaa(cmd.addr), + }]) + .await?; + } + SubCommand::AddSRV(cmd) => { + client + .dns_records_set(&vec![DnsKv { + key: DnsRecordKey { name: cmd.name }, + record: DnsRecord::Srv(Srv { + prio: cmd.prio, + weight: cmd.weight, + port: cmd.port, + target: cmd.target, + }), + }]) + .await?; + } + SubCommand::DeleteRecord(cmd) => { + client + .dns_records_delete(&vec![DnsRecordKey { name: cmd.name }]) + .await?; + } + } + + Ok(()) +} + +fn init_logger() -> Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_envlogger::new(drain).fuse(); + let drain = slog_async::Async::new(drain).chan_size(0x2000).build().fuse(); + slog::Logger::root(drain, slog::o!()) +} From a575e42548e1f0c94295dcfa4a714de7c4276464 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 24 Mar 2022 13:42:45 -0400 Subject: [PATCH 05/61] Add internal-dns SMF config, start it by RSS --- smf/internal-dns/config.toml | 22 +++++++++++++++++++++ smf/internal-dns/manifest.xml | 36 ++++++++++++++++++++++++++++++++++ smf/sled-agent/config-rss.toml | 4 ++++ 3 files changed, 62 insertions(+) create mode 100644 smf/internal-dns/config.toml create mode 100644 smf/internal-dns/manifest.xml diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml new file mode 100644 index 00000000000..83b70b0380d --- /dev/null +++ b/smf/internal-dns/config.toml @@ -0,0 +1,22 @@ +[dropshot] +bind_address = "[fd00:1234::4]:5353" +request_body_max_bytes = 1048576 + +[dns] +bind_address = "[fd00:1234::4]:4753" + +[log] +# Show log messages of this level and more severe +level = "debug" + +# Example output to a terminal (with colors) +mode = "stderr-terminal" + +# Example output to a file, appending if it already exists. +#mode = "file" +#path = "logs/server.log" +#if_exists = "append" + +[data] +nmax_messages = 16 +storage_path = "/var/tmp/oxide/dns" diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml new file mode 100644 index 00000000000..25b03434c34 --- /dev/null +++ b/smf/internal-dns/manifest.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 8c3e5d976b4..87af8fb0e07 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -38,3 +38,7 @@ addresses = [ "[fd00:1234::7]:12221", "192.168.1.123:12220" ] [[request.service]] name = "oximeter" addresses = [ "[fd00:1234::6]:12223" ] + +[[request.service]] +name = "internal-dns" +addresses = [ "[fd00:1234::4]:5353" ] From a52e4b60b3d427cb848755f8d188ae7655a11793 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 25 Mar 2022 11:58:11 -0400 Subject: [PATCH 06/61] review feedback --- Cargo.lock | 6 ------ internal-dns-client/Cargo.toml | 6 ------ internal-dns/Cargo.toml | 2 +- {internal-dns-client => internal-dns}/src/bin/dnsadm.rs | 0 package-manifest.toml | 2 +- smf/internal-dns/config.toml | 4 ++-- 6 files changed, 4 insertions(+), 16 deletions(-) rename {internal-dns-client => internal-dns}/src/bin/dnsadm.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 67c3d4ef061..0b6e0d8f30f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,18 +1701,12 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ - "anyhow", - "clap 3.1.6", "progenitor", "reqwest", "serde", "serde_json", "slog", - "slog-async", - "slog-envlogger", - "slog-term", "structopt", - "tokio", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 70af97d5ac4..22e28c91bc9 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,15 +5,9 @@ edition = "2021" license = "MPL-2.0" [dependencies] -anyhow = "1.0" -clap = { version = "3.1", features = [ "derive" ] } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } -slog-term = "2.7" -slog-async = "2.7" -slog-envlogger = "2.2" structopt = "0.3" -tokio = { version = "1.17", features = [ "full" ] } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 6abe17b75f0..ce9cf5b24a4 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" anyhow = "1.0" clap = { version = "3.1", features = [ "derive" ] } dropshot = { git = "https://github.com/oxidecomputer/dropshot" } +internal-dns-client = { path = "../internal-dns-client" } pretty-hex = "0.2.1" schemars = "0.8" serde = { version = "1.0", features = [ "derive" ] } @@ -26,7 +27,6 @@ trust-dns-server = "0.21" [dev-dependencies] expectorate = "1.0.4" -internal-dns-client = { path = "../internal-dns-client" } omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } diff --git a/internal-dns-client/src/bin/dnsadm.rs b/internal-dns/src/bin/dnsadm.rs similarity index 100% rename from internal-dns-client/src/bin/dnsadm.rs rename to internal-dns/src/bin/dnsadm.rs diff --git a/package-manifest.toml b/package-manifest.toml index 3d3f6af2d94..f48043cf0a3 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -56,7 +56,7 @@ from = "smf/cockroachdb" to = "/var/svc/manifest/site/cockroachdb" [package.internal-dns] -rust.binary_names = ["dns-server"] +rust.binary_names = ["dnsadm", "dns-server"] rust.release = true service_name = "internal-dns" zone = true diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 83b70b0380d..a52f73c1837 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -3,11 +3,11 @@ bind_address = "[fd00:1234::4]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1234::4]:4753" +bind_address = "[fd00:1234::4]:53" [log] # Show log messages of this level and more severe -level = "debug" +level = "info" # Example output to a terminal (with colors) mode = "stderr-terminal" From a9840d6d9314ef0d6a9bc314d9ad73532365796d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 10:07:10 -0400 Subject: [PATCH 07/61] Patch addresses --- smf/internal-dns/config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index a52f73c1837..b4fbf3b8b1d 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,9 @@ [dropshot] -bind_address = "[fd00:1234::4]:5353" +bind_address = "[fd00:1122:3344::9]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1234::4]:53" +bind_address = "[fd00:1122:3344::9]:53" [log] # Show log messages of this level and more severe From 437d699d9e2c2ed7c8f9cbca27260ae9f5807dfd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 10:15:26 -0400 Subject: [PATCH 08/61] Updated cfg path --- common/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 27ee00671ac..c1c8a54f870 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -52,4 +52,4 @@ macro_rules! generate_logging_api { /// /// NOTE: Be careful when modifying this path - the installation tools will /// **remove the entire directory** to re-install/uninstall the system. -pub const OMICRON_CONFIG_PATH: &'static str = "/var/tmp/oxide"; +pub const OMICRON_CONFIG_PATH: &'static str = "/var/oxide"; From 4dc45ff3abdb5c5216b896cd68847456ed15c74f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 11:44:00 -0400 Subject: [PATCH 09/61] patch addresses --- smf/internal-dns/config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index b4fbf3b8b1d..2f407c76e79 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,9 @@ [dropshot] -bind_address = "[fd00:1122:3344::9]:5353" +bind_address = "[fd00:1122:3344:1::9]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1122:3344::9]:53" +bind_address = "[fd00:1122:3344:1::9]:53" [log] # Show log messages of this level and more severe From 528204d228d5006445c597da38d974310f65699a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 21 Apr 2022 17:35:34 -0400 Subject: [PATCH 10/61] Add support for 'make GZ address', add DNS addrs --- common/src/address.rs | 105 +++++++++++++++++++++++++++ common/src/lib.rs | 1 + docs/how-to-run.adoc | 20 ++--- openapi/sled-agent.json | 7 ++ sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/config.rs | 13 +--- sled-agent/src/illumos/mod.rs | 9 ++- sled-agent/src/illumos/zone.rs | 2 +- sled-agent/src/params.rs | 9 ++- sled-agent/src/rack_setup/config.rs | 26 +++++-- sled-agent/src/rack_setup/service.rs | 84 ++++++++++++++++++--- sled-agent/src/services.rs | 82 +++++++++++++-------- smf/internal-dns/config.toml | 6 +- smf/nexus/config.toml | 8 +- smf/oximeter/config.toml | 6 +- smf/sled-agent/config-rss.toml | 26 +++---- smf/sled-agent/config.toml | 2 +- 17 files changed, 308 insertions(+), 100 deletions(-) create mode 100644 common/src/address.rs diff --git a/common/src/address.rs b/common/src/address.rs new file mode 100644 index 00000000000..4e145149b46 --- /dev/null +++ b/common/src/address.rs @@ -0,0 +1,105 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common IP addressing functionality. +//! +//! This addressing functionality is shared by both initialization services +//! and Nexus, who need to agree upon addressing schemes. + +use std::net::{Ipv6Addr, SocketAddrV6}; +use serde::{Serialize, Deserialize}; +use ipnetwork::Ipv6Network; + +pub const AZ_PREFIX: u8 = 48; +pub const RACK_PREFIX: u8 = 56; +pub const SLED_PREFIX: u8 = 64; + +/// The amount of redundancy for DNS servers. +/// +/// Must be less than MAX_DNS_REDUNDANCY. +pub const DNS_REDUNDANCY: usize = 1; +/// The maximum amount of redundancy for DNS servers. +/// +/// This determines the number of addresses which are +/// reserved for DNS servers. +pub const MAX_DNS_REDUNDANCY: usize = 5; + +pub const DNS_SERVER_PORT: u16 = 5353; +pub const SLED_AGENT_PORT: u16 = 12345; + +/// Represents a subnet which may be used for contacting DNS services. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct DnsSubnet { + network: Ipv6Network, +} + +impl DnsSubnet { + /// Returns the DNS server address within the subnet. + /// + /// This is the first address within the subnet. + pub fn dns_address(&self) -> SocketAddrV6 { + let mut iter = self.network.iter(); + let _anycast_ip = iter.next().unwrap(); + let dns_ip = iter.next().unwrap(); + SocketAddrV6::new(dns_ip, DNS_SERVER_PORT, 0, 0) + } + + /// Returns the address which the Global Zone should create + /// to be able to contact the DNS server. + /// + /// This is the second address within the subnet. + pub fn gz_address(&self) -> Ipv6Network { + let mut iter = self.network.iter(); + let _anycast_ip = iter.next().unwrap(); + let _dns_ip = iter.next().unwrap(); + Ipv6Network::new(iter.next().unwrap(), SLED_PREFIX).unwrap() + } +} + +/// Given a particular rack subnet, return the DNS addresses. +/// +/// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the +/// [`RACK_PREFIX`] subnet. +pub fn get_dns_subnets(reserved_rack_subnet: Ipv6Network) -> Vec { + assert_eq!(reserved_rack_subnet.prefix(), RACK_PREFIX); + + let mut iter = reserved_rack_subnet.iter(); + let _anycast_ip = iter.next().unwrap(); + + (0..DNS_REDUNDANCY).map(|idx| { + let network = get_64_subnet( + reserved_rack_subnet, + u8::try_from(idx + 1).unwrap() + ); + + DnsSubnet { + network + } + }).collect() +} + +/// Return the sled agent address for a subnet. +/// +/// This address will come from the first address of the [`SLED_PREFIX`] subnet. +pub fn get_sled_address(sled_subnet: Ipv6Network) -> SocketAddrV6 { + assert_eq!(sled_subnet.prefix(), SLED_PREFIX); + + let mut iter = sled_subnet.iter(); + let _anycast_ip = iter.next().unwrap(); + let sled_agent_ip = iter.next().unwrap(); + SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) +} + +/// Returns a sled subnet within a rack subnet. +/// +/// The subnet at index == 0 is used for rack-local services. +pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { + assert_eq!(rack_subnet.prefix(), RACK_PREFIX); + + let mut rack_network = rack_subnet.network().octets(); + + // To set bits distinguishing the /64 from the /56, we modify the 7th octet. + rack_network[7] = index; + Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() +} diff --git a/common/src/lib.rs b/common/src/lib.rs index c1c8a54f870..2a933283425 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -20,6 +20,7 @@ // TODO(#32): Remove this exception once resolved. #![allow(clippy::field_reassign_with_default)] +pub mod address; pub mod api; pub mod backoff; pub mod cmd; diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 663c3bae52e..aab25ddc434 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -119,16 +119,16 @@ unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1: |=================================================================================================== | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. -| Sled Agent: Dropshot API | `[fd00:1122:3344:1::1]:12345` -| Cockroach DB | `[fd00:1122:3344:1::2]:32221` -| Nexus: External API | `[fd00:1122:3344:1::3]:12220` -| Nexus: Internal API | `[fd00:1122:3344:1::3]:12221` -| Oximeter | `[fd00:1122:3344:1::4]:12223` -| Clickhouse | `[fd00:1122:3344:1::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:1::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:1::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:1::8]:32345` -| Internal DNS | `[fd00:1122:3344:1::9]:5353` +| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` +| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` +| Nexus: External API | `[fd00:1122:3344:0101::3]:12220` +| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` +| Oximeter | `[fd00:1122:3344:0101::4]:12223` +| Clickhouse | `[fd00:1122:3344:0101::5]:8123` +| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` +| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` +| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` +| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` |=================================================================================================== Note that Sled Agent runs in the global zone and is the one responsible for bringing up all the other diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 36e51a24083..dca641e06d3 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -968,6 +968,13 @@ "type": "string" } }, + "gz_addresses": { + "type": "array", + "items": { + "type": "string", + "format": "ipv6" + } + }, "name": { "type": "string" } diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 8e3ed304b94..8e57cc9b577 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -182,7 +182,7 @@ impl Agent { info!(&self.log, "Loading Sled Agent: {:?}", request); let sled_address = - crate::config::get_sled_address(*request.subnet.as_ref()); + omicron_common::address::get_sled_address(request.subnet.as_ref().0); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index b02fbdabcd9..ca332b2c07c 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -8,22 +8,11 @@ use crate::common::vlan::VlanID; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; -use omicron_common::api::external::Ipv6Net; use serde::Deserialize; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::SocketAddr; use std::path::Path; use uuid::Uuid; -pub const SLED_AGENT_PORT: u16 = 12345; - -/// Given a subnet, return the sled agent address. -pub(crate) fn get_sled_address(subnet: Ipv6Net) -> SocketAddrV6 { - let mut iter = subnet.iter(); - let _anycast_ip = iter.next().unwrap(); - let sled_agent_ip = iter.next().unwrap(); - SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) -} - /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index c155f9d7a78..42df4da26a5 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -23,9 +23,13 @@ pub enum ExecutionError { ExecutionStart(std::io::Error), #[error( - "Command executed and failed with status: {status}. Output: {stderr}" + "Command [{command}] executed and failed with status: {status}. Output: {stderr}" )] - CommandFailure { status: std::process::ExitStatus, stderr: String }, + CommandFailure { + command: String, + status: std::process::ExitStatus, + stderr: String, + }, } // We wrap this method in an inner module to make it possible to mock @@ -44,6 +48,7 @@ mod inner { if !output.status.success() { return Err(ExecutionError::CommandFailure { + command: command.get_args().map(|s| s.to_string_lossy().into()).collect::>().join(" "), status: output.status, stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index ceb7cb5c57c..53cabff6a04 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -479,7 +479,7 @@ impl Zones { Self::ensure_address( None, &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static(IpAddr::V6(address), Some(64)), + AddressRequest::new_static(IpAddr::V6(address), Some(omicron_common::address::SLED_PREFIX)), )?; Ok(()) } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 5fe493eccea..553954e5c7f 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Used to request a Disk state change @@ -233,7 +233,11 @@ pub struct ServiceRequest { // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. - pub addresses: Vec, + pub addresses: Vec, + // The addresses in the global zone which should be created, if necessary + // to route to the service. + #[serde(default)] + pub gz_addresses: Vec, } impl From for sled_agent_client::types::ServiceRequest { @@ -241,6 +245,7 @@ impl From for sled_agent_client::types::ServiceRequest { Self { name: s.name, addresses: s.addresses.into_iter().map(|s| s.to_string()).collect(), + gz_addresses: s.gz_addresses, } } } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 53545b28984..fff9dee19a3 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -7,6 +7,7 @@ use crate::config::ConfigError; use crate::params::{DatasetEnsureBody, ServiceRequest}; use ipnetwork::Ipv6Network; +use omicron_common::address::{AZ_PREFIX, RACK_PREFIX}; use serde::Deserialize; use serde::Serialize; use std::net::Ipv6Addr; @@ -31,7 +32,7 @@ pub struct SetupServiceConfig { } /// A request to initialize a sled. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct SledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] @@ -40,6 +41,10 @@ pub struct SledRequest { /// Services to be instantiated. #[serde(default, rename = "service")] pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, } fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { @@ -59,19 +64,24 @@ impl SetupServiceConfig { } pub fn az_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, 48) + new_network(self.rack_subnet, AZ_PREFIX) } + /// Returns the subnet for our rack. pub fn rack_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, 56) + new_network(self.rack_subnet, RACK_PREFIX) } - pub fn sled_subnet(&self, index: u8) -> Ipv6Network { - let mut rack_network = self.rack_subnet().network().octets(); + /// Returns the subnet for the "reserved" rack subnet. + /// + /// This is used for AZ-wide services, such as DNS. + pub fn reserved_rack_subnet(&self) -> Ipv6Network { + new_network(self.az_subnet().ip(), RACK_PREFIX) + } - // To set bits distinguishing the /64 from the /56, we modify the 7th octet. - rack_network[7] = index; - Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() + /// Returns the subnet for the `index`-th sled in the rack. + pub fn sled_subnet(&self, index: u8) -> Ipv6Network { + omicron_common::address::get_64_subnet(self.rack_subnet(), index) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c3de81fd451..b25868e45d3 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -5,12 +5,16 @@ //! Rack Setup Service implementation use super::config::{SetupServiceConfig as Config, SledRequest}; +use crate::params::ServiceRequest; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, params::SledSubnet, }; -use crate::config::get_sled_address; +use omicron_common::address::{ + get_dns_subnets, + get_sled_address, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -300,14 +304,38 @@ impl ServiceInner { async fn create_plan( &self, config: &Config, - addrs: impl IntoIterator, + bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { - let addrs = addrs.into_iter().enumerate(); + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let dns_subnets = get_dns_subnets(config.reserved_rack_subnet()); + + info!(self.log, "dns_subnets: {:#?}", dns_subnets); + + let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + // If a sled was explicitly requested from the RSS configuration, + // use that. Otherwise, just give it a "default" (empty) set of + // services. + let mut request = { + if idx < config.requests.len() { + config.requests[idx].clone() + } else { + SledRequest::default() + } + }; - // TODO: The use of "zip" here means that if we have more addrs than - // requests, we won't initialize some of them. Maybe that's okay? - // Maybe that's the responsibility of Nexus? - let requests_and_sleds = config.requests.iter().zip(addrs); + // The first enumerated addresses get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + request.dns_services.push(ServiceRequest { + name: "internal-dns".to_string(), + addresses: vec![dns_subnet.dns_address()], + gz_addresses: vec![dns_subnet.gz_address().ip()], + }); + } + + (request, (idx, bootstrap_addr)) + }); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; @@ -488,10 +516,38 @@ impl ServiceInner { "Initialized sled agent on sled with bootstrap address: {}", bootstrap_addr ); + Ok(()) + }, + )) + .await + .into_iter() + .collect::>()?; - // Next, initialize any datasets on sleds that need it. + // Set up internal DNS services. + futures::future::join_all(plan.iter().map( + |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - *allocation.initialization_request.subnet.as_ref(), + allocation.initialization_request.subnet.as_ref().0, + )); + + // TODO: also tell sled to make GZ address + self.initialize_services( + sled_address, + &allocation.services_request.dns_services, + ) + .await?; + Ok(()) + }, + )) + .await + .into_iter() + .collect::>()?; + + // Issue the dataset initialization requests to all sleds. + futures::future::join_all(plan.iter().map( + |(_, allocation)| async move { + let sled_address = SocketAddr::V6(get_sled_address( + allocation.initialization_request.subnet.as_ref().0, )); self.initialize_datasets( sled_address, @@ -515,11 +571,17 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - *allocation.initialization_request.subnet.as_ref(), + allocation.initialization_request.subnet.as_ref().0, )); + + let all_services = allocation.services_request.services.iter() + .chain(allocation.services_request.dns_services.iter()) + .map(|s| s.clone()) + .collect::>(); + self.initialize_services( sled_address, - &allocation.services_request.services, + &all_services, ) .await?; Ok(()) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 96739f79eec..50e534fefdd 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,11 +7,12 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; -use crate::illumos::zone::AddressRequest; +use crate::illumos::zone::{AddressRequest, Zones}; use crate::params::{ServiceEnsureBody, ServiceRequest}; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; +use std::net::IpAddr; use std::path::{Path, PathBuf}; use tokio::sync::Mutex; @@ -29,6 +30,9 @@ pub enum Error { #[error(transparent)] RunningZone(#[from] crate::illumos::running_zone::Error), + #[error("Failed to add address to the global zone: {0}")] + GzAddressFailure(crate::illumos::zone::Error), + #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), @@ -56,6 +60,7 @@ pub struct ServiceManager { config_path: Option, zones: Mutex>, vnic_allocator: VnicAllocator, + physical_link: Option, } impl ServiceManager { @@ -79,7 +84,8 @@ impl ServiceManager { log, config_path, zones: Mutex::new(vec![]), - vnic_allocator: VnicAllocator::new("Service", physical_link)?, + vnic_allocator: VnicAllocator::new("Service", physical_link.clone())?, + physical_link, }; let config_path = mgr.services_config_path(); @@ -157,7 +163,7 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = AddressRequest::new_static(addr.ip(), None); + let addr_request = AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -166,6 +172,18 @@ impl ServiceManager { ); } + info!(self.log, "GZ addresses: {:#?}", service.gz_addresses); + for addr in &service.gz_addresses { + info!(self.log, "Ensuring GZ address {} exists", addr.to_string()); + + let addr_name = service.name.replace(&['-', '_'][..], ""); + Zones::ensure_has_global_zone_v6_address( + self.physical_link.clone(), + *addr, + &addr_name, + ).map_err(|e| Error::GzAddressFailure(e))?; + } + debug!(self.log, "importing manifest"); running_zone.run_cmd(&[ @@ -201,34 +219,38 @@ impl ServiceManager { ) -> Result<(), Error> { let mut existing_zones = self.zones.lock().await; let config_path = self.services_config_path(); - if config_path.exists() { - let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; - let known_services = cfg.services; - - let known_set: HashSet<&ServiceRequest> = - HashSet::from_iter(known_services.iter()); - let requested_set = HashSet::from_iter(request.services.iter()); - - if known_set != requested_set { - // If the caller is requesting we instantiate a - // zone that exists, but isn't what they're asking for, throw an - // error. - // - // We may want to use a different mechanism for zone removal, in - // the case of changing configurations, rather than just doing - // that removal implicitly. - warn!( - self.log, - "Cannot request services on this sled, differing configurations: {:?}", - known_set.symmetric_difference(&requested_set) - ); - return Err(Error::ServicesAlreadyConfigured); + + let services_to_initialize = { + if config_path.exists() { + let cfg: ServiceEnsureBody = toml::from_str( + &tokio::fs::read_to_string(&config_path).await?, + )?; + let known_services = cfg.services; + + let known_set: HashSet<&ServiceRequest> = + HashSet::from_iter(known_services.iter()); + let requested_set = HashSet::from_iter(request.services.iter()); + + if !requested_set.is_superset(&known_set) { + // The caller may only request services additively. + // + // We may want to use a different mechanism for zone removal, in + // the case of changing configurations, rather than just doing + // that removal implicitly. + warn!( + self.log, + "Cannot request services on this sled, differing configurations: {:?}", + known_set.symmetric_difference(&requested_set) + ); + return Err(Error::ServicesAlreadyConfigured); + } + requested_set.difference(&known_set).map(|s| (*s).clone()).collect::>() + } else { + request.services.clone() } - } + }; - self.initialize_services_locked(&mut existing_zones, &request.services) + self.initialize_services_locked(&mut existing_zones, &services_to_initialize) .await?; let serialized_services = toml::Value::try_from(&request) @@ -305,6 +327,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], + gz_addresses: vec!{}, }], }) .await @@ -318,6 +341,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], + gz_addresses: vec!{}, }], }) .await diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2f407c76e79..a0dae0a73aa 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,11 @@ +# TODO: remove the addresses here! They're getting assigned to us! + [dropshot] -bind_address = "[fd00:1122:3344:1::9]:5353" +bind_address = "[fd00:1122:3344:1::1]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1122:3344:1::9]:53" +bind_address = "[fd00:1122:3344:1::1]:53" [log] # Show log messages of this level and more severe diff --git a/smf/nexus/config.toml b/smf/nexus/config.toml index f11c275c31e..ab135febb7b 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config.toml @@ -18,15 +18,15 @@ schemes_external = ["spoof", "session_cookie"] [database] # URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:1::2]:32221/omicron?sslmode=disable" +url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" [dropshot_external] # IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:1::3]:12220" +bind_address = "[fd00:1122:3344:0101::3]:12220" [dropshot_internal] # IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:1::3]:12221" +bind_address = "[fd00:1122:3344:0101::3]:12221" [log] # Show log messages of this level and more severe @@ -42,4 +42,4 @@ mode = "stderr-terminal" # Configuration for interacting with the timeseries database [timeseries_db] -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index a4812d01fd1..76fc182b316 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -2,10 +2,10 @@ id = "1da65e5b-210c-4859-a7d7-200c1e659972" # Internal address of nexus -nexus_address = "[fd00:1122:3344:1::3]:12221" +nexus_address = "[fd00:1122:3344:0101::3]:12221" [db] -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,4 +14,4 @@ level = "debug" mode = "stderr-terminal" [dropshot] -bind_address = "[fd00:1122:3344:1::4]:12223" +bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 2efa04c507b..002e17fcc22 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -4,7 +4,7 @@ # Also implies the /48 AZ subnet. # |............| <- This /48 is the AZ Subnet # |...............| <- This /56 is the Rack Subnet -rack_subnet = "fd00:1122:3344:1::" +rack_subnet = "fd00:1122:3344:0100::" [[request]] @@ -12,49 +12,47 @@ rack_subnet = "fd00:1122:3344:1::" # should allocate crucible datasets. [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::6]:32345" +address = "[fd00:1122:3344:0101::6]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:1::7]:32345" +address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:1::8]:32345" +address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::2]:32221" +address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" dataset_kind.all_addresses = [ - "[fd00:1122:3344:1::2]:32221", + "[fd00:1122:3344:0101::2]:32221", ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" addresses = [ - "[fd00:1122:3344:1::3]:12220", - "[fd00:1122:3344:1::3]:12221", + "[fd00:1122:3344:0101::3]:12220", + "[fd00:1122:3344:0101::3]:12221", ] +gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. [[request.service]] name = "oximeter" addresses = [ - "[fd00:1122:3344:1::4]:12223", + "[fd00:1122:3344:0101::4]:12223", ] - -[[request.service]] -name = "internal-dns" -addresses = [ "[fd00:1122:3344:1::9]:5353" ] +gz_addresses = [] diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 6dfe87fe9bf..eaebce97f39 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -5,7 +5,7 @@ id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" # TODO: Remove this address # Internal address of Nexus -nexus_address = "[fd00:1122:3344:01::3]:12221" +nexus_address = "[fd00:1122:3344:0101::3]:12221" # A file-backed zpool can be manually created with the following: # $ truncate -s 10GB testpool.vdev From bea8c7e3c8b5c7762929abf5f1ed03ed7471428a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 22 Apr 2022 11:13:54 -0400 Subject: [PATCH 11/61] Add some tests --- common/src/address.rs | 98 ++++++++++++++++++++++------ sled-agent/src/bootstrap/agent.rs | 5 +- sled-agent/src/illumos/mod.rs | 6 +- sled-agent/src/illumos/zone.rs | 5 +- sled-agent/src/rack_setup/config.rs | 7 -- sled-agent/src/rack_setup/service.rs | 68 ++++++++++--------- sled-agent/src/services.rs | 33 +++++++--- 7 files changed, 148 insertions(+), 74 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 4e145149b46..e9eef2e0c1a 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -7,9 +7,9 @@ //! This addressing functionality is shared by both initialization services //! and Nexus, who need to agree upon addressing schemes. -use std::net::{Ipv6Addr, SocketAddrV6}; -use serde::{Serialize, Deserialize}; use ipnetwork::Ipv6Network; +use serde::{Deserialize, Serialize}; +use std::net::{Ipv6Addr, SocketAddrV6}; pub const AZ_PREFIX: u8 = 48; pub const RACK_PREFIX: u8 = 56; @@ -57,26 +57,39 @@ impl DnsSubnet { } } -/// Given a particular rack subnet, return the DNS addresses. -/// -/// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the -/// [`RACK_PREFIX`] subnet. -pub fn get_dns_subnets(reserved_rack_subnet: Ipv6Network) -> Vec { - assert_eq!(reserved_rack_subnet.prefix(), RACK_PREFIX); +/// A wrapper around an IPv6 network, indicating it is a "reserved" rack +/// subnet which can be used for AZ-wide services. +#[derive(Debug, Clone)] +pub struct ReservedRackSubnet(pub Ipv6Network); + +impl ReservedRackSubnet { + /// Returns the subnet for the reserved rack subnet. + pub fn new(subnet: Ipv6Network) -> Self { + let net = Ipv6Network::new(subnet.network(), AZ_PREFIX).unwrap(); + ReservedRackSubnet( + Ipv6Network::new(net.network(), RACK_PREFIX).unwrap(), + ) + } - let mut iter = reserved_rack_subnet.iter(); - let _anycast_ip = iter.next().unwrap(); + /// Given a particular rack subnet, return the DNS addresses. + /// + /// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the + /// [`RACK_PREFIX`] subnet. + pub fn get_dns_subnets(&self) -> Vec { + assert_eq!(self.0.prefix(), RACK_PREFIX); - (0..DNS_REDUNDANCY).map(|idx| { - let network = get_64_subnet( - reserved_rack_subnet, - u8::try_from(idx + 1).unwrap() - ); + let mut iter = self.0.iter(); + let _anycast_ip = iter.next().unwrap(); + + (0..DNS_REDUNDANCY) + .map(|idx| { + let network = + get_64_subnet(self.0, u8::try_from(idx + 1).unwrap()); - DnsSubnet { - network - } - }).collect() + DnsSubnet { network } + }) + .collect() + } } /// Return the sled agent address for a subnet. @@ -103,3 +116,50 @@ pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { rack_network[7] = index; Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_dns_subnets() { + let subnet = "fd00:1122:3344:0100::/64".parse::().unwrap(); + let rack_subnet = ReservedRackSubnet::new(subnet); + + assert_eq!( + // Note that these bits (indicating the rack) are zero. + // vv + "fd00:1122:3344:0001::/56".parse::().unwrap(), + rack_subnet.0, + ); + + // Observe the first DNS subnet within this reserved rack subnet. + let dns_subnets = rack_subnet.get_dns_subnets(); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + + // The DNS address and GZ address should be only differing by one. + assert_eq!( + "[fd00:1122:3344:0001::1]:5353".parse::().unwrap(), + dns_subnets[0].dns_address(), + ); + assert_eq!( + "fd00:1122:3344:0001::2/64".parse::().unwrap(), + dns_subnets[0].gz_address(), + ); + } + + #[test] + fn test_sled_address() { + let subnet = "fd00:1122:3344:0101::/64".parse::().unwrap(); + assert_eq!( + "[fd00:1122:3344:0101::1]:12345".parse::().unwrap(), + get_sled_address(subnet) + ); + + let subnet = "fd00:1122:3344:0308::/64".parse::().unwrap(); + assert_eq!( + "[fd00:1122:3344:0308::1]:12345".parse::().unwrap(), + get_sled_address(subnet) + ); + } +} diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 8e57cc9b577..da20e4f8e58 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -181,8 +181,9 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = - omicron_common::address::get_sled_address(request.subnet.as_ref().0); + let sled_address = omicron_common::address::get_sled_address( + request.subnet.as_ref().0, + ); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index 42df4da26a5..bdec8e7e702 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -48,7 +48,11 @@ mod inner { if !output.status.success() { return Err(ExecutionError::CommandFailure { - command: command.get_args().map(|s| s.to_string_lossy().into()).collect::>().join(" "), + command: command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" "), status: output.status, stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 53cabff6a04..c3d5e47f3cf 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -479,7 +479,10 @@ impl Zones { Self::ensure_address( None, &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static(IpAddr::V6(address), Some(omicron_common::address::SLED_PREFIX)), + AddressRequest::new_static( + IpAddr::V6(address), + Some(omicron_common::address::SLED_PREFIX), + ), )?; Ok(()) } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index fff9dee19a3..1ec2858c0b7 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -72,13 +72,6 @@ impl SetupServiceConfig { new_network(self.rack_subnet, RACK_PREFIX) } - /// Returns the subnet for the "reserved" rack subnet. - /// - /// This is used for AZ-wide services, such as DNS. - pub fn reserved_rack_subnet(&self) -> Ipv6Network { - new_network(self.az_subnet().ip(), RACK_PREFIX) - } - /// Returns the subnet for the `index`-th sled in the rack. pub fn sled_subnet(&self, index: u8) -> Ipv6Network { omicron_common::address::get_64_subnet(self.rack_subnet(), index) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b25868e45d3..4b04234210d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -5,16 +5,13 @@ //! Rack Setup Service implementation use super::config::{SetupServiceConfig as Config, SledRequest}; -use crate::params::ServiceRequest; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, params::SledSubnet, }; -use omicron_common::address::{ - get_dns_subnets, - get_sled_address, -}; +use crate::params::ServiceRequest; +use omicron_common::address::{get_sled_address, ReservedRackSubnet}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -307,35 +304,37 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let dns_subnets = get_dns_subnets(config.reserved_rack_subnet()); + let rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let dns_subnets = rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); - let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. - let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - SledRequest::default() + let requests_and_sleds = + bootstrap_addrs.map(|(idx, bootstrap_addr)| { + // If a sled was explicitly requested from the RSS configuration, + // use that. Otherwise, just give it a "default" (empty) set of + // services. + let mut request = { + if idx < config.requests.len() { + config.requests[idx].clone() + } else { + SledRequest::default() + } + }; + + // The first enumerated addresses get assigned the additional + // responsibility of being internal DNS servers. + if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + request.dns_services.push(ServiceRequest { + name: "internal-dns".to_string(), + addresses: vec![dns_subnet.dns_address()], + gz_addresses: vec![dns_subnet.gz_address().ip()], + }); } - }; - - // The first enumerated addresses get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - request.dns_services.push(ServiceRequest { - name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address()], - gz_addresses: vec![dns_subnet.gz_address().ip()], - }); - } - (request, (idx, bootstrap_addr)) - }); + (request, (idx, bootstrap_addr)) + }); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; @@ -574,16 +573,15 @@ impl ServiceInner { allocation.initialization_request.subnet.as_ref().0, )); - let all_services = allocation.services_request.services.iter() + let all_services = allocation + .services_request + .services + .iter() .chain(allocation.services_request.dns_services.iter()) .map(|s| s.clone()) .collect::>(); - self.initialize_services( - sled_address, - &all_services, - ) - .await?; + self.initialize_services(sled_address, &all_services).await?; Ok(()) }, )) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 50e534fefdd..f2e76fd361d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -84,7 +84,10 @@ impl ServiceManager { log, config_path, zones: Mutex::new(vec![]), - vnic_allocator: VnicAllocator::new("Service", physical_link.clone())?, + vnic_allocator: VnicAllocator::new( + "Service", + physical_link.clone(), + )?, physical_link, }; @@ -163,7 +166,8 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); + let addr_request = + AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -174,14 +178,19 @@ impl ServiceManager { info!(self.log, "GZ addresses: {:#?}", service.gz_addresses); for addr in &service.gz_addresses { - info!(self.log, "Ensuring GZ address {} exists", addr.to_string()); + info!( + self.log, + "Ensuring GZ address {} exists", + addr.to_string() + ); let addr_name = service.name.replace(&['-', '_'][..], ""); Zones::ensure_has_global_zone_v6_address( self.physical_link.clone(), *addr, &addr_name, - ).map_err(|e| Error::GzAddressFailure(e))?; + ) + .map_err(|e| Error::GzAddressFailure(e))?; } debug!(self.log, "importing manifest"); @@ -244,14 +253,20 @@ impl ServiceManager { ); return Err(Error::ServicesAlreadyConfigured); } - requested_set.difference(&known_set).map(|s| (*s).clone()).collect::>() + requested_set + .difference(&known_set) + .map(|s| (*s).clone()) + .collect::>() } else { request.services.clone() } }; - self.initialize_services_locked(&mut existing_zones, &services_to_initialize) - .await?; + self.initialize_services_locked( + &mut existing_zones, + &services_to_initialize, + ) + .await?; let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); @@ -327,7 +342,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], - gz_addresses: vec!{}, + gz_addresses: vec![], }], }) .await @@ -341,7 +356,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], - gz_addresses: vec!{}, + gz_addresses: vec![], }], }) .await From fcbc0ab5bf2362f24186ce0740b1c2b0b9b61f78 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 21:00:33 -0400 Subject: [PATCH 12/61] Correctly passing addresses, GZ addresses to DNS service for setup --- common/src/address.rs | 6 ++-- internal-dns/src/bin/dns-server.rs | 19 +++++++++--- internal-dns/src/lib.rs | 1 - sled-agent/src/rack_setup/service.rs | 4 +-- sled-agent/src/services.rs | 44 +++++++++++++++++++++++++++- smf/internal-dns/config.toml | 8 +---- smf/internal-dns/manifest.xml | 7 ++++- 7 files changed, 69 insertions(+), 20 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index e9eef2e0c1a..89077c632ab 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -25,6 +25,7 @@ pub const DNS_REDUNDANCY: usize = 1; /// reserved for DNS servers. pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; @@ -78,9 +79,6 @@ impl ReservedRackSubnet { pub fn get_dns_subnets(&self) -> Vec { assert_eq!(self.0.prefix(), RACK_PREFIX); - let mut iter = self.0.iter(); - let _anycast_ip = iter.next().unwrap(); - (0..DNS_REDUNDANCY) .map(|idx| { let network = @@ -129,7 +127,7 @@ mod test { assert_eq!( // Note that these bits (indicating the rack) are zero. // vv - "fd00:1122:3344:0001::/56".parse::().unwrap(), + "fd00:1122:3344:0000::/56".parse::().unwrap(), rack_subnet.0, ); diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 505a42a7dc0..3e3b98f81b5 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -15,21 +15,31 @@ use anyhow::Context; use clap::Parser; use std::path::PathBuf; use std::sync::Arc; +use std::net::{SocketAddr, SocketAddrV6}; #[derive(Parser, Debug)] struct Args { #[clap(long)] config_file: PathBuf, + + #[clap(long)] + server_address: SocketAddrV6, + + #[clap(long)] + dns_address: SocketAddrV6, } #[tokio::main] async fn main() -> Result<(), anyhow::Error> { let args = Args::parse(); let config_file = &args.config_file; + let dns_address = &args.dns_address; let config_file_contents = std::fs::read_to_string(config_file) .with_context(|| format!("read config file {:?}", config_file))?; - let config: internal_dns::Config = toml::from_str(&config_file_contents) + let mut config: internal_dns::Config = toml::from_str(&config_file_contents) .with_context(|| format!("parse config file {:?}", config_file))?; + + config.dropshot.bind_address = SocketAddr::V6(args.server_address); eprintln!("{:?}", config); let log = config @@ -42,10 +52,11 @@ async fn main() -> Result<(), anyhow::Error> { { let db = db.clone(); let log = log.clone(); - let config = config.dns.clone(); - + let dns_config = internal_dns::dns_server::Config { + bind_address: dns_address.to_string() + }; tokio::spawn(async move { - internal_dns::dns_server::run(log, db, config).await + internal_dns::dns_server::run(log, db, dns_config).await }); } diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs index d94684d75e5..786750c1a8f 100644 --- a/internal-dns/src/lib.rs +++ b/internal-dns/src/lib.rs @@ -18,7 +18,6 @@ pub struct Config { pub log: dropshot::ConfigLogging, pub dropshot: dropshot::ConfigDropshot, pub data: dns_data::Config, - pub dns: dns_server::Config, } pub async fn start_server( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4b04234210d..0eb4f141266 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -304,8 +304,8 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); - let dns_subnets = rack_subnet.get_dns_subnets(); + let reserved_rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f2e76fd361d..0a0386f0945 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -9,6 +9,7 @@ use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, Zones}; use crate::params::{ServiceEnsureBody, ServiceRequest}; +use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; @@ -204,13 +205,54 @@ impl ServiceManager { ), ])?; + let smf_name = format!("svc:/system/illumos/{}", service.name); + let default_smf_name = format!("{}:default", smf_name); + + match service.name.as_str() { + "internal-dns" => { + info!(self.log, "Setting up internal-dns service"); + // TODO: This is a hack! + // - Should we only supply one address, and drop the port? + // ^ this seems like a good start + // - Should we provide a mechanism for providing multiple addresses? + let address = service.addresses[0].ip(); + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/server_address=[{}]:{}", address, DNS_SERVER_PORT), + ])?; + + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/dns_address=[{}]:{}", address, DNS_PORT), + ])?; + }, + _ => { + info!(self.log, "Service name {} did not match", service.name); + }, + } + debug!(self.log, "enabling service"); + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ])?; + running_zone.run_cmd(&[ crate::illumos::zone::SVCADM, "enable", "-t", - &format!("svc:/system/illumos/{}:default", service.name), + &default_smf_name, ])?; existing_zones.push(running_zone); diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index a0dae0a73aa..b6fd0e4fa8c 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,12 +1,6 @@ -# TODO: remove the addresses here! They're getting assigned to us! - [dropshot] -bind_address = "[fd00:1122:3344:1::1]:5353" request_body_max_bytes = 1048576 -[dns] -bind_address = "[fd00:1122:3344:1::1]:53" - [log] # Show log messages of this level and more severe level = "info" @@ -21,4 +15,4 @@ mode = "stderr-terminal" [data] nmax_messages = 16 -storage_path = "/var/tmp/oxide/dns" +storage_path = "/var/oxide/dns" diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml index 25b03434c34..d7364ce12f1 100644 --- a/smf/internal-dns/manifest.xml +++ b/smf/internal-dns/manifest.xml @@ -13,10 +13,15 @@ + + + + + From f214fcf030179b513d44e6cb0585b8d1c136cb7b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:10:59 -0400 Subject: [PATCH 13/61] Avoid specifying port when not necessary --- common/src/address.rs | 6 +++--- sled-agent/src/params.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 2 +- sled-agent/src/services.rs | 4 ++-- smf/sled-agent/config-rss.toml | 13 +++---------- 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 89077c632ab..69a6c3143d2 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -39,11 +39,11 @@ impl DnsSubnet { /// Returns the DNS server address within the subnet. /// /// This is the first address within the subnet. - pub fn dns_address(&self) -> SocketAddrV6 { + pub fn dns_address(&self) -> Ipv6Network { let mut iter = self.network.iter(); let _anycast_ip = iter.next().unwrap(); let dns_ip = iter.next().unwrap(); - SocketAddrV6::new(dns_ip, DNS_SERVER_PORT, 0, 0) + Ipv6Network::new(dns_ip, SLED_PREFIX).unwrap() } /// Returns the address which the Global Zone should create @@ -137,7 +137,7 @@ mod test { // The DNS address and GZ address should be only differing by one. assert_eq!( - "[fd00:1122:3344:0001::1]:5353".parse::().unwrap(), + "fd00:1122:3344:0001::1/64".parse::().unwrap(), dns_subnets[0].dns_address(), ); assert_eq!( diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 553954e5c7f..ca107e4f3dd 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr}; use uuid::Uuid; /// Used to request a Disk state change @@ -233,7 +233,7 @@ pub struct ServiceRequest { // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. - pub addresses: Vec, + pub addresses: Vec, // The addresses in the global zone which should be created, if necessary // to route to the service. #[serde(default)] diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0eb4f141266..8dc4021f6f3 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -328,7 +328,7 @@ impl ServiceInner { let dns_subnet = &dns_subnets[idx]; request.dns_services.push(ServiceRequest { name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address()], + addresses: vec![dns_subnet.dns_address().ip()], gz_addresses: vec![dns_subnet.gz_address().ip()], }); } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0a0386f0945..9f95a3bc4ea 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -168,7 +168,7 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = - AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); + AddressRequest::new_static(IpAddr::V6(*addr), None); running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -215,7 +215,7 @@ impl ServiceManager { // - Should we only supply one address, and drop the port? // ^ this seems like a good start // - Should we provide a mechanism for providing multiple addresses? - let address = service.addresses[0].ip(); + let address = service.addresses[0]; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 002e17fcc22..cf9582ab71a 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -29,9 +29,7 @@ dataset_kind.type = "crucible" zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ - "[fd00:1122:3344:0101::2]:32221", -] +dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. @@ -42,17 +40,12 @@ dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" -addresses = [ - "[fd00:1122:3344:0101::3]:12220", - "[fd00:1122:3344:0101::3]:12221", -] +addresses = [ "fd00:1122:3344:0101::3" ] gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. [[request.service]] name = "oximeter" -addresses = [ - "[fd00:1122:3344:0101::4]:12223", -] +addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] From baea4a81823b04e3fe5a4dde2faafda14f5edddf Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:18:02 -0400 Subject: [PATCH 14/61] safer vec access, better errors --- sled-agent/src/rack_setup/service.rs | 1 - sled-agent/src/services.rs | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 8dc4021f6f3..9e846ecf1dc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -529,7 +529,6 @@ impl ServiceInner { allocation.initialization_request.subnet.as_ref().0, )); - // TODO: also tell sled to make GZ address self.initialize_services( sled_address, &allocation.services_request.dns_services, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f95a3bc4ea..c4ccdea379d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -37,6 +37,12 @@ pub enum Error { #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), + #[error("Could not initialize service as requested: {message}")] + BadServiceRequest { + service: String, + message: String, + }, + #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, } @@ -211,11 +217,12 @@ impl ServiceManager { match service.name.as_str() { "internal-dns" => { info!(self.log, "Setting up internal-dns service"); - // TODO: This is a hack! - // - Should we only supply one address, and drop the port? - // ^ this seems like a good start - // - Should we provide a mechanism for providing multiple addresses? - let address = service.addresses[0]; + let address = service.addresses.get(0).ok_or_else(|| { + Error::BadServiceRequest { + service: service.name.clone(), + message: "Not enough addresses".to_string(), + } + })?; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", From 58744c4ee8d3d8864b6020f70de4b92f1d415803 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:18:24 -0400 Subject: [PATCH 15/61] fmt --- internal-dns/src/bin/dns-server.rs | 9 ++++--- sled-agent/src/rack_setup/service.rs | 3 ++- sled-agent/src/services.rs | 37 +++++++++++++++++----------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 3e3b98f81b5..b8cca5af301 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -13,9 +13,9 @@ use anyhow::anyhow; use anyhow::Context; use clap::Parser; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::PathBuf; use std::sync::Arc; -use std::net::{SocketAddr, SocketAddrV6}; #[derive(Parser, Debug)] struct Args { @@ -36,8 +36,9 @@ async fn main() -> Result<(), anyhow::Error> { let dns_address = &args.dns_address; let config_file_contents = std::fs::read_to_string(config_file) .with_context(|| format!("read config file {:?}", config_file))?; - let mut config: internal_dns::Config = toml::from_str(&config_file_contents) - .with_context(|| format!("parse config file {:?}", config_file))?; + let mut config: internal_dns::Config = + toml::from_str(&config_file_contents) + .with_context(|| format!("parse config file {:?}", config_file))?; config.dropshot.bind_address = SocketAddr::V6(args.server_address); eprintln!("{:?}", config); @@ -53,7 +54,7 @@ async fn main() -> Result<(), anyhow::Error> { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { - bind_address: dns_address.to_string() + bind_address: dns_address.to_string(), }; tokio::spawn(async move { internal_dns::dns_server::run(log, db, dns_config).await diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 9e846ecf1dc..98c6962e1a5 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -304,7 +304,8 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let reserved_rack_subnet = + ReservedRackSubnet::new(config.rack_subnet()); let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c4ccdea379d..9f1966866c4 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -38,10 +38,7 @@ pub enum Error { Dladm(#[from] crate::illumos::dladm::Error), #[error("Could not initialize service as requested: {message}")] - BadServiceRequest { - service: String, - message: String, - }, + BadServiceRequest { service: String, message: String }, #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, @@ -217,18 +214,22 @@ impl ServiceManager { match service.name.as_str() { "internal-dns" => { info!(self.log, "Setting up internal-dns service"); - let address = service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } - })?; + let address = + service.addresses.get(0).ok_or_else(|| { + Error::BadServiceRequest { + service: service.name.clone(), + message: "Not enough addresses".to_string(), + } + })?; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", &smf_name, "setprop", - &format!("config/server_address=[{}]:{}", address, DNS_SERVER_PORT), + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), ])?; running_zone.run_cmd(&[ @@ -236,12 +237,18 @@ impl ServiceManager { "-s", &smf_name, "setprop", - &format!("config/dns_address=[{}]:{}", address, DNS_PORT), + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), ])?; - }, + } _ => { - info!(self.log, "Service name {} did not match", service.name); - }, + info!( + self.log, + "Service name {} did not match", service.name + ); + } } debug!(self.log, "enabling service"); From c1e2180015bc05d37a81ca0acb1490f5913c3e5e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:19:14 -0400 Subject: [PATCH 16/61] updated storage path --- smf/internal-dns/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2f407c76e79..2049036d338 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -19,4 +19,4 @@ mode = "stderr-terminal" [data] nmax_messages = 16 -storage_path = "/var/tmp/oxide/dns" +storage_path = "/var/oxide/dns" From 39431c613205816b80c8691c75f45357858f41fb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:31:00 -0400 Subject: [PATCH 17/61] fix tests, clippy --- internal-dns/tests/basic_test.rs | 9 ++++----- sled-agent/src/rack_setup/service.rs | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 24e5b11744e..b58ecdb62f9 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -140,10 +140,12 @@ async fn init_client_server( { let db = db.clone(); let log = log.clone(); - let config = config.dns.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: format!("127.0.0.1:{}", dns_port), + }; tokio::spawn(async move { - internal_dns::dns_server::run(log, db, config).await + internal_dns::dns_server::run(log, db, dns_config).await }); } @@ -184,9 +186,6 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { nmax_messages: 16, storage_path, }, - dns: internal_dns::dns_server::Config { - bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), - }, }; Ok((config, dropshot_port, dns_port)) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 98c6962e1a5..153ca1bc18b 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -355,7 +355,7 @@ impl ServiceInner { bootstrap_addr, SledAllocation { initialization_request: SledAgentRequest { subnet }, - services_request: request.clone(), + services_request: request, }, ) }); From 22dfb79600a0d91d9061e29a0dfae19f1bb0a0cc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:43:54 -0400 Subject: [PATCH 18/61] Fix another test --- sled-agent/src/services.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f1966866c4..b699842c868 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -242,6 +242,15 @@ impl ServiceManager { address, DNS_PORT ), ])?; + + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ])?; } _ => { info!( @@ -253,15 +262,6 @@ impl ServiceManager { debug!(self.log, "enabling service"); - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ])?; - running_zone.run_cmd(&[ crate::illumos::zone::SVCADM, "enable", From 9f7f55b87816d434aa851072b3bb572b372dc694 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:49:35 -0400 Subject: [PATCH 19/61] Bunyan formatted --- smf/internal-dns/config.toml | 11 +++-------- smf/nexus/config.toml | 11 +++-------- smf/oximeter/config.toml | 4 +++- smf/sled-agent/config.toml | 4 +++- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2049036d338..8edc8d33725 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -8,14 +8,9 @@ bind_address = "[fd00:1122:3344:1::9]:53" [log] # Show log messages of this level and more severe level = "info" - -# Example output to a terminal (with colors) -mode = "stderr-terminal" - -# Example output to a file, appending if it already exists. -#mode = "file" -#path = "logs/server.log" -#if_exists = "append" +mode = "file" +path = "/var/oxide/internal-dns.log" +if_exists = "append" [data] nmax_messages = 16 diff --git a/smf/nexus/config.toml b/smf/nexus/config.toml index f11c275c31e..3377acfbffc 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config.toml @@ -31,14 +31,9 @@ bind_address = "[fd00:1122:3344:1::3]:12221" [log] # Show log messages of this level and more severe level = "info" - -# Example output to a terminal (with colors) -mode = "stderr-terminal" - -# Example output to a file, appending if it already exists. -#mode = "file" -#path = "logs/server.log" -#if_exists = "append" +mode = "file" +path = "/var/oxide/nexus.log" +if_exists = "append" # Configuration for interacting with the timeseries database [timeseries_db] diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index a4812d01fd1..8f66cb0603e 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -11,7 +11,9 @@ batch_interval = 5 # In seconds [log] level = "debug" -mode = "stderr-terminal" +mode = "file" +path = "/var/oxide/oximeter.log" +if_exists = "append" [dropshot] bind_address = "[fd00:1122:3344:1::4]:12223" diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 6dfe87fe9bf..ac4a4283bf8 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -23,4 +23,6 @@ zpools = [ [log] level = "info" -mode = "stderr-terminal" +mode = "file" +path = "/var/oxide/sled-agent.log" +if_exists = "append" From 981f7449a72d387c37d38cca768944b36e4ae668 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 12:10:15 -0400 Subject: [PATCH 20/61] Regenerate bindings --- openapi/sled-agent.json | 3 ++- sled-agent/src/params.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index dca641e06d3..cc6cfc8c20c 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -965,7 +965,8 @@ "addresses": { "type": "array", "items": { - "type": "string" + "type": "string", + "format": "ipv6" } }, "gz_addresses": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index ca107e4f3dd..7ad76a634b2 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -244,7 +244,7 @@ impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { name: s.name, - addresses: s.addresses.into_iter().map(|s| s.to_string()).collect(), + addresses: s.addresses, gz_addresses: s.gz_addresses, } } From 57a6697e10f7618d972ab13eb68078ce47704f23 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 12:52:38 -0400 Subject: [PATCH 21/61] Start on service errors --- sled-agent/src/services.rs | 76 +++++++++++++++++++++++++++++------- sled-agent/src/sled_agent.rs | 21 ++++++++-- 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b699842c868..3c4b28795d5 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -19,14 +19,23 @@ use tokio::sync::Mutex; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("Cannot serialize TOML file: {0}")] - TomlSerialize(#[from] toml::ser::Error), - - #[error("Cannot deserialize TOML file: {0}")] - TomlDeserialize(#[from] toml::de::Error), - - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), + #[error("Cannot serialize TOML to file {path}: {err}")] + TomlSerialize { + path: PathBuf, + err: toml::ser::Error, + }, + + #[error("Cannot deserialize TOML from file {path}: {err}")] + TomlDeserialize { + path: PathBuf, + err: toml::de::Error, + }, + + #[error("I/O Error accessing {path}: {err}")] + Io { + path: PathBuf, + err: std::io::Error, + }, #[error(transparent)] RunningZone(#[from] crate::illumos::running_zone::Error), @@ -37,7 +46,7 @@ pub enum Error { #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), - #[error("Could not initialize service as requested: {message}")] + #[error("Could not initialize service {service} as requested: {message}")] BadServiceRequest { service: String, message: String }, #[error("Services already configured for this Sled Agent")] @@ -103,8 +112,20 @@ impl ServiceManager { config_path.to_string_lossy() ); let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; + &tokio::fs::read_to_string(&config_path) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?, + ).map_err(|err| { + Error::TomlDeserialize { + path: config_path.clone(), + err, + } + })?; let mut existing_zones = mgr.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) .await?; @@ -288,8 +309,20 @@ impl ServiceManager { let services_to_initialize = { if config_path.exists() { let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; + &tokio::fs::read_to_string(&config_path) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?, + ).map_err(|err| { + Error::TomlDeserialize { + path: config_path.clone(), + err, + } + })?; let known_services = cfg.services; let known_set: HashSet<&ServiceRequest> = @@ -326,8 +359,21 @@ impl ServiceManager { let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); - tokio::fs::write(&config_path, toml::to_string(&serialized_services)?) - .await?; + let services_str = toml::to_string(&serialized_services) + .map_err(|err| { + Error::TomlSerialize { + path: config_path.clone(), + err, + } + })?; + tokio::fs::write(&config_path, services_str) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?; Ok(()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index df3e9e816a4..0e92e46abda 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -34,8 +34,11 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { - #[error(transparent)] - Datalink(#[from] crate::illumos::dladm::Error), + #[error("Datalink error: {message}, {err}")] + Datalink { + message: String, + err: crate::illumos::dladm::Error, + }, #[error(transparent)] Services(#[from] crate::services::Error), @@ -140,10 +143,20 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics()?; + let vnics = Dladm::get_vnics().map_err(|err| { + Error::Datalink { + message: "Looking up VNICs on boot".to_string(), + err, + } + })?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic)?; + Dladm::delete_vnic(&vnic).map_err(|err| { + Error::Datalink { + message: "Deleting VNIC during boot".to_string(), + err, + } + })?; } let storage = StorageManager::new( From 5031561434131c2567cdc6802faded2e9ef92dca Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 13:04:43 -0400 Subject: [PATCH 22/61] Push the 'find_physical' call upwards. Simplifies error handling --- sled-agent/src/bootstrap/agent.rs | 10 +++++++++- sled-agent/src/illumos/vnic.rs | 9 ++------- sled-agent/src/illumos/zone.rs | 10 ++-------- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/services.rs | 7 +++---- sled-agent/src/sled_agent.rs | 19 +++++++++++++++---- sled-agent/src/storage_manager.rs | 2 +- 7 files changed, 33 insertions(+), 26 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index da20e4f8e58..c06a6538674 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -130,8 +130,16 @@ impl Agent { sled_config: SledConfig, address: Ipv6Addr, ) -> Result { + let data_link = if let Some(link) = sled_config.data_link.clone() { + link + } else { + Dladm::find_physical().map_err(|err| { + BootstrapError::SledError(format!("Can't access physical link: {}", err)) + })? + }; + Zones::ensure_has_global_zone_v6_address( - sled_config.data_link.clone(), + data_link.clone(), address, "bootstrap6", )?; diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index 5d5d8292923..b18a08391af 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -43,17 +43,12 @@ impl VnicAllocator { /// - oxControlStorage[NNN] pub fn new>( scope: S, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { - let data_link = if let Some(link) = physical_link { - link - } else { - Dladm::find_physical()? - }; Ok(Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), - data_link, + data_link: physical_link, }) } diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index c3d5e47f3cf..9f4a499e861 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -9,7 +9,7 @@ use slog::Logger; use std::net::{IpAddr, Ipv6Addr}; use crate::illumos::addrobj::AddrObject; -use crate::illumos::dladm::{Dladm, PhysicalLink, VNIC_PREFIX_CONTROL}; +use crate::illumos::dladm::{PhysicalLink, VNIC_PREFIX_CONTROL}; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::{execute, PFEXEC}; @@ -457,16 +457,10 @@ impl Zones { // should remove this function when Sled Agents are provided IPv6 addresses // from RSS. pub fn ensure_has_global_zone_v6_address( - physical_link: Option, + link: PhysicalLink, address: Ipv6Addr, name: &str, ) -> Result<(), Error> { - // Ensure that addrconf has been set up in the Global Zone. - let link = if let Some(link) = physical_link { - link - } else { - Dladm::find_physical()? - }; let gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal")?; Self::ensure_has_link_local_v6_address(None, &gz_link_local_addrobj)?; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 9a86b3ef62a..ca01815acaf 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -59,7 +59,7 @@ impl InstanceManager { log: Logger, vlan: Option, nexus_client: Arc, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3c4b28795d5..8262bf17ff8 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -73,7 +73,7 @@ pub struct ServiceManager { config_path: Option, zones: Mutex>, vnic_allocator: VnicAllocator, - physical_link: Option, + physical_link: PhysicalLink, } impl ServiceManager { @@ -82,14 +82,13 @@ impl ServiceManager { /// /// Args: /// - `log`: The logger - /// - `physical_link`: An optional physical link on which to allocate - /// datalinks. By default, the first physical link is used. + /// - `physical_link`: A physical link on which to allocate datalinks. /// - `config_path`: An optional path to a configuration file to store /// the record of services. By default, [`default_services_config_path`] /// is used. pub async fn new( log: Logger, - physical_link: Option, + physical_link: PhysicalLink, config_path: Option, ) -> Result { debug!(log, "Creating new ServiceManager"); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 0e92e46abda..e659beea9aa 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -98,6 +98,17 @@ impl SledAgent { let vlan = config.vlan; info!(&log, "created sled agent"; "id" => ?id); + let data_link = if let Some(link) = config.data_link.clone() { + link + } else { + Dladm::find_physical().map_err(|err| { + Error::Datalink { + message: "Looking up physical link".to_string(), + err, + } + })? + }; + // Before we start creating zones, we need to ensure that the // necessary ZFS and Zone resources are ready. Zfs::ensure_zoned_filesystem( @@ -116,7 +127,7 @@ impl SledAgent { // RSS-provided IP address. In the meantime, we use one from the // configuration file. Zones::ensure_has_global_zone_v6_address( - config.data_link.clone(), + data_link.clone(), *sled_address.ip(), "sled6", )?; @@ -163,7 +174,7 @@ impl SledAgent { &log, *id, nexus_client.clone(), - config.data_link.clone(), + data_link.clone(), ) .await?; if let Some(pools) = &config.zpools { @@ -180,10 +191,10 @@ impl SledAgent { log.clone(), vlan, nexus_client.clone(), - config.data_link.clone(), + data_link.clone(), )?; let services = - ServiceManager::new(log.clone(), config.data_link.clone(), None) + ServiceManager::new(log.clone(), data_link.clone(), None) .await?; Ok(SledAgent { diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index ff09437993e..386db23c7f5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -826,7 +826,7 @@ impl StorageManager { log: &Logger, sled_id: Uuid, nexus_client: Arc, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { let log = log.new(o!("component" => "sled agent storage manager")); let pools = Arc::new(Mutex::new(HashMap::new())); From 37afdd64b34d4c4aa8506554155814372d24fca5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 13:13:39 -0400 Subject: [PATCH 23/61] Avoid using Result for now infallible funcs, fix tests --- sled-agent/src/illumos/vnic.rs | 12 +++++------- sled-agent/src/instance.rs | 5 ++--- sled-agent/src/instance_manager.rs | 18 ++++++++---------- sled-agent/src/services.rs | 14 +++++++------- sled-agent/src/sled_agent.rs | 4 ++-- sled-agent/src/storage_manager.rs | 8 ++++---- 6 files changed, 28 insertions(+), 33 deletions(-) diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index b18a08391af..3e9f93a26d7 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -44,12 +44,12 @@ impl VnicAllocator { pub fn new>( scope: S, physical_link: PhysicalLink, - ) -> Result { - Ok(Self { + ) -> Self { + Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), data_link: physical_link, - }) + } } /// Creates a new NIC, intended for usage by the guest. @@ -144,8 +144,7 @@ mod test { #[test] fn test_allocate() { let allocator = - VnicAllocator::new("Foo", Some(PhysicalLink("mylink".to_string()))) - .unwrap(); + VnicAllocator::new("Foo", PhysicalLink("mylink".to_string())); assert_eq!("oxFoo0", allocator.next()); assert_eq!("oxFoo1", allocator.next()); assert_eq!("oxFoo2", allocator.next()); @@ -154,8 +153,7 @@ mod test { #[test] fn test_allocate_within_scopes() { let allocator = - VnicAllocator::new("Foo", Some(PhysicalLink("mylink".to_string()))) - .unwrap(); + VnicAllocator::new("Foo", PhysicalLink("mylink".to_string())); assert_eq!("oxFoo0", allocator.next()); let allocator = allocator.new_superscope("Baz"); assert_eq!("oxBazFoo1", allocator.next()); diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 142037515c3..099998075be 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -717,9 +717,8 @@ mod test { let log = logger(); let vnic_allocator = VnicAllocator::new( "Test".to_string(), - Some(PhysicalLink("mylink".to_string())), - ) - .unwrap(); + PhysicalLink("mylink".to_string()), + ); let nexus_client = MockNexusClient::default(); let inst = Instance::new( diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index ca01815acaf..608dd76233d 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -60,16 +60,16 @@ impl InstanceManager { vlan: Option, nexus_client: Arc, physical_link: PhysicalLink, - ) -> Result { - Ok(InstanceManager { + ) -> InstanceManager { + InstanceManager { inner: Arc::new(InstanceManagerInternal { log, nexus_client, instances: Mutex::new(BTreeMap::new()), vlan, - vnic_allocator: VnicAllocator::new("Instance", physical_link)?, + vnic_allocator: VnicAllocator::new("Instance", physical_link), }), - }) + } } /// Idempotently ensures that the given Instance (described by @@ -266,9 +266,8 @@ mod test { log, None, nexus_client, - Some(PhysicalLink("mylink".to_string())), - ) - .unwrap(); + PhysicalLink("mylink".to_string()), + ); // Verify that no instances exist. assert!(im.inner.instances.lock().unwrap().is_empty()); @@ -347,9 +346,8 @@ mod test { log, None, nexus_client, - Some(PhysicalLink("mylink".to_string())), - ) - .unwrap(); + PhysicalLink("mylink".to_string()), + ); let ticket = Arc::new(std::sync::Mutex::new(None)); let ticket_clone = ticket.clone(); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 8262bf17ff8..338eed2d625 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -99,7 +99,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new( "Service", physical_link.clone(), - )?, + ), physical_link, }; @@ -492,7 +492,7 @@ mod test { let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config), ) .await @@ -516,7 +516,7 @@ mod test { let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config), ) .await @@ -543,7 +543,7 @@ mod test { // down. let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -556,7 +556,7 @@ mod test { let _expectations = expect_new_service(); let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -580,7 +580,7 @@ mod test { // down. let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -595,7 +595,7 @@ mod test { // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e659beea9aa..42249352b72 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -176,7 +176,7 @@ impl SledAgent { nexus_client.clone(), data_link.clone(), ) - .await?; + .await; if let Some(pools) = &config.zpools { for pool in pools { info!( @@ -192,7 +192,7 @@ impl SledAgent { vlan, nexus_client.clone(), data_link.clone(), - )?; + ); let services = ServiceManager::new(log.clone(), data_link.clone(), None) .await?; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 386db23c7f5..f911f1356df 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -827,7 +827,7 @@ impl StorageManager { sled_id: Uuid, nexus_client: Arc, physical_link: PhysicalLink, - ) -> Result { + ) -> Self { let log = log.new(o!("component" => "sled agent storage manager")); let pools = Arc::new(Mutex::new(HashMap::new())); let (new_pools_tx, new_pools_rx) = mpsc::channel(10); @@ -839,14 +839,14 @@ impl StorageManager { pools: pools.clone(), new_pools_rx, new_filesystems_rx, - vnic_allocator: VnicAllocator::new("Storage", physical_link)?, + vnic_allocator: VnicAllocator::new("Storage", physical_link), }; - Ok(StorageManager { + StorageManager { pools, new_pools_tx, new_filesystems_tx, task: tokio::task::spawn(async move { worker.do_work().await }), - }) + } } /// Adds a zpool to the storage manager. From 1d0a5cec7c451c1e3445c6ee981a05003238a41e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 16:45:03 -0400 Subject: [PATCH 24/61] More specific, contextual zone errors --- sled-agent/src/bootstrap/agent.rs | 14 +- sled-agent/src/illumos/addrobj.rs | 36 ++- sled-agent/src/illumos/running_zone.rs | 12 +- sled-agent/src/illumos/vnic.rs | 5 +- sled-agent/src/illumos/zone.rs | 304 ++++++++++++++++--------- sled-agent/src/instance.rs | 3 - sled-agent/src/instance_manager.rs | 6 - sled-agent/src/services.rs | 232 ++++++++++--------- sled-agent/src/sled_agent.rs | 40 ++-- sled-agent/src/storage_manager.rs | 3 - 10 files changed, 396 insertions(+), 259 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index c06a6538674..50bb833ed5b 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -13,7 +13,7 @@ use super::trust_quorum::{ use super::views::{ShareResponse, SledAgentResponse}; use crate::config::Config as SledConfig; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; -use crate::illumos::zone::{self, Zones}; +use crate::illumos::zone::Zones; use crate::rack_setup::service::Service as RackSetupService; use crate::server::Server as SledServer; use omicron_common::api::external::{Error as ExternalError, MacAddr}; @@ -49,8 +49,8 @@ pub enum BootstrapError { #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), - #[error(transparent)] - Zone(#[from] zone::Error), + #[error("Failed to initialize bootstrap address: {err}")] + BootstrapAddress { err: crate::illumos::zone::EnsureGzAddressError }, } impl From for ExternalError { @@ -134,7 +134,10 @@ impl Agent { link } else { Dladm::find_physical().map_err(|err| { - BootstrapError::SledError(format!("Can't access physical link: {}", err)) + BootstrapError::SledError(format!( + "Can't access physical link: {}", + err + )) })? }; @@ -142,7 +145,8 @@ impl Agent { data_link.clone(), address, "bootstrap6", - )?; + ) + .map_err(|err| BootstrapError::BootstrapAddress { err })?; let peer_monitor = discovery::PeerMonitor::new(&log, address)?; let share = read_key_share()?; diff --git a/sled-agent/src/illumos/addrobj.rs b/sled-agent/src/illumos/addrobj.rs index 80f41fd9010..5b1d3668da3 100644 --- a/sled-agent/src/illumos/addrobj.rs +++ b/sled-agent/src/illumos/addrobj.rs @@ -19,28 +19,48 @@ pub struct AddrObject { name: String, } +#[derive(Debug, PartialEq, Clone)] +enum BadName { + Interface(String), + Object(String), +} + +impl std::fmt::Display for BadName { + fn fmt( + &self, + f: &mut std::fmt::Formatter<'_>, + ) -> Result<(), std::fmt::Error> { + match self { + BadName::Interface(s) => write!(f, "Bad interface name: {}", s), + BadName::Object(s) => write!(f, "Bad object name: {}", s), + } + } +} + /// Errors which may be returned from constructing an [`AddrObject`]. #[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("Failed to parse addrobj name: {0}")] - Parse(String), +#[error("Failed to parse addrobj name: {name}")] +pub struct ParseError { + name: BadName, } impl AddrObject { - pub fn new_control(interface: &str) -> Result { + pub fn new_control(interface: &str) -> Result { Self::new(interface, "omicron") } - pub fn on_same_interface(&self, name: &str) -> Result { + pub fn on_same_interface(&self, name: &str) -> Result { Self::new(&self.interface, name) } - pub fn new(interface: &str, name: &str) -> Result { + pub fn new(interface: &str, name: &str) -> Result { if interface.contains('/') { - return Err(Error::Parse(interface.to_string())); + return Err(ParseError { + name: BadName::Interface(interface.to_string()), + }); } if name.contains('/') { - return Err(Error::Parse(name.to_string())); + return Err(ParseError { name: BadName::Object(name.to_string()) }); } Ok(Self { interface: interface.to_string(), name: name.to_string() }) } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index fda2165a714..8898ae69654 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -31,17 +31,23 @@ pub enum Error { #[error("Failed to parse output: {0}")] Parse(#[from] std::string::FromUtf8Error), - #[error("Zone operation failed: {0}")] - Operation(#[from] crate::illumos::zone::Error), + #[error("Failed to create address: {0}")] + AddressCreation(#[from] crate::illumos::zone::EnsureAddressError), + + #[error("Zone management command failed: {0}")] + ZoneOperation(#[from] crate::illumos::zone::AdmError), #[error("Zone error accessing datalink: {0}")] Datalink(#[from] crate::illumos::dladm::Error), #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::Error), + AddrObject(#[from] crate::illumos::addrobj::ParseError), #[error("Timeout waiting for a service: {0}")] Timeout(String), + + #[error(transparent)] + NoControlInterface(#[from] crate::illumos::zone::GetControlInterfaceError), } /// Represents a running zone. diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index 3e9f93a26d7..a2d297609b9 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -41,10 +41,7 @@ impl VnicAllocator { /// /// VnicAllocator::new("Storage") produces /// - oxControlStorage[NNN] - pub fn new>( - scope: S, - physical_link: PhysicalLink, - ) -> Self { + pub fn new>(scope: S, physical_link: PhysicalLink) -> Self { Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 9f4a499e861..08209cc4123 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -4,6 +4,7 @@ //! API for interacting with Zones running Propolis. +use anyhow::anyhow; use ipnetwork::IpNetwork; use slog::Logger; use std::net::{IpAddr, Ipv6Addr}; @@ -24,50 +25,82 @@ pub const ZONE_PREFIX: &str = "oxz_"; pub const PROPOLIS_ZONE_PREFIX: &str = "oxz_propolis-server_"; #[derive(thiserror::Error, Debug)] -pub enum Error { - // TODO: These could be grouped into an "operation" error with an enum - // variant, if we want... - #[error("Cannot halt zone: {0}")] - Halt(zone::ZoneError), - - #[error("Cannot uninstall zone: {0}")] - Uninstall(zone::ZoneError), - - #[error("Cannot delete zone: {0}")] - Delete(zone::ZoneError), - - #[error("Cannot install zone: {0}")] - Install(zone::ZoneError), - - #[error("Cannot configure zone: {0}")] - Configure(zone::ZoneError), +enum Error { + #[error("Zone execution error: {0}")] + Execution(#[from] crate::illumos::ExecutionError), - #[error("Cannot clone zone: {0}")] - Clone(zone::ZoneError), + #[error(transparent)] + AddrObject(#[from] crate::illumos::addrobj::ParseError), - #[error("Cannot boot zone: {0}")] - Boot(zone::ZoneError), + #[error("Address not found: {addrobj}")] + AddressNotFound { addrobj: AddrObject }, +} - #[error("Cannot list zones: {0}")] - List(zone::ZoneError), +/// Operations issued via [`zone::Adm`]. +#[derive(Debug, Clone)] +pub enum Operation { + Boot, + Configure, + Delete, + Halt, + Install, + List, + Uninstall, +} - #[error("Zone execution error: {0}")] - Execution(#[from] crate::illumos::ExecutionError), +/// Errors from issuing [`zone::Adm`] commands. +#[derive(thiserror::Error, Debug)] +#[error("Failed to execute zoneadm command '{op:?}' for zone '{zone}': {err}")] +pub struct AdmError { + op: Operation, + zone: String, + #[source] + err: zone::ZoneError, +} - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), +/// Errors which may be encountered when deleting addresses. +#[derive(thiserror::Error, Debug)] +#[error("Failed to delete address '{addrobj}' in zone '{zone}': {err}")] +pub struct DeleteAddressError { + zone: String, + addrobj: AddrObject, + #[source] + err: crate::illumos::ExecutionError, +} +/// Errors from [`Zones::get_control_interface`]. +/// Error which may be returned accessing the control interface of a zone. +#[derive(thiserror::Error, Debug)] +pub enum GetControlInterfaceError { #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), + Execution(#[from] crate::illumos::ExecutionError), - #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::Error), + #[error("VNIC starting with 'oxControl' not found in {zone}")] + NotFound { zone: String }, +} - #[error("Error accessing filesystem: {0}")] - Filesystem(std::io::Error), +/// Errors which may be encountered ensuring addresses. +#[derive(thiserror::Error, Debug)] +#[error( + "Failed to create address {request:?} with name {name} in {zone}: {err}" +)] +pub struct EnsureAddressError { + zone: String, + request: AddressRequest, + name: AddrObject, + #[source] + err: anyhow::Error, +} - #[error("Value not found")] - NotFound, +/// Errors from [`Zones::ensure_has_global_zone_v6_address`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to create address {address} with name {name} in the GZ on {link:?}: {err}")] +pub struct EnsureGzAddressError { + address: Ipv6Addr, + link: PhysicalLink, + name: String, + #[source] + err: anyhow::Error, } /// Describes the type of addresses which may be requested from a zone. @@ -101,7 +134,9 @@ impl Zones { /// /// Returns the state the zone was in before it was removed, or None if the /// zone did not exist. - pub fn halt_and_remove(name: &str) -> Result, Error> { + pub fn halt_and_remove( + name: &str, + ) -> Result, AdmError> { match Self::find(name)? { None => Ok(None), Some(zone) => { @@ -117,17 +152,29 @@ impl Zones { }; if halt { - zone::Adm::new(name).halt().map_err(Error::Halt)?; + zone::Adm::new(name).halt().map_err(|err| AdmError { + op: Operation::Halt, + zone: name.to_string(), + err, + })?; } if uninstall { - zone::Adm::new(name) - .uninstall(/* force= */ true) - .map_err(Error::Uninstall)?; + zone::Adm::new(name).uninstall(/* force= */ true).map_err( + |err| AdmError { + op: Operation::Uninstall, + zone: name.to_string(), + err, + }, + )?; } zone::Config::new(name) .delete(/* force= */ true) .run() - .map_err(Error::Delete)?; + .map_err(|err| AdmError { + op: Operation::Delete, + zone: name.to_string(), + err, + })?; Ok(Some(state)) } } @@ -137,7 +184,7 @@ impl Zones { pub fn halt_and_remove_logged( log: &Logger, name: &str, - ) -> Result<(), Error> { + ) -> Result<(), AdmError> { if let Some(state) = Self::halt_and_remove(name)? { info!( log, @@ -154,7 +201,7 @@ impl Zones { datasets: &[zone::Dataset], devices: &[zone::Device], vnics: Vec, - ) -> Result<(), Error> { + ) -> Result<(), AdmError> { if let Some(zone) = Self::find(zone_name)? { info!( log, @@ -204,28 +251,44 @@ impl Zones { ..Default::default() }); } - cfg.run().map_err(Error::Configure)?; + cfg.run().map_err(|err| AdmError { + op: Operation::Configure, + zone: zone_name.to_string(), + err, + })?; info!(log, "Installing Omicron zone: {}", zone_name); - zone::Adm::new(zone_name) - .install(&[zone_image.as_ref()]) - .map_err(Error::Install)?; + zone::Adm::new(zone_name).install(&[zone_image.as_ref()]).map_err( + |err| AdmError { + op: Operation::Install, + zone: zone_name.to_string(), + err, + }, + )?; Ok(()) } /// Boots a zone (named `name`). - pub fn boot(name: &str) -> Result<(), Error> { - zone::Adm::new(name).boot().map_err(Error::Boot)?; + pub fn boot(name: &str) -> Result<(), AdmError> { + zone::Adm::new(name).boot().map_err(|err| AdmError { + op: Operation::Boot, + zone: name.to_string(), + err, + })?; Ok(()) } /// Returns all zones that may be managed by the Sled Agent. /// /// These zones must have names starting with [`ZONE_PREFIX`]. - pub fn get() -> Result, Error> { + pub fn get() -> Result, AdmError> { Ok(zone::Adm::list() - .map_err(Error::List)? + .map_err(|err| AdmError { + op: Operation::List, + zone: "".to_string(), + err, + })? .into_iter() .filter(|z| z.name().starts_with(ZONE_PREFIX)) .collect()) @@ -235,12 +298,14 @@ impl Zones { /// /// Can only return zones that start with [`ZONE_PREFIX`], as they /// are managed by the Sled Agent. - pub fn find(name: &str) -> Result, Error> { + pub fn find(name: &str) -> Result, AdmError> { Ok(Self::get()?.into_iter().find(|zone| zone.name() == name)) } /// Returns the name of the VNIC used to communicate with the control plane. - pub fn get_control_interface(zone: &str) -> Result { + pub fn get_control_interface( + zone: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, @@ -252,8 +317,7 @@ impl Zones { "LINK", ]); let output = execute(cmd)?; - String::from_utf8(output.stdout) - .map_err(Error::Parse)? + String::from_utf8_lossy(&output.stdout) .lines() .find_map(|name| { if name.starts_with(VNIC_PREFIX_CONTROL) { @@ -262,7 +326,9 @@ impl Zones { None } }) - .ok_or(Error::NotFound) + .ok_or(GetControlInterfaceError::NotFound { + zone: zone.to_string(), + }) } /// Ensures that an IP address on an interface matches the requested value. @@ -277,23 +343,36 @@ impl Zones { zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, - ) -> Result { - match Self::get_address(zone, addrobj) { - Ok(addr) => { - if let AddressRequest::Static(expected_addr) = addrtype { - // If the address is static, we need to validate that it - // matches the value we asked for. - if addr != expected_addr { - // If the address doesn't match, try removing the old - // value before using the new one. - Self::delete_address(zone, addrobj)?; - return Self::create_address(zone, addrobj, addrtype); + ) -> Result { + |zone, addrobj, addrtype| -> Result { + match Self::get_address(zone, addrobj) { + Ok(addr) => { + if let AddressRequest::Static(expected_addr) = addrtype { + // If the address is static, we need to validate that it + // matches the value we asked for. + if addr != expected_addr { + // If the address doesn't match, try removing the old + // value before using the new one. + Self::delete_address(zone, addrobj) + .map_err(|e| anyhow!(e))?; + return Self::create_address( + zone, addrobj, addrtype, + ) + .map_err(|e| anyhow!(e)); + } } + Ok(addr) } - Ok(addr) + Err(_) => Self::create_address(zone, addrobj, addrtype) + .map_err(|e| anyhow!(e)), } - Err(_) => Self::create_address(zone, addrobj, addrtype), - } + }(zone, addrobj, addrtype) + .map_err(|err| EnsureAddressError { + zone: zone.unwrap_or("global").to_string(), + request: addrtype, + name: addrobj.clone(), + err, + }) } /// Gets the IP address of an interface. @@ -317,10 +396,10 @@ impl Zones { let cmd = command.args(args); let output = execute(cmd)?; - String::from_utf8(output.stdout)? + String::from_utf8_lossy(&output.stdout) .lines() .find_map(|s| s.parse().ok()) - .ok_or(Error::NotFound) + .ok_or(Error::AddressNotFound { addrobj: addrobj.clone() }) } /// Returns Ok(()) if `addrobj` has a corresponding link-local IPv6 address. @@ -344,13 +423,13 @@ impl Zones { let args = prefix.iter().chain(show_addr_args); let cmd = command.args(args); let output = execute(cmd)?; - if let Some(_) = String::from_utf8(output.stdout)? + if let Some(_) = String::from_utf8_lossy(&output.stdout) .lines() .find(|s| s.trim() == "addrconf") { return Ok(()); } - Err(Error::NotFound) + Err(Error::AddressNotFound { addrobj: addrobj.clone() }) } // Attempts to create the requested address. @@ -361,7 +440,7 @@ impl Zones { zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, - ) -> Result<(), Error> { + ) -> Result<(), crate::illumos::ExecutionError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -396,7 +475,7 @@ impl Zones { pub fn delete_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, - ) -> Result<(), Error> { + ) -> Result<(), DeleteAddressError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -409,7 +488,11 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - execute(cmd)?; + execute(cmd).map_err(|err| DeleteAddressError { + zone: zone.unwrap_or("global").to_string(), + addrobj: addrobj.clone(), + err, + })?; Ok(()) } @@ -423,12 +506,8 @@ impl Zones { fn ensure_has_link_local_v6_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, - ) -> Result<(), Error> { - let link_local_addrobj = addrobj.on_same_interface("linklocal")?; - - if let Ok(()) = - Self::has_link_local_v6_address(zone, &link_local_addrobj) - { + ) -> Result<(), crate::illumos::ExecutionError> { + if let Ok(()) = Self::has_link_local_v6_address(zone, &addrobj) { return Ok(()); } @@ -444,7 +523,7 @@ impl Zones { "-t", "-T", "addrconf", - &link_local_addrobj.to_string(), + &addrobj.to_string(), ]; let args = prefix.iter().chain(create_addr_args); @@ -460,24 +539,43 @@ impl Zones { link: PhysicalLink, address: Ipv6Addr, name: &str, - ) -> Result<(), Error> { - let gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal")?; - Self::ensure_has_link_local_v6_address(None, &gz_link_local_addrobj)?; - - // Ensure that a static IPv6 address has been allocated - // to the Global Zone. Without this, we don't have a way - // to route to IP addresses that we want to create in - // the non-GZ. Note that we use a `/64` prefix, as all addresses - // allocated for services on this sled itself are within the underlay - // prefix. Anything else must be routed through Sidecar. - Self::ensure_address( - None, - &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static( - IpAddr::V6(address), - Some(omicron_common::address::SLED_PREFIX), - ), - )?; + ) -> Result<(), EnsureGzAddressError> { + // Call the guts of this function within a closure to make it easier + // to wrap the error with appropriate context. + |link: PhysicalLink, address, name| -> Result<(), anyhow::Error> { + let gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal") + .map_err(|err| anyhow!(err))?; + Self::ensure_has_link_local_v6_address( + None, + &gz_link_local_addrobj, + ) + .map_err(|err| anyhow!(err))?; + + // Ensure that a static IPv6 address has been allocated + // to the Global Zone. Without this, we don't have a way + // to route to IP addresses that we want to create in + // the non-GZ. Note that we use a `/64` prefix, as all addresses + // allocated for services on this sled itself are within the underlay + // prefix. Anything else must be routed through Sidecar. + Self::ensure_address( + None, + &gz_link_local_addrobj + .on_same_interface(name) + .map_err(|err| anyhow!(err))?, + AddressRequest::new_static( + IpAddr::V6(address), + Some(omicron_common::address::SLED_PREFIX), + ), + ) + .map_err(|err| anyhow!(err))?; + Ok(()) + }(link.clone(), address, name) + .map_err(|err| EnsureGzAddressError { + address, + link, + name: name.to_string(), + err, + })?; Ok(()) } @@ -500,9 +598,11 @@ impl Zones { if addr.is_ipv6() { // Finally, actually ensure that the v6 address we want // exists within the zone. + let link_local_addrobj = + addrobj.on_same_interface("linklocal")?; Self::ensure_has_link_local_v6_address( Some(zone), - addrobj, + &link_local_addrobj, )?; } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 099998075be..08775d837c6 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -44,9 +44,6 @@ pub enum Error { #[error("Failure accessing data links: {0}")] Datalink(#[from] crate::illumos::dladm::Error), - #[error("Error accessing zones: {0}")] - Zone(#[from] crate::illumos::zone::Error), - #[error("Failure from Propolis Client: {0}")] Propolis(#[from] propolis_client::Error), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 608dd76233d..2e5970374c6 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -26,12 +26,6 @@ use crate::instance::MockInstance as Instance; pub enum Error { #[error("Instance error: {0}")] Instance(#[from] crate::instance::Error), - - #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), - - #[error(transparent)] - Zone(#[from] crate::illumos::zone::Error), } struct InstanceManagerInternal { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 338eed2d625..16eab79a707 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,31 +20,22 @@ use tokio::sync::Mutex; #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] - TomlSerialize { - path: PathBuf, - err: toml::ser::Error, - }, + TomlSerialize { path: PathBuf, err: toml::ser::Error }, #[error("Cannot deserialize TOML from file {path}: {err}")] - TomlDeserialize { - path: PathBuf, - err: toml::de::Error, - }, + TomlDeserialize { path: PathBuf, err: toml::de::Error }, #[error("I/O Error accessing {path}: {err}")] - Io { - path: PathBuf, - err: std::io::Error, - }, + Io { path: PathBuf, err: std::io::Error }, - #[error(transparent)] - RunningZone(#[from] crate::illumos::running_zone::Error), + #[error("Zone error: {message}: {err}")] + RunningZone { message: String, err: crate::illumos::running_zone::Error }, - #[error("Failed to add address to the global zone: {0}")] - GzAddressFailure(crate::illumos::zone::Error), - - #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), + #[error("Failed to add GZ addresses: {message}: {err}")] + GzAddressFailure { + message: String, + err: crate::illumos::zone::EnsureGzAddressError, + }, #[error("Could not initialize service {service} as requested: {message}")] BadServiceRequest { service: String, message: String }, @@ -111,19 +102,13 @@ impl ServiceManager { config_path.to_string_lossy() ); let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?, - ).map_err(|err| { - Error::TomlDeserialize { - path: config_path.clone(), - err, - } + &tokio::fs::read_to_string(&config_path).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?, + ) + .map_err(|err| Error::TomlDeserialize { + path: config_path.clone(), + err, })?; let mut existing_zones = mgr.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) @@ -184,15 +169,33 @@ impl ServiceManager { // vnics= vec![], ) - .await?; + .await + .map_err(|err| Error::RunningZone { + message: format!("Could not install {}", service.name), + err, + })?; - let running_zone = RunningZone::boot(installed_zone).await?; + let running_zone = RunningZone::boot(installed_zone) + .await + .map_err(|err| Error::RunningZone { + message: format!("Could not boot {}", service.name), + err, + })?; for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await?; + running_zone.ensure_address(addr_request).await.map_err( + |err| Error::RunningZone { + message: format!( + "Failed to create address {} for {}", + addr.to_string(), + service.name + ), + err, + }, + )?; info!( self.log, "Ensuring address {} exists - OK", @@ -214,19 +217,30 @@ impl ServiceManager { *addr, &addr_name, ) - .map_err(|e| Error::GzAddressFailure(e))?; + .map_err(|err| Error::GzAddressFailure { + message: format!( + "Failed adding address for {}", + service.name + ), + err, + })?; } debug!(self.log, "importing manifest"); - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "import", - &format!( - "/var/svc/manifest/site/{}/manifest.xml", - service.name - ), - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "import", + &format!( + "/var/svc/manifest/site/{}/manifest.xml", + service.name + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Failed to import manifest".to_string(), + err, + })?; let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); @@ -241,36 +255,56 @@ impl ServiceManager { message: "Not enough addresses".to_string(), } })?; - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT - ), - ])?; - - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/dns_address=[{}]:{}", - address, DNS_PORT - ), - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Could not set server address property" + .to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Could not set DNS address property" + .to_string(), + err, + })?; // Refresh the manifest with the new properties we set, // so they become "effective" properties when the service is enabled. - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::RunningZone { + message: format!( + "Failed to refresh SMF manifest: {}", + default_smf_name + ), + err, + })?; } _ => { info!( @@ -282,12 +316,20 @@ impl ServiceManager { debug!(self.log, "enabling service"); - running_zone.run_cmd(&[ - crate::illumos::zone::SVCADM, - "enable", - "-t", - &default_smf_name, - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCADM, + "enable", + "-t", + &default_smf_name, + ]) + .map_err(|err| Error::RunningZone { + message: format!( + "Failed to enable {} service", + default_smf_name + ), + err, + })?; existing_zones.push(running_zone); } @@ -308,19 +350,13 @@ impl ServiceManager { let services_to_initialize = { if config_path.exists() { let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?, - ).map_err(|err| { - Error::TomlDeserialize { - path: config_path.clone(), - err, - } + &tokio::fs::read_to_string(&config_path).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?, + ) + .map_err(|err| Error::TomlDeserialize { + path: config_path.clone(), + err, })?; let known_services = cfg.services; @@ -358,21 +394,13 @@ impl ServiceManager { let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); - let services_str = toml::to_string(&serialized_services) - .map_err(|err| { - Error::TomlSerialize { - path: config_path.clone(), - err, - } + let services_str = + toml::to_string(&serialized_services).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } })?; tokio::fs::write(&config_path, services_str) .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?; + .map_err(|err| Error::Io { path: config_path.clone(), err })?; Ok(()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 42249352b72..737c2ec2cd7 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -35,16 +35,16 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Datalink error: {message}, {err}")] - Datalink { - message: String, - err: crate::illumos::dladm::Error, - }, + Datalink { message: String, err: crate::illumos::dladm::Error }, #[error(transparent)] Services(#[from] crate::services::Error), #[error(transparent)] - Zone(#[from] crate::illumos::zone::Error), + ZoneOperation(#[from] crate::illumos::zone::AdmError), + + #[error("Failed to create Sled Subnet: {err}")] + SledSubnet { err: crate::illumos::zone::EnsureGzAddressError }, #[error(transparent)] Zfs(#[from] crate::illumos::zfs::Error), @@ -101,11 +101,9 @@ impl SledAgent { let data_link = if let Some(link) = config.data_link.clone() { link } else { - Dladm::find_physical().map_err(|err| { - Error::Datalink { - message: "Looking up physical link".to_string(), - err, - } + Dladm::find_physical().map_err(|err| Error::Datalink { + message: "Looking up physical link".to_string(), + err, })? }; @@ -130,7 +128,8 @@ impl SledAgent { data_link.clone(), *sled_address.ip(), "sled6", - )?; + ) + .map_err(|err| Error::SledSubnet { err })?; // Identify all existing zones which should be managed by the Sled // Agent. @@ -154,19 +153,15 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics().map_err(|err| { - Error::Datalink { - message: "Looking up VNICs on boot".to_string(), - err, - } + let vnics = Dladm::get_vnics().map_err(|err| Error::Datalink { + message: "Looking up VNICs on boot".to_string(), + err, })?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic).map_err(|err| { - Error::Datalink { - message: "Deleting VNIC during boot".to_string(), - err, - } + Dladm::delete_vnic(&vnic).map_err(|err| Error::Datalink { + message: "Deleting VNIC during boot".to_string(), + err, })?; } @@ -194,8 +189,7 @@ impl SledAgent { data_link.clone(), ); let services = - ServiceManager::new(log.clone(), data_link.clone(), None) - .await?; + ServiceManager::new(log.clone(), data_link.clone(), None).await?; Ok(SledAgent { id: config.id, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index f911f1356df..d5b90887753 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -60,9 +60,6 @@ pub enum Error { #[error(transparent)] Zpool(#[from] crate::illumos::zpool::Error), - #[error("Failed to configure a zone: {0}")] - ZoneConfiguration(crate::illumos::zone::Error), - #[error("Failed to manage a running zone: {0}")] ZoneManagement(#[from] crate::illumos::running_zone::Error), From 1ab32aae74aa89dd4c7fd0dcb01e66c1ec306fad Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 17:23:42 -0400 Subject: [PATCH 25/61] dladm errors --- sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/config.rs | 4 +- sled-agent/src/illumos/dladm.rs | 80 +++++++++++++++++++------- sled-agent/src/illumos/running_zone.rs | 6 +- sled-agent/src/illumos/vnic.rs | 16 ++++-- sled-agent/src/illumos/zone.rs | 2 +- sled-agent/src/instance.rs | 5 +- sled-agent/src/sled_agent.rs | 25 ++++---- sled-agent/src/storage_manager.rs | 3 - 9 files changed, 93 insertions(+), 50 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 50bb833ed5b..7ede44b06bf 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -119,7 +119,7 @@ fn mac_to_socket_addr(mac: MacAddr) -> SocketAddrV6 { // could be randomly generated when it no longer needs to be durable. pub fn bootstrap_address( link: PhysicalLink, -) -> Result { +) -> Result { let mac = Dladm::get_mac(link)?; Ok(mac_to_socket_addr(mac)) } diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index ca332b2c07c..63b9b1115cc 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -49,7 +49,9 @@ impl Config { Ok(config) } - pub fn get_link(&self) -> Result { + pub fn get_link( + &self, + ) -> Result { let link = if let Some(link) = self.data_link.clone() { link } else { diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 92c1ed03916..441fe18fa49 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -15,21 +15,56 @@ pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; pub const DLADM: &str = "/usr/sbin/dladm"; +/// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Device not found")] - NotFound, - - #[error("Subcommand failure: {0}")] +pub enum FindPhysicalLinkError { + #[error("Failed to execute command to find physical link: {0}")] Execution(#[from] ExecutionError), - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), + #[error("No Physical Link devices found")] + NoPhysicalLinkFound, +} + +/// Errors returned from [`Dladm::get_mac`]. +#[derive(thiserror::Error, Debug)] +pub enum GetMacError { + #[error("Mac Address cannot be looked up; Link not found: {0:?}")] + NotFound(PhysicalLink), + + #[error("Failed to execute command to get MAC address: {0}")] + Execution(#[from] ExecutionError), #[error("Failed to parse MAC: {0}")] ParseMac(#[from] macaddr::ParseError), } +/// Errors returned from [`Dladm::create_vnic`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to create VNIC {name} on link {link:?}: {err}")] +pub struct CreateVnicError { + name: String, + link: PhysicalLink, + #[source] + err: ExecutionError, +} + +/// Errors returned from [`Dladm::get_vnics`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to get vnics: {err}")] +pub struct GetVnicError { + #[source] + err: ExecutionError, +} + +/// Errors returned from [`Dladm::delete_vnic`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to delete vnic {name}: {err}")] +pub struct DeleteVnicError { + name: String, + #[source] + err: ExecutionError, +} + /// The name of a physical datalink. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] pub struct PhysicalLink(pub String); @@ -40,24 +75,24 @@ pub struct Dladm {} #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Dladm { /// Returns the name of the first observed physical data link. - pub fn find_physical() -> Result { + pub fn find_physical() -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-phys", "-p", "-o", "LINK"]); let output = execute(cmd)?; - let name = String::from_utf8(output.stdout)? + let name = String::from_utf8_lossy(&output.stdout) .lines() // TODO: This is arbitrary, but we're currently grabbing the first // physical device. Should we have a more sophisticated method for // selection? .next() .map(|s| s.trim()) - .ok_or_else(|| Error::NotFound)? + .ok_or_else(|| FindPhysicalLinkError::NoPhysicalLinkFound)? .to_string(); Ok(PhysicalLink(name)) } /// Returns the MAC address of a physical link. - pub fn get_mac(link: PhysicalLink) -> Result { + pub fn get_mac(link: PhysicalLink) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ DLADM, @@ -69,11 +104,11 @@ impl Dladm { &link.0, ]); let output = execute(cmd)?; - let name = String::from_utf8(output.stdout)? + let name = String::from_utf8_lossy(&output.stdout) .lines() .next() .map(|s| s.trim()) - .ok_or_else(|| Error::NotFound)? + .ok_or_else(|| GetMacError::NotFound(link))? .to_string(); // Ensure the MAC address is zero-padded, so it may be parsed as a @@ -99,7 +134,7 @@ impl Dladm { vnic_name: &str, mac: Option, vlan: Option, - ) -> Result<(), Error> { + ) -> Result<(), CreateVnicError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![ DLADM.to_string(), @@ -121,17 +156,21 @@ impl Dladm { args.push(vnic_name.to_string()); let cmd = command.args(&args); - execute(cmd)?; + execute(cmd).map_err(|err| CreateVnicError { + name: vnic_name.to_string(), + link: physical.clone(), + err, + })?; Ok(()) } /// Returns all VNICs that may be managed by the Sled Agent. - pub fn get_vnics() -> Result, Error> { + pub fn get_vnics() -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); - let output = execute(cmd)?; + let output = execute(cmd).map_err(|err| GetVnicError { err })?; - let vnics = String::from_utf8(output.stdout)? + let vnics = String::from_utf8_lossy(&output.stdout) .lines() .filter(|vnic| vnic.starts_with(VNIC_PREFIX)) .map(|s| s.to_owned()) @@ -140,10 +179,11 @@ impl Dladm { } /// Remove a vnic from the sled. - pub fn delete_vnic(name: &str) -> Result<(), Error> { + pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "delete-vnic", name]); - execute(cmd)?; + execute(cmd) + .map_err(|err| DeleteVnicError { name: name.to_string(), err })?; Ok(()) } } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index 8898ae69654..ca51b9b112f 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -37,8 +37,10 @@ pub enum Error { #[error("Zone management command failed: {0}")] ZoneOperation(#[from] crate::illumos::zone::AdmError), - #[error("Zone error accessing datalink: {0}")] - Datalink(#[from] crate::illumos::dladm::Error), + // TODO: Extra context could be useful - who were we allocating the VNIC + // for? + #[error("Failed to create VNIC: {0}")] + VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), #[error(transparent)] AddrObject(#[from] crate::illumos::addrobj::ParseError), diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index a2d297609b9..d200e4f4bc0 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -5,7 +5,10 @@ //! API for controlling a single instance. use crate::common::vlan::VlanID; -use crate::illumos::dladm::{PhysicalLink, VNIC_PREFIX, VNIC_PREFIX_CONTROL}; +use crate::illumos::dladm::{ + CreateVnicError, DeleteVnicError, PhysicalLink, VNIC_PREFIX, + VNIC_PREFIX_CONTROL, +}; use omicron_common::api::external::MacAddr; use std::sync::{ atomic::{AtomicU64, Ordering}, @@ -17,8 +20,6 @@ use crate::illumos::dladm::Dladm; #[cfg(test)] use crate::illumos::dladm::MockDladm as Dladm; -type Error = crate::illumos::dladm::Error; - /// A shareable wrapper around an atomic counter. /// May be used to allocate runtime-unique IDs for objects /// which have naming constraints - such as VNICs. @@ -54,7 +55,7 @@ impl VnicAllocator { &self, mac: Option, vlan: Option, - ) -> Result { + ) -> Result { let allocator = self.new_superscope("Guest"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); @@ -64,7 +65,10 @@ impl VnicAllocator { /// Creates a new NIC, intended for allowing Propolis to communicate /// with the control plane. - pub fn new_control(&self, mac: Option) -> Result { + pub fn new_control( + &self, + mac: Option, + ) -> Result { let allocator = self.new_superscope("Control"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); @@ -111,7 +115,7 @@ impl Vnic { } /// Deletes a NIC (if it has not already been deleted). - pub fn delete(&mut self) -> Result<(), Error> { + pub fn delete(&mut self) -> Result<(), DeleteVnicError> { if self.deleted { Ok(()) } else { diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 08209cc4123..2cc69283065 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -72,7 +72,7 @@ pub struct DeleteAddressError { /// Error which may be returned accessing the control interface of a zone. #[derive(thiserror::Error, Debug)] pub enum GetControlInterfaceError { - #[error(transparent)] + #[error("Failed to query for control interface: {0}")] Execution(#[from] crate::illumos::ExecutionError), #[error("VNIC starting with 'oxControl' not found in {zone}")] diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 08775d837c6..cbcf957154d 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -41,8 +41,9 @@ pub enum Error { #[error("Failed to wait for service: {0}")] Timeout(String), - #[error("Failure accessing data links: {0}")] - Datalink(#[from] crate::illumos::dladm::Error), + // TODO: Who are we allocating the VNIC for? + #[error("Failed to create VNIC: {0}")] + VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), #[error("Failure from Propolis Client: {0}")] Propolis(#[from] propolis_client::Error), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 737c2ec2cd7..99965018e25 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -34,8 +34,14 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("Datalink error: {message}, {err}")] - Datalink { message: String, err: crate::illumos::dladm::Error }, + #[error("Physical link not in config, nor found automatically: {0}")] + FindPhysicalLink(#[from] crate::illumos::dladm::FindPhysicalLinkError), + + #[error("Failed to lookup VNICs on boot: {0}")] + GetVnics(#[from] crate::illumos::dladm::GetVnicError), + + #[error("Failed to delete VNIC on boot: {0}")] + DeleteVnic(#[from] crate::illumos::dladm::DeleteVnicError), #[error(transparent)] Services(#[from] crate::services::Error), @@ -101,10 +107,7 @@ impl SledAgent { let data_link = if let Some(link) = config.data_link.clone() { link } else { - Dladm::find_physical().map_err(|err| Error::Datalink { - message: "Looking up physical link".to_string(), - err, - })? + Dladm::find_physical()? }; // Before we start creating zones, we need to ensure that the @@ -153,16 +156,10 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics().map_err(|err| Error::Datalink { - message: "Looking up VNICs on boot".to_string(), - err, - })?; + let vnics = Dladm::get_vnics()?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic).map_err(|err| Error::Datalink { - message: "Deleting VNIC during boot".to_string(), - err, - })?; + Dladm::delete_vnic(&vnic)?; } let storage = StorageManager::new( diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d5b90887753..ba399228590 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -51,9 +51,6 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error(transparent)] - Datalink(#[from] crate::illumos::dladm::Error), - #[error(transparent)] Zfs(#[from] crate::illumos::zfs::Error), From 6d165a77969590f912fd6c94e5ee2cee9b4d6995 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 17:33:09 -0400 Subject: [PATCH 26/61] Zpool errors --- sled-agent/src/illumos/zpool.rs | 50 ++++++++++++++++++------------- sled-agent/src/storage_manager.rs | 2 +- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index 67c1c0eaf88..b4021dd8ce9 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -5,7 +5,6 @@ //! Utilities for managing Zpools. use crate::illumos::execute; -use omicron_common::api::external::Error as ExternalError; use serde::{Deserialize, Deserializer}; use std::str::FromStr; use uuid::Uuid; @@ -13,24 +12,24 @@ use uuid::Uuid; const ZPOOL: &str = "/usr/sbin/zpool"; #[derive(thiserror::Error, Debug, PartialEq, Eq)] -pub enum ParseError { - #[error("Failed to parse output as UTF-8: {0}")] - Utf8(#[from] std::string::FromUtf8Error), - - #[error("Failed to parse output: {0}")] - Parse(String), -} +#[error("Failed to parse output: {0}")] +pub struct ParseError(String); #[derive(thiserror::Error, Debug)] -pub enum Error { +enum Error { #[error("Zpool execution error: {0}")] Execution(#[from] crate::illumos::ExecutionError), #[error(transparent)] Parse(#[from] ParseError), +} - #[error("Failed to execute subcommand: {0}")] - Command(ExternalError), +#[derive(thiserror::Error, Debug)] +#[error("Failed to get info for zpool {name}: {err}")] +pub struct GetInfoError { + name: String, + #[source] + err: Error, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -62,7 +61,7 @@ impl FromStr for ZpoolHealth { "OFFLINE" => Ok(ZpoolHealth::Offline), "REMOVED" => Ok(ZpoolHealth::Removed), "UNAVAIL" => Ok(ZpoolHealth::Unavailable), - _ => Err(ParseError::Parse(format!( + _ => Err(ParseError(format!( "Unrecognized zpool 'health': {}", s ))), @@ -111,13 +110,13 @@ impl FromStr for ZpoolInfo { fn from_str(s: &str) -> Result { // Lambda helpers for error handling. let expected_field = |name| { - ParseError::Parse(format!( + ParseError(format!( "Missing '{}' value in zpool list output", name )) }; let failed_to_parse = |name, err| { - ParseError::Parse(format!( + ParseError(format!( "Failed to parse field '{}': {}", name, err )) @@ -155,7 +154,7 @@ pub struct Zpool {} #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zpool { - pub fn get_info(name: &str) -> Result { + pub fn get_info(name: &str) -> Result { let mut command = std::process::Command::new(ZPOOL); let cmd = command.args(&[ "list", @@ -164,11 +163,20 @@ impl Zpool { name, ]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout) - .map_err(|e| ParseError::Utf8(e))?; - - let zpool = stdout.parse::()?; + let output = execute(cmd).map_err(|err| { + GetInfoError { + name: name.to_string(), + err: err.into(), + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); + let zpool = stdout.parse::() + .map_err(|err| { + GetInfoError { + name: name.to_string(), + err: err.into(), + } + })?; Ok(zpool) } } @@ -294,7 +302,7 @@ mod test { let input = format!("{} {} {} {}", name, size, allocated, free); let result: Result = input.parse(); - let expected_err = ParseError::Parse( + let expected_err = ParseError( "Missing 'health' value in zpool list output".to_owned(), ); assert_eq!(result.unwrap_err(), expected_err,); diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index ba399228590..8b9ce638583 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -55,7 +55,7 @@ pub enum Error { Zfs(#[from] crate::illumos::zfs::Error), #[error(transparent)] - Zpool(#[from] crate::illumos::zpool::Error), + GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), #[error("Failed to manage a running zone: {0}")] ZoneManagement(#[from] crate::illumos::running_zone::Error), From 4be8ed0321a50d868ab2e5c3d7f124f89bf87002 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 18:03:36 -0400 Subject: [PATCH 27/61] zfs errors --- sled-agent/src/illumos/zfs.rs | 131 +++++++++++++++++++++++------- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/storage_manager.rs | 13 ++- 3 files changed, 116 insertions(+), 30 deletions(-) diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index 61b468afaa6..b1ca75002ac 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -12,25 +12,70 @@ pub const ZONE_ZFS_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_DATASET: &str = "rpool/zone"; const ZFS: &str = "/usr/sbin/zfs"; +/// Error returned by [`Zfs::list_filesystems`]. #[derive(thiserror::Error, Debug)] -pub enum Error { +#[error("Could not list filesystems within dataset {name}: {err}")] +pub struct ListFilesystemsError { + name: String, + #[source] + err: crate::illumos::ExecutionError, +} + +#[derive(thiserror::Error, Debug)] +enum EnsureFilesystemErrorRaw { #[error("ZFS execution error: {0}")] Execution(#[from] crate::illumos::ExecutionError), - #[error("Does not exist: {0}")] - NotFound(String), + #[error("Filesystem does not exist, and formatting was not requested")] + NotFoundNotFormatted, #[error("Unexpected output from ZFS commands: {0}")] Output(String), +} + +/// Error returned by [`Zfs::ensure_zoned_filesystem`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}")] +pub struct EnsureFilesystemError { + name: String, + mountpoint: Mountpoint, + #[source] + err: EnsureFilesystemErrorRaw, +} + +/// Error returned by [`Zfs::set_oxide_value`] +#[derive(thiserror::Error, Debug)] +#[error("Failed to set value '{name}={value}' on filesystem {filesystem}: {err}")] +pub struct SetValueError { + filesystem: String, + name: String, + value: String, + err: crate::illumos::ExecutionError, +} - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), +#[derive(thiserror::Error, Debug)] +enum GetValueErrorRaw { + #[error(transparent)] + Execution(#[from] crate::illumos::ExecutionError), + + #[error("No value found with that name")] + MissingValue, +} + +/// Error returned by [`Zfs:get_oxide_value`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")] +pub struct GetValueError { + filesystem: String, + name: String, + err: GetValueErrorRaw, } /// Wraps commands for interacting with ZFS. pub struct Zfs {} /// Describes a mountpoint for a ZFS filesystem. +#[derive(Debug, Clone)] pub enum Mountpoint { #[allow(dead_code)] Legacy, @@ -49,12 +94,17 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { /// Lists all filesystems within a dataset. - pub fn list_filesystems(name: &str) -> Result, Error> { + pub fn list_filesystems(name: &str) -> Result, ListFilesystemsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; + let output = execute(cmd).map_err(|err| { + ListFilesystemsError { + name: name.to_string(), + err, + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout .trim() .split('\n') @@ -71,26 +121,31 @@ impl Zfs { name: &str, mountpoint: Mountpoint, do_format: bool, - ) -> Result<(), Error> { + ) -> Result<(), EnsureFilesystemError> { // If the dataset exists, we're done. let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-Hpo", "name,type,mountpoint", name]); // If the list command returns any valid output, validate it. if let Ok(output) = execute(cmd) { - let stdout = String::from_utf8(output.stdout)?; + let stdout = String::from_utf8_lossy(&output.stdout); let values: Vec<&str> = stdout.trim().split('\t').collect(); if values != &[name, "filesystem", &mountpoint.to_string()] { - return Err(Error::Output(stdout)); + return Err(EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), + }); } return Ok(()); } if !do_format { - return Err(Error::NotFound(format!( - "Filesystem {} not found", - name - ))); + return Err(EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: EnsureFilesystemErrorRaw::NotFoundNotFormatted, + }); } // If it doesn't exist, make it. @@ -102,10 +157,16 @@ impl Zfs { "-o", "zoned=on", "-o", - &format!("mountpoint={}", mountpoint), + &format!("mountpoint={}", &mountpoint), name, ]); - execute(cmd)?; + execute(cmd).map_err(|err| { + EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: err.into(), + } + })?; Ok(()) } @@ -113,7 +174,7 @@ impl Zfs { filesystem_name: &str, name: &str, value: &str, - ) -> Result<(), Error> { + ) -> Result<(), SetValueError> { Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value) } @@ -121,33 +182,47 @@ impl Zfs { filesystem_name: &str, name: &str, value: &str, - ) -> Result<(), Error> { + ) -> Result<(), SetValueError> { let mut command = std::process::Command::new(PFEXEC); let value_arg = format!("{}={}", name, value); let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); - execute(cmd)?; + execute(cmd).map_err(|err| { + SetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + value: value.to_string(), + err, + } + })?; Ok(()) } pub fn get_oxide_value( filesystem_name: &str, name: &str, - ) -> Result { + ) -> Result { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value(filesystem_name: &str, name: &str) -> Result { + fn get_value(filesystem_name: &str, name: &str) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; + let output = execute(cmd).map_err(|err| { + GetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: err.into(), + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); let value = stdout.trim(); if value == "-" { - return Err(Error::NotFound(format!( - "Property {}, within filesystem {}", - name, filesystem_name - ))); + return Err(GetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: GetValueErrorRaw::MissingValue, + }); } Ok(value.to_string()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 99965018e25..46090319783 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -53,7 +53,7 @@ pub enum Error { SledSubnet { err: crate::illumos::zone::EnsureGzAddressError }, #[error(transparent)] - Zfs(#[from] crate::illumos::zfs::Error), + ZfsEnsureFilesystem(#[from] crate::illumos::zfs::EnsureFilesystemError), #[error("Error managing instances: {0}")] Instance(#[from] crate::instance_manager::Error), diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 8b9ce638583..35bbda8cf10 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -51,8 +51,19 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { + // TODO: We could add the context of "why are we doint this op", maybe? + + #[error(transparent)] + ZfsListFilesystems(#[from] crate::illumos::zfs::ListFilesystemsError), + + #[error(transparent)] + ZfsEnsureFilesystem(#[from] crate::illumos::zfs::EnsureFilesystemError), + + #[error(transparent)] + ZfsSetValue(#[from] crate::illumos::zfs::SetValueError), + #[error(transparent)] - Zfs(#[from] crate::illumos::zfs::Error), + ZfsGetValue(#[from] crate::illumos::zfs::GetValueError), #[error(transparent)] GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), From 7dd7fd927b7051b3c8bfa634095581a2af910408 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 18:07:25 -0400 Subject: [PATCH 28/61] execution errs --- sled-agent/src/illumos/mod.rs | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index bdec8e7e702..c03549769ac 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -19,8 +19,11 @@ const PFEXEC: &str = "/usr/bin/pfexec"; #[derive(thiserror::Error, Debug)] pub enum ExecutionError { - #[error("Failed to start execution of process: {0}")] - ExecutionStart(std::io::Error), + #[error("Failed to start execution of [{command}]: {err}")] + ExecutionStart { + command: String, + err: std::io::Error, + }, #[error( "Command [{command}] executed and failed with status: {status}. Output: {stderr}" @@ -38,21 +41,32 @@ pub enum ExecutionError { mod inner { use super::*; + fn to_string( + command: &mut std::process::Command + ) -> String { + command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" ") + } + // Helper function for starting the process and checking the // exit code result. pub fn execute( command: &mut std::process::Command, ) -> Result { let output = - command.output().map_err(|e| ExecutionError::ExecutionStart(e))?; + command.output().map_err(|err| { + ExecutionError::ExecutionStart { + command: to_string(command), + err, + } + })?; if !output.status.success() { return Err(ExecutionError::CommandFailure { - command: command - .get_args() - .map(|s| s.to_string_lossy().into()) - .collect::>() - .join(" "), + command: to_string(command), status: output.status, stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); From a094f8ed213a5e016fa0016bbd5fae1959cca08a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:14:47 -0400 Subject: [PATCH 29/61] Running zone --- sled-agent/src/illumos/running_zone.rs | 207 +++++++++++++++++++------ sled-agent/src/illumos/zone.rs | 15 +- sled-agent/src/instance.rs | 11 +- sled-agent/src/services.rs | 73 ++++----- sled-agent/src/storage_manager.rs | 30 ++-- 5 files changed, 233 insertions(+), 103 deletions(-) diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index ca51b9b112f..f0f53a628f0 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -17,39 +17,83 @@ use crate::illumos::zone::MockZones as Zones; #[cfg(not(test))] use crate::illumos::zone::Zones; +/// Errors returned from [`RunningZone::run_cmd`]. #[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Zone not found")] - NotFound, - - #[error("Zone is not running; it is in the {0:?} state instead")] - NotRunning(zone::State), - - #[error("Execution error: {0}")] - Execution(#[from] crate::illumos::ExecutionError), - - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), - - #[error("Failed to create address: {0}")] - AddressCreation(#[from] crate::illumos::zone::EnsureAddressError), +#[error("Error running command in zone '{zone}': {err}")] +pub struct RunCommandError { + zone: String, + #[source] + err: crate::illumos::ExecutionError, +} - #[error("Zone management command failed: {0}")] - ZoneOperation(#[from] crate::illumos::zone::AdmError), +/// Errors returned from [`RunningZone::boot`]. +#[derive(thiserror::Error, Debug)] +pub enum BootError { + #[error("Error booting zone: {0}")] + Booting(#[from] crate::illumos::zone::AdmError), + + #[error("Zone booted, but timed out waiting for {service} in {zone}")] + Timeout { + service: String, + zone: String, + }, +} - // TODO: Extra context could be useful - who were we allocating the VNIC - // for? - #[error("Failed to create VNIC: {0}")] - VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), +/// Errors returned from [`RunningZone::ensure_address`]. +#[derive(thiserror::Error, Debug)] +pub enum EnsureAddressError { + #[error("Failed ensuring address {request:?} in {zone}: could not construct addrobj name: {err}")] + AddrObject{ + request: AddressRequest, + zone: String, + err: crate::illumos::addrobj::ParseError + }, #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::ParseError), - - #[error("Timeout waiting for a service: {0}")] - Timeout(String), + EnsureAddressError(#[from] crate::illumos::zone::EnsureAddressError), +} - #[error(transparent)] - NoControlInterface(#[from] crate::illumos::zone::GetControlInterfaceError), +/// Erros returned from [`RunningZone::get_zone`]. +#[derive(thiserror::Error, Debug)] +pub enum GetZoneError { + #[error("While looking up zones with prefix '{prefix}', could not get zones: {err}")] + GetZones { + prefix: String, + #[source] + err: crate::illumos::zone::AdmError, + }, + + #[error("Zone with prefix '{prefix}' not found")] + NotFound { + prefix: String + }, + + #[error("Cannot get zone '{name}': it is in the {state:?} state instead of running")] + NotRunning{ + name: String, + state: zone::State + }, + + #[error("Cannot get zone '{name}': Failed to acquire control interface {err}")] + ControlInterface { + name: String, + #[source] + err: crate::illumos::zone::GetControlInterfaceError, + }, + + #[error("Cannot get zone '{name}': Failed to create addrobj: {err}")] + AddrObject{ + name: String, + #[source] + err: crate::illumos::addrobj::ParseError, + }, + + #[error("Cannot get zone '{name}': Failed to ensure address exists: {err}")] + EnsureAddress { + name: String, + #[source] + err: crate::illumos::zone::EnsureAddressError, + }, } /// Represents a running zone. @@ -63,7 +107,7 @@ impl RunningZone { } /// Runs a command within the Zone, return the output. - pub fn run_cmd(&self, args: I) -> Result + pub fn run_cmd(&self, args: I) -> Result where I: IntoIterator, S: AsRef, @@ -79,15 +123,21 @@ impl RunningZone { .chain(suffix.iter().map(|a| a.as_ref())); let cmd = command.args(full_args); - let output = crate::illumos::execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; - Ok(stdout) + let output = crate::illumos::execute(cmd) + .map_err(|err| { + RunCommandError { + zone: name.to_string(), + err, + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); + Ok(stdout.to_string()) } /// Boots a new zone. /// /// Note that the zone must already be configured to be booted. - pub async fn boot(zone: InstalledZone) -> Result { + pub async fn boot(zone: InstalledZone) -> Result { // Boot the zone. info!(zone.log, "Zone booting"); @@ -98,7 +148,10 @@ impl RunningZone { let fmri = "svc:/milestone/network:default"; wait_for_service(Some(&zone.name), fmri) .await - .map_err(|_| Error::Timeout(fmri.to_string()))?; + .map_err(|_| BootError::Timeout { + service: fmri.to_string(), + zone: zone.name.to_string(), + })?; Ok(RunningZone { inner: zone }) } @@ -106,7 +159,7 @@ impl RunningZone { pub async fn ensure_address( &self, addrtype: AddressRequest, - ) -> Result { + ) -> Result { info!(self.inner.log, "Adding address: {:?}", addrtype); let name = match addrtype { AddressRequest::Dhcp => "omicron", @@ -115,7 +168,14 @@ impl RunningZone { std::net::IpAddr::V6(_) => "omicron6", }, }; - let addrobj = AddrObject::new(self.inner.control_vnic.name(), name)?; + let addrobj = AddrObject::new(self.inner.control_vnic.name(), name) + .map_err(|err| { + EnsureAddressError::AddrObject { + request: addrtype, + zone: self.inner.name.clone(), + err: err.into(), + } + })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; Ok(network) @@ -134,20 +194,46 @@ impl RunningZone { log: &Logger, zone_prefix: &str, addrtype: AddressRequest, - ) -> Result { - let zone_info = Zones::get()? + ) -> Result { + let zone_info = Zones::get() + .map_err(|err| { + GetZoneError::GetZones { + prefix: zone_prefix.to_string(), + err, + } + })? .into_iter() .find(|zone_info| zone_info.name().starts_with(&zone_prefix)) - .ok_or_else(|| Error::NotFound)?; + .ok_or_else(|| GetZoneError::NotFound { prefix: zone_prefix.to_string() })?; if zone_info.state() != zone::State::Running { - return Err(Error::NotRunning(zone_info.state())); + return Err(GetZoneError::NotRunning { + name: zone_info.name().to_string(), + state: zone_info.state() + }); } let zone_name = zone_info.name(); - let vnic_name = Zones::get_control_interface(zone_name)?; - let addrobj = AddrObject::new_control(&vnic_name)?; - Zones::ensure_address(Some(zone_name), &addrobj, addrtype)?; + let vnic_name = Zones::get_control_interface(zone_name) + .map_err(|err| { + GetZoneError::ControlInterface { + name: zone_name.to_string(), + err, + } + })?; + let addrobj = AddrObject::new_control(&vnic_name).map_err(|err| { + GetZoneError::AddrObject { + name: zone_name.to_string(), + err, + } + })?; + Zones::ensure_address(Some(zone_name), &addrobj, addrtype) + .map_err(|err| { + GetZoneError::EnsureAddress { + name: zone_name.to_string(), + err, + } + })?; Ok(Self { inner: InstalledZone { @@ -180,6 +266,25 @@ impl Drop for RunningZone { } } +/// Errors returned from [`InstalledZone::install`]. +#[derive(thiserror::Error, Debug)] +pub enum InstallZoneError { + #[error("Cannot create '{service}': failed to create control VNIC: {err}")] + CreateVnic { + service: String, + #[source] + err: crate::illumos::dladm::CreateVnicError, + }, + + #[error("Failed to install zone '{zone}' from '{image_path}': {err}")] + InstallZone { + zone: String, + image_path: PathBuf, + #[source] + err: crate::illumos::zone::AdmError, + }, +} + pub struct InstalledZone { log: Logger, @@ -223,8 +328,14 @@ impl InstalledZone { datasets: &[zone::Dataset], devices: &[zone::Device], vnics: Vec, - ) -> Result { - let control_vnic = vnic_allocator.new_control(None)?; + ) -> Result { + let control_vnic = vnic_allocator.new_control(None) + .map_err(|err| { + InstallZoneError::CreateVnic { + service: service_name.to_string(), + err, + } + })?; let zone_name = Self::get_zone_name(service_name, unique_name); let zone_image_path = @@ -243,7 +354,13 @@ impl InstalledZone { &datasets, &devices, vnic_names, - )?; + ).map_err(|err| { + InstallZoneError::InstallZone { + zone: zone_name.to_string(), + image_path: zone_image_path.clone(), + err, + } + })?; Ok(InstalledZone { log: log.new(o!("zone" => zone_name.clone())), diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 2cc69283065..0b68f4c773a 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -72,8 +72,12 @@ pub struct DeleteAddressError { /// Error which may be returned accessing the control interface of a zone. #[derive(thiserror::Error, Debug)] pub enum GetControlInterfaceError { - #[error("Failed to query for control interface: {0}")] - Execution(#[from] crate::illumos::ExecutionError), + #[error("Failed to query zone '{zone}' for control interface: {err}")] + Execution { + zone: String, + #[source] + err: crate::illumos::ExecutionError, + }, #[error("VNIC starting with 'oxControl' not found in {zone}")] NotFound { zone: String }, @@ -316,7 +320,12 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd)?; + let output = execute(cmd).map_err(|err| { + GetControlInterfaceError::Execution { + zone: zone.to_string(), + err, + } + })?; String::from_utf8_lossy(&output.stdout) .lines() .find_map(|name| { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index cbcf957154d..87af13085a7 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -61,7 +61,16 @@ pub enum Error { Migration(anyhow::Error), #[error(transparent)] - RunningZone(#[from] crate::illumos::running_zone::Error), + ZoneCommand(#[from] crate::illumos::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("serde_json failure: {0}")] SerdeJsonError(#[from] serde_json::Error), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 16eab79a707..9f3aebf6dfc 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -28,11 +28,24 @@ pub enum Error { #[error("I/O Error accessing {path}: {err}")] Io { path: PathBuf, err: std::io::Error }, - #[error("Zone error: {message}: {err}")] - RunningZone { message: String, err: crate::illumos::running_zone::Error }, + #[error("Failed to do '{intent}' by running command in zone: {err}")] + ZoneCommand { + intent: String, + #[source] + err: crate::illumos::running_zone::RunCommandError, + }, + + #[error("Failed to boot zone: {0}")] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("Failed to add GZ addresses: {message}: {err}")] - GzAddressFailure { + GzAddress { message: String, err: crate::illumos::zone::EnsureGzAddressError, }, @@ -169,33 +182,15 @@ impl ServiceManager { // vnics= vec![], ) - .await - .map_err(|err| Error::RunningZone { - message: format!("Could not install {}", service.name), - err, - })?; + .await?; - let running_zone = RunningZone::boot(installed_zone) - .await - .map_err(|err| Error::RunningZone { - message: format!("Could not boot {}", service.name), - err, - })?; + let running_zone = RunningZone::boot(installed_zone).await?; for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await.map_err( - |err| Error::RunningZone { - message: format!( - "Failed to create address {} for {}", - addr.to_string(), - service.name - ), - err, - }, - )?; + running_zone.ensure_address(addr_request).await?; info!( self.log, "Ensuring address {} exists - OK", @@ -217,7 +212,7 @@ impl ServiceManager { *addr, &addr_name, ) - .map_err(|err| Error::GzAddressFailure { + .map_err(|err| Error::GzAddress { message: format!( "Failed adding address for {}", service.name @@ -237,8 +232,8 @@ impl ServiceManager { service.name ), ]) - .map_err(|err| Error::RunningZone { - message: "Failed to import manifest".to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "importing manifest".to_string(), err, })?; @@ -266,9 +261,8 @@ impl ServiceManager { address, DNS_SERVER_PORT ), ]) - .map_err(|err| Error::RunningZone { - message: "Could not set server address property" - .to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), err, })?; @@ -283,9 +277,8 @@ impl ServiceManager { address, DNS_PORT ), ]) - .map_err(|err| Error::RunningZone { - message: "Could not set DNS address property" - .to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "Set DNS address".to_string(), err, })?; @@ -298,11 +291,8 @@ impl ServiceManager { &default_smf_name, "refresh", ]) - .map_err(|err| Error::RunningZone { - message: format!( - "Failed to refresh SMF manifest: {}", - default_smf_name - ), + .map_err(|err| Error::ZoneCommand { + intent: format!("Refresh SMF manifest {}", default_smf_name), err, })?; } @@ -323,11 +313,8 @@ impl ServiceManager { "-t", &default_smf_name, ]) - .map_err(|err| Error::RunningZone { - message: format!( - "Failed to enable {} service", - default_smf_name - ), + .map_err(|err| Error::ZoneCommand { + intent: format!("Enable {} service", default_smf_name), err, })?; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 35bbda8cf10..7f15e0961bf 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{ - Error as RunningZoneError, InstalledZone, RunningZone, + InstalledZone, RunningZone, }; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; @@ -68,8 +68,17 @@ pub enum Error { #[error(transparent)] GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), - #[error("Failed to manage a running zone: {0}")] - ZoneManagement(#[from] crate::illumos::running_zone::Error), + #[error(transparent)] + ZoneCommand(#[from] crate::illumos::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("Error parsing pool size: {0}")] BadPoolSize(#[from] ByteCountRangeError), @@ -433,14 +442,13 @@ async fn ensure_running_zone( let address_request = AddressRequest::new_static(dataset_info.address.ip(), None); - match RunningZone::get(log, &dataset_info.zone_prefix(), address_request) - .await - { + let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request).await; + match err { Ok(zone) => { info!(log, "Zone for {} is already running", dataset_name.full()); return Ok(zone); } - Err(RunningZoneError::NotFound) => { + Err(crate::illumos::running_zone::GetZoneError::NotFound { .. }) => { info!(log, "Zone for {} was not found", dataset_name.full()); let installed_zone = InstalledZone::install( @@ -463,14 +471,14 @@ async fn ensure_running_zone( Ok(zone) } - Err(RunningZoneError::NotRunning(_state)) => { + Err(crate::illumos::running_zone::GetZoneError::NotRunning { name, state }) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): - unimplemented!("Handle a zone which exists, but is not running"); + unimplemented!("Handle a zone which exists, but is not running: {name}, in {state:?}"); } - Err(_) => { + Err(err) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): unimplemented!( - "Handle a zone which exists, has some other problem" + "Handle a zone which exists, has some other problem: {err}" ); } } From bc1c40a42d4783b405830acb49b213e2e620ce6b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:15:12 -0400 Subject: [PATCH 30/61] fmt --- sled-agent/src/illumos/mod.rs | 19 ++--- sled-agent/src/illumos/running_zone.rs | 111 +++++++++++-------------- sled-agent/src/illumos/zfs.rs | 57 ++++++------- sled-agent/src/illumos/zone.rs | 5 +- sled-agent/src/illumos/zpool.rs | 33 ++------ sled-agent/src/services.rs | 5 +- sled-agent/src/storage_manager.rs | 18 ++-- 7 files changed, 106 insertions(+), 142 deletions(-) diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index c03549769ac..bb6eb5cae7f 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -20,10 +20,7 @@ const PFEXEC: &str = "/usr/bin/pfexec"; #[derive(thiserror::Error, Debug)] pub enum ExecutionError { #[error("Failed to start execution of [{command}]: {err}")] - ExecutionStart { - command: String, - err: std::io::Error, - }, + ExecutionStart { command: String, err: std::io::Error }, #[error( "Command [{command}] executed and failed with status: {status}. Output: {stderr}" @@ -41,9 +38,7 @@ pub enum ExecutionError { mod inner { use super::*; - fn to_string( - command: &mut std::process::Command - ) -> String { + fn to_string(command: &mut std::process::Command) -> String { command .get_args() .map(|s| s.to_string_lossy().into()) @@ -56,13 +51,9 @@ mod inner { pub fn execute( command: &mut std::process::Command, ) -> Result { - let output = - command.output().map_err(|err| { - ExecutionError::ExecutionStart { - command: to_string(command), - err, - } - })?; + let output = command.output().map_err(|err| { + ExecutionError::ExecutionStart { command: to_string(command), err } + })?; if !output.status.success() { return Err(ExecutionError::CommandFailure { diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index f0f53a628f0..ddfa7753982 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -33,20 +33,17 @@ pub enum BootError { Booting(#[from] crate::illumos::zone::AdmError), #[error("Zone booted, but timed out waiting for {service} in {zone}")] - Timeout { - service: String, - zone: String, - }, + Timeout { service: String, zone: String }, } /// Errors returned from [`RunningZone::ensure_address`]. #[derive(thiserror::Error, Debug)] pub enum EnsureAddressError { #[error("Failed ensuring address {request:?} in {zone}: could not construct addrobj name: {err}")] - AddrObject{ + AddrObject { request: AddressRequest, zone: String, - err: crate::illumos::addrobj::ParseError + err: crate::illumos::addrobj::ParseError, }, #[error(transparent)] @@ -64,17 +61,14 @@ pub enum GetZoneError { }, #[error("Zone with prefix '{prefix}' not found")] - NotFound { - prefix: String - }, + NotFound { prefix: String }, #[error("Cannot get zone '{name}': it is in the {state:?} state instead of running")] - NotRunning{ - name: String, - state: zone::State - }, + NotRunning { name: String, state: zone::State }, - #[error("Cannot get zone '{name}': Failed to acquire control interface {err}")] + #[error( + "Cannot get zone '{name}': Failed to acquire control interface {err}" + )] ControlInterface { name: String, #[source] @@ -82,13 +76,15 @@ pub enum GetZoneError { }, #[error("Cannot get zone '{name}': Failed to create addrobj: {err}")] - AddrObject{ + AddrObject { name: String, #[source] err: crate::illumos::addrobj::ParseError, }, - #[error("Cannot get zone '{name}': Failed to ensure address exists: {err}")] + #[error( + "Cannot get zone '{name}': Failed to ensure address exists: {err}" + )] EnsureAddress { name: String, #[source] @@ -124,12 +120,7 @@ impl RunningZone { let cmd = command.args(full_args); let output = crate::illumos::execute(cmd) - .map_err(|err| { - RunCommandError { - zone: name.to_string(), - err, - } - })?; + .map_err(|err| RunCommandError { zone: name.to_string(), err })?; let stdout = String::from_utf8_lossy(&output.stdout); Ok(stdout.to_string()) } @@ -146,12 +137,12 @@ impl RunningZone { // Wait for the network services to come online, so future // requests to create addresses can operate immediately. let fmri = "svc:/milestone/network:default"; - wait_for_service(Some(&zone.name), fmri) - .await - .map_err(|_| BootError::Timeout { + wait_for_service(Some(&zone.name), fmri).await.map_err(|_| { + BootError::Timeout { service: fmri.to_string(), zone: zone.name.to_string(), - })?; + } + })?; Ok(RunningZone { inner: zone }) } @@ -169,12 +160,10 @@ impl RunningZone { }, }; let addrobj = AddrObject::new(self.inner.control_vnic.name(), name) - .map_err(|err| { - EnsureAddressError::AddrObject { - request: addrtype, - zone: self.inner.name.clone(), - err: err.into(), - } + .map_err(|err| EnsureAddressError::AddrObject { + request: addrtype, + zone: self.inner.name.clone(), + err: err.into(), })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; @@ -196,44 +185,40 @@ impl RunningZone { addrtype: AddressRequest, ) -> Result { let zone_info = Zones::get() - .map_err(|err| { - GetZoneError::GetZones { - prefix: zone_prefix.to_string(), - err, - } + .map_err(|err| GetZoneError::GetZones { + prefix: zone_prefix.to_string(), + err, })? .into_iter() .find(|zone_info| zone_info.name().starts_with(&zone_prefix)) - .ok_or_else(|| GetZoneError::NotFound { prefix: zone_prefix.to_string() })?; + .ok_or_else(|| GetZoneError::NotFound { + prefix: zone_prefix.to_string(), + })?; if zone_info.state() != zone::State::Running { return Err(GetZoneError::NotRunning { name: zone_info.name().to_string(), - state: zone_info.state() + state: zone_info.state(), }); } let zone_name = zone_info.name(); - let vnic_name = Zones::get_control_interface(zone_name) - .map_err(|err| { + let vnic_name = + Zones::get_control_interface(zone_name).map_err(|err| { GetZoneError::ControlInterface { name: zone_name.to_string(), err, } })?; let addrobj = AddrObject::new_control(&vnic_name).map_err(|err| { - GetZoneError::AddrObject { + GetZoneError::AddrObject { name: zone_name.to_string(), err } + })?; + Zones::ensure_address(Some(zone_name), &addrobj, addrtype).map_err( + |err| GetZoneError::EnsureAddress { name: zone_name.to_string(), err, - } - })?; - Zones::ensure_address(Some(zone_name), &addrobj, addrtype) - .map_err(|err| { - GetZoneError::EnsureAddress { - name: zone_name.to_string(), - err, - } - })?; + }, + )?; Ok(Self { inner: InstalledZone { @@ -329,13 +314,12 @@ impl InstalledZone { devices: &[zone::Device], vnics: Vec, ) -> Result { - let control_vnic = vnic_allocator.new_control(None) - .map_err(|err| { - InstallZoneError::CreateVnic { - service: service_name.to_string(), - err, - } - })?; + let control_vnic = vnic_allocator.new_control(None).map_err(|err| { + InstallZoneError::CreateVnic { + service: service_name.to_string(), + err, + } + })?; let zone_name = Self::get_zone_name(service_name, unique_name); let zone_image_path = @@ -354,12 +338,11 @@ impl InstalledZone { &datasets, &devices, vnic_names, - ).map_err(|err| { - InstallZoneError::InstallZone { - zone: zone_name.to_string(), - image_path: zone_image_path.clone(), - err, - } + ) + .map_err(|err| InstallZoneError::InstallZone { + zone: zone_name.to_string(), + image_path: zone_image_path.clone(), + err, })?; Ok(InstalledZone { diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index b1ca75002ac..00e39ef267c 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -35,7 +35,9 @@ enum EnsureFilesystemErrorRaw { /// Error returned by [`Zfs::ensure_zoned_filesystem`]. #[derive(thiserror::Error, Debug)] -#[error("Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}")] +#[error( + "Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}" +)] pub struct EnsureFilesystemError { name: String, mountpoint: Mountpoint, @@ -45,7 +47,9 @@ pub struct EnsureFilesystemError { /// Error returned by [`Zfs::set_oxide_value`] #[derive(thiserror::Error, Debug)] -#[error("Failed to set value '{name}={value}' on filesystem {filesystem}: {err}")] +#[error( + "Failed to set value '{name}={value}' on filesystem {filesystem}: {err}" +)] pub struct SetValueError { filesystem: String, name: String, @@ -94,15 +98,15 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { /// Lists all filesystems within a dataset. - pub fn list_filesystems(name: &str) -> Result, ListFilesystemsError> { + pub fn list_filesystems( + name: &str, + ) -> Result, ListFilesystemsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd).map_err(|err| { - ListFilesystemsError { - name: name.to_string(), - err, - } + let output = execute(cmd).map_err(|err| ListFilesystemsError { + name: name.to_string(), + err, })?; let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout @@ -160,12 +164,10 @@ impl Zfs { &format!("mountpoint={}", &mountpoint), name, ]); - execute(cmd).map_err(|err| { - EnsureFilesystemError { - name: name.to_string(), - mountpoint, - err: err.into(), - } + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: err.into(), })?; Ok(()) } @@ -186,13 +188,11 @@ impl Zfs { let mut command = std::process::Command::new(PFEXEC); let value_arg = format!("{}={}", name, value); let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); - execute(cmd).map_err(|err| { - SetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - value: value.to_string(), - err, - } + execute(cmd).map_err(|err| SetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + value: value.to_string(), + err, })?; Ok(()) } @@ -204,16 +204,17 @@ impl Zfs { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value(filesystem_name: &str, name: &str) -> Result { + fn get_value( + filesystem_name: &str, + name: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd).map_err(|err| { - GetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - err: err.into(), - } + let output = execute(cmd).map_err(|err| GetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: err.into(), })?; let stdout = String::from_utf8_lossy(&output.stdout); let value = stdout.trim(); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 0b68f4c773a..35f2edbe45a 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -321,10 +321,7 @@ impl Zones { "LINK", ]); let output = execute(cmd).map_err(|err| { - GetControlInterfaceError::Execution { - zone: zone.to_string(), - err, - } + GetControlInterfaceError::Execution { zone: zone.to_string(), err } })?; String::from_utf8_lossy(&output.stdout) .lines() diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index b4021dd8ce9..0ca3bea712b 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -61,10 +61,7 @@ impl FromStr for ZpoolHealth { "OFFLINE" => Ok(ZpoolHealth::Offline), "REMOVED" => Ok(ZpoolHealth::Removed), "UNAVAIL" => Ok(ZpoolHealth::Unavailable), - _ => Err(ParseError(format!( - "Unrecognized zpool 'health': {}", - s - ))), + _ => Err(ParseError(format!("Unrecognized zpool 'health': {}", s))), } } } @@ -110,16 +107,10 @@ impl FromStr for ZpoolInfo { fn from_str(s: &str) -> Result { // Lambda helpers for error handling. let expected_field = |name| { - ParseError(format!( - "Missing '{}' value in zpool list output", - name - )) + ParseError(format!("Missing '{}' value in zpool list output", name)) }; let failed_to_parse = |name, err| { - ParseError(format!( - "Failed to parse field '{}': {}", - name, err - )) + ParseError(format!("Failed to parse field '{}': {}", name, err)) }; let mut values = s.trim().split_whitespace(); @@ -163,20 +154,14 @@ impl Zpool { name, ]); - let output = execute(cmd).map_err(|err| { - GetInfoError { - name: name.to_string(), - err: err.into(), - } + let output = execute(cmd).map_err(|err| GetInfoError { + name: name.to_string(), + err: err.into(), })?; let stdout = String::from_utf8_lossy(&output.stdout); - let zpool = stdout.parse::() - .map_err(|err| { - GetInfoError { - name: name.to_string(), - err: err.into(), - } - })?; + let zpool = stdout.parse::().map_err(|err| { + GetInfoError { name: name.to_string(), err: err.into() } + })?; Ok(zpool) } } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f3aebf6dfc..574e6af1e5d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -292,7 +292,10 @@ impl ServiceManager { "refresh", ]) .map_err(|err| Error::ZoneCommand { - intent: format!("Refresh SMF manifest {}", default_smf_name), + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), err, })?; } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 7f15e0961bf..41d13dd88ce 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,9 +5,7 @@ //! Management of sled-local storage. use crate::illumos::dladm::PhysicalLink; -use crate::illumos::running_zone::{ - InstalledZone, RunningZone, -}; +use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; @@ -52,7 +50,6 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { // TODO: We could add the context of "why are we doint this op", maybe? - #[error(transparent)] ZfsListFilesystems(#[from] crate::illumos::zfs::ListFilesystemsError), @@ -442,13 +439,17 @@ async fn ensure_running_zone( let address_request = AddressRequest::new_static(dataset_info.address.ip(), None); - let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request).await; + let err = + RunningZone::get(log, &dataset_info.zone_prefix(), address_request) + .await; match err { Ok(zone) => { info!(log, "Zone for {} is already running", dataset_name.full()); return Ok(zone); } - Err(crate::illumos::running_zone::GetZoneError::NotFound { .. }) => { + Err(crate::illumos::running_zone::GetZoneError::NotFound { + .. + }) => { info!(log, "Zone for {} was not found", dataset_name.full()); let installed_zone = InstalledZone::install( @@ -471,7 +472,10 @@ async fn ensure_running_zone( Ok(zone) } - Err(crate::illumos::running_zone::GetZoneError::NotRunning { name, state }) => { + Err(crate::illumos::running_zone::GetZoneError::NotRunning { + name, + state, + }) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): unimplemented!("Handle a zone which exists, but is not running: {name}, in {state:?}"); } From 7ade54000ee024c5e7250117f554a3e6804357c1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:55:13 -0400 Subject: [PATCH 31/61] Bootstrap params, rack setup service errors --- sled-agent/src/bootstrap/params.rs | 21 ++++--- sled-agent/src/illumos/running_zone.rs | 2 +- sled-agent/src/rack_setup/service.rs | 87 ++++++++++++++++++++------ 3 files changed, 82 insertions(+), 28 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6c55bb1479..b6b12cfa7a1 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,7 +4,10 @@ //! Request body types for the bootstrap agent -use omicron_common::api::external::Ipv6Net; +use omicron_common::{ + api::external::Ipv6Net, + address::SLED_PREFIX, +}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -17,8 +20,10 @@ pub struct ShareRequest { #[derive(thiserror::Error, Debug)] pub enum SubnetError { - #[error("Subnet has unexpected prefix length: {0}")] - BadPrefixLength(u8), + #[error("Subnet {subnet} has unexpected prefix length, wanted {}", SLED_PREFIX)] + BadPrefixLength { + subnet: ipnetwork::Ipv6Network, + }, } /// Represents subnets belonging to Sleds. @@ -33,12 +38,12 @@ pub enum SubnetError { pub struct SledSubnet(Ipv6Net); impl SledSubnet { - pub fn new(ip: Ipv6Net) -> Result { - let prefix = ip.0.prefix(); - if prefix != 64 { - return Err(SubnetError::BadPrefixLength(prefix)); + pub fn new(net: Ipv6Net) -> Result { + let prefix = net.0.prefix(); + if prefix != SLED_PREFIX { + return Err(SubnetError::BadPrefixLength { subnet: net.0 }); } - Ok(SledSubnet(ip)) + Ok(SledSubnet(net)) } } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index ddfa7753982..7dbfbc05cd8 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -163,7 +163,7 @@ impl RunningZone { .map_err(|err| EnsureAddressError::AddrObject { request: addrtype, zone: self.inner.name.clone(), - err: err.into(), + err, })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 153ca1bc18b..b31740fed6e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -19,14 +19,19 @@ use serde::{Deserialize, Serialize}; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::path::PathBuf; use thiserror::Error; use tokio::sync::Mutex; /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, #[error("Error making HTTP request to Bootstrap Agent: {0}")] BootstrapApi( @@ -35,19 +40,25 @@ pub enum SetupServiceError { ), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), + SledApi( + #[from] + sled_agent_client::Error, + ), - #[error("Cannot deserialize TOML file")] - Toml(#[from] toml::de::Error), + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { + path: PathBuf, + err: toml::de::Error, + }, #[error("Failed to monitor for peers: {0}")] - PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), - - #[error(transparent)] - Http(#[from] reqwest::Error), + PeerMonitor( + #[from] + tokio::sync::broadcast::error::RecvError + ), - #[error("Configuration changed")] - Configuration, + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), } // The workload / information allocated to a single sled. @@ -143,7 +154,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let url = format!("http://{}", bootstrap_addr); info!(self.log, "Sending request to peer agent: {}", url); @@ -198,7 +210,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", sled_address), client, @@ -244,7 +257,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", sled_address), client, @@ -290,8 +304,20 @@ impl ServiceInner { let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await?, - )?; + &tokio::fs::read_to_string(&rss_plan_path) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Loading RSS plan {rss_plan_path:?}"), + err, + } + })?, + ).map_err(|err| { + SetupServiceError::Toml { + path: rss_plan_path, + err, + } + })?; Ok(Some(plan)) } else { Ok(None) @@ -374,7 +400,15 @@ impl ServiceInner { .expect("Cannot turn config to string"); info!(self.log, "Plan serialized as: {}", plan_str); - tokio::fs::write(&rss_plan_path(), plan_str).await?; + let path = rss_plan_path(); + tokio::fs::write(&path, plan_str) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS plan to {path:?}"), + err, + } + })?; info!(self.log, "Plan written to storage"); Ok(plan) @@ -451,7 +485,14 @@ impl ServiceInner { // We expect this directory to exist - ensure that it does, before any // subsequent operations which may write configs here. - tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH).await?; + tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Creating config directory {}", omicron_common::OMICRON_CONFIG_PATH), + err, + } + })?; // Check if a previous RSS plan has completed successfully. // @@ -593,7 +634,15 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. - tokio::fs::rename(rss_plan_path(), rss_completed_plan_path).await?; + let plan_path = rss_plan_path(); + tokio::fs::rename(&plan_path, &rss_completed_plan_path) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("renaming {plan_path:?} to {rss_completed_plan_path:?}"), + err, + } + })?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does From f09348f39589b60c6a0dd04491187feacca7b18b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:12:41 -0400 Subject: [PATCH 32/61] Bootstrap agent, trust quorum errors --- sled-agent/src/bootstrap/agent.rs | 72 +++++++++++++------ .../src/bootstrap/trust_quorum/client.rs | 8 ++- .../src/bootstrap/trust_quorum/error.rs | 8 ++- .../src/bootstrap/trust_quorum/server.rs | 9 ++- .../trust_quorum/share_distribution.rs | 15 +++- 5 files changed, 86 insertions(+), 26 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 7ede44b06bf..523c58858fc 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -31,20 +31,21 @@ use tokio::sync::Mutex; /// Describes errors which may occur while operating the bootstrap service. #[derive(Error, Debug)] pub enum BootstrapError { - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), - - #[error("Error configuring SMF: {0}")] - SmfConfig(#[from] smf::ConfigError), - - #[error("Error modifying SMF service: {0}")] - SmfAdm(#[from] smf::AdmError), + #[error("IO error: {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, #[error("Error starting sled agent: {0}")] SledError(String), - #[error(transparent)] - Toml(#[from] toml::de::Error), + #[error("Error deserializing toml from {path}: {err}")] + Toml { + path: PathBuf, + err: toml::de::Error, + }, #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), @@ -70,11 +71,11 @@ fn read_key_share() -> Result, BootstrapError> { match ShareDistribution::read(&key_share_dir) { Ok(share) => Ok(Some(share)), - Err(TrustQuorumError::Io(err)) => { + Err(TrustQuorumError::Io { message, err }) => { if err.kind() == io::ErrorKind::NotFound { Ok(None) } else { - Err(BootstrapError::Io(err)) + Err(BootstrapError::Io { message, err }) } } Err(e) => Err(e.into()), @@ -135,7 +136,7 @@ impl Agent { } else { Dladm::find_physical().map_err(|err| { BootstrapError::SledError(format!( - "Can't access physical link: {}", + "Can't access physical link, and none in config: {}", err )) })? @@ -148,7 +149,13 @@ impl Agent { ) .map_err(|err| BootstrapError::BootstrapAddress { err })?; - let peer_monitor = discovery::PeerMonitor::new(&log, address)?; + let peer_monitor = discovery::PeerMonitor::new(&log, address) + .map_err(|err| { + BootstrapError::Io { + message: format!("Monitoring for peers from {address}"), + err, + } + })?; let share = read_key_share()?; let agent = Agent { log, @@ -163,8 +170,20 @@ impl Agent { if subnet_path.exists() { info!(agent.log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( - &tokio::fs::read_to_string(&subnet_path).await?, - )?; + &tokio::fs::read_to_string(&subnet_path) + .await + .map_err(|err| { + BootstrapError::Io { + message: format!("Reading subnet path from {subnet_path:?}"), + err, + } + })?, + ).map_err(|err| { + BootstrapError::Toml { + path: subnet_path, + err, + } + })?; agent.request_agent(sled_request).await?; } @@ -216,21 +235,28 @@ impl Agent { // Server does not exist, initialize it. let server = SledServer::start(&self.sled_config, sled_address) .await - .map_err(|e| BootstrapError::SledError(e))?; + .map_err(|e| BootstrapError::SledError(format!("Could not start sled agent server: {e}")))?; maybe_agent.replace(server); info!(&self.log, "Sled Agent loaded; recording configuration"); // Record the subnet, so the sled agent can be automatically // initialized on the next boot. + let path = get_subnet_path(); tokio::fs::write( - get_subnet_path(), + &path, &toml::to_string( &toml::Value::try_from(&request.subnet) .expect("Cannot serialize IP"), ) .expect("Cannot convert toml to string"), ) - .await?; + .await + .map_err(|err| { + BootstrapError::Io { + message: format!("Recording subnet to {path:?}"), + err, + } + })?; Ok(SledAgentResponse { id: self.sled_config.id }) } @@ -337,7 +363,13 @@ impl Agent { async fn run_trust_quorum_server(&self) -> Result<(), BootstrapError> { let my_share = self.share.as_ref().unwrap().share.clone(); - let mut server = trust_quorum::Server::new(&self.log, my_share)?; + let mut server = trust_quorum::Server::new(&self.log, my_share) + .map_err(|err| { + BootstrapError::Io { + message: "Cannot run trust quorum server".to_string(), + err, + } + })?; tokio::spawn(async move { server.run().await }); Ok(()) } diff --git a/sled-agent/src/bootstrap/trust_quorum/client.rs b/sled-agent/src/bootstrap/trust_quorum/client.rs index 7eb1ff2808b..5ec0a24791a 100644 --- a/sled-agent/src/bootstrap/trust_quorum/client.rs +++ b/sled-agent/src/bootstrap/trust_quorum/client.rs @@ -31,7 +31,13 @@ impl Client { // Connect to a trust quorum server, establish an SPDM channel, and retrieve // a share. pub async fn get_share(&self) -> Result { - let sock = TcpStream::connect(&self.addr).await?; + let sock = TcpStream::connect(&self.addr).await + .map_err(|err| { + TrustQuorumError::Io { + message: format!("Connecting to {}", self.addr), + err, + } + })?; let transport = spdm::Transport::new(sock, self.log.clone()); // Complete SPDM negotiation and return a secure transport diff --git a/sled-agent/src/bootstrap/trust_quorum/error.rs b/sled-agent/src/bootstrap/trust_quorum/error.rs index 968e7ee9a25..69c98bc6c31 100644 --- a/sled-agent/src/bootstrap/trust_quorum/error.rs +++ b/sled-agent/src/bootstrap/trust_quorum/error.rs @@ -29,6 +29,10 @@ pub enum TrustQuorumError { #[error("Rack secret construction failed: {0:?}")] RackSecretConstructionFailed(vsss_rs::Error), - #[error("IO error: {0}")] - Io(#[from] std::io::Error), + #[error("IO error {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, } diff --git a/sled-agent/src/bootstrap/trust_quorum/server.rs b/sled-agent/src/bootstrap/trust_quorum/server.rs index 6dfbe4206bb..1608698d914 100644 --- a/sled-agent/src/bootstrap/trust_quorum/server.rs +++ b/sled-agent/src/bootstrap/trust_quorum/server.rs @@ -65,7 +65,14 @@ impl Server { &mut self, ) -> Result>, TrustQuorumError> { - let (sock, addr) = self.listener.accept().await?; + let (sock, addr) = self.listener.accept() + .await + .map_err(|err| { + TrustQuorumError::Io { + message: "Accepting a connection from TCP listener".to_string(), + err, + } + })?; debug!(self.log, "Accepted connection from {}", addr); let share = self.share.clone(); let log = self.log.clone(); diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index 73dc2148ee3..9eaa424da46 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -32,7 +32,12 @@ impl ShareDistribution { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); let json = serde_json::to_string(&self)?; - fs::write(path, &json)?; + fs::write(&path, &json).map_err(|err| { + TrustQuorumError::Io { + message: format!("Writing share to {path:?}"), + err, + } + })?; Ok(()) } @@ -41,7 +46,13 @@ impl ShareDistribution { ) -> Result { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); - let json = fs::read_to_string(path.to_str().unwrap())?; + let json = fs::read_to_string(path.to_str().unwrap()) + .map_err(|err| { + TrustQuorumError::Io { + message: format!("Reading share from {path:?}"), + err, + } + })?; serde_json::from_str(&json).map_err(|e| e.into()) } } From fb405165f8b77938de72962fad52d63703240d41 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:13:29 -0400 Subject: [PATCH 33/61] fmt --- sled-agent/src/bootstrap/agent.rs | 55 ++++++------- sled-agent/src/bootstrap/params.rs | 14 ++-- .../src/bootstrap/trust_quorum/client.rs | 13 ++- .../src/bootstrap/trust_quorum/server.rs | 14 ++-- .../trust_quorum/share_distribution.rs | 12 ++- sled-agent/src/rack_setup/service.rs | 81 ++++++++----------- 6 files changed, 82 insertions(+), 107 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 523c58858fc..ea285b14440 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -42,10 +42,7 @@ pub enum BootstrapError { SledError(String), #[error("Error deserializing toml from {path}: {err}")] - Toml { - path: PathBuf, - err: toml::de::Error, - }, + Toml { path: PathBuf, err: toml::de::Error }, #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), @@ -149,8 +146,8 @@ impl Agent { ) .map_err(|err| BootstrapError::BootstrapAddress { err })?; - let peer_monitor = discovery::PeerMonitor::new(&log, address) - .map_err(|err| { + let peer_monitor = + discovery::PeerMonitor::new(&log, address).map_err(|err| { BootstrapError::Io { message: format!("Monitoring for peers from {address}"), err, @@ -170,20 +167,16 @@ impl Agent { if subnet_path.exists() { info!(agent.log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( - &tokio::fs::read_to_string(&subnet_path) - .await - .map_err(|err| { - BootstrapError::Io { - message: format!("Reading subnet path from {subnet_path:?}"), - err, - } - })?, - ).map_err(|err| { - BootstrapError::Toml { - path: subnet_path, - err, - } - })?; + &tokio::fs::read_to_string(&subnet_path).await.map_err( + |err| BootstrapError::Io { + message: format!( + "Reading subnet path from {subnet_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| BootstrapError::Toml { path: subnet_path, err })?; agent.request_agent(sled_request).await?; } @@ -235,7 +228,11 @@ impl Agent { // Server does not exist, initialize it. let server = SledServer::start(&self.sled_config, sled_address) .await - .map_err(|e| BootstrapError::SledError(format!("Could not start sled agent server: {e}")))?; + .map_err(|e| { + BootstrapError::SledError(format!( + "Could not start sled agent server: {e}" + )) + })?; maybe_agent.replace(server); info!(&self.log, "Sled Agent loaded; recording configuration"); @@ -251,11 +248,9 @@ impl Agent { .expect("Cannot convert toml to string"), ) .await - .map_err(|err| { - BootstrapError::Io { - message: format!("Recording subnet to {path:?}"), - err, - } + .map_err(|err| BootstrapError::Io { + message: format!("Recording subnet to {path:?}"), + err, })?; Ok(SledAgentResponse { id: self.sled_config.id }) @@ -364,11 +359,9 @@ impl Agent { async fn run_trust_quorum_server(&self) -> Result<(), BootstrapError> { let my_share = self.share.as_ref().unwrap().share.clone(); let mut server = trust_quorum::Server::new(&self.log, my_share) - .map_err(|err| { - BootstrapError::Io { - message: "Cannot run trust quorum server".to_string(), - err, - } + .map_err(|err| BootstrapError::Io { + message: "Cannot run trust quorum server".to_string(), + err, })?; tokio::spawn(async move { server.run().await }); Ok(()) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6b12cfa7a1..9901eab5069 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,10 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::{ - api::external::Ipv6Net, - address::SLED_PREFIX, -}; +use omicron_common::{address::SLED_PREFIX, api::external::Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -20,10 +17,11 @@ pub struct ShareRequest { #[derive(thiserror::Error, Debug)] pub enum SubnetError { - #[error("Subnet {subnet} has unexpected prefix length, wanted {}", SLED_PREFIX)] - BadPrefixLength { - subnet: ipnetwork::Ipv6Network, - }, + #[error( + "Subnet {subnet} has unexpected prefix length, wanted {}", + SLED_PREFIX + )] + BadPrefixLength { subnet: ipnetwork::Ipv6Network }, } /// Represents subnets belonging to Sleds. diff --git a/sled-agent/src/bootstrap/trust_quorum/client.rs b/sled-agent/src/bootstrap/trust_quorum/client.rs index 5ec0a24791a..0d6cdaf2c1d 100644 --- a/sled-agent/src/bootstrap/trust_quorum/client.rs +++ b/sled-agent/src/bootstrap/trust_quorum/client.rs @@ -31,13 +31,12 @@ impl Client { // Connect to a trust quorum server, establish an SPDM channel, and retrieve // a share. pub async fn get_share(&self) -> Result { - let sock = TcpStream::connect(&self.addr).await - .map_err(|err| { - TrustQuorumError::Io { - message: format!("Connecting to {}", self.addr), - err, - } - })?; + let sock = TcpStream::connect(&self.addr).await.map_err(|err| { + TrustQuorumError::Io { + message: format!("Connecting to {}", self.addr), + err, + } + })?; let transport = spdm::Transport::new(sock, self.log.clone()); // Complete SPDM negotiation and return a secure transport diff --git a/sled-agent/src/bootstrap/trust_quorum/server.rs b/sled-agent/src/bootstrap/trust_quorum/server.rs index 1608698d914..9016bc7e9e1 100644 --- a/sled-agent/src/bootstrap/trust_quorum/server.rs +++ b/sled-agent/src/bootstrap/trust_quorum/server.rs @@ -65,14 +65,12 @@ impl Server { &mut self, ) -> Result>, TrustQuorumError> { - let (sock, addr) = self.listener.accept() - .await - .map_err(|err| { - TrustQuorumError::Io { - message: "Accepting a connection from TCP listener".to_string(), - err, - } - })?; + let (sock, addr) = self.listener.accept().await.map_err(|err| { + TrustQuorumError::Io { + message: "Accepting a connection from TCP listener".to_string(), + err, + } + })?; debug!(self.log, "Accepted connection from {}", addr); let share = self.share.clone(); let log = self.log.clone(); diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index 9eaa424da46..799e94fc5d7 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -32,11 +32,9 @@ impl ShareDistribution { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); let json = serde_json::to_string(&self)?; - fs::write(&path, &json).map_err(|err| { - TrustQuorumError::Io { - message: format!("Writing share to {path:?}"), - err, - } + fs::write(&path, &json).map_err(|err| TrustQuorumError::Io { + message: format!("Writing share to {path:?}"), + err, })?; Ok(()) } @@ -46,8 +44,8 @@ impl ShareDistribution { ) -> Result { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); - let json = fs::read_to_string(path.to_str().unwrap()) - .map_err(|err| { + let json = + fs::read_to_string(path.to_str().unwrap()).map_err(|err| { TrustQuorumError::Io { message: format!("Reading share from {path:?}"), err, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b31740fed6e..6d7dd09358c 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -40,22 +40,13 @@ pub enum SetupServiceError { ), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi( - #[from] - sled_agent_client::Error, - ), + SledApi(#[from] sled_agent_client::Error), #[error("Cannot deserialize TOML file at {path}: {err}")] - Toml { - path: PathBuf, - err: toml::de::Error, - }, + Toml { path: PathBuf, err: toml::de::Error }, #[error("Failed to monitor for peers: {0}")] - PeerMonitor( - #[from] - tokio::sync::broadcast::error::RecvError - ), + PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), @@ -304,19 +295,18 @@ impl ServiceInner { let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path) - .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Loading RSS plan {rss_plan_path:?}"), - err, - } - })?, - ).map_err(|err| { - SetupServiceError::Toml { - path: rss_plan_path, - err, - } + &tokio::fs::read_to_string(&rss_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "Loading RSS plan {rss_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| SetupServiceError::Toml { + path: rss_plan_path, + err, })?; Ok(Some(plan)) } else { @@ -401,14 +391,12 @@ impl ServiceInner { info!(self.log, "Plan serialized as: {}", plan_str); let path = rss_plan_path(); - tokio::fs::write(&path, plan_str) - .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), - err, - } - })?; + tokio::fs::write(&path, plan_str).await.map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS plan to {path:?}"), + err, + } + })?; info!(self.log, "Plan written to storage"); Ok(plan) @@ -487,11 +475,12 @@ impl ServiceInner { // subsequent operations which may write configs here. tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH) .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Creating config directory {}", omicron_common::OMICRON_CONFIG_PATH), - err, - } + .map_err(|err| SetupServiceError::Io { + message: format!( + "Creating config directory {}", + omicron_common::OMICRON_CONFIG_PATH + ), + err, })?; // Check if a previous RSS plan has completed successfully. @@ -635,14 +624,14 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. let plan_path = rss_plan_path(); - tokio::fs::rename(&plan_path, &rss_completed_plan_path) - .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("renaming {plan_path:?} to {rss_completed_plan_path:?}"), - err, - } - })?; + tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "renaming {plan_path:?} to {rss_completed_plan_path:?}" + ), + err, + }, + )?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does From 25237b48ca4607ec3d079d2dfe74f9037d9deabd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:41:56 -0400 Subject: [PATCH 34/61] storage manager --- sled-agent/src/illumos/zfs.rs | 4 +- sled-agent/src/services.rs | 2 +- sled-agent/src/storage_manager.rs | 106 +++++++++++++++++++++--------- 3 files changed, 79 insertions(+), 33 deletions(-) diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index 00e39ef267c..da6510a29f3 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -14,7 +14,7 @@ const ZFS: &str = "/usr/sbin/zfs"; /// Error returned by [`Zfs::list_filesystems`]. #[derive(thiserror::Error, Debug)] -#[error("Could not list filesystems within dataset {name}: {err}")] +#[error("Could not list filesystems within zpool {name}: {err}")] pub struct ListFilesystemsError { name: String, #[source] @@ -97,7 +97,7 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { - /// Lists all filesystems within a dataset. + /// Lists all filesystems within a zpool. pub fn list_filesystems( name: &str, ) -> Result, ListFilesystemsError> { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 574e6af1e5d..13ae8d1fd3a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -214,7 +214,7 @@ impl ServiceManager { ) .map_err(|err| Error::GzAddress { message: format!( - "Failed adding address for {}", + "adding address on behalf of service '{}'", service.name ), err, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 41d13dd88ce..24a4d01b330 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -77,26 +77,43 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), - #[error("Error parsing pool size: {0}")] - BadPoolSize(#[from] ByteCountRangeError), - - #[error("Failed to parse as UUID: {0}")] - Parse(#[from] uuid::Error), - - #[error("Timed out waiting for service: {0}")] - Timeout(String), - - #[error("Object Not Found: {0}")] - NotFound(String), - - #[error("Failed to serialize toml: {0}")] - Serialize(#[from] toml::ser::Error), - - #[error("Failed to deserialize toml: {0}")] - Deserialize(#[from] toml::de::Error), - - #[error("Failed to perform I/O: {0}")] - Io(#[from] std::io::Error), + #[error("Error parsing pool {name}'s size: {err}")] + BadPoolSize { + name: String, + #[source] + err: ByteCountRangeError, + }, + + #[error("Failed to parse the dataset {name}'s UUID: {err}")] + ParseDatasetUuid { + name: String, + #[source] + err: uuid::Error, + }, + + #[error("Zpool Not Found: {0}")] + ZpoolNotFound(String), + + #[error("Failed to serialize toml (intended for {path:?}): {err}")] + Serialize { + path: PathBuf, + #[source] + err: toml::ser::Error, + }, + + #[error("Failed to deserialize toml from {path:?}: {err}")] + Deserialize { + path: PathBuf, + #[source] + err: toml::de::Error, + }, + + #[error("Failed to perform I/O: {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, } /// A ZFS storage pool. @@ -148,7 +165,10 @@ impl Pool { ) -> Result { let path = std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) .join(self.id.to_string()); - create_dir_all(&path).await?; + create_dir_all(&path).await.map_err(|err| Error::Io { + message: format!("creating config dir {path:?}, which would contain config for {dataset_id}"), + err, + })?; let mut path = path.join(dataset_id.to_string()); path.set_extension("toml"); Ok(path) @@ -691,11 +711,15 @@ impl StorageWorker { let mut pools = self.pools.lock().await; let name = ZpoolName::new(request.zpool_id); let pool = pools.get_mut(&name).ok_or_else(|| { - Error::NotFound(format!("zpool: {}", request.zpool_id)) + Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.zpool_id + )) })?; + let pool_name = pool.info.name(); let dataset_info = DatasetInfo::new( - pool.info.name(), + pool_name, request.dataset_kind.clone(), request.address, ); @@ -714,10 +738,18 @@ impl StorageWorker { // Now that the dataset has been initialized, record the configuration // so it can re-initialize itself after a reboot. - let info_str = toml::to_string(&dataset_info)?; let path = pool.dataset_config_path(id).await?; - let mut file = File::create(path).await?; - file.write_all(info_str.as_bytes()).await?; + let info_str = toml::to_string(&dataset_info) + .map_err(|err| Error::Serialize { path: path.clone(), err })?; + let pool_name = pool.info.name(); + let mut file = File::create(&path).await.map_err(|err| Error::Io { + message: format!("Failed creating config file at {path:?} for pool {pool_name}, dataset: {id}"), + err, + })?; + file.write_all(info_str.as_bytes()).await.map_err(|err| Error::Io { + message: format!("Failed writing config to {path:?} for pool {pool_name}, dataset: {id}"), + err, + })?; self.add_datasets_notify( nexus_notifications, @@ -733,16 +765,29 @@ impl StorageWorker { pool: &mut Pool, dataset_name: &DatasetName, ) -> Result<(Uuid, SocketAddr, DatasetKind), Error> { - let id = Zfs::get_oxide_value(&dataset_name.full(), "uuid")? - .parse::()?; + let name = dataset_name.full(); + let id = Zfs::get_oxide_value(&name, "uuid")? + .parse::() + .map_err(|err| Error::ParseDatasetUuid { name, err })?; let config_path = pool.dataset_config_path(id).await?; info!( self.log, "Loading Dataset from {}", config_path.to_string_lossy() ); + let pool_name = pool.info.name(); let dataset_info: DatasetInfo = - toml::from_slice(&tokio::fs::read(config_path).await?)?; + toml::from_slice( + &tokio::fs::read(&config_path).await.map_err(|err| Error::Io { + message: format!("read config for pool {pool_name}, dataset {dataset_name:?} from {config_path:?}"), + err, + })? + ).map_err(|err| { + Error::Deserialize { + path: config_path, + err, + } + })?; self.initialize_dataset_and_zone( pool, &dataset_info, @@ -783,7 +828,8 @@ impl StorageWorker { "Storage manager processing zpool: {:#?}", pool.info ); - let size = ByteCount::try_from(pool.info.size())?; + let size = ByteCount::try_from(pool.info.size()) + .map_err(|err| Error::BadPoolSize { name: pool_name.to_string(), err })?; // If we find filesystems within our datasets, ensure their // zones are up-and-running. From a9af5d1c96d906f5255e6bec0c3356bc8ba0512f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:52:37 -0400 Subject: [PATCH 35/61] last few --- sled-agent/src/config.rs | 24 ++++++++++++++------ sled-agent/src/instance.rs | 4 ---- sled-agent/src/rack_setup/config.rs | 6 +++-- sled-agent/src/updates.rs | 34 ++++++++++++++++++++++++----- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 63b9b1115cc..cfa6e84927b 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -10,7 +10,7 @@ use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; use serde::Deserialize; use std::net::SocketAddr; -use std::path::Path; +use std::path::{Path, PathBuf}; use uuid::Uuid; /// Configuration for a sled agent @@ -35,17 +35,27 @@ pub struct Config { #[derive(Debug, thiserror::Error)] pub enum ConfigError { - #[error("Failed to read config: {0}")] - Io(#[from] std::io::Error), - #[error("Failed to parse config: {0}")] - Parse(#[from] toml::de::Error), + #[error("Failed to read config from {path}: {err}")] + Io { + path: PathBuf, + #[source] + err: std::io::Error, + }, + #[error("Failed to parse config from {path}: {err}")] + Parse { + path: PathBuf, + #[source] + err: toml::de::Error, + }, } impl Config { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); - let contents = std::fs::read_to_string(path)?; - let config = toml::from_str(&contents)?; + let contents = std::fs::read_to_string(&path) + .map_err(|err| ConfigError::Io { path: path.into(), err })?; + let config = toml::from_str(&contents) + .map_err(|err| ConfigError::Parse { path: path.into(), err })?; Ok(config) } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 87af13085a7..bf0cce1fa57 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -41,7 +41,6 @@ pub enum Error { #[error("Failed to wait for service: {0}")] Timeout(String), - // TODO: Who are we allocating the VNIC for? #[error("Failed to create VNIC: {0}")] VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), @@ -71,9 +70,6 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), - - #[error("serde_json failure: {0}")] - SerdeJsonError(#[from] serde_json::Error), } // Issues read-only, idempotent HTTP requests at propolis until it responds with diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 1ec2858c0b7..01299e0de2b 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -58,8 +58,10 @@ fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); - let contents = std::fs::read_to_string(path)?; - let config = toml::from_str(&contents)?; + let contents = std::fs::read_to_string(&path) + .map_err(|err| ConfigError::Io { path: path.into(), err })?; + let config = toml::from_str(&contents) + .map_err(|err| ConfigError::Parse { path: path.into(), err })?; Ok(config) } diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 38c132a92ab..721861ca074 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -12,8 +12,12 @@ use std::path::PathBuf; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("I/O Error: {0}")] - Io(#[from] std::io::Error), + #[error("I/O Error: {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, #[error("Failed to contact nexus: {0}")] Nexus(anyhow::Error), @@ -29,7 +33,12 @@ pub async fn download_artifact( match artifact.kind { UpdateArtifactKind::Zone => { let directory = PathBuf::from("/var/tmp/zones"); - tokio::fs::create_dir_all(&directory).await?; + tokio::fs::create_dir_all(&directory).await.map_err(|err| { + Error::Io { + message: format!("creating diretory {directory:?}"), + err, + } + })?; // We download the file to a location named "-". // We then rename it to "" after it has successfully @@ -57,10 +66,25 @@ pub async fn download_artifact( .map_err(Error::Response)?; let contents = response.bytes().await.map_err(|e| Error::Response(e))?; - tokio::fs::write(&tmp_path, contents).await?; + tokio::fs::write(&tmp_path, contents).await.map_err(|err| { + Error::Io { + message: format!( + "Downloading artifact to temporary path: {tmp_path:?}" + ), + err, + } + })?; // Write the file to its final path. - tokio::fs::rename(&tmp_path, directory.join(artifact.name)).await?; + let destination = directory.join(artifact.name); + tokio::fs::rename(&tmp_path, &destination).await.map_err( + |err| Error::Io { + message: format!( + "Renaming {tmp_path:?} to {destination:?}" + ), + err, + }, + )?; Ok(()) } } From 31e3c1b8e881790bb42152d67a2f79c6d27a7f24 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 25 Apr 2022 11:18:48 -0400 Subject: [PATCH 36/61] review feedback --- sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/illumos/dladm.rs | 4 ++-- sled-agent/src/illumos/zfs.rs | 2 +- sled-agent/src/illumos/zpool.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index ea285b14440..07bf8df9640 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -140,7 +140,7 @@ impl Agent { }; Zones::ensure_has_global_zone_v6_address( - data_link.clone(), + data_link, address, "bootstrap6", ) diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 441fe18fa49..6bf9c182414 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -18,7 +18,7 @@ pub const DLADM: &str = "/usr/sbin/dladm"; /// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] pub enum FindPhysicalLinkError { - #[error("Failed to execute command to find physical link: {0}")] + #[error("Failed to find physical link: {0}")] Execution(#[from] ExecutionError), #[error("No Physical Link devices found")] @@ -31,7 +31,7 @@ pub enum GetMacError { #[error("Mac Address cannot be looked up; Link not found: {0:?}")] NotFound(PhysicalLink), - #[error("Failed to execute command to get MAC address: {0}")] + #[error("Failed to get MAC address: {0}")] Execution(#[from] ExecutionError), #[error("Failed to parse MAC: {0}")] diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index da6510a29f3..e47825d0585 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -161,7 +161,7 @@ impl Zfs { "-o", "zoned=on", "-o", - &format!("mountpoint={}", &mountpoint), + &format!("mountpoint={}", mountpoint), name, ]); execute(cmd).map_err(|err| EnsureFilesystemError { diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index 0ca3bea712b..b1d528c05db 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -25,7 +25,7 @@ enum Error { } #[derive(thiserror::Error, Debug)] -#[error("Failed to get info for zpool {name}: {err}")] +#[error("Failed to get info for zpool '{name}': {err}")] pub struct GetInfoError { name: String, #[source] From 857fe85f2166158161ffb05d1faa07f7fa510b68 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Apr 2022 14:39:55 -0400 Subject: [PATCH 37/61] Const generic subnet prefix --- common/src/address.rs | 109 ++++++++++++++++++--------- openapi/bootstrap-agent.json | 22 +++--- sled-agent/src/bootstrap/agent.rs | 5 +- sled-agent/src/bootstrap/params.rs | 47 +----------- sled-agent/src/rack_setup/config.rs | 65 ++++++++-------- sled-agent/src/rack_setup/service.rs | 22 +++--- 6 files changed, 133 insertions(+), 137 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 69a6c3143d2..e18e3639e96 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -7,7 +7,9 @@ //! This addressing functionality is shared by both initialization services //! and Nexus, who need to agree upon addressing schemes. +use crate::api::external::Ipv6Net; use ipnetwork::Ipv6Network; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -29,10 +31,43 @@ pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; +// Anycast is a mechanism in which a single IP address is shared by multiple +// devices, and the destination is located based on routing distance. +// +// This is covered by RFC 4291 in much more detail: +// +// +// Anycast addresses are always the "zeroeth" address within a subnet. We +// always explicitly skip these addresses within our network. +const _ANYCAST_ADDRESS_INDEX: usize = 0; +const DNS_ADDRESS_INDEX: usize = 1; +const GZ_ADDRESS_INDEX: usize = 2; + +/// Wraps an [`Ipv6Network`] with a compile-time prefix length. +#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize, PartialEq)] +pub struct Ipv6Subnet { + net: Ipv6Net, +} + +impl Ipv6Subnet { + pub fn new(addr: Ipv6Addr) -> Self { + // Create a network with the compile-time prefix length. + let net = Ipv6Network::new(addr, N).unwrap(); + // Ensure the address is set to within-prefix only components. + let net = Ipv6Network::new(net.network(), N).unwrap(); + Self { net: Ipv6Net(net) } + } + + /// Returns the underlying network. + pub fn net(&self) -> Ipv6Network { + self.net.0 + } +} + /// Represents a subnet which may be used for contacting DNS services. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub struct DnsSubnet { - network: Ipv6Network, + subnet: Ipv6Subnet, } impl DnsSubnet { @@ -40,10 +75,11 @@ impl DnsSubnet { /// /// This is the first address within the subnet. pub fn dns_address(&self) -> Ipv6Network { - let mut iter = self.network.iter(); - let _anycast_ip = iter.next().unwrap(); - let dns_ip = iter.next().unwrap(); - Ipv6Network::new(dns_ip, SLED_PREFIX).unwrap() + Ipv6Network::new( + self.subnet.net().iter().nth(DNS_ADDRESS_INDEX).unwrap(), + SLED_PREFIX, + ) + .unwrap() } /// Returns the address which the Global Zone should create @@ -51,68 +87,63 @@ impl DnsSubnet { /// /// This is the second address within the subnet. pub fn gz_address(&self) -> Ipv6Network { - let mut iter = self.network.iter(); - let _anycast_ip = iter.next().unwrap(); - let _dns_ip = iter.next().unwrap(); - Ipv6Network::new(iter.next().unwrap(), SLED_PREFIX).unwrap() + Ipv6Network::new( + self.subnet.net().iter().nth(GZ_ADDRESS_INDEX).unwrap(), + SLED_PREFIX, + ) + .unwrap() } } /// A wrapper around an IPv6 network, indicating it is a "reserved" rack /// subnet which can be used for AZ-wide services. #[derive(Debug, Clone)] -pub struct ReservedRackSubnet(pub Ipv6Network); +pub struct ReservedRackSubnet(pub Ipv6Subnet); impl ReservedRackSubnet { /// Returns the subnet for the reserved rack subnet. - pub fn new(subnet: Ipv6Network) -> Self { - let net = Ipv6Network::new(subnet.network(), AZ_PREFIX).unwrap(); - ReservedRackSubnet( - Ipv6Network::new(net.network(), RACK_PREFIX).unwrap(), - ) + pub fn new(subnet: Ipv6Subnet) -> Self { + ReservedRackSubnet(Ipv6Subnet::::new(subnet.net().ip())) } - /// Given a particular rack subnet, return the DNS addresses. + /// Returns the DNS addresses from this reserved rack subnet. /// /// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the /// [`RACK_PREFIX`] subnet. pub fn get_dns_subnets(&self) -> Vec { - assert_eq!(self.0.prefix(), RACK_PREFIX); - (0..DNS_REDUNDANCY) .map(|idx| { - let network = + let subnet = get_64_subnet(self.0, u8::try_from(idx + 1).unwrap()); - - DnsSubnet { network } + DnsSubnet { subnet } }) .collect() } } +const SLED_AGENT_ADDRESS_INDEX: usize = 1; + /// Return the sled agent address for a subnet. /// /// This address will come from the first address of the [`SLED_PREFIX`] subnet. -pub fn get_sled_address(sled_subnet: Ipv6Network) -> SocketAddrV6 { - assert_eq!(sled_subnet.prefix(), SLED_PREFIX); - - let mut iter = sled_subnet.iter(); - let _anycast_ip = iter.next().unwrap(); - let sled_agent_ip = iter.next().unwrap(); +pub fn get_sled_address(sled_subnet: Ipv6Subnet) -> SocketAddrV6 { + let sled_agent_ip = + sled_subnet.net().iter().nth(SLED_AGENT_ADDRESS_INDEX).unwrap(); SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) } /// Returns a sled subnet within a rack subnet. /// /// The subnet at index == 0 is used for rack-local services. -pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { - assert_eq!(rack_subnet.prefix(), RACK_PREFIX); - - let mut rack_network = rack_subnet.network().octets(); +pub fn get_64_subnet( + rack_subnet: Ipv6Subnet, + index: u8, +) -> Ipv6Subnet { + let mut rack_network = rack_subnet.net().network().octets(); // To set bits distinguishing the /64 from the /56, we modify the 7th octet. rack_network[7] = index; - Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() + Ipv6Subnet::::new(Ipv6Addr::from(rack_network)) } #[cfg(test)] @@ -121,14 +152,16 @@ mod test { #[test] fn test_dns_subnets() { - let subnet = "fd00:1122:3344:0100::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0100::".parse::().unwrap(), + ); let rack_subnet = ReservedRackSubnet::new(subnet); assert_eq!( // Note that these bits (indicating the rack) are zero. // vv "fd00:1122:3344:0000::/56".parse::().unwrap(), - rack_subnet.0, + rack_subnet.0.net(), ); // Observe the first DNS subnet within this reserved rack subnet. @@ -148,13 +181,17 @@ mod test { #[test] fn test_sled_address() { - let subnet = "fd00:1122:3344:0101::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0101::".parse::().unwrap(), + ); assert_eq!( "[fd00:1122:3344:0101::1]:12345".parse::().unwrap(), get_sled_address(subnet) ); - let subnet = "fd00:1122:3344:0308::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0308::".parse::().unwrap(), + ); assert_eq!( "[fd00:1122:3344:0308::1]:12345".parse::().unwrap(), get_sled_address(subnet) diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index cf04477f449..d503e01b20c 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -117,6 +117,18 @@ "pattern": "^(fd|FD)[0-9a-fA-F]{2}:((([0-9a-fA-F]{1,4}\\:){6}[0-9a-fA-F]{1,4})|(([0-9a-fA-F]{1,4}:){1,6}:))/(6[4-9]|[7-9][0-9]|1[0-1][0-9]|12[0-6])$", "maxLength": 43 }, + "Ipv6Subnet": { + "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.", + "type": "object", + "properties": { + "net": { + "$ref": "#/components/schemas/Ipv6Net" + } + }, + "required": [ + "net" + ] + }, "ShareRequest": { "description": "Identity signed by local RoT and Oxide certificate chain.", "type": "object", @@ -159,7 +171,7 @@ "description": "Portion of the IP space to be managed by the Sled Agent.", "allOf": [ { - "$ref": "#/components/schemas/SledSubnet" + "$ref": "#/components/schemas/Ipv6Subnet" } ] } @@ -180,14 +192,6 @@ "required": [ "id" ] - }, - "SledSubnet": { - "description": "Represents subnets belonging to Sleds.\n\nThis is a thin wrapper around the [`Ipv6Net`] type - which may be accessed by [`AsRef`] - which adds additional validation that this is a /64 subnet with an expected prefix.", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv6Net" - } - ] } } } diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index da20e4f8e58..92988a694f9 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -16,6 +16,7 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::{self, Zones}; use crate::rack_setup::service::Service as RackSetupService; use crate::server::Server as SledServer; +use omicron_common::address::get_sled_address; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -181,9 +182,7 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = omicron_common::address::get_sled_address( - request.subnet.as_ref().0, - ); + let sled_address = get_sled_address(request.subnet); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6c55bb1479..55d6e2c117e 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,7 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::api::external::Ipv6Net; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -15,52 +15,9 @@ pub struct ShareRequest { pub identity: Vec, } -#[derive(thiserror::Error, Debug)] -pub enum SubnetError { - #[error("Subnet has unexpected prefix length: {0}")] - BadPrefixLength(u8), -} - -/// Represents subnets belonging to Sleds. -/// -/// This is a thin wrapper around the [`Ipv6Net`] type - which may be accessed -/// by [`AsRef`] - which adds additional validation that this is a /64 -/// subnet with an expected prefix. -// Note: The inner field is intentionally non-pub; this makes it -// more difficult to construct a sled subnet which avoids the -// validation performed by the constructor. -#[derive(Clone, Debug, Serialize, JsonSchema, PartialEq)] -pub struct SledSubnet(Ipv6Net); - -impl SledSubnet { - pub fn new(ip: Ipv6Net) -> Result { - let prefix = ip.0.prefix(); - if prefix != 64 { - return Err(SubnetError::BadPrefixLength(prefix)); - } - Ok(SledSubnet(ip)) - } -} - -impl<'de> serde::Deserialize<'de> for SledSubnet { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let net = Ipv6Net::deserialize(deserializer)?; - SledSubnet::new(net).map_err(serde::de::Error::custom) - } -} - -impl AsRef for SledSubnet { - fn as_ref(&self) -> &Ipv6Net { - &self.0 - } -} - /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SledAgentRequest { /// Portion of the IP space to be managed by the Sled Agent. - pub subnet: SledSubnet, + pub subnet: Ipv6Subnet, } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 1ec2858c0b7..6aa8e61df6d 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -6,8 +6,9 @@ use crate::config::ConfigError; use crate::params::{DatasetEnsureBody, ServiceRequest}; -use ipnetwork::Ipv6Network; -use omicron_common::address::{AZ_PREFIX, RACK_PREFIX}; +use omicron_common::address::{ + get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, +}; use serde::Deserialize; use serde::Serialize; use std::net::Ipv6Addr; @@ -47,14 +48,6 @@ pub struct SledRequest { pub dns_services: Vec, } -fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { - let net = Ipv6Network::new(addr, prefix).unwrap(); - - // ipnetwork inputs/outputs the provided IPv6 address, unmodified by the - // prefix. We manually mask `addr` based on `prefix` ourselves. - Ipv6Network::new(net.network(), prefix).unwrap() -} - impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); @@ -63,18 +56,18 @@ impl SetupServiceConfig { Ok(config) } - pub fn az_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, AZ_PREFIX) + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet) } /// Returns the subnet for our rack. - pub fn rack_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, RACK_PREFIX) + pub fn rack_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet) } /// Returns the subnet for the `index`-th sled in the rack. - pub fn sled_subnet(&self, index: u8) -> Ipv6Network { - omicron_common::address::get_64_subnet(self.rack_subnet(), index) + pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { + get_64_subnet(self.rack_subnet(), index) } } @@ -90,33 +83,43 @@ mod test { }; assert_eq!( - // Masked out in AZ Subnet - // vv - "fd00:1122:3344:0000::/48".parse::().unwrap(), + Ipv6Subnet::::new( + // Masked out in AZ Subnet + // vv + "fd00:1122:3344:0000::".parse::().unwrap(), + ), cfg.az_subnet() ); assert_eq!( - // Shows up from Rack Subnet - // vv - "fd00:1122:3344:0100::/56".parse::().unwrap(), + Ipv6Subnet::::new( + // Shows up from Rack Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), cfg.rack_subnet() ); assert_eq!( - // 0th Sled Subnet - // vv - "fd00:1122:3344:0100::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // 0th Sled Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), cfg.sled_subnet(0) ); assert_eq!( - // 1st Sled Subnet - // vv - "fd00:1122:3344:0101::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // 1st Sled Subnet + // vv + "fd00:1122:3344:0101::".parse::().unwrap(), + ), cfg.sled_subnet(1) ); assert_eq!( - // Last Sled Subnet - // vv - "fd00:1122:3344:01ff::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // Last Sled Subnet + // vv + "fd00:1122:3344:01ff::".parse::().unwrap(), + ), cfg.sled_subnet(255) ); } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 153ca1bc18b..c1daf50e9ec 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -8,7 +8,6 @@ use super::config::{SetupServiceConfig as Config, SledRequest}; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, - params::SledSubnet, }; use crate::params::ServiceRequest; use omicron_common::address::{get_sled_address, ReservedRackSubnet}; @@ -156,11 +155,11 @@ impl ServiceInner { let sled_agent_initialize = || async { client .start_sled(&bootstrap_agent_client::types::SledAgentRequest { - subnet: bootstrap_agent_client::types::SledSubnet( - bootstrap_agent_client::types::Ipv6Net( - request.subnet.as_ref().to_string(), + subnet: bootstrap_agent_client::types::Ipv6Subnet { + net: bootstrap_agent_client::types::Ipv6Net( + request.subnet.net().to_string(), ), - ), + }, }) .await .map_err(BackoffError::transient)?; @@ -304,8 +303,7 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = - ReservedRackSubnet::new(config.rack_subnet()); + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); @@ -347,9 +345,7 @@ impl ServiceInner { SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); let sled_subnet_index = u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = - SledSubnet::new(config.sled_subnet(sled_subnet_index).into()) - .expect("Created Invalid Subnet"); + let subnet = config.sled_subnet(sled_subnet_index); ( bootstrap_addr, @@ -527,7 +523,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); self.initialize_services( @@ -546,7 +542,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); self.initialize_datasets( sled_address, @@ -570,7 +566,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); let all_services = allocation From be3bc1bc847b4548f6e99f91162865916f7da360 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Apr 2022 15:19:38 -0400 Subject: [PATCH 38/61] Ipv6, comments --- internal-dns/tests/basic_test.rs | 21 ++++++++--------- sled-agent/src/params.rs | 5 ++++ sled-agent/src/rack_setup/service.rs | 34 ++++++++++++++++------------ sled-agent/src/services.rs | 4 ++++ 4 files changed, 38 insertions(+), 26 deletions(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index b58ecdb62f9..6218d87d334 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -10,7 +10,6 @@ use internal_dns_client::{ types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, Client, }; -use std::net::Ipv6Addr; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -116,16 +115,16 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = Client::new( - &format!("http://127.0.0.1:{}", dropshot_port), - log.clone(), - ); + let client = + Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V4(SocketAddrV4::new( - Ipv4Addr::new(127, 0, 0, 1), + socket_addr: SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, dns_port, + 0, + 0, )), protocol: Protocol::Udp, tls_dns_name: None, @@ -141,7 +140,7 @@ async fn init_client_server( let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { - bind_address: format!("127.0.0.1:{}", dns_port), + bind_address: format!("[::1]:{}", dns_port), }; tokio::spawn(async move { @@ -176,9 +175,7 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("127.0.0.1:{}", dropshot_port) - .parse() - .unwrap(), + bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 7ad76a634b2..21c0182a22e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -236,6 +236,11 @@ pub struct ServiceRequest { pub addresses: Vec, // The addresses in the global zone which should be created, if necessary // to route to the service. + // + // For addresses allocated within the Sled's Subnet, no extra address should + // be necessary. However, for other services - such the DNS service, which + // exists outside the sleds's typical subnet - adding an address in the GZ + // is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c1daf50e9ec..0cba727b598 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -520,20 +520,26 @@ impl ServiceInner { .collect::>()?; // Set up internal DNS services. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - - self.initialize_services( - sled_address, - &allocation.services_request.dns_services, - ) - .await?; - Ok(()) - }, - )) + futures::future::join_all( + plan.iter() + .filter(|(_, allocation)| { + // Only send requests to sleds that are supposed to be running + // DNS services. + !allocation.services_request.dns_services.is_empty() + }) + .map(|(_, allocation)| async move { + let sled_address = SocketAddr::V6(get_sled_address( + allocation.initialization_request.subnet, + )); + + self.initialize_services( + sled_address, + &allocation.services_request.dns_services, + ) + .await?; + Ok(()) + }), + ) .await .into_iter() .collect::>()?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b699842c868..f47da988d35 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -130,6 +130,10 @@ impl ServiceManager { } // Populates `existing_zones` according to the requests in `services`. + // + // At the point this function is invoked, IP addresses have already been + // allocated (by either RSS or Nexus). However, this function explicitly + // assigns such addresses to interfaces within zones. async fn initialize_services_locked( &self, existing_zones: &mut Vec, From 44139b9cba262c3e4e5f770b1693a075ac137f69 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 2 May 2022 11:31:52 -0400 Subject: [PATCH 39/61] fmt --- sled-agent/src/bootstrap/params.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 1b1adcee598..55d6e2c117e 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,10 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::address::{ - SLED_PREFIX, - Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; From 0de925cf146068a78a46a07de22943ccf2662bde Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 3 May 2022 11:51:33 -0400 Subject: [PATCH 40/61] Placeholder DNS setting / retreival --- Cargo.lock | 4 ++ common/src/address.rs | 2 + internal-dns-client/Cargo.toml | 5 +- internal-dns-client/src/lib.rs | 2 + internal-dns-client/src/multiclient.rs | 99 ++++++++++++++++++++++++++ sled-agent/Cargo.toml | 1 + sled-agent/src/rack_setup/service.rs | 58 +++++++++++++++ 7 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 internal-dns-client/src/multiclient.rs diff --git a/Cargo.lock b/Cargo.lock index d21ff3cab3a..0c9821c8851 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1896,12 +1896,15 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "omicron-common", "progenitor", "reqwest", "serde", "serde_json", "slog", "structopt", + "trust-dns-proto", + "trust-dns-resolver", ] [[package]] @@ -2543,6 +2546,7 @@ dependencies = [ "expectorate", "futures", "http", + "internal-dns-client", "ipnetwork", "macaddr", "mockall", diff --git a/common/src/address.rs b/common/src/address.rs index e18e3639e96..19d1c44211a 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -27,7 +27,9 @@ pub const DNS_REDUNDANCY: usize = 1; /// reserved for DNS servers. pub const MAX_DNS_REDUNDANCY: usize = 5; +/// The port for the UDP-based internal DNS name server. pub const DNS_PORT: u16 = 53; +/// The port for the HTTP-based internal DNS dropshot server. pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 22e28c91bc9..50ed6dbd3d2 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,9 +5,12 @@ edition = "2021" license = "MPL-2.0" [dependencies] +omicron-common = { path = "../common" } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } structopt = "0.3" -reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } +trust-dns-proto = "0.21" +trust-dns-resolver = "0.21" diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs index 49daa3d58ae..a8c5394f5b4 100644 --- a/internal-dns-client/src/lib.rs +++ b/internal-dns-client/src/lib.rs @@ -16,3 +16,5 @@ progenitor::generate_api!( slog::debug!(log, "client response"; "result" => ?result); }), ); + +pub mod multiclient; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs new file mode 100644 index 00000000000..6fc664ee367 --- /dev/null +++ b/internal-dns-client/src/multiclient.rs @@ -0,0 +1,99 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use omicron_common::address::{ + Ipv6Subnet, + ReservedRackSubnet, + AZ_PREFIX, + DNS_SERVER_PORT, + DNS_PORT, +}; +use slog::{info, Logger}; +use std::net::{SocketAddr, SocketAddrV6}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + + +/// A connection used to update multiple DNS servers. +pub struct Updater { + clients: Vec, +} + +impl Updater { + pub fn new(subnet: Ipv6Subnet, log: Logger) -> Self { + let clients = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|dns_subnet| { + let addr = dns_subnet.dns_address().ip(); + info!(log, "Adding DNS server: {}", addr); + crate::Client::new(&format!("http://[{}]:{}", addr, DNS_SERVER_PORT), log.clone()) + }) + .collect::>(); + + Self { + clients + } + } + + /// Sets a records on all DNS servers. + /// + /// Returns an error if setting the record fails on any server. + pub async fn dns_records_set<'a>( + &'a self, + body: &'a Vec + ) -> Result<(), crate::Error> { + + // TODO: Could be sent concurrently. + for client in &self.clients { + client.dns_records_set(body).await?; + } + + Ok(()) + } + + /// Deletes records in all DNS servers. + /// + /// Returns an error if deleting the record fails on any server. + pub async fn dns_records_delete<'a>( + &'a self, + body: &'a Vec + ) -> Result<(), crate::Error> { + // TODO: Could be sent concurrently + for client in &self.clients { + client.dns_records_delete(body).await?; + } + Ok(()) + } +} + +/// Creates a resolver using all internal DNS name servers. +pub fn create_resolver(subnet: Ipv6Subnet) + -> Result +{ + let mut rc = ResolverConfig::new(); + let dns_ips = ReservedRackSubnet::new(subnet) + .get_dns_subnets() + .into_iter() + .map(|subnet| subnet.dns_address().ip()) + .collect::>(); + + for dns_ip in dns_ips { + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V6(SocketAddrV6::new( + dns_ip, + DNS_PORT, + 0, + 0, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + } + TokioAsyncResolver::tokio(rc, ResolverOpts::default()) +} diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 6848b96991b..29126e27aae 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -16,6 +16,7 @@ chrono = { version = "0.4", features = [ "serde" ] } crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "257032d1e842901d427f344a396d78b9b85b183f" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } futures = "0.3.21" +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" macaddr = { version = "1.0.1", features = [ "serde_std" ] } nexus-client = { path = "../nexus-client" } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 9591aec07b6..e7571832c99 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -10,6 +10,7 @@ use crate::bootstrap::{ discovery::PeerMonitorObserver, params::SledAgentRequest, }; use crate::params::ServiceRequest; +use internal_dns_client::types::{DnsRecord, DnsKv, DnsRecordKey}; use omicron_common::address::{get_sled_address, ReservedRackSubnet}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -49,6 +50,11 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), + + // XXX CLEAN UP + + #[error(transparent)] + Dns(#[from] internal_dns_client::Error), } // The workload / information allocated to a single sled. @@ -574,6 +580,58 @@ impl ServiceInner { .into_iter() .collect::>()?; + let dns_servers = internal_dns_client::multiclient::Updater::new( + config.az_subnet(), + self.log.new(o!("client" => "DNS")), + ); + + // XXX Test record insertion + + let name = "hello.world"; + let addr = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); + let aaaa = DnsRecord::Aaaa(addr); + + let set_record = || async { + dns_servers.dns_records_set( + &vec![ + DnsKv { + key: DnsRecordKey { + name: name.into(), + }, + record: aaaa.clone(), + } + ], + ) + .await + .map_err(BackoffError::transient)?; + Ok::< + (), + BackoffError< + internal_dns_client::Error< + internal_dns_client::types::Error, + >, + >, + >(()) + }; + let log_failure = |error, _| { + warn!(self.log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify( + internal_service_policy(), + set_record, + log_failure, + ).await?; + + // XXX test record retreival + + let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let response = resolver.lookup_ip(name.to_owned() + ".").await.expect("Failed to lookup IP"); + let address = response.iter().next().expect("no addresses returned from DNS resolver"); + assert_eq!(address, addr); + + // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( |(_, allocation)| async move { From cf65591ef8476d3e561b5986a306bc17c9cca327 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 3 May 2022 11:59:25 -0400 Subject: [PATCH 41/61] Share logfile for sled agent and bootstrap agent --- sled-agent/src/bin/sled-agent.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 82000ceb308..175f18b2bd6 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -5,8 +5,6 @@ //! Executable program to run the sled agent use dropshot::ConfigDropshot; -use dropshot::ConfigLogging; -use dropshot::ConfigLoggingLevel; use omicron_common::api::external::Error; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; @@ -126,9 +124,7 @@ async fn do_run() -> Result<(), CmdError> { request_body_max_bytes: 1024 * 1024, ..Default::default() }, - log: ConfigLogging::StderrTerminal { - level: ConfigLoggingLevel::Info, - }, + log: config.log.clone(), rss_config, }; From 15f3efc03e75dc1964ff70f277de0891fb065c67 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 3 May 2022 15:35:03 -0400 Subject: [PATCH 42/61] wip --- sled-agent/src/rack_setup/service.rs | 84 ++++++++++++++++------------ 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index e7571832c99..a3a5ede9779 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -129,6 +129,47 @@ enum PeerExpectation { CreateNewPlan(usize), } +async fn insert_dns_record( + log: &Logger, + dns_servers: &internal_dns_client::multiclient::Updater, + name: &str, + addr: Ipv6Addr, +) -> Result<(), SetupServiceError> { + let aaaa = DnsRecord::Aaaa(addr); + let set_record = || async { + dns_servers.dns_records_set( + &vec![ + DnsKv { + key: DnsRecordKey { + name: name.into(), + }, + record: aaaa.clone(), + } + ], + ) + .await + .map_err(BackoffError::transient)?; + Ok::< + (), + BackoffError< + internal_dns_client::Error< + internal_dns_client::types::Error, + >, + >, + >(()) + }; + let log_failure = |error, _| { + warn!(log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify( + internal_service_policy(), + set_record, + log_failure, + ).await?; + Ok(()) +} + /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, @@ -586,51 +627,22 @@ impl ServiceInner { ); // XXX Test record insertion - - let name = "hello.world"; - let addr = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); - let aaaa = DnsRecord::Aaaa(addr); - - let set_record = || async { - dns_servers.dns_records_set( - &vec![ - DnsKv { - key: DnsRecordKey { - name: name.into(), - }, - record: aaaa.clone(), - } - ], - ) - .await - .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - internal_dns_client::Error< - internal_dns_client::types::Error, - >, - >, - >(()) - }; - let log_failure = |error, _| { - warn!(self.log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify( - internal_service_policy(), - set_record, - log_failure, + insert_dns_record( + &self.log, + &dns_servers, + "hello.world", + Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1), ).await?; // XXX test record retreival +/* let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) .expect("Failed to create DNS resolver"); let response = resolver.lookup_ip(name.to_owned() + ".").await.expect("Failed to lookup IP"); let address = response.iter().next().expect("no addresses returned from DNS resolver"); assert_eq!(address, addr); - +*/ // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( From d0bb994ddf8b7cc3ae98c9e6c96e3b7131e29c04 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 4 May 2022 12:37:05 -0400 Subject: [PATCH 43/61] Multi-record KV pairs, utility functions, names --- Cargo.lock | 1 + internal-dns-client/Cargo.toml | 1 + internal-dns-client/src/lib.rs | 1 + internal-dns-client/src/multiclient.rs | 69 ++++++++++++++++++++++++-- internal-dns-client/src/names.rs | 55 ++++++++++++++++++++ internal-dns/src/bin/dnsadm.rs | 6 +-- internal-dns/src/dns_data.rs | 12 ++--- internal-dns/tests/basic_test.rs | 12 +++-- openapi/internal-dns.json | 9 ++-- sled-agent/src/rack_setup/service.rs | 2 +- 10 files changed, 148 insertions(+), 20 deletions(-) create mode 100644 internal-dns-client/src/names.rs diff --git a/Cargo.lock b/Cargo.lock index 606f5a6a6ca..66e161abdde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1909,6 +1909,7 @@ dependencies = [ "structopt", "trust-dns-proto", "trust-dns-resolver", + "uuid", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 50ed6dbd3d2..8dd85341a5b 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -14,3 +14,4 @@ slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_d structopt = "0.3" trust-dns-proto = "0.21" trust-dns-resolver = "0.21" +uuid = { version = "0.8", features = [ "v4", "serde" ] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs index a8c5394f5b4..f7ce56f8521 100644 --- a/internal-dns-client/src/lib.rs +++ b/internal-dns-client/src/lib.rs @@ -18,3 +18,4 @@ progenitor::generate_api!( ); pub mod multiclient; +pub mod names; diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 6fc664ee367..9917b31bae6 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::types::{DnsRecord, DnsKv, DnsRecordKey, Srv}; use omicron_common::address::{ Ipv6Subnet, ReservedRackSubnet, @@ -9,13 +10,17 @@ use omicron_common::address::{ DNS_SERVER_PORT, DNS_PORT, }; -use slog::{info, Logger}; +use omicron_common::backoff::{ + internal_service_policy, retry_notify, BackoffError, +}; +use slog::{info, warn, Logger}; use std::net::{SocketAddr, SocketAddrV6}; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; use trust_dns_resolver::TokioAsyncResolver; +type DnsError = crate::Error; /// A connection used to update multiple DNS servers. pub struct Updater { @@ -39,13 +44,71 @@ impl Updater { } } + /// Utility function to insert: + /// - A set of uniquely-named AAAA records, each corresponding to an address + /// - An SRV record, pointing to each of the AAAA records. + pub async fn insert_dns_records( + &self, + log: &Logger, + aaaa: Vec<(crate::names::AAAA, SocketAddrV6)>, + srv_key: crate::names::SRV, + ) -> Result<(), DnsError> { + let mut records = Vec::with_capacity(aaaa.len() + 1); + + // Add one DnsKv per AAAA, each with a single record. + records.extend( + aaaa.iter().map(|(name, addr)| { + DnsKv { + key: DnsRecordKey { + name: name.to_string(), + }, + records: vec![DnsRecord::Aaaa(*addr.ip())], + } + }) + ); + + // Add the DnsKv for the SRV, with a record for each AAAA. + records.push( + DnsKv { + key: DnsRecordKey { + name: srv_key.to_string(), + }, + records: aaaa.iter().map(|(name, addr)| { + DnsRecord::Srv(Srv{ + prio: 0, + weight: 0, + port: addr.port(), + target: name.to_string(), + }) + }).collect::>(), + } + ); + + let set_record = || async { + self.dns_records_set(&records) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>(()) + }; + let log_failure = |error, _| { + warn!(log, "Failed to set DNS records"; "error" => ?error); + }; + + retry_notify( + internal_service_policy(), + set_record, + log_failure, + ).await?; + Ok(()) + } + /// Sets a records on all DNS servers. /// /// Returns an error if setting the record fails on any server. pub async fn dns_records_set<'a>( &'a self, body: &'a Vec - ) -> Result<(), crate::Error> { + ) -> Result<(), DnsError> { // TODO: Could be sent concurrently. for client in &self.clients { @@ -61,7 +124,7 @@ impl Updater { pub async fn dns_records_delete<'a>( &'a self, body: &'a Vec - ) -> Result<(), crate::Error> { + ) -> Result<(), DnsError> { // TODO: Could be sent concurrently for client in &self.clients { client.dns_records_delete(body).await?; diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs new file mode 100644 index 00000000000..2d72ce7cabd --- /dev/null +++ b/internal-dns-client/src/names.rs @@ -0,0 +1,55 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use uuid::Uuid; + +const DNS_ZONE: &str = "control-plane.oxide.internal"; + +pub enum SRV { + /// A service identified and accessed by name, such as "nexus", "CRDB", etc. + /// + /// This is used in cases where services are interchangeable. + Service(String), + + /// A service identified by name and a unique identifier. + /// + /// This is used in cases where services are not interchangeable, such as + /// for the Sled agent. + Backend(String, Uuid), +} + +impl fmt::Display for SRV { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + SRV::Service(name) => { + write!(f, "_{}._tcp.{}", name, DNS_ZONE) + }, + SRV::Backend(name, id) => { + write!(f, "_{}._tcp.{}.{}", name, id, DNS_ZONE) + } + } + } +} + +pub enum AAAA { + /// Identifies an AAAA record for a sled. + Sled(Uuid), + + /// Identifies an AAAA record for a zone within a sled. + Zone(Uuid), +} + +impl fmt::Display for AAAA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + AAAA::Sled(id) => { + write!(f, "{}.sled.{}", id, DNS_ZONE) + }, + AAAA::Zone(id) => { + write!(f, "{}.host.{}", id, DNS_ZONE) + } + } + } +} diff --git a/internal-dns/src/bin/dnsadm.rs b/internal-dns/src/bin/dnsadm.rs index 1c2d9a876fa..b968df7212d 100644 --- a/internal-dns/src/bin/dnsadm.rs +++ b/internal-dns/src/bin/dnsadm.rs @@ -81,7 +81,7 @@ async fn main() -> Result<()> { client .dns_records_set(&vec![DnsKv { key: DnsRecordKey { name: cmd.name }, - record: DnsRecord::Aaaa(cmd.addr), + records: vec![DnsRecord::Aaaa(cmd.addr)], }]) .await?; } @@ -89,12 +89,12 @@ async fn main() -> Result<()> { client .dns_records_set(&vec![DnsKv { key: DnsRecordKey { name: cmd.name }, - record: DnsRecord::Srv(Srv { + records: vec![DnsRecord::Srv(Srv { prio: cmd.prio, weight: cmd.weight, port: cmd.port, target: cmd.target, - }), + })], }]) .await?; } diff --git a/internal-dns/src/dns_data.rs b/internal-dns/src/dns_data.rs index 0ddc2978365..5f5f4e15a4e 100644 --- a/internal-dns/src/dns_data.rs +++ b/internal-dns/src/dns_data.rs @@ -61,7 +61,7 @@ pub struct DnsResponse { #[serde(rename = "DnsKv")] pub struct DnsKV { key: DnsRecordKey, - record: DnsRecord, + records: Vec, } // XXX some refactors to help @@ -202,7 +202,7 @@ impl Server { return; } }; - let record: DnsRecord = match serde_json::from_slice(bits.as_ref()) + let records: Vec = match serde_json::from_slice(bits.as_ref()) { Ok(r) => r, Err(e) => { @@ -216,7 +216,7 @@ impl Server { return; } }; - match response.tx.send(vec![DnsKV { key, record }]) { + match response.tx.send(vec![DnsKV { key, records }]) { Ok(_) => {} Err(e) => { error!(self.log, "response tx: {:?}", e); @@ -228,7 +228,7 @@ impl Server { loop { match iter.next() { Some(Ok((k, v))) => { - let record: DnsRecord = + let records: Vec = match serde_json::from_slice(v.as_ref()) { Ok(r) => r, Err(e) => { @@ -266,7 +266,7 @@ impl Server { }; result.push(DnsKV { key: DnsRecordKey { name: key }, - record, + records, }); } Some(Err(e)) => { @@ -291,7 +291,7 @@ impl Server { response: DnsResponse<()>, ) { for kv in records { - let bits = match serde_json::to_string(&kv.record) { + let bits = match serde_json::to_string(&kv.records) { Ok(bits) => bits, Err(e) => { error!(self.log, "serialize record: {}", e); diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index d03b1262c90..ecc5af3830f 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -32,7 +32,7 @@ pub async fn aaaa_crud() -> Result<(), anyhow::Error> { client .dns_records_set(&vec![DnsKv { key: name.clone(), - record: aaaa.clone(), + records: vec![aaaa.clone()], }]) .await?; @@ -40,7 +40,9 @@ pub async fn aaaa_crud() -> Result<(), anyhow::Error> { let records = client.dns_records_get().await?; assert_eq!(1, records.len()); assert_eq!(records[0].key.name, name.name); - match records[0].record { + + assert_eq!(1, records[0].records.len()); + match records[0].records[0] { DnsRecord::Aaaa(ra) => { assert_eq!(ra, addr); } @@ -76,7 +78,7 @@ pub async fn srv_crud() -> Result<(), anyhow::Error> { client .dns_records_set(&vec![DnsKv { key: name.clone(), - record: rec.clone(), + records: vec![rec.clone()], }]) .await?; @@ -84,7 +86,9 @@ pub async fn srv_crud() -> Result<(), anyhow::Error> { let records = client.dns_records_get().await?; assert_eq!(1, records.len()); assert_eq!(records[0].key.name, name.name); - match records[0].record { + + assert_eq!(1, records[0].records.len()); + match records[0].records[0] { DnsRecord::Srv(ref rs) => { assert_eq!(rs.prio, srv.prio); assert_eq!(rs.weight, srv.weight); diff --git a/openapi/internal-dns.json b/openapi/internal-dns.json index 708983bd9cd..ace9b7bd917 100644 --- a/openapi/internal-dns.json +++ b/openapi/internal-dns.json @@ -135,13 +135,16 @@ "key": { "$ref": "#/components/schemas/DnsRecordKey" }, - "record": { - "$ref": "#/components/schemas/DnsRecord" + "records": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsRecord" + } } }, "required": [ "key", - "record" + "records" ] }, "DnsRecord": { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a3a5ede9779..5c0c2487d0a 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -143,7 +143,7 @@ async fn insert_dns_record( key: DnsRecordKey { name: name.into(), }, - record: aaaa.clone(), + records: vec![aaaa.clone()], } ], ) From ce3e487890ef04e5678cc60593ac1e6c4cd26c6e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 4 May 2022 20:27:20 -0400 Subject: [PATCH 44/61] RSS address allocation --- common/src/address.rs | 9 +- internal-dns/src/dns_server.rs | 16 ++- sled-agent/src/params.rs | 12 +- sled-agent/src/rack_setup/config.rs | 33 ++++-- sled-agent/src/rack_setup/service.rs | 167 +++++++++++++++++++-------- sled-agent/src/services.rs | 22 ++-- sled-agent/src/sled_agent.rs | 4 +- sled-agent/src/storage_manager.rs | 26 ++--- 8 files changed, 199 insertions(+), 90 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 19d1c44211a..b4e9fbdd7b8 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -33,6 +33,9 @@ pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; +pub const COCKROACH_PORT: u16 = 32221; +pub const CRUCIBLE_PORT: u16 = 32345; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. // @@ -123,14 +126,16 @@ impl ReservedRackSubnet { } } -const SLED_AGENT_ADDRESS_INDEX: usize = 1; +pub const SLED_AGENT_ADDRESS_INDEX: u16 = 1; +/// The maximum number of addresses per sled subnet reserved for RSS. +pub const RSS_RESERVED_ADDRESSES: u16 = 10; /// Return the sled agent address for a subnet. /// /// This address will come from the first address of the [`SLED_PREFIX`] subnet. pub fn get_sled_address(sled_subnet: Ipv6Subnet) -> SocketAddrV6 { let sled_agent_ip = - sled_subnet.net().iter().nth(SLED_AGENT_ADDRESS_INDEX).unwrap(); + sled_subnet.net().iter().nth(SLED_AGENT_ADDRESS_INDEX.into()).unwrap(); SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) } diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index f6f5ed5209f..d7c1f085e4b 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -87,21 +87,29 @@ async fn handle_req<'a, 'b, 'c>( } }; - let record: crate::dns_data::DnsRecord = + let records: Vec = match serde_json::from_slice(bits.as_ref()) { Ok(r) => r, Err(e) => { error!(log, "deserialize record: {}", e); + nack(&log, &mr, &socket, &header, &src).await; return; } }; - match record { + if records.is_empty() { + error!(log, "No records found for {}", key); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + + // TODO: Pick something other than the first record? + match &records[0] { DnsRecord::AAAA(addr) => { let mut aaaa = Record::new(); aaaa.set_name(name) .set_rr_type(RecordType::AAAA) - .set_data(Some(RData::AAAA(addr))); + .set_data(Some(RData::AAAA(*addr))); let mresp = rb.build(header, vec![&aaaa], vec![], vec![], vec![]); @@ -134,7 +142,7 @@ async fn handle_req<'a, 'b, 'c>( }; srv.set_name(name) .set_rr_type(RecordType::SRV) - .set_data(Some(RData::SRV(SRV::new(prio, weight, port, tgt)))); + .set_data(Some(RData::SRV(SRV::new(*prio, *weight, *port, tgt)))); let mresp = rb.build(header, vec![&srv], vec![], vec![], vec![]); diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 21c0182a22e..49ecacff669 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::{Ipv6Addr, SocketAddr}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Used to request a Disk state change @@ -152,7 +152,7 @@ pub struct InstanceRuntimeStateRequested { pub enum DatasetKind { CockroachDb { /// The addresses of all nodes within the cluster. - all_addresses: Vec, + all_addresses: Vec, }, Crucible, Clickhouse, @@ -200,12 +200,14 @@ impl std::fmt::Display for DatasetKind { /// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct DatasetEnsureBody { + // The UUID of the dataset, as well as the service using it directly. + pub id: Uuid, // The name (and UUID) of the Zpool which we are inserting into. pub zpool_uuid: Uuid, // The type of the filesystem. pub dataset_kind: DatasetKind, // The address on which the zone will listen for requests. - pub address: SocketAddr, + pub address: SocketAddrV6, // NOTE: We could insert a UUID here, if we want that to be set by the // caller explicitly? Currently, the lack of a UUID implies that // "at most one dataset type" exists within a zpool. @@ -222,6 +224,7 @@ impl From for sled_agent_client::types::DatasetEnsureBody { zpool_uuid: p.zpool_uuid, dataset_kind: p.dataset_kind.into(), address: p.address.to_string(), + id: p.id, } } } @@ -230,6 +233,8 @@ impl From for sled_agent_client::types::DatasetEnsureBody { Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] pub struct ServiceRequest { + // The UUID of the service to be initialized. + pub id: Uuid, // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. @@ -248,6 +253,7 @@ pub struct ServiceRequest { impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { + id: s.id, name: s.name, addresses: s.addresses, gz_addresses: s.gz_addresses, diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 26f3ce8a321..db242fd73db 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -5,7 +5,6 @@ //! Interfaces for working with RSS config. use crate::config::ConfigError; -use crate::params::{DatasetEnsureBody, ServiceRequest}; use omicron_common::address::{ get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, }; @@ -13,6 +12,7 @@ use serde::Deserialize; use serde::Serialize; use std::net::Ipv6Addr; use std::path::Path; +use uuid::Uuid; /// Configuration for the "rack setup service", which is controlled during /// bootstrap. @@ -29,23 +29,38 @@ pub struct SetupServiceConfig { pub rack_subnet: Ipv6Addr, #[serde(default, rename = "request")] - pub requests: Vec, + pub requests: Vec, +} + +/// Hard-coded configurations for where to place CRDB datasets. +/// +/// Converts into a [`crate::params::DatasetEnsureBody`]. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct CockroachDataset { + pub zpool_uuid: Uuid, +} + +/// Hard-coded configurations for where to place services +/// +/// Converts into a [`crate::params::ServiceRequest`]. +// TODO: Should this exist? It should just be Nexus + DNS servers. +// We could hard-code their bringup in the RSS, since we know where +// it's coming from. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct Service { + pub name: String, } /// A request to initialize a sled. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] -pub struct SledRequest { +pub struct HardcodedSledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] - pub datasets: Vec, + pub datasets: Vec, /// Services to be instantiated. #[serde(default, rename = "service")] - pub services: Vec, - - /// DNS Services to be instantiated. - #[serde(default, rename = "dns_service")] - pub dns_services: Vec, + pub services: Vec, } impl SetupServiceConfig { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 5c0c2487d0a..23741adfe2f 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,14 +4,13 @@ //! Rack Setup Service implementation -use super::config::{SetupServiceConfig as Config, SledRequest}; +use super::config::{SetupServiceConfig as Config}; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, }; -use crate::params::ServiceRequest; -use internal_dns_client::types::{DnsRecord, DnsKv, DnsRecordKey}; -use omicron_common::address::{get_sled_address, ReservedRackSubnet}; +use crate::params::{DatasetEnsureBody, ServiceRequest}; +use omicron_common::address::{get_sled_address, ReservedRackSubnet, RSS_RESERVED_ADDRESSES}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -21,7 +20,8 @@ use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::PathBuf; use thiserror::Error; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, OnceCell}; +use uuid::Uuid; /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] @@ -57,6 +57,21 @@ pub enum SetupServiceError { Dns(#[from] internal_dns_client::Error), } +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SledRequest { + /// Datasets to be created. + #[serde(default, rename = "dataset")] + pub datasets: Vec, + + /// Services to be instantiated. + #[serde(default, rename = "service")] + pub services: Vec, + + /// DNS Services to be instantiated. + #[serde(default, rename = "dns_service")] + pub dns_services: Vec, +} + // The workload / information allocated to a single sled. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] struct SledAllocation { @@ -129,56 +144,43 @@ enum PeerExpectation { CreateNewPlan(usize), } -async fn insert_dns_record( - log: &Logger, - dns_servers: &internal_dns_client::multiclient::Updater, - name: &str, - addr: Ipv6Addr, -) -> Result<(), SetupServiceError> { - let aaaa = DnsRecord::Aaaa(addr); - let set_record = || async { - dns_servers.dns_records_set( - &vec![ - DnsKv { - key: DnsRecordKey { - name: name.into(), - }, - records: vec![aaaa.clone()], - } - ], - ) - .await - .map_err(BackoffError::transient)?; - Ok::< - (), - BackoffError< - internal_dns_client::Error< - internal_dns_client::types::Error, - >, - >, - >(()) - }; - let log_failure = |error, _| { - warn!(log, "Failed to set DNS records"; "error" => ?error); - }; - - retry_notify( - internal_service_policy(), - set_record, - log_failure, - ).await?; - Ok(()) +struct AddressBumpAllocator { + last_addr: Ipv6Addr, +} + +// TODO: Testable? +impl AddressBumpAllocator { + fn new(sled_addr: Ipv6Addr) -> Self { + Self { + last_addr: sled_addr, + } + } + + fn next(&mut self) -> Option { + let mut segments: [u16; 8] = self.last_addr.segments(); + segments[7] = segments[7].checked_add(1)?; + if segments[7] > RSS_RESERVED_ADDRESSES { + return None; + } + self.last_addr = Ipv6Addr::from(segments); + Some(self.last_addr) + } } /// The implementation of the Rack Setup Service. struct ServiceInner { log: Logger, peer_monitor: Mutex, + dns_servers: OnceCell, } impl ServiceInner { fn new(log: Logger, peer_monitor: PeerMonitorObserver) -> Self { - ServiceInner { log, peer_monitor: Mutex::new(peer_monitor) } + ServiceInner { + log, + peer_monitor: Mutex::new(peer_monitor), + dns_servers: OnceCell::new() + } } async fn initialize_sled_agent( @@ -282,13 +284,40 @@ impl ServiceInner { ) .await?; } + + // CRDB datasets are treated as services. + let crdb_datasets = datasets.iter().filter(|dataset| { + matches!(dataset.dataset_kind, crate::params::DatasetKind::CockroachDb { .. }) + }); + + let aaaa = crdb_datasets.map(|dataset| { + ( + internal_dns_client::names::AAAA::Zone(dataset.id), + dataset.address, + ) + }).collect::>(); + let srv_key = internal_dns_client::names::SRV::Service("cockroachdb".into()); + + self.dns_servers + .get() + .expect("DNS servers must be initialized first") + .insert_dns_records( + &self.log, + aaaa, + srv_key + ).await?; + + + // TODO: add dns records for non-crdb datasets too + // TODO: alternatively, REMOVE THEM + Ok(()) } async fn initialize_services( &self, sled_address: SocketAddr, - services: &Vec, + services: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() @@ -378,17 +407,56 @@ impl ServiceInner { // services. let mut request = { if idx < config.requests.len() { - config.requests[idx].clone() + + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let datasets = config.requests[idx].datasets.iter().map(|dataset| { + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_uuid: dataset.zpool_uuid, + dataset_kind: crate::params::DatasetKind::CockroachDb { + all_addresses: vec![ address ], + }, + address, + } + }).collect::>(); + + let services = config.requests[idx].services.iter().map(|svc| { + let address = addr_alloc.next().expect("Not enough addrs"); + ServiceRequest { + id: Uuid::new_v4(), + name: svc.name.clone(), + addresses: vec![ address ], + gz_addresses: vec![], + + } + }).collect::>(); + + SledRequest { + datasets, + services, + ..Default::default() + } } else { SledRequest::default() } }; - // The first enumerated addresses get assigned the additional + // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; request.dns_services.push(ServiceRequest { + id: Uuid::new_v4(), name: "internal-dns".to_string(), addresses: vec![dns_subnet.dns_address().ip()], gz_addresses: vec![dns_subnet.gz_address().ip()], @@ -625,8 +693,10 @@ impl ServiceInner { config.az_subnet(), self.log.new(o!("client" => "DNS")), ); + self.dns_servers.set(dns_servers).map_err(|_| ()).expect("Already set DNS servers"); // XXX Test record insertion +/* insert_dns_record( &self.log, &dns_servers, @@ -636,7 +706,6 @@ impl ServiceInner { // XXX test record retreival -/* let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) .expect("Failed to create DNS resolver"); let response = resolver.lookup_ip(name.to_owned() + ".").await.expect("Failed to lookup IP"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 36437198e70..347ae40dee7 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -407,6 +407,7 @@ mod test { dladm::MockDladm, dladm::PhysicalLink, svc, zone::MockZones, }; use std::os::unix::process::ExitStatusExt; + use uuid::Uuid; const SVC_NAME: &str = "my_svc"; const EXPECTED_ZONE_NAME: &str = "oxz_my_svc"; @@ -458,11 +459,12 @@ mod test { } // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service(mgr: &ServiceManager) { + async fn ensure_new_service(mgr: &ServiceManager, id: Uuid) { let _expectations = expect_new_service(); mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], @@ -474,9 +476,10 @@ mod test { // Prepare to call "ensure" for a service which already exists. We should // return the service without actually installing a new zone. - async fn ensure_existing_service(mgr: &ServiceManager) { + async fn ensure_existing_service(mgr: &ServiceManager, id: Uuid) { mgr.ensure(ServiceEnsureBody { services: vec![ServiceRequest { + id, name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], @@ -520,7 +523,8 @@ mod test { .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -544,8 +548,9 @@ mod test { .await .unwrap(); - ensure_new_service(&mgr).await; - ensure_existing_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; + ensure_existing_service(&mgr, id).await; drop_service_manager(mgr); logctx.cleanup_successful(); @@ -570,7 +575,9 @@ mod test { ) .await .unwrap(); - ensure_new_service(&mgr).await; + + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Before we re-create the service manager - notably, using the same @@ -607,7 +614,8 @@ mod test { ) .await .unwrap(); - ensure_new_service(&mgr).await; + let id = Uuid::new_v4(); + ensure_new_service(&mgr, id).await; drop_service_manager(mgr); // Next, delete the config. This means the service we just created will diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 46090319783..4beb378decf 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -21,7 +21,7 @@ use omicron_common::api::{ internal::nexus::UpdateArtifact, }; use slog::Logger; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::SocketAddrV6; use std::sync::Arc; use uuid::Uuid; @@ -218,7 +218,7 @@ impl SledAgent { &self, zpool_uuid: Uuid, dataset_kind: DatasetKind, - address: SocketAddr, + address: SocketAddrV6, ) -> Result<(), Error> { self.storage .upsert_filesystem(zpool_uuid, dataset_kind, address) diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 24a4d01b330..9b50aeb14f6 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,8 +5,7 @@ //! Management of sled-local storage. use crate::illumos::dladm::PhysicalLink; -use crate::illumos::running_zone::{InstalledZone, RunningZone}; -use crate::illumos::vnic::VnicAllocator; +use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; use crate::illumos::{zfs::Mountpoint, zone::ZONE_PREFIX, zpool::ZpoolInfo}; @@ -23,7 +22,7 @@ use serde::{Deserialize, Serialize}; use slog::Logger; use std::collections::HashMap; use std::convert::TryFrom; -use std::net::SocketAddr; +use std::net::{IpAddr, SocketAddrV6}; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; @@ -200,13 +199,13 @@ impl DatasetName { // by the Sled Agent. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] struct DatasetInfo { - address: SocketAddr, + address: SocketAddrV6, kind: DatasetKind, name: DatasetName, } impl DatasetInfo { - fn new(pool: &str, kind: DatasetKind, address: SocketAddr) -> DatasetInfo { + fn new(pool: &str, kind: DatasetKind, address: SocketAddrV6) -> DatasetInfo { match kind { DatasetKind::CockroachDb { .. } => DatasetInfo { name: DatasetName::new(pool, "cockroachdb"), @@ -234,7 +233,7 @@ impl DatasetInfo { &self, log: &Logger, zone: &RunningZone, - address: SocketAddr, + address: SocketAddrV6, do_format: bool, ) -> Result<(), Error> { match self.kind { @@ -303,8 +302,7 @@ impl DatasetInfo { // Await liveness of the cluster. info!(log, "start_zone: awaiting liveness of CRDB"); let check_health = || async { - let http_addr = SocketAddr::new(address.ip(), 8080); - reqwest::get(format!("http://{}/health?ready=1", http_addr)) + reqwest::get(format!("http://[{}]:{}/health?ready=1", address.ip(), 8080)) .await .map_err(backoff::BackoffError::transient) }; @@ -457,7 +455,7 @@ async fn ensure_running_zone( do_format: bool, ) -> Result { let address_request = - AddressRequest::new_static(dataset_info.address.ip(), None); + AddressRequest::new_static(IpAddr::V6(*dataset_info.address.ip()), None); let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request) @@ -516,7 +514,7 @@ type NotifyFut = dyn futures::Future< struct NewFilesystemRequest { zpool_id: Uuid, dataset_kind: DatasetKind, - address: SocketAddr, + address: SocketAddrV6, responder: oneshot::Sender>, } @@ -654,7 +652,7 @@ impl StorageWorker { fn add_datasets_notify( &self, nexus_notifications: &mut FuturesOrdered>>, - datasets: Vec<(Uuid, SocketAddr, DatasetKind)>, + datasets: Vec<(Uuid, SocketAddrV6, DatasetKind)>, pool_id: Uuid, ) { let nexus = self.nexus_client.clone(); @@ -764,7 +762,7 @@ impl StorageWorker { &self, pool: &mut Pool, dataset_name: &DatasetName, - ) -> Result<(Uuid, SocketAddr, DatasetKind), Error> { + ) -> Result<(Uuid, SocketAddrV6, DatasetKind), Error> { let name = dataset_name.full(); let id = Zfs::get_oxide_value(&name, "uuid")? .parse::() @@ -943,7 +941,7 @@ impl StorageManager { &self, zpool_id: Uuid, dataset_kind: DatasetKind, - address: SocketAddr, + address: SocketAddrV6, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); let request = NewFilesystemRequest { @@ -984,7 +982,7 @@ mod test { #[test] fn serialize_dataset_info() { let dataset_info = DatasetInfo { - address: "127.0.0.1:8080".parse().unwrap(), + address: "[fd00::1]:8080".parse().unwrap(), kind: DatasetKind::Crucible, name: DatasetName::new("pool", "dataset"), }; From 0090ec21e45874f09462b6f9e9db68f127e35fb0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 4 May 2022 20:44:38 -0400 Subject: [PATCH 45/61] Add nexus model, disable RSS services --- common/src/sql/dbinit.sql | 18 ++++++++++++++ nexus/src/db/datastore.rs | 2 +- nexus/src/db/model.rs | 33 +++++++++++++++++-------- nexus/src/db/schema.rs | 13 ++++++++++ openapi/sled-agent.json | 10 ++++++++ smf/sled-agent/config-rss.toml | 44 +++++++++++++++++----------------- 6 files changed, 87 insertions(+), 33 deletions(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 099bd736579..c95175eb73e 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -72,6 +72,24 @@ CREATE TABLE omicron.public.sled ( last_used_address INET NOT NULL ); +/* + * Services + */ + +CREATE TABLE omicron.public.service ( + /* Identity metadata (asset) */ + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + + /* FK into the Sled table */ + sled_id UUID NOT NULL, + + /* The IP address and bound port of the service. */ + ip INET NOT NULL, + port INT4 NOT NULL, +); + /* * ZPools of Storage, attached to Sleds. * Typically these are backed by a single physical disk. diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 66a1e94c0b9..67c01bb7169 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -3412,7 +3412,7 @@ mod test { // Test sled-specific IPv6 address allocation #[tokio::test] async fn test_sled_ipv6_address_allocation() { - use crate::db::model::STATIC_IPV6_ADDRESS_OFFSET; + use omicron_common::address::RSS_RESERVED_ADDRESSES as STATIC_IPV6_ADDRESS_OFFSET; use std::net::Ipv6Addr; let logctx = dev::test_setup_log("test_sled_ipv6_address_allocation"); diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs index a74045ba1c5..f3e442e5951 100644 --- a/nexus/src/db/model.rs +++ b/nexus/src/db/model.rs @@ -11,7 +11,7 @@ use crate::db::schema::{ console_session, dataset, disk, global_image, image, instance, metric_producer, network_interface, organization, oximeter, project, rack, region, role_assignment_builtin, role_builtin, router_route, silo, - silo_user, sled, snapshot, ssh_key, update_available_artifact, + silo_user, sled, service, snapshot, ssh_key, update_available_artifact, user_builtin, volume, vpc, vpc_firewall_rule, vpc_router, vpc_subnet, zpool, }; @@ -626,19 +626,11 @@ pub struct Sled { pub last_used_address: ipv6::Ipv6Addr, } -// TODO-correctness: We need a small offset here, while services and -// their addresses are still hardcoded in the mock RSS config file at -// `./smf/sled-agent/config-rss.toml`. This avoids conflicts with those -// addresses, but should be removed when they are entirely under the -// control of Nexus or RSS. -// -// See https://github.com/oxidecomputer/omicron/issues/732 for tracking issue. -pub(crate) const STATIC_IPV6_ADDRESS_OFFSET: u16 = 20; impl Sled { pub fn new(id: Uuid, addr: SocketAddrV6) -> Self { let last_used_address = { let mut segments = addr.ip().segments(); - segments[7] += STATIC_IPV6_ADDRESS_OFFSET; + segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; ipv6::Ipv6Addr::from(Ipv6Addr::from(segments)) }; Self { @@ -672,6 +664,27 @@ impl DatastoreCollection for Sled { type CollectionIdColumn = zpool::dsl::sled_id; } +impl DatastoreCollection for Sled { + type CollectionId = Uuid; + type GenerationNumberColumn = sled::dsl::rcgen; + type CollectionTimeDeletedColumn = sled::dsl::time_deleted; + type CollectionIdColumn = service::dsl::sled_id; +} + +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[diesel(table_name = service)] +pub struct Service { + #[diesel(embed)] + identity: ServiceIdentity, + + // Sled to which this Zpool belongs. + pub sled_id: Uuid, + + // ServiceAddress (Sled Agent). + pub ip: ipv6::Ipv6Addr, + pub port: i32, +} + /// Database representation of a Pool. /// /// A zpool represents a ZFS storage pool, allocated on a single diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index 21245dc5dad..a5cb4dcf212 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -262,6 +262,19 @@ table! { } } +table! { + service (id) { + id -> Uuid, + time_created -> Timestamptz, + time_modified -> Timestamptz, + + sled_id -> Uuid, + + ip -> Inet, + port -> Int4, + } +} + table! { zpool (id) { id -> Uuid, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index cc6cfc8c20c..0b53c9330b8 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -247,6 +247,10 @@ "dataset_kind": { "$ref": "#/components/schemas/DatasetKind" }, + "id": { + "type": "string", + "format": "uuid" + }, "zpool_uuid": { "type": "string", "format": "uuid" @@ -255,6 +259,7 @@ "required": [ "address", "dataset_kind", + "id", "zpool_uuid" ] }, @@ -976,12 +981,17 @@ "format": "ipv6" } }, + "id": { + "type": "string", + "format": "uuid" + }, "name": { "type": "string" } }, "required": [ "addresses", + "id", "name" ] }, diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index cf9582ab71a..61799f168cd 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -10,20 +10,20 @@ rack_subnet = "fd00:1122:3344:0100::" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. -[[request.dataset]] -zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" +# [[request.dataset]] +# zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::6]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::7]:32345" +# dataset_kind.type = "crucible" +# +# [[request.dataset]] +# zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# address = "[fd00:1122:3344:0101::8]:32345" +# dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" @@ -33,10 +33,10 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. -[[request.dataset]] -zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" +# [[request.dataset]] +# zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# address = "[fd00:1122:3344:0101::5]:8123" +# dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" @@ -45,7 +45,7 @@ gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. -[[request.service]] -name = "oximeter" -addresses = [ "fd00:1122:3344:0101::4" ] -gz_addresses = [] +# [[request.service]] +# name = "oximeter" +# addresses = [ "fd00:1122:3344:0101::4" ] +# gz_addresses = [] From 83f4019df18fc9665d312d4e1f9d3c173682d39c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 4 May 2022 21:36:01 -0400 Subject: [PATCH 46/61] fmt --- sled-agent/src/services.rs | 99 ++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 1d54696cc2b..6e588b7fe4f 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -324,57 +324,60 @@ impl ServiceManager { message: "Not enough addresses".to_string(), } })?; - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS server address [{}]:{}", - address, DNS_SERVER_PORT - ), - err, - })?; - - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/dns_address=[{}]:{}", - address, DNS_PORT - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS address [{}]:{}", - address, DNS_SERVER_PORT - ), - err, - })?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Setting DNS server address [{}]:{}", + address, DNS_SERVER_PORT + ), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Setting DNS address [{}]:{}", + address, DNS_SERVER_PORT + ), + err, + })?; // Refresh the manifest with the new properties we set, // so they become "effective" properties when the service is enabled. - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Refreshing DNS service config for {}", - default_smf_name - ), - err, - })?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refreshing DNS service config for {}", + default_smf_name + ), + err, + })?; } _ => { info!( From a61284808d2a352d1a2ba9d2a486dc29757a4866 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 5 May 2022 11:19:55 -0400 Subject: [PATCH 47/61] fix illumos-only test --- sled-agent/tests/integration_tests/multicast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/tests/integration_tests/multicast.rs b/sled-agent/tests/integration_tests/multicast.rs index 1aa9f8b103f..61a0d8284ab 100644 --- a/sled-agent/tests/integration_tests/multicast.rs +++ b/sled-agent/tests/integration_tests/multicast.rs @@ -30,7 +30,7 @@ async fn test_multicast_bootstrap_address() { let address_name = "testbootstrap6"; let addrobj = AddrObject::new(&link.0, address_name).unwrap(); zone::Zones::ensure_has_global_zone_v6_address( - Some(link), + link, *address.ip(), address_name, ) From ea674f1a275b9ab559d17bb4302f712eb7a9d561 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 5 May 2022 12:11:14 -0400 Subject: [PATCH 48/61] Use SqlU16 as port --- common/src/sql/dbinit.sql | 2 +- nexus/src/db/model.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 661f5156d80..51bd85bc811 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -87,7 +87,7 @@ CREATE TABLE omicron.public.service ( /* The IP address and bound port of the service. */ ip INET NOT NULL, - port INT4 NOT NULL, + port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL ); /* diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs index cd9cb00a8d4..30affeb1ba4 100644 --- a/nexus/src/db/model.rs +++ b/nexus/src/db/model.rs @@ -326,7 +326,7 @@ impl From for sled_agent_client::types::Generation { /// Representation of a [`u16`] in the database. /// We need this because the database does not support unsigned types. /// This handles converting from the database's INT4 to the actual u16. -#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd, FromSqlRow)] +#[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, Eq, Ord, PartialEq, PartialOrd)] #[diesel(sql_type = sql_types::Int4)] #[repr(transparent)] pub struct SqlU16(pub u16); @@ -681,7 +681,7 @@ pub struct Service { // ServiceAddress (Sled Agent). pub ip: ipv6::Ipv6Addr, - pub port: i32, + pub port: SqlU16, } /// Database representation of a Pool. @@ -2407,7 +2407,10 @@ impl ToSql for VpcFirewallRulePriority { &'a self, out: &mut serialize::Output<'a, '_, Pg>, ) -> serialize::Result { - SqlU16(self.0 .0).to_sql(&mut out.reborrow()) + >::to_sql( + &SqlU16(self.0.0), + &mut out.reborrow(), + ) } } From 15ea04314a5db39c08217d315dbc46de39e5b891 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 5 May 2022 13:54:23 -0400 Subject: [PATCH 49/61] index --- common/src/sql/dbinit.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 51bd85bc811..e59826e0b20 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -90,6 +90,11 @@ CREATE TABLE omicron.public.service ( port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL ); +/* Add an index which lets us look up the services on a sled */ +CREATE INDEX ON omicron.public.service ( + sled_id +) + /* * ZPools of Storage, attached to Sleds. * Typically these are backed by a single physical disk. From 317547189677785a3d8adf3c1a4537810c00681e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 6 May 2022 09:44:05 -0400 Subject: [PATCH 50/61] fix sql format --- common/src/sql/dbinit.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 58de055cd0f..5320aa68089 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -93,7 +93,7 @@ CREATE TABLE omicron.public.service ( /* Add an index which lets us look up the services on a sled */ CREATE INDEX ON omicron.public.service ( sled_id -) +); /* * ZPools of Storage, attached to Sleds. From d7954060058977d3fba79e9890218f3d7bb55fdc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 6 May 2022 14:04:17 -0400 Subject: [PATCH 51/61] more on upserting services --- common/src/api/external/mod.rs | 1 + nexus/src/db/datastore.rs | 39 +++++++++++++++++++++++++++++++++- nexus/src/db/model/service.rs | 15 +++++++++++++ nexus/src/nexus.rs | 13 ++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 6ceab0ddcb3..5d3edbcde62 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -527,6 +527,7 @@ pub enum ResourceType { Instance, NetworkInterface, Rack, + Service, Sled, SagaDbg, Snapshot, diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 8b7745fa161..31602307de0 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -41,7 +41,7 @@ use crate::db::{ InstanceRuntimeState, Name, NetworkInterface, Organization, OrganizationUpdate, OximeterInfo, ProducerEndpoint, Project, ProjectUpdate, Region, RoleAssignment, RoleBuiltin, RouterRoute, - RouterRouteUpdate, Silo, SiloUser, Sled, SshKey, + RouterRouteUpdate, Service, Silo, SiloUser, Sled, SshKey, UpdateAvailableArtifact, UserBuiltin, Volume, Vpc, VpcFirewallRule, VpcRouter, VpcRouterUpdate, VpcSubnet, VpcSubnetUpdate, VpcUpdate, Zpool, @@ -252,6 +252,43 @@ impl DataStore { }) } + /// Stores a new service in the database. + pub async fn service_upsert(&self, service: Service) -> CreateResult { + use db::schema::service::dsl; + + let sled_id = service.sled_id; + Sled::insert_resource( + sled_id, + diesel::insert_into(dsl::service) + .values(service.clone()) + .on_conflict(dsl::id) + .do_update() + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::sled_id.eq(excluded(dsl::sled_id)), + dsl::ip.eq(excluded(dsl::ip)), + dsl::port.eq(excluded(dsl::port)), + )), + ) + .insert_and_get_result_async(self.pool()) + .await + .map_err(|e| match e { + AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { + type_name: ResourceType::Sled, + lookup_type: LookupType::ById(sled_id), + }, + AsyncInsertError::DatabaseError(e) => { + public_error_from_diesel_pool( + e, + ErrorHandler::Conflict( + ResourceType::Service, + &service.id().to_string(), + ), + ) + } + }) + } + fn get_allocated_regions_query( volume_id: Uuid, ) -> impl RunnableQuery<(Dataset, Region)> { diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 45a98fd46e2..28e22925ab7 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -6,6 +6,7 @@ use super::SqlU16; use crate::db::schema::service; use crate::db::ipv6; use db_macros::Asset; +use std::net::SocketAddrV6; use uuid::Uuid; #[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] @@ -22,3 +23,17 @@ pub struct Service { pub port: SqlU16, } +impl Service { + pub fn new( + id: Uuid, + sled_id: Uuid, + addr: SocketAddrV6, + ) -> Self { + Self { + identity: ServiceIdentity::new(id), + sled_id, + ip: addr.ip().into(), + port: addr.port().into(), + } + } +} diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index fa04499d862..725745ee712 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -517,6 +517,19 @@ impl Nexus { Ok(()) } + /// Upserts a Service into the database, updating it if it already exists. + pub async fn upsert_service( + &self, + id: Uuid, + sled_id: Uuid, + info: ServicePutRequest, + ) -> Result<(), Error> { + info!(self.log, "upserting service"; "sled_id" => sled_id.to_string(), "service_id" => id.to_string()); + let service = db::model::Service::new(id, sled_id, &info); + self.db_datastore.service_upsert(service).await?; + Ok(()) + } + /// Insert a new record of an Oximeter collector server. pub async fn upsert_oximeter_collector( &self, From 17ff5b84a318a9ff8e95dfb1b403aae4a3099581 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 9 May 2022 10:37:28 -0400 Subject: [PATCH 52/61] Add endpoint for 'service_put' --- common/src/sql/dbinit.sql | 5 +++ nexus/src/app/sled.rs | 7 ++-- nexus/src/db/model/mod.rs | 2 + nexus/src/db/model/service.rs | 6 ++- nexus/src/db/model/service_kind.rs | 35 ++++++++++++++++++ nexus/src/db/schema.rs | 3 ++ nexus/src/internal_api/http_entrypoints.rs | 36 +++++++++++++++++- nexus/src/internal_api/params.rs | 43 ++++++++++++++++++++++ 8 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 nexus/src/db/model/service_kind.rs diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql index 5320aa68089..bda2488adf6 100644 --- a/common/src/sql/dbinit.sql +++ b/common/src/sql/dbinit.sql @@ -95,6 +95,11 @@ CREATE INDEX ON omicron.public.service ( sled_id ); +CREATE TYPE omicron.public.service_kind AS ENUM ( + 'nexus', + 'oximeter' +); + /* * ZPools of Storage, attached to Sleds. * Typically these are backed by a single physical disk. diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index ab149993d03..98f01920a60 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -9,6 +9,7 @@ use crate::db; use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::DatasetKind; +use crate::db::model::ServiceKind; use crate::internal_api::params::ZpoolPutRequest; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -150,12 +151,12 @@ impl super::Nexus { &self, id: Uuid, sled_id: Uuid, - info: ServicePutRequest, + address: SocketAddrV6, + kind: ServiceKind, ) -> Result<(), Error> { info!(self.log, "upserting service"; "sled_id" => sled_id.to_string(), "service_id" => id.to_string()); - let service = db::model::Service::new(id, sled_id, &info); + let service = db::model::Service::new(id, sled_id, address, kind); self.db_datastore.service_upsert(service).await?; Ok(()) } - } diff --git a/nexus/src/db/model/mod.rs b/nexus/src/db/model/mod.rs index 83f03d85567..d5f61611197 100644 --- a/nexus/src/db/model/mod.rs +++ b/nexus/src/db/model/mod.rs @@ -33,6 +33,7 @@ mod region; mod role_assignment; mod role_builtin; mod service; +mod service_kind; mod silo; mod silo_user; mod sled; @@ -80,6 +81,7 @@ pub use region::*; pub use role_assignment::*; pub use role_builtin::*; pub use service::*; +pub use service_kind::*; pub use silo::*; pub use silo_user::*; pub use sled::*; diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index 28e22925ab7..b0b1f35e825 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::SqlU16; +use super::{ServiceKind, SqlU16}; use crate::db::schema::service; use crate::db::ipv6; use db_macros::Asset; @@ -21,6 +21,8 @@ pub struct Service { // ServiceAddress (Sled Agent). pub ip: ipv6::Ipv6Addr, pub port: SqlU16, + + kind: ServiceKind, } impl Service { @@ -28,12 +30,14 @@ impl Service { id: Uuid, sled_id: Uuid, addr: SocketAddrV6, + kind: ServiceKind, ) -> Self { Self { identity: ServiceIdentity::new(id), sled_id, ip: addr.ip().into(), port: addr.port().into(), + kind, } } } diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs new file mode 100644 index 00000000000..257217ef365 --- /dev/null +++ b/nexus/src/db/model/service_kind.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use crate::internal_api; +use serde::{Deserialize, Serialize}; +use std::io::Write; + +impl_enum_type!( + #[derive(SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "service_kind"))] + pub struct ServiceKindEnum; + + #[derive(Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = ServiceKindEnum)] + pub enum ServiceKind; + + // Enum values + Nexus => b"nexus" + Oximeter => b"oximeter" +); + +impl From for ServiceKind { + fn from(k: internal_api::params::ServiceKind) -> Self { + match k { + internal_api::params::ServiceKind::Nexus => { + ServiceKind::Nexus + } + internal_api::params::ServiceKind::Oximeter => { + ServiceKind::Oximeter + } + } + } +} diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs index e3a5f30fc70..1490d4a364d 100644 --- a/nexus/src/db/schema.rs +++ b/nexus/src/db/schema.rs @@ -272,6 +272,8 @@ table! { ip -> Inet, port -> Int4, + + kind -> crate::db::model::ServiceKindEnum, } } @@ -480,6 +482,7 @@ allow_tables_to_appear_in_same_query!( saga, saga_node_event, console_session, + service, sled, router_route, vpc, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 4df80916d77..7dd9146df3f 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -7,8 +7,8 @@ use crate::context::OpContext; use crate::ServerContext; use super::params::{ - DatasetPutRequest, DatasetPutResponse, OximeterInfo, SledAgentStartupInfo, - ZpoolPutRequest, ZpoolPutResponse, + DatasetPutRequest, DatasetPutResponse, OximeterInfo, ServicePutRequest, + SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -37,6 +37,7 @@ type NexusApiDescription = ApiDescription>; pub fn internal_api() -> NexusApiDescription { fn register_endpoints(api: &mut NexusApiDescription) -> Result<(), String> { api.register(cpapi_sled_agents_post)?; + api.register(service_put)?; api.register(zpool_put)?; api.register(dataset_put)?; api.register(cpapi_instances_put)?; @@ -87,6 +88,37 @@ async fn cpapi_sled_agents_post( apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } +#[derive(Deserialize, JsonSchema)] +struct ServicePathParam { + sled_id: Uuid, + service_id: Uuid, +} + +/// Report that a service should be running on a sled. +#[endpoint { + method = PUT, + path = "/sled_agents/{sled_id}/services/{service_id}", + }] +async fn service_put( + rqctx: Arc>>, + path_params: Path, + info: TypedBody, +) -> Result { + let apictx = rqctx.context(); + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let info = info.into_inner(); + nexus + .upsert_service( + path.service_id, + path.sled_id, + info.address, + info.kind.into(), + ) + .await?; + Ok(HttpResponseUpdatedNoContent()) +} + /// Path parameters for Sled Agent requests (internal API) #[derive(Deserialize, JsonSchema)] struct ZpoolPathParam { diff --git a/nexus/src/internal_api/params.rs b/nexus/src/internal_api/params.rs index 185a55df077..e637e4e8014 100644 --- a/nexus/src/internal_api/params.rs +++ b/nexus/src/internal_api/params.rs @@ -97,6 +97,49 @@ pub struct DatasetPutResponse { pub quota: Option, } +/// Describes the purpose of the service. +#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq)] +pub enum ServiceKind { + Nexus, + Oximeter, +} + +impl fmt::Display for ServiceKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ServiceKind::*; + let s = match self { + Nexus => "nexus", + Oximeter => "oximeter", + }; + write!(f, "{}", s) + } +} + +impl FromStr for ServiceKind { + type Err = omicron_common::api::external::Error; + + fn from_str(s: &str) -> Result { + use ServiceKind::*; + match s { + "nexus" => Ok(Nexus), + "oximeter" => Ok(Oximeter), + _ => Err(Self::Err::InternalError { + internal_message: format!("Unknown service kind: {}", s), + }), + } + } +} + +/// Describes a service on a sled +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct ServicePutRequest { + /// Address on which a service is responding to requests. + pub address: SocketAddrV6, + + /// Type of service being inserted. + pub kind: ServiceKind, +} + /// Message used to notify Nexus that this oximeter instance is up and running. #[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize)] pub struct OximeterInfo { From cef66af12b7017af57a2bfdc77fc037415d44213 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 9 May 2022 15:14:25 -0400 Subject: [PATCH 53/61] Working on refactoring rt config --- common/src/lib.rs | 3 ++- common/src/nexus_config.rs | 27 ++++++++++++++++++++ common/src/{config.rs => postgres_config.rs} | 0 nexus/src/app/mod.rs | 4 +-- nexus/src/config.rs | 15 ++++------- nexus/src/context.rs | 4 +-- nexus/src/db/config.rs | 2 +- nexus/src/lib.rs | 10 +++++--- sled-agent/src/rack_setup/service.rs | 4 ++- test-utils/src/dev/db.rs | 2 +- 10 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 common/src/nexus_config.rs rename common/src/{config.rs => postgres_config.rs} (100%) diff --git a/common/src/lib.rs b/common/src/lib.rs index 2a933283425..d90ecdb7333 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -24,7 +24,8 @@ pub mod address; pub mod api; pub mod backoff; pub mod cmd; -pub mod config; +pub mod nexus_config; +pub mod postgres_config; #[macro_export] macro_rules! generate_logging_api { diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs new file mode 100644 index 00000000000..ade3bd33dda --- /dev/null +++ b/common/src/nexus_config.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration parameters to Nexus that are usually only known +//! at runtime. + +use super::postgres_config::PostgresConfigWithUrl; +use dropshot::ConfigDropshot; +use serde::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; +use uuid::Uuid; + +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct RuntimeConfig { + /// Uuid of the Nexus instance + pub id: Uuid, + /// Dropshot configuration for external API server + pub dropshot_external: ConfigDropshot, + /// Dropshot configuration for internal API server + pub dropshot_internal: ConfigDropshot, + /// Database parameters + #[serde_as(as = "DisplayFromStr")] + pub database_url: PostgresConfigWithUrl, +} + diff --git a/common/src/config.rs b/common/src/postgres_config.rs similarity index 100% rename from common/src/config.rs rename to common/src/postgres_config.rs diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 71eb3dfdca8..2acbf7aec38 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -109,7 +109,7 @@ impl Nexus { authz: Arc, ) -> Arc { let pool = Arc::new(pool); - let my_sec_id = db::SecId::from(config.id); + let my_sec_id = db::SecId::from(config.runtime.id); let db_datastore = Arc::new(db::DataStore::new(Arc::clone(&pool))); let sec_store = Arc::new(db::CockroachDbSecStore::new( my_sec_id, @@ -140,7 +140,7 @@ impl Nexus { populate_start(populate_ctx, Arc::clone(&db_datastore)); let nexus = Nexus { - id: config.id, + id: config.runtime.id, rack_id, log: log.new(o!()), api_rack_identity: db::model::RackIdentity::new(rack_id), diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 64501288243..d3e7de91c9d 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -5,10 +5,9 @@ //! Interfaces for parsing configuration files and working with a nexus server //! configuration -use crate::db; use anyhow::anyhow; -use dropshot::ConfigDropshot; use dropshot::ConfigLogging; +use omicron_common::nexus_config::RuntimeConfig; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; @@ -54,21 +53,17 @@ pub struct TimeseriesDbConfig { /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct Config { - /// Dropshot configuration for external API server - pub dropshot_external: ConfigDropshot, - /// Dropshot configuration for internal API server - pub dropshot_internal: ConfigDropshot, - /// Identifier for this instance of Nexus - pub id: uuid::Uuid, + /// A variety of configuration parameters only known at runtime. + #[serde(flatten)] + pub runtime: RuntimeConfig, /// Console-related tunables pub console: ConsoleConfig, /// Server-wide logging configuration. pub log: ConfigLogging, - /// Database parameters - pub database: db::Config, /// Authentication-related configuration pub authn: AuthnConfig, /// Timeseries database configuration. + // TODO: Should this be removed? Nexus needs to initialize it. pub timeseries_db: TimeseriesDbConfig, /// Updates-related configuration. Updates APIs return 400 Bad Request when this is /// unconfigured. diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 235852717d4..9f98b358167 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -90,7 +90,7 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = HttpService { name: name.to_string(), id: config.id }; + let target = HttpService { name: name.to_string(), id: config.runtime.id }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -102,7 +102,7 @@ impl ServerContext { }; let internal_latencies = create_tracker("nexus-internal"); let external_latencies = create_tracker("nexus-external"); - let producer_registry = ProducerRegistry::with_id(config.id); + let producer_registry = ProducerRegistry::with_id(config.runtime.id); producer_registry .register_producer(internal_latencies.clone()) .unwrap(); diff --git a/nexus/src/db/config.rs b/nexus/src/db/config.rs index b4066ce3cbe..afe51bca66d 100644 --- a/nexus/src/db/config.rs +++ b/nexus/src/db/config.rs @@ -4,7 +4,7 @@ //! Nexus database configuration -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use serde::Deserialize; use serde::Serialize; use serde_with::serde_as; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index e56503c3c09..fca111b3147 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -85,15 +85,17 @@ impl Server { rack_id: Uuid, log: &Logger, ) -> Result { - let log = log.new(o!("name" => config.id.to_string())); + let log = log.new(o!("name" => config.runtime.id.to_string())); info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); - let pool = db::Pool::new(&config.database); + let pool = db::Pool::new(&db::Config { + url: config.runtime.database_url.clone(), + }); let apictx = ServerContext::new(rack_id, ctxlog, pool, &config)?; let http_server_starter_external = dropshot::HttpServerStarter::new( - &config.dropshot_external, + &config.runtime.dropshot_external, external_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_external")), @@ -101,7 +103,7 @@ impl Server { .map_err(|error| format!("initializing external server: {}", error))?; let http_server_starter_internal = dropshot::HttpServerStarter::new( - &config.dropshot_internal, + &config.runtime.dropshot_internal, internal_api(), Arc::clone(&apictx), &log.new(o!("component" => "dropshot_internal")), diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 23741adfe2f..d2cf07241c3 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -286,6 +286,8 @@ impl ServiceInner { } // CRDB datasets are treated as services. + // + // XXX: Hardcoding CRDB? let crdb_datasets = datasets.iter().filter(|dataset| { matches!(dataset.dataset_kind, crate::params::DatasetKind::CockroachDb { .. }) }); @@ -309,7 +311,7 @@ impl ServiceInner { // TODO: add dns records for non-crdb datasets too - // TODO: alternatively, REMOVE THEM + // TODO: alternatively, REMOVE THEM! Make RSS set up crdb exclusively. Ok(()) } diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index 5449bfc4139..b7112ae1a37 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -8,7 +8,7 @@ use crate::dev::poll; use anyhow::anyhow; use anyhow::bail; use anyhow::Context; -use omicron_common::config::PostgresConfigWithUrl; +use omicron_common::postgres_config::PostgresConfigWithUrl; use std::ffi::{OsStr, OsString}; use std::fmt; use std::ops::Deref; From 186aa0392ea1e734dac632f178daebedd82dd99e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 10 May 2022 21:13:54 -0400 Subject: [PATCH 54/61] Separate rt/pkg config, start to use DNS in nexus --- Cargo.lock | 2 + common/Cargo.toml | 1 + common/src/address.rs | 3 + common/src/nexus_config.rs | 84 +++++++++++++++-- nexus/Cargo.toml | 3 +- nexus/src/app/mod.rs | 4 +- nexus/src/bin/nexus.rs | 20 ++++- nexus/src/config.rs | 83 ++++------------- nexus/src/context.rs | 46 ++++++++-- nexus/src/lib.rs | 9 +- sled-agent/src/params.rs | 22 +++++ sled-agent/src/rack_setup/config.rs | 15 ---- sled-agent/src/rack_setup/service.rs | 92 ++++++++++--------- sled-agent/src/services.rs | 130 ++++++++++++++++++++------- sled-agent/src/sled_agent.rs | 9 +- smf/nexus/manifest.xml | 6 +- smf/sled-agent/config-rss.toml | 8 +- 17 files changed, 350 insertions(+), 187 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1abc6582d6..1597f4c79e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2696,6 +2696,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", + "toml", "uuid", ] @@ -2773,6 +2774,7 @@ dependencies = [ "http", "httptest", "hyper", + "internal-dns-client", "ipnetwork", "lazy_static", "libc", diff --git a/common/Cargo.toml b/common/Cargo.toml index 60ae13ba071..8492e84cefc 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -30,6 +30,7 @@ structopt = "0.3" thiserror = "1.0" tokio = { version = "1.18", features = [ "full" ] } tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-0_8" ] } +toml = "0.5.9" uuid = { version = "0.8", features = [ "serde", "v4" ] } parse-display = "0.5.4" progenitor = { git = "https://github.com/oxidecomputer/progenitor" } diff --git a/common/src/address.rs b/common/src/address.rs index b4e9fbdd7b8..462ff907efc 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -36,6 +36,9 @@ pub const SLED_AGENT_PORT: u16 = 12345; pub const COCKROACH_PORT: u16 = 32221; pub const CRUCIBLE_PORT: u16 = 32345; +pub const NEXUS_EXTERNAL_PORT: u16 = 12220; +pub const NEXUS_INTERNAL_PORT: u16 = 12221; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. // diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index ade3bd33dda..def28803279 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -5,13 +5,63 @@ //! Configuration parameters to Nexus that are usually only known //! at runtime. -use super::postgres_config::PostgresConfigWithUrl; +use super::address::{Ipv6Subnet, RACK_PREFIX}; use dropshot::ConfigDropshot; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DisplayFromStr}; +use std::fmt; +use std::net::SocketAddr; +use std::path::{Path, PathBuf}; use uuid::Uuid; -#[serde_as] +#[derive(Debug)] +pub struct LoadError { + path: PathBuf, + kind: LoadErrorKind, +} + +#[derive(Debug)] +pub enum LoadErrorKind { + Io(std::io::Error), + Parse(toml::de::Error), +} + +impl From<(PathBuf, std::io::Error)> for LoadError { + fn from((path, err): (PathBuf, std::io::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Io(err) } + } +} + +impl From<(PathBuf, toml::de::Error)> for LoadError { + fn from((path, err): (PathBuf, toml::de::Error)) -> Self { + LoadError { path, kind: LoadErrorKind::Parse(err) } + } +} + +impl std::error::Error for LoadError {} + +impl fmt::Display for LoadError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.kind { + LoadErrorKind::Io(e) => { + write!(f, "read \"{}\": {}", self.path.display(), e) + } + LoadErrorKind::Parse(e) => { + write!(f, "parse \"{}\": {}", self.path.display(), e) + } + } + } +} + +impl std::cmp::PartialEq for LoadError { + fn eq(&self, other: &std::io::Error) -> bool { + if let LoadErrorKind::Io(e) = &self.kind { + e.kind() == other.kind() + } else { + false + } + } +} + #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct RuntimeConfig { /// Uuid of the Nexus instance @@ -20,8 +70,30 @@ pub struct RuntimeConfig { pub dropshot_external: ConfigDropshot, /// Dropshot configuration for internal API server pub dropshot_internal: ConfigDropshot, - /// Database parameters - #[serde_as(as = "DisplayFromStr")] - pub database_url: PostgresConfigWithUrl, + /// Portion of the IP space to be managed by the Rack. + pub subnet: Ipv6Subnet, + + /// An optional database address. + /// + /// If `None`, Nexus will use DNS to infer this value. + pub database_address: Option, + +// /// Database parameters +// #[serde_as(as = "DisplayFromStr")] +// pub database_url: PostgresConfigWithUrl, } +impl RuntimeConfig { + /// Load a `RuntimeConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) + } +} diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index e03bfd2b2ac..c5494e2e8b5 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -16,6 +16,7 @@ base64 = "0.13.0" bb8 = "0.8.0" cookie = "0.16" crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "257032d1e842901d427f344a396d78b9b85b183f" } +db-macros = { path = "src/db/db-macros" } diesel = { version = "2.0.0-rc.0", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } fatfs = "0.3.5" futures = "0.3.21" @@ -23,7 +24,7 @@ headers = "0.3.7" hex = "0.4.3" http = "0.2.7" hyper = "0.14" -db-macros = { path = "src/db/db-macros" } +internal-dns-client = { path = "../internal-dns-client" } ipnetwork = "0.18" lazy_static = "1.4.0" libc = "0.2.123" diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 2acbf7aec38..01be311d348 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -124,7 +124,7 @@ impl Nexus { sec_store, )); let timeseries_client = - oximeter_db::Client::new(config.timeseries_db.address, &log); + oximeter_db::Client::new(config.pkg.timeseries_db.address, &log); // TODO-cleanup We may want a first-class subsystem for managing startup // background tasks. It could use a Future for each one, a status enum @@ -150,7 +150,7 @@ impl Nexus { recovery_task: std::sync::Mutex::new(None), populate_status, timeseries_client, - updates_config: config.updates.clone(), + updates_config: config.pkg.updates.clone(), opctx_alloc: OpContext::for_background( log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), diff --git a/nexus/src/bin/nexus.rs b/nexus/src/bin/nexus.rs index 161d88f7b95..e6480f3822f 100644 --- a/nexus/src/bin/nexus.rs +++ b/nexus/src/bin/nexus.rs @@ -15,10 +15,11 @@ use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; +use omicron_common::nexus_config::RuntimeConfig; use omicron_nexus::run_openapi_external; use omicron_nexus::run_openapi_internal; use omicron_nexus::run_server; -use omicron_nexus::Config; +use omicron_nexus::{Config, PackageConfig}; use std::path::PathBuf; use structopt::StructOpt; @@ -39,8 +40,11 @@ struct Args { )] openapi_internal: bool, - #[structopt(name = "CONFIG_FILE_PATH", parse(from_os_str))] - config_file_path: PathBuf, + #[structopt(name = "PACKAGE_CONFIG_FILE_PATH", parse(from_os_str))] + pkg_config_file_path: PathBuf, + + #[structopt(name = "RUNTIME_CONFIG_FILE_PATH", parse(from_os_str))] + rt_config_file_path: PathBuf, } #[tokio::main] @@ -55,9 +59,17 @@ async fn do_run() -> Result<(), CmdError> { CmdError::Usage(format!("parsing arguments: {}", err.message)) })?; - let config = Config::from_file(args.config_file_path) + let rt_config = RuntimeConfig::from_file(args.rt_config_file_path) + .map_err(|e| CmdError::Failure(e.to_string()))?; + + let pkg_config = PackageConfig::from_file(args.pkg_config_file_path) .map_err(|e| CmdError::Failure(e.to_string()))?; + let config = Config { + runtime: rt_config, + pkg: pkg_config, + }; + if args.openapi { run_openapi_external().map_err(CmdError::Failure) } else if args.openapi_internal { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index d3e7de91c9d..f64dbbbfeed 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -7,12 +7,11 @@ use anyhow::anyhow; use dropshot::ConfigLogging; -use omicron_common::nexus_config::RuntimeConfig; +use omicron_common::nexus_config::{LoadError, RuntimeConfig}; use serde::Deserialize; use serde::Serialize; use serde_with::DeserializeFromStr; use serde_with::SerializeDisplay; -use std::fmt; use std::net::SocketAddr; use std::path::{Path, PathBuf}; @@ -52,10 +51,7 @@ pub struct TimeseriesDbConfig { /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -pub struct Config { - /// A variety of configuration parameters only known at runtime. - #[serde(flatten)] - pub runtime: RuntimeConfig, +pub struct PackageConfig { /// Console-related tunables pub console: ConsoleConfig, /// Server-wide logging configuration. @@ -71,52 +67,26 @@ pub struct Config { pub updates: Option, } -#[derive(Debug)] -pub struct LoadError { - path: PathBuf, - kind: LoadErrorKind, -} -#[derive(Debug)] -pub enum LoadErrorKind { - Io(std::io::Error), - Parse(toml::de::Error), -} - -impl From<(PathBuf, std::io::Error)> for LoadError { - fn from((path, err): (PathBuf, std::io::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Io(err) } - } -} - -impl From<(PathBuf, toml::de::Error)> for LoadError { - fn from((path, err): (PathBuf, toml::de::Error)) -> Self { - LoadError { path, kind: LoadErrorKind::Parse(err) } - } -} - -impl std::error::Error for LoadError {} - -impl fmt::Display for LoadError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match &self.kind { - LoadErrorKind::Io(e) => { - write!(f, "read \"{}\": {}", self.path.display(), e) - } - LoadErrorKind::Parse(e) => { - write!(f, "parse \"{}\": {}", self.path.display(), e) - } - } +impl PackageConfig { + /// Load a `PackageConfig` from the given TOML file + /// + /// This config object can then be used to create a new `Nexus`. + /// The format is described in the README. + pub fn from_file>(path: P) -> Result { + let path = path.as_ref(); + let file_contents = std::fs::read_to_string(path) + .map_err(|e| (path.to_path_buf(), e))?; + let config_parsed: Self = toml::from_str(&file_contents) + .map_err(|e| (path.to_path_buf(), e))?; + Ok(config_parsed) } } -impl std::cmp::PartialEq for LoadError { - fn eq(&self, other: &std::io::Error) -> bool { - if let LoadErrorKind::Io(e) = &self.kind { - e.kind() == other.kind() - } else { - false - } - } +#[derive(Clone, Debug, PartialEq)] +pub struct Config { + /// A variety of configuration parameters only known at runtime. + pub runtime: RuntimeConfig, + pub pkg: PackageConfig, } /// List of supported external authn schemes @@ -153,21 +123,6 @@ impl std::fmt::Display for SchemeName { } } -impl Config { - /// Load a `Config` from the given TOML file - /// - /// This config object can then be used to create a new `Nexus`. - /// The format is described in the README. - pub fn from_file>(path: P) -> Result { - let path = path.as_ref(); - let file_contents = std::fs::read_to_string(path) - .map_err(|e| (path.to_path_buf(), e))?; - let config_parsed: Config = toml::from_str(&file_contents) - .map_err(|e| (path.to_path_buf(), e))?; - Ok(config_parsed) - } -} - #[cfg(test)] mod test { use super::{ diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 9f98b358167..0c47762da02 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -19,13 +19,17 @@ use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use omicron_common::api::external::Error; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; +use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; use slog::Logger; use std::collections::BTreeMap; use std::env; use std::fmt::Debug; +use std::net::SocketAddr; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use std::time::Instant; use std::time::SystemTime; @@ -67,13 +71,13 @@ pub struct ConsoleConfig { impl ServerContext { /// Create a new context with the given rack id and log. This creates the /// underlying nexus as well. - pub fn new( + pub async fn new( rack_id: Uuid, log: Logger, - pool: db::Pool, config: &config::Config, ) -> Result, String> { let nexus_schemes = config + .pkg .authn .schemes_external .iter() @@ -113,11 +117,11 @@ impl ServerContext { // Support both absolute and relative paths. If configured dir is // absolute, use it directly. If not, assume it's relative to the // current working directory. - let static_dir = if config.console.static_dir.is_absolute() { - Some(config.console.static_dir.to_owned()) + let static_dir = if config.pkg.console.static_dir.is_absolute() { + Some(config.pkg.console.static_dir.to_owned()) } else { env::current_dir() - .map(|root| root.join(&config.console.static_dir)) + .map(|root| root.join(&config.pkg.console.static_dir)) .ok() }; @@ -132,6 +136,32 @@ impl ServerContext { // like console index.html. leaving that out for now so we don't break // nexus in dev for everyone + // Set up DNS Client + let az_subnet = Ipv6Subnet::::new(config.runtime.subnet.net().ip()); + let resolver = internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e.to_string()))?; + + // Set up DB pool + let address = if let Some(address) = config.runtime.database_address { + address + } else { + let response = resolver.lookup_ip("cockroachdb.") + .await + .map_err(|e| format!("Failed to lookup IP: {}", e.to_string()))?; + let address = response + .iter() + .next() + .ok_or_else(|| "no addresses returned from DNS resolver".to_string())?; + SocketAddr::new(address, COCKROACH_PORT) + }; + let url = PostgresConfigWithUrl::from_str( + &format!("postgresql://root@{}/omicron?sslmode=disable", address) + ) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e.to_string()))?; + let pool = db::Pool::new(&db::Config { + url + }); + Ok(Arc::new(ServerContext { nexus: Nexus::new_with_id( rack_id, @@ -149,14 +179,14 @@ impl ServerContext { producer_registry, console_config: ConsoleConfig { session_idle_timeout: Duration::minutes( - config.console.session_idle_timeout_minutes.into(), + config.pkg.console.session_idle_timeout_minutes.into(), ), session_absolute_timeout: Duration::minutes( - config.console.session_absolute_timeout_minutes.into(), + config.pkg.console.session_absolute_timeout_minutes.into(), ), static_dir, cache_control_max_age: Duration::minutes( - config.console.cache_control_max_age_minutes.into(), + config.pkg.console.cache_control_max_age_minutes.into(), ), }, })) diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index fca111b3147..d2db3240100 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,7 @@ pub mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; -pub use config::Config; +pub use config::{Config, PackageConfig}; pub use context::ServerContext; pub use crucible_agent_client; use external_api::http_entrypoints::external_api; @@ -89,10 +89,8 @@ impl Server { info!(log, "setting up nexus server"); let ctxlog = log.new(o!("component" => "ServerContext")); - let pool = db::Pool::new(&db::Config { - url: config.runtime.database_url.clone(), - }); - let apictx = ServerContext::new(rack_id, ctxlog, pool, &config)?; + + let apictx = ServerContext::new(rack_id, ctxlog, &config).await?; let http_server_starter_external = dropshot::HttpServerStarter::new( &config.runtime.dropshot_external, @@ -157,6 +155,7 @@ pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; let (drain, registration) = slog_dtrace::with_drain( config + .pkg .log .to_logger("nexus") .map_err(|message| format!("initializing logger: {}", message))?, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index c716ef819d4..232b1ffee7f 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -240,6 +240,26 @@ impl From for sled_agent_client::types::DatasetEnsureBody { } } +/// Describes service-specific parameters. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +// Struct variant enums require some assistance for serialization to TOML. +#[serde(tag = "type")] +pub enum ServiceType { + Nexus { + internal_address: SocketAddrV6, + external_address: SocketAddrV6, + }, + InternalDns { + server_address: SocketAddrV6, + dns_address: SocketAddrV6, + }, +} + +/// Describes a request to create a service. This information +/// should be sufficient for a Sled Agent to start a zone +/// containing the requested service. #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] @@ -259,6 +279,8 @@ pub struct ServiceRequest { // is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, + // Any other service-specific parameters. + pub service_type: ServiceType, } impl From for sled_agent_client::types::ServiceRequest { diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index db242fd73db..dfb24de1de0 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -40,27 +40,12 @@ pub struct CockroachDataset { pub zpool_uuid: Uuid, } -/// Hard-coded configurations for where to place services -/// -/// Converts into a [`crate::params::ServiceRequest`]. -// TODO: Should this exist? It should just be Nexus + DNS servers. -// We could hard-code their bringup in the RSS, since we know where -// it's coming from. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -pub struct Service { - pub name: String, -} - /// A request to initialize a sled. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct HardcodedSledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] pub datasets: Vec, - - /// Services to be instantiated. - #[serde(default, rename = "service")] - pub services: Vec, } impl SetupServiceConfig { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index d2cf07241c3..1f56634339f 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -9,8 +9,8 @@ use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, }; -use crate::params::{DatasetEnsureBody, ServiceRequest}; -use omicron_common::address::{get_sled_address, ReservedRackSubnet, RSS_RESERVED_ADDRESSES}; +use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; +use omicron_common::address::{get_sled_address, ReservedRackSubnet, RSS_RESERVED_ADDRESSES, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_PORT, DNS_SERVER_PORT}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -23,6 +23,9 @@ use thiserror::Error; use tokio::sync::{Mutex, OnceCell}; use uuid::Uuid; +// The number of Nexus instances to create from RSS. +const NEXUS_COUNT: usize = 1; + /// Describes errors which may occur while operating the setup service. #[derive(Error, Debug)] pub enum SetupServiceError { @@ -149,6 +152,7 @@ struct AddressBumpAllocator { } // TODO: Testable? +// TODO: Could exist in another file? impl AddressBumpAllocator { fn new(sled_addr: Ipv6Addr) -> Self { Self { @@ -404,24 +408,40 @@ impl ServiceInner { let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. - let mut request = { - if idx < config.requests.len() { - - let sled_subnet_index = - u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = AddressBumpAllocator::new(*get_sled_address(subnet).ip()); - - let datasets = config.requests[idx].datasets.iter().map(|dataset| { - let address = SocketAddrV6::new( - addr_alloc.next().expect("Not enough addrs"), - omicron_common::address::COCKROACH_PORT, - 0, - 0, - ); + let sled_subnet_index = + u8::try_from(idx + 1).expect("Too many peers!"); + let subnet = config.sled_subnet(sled_subnet_index); + let mut addr_alloc = AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + + let mut request = SledRequest::default(); + + // The first enumerated sleds get assigned the responsibility + // of hosting Nexus. + if idx < NEXUS_COUNT { + let address = addr_alloc.next().expect("Not enough addrs"); + request.services.push(ServiceRequest { + id: Uuid::new_v4(), + name: "nexus".to_string(), + addresses: vec![address], + gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0), + external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0), + }, + }) + } + + // The first enumerated sleds host the CRDB datasets, using + // zpools described from the underlying config file. + if idx < config.requests.len() { + for dataset in &config.requests[idx].datasets { + let address = SocketAddrV6::new( + addr_alloc.next().expect("Not enough addrs"), + omicron_common::address::COCKROACH_PORT, + 0, + 0, + ); + request.datasets.push( DatasetEnsureBody { id: Uuid::new_v4(), zpool_uuid: dataset.zpool_uuid, @@ -430,38 +450,24 @@ impl ServiceInner { }, address, } - }).collect::>(); - - let services = config.requests[idx].services.iter().map(|svc| { - let address = addr_alloc.next().expect("Not enough addrs"); - ServiceRequest { - id: Uuid::new_v4(), - name: svc.name.clone(), - addresses: vec![ address ], - gz_addresses: vec![], - - } - }).collect::>(); - - SledRequest { - datasets, - services, - ..Default::default() - } - } else { - SledRequest::default() + ); } - }; + } // The first enumerated sleds get assigned the additional // responsibility of being internal DNS servers. if idx < dns_subnets.len() { let dns_subnet = &dns_subnets[idx]; + let dns_addr = dns_subnet.dns_address().ip(); request.dns_services.push(ServiceRequest { id: Uuid::new_v4(), name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address().ip()], + addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], + service_type: ServiceType::InternalDns { + server_address: SocketAddrV6::new(dns_addr, DNS_SERVER_PORT, 0, 0), + dns_address: SocketAddrV6::new(dns_addr, DNS_PORT, 0, 0), + }, }); } @@ -498,7 +504,7 @@ impl ServiceInner { // Once we've constructed a plan, write it down to durable storage. let serialized_plan = toml::Value::try_from(&plan) - .expect("Cannot serialize configuration"); + .expect(&format!("Cannot serialize configuration: {:#?}", plan)); let plan_str = toml::to_string(&serialized_plan) .expect("Cannot turn config to string"); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 347ae40dee7..b750afafaae 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -8,12 +8,15 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::{AddressRequest, Zones}; -use crate::params::{ServiceEnsureBody, ServiceRequest}; -use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; +use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; +use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; +use dropshot::ConfigDropshot; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX, RACK_PREFIX}; +use omicron_common::nexus_config::RuntimeConfig as NexusRuntimeConfig; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; -use std::net::IpAddr; +use std::net::{IpAddr, SocketAddr}; use std::path::{Path, PathBuf}; use tokio::sync::Mutex; @@ -78,6 +81,7 @@ pub struct ServiceManager { zones: Mutex>, vnic_allocator: VnicAllocator, physical_link: PhysicalLink, + sled_subnet: Ipv6Subnet, } impl ServiceManager { @@ -93,6 +97,7 @@ impl ServiceManager { pub async fn new( log: Logger, physical_link: PhysicalLink, + sled_subnet: Ipv6Subnet, config_path: Option, ) -> Result { debug!(log, "Creating new ServiceManager"); @@ -105,6 +110,7 @@ impl ServiceManager { physical_link.clone(), ), physical_link, + sled_subnet, }; let config_path = mgr.services_config_path(); @@ -244,16 +250,60 @@ impl ServiceManager { let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); - match service.name.as_str() { - "internal-dns" => { - info!(self.log, "Setting up internal-dns service"); - let address = - service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } + // TODO: match by type + // TODO: add nexus? + match service.service_type { + ServiceType::Nexus { + internal_address, + external_address, + } => { + info!(self.log, "Setting up Nexus service"); + + // Nexus takes a separate config file for parameters which + // cannot be known at packaging time. + let runtime_config = NexusRuntimeConfig { + id: service.id, + dropshot_external: ConfigDropshot { + bind_address: SocketAddr::V6(external_address), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: SocketAddr::V6(internal_address), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(self.sled_subnet.net().ip()), + database_address: None, + }; + + // Construct a temporary file within the zone. + let output = running_zone + .run_cmd(&[ + "/usr/bin/mktemp", + "nexus-config.toml.XXXXXX", + ]) + .map_err(|err| Error::ZoneCommand { + intent: "creating temporary file for nexus configuration".to_string(), + err, })?; + let runtime_config_path = PathBuf::from(output); + + // Serialize the configuration and write it into the file. + let serialized_cfg = toml::Value::try_from(&runtime_config) + .expect("Cannot serialize config"); + let config_str = + toml::to_string(&serialized_cfg).map_err(|err| { + Error::TomlSerialize { path: runtime_config_path.clone(), err } + })?; + // Inject the path directly into the non-global Zone, based + // on the result of the mktempcommand. + let path_from_gz = PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(running_zone.name())) + .join("root") + .join(runtime_config_path.strip_prefix("/").unwrap()); + tokio::fs::write(&path_from_gz, config_str) + .await + .map_err(|err| Error::Io { path: path_from_gz.clone(), err })?; + running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -261,15 +311,21 @@ impl ServiceManager { &smf_name, "setprop", &format!( - "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT + "config/runtime_config={}", + runtime_config_path.to_string_lossy(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: "set server address".to_string(), + intent: "setting runtime config path".to_string(), err, })?; + }, + ServiceType::InternalDns { + server_address, + dns_address, + } => { + info!(self.log, "Setting up internal-dns service"); running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -277,40 +333,50 @@ impl ServiceManager { &smf_name, "setprop", &format!( - "config/dns_address=[{}]:{}", - address, DNS_PORT + "config/server_address=[{}]:{}", + server_address.ip(), server_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { - intent: "Set DNS address".to_string(), + intent: "set server address".to_string(), err, })?; - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", - &default_smf_name, - "refresh", + &smf_name, + "setprop", + &format!( + "config/dns_address=[{}]:{}", + dns_address.ip(), dns_address.port(), + ), ]) .map_err(|err| Error::ZoneCommand { - intent: format!( - "Refresh SMF manifest {}", - default_smf_name - ), + intent: "Set DNS address".to_string(), err, })?; } - _ => { - info!( - self.log, - "Service name {} did not match", service.name - ); - } } + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; + debug!(self.log, "enabling service"); running_zone diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 3211845162d..14a28bf3481 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,6 +16,7 @@ use crate::params::{ }; use crate::services::ServiceManager; use crate::storage_manager::StorageManager; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::nexus::UpdateArtifact, @@ -197,8 +198,12 @@ impl SledAgent { data_link.clone(), *sled_address.ip(), ); - let services = - ServiceManager::new(log.clone(), data_link.clone(), None).await?; + let services = ServiceManager::new( + log.clone(), + data_link.clone(), + Ipv6Subnet::::new(*sled_address.ip()), + None, + ).await?; Ok(SledAgent { id: config.id, diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index 0b8da2ff62f..a01da82af4a 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -18,10 +18,14 @@ + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 61799f168cd..e07ef6f9073 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -38,10 +38,10 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # address = "[fd00:1122:3344:0101::5]:8123" # dataset_kind.type = "clickhouse" -[[request.service]] -name = "nexus" -addresses = [ "fd00:1122:3344:0101::3" ] -gz_addresses = [] +# [[request.service]] +# name = "nexus" +# addresses = [ "fd00:1122:3344:0101::3" ] +# gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. From 0e74c962c77d8425f970ac9224743ab0b01049cc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 00:15:46 -0400 Subject: [PATCH 55/61] Trying to fix tests, use just one config file, update bindings --- common/src/nexus_config.rs | 30 +++-- nexus/examples/config.toml | 4 + nexus/src/bin/nexus.rs | 20 +-- nexus/src/config.rs | 119 ++++++++++-------- nexus/src/context.rs | 32 ++--- nexus/test-utils/src/lib.rs | 11 +- nexus/tests/config.test.toml | 4 + nexus/tests/integration_tests/authn_http.rs | 4 +- nexus/tests/integration_tests/commands.rs | 3 +- nexus/tests/integration_tests/console_api.rs | 2 +- nexus/tests/integration_tests/updates.rs | 4 +- openapi/nexus-internal.json | 79 ++++++++++++ sled-agent/src/services.rs | 62 +++++---- .../{config.toml => config-partial.toml} | 0 smf/nexus/manifest.xml | 6 +- 15 files changed, 241 insertions(+), 139 deletions(-) rename smf/nexus/{config.toml => config-partial.toml} (100%) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index def28803279..f46dc65cd04 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -6,17 +6,19 @@ //! at runtime. use super::address::{Ipv6Subnet, RACK_PREFIX}; +use super::postgres_config::PostgresConfigWithUrl; use dropshot::ConfigDropshot; use serde::{Deserialize, Serialize}; +use serde_with::serde_as; +use serde_with::DisplayFromStr; use std::fmt; -use std::net::SocketAddr; use std::path::{Path, PathBuf}; use uuid::Uuid; #[derive(Debug)] pub struct LoadError { - path: PathBuf, - kind: LoadErrorKind, + pub path: PathBuf, + pub kind: LoadErrorKind, } #[derive(Debug)] @@ -62,6 +64,17 @@ impl std::cmp::PartialEq for LoadError { } } +#[serde_as] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum Database { + FromDns, + FromUrl { + #[serde_as(as = "DisplayFromStr")] + url: PostgresConfigWithUrl + }, +} + #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct RuntimeConfig { /// Uuid of the Nexus instance @@ -72,15 +85,8 @@ pub struct RuntimeConfig { pub dropshot_internal: ConfigDropshot, /// Portion of the IP space to be managed by the Rack. pub subnet: Ipv6Subnet, - - /// An optional database address. - /// - /// If `None`, Nexus will use DNS to infer this value. - pub database_address: Option, - -// /// Database parameters -// #[serde_as(as = "DisplayFromStr")] -// pub database_url: PostgresConfigWithUrl, + /// DB configuration. + pub database: Database, } impl RuntimeConfig { diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 8503f789816..f6855590a89 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -23,6 +23,7 @@ schemes_external = ["spoof", "session_cookie"] [database] # URL for connecting to the database +type = "from_url" url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" [dropshot_external] @@ -36,6 +37,9 @@ request_body_max_bytes = 1048576 # IP address and TCP port on which to listen for the internal API bind_address = "127.0.0.1:12221" +[subnet] +net = "fd00:1122:3344:0100::/56" + [log] # Show log messages of this level and more severe level = "info" diff --git a/nexus/src/bin/nexus.rs b/nexus/src/bin/nexus.rs index e6480f3822f..161d88f7b95 100644 --- a/nexus/src/bin/nexus.rs +++ b/nexus/src/bin/nexus.rs @@ -15,11 +15,10 @@ use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use omicron_common::nexus_config::RuntimeConfig; use omicron_nexus::run_openapi_external; use omicron_nexus::run_openapi_internal; use omicron_nexus::run_server; -use omicron_nexus::{Config, PackageConfig}; +use omicron_nexus::Config; use std::path::PathBuf; use structopt::StructOpt; @@ -40,11 +39,8 @@ struct Args { )] openapi_internal: bool, - #[structopt(name = "PACKAGE_CONFIG_FILE_PATH", parse(from_os_str))] - pkg_config_file_path: PathBuf, - - #[structopt(name = "RUNTIME_CONFIG_FILE_PATH", parse(from_os_str))] - rt_config_file_path: PathBuf, + #[structopt(name = "CONFIG_FILE_PATH", parse(from_os_str))] + config_file_path: PathBuf, } #[tokio::main] @@ -59,17 +55,9 @@ async fn do_run() -> Result<(), CmdError> { CmdError::Usage(format!("parsing arguments: {}", err.message)) })?; - let rt_config = RuntimeConfig::from_file(args.rt_config_file_path) - .map_err(|e| CmdError::Failure(e.to_string()))?; - - let pkg_config = PackageConfig::from_file(args.pkg_config_file_path) + let config = Config::from_file(args.config_file_path) .map_err(|e| CmdError::Failure(e.to_string()))?; - let config = Config { - runtime: rt_config, - pkg: pkg_config, - }; - if args.openapi { run_openapi_external().map_err(CmdError::Failure) } else if args.openapi_internal { diff --git a/nexus/src/config.rs b/nexus/src/config.rs index f64dbbbfeed..1682c846b42 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -67,7 +67,16 @@ pub struct PackageConfig { pub updates: Option, } -impl PackageConfig { +#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] +pub struct Config { + /// A variety of configuration parameters only known at runtime. + #[serde(flatten)] + pub runtime: RuntimeConfig, + #[serde(flatten)] + pub pkg: PackageConfig, +} + +impl Config { /// Load a `PackageConfig` from the given TOML file /// /// This config object can then be used to create a new `Nexus`. @@ -82,13 +91,6 @@ impl PackageConfig { } } -#[derive(Clone, Debug, PartialEq)] -pub struct Config { - /// A variety of configuration parameters only known at runtime. - pub runtime: RuntimeConfig, - pub pkg: PackageConfig, -} - /// List of supported external authn schemes /// /// Note that the authn subsystem doesn't know about this type. It allows @@ -126,17 +128,18 @@ impl std::fmt::Display for SchemeName { #[cfg(test)] mod test { use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, LoadErrorKind, - SchemeName, TimeseriesDbConfig, UpdatesConfig, + AuthnConfig, Config, ConsoleConfig, LoadError, + PackageConfig, SchemeName, TimeseriesDbConfig, UpdatesConfig, }; - use crate::db; + use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; + use omicron_common::nexus_config::{LoadErrorKind, Database, RuntimeConfig}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use libc; + use std::net::{SocketAddr, Ipv6Addr}; use std::fs; - use std::net::SocketAddr; use std::path::Path; use std::path::PathBuf; @@ -207,7 +210,7 @@ mod test { let error = read_config("empty", "").expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert_eq!(error.line_col(), None); - assert_eq!(error.to_string(), "missing field `dropshot_external`"); + assert_eq!(error.to_string(), "missing field `id`"); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", @@ -239,8 +242,10 @@ mod test { [dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 + [subnet] + net = "::/56" [database] - url = "postgresql://127.0.0.1?sslmode=disable" + type = "from_dns" [log] mode = "file" level = "debug" @@ -258,43 +263,44 @@ mod test { assert_eq!( config, Config { - id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), - console: ConsoleConfig { - static_dir: "tests/static".parse().unwrap(), - cache_control_max_age_minutes: 10, - session_idle_timeout_minutes: 60, - session_absolute_timeout_minutes: 480 - }, - authn: AuthnConfig { schemes_external: Vec::new() }, - dropshot_external: ConfigDropshot { - bind_address: "10.1.2.3:4567" - .parse::() - .unwrap(), - ..Default::default() - }, - dropshot_internal: ConfigDropshot { - bind_address: "10.1.2.3:4568" - .parse::() - .unwrap(), - ..Default::default() - }, - log: ConfigLogging::File { - level: ConfigLoggingLevel::Debug, - if_exists: ConfigLoggingIfExists::Fail, - path: "/nonexistent/path".to_string() - }, - database: db::Config { - url: "postgresql://127.0.0.1?sslmode=disable" - .parse() - .unwrap() + runtime: RuntimeConfig { + id: "28b90dc4-c22a-65ba-f49a-f051fe01208f".parse().unwrap(), + dropshot_external: ConfigDropshot { + bind_address: "10.1.2.3:4567" + .parse::() + .unwrap(), + ..Default::default() + }, + dropshot_internal: ConfigDropshot { + bind_address: "10.1.2.3:4568" + .parse::() + .unwrap(), + ..Default::default() + }, + subnet: Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), + database: Database::FromDns, }, - timeseries_db: TimeseriesDbConfig { - address: "[::1]:8123".parse().unwrap() + pkg: PackageConfig { + console: ConsoleConfig { + static_dir: "tests/static".parse().unwrap(), + cache_control_max_age_minutes: 10, + session_idle_timeout_minutes: 60, + session_absolute_timeout_minutes: 480 + }, + authn: AuthnConfig { schemes_external: Vec::new() }, + log: ConfigLogging::File { + level: ConfigLoggingLevel::Debug, + if_exists: ConfigLoggingIfExists::Fail, + path: "/nonexistent/path".to_string() + }, + timeseries_db: TimeseriesDbConfig { + address: "[::1]:8123".parse().unwrap() + }, + updates: Some(UpdatesConfig { + trusted_root: PathBuf::from("/path/to/root.json"), + default_base_url: "http://example.invalid/".into(), + }), }, - updates: Some(UpdatesConfig { - trusted_root: PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), - }), } ); @@ -315,8 +321,10 @@ mod test { [dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 + [subnet] + net = "::/56" [database] - url = "postgresql://127.0.0.1?sslmode=disable" + type = "from_dns" [log] mode = "file" level = "debug" @@ -329,7 +337,7 @@ mod test { .unwrap(); assert_eq!( - config.authn.schemes_external, + config.pkg.authn.schemes_external, vec![SchemeName::Spoof, SchemeName::SessionCookie], ); } @@ -353,8 +361,10 @@ mod test { [dropshot_internal] bind_address = "10.1.2.3:4568" request_body_max_bytes = 1024 + [subnet] + net = "::/56" [database] - url = "postgresql://127.0.0.1?sslmode=disable" + type = "from_dns" [log] mode = "file" level = "debug" @@ -367,9 +377,8 @@ mod test { .expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { assert!(error.to_string().starts_with( - "unsupported authn scheme: \"trust-me\" \ - for key `authn.schemes_external`" - )); + "unsupported authn scheme: \"trust-me\"" + ), "error = {}", error.to_string()); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 0c47762da02..895d80ba096 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -20,6 +20,7 @@ use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; use omicron_common::api::external::Error; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; +use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; use oximeter_instruments::http::{HttpService, LatencyTracker}; @@ -27,7 +28,6 @@ use slog::Logger; use std::collections::BTreeMap; use std::env; use std::fmt::Debug; -use std::net::SocketAddr; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; @@ -142,22 +142,22 @@ impl ServerContext { .map_err(|e| format!("Failed to create DNS resolver: {}", e.to_string()))?; // Set up DB pool - let address = if let Some(address) = config.runtime.database_address { - address - } else { - let response = resolver.lookup_ip("cockroachdb.") - .await - .map_err(|e| format!("Failed to lookup IP: {}", e.to_string()))?; - let address = response - .iter() - .next() - .ok_or_else(|| "no addresses returned from DNS resolver".to_string())?; - SocketAddr::new(address, COCKROACH_PORT) + let url = match &config.runtime.database { + nexus_config::Database::FromUrl { url } => url.clone(), + nexus_config::Database::FromDns => { + let response = resolver.lookup_ip("cockroachdb.") + .await + .map_err(|e| format!("Failed to lookup IP: {}", e.to_string()))?; + let address = response + .iter() + .next() + .ok_or_else(|| "no addresses returned from DNS resolver".to_string())?; + PostgresConfigWithUrl::from_str( + &format!("postgresql://root@[{}]:{}/omicron?sslmode=disable", address, COCKROACH_PORT) + ) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e.to_string()))? + }, }; - let url = PostgresConfigWithUrl::from_str( - &format!("postgresql://root@{}/omicron?sslmode=disable", address) - ) - .map_err(|e| format!("Cannot parse Postgres URL: {}", e.to_string()))?; let pool = db::Pool::new(&db::Config { url }); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index a53ad85d585..f70a46372b9 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -11,6 +11,7 @@ use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::external::IdentityMetadata; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::nexus_config; use omicron_sled_agent::sim; use omicron_test_utils::dev; use oximeter_collector::Oximeter; @@ -75,7 +76,7 @@ pub fn load_test_config() -> omicron_nexus::Config { let config_file_path = Path::new("tests/config.test.toml"); let mut config = omicron_nexus::Config::from_file(config_file_path) .expect("failed to load config.test.toml"); - config.id = Uuid::new_v4(); + config.runtime.id = Uuid::new_v4(); config } @@ -88,7 +89,7 @@ pub async fn test_setup_with_config( test_name: &str, config: &mut omicron_nexus::Config, ) -> ControlPlaneTestContext { - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let rack_id = Uuid::parse_str(RACK_UUID).unwrap(); let log = &logctx.log; @@ -99,8 +100,10 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.database.url = database.pg_config().clone(); - config.timeseries_db.address.set_port(clickhouse.port()); + config.runtime.database = nexus_config::Database::FromUrl { + url: database.pg_config().clone() + }; + config.pkg.timeseries_db.address.set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) .await diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 5ae440e6620..b9e51037a35 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -22,6 +22,7 @@ schemes_external = [ "spoof", "session_cookie" ] # appropriate for the database that's started by the test runner. # [database] +type = "from_url" url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" # @@ -38,6 +39,9 @@ request_body_max_bytes = 1048576 bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 +[subnet] +net = "fd00:1122:3344:0100::/56" + # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel # string "UNUSED". The actual path will be generated by the test suite for each diff --git a/nexus/tests/integration_tests/authn_http.rs b/nexus/tests/integration_tests/authn_http.rs index 2cbe0f3768e..3db23fb5b59 100644 --- a/nexus/tests/integration_tests/authn_http.rs +++ b/nexus/tests/integration_tests/authn_http.rs @@ -275,7 +275,7 @@ async fn start_whoami_server( sessions: HashMap, ) -> TestContext { let config = nexus_test_utils::load_test_config(); - let logctx = LogContext::new(test_name, &config.log); + let logctx = LogContext::new(test_name, &config.pkg.log); let whoami_api = { let mut whoami_api = ApiDescription::new(); @@ -297,7 +297,7 @@ async fn start_whoami_server( TestContext::new( whoami_api, server_state, - &config.dropshot_external, + &config.runtime.dropshot_external, Some(logctx), log, ) diff --git a/nexus/tests/integration_tests/commands.rs b/nexus/tests/integration_tests/commands.rs index 7d3855d5a6c..9396314e2d8 100644 --- a/nexus/tests/integration_tests/commands.rs +++ b/nexus/tests/integration_tests/commands.rs @@ -76,8 +76,7 @@ fn test_nexus_invalid_config() { assert_eq!( stderr_text, format!( - "nexus: parse \"{}\": missing field \ - `dropshot_external`\n", + "nexus: parse \"{}\": missing field `id`\n", config_path.display() ), ); diff --git a/nexus/tests/integration_tests/console_api.rs b/nexus/tests/integration_tests/console_api.rs index b1d2c5b423c..7de7c3ee73e 100644 --- a/nexus/tests/integration_tests/console_api.rs +++ b/nexus/tests/integration_tests/console_api.rs @@ -195,7 +195,7 @@ async fn test_assets(cptestctx: &ControlPlaneTestContext) { #[tokio::test] async fn test_absolute_static_dir() { let mut config = load_test_config(); - config.console.static_dir = current_dir().unwrap().join("tests/static"); + config.pkg.console.static_dir = current_dir().unwrap().join("tests/static"); let cptestctx = test_setup_with_config("test_absolute_static_dir", &mut config).await; let testctx = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 1bfa25d0a2c..c09ca0b7fea 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -62,7 +62,7 @@ async fn test_update_end_to_end() { let mut api = ApiDescription::new(); api.register(static_content).unwrap(); let context = FileServerContext { base: tuf_repo.path().to_owned() }; - let logctx = LogContext::new("test_update_end_to_end", &config.log); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); let server = HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) .unwrap() @@ -70,7 +70,7 @@ async fn test_update_end_to_end() { let local_addr = server.local_addr(); // stand up the test environment - config.updates = Some(UpdatesConfig { + config.pkg.updates = Some(UpdatesConfig { trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), default_base_url: format!("http://{}/", local_addr), }); diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 4cfadbdfc74..aa98a0fcc8b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -267,6 +267,55 @@ } } }, + "/sled_agents/{sled_id}/services/{service_id}": { + "put": { + "summary": "Report that a service should be running on a sled.", + "operationId": "service_put", + "parameters": [ + { + "in": "path", + "name": "service_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + }, + "style": "simple" + }, + { + "in": "path", + "name": "sled_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServicePutRequest" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sled_agents/{sled_id}/zpools/{zpool_id}": { "put": { "summary": "Report that a pool for a specified sled has come online.", @@ -1575,6 +1624,36 @@ "timeseries_name" ] }, + "ServiceKind": { + "description": "Describes the purpose of the service.", + "type": "string", + "enum": [ + "Nexus", + "Oximeter" + ] + }, + "ServicePutRequest": { + "description": "Describes a service on a sled", + "type": "object", + "properties": { + "address": { + "description": "Address on which a service is responding to requests.", + "type": "string" + }, + "kind": { + "description": "Type of service being inserted.", + "allOf": [ + { + "$ref": "#/components/schemas/ServiceKind" + } + ] + } + }, + "required": [ + "address", + "kind" + ] + }, "SledAgentStartupInfo": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object", diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b750afafaae..fa48e212b14 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -12,12 +12,13 @@ use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use dropshot::ConfigDropshot; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX, RACK_PREFIX}; -use omicron_common::nexus_config::RuntimeConfig as NexusRuntimeConfig; +use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; use std::net::{IpAddr, SocketAddr}; use std::path::{Path, PathBuf}; +use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; #[derive(thiserror::Error, Debug)] @@ -272,38 +273,36 @@ impl ServiceManager { ..Default::default() }, subnet: Ipv6Subnet::::new(self.sled_subnet.net().ip()), - database_address: None, + database: nexus_config::Database::FromDns, }; - // Construct a temporary file within the zone. - let output = running_zone - .run_cmd(&[ - "/usr/bin/mktemp", - "nexus-config.toml.XXXXXX", - ]) - .map_err(|err| Error::ZoneCommand { - intent: "creating temporary file for nexus configuration".to_string(), - err, - })?; - let runtime_config_path = PathBuf::from(output); + // Copy the partial config file + let partial_config_path = PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(running_zone.name())) + .join("root") + .join("var/svc/manifest/site/nexus/config-partial.toml"); + let mut config_path = partial_config_path.clone(); + config_path.set_file_name("config.toml"); - // Serialize the configuration and write it into the file. + tokio::fs::copy(partial_config_path, &config_path) + .await + .map_err(|err| Error::Io { path: config_path.clone(), err })?; + + // Serialize the configuration and append it into the file. let serialized_cfg = toml::Value::try_from(&runtime_config) .expect("Cannot serialize config"); let config_str = toml::to_string(&serialized_cfg).map_err(|err| { - Error::TomlSerialize { path: runtime_config_path.clone(), err } + Error::TomlSerialize { path: config_path.clone(), err } })?; - // Inject the path directly into the non-global Zone, based - // on the result of the mktempcommand. - let path_from_gz = PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) - .join(PathBuf::from(running_zone.name())) - .join("root") - .join(runtime_config_path.strip_prefix("/").unwrap()); - tokio::fs::write(&path_from_gz, config_str) + let mut file = tokio::fs::OpenOptions::new().append(true).open(&config_path) + .await + .map_err(|err| Error::Io { path: config_path.clone(), err })?; + file.write_all(config_str.as_bytes()) .await - .map_err(|err| Error::Io { path: path_from_gz.clone(), err })?; + .map_err(|err| Error::Io { path: config_path.clone(), err })?; + /* running_zone .run_cmd(&[ crate::illumos::zone::SVCCFG, @@ -319,7 +318,7 @@ impl ServiceManager { intent: "setting runtime config path".to_string(), err, })?; - + */ }, ServiceType::InternalDns { server_address, @@ -472,6 +471,7 @@ mod test { use crate::illumos::{ dladm::MockDladm, dladm::PhysicalLink, svc, zone::MockZones, }; + use std::net::{SocketAddrV6, Ipv6Addr}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -534,6 +534,10 @@ mod test { name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + external_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }, }], }) .await @@ -549,6 +553,10 @@ mod test { name: SVC_NAME.to_string(), addresses: vec![], gz_addresses: vec![], + service_type: ServiceType::Nexus { + internal_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + external_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + }, }], }) .await @@ -584,6 +592,7 @@ mod test { let mgr = ServiceManager::new( log, PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config), ) .await @@ -609,6 +618,7 @@ mod test { let mgr = ServiceManager::new( log, PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config), ) .await @@ -637,6 +647,7 @@ mod test { let mgr = ServiceManager::new( logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config.clone()), ) .await @@ -652,6 +663,7 @@ mod test { let mgr = ServiceManager::new( logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config.clone()), ) .await @@ -676,6 +688,7 @@ mod test { let mgr = ServiceManager::new( logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config.clone()), ) .await @@ -692,6 +705,7 @@ mod test { let mgr = ServiceManager::new( logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), + Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), Some(config.clone()), ) .await diff --git a/smf/nexus/config.toml b/smf/nexus/config-partial.toml similarity index 100% rename from smf/nexus/config.toml rename to smf/nexus/config-partial.toml diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index a01da82af4a..0b8da2ff62f 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -18,14 +18,10 @@ - - - - From 4c7c5525e29b93d7d0616a61620c4a5cf6a87dac Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 14:06:12 -0400 Subject: [PATCH 56/61] openapi re-gen, fixing servicemanager tests / config --- openapi/sled-agent.json | 56 +++++++++++- sled-agent/src/params.rs | 23 +++++ sled-agent/src/services.rs | 171 +++++++++++++++++++++-------------- sled-agent/src/sled_agent.rs | 4 +- 4 files changed, 181 insertions(+), 73 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 05d6a80b0aa..a28fd9dda73 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -973,6 +973,7 @@ ] }, "ServiceRequest": { + "description": "Describes a request to create a service. This information should be sufficient for a Sled Agent to start a zone containing the requested service.", "type": "object", "properties": { "addresses": { @@ -995,12 +996,65 @@ }, "name": { "type": "string" + }, + "service_type": { + "$ref": "#/components/schemas/ServiceType" } }, "required": [ "addresses", "id", - "name" + "name", + "service_type" + ] + }, + "ServiceType": { + "description": "Describes service-specific parameters.", + "oneOf": [ + { + "type": "object", + "properties": { + "external_address": { + "type": "string" + }, + "internal_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "Nexus" + ] + } + }, + "required": [ + "external_address", + "internal_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dns_address": { + "type": "string" + }, + "server_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "InternalDns" + ] + } + }, + "required": [ + "dns_address", + "server_address", + "type" + ] + } ] }, "Slot": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 232b1ffee7f..aa15292074c 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -257,6 +257,28 @@ pub enum ServiceType { }, } +impl From for sled_agent_client::types::ServiceType { + fn from(s: ServiceType) -> Self { + use ServiceType as St; + use sled_agent_client::types::ServiceType as AutoSt; + + match s { + St::Nexus { internal_address, external_address } => { + AutoSt::Nexus { + internal_address: internal_address.to_string(), + external_address: external_address.to_string(), + } + }, + St::InternalDns { server_address, dns_address } => { + AutoSt::InternalDns { + server_address: server_address.to_string(), + dns_address: dns_address.to_string(), + } + }, + } + } +} + /// Describes a request to create a service. This information /// should be sufficient for a Sled Agent to start a zone /// containing the requested service. @@ -290,6 +312,7 @@ impl From for sled_agent_client::types::ServiceRequest { name: s.name, addresses: s.addresses, gz_addresses: s.gz_addresses, + service_type: s.service_type.into(), } } } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index fa48e212b14..38ef794a028 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -21,6 +21,15 @@ use std::path::{Path, PathBuf}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; +// The filename of ServiceManager's internal storage. +const SERVICE_CONFIG_FILENAME: &str = "service.toml"; +// The filename of a half-completed config, in need of parameters supplied at +// runtime. +const PARTIAL_CONFIG_FILENAME: &str = "config-partial.toml"; +// The filename of a completed config, merging the partial config with +// additional appended parameters known at runtime. +const COMPLETE_CONFIG_FILENAME: &str = "config.toml"; + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Cannot serialize TOML to file {path}: {err}")] @@ -72,13 +81,40 @@ impl From for omicron_common::api::external::Error { /// The default path to service configuration, if one is not /// explicitly provided. pub fn default_services_config_path() -> PathBuf { - Path::new(omicron_common::OMICRON_CONFIG_PATH).join("services.toml") + Path::new(omicron_common::OMICRON_CONFIG_PATH).join(SERVICE_CONFIG_FILENAME) +} + +/// Configuration parameters which modify the [`ServiceManager`]'s behavior. +/// +/// These are typically used to make testing easier; production usage +/// should generally prefer to use the defaults. +pub struct Config { + /// The path for the ServiceManager to store information about + /// all running services. + pub all_svcs_config_path: PathBuf, + /// A function which returns the path the directory holding the + /// service's configuration file. + pub get_svc_config_dir: Box PathBuf + Send + Sync>, +} + +impl Default for Config { + fn default() -> Self { + Self { + all_svcs_config_path: default_services_config_path(), + get_svc_config_dir: Box::new(|zone_name: &str, svc_name: &str| { + PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) + .join(PathBuf::from(zone_name)) + .join("root") + .join(format!("var/svc/manifest/site/{}", svc_name)) + }) + } + } } /// Manages miscellaneous Sled-local services. pub struct ServiceManager { log: Logger, - config_path: Option, + config: Config, zones: Mutex>, vnic_allocator: VnicAllocator, physical_link: PhysicalLink, @@ -99,12 +135,12 @@ impl ServiceManager { log: Logger, physical_link: PhysicalLink, sled_subnet: Ipv6Subnet, - config_path: Option, + config: Config, ) -> Result { debug!(log, "Creating new ServiceManager"); let mgr = Self { log, - config_path, + config, zones: Mutex::new(vec![]), vnic_allocator: VnicAllocator::new( "Service", @@ -147,11 +183,7 @@ impl ServiceManager { // Returns either the path to the explicitly provided config path, or // chooses the default one. fn services_config_path(&self) -> PathBuf { - if let Some(path) = &self.config_path { - path.clone() - } else { - default_services_config_path() - } + self.config.all_svcs_config_path.clone() } // Populates `existing_zones` according to the requests in `services`. @@ -251,8 +283,6 @@ impl ServiceManager { let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); - // TODO: match by type - // TODO: add nexus? match service.service_type { ServiceType::Nexus { internal_address, @@ -276,14 +306,12 @@ impl ServiceManager { database: nexus_config::Database::FromDns, }; - // Copy the partial config file - let partial_config_path = PathBuf::from(ZONE_ZFS_DATASET_MOUNTPOINT) - .join(PathBuf::from(running_zone.name())) - .join("root") - .join("var/svc/manifest/site/nexus/config-partial.toml"); - let mut config_path = partial_config_path.clone(); - config_path.set_file_name("config.toml"); - + // Copy the partial config file to the expected location. + let config_dir = (self.config.get_svc_config_dir)( + running_zone.name(), &service.name + ); + let partial_config_path = config_dir.join(PARTIAL_CONFIG_FILENAME); + let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); tokio::fs::copy(partial_config_path, &config_path) .await .map_err(|err| Error::Io { path: config_path.clone(), err })?; @@ -302,23 +330,6 @@ impl ServiceManager { .await .map_err(|err| Error::Io { path: config_path.clone(), err })?; - /* - running_zone - .run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/runtime_config={}", - runtime_config_path.to_string_lossy(), - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: "setting runtime config path".to_string(), - err, - })?; - */ }, ServiceType::InternalDns { server_address, @@ -356,26 +367,26 @@ impl ServiceManager { intent: "Set DNS address".to_string(), err, })?; + + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), + err, + })?; } } - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. - running_zone - .run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Refresh SMF manifest {}", - default_smf_name - ), - err, - })?; - debug!(self.log, "enabling service"); running_zone @@ -580,20 +591,44 @@ mod test { drop(mgr); } + struct TestConfig { + config_dir: tempfile::TempDir, + } + + impl TestConfig { + async fn new() -> Self { + let config_dir = tempfile::TempDir::new().unwrap(); + tokio::fs::File::create(config_dir.path().join(PARTIAL_CONFIG_FILENAME)).await.unwrap(); + Self { + config_dir + } + } + + fn make_config(&self) -> Config { + let all_svcs_config_path = self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + let svc_config_dir = self.config_dir.path().to_path_buf(); + Config { + all_svcs_config_path, + get_svc_config_dir: Box::new(move |_zone_name: &str, _svc_name: &str| { + svc_config_dir.clone() + }) + } + } + } + #[tokio::test] #[serial_test::serial] async fn test_ensure_service() { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config), + test_config.make_config(), ) .await .unwrap(); @@ -612,14 +647,13 @@ mod test { "test_ensure_service_which_already_exists", ); let log = logctx.log.clone(); + let test_config = TestConfig::new().await; - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config), + test_config.make_config(), ) .await .unwrap(); @@ -638,9 +672,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_are_recreated_on_reboot", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -648,7 +680,7 @@ mod test { logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -664,7 +696,7 @@ mod test { logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -679,9 +711,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log( "test_services_do_not_persist_without_config", ); - - let config_dir = tempfile::TempDir::new().unwrap(); - let config = config_dir.path().join("services.toml"); + let test_config = TestConfig::new().await; // First, spin up a ServiceManager, create a new service, and tear it // down. @@ -689,7 +719,7 @@ mod test { logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config.clone()), + test_config.make_config(), ) .await .unwrap(); @@ -699,14 +729,15 @@ mod test { // Next, delete the config. This means the service we just created will // not be remembered on the next initialization. - std::fs::remove_file(&config).unwrap(); + let config = test_config.make_config(); + std::fs::remove_file(&config.all_svcs_config_path).unwrap(); // Observe that the old service is not re-initialized. let mgr = ServiceManager::new( logctx.log.clone(), PhysicalLink(EXPECTED_LINK_NAME.to_string()), Ipv6Subnet::::new(Ipv6Addr::LOCALHOST), - Some(config.clone()), + config, ) .await .unwrap(); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 14a28bf3481..7c0e6c4ade1 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -14,7 +14,7 @@ use crate::params::{ DatasetKind, DiskStateRequested, InstanceHardware, InstanceMigrateParams, InstanceRuntimeStateRequested, ServiceEnsureBody, }; -use crate::services::ServiceManager; +use crate::services::{self, ServiceManager}; use crate::storage_manager::StorageManager; use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use omicron_common::api::{ @@ -202,7 +202,7 @@ impl SledAgent { log.clone(), data_link.clone(), Ipv6Subnet::::new(*sled_address.ip()), - None, + services::Config::default(), ).await?; Ok(SledAgent { From 02b75338244d904dc6f55d9f70368f4dbc72c23d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 14:13:15 -0400 Subject: [PATCH 57/61] clippy / fmt --- common/src/nexus_config.rs | 3 +- internal-dns-client/src/multiclient.rs | 77 ++++++--------- internal-dns-client/src/names.rs | 4 +- internal-dns/src/dns_data.rs | 24 ++--- internal-dns/src/dns_server.rs | 6 +- nexus/src/config.rs | 22 +++-- nexus/src/context.rs | 40 ++++---- nexus/src/db/datastore.rs | 5 +- nexus/src/db/model/service.rs | 2 +- nexus/src/db/model/service_kind.rs | 4 +- nexus/src/db/model/sled.rs | 2 +- nexus/src/lib.rs | 13 ++- nexus/test-utils/src/lib.rs | 5 +- sled-agent/src/params.rs | 22 ++--- sled-agent/src/rack_setup/service.rs | 132 +++++++++++++++---------- sled-agent/src/services.rs | 111 ++++++++++++++------- sled-agent/src/sled_agent.rs | 3 +- sled-agent/src/storage_manager.rs | 22 +++-- 18 files changed, 275 insertions(+), 222 deletions(-) diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index f46dc65cd04..ca11eda2839 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -67,11 +67,12 @@ impl std::cmp::PartialEq for LoadError { #[serde_as] #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] #[serde(tag = "type", rename_all = "snake_case")] +#[allow(clippy::large_enum_variant)] pub enum Database { FromDns, FromUrl { #[serde_as(as = "DisplayFromStr")] - url: PostgresConfigWithUrl + url: PostgresConfigWithUrl, }, } diff --git a/internal-dns-client/src/multiclient.rs b/internal-dns-client/src/multiclient.rs index 9917b31bae6..e01fb5a2139 100644 --- a/internal-dns-client/src/multiclient.rs +++ b/internal-dns-client/src/multiclient.rs @@ -2,13 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::types::{DnsRecord, DnsKv, DnsRecordKey, Srv}; +use crate::types::{DnsKv, DnsRecord, DnsRecordKey, Srv}; use omicron_common::address::{ - Ipv6Subnet, - ReservedRackSubnet, - AZ_PREFIX, - DNS_SERVER_PORT, - DNS_PORT, + Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT, DNS_SERVER_PORT, }; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -35,13 +31,14 @@ impl Updater { .map(|dns_subnet| { let addr = dns_subnet.dns_address().ip(); info!(log, "Adding DNS server: {}", addr); - crate::Client::new(&format!("http://[{}]:{}", addr, DNS_SERVER_PORT), log.clone()) + crate::Client::new( + &format!("http://[{}]:{}", addr, DNS_SERVER_PORT), + log.clone(), + ) }) .collect::>(); - Self { - clients - } + Self { clients } } /// Utility function to insert: @@ -56,49 +53,39 @@ impl Updater { let mut records = Vec::with_capacity(aaaa.len() + 1); // Add one DnsKv per AAAA, each with a single record. - records.extend( - aaaa.iter().map(|(name, addr)| { - DnsKv { - key: DnsRecordKey { - name: name.to_string(), - }, - records: vec![DnsRecord::Aaaa(*addr.ip())], - } - }) - ); + records.extend(aaaa.iter().map(|(name, addr)| DnsKv { + key: DnsRecordKey { name: name.to_string() }, + records: vec![DnsRecord::Aaaa(*addr.ip())], + })); // Add the DnsKv for the SRV, with a record for each AAAA. - records.push( - DnsKv { - key: DnsRecordKey { - name: srv_key.to_string(), - }, - records: aaaa.iter().map(|(name, addr)| { - DnsRecord::Srv(Srv{ + records.push(DnsKv { + key: DnsRecordKey { name: srv_key.to_string() }, + records: aaaa + .iter() + .map(|(name, addr)| { + DnsRecord::Srv(Srv { prio: 0, weight: 0, port: addr.port(), target: name.to_string(), }) - }).collect::>(), - } - ); + }) + .collect::>(), + }); let set_record = || async { self.dns_records_set(&records) - .await - .map_err(BackoffError::transient)?; + .await + .map_err(BackoffError::transient)?; Ok::<(), BackoffError>(()) }; let log_failure = |error, _| { warn!(log, "Failed to set DNS records"; "error" => ?error); }; - retry_notify( - internal_service_policy(), - set_record, - log_failure, - ).await?; + retry_notify(internal_service_policy(), set_record, log_failure) + .await?; Ok(()) } @@ -107,9 +94,8 @@ impl Updater { /// Returns an error if setting the record fails on any server. pub async fn dns_records_set<'a>( &'a self, - body: &'a Vec + body: &'a Vec, ) -> Result<(), DnsError> { - // TODO: Could be sent concurrently. for client in &self.clients { client.dns_records_set(body).await?; @@ -123,7 +109,7 @@ impl Updater { /// Returns an error if deleting the record fails on any server. pub async fn dns_records_delete<'a>( &'a self, - body: &'a Vec + body: &'a Vec, ) -> Result<(), DnsError> { // TODO: Could be sent concurrently for client in &self.clients { @@ -134,9 +120,9 @@ impl Updater { } /// Creates a resolver using all internal DNS name servers. -pub fn create_resolver(subnet: Ipv6Subnet) - -> Result -{ +pub fn create_resolver( + subnet: Ipv6Subnet, +) -> Result { let mut rc = ResolverConfig::new(); let dns_ips = ReservedRackSubnet::new(subnet) .get_dns_subnets() @@ -147,10 +133,7 @@ pub fn create_resolver(subnet: Ipv6Subnet) for dns_ip in dns_ips { rc.add_name_server(NameServerConfig { socket_addr: SocketAddr::V6(SocketAddrV6::new( - dns_ip, - DNS_PORT, - 0, - 0, + dns_ip, DNS_PORT, 0, 0, )), protocol: Protocol::Udp, tls_dns_name: None, diff --git a/internal-dns-client/src/names.rs b/internal-dns-client/src/names.rs index 2d72ce7cabd..6384ec9e503 100644 --- a/internal-dns-client/src/names.rs +++ b/internal-dns-client/src/names.rs @@ -25,7 +25,7 @@ impl fmt::Display for SRV { match &self { SRV::Service(name) => { write!(f, "_{}._tcp.{}", name, DNS_ZONE) - }, + } SRV::Backend(name, id) => { write!(f, "_{}._tcp.{}.{}", name, id, DNS_ZONE) } @@ -46,7 +46,7 @@ impl fmt::Display for AAAA { match &self { AAAA::Sled(id) => { write!(f, "{}.sled.{}", id, DNS_ZONE) - }, + } AAAA::Zone(id) => { write!(f, "{}.host.{}", id, DNS_ZONE) } diff --git a/internal-dns/src/dns_data.rs b/internal-dns/src/dns_data.rs index 5f5f4e15a4e..4f6172c0c4e 100644 --- a/internal-dns/src/dns_data.rs +++ b/internal-dns/src/dns_data.rs @@ -202,20 +202,20 @@ impl Server { return; } }; - let records: Vec = match serde_json::from_slice(bits.as_ref()) - { - Ok(r) => r, - Err(e) => { - error!(self.log, "deserialize record: {}", e); - match response.tx.send(Vec::new()) { - Ok(_) => {} - Err(e) => { - error!(self.log, "response tx: {:?}", e); + let records: Vec = + match serde_json::from_slice(bits.as_ref()) { + Ok(r) => r, + Err(e) => { + error!(self.log, "deserialize record: {}", e); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } } + return; } - return; - } - }; + }; match response.tx.send(vec![DnsKV { key, records }]) { Ok(_) => {} Err(e) => { diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs index d7c1f085e4b..13b67486cec 100644 --- a/internal-dns/src/dns_server.rs +++ b/internal-dns/src/dns_server.rs @@ -140,9 +140,9 @@ async fn handle_req<'a, 'b, 'c>( return; } }; - srv.set_name(name) - .set_rr_type(RecordType::SRV) - .set_data(Some(RData::SRV(SRV::new(*prio, *weight, *port, tgt)))); + srv.set_name(name).set_rr_type(RecordType::SRV).set_data(Some( + RData::SRV(SRV::new(*prio, *weight, *port, tgt)), + )); let mresp = rb.build(header, vec![&srv], vec![], vec![], vec![]); diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 1682c846b42..23e1b0c2785 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -128,18 +128,20 @@ impl std::fmt::Display for SchemeName { #[cfg(test)] mod test { use super::{ - AuthnConfig, Config, ConsoleConfig, LoadError, - PackageConfig, SchemeName, TimeseriesDbConfig, UpdatesConfig, + AuthnConfig, Config, ConsoleConfig, LoadError, PackageConfig, + SchemeName, TimeseriesDbConfig, UpdatesConfig, }; - use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; - use omicron_common::nexus_config::{LoadErrorKind, Database, RuntimeConfig}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use libc; - use std::net::{SocketAddr, Ipv6Addr}; + use omicron_common::address::{Ipv6Subnet, RACK_PREFIX}; + use omicron_common::nexus_config::{ + Database, LoadErrorKind, RuntimeConfig, + }; use std::fs; + use std::net::{Ipv6Addr, SocketAddr}; use std::path::Path; use std::path::PathBuf; @@ -376,9 +378,13 @@ mod test { ) .expect_err("expected failure"); if let LoadErrorKind::Parse(error) = &error.kind { - assert!(error.to_string().starts_with( - "unsupported authn scheme: \"trust-me\"" - ), "error = {}", error.to_string()); + assert!( + error + .to_string() + .starts_with("unsupported authn scheme: \"trust-me\""), + "error = {}", + error.to_string() + ); } else { panic!( "Got an unexpected error, expected Parse but got {:?}", diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 895d80ba096..afd3682ce7b 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,8 +18,8 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; -use omicron_common::api::external::Error; use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; +use omicron_common::api::external::Error; use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; use oximeter::types::ProducerRegistry; @@ -94,7 +94,8 @@ impl ServerContext { let internal_authn = Arc::new(authn::Context::internal_api()); let authz = Arc::new(authz::Authz::new(&log)); let create_tracker = |name: &str| { - let target = HttpService { name: name.to_string(), id: config.runtime.id }; + let target = + HttpService { name: name.to_string(), id: config.runtime.id }; const START_LATENCY_DECADE: i8 = -6; const END_LATENCY_DECADE: i8 = 3; LatencyTracker::with_latency_decades( @@ -137,30 +138,31 @@ impl ServerContext { // nexus in dev for everyone // Set up DNS Client - let az_subnet = Ipv6Subnet::::new(config.runtime.subnet.net().ip()); - let resolver = internal_dns_client::multiclient::create_resolver(az_subnet) - .map_err(|e| format!("Failed to create DNS resolver: {}", e.to_string()))?; + let az_subnet = + Ipv6Subnet::::new(config.runtime.subnet.net().ip()); + let resolver = + internal_dns_client::multiclient::create_resolver(az_subnet) + .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; // Set up DB pool let url = match &config.runtime.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { - let response = resolver.lookup_ip("cockroachdb.") + let response = resolver + .lookup_ip("cockroachdb.") .await - .map_err(|e| format!("Failed to lookup IP: {}", e.to_string()))?; - let address = response - .iter() - .next() - .ok_or_else(|| "no addresses returned from DNS resolver".to_string())?; - PostgresConfigWithUrl::from_str( - &format!("postgresql://root@[{}]:{}/omicron?sslmode=disable", address, COCKROACH_PORT) - ) - .map_err(|e| format!("Cannot parse Postgres URL: {}", e.to_string()))? - }, + .map_err(|e| format!("Failed to lookup IP: {}", e))?; + let address = response.iter().next().ok_or_else(|| { + "no addresses returned from DNS resolver".to_string() + })?; + PostgresConfigWithUrl::from_str(&format!( + "postgresql://root@[{}]:{}/omicron?sslmode=disable", + address, COCKROACH_PORT + )) + .map_err(|e| format!("Cannot parse Postgres URL: {}", e))? + } }; - let pool = db::Pool::new(&db::Config { - url - }); + let pool = db::Pool::new(&db::Config { url }); Ok(Arc::new(ServerContext { nexus: Nexus::new_with_id( diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 31602307de0..4d6995b0a04 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -253,7 +253,10 @@ impl DataStore { } /// Stores a new service in the database. - pub async fn service_upsert(&self, service: Service) -> CreateResult { + pub async fn service_upsert( + &self, + service: Service, + ) -> CreateResult { use db::schema::service::dsl; let sled_id = service.sled_id; diff --git a/nexus/src/db/model/service.rs b/nexus/src/db/model/service.rs index b0b1f35e825..426f1e6aad5 100644 --- a/nexus/src/db/model/service.rs +++ b/nexus/src/db/model/service.rs @@ -3,8 +3,8 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use super::{ServiceKind, SqlU16}; -use crate::db::schema::service; use crate::db::ipv6; +use crate::db::schema::service; use db_macros::Asset; use std::net::SocketAddrV6; use uuid::Uuid; diff --git a/nexus/src/db/model/service_kind.rs b/nexus/src/db/model/service_kind.rs index 257217ef365..6cd4e72d0db 100644 --- a/nexus/src/db/model/service_kind.rs +++ b/nexus/src/db/model/service_kind.rs @@ -24,9 +24,7 @@ impl_enum_type!( impl From for ServiceKind { fn from(k: internal_api::params::ServiceKind) -> Self { match k { - internal_api::params::ServiceKind::Nexus => { - ServiceKind::Nexus - } + internal_api::params::ServiceKind::Nexus => ServiceKind::Nexus, internal_api::params::ServiceKind::Oximeter => { ServiceKind::Oximeter } diff --git a/nexus/src/db/model/sled.rs b/nexus/src/db/model/sled.rs index 95abcd375a6..ad756c3473f 100644 --- a/nexus/src/db/model/sled.rs +++ b/nexus/src/db/model/sled.rs @@ -5,7 +5,7 @@ use super::{Generation, SqlU16}; use crate::db::collection_insert::DatastoreCollection; use crate::db::ipv6; -use crate::db::schema::{sled, service, zpool}; +use crate::db::schema::{service, sled, zpool}; use chrono::{DateTime, Utc}; use db_macros::Asset; use std::net::Ipv6Addr; diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index d2db3240100..61abe04b1ba 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -153,13 +153,12 @@ impl Server { /// Run an instance of the [Server]. pub async fn run_server(config: &Config) -> Result<(), String> { use slog::Drain; - let (drain, registration) = slog_dtrace::with_drain( - config - .pkg - .log - .to_logger("nexus") - .map_err(|message| format!("initializing logger: {}", message))?, - ); + let (drain, registration) = + slog_dtrace::with_drain( + config.pkg.log.to_logger("nexus").map_err(|message| { + format!("initializing logger: {}", message) + })?, + ); let log = slog::Logger::root(drain.fuse(), slog::o!()); if let slog_dtrace::ProbeRegistration::Failed(e) = registration { let msg = format!("failed to register DTrace probes: {}", e); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index f70a46372b9..e4eb744e2fa 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -100,9 +100,8 @@ pub async fn test_setup_with_config( let clickhouse = dev::clickhouse::ClickHouseInstance::new(0).await.unwrap(); // Store actual address/port information for the databases after they start. - config.runtime.database = nexus_config::Database::FromUrl { - url: database.pg_config().clone() - }; + config.runtime.database = + nexus_config::Database::FromUrl { url: database.pg_config().clone() }; config.pkg.timeseries_db.address.set_port(clickhouse.port()); let server = omicron_nexus::Server::start(&config, rack_id, &logctx.log) diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index aa15292074c..e3a4e0572ac 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -247,34 +247,26 @@ impl From for sled_agent_client::types::DatasetEnsureBody { // Struct variant enums require some assistance for serialization to TOML. #[serde(tag = "type")] pub enum ServiceType { - Nexus { - internal_address: SocketAddrV6, - external_address: SocketAddrV6, - }, - InternalDns { - server_address: SocketAddrV6, - dns_address: SocketAddrV6, - }, + Nexus { internal_address: SocketAddrV6, external_address: SocketAddrV6 }, + InternalDns { server_address: SocketAddrV6, dns_address: SocketAddrV6 }, } impl From for sled_agent_client::types::ServiceType { fn from(s: ServiceType) -> Self { - use ServiceType as St; use sled_agent_client::types::ServiceType as AutoSt; + use ServiceType as St; match s { - St::Nexus { internal_address, external_address } => { - AutoSt::Nexus { - internal_address: internal_address.to_string(), - external_address: external_address.to_string(), - } + St::Nexus { internal_address, external_address } => AutoSt::Nexus { + internal_address: internal_address.to_string(), + external_address: external_address.to_string(), }, St::InternalDns { server_address, dns_address } => { AutoSt::InternalDns { server_address: server_address.to_string(), dns_address: dns_address.to_string(), } - }, + } } } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1f56634339f..a8917d1991b 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -4,13 +4,16 @@ //! Rack Setup Service implementation -use super::config::{SetupServiceConfig as Config}; +use super::config::SetupServiceConfig as Config; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, }; use crate::params::{DatasetEnsureBody, ServiceRequest, ServiceType}; -use omicron_common::address::{get_sled_address, ReservedRackSubnet, RSS_RESERVED_ADDRESSES, NEXUS_INTERNAL_PORT, NEXUS_EXTERNAL_PORT, DNS_PORT, DNS_SERVER_PORT}; +use omicron_common::address::{ + get_sled_address, ReservedRackSubnet, DNS_PORT, DNS_SERVER_PORT, + NEXUS_EXTERNAL_PORT, NEXUS_INTERNAL_PORT, RSS_RESERVED_ADDRESSES, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -55,7 +58,6 @@ pub enum SetupServiceError { HttpClient(reqwest::Error), // XXX CLEAN UP - #[error(transparent)] Dns(#[from] internal_dns_client::Error), } @@ -155,9 +157,7 @@ struct AddressBumpAllocator { // TODO: Could exist in another file? impl AddressBumpAllocator { fn new(sled_addr: Ipv6Addr) -> Self { - Self { - last_addr: sled_addr, - } + Self { last_addr: sled_addr } } fn next(&mut self) -> Option { @@ -183,7 +183,7 @@ impl ServiceInner { ServiceInner { log, peer_monitor: Mutex::new(peer_monitor), - dns_servers: OnceCell::new() + dns_servers: OnceCell::new(), } } @@ -293,26 +293,28 @@ impl ServiceInner { // // XXX: Hardcoding CRDB? let crdb_datasets = datasets.iter().filter(|dataset| { - matches!(dataset.dataset_kind, crate::params::DatasetKind::CockroachDb { .. }) + matches!( + dataset.dataset_kind, + crate::params::DatasetKind::CockroachDb { .. } + ) }); - let aaaa = crdb_datasets.map(|dataset| { - ( - internal_dns_client::names::AAAA::Zone(dataset.id), - dataset.address, - ) - }).collect::>(); - let srv_key = internal_dns_client::names::SRV::Service("cockroachdb".into()); + let aaaa = crdb_datasets + .map(|dataset| { + ( + internal_dns_client::names::AAAA::Zone(dataset.id), + dataset.address, + ) + }) + .collect::>(); + let srv_key = + internal_dns_client::names::SRV::Service("cockroachdb".into()); self.dns_servers .get() .expect("DNS servers must be initialized first") - .insert_dns_records( - &self.log, - aaaa, - srv_key - ).await?; - + .insert_dns_records(&self.log, aaaa, srv_key) + .await?; // TODO: add dns records for non-crdb datasets too // TODO: alternatively, REMOVE THEM! Make RSS set up crdb exclusively. @@ -411,7 +413,8 @@ impl ServiceInner { let sled_subnet_index = u8::try_from(idx + 1).expect("Too many peers!"); let subnet = config.sled_subnet(sled_subnet_index); - let mut addr_alloc = AddressBumpAllocator::new(*get_sled_address(subnet).ip()); + let mut addr_alloc = + AddressBumpAllocator::new(*get_sled_address(subnet).ip()); let mut request = SledRequest::default(); @@ -425,8 +428,18 @@ impl ServiceInner { addresses: vec![address], gz_addresses: vec![], service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new(address, NEXUS_INTERNAL_PORT, 0, 0), - external_address: SocketAddrV6::new(address, NEXUS_EXTERNAL_PORT, 0, 0), + internal_address: SocketAddrV6::new( + address, + NEXUS_INTERNAL_PORT, + 0, + 0, + ), + external_address: SocketAddrV6::new( + address, + NEXUS_EXTERNAL_PORT, + 0, + 0, + ), }, }) } @@ -441,16 +454,15 @@ impl ServiceInner { 0, 0, ); - request.datasets.push( - DatasetEnsureBody { - id: Uuid::new_v4(), - zpool_uuid: dataset.zpool_uuid, - dataset_kind: crate::params::DatasetKind::CockroachDb { - all_addresses: vec![ address ], + request.datasets.push(DatasetEnsureBody { + id: Uuid::new_v4(), + zpool_uuid: dataset.zpool_uuid, + dataset_kind: + crate::params::DatasetKind::CockroachDb { + all_addresses: vec![address], }, - address, - } - ); + address, + }); } } @@ -465,8 +477,15 @@ impl ServiceInner { addresses: vec![dns_addr], gz_addresses: vec![dns_subnet.gz_address().ip()], service_type: ServiceType::InternalDns { - server_address: SocketAddrV6::new(dns_addr, DNS_SERVER_PORT, 0, 0), - dns_address: SocketAddrV6::new(dns_addr, DNS_PORT, 0, 0), + server_address: SocketAddrV6::new( + dns_addr, + DNS_SERVER_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_addr, DNS_PORT, 0, 0, + ), }, }); } @@ -503,8 +522,10 @@ impl ServiceInner { } // Once we've constructed a plan, write it down to durable storage. - let serialized_plan = toml::Value::try_from(&plan) - .expect(&format!("Cannot serialize configuration: {:#?}", plan)); + let serialized_plan = + toml::Value::try_from(&plan).unwrap_or_else(|e| { + panic!("Cannot serialize configuration: {:#?}: {}", plan, e) + }); let plan_str = toml::to_string(&serialized_plan) .expect("Cannot turn config to string"); @@ -701,25 +722,28 @@ impl ServiceInner { config.az_subnet(), self.log.new(o!("client" => "DNS")), ); - self.dns_servers.set(dns_servers).map_err(|_| ()).expect("Already set DNS servers"); + self.dns_servers + .set(dns_servers) + .map_err(|_| ()) + .expect("Already set DNS servers"); // XXX Test record insertion -/* - insert_dns_record( - &self.log, - &dns_servers, - "hello.world", - Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1), - ).await?; - - // XXX test record retreival - - let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) - .expect("Failed to create DNS resolver"); - let response = resolver.lookup_ip(name.to_owned() + ".").await.expect("Failed to lookup IP"); - let address = response.iter().next().expect("no addresses returned from DNS resolver"); - assert_eq!(address, addr); -*/ + /* + insert_dns_record( + &self.log, + &dns_servers, + "hello.world", + Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1), + ).await?; + + // XXX test record retreival + + let resolver = internal_dns_client::multiclient::create_resolver(config.az_subnet()) + .expect("Failed to create DNS resolver"); + let response = resolver.lookup_ip(name.to_owned() + ".").await.expect("Failed to lookup IP"); + let address = response.iter().next().expect("no addresses returned from DNS resolver"); + assert_eq!(address, addr); + */ // Issue the dataset initialization requests to all sleds. futures::future::join_all(plan.iter().map( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 38ef794a028..503144e0d03 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,11 +7,11 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; -use crate::illumos::zone::{AddressRequest, Zones}; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; +use crate::illumos::zone::{AddressRequest, Zones}; use crate::params::{ServiceEnsureBody, ServiceRequest, ServiceType}; use dropshot::ConfigDropshot; -use omicron_common::address::{Ipv6Subnet, SLED_PREFIX, RACK_PREFIX}; +use omicron_common::address::{Ipv6Subnet, RACK_PREFIX, SLED_PREFIX}; use omicron_common::nexus_config::{self, RuntimeConfig as NexusRuntimeConfig}; use slog::Logger; use std::collections::HashSet; @@ -106,7 +106,7 @@ impl Default for Config { .join(PathBuf::from(zone_name)) .join("root") .join(format!("var/svc/manifest/site/{}", svc_name)) - }) + }), } } } @@ -284,10 +284,7 @@ impl ServiceManager { let default_smf_name = format!("{}:default", smf_name); match service.service_type { - ServiceType::Nexus { - internal_address, - external_address, - } => { + ServiceType::Nexus { internal_address, external_address } => { info!(self.log, "Setting up Nexus service"); // Nexus takes a separate config file for parameters which @@ -302,39 +299,50 @@ impl ServiceManager { bind_address: SocketAddr::V6(internal_address), ..Default::default() }, - subnet: Ipv6Subnet::::new(self.sled_subnet.net().ip()), + subnet: Ipv6Subnet::::new( + self.sled_subnet.net().ip(), + ), database: nexus_config::Database::FromDns, }; // Copy the partial config file to the expected location. let config_dir = (self.config.get_svc_config_dir)( - running_zone.name(), &service.name + running_zone.name(), + &service.name, ); - let partial_config_path = config_dir.join(PARTIAL_CONFIG_FILENAME); + let partial_config_path = + config_dir.join(PARTIAL_CONFIG_FILENAME); let config_path = config_dir.join(COMPLETE_CONFIG_FILENAME); tokio::fs::copy(partial_config_path, &config_path) .await - .map_err(|err| Error::Io { path: config_path.clone(), err })?; + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; // Serialize the configuration and append it into the file. let serialized_cfg = toml::Value::try_from(&runtime_config) .expect("Cannot serialize config"); let config_str = toml::to_string(&serialized_cfg).map_err(|err| { - Error::TomlSerialize { path: config_path.clone(), err } + Error::TomlSerialize { + path: config_path.clone(), + err, + } })?; - let mut file = tokio::fs::OpenOptions::new().append(true).open(&config_path) + let mut file = tokio::fs::OpenOptions::new() + .append(true) + .open(&config_path) .await - .map_err(|err| Error::Io { path: config_path.clone(), err })?; - file.write_all(config_str.as_bytes()) - .await - .map_err(|err| Error::Io { path: config_path.clone(), err })?; - - }, - ServiceType::InternalDns { - server_address, - dns_address, - } => { + .map_err(|err| Error::Io { + path: config_path.clone(), + err, + })?; + file.write_all(config_str.as_bytes()).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?; + } + ServiceType::InternalDns { server_address, dns_address } => { info!(self.log, "Setting up internal-dns service"); running_zone .run_cmd(&[ @@ -344,7 +352,8 @@ impl ServiceManager { "setprop", &format!( "config/server_address=[{}]:{}", - server_address.ip(), server_address.port(), + server_address.ip(), + server_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { @@ -360,7 +369,8 @@ impl ServiceManager { "setprop", &format!( "config/dns_address=[{}]:{}", - dns_address.ip(), dns_address.port(), + dns_address.ip(), + dns_address.port(), ), ]) .map_err(|err| Error::ZoneCommand { @@ -482,7 +492,7 @@ mod test { use crate::illumos::{ dladm::MockDladm, dladm::PhysicalLink, svc, zone::MockZones, }; - use std::net::{SocketAddrV6, Ipv6Addr}; + use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -546,8 +556,18 @@ mod test { addresses: vec![], gz_addresses: vec![], service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), - external_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), }, }], }) @@ -565,8 +585,18 @@ mod test { addresses: vec![], gz_addresses: vec![], service_type: ServiceType::Nexus { - internal_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), - external_address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + internal_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), + external_address: SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + ), }, }], }) @@ -598,20 +628,25 @@ mod test { impl TestConfig { async fn new() -> Self { let config_dir = tempfile::TempDir::new().unwrap(); - tokio::fs::File::create(config_dir.path().join(PARTIAL_CONFIG_FILENAME)).await.unwrap(); - Self { - config_dir - } + tokio::fs::File::create( + config_dir.path().join(PARTIAL_CONFIG_FILENAME), + ) + .await + .unwrap(); + Self { config_dir } } fn make_config(&self) -> Config { - let all_svcs_config_path = self.config_dir.path().join(SERVICE_CONFIG_FILENAME); + let all_svcs_config_path = + self.config_dir.path().join(SERVICE_CONFIG_FILENAME); let svc_config_dir = self.config_dir.path().to_path_buf(); Config { all_svcs_config_path, - get_svc_config_dir: Box::new(move |_zone_name: &str, _svc_name: &str| { - svc_config_dir.clone() - }) + get_svc_config_dir: Box::new( + move |_zone_name: &str, _svc_name: &str| { + svc_config_dir.clone() + }, + ), } } } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7c0e6c4ade1..9b8c62b44e8 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -203,7 +203,8 @@ impl SledAgent { data_link.clone(), Ipv6Subnet::::new(*sled_address.ip()), services::Config::default(), - ).await?; + ) + .await?; Ok(SledAgent { id: config.id, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index b324545caf2..1e6c8f92a07 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -206,7 +206,11 @@ struct DatasetInfo { } impl DatasetInfo { - fn new(pool: &str, kind: DatasetKind, address: SocketAddrV6) -> DatasetInfo { + fn new( + pool: &str, + kind: DatasetKind, + address: SocketAddrV6, + ) -> DatasetInfo { match kind { DatasetKind::CockroachDb { .. } => DatasetInfo { name: DatasetName::new(pool, "cockroachdb"), @@ -303,9 +307,13 @@ impl DatasetInfo { // Await liveness of the cluster. info!(log, "start_zone: awaiting liveness of CRDB"); let check_health = || async { - reqwest::get(format!("http://[{}]:{}/health?ready=1", address.ip(), 8080)) - .await - .map_err(backoff::BackoffError::transient) + reqwest::get(format!( + "http://[{}]:{}/health?ready=1", + address.ip(), + 8080 + )) + .await + .map_err(backoff::BackoffError::transient) }; let log_failure = |_, _| { warn!(log, "cockroachdb not yet alive"); @@ -455,8 +463,10 @@ async fn ensure_running_zone( dataset_name: &DatasetName, do_format: bool, ) -> Result { - let address_request = - AddressRequest::new_static(IpAddr::V6(*dataset_info.address.ip()), None); + let address_request = AddressRequest::new_static( + IpAddr::V6(*dataset_info.address.ip()), + None, + ); let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request) From 9970c107ce50f5e47fb7efc3b265ed1158954ba1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 15:25:26 -0400 Subject: [PATCH 58/61] Fix benchmark --- nexus/benches/setup_benchmark.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index c4c27bd2a97..24584670ce5 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -19,7 +19,7 @@ async fn do_full_setup() { // Wraps exclusively the CockroachDB portion of setup/teardown. async fn do_crdb_setup() { let cfg = nexus_test_utils::load_test_config(); - let logctx = LogContext::new("crdb_setup", &cfg.log); + let logctx = LogContext::new("crdb_setup", &cfg.pkg.log); let mut db = test_setup_database(&logctx.log).await; db.cleanup().await.unwrap(); } From 5ef48d5f6687d5dd94023e52df7db28f888d35ed Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 15:43:06 -0400 Subject: [PATCH 59/61] remove rt parts of config --- smf/nexus/config-partial.toml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index a152d398f02..f88d62387f1 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -1,10 +1,7 @@ # -# Oxide API: example configuration file +# Oxide API: partial configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "/var/nexus/static" @@ -16,18 +13,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:0101::3]:12220" - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:0101::3]:12221" - [log] # Show log messages of this level and more severe level = "info" From 63b1cd610ee7d8c525813bb762dd71b75224591d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 May 2022 20:02:44 -0400 Subject: [PATCH 60/61] make nexus happier w/config, fix resolver, nexus depends on routing --- common/src/address.rs | 2 ++ nexus/examples/config.toml | 45 ++++++++++++------------- nexus/src/config.rs | 7 ++-- nexus/src/context.rs | 7 ++-- nexus/tests/config.test.toml | 53 +++++++++++++++--------------- package/src/bin/omicron-package.rs | 3 +- sled-agent/src/services.rs | 4 ++- smf/nexus/manifest.xml | 8 +++++ smf/sled-agent/manifest.xml | 4 +++ 9 files changed, 78 insertions(+), 55 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 462ff907efc..17e72383607 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -39,6 +39,8 @@ pub const CRUCIBLE_PORT: u16 = 32345; pub const NEXUS_EXTERNAL_PORT: u16 = 12220; pub const NEXUS_INTERNAL_PORT: u16 = 12221; +pub const COCKROACH_DNS_NAME: &str = "_cockroachdb._tcp.control-plane.oxide.internal"; + // Anycast is a mechanism in which a single IP address is shared by multiple // devices, and the destination is located based on routing distance. // diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f6855590a89..b54ff03280d 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -2,9 +2,6 @@ # Oxide API: example configuration file # -# Identifier for this instance of Nexus -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "nexus/static" # TODO: figure out value @@ -21,25 +18,6 @@ session_absolute_timeout_minutes = 480 # TODO(https://github.com/oxidecomputer/omicron/issues/372): Remove "spoof". schemes_external = ["spoof", "session_cookie"] -[database] -# URL for connecting to the database -type = "from_url" -url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" - -[dropshot_external] -# IP address and TCP port on which to listen for the external API -bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 - -[dropshot_internal] -# IP address and TCP port on which to listen for the internal API -bind_address = "127.0.0.1:12221" - -[subnet] -net = "fd00:1122:3344:0100::/56" - [log] # Show log messages of this level and more severe level = "info" @@ -55,3 +33,26 @@ mode = "stderr-terminal" # Configuration for interacting with the timeseries database [timeseries_db] address = "[::1]:8123" + +[runtime] +# Identifier for this instance of Nexus +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" + +[runtime.dropshot_external] +# IP address and TCP port on which to listen for the external API +bind_address = "127.0.0.1:12220" +# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one +# rule is ~500 bytes) +request_body_max_bytes = 1048576 + +[runtime.dropshot_internal] +# IP address and TCP port on which to listen for the internal API +bind_address = "127.0.0.1:12221" + +[runtime.subnet] +net = "fd00:1122:3344:0100::/56" + +[runtime.database] +# URL for connecting to the database +type = "from_url" +url = "postgresql://root@127.0.0.1:32221/omicron?sslmode=disable" diff --git a/nexus/src/config.rs b/nexus/src/config.rs index 23e1b0c2785..3ff05208477 100644 --- a/nexus/src/config.rs +++ b/nexus/src/config.rs @@ -69,11 +69,12 @@ pub struct PackageConfig { #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] pub struct Config { - /// A variety of configuration parameters only known at runtime. - #[serde(flatten)] - pub runtime: RuntimeConfig, + /// Configuration parameters known at compile-time. #[serde(flatten)] pub pkg: PackageConfig, + + /// A variety of configuration parameters only known at runtime. + pub runtime: RuntimeConfig, } impl Config { diff --git a/nexus/src/context.rs b/nexus/src/context.rs index afd3682ce7b..2c8e8eaea0b 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -18,7 +18,7 @@ use authn::external::session_cookie::HttpAuthnSessionCookie; use authn::external::spoof::HttpAuthnSpoof; use authn::external::HttpAuthnScheme; use chrono::{DateTime, Duration, Utc}; -use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT}; +use omicron_common::address::{Ipv6Subnet, AZ_PREFIX, COCKROACH_PORT, COCKROACH_DNS_NAME}; use omicron_common::api::external::Error; use omicron_common::nexus_config; use omicron_common::postgres_config::PostgresConfigWithUrl; @@ -140,6 +140,7 @@ impl ServerContext { // Set up DNS Client let az_subnet = Ipv6Subnet::::new(config.runtime.subnet.net().ip()); + info!(log, "Setting up resolver on subnet: {:?}", az_subnet); let resolver = internal_dns_client::multiclient::create_resolver(az_subnet) .map_err(|e| format!("Failed to create DNS resolver: {}", e))?; @@ -148,13 +149,15 @@ impl ServerContext { let url = match &config.runtime.database { nexus_config::Database::FromUrl { url } => url.clone(), nexus_config::Database::FromDns => { + info!(log, "Accessing DB url from DNS"); let response = resolver - .lookup_ip("cockroachdb.") + .lookup_ip(COCKROACH_DNS_NAME) .await .map_err(|e| format!("Failed to lookup IP: {}", e))?; let address = response.iter().next().ok_or_else(|| { "no addresses returned from DNS resolver".to_string() })?; + info!(log, "DB addreess: {}", address); PostgresConfigWithUrl::from_str(&format!( "postgresql://root@[{}]:{}/omicron?sslmode=disable", address, COCKROACH_PORT diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index b9e51037a35..bdabeca2413 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -2,10 +2,6 @@ # Oxide API: configuration file for test suite # -# Identifier for this instance of Nexus. -# NOTE: The test suite always overrides this. -id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" - [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "tests/static" @@ -18,43 +14,48 @@ session_absolute_timeout_minutes = 480 schemes_external = [ "spoof", "session_cookie" ] # -# NOTE: for the test suite, the database URL will be replaced with one -# appropriate for the database that's started by the test runner. +# NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel +# string "UNUSED". The actual path will be generated by the test suite for each +# test. # -[database] -type = "from_url" -url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" +[log] +level = "trace" +mode = "file" +if_exists = "fail" +path = "UNUSED" + +# Configuration for interacting with the timeseries database. This is overwritten +# by the test suite once ClickHouse starts, with the actual address on which it +# is listening. +[timeseries_db] +address = "[::1]:0" + +[runtime] +# Identifier for this instance of Nexus. +# NOTE: The test suite always overrides this. +id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" # # NOTE: for the test suite, the port MUST be 0 (in order to bind to any # available port) because the test suite will be running many servers # concurrently. # -[dropshot_external] +[runtime.dropshot_external] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 # port must be 0. see above -[dropshot_internal] +[runtime.dropshot_internal] bind_address = "127.0.0.1:0" request_body_max_bytes = 1048576 -[subnet] +[runtime.subnet] net = "fd00:1122:3344:0100::/56" # -# NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel -# string "UNUSED". The actual path will be generated by the test suite for each -# test. +# NOTE: for the test suite, the database URL will be replaced with one +# appropriate for the database that's started by the test runner. # -[log] -level = "trace" -mode = "file" -if_exists = "fail" -path = "UNUSED" - -# Configuration for interacting with the timeseries database. This is overwritten -# by the test suite once ClickHouse starts, with the actual address on which it -# is listening. -[timeseries_db] -address = "[::1]:0" +[runtime.database] +type = "from_url" +url = "postgresql://root@127.0.0.1:0/omicron?sslmode=disable" diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index d3daf67d67d..d40795d79e3 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -325,7 +325,8 @@ fn do_install( Ok(()) })?; - // Ensure we start from a clean slate - remove all packages. + // Ensure we start from a clean slate - remove all zones & packages. + uninstall_all_omicron_zones()?; uninstall_all_packages(config); // Extract and install the bootstrap service, which itself extracts and diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 503144e0d03..9448059d37d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -323,8 +323,10 @@ impl ServiceManager { // Serialize the configuration and append it into the file. let serialized_cfg = toml::Value::try_from(&runtime_config) .expect("Cannot serialize config"); + let mut map = toml::map::Map::new(); + map.insert("runtime".to_string(), serialized_cfg); let config_str = - toml::to_string(&serialized_cfg).map_err(|err| { + toml::to_string(&map).map_err(|err| { Error::TomlSerialize { path: config_path.clone(), err, diff --git a/smf/nexus/manifest.xml b/smf/nexus/manifest.xml index 0b8da2ff62f..3ff92b2fbac 100644 --- a/smf/nexus/manifest.xml +++ b/smf/nexus/manifest.xml @@ -11,6 +11,14 @@ type='service'> + + + + + + diff --git a/smf/sled-agent/manifest.xml b/smf/sled-agent/manifest.xml index 378b77776c8..96f029d96e0 100644 --- a/smf/sled-agent/manifest.xml +++ b/smf/sled-agent/manifest.xml @@ -28,6 +28,10 @@ type='service'> + + + From 973a1e8dd1b686ab29bd4a536ced95cc0afed696 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 8 Jun 2022 13:22:49 -0400 Subject: [PATCH 61/61] Fix merge conflict --- smf/sled-agent/config-rss.toml | 52 +++------------------------------- 1 file changed, 4 insertions(+), 48 deletions(-) diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 36f54e35e6b..83e0ee5fd40 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -10,52 +10,20 @@ rack_subnet = "fd00:1122:3344:0100::" # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate crucible datasets. -<<<<<<< HEAD # [[request.dataset]] -# zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" # address = "[fd00:1122:3344:0101::6]:32345" # dataset_kind.type = "crucible" # # [[request.dataset]] -# zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" # address = "[fd00:1122:3344:0101::7]:32345" # dataset_kind.type = "crucible" # # [[request.dataset]] -# zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" +# zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" # address = "[fd00:1122:3344:0101::8]:32345" # dataset_kind.type = "crucible" -||||||| 813a8596 -[[request.dataset]] -zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" -======= -[[request.dataset]] -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::6]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_id = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::7]:32345" -dataset_kind.type = "crucible" - -[[request.dataset]] -zpool_id = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:0101::8]:32345" -dataset_kind.type = "crucible" ->>>>>>> 35521afd3f1e3039f2cb2913272479e6da076d8b [[request.dataset]] zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" @@ -65,22 +33,10 @@ dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. -<<<<<<< HEAD # [[request.dataset]] -# zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" +# zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" # address = "[fd00:1122:3344:0101::5]:8123" # dataset_kind.type = "clickhouse" -||||||| 813a8596 -[[request.dataset]] -zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" -======= -[[request.dataset]] -zpool_id = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:0101::5]:8123" -dataset_kind.type = "clickhouse" ->>>>>>> 35521afd3f1e3039f2cb2913272479e6da076d8b # [[request.service]] # name = "nexus"