From 79fb8f3787674dfc91320029e9b5945dd08ff515 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 22 Mar 2022 19:25:52 -0400 Subject: [PATCH 01/41] Add internal-dns service --- Cargo.lock | 501 ++++++++++++++++++++++++++-- Cargo.toml | 2 + internal-dns-client/Cargo.toml | 12 + internal-dns-client/src/lib.rs | 18 + internal-dns/Cargo.toml | 36 ++ internal-dns/src/bin/apigen.rs | 27 ++ internal-dns/src/bin/dns-server.rs | 54 +++ internal-dns/src/dns_data.rs | 356 ++++++++++++++++++++ internal-dns/src/dns_server.rs | 185 ++++++++++ internal-dns/src/dropshot_server.rs | 73 ++++ internal-dns/src/lib.rs | 47 +++ internal-dns/tests/basic_test.rs | 188 +++++++++++ internal-dns/tests/openapi_test.rs | 27 ++ openapi/internal-dns.json | 237 +++++++++++++ 14 files changed, 1736 insertions(+), 27 deletions(-) create mode 100644 internal-dns-client/Cargo.toml create mode 100644 internal-dns-client/src/lib.rs create mode 100644 internal-dns/Cargo.toml create mode 100644 internal-dns/src/bin/apigen.rs create mode 100644 internal-dns/src/bin/dns-server.rs create mode 100644 internal-dns/src/dns_data.rs create mode 100644 internal-dns/src/dns_server.rs create mode 100644 internal-dns/src/dropshot_server.rs create mode 100644 internal-dns/src/lib.rs create mode 100644 internal-dns/tests/basic_test.rs create mode 100644 internal-dns/tests/openapi_test.rs create mode 100644 openapi/internal-dns.json diff --git a/Cargo.lock b/Cargo.lock index 28d12a1b65a..ff90b29d91e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,12 @@ dependencies = [ "syn", ] +[[package]] +name = "arc-swap" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" + [[package]] name = "array-init" version = "2.0.0" @@ -127,7 +133,7 @@ dependencies = [ "getrandom", "instant", "pin-project", - "rand", + "rand 0.8.5", "tokio", ] @@ -562,7 +568,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f83bd3bb4314701c568e340cd8cf78c975aa0ca79e03d3f6d1677d5b0c9c0c03" dependencies = [ "generic-array 0.14.5", - "rand_core", + "rand_core 0.6.3", "subtle", "zeroize", ] @@ -644,6 +650,12 @@ dependencies = [ "syn", ] +[[package]] +name = "data-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" + [[package]] name = "db-macros" version = "0.1.0" @@ -801,7 +813,45 @@ dependencies = [ "base64", "bytes", "chrono", - "dropshot_endpoint", + "dropshot_endpoint 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", + "futures", + "hostname", + "http", + "hyper", + "indexmap", + "openapiv3", + "paste", + "percent-encoding", + "proc-macro2", + "rustls", + "rustls-pemfile", + "schemars", + "serde", + "serde_json", + "serde_urlencoded", + "slog", + "slog-async", + "slog-bunyan", + "slog-json", + "slog-term", + "tokio", + "tokio-rustls", + "toml", + "usdt 0.3.2", + "uuid", +] + +[[package]] +name = "dropshot" +version = "0.6.1-dev" +source = "git+https://github.com/oxidecomputer/dropshot#da1d2db1411e1edbbe0101cc1db855606e8dabfc" +dependencies = [ + "async-stream", + "async-trait", + "base64", + "bytes", + "chrono", + "dropshot_endpoint 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot)", "futures", "hostname", "http", @@ -841,6 +891,18 @@ dependencies = [ "syn", ] +[[package]] +name = "dropshot_endpoint" +version = "0.6.1-dev" +source = "git+https://github.com/oxidecomputer/dropshot#da1d2db1411e1edbbe0101cc1db855606e8dabfc" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_tokenstream", + "syn", +] + [[package]] name = "dtrace-parser" version = "0.1.12" @@ -887,7 +949,7 @@ dependencies = [ "generic-array 0.14.5", "group", "pkcs8", - "rand_core", + "rand_core 0.6.3", "subtle", "zeroize", ] @@ -916,6 +978,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name 
= "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "enum-as-inner" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" +dependencies = [ + "heck 0.4.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "expectorate" version = "1.0.4" @@ -954,7 +1047,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0f40b2dcd8bc322217a5f6559ae5f9e9d1de202a2ecee2e9eafcbece7562a4f" dependencies = [ "bitvec", - "rand_core", + "rand_core 0.6.3", "subtle", ] @@ -1040,6 +1133,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da1b8f89c5b5a5b7e59405cfcf0bb9588e5ed19f0b57a4cd542bbba3f164a6d" +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "funty" version = "1.2.0" @@ -1135,6 +1244,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = 
"gateway-client" version = "0.1.0" @@ -1226,7 +1344,7 @@ checksum = "1c363a5301b8f153d80747126a04b3c82073b9fe3130571a9d170cacdeaf7912" dependencies = [ "byteorder", "ff", - "rand_core", + "rand_core 0.6.3", "subtle", ] @@ -1408,6 +1526,12 @@ dependencies = [ "syn", ] +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.17" @@ -1527,6 +1651,61 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "internal-dns" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 3.1.6", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot)", + "expectorate", + "internal-dns-client", + "omicron-test-utils", + "openapi-lint", + "openapiv3", + "portpicker", + "pretty-hex", + "schemars", + "serde", + "serde_json", + "sled", + "slog", + "slog-async", + "slog-envlogger", + "slog-term", + "structopt", + "subprocess", + "tempdir", + "tokio", + "toml", + "trust-dns-proto", + "trust-dns-resolver", + "trust-dns-server", +] + +[[package]] +name = "internal-dns-client" +version = "0.1.0" +dependencies = [ + "progenitor", + "reqwest", + "serde", + "serde_json", + "slog", +] + +[[package]] +name = "ipconfig" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723519edce41262b05d4143ceb95050e4c614f483e78e9fd9e39a8275a84ad98" +dependencies = [ + "socket2", + "widestring", + "winapi", + "winreg 0.7.0", +] + [[package]] name = "ipnet" version = "2.4.0" @@ -1612,6 +1791,12 @@ version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + [[package]] 
name = "lock_api" version = "0.4.6" @@ -1630,6 +1815,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "macaddr" version = "1.0.1" @@ -1817,7 +2011,7 @@ dependencies = [ "anyhow", "bytes", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "headers", "http", "hyper", @@ -1845,6 +2039,15 @@ dependencies = [ "syn", ] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1923,7 +2126,7 @@ dependencies = [ "api_identity", "backoff", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "http", @@ -1932,7 +2135,7 @@ dependencies = [ "macaddr", "parse-display", "progenitor", - "rand", + "rand 0.8.5", "reqwest", "ring", "schemars", @@ -1956,7 +2159,7 @@ name = "omicron-gateway" version = "0.1.0" dependencies = [ "clap 3.1.6", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "gateway-messages", @@ -1997,7 +2200,7 @@ dependencies = [ "db-macros", "diesel", "diesel-dtrace", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "headers", @@ -2026,7 +2229,7 @@ dependencies = [ "oximeter-producer", "parse-display", "pq-sys", - "rand", + "rand 0.8.5", "ref-cast", "regex", "reqwest", @@ -2094,7 +2297,7 @@ dependencies = [ "cfg-if", "chrono", "crucible-agent-client", - "dropshot", + "dropshot 0.6.1-dev 
(git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "http", @@ -2109,7 +2312,7 @@ dependencies = [ "percent-encoding", "progenitor", "propolis-client", - "rand", + "rand 0.8.5", "reqwest", "schemars", "serde", @@ -2141,7 +2344,7 @@ name = "omicron-test-utils" version = "0.1.0" dependencies = [ "anyhow", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "futures", "libc", @@ -2322,7 +2525,7 @@ dependencies = [ name = "oximeter-collector" version = "0.1.0" dependencies = [ - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "expectorate", "nexus-client", "omicron-common", @@ -2352,7 +2555,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "itertools", "omicron-test-utils", "oximeter", @@ -2376,7 +2579,7 @@ name = "oximeter-instruments" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "futures", "http", "oximeter", @@ -2398,7 +2601,7 @@ name = "oximeter-producer" version = "0.1.0" dependencies = [ "chrono", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "nexus-client", "omicron-common", "oximeter", @@ -2709,6 +2912,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "portpicker" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "postgres-protocol" version = "0.6.3" @@ -2722,7 +2934,7 @@ dependencies = [ "hmac 0.12.1", "md-5", "memchr", - "rand", + "rand 0.8.5", "sha2 0.10.2", "stringprep", ] @@ -2792,6 +3004,12 @@ dependencies = [ "termtree", ] +[[package]] +name = "pretty-hex" +version = "0.2.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2906,6 +3124,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.16" @@ -2932,6 +3156,29 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "643f8f41a8ebc4c5dc4515c82bb8abd397b527fc20fd681b7c011c2aee5d44fb" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.8.5" @@ -2940,7 +3187,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", - "rand_core", + "rand_core 0.6.3", ] [[package]] @@ -2950,9 +3197,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", ] +[[package]] +name = "rand_core" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.6.3" @@ -2987,6 +3249,15 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "redox_syscall" version = "0.2.11" @@ -3098,7 +3369,17 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots", - "winreg", + "winreg 0.10.1", +] + +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", ] [[package]] @@ -3557,7 +3838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2807892cfa58e081aa1f1111391c7a0649d4fa127a4ffbe34bcbfb35a1171a4" dependencies = [ "digest 0.9.0", - "rand_core", + "rand_core 0.6.3", ] [[package]] @@ -3572,6 +3853,22 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" +[[package]] +name = "sled" +version = "0.34.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" +dependencies = [ + "crc32fast", + "crossbeam-epoch", + "crossbeam-utils", + "fs2", + "fxhash", + "libc", + "log", + "parking_lot 0.11.2", +] + [[package]] name = "sled-agent-client" version = "0.1.0" @@ -3629,6 +3926,21 @@ dependencies = [ "usdt 0.2.1", ] +[[package]] +name = "slog-envlogger" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"906a1a0bc43fed692df4b82a5e2fbfc3733db8dad8bb514ab27a4f23ad04f5c0" +dependencies = [ + "log", + "regex", + "slog", + "slog-async", + "slog-scope", + "slog-stdlog", + "slog-term", +] + [[package]] name = "slog-json" version = "2.6.0" @@ -3641,6 +3953,28 @@ dependencies = [ "time 0.3.7", ] +[[package]] +name = "slog-scope" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786" +dependencies = [ + "arc-swap", + "lazy_static", + "slog", +] + +[[package]] +name = "slog-stdlog" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a" +dependencies = [ + "log", + "slog", + "slog-scope", +] + [[package]] name = "slog-term" version = "2.9.0" @@ -3707,7 +4041,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "dropshot", + "dropshot 0.6.1-dev (git+https://github.com/oxidecomputer/dropshot?branch=main)", "gateway-messages", "hex", "omicron-common", @@ -3726,7 +4060,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/spdm?rev=9742f6e#9742f6eae7b86cc8bc8bc2fb0feeb44f770a1fb6" dependencies = [ "bitflags", - "rand", + "rand 0.8.5", "ring", "webpki", ] @@ -3923,6 +4257,16 @@ dependencies = [ "xattr", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.3.0" @@ -4294,6 +4638,94 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "trust-dns-client" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3be5f2ead860f0d3aabc01433bc6fff0fe5e86bfbe2dd16e32b9c79959310ad" +dependencies = [ + "cfg-if", + "data-encoding", + "futures-channel", + 
"futures-util", + "lazy_static", + "log", + "radix_trie", + "rand 0.8.5", + "thiserror", + "time 0.3.7", + "tokio", + "trust-dns-proto", +] + +[[package]] +name = "trust-dns-proto" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2861b3ed517888174d13909e675c4e94b3291867512068be59d76533e4d1270c" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "lazy_static", + "log", + "rand 0.8.5", + "smallvec", + "thiserror", + "tinyvec", + "tokio", + "url", +] + +[[package]] +name = "trust-dns-resolver" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9e737a252a617bd4774649e245dbf705e207275db0893b9fa824d49f074fc1c" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lazy_static", + "log", + "lru-cache", + "parking_lot 0.12.0", + "resolv-conf", + "smallvec", + "thiserror", + "tokio", + "trust-dns-proto", +] + +[[package]] +name = "trust-dns-server" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4058838790565ba870cb800008c7b3b8a3f154afaece824ad9a91a80a4b81dfb" +dependencies = [ + "async-trait", + "bytes", + "cfg-if", + "enum-as-inner", + "env_logger", + "futures-executor", + "futures-util", + "log", + "serde", + "thiserror", + "time 0.3.7", + "tokio", + "toml", + "trust-dns-client", + "trust-dns-proto", +] + [[package]] name = "try-lock" version = "0.2.3" @@ -4599,7 +5031,7 @@ dependencies = [ "ff", "group", "rand_chacha", - "rand_core", + "rand_core 0.6.3", "serde", "serde-big-array", "serde_cbor", @@ -4735,6 +5167,12 @@ dependencies = [ "webpki", ] +[[package]] +name = "widestring" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" + [[package]] name = "winapi" version = "0.3.9" @@ -4809,6 
+5247,15 @@ version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 6b68be368cd..d2be41fde94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ members = [ "gateway", "gateway-client", "gateway-messages", + "internal-dns", + "internal-dns-client", "nexus", "nexus/src/db/db-macros", "nexus/test-utils", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml new file mode 100644 index 00000000000..af67e13d716 --- /dev/null +++ b/internal-dns-client/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "internal-dns-client" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +progenitor = { git = "https://github.com/oxidecomputer/progenitor" } +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" +slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/lib.rs b/internal-dns-client/src/lib.rs new file mode 100644 index 00000000000..49daa3d58ae --- /dev/null +++ b/internal-dns-client/src/lib.rs @@ -0,0 +1,18 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +progenitor::generate_api!( + spec = "../openapi/internal-dns.json", + inner_type = slog::Logger, + pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { + slog::debug!(log, "client request"; + "method" => %request.method(), + "uri" => %request.url(), + "body" => ?&request.body(), + ); + }), + post_hook = (|log: &slog::Logger, result: &Result<_, _>| { + slog::debug!(log, "client response"; "result" => ?result); + }), +); diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml new file mode 100644 index 00000000000..6abe17b75f0 --- /dev/null +++ b/internal-dns/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "internal-dns" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow = "1.0" +clap = { version = "3.1", features = [ "derive" ] } +dropshot = { git = "https://github.com/oxidecomputer/dropshot" } +pretty-hex = "0.2.1" +schemars = "0.8" +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" +sled = "0.34" +slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +slog-term = "2.7" +slog-async = "2.7" +slog-envlogger = "2.2" +structopt = "0.3" +tempdir = "0.3" +tokio = { version = "1.17", features = [ "full" ] } +toml = "0.5" +trust-dns-proto = "0.21" +trust-dns-server = "0.21" + +[dev-dependencies] +expectorate = "1.0.4" +internal-dns-client = { path = "../internal-dns-client" } +omicron-test-utils = { path = "../test-utils" } +openapiv3 = "1.0" +openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } +portpicker = "0.1" +serde_json = "1.0" +subprocess = "0.2.8" +trust-dns-resolver = "0.21" diff --git a/internal-dns/src/bin/apigen.rs b/internal-dns/src/bin/apigen.rs new file mode 100644 index 00000000000..6f21201e4b0 --- /dev/null +++ b/internal-dns/src/bin/apigen.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{bail, Result}; +use std::fs::File; +use std::io; +use internal_dns::dropshot_server::api; + +fn usage(args: &Vec) -> String { + format!("{} [output path]", args[0]) +} + +fn main() -> Result<()> { + let args: Vec = std::env::args().collect(); + + let mut out = match args.len() { + 1 => Box::new(io::stdout()) as Box, + 2 => Box::new(File::create(args[1].clone())?) as Box, + _ => bail!(usage(&args)), + }; + + let api = api(); + let openapi = api.openapi("Internal DNS", "v0.1.0"); + openapi.write(&mut out)?; + Ok(()) +} diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs new file mode 100644 index 00000000000..12d4b4458f0 --- /dev/null +++ b/internal-dns/src/bin/dns-server.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// See RFD 248 +// See https://github.com/oxidecomputer/omicron/issues/718 +// +// Milestones: +// - Dropshot server +// - Sqlite task +// - DNS task + +use anyhow::anyhow; +use anyhow::Context; +use clap::Parser; +use std::path::PathBuf; +use std::sync::Arc; + +#[derive(Parser, Debug)] +struct Args { + #[clap(long)] + config_file: PathBuf, +} + +#[tokio::main] +async fn main() -> Result<(), anyhow::Error> { + let args = Args::parse(); + let config_file = &args.config_file; + let config_file_contents = std::fs::read_to_string(config_file) + .with_context(|| format!("read config file {:?}", config_file))?; + let config: internal_dns::Config = toml::from_str(&config_file_contents) + .with_context(|| format!("parse config file {:?}", config_file))?; + eprintln!("{:?}", config); + + let log = + config.log.to_logger("internal-dns").context("failed to create logger")?; + + let db = Arc::new(sled::open(&config.data.storage_path)?); + + { + let db = db.clone(); + let log = log.clone(); + let config = config.dns.clone(); + + tokio::spawn( + async move { internal_dns::dns_server::run(log, db, config).await }, + ); + } + + let server = internal_dns::start_server(config, log, db).await?; + server + .await + .map_err(|error_message| anyhow!("server exiting: {}", error_message)) +} diff --git a/internal-dns/src/dns_data.rs b/internal-dns/src/dns_data.rs new file mode 100644 index 00000000000..0ddc2978365 --- /dev/null +++ b/internal-dns/src/dns_data.rs @@ -0,0 +1,356 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Manages DNS data (configured zone(s), records, etc.) 
+ +use anyhow::Context; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use slog::{error, info, o, trace}; +use std::net::Ipv6Addr; +use std::sync::Arc; + +/// Configuration related to data model +#[derive(Deserialize, Debug)] +pub struct Config { + /// maximum number of channel messages to buffer + pub nmax_messages: usize, + + /// The path for the embedded kv store + pub storage_path: String, +} + +/// default maximum number of messages to buffer +const NMAX_MESSAGES_DEFAULT: usize = 16; + +impl Default for Config { + fn default() -> Self { + Config { + nmax_messages: NMAX_MESSAGES_DEFAULT, + storage_path: ".".into(), + } + } +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +#[serde(rename = "Srv")] +pub struct SRV { + pub prio: u16, + pub weight: u16, + pub port: u16, + pub target: String, +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub enum DnsRecord { + AAAA(Ipv6Addr), + SRV(SRV), +} +#[derive(Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +pub struct DnsRecordKey { + name: String, +} +#[derive(Debug)] +pub struct DnsResponse { + tx: tokio::sync::oneshot::Sender, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename = "DnsKv")] +pub struct DnsKV { + key: DnsRecordKey, + record: DnsRecord, +} + +// XXX some refactors to help +// - each variant should have its own struct containing the data. 
This way we +// can pass it to functions as a bundle without them having to consume the +// whole enum (which might in principle be a different variant) +// - each variant's data should include some generic responder so that we can +// have common functions for logging and sending the T +#[derive(Debug)] +pub enum DnsCmd { + // XXX + // MakeExist(DnsRecord, DnsResponse<()>), + // MakeGone(DnsRecordKey, DnsResponse<()>), + Get(Option, DnsResponse>), + Set(Vec, DnsResponse<()>), + Delete(Vec, DnsResponse<()>), +} + +/// Data model client +/// +/// The Dropshot server has one of these to send commands to modify and update +/// the data model. +pub struct Client { + log: slog::Logger, + sender: tokio::sync::mpsc::Sender, +} + +impl Client { + pub fn new( + log: slog::Logger, + config: &Config, + db: Arc, + ) -> Client { + let (sender, receiver) = + tokio::sync::mpsc::channel(config.nmax_messages); + let server = Server { + log: log.new(o!("component" => "DataServer")), + receiver, + db, + }; + tokio::spawn(async move { data_server(server).await }); + Client { log, sender } + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn get_records( + &self, + key: Option, + ) -> Result, anyhow::Error> { + slog::trace!(&self.log, "get_records"; "key" => ?key); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Get(key, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn set_records( + &self, + records: Vec, + ) -> Result<(), anyhow::Error> { + slog::trace!(&self.log, "set_records"; "records" => ?records); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Set(records, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } + + // XXX error type needs to be rich enough for appropriate HTTP response + pub async fn 
delete_records( + &self, + records: Vec, + ) -> Result<(), anyhow::Error> { + slog::trace!(&self.log, "delete_records"; "records" => ?records); + let (tx, rx) = tokio::sync::oneshot::channel(); + self.sender + .try_send(DnsCmd::Delete(records, DnsResponse { tx })) + .context("send message")?; + rx.await.context("recv response") + } +} + +/// Runs the body of the data model server event loop +async fn data_server(mut server: Server) { + let log = &server.log; + loop { + trace!(log, "waiting for message"); + let msg = match server.receiver.recv().await { + None => { + info!(log, "exiting due to channel close"); + break; + } + Some(m) => m, + }; + + trace!(log, "rx message"; "message" => ?msg); + match msg { + DnsCmd::Get(key, response) => { + server.cmd_get_records(key, response).await; + } + DnsCmd::Set(records, response) => { + server.cmd_set_records(records, response).await; + } + DnsCmd::Delete(records, response) => { + server.cmd_delete_records(records, response).await; + } + } + } +} + +/// Data model server +pub struct Server { + log: slog::Logger, + receiver: tokio::sync::mpsc::Receiver, + db: Arc, +} + +impl Server { + async fn cmd_get_records( + &self, + key: Option, + response: DnsResponse>, + ) { + // If a key is provided search just for that key. Otherwise return all + // the db entries. 
+ if let Some(key) = key { + let bits = match self.db.get(key.name.as_bytes()) { + Ok(Some(bits)) => bits, + _ => { + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + let record: DnsRecord = match serde_json::from_slice(bits.as_ref()) + { + Ok(r) => r, + Err(e) => { + error!(self.log, "deserialize record: {}", e); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + match response.tx.send(vec![DnsKV { key, record }]) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } else { + let mut result = Vec::new(); + let mut iter = self.db.iter(); + loop { + match iter.next() { + Some(Ok((k, v))) => { + let record: DnsRecord = + match serde_json::from_slice(v.as_ref()) { + Ok(r) => r, + Err(e) => { + error!( + self.log, + "deserialize record: {}", e + ); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!( + self.log, + "response tx: {:?}", e + ); + } + } + return; + } + }; + let key = match std::str::from_utf8(k.as_ref()) { + Ok(s) => s.to_string(), + Err(e) => { + error!(self.log, "key encoding: {}", e); + match response.tx.send(Vec::new()) { + Ok(_) => {} + Err(e) => { + error!( + self.log, + "response tx: {:?}", e + ); + } + } + return; + } + }; + result.push(DnsKV { + key: DnsRecordKey { name: key }, + record, + }); + } + Some(Err(e)) => { + error!(self.log, "db iteration error: {}", e); + break; + } + None => break, + } + } + match response.tx.send(result) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } + } + + async fn cmd_set_records( + &self, + records: Vec, + response: DnsResponse<()>, + ) { + for kv in records { + let bits = match serde_json::to_string(&kv.record) { + Ok(bits) => bits, + Err(e) => { + error!(self.log, "serialize record: {}", e); + match response.tx.send(()) { + Ok(_) => {} + 
Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + }; + match self.db.insert(kv.key.name.as_bytes(), bits.as_bytes()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "db insert: {}", e); + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + } + } + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } + + async fn cmd_delete_records( + &self, + records: Vec, + response: DnsResponse<()>, + ) { + for k in records { + match self.db.remove(k.name.as_bytes()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "db delete: {}", e); + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + return; + } + } + } + match response.tx.send(()) { + Ok(_) => {} + Err(e) => { + error!(self.log, "response tx: {:?}", e); + } + } + } +} diff --git a/internal-dns/src/dns_server.rs b/internal-dns/src/dns_server.rs new file mode 100644 index 00000000000..f6f5ed5209f --- /dev/null +++ b/internal-dns/src/dns_server.rs @@ -0,0 +1,185 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::io::Result; +use std::net::SocketAddr; +use std::str::FromStr; +use std::sync::Arc; + +use crate::dns_data::DnsRecord; +use pretty_hex::*; +use serde::Deserialize; +use slog::{error, Logger}; +use tokio::net::UdpSocket; +use trust_dns_proto::op::header::Header; +use trust_dns_proto::rr::rdata::SRV; +use trust_dns_proto::rr::record_data::RData; +use trust_dns_proto::rr::record_type::RecordType; +use trust_dns_proto::rr::{Name, Record}; +use trust_dns_proto::serialize::binary::{ + BinDecodable, BinDecoder, BinEncoder, +}; +use trust_dns_server::authority::{MessageRequest, MessageResponseBuilder}; + +/// Configuration related to the DNS server +#[derive(Deserialize, Debug, Clone)] +pub struct Config { + /// The address to listen for DNS requests on + pub bind_address: String, +} + +pub async fn run(log: Logger, db: Arc, config: Config) -> Result<()> { + let socket = Arc::new(UdpSocket::bind(config.bind_address).await?); + + loop { + let mut buf = vec![0u8; 16384]; + let (n, src) = socket.recv_from(&mut buf).await?; + buf.resize(n, 0); + + let socket = socket.clone(); + let log = log.clone(); + let db = db.clone(); + + tokio::spawn( + async move { handle_req(log, db, socket, src, buf).await }, + ); + } +} + +async fn handle_req<'a, 'b, 'c>( + log: Logger, + db: Arc, + socket: Arc, + src: SocketAddr, + buf: Vec, +) { + println!("{:?}", buf.hex_dump()); + + let mut dec = BinDecoder::new(&buf); + let mr = match MessageRequest::read(&mut dec) { + Ok(mr) => mr, + Err(e) => { + error!(log, "read message: {}", e); + return; + } + }; + + println!("{:#?}", mr); + + let rb = MessageResponseBuilder::from_message_request(&mr); + let header = Header::response_from_request(mr.header()); + + let name = mr.query().original().name().clone(); + let key = name.to_string(); + let key = key.trim_end_matches('.'); + + let bits = match db.get(key.as_bytes()) { + Ok(Some(bits)) => bits, + Err(e) => { + error!(log, "db get: {}", e); + nack(&log, &mr, &socket, &header, &src).await; 
+ return; + } + _ => { + nack(&log, &mr, &socket, &header, &src).await; + return; + } + }; + + let record: crate::dns_data::DnsRecord = + match serde_json::from_slice(bits.as_ref()) { + Ok(r) => r, + Err(e) => { + error!(log, "deserialize record: {}", e); + return; + } + }; + + match record { + DnsRecord::AAAA(addr) => { + let mut aaaa = Record::new(); + aaaa.set_name(name) + .set_rr_type(RecordType::AAAA) + .set_data(Some(RData::AAAA(addr))); + + let mresp = rb.build(header, vec![&aaaa], vec![], vec![], vec![]); + + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "send: {}", e); + } + } + } + DnsRecord::SRV(crate::dns_data::SRV { prio, weight, port, target }) => { + let mut srv = Record::new(); + let tgt = match Name::from_str(&target) { + Ok(tgt) => tgt, + Err(e) => { + error!(log, "srv target: '{}' {}", target, e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + }; + srv.set_name(name) + .set_rr_type(RecordType::SRV) + .set_data(Some(RData::SRV(SRV::new(prio, weight, port, tgt)))); + + let mresp = rb.build(header, vec![&srv], vec![], vec![], vec![]); + + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + nack(&log, &mr, &socket, &header, &src).await; + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "send: {}", e); + } + } + } + }; +} + +async fn nack( + log: &Logger, + mr: &MessageRequest, + socket: &UdpSocket, + header: &Header, + src: &SocketAddr, +) { + let rb = MessageResponseBuilder::from_message_request(mr); + let mresp = 
rb.build_no_records(*header); + let mut resp_data = Vec::new(); + let mut enc = BinEncoder::new(&mut resp_data); + match mresp.destructive_emit(&mut enc) { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + return; + } + } + match socket.send_to(&resp_data, &src).await { + Ok(_) => {} + Err(e) => { + error!(log, "destructive emit: {}", e); + } + } +} diff --git a/internal-dns/src/dropshot_server.rs b/internal-dns/src/dropshot_server.rs new file mode 100644 index 00000000000..51d40e5053e --- /dev/null +++ b/internal-dns/src/dropshot_server.rs @@ -0,0 +1,73 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Dropshot server for configuring DNS namespace + +use crate::dns_data::{self, DnsKV, DnsRecordKey}; +use dropshot::endpoint; +use std::sync::Arc; + +pub struct Context { + client: dns_data::Client, +} + +impl Context { + pub fn new(client: dns_data::Client) -> Context { + Context { client } + } +} + +pub fn api() -> dropshot::ApiDescription> { + let mut api = dropshot::ApiDescription::new(); + + api.register(dns_records_get).expect("register dns_records_get"); + api.register(dns_records_set).expect("register dns_records_set"); + api.register(dns_records_delete).expect("register dns_records_delete"); + api +} + +#[endpoint( + method = GET, + path = "/get-records", +)] +async fn dns_records_get( + rqctx: Arc>>, +) -> Result>, dropshot::HttpError> { + let apictx = rqctx.context(); + // XXX record key + let records = apictx.client.get_records(None).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(records)) +} + +#[endpoint( + method = PUT, + path = "/set-records", +)] +async fn dns_records_set( + rqctx: Arc>>, + rq: dropshot::TypedBody>, +) -> Result, dropshot::HttpError> { + let apictx = rqctx.context(); + 
apictx.client.set_records(rq.into_inner()).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(())) +} + +#[endpoint( + method = PUT, + path = "/delete-records", +)] +async fn dns_records_delete( + rqctx: Arc>>, + rq: dropshot::TypedBody>, +) -> Result, dropshot::HttpError> { + let apictx = rqctx.context(); + apictx.client.delete_records(rq.into_inner()).await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!("uh oh: {:?}", e)) + })?; + Ok(dropshot::HttpResponseOk(())) +} diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs new file mode 100644 index 00000000000..d94684d75e5 --- /dev/null +++ b/internal-dns/src/lib.rs @@ -0,0 +1,47 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(clippy::type_complexity)] +#![allow(clippy::ptr_arg)] + +use anyhow::anyhow; +use serde::Deserialize; +use std::sync::Arc; + +pub mod dns_data; +pub mod dns_server; +pub mod dropshot_server; + +#[derive(Deserialize, Debug)] +pub struct Config { + pub log: dropshot::ConfigLogging, + pub dropshot: dropshot::ConfigDropshot, + pub data: dns_data::Config, + pub dns: dns_server::Config, +} + +pub async fn start_server( + config: Config, + log: slog::Logger, + db: Arc, +) -> Result>, anyhow::Error> +{ + let data_client = dns_data::Client::new( + log.new(slog::o!("component" => "DataClient")), + &config.data, + db, + ); + + let api = dropshot_server::api(); + let api_context = Arc::new(dropshot_server::Context::new(data_client)); + + Ok(dropshot::HttpServerStarter::new( + &config.dropshot, + api, + api_context, + &log, + ) + .map_err(|e| anyhow!("{}", e))? 
+ .start()) +} diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs new file mode 100644 index 00000000000..0363a696e6f --- /dev/null +++ b/internal-dns/tests/basic_test.rs @@ -0,0 +1,188 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use std::net::Ipv6Addr; +use internal_dns_client::{ + types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, + Client, +}; +use trust_dns_resolver::config::{ + NameServerConfig, Protocol, ResolverConfig, ResolverOpts, +}; +use trust_dns_resolver::TokioAsyncResolver; + +#[tokio::test] +pub async fn aaaa_crud() -> Result<(), anyhow::Error> { + let (client, resolver) = init_client_server().await?; + + // records should initially be empty + let records = client.dns_records_get().await?; + assert!(records.is_empty()); + + // add an aaaa record + let name = DnsRecordKey { name: "devron.system".into() }; + let addr = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); + let aaaa = DnsRecord::Aaaa(addr); + client + .dns_records_set(&vec![DnsKv { + key: name.clone(), + record: aaaa.clone(), + }]) + .await?; + + // read back the aaaa record + let records = client.dns_records_get().await?; + assert_eq!(1, records.len()); + assert_eq!(records[0].key.name, name.name); + match records[0].record { + DnsRecord::Aaaa(ra) => { + assert_eq!(ra, addr); + } + _ => { + panic!("expected aaaa record") + } + } + + // resolve the name + let response = resolver.lookup_ip(name.name + ".").await?; + let address = response.iter().next().expect("no addresses returned!"); + assert_eq!(address, addr); + + Ok(()) +} + +#[tokio::test] +pub async fn srv_crud() -> Result<(), anyhow::Error> { + let (client, resolver) = init_client_server().await?; + + // records should initially be 
empty + let records = client.dns_records_get().await?; + assert!(records.is_empty()); + + // add a srv record + let name = DnsRecordKey { name: "hromi.cluster".into() }; + let srv = + Srv { prio: 47, weight: 74, port: 99, target: "outpost47".into() }; + let rec = DnsRecord::Srv(srv.clone()); + client + .dns_records_set(&vec![DnsKv { + key: name.clone(), + record: rec.clone(), + }]) + .await?; + + // read back the srv record + let records = client.dns_records_get().await?; + assert_eq!(1, records.len()); + assert_eq!(records[0].key.name, name.name); + match records[0].record { + DnsRecord::Srv(ref rs) => { + assert_eq!(rs.prio, srv.prio); + assert_eq!(rs.weight, srv.weight); + assert_eq!(rs.port, srv.port); + assert_eq!(rs.target, srv.target); + } + _ => { + panic!("expected srv record") + } + } + + // resolve the srv + let response = resolver.srv_lookup(name.name).await?; + let srvr = response.iter().next().expect("no addresses returned!"); + assert_eq!(srvr.priority(), srv.prio); + assert_eq!(srvr.weight(), srv.weight); + assert_eq!(srvr.port(), srv.port); + assert_eq!(srvr.target().to_string(), srv.target + "."); + + Ok(()) +} + +async fn init_client_server( +) -> Result<(Client, TokioAsyncResolver), anyhow::Error> { + // initialize dns server config + let (config, dropshot_port, dns_port) = test_config()?; + let log = + config.log.to_logger("internal-dns").context("failed to create logger")?; + + // initialize dns server db + let db = Arc::new(sled::open(&config.data.storage_path)?); + db.clear()?; + + let client = Client::new( + &format!("http://127.0.0.1:{}", dropshot_port), + log.clone(), + ); + + let mut rc = ResolverConfig::new(); + rc.add_name_server(NameServerConfig { + socket_addr: SocketAddr::V4(SocketAddrV4::new( + Ipv4Addr::new(127, 0, 0, 1), + dns_port, + )), + protocol: Protocol::Udp, + tls_dns_name: None, + trust_nx_responses: false, + bind_addr: None, + }); + + let resolver = + TokioAsyncResolver::tokio(rc, ResolverOpts::default()).unwrap(); + + 
// launch a dns server + { + let db = db.clone(); + let log = log.clone(); + let config = config.dns.clone(); + + tokio::spawn( + async move { internal_dns::dns_server::run(log, db, config).await }, + ); + } + + // launch a dropshot server + tokio::spawn(async move { + let server = internal_dns::start_server(config, log, db).await?; + server.await.map_err(|error_message| { + anyhow!("server exiting: {}", error_message) + }) + }); + + // wait for server to start + tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; + + Ok((client, resolver)) +} + +fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { + let dropshot_port = portpicker::pick_unused_port().expect("pick port"); + let dns_port = portpicker::pick_unused_port().expect("pick port"); + let tmp_dir = tempdir::TempDir::new("internal-dns-test")?; + let mut storage_path = tmp_dir.path().to_path_buf(); + storage_path.push("test"); + let storage_path = storage_path.to_str().unwrap().into(); + + let config = internal_dns::Config { + log: dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + dropshot: dropshot::ConfigDropshot { + bind_address: format!("127.0.0.1:{}", dropshot_port) + .parse() + .unwrap(), + request_body_max_bytes: 1024, + ..Default::default() + }, + data: internal_dns::dns_data::Config { nmax_messages: 16, storage_path }, + dns: internal_dns::dns_server::Config { + bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), + }, + }; + + Ok((config, dropshot_port, dns_port)) +} diff --git a/internal-dns/tests/openapi_test.rs b/internal-dns/tests/openapi_test.rs new file mode 100644 index 00000000000..3d6e6d56386 --- /dev/null +++ b/internal-dns/tests/openapi_test.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use expectorate::assert_contents; +use subprocess::Exec; +use omicron_test_utils::dev::test_cmds::assert_exit_code; +use omicron_test_utils::dev::test_cmds::path_to_executable; +use omicron_test_utils::dev::test_cmds::run_command; +use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; +use openapiv3::OpenAPI; + +const CMD_API_GEN: &str = env!("CARGO_BIN_EXE_apigen"); + +#[test] +fn test_internal_dns_openapi() { + let exec = Exec::cmd(path_to_executable(CMD_API_GEN)); + let (exit_status, stdout, _) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS); + + let spec: OpenAPI = serde_json::from_str(&stdout) + .expect("stdout was not valid OpenAPI"); + let errors = openapi_lint::validate(&spec); + assert!(errors.is_empty(), "{}", errors.join("\n\n")); + + assert_contents("../openapi/internal-dns.json", &stdout); +} diff --git a/openapi/internal-dns.json b/openapi/internal-dns.json new file mode 100644 index 00000000000..708983bd9cd --- /dev/null +++ b/openapi/internal-dns.json @@ -0,0 +1,237 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Internal DNS", + "version": "v0.1.0" + }, + "paths": { + "/delete-records": { + "put": { + "operationId": "dns_records_delete", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsRecordKey", + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsRecordKey" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Null", + "type": "string", + "enum": [ + null + ] + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/get-records": { + "get": { + "operationId": "dns_records_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsKv", + "type": "array", + 
"items": { + "$ref": "#/components/schemas/DnsKv" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/set-records": { + "put": { + "operationId": "dns_records_set", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_DnsKv", + "type": "array", + "items": { + "$ref": "#/components/schemas/DnsKv" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Null", + "type": "string", + "enum": [ + null + ] + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + }, + "schemas": { + "DnsKv": { + "type": "object", + "properties": { + "key": { + "$ref": "#/components/schemas/DnsRecordKey" + }, + "record": { + "$ref": "#/components/schemas/DnsRecord" + } + }, + "required": [ + "key", + "record" + ] + }, + "DnsRecord": { + "oneOf": [ + { + "type": "object", + "properties": { + "AAAA": { + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "AAAA" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "SRV": { + "$ref": "#/components/schemas/Srv" + } + }, + "required": [ + "SRV" + ], + "additionalProperties": false + } + ] + }, + "DnsRecordKey": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", 
+ "request_id" + ] + }, + "Srv": { + "type": "object", + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "port", + "prio", + "target", + "weight" + ] + } + } + } +} \ No newline at end of file From 1a15d6c5a67a96c1ac8869b81d570b43b402f9c2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 22 Mar 2022 19:26:31 -0400 Subject: [PATCH 02/41] fmt --- internal-dns/src/bin/apigen.rs | 2 +- internal-dns/src/bin/dns-server.rs | 12 +++++++----- internal-dns/tests/basic_test.rs | 19 ++++++++++++------- internal-dns/tests/openapi_test.rs | 6 +++--- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/internal-dns/src/bin/apigen.rs b/internal-dns/src/bin/apigen.rs index 6f21201e4b0..095291c9571 100644 --- a/internal-dns/src/bin/apigen.rs +++ b/internal-dns/src/bin/apigen.rs @@ -3,9 +3,9 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use anyhow::{bail, Result}; +use internal_dns::dropshot_server::api; use std::fs::File; use std::io; -use internal_dns::dropshot_server::api; fn usage(args: &Vec) -> String { format!("{} [output path]", args[0]) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 12d4b4458f0..505a42a7dc0 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -32,8 +32,10 @@ async fn main() -> Result<(), anyhow::Error> { .with_context(|| format!("parse config file {:?}", config_file))?; eprintln!("{:?}", config); - let log = - config.log.to_logger("internal-dns").context("failed to create logger")?; + let log = config + .log + .to_logger("internal-dns") + .context("failed to create logger")?; let db = Arc::new(sled::open(&config.data.storage_path)?); @@ -42,9 +44,9 @@ async fn main() -> Result<(), anyhow::Error> { let log = log.clone(); let config = config.dns.clone(); - tokio::spawn( - async move { internal_dns::dns_server::run(log, db, config).await }, - ); + tokio::spawn(async move { + internal_dns::dns_server::run(log, db, config).await + }); } let server = internal_dns::start_server(config, log, db).await?; diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 0363a696e6f..24e5b11744e 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -6,11 +6,11 @@ use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use std::net::Ipv6Addr; use internal_dns_client::{ types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, Client, }; +use std::net::Ipv6Addr; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -107,8 +107,10 @@ async fn init_client_server( ) -> Result<(Client, TokioAsyncResolver), anyhow::Error> { // initialize dns server config let (config, dropshot_port, dns_port) = test_config()?; - let log = - 
config.log.to_logger("internal-dns").context("failed to create logger")?; + let log = config + .log + .to_logger("internal-dns") + .context("failed to create logger")?; // initialize dns server db let db = Arc::new(sled::open(&config.data.storage_path)?); @@ -140,9 +142,9 @@ async fn init_client_server( let log = log.clone(); let config = config.dns.clone(); - tokio::spawn( - async move { internal_dns::dns_server::run(log, db, config).await }, - ); + tokio::spawn(async move { + internal_dns::dns_server::run(log, db, config).await + }); } // launch a dropshot server @@ -178,7 +180,10 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { request_body_max_bytes: 1024, ..Default::default() }, - data: internal_dns::dns_data::Config { nmax_messages: 16, storage_path }, + data: internal_dns::dns_data::Config { + nmax_messages: 16, + storage_path, + }, dns: internal_dns::dns_server::Config { bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), }, diff --git a/internal-dns/tests/openapi_test.rs b/internal-dns/tests/openapi_test.rs index 3d6e6d56386..cf4cd7ff83f 100644 --- a/internal-dns/tests/openapi_test.rs +++ b/internal-dns/tests/openapi_test.rs @@ -3,12 +3,12 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use expectorate::assert_contents; -use subprocess::Exec; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use openapiv3::OpenAPI; +use subprocess::Exec; const CMD_API_GEN: &str = env!("CARGO_BIN_EXE_apigen"); @@ -18,8 +18,8 @@ fn test_internal_dns_openapi() { let (exit_status, stdout, _) = run_command(exec); assert_exit_code(exit_status, EXIT_SUCCESS); - let spec: OpenAPI = serde_json::from_str(&stdout) - .expect("stdout was not valid OpenAPI"); + let spec: OpenAPI = + serde_json::from_str(&stdout).expect("stdout was not valid OpenAPI"); let errors = openapi_lint::validate(&spec); assert!(errors.is_empty(), "{}", errors.join("\n\n")); From 4faef91d1d8dc44a08b855878978d880304b9595 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 23 Mar 2022 16:17:40 -0400 Subject: [PATCH 03/41] wip --- package-manifest.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/package-manifest.toml b/package-manifest.toml index b16eaf4100f..8aa6c9cd253 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -55,6 +55,15 @@ to = "/opt/oxide/cockroachdb/sql" from = "smf/cockroachdb" to = "/var/svc/manifest/site/cockroachdb" +[package.internal-dns] +rust.binary_names = ["dns-server"] +rust.release = true +service_name = "internal-dns" +zone = true +[[package.internal-dns.paths]] +from = "smf/internal-dns" +to = "/var/svc/manifest/site/internal-dns" + # Packages not built within Omicron, but which must be imported. 
# Refer to From 8f373bd71f966f6d3218370646dbd91dbf4fc006 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 24 Mar 2022 11:02:14 -0400 Subject: [PATCH 04/41] Added dnsadm --- Cargo.lock | 7 ++ Cargo.toml | 2 + internal-dns-client/Cargo.toml | 7 ++ internal-dns-client/src/bin/dnsadm.rs | 117 ++++++++++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 internal-dns-client/src/bin/dnsadm.rs diff --git a/Cargo.lock b/Cargo.lock index 7f2568b5357..67c3d4ef061 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,11 +1701,18 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ + "anyhow", + "clap 3.1.6", "progenitor", "reqwest", "serde", "serde_json", "slog", + "slog-async", + "slog-envlogger", + "slog-term", + "structopt", + "tokio", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4dd7b1dbff3..9e8055be407 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ default-members = [ "gateway", "gateway-client", "gateway-messages", + "internal-dns", + "internal-dns-client", "nexus", "nexus/src/db/db-macros", "package", diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index af67e13d716..70af97d5ac4 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,8 +5,15 @@ edition = "2021" license = "MPL-2.0" [dependencies] +anyhow = "1.0" +clap = { version = "3.1", features = [ "derive" ] } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } +slog-term = "2.7" +slog-async = "2.7" +slog-envlogger = "2.2" +structopt = "0.3" +tokio = { version = "1.17", features = [ "full" ] } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns-client/src/bin/dnsadm.rs b/internal-dns-client/src/bin/dnsadm.rs new file mode 100644 index 00000000000..1c2d9a876fa 
--- /dev/null +++ b/internal-dns-client/src/bin/dnsadm.rs @@ -0,0 +1,117 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::Result; +use internal_dns_client::{ + types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, + Client, +}; +use slog::{Drain, Logger}; +use std::net::Ipv6Addr; +use structopt::{clap::AppSettings::*, StructOpt}; + +#[derive(Debug, StructOpt)] +#[structopt( + name = "dnsadm", + about = "Administer DNS records", + global_setting(ColorAuto), + global_setting(ColoredHelp) +)] +struct Opt { + #[structopt(short, long)] + address: Option, + + #[structopt(short, long)] + port: Option, + + #[structopt(subcommand)] + subcommand: SubCommand, +} + +#[derive(Debug, StructOpt)] +enum SubCommand { + ListRecords, + AddAAAA(AddAAAACommand), + AddSRV(AddSRVCommand), + DeleteRecord(DeleteRecordCommand), +} + +#[derive(Debug, StructOpt)] +struct AddAAAACommand { + name: String, + addr: Ipv6Addr, +} + +#[derive(Debug, StructOpt)] +struct AddSRVCommand { + name: String, + prio: u16, + weight: u16, + port: u16, + target: String, +} + +#[derive(Debug, StructOpt)] +struct DeleteRecordCommand { + name: String, +} + +#[tokio::main] +async fn main() -> Result<()> { + let opt = Opt::from_args(); + let log = init_logger(); + + let addr = match opt.address { + Some(a) => a, + None => "localhost".into(), + }; + let port = opt.port.unwrap_or(5353); + + let endpoint = format!("http://{}:{}", addr, port); + let client = Client::new(&endpoint, log.clone()); + + let opt = Opt::from_args(); + match opt.subcommand { + SubCommand::ListRecords => { + let records = client.dns_records_get().await?; + println!("{:#?}", records); + } + SubCommand::AddAAAA(cmd) => { + client + .dns_records_set(&vec![DnsKv { + key: DnsRecordKey { name: cmd.name }, + record: DnsRecord::Aaaa(cmd.addr), + }]) + .await?; + } + SubCommand::AddSRV(cmd) 
=> { + client + .dns_records_set(&vec![DnsKv { + key: DnsRecordKey { name: cmd.name }, + record: DnsRecord::Srv(Srv { + prio: cmd.prio, + weight: cmd.weight, + port: cmd.port, + target: cmd.target, + }), + }]) + .await?; + } + SubCommand::DeleteRecord(cmd) => { + client + .dns_records_delete(&vec![DnsRecordKey { name: cmd.name }]) + .await?; + } + } + + Ok(()) +} + +fn init_logger() -> Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_envlogger::new(drain).fuse(); + let drain = slog_async::Async::new(drain).chan_size(0x2000).build().fuse(); + slog::Logger::root(drain, slog::o!()) +} From a575e42548e1f0c94295dcfa4a714de7c4276464 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 24 Mar 2022 13:42:45 -0400 Subject: [PATCH 05/41] Add internal-dns SMF config, start it by RSS --- smf/internal-dns/config.toml | 22 +++++++++++++++++++++ smf/internal-dns/manifest.xml | 36 ++++++++++++++++++++++++++++++++++ smf/sled-agent/config-rss.toml | 4 ++++ 3 files changed, 62 insertions(+) create mode 100644 smf/internal-dns/config.toml create mode 100644 smf/internal-dns/manifest.xml diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml new file mode 100644 index 00000000000..83b70b0380d --- /dev/null +++ b/smf/internal-dns/config.toml @@ -0,0 +1,22 @@ +[dropshot] +bind_address = "[fd00:1234::4]:5353" +request_body_max_bytes = 1048576 + +[dns] +bind_address = "[fd00:1234::4]:4753" + +[log] +# Show log messages of this level and more severe +level = "debug" + +# Example output to a terminal (with colors) +mode = "stderr-terminal" + +# Example output to a file, appending if it already exists. 
+#mode = "file" +#path = "logs/server.log" +#if_exists = "append" + +[data] +nmax_messages = 16 +storage_path = "/var/tmp/oxide/dns" diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml new file mode 100644 index 00000000000..25b03434c34 --- /dev/null +++ b/smf/internal-dns/manifest.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 8c3e5d976b4..87af8fb0e07 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -38,3 +38,7 @@ addresses = [ "[fd00:1234::7]:12221", "192.168.1.123:12220" ] [[request.service]] name = "oximeter" addresses = [ "[fd00:1234::6]:12223" ] + +[[request.service]] +name = "internal-dns" +addresses = [ "[fd00:1234::4]:5353" ] From a52e4b60b3d427cb848755f8d188ae7655a11793 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 25 Mar 2022 11:58:11 -0400 Subject: [PATCH 06/41] review feedback --- Cargo.lock | 6 ------ internal-dns-client/Cargo.toml | 6 ------ internal-dns/Cargo.toml | 2 +- {internal-dns-client => internal-dns}/src/bin/dnsadm.rs | 0 package-manifest.toml | 2 +- smf/internal-dns/config.toml | 4 ++-- 6 files changed, 4 insertions(+), 16 deletions(-) rename {internal-dns-client => internal-dns}/src/bin/dnsadm.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 67c3d4ef061..0b6e0d8f30f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1701,18 +1701,12 @@ dependencies = [ name = "internal-dns-client" version = "0.1.0" dependencies = [ - "anyhow", - "clap 3.1.6", "progenitor", "reqwest", "serde", "serde_json", "slog", - "slog-async", - "slog-envlogger", - "slog-term", "structopt", - "tokio", ] [[package]] diff --git a/internal-dns-client/Cargo.toml b/internal-dns-client/Cargo.toml index 70af97d5ac4..22e28c91bc9 100644 --- a/internal-dns-client/Cargo.toml +++ b/internal-dns-client/Cargo.toml @@ -5,15 +5,9 @@ edition = "2021" license = "MPL-2.0" [dependencies] -anyhow = "1.0" -clap = { version 
= "3.1", features = [ "derive" ] } progenitor = { git = "https://github.com/oxidecomputer/progenitor" } serde = { version = "1.0", features = [ "derive" ] } serde_json = "1.0" slog = { version = "2.5.0", features = [ "max_level_trace", "release_max_level_debug" ] } -slog-term = "2.7" -slog-async = "2.7" -slog-envlogger = "2.2" structopt = "0.3" -tokio = { version = "1.17", features = [ "full" ] } reqwest = { version = "0.11", features = ["json", "rustls-tls", "stream"] } diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 6abe17b75f0..ce9cf5b24a4 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" anyhow = "1.0" clap = { version = "3.1", features = [ "derive" ] } dropshot = { git = "https://github.com/oxidecomputer/dropshot" } +internal-dns-client = { path = "../internal-dns-client" } pretty-hex = "0.2.1" schemars = "0.8" serde = { version = "1.0", features = [ "derive" ] } @@ -26,7 +27,6 @@ trust-dns-server = "0.21" [dev-dependencies] expectorate = "1.0.4" -internal-dns-client = { path = "../internal-dns-client" } omicron-test-utils = { path = "../test-utils" } openapiv3 = "1.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } diff --git a/internal-dns-client/src/bin/dnsadm.rs b/internal-dns/src/bin/dnsadm.rs similarity index 100% rename from internal-dns-client/src/bin/dnsadm.rs rename to internal-dns/src/bin/dnsadm.rs diff --git a/package-manifest.toml b/package-manifest.toml index 3d3f6af2d94..f48043cf0a3 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -56,7 +56,7 @@ from = "smf/cockroachdb" to = "/var/svc/manifest/site/cockroachdb" [package.internal-dns] -rust.binary_names = ["dns-server"] +rust.binary_names = ["dnsadm", "dns-server"] rust.release = true service_name = "internal-dns" zone = true diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 83b70b0380d..a52f73c1837 100644 --- 
a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -3,11 +3,11 @@ bind_address = "[fd00:1234::4]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1234::4]:4753" +bind_address = "[fd00:1234::4]:53" [log] # Show log messages of this level and more severe -level = "debug" +level = "info" # Example output to a terminal (with colors) mode = "stderr-terminal" From a9840d6d9314ef0d6a9bc314d9ad73532365796d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 10:07:10 -0400 Subject: [PATCH 07/41] Patch addresses --- smf/internal-dns/config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index a52f73c1837..b4fbf3b8b1d 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,9 @@ [dropshot] -bind_address = "[fd00:1234::4]:5353" +bind_address = "[fd00:1122:3344::9]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1234::4]:53" +bind_address = "[fd00:1122:3344::9]:53" [log] # Show log messages of this level and more severe From 437d699d9e2c2ed7c8f9cbca27260ae9f5807dfd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 10:15:26 -0400 Subject: [PATCH 08/41] Updated cfg path --- common/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 27ee00671ac..c1c8a54f870 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -52,4 +52,4 @@ macro_rules! generate_logging_api { /// /// NOTE: Be careful when modifying this path - the installation tools will /// **remove the entire directory** to re-install/uninstall the system. 
-pub const OMICRON_CONFIG_PATH: &'static str = "/var/tmp/oxide"; +pub const OMICRON_CONFIG_PATH: &'static str = "/var/oxide"; From 4dc45ff3abdb5c5216b896cd68847456ed15c74f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 20 Apr 2022 11:44:00 -0400 Subject: [PATCH 09/41] patch addresses --- smf/internal-dns/config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index b4fbf3b8b1d..2f407c76e79 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,9 @@ [dropshot] -bind_address = "[fd00:1122:3344::9]:5353" +bind_address = "[fd00:1122:3344:1::9]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1122:3344::9]:53" +bind_address = "[fd00:1122:3344:1::9]:53" [log] # Show log messages of this level and more severe From 528204d228d5006445c597da38d974310f65699a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 21 Apr 2022 17:35:34 -0400 Subject: [PATCH 10/41] Add support for 'make GZ address', add DNS addrs --- common/src/address.rs | 105 +++++++++++++++++++++++++++ common/src/lib.rs | 1 + docs/how-to-run.adoc | 20 ++--- openapi/sled-agent.json | 7 ++ sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/config.rs | 13 +--- sled-agent/src/illumos/mod.rs | 9 ++- sled-agent/src/illumos/zone.rs | 2 +- sled-agent/src/params.rs | 9 ++- sled-agent/src/rack_setup/config.rs | 26 +++++-- sled-agent/src/rack_setup/service.rs | 84 ++++++++++++++++++--- sled-agent/src/services.rs | 82 +++++++++++++-------- smf/internal-dns/config.toml | 6 +- smf/nexus/config.toml | 8 +- smf/oximeter/config.toml | 6 +- smf/sled-agent/config-rss.toml | 26 +++---- smf/sled-agent/config.toml | 2 +- 17 files changed, 308 insertions(+), 100 deletions(-) create mode 100644 common/src/address.rs diff --git a/common/src/address.rs b/common/src/address.rs new file mode 100644 index 00000000000..4e145149b46 --- /dev/null +++ b/common/src/address.rs @@ -0,0 +1,105 @@ +// 
This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common IP addressing functionality. +//! +//! This addressing functionality is shared by both initialization services +//! and Nexus, who need to agree upon addressing schemes. + +use std::net::{Ipv6Addr, SocketAddrV6}; +use serde::{Serialize, Deserialize}; +use ipnetwork::Ipv6Network; + +pub const AZ_PREFIX: u8 = 48; +pub const RACK_PREFIX: u8 = 56; +pub const SLED_PREFIX: u8 = 64; + +/// The amount of redundancy for DNS servers. +/// +/// Must be less than MAX_DNS_REDUNDANCY. +pub const DNS_REDUNDANCY: usize = 1; +/// The maximum amount of redundancy for DNS servers. +/// +/// This determines the number of addresses which are +/// reserved for DNS servers. +pub const MAX_DNS_REDUNDANCY: usize = 5; + +pub const DNS_SERVER_PORT: u16 = 5353; +pub const SLED_AGENT_PORT: u16 = 12345; + +/// Represents a subnet which may be used for contacting DNS services. +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct DnsSubnet { + network: Ipv6Network, +} + +impl DnsSubnet { + /// Returns the DNS server address within the subnet. + /// + /// This is the first address within the subnet. + pub fn dns_address(&self) -> SocketAddrV6 { + let mut iter = self.network.iter(); + let _anycast_ip = iter.next().unwrap(); + let dns_ip = iter.next().unwrap(); + SocketAddrV6::new(dns_ip, DNS_SERVER_PORT, 0, 0) + } + + /// Returns the address which the Global Zone should create + /// to be able to contact the DNS server. + /// + /// This is the second address within the subnet. + pub fn gz_address(&self) -> Ipv6Network { + let mut iter = self.network.iter(); + let _anycast_ip = iter.next().unwrap(); + let _dns_ip = iter.next().unwrap(); + Ipv6Network::new(iter.next().unwrap(), SLED_PREFIX).unwrap() + } +} + +/// Given a particular rack subnet, return the DNS addresses. 
+/// +/// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the +/// [`RACK_PREFIX`] subnet. +pub fn get_dns_subnets(reserved_rack_subnet: Ipv6Network) -> Vec { + assert_eq!(reserved_rack_subnet.prefix(), RACK_PREFIX); + + let mut iter = reserved_rack_subnet.iter(); + let _anycast_ip = iter.next().unwrap(); + + (0..DNS_REDUNDANCY).map(|idx| { + let network = get_64_subnet( + reserved_rack_subnet, + u8::try_from(idx + 1).unwrap() + ); + + DnsSubnet { + network + } + }).collect() +} + +/// Return the sled agent address for a subnet. +/// +/// This address will come from the first address of the [`SLED_PREFIX`] subnet. +pub fn get_sled_address(sled_subnet: Ipv6Network) -> SocketAddrV6 { + assert_eq!(sled_subnet.prefix(), SLED_PREFIX); + + let mut iter = sled_subnet.iter(); + let _anycast_ip = iter.next().unwrap(); + let sled_agent_ip = iter.next().unwrap(); + SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) +} + +/// Returns a sled subnet within a rack subnet. +/// +/// The subnet at index == 0 is used for rack-local services. +pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { + assert_eq!(rack_subnet.prefix(), RACK_PREFIX); + + let mut rack_network = rack_subnet.network().octets(); + + // To set bits distinguishing the /64 from the /56, we modify the 7th octet. + rack_network[7] = index; + Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() +} diff --git a/common/src/lib.rs b/common/src/lib.rs index c1c8a54f870..2a933283425 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -20,6 +20,7 @@ // TODO(#32): Remove this exception once resolved. 
#![allow(clippy::field_reassign_with_default)] +pub mod address; pub mod api; pub mod backoff; pub mod cmd; diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 663c3bae52e..aab25ddc434 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -119,16 +119,16 @@ unique local addresses in the subnet of the first Sled Agent: `fd00:1122:3344:1: |=================================================================================================== | Service | Endpoint | Sled Agent: Bootstrap | Derived from MAC address of physical data link. -| Sled Agent: Dropshot API | `[fd00:1122:3344:1::1]:12345` -| Cockroach DB | `[fd00:1122:3344:1::2]:32221` -| Nexus: External API | `[fd00:1122:3344:1::3]:12220` -| Nexus: Internal API | `[fd00:1122:3344:1::3]:12221` -| Oximeter | `[fd00:1122:3344:1::4]:12223` -| Clickhouse | `[fd00:1122:3344:1::5]:8123` -| Crucible Downstairs 1 | `[fd00:1122:3344:1::6]:32345` -| Crucible Downstairs 2 | `[fd00:1122:3344:1::7]:32345` -| Crucible Downstairs 3 | `[fd00:1122:3344:1::8]:32345` -| Internal DNS | `[fd00:1122:3344:1::9]:5353` +| Sled Agent: Dropshot API | `[fd00:1122:3344:0101::1]:12345` +| Cockroach DB | `[fd00:1122:3344:0101::2]:32221` +| Nexus: External API | `[fd00:1122:3344:0101::3]:12220` +| Nexus: Internal API | `[fd00:1122:3344:0101::3]:12221` +| Oximeter | `[fd00:1122:3344:0101::4]:12223` +| Clickhouse | `[fd00:1122:3344:0101::5]:8123` +| Crucible Downstairs 1 | `[fd00:1122:3344:0101::6]:32345` +| Crucible Downstairs 2 | `[fd00:1122:3344:0101::7]:32345` +| Crucible Downstairs 3 | `[fd00:1122:3344:0101::8]:32345` +| Internal DNS Service | `[fd00:1122:3344:0001::1]:5353` |=================================================================================================== Note that Sled Agent runs in the global zone and is the one responsible for bringing up all the other diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 36e51a24083..dca641e06d3 100644 --- a/openapi/sled-agent.json +++ 
b/openapi/sled-agent.json @@ -968,6 +968,13 @@ "type": "string" } }, + "gz_addresses": { + "type": "array", + "items": { + "type": "string", + "format": "ipv6" + } + }, "name": { "type": "string" } diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 8e3ed304b94..8e57cc9b577 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -182,7 +182,7 @@ impl Agent { info!(&self.log, "Loading Sled Agent: {:?}", request); let sled_address = - crate::config::get_sled_address(*request.subnet.as_ref()); + omicron_common::address::get_sled_address(request.subnet.as_ref().0); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index b02fbdabcd9..ca332b2c07c 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -8,22 +8,11 @@ use crate::common::vlan::VlanID; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; -use omicron_common::api::external::Ipv6Net; use serde::Deserialize; -use std::net::{SocketAddr, SocketAddrV6}; +use std::net::SocketAddr; use std::path::Path; use uuid::Uuid; -pub const SLED_AGENT_PORT: u16 = 12345; - -/// Given a subnet, return the sled agent address. 
-pub(crate) fn get_sled_address(subnet: Ipv6Net) -> SocketAddrV6 { - let mut iter = subnet.iter(); - let _anycast_ip = iter.next().unwrap(); - let sled_agent_ip = iter.next().unwrap(); - SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) -} - /// Configuration for a sled agent #[derive(Clone, Debug, Deserialize)] pub struct Config { diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index c155f9d7a78..42df4da26a5 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -23,9 +23,13 @@ pub enum ExecutionError { ExecutionStart(std::io::Error), #[error( - "Command executed and failed with status: {status}. Output: {stderr}" + "Command [{command}] executed and failed with status: {status}. Output: {stderr}" )] - CommandFailure { status: std::process::ExitStatus, stderr: String }, + CommandFailure { + command: String, + status: std::process::ExitStatus, + stderr: String, + }, } // We wrap this method in an inner module to make it possible to mock @@ -44,6 +48,7 @@ mod inner { if !output.status.success() { return Err(ExecutionError::CommandFailure { + command: command.get_args().map(|s| s.to_string_lossy().into()).collect::>().join(" "), status: output.status, stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index ceb7cb5c57c..53cabff6a04 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -479,7 +479,7 @@ impl Zones { Self::ensure_address( None, &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static(IpAddr::V6(address), Some(64)), + AddressRequest::new_static(IpAddr::V6(address), Some(omicron_common::address::SLED_PREFIX)), )?; Ok(()) } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 5fe493eccea..553954e5c7f 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,7 @@ use omicron_common::api::internal::nexus::{ use 
schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::SocketAddr; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use uuid::Uuid; /// Used to request a Disk state change @@ -233,7 +233,11 @@ pub struct ServiceRequest { // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. - pub addresses: Vec, + pub addresses: Vec, + // The addresses in the global zone which should be created, if necessary + // to route to the service. + #[serde(default)] + pub gz_addresses: Vec, } impl From for sled_agent_client::types::ServiceRequest { @@ -241,6 +245,7 @@ impl From for sled_agent_client::types::ServiceRequest { Self { name: s.name, addresses: s.addresses.into_iter().map(|s| s.to_string()).collect(), + gz_addresses: s.gz_addresses, } } } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 53545b28984..fff9dee19a3 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -7,6 +7,7 @@ use crate::config::ConfigError; use crate::params::{DatasetEnsureBody, ServiceRequest}; use ipnetwork::Ipv6Network; +use omicron_common::address::{AZ_PREFIX, RACK_PREFIX}; use serde::Deserialize; use serde::Serialize; use std::net::Ipv6Addr; @@ -31,7 +32,7 @@ pub struct SetupServiceConfig { } /// A request to initialize a sled. -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct SledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] @@ -40,6 +41,10 @@ pub struct SledRequest { /// Services to be instantiated. #[serde(default, rename = "service")] pub services: Vec, + + /// DNS Services to be instantiated. 
+ #[serde(default, rename = "dns_service")] + pub dns_services: Vec, } fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { @@ -59,19 +64,24 @@ impl SetupServiceConfig { } pub fn az_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, 48) + new_network(self.rack_subnet, AZ_PREFIX) } + /// Returns the subnet for our rack. pub fn rack_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, 56) + new_network(self.rack_subnet, RACK_PREFIX) } - pub fn sled_subnet(&self, index: u8) -> Ipv6Network { - let mut rack_network = self.rack_subnet().network().octets(); + /// Returns the subnet for the "reserved" rack subnet. + /// + /// This is used for AZ-wide services, such as DNS. + pub fn reserved_rack_subnet(&self) -> Ipv6Network { + new_network(self.az_subnet().ip(), RACK_PREFIX) + } - // To set bits distinguishing the /64 from the /56, we modify the 7th octet. - rack_network[7] = index; - Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() + /// Returns the subnet for the `index`-th sled in the rack. + pub fn sled_subnet(&self, index: u8) -> Ipv6Network { + omicron_common::address::get_64_subnet(self.rack_subnet(), index) } } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c3de81fd451..b25868e45d3 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -5,12 +5,16 @@ //! 
Rack Setup Service implementation use super::config::{SetupServiceConfig as Config, SledRequest}; +use crate::params::ServiceRequest; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, params::SledSubnet, }; -use crate::config::get_sled_address; +use omicron_common::address::{ + get_dns_subnets, + get_sled_address, +}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -300,14 +304,38 @@ impl ServiceInner { async fn create_plan( &self, config: &Config, - addrs: impl IntoIterator, + bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { - let addrs = addrs.into_iter().enumerate(); + let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); + let dns_subnets = get_dns_subnets(config.reserved_rack_subnet()); + + info!(self.log, "dns_subnets: {:#?}", dns_subnets); + + let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { + // If a sled was explicitly requested from the RSS configuration, + // use that. Otherwise, just give it a "default" (empty) set of + // services. + let mut request = { + if idx < config.requests.len() { + config.requests[idx].clone() + } else { + SledRequest::default() + } + }; - // TODO: The use of "zip" here means that if we have more addrs than - // requests, we won't initialize some of them. Maybe that's okay? - // Maybe that's the responsibility of Nexus? - let requests_and_sleds = config.requests.iter().zip(addrs); + // The first enumerated addresses get assigned the additional + // responsibility of being internal DNS servers. 
+ if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + request.dns_services.push(ServiceRequest { + name: "internal-dns".to_string(), + addresses: vec![dns_subnet.dns_address()], + gz_addresses: vec![dns_subnet.gz_address().ip()], + }); + } + + (request, (idx, bootstrap_addr)) + }); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; @@ -488,10 +516,38 @@ impl ServiceInner { "Initialized sled agent on sled with bootstrap address: {}", bootstrap_addr ); + Ok(()) + }, + )) + .await + .into_iter() + .collect::>()?; - // Next, initialize any datasets on sleds that need it. + // Set up internal DNS services. + futures::future::join_all(plan.iter().map( + |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - *allocation.initialization_request.subnet.as_ref(), + allocation.initialization_request.subnet.as_ref().0, + )); + + // TODO: also tell sled to make GZ address + self.initialize_services( + sled_address, + &allocation.services_request.dns_services, + ) + .await?; + Ok(()) + }, + )) + .await + .into_iter() + .collect::>()?; + + // Issue the dataset initialization requests to all sleds. 
+ futures::future::join_all(plan.iter().map( + |(_, allocation)| async move { + let sled_address = SocketAddr::V6(get_sled_address( + allocation.initialization_request.subnet.as_ref().0, )); self.initialize_datasets( sled_address, @@ -515,11 +571,17 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - *allocation.initialization_request.subnet.as_ref(), + allocation.initialization_request.subnet.as_ref().0, )); + + let all_services = allocation.services_request.services.iter() + .chain(allocation.services_request.dns_services.iter()) + .map(|s| s.clone()) + .collect::>(); + self.initialize_services( sled_address, - &allocation.services_request.services, + &all_services, ) .await?; Ok(()) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 96739f79eec..50e534fefdd 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -7,11 +7,12 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; -use crate::illumos::zone::AddressRequest; +use crate::illumos::zone::{AddressRequest, Zones}; use crate::params::{ServiceEnsureBody, ServiceRequest}; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; +use std::net::IpAddr; use std::path::{Path, PathBuf}; use tokio::sync::Mutex; @@ -29,6 +30,9 @@ pub enum Error { #[error(transparent)] RunningZone(#[from] crate::illumos::running_zone::Error), + #[error("Failed to add address to the global zone: {0}")] + GzAddressFailure(crate::illumos::zone::Error), + #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), @@ -56,6 +60,7 @@ pub struct ServiceManager { config_path: Option, zones: Mutex>, vnic_allocator: VnicAllocator, + physical_link: Option, } impl ServiceManager { @@ -79,7 +84,8 @@ impl ServiceManager { log, config_path, zones: Mutex::new(vec![]), - vnic_allocator: 
VnicAllocator::new("Service", physical_link)?, + vnic_allocator: VnicAllocator::new("Service", physical_link.clone())?, + physical_link, }; let config_path = mgr.services_config_path(); @@ -157,7 +163,7 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = AddressRequest::new_static(addr.ip(), None); + let addr_request = AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -166,6 +172,18 @@ impl ServiceManager { ); } + info!(self.log, "GZ addresses: {:#?}", service.gz_addresses); + for addr in &service.gz_addresses { + info!(self.log, "Ensuring GZ address {} exists", addr.to_string()); + + let addr_name = service.name.replace(&['-', '_'][..], ""); + Zones::ensure_has_global_zone_v6_address( + self.physical_link.clone(), + *addr, + &addr_name, + ).map_err(|e| Error::GzAddressFailure(e))?; + } + debug!(self.log, "importing manifest"); running_zone.run_cmd(&[ @@ -201,34 +219,38 @@ impl ServiceManager { ) -> Result<(), Error> { let mut existing_zones = self.zones.lock().await; let config_path = self.services_config_path(); - if config_path.exists() { - let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; - let known_services = cfg.services; - - let known_set: HashSet<&ServiceRequest> = - HashSet::from_iter(known_services.iter()); - let requested_set = HashSet::from_iter(request.services.iter()); - - if known_set != requested_set { - // If the caller is requesting we instantiate a - // zone that exists, but isn't what they're asking for, throw an - // error. - // - // We may want to use a different mechanism for zone removal, in - // the case of changing configurations, rather than just doing - // that removal implicitly. 
- warn!( - self.log, - "Cannot request services on this sled, differing configurations: {:?}", - known_set.symmetric_difference(&requested_set) - ); - return Err(Error::ServicesAlreadyConfigured); + + let services_to_initialize = { + if config_path.exists() { + let cfg: ServiceEnsureBody = toml::from_str( + &tokio::fs::read_to_string(&config_path).await?, + )?; + let known_services = cfg.services; + + let known_set: HashSet<&ServiceRequest> = + HashSet::from_iter(known_services.iter()); + let requested_set = HashSet::from_iter(request.services.iter()); + + if !requested_set.is_superset(&known_set) { + // The caller may only request services additively. + // + // We may want to use a different mechanism for zone removal, in + // the case of changing configurations, rather than just doing + // that removal implicitly. + warn!( + self.log, + "Cannot request services on this sled, differing configurations: {:?}", + known_set.symmetric_difference(&requested_set) + ); + return Err(Error::ServicesAlreadyConfigured); + } + requested_set.difference(&known_set).map(|s| (*s).clone()).collect::>() + } else { + request.services.clone() } - } + }; - self.initialize_services_locked(&mut existing_zones, &request.services) + self.initialize_services_locked(&mut existing_zones, &services_to_initialize) .await?; let serialized_services = toml::Value::try_from(&request) @@ -305,6 +327,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], + gz_addresses: vec!{}, }], }) .await @@ -318,6 +341,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], + gz_addresses: vec!{}, }], }) .await diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2f407c76e79..a0dae0a73aa 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,9 +1,11 @@ +# TODO: remove the addresses here! They're getting assigned to us! 
+ [dropshot] -bind_address = "[fd00:1122:3344:1::9]:5353" +bind_address = "[fd00:1122:3344:1::1]:5353" request_body_max_bytes = 1048576 [dns] -bind_address = "[fd00:1122:3344:1::9]:53" +bind_address = "[fd00:1122:3344:1::1]:53" [log] # Show log messages of this level and more severe diff --git a/smf/nexus/config.toml b/smf/nexus/config.toml index f11c275c31e..ab135febb7b 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config.toml @@ -18,15 +18,15 @@ schemes_external = ["spoof", "session_cookie"] [database] # URL for connecting to the database -url = "postgresql://root@[fd00:1122:3344:1::2]:32221/omicron?sslmode=disable" +url = "postgresql://root@[fd00:1122:3344:0101::2]:32221/omicron?sslmode=disable" [dropshot_external] # IP address and TCP port on which to listen for the external API -bind_address = "[fd00:1122:3344:1::3]:12220" +bind_address = "[fd00:1122:3344:0101::3]:12220" [dropshot_internal] # IP address and TCP port on which to listen for the internal API -bind_address = "[fd00:1122:3344:1::3]:12221" +bind_address = "[fd00:1122:3344:0101::3]:12221" [log] # Show log messages of this level and more severe @@ -42,4 +42,4 @@ mode = "stderr-terminal" # Configuration for interacting with the timeseries database [timeseries_db] -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index a4812d01fd1..76fc182b316 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -2,10 +2,10 @@ id = "1da65e5b-210c-4859-a7d7-200c1e659972" # Internal address of nexus -nexus_address = "[fd00:1122:3344:1::3]:12221" +nexus_address = "[fd00:1122:3344:0101::3]:12221" [db] -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" batch_size = 1000 batch_interval = 5 # In seconds @@ -14,4 +14,4 @@ level = "debug" mode = "stderr-terminal" [dropshot] -bind_address = "[fd00:1122:3344:1::4]:12223" +bind_address = "[fd00:1122:3344:0101::4]:12223" diff --git 
a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 2efa04c507b..002e17fcc22 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -4,7 +4,7 @@ # Also implies the /48 AZ subnet. # |............| <- This /48 is the AZ Subnet # |...............| <- This /56 is the Rack Subnet -rack_subnet = "fd00:1122:3344:1::" +rack_subnet = "fd00:1122:3344:0100::" [[request]] @@ -12,49 +12,47 @@ rack_subnet = "fd00:1122:3344:1::" # should allocate crucible datasets. [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::6]:32345" +address = "[fd00:1122:3344:0101::6]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "e4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:1::7]:32345" +address = "[fd00:1122:3344:0101::7]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "f4b4dc87-ab46-49fb-a4b4-d361ae214c03" -address = "[fd00:1122:3344:1::8]:32345" +address = "[fd00:1122:3344:0101::8]:32345" dataset_kind.type = "crucible" [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::2]:32221" +address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" dataset_kind.all_addresses = [ - "[fd00:1122:3344:1::2]:32221", + "[fd00:1122:3344:0101::2]:32221", ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. [[request.dataset]] zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" -address = "[fd00:1122:3344:1::5]:8123" +address = "[fd00:1122:3344:0101::5]:8123" dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" addresses = [ - "[fd00:1122:3344:1::3]:12220", - "[fd00:1122:3344:1::3]:12221", + "[fd00:1122:3344:0101::3]:12220", + "[fd00:1122:3344:0101::3]:12221", ] +gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
[[request.service]] name = "oximeter" addresses = [ - "[fd00:1122:3344:1::4]:12223", + "[fd00:1122:3344:0101::4]:12223", ] - -[[request.service]] -name = "internal-dns" -addresses = [ "[fd00:1122:3344:1::9]:5353" ] +gz_addresses = [] diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 6dfe87fe9bf..eaebce97f39 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -5,7 +5,7 @@ id = "fb0f7546-4d46-40ca-9d56-cbb810684ca7" # TODO: Remove this address # Internal address of Nexus -nexus_address = "[fd00:1122:3344:01::3]:12221" +nexus_address = "[fd00:1122:3344:0101::3]:12221" # A file-backed zpool can be manually created with the following: # $ truncate -s 10GB testpool.vdev From bea8c7e3c8b5c7762929abf5f1ed03ed7471428a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 22 Apr 2022 11:13:54 -0400 Subject: [PATCH 11/41] Add some tests --- common/src/address.rs | 98 ++++++++++++++++++++++------ sled-agent/src/bootstrap/agent.rs | 5 +- sled-agent/src/illumos/mod.rs | 6 +- sled-agent/src/illumos/zone.rs | 5 +- sled-agent/src/rack_setup/config.rs | 7 -- sled-agent/src/rack_setup/service.rs | 68 ++++++++++--------- sled-agent/src/services.rs | 33 +++++++--- 7 files changed, 148 insertions(+), 74 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 4e145149b46..e9eef2e0c1a 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -7,9 +7,9 @@ //! This addressing functionality is shared by both initialization services //! and Nexus, who need to agree upon addressing schemes. -use std::net::{Ipv6Addr, SocketAddrV6}; -use serde::{Serialize, Deserialize}; use ipnetwork::Ipv6Network; +use serde::{Deserialize, Serialize}; +use std::net::{Ipv6Addr, SocketAddrV6}; pub const AZ_PREFIX: u8 = 48; pub const RACK_PREFIX: u8 = 56; @@ -57,26 +57,39 @@ impl DnsSubnet { } } -/// Given a particular rack subnet, return the DNS addresses. 
-/// -/// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the -/// [`RACK_PREFIX`] subnet. -pub fn get_dns_subnets(reserved_rack_subnet: Ipv6Network) -> Vec { - assert_eq!(reserved_rack_subnet.prefix(), RACK_PREFIX); +/// A wrapper around an IPv6 network, indicating it is a "reserved" rack +/// subnet which can be used for AZ-wide services. +#[derive(Debug, Clone)] +pub struct ReservedRackSubnet(pub Ipv6Network); + +impl ReservedRackSubnet { + /// Returns the subnet for the reserved rack subnet. + pub fn new(subnet: Ipv6Network) -> Self { + let net = Ipv6Network::new(subnet.network(), AZ_PREFIX).unwrap(); + ReservedRackSubnet( + Ipv6Network::new(net.network(), RACK_PREFIX).unwrap(), + ) + } - let mut iter = reserved_rack_subnet.iter(); - let _anycast_ip = iter.next().unwrap(); + /// Given a particular rack subnet, return the DNS addresses. + /// + /// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the + /// [`RACK_PREFIX`] subnet. + pub fn get_dns_subnets(&self) -> Vec { + assert_eq!(self.0.prefix(), RACK_PREFIX); - (0..DNS_REDUNDANCY).map(|idx| { - let network = get_64_subnet( - reserved_rack_subnet, - u8::try_from(idx + 1).unwrap() - ); + let mut iter = self.0.iter(); + let _anycast_ip = iter.next().unwrap(); + + (0..DNS_REDUNDANCY) + .map(|idx| { + let network = + get_64_subnet(self.0, u8::try_from(idx + 1).unwrap()); - DnsSubnet { - network - } - }).collect() + DnsSubnet { network } + }) + .collect() + } } /// Return the sled agent address for a subnet. @@ -103,3 +116,50 @@ pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { rack_network[7] = index; Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_dns_subnets() { + let subnet = "fd00:1122:3344:0100::/64".parse::().unwrap(); + let rack_subnet = ReservedRackSubnet::new(subnet); + + assert_eq!( + // Note that these bits (indicating the rack) are zero. 
+ // vv + "fd00:1122:3344:0001::/56".parse::().unwrap(), + rack_subnet.0, + ); + + // Observe the first DNS subnet within this reserved rack subnet. + let dns_subnets = rack_subnet.get_dns_subnets(); + assert_eq!(DNS_REDUNDANCY, dns_subnets.len()); + + // The DNS address and GZ address should be only differing by one. + assert_eq!( + "[fd00:1122:3344:0001::1]:5353".parse::().unwrap(), + dns_subnets[0].dns_address(), + ); + assert_eq!( + "fd00:1122:3344:0001::2/64".parse::().unwrap(), + dns_subnets[0].gz_address(), + ); + } + + #[test] + fn test_sled_address() { + let subnet = "fd00:1122:3344:0101::/64".parse::().unwrap(); + assert_eq!( + "[fd00:1122:3344:0101::1]:12345".parse::().unwrap(), + get_sled_address(subnet) + ); + + let subnet = "fd00:1122:3344:0308::/64".parse::().unwrap(); + assert_eq!( + "[fd00:1122:3344:0308::1]:12345".parse::().unwrap(), + get_sled_address(subnet) + ); + } +} diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 8e57cc9b577..da20e4f8e58 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -181,8 +181,9 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = - omicron_common::address::get_sled_address(request.subnet.as_ref().0); + let sled_address = omicron_common::address::get_sled_address( + request.subnet.as_ref().0, + ); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index 42df4da26a5..bdec8e7e702 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -48,7 +48,11 @@ mod inner { if !output.status.success() { return Err(ExecutionError::CommandFailure { - command: command.get_args().map(|s| s.to_string_lossy().into()).collect::>().join(" "), + command: command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" "), status: output.status, stderr: 
String::from_utf8_lossy(&output.stderr).to_string(), }); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 53cabff6a04..c3d5e47f3cf 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -479,7 +479,10 @@ impl Zones { Self::ensure_address( None, &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static(IpAddr::V6(address), Some(omicron_common::address::SLED_PREFIX)), + AddressRequest::new_static( + IpAddr::V6(address), + Some(omicron_common::address::SLED_PREFIX), + ), )?; Ok(()) } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index fff9dee19a3..1ec2858c0b7 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -72,13 +72,6 @@ impl SetupServiceConfig { new_network(self.rack_subnet, RACK_PREFIX) } - /// Returns the subnet for the "reserved" rack subnet. - /// - /// This is used for AZ-wide services, such as DNS. - pub fn reserved_rack_subnet(&self) -> Ipv6Network { - new_network(self.az_subnet().ip(), RACK_PREFIX) - } - /// Returns the subnet for the `index`-th sled in the rack. pub fn sled_subnet(&self, index: u8) -> Ipv6Network { omicron_common::address::get_64_subnet(self.rack_subnet(), index) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b25868e45d3..4b04234210d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -5,16 +5,13 @@ //! 
Rack Setup Service implementation use super::config::{SetupServiceConfig as Config, SledRequest}; -use crate::params::ServiceRequest; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, params::SledSubnet, }; -use omicron_common::address::{ - get_dns_subnets, - get_sled_address, -}; +use crate::params::ServiceRequest; +use omicron_common::address::{get_sled_address, ReservedRackSubnet}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, }; @@ -307,35 +304,37 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let dns_subnets = get_dns_subnets(config.reserved_rack_subnet()); + let rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let dns_subnets = rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); - let requests_and_sleds = bootstrap_addrs.map(|(idx, bootstrap_addr)| { - // If a sled was explicitly requested from the RSS configuration, - // use that. Otherwise, just give it a "default" (empty) set of - // services. - let mut request = { - if idx < config.requests.len() { - config.requests[idx].clone() - } else { - SledRequest::default() + let requests_and_sleds = + bootstrap_addrs.map(|(idx, bootstrap_addr)| { + // If a sled was explicitly requested from the RSS configuration, + // use that. Otherwise, just give it a "default" (empty) set of + // services. + let mut request = { + if idx < config.requests.len() { + config.requests[idx].clone() + } else { + SledRequest::default() + } + }; + + // The first enumerated addresses get assigned the additional + // responsibility of being internal DNS servers. 
+ if idx < dns_subnets.len() { + let dns_subnet = &dns_subnets[idx]; + request.dns_services.push(ServiceRequest { + name: "internal-dns".to_string(), + addresses: vec![dns_subnet.dns_address()], + gz_addresses: vec![dns_subnet.gz_address().ip()], + }); } - }; - - // The first enumerated addresses get assigned the additional - // responsibility of being internal DNS servers. - if idx < dns_subnets.len() { - let dns_subnet = &dns_subnets[idx]; - request.dns_services.push(ServiceRequest { - name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address()], - gz_addresses: vec![dns_subnet.gz_address().ip()], - }); - } - (request, (idx, bootstrap_addr)) - }); + (request, (idx, bootstrap_addr)) + }); let allocations = requests_and_sleds.map(|(request, sled)| { let (idx, bootstrap_addr) = sled; @@ -574,16 +573,15 @@ impl ServiceInner { allocation.initialization_request.subnet.as_ref().0, )); - let all_services = allocation.services_request.services.iter() + let all_services = allocation + .services_request + .services + .iter() .chain(allocation.services_request.dns_services.iter()) .map(|s| s.clone()) .collect::>(); - self.initialize_services( - sled_address, - &all_services, - ) - .await?; + self.initialize_services(sled_address, &all_services).await?; Ok(()) }, )) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 50e534fefdd..f2e76fd361d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -84,7 +84,10 @@ impl ServiceManager { log, config_path, zones: Mutex::new(vec![]), - vnic_allocator: VnicAllocator::new("Service", physical_link.clone())?, + vnic_allocator: VnicAllocator::new( + "Service", + physical_link.clone(), + )?, physical_link, }; @@ -163,7 +166,8 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); - let addr_request = AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); + let addr_request = + 
AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -174,14 +178,19 @@ impl ServiceManager { info!(self.log, "GZ addresses: {:#?}", service.gz_addresses); for addr in &service.gz_addresses { - info!(self.log, "Ensuring GZ address {} exists", addr.to_string()); + info!( + self.log, + "Ensuring GZ address {} exists", + addr.to_string() + ); let addr_name = service.name.replace(&['-', '_'][..], ""); Zones::ensure_has_global_zone_v6_address( self.physical_link.clone(), *addr, &addr_name, - ).map_err(|e| Error::GzAddressFailure(e))?; + ) + .map_err(|e| Error::GzAddressFailure(e))?; } debug!(self.log, "importing manifest"); @@ -244,14 +253,20 @@ impl ServiceManager { ); return Err(Error::ServicesAlreadyConfigured); } - requested_set.difference(&known_set).map(|s| (*s).clone()).collect::>() + requested_set + .difference(&known_set) + .map(|s| (*s).clone()) + .collect::>() } else { request.services.clone() } }; - self.initialize_services_locked(&mut existing_zones, &services_to_initialize) - .await?; + self.initialize_services_locked( + &mut existing_zones, + &services_to_initialize, + ) + .await?; let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); @@ -327,7 +342,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], - gz_addresses: vec!{}, + gz_addresses: vec![], }], }) .await @@ -341,7 +356,7 @@ mod test { services: vec![ServiceRequest { name: SVC_NAME.to_string(), addresses: vec![], - gz_addresses: vec!{}, + gz_addresses: vec![], }], }) .await From fcbc0ab5bf2362f24186ce0740b1c2b0b9b61f78 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 21:00:33 -0400 Subject: [PATCH 12/41] Correctly passing addresses, GZ addresses to DNS service for setup --- common/src/address.rs | 6 ++-- internal-dns/src/bin/dns-server.rs | 19 +++++++++--- internal-dns/src/lib.rs | 1 - sled-agent/src/rack_setup/service.rs 
| 4 +-- sled-agent/src/services.rs | 44 +++++++++++++++++++++++++++- smf/internal-dns/config.toml | 8 +---- smf/internal-dns/manifest.xml | 7 ++++- 7 files changed, 69 insertions(+), 20 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index e9eef2e0c1a..89077c632ab 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -25,6 +25,7 @@ pub const DNS_REDUNDANCY: usize = 1; /// reserved for DNS servers. pub const MAX_DNS_REDUNDANCY: usize = 5; +pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; @@ -78,9 +79,6 @@ impl ReservedRackSubnet { pub fn get_dns_subnets(&self) -> Vec { assert_eq!(self.0.prefix(), RACK_PREFIX); - let mut iter = self.0.iter(); - let _anycast_ip = iter.next().unwrap(); - (0..DNS_REDUNDANCY) .map(|idx| { let network = @@ -129,7 +127,7 @@ mod test { assert_eq!( // Note that these bits (indicating the rack) are zero. // vv - "fd00:1122:3344:0001::/56".parse::().unwrap(), + "fd00:1122:3344:0000::/56".parse::().unwrap(), rack_subnet.0, ); diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 505a42a7dc0..3e3b98f81b5 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -15,21 +15,31 @@ use anyhow::Context; use clap::Parser; use std::path::PathBuf; use std::sync::Arc; +use std::net::{SocketAddr, SocketAddrV6}; #[derive(Parser, Debug)] struct Args { #[clap(long)] config_file: PathBuf, + + #[clap(long)] + server_address: SocketAddrV6, + + #[clap(long)] + dns_address: SocketAddrV6, } #[tokio::main] async fn main() -> Result<(), anyhow::Error> { let args = Args::parse(); let config_file = &args.config_file; + let dns_address = &args.dns_address; let config_file_contents = std::fs::read_to_string(config_file) .with_context(|| format!("read config file {:?}", config_file))?; - let config: internal_dns::Config = toml::from_str(&config_file_contents) + let mut config: internal_dns::Config = 
toml::from_str(&config_file_contents) .with_context(|| format!("parse config file {:?}", config_file))?; + + config.dropshot.bind_address = SocketAddr::V6(args.server_address); eprintln!("{:?}", config); let log = config @@ -42,10 +52,11 @@ async fn main() -> Result<(), anyhow::Error> { { let db = db.clone(); let log = log.clone(); - let config = config.dns.clone(); - + let dns_config = internal_dns::dns_server::Config { + bind_address: dns_address.to_string() + }; tokio::spawn(async move { - internal_dns::dns_server::run(log, db, config).await + internal_dns::dns_server::run(log, db, dns_config).await }); } diff --git a/internal-dns/src/lib.rs b/internal-dns/src/lib.rs index d94684d75e5..786750c1a8f 100644 --- a/internal-dns/src/lib.rs +++ b/internal-dns/src/lib.rs @@ -18,7 +18,6 @@ pub struct Config { pub log: dropshot::ConfigLogging, pub dropshot: dropshot::ConfigDropshot, pub data: dns_data::Config, - pub dns: dns_server::Config, } pub async fn start_server( diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 4b04234210d..0eb4f141266 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -304,8 +304,8 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); - let dns_subnets = rack_subnet.get_dns_subnets(); + let reserved_rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f2e76fd361d..0a0386f0945 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -9,6 +9,7 @@ use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use 
crate::illumos::zone::{AddressRequest, Zones}; use crate::params::{ServiceEnsureBody, ServiceRequest}; +use omicron_common::address::{DNS_PORT, DNS_SERVER_PORT}; use slog::Logger; use std::collections::HashSet; use std::iter::FromIterator; @@ -204,13 +205,54 @@ impl ServiceManager { ), ])?; + let smf_name = format!("svc:/system/illumos/{}", service.name); + let default_smf_name = format!("{}:default", smf_name); + + match service.name.as_str() { + "internal-dns" => { + info!(self.log, "Setting up internal-dns service"); + // TODO: This is a hack! + // - Should we only supply one address, and drop the port? + // ^ this seems like a good start + // - Should we provide a mechanism for providing multiple addresses? + let address = service.addresses[0].ip(); + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/server_address=[{}]:{}", address, DNS_SERVER_PORT), + ])?; + + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!("config/dns_address=[{}]:{}", address, DNS_PORT), + ])?; + }, + _ => { + info!(self.log, "Service name {} did not match", service.name); + }, + } + debug!(self.log, "enabling service"); + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ])?; + running_zone.run_cmd(&[ crate::illumos::zone::SVCADM, "enable", "-t", - &format!("svc:/system/illumos/{}:default", service.name), + &default_smf_name, ])?; existing_zones.push(running_zone); diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index a0dae0a73aa..b6fd0e4fa8c 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -1,12 +1,6 @@ -# TODO: remove the addresses here! They're getting assigned to us! 
- [dropshot] -bind_address = "[fd00:1122:3344:1::1]:5353" request_body_max_bytes = 1048576 -[dns] -bind_address = "[fd00:1122:3344:1::1]:53" - [log] # Show log messages of this level and more severe level = "info" @@ -21,4 +15,4 @@ mode = "stderr-terminal" [data] nmax_messages = 16 -storage_path = "/var/tmp/oxide/dns" +storage_path = "/var/oxide/dns" diff --git a/smf/internal-dns/manifest.xml b/smf/internal-dns/manifest.xml index 25b03434c34..d7364ce12f1 100644 --- a/smf/internal-dns/manifest.xml +++ b/smf/internal-dns/manifest.xml @@ -13,10 +13,15 @@ + + + + + From f214fcf030179b513d44e6cb0585b8d1c136cb7b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:10:59 -0400 Subject: [PATCH 13/41] Avoid specifying port when not necessary --- common/src/address.rs | 6 +++--- sled-agent/src/params.rs | 4 ++-- sled-agent/src/rack_setup/service.rs | 2 +- sled-agent/src/services.rs | 4 ++-- smf/sled-agent/config-rss.toml | 13 +++---------- 5 files changed, 11 insertions(+), 18 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 89077c632ab..69a6c3143d2 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -39,11 +39,11 @@ impl DnsSubnet { /// Returns the DNS server address within the subnet. /// /// This is the first address within the subnet. - pub fn dns_address(&self) -> SocketAddrV6 { + pub fn dns_address(&self) -> Ipv6Network { let mut iter = self.network.iter(); let _anycast_ip = iter.next().unwrap(); let dns_ip = iter.next().unwrap(); - SocketAddrV6::new(dns_ip, DNS_SERVER_PORT, 0, 0) + Ipv6Network::new(dns_ip, SLED_PREFIX).unwrap() } /// Returns the address which the Global Zone should create @@ -137,7 +137,7 @@ mod test { // The DNS address and GZ address should be only differing by one. 
assert_eq!( - "[fd00:1122:3344:0001::1]:5353".parse::().unwrap(), + "fd00:1122:3344:0001::1/64".parse::().unwrap(), dns_subnets[0].dns_address(), ); assert_eq!( diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 553954e5c7f..ca107e4f3dd 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,7 @@ use omicron_common::api::internal::nexus::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt::{Debug, Display, Formatter, Result as FormatResult}; -use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr}; use uuid::Uuid; /// Used to request a Disk state change @@ -233,7 +233,7 @@ pub struct ServiceRequest { // The name of the service to be created. pub name: String, // The addresses on which the service should listen for requests. - pub addresses: Vec, + pub addresses: Vec, // The addresses in the global zone which should be created, if necessary // to route to the service. #[serde(default)] diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 0eb4f141266..8dc4021f6f3 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -328,7 +328,7 @@ impl ServiceInner { let dns_subnet = &dns_subnets[idx]; request.dns_services.push(ServiceRequest { name: "internal-dns".to_string(), - addresses: vec![dns_subnet.dns_address()], + addresses: vec![dns_subnet.dns_address().ip()], gz_addresses: vec![dns_subnet.gz_address().ip()], }); } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0a0386f0945..9f95a3bc4ea 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -168,7 +168,7 @@ impl ServiceManager { for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = - AddressRequest::new_static(IpAddr::V6(*addr.ip()), None); + AddressRequest::new_static(IpAddr::V6(*addr), None); 
running_zone.ensure_address(addr_request).await?; info!( self.log, @@ -215,7 +215,7 @@ impl ServiceManager { // - Should we only supply one address, and drop the port? // ^ this seems like a good start // - Should we provide a mechanism for providing multiple addresses? - let address = service.addresses[0].ip(); + let address = service.addresses[0]; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", diff --git a/smf/sled-agent/config-rss.toml b/smf/sled-agent/config-rss.toml index 002e17fcc22..cf9582ab71a 100644 --- a/smf/sled-agent/config-rss.toml +++ b/smf/sled-agent/config-rss.toml @@ -29,9 +29,7 @@ dataset_kind.type = "crucible" zpool_uuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b" address = "[fd00:1122:3344:0101::2]:32221" dataset_kind.type = "cockroach_db" -dataset_kind.all_addresses = [ - "[fd00:1122:3344:0101::2]:32221", -] +dataset_kind.all_addresses = [ "[fd00:1122:3344:0101::2]:32221" ] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate clickhouse datasets. @@ -42,17 +40,12 @@ dataset_kind.type = "clickhouse" [[request.service]] name = "nexus" -addresses = [ - "[fd00:1122:3344:0101::3]:12220", - "[fd00:1122:3344:0101::3]:12221", -] +addresses = [ "fd00:1122:3344:0101::3" ] gz_addresses = [] # TODO(https://github.com/oxidecomputer/omicron/issues/732): Nexus # should allocate Oximeter services. 
[[request.service]] name = "oximeter" -addresses = [ - "[fd00:1122:3344:0101::4]:12223", -] +addresses = [ "fd00:1122:3344:0101::4" ] gz_addresses = [] From baea4a81823b04e3fe5a4dde2faafda14f5edddf Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:18:02 -0400 Subject: [PATCH 14/41] safer vec access, better errors --- sled-agent/src/rack_setup/service.rs | 1 - sled-agent/src/services.rs | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 8dc4021f6f3..9e846ecf1dc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -529,7 +529,6 @@ impl ServiceInner { allocation.initialization_request.subnet.as_ref().0, )); - // TODO: also tell sled to make GZ address self.initialize_services( sled_address, &allocation.services_request.dns_services, diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f95a3bc4ea..c4ccdea379d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -37,6 +37,12 @@ pub enum Error { #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), + #[error("Could not initialize service as requested: {message}")] + BadServiceRequest { + service: String, + message: String, + }, + #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, } @@ -211,11 +217,12 @@ impl ServiceManager { match service.name.as_str() { "internal-dns" => { info!(self.log, "Setting up internal-dns service"); - // TODO: This is a hack! - // - Should we only supply one address, and drop the port? - // ^ this seems like a good start - // - Should we provide a mechanism for providing multiple addresses? 
- let address = service.addresses[0]; + let address = service.addresses.get(0).ok_or_else(|| { + Error::BadServiceRequest { + service: service.name.clone(), + message: "Not enough addresses".to_string(), + } + })?; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", From 58744c4ee8d3d8864b6020f70de4b92f1d415803 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:18:24 -0400 Subject: [PATCH 15/41] fmt --- internal-dns/src/bin/dns-server.rs | 9 ++++--- sled-agent/src/rack_setup/service.rs | 3 ++- sled-agent/src/services.rs | 37 +++++++++++++++++----------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/internal-dns/src/bin/dns-server.rs b/internal-dns/src/bin/dns-server.rs index 3e3b98f81b5..b8cca5af301 100644 --- a/internal-dns/src/bin/dns-server.rs +++ b/internal-dns/src/bin/dns-server.rs @@ -13,9 +13,9 @@ use anyhow::anyhow; use anyhow::Context; use clap::Parser; +use std::net::{SocketAddr, SocketAddrV6}; use std::path::PathBuf; use std::sync::Arc; -use std::net::{SocketAddr, SocketAddrV6}; #[derive(Parser, Debug)] struct Args { @@ -36,8 +36,9 @@ async fn main() -> Result<(), anyhow::Error> { let dns_address = &args.dns_address; let config_file_contents = std::fs::read_to_string(config_file) .with_context(|| format!("read config file {:?}", config_file))?; - let mut config: internal_dns::Config = toml::from_str(&config_file_contents) - .with_context(|| format!("parse config file {:?}", config_file))?; + let mut config: internal_dns::Config = + toml::from_str(&config_file_contents) + .with_context(|| format!("parse config file {:?}", config_file))?; config.dropshot.bind_address = SocketAddr::V6(args.server_address); eprintln!("{:?}", config); @@ -53,7 +54,7 @@ async fn main() -> Result<(), anyhow::Error> { let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { - bind_address: dns_address.to_string() + bind_address: dns_address.to_string(), }; tokio::spawn(async move { 
internal_dns::dns_server::run(log, db, dns_config).await diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 9e846ecf1dc..98c6962e1a5 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -304,7 +304,8 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = ReservedRackSubnet::new(config.rack_subnet()); + let reserved_rack_subnet = + ReservedRackSubnet::new(config.rack_subnet()); let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c4ccdea379d..9f1966866c4 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -38,10 +38,7 @@ pub enum Error { Dladm(#[from] crate::illumos::dladm::Error), #[error("Could not initialize service as requested: {message}")] - BadServiceRequest { - service: String, - message: String, - }, + BadServiceRequest { service: String, message: String }, #[error("Services already configured for this Sled Agent")] ServicesAlreadyConfigured, @@ -217,18 +214,22 @@ impl ServiceManager { match service.name.as_str() { "internal-dns" => { info!(self.log, "Setting up internal-dns service"); - let address = service.addresses.get(0).ok_or_else(|| { - Error::BadServiceRequest { - service: service.name.clone(), - message: "Not enough addresses".to_string(), - } - })?; + let address = + service.addresses.get(0).ok_or_else(|| { + Error::BadServiceRequest { + service: service.name.clone(), + message: "Not enough addresses".to_string(), + } + })?; running_zone.run_cmd(&[ crate::illumos::zone::SVCCFG, "-s", &smf_name, "setprop", - &format!("config/server_address=[{}]:{}", address, DNS_SERVER_PORT), + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), ])?; running_zone.run_cmd(&[ 
@@ -236,12 +237,18 @@ impl ServiceManager { "-s", &smf_name, "setprop", - &format!("config/dns_address=[{}]:{}", address, DNS_PORT), + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), ])?; - }, + } _ => { - info!(self.log, "Service name {} did not match", service.name); - }, + info!( + self.log, + "Service name {} did not match", service.name + ); + } } debug!(self.log, "enabling service"); From c1e2180015bc05d37a81ca0acb1490f5913c3e5e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:19:14 -0400 Subject: [PATCH 16/41] updated storage path --- smf/internal-dns/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2f407c76e79..2049036d338 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -19,4 +19,4 @@ mode = "stderr-terminal" [data] nmax_messages = 16 -storage_path = "/var/tmp/oxide/dns" +storage_path = "/var/oxide/dns" From 39431c613205816b80c8691c75f45357858f41fb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:31:00 -0400 Subject: [PATCH 17/41] fix tests, clippy --- internal-dns/tests/basic_test.rs | 9 ++++----- sled-agent/src/rack_setup/service.rs | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index 24e5b11744e..b58ecdb62f9 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -140,10 +140,12 @@ async fn init_client_server( { let db = db.clone(); let log = log.clone(); - let config = config.dns.clone(); + let dns_config = internal_dns::dns_server::Config { + bind_address: format!("127.0.0.1:{}", dns_port), + }; tokio::spawn(async move { - internal_dns::dns_server::run(log, db, config).await + internal_dns::dns_server::run(log, db, dns_config).await }); } @@ -184,9 +186,6 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { nmax_messages: 
16, storage_path, }, - dns: internal_dns::dns_server::Config { - bind_address: format!("127.0.0.1:{}", dns_port).parse().unwrap(), - }, }; Ok((config, dropshot_port, dns_port)) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 98c6962e1a5..153ca1bc18b 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -355,7 +355,7 @@ impl ServiceInner { bootstrap_addr, SledAllocation { initialization_request: SledAgentRequest { subnet }, - services_request: request.clone(), + services_request: request, }, ) }); From 22dfb79600a0d91d9061e29a0dfae19f1bb0a0cc Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:43:54 -0400 Subject: [PATCH 18/41] Fix another test --- sled-agent/src/services.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f1966866c4..b699842c868 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -242,6 +242,15 @@ impl ServiceManager { address, DNS_PORT ), ])?; + + // Refresh the manifest with the new properties we set, + // so they become "effective" properties when the service is enabled. + running_zone.run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ])?; } _ => { info!( @@ -253,15 +262,6 @@ impl ServiceManager { debug!(self.log, "enabling service"); - // Refresh the manifest with the new properties we set, - // so they become "effective" properties when the service is enabled. 
- running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ])?; - running_zone.run_cmd(&[ crate::illumos::zone::SVCADM, "enable", From 9f7f55b87816d434aa851072b3bb572b372dc694 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 23 Apr 2022 22:49:35 -0400 Subject: [PATCH 19/41] Bunyan formatted --- smf/internal-dns/config.toml | 11 +++-------- smf/nexus/config.toml | 11 +++-------- smf/oximeter/config.toml | 4 +++- smf/sled-agent/config.toml | 4 +++- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/smf/internal-dns/config.toml b/smf/internal-dns/config.toml index 2049036d338..8edc8d33725 100644 --- a/smf/internal-dns/config.toml +++ b/smf/internal-dns/config.toml @@ -8,14 +8,9 @@ bind_address = "[fd00:1122:3344:1::9]:53" [log] # Show log messages of this level and more severe level = "info" - -# Example output to a terminal (with colors) -mode = "stderr-terminal" - -# Example output to a file, appending if it already exists. -#mode = "file" -#path = "logs/server.log" -#if_exists = "append" +mode = "file" +path = "/var/oxide/internal-dns.log" +if_exists = "append" [data] nmax_messages = 16 diff --git a/smf/nexus/config.toml b/smf/nexus/config.toml index f11c275c31e..3377acfbffc 100644 --- a/smf/nexus/config.toml +++ b/smf/nexus/config.toml @@ -31,14 +31,9 @@ bind_address = "[fd00:1122:3344:1::3]:12221" [log] # Show log messages of this level and more severe level = "info" - -# Example output to a terminal (with colors) -mode = "stderr-terminal" - -# Example output to a file, appending if it already exists. 
-#mode = "file" -#path = "logs/server.log" -#if_exists = "append" +mode = "file" +path = "/var/oxide/nexus.log" +if_exists = "append" # Configuration for interacting with the timeseries database [timeseries_db] diff --git a/smf/oximeter/config.toml b/smf/oximeter/config.toml index a4812d01fd1..8f66cb0603e 100644 --- a/smf/oximeter/config.toml +++ b/smf/oximeter/config.toml @@ -11,7 +11,9 @@ batch_interval = 5 # In seconds [log] level = "debug" -mode = "stderr-terminal" +mode = "file" +path = "/var/oxide/oximeter.log" +if_exists = "append" [dropshot] bind_address = "[fd00:1122:3344:1::4]:12223" diff --git a/smf/sled-agent/config.toml b/smf/sled-agent/config.toml index 6dfe87fe9bf..ac4a4283bf8 100644 --- a/smf/sled-agent/config.toml +++ b/smf/sled-agent/config.toml @@ -23,4 +23,6 @@ zpools = [ [log] level = "info" -mode = "stderr-terminal" +mode = "file" +path = "/var/oxide/sled-agent.log" +if_exists = "append" From 981f7449a72d387c37d38cca768944b36e4ae668 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 12:10:15 -0400 Subject: [PATCH 20/41] Regenerate bindings --- openapi/sled-agent.json | 3 ++- sled-agent/src/params.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index dca641e06d3..cc6cfc8c20c 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -965,7 +965,8 @@ "addresses": { "type": "array", "items": { - "type": "string" + "type": "string", + "format": "ipv6" } }, "gz_addresses": { diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index ca107e4f3dd..7ad76a634b2 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -244,7 +244,7 @@ impl From for sled_agent_client::types::ServiceRequest { fn from(s: ServiceRequest) -> Self { Self { name: s.name, - addresses: s.addresses.into_iter().map(|s| s.to_string()).collect(), + addresses: s.addresses, gz_addresses: s.gz_addresses, } } From 57a6697e10f7618d972ab13eb68078ce47704f23 Mon Sep 
17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 12:52:38 -0400 Subject: [PATCH 21/41] Start on service errors --- sled-agent/src/services.rs | 76 +++++++++++++++++++++++++++++------- sled-agent/src/sled_agent.rs | 21 ++++++++-- 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b699842c868..3c4b28795d5 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -19,14 +19,23 @@ use tokio::sync::Mutex; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("Cannot serialize TOML file: {0}")] - TomlSerialize(#[from] toml::ser::Error), - - #[error("Cannot deserialize TOML file: {0}")] - TomlDeserialize(#[from] toml::de::Error), - - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), + #[error("Cannot serialize TOML to file {path}: {err}")] + TomlSerialize { + path: PathBuf, + err: toml::ser::Error, + }, + + #[error("Cannot deserialize TOML from file {path}: {err}")] + TomlDeserialize { + path: PathBuf, + err: toml::de::Error, + }, + + #[error("I/O Error accessing {path}: {err}")] + Io { + path: PathBuf, + err: std::io::Error, + }, #[error(transparent)] RunningZone(#[from] crate::illumos::running_zone::Error), @@ -37,7 +46,7 @@ pub enum Error { #[error(transparent)] Dladm(#[from] crate::illumos::dladm::Error), - #[error("Could not initialize service as requested: {message}")] + #[error("Could not initialize service {service} as requested: {message}")] BadServiceRequest { service: String, message: String }, #[error("Services already configured for this Sled Agent")] @@ -103,8 +112,20 @@ impl ServiceManager { config_path.to_string_lossy() ); let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; + &tokio::fs::read_to_string(&config_path) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?, + ).map_err(|err| { + Error::TomlDeserialize { + path: 
config_path.clone(), + err, + } + })?; let mut existing_zones = mgr.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) .await?; @@ -288,8 +309,20 @@ impl ServiceManager { let services_to_initialize = { if config_path.exists() { let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path).await?, - )?; + &tokio::fs::read_to_string(&config_path) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?, + ).map_err(|err| { + Error::TomlDeserialize { + path: config_path.clone(), + err, + } + })?; let known_services = cfg.services; let known_set: HashSet<&ServiceRequest> = @@ -326,8 +359,21 @@ impl ServiceManager { let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); - tokio::fs::write(&config_path, toml::to_string(&serialized_services)?) - .await?; + let services_str = toml::to_string(&serialized_services) + .map_err(|err| { + Error::TomlSerialize { + path: config_path.clone(), + err, + } + })?; + tokio::fs::write(&config_path, services_str) + .await + .map_err(|err| { + Error::Io { + path: config_path.clone(), + err, + } + })?; Ok(()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index df3e9e816a4..0e92e46abda 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -34,8 +34,11 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { - #[error(transparent)] - Datalink(#[from] crate::illumos::dladm::Error), + #[error("Datalink error: {message}, {err}")] + Datalink { + message: String, + err: crate::illumos::dladm::Error, + }, #[error(transparent)] Services(#[from] crate::services::Error), @@ -140,10 +143,20 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics()?; + let vnics = Dladm::get_vnics().map_err(|err| { + Error::Datalink { + message: "Looking up VNICs on 
boot".to_string(), + err, + } + })?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic)?; + Dladm::delete_vnic(&vnic).map_err(|err| { + Error::Datalink { + message: "Deleting VNIC during boot".to_string(), + err, + } + })?; } let storage = StorageManager::new( From 5031561434131c2567cdc6802faded2e9ef92dca Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 13:04:43 -0400 Subject: [PATCH 22/41] Push the 'find_physical' call upwards. Simplifies error handling --- sled-agent/src/bootstrap/agent.rs | 10 +++++++++- sled-agent/src/illumos/vnic.rs | 9 ++------- sled-agent/src/illumos/zone.rs | 10 ++-------- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/services.rs | 7 +++---- sled-agent/src/sled_agent.rs | 19 +++++++++++++++---- sled-agent/src/storage_manager.rs | 2 +- 7 files changed, 33 insertions(+), 26 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index da20e4f8e58..c06a6538674 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -130,8 +130,16 @@ impl Agent { sled_config: SledConfig, address: Ipv6Addr, ) -> Result { + let data_link = if let Some(link) = sled_config.data_link.clone() { + link + } else { + Dladm::find_physical().map_err(|err| { + BootstrapError::SledError(format!("Can't access physical link: {}", err)) + })? + }; + Zones::ensure_has_global_zone_v6_address( - sled_config.data_link.clone(), + data_link.clone(), address, "bootstrap6", )?; diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index 5d5d8292923..b18a08391af 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -43,17 +43,12 @@ impl VnicAllocator { /// - oxControlStorage[NNN] pub fn new>( scope: S, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { - let data_link = if let Some(link) = physical_link { - link - } else { - Dladm::find_physical()? 
- }; Ok(Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), - data_link, + data_link: physical_link, }) } diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index c3d5e47f3cf..9f4a499e861 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -9,7 +9,7 @@ use slog::Logger; use std::net::{IpAddr, Ipv6Addr}; use crate::illumos::addrobj::AddrObject; -use crate::illumos::dladm::{Dladm, PhysicalLink, VNIC_PREFIX_CONTROL}; +use crate::illumos::dladm::{PhysicalLink, VNIC_PREFIX_CONTROL}; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::{execute, PFEXEC}; @@ -457,16 +457,10 @@ impl Zones { // should remove this function when Sled Agents are provided IPv6 addresses // from RSS. pub fn ensure_has_global_zone_v6_address( - physical_link: Option, + link: PhysicalLink, address: Ipv6Addr, name: &str, ) -> Result<(), Error> { - // Ensure that addrconf has been set up in the Global Zone. - let link = if let Some(link) = physical_link { - link - } else { - Dladm::find_physical()? 
- }; let gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal")?; Self::ensure_has_link_local_v6_address(None, &gz_link_local_addrobj)?; diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 9a86b3ef62a..ca01815acaf 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -59,7 +59,7 @@ impl InstanceManager { log: Logger, vlan: Option, nexus_client: Arc, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3c4b28795d5..8262bf17ff8 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -73,7 +73,7 @@ pub struct ServiceManager { config_path: Option, zones: Mutex>, vnic_allocator: VnicAllocator, - physical_link: Option, + physical_link: PhysicalLink, } impl ServiceManager { @@ -82,14 +82,13 @@ impl ServiceManager { /// /// Args: /// - `log`: The logger - /// - `physical_link`: An optional physical link on which to allocate - /// datalinks. By default, the first physical link is used. + /// - `physical_link`: A physical link on which to allocate datalinks. /// - `config_path`: An optional path to a configuration file to store /// the record of services. By default, [`default_services_config_path`] /// is used. 
pub async fn new( log: Logger, - physical_link: Option, + physical_link: PhysicalLink, config_path: Option, ) -> Result { debug!(log, "Creating new ServiceManager"); diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 0e92e46abda..e659beea9aa 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -98,6 +98,17 @@ impl SledAgent { let vlan = config.vlan; info!(&log, "created sled agent"; "id" => ?id); + let data_link = if let Some(link) = config.data_link.clone() { + link + } else { + Dladm::find_physical().map_err(|err| { + Error::Datalink { + message: "Looking up physical link".to_string(), + err, + } + })? + }; + // Before we start creating zones, we need to ensure that the // necessary ZFS and Zone resources are ready. Zfs::ensure_zoned_filesystem( @@ -116,7 +127,7 @@ impl SledAgent { // RSS-provided IP address. In the meantime, we use one from the // configuration file. Zones::ensure_has_global_zone_v6_address( - config.data_link.clone(), + data_link.clone(), *sled_address.ip(), "sled6", )?; @@ -163,7 +174,7 @@ impl SledAgent { &log, *id, nexus_client.clone(), - config.data_link.clone(), + data_link.clone(), ) .await?; if let Some(pools) = &config.zpools { @@ -180,10 +191,10 @@ impl SledAgent { log.clone(), vlan, nexus_client.clone(), - config.data_link.clone(), + data_link.clone(), )?; let services = - ServiceManager::new(log.clone(), config.data_link.clone(), None) + ServiceManager::new(log.clone(), data_link.clone(), None) .await?; Ok(SledAgent { diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index ff09437993e..386db23c7f5 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -826,7 +826,7 @@ impl StorageManager { log: &Logger, sled_id: Uuid, nexus_client: Arc, - physical_link: Option, + physical_link: PhysicalLink, ) -> Result { let log = log.new(o!("component" => "sled agent storage manager")); let pools = 
Arc::new(Mutex::new(HashMap::new())); From 37afdd64b34d4c4aa8506554155814372d24fca5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 13:13:39 -0400 Subject: [PATCH 23/41] Avoid using Result for now infallible funcs, fix tests --- sled-agent/src/illumos/vnic.rs | 12 +++++------- sled-agent/src/instance.rs | 5 ++--- sled-agent/src/instance_manager.rs | 18 ++++++++---------- sled-agent/src/services.rs | 14 +++++++------- sled-agent/src/sled_agent.rs | 4 ++-- sled-agent/src/storage_manager.rs | 8 ++++---- 6 files changed, 28 insertions(+), 33 deletions(-) diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index b18a08391af..3e9f93a26d7 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -44,12 +44,12 @@ impl VnicAllocator { pub fn new>( scope: S, physical_link: PhysicalLink, - ) -> Result { - Ok(Self { + ) -> Self { + Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), data_link: physical_link, - }) + } } /// Creates a new NIC, intended for usage by the guest. 
@@ -144,8 +144,7 @@ mod test { #[test] fn test_allocate() { let allocator = - VnicAllocator::new("Foo", Some(PhysicalLink("mylink".to_string()))) - .unwrap(); + VnicAllocator::new("Foo", PhysicalLink("mylink".to_string())); assert_eq!("oxFoo0", allocator.next()); assert_eq!("oxFoo1", allocator.next()); assert_eq!("oxFoo2", allocator.next()); @@ -154,8 +153,7 @@ mod test { #[test] fn test_allocate_within_scopes() { let allocator = - VnicAllocator::new("Foo", Some(PhysicalLink("mylink".to_string()))) - .unwrap(); + VnicAllocator::new("Foo", PhysicalLink("mylink".to_string())); assert_eq!("oxFoo0", allocator.next()); let allocator = allocator.new_superscope("Baz"); assert_eq!("oxBazFoo1", allocator.next()); diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 142037515c3..099998075be 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -717,9 +717,8 @@ mod test { let log = logger(); let vnic_allocator = VnicAllocator::new( "Test".to_string(), - Some(PhysicalLink("mylink".to_string())), - ) - .unwrap(); + PhysicalLink("mylink".to_string()), + ); let nexus_client = MockNexusClient::default(); let inst = Instance::new( diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index ca01815acaf..608dd76233d 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -60,16 +60,16 @@ impl InstanceManager { vlan: Option, nexus_client: Arc, physical_link: PhysicalLink, - ) -> Result { - Ok(InstanceManager { + ) -> InstanceManager { + InstanceManager { inner: Arc::new(InstanceManagerInternal { log, nexus_client, instances: Mutex::new(BTreeMap::new()), vlan, - vnic_allocator: VnicAllocator::new("Instance", physical_link)?, + vnic_allocator: VnicAllocator::new("Instance", physical_link), }), - }) + } } /// Idempotently ensures that the given Instance (described by @@ -266,9 +266,8 @@ mod test { log, None, nexus_client, - Some(PhysicalLink("mylink".to_string())), - ) - 
.unwrap(); + PhysicalLink("mylink".to_string()), + ); // Verify that no instances exist. assert!(im.inner.instances.lock().unwrap().is_empty()); @@ -347,9 +346,8 @@ mod test { log, None, nexus_client, - Some(PhysicalLink("mylink".to_string())), - ) - .unwrap(); + PhysicalLink("mylink".to_string()), + ); let ticket = Arc::new(std::sync::Mutex::new(None)); let ticket_clone = ticket.clone(); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 8262bf17ff8..338eed2d625 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -99,7 +99,7 @@ impl ServiceManager { vnic_allocator: VnicAllocator::new( "Service", physical_link.clone(), - )?, + ), physical_link, }; @@ -492,7 +492,7 @@ mod test { let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config), ) .await @@ -516,7 +516,7 @@ mod test { let config = config_dir.path().join("services.toml"); let mgr = ServiceManager::new( log, - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config), ) .await @@ -543,7 +543,7 @@ mod test { // down. let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -556,7 +556,7 @@ mod test { let _expectations = expect_new_service(); let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -580,7 +580,7 @@ mod test { // down. let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await @@ -595,7 +595,7 @@ mod test { // Observe that the old service is not re-initialized. 
let mgr = ServiceManager::new( logctx.log.clone(), - Some(PhysicalLink(EXPECTED_LINK_NAME.to_string())), + PhysicalLink(EXPECTED_LINK_NAME.to_string()), Some(config.clone()), ) .await diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e659beea9aa..42249352b72 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -176,7 +176,7 @@ impl SledAgent { nexus_client.clone(), data_link.clone(), ) - .await?; + .await; if let Some(pools) = &config.zpools { for pool in pools { info!( @@ -192,7 +192,7 @@ impl SledAgent { vlan, nexus_client.clone(), data_link.clone(), - )?; + ); let services = ServiceManager::new(log.clone(), data_link.clone(), None) .await?; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 386db23c7f5..f911f1356df 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -827,7 +827,7 @@ impl StorageManager { sled_id: Uuid, nexus_client: Arc, physical_link: PhysicalLink, - ) -> Result { + ) -> Self { let log = log.new(o!("component" => "sled agent storage manager")); let pools = Arc::new(Mutex::new(HashMap::new())); let (new_pools_tx, new_pools_rx) = mpsc::channel(10); @@ -839,14 +839,14 @@ impl StorageManager { pools: pools.clone(), new_pools_rx, new_filesystems_rx, - vnic_allocator: VnicAllocator::new("Storage", physical_link)?, + vnic_allocator: VnicAllocator::new("Storage", physical_link), }; - Ok(StorageManager { + StorageManager { pools, new_pools_tx, new_filesystems_tx, task: tokio::task::spawn(async move { worker.do_work().await }), - }) + } } /// Adds a zpool to the storage manager. 
From 1d0a5cec7c451c1e3445c6ee981a05003238a41e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 16:45:03 -0400 Subject: [PATCH 24/41] More specific, contextual zone errors --- sled-agent/src/bootstrap/agent.rs | 14 +- sled-agent/src/illumos/addrobj.rs | 36 ++- sled-agent/src/illumos/running_zone.rs | 12 +- sled-agent/src/illumos/vnic.rs | 5 +- sled-agent/src/illumos/zone.rs | 304 ++++++++++++++++--------- sled-agent/src/instance.rs | 3 - sled-agent/src/instance_manager.rs | 6 - sled-agent/src/services.rs | 232 ++++++++++--------- sled-agent/src/sled_agent.rs | 40 ++-- sled-agent/src/storage_manager.rs | 3 - 10 files changed, 396 insertions(+), 259 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index c06a6538674..50bb833ed5b 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -13,7 +13,7 @@ use super::trust_quorum::{ use super::views::{ShareResponse, SledAgentResponse}; use crate::config::Config as SledConfig; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; -use crate::illumos::zone::{self, Zones}; +use crate::illumos::zone::Zones; use crate::rack_setup::service::Service as RackSetupService; use crate::server::Server as SledServer; use omicron_common::api::external::{Error as ExternalError, MacAddr}; @@ -49,8 +49,8 @@ pub enum BootstrapError { #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), - #[error(transparent)] - Zone(#[from] zone::Error), + #[error("Failed to initialize bootstrap address: {err}")] + BootstrapAddress { err: crate::illumos::zone::EnsureGzAddressError }, } impl From for ExternalError { @@ -134,7 +134,10 @@ impl Agent { link } else { Dladm::find_physical().map_err(|err| { - BootstrapError::SledError(format!("Can't access physical link: {}", err)) + BootstrapError::SledError(format!( + "Can't access physical link: {}", + err + )) })? 
}; @@ -142,7 +145,8 @@ impl Agent { data_link.clone(), address, "bootstrap6", - )?; + ) + .map_err(|err| BootstrapError::BootstrapAddress { err })?; let peer_monitor = discovery::PeerMonitor::new(&log, address)?; let share = read_key_share()?; diff --git a/sled-agent/src/illumos/addrobj.rs b/sled-agent/src/illumos/addrobj.rs index 80f41fd9010..5b1d3668da3 100644 --- a/sled-agent/src/illumos/addrobj.rs +++ b/sled-agent/src/illumos/addrobj.rs @@ -19,28 +19,48 @@ pub struct AddrObject { name: String, } +#[derive(Debug, PartialEq, Clone)] +enum BadName { + Interface(String), + Object(String), +} + +impl std::fmt::Display for BadName { + fn fmt( + &self, + f: &mut std::fmt::Formatter<'_>, + ) -> Result<(), std::fmt::Error> { + match self { + BadName::Interface(s) => write!(f, "Bad interface name: {}", s), + BadName::Object(s) => write!(f, "Bad object name: {}", s), + } + } +} + /// Errors which may be returned from constructing an [`AddrObject`]. #[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("Failed to parse addrobj name: {0}")] - Parse(String), +#[error("Failed to parse addrobj name: {name}")] +pub struct ParseError { + name: BadName, } impl AddrObject { - pub fn new_control(interface: &str) -> Result { + pub fn new_control(interface: &str) -> Result { Self::new(interface, "omicron") } - pub fn on_same_interface(&self, name: &str) -> Result { + pub fn on_same_interface(&self, name: &str) -> Result { Self::new(&self.interface, name) } - pub fn new(interface: &str, name: &str) -> Result { + pub fn new(interface: &str, name: &str) -> Result { if interface.contains('/') { - return Err(Error::Parse(interface.to_string())); + return Err(ParseError { + name: BadName::Interface(interface.to_string()), + }); } if name.contains('/') { - return Err(Error::Parse(name.to_string())); + return Err(ParseError { name: BadName::Object(name.to_string()) }); } Ok(Self { interface: interface.to_string(), name: name.to_string() }) } diff --git 
a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index fda2165a714..8898ae69654 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -31,17 +31,23 @@ pub enum Error { #[error("Failed to parse output: {0}")] Parse(#[from] std::string::FromUtf8Error), - #[error("Zone operation failed: {0}")] - Operation(#[from] crate::illumos::zone::Error), + #[error("Failed to create address: {0}")] + AddressCreation(#[from] crate::illumos::zone::EnsureAddressError), + + #[error("Zone management command failed: {0}")] + ZoneOperation(#[from] crate::illumos::zone::AdmError), #[error("Zone error accessing datalink: {0}")] Datalink(#[from] crate::illumos::dladm::Error), #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::Error), + AddrObject(#[from] crate::illumos::addrobj::ParseError), #[error("Timeout waiting for a service: {0}")] Timeout(String), + + #[error(transparent)] + NoControlInterface(#[from] crate::illumos::zone::GetControlInterfaceError), } /// Represents a running zone. diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index 3e9f93a26d7..a2d297609b9 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -41,10 +41,7 @@ impl VnicAllocator { /// /// VnicAllocator::new("Storage") produces /// - oxControlStorage[NNN] - pub fn new>( - scope: S, - physical_link: PhysicalLink, - ) -> Self { + pub fn new>(scope: S, physical_link: PhysicalLink) -> Self { Self { value: Arc::new(AtomicU64::new(0)), scope: scope.as_ref().to_string(), diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 9f4a499e861..08209cc4123 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -4,6 +4,7 @@ //! API for interacting with Zones running Propolis. 
+use anyhow::anyhow; use ipnetwork::IpNetwork; use slog::Logger; use std::net::{IpAddr, Ipv6Addr}; @@ -24,50 +25,82 @@ pub const ZONE_PREFIX: &str = "oxz_"; pub const PROPOLIS_ZONE_PREFIX: &str = "oxz_propolis-server_"; #[derive(thiserror::Error, Debug)] -pub enum Error { - // TODO: These could be grouped into an "operation" error with an enum - // variant, if we want... - #[error("Cannot halt zone: {0}")] - Halt(zone::ZoneError), - - #[error("Cannot uninstall zone: {0}")] - Uninstall(zone::ZoneError), - - #[error("Cannot delete zone: {0}")] - Delete(zone::ZoneError), - - #[error("Cannot install zone: {0}")] - Install(zone::ZoneError), - - #[error("Cannot configure zone: {0}")] - Configure(zone::ZoneError), +enum Error { + #[error("Zone execution error: {0}")] + Execution(#[from] crate::illumos::ExecutionError), - #[error("Cannot clone zone: {0}")] - Clone(zone::ZoneError), + #[error(transparent)] + AddrObject(#[from] crate::illumos::addrobj::ParseError), - #[error("Cannot boot zone: {0}")] - Boot(zone::ZoneError), + #[error("Address not found: {addrobj}")] + AddressNotFound { addrobj: AddrObject }, +} - #[error("Cannot list zones: {0}")] - List(zone::ZoneError), +/// Operations issued via [`zone::Adm`]. +#[derive(Debug, Clone)] +pub enum Operation { + Boot, + Configure, + Delete, + Halt, + Install, + List, + Uninstall, +} - #[error("Zone execution error: {0}")] - Execution(#[from] crate::illumos::ExecutionError), +/// Errors from issuing [`zone::Adm`] commands. +#[derive(thiserror::Error, Debug)] +#[error("Failed to execute zoneadm command '{op:?}' for zone '{zone}': {err}")] +pub struct AdmError { + op: Operation, + zone: String, + #[source] + err: zone::ZoneError, +} - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), +/// Errors which may be encountered when deleting addresses. 
+#[derive(thiserror::Error, Debug)] +#[error("Failed to delete address '{addrobj}' in zone '{zone}': {err}")] +pub struct DeleteAddressError { + zone: String, + addrobj: AddrObject, + #[source] + err: crate::illumos::ExecutionError, +} +/// Errors from [`Zones::get_control_interface`]. +/// Error which may be returned accessing the control interface of a zone. +#[derive(thiserror::Error, Debug)] +pub enum GetControlInterfaceError { #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), + Execution(#[from] crate::illumos::ExecutionError), - #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::Error), + #[error("VNIC starting with 'oxControl' not found in {zone}")] + NotFound { zone: String }, +} - #[error("Error accessing filesystem: {0}")] - Filesystem(std::io::Error), +/// Errors which may be encountered ensuring addresses. +#[derive(thiserror::Error, Debug)] +#[error( + "Failed to create address {request:?} with name {name} in {zone}: {err}" +)] +pub struct EnsureAddressError { + zone: String, + request: AddressRequest, + name: AddrObject, + #[source] + err: anyhow::Error, +} - #[error("Value not found")] - NotFound, +/// Errors from [`Zones::ensure_has_global_zone_v6_address`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to create address {address} with name {name} in the GZ on {link:?}: {err}")] +pub struct EnsureGzAddressError { + address: Ipv6Addr, + link: PhysicalLink, + name: String, + #[source] + err: anyhow::Error, } /// Describes the type of addresses which may be requested from a zone. @@ -101,7 +134,9 @@ impl Zones { /// /// Returns the state the zone was in before it was removed, or None if the /// zone did not exist. - pub fn halt_and_remove(name: &str) -> Result, Error> { + pub fn halt_and_remove( + name: &str, + ) -> Result, AdmError> { match Self::find(name)? 
{ None => Ok(None), Some(zone) => { @@ -117,17 +152,29 @@ impl Zones { }; if halt { - zone::Adm::new(name).halt().map_err(Error::Halt)?; + zone::Adm::new(name).halt().map_err(|err| AdmError { + op: Operation::Halt, + zone: name.to_string(), + err, + })?; } if uninstall { - zone::Adm::new(name) - .uninstall(/* force= */ true) - .map_err(Error::Uninstall)?; + zone::Adm::new(name).uninstall(/* force= */ true).map_err( + |err| AdmError { + op: Operation::Uninstall, + zone: name.to_string(), + err, + }, + )?; } zone::Config::new(name) .delete(/* force= */ true) .run() - .map_err(Error::Delete)?; + .map_err(|err| AdmError { + op: Operation::Delete, + zone: name.to_string(), + err, + })?; Ok(Some(state)) } } @@ -137,7 +184,7 @@ impl Zones { pub fn halt_and_remove_logged( log: &Logger, name: &str, - ) -> Result<(), Error> { + ) -> Result<(), AdmError> { if let Some(state) = Self::halt_and_remove(name)? { info!( log, @@ -154,7 +201,7 @@ impl Zones { datasets: &[zone::Dataset], devices: &[zone::Device], vnics: Vec, - ) -> Result<(), Error> { + ) -> Result<(), AdmError> { if let Some(zone) = Self::find(zone_name)? { info!( log, @@ -204,28 +251,44 @@ impl Zones { ..Default::default() }); } - cfg.run().map_err(Error::Configure)?; + cfg.run().map_err(|err| AdmError { + op: Operation::Configure, + zone: zone_name.to_string(), + err, + })?; info!(log, "Installing Omicron zone: {}", zone_name); - zone::Adm::new(zone_name) - .install(&[zone_image.as_ref()]) - .map_err(Error::Install)?; + zone::Adm::new(zone_name).install(&[zone_image.as_ref()]).map_err( + |err| AdmError { + op: Operation::Install, + zone: zone_name.to_string(), + err, + }, + )?; Ok(()) } /// Boots a zone (named `name`). 
- pub fn boot(name: &str) -> Result<(), Error> { - zone::Adm::new(name).boot().map_err(Error::Boot)?; + pub fn boot(name: &str) -> Result<(), AdmError> { + zone::Adm::new(name).boot().map_err(|err| AdmError { + op: Operation::Boot, + zone: name.to_string(), + err, + })?; Ok(()) } /// Returns all zones that may be managed by the Sled Agent. /// /// These zones must have names starting with [`ZONE_PREFIX`]. - pub fn get() -> Result, Error> { + pub fn get() -> Result, AdmError> { Ok(zone::Adm::list() - .map_err(Error::List)? + .map_err(|err| AdmError { + op: Operation::List, + zone: "".to_string(), + err, + })? .into_iter() .filter(|z| z.name().starts_with(ZONE_PREFIX)) .collect()) @@ -235,12 +298,14 @@ impl Zones { /// /// Can only return zones that start with [`ZONE_PREFIX`], as they /// are managed by the Sled Agent. - pub fn find(name: &str) -> Result, Error> { + pub fn find(name: &str) -> Result, AdmError> { Ok(Self::get()?.into_iter().find(|zone| zone.name() == name)) } /// Returns the name of the VNIC used to communicate with the control plane. - pub fn get_control_interface(zone: &str) -> Result { + pub fn get_control_interface( + zone: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, @@ -252,8 +317,7 @@ impl Zones { "LINK", ]); let output = execute(cmd)?; - String::from_utf8(output.stdout) - .map_err(Error::Parse)? + String::from_utf8_lossy(&output.stdout) .lines() .find_map(|name| { if name.starts_with(VNIC_PREFIX_CONTROL) { @@ -262,7 +326,9 @@ impl Zones { None } }) - .ok_or(Error::NotFound) + .ok_or(GetControlInterfaceError::NotFound { + zone: zone.to_string(), + }) } /// Ensures that an IP address on an interface matches the requested value. 
@@ -277,23 +343,36 @@ impl Zones { zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, - ) -> Result { - match Self::get_address(zone, addrobj) { - Ok(addr) => { - if let AddressRequest::Static(expected_addr) = addrtype { - // If the address is static, we need to validate that it - // matches the value we asked for. - if addr != expected_addr { - // If the address doesn't match, try removing the old - // value before using the new one. - Self::delete_address(zone, addrobj)?; - return Self::create_address(zone, addrobj, addrtype); + ) -> Result { + |zone, addrobj, addrtype| -> Result { + match Self::get_address(zone, addrobj) { + Ok(addr) => { + if let AddressRequest::Static(expected_addr) = addrtype { + // If the address is static, we need to validate that it + // matches the value we asked for. + if addr != expected_addr { + // If the address doesn't match, try removing the old + // value before using the new one. + Self::delete_address(zone, addrobj) + .map_err(|e| anyhow!(e))?; + return Self::create_address( + zone, addrobj, addrtype, + ) + .map_err(|e| anyhow!(e)); + } } + Ok(addr) } - Ok(addr) + Err(_) => Self::create_address(zone, addrobj, addrtype) + .map_err(|e| anyhow!(e)), } - Err(_) => Self::create_address(zone, addrobj, addrtype), - } + }(zone, addrobj, addrtype) + .map_err(|err| EnsureAddressError { + zone: zone.unwrap_or("global").to_string(), + request: addrtype, + name: addrobj.clone(), + err, + }) } /// Gets the IP address of an interface. @@ -317,10 +396,10 @@ impl Zones { let cmd = command.args(args); let output = execute(cmd)?; - String::from_utf8(output.stdout)? + String::from_utf8_lossy(&output.stdout) .lines() .find_map(|s| s.parse().ok()) - .ok_or(Error::NotFound) + .ok_or(Error::AddressNotFound { addrobj: addrobj.clone() }) } /// Returns Ok(()) if `addrobj` has a corresponding link-local IPv6 address. 
@@ -344,13 +423,13 @@ impl Zones { let args = prefix.iter().chain(show_addr_args); let cmd = command.args(args); let output = execute(cmd)?; - if let Some(_) = String::from_utf8(output.stdout)? + if let Some(_) = String::from_utf8_lossy(&output.stdout) .lines() .find(|s| s.trim() == "addrconf") { return Ok(()); } - Err(Error::NotFound) + Err(Error::AddressNotFound { addrobj: addrobj.clone() }) } // Attempts to create the requested address. @@ -361,7 +440,7 @@ impl Zones { zone: Option<&'a str>, addrobj: &AddrObject, addrtype: AddressRequest, - ) -> Result<(), Error> { + ) -> Result<(), crate::illumos::ExecutionError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -396,7 +475,7 @@ impl Zones { pub fn delete_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, - ) -> Result<(), Error> { + ) -> Result<(), DeleteAddressError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![]; if let Some(zone) = zone { @@ -409,7 +488,11 @@ impl Zones { args.push(addrobj.to_string()); let cmd = command.args(args); - execute(cmd)?; + execute(cmd).map_err(|err| DeleteAddressError { + zone: zone.unwrap_or("global").to_string(), + addrobj: addrobj.clone(), + err, + })?; Ok(()) } @@ -423,12 +506,8 @@ impl Zones { fn ensure_has_link_local_v6_address<'a>( zone: Option<&'a str>, addrobj: &AddrObject, - ) -> Result<(), Error> { - let link_local_addrobj = addrobj.on_same_interface("linklocal")?; - - if let Ok(()) = - Self::has_link_local_v6_address(zone, &link_local_addrobj) - { + ) -> Result<(), crate::illumos::ExecutionError> { + if let Ok(()) = Self::has_link_local_v6_address(zone, &addrobj) { return Ok(()); } @@ -444,7 +523,7 @@ impl Zones { "-t", "-T", "addrconf", - &link_local_addrobj.to_string(), + &addrobj.to_string(), ]; let args = prefix.iter().chain(create_addr_args); @@ -460,24 +539,43 @@ impl Zones { link: PhysicalLink, address: Ipv6Addr, name: &str, - ) -> Result<(), Error> { - let 
gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal")?; - Self::ensure_has_link_local_v6_address(None, &gz_link_local_addrobj)?; - - // Ensure that a static IPv6 address has been allocated - // to the Global Zone. Without this, we don't have a way - // to route to IP addresses that we want to create in - // the non-GZ. Note that we use a `/64` prefix, as all addresses - // allocated for services on this sled itself are within the underlay - // prefix. Anything else must be routed through Sidecar. - Self::ensure_address( - None, - &gz_link_local_addrobj.on_same_interface(name)?, - AddressRequest::new_static( - IpAddr::V6(address), - Some(omicron_common::address::SLED_PREFIX), - ), - )?; + ) -> Result<(), EnsureGzAddressError> { + // Call the guts of this function within a closure to make it easier + // to wrap the error with appropriate context. + |link: PhysicalLink, address, name| -> Result<(), anyhow::Error> { + let gz_link_local_addrobj = AddrObject::new(&link.0, "linklocal") + .map_err(|err| anyhow!(err))?; + Self::ensure_has_link_local_v6_address( + None, + &gz_link_local_addrobj, + ) + .map_err(|err| anyhow!(err))?; + + // Ensure that a static IPv6 address has been allocated + // to the Global Zone. Without this, we don't have a way + // to route to IP addresses that we want to create in + // the non-GZ. Note that we use a `/64` prefix, as all addresses + // allocated for services on this sled itself are within the underlay + // prefix. Anything else must be routed through Sidecar. 
+ Self::ensure_address( + None, + &gz_link_local_addrobj + .on_same_interface(name) + .map_err(|err| anyhow!(err))?, + AddressRequest::new_static( + IpAddr::V6(address), + Some(omicron_common::address::SLED_PREFIX), + ), + ) + .map_err(|err| anyhow!(err))?; + Ok(()) + }(link.clone(), address, name) + .map_err(|err| EnsureGzAddressError { + address, + link, + name: name.to_string(), + err, + })?; Ok(()) } @@ -500,9 +598,11 @@ impl Zones { if addr.is_ipv6() { // Finally, actually ensure that the v6 address we want // exists within the zone. + let link_local_addrobj = + addrobj.on_same_interface("linklocal")?; Self::ensure_has_link_local_v6_address( Some(zone), - addrobj, + &link_local_addrobj, )?; } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 099998075be..08775d837c6 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -44,9 +44,6 @@ pub enum Error { #[error("Failure accessing data links: {0}")] Datalink(#[from] crate::illumos::dladm::Error), - #[error("Error accessing zones: {0}")] - Zone(#[from] crate::illumos::zone::Error), - #[error("Failure from Propolis Client: {0}")] Propolis(#[from] propolis_client::Error), diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 608dd76233d..2e5970374c6 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -26,12 +26,6 @@ use crate::instance::MockInstance as Instance; pub enum Error { #[error("Instance error: {0}")] Instance(#[from] crate::instance::Error), - - #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), - - #[error(transparent)] - Zone(#[from] crate::illumos::zone::Error), } struct InstanceManagerInternal { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 338eed2d625..16eab79a707 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,31 +20,22 @@ use tokio::sync::Mutex; #[derive(thiserror::Error, Debug)] pub enum Error { 
#[error("Cannot serialize TOML to file {path}: {err}")] - TomlSerialize { - path: PathBuf, - err: toml::ser::Error, - }, + TomlSerialize { path: PathBuf, err: toml::ser::Error }, #[error("Cannot deserialize TOML from file {path}: {err}")] - TomlDeserialize { - path: PathBuf, - err: toml::de::Error, - }, + TomlDeserialize { path: PathBuf, err: toml::de::Error }, #[error("I/O Error accessing {path}: {err}")] - Io { - path: PathBuf, - err: std::io::Error, - }, + Io { path: PathBuf, err: std::io::Error }, - #[error(transparent)] - RunningZone(#[from] crate::illumos::running_zone::Error), + #[error("Zone error: {message}: {err}")] + RunningZone { message: String, err: crate::illumos::running_zone::Error }, - #[error("Failed to add address to the global zone: {0}")] - GzAddressFailure(crate::illumos::zone::Error), - - #[error(transparent)] - Dladm(#[from] crate::illumos::dladm::Error), + #[error("Failed to add GZ addresses: {message}: {err}")] + GzAddressFailure { + message: String, + err: crate::illumos::zone::EnsureGzAddressError, + }, #[error("Could not initialize service {service} as requested: {message}")] BadServiceRequest { service: String, message: String }, @@ -111,19 +102,13 @@ impl ServiceManager { config_path.to_string_lossy() ); let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?, - ).map_err(|err| { - Error::TomlDeserialize { - path: config_path.clone(), - err, - } + &tokio::fs::read_to_string(&config_path).await.map_err( + |err| Error::Io { path: config_path.clone(), err }, + )?, + ) + .map_err(|err| Error::TomlDeserialize { + path: config_path.clone(), + err, })?; let mut existing_zones = mgr.zones.lock().await; mgr.initialize_services_locked(&mut existing_zones, &cfg.services) @@ -184,15 +169,33 @@ impl ServiceManager { // vnics= vec![], ) - .await?; + .await + .map_err(|err| Error::RunningZone { + message: format!("Could not 
install {}", service.name), + err, + })?; - let running_zone = RunningZone::boot(installed_zone).await?; + let running_zone = RunningZone::boot(installed_zone) + .await + .map_err(|err| Error::RunningZone { + message: format!("Could not boot {}", service.name), + err, + })?; for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await?; + running_zone.ensure_address(addr_request).await.map_err( + |err| Error::RunningZone { + message: format!( + "Failed to create address {} for {}", + addr.to_string(), + service.name + ), + err, + }, + )?; info!( self.log, "Ensuring address {} exists - OK", @@ -214,19 +217,30 @@ impl ServiceManager { *addr, &addr_name, ) - .map_err(|e| Error::GzAddressFailure(e))?; + .map_err(|err| Error::GzAddressFailure { + message: format!( + "Failed adding address for {}", + service.name + ), + err, + })?; } debug!(self.log, "importing manifest"); - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "import", - &format!( - "/var/svc/manifest/site/{}/manifest.xml", - service.name - ), - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "import", + &format!( + "/var/svc/manifest/site/{}/manifest.xml", + service.name + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Failed to import manifest".to_string(), + err, + })?; let smf_name = format!("svc:/system/illumos/{}", service.name); let default_smf_name = format!("{}:default", smf_name); @@ -241,36 +255,56 @@ impl ServiceManager { message: "Not enough addresses".to_string(), } })?; - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT - ), - ])?; - - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/dns_address=[{}]:{}", - address, 
DNS_PORT - ), - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Could not set server address property" + .to_string(), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), + ]) + .map_err(|err| Error::RunningZone { + message: "Could not set DNS address property" + .to_string(), + err, + })?; // Refresh the manifest with the new properties we set, // so they become "effective" properties when the service is enabled. - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::RunningZone { + message: format!( + "Failed to refresh SMF manifest: {}", + default_smf_name + ), + err, + })?; } _ => { info!( @@ -282,12 +316,20 @@ impl ServiceManager { debug!(self.log, "enabling service"); - running_zone.run_cmd(&[ - crate::illumos::zone::SVCADM, - "enable", - "-t", - &default_smf_name, - ])?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCADM, + "enable", + "-t", + &default_smf_name, + ]) + .map_err(|err| Error::RunningZone { + message: format!( + "Failed to enable {} service", + default_smf_name + ), + err, + })?; existing_zones.push(running_zone); } @@ -308,19 +350,13 @@ impl ServiceManager { let services_to_initialize = { if config_path.exists() { let cfg: ServiceEnsureBody = toml::from_str( - &tokio::fs::read_to_string(&config_path) - .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?, - ).map_err(|err| { - Error::TomlDeserialize { - path: config_path.clone(), - err, - } + &tokio::fs::read_to_string(&config_path).await.map_err( + |err| 
Error::Io { path: config_path.clone(), err }, + )?, + ) + .map_err(|err| Error::TomlDeserialize { + path: config_path.clone(), + err, })?; let known_services = cfg.services; @@ -358,21 +394,13 @@ impl ServiceManager { let serialized_services = toml::Value::try_from(&request) .expect("Cannot serialize service list"); - let services_str = toml::to_string(&serialized_services) - .map_err(|err| { - Error::TomlSerialize { - path: config_path.clone(), - err, - } + let services_str = + toml::to_string(&serialized_services).map_err(|err| { + Error::TomlSerialize { path: config_path.clone(), err } })?; tokio::fs::write(&config_path, services_str) .await - .map_err(|err| { - Error::Io { - path: config_path.clone(), - err, - } - })?; + .map_err(|err| Error::Io { path: config_path.clone(), err })?; Ok(()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 42249352b72..737c2ec2cd7 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -35,16 +35,16 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Datalink error: {message}, {err}")] - Datalink { - message: String, - err: crate::illumos::dladm::Error, - }, + Datalink { message: String, err: crate::illumos::dladm::Error }, #[error(transparent)] Services(#[from] crate::services::Error), #[error(transparent)] - Zone(#[from] crate::illumos::zone::Error), + ZoneOperation(#[from] crate::illumos::zone::AdmError), + + #[error("Failed to create Sled Subnet: {err}")] + SledSubnet { err: crate::illumos::zone::EnsureGzAddressError }, #[error(transparent)] Zfs(#[from] crate::illumos::zfs::Error), @@ -101,11 +101,9 @@ impl SledAgent { let data_link = if let Some(link) = config.data_link.clone() { link } else { - Dladm::find_physical().map_err(|err| { - Error::Datalink { - message: "Looking up physical link".to_string(), - err, - } + Dladm::find_physical().map_err(|err| Error::Datalink { + message: "Looking up physical link".to_string(), + err, })? 
}; @@ -130,7 +128,8 @@ impl SledAgent { data_link.clone(), *sled_address.ip(), "sled6", - )?; + ) + .map_err(|err| Error::SledSubnet { err })?; // Identify all existing zones which should be managed by the Sled // Agent. @@ -154,19 +153,15 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics().map_err(|err| { - Error::Datalink { - message: "Looking up VNICs on boot".to_string(), - err, - } + let vnics = Dladm::get_vnics().map_err(|err| Error::Datalink { + message: "Looking up VNICs on boot".to_string(), + err, })?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic).map_err(|err| { - Error::Datalink { - message: "Deleting VNIC during boot".to_string(), - err, - } + Dladm::delete_vnic(&vnic).map_err(|err| Error::Datalink { + message: "Deleting VNIC during boot".to_string(), + err, })?; } @@ -194,8 +189,7 @@ impl SledAgent { data_link.clone(), ); let services = - ServiceManager::new(log.clone(), data_link.clone(), None) - .await?; + ServiceManager::new(log.clone(), data_link.clone(), None).await?; Ok(SledAgent { id: config.id, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index f911f1356df..d5b90887753 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -60,9 +60,6 @@ pub enum Error { #[error(transparent)] Zpool(#[from] crate::illumos::zpool::Error), - #[error("Failed to configure a zone: {0}")] - ZoneConfiguration(crate::illumos::zone::Error), - #[error("Failed to manage a running zone: {0}")] ZoneManagement(#[from] crate::illumos::running_zone::Error), From 1ab32aae74aa89dd4c7fd0dcb01e66c1ec306fad Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 17:23:42 -0400 Subject: [PATCH 25/41] dladm errors --- sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/config.rs | 4 +- sled-agent/src/illumos/dladm.rs | 80 +++++++++++++++++++------- 
sled-agent/src/illumos/running_zone.rs | 6 +- sled-agent/src/illumos/vnic.rs | 16 ++++-- sled-agent/src/illumos/zone.rs | 2 +- sled-agent/src/instance.rs | 5 +- sled-agent/src/sled_agent.rs | 25 ++++---- sled-agent/src/storage_manager.rs | 3 - 9 files changed, 93 insertions(+), 50 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 50bb833ed5b..7ede44b06bf 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -119,7 +119,7 @@ fn mac_to_socket_addr(mac: MacAddr) -> SocketAddrV6 { // could be randomly generated when it no longer needs to be durable. pub fn bootstrap_address( link: PhysicalLink, -) -> Result { +) -> Result { let mac = Dladm::get_mac(link)?; Ok(mac_to_socket_addr(mac)) } diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index ca332b2c07c..63b9b1115cc 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -49,7 +49,9 @@ impl Config { Ok(config) } - pub fn get_link(&self) -> Result { + pub fn get_link( + &self, + ) -> Result { let link = if let Some(link) = self.data_link.clone() { link } else { diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 92c1ed03916..441fe18fa49 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -15,21 +15,56 @@ pub const VNIC_PREFIX_CONTROL: &str = "oxControl"; pub const DLADM: &str = "/usr/sbin/dladm"; +/// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Device not found")] - NotFound, - - #[error("Subcommand failure: {0}")] +pub enum FindPhysicalLinkError { + #[error("Failed to execute command to find physical link: {0}")] Execution(#[from] ExecutionError), - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), + #[error("No Physical Link devices found")] + NoPhysicalLinkFound, +} + +/// Errors returned from [`Dladm::get_mac`]. 
+#[derive(thiserror::Error, Debug)] +pub enum GetMacError { + #[error("Mac Address cannot be looked up; Link not found: {0:?}")] + NotFound(PhysicalLink), + + #[error("Failed to execute command to get MAC address: {0}")] + Execution(#[from] ExecutionError), #[error("Failed to parse MAC: {0}")] ParseMac(#[from] macaddr::ParseError), } +/// Errors returned from [`Dladm::create_vnic`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to create VNIC {name} on link {link:?}: {err}")] +pub struct CreateVnicError { + name: String, + link: PhysicalLink, + #[source] + err: ExecutionError, +} + +/// Errors returned from [`Dladm::get_vnics`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to get vnics: {err}")] +pub struct GetVnicError { + #[source] + err: ExecutionError, +} + +/// Errors returned from [`Dladm::delete_vnic`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to delete vnic {name}: {err}")] +pub struct DeleteVnicError { + name: String, + #[source] + err: ExecutionError, +} + /// The name of a physical datalink. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] pub struct PhysicalLink(pub String); @@ -40,24 +75,24 @@ pub struct Dladm {} #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Dladm { /// Returns the name of the first observed physical data link. - pub fn find_physical() -> Result { + pub fn find_physical() -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-phys", "-p", "-o", "LINK"]); let output = execute(cmd)?; - let name = String::from_utf8(output.stdout)? + let name = String::from_utf8_lossy(&output.stdout) .lines() // TODO: This is arbitrary, but we're currently grabbing the first // physical device. Should we have a more sophisticated method for // selection? .next() .map(|s| s.trim()) - .ok_or_else(|| Error::NotFound)? + .ok_or_else(|| FindPhysicalLinkError::NoPhysicalLinkFound)? 
.to_string(); Ok(PhysicalLink(name)) } /// Returns the MAC address of a physical link. - pub fn get_mac(link: PhysicalLink) -> Result { + pub fn get_mac(link: PhysicalLink) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ DLADM, @@ -69,11 +104,11 @@ impl Dladm { &link.0, ]); let output = execute(cmd)?; - let name = String::from_utf8(output.stdout)? + let name = String::from_utf8_lossy(&output.stdout) .lines() .next() .map(|s| s.trim()) - .ok_or_else(|| Error::NotFound)? + .ok_or_else(|| GetMacError::NotFound(link))? .to_string(); // Ensure the MAC address is zero-padded, so it may be parsed as a @@ -99,7 +134,7 @@ impl Dladm { vnic_name: &str, mac: Option, vlan: Option, - ) -> Result<(), Error> { + ) -> Result<(), CreateVnicError> { let mut command = std::process::Command::new(PFEXEC); let mut args = vec![ DLADM.to_string(), @@ -121,17 +156,21 @@ impl Dladm { args.push(vnic_name.to_string()); let cmd = command.args(&args); - execute(cmd)?; + execute(cmd).map_err(|err| CreateVnicError { + name: vnic_name.to_string(), + link: physical.clone(), + err, + })?; Ok(()) } /// Returns all VNICs that may be managed by the Sled Agent. - pub fn get_vnics() -> Result, Error> { + pub fn get_vnics() -> Result, GetVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); - let output = execute(cmd)?; + let output = execute(cmd).map_err(|err| GetVnicError { err })?; - let vnics = String::from_utf8(output.stdout)? + let vnics = String::from_utf8_lossy(&output.stdout) .lines() .filter(|vnic| vnic.starts_with(VNIC_PREFIX)) .map(|s| s.to_owned()) @@ -140,10 +179,11 @@ impl Dladm { } /// Remove a vnic from the sled. 
- pub fn delete_vnic(name: &str) -> Result<(), Error> { + pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "delete-vnic", name]); - execute(cmd)?; + execute(cmd) + .map_err(|err| DeleteVnicError { name: name.to_string(), err })?; Ok(()) } } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index 8898ae69654..ca51b9b112f 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -37,8 +37,10 @@ pub enum Error { #[error("Zone management command failed: {0}")] ZoneOperation(#[from] crate::illumos::zone::AdmError), - #[error("Zone error accessing datalink: {0}")] - Datalink(#[from] crate::illumos::dladm::Error), + // TODO: Extra context could be useful - who were we allocating the VNIC + // for? + #[error("Failed to create VNIC: {0}")] + VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), #[error(transparent)] AddrObject(#[from] crate::illumos::addrobj::ParseError), diff --git a/sled-agent/src/illumos/vnic.rs b/sled-agent/src/illumos/vnic.rs index a2d297609b9..d200e4f4bc0 100644 --- a/sled-agent/src/illumos/vnic.rs +++ b/sled-agent/src/illumos/vnic.rs @@ -5,7 +5,10 @@ //! API for controlling a single instance. use crate::common::vlan::VlanID; -use crate::illumos::dladm::{PhysicalLink, VNIC_PREFIX, VNIC_PREFIX_CONTROL}; +use crate::illumos::dladm::{ + CreateVnicError, DeleteVnicError, PhysicalLink, VNIC_PREFIX, + VNIC_PREFIX_CONTROL, +}; use omicron_common::api::external::MacAddr; use std::sync::{ atomic::{AtomicU64, Ordering}, @@ -17,8 +20,6 @@ use crate::illumos::dladm::Dladm; #[cfg(test)] use crate::illumos::dladm::MockDladm as Dladm; -type Error = crate::illumos::dladm::Error; - /// A shareable wrapper around an atomic counter. /// May be used to allocate runtime-unique IDs for objects /// which have naming constraints - such as VNICs. 
@@ -54,7 +55,7 @@ impl VnicAllocator { &self, mac: Option, vlan: Option, - ) -> Result { + ) -> Result { let allocator = self.new_superscope("Guest"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); @@ -64,7 +65,10 @@ impl VnicAllocator { /// Creates a new NIC, intended for allowing Propolis to communicate /// with the control plane. - pub fn new_control(&self, mac: Option) -> Result { + pub fn new_control( + &self, + mac: Option, + ) -> Result { let allocator = self.new_superscope("Control"); let name = allocator.next(); debug_assert!(name.starts_with(VNIC_PREFIX)); @@ -111,7 +115,7 @@ impl Vnic { } /// Deletes a NIC (if it has not already been deleted). - pub fn delete(&mut self) -> Result<(), Error> { + pub fn delete(&mut self) -> Result<(), DeleteVnicError> { if self.deleted { Ok(()) } else { diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 08209cc4123..2cc69283065 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -72,7 +72,7 @@ pub struct DeleteAddressError { /// Error which may be returned accessing the control interface of a zone. #[derive(thiserror::Error, Debug)] pub enum GetControlInterfaceError { - #[error(transparent)] + #[error("Failed to query for control interface: {0}")] Execution(#[from] crate::illumos::ExecutionError), #[error("VNIC starting with 'oxControl' not found in {zone}")] diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 08775d837c6..cbcf957154d 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -41,8 +41,9 @@ pub enum Error { #[error("Failed to wait for service: {0}")] Timeout(String), - #[error("Failure accessing data links: {0}")] - Datalink(#[from] crate::illumos::dladm::Error), + // TODO: Who are we allocating the VNIC for? 
+ #[error("Failed to create VNIC: {0}")] + VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), #[error("Failure from Propolis Client: {0}")] Propolis(#[from] propolis_client::Error), diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 737c2ec2cd7..99965018e25 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -34,8 +34,14 @@ use crate::illumos::{ #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("Datalink error: {message}, {err}")] - Datalink { message: String, err: crate::illumos::dladm::Error }, + #[error("Physical link not in config, nor found automatically: {0}")] + FindPhysicalLink(#[from] crate::illumos::dladm::FindPhysicalLinkError), + + #[error("Failed to lookup VNICs on boot: {0}")] + GetVnics(#[from] crate::illumos::dladm::GetVnicError), + + #[error("Failed to delete VNIC on boot: {0}")] + DeleteVnic(#[from] crate::illumos::dladm::DeleteVnicError), #[error(transparent)] Services(#[from] crate::services::Error), @@ -101,10 +107,7 @@ impl SledAgent { let data_link = if let Some(link) = config.data_link.clone() { link } else { - Dladm::find_physical().map_err(|err| Error::Datalink { - message: "Looking up physical link".to_string(), - err, - })? + Dladm::find_physical()? 
}; // Before we start creating zones, we need to ensure that the @@ -153,16 +156,10 @@ impl SledAgent { // // This should be accessible via: // $ dladm show-linkprop -c -p zone -o LINK,VALUE - let vnics = Dladm::get_vnics().map_err(|err| Error::Datalink { - message: "Looking up VNICs on boot".to_string(), - err, - })?; + let vnics = Dladm::get_vnics()?; for vnic in vnics { warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic).map_err(|err| Error::Datalink { - message: "Deleting VNIC during boot".to_string(), - err, - })?; + Dladm::delete_vnic(&vnic)?; } let storage = StorageManager::new( diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index d5b90887753..ba399228590 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -51,9 +51,6 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error(transparent)] - Datalink(#[from] crate::illumos::dladm::Error), - #[error(transparent)] Zfs(#[from] crate::illumos::zfs::Error), From 6d165a77969590f912fd6c94e5ee2cee9b4d6995 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 17:33:09 -0400 Subject: [PATCH 26/41] Zpool errors --- sled-agent/src/illumos/zpool.rs | 50 ++++++++++++++++++------------- sled-agent/src/storage_manager.rs | 2 +- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index 67c1c0eaf88..b4021dd8ce9 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -5,7 +5,6 @@ //! Utilities for managing Zpools. 
use crate::illumos::execute; -use omicron_common::api::external::Error as ExternalError; use serde::{Deserialize, Deserializer}; use std::str::FromStr; use uuid::Uuid; @@ -13,24 +12,24 @@ use uuid::Uuid; const ZPOOL: &str = "/usr/sbin/zpool"; #[derive(thiserror::Error, Debug, PartialEq, Eq)] -pub enum ParseError { - #[error("Failed to parse output as UTF-8: {0}")] - Utf8(#[from] std::string::FromUtf8Error), - - #[error("Failed to parse output: {0}")] - Parse(String), -} +#[error("Failed to parse output: {0}")] +pub struct ParseError(String); #[derive(thiserror::Error, Debug)] -pub enum Error { +enum Error { #[error("Zpool execution error: {0}")] Execution(#[from] crate::illumos::ExecutionError), #[error(transparent)] Parse(#[from] ParseError), +} - #[error("Failed to execute subcommand: {0}")] - Command(ExternalError), +#[derive(thiserror::Error, Debug)] +#[error("Failed to get info for zpool {name}: {err}")] +pub struct GetInfoError { + name: String, + #[source] + err: Error, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -62,7 +61,7 @@ impl FromStr for ZpoolHealth { "OFFLINE" => Ok(ZpoolHealth::Offline), "REMOVED" => Ok(ZpoolHealth::Removed), "UNAVAIL" => Ok(ZpoolHealth::Unavailable), - _ => Err(ParseError::Parse(format!( + _ => Err(ParseError(format!( "Unrecognized zpool 'health': {}", s ))), @@ -111,13 +110,13 @@ impl FromStr for ZpoolInfo { fn from_str(s: &str) -> Result { // Lambda helpers for error handling. 
let expected_field = |name| { - ParseError::Parse(format!( + ParseError(format!( "Missing '{}' value in zpool list output", name )) }; let failed_to_parse = |name, err| { - ParseError::Parse(format!( + ParseError(format!( "Failed to parse field '{}': {}", name, err )) @@ -155,7 +154,7 @@ pub struct Zpool {} #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zpool { - pub fn get_info(name: &str) -> Result { + pub fn get_info(name: &str) -> Result { let mut command = std::process::Command::new(ZPOOL); let cmd = command.args(&[ "list", @@ -164,11 +163,20 @@ impl Zpool { name, ]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout) - .map_err(|e| ParseError::Utf8(e))?; - - let zpool = stdout.parse::()?; + let output = execute(cmd).map_err(|err| { + GetInfoError { + name: name.to_string(), + err: err.into(), + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); + let zpool = stdout.parse::() + .map_err(|err| { + GetInfoError { + name: name.to_string(), + err: err.into(), + } + })?; Ok(zpool) } } @@ -294,7 +302,7 @@ mod test { let input = format!("{} {} {} {}", name, size, allocated, free); let result: Result = input.parse(); - let expected_err = ParseError::Parse( + let expected_err = ParseError( "Missing 'health' value in zpool list output".to_owned(), ); assert_eq!(result.unwrap_err(), expected_err,); diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index ba399228590..8b9ce638583 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -55,7 +55,7 @@ pub enum Error { Zfs(#[from] crate::illumos::zfs::Error), #[error(transparent)] - Zpool(#[from] crate::illumos::zpool::Error), + GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), #[error("Failed to manage a running zone: {0}")] ZoneManagement(#[from] crate::illumos::running_zone::Error), From 4be8ed0321a50d868ab2e5c3d7f124f89bf87002 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 
18:03:36 -0400 Subject: [PATCH 27/41] zfs errors --- sled-agent/src/illumos/zfs.rs | 131 +++++++++++++++++++++++------- sled-agent/src/sled_agent.rs | 2 +- sled-agent/src/storage_manager.rs | 13 ++- 3 files changed, 116 insertions(+), 30 deletions(-) diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index 61b468afaa6..b1ca75002ac 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -12,25 +12,70 @@ pub const ZONE_ZFS_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_DATASET: &str = "rpool/zone"; const ZFS: &str = "/usr/sbin/zfs"; +/// Error returned by [`Zfs::list_filesystems`]. #[derive(thiserror::Error, Debug)] -pub enum Error { +#[error("Could not list filesystems within dataset {name}: {err}")] +pub struct ListFilesystemsError { + name: String, + #[source] + err: crate::illumos::ExecutionError, +} + +#[derive(thiserror::Error, Debug)] +enum EnsureFilesystemErrorRaw { #[error("ZFS execution error: {0}")] Execution(#[from] crate::illumos::ExecutionError), - #[error("Does not exist: {0}")] - NotFound(String), + #[error("Filesystem does not exist, and formatting was not requested")] + NotFoundNotFormatted, #[error("Unexpected output from ZFS commands: {0}")] Output(String), +} + +/// Error returned by [`Zfs::ensure_zoned_filesystem`]. 
+#[derive(thiserror::Error, Debug)] +#[error("Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}")] +pub struct EnsureFilesystemError { + name: String, + mountpoint: Mountpoint, + #[source] + err: EnsureFilesystemErrorRaw, +} + +/// Error returned by [`Zfs::set_oxide_value`] +#[derive(thiserror::Error, Debug)] +#[error("Failed to set value '{name}={value}' on filesystem {filesystem}: {err}")] +pub struct SetValueError { + filesystem: String, + name: String, + value: String, + err: crate::illumos::ExecutionError, +} - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), +#[derive(thiserror::Error, Debug)] +enum GetValueErrorRaw { + #[error(transparent)] + Execution(#[from] crate::illumos::ExecutionError), + + #[error("No value found with that name")] + MissingValue, +} + +/// Error returned by [`Zfs::get_oxide_value`]. +#[derive(thiserror::Error, Debug)] +#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")] +pub struct GetValueError { + filesystem: String, + name: String, + err: GetValueErrorRaw, } /// Wraps commands for interacting with ZFS. pub struct Zfs {} /// Describes a mountpoint for a ZFS filesystem. +#[derive(Debug, Clone)] pub enum Mountpoint { #[allow(dead_code)] Legacy, @@ -49,12 +94,17 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { /// Lists all filesystems within a dataset. 
- pub fn list_filesystems(name: &str) -> Result, Error> { + pub fn list_filesystems(name: &str) -> Result, ListFilesystemsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; + let output = execute(cmd).map_err(|err| { + ListFilesystemsError { + name: name.to_string(), + err, + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout .trim() .split('\n') @@ -71,26 +121,31 @@ impl Zfs { name: &str, mountpoint: Mountpoint, do_format: bool, - ) -> Result<(), Error> { + ) -> Result<(), EnsureFilesystemError> { // If the dataset exists, we're done. let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-Hpo", "name,type,mountpoint", name]); // If the list command returns any valid output, validate it. if let Ok(output) = execute(cmd) { - let stdout = String::from_utf8(output.stdout)?; + let stdout = String::from_utf8_lossy(&output.stdout); let values: Vec<&str> = stdout.trim().split('\t').collect(); if values != &[name, "filesystem", &mountpoint.to_string()] { - return Err(Error::Output(stdout)); + return Err(EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), + }); } return Ok(()); } if !do_format { - return Err(Error::NotFound(format!( - "Filesystem {} not found", - name - ))); + return Err(EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: EnsureFilesystemErrorRaw::NotFoundNotFormatted, + }); } // If it doesn't exist, make it. 
@@ -102,10 +157,16 @@ impl Zfs { "-o", "zoned=on", "-o", - &format!("mountpoint={}", mountpoint), + &format!("mountpoint={}", &mountpoint), name, ]); - execute(cmd)?; + execute(cmd).map_err(|err| { + EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: err.into(), + } + })?; Ok(()) } @@ -113,7 +174,7 @@ impl Zfs { filesystem_name: &str, name: &str, value: &str, - ) -> Result<(), Error> { + ) -> Result<(), SetValueError> { Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value) } @@ -121,33 +182,47 @@ impl Zfs { filesystem_name: &str, name: &str, value: &str, - ) -> Result<(), Error> { + ) -> Result<(), SetValueError> { let mut command = std::process::Command::new(PFEXEC); let value_arg = format!("{}={}", name, value); let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); - execute(cmd)?; + execute(cmd).map_err(|err| { + SetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + value: value.to_string(), + err, + } + })?; Ok(()) } pub fn get_oxide_value( filesystem_name: &str, name: &str, - ) -> Result { + ) -> Result { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value(filesystem_name: &str, name: &str) -> Result { + fn get_value(filesystem_name: &str, name: &str) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; + let output = execute(cmd).map_err(|err| { + GetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: err.into(), + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); let value = stdout.trim(); if value == "-" { - return Err(Error::NotFound(format!( - "Property {}, within filesystem {}", - name, filesystem_name - ))); + return Err(GetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: 
GetValueErrorRaw::MissingValue, + }); } Ok(value.to_string()) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 99965018e25..46090319783 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -53,7 +53,7 @@ pub enum Error { SledSubnet { err: crate::illumos::zone::EnsureGzAddressError }, #[error(transparent)] - Zfs(#[from] crate::illumos::zfs::Error), + ZfsEnsureFilesystem(#[from] crate::illumos::zfs::EnsureFilesystemError), #[error("Error managing instances: {0}")] Instance(#[from] crate::instance_manager::Error), diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 8b9ce638583..35bbda8cf10 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -51,8 +51,19 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { + // TODO: We could add the context of "why are we doing this op", maybe? + + #[error(transparent)] + ZfsListFilesystems(#[from] crate::illumos::zfs::ListFilesystemsError), + + #[error(transparent)] + ZfsEnsureFilesystem(#[from] crate::illumos::zfs::EnsureFilesystemError), + + #[error(transparent)] + ZfsSetValue(#[from] crate::illumos::zfs::SetValueError), + + #[error(transparent)] - Zfs(#[from] crate::illumos::zfs::Error), + ZfsGetValue(#[from] crate::illumos::zfs::GetValueError), #[error(transparent)] GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), From 7dd7fd927b7051b3c8bfa634095581a2af910408 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 18:07:25 -0400 Subject: [PATCH 28/41] execution errs --- sled-agent/src/illumos/mod.rs | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index bdec8e7e702..c03549769ac 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -19,8 +19,11 @@ const PFEXEC: &str = 
"/usr/bin/pfexec"; #[derive(thiserror::Error, Debug)] pub enum ExecutionError { - #[error("Failed to start execution of process: {0}")] - ExecutionStart(std::io::Error), + #[error("Failed to start execution of [{command}]: {err}")] + ExecutionStart { + command: String, + err: std::io::Error, + }, #[error( "Command [{command}] executed and failed with status: {status}. Output: {stderr}" @@ -38,21 +41,32 @@ pub enum ExecutionError { mod inner { use super::*; + fn to_string( + command: &mut std::process::Command + ) -> String { + command + .get_args() + .map(|s| s.to_string_lossy().into()) + .collect::>() + .join(" ") + } + // Helper function for starting the process and checking the // exit code result. pub fn execute( command: &mut std::process::Command, ) -> Result { let output = - command.output().map_err(|e| ExecutionError::ExecutionStart(e))?; + command.output().map_err(|err| { + ExecutionError::ExecutionStart { + command: to_string(command), + err, + } + })?; if !output.status.success() { return Err(ExecutionError::CommandFailure { - command: command - .get_args() - .map(|s| s.to_string_lossy().into()) - .collect::>() - .join(" "), + command: to_string(command), status: output.status, stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); From a094f8ed213a5e016fa0016bbd5fae1959cca08a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:14:47 -0400 Subject: [PATCH 29/41] Running zone --- sled-agent/src/illumos/running_zone.rs | 207 +++++++++++++++++++------ sled-agent/src/illumos/zone.rs | 15 +- sled-agent/src/instance.rs | 11 +- sled-agent/src/services.rs | 73 ++++----- sled-agent/src/storage_manager.rs | 30 ++-- 5 files changed, 233 insertions(+), 103 deletions(-) diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index ca51b9b112f..f0f53a628f0 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -17,39 +17,83 @@ use crate::illumos::zone::MockZones 
as Zones; #[cfg(not(test))] use crate::illumos::zone::Zones; +/// Errors returned from [`RunningZone::run_cmd`]. #[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Zone not found")] - NotFound, - - #[error("Zone is not running; it is in the {0:?} state instead")] - NotRunning(zone::State), - - #[error("Execution error: {0}")] - Execution(#[from] crate::illumos::ExecutionError), - - #[error("Failed to parse output: {0}")] - Parse(#[from] std::string::FromUtf8Error), - - #[error("Failed to create address: {0}")] - AddressCreation(#[from] crate::illumos::zone::EnsureAddressError), +#[error("Error running command in zone '{zone}': {err}")] +pub struct RunCommandError { + zone: String, + #[source] + err: crate::illumos::ExecutionError, +} - #[error("Zone management command failed: {0}")] - ZoneOperation(#[from] crate::illumos::zone::AdmError), +/// Errors returned from [`RunningZone::boot`]. +#[derive(thiserror::Error, Debug)] +pub enum BootError { + #[error("Error booting zone: {0}")] + Booting(#[from] crate::illumos::zone::AdmError), + + #[error("Zone booted, but timed out waiting for {service} in {zone}")] + Timeout { + service: String, + zone: String, + }, +} - // TODO: Extra context could be useful - who were we allocating the VNIC - // for? - #[error("Failed to create VNIC: {0}")] - VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), +/// Errors returned from [`RunningZone::ensure_address`]. 
+#[derive(thiserror::Error, Debug)] +pub enum EnsureAddressError { + #[error("Failed ensuring address {request:?} in {zone}: could not construct addrobj name: {err}")] + AddrObject{ + request: AddressRequest, + zone: String, + err: crate::illumos::addrobj::ParseError + }, #[error(transparent)] - AddrObject(#[from] crate::illumos::addrobj::ParseError), - - #[error("Timeout waiting for a service: {0}")] - Timeout(String), + EnsureAddressError(#[from] crate::illumos::zone::EnsureAddressError), +} - #[error(transparent)] - NoControlInterface(#[from] crate::illumos::zone::GetControlInterfaceError), +/// Errors returned from [`RunningZone::get`]. +#[derive(thiserror::Error, Debug)] +pub enum GetZoneError { + #[error("While looking up zones with prefix '{prefix}', could not get zones: {err}")] + GetZones { + prefix: String, + #[source] + err: crate::illumos::zone::AdmError, + }, + + #[error("Zone with prefix '{prefix}' not found")] + NotFound { + prefix: String + }, + + #[error("Cannot get zone '{name}': it is in the {state:?} state instead of running")] + NotRunning{ + name: String, + state: zone::State + }, + + #[error("Cannot get zone '{name}': Failed to acquire control interface {err}")] + ControlInterface { + name: String, + #[source] + err: crate::illumos::zone::GetControlInterfaceError, + }, + + #[error("Cannot get zone '{name}': Failed to create addrobj: {err}")] + AddrObject{ + name: String, + #[source] + err: crate::illumos::addrobj::ParseError, + }, + + #[error("Cannot get zone '{name}': Failed to ensure address exists: {err}")] + EnsureAddress { + name: String, + #[source] + err: crate::illumos::zone::EnsureAddressError, + }, } /// Represents a running zone. @@ -63,7 +107,7 @@ impl RunningZone { } /// Runs a command within the Zone, return the output. 
- pub fn run_cmd(&self, args: I) -> Result + pub fn run_cmd(&self, args: I) -> Result where I: IntoIterator, S: AsRef, @@ -79,15 +123,21 @@ impl RunningZone { .chain(suffix.iter().map(|a| a.as_ref())); let cmd = command.args(full_args); - let output = crate::illumos::execute(cmd)?; - let stdout = String::from_utf8(output.stdout)?; - Ok(stdout) + let output = crate::illumos::execute(cmd) + .map_err(|err| { + RunCommandError { + zone: name.to_string(), + err, + } + })?; + let stdout = String::from_utf8_lossy(&output.stdout); + Ok(stdout.to_string()) } /// Boots a new zone. /// /// Note that the zone must already be configured to be booted. - pub async fn boot(zone: InstalledZone) -> Result { + pub async fn boot(zone: InstalledZone) -> Result { // Boot the zone. info!(zone.log, "Zone booting"); @@ -98,7 +148,10 @@ impl RunningZone { let fmri = "svc:/milestone/network:default"; wait_for_service(Some(&zone.name), fmri) .await - .map_err(|_| Error::Timeout(fmri.to_string()))?; + .map_err(|_| BootError::Timeout { + service: fmri.to_string(), + zone: zone.name.to_string(), + })?; Ok(RunningZone { inner: zone }) } @@ -106,7 +159,7 @@ impl RunningZone { pub async fn ensure_address( &self, addrtype: AddressRequest, - ) -> Result { + ) -> Result { info!(self.inner.log, "Adding address: {:?}", addrtype); let name = match addrtype { AddressRequest::Dhcp => "omicron", @@ -115,7 +168,14 @@ impl RunningZone { std::net::IpAddr::V6(_) => "omicron6", }, }; - let addrobj = AddrObject::new(self.inner.control_vnic.name(), name)?; + let addrobj = AddrObject::new(self.inner.control_vnic.name(), name) + .map_err(|err| { + EnsureAddressError::AddrObject { + request: addrtype, + zone: self.inner.name.clone(), + err: err.into(), + } + })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; Ok(network) @@ -134,20 +194,46 @@ impl RunningZone { log: &Logger, zone_prefix: &str, addrtype: AddressRequest, - ) -> Result { - let zone_info = Zones::get()? 
+ ) -> Result { + let zone_info = Zones::get() + .map_err(|err| { + GetZoneError::GetZones { + prefix: zone_prefix.to_string(), + err, + } + })? .into_iter() .find(|zone_info| zone_info.name().starts_with(&zone_prefix)) - .ok_or_else(|| Error::NotFound)?; + .ok_or_else(|| GetZoneError::NotFound { prefix: zone_prefix.to_string() })?; if zone_info.state() != zone::State::Running { - return Err(Error::NotRunning(zone_info.state())); + return Err(GetZoneError::NotRunning { + name: zone_info.name().to_string(), + state: zone_info.state() + }); } let zone_name = zone_info.name(); - let vnic_name = Zones::get_control_interface(zone_name)?; - let addrobj = AddrObject::new_control(&vnic_name)?; - Zones::ensure_address(Some(zone_name), &addrobj, addrtype)?; + let vnic_name = Zones::get_control_interface(zone_name) + .map_err(|err| { + GetZoneError::ControlInterface { + name: zone_name.to_string(), + err, + } + })?; + let addrobj = AddrObject::new_control(&vnic_name).map_err(|err| { + GetZoneError::AddrObject { + name: zone_name.to_string(), + err, + } + })?; + Zones::ensure_address(Some(zone_name), &addrobj, addrtype) + .map_err(|err| { + GetZoneError::EnsureAddress { + name: zone_name.to_string(), + err, + } + })?; Ok(Self { inner: InstalledZone { @@ -180,6 +266,25 @@ impl Drop for RunningZone { } } +/// Errors returned from [`InstalledZone::install`]. 
+#[derive(thiserror::Error, Debug)] +pub enum InstallZoneError { + #[error("Cannot create '{service}': failed to create control VNIC: {err}")] + CreateVnic { + service: String, + #[source] + err: crate::illumos::dladm::CreateVnicError, + }, + + #[error("Failed to install zone '{zone}' from '{image_path}': {err}")] + InstallZone { + zone: String, + image_path: PathBuf, + #[source] + err: crate::illumos::zone::AdmError, + }, +} + pub struct InstalledZone { log: Logger, @@ -223,8 +328,14 @@ impl InstalledZone { datasets: &[zone::Dataset], devices: &[zone::Device], vnics: Vec, - ) -> Result { - let control_vnic = vnic_allocator.new_control(None)?; + ) -> Result { + let control_vnic = vnic_allocator.new_control(None) + .map_err(|err| { + InstallZoneError::CreateVnic { + service: service_name.to_string(), + err, + } + })?; let zone_name = Self::get_zone_name(service_name, unique_name); let zone_image_path = @@ -243,7 +354,13 @@ impl InstalledZone { &datasets, &devices, vnic_names, - )?; + ).map_err(|err| { + InstallZoneError::InstallZone { + zone: zone_name.to_string(), + image_path: zone_image_path.clone(), + err, + } + })?; Ok(InstalledZone { log: log.new(o!("zone" => zone_name.clone())), diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 2cc69283065..0b68f4c773a 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -72,8 +72,12 @@ pub struct DeleteAddressError { /// Error which may be returned accessing the control interface of a zone. 
#[derive(thiserror::Error, Debug)] pub enum GetControlInterfaceError { - #[error("Failed to query for control interface: {0}")] - Execution(#[from] crate::illumos::ExecutionError), + #[error("Failed to query zone '{zone}' for control interface: {err}")] + Execution { + zone: String, + #[source] + err: crate::illumos::ExecutionError, + }, #[error("VNIC starting with 'oxControl' not found in {zone}")] NotFound { zone: String }, @@ -316,7 +320,12 @@ impl Zones { "-o", "LINK", ]); - let output = execute(cmd)?; + let output = execute(cmd).map_err(|err| { + GetControlInterfaceError::Execution { + zone: zone.to_string(), + err, + } + })?; String::from_utf8_lossy(&output.stdout) .lines() .find_map(|name| { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index cbcf957154d..87af13085a7 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -61,7 +61,16 @@ pub enum Error { Migration(anyhow::Error), #[error(transparent)] - RunningZone(#[from] crate::illumos::running_zone::Error), + ZoneCommand(#[from] crate::illumos::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("serde_json failure: {0}")] SerdeJsonError(#[from] serde_json::Error), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 16eab79a707..9f3aebf6dfc 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -28,11 +28,24 @@ pub enum Error { #[error("I/O Error accessing {path}: {err}")] Io { path: PathBuf, err: std::io::Error }, - #[error("Zone error: {message}: {err}")] - RunningZone { message: String, err: crate::illumos::running_zone::Error }, + #[error("Failed to do '{intent}' by running command in zone: {err}")] + ZoneCommand { + intent: String, + #[source] 
+ err: crate::illumos::running_zone::RunCommandError, + }, + + #[error("Failed to boot zone: {0}")] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("Failed to add GZ addresses: {message}: {err}")] - GzAddressFailure { + GzAddress { message: String, err: crate::illumos::zone::EnsureGzAddressError, }, @@ -169,33 +182,15 @@ impl ServiceManager { // vnics= vec![], ) - .await - .map_err(|err| Error::RunningZone { - message: format!("Could not install {}", service.name), - err, - })?; + .await?; - let running_zone = RunningZone::boot(installed_zone) - .await - .map_err(|err| Error::RunningZone { - message: format!("Could not boot {}", service.name), - err, - })?; + let running_zone = RunningZone::boot(installed_zone).await?; for addr in &service.addresses { info!(self.log, "Ensuring address {} exists", addr.to_string()); let addr_request = AddressRequest::new_static(IpAddr::V6(*addr), None); - running_zone.ensure_address(addr_request).await.map_err( - |err| Error::RunningZone { - message: format!( - "Failed to create address {} for {}", - addr.to_string(), - service.name - ), - err, - }, - )?; + running_zone.ensure_address(addr_request).await?; info!( self.log, "Ensuring address {} exists - OK", @@ -217,7 +212,7 @@ impl ServiceManager { *addr, &addr_name, ) - .map_err(|err| Error::GzAddressFailure { + .map_err(|err| Error::GzAddress { message: format!( "Failed adding address for {}", service.name @@ -237,8 +232,8 @@ impl ServiceManager { service.name ), ]) - .map_err(|err| Error::RunningZone { - message: "Failed to import manifest".to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "importing manifest".to_string(), err, })?; @@ -266,9 +261,8 @@ impl ServiceManager { address, DNS_SERVER_PORT ), ]) - .map_err(|err| Error::RunningZone { - 
message: "Could not set server address property" - .to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "set server address".to_string(), err, })?; @@ -283,9 +277,8 @@ impl ServiceManager { address, DNS_PORT ), ]) - .map_err(|err| Error::RunningZone { - message: "Could not set DNS address property" - .to_string(), + .map_err(|err| Error::ZoneCommand { + intent: "Set DNS address".to_string(), err, })?; @@ -298,11 +291,8 @@ impl ServiceManager { &default_smf_name, "refresh", ]) - .map_err(|err| Error::RunningZone { - message: format!( - "Failed to refresh SMF manifest: {}", - default_smf_name - ), + .map_err(|err| Error::ZoneCommand { + intent: format!("Refresh SMF manifest {}", default_smf_name), err, })?; } @@ -323,11 +313,8 @@ impl ServiceManager { "-t", &default_smf_name, ]) - .map_err(|err| Error::RunningZone { - message: format!( - "Failed to enable {} service", - default_smf_name - ), + .map_err(|err| Error::ZoneCommand { + intent: format!("Enable {} service", default_smf_name), err, })?; diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 35bbda8cf10..7f15e0961bf 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -6,7 +6,7 @@ use crate::illumos::dladm::PhysicalLink; use crate::illumos::running_zone::{ - Error as RunningZoneError, InstalledZone, RunningZone, + InstalledZone, RunningZone, }; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; @@ -68,8 +68,17 @@ pub enum Error { #[error(transparent)] GetZpoolInfo(#[from] crate::illumos::zpool::GetInfoError), - #[error("Failed to manage a running zone: {0}")] - ZoneManagement(#[from] crate::illumos::running_zone::Error), + #[error(transparent)] + ZoneCommand(#[from] crate::illumos::running_zone::RunCommandError), + + #[error(transparent)] + ZoneBoot(#[from] crate::illumos::running_zone::BootError), + + #[error(transparent)] + ZoneEnsureAddress(#[from] 
crate::illumos::running_zone::EnsureAddressError), + + #[error(transparent)] + ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), #[error("Error parsing pool size: {0}")] BadPoolSize(#[from] ByteCountRangeError), @@ -433,14 +442,13 @@ async fn ensure_running_zone( let address_request = AddressRequest::new_static(dataset_info.address.ip(), None); - match RunningZone::get(log, &dataset_info.zone_prefix(), address_request) - .await - { + let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request).await; + match err { Ok(zone) => { info!(log, "Zone for {} is already running", dataset_name.full()); return Ok(zone); } - Err(RunningZoneError::NotFound) => { + Err(crate::illumos::running_zone::GetZoneError::NotFound { .. }) => { info!(log, "Zone for {} was not found", dataset_name.full()); let installed_zone = InstalledZone::install( @@ -463,14 +471,14 @@ async fn ensure_running_zone( Ok(zone) } - Err(RunningZoneError::NotRunning(_state)) => { + Err(crate::illumos::running_zone::GetZoneError::NotRunning { name, state }) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): - unimplemented!("Handle a zone which exists, but is not running"); + unimplemented!("Handle a zone which exists, but is not running: {name}, in {state:?}"); } - Err(_) => { + Err(err) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): unimplemented!( - "Handle a zone which exists, has some other problem" + "Handle a zone which exists, has some other problem: {err}" ); } } From bc1c40a42d4783b405830acb49b213e2e620ce6b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:15:12 -0400 Subject: [PATCH 30/41] fmt --- sled-agent/src/illumos/mod.rs | 19 ++--- sled-agent/src/illumos/running_zone.rs | 111 +++++++++++-------------- sled-agent/src/illumos/zfs.rs | 57 ++++++------- sled-agent/src/illumos/zone.rs | 5 +- sled-agent/src/illumos/zpool.rs | 33 ++------ sled-agent/src/services.rs | 5 +- sled-agent/src/storage_manager.rs | 18 
++-- 7 files changed, 106 insertions(+), 142 deletions(-) diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index c03549769ac..bb6eb5cae7f 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -20,10 +20,7 @@ const PFEXEC: &str = "/usr/bin/pfexec"; #[derive(thiserror::Error, Debug)] pub enum ExecutionError { #[error("Failed to start execution of [{command}]: {err}")] - ExecutionStart { - command: String, - err: std::io::Error, - }, + ExecutionStart { command: String, err: std::io::Error }, #[error( "Command [{command}] executed and failed with status: {status}. Output: {stderr}" @@ -41,9 +38,7 @@ pub enum ExecutionError { mod inner { use super::*; - fn to_string( - command: &mut std::process::Command - ) -> String { + fn to_string(command: &mut std::process::Command) -> String { command .get_args() .map(|s| s.to_string_lossy().into()) @@ -56,13 +51,9 @@ mod inner { pub fn execute( command: &mut std::process::Command, ) -> Result { - let output = - command.output().map_err(|err| { - ExecutionError::ExecutionStart { - command: to_string(command), - err, - } - })?; + let output = command.output().map_err(|err| { + ExecutionError::ExecutionStart { command: to_string(command), err } + })?; if !output.status.success() { return Err(ExecutionError::CommandFailure { diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index f0f53a628f0..ddfa7753982 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -33,20 +33,17 @@ pub enum BootError { Booting(#[from] crate::illumos::zone::AdmError), #[error("Zone booted, but timed out waiting for {service} in {zone}")] - Timeout { - service: String, - zone: String, - }, + Timeout { service: String, zone: String }, } /// Errors returned from [`RunningZone::ensure_address`]. 
#[derive(thiserror::Error, Debug)] pub enum EnsureAddressError { #[error("Failed ensuring address {request:?} in {zone}: could not construct addrobj name: {err}")] - AddrObject{ + AddrObject { request: AddressRequest, zone: String, - err: crate::illumos::addrobj::ParseError + err: crate::illumos::addrobj::ParseError, }, #[error(transparent)] @@ -64,17 +61,14 @@ pub enum GetZoneError { }, #[error("Zone with prefix '{prefix}' not found")] - NotFound { - prefix: String - }, + NotFound { prefix: String }, #[error("Cannot get zone '{name}': it is in the {state:?} state instead of running")] - NotRunning{ - name: String, - state: zone::State - }, + NotRunning { name: String, state: zone::State }, - #[error("Cannot get zone '{name}': Failed to acquire control interface {err}")] + #[error( + "Cannot get zone '{name}': Failed to acquire control interface {err}" + )] ControlInterface { name: String, #[source] @@ -82,13 +76,15 @@ pub enum GetZoneError { }, #[error("Cannot get zone '{name}': Failed to create addrobj: {err}")] - AddrObject{ + AddrObject { name: String, #[source] err: crate::illumos::addrobj::ParseError, }, - #[error("Cannot get zone '{name}': Failed to ensure address exists: {err}")] + #[error( + "Cannot get zone '{name}': Failed to ensure address exists: {err}" + )] EnsureAddress { name: String, #[source] @@ -124,12 +120,7 @@ impl RunningZone { let cmd = command.args(full_args); let output = crate::illumos::execute(cmd) - .map_err(|err| { - RunCommandError { - zone: name.to_string(), - err, - } - })?; + .map_err(|err| RunCommandError { zone: name.to_string(), err })?; let stdout = String::from_utf8_lossy(&output.stdout); Ok(stdout.to_string()) } @@ -146,12 +137,12 @@ impl RunningZone { // Wait for the network services to come online, so future // requests to create addresses can operate immediately. 
let fmri = "svc:/milestone/network:default"; - wait_for_service(Some(&zone.name), fmri) - .await - .map_err(|_| BootError::Timeout { + wait_for_service(Some(&zone.name), fmri).await.map_err(|_| { + BootError::Timeout { service: fmri.to_string(), zone: zone.name.to_string(), - })?; + } + })?; Ok(RunningZone { inner: zone }) } @@ -169,12 +160,10 @@ impl RunningZone { }, }; let addrobj = AddrObject::new(self.inner.control_vnic.name(), name) - .map_err(|err| { - EnsureAddressError::AddrObject { - request: addrtype, - zone: self.inner.name.clone(), - err: err.into(), - } + .map_err(|err| EnsureAddressError::AddrObject { + request: addrtype, + zone: self.inner.name.clone(), + err: err.into(), })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; @@ -196,44 +185,40 @@ impl RunningZone { addrtype: AddressRequest, ) -> Result { let zone_info = Zones::get() - .map_err(|err| { - GetZoneError::GetZones { - prefix: zone_prefix.to_string(), - err, - } + .map_err(|err| GetZoneError::GetZones { + prefix: zone_prefix.to_string(), + err, })? 
.into_iter() .find(|zone_info| zone_info.name().starts_with(&zone_prefix)) - .ok_or_else(|| GetZoneError::NotFound { prefix: zone_prefix.to_string() })?; + .ok_or_else(|| GetZoneError::NotFound { + prefix: zone_prefix.to_string(), + })?; if zone_info.state() != zone::State::Running { return Err(GetZoneError::NotRunning { name: zone_info.name().to_string(), - state: zone_info.state() + state: zone_info.state(), }); } let zone_name = zone_info.name(); - let vnic_name = Zones::get_control_interface(zone_name) - .map_err(|err| { + let vnic_name = + Zones::get_control_interface(zone_name).map_err(|err| { GetZoneError::ControlInterface { name: zone_name.to_string(), err, } })?; let addrobj = AddrObject::new_control(&vnic_name).map_err(|err| { - GetZoneError::AddrObject { + GetZoneError::AddrObject { name: zone_name.to_string(), err } + })?; + Zones::ensure_address(Some(zone_name), &addrobj, addrtype).map_err( + |err| GetZoneError::EnsureAddress { name: zone_name.to_string(), err, - } - })?; - Zones::ensure_address(Some(zone_name), &addrobj, addrtype) - .map_err(|err| { - GetZoneError::EnsureAddress { - name: zone_name.to_string(), - err, - } - })?; + }, + )?; Ok(Self { inner: InstalledZone { @@ -329,13 +314,12 @@ impl InstalledZone { devices: &[zone::Device], vnics: Vec, ) -> Result { - let control_vnic = vnic_allocator.new_control(None) - .map_err(|err| { - InstallZoneError::CreateVnic { - service: service_name.to_string(), - err, - } - })?; + let control_vnic = vnic_allocator.new_control(None).map_err(|err| { + InstallZoneError::CreateVnic { + service: service_name.to_string(), + err, + } + })?; let zone_name = Self::get_zone_name(service_name, unique_name); let zone_image_path = @@ -354,12 +338,11 @@ impl InstalledZone { &datasets, &devices, vnic_names, - ).map_err(|err| { - InstallZoneError::InstallZone { - zone: zone_name.to_string(), - image_path: zone_image_path.clone(), - err, - } + ) + .map_err(|err| InstallZoneError::InstallZone { + zone: zone_name.to_string(), 
+ image_path: zone_image_path.clone(), + err, })?; Ok(InstalledZone { diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index b1ca75002ac..00e39ef267c 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -35,7 +35,9 @@ enum EnsureFilesystemErrorRaw { /// Error returned by [`Zfs::ensure_zoned_filesystem`]. #[derive(thiserror::Error, Debug)] -#[error("Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}")] +#[error( + "Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}" +)] pub struct EnsureFilesystemError { name: String, mountpoint: Mountpoint, @@ -45,7 +47,9 @@ pub struct EnsureFilesystemError { /// Error returned by [`Zfs::set_oxide_value`] #[derive(thiserror::Error, Debug)] -#[error("Failed to set value '{name}={value}' on filesystem {filesystem}: {err}")] +#[error( + "Failed to set value '{name}={value}' on filesystem {filesystem}: {err}" +)] pub struct SetValueError { filesystem: String, name: String, @@ -94,15 +98,15 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { /// Lists all filesystems within a dataset. 
- pub fn list_filesystems(name: &str) -> Result, ListFilesystemsError> { + pub fn list_filesystems( + name: &str, + ) -> Result, ListFilesystemsError> { let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-d", "1", "-rHpo", "name", name]); - let output = execute(cmd).map_err(|err| { - ListFilesystemsError { - name: name.to_string(), - err, - } + let output = execute(cmd).map_err(|err| ListFilesystemsError { + name: name.to_string(), + err, })?; let stdout = String::from_utf8_lossy(&output.stdout); let filesystems: Vec = stdout @@ -160,12 +164,10 @@ impl Zfs { &format!("mountpoint={}", &mountpoint), name, ]); - execute(cmd).map_err(|err| { - EnsureFilesystemError { - name: name.to_string(), - mountpoint, - err: err.into(), - } + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint, + err: err.into(), })?; Ok(()) } @@ -186,13 +188,11 @@ impl Zfs { let mut command = std::process::Command::new(PFEXEC); let value_arg = format!("{}={}", name, value); let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); - execute(cmd).map_err(|err| { - SetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - value: value.to_string(), - err, - } + execute(cmd).map_err(|err| SetValueError { + filesystem: filesystem_name.to_string(), + name: name.to_string(), + value: value.to_string(), + err, })?; Ok(()) } @@ -204,16 +204,17 @@ impl Zfs { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value(filesystem_name: &str, name: &str) -> Result { + fn get_value( + filesystem_name: &str, + name: &str, + ) -> Result { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd).map_err(|err| { - GetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - err: err.into(), - } + let output = execute(cmd).map_err(|err| GetValueError { + 
filesystem: filesystem_name.to_string(), + name: name.to_string(), + err: err.into(), })?; let stdout = String::from_utf8_lossy(&output.stdout); let value = stdout.trim(); diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 0b68f4c773a..35f2edbe45a 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -321,10 +321,7 @@ impl Zones { "LINK", ]); let output = execute(cmd).map_err(|err| { - GetControlInterfaceError::Execution { - zone: zone.to_string(), - err, - } + GetControlInterfaceError::Execution { zone: zone.to_string(), err } })?; String::from_utf8_lossy(&output.stdout) .lines() diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index b4021dd8ce9..0ca3bea712b 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -61,10 +61,7 @@ impl FromStr for ZpoolHealth { "OFFLINE" => Ok(ZpoolHealth::Offline), "REMOVED" => Ok(ZpoolHealth::Removed), "UNAVAIL" => Ok(ZpoolHealth::Unavailable), - _ => Err(ParseError(format!( - "Unrecognized zpool 'health': {}", - s - ))), + _ => Err(ParseError(format!("Unrecognized zpool 'health': {}", s))), } } } @@ -110,16 +107,10 @@ impl FromStr for ZpoolInfo { fn from_str(s: &str) -> Result { // Lambda helpers for error handling. 
let expected_field = |name| { - ParseError(format!( - "Missing '{}' value in zpool list output", - name - )) + ParseError(format!("Missing '{}' value in zpool list output", name)) }; let failed_to_parse = |name, err| { - ParseError(format!( - "Failed to parse field '{}': {}", - name, err - )) + ParseError(format!("Failed to parse field '{}': {}", name, err)) }; let mut values = s.trim().split_whitespace(); @@ -163,20 +154,14 @@ impl Zpool { name, ]); - let output = execute(cmd).map_err(|err| { - GetInfoError { - name: name.to_string(), - err: err.into(), - } + let output = execute(cmd).map_err(|err| GetInfoError { + name: name.to_string(), + err: err.into(), })?; let stdout = String::from_utf8_lossy(&output.stdout); - let zpool = stdout.parse::() - .map_err(|err| { - GetInfoError { - name: name.to_string(), - err: err.into(), - } - })?; + let zpool = stdout.parse::().map_err(|err| { + GetInfoError { name: name.to_string(), err: err.into() } + })?; Ok(zpool) } } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 9f3aebf6dfc..574e6af1e5d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -292,7 +292,10 @@ impl ServiceManager { "refresh", ]) .map_err(|err| Error::ZoneCommand { - intent: format!("Refresh SMF manifest {}", default_smf_name), + intent: format!( + "Refresh SMF manifest {}", + default_smf_name + ), err, })?; } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 7f15e0961bf..41d13dd88ce 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,9 +5,7 @@ //! Management of sled-local storage. 
use crate::illumos::dladm::PhysicalLink; -use crate::illumos::running_zone::{ - InstalledZone, RunningZone, -}; +use crate::illumos::running_zone::{InstalledZone, RunningZone}; use crate::illumos::vnic::VnicAllocator; use crate::illumos::zone::AddressRequest; use crate::illumos::zpool::ZpoolName; @@ -52,7 +50,6 @@ const CRUCIBLE_AGENT_DEFAULT_SVC: &str = "svc:/oxide/crucible/agent:default"; #[derive(thiserror::Error, Debug)] pub enum Error { // TODO: We could add the context of "why are we doint this op", maybe? - #[error(transparent)] ZfsListFilesystems(#[from] crate::illumos::zfs::ListFilesystemsError), @@ -442,13 +439,17 @@ async fn ensure_running_zone( let address_request = AddressRequest::new_static(dataset_info.address.ip(), None); - let err = RunningZone::get(log, &dataset_info.zone_prefix(), address_request).await; + let err = + RunningZone::get(log, &dataset_info.zone_prefix(), address_request) + .await; match err { Ok(zone) => { info!(log, "Zone for {} is already running", dataset_name.full()); return Ok(zone); } - Err(crate::illumos::running_zone::GetZoneError::NotFound { .. }) => { + Err(crate::illumos::running_zone::GetZoneError::NotFound { + .. 
+ }) => { info!(log, "Zone for {} was not found", dataset_name.full()); let installed_zone = InstalledZone::install( @@ -471,7 +472,10 @@ async fn ensure_running_zone( Ok(zone) } - Err(crate::illumos::running_zone::GetZoneError::NotRunning { name, state }) => { + Err(crate::illumos::running_zone::GetZoneError::NotRunning { + name, + state, + }) => { // TODO(https://github.com/oxidecomputer/omicron/issues/725): unimplemented!("Handle a zone which exists, but is not running: {name}, in {state:?}"); } From 7ade54000ee024c5e7250117f554a3e6804357c1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 19:55:13 -0400 Subject: [PATCH 31/41] Bootstrap params, rack setup service errors --- sled-agent/src/bootstrap/params.rs | 21 ++++--- sled-agent/src/illumos/running_zone.rs | 2 +- sled-agent/src/rack_setup/service.rs | 87 ++++++++++++++++++++------ 3 files changed, 82 insertions(+), 28 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6c55bb1479..b6b12cfa7a1 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,7 +4,10 @@ //! Request body types for the bootstrap agent -use omicron_common::api::external::Ipv6Net; +use omicron_common::{ + api::external::Ipv6Net, + address::SLED_PREFIX, +}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -17,8 +20,10 @@ pub struct ShareRequest { #[derive(thiserror::Error, Debug)] pub enum SubnetError { - #[error("Subnet has unexpected prefix length: {0}")] - BadPrefixLength(u8), + #[error("Subnet {subnet} has unexpected prefix length, wanted {}", SLED_PREFIX)] + BadPrefixLength { + subnet: ipnetwork::Ipv6Network, + }, } /// Represents subnets belonging to Sleds. 
@@ -33,12 +38,12 @@ pub enum SubnetError { pub struct SledSubnet(Ipv6Net); impl SledSubnet { - pub fn new(ip: Ipv6Net) -> Result { - let prefix = ip.0.prefix(); - if prefix != 64 { - return Err(SubnetError::BadPrefixLength(prefix)); + pub fn new(net: Ipv6Net) -> Result { + let prefix = net.0.prefix(); + if prefix != SLED_PREFIX { + return Err(SubnetError::BadPrefixLength { subnet: net.0 }); } - Ok(SledSubnet(ip)) + Ok(SledSubnet(net)) } } diff --git a/sled-agent/src/illumos/running_zone.rs b/sled-agent/src/illumos/running_zone.rs index ddfa7753982..7dbfbc05cd8 100644 --- a/sled-agent/src/illumos/running_zone.rs +++ b/sled-agent/src/illumos/running_zone.rs @@ -163,7 +163,7 @@ impl RunningZone { .map_err(|err| EnsureAddressError::AddrObject { request: addrtype, zone: self.inner.name.clone(), - err: err.into(), + err, })?; let network = Zones::ensure_address(Some(&self.inner.name), &addrobj, addrtype)?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 153ca1bc18b..b31740fed6e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -19,14 +19,19 @@ use serde::{Deserialize, Serialize}; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; +use std::path::PathBuf; use thiserror::Error; use tokio::sync::Mutex; /// Describes errors which may occur while operating the setup service. 
#[derive(Error, Debug)] pub enum SetupServiceError { - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), + #[error("I/O error while {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, #[error("Error making HTTP request to Bootstrap Agent: {0}")] BootstrapApi( @@ -35,19 +40,25 @@ pub enum SetupServiceError { ), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi(#[from] sled_agent_client::Error), + SledApi( + #[from] + sled_agent_client::Error, + ), - #[error("Cannot deserialize TOML file")] - Toml(#[from] toml::de::Error), + #[error("Cannot deserialize TOML file at {path}: {err}")] + Toml { + path: PathBuf, + err: toml::de::Error, + }, #[error("Failed to monitor for peers: {0}")] - PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), - - #[error(transparent)] - Http(#[from] reqwest::Error), + PeerMonitor( + #[from] + tokio::sync::broadcast::error::RecvError + ), - #[error("Configuration changed")] - Configuration, + #[error("Failed to construct an HTTP client: {0}")] + HttpClient(reqwest::Error), } // The workload / information allocated to a single sled. 
@@ -143,7 +154,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let url = format!("http://{}", bootstrap_addr); info!(self.log, "Sending request to peer agent: {}", url); @@ -198,7 +210,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", sled_address), client, @@ -244,7 +257,8 @@ impl ServiceInner { let client = reqwest::ClientBuilder::new() .connect_timeout(dur) .timeout(dur) - .build()?; + .build() + .map_err(SetupServiceError::HttpClient)?; let client = sled_agent_client::Client::new_with_client( &format!("http://{}", sled_address), client, @@ -290,8 +304,20 @@ impl ServiceInner { let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path).await?, - )?; + &tokio::fs::read_to_string(&rss_plan_path) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Loading RSS plan {rss_plan_path:?}"), + err, + } + })?, + ).map_err(|err| { + SetupServiceError::Toml { + path: rss_plan_path, + err, + } + })?; Ok(Some(plan)) } else { Ok(None) @@ -374,7 +400,15 @@ impl ServiceInner { .expect("Cannot turn config to string"); info!(self.log, "Plan serialized as: {}", plan_str); - tokio::fs::write(&rss_plan_path(), plan_str).await?; + let path = rss_plan_path(); + tokio::fs::write(&path, plan_str) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS plan to {path:?}"), + err, + } + })?; info!(self.log, "Plan written to storage"); Ok(plan) @@ -451,7 +485,14 @@ impl ServiceInner { // We expect this directory to exist - ensure that it does, before any // subsequent operations which may write configs here. 
- tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH).await?; + tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("Creating config directory {}", omicron_common::OMICRON_CONFIG_PATH), + err, + } + })?; // Check if a previous RSS plan has completed successfully. // @@ -593,7 +634,15 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. - tokio::fs::rename(rss_plan_path(), rss_completed_plan_path).await?; + let plan_path = rss_plan_path(); + tokio::fs::rename(&plan_path, &rss_completed_plan_path) + .await + .map_err(|err| { + SetupServiceError::Io { + message: format!("renaming {plan_path:?} to {rss_completed_plan_path:?}"), + err, + } + })?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? How does From f09348f39589b60c6a0dd04491187feacca7b18b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:12:41 -0400 Subject: [PATCH 32/41] Bootstrap agent, trust quorum errors --- sled-agent/src/bootstrap/agent.rs | 72 +++++++++++++------ .../src/bootstrap/trust_quorum/client.rs | 8 ++- .../src/bootstrap/trust_quorum/error.rs | 8 ++- .../src/bootstrap/trust_quorum/server.rs | 9 ++- .../trust_quorum/share_distribution.rs | 15 +++- 5 files changed, 86 insertions(+), 26 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 7ede44b06bf..523c58858fc 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -31,20 +31,21 @@ use tokio::sync::Mutex; /// Describes errors which may occur while operating the bootstrap service. 
#[derive(Error, Debug)] pub enum BootstrapError { - #[error("Error accessing filesystem: {0}")] - Io(#[from] std::io::Error), - - #[error("Error configuring SMF: {0}")] - SmfConfig(#[from] smf::ConfigError), - - #[error("Error modifying SMF service: {0}")] - SmfAdm(#[from] smf::AdmError), + #[error("IO error: {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, #[error("Error starting sled agent: {0}")] SledError(String), - #[error(transparent)] - Toml(#[from] toml::de::Error), + #[error("Error deserializing toml from {path}: {err}")] + Toml { + path: PathBuf, + err: toml::de::Error, + }, #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), @@ -70,11 +71,11 @@ fn read_key_share() -> Result, BootstrapError> { match ShareDistribution::read(&key_share_dir) { Ok(share) => Ok(Some(share)), - Err(TrustQuorumError::Io(err)) => { + Err(TrustQuorumError::Io { message, err }) => { if err.kind() == io::ErrorKind::NotFound { Ok(None) } else { - Err(BootstrapError::Io(err)) + Err(BootstrapError::Io { message, err }) } } Err(e) => Err(e.into()), @@ -135,7 +136,7 @@ impl Agent { } else { Dladm::find_physical().map_err(|err| { BootstrapError::SledError(format!( - "Can't access physical link: {}", + "Can't access physical link, and none in config: {}", err )) })? 
@@ -148,7 +149,13 @@ impl Agent { ) .map_err(|err| BootstrapError::BootstrapAddress { err })?; - let peer_monitor = discovery::PeerMonitor::new(&log, address)?; + let peer_monitor = discovery::PeerMonitor::new(&log, address) + .map_err(|err| { + BootstrapError::Io { + message: format!("Monitoring for peers from {address}"), + err, + } + })?; let share = read_key_share()?; let agent = Agent { log, @@ -163,8 +170,20 @@ impl Agent { if subnet_path.exists() { info!(agent.log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( - &tokio::fs::read_to_string(&subnet_path).await?, - )?; + &tokio::fs::read_to_string(&subnet_path) + .await + .map_err(|err| { + BootstrapError::Io { + message: format!("Reading subnet path from {subnet_path:?}"), + err, + } + })?, + ).map_err(|err| { + BootstrapError::Toml { + path: subnet_path, + err, + } + })?; agent.request_agent(sled_request).await?; } @@ -216,21 +235,28 @@ impl Agent { // Server does not exist, initialize it. let server = SledServer::start(&self.sled_config, sled_address) .await - .map_err(|e| BootstrapError::SledError(e))?; + .map_err(|e| BootstrapError::SledError(format!("Could not start sled agent server: {e}")))?; maybe_agent.replace(server); info!(&self.log, "Sled Agent loaded; recording configuration"); // Record the subnet, so the sled agent can be automatically // initialized on the next boot. 
+ let path = get_subnet_path(); tokio::fs::write( - get_subnet_path(), + &path, &toml::to_string( &toml::Value::try_from(&request.subnet) .expect("Cannot serialize IP"), ) .expect("Cannot convert toml to string"), ) - .await?; + .await + .map_err(|err| { + BootstrapError::Io { + message: format!("Recording subnet to {path:?}"), + err, + } + })?; Ok(SledAgentResponse { id: self.sled_config.id }) } @@ -337,7 +363,13 @@ impl Agent { async fn run_trust_quorum_server(&self) -> Result<(), BootstrapError> { let my_share = self.share.as_ref().unwrap().share.clone(); - let mut server = trust_quorum::Server::new(&self.log, my_share)?; + let mut server = trust_quorum::Server::new(&self.log, my_share) + .map_err(|err| { + BootstrapError::Io { + message: "Cannot run trust quorum server".to_string(), + err, + } + })?; tokio::spawn(async move { server.run().await }); Ok(()) } diff --git a/sled-agent/src/bootstrap/trust_quorum/client.rs b/sled-agent/src/bootstrap/trust_quorum/client.rs index 7eb1ff2808b..5ec0a24791a 100644 --- a/sled-agent/src/bootstrap/trust_quorum/client.rs +++ b/sled-agent/src/bootstrap/trust_quorum/client.rs @@ -31,7 +31,13 @@ impl Client { // Connect to a trust quorum server, establish an SPDM channel, and retrieve // a share. 
pub async fn get_share(&self) -> Result { - let sock = TcpStream::connect(&self.addr).await?; + let sock = TcpStream::connect(&self.addr).await + .map_err(|err| { + TrustQuorumError::Io { + message: format!("Connecting to {}", self.addr), + err, + } + })?; let transport = spdm::Transport::new(sock, self.log.clone()); // Complete SPDM negotiation and return a secure transport diff --git a/sled-agent/src/bootstrap/trust_quorum/error.rs b/sled-agent/src/bootstrap/trust_quorum/error.rs index 968e7ee9a25..69c98bc6c31 100644 --- a/sled-agent/src/bootstrap/trust_quorum/error.rs +++ b/sled-agent/src/bootstrap/trust_quorum/error.rs @@ -29,6 +29,10 @@ pub enum TrustQuorumError { #[error("Rack secret construction failed: {0:?}")] RackSecretConstructionFailed(vsss_rs::Error), - #[error("IO error: {0}")] - Io(#[from] std::io::Error), + #[error("IO error {message}: {err}")] + Io { + message: String, + #[source] + err: std::io::Error, + }, } diff --git a/sled-agent/src/bootstrap/trust_quorum/server.rs b/sled-agent/src/bootstrap/trust_quorum/server.rs index 6dfbe4206bb..1608698d914 100644 --- a/sled-agent/src/bootstrap/trust_quorum/server.rs +++ b/sled-agent/src/bootstrap/trust_quorum/server.rs @@ -65,7 +65,14 @@ impl Server { &mut self, ) -> Result>, TrustQuorumError> { - let (sock, addr) = self.listener.accept().await?; + let (sock, addr) = self.listener.accept() + .await + .map_err(|err| { + TrustQuorumError::Io { + message: "Accepting a connection from TCP listener".to_string(), + err, + } + })?; debug!(self.log, "Accepted connection from {}", addr); let share = self.share.clone(); let log = self.log.clone(); diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index 73dc2148ee3..9eaa424da46 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -32,7 +32,12 @@ impl ShareDistribution { let mut path = 
PathBuf::from(dir.as_ref()); path.push(FILENAME); let json = serde_json::to_string(&self)?; - fs::write(path, &json)?; + fs::write(&path, &json).map_err(|err| { + TrustQuorumError::Io { + message: format!("Writing share to {path:?}"), + err, + } + })?; Ok(()) } @@ -41,7 +46,13 @@ impl ShareDistribution { ) -> Result { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); - let json = fs::read_to_string(path.to_str().unwrap())?; + let json = fs::read_to_string(path.to_str().unwrap()) + .map_err(|err| { + TrustQuorumError::Io { + message: format!("Reading share from {path:?}"), + err, + } + })?; serde_json::from_str(&json).map_err(|e| e.into()) } } From fb405165f8b77938de72962fad52d63703240d41 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:13:29 -0400 Subject: [PATCH 33/41] fmt --- sled-agent/src/bootstrap/agent.rs | 55 ++++++------- sled-agent/src/bootstrap/params.rs | 14 ++-- .../src/bootstrap/trust_quorum/client.rs | 13 ++- .../src/bootstrap/trust_quorum/server.rs | 14 ++-- .../trust_quorum/share_distribution.rs | 12 ++- sled-agent/src/rack_setup/service.rs | 81 ++++++++----------- 6 files changed, 82 insertions(+), 107 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index 523c58858fc..ea285b14440 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -42,10 +42,7 @@ pub enum BootstrapError { SledError(String), #[error("Error deserializing toml from {path}: {err}")] - Toml { - path: PathBuf, - err: toml::de::Error, - }, + Toml { path: PathBuf, err: toml::de::Error }, #[error(transparent)] TrustQuorum(#[from] TrustQuorumError), @@ -149,8 +146,8 @@ impl Agent { ) .map_err(|err| BootstrapError::BootstrapAddress { err })?; - let peer_monitor = discovery::PeerMonitor::new(&log, address) - .map_err(|err| { + let peer_monitor = + discovery::PeerMonitor::new(&log, address).map_err(|err| { BootstrapError::Io { message: format!("Monitoring for peers from 
{address}"), err, @@ -170,20 +167,16 @@ impl Agent { if subnet_path.exists() { info!(agent.log, "Sled already configured, loading sled agent"); let sled_request: SledAgentRequest = toml::from_str( - &tokio::fs::read_to_string(&subnet_path) - .await - .map_err(|err| { - BootstrapError::Io { - message: format!("Reading subnet path from {subnet_path:?}"), - err, - } - })?, - ).map_err(|err| { - BootstrapError::Toml { - path: subnet_path, - err, - } - })?; + &tokio::fs::read_to_string(&subnet_path).await.map_err( + |err| BootstrapError::Io { + message: format!( + "Reading subnet path from {subnet_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| BootstrapError::Toml { path: subnet_path, err })?; agent.request_agent(sled_request).await?; } @@ -235,7 +228,11 @@ impl Agent { // Server does not exist, initialize it. let server = SledServer::start(&self.sled_config, sled_address) .await - .map_err(|e| BootstrapError::SledError(format!("Could not start sled agent server: {e}")))?; + .map_err(|e| { + BootstrapError::SledError(format!( + "Could not start sled agent server: {e}" + )) + })?; maybe_agent.replace(server); info!(&self.log, "Sled Agent loaded; recording configuration"); @@ -251,11 +248,9 @@ impl Agent { .expect("Cannot convert toml to string"), ) .await - .map_err(|err| { - BootstrapError::Io { - message: format!("Recording subnet to {path:?}"), - err, - } + .map_err(|err| BootstrapError::Io { + message: format!("Recording subnet to {path:?}"), + err, })?; Ok(SledAgentResponse { id: self.sled_config.id }) @@ -364,11 +359,9 @@ impl Agent { async fn run_trust_quorum_server(&self) -> Result<(), BootstrapError> { let my_share = self.share.as_ref().unwrap().share.clone(); let mut server = trust_quorum::Server::new(&self.log, my_share) - .map_err(|err| { - BootstrapError::Io { - message: "Cannot run trust quorum server".to_string(), - err, - } + .map_err(|err| BootstrapError::Io { + message: "Cannot run trust quorum server".to_string(), + err, })?; tokio::spawn(async 
move { server.run().await }); Ok(()) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6b12cfa7a1..9901eab5069 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,10 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::{ - api::external::Ipv6Net, - address::SLED_PREFIX, -}; +use omicron_common::{address::SLED_PREFIX, api::external::Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -20,10 +17,11 @@ pub struct ShareRequest { #[derive(thiserror::Error, Debug)] pub enum SubnetError { - #[error("Subnet {subnet} has unexpected prefix length, wanted {}", SLED_PREFIX)] - BadPrefixLength { - subnet: ipnetwork::Ipv6Network, - }, + #[error( + "Subnet {subnet} has unexpected prefix length, wanted {}", + SLED_PREFIX + )] + BadPrefixLength { subnet: ipnetwork::Ipv6Network }, } /// Represents subnets belonging to Sleds. diff --git a/sled-agent/src/bootstrap/trust_quorum/client.rs b/sled-agent/src/bootstrap/trust_quorum/client.rs index 5ec0a24791a..0d6cdaf2c1d 100644 --- a/sled-agent/src/bootstrap/trust_quorum/client.rs +++ b/sled-agent/src/bootstrap/trust_quorum/client.rs @@ -31,13 +31,12 @@ impl Client { // Connect to a trust quorum server, establish an SPDM channel, and retrieve // a share. 
pub async fn get_share(&self) -> Result { - let sock = TcpStream::connect(&self.addr).await - .map_err(|err| { - TrustQuorumError::Io { - message: format!("Connecting to {}", self.addr), - err, - } - })?; + let sock = TcpStream::connect(&self.addr).await.map_err(|err| { + TrustQuorumError::Io { + message: format!("Connecting to {}", self.addr), + err, + } + })?; let transport = spdm::Transport::new(sock, self.log.clone()); // Complete SPDM negotiation and return a secure transport diff --git a/sled-agent/src/bootstrap/trust_quorum/server.rs b/sled-agent/src/bootstrap/trust_quorum/server.rs index 1608698d914..9016bc7e9e1 100644 --- a/sled-agent/src/bootstrap/trust_quorum/server.rs +++ b/sled-agent/src/bootstrap/trust_quorum/server.rs @@ -65,14 +65,12 @@ impl Server { &mut self, ) -> Result>, TrustQuorumError> { - let (sock, addr) = self.listener.accept() - .await - .map_err(|err| { - TrustQuorumError::Io { - message: "Accepting a connection from TCP listener".to_string(), - err, - } - })?; + let (sock, addr) = self.listener.accept().await.map_err(|err| { + TrustQuorumError::Io { + message: "Accepting a connection from TCP listener".to_string(), + err, + } + })?; debug!(self.log, "Accepted connection from {}", addr); let share = self.share.clone(); let log = self.log.clone(); diff --git a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs index 9eaa424da46..799e94fc5d7 100644 --- a/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs +++ b/sled-agent/src/bootstrap/trust_quorum/share_distribution.rs @@ -32,11 +32,9 @@ impl ShareDistribution { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); let json = serde_json::to_string(&self)?; - fs::write(&path, &json).map_err(|err| { - TrustQuorumError::Io { - message: format!("Writing share to {path:?}"), - err, - } + fs::write(&path, &json).map_err(|err| TrustQuorumError::Io { + message: format!("Writing share to {path:?}"), + err, 
})?; Ok(()) } @@ -46,8 +44,8 @@ impl ShareDistribution { ) -> Result { let mut path = PathBuf::from(dir.as_ref()); path.push(FILENAME); - let json = fs::read_to_string(path.to_str().unwrap()) - .map_err(|err| { + let json = + fs::read_to_string(path.to_str().unwrap()).map_err(|err| { TrustQuorumError::Io { message: format!("Reading share from {path:?}"), err, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index b31740fed6e..6d7dd09358c 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -40,22 +40,13 @@ pub enum SetupServiceError { ), #[error("Error making HTTP request to Sled Agent: {0}")] - SledApi( - #[from] - sled_agent_client::Error, - ), + SledApi(#[from] sled_agent_client::Error), #[error("Cannot deserialize TOML file at {path}: {err}")] - Toml { - path: PathBuf, - err: toml::de::Error, - }, + Toml { path: PathBuf, err: toml::de::Error }, #[error("Failed to monitor for peers: {0}")] - PeerMonitor( - #[from] - tokio::sync::broadcast::error::RecvError - ), + PeerMonitor(#[from] tokio::sync::broadcast::error::RecvError), #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), @@ -304,19 +295,18 @@ impl ServiceInner { let plan: std::collections::HashMap = toml::from_str( - &tokio::fs::read_to_string(&rss_plan_path) - .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Loading RSS plan {rss_plan_path:?}"), - err, - } - })?, - ).map_err(|err| { - SetupServiceError::Toml { - path: rss_plan_path, - err, - } + &tokio::fs::read_to_string(&rss_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "Loading RSS plan {rss_plan_path:?}" + ), + err, + }, + )?, + ) + .map_err(|err| SetupServiceError::Toml { + path: rss_plan_path, + err, })?; Ok(Some(plan)) } else { @@ -401,14 +391,12 @@ impl ServiceInner { info!(self.log, "Plan serialized as: {}", plan_str); let path = rss_plan_path(); - tokio::fs::write(&path, plan_str) 
- .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Storing RSS plan to {path:?}"), - err, - } - })?; + tokio::fs::write(&path, plan_str).await.map_err(|err| { + SetupServiceError::Io { + message: format!("Storing RSS plan to {path:?}"), + err, + } + })?; info!(self.log, "Plan written to storage"); Ok(plan) @@ -487,11 +475,12 @@ impl ServiceInner { // subsequent operations which may write configs here. tokio::fs::create_dir_all(omicron_common::OMICRON_CONFIG_PATH) .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("Creating config directory {}", omicron_common::OMICRON_CONFIG_PATH), - err, - } + .map_err(|err| SetupServiceError::Io { + message: format!( + "Creating config directory {}", + omicron_common::OMICRON_CONFIG_PATH + ), + err, })?; // Check if a previous RSS plan has completed successfully. @@ -635,14 +624,14 @@ impl ServiceInner { // Finally, make sure the configuration is saved so we don't inject // the requests on the next iteration. let plan_path = rss_plan_path(); - tokio::fs::rename(&plan_path, &rss_completed_plan_path) - .await - .map_err(|err| { - SetupServiceError::Io { - message: format!("renaming {plan_path:?} to {rss_completed_plan_path:?}"), - err, - } - })?; + tokio::fs::rename(&plan_path, &rss_completed_plan_path).await.map_err( + |err| SetupServiceError::Io { + message: format!( + "renaming {plan_path:?} to {rss_completed_plan_path:?}" + ), + err, + }, + )?; // TODO Questions to consider: // - What if a sled comes online *right after* this setup? 
How does From 25237b48ca4607ec3d079d2dfe74f9037d9deabd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:41:56 -0400 Subject: [PATCH 34/41] storage manager --- sled-agent/src/illumos/zfs.rs | 4 +- sled-agent/src/services.rs | 2 +- sled-agent/src/storage_manager.rs | 106 +++++++++++++++++++++--------- 3 files changed, 79 insertions(+), 33 deletions(-) diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index 00e39ef267c..da6510a29f3 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -14,7 +14,7 @@ const ZFS: &str = "/usr/sbin/zfs"; /// Error returned by [`Zfs::list_filesystems`]. #[derive(thiserror::Error, Debug)] -#[error("Could not list filesystems within dataset {name}: {err}")] +#[error("Could not list filesystems within zpool {name}: {err}")] pub struct ListFilesystemsError { name: String, #[source] @@ -97,7 +97,7 @@ impl fmt::Display for Mountpoint { #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { - /// Lists all filesystems within a dataset. + /// Lists all filesystems within a zpool. 
pub fn list_filesystems( name: &str, ) -> Result, ListFilesystemsError> { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 574e6af1e5d..13ae8d1fd3a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -214,7 +214,7 @@ impl ServiceManager { ) .map_err(|err| Error::GzAddress { message: format!( - "Failed adding address for {}", + "adding address on behalf of service '{}'", service.name ), err, diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index 41d13dd88ce..24a4d01b330 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -77,26 +77,43 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), - #[error("Error parsing pool size: {0}")] - BadPoolSize(#[from] ByteCountRangeError), - - #[error("Failed to parse as UUID: {0}")] - Parse(#[from] uuid::Error), - - #[error("Timed out waiting for service: {0}")] - Timeout(String), - - #[error("Object Not Found: {0}")] - NotFound(String), - - #[error("Failed to serialize toml: {0}")] - Serialize(#[from] toml::ser::Error), - - #[error("Failed to deserialize toml: {0}")] - Deserialize(#[from] toml::de::Error), - - #[error("Failed to perform I/O: {0}")] - Io(#[from] std::io::Error), + #[error("Error parsing pool {name}'s size: {err}")] + BadPoolSize { + name: String, + #[source] + err: ByteCountRangeError, + }, + + #[error("Failed to parse the dataset {name}'s UUID: {err}")] + ParseDatasetUuid { + name: String, + #[source] + err: uuid::Error, + }, + + #[error("Zpool Not Found: {0}")] + ZpoolNotFound(String), + + #[error("Failed to serialize toml (intended for {path:?}): {err}")] + Serialize { + path: PathBuf, + #[source] + err: toml::ser::Error, + }, + + #[error("Failed to deserialize toml from {path:?}: {err}")] + Deserialize { + path: PathBuf, + #[source] + err: toml::de::Error, + }, + + #[error("Failed to perform I/O: {message}: {err}")] + Io { + message: 
String, + #[source] + err: std::io::Error, + }, } /// A ZFS storage pool. @@ -148,7 +165,10 @@ impl Pool { ) -> Result { let path = std::path::Path::new(omicron_common::OMICRON_CONFIG_PATH) .join(self.id.to_string()); - create_dir_all(&path).await?; + create_dir_all(&path).await.map_err(|err| Error::Io { + message: format!("creating config dir {path:?}, which would contain config for {dataset_id}"), + err, + })?; let mut path = path.join(dataset_id.to_string()); path.set_extension("toml"); Ok(path) @@ -691,11 +711,15 @@ impl StorageWorker { let mut pools = self.pools.lock().await; let name = ZpoolName::new(request.zpool_id); let pool = pools.get_mut(&name).ok_or_else(|| { - Error::NotFound(format!("zpool: {}", request.zpool_id)) + Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.zpool_id + )) })?; + let pool_name = pool.info.name(); let dataset_info = DatasetInfo::new( - pool.info.name(), + pool_name, request.dataset_kind.clone(), request.address, ); @@ -714,10 +738,18 @@ impl StorageWorker { // Now that the dataset has been initialized, record the configuration // so it can re-initialize itself after a reboot. 
- let info_str = toml::to_string(&dataset_info)?; let path = pool.dataset_config_path(id).await?; - let mut file = File::create(path).await?; - file.write_all(info_str.as_bytes()).await?; + let info_str = toml::to_string(&dataset_info) + .map_err(|err| Error::Serialize { path: path.clone(), err })?; + let pool_name = pool.info.name(); + let mut file = File::create(&path).await.map_err(|err| Error::Io { + message: format!("Failed creating config file at {path:?} for pool {pool_name}, dataset: {id}"), + err, + })?; + file.write_all(info_str.as_bytes()).await.map_err(|err| Error::Io { + message: format!("Failed writing config to {path:?} for pool {pool_name}, dataset: {id}"), + err, + })?; self.add_datasets_notify( nexus_notifications, @@ -733,16 +765,29 @@ impl StorageWorker { pool: &mut Pool, dataset_name: &DatasetName, ) -> Result<(Uuid, SocketAddr, DatasetKind), Error> { - let id = Zfs::get_oxide_value(&dataset_name.full(), "uuid")? - .parse::()?; + let name = dataset_name.full(); + let id = Zfs::get_oxide_value(&name, "uuid")? + .parse::() + .map_err(|err| Error::ParseDatasetUuid { name, err })?; let config_path = pool.dataset_config_path(id).await?; info!( self.log, "Loading Dataset from {}", config_path.to_string_lossy() ); + let pool_name = pool.info.name(); let dataset_info: DatasetInfo = - toml::from_slice(&tokio::fs::read(config_path).await?)?; + toml::from_slice( + &tokio::fs::read(&config_path).await.map_err(|err| Error::Io { + message: format!("read config for pool {pool_name}, dataset {dataset_name:?} from {config_path:?}"), + err, + })? 
+ ).map_err(|err| { + Error::Deserialize { + path: config_path, + err, + } + })?; self.initialize_dataset_and_zone( pool, &dataset_info, @@ -783,7 +828,8 @@ impl StorageWorker { "Storage manager processing zpool: {:#?}", pool.info ); - let size = ByteCount::try_from(pool.info.size())?; + let size = ByteCount::try_from(pool.info.size()) + .map_err(|err| Error::BadPoolSize { name: pool_name.to_string(), err })?; // If we find filesystems within our datasets, ensure their // zones are up-and-running. From a9af5d1c96d906f5255e6bec0c3356bc8ba0512f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sun, 24 Apr 2022 20:52:37 -0400 Subject: [PATCH 35/41] last few --- sled-agent/src/config.rs | 24 ++++++++++++++------ sled-agent/src/instance.rs | 4 ---- sled-agent/src/rack_setup/config.rs | 6 +++-- sled-agent/src/updates.rs | 34 ++++++++++++++++++++++++----- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 63b9b1115cc..cfa6e84927b 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -10,7 +10,7 @@ use crate::illumos::zpool::ZpoolName; use dropshot::ConfigLogging; use serde::Deserialize; use std::net::SocketAddr; -use std::path::Path; +use std::path::{Path, PathBuf}; use uuid::Uuid; /// Configuration for a sled agent @@ -35,17 +35,27 @@ pub struct Config { #[derive(Debug, thiserror::Error)] pub enum ConfigError { - #[error("Failed to read config: {0}")] - Io(#[from] std::io::Error), - #[error("Failed to parse config: {0}")] - Parse(#[from] toml::de::Error), + #[error("Failed to read config from {path}: {err}")] + Io { + path: PathBuf, + #[source] + err: std::io::Error, + }, + #[error("Failed to parse config from {path}: {err}")] + Parse { + path: PathBuf, + #[source] + err: toml::de::Error, + }, } impl Config { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); - let contents = std::fs::read_to_string(path)?; - let config = toml::from_str(&contents)?; + let contents = 
std::fs::read_to_string(&path) + .map_err(|err| ConfigError::Io { path: path.into(), err })?; + let config = toml::from_str(&contents) + .map_err(|err| ConfigError::Parse { path: path.into(), err })?; Ok(config) } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 87af13085a7..bf0cce1fa57 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -41,7 +41,6 @@ pub enum Error { #[error("Failed to wait for service: {0}")] Timeout(String), - // TODO: Who are we allocating the VNIC for? #[error("Failed to create VNIC: {0}")] VnicCreation(#[from] crate::illumos::dladm::CreateVnicError), @@ -71,9 +70,6 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] crate::illumos::running_zone::InstallZoneError), - - #[error("serde_json failure: {0}")] - SerdeJsonError(#[from] serde_json::Error), } // Issues read-only, idempotent HTTP requests at propolis until it responds with diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 1ec2858c0b7..01299e0de2b 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -58,8 +58,10 @@ fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); - let contents = std::fs::read_to_string(path)?; - let config = toml::from_str(&contents)?; + let contents = std::fs::read_to_string(&path) + .map_err(|err| ConfigError::Io { path: path.into(), err })?; + let config = toml::from_str(&contents) + .map_err(|err| ConfigError::Parse { path: path.into(), err })?; Ok(config) } diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 38c132a92ab..721861ca074 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -12,8 +12,12 @@ use std::path::PathBuf; #[derive(thiserror::Error, Debug)] pub enum Error { - #[error("I/O Error: {0}")] - Io(#[from] std::io::Error), + #[error("I/O Error: {message}: {err}")] + Io { 
+ message: String, + #[source] + err: std::io::Error, + }, #[error("Failed to contact nexus: {0}")] Nexus(anyhow::Error), @@ -29,7 +33,12 @@ pub async fn download_artifact( match artifact.kind { UpdateArtifactKind::Zone => { let directory = PathBuf::from("/var/tmp/zones"); - tokio::fs::create_dir_all(&directory).await?; + tokio::fs::create_dir_all(&directory).await.map_err(|err| { + Error::Io { + message: format!("creating diretory {directory:?}"), + err, + } + })?; // We download the file to a location named "-". // We then rename it to "" after it has successfully @@ -57,10 +66,25 @@ pub async fn download_artifact( .map_err(Error::Response)?; let contents = response.bytes().await.map_err(|e| Error::Response(e))?; - tokio::fs::write(&tmp_path, contents).await?; + tokio::fs::write(&tmp_path, contents).await.map_err(|err| { + Error::Io { + message: format!( + "Downloading artifact to temporary path: {tmp_path:?}" + ), + err, + } + })?; // Write the file to its final path. - tokio::fs::rename(&tmp_path, directory.join(artifact.name)).await?; + let destination = directory.join(artifact.name); + tokio::fs::rename(&tmp_path, &destination).await.map_err( + |err| Error::Io { + message: format!( + "Renaming {tmp_path:?} to {destination:?}" + ), + err, + }, + )?; Ok(()) } } From 31e3c1b8e881790bb42152d67a2f79c6d27a7f24 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 25 Apr 2022 11:18:48 -0400 Subject: [PATCH 36/41] review feedback --- sled-agent/src/bootstrap/agent.rs | 2 +- sled-agent/src/illumos/dladm.rs | 4 ++-- sled-agent/src/illumos/zfs.rs | 2 +- sled-agent/src/illumos/zpool.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index ea285b14440..07bf8df9640 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -140,7 +140,7 @@ impl Agent { }; Zones::ensure_has_global_zone_v6_address( - data_link.clone(), + data_link, address, "bootstrap6", ) 
diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 441fe18fa49..6bf9c182414 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -18,7 +18,7 @@ pub const DLADM: &str = "/usr/sbin/dladm"; /// Errors returned from [`Dladm::find_physical`]. #[derive(thiserror::Error, Debug)] pub enum FindPhysicalLinkError { - #[error("Failed to execute command to find physical link: {0}")] + #[error("Failed to find physical link: {0}")] Execution(#[from] ExecutionError), #[error("No Physical Link devices found")] @@ -31,7 +31,7 @@ pub enum GetMacError { #[error("Mac Address cannot be looked up; Link not found: {0:?}")] NotFound(PhysicalLink), - #[error("Failed to execute command to get MAC address: {0}")] + #[error("Failed to get MAC address: {0}")] Execution(#[from] ExecutionError), #[error("Failed to parse MAC: {0}")] diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index da6510a29f3..e47825d0585 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -161,7 +161,7 @@ impl Zfs { "-o", "zoned=on", "-o", - &format!("mountpoint={}", &mountpoint), + &format!("mountpoint={}", mountpoint), name, ]); execute(cmd).map_err(|err| EnsureFilesystemError { diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs index 0ca3bea712b..b1d528c05db 100644 --- a/sled-agent/src/illumos/zpool.rs +++ b/sled-agent/src/illumos/zpool.rs @@ -25,7 +25,7 @@ enum Error { } #[derive(thiserror::Error, Debug)] -#[error("Failed to get info for zpool {name}: {err}")] +#[error("Failed to get info for zpool '{name}': {err}")] pub struct GetInfoError { name: String, #[source] From 857fe85f2166158161ffb05d1faa07f7fa510b68 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Apr 2022 14:39:55 -0400 Subject: [PATCH 37/41] Const generic subnet prefix --- common/src/address.rs | 109 ++++++++++++++++++--------- openapi/bootstrap-agent.json | 22 +++--- sled-agent/src/bootstrap/agent.rs 
| 5 +- sled-agent/src/bootstrap/params.rs | 47 +----------- sled-agent/src/rack_setup/config.rs | 65 ++++++++-------- sled-agent/src/rack_setup/service.rs | 22 +++--- 6 files changed, 133 insertions(+), 137 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 69a6c3143d2..e18e3639e96 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -7,7 +7,9 @@ //! This addressing functionality is shared by both initialization services //! and Nexus, who need to agree upon addressing schemes. +use crate::api::external::Ipv6Net; use ipnetwork::Ipv6Network; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -29,10 +31,43 @@ pub const DNS_PORT: u16 = 53; pub const DNS_SERVER_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; +// Anycast is a mechanism in which a single IP address is shared by multiple +// devices, and the destination is located based on routing distance. +// +// This is covered by RFC 4291 in much more detail: +// +// +// Anycast addresses are always the "zeroeth" address within a subnet. We +// always explicitly skip these addresses within our network. +const _ANYCAST_ADDRESS_INDEX: usize = 0; +const DNS_ADDRESS_INDEX: usize = 1; +const GZ_ADDRESS_INDEX: usize = 2; + +/// Wraps an [`Ipv6Network`] with a compile-time prefix length. +#[derive(Debug, Clone, Copy, JsonSchema, Serialize, Deserialize, PartialEq)] +pub struct Ipv6Subnet { + net: Ipv6Net, +} + +impl Ipv6Subnet { + pub fn new(addr: Ipv6Addr) -> Self { + // Create a network with the compile-time prefix length. + let net = Ipv6Network::new(addr, N).unwrap(); + // Ensure the address is set to within-prefix only components. + let net = Ipv6Network::new(net.network(), N).unwrap(); + Self { net: Ipv6Net(net) } + } + + /// Returns the underlying network. + pub fn net(&self) -> Ipv6Network { + self.net.0 + } +} + /// Represents a subnet which may be used for contacting DNS services. 
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] pub struct DnsSubnet { - network: Ipv6Network, + subnet: Ipv6Subnet, } impl DnsSubnet { @@ -40,10 +75,11 @@ impl DnsSubnet { /// /// This is the first address within the subnet. pub fn dns_address(&self) -> Ipv6Network { - let mut iter = self.network.iter(); - let _anycast_ip = iter.next().unwrap(); - let dns_ip = iter.next().unwrap(); - Ipv6Network::new(dns_ip, SLED_PREFIX).unwrap() + Ipv6Network::new( + self.subnet.net().iter().nth(DNS_ADDRESS_INDEX).unwrap(), + SLED_PREFIX, + ) + .unwrap() } /// Returns the address which the Global Zone should create @@ -51,68 +87,63 @@ impl DnsSubnet { /// /// This is the second address within the subnet. pub fn gz_address(&self) -> Ipv6Network { - let mut iter = self.network.iter(); - let _anycast_ip = iter.next().unwrap(); - let _dns_ip = iter.next().unwrap(); - Ipv6Network::new(iter.next().unwrap(), SLED_PREFIX).unwrap() + Ipv6Network::new( + self.subnet.net().iter().nth(GZ_ADDRESS_INDEX).unwrap(), + SLED_PREFIX, + ) + .unwrap() } } /// A wrapper around an IPv6 network, indicating it is a "reserved" rack /// subnet which can be used for AZ-wide services. #[derive(Debug, Clone)] -pub struct ReservedRackSubnet(pub Ipv6Network); +pub struct ReservedRackSubnet(pub Ipv6Subnet); impl ReservedRackSubnet { /// Returns the subnet for the reserved rack subnet. - pub fn new(subnet: Ipv6Network) -> Self { - let net = Ipv6Network::new(subnet.network(), AZ_PREFIX).unwrap(); - ReservedRackSubnet( - Ipv6Network::new(net.network(), RACK_PREFIX).unwrap(), - ) + pub fn new(subnet: Ipv6Subnet) -> Self { + ReservedRackSubnet(Ipv6Subnet::::new(subnet.net().ip())) } - /// Given a particular rack subnet, return the DNS addresses. + /// Returns the DNS addresses from this reserved rack subnet. /// /// These addresses will come from the first [`DNS_REDUNDANCY`] `/64s` of the /// [`RACK_PREFIX`] subnet. 
pub fn get_dns_subnets(&self) -> Vec { - assert_eq!(self.0.prefix(), RACK_PREFIX); - (0..DNS_REDUNDANCY) .map(|idx| { - let network = + let subnet = get_64_subnet(self.0, u8::try_from(idx + 1).unwrap()); - - DnsSubnet { network } + DnsSubnet { subnet } }) .collect() } } +const SLED_AGENT_ADDRESS_INDEX: usize = 1; + /// Return the sled agent address for a subnet. /// /// This address will come from the first address of the [`SLED_PREFIX`] subnet. -pub fn get_sled_address(sled_subnet: Ipv6Network) -> SocketAddrV6 { - assert_eq!(sled_subnet.prefix(), SLED_PREFIX); - - let mut iter = sled_subnet.iter(); - let _anycast_ip = iter.next().unwrap(); - let sled_agent_ip = iter.next().unwrap(); +pub fn get_sled_address(sled_subnet: Ipv6Subnet) -> SocketAddrV6 { + let sled_agent_ip = + sled_subnet.net().iter().nth(SLED_AGENT_ADDRESS_INDEX).unwrap(); SocketAddrV6::new(sled_agent_ip, SLED_AGENT_PORT, 0, 0) } /// Returns a sled subnet within a rack subnet. /// /// The subnet at index == 0 is used for rack-local services. -pub fn get_64_subnet(rack_subnet: Ipv6Network, index: u8) -> Ipv6Network { - assert_eq!(rack_subnet.prefix(), RACK_PREFIX); - - let mut rack_network = rack_subnet.network().octets(); +pub fn get_64_subnet( + rack_subnet: Ipv6Subnet, + index: u8, +) -> Ipv6Subnet { + let mut rack_network = rack_subnet.net().network().octets(); // To set bits distinguishing the /64 from the /56, we modify the 7th octet. rack_network[7] = index; - Ipv6Network::new(Ipv6Addr::from(rack_network), 64).unwrap() + Ipv6Subnet::::new(Ipv6Addr::from(rack_network)) } #[cfg(test)] @@ -121,14 +152,16 @@ mod test { #[test] fn test_dns_subnets() { - let subnet = "fd00:1122:3344:0100::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0100::".parse::().unwrap(), + ); let rack_subnet = ReservedRackSubnet::new(subnet); assert_eq!( // Note that these bits (indicating the rack) are zero. 
// vv "fd00:1122:3344:0000::/56".parse::().unwrap(), - rack_subnet.0, + rack_subnet.0.net(), ); // Observe the first DNS subnet within this reserved rack subnet. @@ -148,13 +181,17 @@ mod test { #[test] fn test_sled_address() { - let subnet = "fd00:1122:3344:0101::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0101::".parse::().unwrap(), + ); assert_eq!( "[fd00:1122:3344:0101::1]:12345".parse::().unwrap(), get_sled_address(subnet) ); - let subnet = "fd00:1122:3344:0308::/64".parse::().unwrap(); + let subnet = Ipv6Subnet::::new( + "fd00:1122:3344:0308::".parse::().unwrap(), + ); assert_eq!( "[fd00:1122:3344:0308::1]:12345".parse::().unwrap(), get_sled_address(subnet) diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index cf04477f449..d503e01b20c 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -117,6 +117,18 @@ "pattern": "^(fd|FD)[0-9a-fA-F]{2}:((([0-9a-fA-F]{1,4}\\:){6}[0-9a-fA-F]{1,4})|(([0-9a-fA-F]{1,4}:){1,6}:))/(6[4-9]|[7-9][0-9]|1[0-1][0-9]|12[0-6])$", "maxLength": 43 }, + "Ipv6Subnet": { + "description": "Wraps an [`Ipv6Network`] with a compile-time prefix length.", + "type": "object", + "properties": { + "net": { + "$ref": "#/components/schemas/Ipv6Net" + } + }, + "required": [ + "net" + ] + }, "ShareRequest": { "description": "Identity signed by local RoT and Oxide certificate chain.", "type": "object", @@ -159,7 +171,7 @@ "description": "Portion of the IP space to be managed by the Sled Agent.", "allOf": [ { - "$ref": "#/components/schemas/SledSubnet" + "$ref": "#/components/schemas/Ipv6Subnet" } ] } @@ -180,14 +192,6 @@ "required": [ "id" ] - }, - "SledSubnet": { - "description": "Represents subnets belonging to Sleds.\n\nThis is a thin wrapper around the [`Ipv6Net`] type - which may be accessed by [`AsRef`] - which adds additional validation that this is a /64 subnet with an expected prefix.", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv6Net" - } - ] } } } diff 
--git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index da20e4f8e58..92988a694f9 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -16,6 +16,7 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::{self, Zones}; use crate::rack_setup::service::Service as RackSetupService; use crate::server::Server as SledServer; +use omicron_common::address::get_sled_address; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ internal_service_policy, retry_notify, BackoffError, @@ -181,9 +182,7 @@ impl Agent { ) -> Result { info!(&self.log, "Loading Sled Agent: {:?}", request); - let sled_address = omicron_common::address::get_sled_address( - request.subnet.as_ref().0, - ); + let sled_address = get_sled_address(request.subnet); let mut maybe_agent = self.sled_agent.lock().await; if let Some(server) = &*maybe_agent { diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index b6c55bb1479..55d6e2c117e 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,7 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::api::external::Ipv6Net; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -15,52 +15,9 @@ pub struct ShareRequest { pub identity: Vec, } -#[derive(thiserror::Error, Debug)] -pub enum SubnetError { - #[error("Subnet has unexpected prefix length: {0}")] - BadPrefixLength(u8), -} - -/// Represents subnets belonging to Sleds. -/// -/// This is a thin wrapper around the [`Ipv6Net`] type - which may be accessed -/// by [`AsRef`] - which adds additional validation that this is a /64 -/// subnet with an expected prefix. 
-// Note: The inner field is intentionally non-pub; this makes it -// more difficult to construct a sled subnet which avoids the -// validation performed by the constructor. -#[derive(Clone, Debug, Serialize, JsonSchema, PartialEq)] -pub struct SledSubnet(Ipv6Net); - -impl SledSubnet { - pub fn new(ip: Ipv6Net) -> Result { - let prefix = ip.0.prefix(); - if prefix != 64 { - return Err(SubnetError::BadPrefixLength(prefix)); - } - Ok(SledSubnet(ip)) - } -} - -impl<'de> serde::Deserialize<'de> for SledSubnet { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let net = Ipv6Net::deserialize(deserializer)?; - SledSubnet::new(net).map_err(serde::de::Error::custom) - } -} - -impl AsRef for SledSubnet { - fn as_ref(&self) -> &Ipv6Net { - &self.0 - } -} - /// Configuration information for launching a Sled Agent. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] pub struct SledAgentRequest { /// Portion of the IP space to be managed by the Sled Agent. - pub subnet: SledSubnet, + pub subnet: Ipv6Subnet, } diff --git a/sled-agent/src/rack_setup/config.rs b/sled-agent/src/rack_setup/config.rs index 1ec2858c0b7..6aa8e61df6d 100644 --- a/sled-agent/src/rack_setup/config.rs +++ b/sled-agent/src/rack_setup/config.rs @@ -6,8 +6,9 @@ use crate::config::ConfigError; use crate::params::{DatasetEnsureBody, ServiceRequest}; -use ipnetwork::Ipv6Network; -use omicron_common::address::{AZ_PREFIX, RACK_PREFIX}; +use omicron_common::address::{ + get_64_subnet, Ipv6Subnet, AZ_PREFIX, RACK_PREFIX, SLED_PREFIX, +}; use serde::Deserialize; use serde::Serialize; use std::net::Ipv6Addr; @@ -47,14 +48,6 @@ pub struct SledRequest { pub dns_services: Vec, } -fn new_network(addr: Ipv6Addr, prefix: u8) -> Ipv6Network { - let net = Ipv6Network::new(addr, prefix).unwrap(); - - // ipnetwork inputs/outputs the provided IPv6 address, unmodified by the - // prefix. We manually mask `addr` based on `prefix` ourselves. 
- Ipv6Network::new(net.network(), prefix).unwrap() -} - impl SetupServiceConfig { pub fn from_file>(path: P) -> Result { let path = path.as_ref(); @@ -63,18 +56,18 @@ impl SetupServiceConfig { Ok(config) } - pub fn az_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, AZ_PREFIX) + pub fn az_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet) } /// Returns the subnet for our rack. - pub fn rack_subnet(&self) -> Ipv6Network { - new_network(self.rack_subnet, RACK_PREFIX) + pub fn rack_subnet(&self) -> Ipv6Subnet { + Ipv6Subnet::::new(self.rack_subnet) } /// Returns the subnet for the `index`-th sled in the rack. - pub fn sled_subnet(&self, index: u8) -> Ipv6Network { - omicron_common::address::get_64_subnet(self.rack_subnet(), index) + pub fn sled_subnet(&self, index: u8) -> Ipv6Subnet { + get_64_subnet(self.rack_subnet(), index) } } @@ -90,33 +83,43 @@ mod test { }; assert_eq!( - // Masked out in AZ Subnet - // vv - "fd00:1122:3344:0000::/48".parse::().unwrap(), + Ipv6Subnet::::new( + // Masked out in AZ Subnet + // vv + "fd00:1122:3344:0000::".parse::().unwrap(), + ), cfg.az_subnet() ); assert_eq!( - // Shows up from Rack Subnet - // vv - "fd00:1122:3344:0100::/56".parse::().unwrap(), + Ipv6Subnet::::new( + // Shows up from Rack Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), cfg.rack_subnet() ); assert_eq!( - // 0th Sled Subnet - // vv - "fd00:1122:3344:0100::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // 0th Sled Subnet + // vv + "fd00:1122:3344:0100::".parse::().unwrap(), + ), cfg.sled_subnet(0) ); assert_eq!( - // 1st Sled Subnet - // vv - "fd00:1122:3344:0101::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // 1st Sled Subnet + // vv + "fd00:1122:3344:0101::".parse::().unwrap(), + ), cfg.sled_subnet(1) ); assert_eq!( - // Last Sled Subnet - // vv - "fd00:1122:3344:01ff::/64".parse::().unwrap(), + Ipv6Subnet::::new( + // Last Sled Subnet + // vv + "fd00:1122:3344:01ff::".parse::().unwrap(), + ), 
cfg.sled_subnet(255) ); } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 153ca1bc18b..c1daf50e9ec 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -8,7 +8,6 @@ use super::config::{SetupServiceConfig as Config, SledRequest}; use crate::bootstrap::{ client as bootstrap_agent_client, config::BOOTSTRAP_AGENT_PORT, discovery::PeerMonitorObserver, params::SledAgentRequest, - params::SledSubnet, }; use crate::params::ServiceRequest; use omicron_common::address::{get_sled_address, ReservedRackSubnet}; @@ -156,11 +155,11 @@ impl ServiceInner { let sled_agent_initialize = || async { client .start_sled(&bootstrap_agent_client::types::SledAgentRequest { - subnet: bootstrap_agent_client::types::SledSubnet( - bootstrap_agent_client::types::Ipv6Net( - request.subnet.as_ref().to_string(), + subnet: bootstrap_agent_client::types::Ipv6Subnet { + net: bootstrap_agent_client::types::Ipv6Net( + request.subnet.net().to_string(), ), - ), + }, }) .await .map_err(BackoffError::transient)?; @@ -304,8 +303,7 @@ impl ServiceInner { bootstrap_addrs: impl IntoIterator, ) -> Result, SetupServiceError> { let bootstrap_addrs = bootstrap_addrs.into_iter().enumerate(); - let reserved_rack_subnet = - ReservedRackSubnet::new(config.rack_subnet()); + let reserved_rack_subnet = ReservedRackSubnet::new(config.az_subnet()); let dns_subnets = reserved_rack_subnet.get_dns_subnets(); info!(self.log, "dns_subnets: {:#?}", dns_subnets); @@ -347,9 +345,7 @@ impl ServiceInner { SocketAddrV6::new(bootstrap_addr, BOOTSTRAP_AGENT_PORT, 0, 0); let sled_subnet_index = u8::try_from(idx + 1).expect("Too many peers!"); - let subnet = - SledSubnet::new(config.sled_subnet(sled_subnet_index).into()) - .expect("Created Invalid Subnet"); + let subnet = config.sled_subnet(sled_subnet_index); ( bootstrap_addr, @@ -527,7 +523,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let 
sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); self.initialize_services( @@ -546,7 +542,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); self.initialize_datasets( sled_address, @@ -570,7 +566,7 @@ impl ServiceInner { futures::future::join_all(plan.iter().map( |(_, allocation)| async move { let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet.as_ref().0, + allocation.initialization_request.subnet, )); let all_services = allocation From be3bc1bc847b4548f6e99f91162865916f7da360 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Apr 2022 15:19:38 -0400 Subject: [PATCH 38/41] Ipv6, comments --- internal-dns/tests/basic_test.rs | 21 ++++++++--------- sled-agent/src/params.rs | 5 ++++ sled-agent/src/rack_setup/service.rs | 34 ++++++++++++++++------------ sled-agent/src/services.rs | 4 ++++ 4 files changed, 38 insertions(+), 26 deletions(-) diff --git a/internal-dns/tests/basic_test.rs b/internal-dns/tests/basic_test.rs index b58ecdb62f9..6218d87d334 100644 --- a/internal-dns/tests/basic_test.rs +++ b/internal-dns/tests/basic_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -10,7 +10,6 @@ use internal_dns_client::{ types::{DnsKv, DnsRecord, DnsRecordKey, Srv}, Client, }; -use std::net::Ipv6Addr; use trust_dns_resolver::config::{ NameServerConfig, Protocol, ResolverConfig, ResolverOpts, }; @@ -116,16 +115,16 @@ async fn init_client_server( let db = Arc::new(sled::open(&config.data.storage_path)?); db.clear()?; - let client = Client::new( - &format!("http://127.0.0.1:{}", dropshot_port), - log.clone(), - ); + let client = + Client::new(&format!("http://[::1]:{}", dropshot_port), log.clone()); let mut rc = ResolverConfig::new(); rc.add_name_server(NameServerConfig { - socket_addr: SocketAddr::V4(SocketAddrV4::new( - Ipv4Addr::new(127, 0, 0, 1), + socket_addr: SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, dns_port, + 0, + 0, )), protocol: Protocol::Udp, tls_dns_name: None, @@ -141,7 +140,7 @@ async fn init_client_server( let db = db.clone(); let log = log.clone(); let dns_config = internal_dns::dns_server::Config { - bind_address: format!("127.0.0.1:{}", dns_port), + bind_address: format!("[::1]:{}", dns_port), }; tokio::spawn(async move { @@ -176,9 +175,7 @@ fn test_config() -> Result<(internal_dns::Config, u16, u16), anyhow::Error> { level: dropshot::ConfigLoggingLevel::Info, }, dropshot: dropshot::ConfigDropshot { - bind_address: format!("127.0.0.1:{}", dropshot_port) - .parse() - .unwrap(), + bind_address: format!("[::1]:{}", dropshot_port).parse().unwrap(), request_body_max_bytes: 1024, ..Default::default() }, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 7ad76a634b2..21c0182a22e 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -236,6 +236,11 @@ pub struct ServiceRequest { pub addresses: Vec, // The addresses in the global zone which should be created, if necessary // to route to the service. 
+ // + // For addresses allocated within the Sled's Subnet, no extra address should + // be necessary. However, for other services - such as the DNS service, + // which exists outside the sled's typical subnet - adding an address in the + // GZ is necessary to allow inter-zone traffic routing. #[serde(default)] pub gz_addresses: Vec, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c1daf50e9ec..0cba727b598 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -520,20 +520,26 @@ impl ServiceInner { .collect::>()?; // Set up internal DNS services. - futures::future::join_all(plan.iter().map( - |(_, allocation)| async move { - let sled_address = SocketAddr::V6(get_sled_address( - allocation.initialization_request.subnet, - )); - - self.initialize_services( - sled_address, - &allocation.services_request.dns_services, - ) - .await?; - Ok(()) - }, - )) + futures::future::join_all( + plan.iter() + .filter(|(_, allocation)| { + // Only send requests to sleds that are supposed to be running + // DNS services. + !allocation.services_request.dns_services.is_empty() + }) + .map(|(_, allocation)| async move { + let sled_address = SocketAddr::V6(get_sled_address( + allocation.initialization_request.subnet, + )); + + self.initialize_services( + sled_address, + &allocation.services_request.dns_services, + ) + .await?; + Ok(()) + }), + ) .await .into_iter() .collect::>()?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index b699842c868..f47da988d35 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -130,6 +130,10 @@ impl ServiceManager { } // Populates `existing_zones` according to the requests in `services`. + // + // At the point this function is invoked, IP addresses have already been + // allocated (by either RSS or Nexus). However, this function explicitly + // assigns such addresses to interfaces within zones. 
async fn initialize_services_locked( &self, existing_zones: &mut Vec, From 44139b9cba262c3e4e5f770b1693a075ac137f69 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 2 May 2022 11:31:52 -0400 Subject: [PATCH 39/41] fmt --- sled-agent/src/bootstrap/params.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sled-agent/src/bootstrap/params.rs b/sled-agent/src/bootstrap/params.rs index 1b1adcee598..55d6e2c117e 100644 --- a/sled-agent/src/bootstrap/params.rs +++ b/sled-agent/src/bootstrap/params.rs @@ -4,10 +4,7 @@ //! Request body types for the bootstrap agent -use omicron_common::address::{ - SLED_PREFIX, - Ipv6Subnet, -}; +use omicron_common::address::{Ipv6Subnet, SLED_PREFIX}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; From 83f4019df18fc9665d312d4e1f9d3c173682d39c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 4 May 2022 21:36:01 -0400 Subject: [PATCH 40/41] fmt --- sled-agent/src/services.rs | 99 ++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 1d54696cc2b..6e588b7fe4f 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -324,57 +324,60 @@ impl ServiceManager { message: "Not enough addresses".to_string(), } })?; - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/server_address=[{}]:{}", - address, DNS_SERVER_PORT - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS server address [{}]:{}", - address, DNS_SERVER_PORT - ), - err, - })?; - - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &smf_name, - "setprop", - &format!( - "config/dns_address=[{}]:{}", - address, DNS_PORT - ), - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Setting DNS address [{}]:{}", - address, DNS_SERVER_PORT - ), - err, - })?; + running_zone + .run_cmd(&[ + 
crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/server_address=[{}]:{}", + address, DNS_SERVER_PORT + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Setting DNS server address [{}]:{}", + address, DNS_SERVER_PORT + ), + err, + })?; + + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &smf_name, + "setprop", + &format!( + "config/dns_address=[{}]:{}", + address, DNS_PORT + ), + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Setting DNS address [{}]:{}", + address, DNS_SERVER_PORT + ), + err, + })?; // Refresh the manifest with the new properties we set, // so they become "effective" properties when the service is enabled. - running_zone.run_cmd(&[ - crate::illumos::zone::SVCCFG, - "-s", - &default_smf_name, - "refresh", - ]) - .map_err(|err| Error::ZoneCommand { - intent: format!( - "Refreshing DNS service config for {}", - default_smf_name - ), - err, - })?; + running_zone + .run_cmd(&[ + crate::illumos::zone::SVCCFG, + "-s", + &default_smf_name, + "refresh", + ]) + .map_err(|err| Error::ZoneCommand { + intent: format!( + "Refreshing DNS service config for {}", + default_smf_name + ), + err, + })?; } _ => { info!( From a61284808d2a352d1a2ba9d2a486dc29757a4866 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 5 May 2022 11:19:55 -0400 Subject: [PATCH 41/41] fix illumos-only test --- sled-agent/tests/integration_tests/multicast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/tests/integration_tests/multicast.rs b/sled-agent/tests/integration_tests/multicast.rs index 1aa9f8b103f..61a0d8284ab 100644 --- a/sled-agent/tests/integration_tests/multicast.rs +++ b/sled-agent/tests/integration_tests/multicast.rs @@ -30,7 +30,7 @@ async fn test_multicast_bootstrap_address() { let address_name = "testbootstrap6"; let addrobj = AddrObject::new(&link.0, address_name).unwrap(); zone::Zones::ensure_has_global_zone_v6_address( - Some(link), + link, 
*address.ip(), address_name, )