diff --git a/.drone.jsonnet b/.drone.jsonnet index 3509e6ac0..c753769f0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -291,7 +291,7 @@ local clang(version) = debian_pipeline( local full_llvm(version) = debian_pipeline( 'Debian sid/llvm-' + version, docker_base + 'debian-sid-clang', - deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], + deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libunwind-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], remove='g++'), oxen_repo=[], cmake_extra='-DCMAKE_C_COMPILER=clang-' + version + diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 5318d72e8..911a1f967 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -46,36 +46,84 @@ namespace srouter::dns return name; } - size_t encode_name(std::span buf, std::string_view name) + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset) { - auto orig = buf.size(); if (name.size() && name.back() == '.') name.remove_suffix(1); - for (auto part : srouter::split(name, ".")) + // Look for a previously used suffix of this name. 
For instance, if we have a response + // consisting of: + // + // localhost.sesh IN CNAME mylongpubkey.sesh + // foo.mylongpubkey.sesh IN AAAA 1:2:3::4 + // + // then when we repeat the question itself (IN AAAA localhost.sesh) we echo that question + // back into the response as the 16 bytes: + // \x09localhost\x04sesh\x00 + // Suppose that this was written at location Z in the DNS message, this creates two + // pointable addresses: + // - "localhost.sesh" -> Z + // - "sesh" -> Z+10 + // + // Then we come to the answers, and for the first "localhost.sesh" value, we can simply + // write that as a single pointer [Z] (where the pointer is a 16-bit, big-endian value with + // the highest two bits set and the remaining 14 bits set to "Z"). + // + // Then we get to "mylongpubkey.sesh" and we can encode that as: + // + // \x34mylongpubkey[pointer to Z+10] + // + // This also creates a new pointable address: + // - "mylongpubkey.sesh" -> Y + // + // Then we come to foo.mylongpubkey.sesh and we can encode this as: + // + // - \x03foo[pointer to Y] + // + // i.e. we only need 6 bytes for this address instead of 1+3+1+52+1+4+1=63 bytes that we + // would need for the uncompressed version. + // + // Although this compression is optional, given how frequently we reuse long session router + // names (particularly for something like SRV records where a name can be repeated multiple + // times), and the DNS response size limit of 512 bytes, we implement that here. + + for (size_t pos = name.empty() ? std::string::npos : 0; pos != std::string_view::npos;) { + std::string_view check = name.substr(pos); + if (auto it = prev_names.find(check); it != prev_names.end()) + { + if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + uint16_t ptr = uint16_t{0b11000000'00000000} | it->second; + oxenc::write_host_as_big(ptr, buf.data()); + buf = buf.subspan(2); + buf_offset += 2; + // A pointer is terminal (i.e. no nullptr to add), so we're done. 
+ return; + } + + auto next = name.find('.', pos + 1); + auto part = next == std::string_view::npos ? check : name.substr(pos, next - pos); + size_t l = part.size(); if (l > 63 || l >= buf.size()) - return false; - buf.front() = static_cast(l); + throw std::out_of_range{"Buffer too small"}; + buf.front() = static_cast(l); // Length prefix std::memcpy(buf.data() + 1, part.data(), part.size()); + prev_names.emplace(std::string{check}, static_cast(buf_offset)); buf = buf.subspan(1 + part.size()); + buf_offset += 1 + part.size(); + + pos = next == std::string_view::npos ? next : next + 1; } + + // If we get here we wrote all the pieces without pointing at anything, so we need to append + // a null byte to terminate the name: if (buf.empty()) - return false; + throw std::out_of_range{"Buffer too small"}; buf.front() = std::byte{0}; buf = buf.subspan(1); - return orig - buf.size(); - } - - bool write_name_into(std::span& buf, std::string_view name) - { - if (auto s = encode_name(buf, name)) - { - buf = buf.subspan(s); - return true; - } - return false; + buf_offset++; } std::optional> decode_ptr(std::string_view name) diff --git a/src/dns/encode.hpp b/src/dns/encode.hpp index e0d5b7e4a..46ddd4b9e 100644 --- a/src/dns/encode.hpp +++ b/src/dns/encode.hpp @@ -6,45 +6,59 @@ #include #include +#include #include #include namespace srouter::dns { - /// Writes the encoded version of DNS name `name` into buf, and returns how many bytes of buf - /// were written. If buf is too small to store the encoded name, returns 0. 
- size_t encode_name(std::span buf, std::string_view name); + // Custom hasher to let us look up a string_view key in a string-keyed unordered map: + struct transparent_string_hash + { + using is_transparent = void; + [[nodiscard]] size_t operator()(std::string_view txt) const { return std::hash{}(txt); } + }; + + using prev_names_t = std::unordered_map>; - /// Same as encode_name, except that instead of returning the written size, on success it mutates the span - /// to drop the written prefix. Returns true (and prefix-drops the written part of the span) on success, - /// false on failure. Note that the failure case can still partially write into span. - bool write_name_into(std::span& buf, std::string_view name); + /// Writes the encoded version of DNS name `name` into buf, mutating buf to eliminate the + /// written bytes. Throws if buf is too small to store the encoded name. + /// + /// prev_names contains pointer values relative to the start of the message, used for name + /// compression, and buf_offset contains the relative position of the beginning of buf to the + /// start of the message. Names (and name suffixes) written here are added to prev_names so + /// that later repeated names (or name suffixes) can use compression, and buf_offset is + /// advanced by the number of bytes written. + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset); /// decode name from buffer, mutating the buffer to begin just past the extracted name. Return - /// nullopt (without mutating buf) on failure. + /// nullopt (without mutating buf) on failure. Does not currently support compressed names (but + /// those are not typically used in questions). std::optional extract_name(std::span& buf); /// Encodes an integer in big-endian order into the buffer, mutating the span to start just - /// after the written integer. Returns true on success, false if the span was too small. + /// after the written integer. Throws if buf is too small. Returns sizeof(T) (i.e.
the amount + /// written into the buffer), for convenience. template - bool write_int_into(std::span& buf, T value) + size_t write_int_into(std::span& buf, T value) { if (buf.size() < sizeof(T)) - return false; + throw std::out_of_range{"Buffer too small"}; oxenc::write_host_as_big(value, buf.data()); buf = buf.subspan(sizeof(T)); - return true; + return sizeof(T); } - // Calls write_int_info multiple times with the given integers. Returns true (and modifies buf) - // if all success. If any fail then false is returned and buf is left unchanged. + // Calls write_int_into multiple times with the given integers. Throws if the buffer is too + // small (possibly after some of the earlier values have already been written). Returns the + // total size of the given integers (i.e. the number of bytes written to buf), for + // convenience. template - bool write_ints_into(std::span& buf, T... values) + size_t write_ints_into(std::span& buf, T... values) { - if (buf.size() < (0 + ... + sizeof(T))) - return false; + // NB: it's tempting to want to use `return (0 + ... + write_int_into())` here, but + // left-to-right evaluation of + operands isn't guaranteed, and that could put things into + // buf in the wrong order. With , as used here it is guaranteed (similarly to || or &&). ((void)write_int_into(buf, values), ...); - return true; + return (0 + ... + sizeof(T)); } /// Extracts a big-endian integer of the given type from the buffer, mutating the span to start @@ -72,23 +86,6 @@ namespace srouter::dns return true; } - // Takes some object T with an `size_t encode(buf)` function (such as various classes in this - // dns code) and attempts to call it with the given buffer. If it returns success (non-0) then - // this mutates `buf` to skip the written data and returns true; on failure it returns false.
- template - bool encode_into(std::span& buf, const T& thing) - { - if (auto written = thing.encode(buf)) - { - buf = buf.subspan(written); - return true; - } - return false; - } - - // Writes encoded rr data into buf, mutating buf to point beyond the written data. Returns - // false (without mutating buf) if buf is too short; true on success. - bool write_rdata_into(std::span& buf, std::span rdata); // Extracts encoded rr data from buf, mutating buf to point beyond the extracted data. Returns // nullopt (without mutating buf) on error, the vector of decoded data on success. std::optional> extract_rdata(std::span& buf); diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ac699b738..8d4c12075 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -8,8 +8,12 @@ #include #include +#include #include +#include +#include +#include namespace srouter::dns { @@ -17,32 +21,101 @@ namespace srouter::dns Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } - size_t Message::encode(std::span buf) const + Message Message::clone() const { - auto orig = buf.size(); - if (!write_ints_into( - buf, - hdr_id, - hdr_fields, - static_cast(questions.size()), - static_cast(answers.size()), - static_cast(authorities.size()), - static_cast(additional.size()))) - return 0; + Message c; + c.hdr_id = hdr_id; + c.hdr_fields = hdr_fields; + c.questions = questions; + c.additional_edns = additional_edns; + // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) + return c; + } + + std::vector Message::encode() const + { + std::vector tmp; + // If the client signalled EDNS support then we can use a larger payload, otherwise DNS is + // limited to 512 bytes. + tmp.resize(additional_edns ? 
additional_edns->max_payload() : 512); + + prev_names_t prev_names; + std::span buf{tmp}; + uint16_t buf_offset = 0; + + buf_offset += write_ints_into( + buf, + hdr_id, + hdr_fields, + static_cast(questions.size()), + static_cast(answers.size()), + static_cast(0 /*authorities.size()*/), + static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); for (const auto& question : questions) - if (!encode_into(buf, question)) - return 0; + question.encode(buf, prev_names, buf_offset); for (auto& a : answers) - if (!encode_into(buf, a)) - return 0; + a->encode(buf, prev_names, buf_offset); - return orig - buf.size(); + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); + + return tmp; + } + + static std::array make_server_cookie( + std::span client_cookie, + std::span client_ip, + std::span server_cookie_secret, + std::chrono::sys_seconds ts = std::chrono::floor(std::chrono::system_clock::now())) + { + assert(client_ip.size() == 4 || client_ip.size() == 16); + + static_assert(server_cookie_secret.size() == crypto_shorthash_siphash24_KEYBYTES); + + std::array cookie; + auto ccookie = std::span{cookie}.first<8>(); + auto scookie = std::span{cookie}.last<16>(); + std::memcpy(ccookie.data(), client_cookie.data(), 8); + + // The first 8 bytes of the server cookie (as per RFC 9018) are: + // - version (always 1) + // - three reserved bytes + // - 4-byte, uint32 unix timestamp + scookie[0] = std::byte{1}; // Version + scookie[1] = std::byte{0}; // - + scookie[2] = std::byte{0}; // - reserved + scookie[3] = std::byte{0}; // - + auto ts_val = static_cast(ts.time_since_epoch().count()); + oxenc::write_host_as_big(ts_val, &scookie[4]); + + // The last 8 bytes of the server cookie are a hash of 8-byte client + // cookie, then the above 8 bytes server cookie fields, then the + // 4- or 16-byte client IP (in network order notation). 
+ std::array hash_data{{0}}; + std::memcpy(hash_data.data(), ccookie.data(), 8); + std::memcpy(hash_data.data() + 8, scookie.data(), 8); + std::memcpy(hash_data.data() + 16, client_ip.data(), client_ip.size()); + crypto_shorthash_siphash24( + reinterpret_cast(scookie.data() + 8), + hash_data.data(), + 16 + client_ip.size(), + reinterpret_cast(server_cookie_secret.data())); + + return cookie; } - std::optional Message::extract(std::span& buf) + std::optional Message::extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_ip) { + if (client_ip.size() != 4 && client_ip.size() != 16) + throw std::logic_error{"Invalid client IP for Message::extract_question"}; auto maybe = std::make_optional(); auto& m = *maybe; uint16_t qd_count, an_count, ns_count, ar_count; @@ -52,23 +125,128 @@ namespace srouter::dns return maybe; } m.questions.resize(qd_count); - m.answers.resize(an_count); // Ignore these: + // m.answers.resize(an_count); // m.authorities.resize(ns_count); // m.additional.resize(ar_count); - for (auto& q : m.questions) + try { - if (!q.extract(buf)) + for (auto& q : m.questions) + if (!q.extract(buf)) + throw std::invalid_argument{"invalid question"}; + + // Skip any answers or authority records: + for (uint16_t i = 0; i < an_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid answer RR"}; + for (uint16_t i = 0; i < ns_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid authority RR"}; + + // In the additional section we look for an EDNS entry, and skip anything else: + for (uint16_t i = 0; i < ar_count; i++) { - log::debug(logcat, "failed to decode question"); - maybe.reset(); - return maybe; + static_assert(crypto_shorthash_siphash24_KEYBYTES == 16); + auto a_rr = ParsedRR::extract(buf); + if (!a_rr) + throw std::invalid_argument{"invalid additional RR"}; + if (a_rr->name != "." 
|| a_rr->rr_type != RRType::OPT) + { + continue; + } + + if (m.additional_edns) + throw std::invalid_argument{"found invalid multiple additional OPT records"}; + + auto max_payload = static_cast(a_rr->rr_class); + // RFC 6891 (section 6.2.3) requires an advertised payload size below 512 to be treated + // as 512; without the lower bound a tiny advertised size would leave too little room + // to encode any response. The upper bound of 1232 is unchanged. + m.additional_edns.emplace(std::clamp<uint16_t>(max_payload, 512, 1232)); + + for (auto optbuf = a_rr->rdata; !optbuf.empty();) + { + if (optbuf.size() < 4) + throw std::invalid_argument{"additional OPT data section too small"}; + auto opt_code = oxenc::load_big_to_host(optbuf.data()); + auto opt_len = oxenc::load_big_to_host(optbuf.data() + 2); + optbuf = optbuf.subspan(4); + if (opt_len > optbuf.size()) + throw std::invalid_argument{"additional OPT option value length too small"}; + auto value = optbuf.subspan(0, opt_len); + optbuf = optbuf.subspan(opt_len); + + if (opt_code == PRR_EDNS::OPT_COOKIE) + { + if (m.additional_edns->cookie) + throw std::invalid_argument{"Duplicate OPT client cookies"}; + + if (value.size() == 8) + { + // This is the client sending a new cookie, requesting a new server + // cookie (i.e. because it doesn't currently have one). + + m.additional_edns->cookie = + make_server_cookie(value.first<8>(), client_ip, server_cookie_secret); + } + else if (value.size() == 24) + { + // This is the client sending its cookie along with a previously + // obtained server cookie for that client cookie, so we are supposed + // to validate it. + auto ccookie = value.first<8>(); + auto scookie = value.last<16>(); + + std::chrono::sys_seconds ts{ + std::chrono::seconds{oxenc::load_big_to_host(&scookie[4])}}; + + auto expected = make_server_cookie(ccookie, client_ip, server_cookie_secret, ts); + bool bad_cookie = std::memcmp(value.data(), expected.data(), 24) != 0; + + auto now = std::chrono::floor(std::chrono::system_clock::now()); + + if (!bad_cookie && ts >= now - 30min && ts <= now + 5min) + // Cookie is good and the timestamp in it is close to now, so the + // cookie stays as-is.
+ std::memcpy(m.additional_edns->cookie.emplace().data(), value.data(), 24); + + else + { + // If the cookie timestamp is too far away then it is a badcookie + // failure. (We don't have to worry about client clock skew because + // supposedly *we* issued this with the timestamp in it). + if (bad_cookie || ts < now - 1h || ts > now + 5min) + { + // When this is set we'll send a proper bad cookie response + // immediately after parsing: + m.additional_edns->bad_cookie = true; + // Extended rcode is, um, a wee bit hacky: we put the high 8 + // bits of the 12-bit error code into the OPT TTL field, and + // then continue to use the 4-bit RCODE for the bottom 4 bits. + m.additional_edns->ttl = + std::chrono::seconds{(uint32_t{PRR_EDNS::EXT_RCODE_BADCOOKIE} >> 4) << 24}; + // (The other bytes are all 0 values) + } + + // else it's valid, just a little bit (but not too) old and they are + // due for a new cookie. + + // In either of the above cases, we give the client a new cookie + // to use, with an updated new timestamp + m.additional_edns->cookie = + make_server_cookie(ccookie, client_ip, server_cookie_secret, now); + } + } + // Else we have an unparseable/non-understood cookie, and so we are supposed + // to ignore the option and discard the cookie data. 
+ } + } } } - for (auto* as : {&m.answers, &m.authorities, &m.additional}) - if (!as->empty()) - log::debug(logcat, "Ignoring answer/authorities/additional sections in dns Message"); + catch (const std::exception& e) + { + log::debug(logcat, "failed to parse DNS message: {}", e.what()); + maybe.reset(); + } return maybe; } @@ -81,158 +259,77 @@ namespace srouter::dns for (const auto& q : questions) ques.push_back(q.ToJSON()); for (const auto& a : answers) - ans.push_back(a.ToJSON()); + ans.push_back(a->ToJSON()); return result; } - std::vector Message::encode() const - { - std::vector tmp; - tmp.resize(1500); - auto size = encode(tmp); - if (size == 0) - throw std::runtime_error("cannot encode dns message"); - tmp.resize(size); - return tmp; - } - - void Message::add_serv_fail() - { - if (questions.size()) - { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - } - } + void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } + // TODO FIXME: "RA" means we advertise that we support recursion, but we should only do that + // when we have an upstream DNS server available. 
(This TODO is also in server.cpp) static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; - void Message::add_reply(ipv4 addr, std::chrono::seconds ttl) - { - std::vector a; - a.resize(4); - oxenc::write_host_as_big(addr.addr, a.data()); - add_reply(RRClass::IN, RRType::A, std::move(a), ttl); - } - - void Message::add_reply(ipv6 addr, std::chrono::seconds ttl) + void Message::add_nodata_reply() { - std::vector aaaa; - aaaa.resize(16); - oxenc::write_host_as_big(addr.hi, aaaa.data()); - oxenc::write_host_as_big(addr.lo, aaaa.data() + 8); - return add_reply(RRClass::IN, RRType::AAAA, std::move(aaaa), ttl); + if (not questions.empty()) + hdr_fields |= reply_flags; } - void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } - - void Message::add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl) + template RR, typename... Args> + void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) { - if (questions.empty()) + if (m.questions.empty()) return; - hdr_fields |= reply_flags; + m.hdr_fields |= reply_flags; - auto& ans = answers.emplace_back(); - ans.rr_name = get_rr_name(); - ans.rr_type = type; - ans.rr_class = cls; - ans.ttl = ttl; - ans.rData = std::move(data); + m.answers.push_back(std::make_unique(std::string{m.get_rr_name()}, ttl, std::forward(args)...)); } - void Message::add_nodata_reply() - { - if (not questions.empty()) - hdr_fields |= reply_flags; - } + void Message::add_reply(const ipv4& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } + + void Message::add_reply(const ipv6& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::CNAME, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode CNAME value {}", name); + make_reply(*this, ttl, 
std::string{name}); } void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::PTR, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode PTR value {}", name); + make_reply(*this, ttl, std::string{name}); } - void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - if (!write_ints_into(remaining, srv.priority, srv.weight, srv.port)) - return; - if (!write_name_into(remaining, srv.target)) - return; + void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) { make_reply(*this, ttl, srv); } - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); - } + void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } - void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) + void Message::set_nx_reply() { - std::array tmp; - std::span remaining{tmp}; - while (!txt.empty()) + answers.clear(); + // authorities.clear(); + // additional.clear(); + + if (questions.size()) { - auto piecelen = std::min(txt.size(), size_t{255}); - if (remaining.size() <= piecelen) - throw std::length_error{"TXT record too big"}; - remaining.front() = static_cast(piecelen); - std::memcpy(remaining.data() + 1, txt.data(), piecelen); - txt.remove_prefix(piecelen); - remaining = remaining.subspan(1 + piecelen); + hdr_fields |= flags_RCODENxDomain; + // authorative response with recursion available + hdr_fields |= reply_flags; } - - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); } - void Message::add_nx_reply() + void Message::set_serv_fail() { + answers.clear(); + if (questions.size()) { - answers.clear(); - authorities.clear(); - additional.clear(); - + hdr_fields |= 
flags_RCODEServFail; // authorative response with recursion available hdr_fields |= reply_flags; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - hdr_fields |= flags_RCODENxDomain; + // A servfail is not an authoritative answer, so clear that bit: + hdr_fields &= ~flags_AA; } } - std::string Message::to_string() const - { - return fmt::format( - "[DNSMessage id={:x} fields={:x} questions={{{}}} answers={{{}}} authorities={{{}}} " - "additional={{{}}}]", - hdr_id, - hdr_fields, - fmt::join(questions, ","), - fmt::join(answers, ","), - fmt::join(authorities, ","), - fmt::join(additional, ",")); - } - } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index b12369091..32bb8ee01 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -21,13 +21,28 @@ namespace srouter Message() = default; explicit Message(const Question& question); + // Non-copyable; see clone() if you want a copy with just the questions. + Message(const Message&) = delete; + + Message(Message&&) = default; + + // Clones the message with question/flags/edns response data, but with no answers + Message clone() const; + nlohmann::json ToJSON() const; static constexpr auto DEFAULT_ANSWER_TTL = 10s; - void add_nx_reply(); + // These two clear any answers that may have been added and then set the appropriate + // flags for a NXDomain (i.e. authoritative reply that the requested thing does not + // exist) or a ServFail (i.e. we don't know how to answer, maybe try someone else). + void set_nx_reply(); + void set_serv_fail(); - void add_serv_fail(); + // This clears any answers and sets the appropriate header flags for a BADCOOKIE + // response. Note that this is only valid when the message has `additional_edns` as + // part of this error code value is carried in that additional RR data. + void set_badcookie_flags(); // Sets the RR name for future added entries, or resets it to default with nullopt. 
The // default (if not called or reset) is to use the question's name value. Once set, the @@ -43,9 +58,9 @@ namespace srouter void add_cname_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN A' reply containing the given ipv4 address - void add_reply(ipv4 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv4& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN AAAA' reply containing the given ipv6 address - void add_reply(ipv6 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv6& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); void add_reply(const SRVData& srv, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); @@ -53,19 +68,34 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - size_t encode(std::span buf) const; std::vector encode() const; - static std::optional extract(std::span& buf); + // Parses a question Message from the given buf, removing the question from the prefix + // of buf. `server_cookie_secret` and `client_addr` contains information needed for DNS + // cookie handling; `server_cookie_secret` is something derived from the SR private key + // seed + startup time, while client_addr is the raw bytes of the IP address (4 or 16 + // bytes for IPv4/IPv6, respectively). 
+ static std::optional extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_addr); std::string to_string() const; uint16_t hdr_id; uint16_t hdr_fields; + std::vector questions; - std::vector answers; - std::vector authorities; - std::vector additional; + std::vector> answers; + + // Currently unused: + // std::vector authorities; + // std::vector additional; + + // Currently the only additional record we do anything with is the OPT section for + // enabling EDNS (most significantly for allowing large DNS packets) + std::optional additional_edns; + std::optional rr_name_override; private: diff --git a/src/dns/question.cpp b/src/dns/question.cpp index 0473e6243..a72abef17 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -19,14 +19,10 @@ namespace srouter::dns throw std::invalid_argument{"qname cannot be empty"}; } - size_t Question::encode(std::span buf) const + void Question::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf; - if (!write_name_into(buf, qname)) - return 0; - if (!write_ints_into(buf, static_cast(qtype), static_cast(qclass))) - return 0; - return orig.size() - buf.size(); + encode_name(buf, qname, prev_names, buf_offset); + buf_offset += write_ints_into(buf, static_cast(qtype), static_cast(qclass)); } bool Question::extract(std::span& buf) diff --git a/src/dns/question.hpp b/src/dns/question.hpp index 1647b6a81..4efc35b5d 100644 --- a/src/dns/question.hpp +++ b/src/dns/question.hpp @@ -11,7 +11,7 @@ namespace srouter::dns Question() = default; Question(std::string name, RRType type); - size_t encode(std::span buf) const; + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; bool extract(std::span& buf); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index fcd1070a8..b0c53a925 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -5,41 +5,129 @@ #include #include +#include +#include namespace srouter::dns { - 
ResourceRecord::ResourceRecord(std::string name, RRType type, std::vector data) - : rr_name{std::move(name)}, rr_type{type}, rr_class{RRClass::IN}, ttl{1s}, rData{std::move(data)} - {} + std::optional ParsedRR::extract(std::span& buf) + { + auto name = extract_name(buf); + if (!name || buf.size() < 2 + 2 + 4 + 2 /* type + class + ttl + rdatalen */) + return std::nullopt; + auto typ = oxenc::load_big_to_host(buf.data()); + auto cls = oxenc::load_big_to_host(buf.data() + 2); + auto ttl = oxenc::load_big_to_host(buf.data() + 4); + auto len = oxenc::load_big_to_host(buf.data() + 8); + buf = buf.subspan(10); + + if (buf.size() < len) + return std::nullopt; + + auto rdata = buf.subspan(0, len); + buf = buf.subspan(len); + + return ParsedRR{ + .name = std::move(*name), + .rr_type = static_cast(typ), + .rr_class = static_cast(cls), + .ttl = std::chrono::seconds{ttl}, + .rdata = rdata}; + } + + static void check_buf_size(const std::span& buf, size_t needed) + { + if (buf.size() < needed) + throw std::out_of_range{"DNS response exceeds max size"}; + } - size_t ResourceRecord::encode(std::span buf) const + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf.size(); - if (write_name_into(buf, rr_name) - && write_ints_into( - buf, - static_cast(rr_type), - static_cast(rr_class), - static_cast(ttl.count())) - && write_rdata_into(buf, rData)) - return orig - buf.size(); - return 0; + encode_name(buf, rr_name, prev_names, buf_offset); + buf_offset += write_ints_into( + buf, static_cast(rr_type()), static_cast(rr_class), static_cast(ttl.count())); + // The RR value is in a chunk with a 2-byte length in front of it. We don't actually know + // the length yet (especially for things like CNAME, where there might be name compression), + // so we're going to stick a 0 in and then come back and fill it in after we write the + // value. 
+ check_buf_size(buf, 2); + auto size_buf = buf.subspan(0, 2); + buf_offset += 2; + buf = buf.subspan(2); + encode_data(buf, prev_names, buf_offset); + uint16_t size = buf.data() - size_buf.data() - 2; + oxenc::write_host_as_big(size, size_buf.data()); } nlohmann::json ResourceRecord::ToJSON() const { return nlohmann::json{ {"name", rr_name}, - {"type", static_cast(rr_type)}, + {"type", static_cast(rr_type())}, {"class", static_cast(rr_class)}, {"ttl", ttl.count()}, - {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}}; + /* FIXME: need to virtualize a display for the data, if we care about json representation: + {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}*/}; } std::string ResourceRecord::to_string() const { - return "RR:[ name:{} | type:{} | class:{} | ttl:{} | rdata-size:{} ]"_format( - rr_name, static_cast(rr_type), static_cast(rr_class), ttl, rData.size()); + return "RR:[name:{}|type:{}|class:{}|ttl:{}]"_format( + rr_name, static_cast(rr_type()), static_cast(rr_class), ttl); + } + + void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + check_buf_size(buf, rData.size()); + std::memcpy(buf.data(), rData.data(), rData.size()); + buf = buf.subspan(rData.size()); + buf_offset += rData.size(); + } + + RR_A::RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr) : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(4); + oxenc::write_host_as_big(addr.addr, rData.data()); + } + + RR_AAAA::RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr) + : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(16); + oxenc::write_host_as_big(addr.hi, rData.data()); + oxenc::write_host_as_big(addr.lo, rData.data() + 8); + } + + RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) + : RR_bytes{std::move(rr_name), ttl} + { + // TXT RDATA is one or more character-strings (RFC 1035 section 3.3.14): each is a single + // length byte followed by up to 255 bytes of text, so longer values are split into + // multiple pieces. An empty value is written as a single zero-length piece. + // NOTE(review): assumes rData is a std::vector<std::byte> -- confirm against rr.hpp. + rData.reserve(value.size() + value.size() / 255 + 1); + do + { + auto piece = value.substr(0, 255); + rData.push_back(static_cast<std::byte>(piece.size())); + auto* bytes = reinterpret_cast<const std::byte*>(piece.data()); + rData.insert(rData.end(), bytes, bytes + piece.size()); + value.remove_prefix(piece.size()); + } while (!value.empty()); + } + + void
RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, name, prev_names, buf_offset); + } + + void RR_SRV::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + buf_offset += write_ints_into(buf, priority, weight, port); + encode_name(buf, target, prev_names, buf_offset); + } + + void PRR_EDNS::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (cookie) + { + uint16_t datalen = 2 + 2 + cookie->size(); // code + length + data + check_buf_size(buf, datalen); + oxenc::write_host_as_big(OPT_COOKIE, buf.data()); + oxenc::write_host_as_big(static_cast(cookie->size()), buf.data() + 2); + std::memcpy(buf.data() + 4, cookie->data(), cookie->size()); + buf = buf.subspan(datalen); + buf_offset += datalen; + } } } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 798a624a4..85148f171 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -1,5 +1,8 @@ #pragma once +#include "encode.hpp" +#include "srv_data.hpp" + #include #include @@ -15,34 +18,164 @@ namespace srouter::dns enum class RRType : uint16_t { A = 1, - NS = 2, CNAME = 5, PTR = 12, - MX = 15, TXT = 16, AAAA = 28, SRV = 33, + + OPT = 41, + }; + + // Parsed RR data: this is intentionally very raw and is only for extracting the data, not + // interpreting it. Note that the rdata value points into the input buf: the ParsedRR data + // should not be held longer than the input buffer! + struct ParsedRR + { + std::string name; + RRType rr_type; // *Not* necessarily one of the values defined above + RRClass rr_class; // *Not* necessarily one of the values defined above + std::chrono::seconds ttl; + std::span rdata; + + // Attempts to parse an RR from the beginning of `buf`. `buf` will have the prefix removed + // containing the extracted record. Returns nullopt on extraction error. 
+        static std::optional extract(std::span& buf);
     };

+    // Abstract base class we use for building RR responses.  Concrete record types report their
+    // type through rr_type() and write their value through encode_data().
     struct ResourceRecord
     {
-        ResourceRecord() = default;
-        explicit ResourceRecord(std::string name, RRType type, std::vector rdata);
+        ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {}
+
+        // This is a polymorphic base (encode_data() and rr_type() are virtual), so the destructor
+        // must be virtual for a derived record to be destroyed correctly through a
+        // ResourceRecord pointer.  Declaring it suppresses the implicit move operations, so copy
+        // and move are explicitly defaulted to keep records copyable and movable.
+        virtual ~ResourceRecord() = default;
+        ResourceRecord(const ResourceRecord&) = default;
+        ResourceRecord(ResourceRecord&&) = default;
+        ResourceRecord& operator=(const ResourceRecord&) = default;
+        ResourceRecord& operator=(ResourceRecord&&) = default;
+
+        // Writes this RR to the beginning of buf, eliminating the written section from buf.  Throws if buf is exceeded.
+        //
+        // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual
+        // encode_data() to write the value.
+        void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const;

-        // Writes this RR to the beginning of buf.  Returns the number of bytes written, or 0 if the
-        // buffer is too small to hold it.
-        size_t encode(std::span buf) const;
+        virtual void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const = 0;  // writes only the record value; must advance buf and buf_offset past it

         nlohmann::json ToJSON() const;

         std::string to_string() const;

         std::string rr_name;
-        RRType rr_type;
-        RRClass rr_class;
+        RRClass rr_class = RRClass::IN;  // PRR_EDNS overwrites this with the UDP payload size
         std::chrono::seconds ttl;
-        std::vector rData;
+
+        virtual RRType rr_type() const = 0;  // record type, supplied by each concrete subclass

         static constexpr bool to_string_formattable = true;
     };
+
+    // Subclass of ResourceRecord that just has a binary chunk of data.  Should not be used for data
+    // types containing compressible names in the value.  The subclass must take care of encoding
+    // the rData member value as required; this base class encode_data simply barfs it into the
+    // buffer as-is.
+    struct RR_bytes : ResourceRecord
+    {
+        std::vector rData;  // raw value bytes; encode_data() copies them into the output unchanged
+
+        using ResourceRecord::ResourceRecord;
+
+        void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override;
+    };
+
+    struct RR_A : RR_bytes  // A record: the constructor fills rData with the 4-byte address
+    {
+        RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr);
+        RRType rr_type() const override { return RRType::A; }
+    };
+    struct RR_AAAA : RR_bytes  // AAAA record: the constructor fills rData with the 16-byte address
+    {
+        RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr);
+        RRType rr_type() const override { return RRType::AAAA; }
+    };
+    struct RR_TXT : RR_bytes  // TXT record: the constructor copies the bytes of `value` into rData
+    {
+        RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value);
+        RRType rr_type() const override { return RRType::TXT; }
+    };
+
+    // Base class for RR types that have a single target name as the value, such as CNAME and PTR
+    struct RR_target : ResourceRecord
+    {
+        std::string name;  // the target name written as the record value (distinct from rr_name, the owner name)
+
+        RR_target(std::string rr_name, std::chrono::seconds ttl, std::string name)
+            : ResourceRecord{std::move(rr_name), ttl}, name{std::move(name)}
+        {}
+
+        void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override;
+    };
+
+    struct RR_PTR : RR_target
+    {
+        using RR_target::RR_target;
+        RRType rr_type() const override { return RRType::PTR; }  // must be PTR (12); returning A would mislabel the record on the wire
+    };
+    struct RR_CNAME : RR_target
+    {
+        using RR_target::RR_target;
+        RRType rr_type() const override { return RRType::CNAME; }
+    };
+    struct RR_SRV : ResourceRecord  // SRV record: value is priority, weight, port, then the target name
+    {
+        uint16_t priority;
+        uint16_t weight;
+        uint16_t port;
+        std::string target;
+
+        RR_SRV(std::string rr_name, std::chrono::seconds ttl, const SRVData& srv)
+            : ResourceRecord{std::move(rr_name), ttl},
+              priority{srv.priority},
+              weight{srv.weight},
+              port{srv.port},
+              target{srv.target}
+        {}
+
+        RRType rr_type() const override { return RRType::SRV; }
+        void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override;
+    };
+
+    // Pseudo-RR for EDNS; a client sends this in the additional section if it supports EDNS, and
+    // the server
sends it back (if provided) to confirm that the server also supports EDNS.
+    struct PRR_EDNS : ResourceRecord
+    {
+        static constexpr uint16_t OPT_COOKIE = 10;  // option code encode_data() writes ahead of the cookie
+        static constexpr uint16_t EXT_RCODE_BADCOOKIE = 23;  // extended rcode value reported for a bad cookie
+
+        std::optional> cookie;  // when unset, encode_data() writes no option data
+
+        // Will be true if the full cookie we were provided was invalid or expired, in which case we
+        // are supposed to immediately fail with an extended BADCOOKIE error code (which will be
+        // encoded if this object is encoded into the output with this bool set to true).
+        bool bad_cookie{false};
+
+        // Constructs an EDNS value.  This is rather hacky, to try to mash it into the fairly
+        // inflexible older DNS protocol:
+        // - NAME is always empty (i.e. ".", the root domain)
+        // - 32-bit TTL is nothing to do with ttl, but actually 3 packed fields:
+        //   - 8-bit "extended rcode"
+        //   - 8-bit version (currently 0)
+        //   - 16-bit flags of which there is one for DNSSEC and all others are reserved
+        //   We currently always use 0 as we don't use extended rcode or dnssec.
+        // - CLASS isn't a class at all but rather contains the supported UDP payload size.  We set
+        //   it to the recommended 1232 size, but if a client gave us a smaller value we should
+        //   reflect that instead.
+        //
+        // Beyond that, we support an optional DNS server cookie value (see RFC 7873 and 9018),
+        // which must be the 8-byte cookie sent by the client followed by a 16 byte server cookie.
+        PRR_EDNS(uint16_t max_payload, std::optional> cookie = std::nullopt)
+            : ResourceRecord{"", 0s}, cookie{std::move(cookie)}  // empty NAME and a zero TTL field, as described above
+        {
+            rr_class = static_cast(max_payload);  // the CLASS field carries the UDP payload size
+        }
+
+        uint16_t max_payload() const { return static_cast(rr_class); }  // reads the payload size back out of rr_class
+        constexpr RRType rr_type() const override { return RRType::OPT; }
+        void encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const override;
+    };
+
 } // namespace srouter::dns
diff --git a/src/dns/server.cpp b/src/dns/server.cpp index 2395538fb..721ec9724 100644 --- a/src/dns/server.cpp +++ b/src/dns/server.cpp @@ -2,6 +2,7 @@ #include "constants/apple.hpp" #include "constants/platform.hpp" +#include "dns.hpp" #include "message.hpp" #include "nm_platform.hpp" #include "sd_platform.hpp" @@ -9,6 +10,7 @@ #include #include #include +#include #include #include @@ -22,8 +24,8 @@ namespace srouter::dns void QueryJob_Base::cancel() { - Message reply{_query}; - reply.add_serv_fail(); + Message reply = _query.clone(); + reply.set_serv_fail(); send_reply(reply.encode()); } @@ -416,7 +418,7 @@ namespace srouter::dns const quic::Address& from) override { log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query, source, to, from); + auto tmp = std::make_shared(weak_from_this(), query.clone(), source, to, from); // no questions, send fail if (query.questions.empty()) { @@ -522,7 +524,9 @@ namespace srouter::dns Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - {} + { + randombytes_buf(_cookie_secret.data(), _cookie_secret.size()); + } std::vector> Server::get_all_resolvers() const { @@ -656,14 +660,38 @@ namespace srouter::dns return false; } - auto maybe = Message::extract(payload); + std::span client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr),
16}; + + auto maybe = Message::extract_question(payload, _cookie_secret, client_ip); if (not maybe) { log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); return false; } - auto& msg = *maybe; + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. + msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. (This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + ptr->send_udp(from, to, msg.encode()); + return true; + } + // we don't provide a DoH resolver because it requires verified TLS // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal // thankfully mozilla added a backdoor that allows ISPs to turn it off @@ -675,7 +703,7 @@ namespace srouter::dns if (q.name() == "use-application-dns.net") { // yea it is, let's turn off DoH because god is dead. 
- msg.add_nx_reply(); + msg.set_nx_reply(); // press F to pay respects and send it back where it came from ptr->send_udp(from, to, msg.encode()); return true; diff --git a/src/dns/server.hpp b/src/dns/server.hpp index a01b90757..4ba6e43a9 100644 --- a/src/dns/server.hpp +++ b/src/dns/server.hpp @@ -68,11 +68,8 @@ namespace srouter::dns public: explicit QueryJob( - std::shared_ptr source, - const Message& query, - const quic::Address& to_, - const quic::Address& from_) - : QueryJob_Base{query}, src{std::move(source)}, resolver{to_}, asker{from_} + std::shared_ptr source, Message query, const quic::Address& to_, const quic::Address& from_) + : QueryJob_Base{std::move(query)}, src{std::move(source)}, resolver{to_}, asker{from_} {} void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } @@ -185,6 +182,12 @@ namespace srouter::dns private: const unsigned int m_NetIfIndex; + + // Secret value we use as a key in DNS server cookie hashing. We generate a random once on + // each startup as we currently have no need for this to be deterministic, and that + // introduces rotation whenever we restart. + std::array _cookie_secret; + // TODO FIXME: this ownership model is cursed. 
std::set, ComparePtr>> _owned_resolvers; std::set, CompareWeakPtr> _resolvers; diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index c33612ac5..76d119a80 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -36,8 +36,8 @@ namespace srouter::handlers if (not should_hook_dns_message(query)) return false; - auto job = std::make_shared(source, query, to, from); - if (!handle_hooked_dns_message(query, [job](dns::Message msg) { job->send_reply(msg.encode()); })) + auto job = std::make_shared(source, query.clone(), to, from); + if (!handle_hooked_dns_message(query.clone(), [job](dns::Message msg) { job->send_reply(msg.encode()); })) job->cancel(); return true; } @@ -336,8 +336,8 @@ namespace srouter::handlers static dns::Message& clear_dns_message(dns::Message& msg) { - msg.authorities.clear(); - msg.additional.clear(); + // msg.authorities.clear(); + // msg.additional.clear(); msg.answers.clear(); msg.hdr_fields &= ~dns::flags_RCODENxDomain; return msg; @@ -451,7 +451,7 @@ namespace srouter::handlers } else { - msg.add_nx_reply(); + msg.set_nx_reply(); reply(std::move(msg)); return true; } @@ -472,11 +472,12 @@ namespace srouter::handlers lookup, sub = std::move(sub), reply = std::move(reply), - msg = std::move(msg), + msg_ptr = std::make_shared(std::move(msg)), cname_only = q.qtype == dns::RRType::CNAME]( std::optional maybe_netaddr, bool assertive, std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; msg.set_rr_name(lookup); if (maybe_netaddr) { @@ -494,7 +495,7 @@ namespace srouter::handlers { // We got an assertive "does not exist" message (and not just a failure // or timeout), so add the nx reply - msg.add_nx_reply(); + msg.set_nx_reply(); // FIXME: we should be able to provide a TTL here } else @@ -505,7 +506,7 @@ namespace srouter::handlers // server). 
assert(!assertive); // FIXME: should be able to specify a TTL here - msg.add_nx_reply(); + msg.set_nx_reply(); } reply(std::move(msg)); }); @@ -530,11 +531,11 @@ namespace srouter::handlers fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); } else - msg.add_nx_reply(); + msg.set_nx_reply(); } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -576,15 +577,15 @@ namespace srouter::handlers // "this record does not exist"). } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } // Otherwise it's some query type we don't support, so return does-not-exist. - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -605,9 +606,9 @@ namespace srouter::handlers *ip); if (!found) - msg.add_nx_reply(); + msg.set_nx_reply(); - reply(msg); + reply(std::move(msg)); return true; } @@ -618,25 +619,25 @@ namespace srouter::handlers { _router.session_endpoint().lookup_client_intro( *rid, - [msg = std::move(msg), sub, reply = std::move(reply)]( + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( const std::optional& cc) mutable { if (cc) { for (const auto& srv : cc->SRVs()) if (srv.service == sub[0] && srv.proto == sub[1]) - msg.add_reply(srv); + msg->add_reply(srv); } else - msg.add_nx_reply(); + msg->set_nx_reply(); - reply(msg); + reply(std::move(*msg)); }); return true; } } - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index 9f19c073a..c70079969 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -28,6 +28,7 @@ namespace srouter::rpc log::info(logcat, "RPC Server received request for endpoint `{}`", req.name); } +#if 0 // Fake packet source that serializes repsonses back into dns class DummyPacketSource final : public dns::PacketSource { @@ -47,6 
+48,7 @@ namespace srouter::rpc /// returns the sockaddr we are bound on if applicable std::optional bound_on() const override { return std::nullopt; } }; +#endif bool check_path(std::string path) {