From 7473acf0d431ca53367ff1eb84a1a6daf915a6cd Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Sat, 8 Nov 2025 02:29:43 -0400 Subject: [PATCH 1/4] Implement DNS name compression This implements DNS name compression (see the description added in dns/encode.cpp `encode_name()` for details). This reduces the sizes of nearly all query responses (because every response answer includes the question, and then uses the name again for the response), and in some cases significantly so. This is rather important for Session Router because all of our names are 52 byte pubkeys (plus the 4 or 5 byte tld), and so we are potentially running up against the DNS 512-byte max message size. (We should also enable EDNS to allow longer messages, but that is left here as a FIXME and not yet implemented). An example shows how the compression helps: an AAAA query for `localhost.sesh` responds with an answer of: ;; QUESTION SECTION: ;localhost.sesh. IN AAAA ;; ANSWER SECTION: localhost.sesh. 10 IN CNAME sh6tnpf84s885m8ygsjw7g8qjuo1jk7ydufiog8sjdtgkhb3w8iy.sesh. sh6tnpf84s885m8ygsjw7g8qjuo1jk7ydufiog8sjdtgkhb3w8iy.sesh. 10 IN AAAA fd2e:7365:7368::1 The repeated question doesn't compress anything, of course, but once you hit the answer, you start getting savings: The repeated `localhost.sesh` in the first answer gets compressed from 16 bytes (without compression) to a 2-byte pointer (back to the same address in the question). The first PUBKEY.sesh (in the CNAME target) gets slightly reduced by being able to encode the trailing `sesh` from 6 bytes uncompressed (4+"sesh"+\0) to a 2-byte pointer (again back into the question, pointing just at the sesh tld rather than the entire localhost.sesh value). The PUBKEY.sesh. in the second answer gets hugely reduced: an uncompressed 59 bytes (52+"pubkey"+4+"sesh"+0) becomes a simple 2-byte pointer to the same name in the previous answer line. 
For some queries like SRV records the savings are potentially even larger, especially when there are multiple SRV entries for a .sesh address. --- src/dns/encode.cpp | 85 ++++++++++++++---- src/dns/encode.hpp | 67 +++++++------- src/dns/message.cpp | 208 ++++++++++++++++--------------------------- src/dns/message.hpp | 30 +++++-- src/dns/question.cpp | 10 +-- src/dns/question.hpp | 2 +- src/dns/rr.cpp | 83 +++++++++++++---- src/dns/rr.hpp | 94 ++++++++++++++++--- src/dns/server.cpp | 8 +- src/dns/server.hpp | 7 +- src/handlers/tun.cpp | 47 +++++----- 11 files changed, 379 insertions(+), 262 deletions(-) diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 5318d72e8..7923d26fe 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -46,36 +46,85 @@ namespace srouter::dns return name; } - size_t encode_name(std::span buf, std::string_view name) + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset) { - auto orig = buf.size(); if (name.size() && name.back() == '.') name.remove_suffix(1); - for (auto part : srouter::split(name, ".")) + // Look for a previously used suffix of this name. For instance, if we have a response + // consisting of: + // + // localhost.sesh IN CNAME mylongpubkey.sesh + // foo.mylongpubkey.sesh IN AAAA 1:2:3::4 + // + // then when we repeat the question itself (IN AAAA localhost.sesh) we echo that question + // back into the response as the 16 bytes: + // \x09localhost\x04sesh\x00 + // Suppose that this was written at location Z in the DNS message, this creates two + // pointable addresses: + // - "localhost.sesh" -> Z + // - "sesh" -> Z+10 + // + // Then we come to the answers, and for the first "localhost.sesh" value, we can simply + // write that as a single pointer [Z] (where the pointer is a 16-bit, big-endian value with + // the highest two bits set and the remaining 14 bits set to "Z"). 
+ // + // Then we get to "mylongpubkey.sesh" and we can encode that as: + // + // \x34mylongpubkey[pointer to Z+10] + // + // This also creates a new pointable address: + // - "mylongpubkey.sesh" -> Y + // + // Then we come to foo.mylongpubkey.sesh and we can encode this as: + // + // - \x03foo[pointer to Y] + // + // i.e. we only need 6 bytes for this address instead of 1+3+1+52+1+4+1=63 bytes that we + // would need for the uncompressed version. + // + // Although this compression is optional, given how frequently we reuse long session router + // names (particularly for something like SRV records where a name can be repeated multiple + // times), and the DNS response size limit of 512 bytes, we implement that here. + + size_t pos = 0; + do { + std::string_view check = name.substr(pos); + if (auto it = prev_names.find(check); it != prev_names.end()) + { + if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + uint16_t ptr = uint16_t{0b11000000'00000000} | it->second; + oxenc::write_host_as_big(ptr, buf.data()); + buf = buf.subspan(2); + buf_offset += 2; + // A pointer is terminal (i.e. no nullptr to add), so we're done. + return; + } + + auto next = name.find('.', pos + 1); + auto part = next == std::string_view::npos ? check : name.substr(pos, next - pos); + size_t l = part.size(); if (l > 63 || l >= buf.size()) - return false; - buf.front() = static_cast(l); + throw std::out_of_range{"Buffer too small"}; + buf.front() = static_cast(l); // Length prefix std::memcpy(buf.data() + 1, part.data(), part.size()); + prev_names.emplace(std::string{check}, static_cast(buf_offset)); buf = buf.subspan(1 + part.size()); - } + buf_offset += 1 + part.size(); + + pos = next == std::string_view::npos ? 
next : next + 1; + } while (pos != std::string_view::npos); + + // If we get here we wrote all the pieces without pointing at anything, so we need to append + // a null byte to terminate the name: if (buf.empty()) - return false; + throw std::out_of_range{"Buffer too small"}; buf.front() = std::byte{0}; buf = buf.subspan(1); - return orig - buf.size(); - } - - bool write_name_into(std::span& buf, std::string_view name) - { - if (auto s = encode_name(buf, name)) - { - buf = buf.subspan(s); - return true; - } - return false; + buf_offset++; } std::optional> decode_ptr(std::string_view name) diff --git a/src/dns/encode.hpp b/src/dns/encode.hpp index e0d5b7e4a..46ddd4b9e 100644 --- a/src/dns/encode.hpp +++ b/src/dns/encode.hpp @@ -6,45 +6,59 @@ #include #include +#include #include #include namespace srouter::dns { - /// Writes the encoded version of DNS name `name` into buf, and returns how many bytes of buf - /// were written. If buf is too small to store the encoded name, returns 0. - size_t encode_name(std::span buf, std::string_view name); + // Custom hasher to let us look up a string_view key in a string-keyed unordered map: + struct transparent_string_hash + { + using is_transparent = void; + [[nodiscard]] size_t operator()(std::string_view txt) const { return std::hash{}(txt); } + }; + + using prev_names_t = std::unordered_map>; - /// Same as encode_name, except that instead of returning the written size, on success it mutates the span - /// to drop the written prefix. Returns true (and prefix-drops the written part of the span) on success, - /// false on failure. Note that the failure case can still partially write into span. - bool write_name_into(std::span& buf, std::string_view name); + /// Writes the encoded version of DNS name `name` into buf, mutating buf to eliminate the + /// written bytes. Throws if buf is too small to store the encoded name. 
+ /// + /// prev_names contains pointer values relative to the start of the message, used for name + /// compression, and buf_offset contains the relative positive of the beginning of buf to the + /// start of the message. New names added here should be added into it so that later repeated + /// names (or name suffixes) can use compression. + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset); /// decode name from buffer, mutating the buffer to begin just past the extracted name. Return - /// nullopt (without mutating buf) on failure. + /// nullopt (without mutating buf) on failure. Does not currently support compressed names (but + /// those are not typically used in questions). std::optional extract_name(std::span& buf); /// Encodes an integer in big-endian order into the buffer, mutating the span to start just - /// after the written integer. Returns true on success, false if the span was too small. + /// after the written integer. Throws if buf is too small. Returns sizeof(T) (i.e. the amount + /// written into the buffer), for convenience. template - bool write_int_into(std::span& buf, T value) + size_t write_int_into(std::span& buf, T value) { if (buf.size() < sizeof(T)) - return false; + throw std::out_of_range{"Buffer too small"}; oxenc::write_host_as_big(value, buf.data()); buf = buf.subspan(sizeof(T)); - return true; + return sizeof(T); } - // Calls write_int_info multiple times with the given integers. Returns true (and modifies buf) - // if all success. If any fail then false is returned and buf is left unchanged. + // Calls write_int_info multiple times with the given integers. Throws if the buffer is too + // small. Returns the total size of the given integers (i.e. the number of bytes written to + // buf), for convenience. template - bool write_ints_into(std::span& buf, T... values) + size_t write_ints_into(std::span& buf, T... values) { - if (buf.size() < (0 + ... 
+ sizeof(T))) - return false; + // NB: it's tempting to want to use `return (0 + ... + write_int_into())` here, but + // left-to-right evaluation of + operands isn't guaranteed, and that could put things into + // buf in the wrong order. With , as used here it is guaranteed (similarly to || or &&). ((void)write_int_into(buf, values), ...); - return true; + return (0 + ... + sizeof(T)); } /// Extracts a big-endian integer of the given type from the buffer, mutating the span to start @@ -72,23 +86,6 @@ namespace srouter::dns return true; } - // Takes some object T with an `size_t encode(buf)` function (such as various classes in this - // dns code) and attempts to call it with the given buffer. If it returns success (non-0) then - // this mutates `buf` to skip the written data and returns true; on failure it returns false. - template - bool encode_into(std::span& buf, const T& thing) - { - if (auto written = thing.encode(buf)) - { - buf = buf.subspan(written); - return true; - } - return false; - } - - // Writes encoded rr data into buf, mutating buf to point beyond the written data. Returns - // false (without mutating buf) if buf is too short; true on success. - bool write_rdata_into(std::span& buf, std::span rdata); // Extracts encoded rr data from buf, mutating buf to point beyond the extracted data. Returns // nullopt (without mutating buf) on error, the vector of decoded data on success. 
std::optional> extract_rdata(std::span& buf); diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ac699b738..ef341a4c0 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -10,6 +10,7 @@ #include #include +#include namespace srouter::dns { @@ -17,28 +18,59 @@ namespace srouter::dns Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } - size_t Message::encode(std::span buf) const + Message Message::clone() const { - auto orig = buf.size(); - if (!write_ints_into( - buf, - hdr_id, - hdr_fields, - static_cast(questions.size()), - static_cast(answers.size()), - static_cast(authorities.size()), - static_cast(additional.size()))) - return 0; + Message c; + c.hdr_id = hdr_id; + c.hdr_fields = hdr_fields; + c.questions = questions; + // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) + return c; + } + + std::vector Message::encode() const + { + // TODO FIXME: We currently aren't respect the EDNS bit, and that means our maximum message + // size is 512 bytes. We should support EDNS (by checking and setting the appropriate flag + // in `additional`), in which case 1232 becomes the (practical) maximum. + // + // Basically: + // - if the client supports EDNS it sets the size in an additional flag + // - we can then go up to whichever of that size or 1232 is smaller. + // - we set the pseudo-RR in the additional flags section of the response. 
+ + std::vector tmp; + tmp.resize(512); + + prev_names_t prev_names; + std::span buf{tmp}; + uint16_t buf_offset = 0; + + buf_offset += write_ints_into( + buf, + hdr_id, + hdr_fields, + static_cast(questions.size()), + static_cast(answers.size()), + static_cast(0 /*authorities.size()*/), + static_cast(0 /*additional.size()*/)); + + // if (auto written = thing.encode(buf)) + //{ + // buf = buf.subspan(written); + // return true; + // } for (const auto& question : questions) - if (!encode_into(buf, question)) - return 0; + question.encode(buf, prev_names, buf_offset); for (auto& a : answers) - if (!encode_into(buf, a)) - return 0; + a->encode(buf, prev_names, buf_offset); + + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); - return orig - buf.size(); + return tmp; } std::optional Message::extract(std::span& buf) @@ -66,9 +98,6 @@ namespace srouter::dns return maybe; } } - for (auto* as : {&m.answers, &m.authorities, &m.additional}) - if (!as->empty()) - log::debug(logcat, "Ignoring answer/authorities/additional sections in dns Message"); return maybe; } @@ -81,138 +110,56 @@ namespace srouter::dns for (const auto& q : questions) ques.push_back(q.ToJSON()); for (const auto& a : answers) - ans.push_back(a.ToJSON()); + ans.push_back(a->ToJSON()); return result; } - std::vector Message::encode() const - { - std::vector tmp; - tmp.resize(1500); - auto size = encode(tmp); - if (size == 0) - throw std::runtime_error("cannot encode dns message"); - tmp.resize(size); - return tmp; - } - - void Message::add_serv_fail() - { - if (questions.size()) - { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - } - } + void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; - void Message::add_reply(ipv4 addr, 
std::chrono::seconds ttl) - { - std::vector a; - a.resize(4); - oxenc::write_host_as_big(addr.addr, a.data()); - add_reply(RRClass::IN, RRType::A, std::move(a), ttl); - } - - void Message::add_reply(ipv6 addr, std::chrono::seconds ttl) + void Message::add_nodata_reply() { - std::vector aaaa; - aaaa.resize(16); - oxenc::write_host_as_big(addr.hi, aaaa.data()); - oxenc::write_host_as_big(addr.lo, aaaa.data() + 8); - return add_reply(RRClass::IN, RRType::AAAA, std::move(aaaa), ttl); + if (not questions.empty()) + hdr_fields |= reply_flags; } - void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } - - void Message::add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl) + template RR, typename... Args> + void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) { - if (questions.empty()) + if (m.questions.empty()) return; - hdr_fields |= reply_flags; + m.hdr_fields |= reply_flags; - auto& ans = answers.emplace_back(); - ans.rr_name = get_rr_name(); - ans.rr_type = type; - ans.rr_class = cls; - ans.ttl = ttl; - ans.rData = std::move(data); + m.answers.push_back(std::make_unique(std::string{m.get_rr_name()}, ttl, std::forward(args)...)); } - void Message::add_nodata_reply() - { - if (not questions.empty()) - hdr_fields |= reply_flags; - } + void Message::add_reply(const ipv4& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } + + void Message::add_reply(const ipv6& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::CNAME, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode CNAME value {}", name); + make_reply(*this, ttl, std::string{name}); } void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = 
encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::PTR, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode PTR value {}", name); - } - - void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - if (!write_ints_into(remaining, srv.priority, srv.weight, srv.port)) - return; - if (!write_name_into(remaining, srv.target)) - return; - - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); + make_reply(*this, ttl, std::string{name}); } - void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - while (!txt.empty()) - { - auto piecelen = std::min(txt.size(), size_t{255}); - if (remaining.size() <= piecelen) - throw std::length_error{"TXT record too big"}; - remaining.front() = static_cast(piecelen); - std::memcpy(remaining.data() + 1, txt.data(), piecelen); - txt.remove_prefix(piecelen); - remaining = remaining.subspan(1 + piecelen); - } + void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) { make_reply(*this, ttl, srv); } - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); - } + void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } - void Message::add_nx_reply() + void Message::set_nx_reply() { if (questions.size()) { answers.clear(); - authorities.clear(); - additional.clear(); + // authorities.clear(); + // additional.clear(); // authorative response with recursion available hdr_fields |= reply_flags; @@ -222,17 +169,16 @@ namespace srouter::dns } } - std::string Message::to_string() const + void Message::set_serv_fail() { - return fmt::format( - "[DNSMessage id={:x} fields={:x} questions={{{}}} answers={{{}}} authorities={{{}}} " - "additional={{{}}}]", - hdr_id, - hdr_fields, - 
fmt::join(questions, ","), - fmt::join(answers, ","), - fmt::join(authorities, ","), - fmt::join(additional, ",")); + if (questions.size()) + { + hdr_fields |= flags_RCODEServFail; + // authorative response with recursion available + hdr_fields |= flags_QR | flags_AA | flags_RA; + // don't allow recursion on this request + hdr_fields &= ~flags_RD; + } } } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index b12369091..0d45c1537 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -21,13 +21,23 @@ namespace srouter Message() = default; explicit Message(const Question& question); + // Non-copyable; see clone() if you want a copy with just the questions. + Message(const Message&) = delete; + + Message(Message&&) = default; + + // Clones the message with question/flag, but with no answers + Message clone() const; + nlohmann::json ToJSON() const; static constexpr auto DEFAULT_ANSWER_TTL = 10s; - void add_nx_reply(); - - void add_serv_fail(); + // These two clear any answers that may have been added and then set the appropriate + // flags for a NXDomain (i.e. authoritative reply that the requested thing does not + // exist) or a ServFail (i.e. we don't know how to answer, maybe try someone else). + void set_nx_reply(); + void set_serv_fail(); // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. 
Once set, the @@ -43,9 +53,9 @@ namespace srouter void add_cname_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN A' reply containing the given ipv4 address - void add_reply(ipv4 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv4& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN AAAA' reply containing the given ipv6 address - void add_reply(ipv6 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv6& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); void add_reply(const SRVData& srv, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); @@ -53,7 +63,6 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - size_t encode(std::span buf) const; std::vector encode() const; static std::optional extract(std::span& buf); @@ -62,10 +71,13 @@ namespace srouter uint16_t hdr_id; uint16_t hdr_fields; + std::vector questions; - std::vector answers; - std::vector authorities; - std::vector additional; + std::vector> answers; + + // Currently unused: + // std::vector authorities; + // std::vector additional; std::optional rr_name_override; private: diff --git a/src/dns/question.cpp b/src/dns/question.cpp index 0473e6243..a72abef17 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -19,14 +19,10 @@ namespace srouter::dns throw std::invalid_argument{"qname cannot be empty"}; } - size_t Question::encode(std::span buf) const + void Question::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf; - if (!write_name_into(buf, qname)) - return 0; - if (!write_ints_into(buf, static_cast(qtype), static_cast(qclass))) - return 0; - return orig.size() - buf.size(); + encode_name(buf, qname, prev_names, buf_offset); + buf_offset += write_ints_into(buf, static_cast(qtype), static_cast(qclass)); } bool Question::extract(std::span& buf) diff --git 
a/src/dns/question.hpp b/src/dns/question.hpp index 1647b6a81..4efc35b5d 100644 --- a/src/dns/question.hpp +++ b/src/dns/question.hpp @@ -11,7 +11,7 @@ namespace srouter::dns Question() = default; Question(std::string name, RRType type); - size_t encode(std::span buf) const; + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; bool extract(std::span& buf); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index fcd1070a8..6dacef373 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -6,40 +6,85 @@ #include #include +#include + namespace srouter::dns { - ResourceRecord::ResourceRecord(std::string name, RRType type, std::vector data) - : rr_name{std::move(name)}, rr_type{type}, rr_class{RRClass::IN}, ttl{1s}, rData{std::move(data)} - {} - - size_t ResourceRecord::encode(std::span buf) const + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf.size(); - if (write_name_into(buf, rr_name) - && write_ints_into( - buf, - static_cast(rr_type), - static_cast(rr_class), - static_cast(ttl.count())) - && write_rdata_into(buf, rData)) - return orig - buf.size(); - return 0; + encode_name(buf, rr_name, prev_names, buf_offset); + buf_offset += write_ints_into( + buf, static_cast(rr_type()), static_cast(rr_class), static_cast(ttl.count())); + // The RR value is in a chunk with a 2-byte length in front of it. We don't actually know + // the length yet (especially for things like CNAME, where there might be name compression), + // so we're going to stick a 0 in and then come back and fill it in after we write the + // value. 
+ if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + auto size_buf = buf.subspan(0, 2); + buf_offset += 2; + buf = buf.subspan(2); + encode_data(buf, prev_names, buf_offset); + uint16_t size = buf.data() - size_buf.data() - 2; + oxenc::write_host_as_big(size, size_buf.data()); } nlohmann::json ResourceRecord::ToJSON() const { return nlohmann::json{ {"name", rr_name}, - {"type", static_cast(rr_type)}, + {"type", static_cast(rr_type())}, {"class", static_cast(rr_class)}, {"ttl", ttl.count()}, - {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}}; + /* FIXME: need to virtualize a display for the data, if we care about json representation: + {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}*/}; } std::string ResourceRecord::to_string() const { - return "RR:[ name:{} | type:{} | class:{} | ttl:{} | rdata-size:{} ]"_format( - rr_name, static_cast(rr_type), static_cast(rr_class), ttl, rData.size()); + return "RR:[name:{}|type:{}|class:{}|ttl:{}]"_format( + rr_name, static_cast(rr_type()), static_cast(rr_class), ttl); + } + + void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (rData.size() > buf.size()) + throw std::out_of_range{"Buffer too small"}; + std::memcpy(buf.data(), rData.data(), rData.size()); + buf = buf.subspan(rData.size()); + buf_offset += rData.size(); + } + + RR_A::RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr) : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(4); + oxenc::write_host_as_big(addr.addr, rData.data()); + } + + RR_AAAA::RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr) + : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(16); + oxenc::write_host_as_big(addr.hi, rData.data()); + oxenc::write_host_as_big(addr.lo, rData.data() + 8); + } + + RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) + : RR_bytes{std::move(rr_name), ttl} + { + auto* bytes = 
reinterpret_cast(value.data()); + rData.assign(bytes, bytes + value.size()); + } + + void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, name, prev_names, buf_offset); + } + + void RR_SRV::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + buf_offset += write_ints_into(buf, priority, weight, port); + encode_name(buf, target, prev_names, buf_offset); } } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 798a624a4..dd9343726 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -1,5 +1,8 @@ #pragma once +#include "encode.hpp" +#include "srv_data.hpp" + #include #include @@ -15,10 +18,8 @@ namespace srouter::dns enum class RRType : uint16_t { A = 1, - NS = 2, CNAME = 5, PTR = 12, - MX = 15, TXT = 16, AAAA = 28, SRV = 33, @@ -26,23 +27,96 @@ namespace srouter::dns struct ResourceRecord { - ResourceRecord() = default; - explicit ResourceRecord(std::string name, RRType type, std::vector rdata); + ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} + + // Writes this RR to the beginning of buf, eliminating the written section from buf. Throws if buf is exceeded. + // + // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual + // encode_data() to write the value. + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; - // Writes this RR to the beginning of buf. Returns the number of bytes written, or 0 if the - // buffer is too small to hold it. 
- size_t encode(std::span buf) const; + virtual void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const = 0; nlohmann::json ToJSON() const; std::string to_string() const; std::string rr_name; - RRType rr_type; - RRClass rr_class; + RRClass rr_class = RRClass::IN; std::chrono::seconds ttl; - std::vector rData; + + virtual RRType rr_type() const = 0; static constexpr bool to_string_formattable = true; }; + + // Subclass of ResourceRecord that just has a binary check of data. Should not be used for data + // types containing compressible names in the value. The subclass must take care of encoding + // the rData member value as required; this base class encode_data simply barfs it into the + // buffer as-is. + struct RR_bytes : ResourceRecord + { + std::vector rData; + + using ResourceRecord::ResourceRecord; + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_A : RR_bytes + { + RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr); + RRType rr_type() const override { return RRType::A; } + }; + struct RR_AAAA : RR_bytes + { + RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr); + RRType rr_type() const override { return RRType::AAAA; } + }; + struct RR_TXT : RR_bytes + { + RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value); + RRType rr_type() const override { return RRType::TXT; } + }; + + // Base class for RR types that have a single target name as the value, such as CNAME and PTR + struct RR_target : ResourceRecord + { + std::string name; + + RR_target(std::string rr_name, std::chrono::seconds ttl, std::string name) + : ResourceRecord{std::move(rr_name), ttl}, name{std::move(name)} + {} + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_PTR : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::A; } 
+ }; + struct RR_CNAME : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::CNAME; } + }; + struct RR_SRV : ResourceRecord + { + uint16_t priority; + uint16_t weight; + uint16_t port; + std::string target; + + RR_SRV(std::string rr_name, std::chrono::seconds ttl, const SRVData& srv) + : ResourceRecord{std::move(rr_name), ttl}, + priority{srv.priority}, + weight{srv.weight}, + port{srv.port}, + target{srv.target} + {} + + RRType rr_type() const override { return RRType::SRV; } + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp index 2395538fb..7f34b67ac 100644 --- a/src/dns/server.cpp +++ b/src/dns/server.cpp @@ -22,8 +22,8 @@ namespace srouter::dns void QueryJob_Base::cancel() { - Message reply{_query}; - reply.add_serv_fail(); + Message reply = _query.clone(); + reply.set_serv_fail(); send_reply(reply.encode()); } @@ -416,7 +416,7 @@ namespace srouter::dns const quic::Address& from) override { log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query, source, to, from); + auto tmp = std::make_shared(weak_from_this(), query.clone(), source, to, from); // no questions, send fail if (query.questions.empty()) { @@ -675,7 +675,7 @@ namespace srouter::dns if (q.name() == "use-application-dns.net") { // yea it is, let's turn off DoH because god is dead. 
- msg.add_nx_reply(); + msg.set_nx_reply(); // press F to pay respects and send it back where it came from ptr->send_udp(from, to, msg.encode()); return true; diff --git a/src/dns/server.hpp b/src/dns/server.hpp index a01b90757..7b224a24b 100644 --- a/src/dns/server.hpp +++ b/src/dns/server.hpp @@ -68,11 +68,8 @@ namespace srouter::dns public: explicit QueryJob( - std::shared_ptr source, - const Message& query, - const quic::Address& to_, - const quic::Address& from_) - : QueryJob_Base{query}, src{std::move(source)}, resolver{to_}, asker{from_} + std::shared_ptr source, Message query, const quic::Address& to_, const quic::Address& from_) + : QueryJob_Base{std::move(query)}, src{std::move(source)}, resolver{to_}, asker{from_} {} void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index c33612ac5..76d119a80 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -36,8 +36,8 @@ namespace srouter::handlers if (not should_hook_dns_message(query)) return false; - auto job = std::make_shared(source, query, to, from); - if (!handle_hooked_dns_message(query, [job](dns::Message msg) { job->send_reply(msg.encode()); })) + auto job = std::make_shared(source, query.clone(), to, from); + if (!handle_hooked_dns_message(query.clone(), [job](dns::Message msg) { job->send_reply(msg.encode()); })) job->cancel(); return true; } @@ -336,8 +336,8 @@ namespace srouter::handlers static dns::Message& clear_dns_message(dns::Message& msg) { - msg.authorities.clear(); - msg.additional.clear(); + // msg.authorities.clear(); + // msg.additional.clear(); msg.answers.clear(); msg.hdr_fields &= ~dns::flags_RCODENxDomain; return msg; @@ -451,7 +451,7 @@ namespace srouter::handlers } else { - msg.add_nx_reply(); + msg.set_nx_reply(); reply(std::move(msg)); return true; } @@ -472,11 +472,12 @@ namespace srouter::handlers lookup, sub = std::move(sub), reply = std::move(reply), - msg = std::move(msg), + 
msg_ptr = std::make_shared(std::move(msg)), cname_only = q.qtype == dns::RRType::CNAME]( std::optional maybe_netaddr, bool assertive, std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; msg.set_rr_name(lookup); if (maybe_netaddr) { @@ -494,7 +495,7 @@ namespace srouter::handlers { // We got an assertive "does not exist" message (and not just a failure // or timeout), so add the nx reply - msg.add_nx_reply(); + msg.set_nx_reply(); // FIXME: we should be able to provide a TTL here } else @@ -505,7 +506,7 @@ namespace srouter::handlers // server). assert(!assertive); // FIXME: should be able to specify a TTL here - msg.add_nx_reply(); + msg.set_nx_reply(); } reply(std::move(msg)); }); @@ -530,11 +531,11 @@ namespace srouter::handlers fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); } else - msg.add_nx_reply(); + msg.set_nx_reply(); } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -576,15 +577,15 @@ namespace srouter::handlers // "this record does not exist"). } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } // Otherwise it's some query type we don't support, so return does-not-exist. 
- msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -605,9 +606,9 @@ namespace srouter::handlers *ip); if (!found) - msg.add_nx_reply(); + msg.set_nx_reply(); - reply(msg); + reply(std::move(msg)); return true; } @@ -618,25 +619,25 @@ namespace srouter::handlers { _router.session_endpoint().lookup_client_intro( *rid, - [msg = std::move(msg), sub, reply = std::move(reply)]( + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( const std::optional& cc) mutable { if (cc) { for (const auto& srv : cc->SRVs()) if (srv.service == sub[0] && srv.proto == sub[1]) - msg.add_reply(srv); + msg->add_reply(srv); } else - msg.add_nx_reply(); + msg->set_nx_reply(); - reply(msg); + reply(std::move(*msg)); }); return true; } } - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } From 31c9ec123fe0d5d7cc4fd9c37de0c5b8683e6382 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Mon, 10 Nov 2025 21:18:09 -0400 Subject: [PATCH 2/4] fix empty name encoding An empty name was getting encoded as two \0's instead of just one. --- src/dns/encode.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 7923d26fe..911a1f967 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -87,8 +87,7 @@ namespace srouter::dns // names (particularly for something like SRV records where a name can be repeated multiple // times), and the DNS response size limit of 512 bytes, we implement that here. - size_t pos = 0; - do + for (size_t pos = name.empty() ? std::string::npos : 0; pos != std::string_view::npos;) { std::string_view check = name.substr(pos); if (auto it = prev_names.find(check); it != prev_names.end()) @@ -116,7 +115,7 @@ namespace srouter::dns buf_offset += 1 + part.size(); pos = next == std::string_view::npos ? 
next : next + 1; - } while (pos != std::string_view::npos); + } // If we get here we wrote all the pieces without pointing at anything, so we need to append // a null byte to terminate the name: From 4ce87939b28a858e628e51e29d819ecf756b9f85 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Mon, 10 Nov 2025 22:55:46 -0400 Subject: [PATCH 3/4] EDNS query and cookie support This adds support for EDNS requests, allowing for larger responses. As part of that, it adds support for handling DNS cookies, which are a sort of pseudo-mac over DNS requests to prevent out-of-path attackers from being able to forge responses. --- src/dns/message.cpp | 219 ++++++++++++++++++++++++++++++++++------- src/dns/message.hpp | 22 ++++- src/dns/rr.cpp | 53 +++++++++- src/dns/rr.hpp | 59 +++++++++++ src/dns/server.cpp | 34 ++++++- src/dns/server.hpp | 6 ++ src/rpc/rpc_server.cpp | 2 + 7 files changed, 351 insertions(+), 44 deletions(-) diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ef341a4c0..8d4c12075 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -8,9 +8,12 @@ #include #include +#include #include +#include #include +#include namespace srouter::dns { @@ -24,23 +27,17 @@ namespace srouter::dns c.hdr_id = hdr_id; c.hdr_fields = hdr_fields; c.questions = questions; + c.additional_edns = additional_edns; // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) return c; } std::vector Message::encode() const { - // TODO FIXME: We currently aren't respect the EDNS bit, and that means our maximum message - // size is 512 bytes. We should support EDNS (by checking and setting the appropriate flag - // in `additional`), in which case 1232 becomes the (practical) maximum. - // - // Basically: - // - if the client supports EDNS it sets the size in an additional flag - // - we can then go up to whichever of that size or 1232 is smaller. - // - we set the pseudo-RR in the additional flags section of the response. 
- std::vector tmp; - tmp.resize(512); + // If the client signalled EDNS support then we can use a larger payload, otherwise DNS is + // limited to 512 bytes. + tmp.resize(additional_edns ? additional_edns->max_payload() : 512); prev_names_t prev_names; std::span buf{tmp}; @@ -53,13 +50,7 @@ namespace srouter::dns static_cast(questions.size()), static_cast(answers.size()), static_cast(0 /*authorities.size()*/), - static_cast(0 /*additional.size()*/)); - - // if (auto written = thing.encode(buf)) - //{ - // buf = buf.subspan(written); - // return true; - // } + static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); for (const auto& question : questions) question.encode(buf, prev_names, buf_offset); @@ -67,14 +58,64 @@ namespace srouter::dns for (auto& a : answers) a->encode(buf, prev_names, buf_offset); + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + // Trim the excess: tmp.resize(tmp.size() - buf.size()); return tmp; } - std::optional Message::extract(std::span& buf) + static std::array make_server_cookie( + std::span client_cookie, + std::span client_ip, + std::span server_cookie_secret, + std::chrono::sys_seconds ts = std::chrono::floor(std::chrono::system_clock::now())) + { + assert(client_ip.size() == 4 || client_ip.size() == 16); + + static_assert(server_cookie_secret.size() == crypto_shorthash_siphash24_KEYBYTES); + + std::array cookie; + auto ccookie = std::span{cookie}.first<8>(); + auto scookie = std::span{cookie}.last<16>(); + std::memcpy(ccookie.data(), client_cookie.data(), 8); + + // The first 8 bytes of the server cookie (as per RFC 9018) are: + // - version (always 1) + // - three reserved bytes + // - 4-byte, uint32 unix timestamp + scookie[0] = std::byte{1}; // Version + scookie[1] = std::byte{0}; // - + scookie[2] = std::byte{0}; // - reserved + scookie[3] = std::byte{0}; // - + auto ts_val = static_cast(ts.time_since_epoch().count()); + oxenc::write_host_as_big(ts_val, &scookie[4]); + + // The last 8 bytes of 
the server cookie are a hash of 8-byte client + // cookie, then the above 8 bytes server cookie fields, then the + // 4- or 16-byte client IP (in network order notation). + std::array hash_data{{0}}; + std::memcpy(hash_data.data(), ccookie.data(), 8); + std::memcpy(hash_data.data() + 8, scookie.data(), 8); + std::memcpy(hash_data.data() + 16, client_ip.data(), client_ip.size()); + crypto_shorthash_siphash24( + reinterpret_cast(scookie.data() + 8), + hash_data.data(), + 16 + client_ip.size(), + reinterpret_cast(server_cookie_secret.data())); + + return cookie; + } + + std::optional Message::extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_ip) { + if (client_ip.size() != 4 && client_ip.size() != 16) + throw std::logic_error{"Invalid client IP for Message::extract_question"}; auto maybe = std::make_optional(); auto& m = *maybe; uint16_t qd_count, an_count, ns_count, ar_count; @@ -84,20 +125,128 @@ namespace srouter::dns return maybe; } m.questions.resize(qd_count); - m.answers.resize(an_count); // Ignore these: + // m.answers.resize(an_count); // m.authorities.resize(ns_count); // m.additional.resize(ar_count); - for (auto& q : m.questions) + try { - if (!q.extract(buf)) + for (auto& q : m.questions) + if (!q.extract(buf)) + throw std::invalid_argument{"invalid question"}; + + // Skip any answers or authority records: + for (uint16_t i = 0; i < an_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid answer RR"}; + for (uint16_t i = 0; i < ns_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid authority RR"}; + + // In the additional section we look for an EDNS entry, and skip anything else: + for (uint16_t i = 0; i < ar_count; i++) { - log::debug(logcat, "failed to decode question"); - maybe.reset(); - return maybe; + static_assert(crypto_shorthash_siphash24_KEYBYTES == 16); + auto a_rr = ParsedRR::extract(buf); + if (!a_rr) + throw std::invalid_argument{"invalid 
additional RR"}; + if (a_rr->name != "." || a_rr->rr_type != RRType::OPT) + { + continue; + } + + if (m.additional_edns) + throw std::invalid_argument{"found invalid multiple additional OPT records"}; + + auto max_payload = static_cast(a_rr->rr_class); + m.additional_edns.emplace(std::min(max_payload, 1232)); + + std::optional> cookie; + for (auto optbuf = a_rr->rdata; !optbuf.empty();) + { + if (optbuf.size() < 4) + throw std::invalid_argument{"additional OPT data section too small"}; + auto opt_code = oxenc::load_big_to_host(optbuf.data()); + auto opt_len = oxenc::load_big_to_host(optbuf.data() + 2); + optbuf = optbuf.subspan(4); + if (opt_len > optbuf.size()) + throw std::invalid_argument{"additional OPT option value length too small"}; + auto value = optbuf.subspan(0, opt_len); + optbuf = optbuf.subspan(opt_len); + + if (opt_code == PRR_EDNS::OPT_COOKIE) + { + if (m.additional_edns->cookie) + throw std::invalid_argument{"Duplicate OPT client cookies"}; + + if (value.size() == 8) + { + // This is the client sending a new cookie, requesting a new server + // cookie (i.e. because it doesn't currently have one). + + m.additional_edns->cookie = + make_server_cookie(value.first<8>(), client_ip, server_cookie_secret); + } + else if (value.size() == 24) + { + // This is the client sending its cookie along with a previously + // obtained server cookie for that client cookie, so we are supposed + // to validate it. + auto ccookie = value.first<8>(); + auto scookie = value.last<16>(); + + std::chrono::sys_seconds ts{ + std::chrono::seconds{oxenc::load_big_to_host(&scookie[4])}}; + + auto expected = make_server_cookie(ccookie, client_ip, server_cookie_secret, ts); + bool bad_cookie = std::memcmp(value.data(), expected.data(), 24) != 0; + + auto now = std::chrono::floor(std::chrono::system_clock::now()); + + if (!bad_cookie && ts >= now - 30min && ts <= now + 5min) + // Cookie is good and the timestamp in it is close to now, so the + // cookie stays as-is. 
+ std::memcpy(m.additional_edns->cookie.emplace().data(), value.data(), 24); + + else + { + // If the cookie timestamp is too far away then it is a badcookie + // failure. (We don't have to worry about client clock skew because + // supposedly *we* issued this with the timestamp in it). + if (bad_cookie || ts < now - 1h || ts > now + 5min) + { + // When this is set we'll send a proper bad cookie response + // immediately after parsing: + m.additional_edns->bad_cookie = true; + // Extended rcode is, um, a wee bit hacky: we put the high 8 + // bits of the 12-bit error code into the OPT TTL field, and + // then continue to use the 4-bit RCODE for the bottom 4 bits. + m.additional_edns->ttl = + std::chrono::seconds{(uint32_t{PRR_EDNS::EXT_RCODE_BADCOOKIE} >> 4) << 24}; + // (The other bytes are all 0 values) + } + + // else it's valid, just a little bit (but not too) old and they are + // due for a new cookie. + + // In either of the above cases, we give the client a new cookie + // to use, with an updated new timestamp + m.additional_edns->cookie = + make_server_cookie(ccookie, client_ip, server_cookie_secret, now); + } + } + // Else we have an unparseable/non-understood cookie, and so we are supposed + // to ignore the option and discard the cookie data. + } + } } } + catch (const std::exception& e) + { + log::debug(logcat, "failed to parse DNS message: {}", e.what()); + maybe.reset(); + } return maybe; } @@ -116,6 +265,8 @@ namespace srouter::dns void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } + // TODO FIXME: "RA" means we advertise that we support recursion, but we should only do that + // when we have an upstream DNS server available. 
(This TODO is also in server.cpp) static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; void Message::add_nodata_reply() @@ -155,29 +306,29 @@ namespace srouter::dns void Message::set_nx_reply() { + answers.clear(); + // authorities.clear(); + // additional.clear(); + if (questions.size()) { - answers.clear(); - // authorities.clear(); - // additional.clear(); - + hdr_fields |= flags_RCODENxDomain; // authorative response with recursion available hdr_fields |= reply_flags; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - hdr_fields |= flags_RCODENxDomain; } } void Message::set_serv_fail() { + answers.clear(); + if (questions.size()) { hdr_fields |= flags_RCODEServFail; // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; + hdr_fields |= reply_flags; + // A servfail is not an authoritative answer, so clear that bit: + hdr_fields &= ~flags_AA; } } diff --git a/src/dns/message.hpp b/src/dns/message.hpp index 0d45c1537..32bb8ee01 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -26,7 +26,7 @@ namespace srouter Message(Message&&) = default; - // Clones the message with question/flag, but with no answers + // Clones the message with question/flags/edns response data, but with no answers Message clone() const; nlohmann::json ToJSON() const; @@ -39,6 +39,11 @@ namespace srouter void set_nx_reply(); void set_serv_fail(); + // This clears any answers and sets the appropriate header flags for a BADCOOKIE + // response. Note that this is only valid when the message has `additional_edns` as + // part of this error code value is carried in that additional RR data. + void set_badcookie_flags(); + // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. 
Once set, the // value persists for any added answers until this method is called again. @@ -65,7 +70,15 @@ namespace srouter std::vector encode() const; - static std::optional extract(std::span& buf); + // Parses a question Message from the given buf, removing the question from the prefix + // of buf. `server_cookie_secret` and `client_addr` contain information needed for DNS + // cookie handling; `server_cookie_secret` is the secret key used for cookie hashing + // (randomly generated at server startup), while client_addr is the raw bytes of the IP + // address (4 or 16 bytes for IPv4/IPv6, respectively). + static std::optional extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_addr); std::string to_string() const; @@ -78,6 +91,11 @@ namespace srouter // Currently unused: // std::vector authorities; // std::vector additional; + + // Currently the only additional record we do anything with is the OPT section for + // enabling EDNS (most significantly for allowing large DNS packets) + std::optional additional_edns; + std::optional rr_name_override; private: diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index 6dacef373..b0c53a925 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -5,11 +5,42 @@ #include #include +#include #include - namespace srouter::dns { + std::optional ParsedRR::extract(std::span& buf) + { + auto name = extract_name(buf); + if (!name || buf.size() < 2 + 2 + 4 + 2 /* type + class + ttl + rdatalen */) + return std::nullopt; + auto typ = oxenc::load_big_to_host(buf.data()); + auto cls = oxenc::load_big_to_host(buf.data() + 2); + auto ttl = oxenc::load_big_to_host(buf.data() + 4); + auto len = oxenc::load_big_to_host(buf.data() + 8); + buf = buf.subspan(10); + + if (buf.size() < len) + return std::nullopt; + + auto rdata = buf.subspan(0, len); + buf = buf.subspan(len); + + return ParsedRR{ + .name = std::move(*name), + .rr_type = static_cast(typ), + .rr_class = static_cast(cls), + .ttl = std::chrono::seconds{ttl}, + .rdata = 
rdata}; + } + + static void check_buf_size(const std::span& buf, size_t needed) + { + if (buf.size() < needed) + throw std::out_of_range{"DNS response exceeds max size"}; + } + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { encode_name(buf, rr_name, prev_names, buf_offset); @@ -19,8 +50,7 @@ namespace srouter::dns // the length yet (especially for things like CNAME, where there might be name compression), // so we're going to stick a 0 in and then come back and fill it in after we write the // value. - if (buf.size() < 2) - throw std::out_of_range{"Buffer too small"}; + check_buf_size(buf, 2); auto size_buf = buf.subspan(0, 2); buf_offset += 2; buf = buf.subspan(2); @@ -48,8 +78,7 @@ namespace srouter::dns void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const { - if (rData.size() > buf.size()) - throw std::out_of_range{"Buffer too small"}; + check_buf_size(buf, rData.size()); std::memcpy(buf.data(), rData.data(), rData.size()); buf = buf.subspan(rData.size()); buf_offset += rData.size(); @@ -87,4 +116,18 @@ namespace srouter::dns encode_name(buf, target, prev_names, buf_offset); } + void PRR_EDNS::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (cookie) + { + uint16_t datalen = 2 + 2 + cookie->size(); // code + length + data + check_buf_size(buf, datalen); + oxenc::write_host_as_big(OPT_COOKIE, buf.data()); + oxenc::write_host_as_big(static_cast(cookie->size()), buf.data() + 2); + std::memcpy(buf.data() + 4, cookie->data(), cookie->size()); + buf = buf.subspan(datalen); + buf_offset += datalen; + } + } + } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index dd9343726..85148f171 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -23,8 +23,27 @@ namespace srouter::dns TXT = 16, AAAA = 28, SRV = 33, + + OPT = 41, + }; + + // Parsed RR data: this is intentionally very raw and is only for extracting the data, not + // 
interpreting it. Note that the rdata value points into the input buf: the ParsedRR data + // should not be held longer than the input buffer! + struct ParsedRR + { + std::string name; + RRType rr_type; // *Not* necessarily one of the values defined above + RRClass rr_class; // *Not* necessarily one of the values defined above + std::chrono::seconds ttl; + std::span rdata; + + // Attempts to parse an RR from the beginning of `buf`. `buf` will have the prefix removed + // containing the extracted record. Returns nullopt on extraction error. + static std::optional extract(std::span& buf); }; + // Abstract base class we use for building RR responses struct ResourceRecord { ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} @@ -119,4 +138,44 @@ namespace srouter::dns RRType rr_type() const override { return RRType::SRV; } void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; }; + + // Pseudo-RR for EDNS; a client sends this in the additional section if it supports EDNS, and + // the server sends it back (if provided) to confirm that the server also supports EDNS. + struct PRR_EDNS : ResourceRecord + { + static constexpr uint16_t OPT_COOKIE = 10; + static constexpr uint16_t EXT_RCODE_BADCOOKIE = 23; + + std::optional> cookie; + + // Will be true if the full cookie we were provided was invalid or expired, in which case we + // are supposed to immediately fail with an extended BADCOOKIE error code (which will be + // encoded if this object is encoded into the output with this bool set to true). + bool bad_cookie{false}; + + // Constructs an EDNS value. This is rather hacky, to try to mash it into the fairly + // inflexible older DNS protocol: + // - NAME is always empty (i.e. 
".", the root domain) + // - 32-bit TTL is nothing to do with ttl, but actually 3 packed fields: + // - 8-bit "extended rcode" + // - 8-bit version (currently 0) + // - 16-bit flags of which there is one for DNSSEC and all others are reserved + // We currently always use 0 as we don't use extended rcode or dnssec. + // - CLASS isn't a class at all but rather contains the supported UDP payload size. We set + // it to the recommended 1232 size, but if a client gave us a smaller value we should + // reflect that instead. + // + // Beyond that, we support an optional DNS server cookie value (see RFC 7873 and 9018), + // which must be the 8-byte cookie sent by the client followed by a 16 byte server cookie. + PRR_EDNS(uint16_t max_payload, std::optional> cookie = std::nullopt) + : ResourceRecord{"", 0s}, cookie{std::move(cookie)} + { + rr_class = static_cast(max_payload); + } + + uint16_t max_payload() const { return static_cast(rr_class); } + constexpr RRType rr_type() const override { return RRType::OPT; } + void encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const override; + }; + } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp index 7f34b67ac..721ec9724 100644 --- a/src/dns/server.cpp +++ b/src/dns/server.cpp @@ -2,6 +2,7 @@ #include "constants/apple.hpp" #include "constants/platform.hpp" +#include "dns.hpp" #include "message.hpp" #include "nm_platform.hpp" #include "sd_platform.hpp" @@ -9,6 +10,7 @@ #include #include #include +#include #include #include @@ -522,7 +524,9 @@ namespace srouter::dns Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - {} + { + randombytes_buf(_cookie_secret.data(), _cookie_secret.size()); + } std::vector> Server::get_all_resolvers() const { @@ -656,14 +660,38 @@ namespace srouter::dns return false; } - auto maybe = Message::extract(payload); + std::span 
client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; + + auto maybe = Message::extract_question(payload, _cookie_secret, client_ip); if (not maybe) { log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); return false; } - auto& msg = *maybe; + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. + msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. (This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + ptr->send_udp(from, to, msg.encode()); + return true; + } + // we don't provide a DoH resolver because it requires verified TLS // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal // thankfully mozilla added a backdoor that allows ISPs to turn it off diff --git a/src/dns/server.hpp b/src/dns/server.hpp index 7b224a24b..4ba6e43a9 100644 --- a/src/dns/server.hpp +++ b/src/dns/server.hpp @@ -182,6 +182,12 @@ namespace srouter::dns private: const unsigned int m_NetIfIndex; + + // Secret value we use as a key in DNS server cookie hashing. We generate a random value once on + // each startup as we currently have no need for this to be deterministic, and that + // introduces rotation whenever we restart. + std::array _cookie_secret; + // TODO FIXME: this ownership model is cursed. 
std::set, ComparePtr>> _owned_resolvers; std::set, CompareWeakPtr> _resolvers; diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index 9f19c073a..c70079969 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -28,6 +28,7 @@ namespace srouter::rpc log::info(logcat, "RPC Server received request for endpoint `{}`", req.name); } +#if 0 // Fake packet source that serializes repsonses back into dns class DummyPacketSource final : public dns::PacketSource { @@ -47,6 +48,7 @@ namespace srouter::rpc /// returns the sockaddr we are bound on if applicable std::optional bound_on() const override { return std::nullopt; } }; +#endif bool check_path(std::string path) { From 28aecaecf7c9cb29e022b2ec57a331c6d0817eb8 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 12 Nov 2025 13:44:48 -0400 Subject: [PATCH 4/4] CI: make sure libunwind-NN-dev installed in llvm builds --- .drone.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 3509e6ac0..c753769f0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -291,7 +291,7 @@ local clang(version) = debian_pipeline( local full_llvm(version) = debian_pipeline( 'Debian sid/llvm-' + version, docker_base + 'debian-sid-clang', - deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], + deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libunwind-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], remove='g++'), oxen_repo=[], cmake_extra='-DCMAKE_C_COMPILER=clang-' + version +