From 7473acf0d431ca53367ff1eb84a1a6daf915a6cd Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Sat, 8 Nov 2025 02:29:43 -0400 Subject: [PATCH 1/8] Implement DNS name compression This implements DNS name compression (see the description added in dns/encode.cpp `encode_name()` for details). This reduces the sizes of nearly all query responses (because every response answer includes the question, and then uses the name again for the response), and in some cases significantly so. This is rather important for Session Router because all of our names are 52-byte pubkeys (plus the 4- or 5-byte tld), and so we are potentially running up against the DNS 512-byte max message size. (We should also enable EDNS to allow longer messages, but that is left here as a FIXME and not yet implemented). An example shows how the compression helps: an AAAA query for `localhost.sesh` responds with an answer of: ;; QUESTION SECTION: ;localhost.sesh. IN AAAA ;; ANSWER SECTION: localhost.sesh. 10 IN CNAME sh6tnpf84s885m8ygsjw7g8qjuo1jk7ydufiog8sjdtgkhb3w8iy.sesh. sh6tnpf84s885m8ygsjw7g8qjuo1jk7ydufiog8sjdtgkhb3w8iy.sesh. 10 IN AAAA fd2e:7365:7368::1 The repeated question doesn't compress anything, of course, but once you hit the answer, you start getting savings: The repeated `localhost.sesh` in the first answer gets compressed from 16 bytes (without compression) to a 2-byte pointer (back to the same address in the question). The first PUBKEY.sesh (in the CNAME target) gets slightly reduced by being able to encode the trailing `sesh` from 6 bytes uncompressed (4+"sesh"+\0) to a 2-byte pointer (again back into the question, pointing just at the sesh tld rather than the entire localhost.sesh value). The pubkey.sesh. in the second answer gets hugely reduced: an uncompressed 59 bytes (52+"pubkey"+4+"sesh"+0) becomes a simple 2-byte pointer to the same name in the previous answer line. 
For some queries like SRV records the savings are even potentially even larger, especially when there are multiple SRV entries for a .sesh address. --- src/dns/encode.cpp | 85 ++++++++++++++---- src/dns/encode.hpp | 67 +++++++------- src/dns/message.cpp | 208 ++++++++++++++++--------------------------- src/dns/message.hpp | 30 +++++-- src/dns/question.cpp | 10 +-- src/dns/question.hpp | 2 +- src/dns/rr.cpp | 83 +++++++++++++---- src/dns/rr.hpp | 94 ++++++++++++++++--- src/dns/server.cpp | 8 +- src/dns/server.hpp | 7 +- src/handlers/tun.cpp | 47 +++++----- 11 files changed, 379 insertions(+), 262 deletions(-) diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 5318d72e8..7923d26fe 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -46,36 +46,85 @@ namespace srouter::dns return name; } - size_t encode_name(std::span buf, std::string_view name) + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset) { - auto orig = buf.size(); if (name.size() && name.back() == '.') name.remove_suffix(1); - for (auto part : srouter::split(name, ".")) + // Look for a previously used suffix of this name. For instance, if we have a response + // consisting of: + // + // localhost.sesh IN CNAME mylongpubkey.sesh + // foo.mylongpubkey.sesh IN AAAA 1:2:3::4 + // + // then when we repeat the question itself (IN AAAA localhost.sesh) we echo that question + // back into the response as the 16 bytes: + // \x09localhost\x04sesh\x00 + // Suppose that this was written at location Z in the DNS message, this creates two + // pointable addresses: + // - "localhost.sesh" -> Z + // - "sesh" -> Z+10 + // + // Then we come to the answers, and for the first "localhost.sesh" value, we can simply + // write that as a single pointer [Z] (where the pointer is a 16-bit, big-endian value with + // the highest two bits set and the remaining 14 bits set to "Z"). 
+ // + // Then we get to "mylongpubkey.sesh" and we can encode that as: + // + // \x34mylongpubkey[pointer to Z+10] + // + // This also creates a new pointable address: + // - "mylongpubkey.sesh" -> Y + // + // Then we come to foo.mylongpubkey.sesh and we can encode this as: + // + // - \x03foo[pointer to Y] + // + // i.e. we only need 6 bytes for this address instead of 1+3+1+52+1+4+1=63 bytes that we + // would need for the uncompressed version. + // + // Although this compression is optional, given how frequently we reuse long session router + // names (particularly for something like SRV records where a name can be repeated multiple + // times), and the DNS response size limit of 512 bytes, we implement that here. + + size_t pos = 0; + do { + std::string_view check = name.substr(pos); + if (auto it = prev_names.find(check); it != prev_names.end()) + { + if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + uint16_t ptr = uint16_t{0b11000000'00000000} | it->second; + oxenc::write_host_as_big(ptr, buf.data()); + buf = buf.subspan(2); + buf_offset += 2; + // A pointer is terminal (i.e. no nullptr to add), so we're done. + return; + } + + auto next = name.find('.', pos + 1); + auto part = next == std::string_view::npos ? check : name.substr(pos, next - pos); + size_t l = part.size(); if (l > 63 || l >= buf.size()) - return false; - buf.front() = static_cast(l); + throw std::out_of_range{"Buffer too small"}; + buf.front() = static_cast(l); // Length prefix std::memcpy(buf.data() + 1, part.data(), part.size()); + prev_names.emplace(std::string{check}, static_cast(buf_offset)); buf = buf.subspan(1 + part.size()); - } + buf_offset += 1 + part.size(); + + pos = next == std::string_view::npos ? 
next : next + 1; + } while (pos != std::string_view::npos); + + // If we get here we wrote all the pieces without pointing at anything, so we need to append + // a null byte to terminate the name: if (buf.empty()) - return false; + throw std::out_of_range{"Buffer too small"}; buf.front() = std::byte{0}; buf = buf.subspan(1); - return orig - buf.size(); - } - - bool write_name_into(std::span& buf, std::string_view name) - { - if (auto s = encode_name(buf, name)) - { - buf = buf.subspan(s); - return true; - } - return false; + buf_offset++; } std::optional> decode_ptr(std::string_view name) diff --git a/src/dns/encode.hpp b/src/dns/encode.hpp index e0d5b7e4a..46ddd4b9e 100644 --- a/src/dns/encode.hpp +++ b/src/dns/encode.hpp @@ -6,45 +6,59 @@ #include #include +#include #include #include namespace srouter::dns { - /// Writes the encoded version of DNS name `name` into buf, and returns how many bytes of buf - /// were written. If buf is too small to store the encoded name, returns 0. - size_t encode_name(std::span buf, std::string_view name); + // Custom hasher to let us look up a string_view key in a string-keyed unordered map: + struct transparent_string_hash + { + using is_transparent = void; + [[nodiscard]] size_t operator()(std::string_view txt) const { return std::hash{}(txt); } + }; + + using prev_names_t = std::unordered_map>; - /// Same as encode_name, except that instead of returning the written size, on success it mutates the span - /// to drop the written prefix. Returns true (and prefix-drops the written part of the span) on success, - /// false on failure. Note that the failure case can still partially write into span. - bool write_name_into(std::span& buf, std::string_view name); + /// Writes the encoded version of DNS name `name` into buf, mutating buf to eliminate the + /// written bytes. Throws if buf is too small to store the encoded name. 
+ /// + /// prev_names contains pointer values relative to the start of the message, used for name + /// compression, and buf_offset contains the relative positive of the beginning of buf to the + /// start of the message. New names added here should be added into it so that later repeated + /// names (or name suffixes) can use compression. + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset); /// decode name from buffer, mutating the buffer to begin just past the extracted name. Return - /// nullopt (without mutating buf) on failure. + /// nullopt (without mutating buf) on failure. Does not currently support compressed names (but + /// those are not typically used in questions). std::optional extract_name(std::span& buf); /// Encodes an integer in big-endian order into the buffer, mutating the span to start just - /// after the written integer. Returns true on success, false if the span was too small. + /// after the written integer. Throws if buf is too small. Returns sizeof(T) (i.e. the amount + /// written into the buffer), for convenience. template - bool write_int_into(std::span& buf, T value) + size_t write_int_into(std::span& buf, T value) { if (buf.size() < sizeof(T)) - return false; + throw std::out_of_range{"Buffer too small"}; oxenc::write_host_as_big(value, buf.data()); buf = buf.subspan(sizeof(T)); - return true; + return sizeof(T); } - // Calls write_int_info multiple times with the given integers. Returns true (and modifies buf) - // if all success. If any fail then false is returned and buf is left unchanged. + // Calls write_int_info multiple times with the given integers. Throws if the buffer is too + // small. Returns the total size of the given integers (i.e. the number of bytes written to + // buf), for convenience. template - bool write_ints_into(std::span& buf, T... values) + size_t write_ints_into(std::span& buf, T... values) { - if (buf.size() < (0 + ... 
+ sizeof(T))) - return false; + // NB: it's tempting to want to use `return (0 + ... + write_int_into())` here, but + // left-to-right evaluation of + operands isn't guaranteed, and that could put things into + // buf in the wrong order. With , as used here it is guaranteed (similarly to || or &&). ((void)write_int_into(buf, values), ...); - return true; + return (0 + ... + sizeof(T)); } /// Extracts a big-endian integer of the given type from the buffer, mutating the span to start @@ -72,23 +86,6 @@ namespace srouter::dns return true; } - // Takes some object T with an `size_t encode(buf)` function (such as various classes in this - // dns code) and attempts to call it with the given buffer. If it returns success (non-0) then - // this mutates `buf` to skip the written data and returns true; on failure it returns false. - template - bool encode_into(std::span& buf, const T& thing) - { - if (auto written = thing.encode(buf)) - { - buf = buf.subspan(written); - return true; - } - return false; - } - - // Writes encoded rr data into buf, mutating buf to point beyond the written data. Returns - // false (without mutating buf) if buf is too short; true on success. - bool write_rdata_into(std::span& buf, std::span rdata); // Extracts encoded rr data from buf, mutating buf to point beyond the extracted data. Returns // nullopt (without mutating buf) on error, the vector of decoded data on success. 
std::optional> extract_rdata(std::span& buf); diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ac699b738..ef341a4c0 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -10,6 +10,7 @@ #include #include +#include namespace srouter::dns { @@ -17,28 +18,59 @@ namespace srouter::dns Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } - size_t Message::encode(std::span buf) const + Message Message::clone() const { - auto orig = buf.size(); - if (!write_ints_into( - buf, - hdr_id, - hdr_fields, - static_cast(questions.size()), - static_cast(answers.size()), - static_cast(authorities.size()), - static_cast(additional.size()))) - return 0; + Message c; + c.hdr_id = hdr_id; + c.hdr_fields = hdr_fields; + c.questions = questions; + // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) + return c; + } + + std::vector Message::encode() const + { + // TODO FIXME: We currently aren't respect the EDNS bit, and that means our maximum message + // size is 512 bytes. We should support EDNS (by checking and setting the appropriate flag + // in `additional`), in which case 1232 becomes the (practical) maximum. + // + // Basically: + // - if the client supports EDNS it sets the size in an additional flag + // - we can then go up to whichever of that size or 1232 is smaller. + // - we set the pseudo-RR in the additional flags section of the response. 
+ + std::vector tmp; + tmp.resize(512); + + prev_names_t prev_names; + std::span buf{tmp}; + uint16_t buf_offset = 0; + + buf_offset += write_ints_into( + buf, + hdr_id, + hdr_fields, + static_cast(questions.size()), + static_cast(answers.size()), + static_cast(0 /*authorities.size()*/), + static_cast(0 /*additional.size()*/)); + + // if (auto written = thing.encode(buf)) + //{ + // buf = buf.subspan(written); + // return true; + // } for (const auto& question : questions) - if (!encode_into(buf, question)) - return 0; + question.encode(buf, prev_names, buf_offset); for (auto& a : answers) - if (!encode_into(buf, a)) - return 0; + a->encode(buf, prev_names, buf_offset); + + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); - return orig - buf.size(); + return tmp; } std::optional Message::extract(std::span& buf) @@ -66,9 +98,6 @@ namespace srouter::dns return maybe; } } - for (auto* as : {&m.answers, &m.authorities, &m.additional}) - if (!as->empty()) - log::debug(logcat, "Ignoring answer/authorities/additional sections in dns Message"); return maybe; } @@ -81,138 +110,56 @@ namespace srouter::dns for (const auto& q : questions) ques.push_back(q.ToJSON()); for (const auto& a : answers) - ans.push_back(a.ToJSON()); + ans.push_back(a->ToJSON()); return result; } - std::vector Message::encode() const - { - std::vector tmp; - tmp.resize(1500); - auto size = encode(tmp); - if (size == 0) - throw std::runtime_error("cannot encode dns message"); - tmp.resize(size); - return tmp; - } - - void Message::add_serv_fail() - { - if (questions.size()) - { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - } - } + void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; - void Message::add_reply(ipv4 addr, 
std::chrono::seconds ttl) - { - std::vector a; - a.resize(4); - oxenc::write_host_as_big(addr.addr, a.data()); - add_reply(RRClass::IN, RRType::A, std::move(a), ttl); - } - - void Message::add_reply(ipv6 addr, std::chrono::seconds ttl) + void Message::add_nodata_reply() { - std::vector aaaa; - aaaa.resize(16); - oxenc::write_host_as_big(addr.hi, aaaa.data()); - oxenc::write_host_as_big(addr.lo, aaaa.data() + 8); - return add_reply(RRClass::IN, RRType::AAAA, std::move(aaaa), ttl); + if (not questions.empty()) + hdr_fields |= reply_flags; } - void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } - - void Message::add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl) + template RR, typename... Args> + void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) { - if (questions.empty()) + if (m.questions.empty()) return; - hdr_fields |= reply_flags; + m.hdr_fields |= reply_flags; - auto& ans = answers.emplace_back(); - ans.rr_name = get_rr_name(); - ans.rr_type = type; - ans.rr_class = cls; - ans.ttl = ttl; - ans.rData = std::move(data); + m.answers.push_back(std::make_unique(std::string{m.get_rr_name()}, ttl, std::forward(args)...)); } - void Message::add_nodata_reply() - { - if (not questions.empty()) - hdr_fields |= reply_flags; - } + void Message::add_reply(const ipv4& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } + + void Message::add_reply(const ipv6& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::CNAME, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode CNAME value {}", name); + make_reply(*this, ttl, std::string{name}); } void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = 
encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::PTR, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode PTR value {}", name); - } - - void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - if (!write_ints_into(remaining, srv.priority, srv.weight, srv.port)) - return; - if (!write_name_into(remaining, srv.target)) - return; - - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); + make_reply(*this, ttl, std::string{name}); } - void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - while (!txt.empty()) - { - auto piecelen = std::min(txt.size(), size_t{255}); - if (remaining.size() <= piecelen) - throw std::length_error{"TXT record too big"}; - remaining.front() = static_cast(piecelen); - std::memcpy(remaining.data() + 1, txt.data(), piecelen); - txt.remove_prefix(piecelen); - remaining = remaining.subspan(1 + piecelen); - } + void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) { make_reply(*this, ttl, srv); } - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); - } + void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } - void Message::add_nx_reply() + void Message::set_nx_reply() { if (questions.size()) { answers.clear(); - authorities.clear(); - additional.clear(); + // authorities.clear(); + // additional.clear(); // authorative response with recursion available hdr_fields |= reply_flags; @@ -222,17 +169,16 @@ namespace srouter::dns } } - std::string Message::to_string() const + void Message::set_serv_fail() { - return fmt::format( - "[DNSMessage id={:x} fields={:x} questions={{{}}} answers={{{}}} authorities={{{}}} " - "additional={{{}}}]", - hdr_id, - hdr_fields, - 
fmt::join(questions, ","), - fmt::join(answers, ","), - fmt::join(authorities, ","), - fmt::join(additional, ",")); + if (questions.size()) + { + hdr_fields |= flags_RCODEServFail; + // authorative response with recursion available + hdr_fields |= flags_QR | flags_AA | flags_RA; + // don't allow recursion on this request + hdr_fields &= ~flags_RD; + } } } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index b12369091..0d45c1537 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -21,13 +21,23 @@ namespace srouter Message() = default; explicit Message(const Question& question); + // Non-copyable; see clone() if you want a copy with just the questions. + Message(const Message&) = delete; + + Message(Message&&) = default; + + // Clones the message with question/flag, but with no answers + Message clone() const; + nlohmann::json ToJSON() const; static constexpr auto DEFAULT_ANSWER_TTL = 10s; - void add_nx_reply(); - - void add_serv_fail(); + // These two clear any answers that may have been added and then set the appropriate + // flags for a NXDomain (i.e. authoritative reply that the requested thing does not + // exist) or a ServFail (i.e. we don't know how to answer, maybe try someone else). + void set_nx_reply(); + void set_serv_fail(); // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. 
Once set, the @@ -43,9 +53,9 @@ namespace srouter void add_cname_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN A' reply containing the given ipv4 address - void add_reply(ipv4 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv4& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN AAAA' reply containing the given ipv6 address - void add_reply(ipv6 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv6& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); void add_reply(const SRVData& srv, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); @@ -53,7 +63,6 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - size_t encode(std::span buf) const; std::vector encode() const; static std::optional extract(std::span& buf); @@ -62,10 +71,13 @@ namespace srouter uint16_t hdr_id; uint16_t hdr_fields; + std::vector questions; - std::vector answers; - std::vector authorities; - std::vector additional; + std::vector> answers; + + // Currently unused: + // std::vector authorities; + // std::vector additional; std::optional rr_name_override; private: diff --git a/src/dns/question.cpp b/src/dns/question.cpp index 0473e6243..a72abef17 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -19,14 +19,10 @@ namespace srouter::dns throw std::invalid_argument{"qname cannot be empty"}; } - size_t Question::encode(std::span buf) const + void Question::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf; - if (!write_name_into(buf, qname)) - return 0; - if (!write_ints_into(buf, static_cast(qtype), static_cast(qclass))) - return 0; - return orig.size() - buf.size(); + encode_name(buf, qname, prev_names, buf_offset); + buf_offset += write_ints_into(buf, static_cast(qtype), static_cast(qclass)); } bool Question::extract(std::span& buf) diff --git 
a/src/dns/question.hpp b/src/dns/question.hpp index 1647b6a81..4efc35b5d 100644 --- a/src/dns/question.hpp +++ b/src/dns/question.hpp @@ -11,7 +11,7 @@ namespace srouter::dns Question() = default; Question(std::string name, RRType type); - size_t encode(std::span buf) const; + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; bool extract(std::span& buf); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index fcd1070a8..6dacef373 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -6,40 +6,85 @@ #include #include +#include + namespace srouter::dns { - ResourceRecord::ResourceRecord(std::string name, RRType type, std::vector data) - : rr_name{std::move(name)}, rr_type{type}, rr_class{RRClass::IN}, ttl{1s}, rData{std::move(data)} - {} - - size_t ResourceRecord::encode(std::span buf) const + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf.size(); - if (write_name_into(buf, rr_name) - && write_ints_into( - buf, - static_cast(rr_type), - static_cast(rr_class), - static_cast(ttl.count())) - && write_rdata_into(buf, rData)) - return orig - buf.size(); - return 0; + encode_name(buf, rr_name, prev_names, buf_offset); + buf_offset += write_ints_into( + buf, static_cast(rr_type()), static_cast(rr_class), static_cast(ttl.count())); + // The RR value is in a chunk with a 2-byte length in front of it. We don't actually know + // the length yet (especially for things like CNAME, where there might be name compression), + // so we're going to stick a 0 in and then come back and fill it in after we write the + // value. 
+ if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + auto size_buf = buf.subspan(0, 2); + buf_offset += 2; + buf = buf.subspan(2); + encode_data(buf, prev_names, buf_offset); + uint16_t size = buf.data() - size_buf.data() - 2; + oxenc::write_host_as_big(size, size_buf.data()); } nlohmann::json ResourceRecord::ToJSON() const { return nlohmann::json{ {"name", rr_name}, - {"type", static_cast(rr_type)}, + {"type", static_cast(rr_type())}, {"class", static_cast(rr_class)}, {"ttl", ttl.count()}, - {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}}; + /* FIXME: need to virtualize a display for the data, if we care about json representation: + {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}*/}; } std::string ResourceRecord::to_string() const { - return "RR:[ name:{} | type:{} | class:{} | ttl:{} | rdata-size:{} ]"_format( - rr_name, static_cast(rr_type), static_cast(rr_class), ttl, rData.size()); + return "RR:[name:{}|type:{}|class:{}|ttl:{}]"_format( + rr_name, static_cast(rr_type()), static_cast(rr_class), ttl); + } + + void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (rData.size() > buf.size()) + throw std::out_of_range{"Buffer too small"}; + std::memcpy(buf.data(), rData.data(), rData.size()); + buf = buf.subspan(rData.size()); + buf_offset += rData.size(); + } + + RR_A::RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr) : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(4); + oxenc::write_host_as_big(addr.addr, rData.data()); + } + + RR_AAAA::RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr) + : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(16); + oxenc::write_host_as_big(addr.hi, rData.data()); + oxenc::write_host_as_big(addr.lo, rData.data() + 8); + } + + RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) + : RR_bytes{std::move(rr_name), ttl} + { + auto* bytes = 
reinterpret_cast(value.data()); + rData.assign(bytes, bytes + value.size()); + } + + void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, name, prev_names, buf_offset); + } + + void RR_SRV::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + buf_offset += write_ints_into(buf, priority, weight, port); + encode_name(buf, target, prev_names, buf_offset); } } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 798a624a4..dd9343726 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -1,5 +1,8 @@ #pragma once +#include "encode.hpp" +#include "srv_data.hpp" + #include #include @@ -15,10 +18,8 @@ namespace srouter::dns enum class RRType : uint16_t { A = 1, - NS = 2, CNAME = 5, PTR = 12, - MX = 15, TXT = 16, AAAA = 28, SRV = 33, @@ -26,23 +27,96 @@ namespace srouter::dns struct ResourceRecord { - ResourceRecord() = default; - explicit ResourceRecord(std::string name, RRType type, std::vector rdata); + ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} + + // Writes this RR to the beginning of buf, eliminating the written section from buf. Throws if buf is exceeded. + // + // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual + // encode_data() to write the value. + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; - // Writes this RR to the beginning of buf. Returns the number of bytes written, or 0 if the - // buffer is too small to hold it. 
- size_t encode(std::span buf) const; + virtual void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const = 0; nlohmann::json ToJSON() const; std::string to_string() const; std::string rr_name; - RRType rr_type; - RRClass rr_class; + RRClass rr_class = RRClass::IN; std::chrono::seconds ttl; - std::vector rData; + + virtual RRType rr_type() const = 0; static constexpr bool to_string_formattable = true; }; + + // Subclass of ResourceRecord that just has a binary chunk of data. Should not be used for data + // types containing compressible names in the value. The subclass must take care of encoding + // the rData member value as required; this base class encode_data simply barfs it into the + // buffer as-is. + struct RR_bytes : ResourceRecord + { + std::vector rData; + + using ResourceRecord::ResourceRecord; + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_A : RR_bytes + { + RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr); + RRType rr_type() const override { return RRType::A; } + }; + struct RR_AAAA : RR_bytes + { + RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr); + RRType rr_type() const override { return RRType::AAAA; } + }; + struct RR_TXT : RR_bytes + { + RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value); + RRType rr_type() const override { return RRType::TXT; } + }; + + // Base class for RR types that have a single target name as the value, such as CNAME and PTR + struct RR_target : ResourceRecord + { + std::string name; + + RR_target(std::string rr_name, std::chrono::seconds ttl, std::string name) + : ResourceRecord{std::move(rr_name), ttl}, name{std::move(name)} + {} + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_PTR : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::PTR; } 
+ }; + struct RR_CNAME : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::CNAME; } + }; + struct RR_SRV : ResourceRecord + { + uint16_t priority; + uint16_t weight; + uint16_t port; + std::string target; + + RR_SRV(std::string rr_name, std::chrono::seconds ttl, const SRVData& srv) + : ResourceRecord{std::move(rr_name), ttl}, + priority{srv.priority}, + weight{srv.weight}, + port{srv.port}, + target{srv.target} + {} + + RRType rr_type() const override { return RRType::SRV; } + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp index 2395538fb..7f34b67ac 100644 --- a/src/dns/server.cpp +++ b/src/dns/server.cpp @@ -22,8 +22,8 @@ namespace srouter::dns void QueryJob_Base::cancel() { - Message reply{_query}; - reply.add_serv_fail(); + Message reply = _query.clone(); + reply.set_serv_fail(); send_reply(reply.encode()); } @@ -416,7 +416,7 @@ namespace srouter::dns const quic::Address& from) override { log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query, source, to, from); + auto tmp = std::make_shared(weak_from_this(), query.clone(), source, to, from); // no questions, send fail if (query.questions.empty()) { @@ -675,7 +675,7 @@ namespace srouter::dns if (q.name() == "use-application-dns.net") { // yea it is, let's turn off DoH because god is dead. 
- msg.add_nx_reply(); + msg.set_nx_reply(); // press F to pay respects and send it back where it came from ptr->send_udp(from, to, msg.encode()); return true; diff --git a/src/dns/server.hpp b/src/dns/server.hpp index a01b90757..7b224a24b 100644 --- a/src/dns/server.hpp +++ b/src/dns/server.hpp @@ -68,11 +68,8 @@ namespace srouter::dns public: explicit QueryJob( - std::shared_ptr source, - const Message& query, - const quic::Address& to_, - const quic::Address& from_) - : QueryJob_Base{query}, src{std::move(source)}, resolver{to_}, asker{from_} + std::shared_ptr source, Message query, const quic::Address& to_, const quic::Address& from_) + : QueryJob_Base{std::move(query)}, src{std::move(source)}, resolver{to_}, asker{from_} {} void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index c33612ac5..76d119a80 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -36,8 +36,8 @@ namespace srouter::handlers if (not should_hook_dns_message(query)) return false; - auto job = std::make_shared(source, query, to, from); - if (!handle_hooked_dns_message(query, [job](dns::Message msg) { job->send_reply(msg.encode()); })) + auto job = std::make_shared(source, query.clone(), to, from); + if (!handle_hooked_dns_message(query.clone(), [job](dns::Message msg) { job->send_reply(msg.encode()); })) job->cancel(); return true; } @@ -336,8 +336,8 @@ namespace srouter::handlers static dns::Message& clear_dns_message(dns::Message& msg) { - msg.authorities.clear(); - msg.additional.clear(); + // msg.authorities.clear(); + // msg.additional.clear(); msg.answers.clear(); msg.hdr_fields &= ~dns::flags_RCODENxDomain; return msg; @@ -451,7 +451,7 @@ namespace srouter::handlers } else { - msg.add_nx_reply(); + msg.set_nx_reply(); reply(std::move(msg)); return true; } @@ -472,11 +472,12 @@ namespace srouter::handlers lookup, sub = std::move(sub), reply = std::move(reply), - msg = std::move(msg), + 
msg_ptr = std::make_shared(std::move(msg)), cname_only = q.qtype == dns::RRType::CNAME]( std::optional maybe_netaddr, bool assertive, std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; msg.set_rr_name(lookup); if (maybe_netaddr) { @@ -494,7 +495,7 @@ namespace srouter::handlers { // We got an assertive "does not exist" message (and not just a failure // or timeout), so add the nx reply - msg.add_nx_reply(); + msg.set_nx_reply(); // FIXME: we should be able to provide a TTL here } else @@ -505,7 +506,7 @@ namespace srouter::handlers // server). assert(!assertive); // FIXME: should be able to specify a TTL here - msg.add_nx_reply(); + msg.set_nx_reply(); } reply(std::move(msg)); }); @@ -530,11 +531,11 @@ namespace srouter::handlers fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); } else - msg.add_nx_reply(); + msg.set_nx_reply(); } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -576,15 +577,15 @@ namespace srouter::handlers // "this record does not exist"). } else - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } // Otherwise it's some query type we don't support, so return does-not-exist. 
- msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } @@ -605,9 +606,9 @@ namespace srouter::handlers *ip); if (!found) - msg.add_nx_reply(); + msg.set_nx_reply(); - reply(msg); + reply(std::move(msg)); return true; } @@ -618,25 +619,25 @@ namespace srouter::handlers { _router.session_endpoint().lookup_client_intro( *rid, - [msg = std::move(msg), sub, reply = std::move(reply)]( + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( const std::optional& cc) mutable { if (cc) { for (const auto& srv : cc->SRVs()) if (srv.service == sub[0] && srv.proto == sub[1]) - msg.add_reply(srv); + msg->add_reply(srv); } else - msg.add_nx_reply(); + msg->set_nx_reply(); - reply(msg); + reply(std::move(*msg)); }); return true; } } - msg.add_nx_reply(); - reply(msg); + msg.set_nx_reply(); + reply(std::move(msg)); return true; } From 31c9ec123fe0d5d7cc4fd9c37de0c5b8683e6382 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Mon, 10 Nov 2025 21:18:09 -0400 Subject: [PATCH 2/8] fix empty name encoding An empty name was getting encoded as two \0's instead of just one. --- src/dns/encode.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 7923d26fe..911a1f967 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -87,8 +87,7 @@ namespace srouter::dns // names (particularly for something like SRV records where a name can be repeated multiple // times), and the DNS response size limit of 512 bytes, we implement that here. - size_t pos = 0; - do + for (size_t pos = name.empty() ? std::string::npos : 0; pos != std::string_view::npos;) { std::string_view check = name.substr(pos); if (auto it = prev_names.find(check); it != prev_names.end()) @@ -116,7 +115,7 @@ namespace srouter::dns buf_offset += 1 + part.size(); pos = next == std::string_view::npos ? 
next : next + 1; - } while (pos != std::string_view::npos); + } // If we get here we wrote all the pieces without pointing at anything, so we need to append // a null byte to terminate the name: From 4ce87939b28a858e628e51e29d819ecf756b9f85 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Mon, 10 Nov 2025 22:55:46 -0400 Subject: [PATCH 3/8] EDNS query and cookie support This adds support for EDNS requests, allowing for larger responses. As part of that, it adds support for handling DNS cookies, which are a sort of pseudo-mac over DNS requests to prevent out-of-path attackers from being able to forge responses. --- src/dns/message.cpp | 219 ++++++++++++++++++++++++++++++++++------- src/dns/message.hpp | 22 ++++- src/dns/rr.cpp | 53 +++++++++- src/dns/rr.hpp | 59 +++++++++++ src/dns/server.cpp | 34 ++++++- src/dns/server.hpp | 6 ++ src/rpc/rpc_server.cpp | 2 + 7 files changed, 351 insertions(+), 44 deletions(-) diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ef341a4c0..8d4c12075 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -8,9 +8,12 @@ #include #include +#include #include +#include #include +#include namespace srouter::dns { @@ -24,23 +27,17 @@ namespace srouter::dns c.hdr_id = hdr_id; c.hdr_fields = hdr_fields; c.questions = questions; + c.additional_edns = additional_edns; // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) return c; } std::vector Message::encode() const { - // TODO FIXME: We currently aren't respect the EDNS bit, and that means our maximum message - // size is 512 bytes. We should support EDNS (by checking and setting the appropriate flag - // in `additional`), in which case 1232 becomes the (practical) maximum. - // - // Basically: - // - if the client supports EDNS it sets the size in an additional flag - // - we can then go up to whichever of that size or 1232 is smaller. - // - we set the pseudo-RR in the additional flags section of the response. 
- std::vector tmp; - tmp.resize(512); + // If the client signalled EDNS support then we can use a larger payload, otherwise DNS is + // limited to 512 bytes. + tmp.resize(additional_edns ? additional_edns->max_payload() : 512); prev_names_t prev_names; std::span buf{tmp}; @@ -53,13 +50,7 @@ namespace srouter::dns static_cast(questions.size()), static_cast(answers.size()), static_cast(0 /*authorities.size()*/), - static_cast(0 /*additional.size()*/)); - - // if (auto written = thing.encode(buf)) - //{ - // buf = buf.subspan(written); - // return true; - // } + static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); for (const auto& question : questions) question.encode(buf, prev_names, buf_offset); @@ -67,14 +58,64 @@ namespace srouter::dns for (auto& a : answers) a->encode(buf, prev_names, buf_offset); + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + // Trim the excess: tmp.resize(tmp.size() - buf.size()); return tmp; } - std::optional Message::extract(std::span& buf) + static std::array make_server_cookie( + std::span client_cookie, + std::span client_ip, + std::span server_cookie_secret, + std::chrono::sys_seconds ts = std::chrono::floor(std::chrono::system_clock::now())) + { + assert(client_ip.size() == 4 || client_ip.size() == 16); + + static_assert(server_cookie_secret.size() == crypto_shorthash_siphash24_KEYBYTES); + + std::array cookie; + auto ccookie = std::span{cookie}.first<8>(); + auto scookie = std::span{cookie}.last<16>(); + std::memcpy(ccookie.data(), client_cookie.data(), 8); + + // The first 8 bytes of the server cookie (as per RFC 9018) are: + // - version (always 1) + // - three reserved bytes + // - 4-byte, uint32 unix timestamp + scookie[0] = std::byte{1}; // Version + scookie[1] = std::byte{0}; // - + scookie[2] = std::byte{0}; // - reserved + scookie[3] = std::byte{0}; // - + auto ts_val = static_cast(ts.time_since_epoch().count()); + oxenc::write_host_as_big(ts_val, &scookie[4]); + + // The last 8 bytes of 
the server cookie are a hash of 8-byte client + // cookie, then the above 8 bytes server cookie fields, then the + // 4- or 16-byte client IP (in network order notation). + std::array hash_data{{0}}; + std::memcpy(hash_data.data(), ccookie.data(), 8); + std::memcpy(hash_data.data() + 8, scookie.data(), 8); + std::memcpy(hash_data.data() + 16, client_ip.data(), client_ip.size()); + crypto_shorthash_siphash24( + reinterpret_cast(scookie.data() + 8), + hash_data.data(), + 16 + client_ip.size(), + reinterpret_cast(server_cookie_secret.data())); + + return cookie; + } + + std::optional Message::extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_ip) { + if (client_ip.size() != 4 && client_ip.size() != 16) + throw std::logic_error{"Invalid client IP for Message::extract_question"}; auto maybe = std::make_optional(); auto& m = *maybe; uint16_t qd_count, an_count, ns_count, ar_count; @@ -84,20 +125,128 @@ namespace srouter::dns return maybe; } m.questions.resize(qd_count); - m.answers.resize(an_count); // Ignore these: + // m.answers.resize(an_count); // m.authorities.resize(ns_count); // m.additional.resize(ar_count); - for (auto& q : m.questions) + try { - if (!q.extract(buf)) + for (auto& q : m.questions) + if (!q.extract(buf)) + throw std::invalid_argument{"invalid question"}; + + // Skip any answers or authority records: + for (uint16_t i = 0; i < an_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid answer RR"}; + for (uint16_t i = 0; i < ns_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid authority RR"}; + + // In the additional section we look for an EDNS entry, and skip anything else: + for (uint16_t i = 0; i < ar_count; i++) { - log::debug(logcat, "failed to decode question"); - maybe.reset(); - return maybe; + static_assert(crypto_shorthash_siphash24_KEYBYTES == 16); + auto a_rr = ParsedRR::extract(buf); + if (!a_rr) + throw std::invalid_argument{"invalid 
additional RR"}; + if (a_rr->name != "." || a_rr->rr_type != RRType::OPT) + { + continue; + } + + if (m.additional_edns) + throw std::invalid_argument{"found invalid multiple additional OPT records"}; + + auto max_payload = static_cast(a_rr->rr_class); + m.additional_edns.emplace(std::min(max_payload, 1232)); + + std::optional> cookie; + for (auto optbuf = a_rr->rdata; !optbuf.empty();) + { + if (optbuf.size() < 4) + throw std::invalid_argument{"additional OPT data section too small"}; + auto opt_code = oxenc::load_big_to_host(optbuf.data()); + auto opt_len = oxenc::load_big_to_host(optbuf.data() + 2); + optbuf = optbuf.subspan(4); + if (opt_len > optbuf.size()) + throw std::invalid_argument{"additional OPT option value length too small"}; + auto value = optbuf.subspan(0, opt_len); + optbuf = optbuf.subspan(opt_len); + + if (opt_code == PRR_EDNS::OPT_COOKIE) + { + if (m.additional_edns->cookie) + throw std::invalid_argument{"Duplicate OPT client cookies"}; + + if (value.size() == 8) + { + // This is the client sending a new cookie, requesting a new server + // cookie (i.e. because it doesn't currently have one). + + m.additional_edns->cookie = + make_server_cookie(value.first<8>(), client_ip, server_cookie_secret); + } + else if (value.size() == 24) + { + // This is the client sending its cookie along with a previously + // obtained server cookie for that client cookie, so we are supposed + // to validate it. + auto ccookie = value.first<8>(); + auto scookie = value.last<16>(); + + std::chrono::sys_seconds ts{ + std::chrono::seconds{oxenc::load_big_to_host(&scookie[4])}}; + + auto expected = make_server_cookie(ccookie, client_ip, server_cookie_secret, ts); + bool bad_cookie = std::memcmp(value.data(), expected.data(), 24) != 0; + + auto now = std::chrono::floor(std::chrono::system_clock::now()); + + if (!bad_cookie && ts >= now - 30min && ts <= now + 5min) + // Cookie is good and the timestamp in it is close to now, so the + // cookie stays as-is. 
+ std::memcpy(m.additional_edns->cookie.emplace().data(), value.data(), 24); + + else + { + // If the cookie timestamp is too far away then it is a badcookie + // failure. (We don't have to worry about client clock skew because + // supposedly *we* issued this with the timestamp in it). + if (bad_cookie || ts < now - 1h || ts > now + 5min) + { + // When this is set we'll send a proper bad cookie response + // immediately after parsing: + m.additional_edns->bad_cookie = true; + // Extended rcode is, um, a wee bit hacky: we put the high 8 + // bits of the 12-bit error code into the OPT TTL field, and + // then continue to use the 4-bit RCODE for the bottom 4 bits. + m.additional_edns->ttl = + std::chrono::seconds{(uint32_t{PRR_EDNS::EXT_RCODE_BADCOOKIE} >> 4) << 24}; + // (The other bytes are all 0 values) + } + + // else it's valid, just a little bit (but not too) old and they are + // due for a new cookie. + + // In either of the above cases, we give the client a new cookie + // to use, with an updated new timestamp + m.additional_edns->cookie = + make_server_cookie(ccookie, client_ip, server_cookie_secret, now); + } + } + // Else we have an unparseable/non-understood cookie, and so we are supposed + // to ignore the option and discard the cookie data. + } + } } } + catch (const std::exception& e) + { + log::debug(logcat, "failed to parse DNS message: {}", e.what()); + maybe.reset(); + } return maybe; } @@ -116,6 +265,8 @@ namespace srouter::dns void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } + // TODO FIXME: "RA" means we advertise that we support recursion, but we should only do that + // when we have an upstream DNS server available. 
(This TODO is also in server.cpp) static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; void Message::add_nodata_reply() @@ -155,29 +306,29 @@ namespace srouter::dns void Message::set_nx_reply() { + answers.clear(); + // authorities.clear(); + // additional.clear(); + if (questions.size()) { - answers.clear(); - // authorities.clear(); - // additional.clear(); - + hdr_fields |= flags_RCODENxDomain; // authorative response with recursion available hdr_fields |= reply_flags; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - hdr_fields |= flags_RCODENxDomain; } } void Message::set_serv_fail() { + answers.clear(); + if (questions.size()) { hdr_fields |= flags_RCODEServFail; // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; + hdr_fields |= reply_flags; + // A servfail is not an authoritative answer, so clear that bit: + hdr_fields &= ~flags_AA; } } diff --git a/src/dns/message.hpp b/src/dns/message.hpp index 0d45c1537..32bb8ee01 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -26,7 +26,7 @@ namespace srouter Message(Message&&) = default; - // Clones the message with question/flag, but with no answers + // Clones the message with question/flags/edns response data, but with no answers Message clone() const; nlohmann::json ToJSON() const; @@ -39,6 +39,11 @@ namespace srouter void set_nx_reply(); void set_serv_fail(); + // This clears any answers and sets the appropriate header flags for a BADCOOKIE + // response. Note that this is only valid when the message has `additional_edns` as + // part of this error code value is carried in that additional RR data. + void set_badcookie_flags(); + // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. 
Once set, the // value persists for any added answers until this method is called again. @@ -65,7 +70,15 @@ namespace srouter std::vector encode() const; - static std::optional extract(std::span& buf); + // Parses a question Message from the given buf, removing the question from the prefix + // of buf. `server_cookie_secret` and `client_addr` contains information needed for DNS + // cookie handling; `server_cookie_secret` is something derived from the SR private key + // seed + startup time, while client_addr is the raw bytes of the IP address (4 or 16 + // bytes for IPv4/IPv6, respectively). + static std::optional extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_addr); std::string to_string() const; @@ -78,6 +91,11 @@ namespace srouter // Currently unused: // std::vector authorities; // std::vector additional; + + // Currently the only additional record we do anything with is the OPT section for + // enabling EDNS (most significantly for allowing large DNS packets) + std::optional additional_edns; + std::optional rr_name_override; private: diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index 6dacef373..b0c53a925 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -5,11 +5,42 @@ #include #include +#include #include - namespace srouter::dns { + std::optional ParsedRR::extract(std::span& buf) + { + auto name = extract_name(buf); + if (!name || buf.size() < 2 + 2 + 4 + 2 /* type + class + ttl + rdatalen */) + return std::nullopt; + auto typ = oxenc::load_big_to_host(buf.data()); + auto cls = oxenc::load_big_to_host(buf.data() + 2); + auto ttl = oxenc::load_big_to_host(buf.data() + 4); + auto len = oxenc::load_big_to_host(buf.data() + 8); + buf = buf.subspan(10); + + if (buf.size() < len) + return std::nullopt; + + auto rdata = buf.subspan(0, len); + buf = buf.subspan(len); + + return ParsedRR{ + .name = std::move(*name), + .rr_type = static_cast(typ), + .rr_class = static_cast(cls), + .ttl = std::chrono::seconds{ttl}, + .rdata = 
rdata}; + } + + static void check_buf_size(const std::span& buf, size_t needed) + { + if (buf.size() < needed) + throw std::out_of_range{"DNS response exceeds max size"}; + } + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { encode_name(buf, rr_name, prev_names, buf_offset); @@ -19,8 +50,7 @@ namespace srouter::dns // the length yet (especially for things like CNAME, where there might be name compression), // so we're going to stick a 0 in and then come back and fill it in after we write the // value. - if (buf.size() < 2) - throw std::out_of_range{"Buffer too small"}; + check_buf_size(buf, 2); auto size_buf = buf.subspan(0, 2); buf_offset += 2; buf = buf.subspan(2); @@ -48,8 +78,7 @@ namespace srouter::dns void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const { - if (rData.size() > buf.size()) - throw std::out_of_range{"Buffer too small"}; + check_buf_size(buf, rData.size()); std::memcpy(buf.data(), rData.data(), rData.size()); buf = buf.subspan(rData.size()); buf_offset += rData.size(); @@ -87,4 +116,18 @@ namespace srouter::dns encode_name(buf, target, prev_names, buf_offset); } + void PRR_EDNS::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (cookie) + { + uint16_t datalen = 2 + 2 + cookie->size(); // code + length + data + check_buf_size(buf, datalen); + oxenc::write_host_as_big(OPT_COOKIE, buf.data()); + oxenc::write_host_as_big(static_cast(cookie->size()), buf.data() + 2); + std::memcpy(buf.data() + 4, cookie->data(), cookie->size()); + buf = buf.subspan(datalen); + buf_offset += datalen; + } + } + } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index dd9343726..85148f171 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -23,8 +23,27 @@ namespace srouter::dns TXT = 16, AAAA = 28, SRV = 33, + + OPT = 41, + }; + + // Parsed RR data: this is intentionally very raw and is only for extracting the data, not + // 
interpreting it. Note that the rdata value points into the input buf: the ParsedRR data + // should not be held longer than the input buffer! + struct ParsedRR + { + std::string name; + RRType rr_type; // *Not* necessarily one of the values defined above + RRClass rr_class; // *Not* necessarily one of the values defined above + std::chrono::seconds ttl; + std::span rdata; + + // Attempts to parse an RR from the beginning of `buf`. `buf` will have the prefix removed + // containing the extracted record. Returns nullopt on extraction error. + static std::optional extract(std::span& buf); }; + // Abstract base class we use for building RR responses struct ResourceRecord { ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} @@ -119,4 +138,44 @@ namespace srouter::dns RRType rr_type() const override { return RRType::SRV; } void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; }; + + // Psuedo-RR for EDNS; a client sends this in the additional section if it supports EDNS, and + // the server sends it back (if provided) to confirm that the server also supports EDNS. + struct PRR_EDNS : ResourceRecord + { + static constexpr uint16_t OPT_COOKIE = 10; + static constexpr uint16_t EXT_RCODE_BADCOOKIE = 23; + + std::optional> cookie; + + // Will be true if the full cookie we were provided was invalid or expired, in which case we + // are supposed to immediately fail with an extended BADCOOKIE error code (which will be + // encoded if this object is encoded into the output with this bool set to true). + bool bad_cookie{false}; + + // Constructs an EDNS value. This is rather hacky, to try to mash it into the fairly + // inflexible older DNS protocol: + // - NAME is always empty (i.e. 
".", the root domain) + // - 32-bit TTL is nothing to do with ttl, but actually 3 packed fields: + // - 8-bit "extended rcode" + // - 8-bit version (currently 0) + // - 16-bit flags of which there is one for DNSSEC and all others are reserved + // We currently always use 0 as we don't use extended rcode or dnssec. + // - CLASS isn't a class at all but rather contains the supported UDP payload size. We set + // it to the recommended 1232 size, but if a client gave us a smaller value we should + // reflect that instead. + // + // Beyond that, we support an optional DNS server cookie value (see RFC 7873 and 9018), + // which must be the 8-byte cookie sent by the client followed by a 16 byte server cookie. + PRR_EDNS(uint16_t max_payload, std::optional> cookie = std::nullopt) + : ResourceRecord{"", 0s}, cookie{std::move(cookie)} + { + rr_class = static_cast(max_payload); + } + + uint16_t max_payload() const { return static_cast(rr_class); } + constexpr RRType rr_type() const override { return RRType::OPT; } + void encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const override; + }; + } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp index 7f34b67ac..721ec9724 100644 --- a/src/dns/server.cpp +++ b/src/dns/server.cpp @@ -2,6 +2,7 @@ #include "constants/apple.hpp" #include "constants/platform.hpp" +#include "dns.hpp" #include "message.hpp" #include "nm_platform.hpp" #include "sd_platform.hpp" @@ -9,6 +10,7 @@ #include #include #include +#include #include #include @@ -522,7 +524,9 @@ namespace srouter::dns Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - {} + { + randombytes_buf(_cookie_secret.data(), _cookie_secret.size()); + } std::vector> Server::get_all_resolvers() const { @@ -656,14 +660,38 @@ namespace srouter::dns return false; } - auto maybe = Message::extract(payload); + std::span 
client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; + + auto maybe = Message::extract_question(payload, _cookie_secret, client_ip); if (not maybe) { log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); return false; } - auto& msg = *maybe; + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. + msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. (This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + ptr->send_udp(from, to, msg.encode()); + return true; + } + // we don't provide a DoH resolver because it requires verified TLS // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal // thankfully mozilla added a backdoor that allows ISPs to turn it off diff --git a/src/dns/server.hpp b/src/dns/server.hpp index 7b224a24b..4ba6e43a9 100644 --- a/src/dns/server.hpp +++ b/src/dns/server.hpp @@ -182,6 +182,12 @@ namespace srouter::dns private: const unsigned int m_NetIfIndex; + + // Secret value we use as a key in DNS server cookie hashing. We generate a random once on + // each startup as we currently have no need for this to be deterministic, and that + // introduces rotation whenever we restart. + std::array _cookie_secret; + // TODO FIXME: this ownership model is cursed. 
std::set, ComparePtr>> _owned_resolvers; std::set, CompareWeakPtr> _resolvers; diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index 9f19c073a..c70079969 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -28,6 +28,7 @@ namespace srouter::rpc log::info(logcat, "RPC Server received request for endpoint `{}`", req.name); } +#if 0 // Fake packet source that serializes repsonses back into dns class DummyPacketSource final : public dns::PacketSource { @@ -47,6 +48,7 @@ namespace srouter::rpc /// returns the sockaddr we are bound on if applicable std::optional bound_on() const override { return std::nullopt; } }; +#endif bool check_path(std::string path) { From 28aecaecf7c9cb29e022b2ec57a331c6d0817eb8 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 12 Nov 2025 13:44:48 -0400 Subject: [PATCH 4/8] CI: make sure libunwind-NN-dev installed in llvm builds --- .drone.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 3509e6ac0..c753769f0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -291,7 +291,7 @@ local clang(version) = debian_pipeline( local full_llvm(version) = debian_pipeline( 'Debian sid/llvm-' + version, docker_base + 'debian-sid-clang', - deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], + deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libunwind-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], remove='g++'), oxen_repo=[], cmake_extra='-DCMAKE_C_COMPILER=clang-' + version + From 1f4515fc00b11bbd60f1134a74204c7062f567fc Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 12 Nov 2025 17:24:52 -0400 Subject: [PATCH 5/8] Remove dead code --- src/ev/udp.cpp | 105 ------------------------------------------------- src/ev/udp.hpp | 40 
------------------- 2 files changed, 145 deletions(-) delete mode 100644 src/ev/udp.cpp delete mode 100644 src/ev/udp.hpp diff --git a/src/ev/udp.cpp b/src/ev/udp.cpp deleted file mode 100644 index c48592e12..000000000 --- a/src/ev/udp.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "udp.hpp" - -namespace srouter -{ - static auto logcat = log::Cat("ev-udp"); - - inline constexpr size_t MAX_BATCH = -#if defined(OXEN_LIBQUIC_UDP_SENDMMSG) || defined(OXEN_LIBQUIC_UDP_GSO) - 24; -#else - 1; -#endif - - UDPHandle::UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb) : _loop{ev} - { - socket = std::make_unique(ev->get_event_base(), bind, std::move(cb)); - _local = socket->address(); - } - - UDPHandle::~UDPHandle() { socket.reset(); } - - io_result UDPHandle::_send_impl(const quic::Path& path, std::byte* buf, size_t size, uint8_t ecn, size_t& n_pkts) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - auto* bufsize = &size; - - if (!socket) - { - log::warning(logcat, "Cannot send packets on closed socket ({})", path); - return io_result{EBADF}; - } - - assert(n_pkts >= 1 && n_pkts <= MAX_BATCH); - - log::trace(logcat, "Sending {} UDP packet(s) {}...", n_pkts, path); - - auto [ret, sent] = socket->send(path, buf, bufsize, ecn, n_pkts); - - if (ret.failure() && !ret.blocked()) - { - log::error(logcat, "Error sending packets {}: {}", path, ret.str_error()); - n_pkts = 0; // Drop any packets, as we had a serious error - return ret; - } - - if (sent < n_pkts) - { - if (sent == 0) // Didn't send *any* packets, i.e. 
we got entirely blocked - log::debug(logcat, "UDP sent none of {}", n_pkts); - - else - { - // We sent some but not all, so shift the unsent packets back to the beginning of buf/bufsize - log::debug(logcat, "UDP undersent {}/{}", sent, n_pkts); - size_t offset = std::accumulate(bufsize, bufsize + sent, size_t{0}); - size_t len = std::accumulate(bufsize + sent, bufsize + n_pkts, size_t{0}); - std::memmove(buf, buf + offset, len); - std::copy(bufsize + sent, bufsize + n_pkts, bufsize); - n_pkts -= sent; - } - - // We always return EAGAIN (so that .blocked() is true) if we failed to send all, even - // if that isn't strictly what we got back as the return value (sendmmsg gives back a - // non-error on *partial* success). - return io_result{EAGAIN}; - } - - n_pkts = 0; - - return ret; - } - - void UDPHandle::_send_or_queue( - const quic::Path& path, std::vector buf, uint8_t ecn, std::function callback) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - if (!socket) - { - log::warning(logcat, "Cannot sent to dead socket for path {}", path); - if (callback) - callback(io_result{EBADF}); - return; - } - - size_t n_pkts = 1; - // size_t bufsize = buf.size(); - auto res = _send_impl(path, buf.data(), buf.size(), ecn, n_pkts); - - if (res.blocked()) - { - socket->when_writeable([this, path, buf = std::move(buf), ecn, cb = std::move(callback)]() mutable { - _send_or_queue(path, std::move(buf), ecn, std::move(cb)); - }); - } - else if (callback) - callback({}); - } - - io_result UDPHandle::send(const quic::Address& dest, std::span data) - { - return _send_impl(quic::Path{_local, dest}, data.data(), data.size(), 0); - } -} // namespace srouter diff --git a/src/ev/udp.hpp b/src/ev/udp.hpp deleted file mode 100644 index 6179b9e11..000000000 --- a/src/ev/udp.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "net/ip_packet.hpp" -#include "util/logging.hpp" - -#include -#include -#include - -namespace srouter -{ - using UDPSocket = quic::UDPSocket; - - using 
io_result = quic::io_result; - - class UDPHandle - { - public: - UDPHandle() = delete; - explicit UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb); - ~UDPHandle(); - - private: - std::shared_ptr _loop; - std::unique_ptr socket; - quic::Address _local; - - void _send_or_queue( - const quic::Path& path, - std::vector buf, - uint8_t ecn, - std::function callback = nullptr); - - public: - io_result send(const quic::Address& dest, std::span data); - - quic::Address bind() { return _local; } - }; - -} // namespace srouter From 39eccd88a789410090b85c2df0dfeea3984ebc04 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 14 Nov 2025 14:01:29 -0400 Subject: [PATCH 6/8] Internal DNS handling overhaul - Greatly simplify the DNS code into a dns::Handler + dns::Listener instead of 14 (approximately, it's hard to tell) intermixed classes doing various things in needlessly complicated ways. - Move all of the actual DNS record logic from tun.cpp (which was more than half of tun.cpp!) into dns/handler.cpp. - Remove DNSInterceptor (and l3-intercept config option), as it doesn't work reliably even in current stable Lokinet. The only platform that might actually need it is Android (eventually) but it can either be resurrected or (more likely) an Android-specific interceptor built. Other platforms have better alternatives than attempting IP packet interception. - Implement TCP DNS requests, so that we can successfully return queries that might not fit in a UDP packet (even with EDNS). - Implement DNS truncation bit, which is meant to tell an application that the response is too large and it needs to retry via TCP (hence the above addition): - Catch the exception and set the truncation flag when we overrun the reply buffer. Previously we were just not responding. - Make sure we can't return sliced RR records in a truncated reply by throwing away the entire answer section if we run out of space.
--- src/CMakeLists.txt | 3 +- src/config/config.cpp | 2 + src/dns/dns.hpp | 16 - src/dns/flags.hpp | 22 ++ src/dns/handler.cpp | 417 ++++++++++++++++++++++ src/dns/handler.hpp | 48 +++ src/dns/listener.cpp | 178 +++++++++ src/dns/listener.hpp | 51 +++ src/dns/message.cpp | 142 +++++--- src/dns/message.hpp | 46 ++- src/dns/question.cpp | 1 - src/dns/rr.cpp | 1 - src/dns/rr.hpp | 2 + src/dns/server.cpp | 733 -------------------------------------- src/dns/server.hpp | 199 ----------- src/handlers/tun.cpp | 563 +---------------------------- src/handlers/tun.hpp | 81 ++--- src/handlers/tun_base.hpp | 27 -- src/router/router.cpp | 74 +++- src/router/router.hpp | 27 +- src/rpc/rpc_server.cpp | 2 - src/session/session.cpp | 5 +- src/session/session.hpp | 1 - src/util/logging.hpp | 2 +- 24 files changed, 966 insertions(+), 1677 deletions(-) delete mode 100644 src/dns/dns.hpp create mode 100644 src/dns/flags.hpp create mode 100644 src/dns/handler.cpp create mode 100644 src/dns/handler.hpp create mode 100644 src/dns/listener.cpp create mode 100644 src/dns/listener.hpp delete mode 100644 src/dns/server.cpp delete mode 100644 src/dns/server.hpp delete mode 100644 src/handlers/tun_base.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 534c7c189..dbbe79343 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -156,11 +156,12 @@ if (SROUTER_FULL) # parse modify and reconstitute dns wire proto, dns queries and RR target_sources(session-router-dns PRIVATE dns/encode.cpp + dns/handler.cpp + dns/listener.cpp dns/message.cpp dns/platform.cpp dns/question.cpp dns/rr.cpp - dns/server.cpp ) # platform specific bits and bobs for setting dns diff --git a/src/config/config.cpp b/src/config/config.cpp index 17d2cbe46..03352b2d9 100644 --- a/src/config/config.cpp +++ b/src/config/config.cpp @@ -1035,6 +1035,8 @@ namespace srouter MultiValue, Comment{ "Address to bind to for handling DNS requests.", + "", + "Can be specified multiple times to bind to multiple addresses; can 
be set to empty to disable.", }, [this, parse_addr_for_dns](std::string arg) { if (not arg.empty()) diff --git a/src/dns/dns.hpp b/src/dns/dns.hpp deleted file mode 100644 index 0dae13dee..000000000 --- a/src/dns/dns.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace srouter::dns -{ - constexpr uint16_t flags_QR = 1 << 15; - constexpr uint16_t flags_AA = 1 << 10; - constexpr uint16_t flags_TC = 1 << 9; - constexpr uint16_t flags_RD = 1 << 8; - constexpr uint16_t flags_RA = 1 << 7; - constexpr uint16_t flags_RCODENxDomain = 3; - constexpr uint16_t flags_RCODEServFail = 2; - constexpr uint16_t flags_RCODENoError = 0; - -} // namespace srouter::dns diff --git a/src/dns/flags.hpp b/src/dns/flags.hpp new file mode 100644 index 000000000..de14b6663 --- /dev/null +++ b/src/dns/flags.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include + +namespace srouter::dns +{ + constexpr uint16_t flags_QR = 1 << 15; + constexpr uint16_t flags_AA = 1 << 10; + constexpr uint16_t flags_TC = 1 << 9; + constexpr uint16_t flags_RD = 1 << 8; + constexpr uint16_t flags_RA = 1 << 7; + + constexpr uint16_t flags_RCODE_mask = ~uint16_t{0b1111}; + + constexpr uint16_t RCODE_NxDomain = 3; + constexpr uint16_t RCODE_ServFail = 2; + constexpr uint16_t RCODE_FormErr = 1; + constexpr uint16_t RCODE_NoError = 0; + + inline constexpr uint16_t set_rcode(uint16_t flags, uint16_t rcode) { return (flags & flags_RCODE_mask) | rcode; } + +} // namespace srouter::dns diff --git a/src/dns/handler.cpp b/src/dns/handler.cpp new file mode 100644 index 000000000..24cf52b6a --- /dev/null +++ b/src/dns/handler.cpp @@ -0,0 +1,417 @@ + +#include "handler.hpp" + +#include "dns/rr.hpp" +#include "flags.hpp" +#include "message.hpp" +#include "nodedb.hpp" +#include "router/router.hpp" +#include "util/logging.hpp" + +namespace srouter::dns +{ +#ifdef SROUTER_EMBEDDED_ONLY + static_assert(false, "dns::RequestHandler requires a full lokinet build!"); +#endif + + namespace + { + auto logcat = log::Cat("dns"); 
+ + const auto random_snode = "random.{}"_format(RELAY_TLD); + + const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); + const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); + bool is_localhost(std::string_view qname) + { + return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld + or qname.ends_with(dot_localhost_ctld); + } + + std::optional parse_rid(std::string_view b32rid) + { + auto rid = std::make_optional(); + if (not rid->from_base32z(b32rid)) + rid.reset(); + return rid; + } + + std::optional is_snode(std::string_view name) + { + if (name.ends_with(RELAY_DOT_TLD)) + name.remove_suffix(RELAY_DOT_TLD.size()); + else + return std::nullopt; + return parse_rid(name); + } + + template + std::optional try_making(Args&&... args) + { + try + { + return std::make_optional(std::forward(args)...); + } + catch (...) + { + return std::nullopt; + } + } + + } // namespace + + RequestHandler::RequestHandler(Router& router) : _router{router} + { + if (!_router.tun_endpoint()) + throw std::logic_error{"dns::RequestHandler requires a TUN endpoint"}; + } + + void RequestHandler::operator()(std::span request, const quic::Address& from, ReplyCallback reply) + { + std::span client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; + + auto maybe = Message::extract_question(request, _cookie_secret, client_ip); + if (not maybe) + { + log::warning(logcat, "Ignoring unparseable DNS request from {}", from); + return; + } + auto& msg = *maybe; + + if (msg.bad_extract) + { + reply(std::move(msg)); + return; + } + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. 
+ msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. (This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + reply(std::move(msg)); + return; + } + + // If there is no question then there is no answer to worry about. This is a bit weird, but + // is sometimes used by clients to get an initial DNS cookie (via EDNS) without making an + // actual request. + if (!msg.question) + { + reply(std::move(msg)); + return; + } + + auto& q = *msg.question; + + if (handle_local(reply, msg, std::string{q.name()})) + return; + + // we don't provide a DoH resolver because it requires verified TLS TLS needs X509/ASN.1-DER + // and opting into the Root CA Cabal thankfully mozilla added a backdoor that allows ISPs to + // turn it off so we disable DoH for firefox using mozilla's ISP backdoor see: + // https://github.com/oxen-io/lokinet/issues/832 + + // is this firefox looking for their backdoor record? + if (q.name() == "use-application-dns.net") + // yea it is, let's turn off DoH because god is dead. 
+ return reply(msg.nxdomain()); // press F to pay respects and send it back where it came from + + // Not for us, so forward to upstream handler + forward(std::move(msg), std::move(reply)); + } + + bool RequestHandler::handle_local(ReplyCallback& reply, Message& msg, std::string qname) + { + // hook any PTR (reverse DNS) lookups for our local ranges + if (handle_local_ptr(msg, reply)) + return true; + + auto& q = *msg.question; + + if (!(q.has_tld(CLIENT_TLD) || q.has_tld(RELAY_TLD) || q.has_tld("loki"sv))) + return false; + + std::string hostname, tld; + std::vector sub; + { + auto nameparts = split(qname, "."); + if (nameparts.size() < 2) + { + log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); + reply(msg.formerr()); + return true; + } + hostname = nameparts[nameparts.size() - 2]; + tld = nameparts.back(); + sub.reserve(nameparts.size() - 2); + for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) + sub.emplace_back(s); + } + + bool localhost = is_localhost(qname); + + // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the + // question type. + if (localhost) + { + auto our_hostname = _router.id().to_string(); + auto our_tld = _router.is_service_node ? RELAY_TLD : CLIENT_TLD; + auto our_name = "{}.{}"_format(our_hostname, our_tld); + + if (tld == "loki") + { + // first: report a cname for the deprecated localhost.loki -> localhost.sesh + + msg.set_rr_name("localhost.loki"); + msg.add_cname_reply("localhost.{}"_format(our_tld)); + } + // report CNAME: localhost.sesh -> pubkey.sesh + msg.set_rr_name("localhost.{}"_format(our_tld)); + msg.add_cname_reply(our_name); + + if (q.qtype == dns::RRType::CNAME) + { + // If we were queried specifically for a cname, then we are done. 
+ reply(std::move(msg)); + return true; + } + + // Otherwise we continue processing to be able to return supplemental records through + // the cname, so that if you request "foo.localhost.loki" we end up returning: + // localhost.loki CNAME for localhost.sesh + // localhost.sesh CNAME for PUBKEY.sesh + // foo.PUBKEY.sesh IN X VALUE (or whatever) + // And so for for the rest of the answer processing that we were given PUBKEY.sesh, + // rather than localhost.loki/.sesh: + qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); + msg.set_rr_name(qname); + + tld = our_tld; + hostname = std::move(our_hostname); + } + else if (qname == random_snode) + { + // Similar to the localhost case: we first return a CNAME of random.snode -> + // SOMEPK.snode, then continue processing as if that was what you asked for. + + if (auto* rc = _router.node_db().get_random_rc()) + { + hostname = rc->router_id().to_string(); + qname = "{}.{}"_format(hostname, RELAY_TLD); + msg.add_cname_reply(qname, 1s); + if (q.qtype == dns::RRType::CNAME) + { + reply(std::move(msg)); + return true; + } + + msg.set_rr_name(qname); + } + else + { + // We found no RC at all, which probably means our connection is dead. + reply(msg.nxdomain()); + return true; + } + } + else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) + { + // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of + // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as + // mapping to a AAAA). 
+ + // TODO: .sesh SNS resolution, once implemented + + // ONS lookup: + auto lookup = "{}.loki"_format(hostname); + _router.session_endpoint().resolve_sns( + lookup, + [this, + lookup, + sub = std::move(sub), + reply = std::move(reply), + msg_ptr = std::make_shared(std::move(msg)), + cname_only = q.qtype == dns::RRType::CNAME]( + std::optional maybe_netaddr, + bool /*assertive*/, + std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; + msg.set_rr_name(lookup); + if (maybe_netaddr) + { + auto target = maybe_netaddr->to_string(); + msg.add_cname_reply(target, std::chrono::floor(ttl)); + if (cname_only) + return; + auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); + msg.set_rr_name(qname); + if (!handle_local(reply, msg, std::move(qname))) + { + log::warning( + logcat, "ONS '{}' subrequest did not properly handle sending a reply!", lookup); + return reply(msg.servfail()); + } + return; + } + // TODO FIXME: if `assertive` is true then we can provide a TTL for this failure + // (via an SOA authority record). (When not assertive we shouldn't do so, + // because not having an SOA TTL means a downstream recursive resolver shouldn't + // cache the negative response). + reply(msg.nxdomain()); + }); + return true; + } + + if (q.qtype == dns::RRType::TXT) + { + // TXT records can be used to query some basic info: + + // TXT on MYPUBKEY.sesh returns the basic version and netid: + if (localhost && sub.empty()) + msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( + _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); + + // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) + else if (auto rid = is_snode(qname)) + { + if (auto* rc = _router.node_db().get_rc(*rid)) + { + msg.add_txt_reply("rc v={} i={} t={}"_format( + fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); + } + else + msg.nxdomain(); + } + else + msg.nxdomain(); + reply(std::move(msg)); + return true; + } + + // "Regular" A or AAAA lookups + if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) + { + // Attempt to parse a "pubkey.snode" or "pubkey.sesh": + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + // DNS lookup implies we want a session, so make one (NOP if we have one) + // This also means if we don't use that session the IP mapping will release when + // it expires, which it wouldn't otherwise without a tedious periodic check. + bool created_session = false; + try + { + created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); + } + catch (const std::exception& e) + { + log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); + } + if (created_session) + { + assert(_router.tun_endpoint()); + auto& tun = *_router.tun_endpoint(); + if (aaaa) + msg.add_reply(tun.map6(*maybe_netaddr)); + else if (!sub.empty() && sub.back() == "ipv4"sv) + { + // We don't map IPv4 addresses by default, but it is still possible to get + // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
+ if (auto v4_addr = tun.map4(*maybe_netaddr); v4_addr) + msg.add_reply(*v4_addr); + else + log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); + } + // else they requested A *not* using the magic ipv4 subdomain, so we only have + // AAAA to offer and thus we return a reply without an answer record (which is + // the proper DNS way to say "something exists at this address, but not with the + // type you requested requested", as opposed to this nx_reply below, which means + // "this record does not exist"). + } + else + msg.nxdomain(); + reply(std::move(msg)); + + return true; + } + + log::warning(logcat, "DNS query failure: '{}' is not a valid Session Router name or address", qname); + reply(msg.nxdomain()); + return true; + } + + if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 + && sub[0].starts_with('_') && sub[1].starts_with('_')) + { + if (auto rid = parse_rid(hostname)) + { + _router.session_endpoint().lookup_client_intro( + *rid, + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( + const std::optional& cc) mutable { + if (cc) + { + for (const auto& srv : cc->SRVs()) + if (srv.service == sub[0] && srv.proto == sub[1]) + msg->add_reply(srv); + } + else + msg->nxdomain(); + + reply(std::move(*msg)); + }); + return true; + } + } + + // If we got through everything above without answering then they requested something weird + // (unhandled RR type, perhaps) and so let's just give an NXDOMAIN back: + reply(msg.nxdomain()); + return true; + } + + bool RequestHandler::handle_local_ptr(Message& msg, ReplyCallback& reply) + { + assert(msg.question); + if (msg.question->qtype != srouter::dns::RRType::PTR) + return false; + + auto ip = dns::decode_ptr(msg.question->qname); + if (!ip) + return false; + + auto [mapped, is_ours] = std::visit([this](const auto& ip) { return _router.reverse_lookup(ip); }, *ip); + if (!is_ours) + return false; + + if (mapped) + 
msg.add_ptr_reply(mapped->to_string()); + else + msg.nxdomain(); + + reply(std::move(msg)); + + return true; + } + + void RequestHandler::forward(Message&& m, ReplyCallback&& reply) + { + // TODO FIXME XXX TESTNET TOTHINK + log::critical(logcat, "FORWARDED REQUESTS NEEDS IMPLEMENTATION! RETURNING SERVFAIL"); + reply(m.servfail()); + } + +} // namespace srouter::dns diff --git a/src/dns/handler.hpp b/src/dns/handler.hpp new file mode 100644 index 000000000..85053fb1b --- /dev/null +++ b/src/dns/handler.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include "message.hpp" + +#include +#include + +namespace srouter +{ + class Router; + namespace quic = oxen::quic; +} // namespace srouter + +namespace srouter::dns +{ + class RequestHandler + { + public: + using ReplyCallback = std::function; + + explicit RequestHandler(Router& router); + + // Called when a request arrives to process the request; when the answer is ready, calls + // `reply()` with it. + void operator()(std::span request, const quic::Address& from, ReplyCallback reply); + + private: + // Secret value we use as a key in DNS server cookie hashing. We generate a random once on + // each startup as we currently have no need for this to be deterministic, and that + // regeneration also provides DNS cookie key rotation whenever we restart. + std::array _cookie_secret; + + Router& _router; + + // Called to check if the request is for a local name (i.e. .sesh, .snode, .loki, or a PTR + // record for one of the addresses in our tun). If so, this handles the request and returns + // true; otherwise returns false. + bool handle_local(ReplyCallback& reply, Message& msg, std::string qname); + + // Checks for PTR for a range we own, and if so, replies and returns true. Returns false if + // not a PTR for us (i.e. the caller should continue processing). 
+ bool handle_local_ptr(Message& m, ReplyCallback& reply); + + // Answers the question recursively via our configured upstream DNS servers (if any) + void forward(Message&& m, ReplyCallback&& reply); + }; + +} // namespace srouter::dns diff --git a/src/dns/listener.cpp b/src/dns/listener.cpp new file mode 100644 index 000000000..16325fefa --- /dev/null +++ b/src/dns/listener.cpp @@ -0,0 +1,178 @@ +#include "listener.hpp" + +#include "router/router.hpp" +#include "util/logging.hpp" + +#include +#include + +namespace srouter::dns +{ + namespace + { + auto logcat = log::Cat("dns"); + + struct tcp_conn + { + Listener& listener; + bufferevent* bev; + quic::Address addr; + // This gets shared with the handler callback so that we can tell if the raw tcp_conn + // pointer is still valid: + std::shared_ptr alive = std::make_shared(true); + + tcp_conn(Listener& l, bufferevent* b, sockaddr* src, int socklen) + : listener{l}, bev{b}, addr{src, static_cast(socklen)} + {} + + void close() + { + bufferevent_free(bev); + bev = nullptr; + } + + ~tcp_conn() { *alive = false; } + }; + + } // namespace + + void Listener::evconnlistener_deleter::operator()(::evconnlistener* e) + { + if (e) + evconnlistener_free(e); + } + + Listener::Listener(Router& router, const quic::Address& bind) : _handler{router} { listen(router.loop, bind); } + + struct Listener::udp_socket_helper + { + std::unique_ptr sock; + }; + + // Defaulted, but here because the header doesn't have visibility into the predeclared unique_ptrs + Listener::~Listener() = default; + + void Listener::listen(quic::Loop& loop, const quic::Address& bind) + { + // call_get this so that we can be sure that the callbacks defined here can't be called + // before we are done setting it up: + loop.call_get([&] { + auto h = std::make_unique(); + + h->sock = std::make_unique( + loop.get_event_base(), bind, /*gso=*/false, [this, h = h.get()](quic::Packet&& pkt) { + if (pkt.path.remote == pkt.path.local) + { + log::warning(logcat, "DNS 
packet loop detected: ignoring UDP DNS request"); + return; + } + log::trace(logcat, "Incoming DNS UDP packet from {}", pkt.path.remote); + + // We don't need to worry about keep-alive here because we own the handler, and + // so if it's calling something then `this` must still be alive. + _handler(pkt.data(), pkt.path.remote, [path = pkt.path, udp = h->sock.get()](Message m) { + auto payload = m.encode(); + const size_t sz = payload.size(); + udp->send(path, payload.data(), &sz, 0, 1); + }); + }); + _udp.push_back(std::move(h)); + + _tcp.emplace_back(evconnlistener_new_bind( + loop.get_event_base(), + [](evconnlistener* listener, evutil_socket_t fd, sockaddr* src, int socklen, void* ctx) { + auto* bev = bufferevent_socket_new(evconnlistener_get_base(listener), fd, BEV_OPT_CLOSE_ON_FREE); + auto* c = new tcp_conn{*static_cast(ctx), bev, src, socklen}; + + log::trace(logcat, "Incoming DNS TCP connection from {}", c->addr); + + bufferevent_setcb( + bev, + [](bufferevent* bev, void* ctx) { + // read callback + auto* in = bufferevent_get_input(bev); + while (true) + { + log::trace(logcat, "Incoming DNS TCP data"); + uint16_t reqlen; + if (evbuffer_copyout(in, &reqlen, 2) < 2) + break; + oxenc::big_to_host_inplace(reqlen); + log::trace(logcat, "Incoming DNS TCP request of size {}", reqlen); + size_t pending = evbuffer_get_length(in) - 2; + if (pending < reqlen) + { + // We don't have enough of the request yet, so leave the buffer + // as-is: libevent won't call us again until more data arrives, + // and will just leave the current buffer data in place. 
+ log::trace( + logcat, + "Not enough TCP data ({}) for request body ({}); delaying processing until we " + "get more", + pending, + reqlen); + break; + } + std::vector req; + req.resize(reqlen); + evbuffer_drain(in, 2); + evbuffer_remove(in, req.data(), reqlen); + log::trace(logcat, "Read {}-byte TCP DNS request", req.size()); + + auto* c = static_cast(ctx); + c->listener._handler(req, c->addr, [c, alive = c->alive](Message m) { + if (!*alive) + return; + auto* out = bufferevent_get_output(c->bev); + auto payload = m.encode(); + // The only difference between UDP DNS and TCP DNS encoding is that + // UDP is per-packet, but TCP is a stream of messages where each + // message is prefixed with the length of the message: + uint16_t size = oxenc::host_to_big(static_cast(payload.size())); + if (evbuffer_add(out, &size, 2) == -1 + || evbuffer_add(out, payload.data(), payload.size()) == -1) + { + log::warning(logcat, "Failed to write response to TCP connection; closing"); + bufferevent_free(c->bev); + delete c; + } + }); + } + }, + nullptr, + [](bufferevent* bev, short events, void* ctx) { + auto* c = static_cast(ctx); + // event callback + if (events & BEV_EVENT_EOF) + log::debug(logcat, "UDP TCP connection from {} closed by peer", c->addr); + if (events & BEV_EVENT_ERROR) + log::debug( + logcat, + "UDP TCP connection from {} closed by error: {}", + c->addr, + evutil_socket_error_to_string(EVUTIL_SOCKET_ERROR())); + if (events & BEV_EVENT_TIMEOUT) + // Is this even possible on a listening socket? 
+ log::debug(logcat, "UDP TCP connection from {} timed out", c->addr); + + if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT)) + { + bufferevent_free(bev); + delete c; + } + }, + c); + + bufferevent_enable(bev, EV_READ | EV_WRITE); + }, + this, + LEV_OPT_CLOSE_ON_FREE | LEV_OPT_REUSEABLE, + -1, + bind, + static_cast(bind.socklen()))); + + log::debug(logcat, "session-router DNS listening on {}", bind); + }); + } + +} // namespace srouter::dns diff --git a/src/dns/listener.hpp b/src/dns/listener.hpp new file mode 100644 index 000000000..8938a36b9 --- /dev/null +++ b/src/dns/listener.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include "dns/handler.hpp" + +#include +#include + +#include +#include + +struct evconnlistener; + +namespace srouter +{ + class Router; +} +namespace srouter::dns +{ + namespace quic = oxen::quic; + + /// UDP+TCP listener for receiving and sending DNS requests. This generally works with a + /// dns::RequestHandler to actually generate the replies for a request, which then come back to + /// this class to actually send the response to the network. + class Listener + { + struct evconnlistener_deleter + { + void operator()(::evconnlistener* e); + }; + + struct udp_socket_helper; + + std::list> _udp; + std::list> _tcp; + + // The object that handles processing of the actual request once we have extracted it from a + // UDP packet or TCP stream: + RequestHandler _handler; + + public: + // Creates a TCP+UDP DNS listener that listens on `bind` for DNS requests. + Listener(Router& router, const quic::Address& bind); + + // Adds another TCP+UDP listener on `bind`. This is called implicitly during construction, + // but can also be called if there is a need to listen on multiple addresses. 
+ void listen(quic::Loop& loop, const quic::Address& bind); + + ~Listener(); + }; + +} // namespace srouter::dns diff --git a/src/dns/message.cpp b/src/dns/message.cpp index 8d4c12075..fb00c4c22 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -1,43 +1,42 @@ #include "message.hpp" -#include "dns.hpp" #include "encode.hpp" -#include "net/ip_packet.hpp" +#include "flags.hpp" #include "srv_data.hpp" #include "util/logging.hpp" -#include #include #include #include #include -#include +#include #include namespace srouter::dns { static auto logcat = log::Cat("dns"); - Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } + Message::Message(Question question) : hdr_id{0}, hdr_fields{}, question{std::move(question)} {} Message Message::clone() const { Message c; c.hdr_id = hdr_id; c.hdr_fields = hdr_fields; - c.questions = questions; + c.question = question; c.additional_edns = additional_edns; // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) return c; } - std::vector Message::encode() const + std::vector Message::encode(bool max_size) const { std::vector tmp; - // If the client signalled EDNS support then we can use a larger payload, otherwise DNS is - // limited to 512 bytes. - tmp.resize(additional_edns ? additional_edns->max_payload() : 512); + tmp.resize( + max_size ? std::numeric_limits::max() + : additional_edns ? additional_edns->max_payload() + : 512); prev_names_t prev_names; std::span buf{tmp}; @@ -47,22 +46,59 @@ namespace srouter::dns buf, hdr_id, hdr_fields, - static_cast(questions.size()), + question ? uint16_t{1} : uint16_t{0}, static_cast(answers.size()), static_cast(0 /*authorities.size()*/), static_cast(additional_edns ? 
1 : 0 /*additional.size()*/)); - for (const auto& question : questions) - question.encode(buf, prev_names, buf_offset); + if (question) + question->encode(buf, prev_names, buf_offset); - for (auto& a : answers) - a->encode(buf, prev_names, buf_offset); + // If we run out of space and have to truncate then we are still supposed to include the + // EDNS part of the additional response, but other answers don't have to be: so if we hit + // such a failure, we're back up to this point (throwing away all the answers) so that we + // can include the EDNS response info. + auto initial_len = buf_offset; - if (additional_edns) - additional_edns->encode(buf, prev_names, buf_offset); + try + { + for (auto& a : answers) + a->encode(buf, prev_names, buf_offset); + + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) + { + log::debug(logcat, "Response too large! Setting truncation bit"); + + oxenc::write_host_as_big(hdr_fields | flags_TC, tmp.data() + 2); + + // Reset our buffer position back to just after the answers were added. We do this even + // if we aren't going to add EDNS stuff below, because we are not supposed to include + // partial RR entries in a truncated reply. 
+ buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + buf_offset = initial_len; + + if (additional_edns) + { + try + { + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) + { + // If this failed to then we don't have enough space for the EDNS so we'll just have to omit it + log::debug(logcat, "Unable to fit EDNS additional into DNS response!"); + buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + buf_offset = initial_len; + } + } + } // Trim the excess: tmp.resize(tmp.size() - buf.size()); + tmp.shrink_to_fit(); return tmp; } @@ -116,15 +152,20 @@ namespace srouter::dns { if (client_ip.size() != 4 && client_ip.size() != 16) throw std::logic_error{"Invalid client IP for Message::extract_question"}; - auto maybe = std::make_optional(); - auto& m = *maybe; + auto result = std::make_optional(); + auto& m = *result; uint16_t qd_count, an_count, ns_count, ar_count; if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) { - maybe.reset(); - return maybe; + result.reset(); + return result; + } + if (qd_count > 1) + { + log::warning(logcat, "Ignoring archaic DNS request with {} > 1 questions", qd_count); + m.bad_extract = true; + return result; } - m.questions.resize(qd_count); // Ignore these: // m.answers.resize(an_count); // m.authorities.resize(ns_count); @@ -132,9 +173,12 @@ namespace srouter::dns try { - for (auto& q : m.questions) + if (qd_count) + { + auto& q = m.question.emplace(); if (!q.extract(buf)) throw std::invalid_argument{"invalid question"}; + } // Skip any answers or authority records: for (uint16_t i = 0; i < an_count; i++) @@ -245,21 +289,9 @@ namespace srouter::dns catch (const std::exception& e) { log::debug(logcat, "failed to parse DNS message: {}", e.what()); - maybe.reset(); + m.bad_extract = true; } - return maybe; - } - - nlohmann::json Message::ToJSON() const - { - auto result = nlohmann::json{{"id", hdr_id}, {"fields", 
hdr_fields}}; - auto& ques = (result["questions"] = nlohmann::json::array()); - auto& ans = (result["answers"] = nlohmann::json::array()); - for (const auto& q : questions) - ques.push_back(q.ToJSON()); - for (const auto& a : answers) - ans.push_back(a->ToJSON()); return result; } @@ -271,14 +303,14 @@ namespace srouter::dns void Message::add_nodata_reply() { - if (not questions.empty()) + if (question) hdr_fields |= reply_flags; } template RR, typename... Args> void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) { - if (m.questions.empty()) + if (!m.question) return; m.hdr_fields |= reply_flags; @@ -304,32 +336,32 @@ namespace srouter::dns void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } - void Message::set_nx_reply() + Message&& Message::apply_rcode(uint16_t rcode, bool authoritative) { - answers.clear(); - // authorities.clear(); - // additional.clear(); - - if (questions.size()) + hdr_fields = set_rcode(hdr_fields, rcode); + if (question) { - hdr_fields |= flags_RCODENxDomain; - // authorative response with recursion available hdr_fields |= reply_flags; + if (authoritative) + hdr_fields |= flags_AA; + else + hdr_fields &= ~flags_AA; } + return std::move(*this); } - void Message::set_serv_fail() + Message&& Message::servfail() { answers.clear(); + return apply_rcode(RCODE_ServFail); + } - if (questions.size()) - { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= reply_flags; - // A servfail is not an authoritative answer, so clear that bit: - hdr_fields &= ~flags_AA; - } + Message&& Message::formerr() + { + answers.clear(); + return apply_rcode(RCODE_FormErr); } + Message&& Message::nxdomain(bool authoritative) { return apply_rcode(RCODE_NxDomain, authoritative); } + } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index 32bb8ee01..99fb1d86c 100644 --- a/src/dns/message.hpp +++ 
b/src/dns/message.hpp @@ -4,8 +4,6 @@ #include "question.hpp" #include "rr.hpp" -#include - #include namespace srouter @@ -19,9 +17,9 @@ namespace srouter struct Message { Message() = default; - explicit Message(const Question& question); + explicit Message(Question question); - // Non-copyable; see clone() if you want a copy with just the questions. + // Non-copyable; see clone() if you want a copy with just the question. Message(const Message&) = delete; Message(Message&&) = default; @@ -29,15 +27,22 @@ namespace srouter // Clones the message with question/flags/edns response data, but with no answers Message clone() const; - nlohmann::json ToJSON() const; - static constexpr auto DEFAULT_ANSWER_TTL = 10s; - // These two clear any answers that may have been added and then set the appropriate - // flags for a NXDomain (i.e. authoritative reply that the requested thing does not - // exist) or a ServFail (i.e. we don't know how to answer, maybe try someone else). - void set_nx_reply(); - void set_serv_fail(); + // These two methods mutates the message into a SERVFAIL or FORMERR response, clearing + // all answers. These return an value reference to the object itself to allow the call + // to operator like an implicit `std::move()` call as this is typically a final + // operation; in particular this means: `f(msg.nxdomain());` is equivalent to + // `msg.nxdomain(); f(std::move(msg));`. + Message&& servfail(); + Message&& formerr(); + + // Mutate message into a NXDOMAIN but without clearing existing answers. Returns an + // rvalue reference to the current object to allow the result to be easily moved away. + // + // The message with include the authoritative flag (AA) if the argument is omitted (or + // true), and omit it if false. + Message&& nxdomain(bool authoritative = true); // This clears any answers and sets the appropriate header flags for a BADCOOKIE // response. 
Note that this is only valid when the message has `additional_edns` as @@ -50,7 +55,7 @@ namespace srouter void set_rr_name(std::optional name); std::string_view get_rr_name() const { - return rr_name_override ? *rr_name_override : questions.size() ? questions.front().qname : ""sv; + return rr_name_override ? *rr_name_override : question ? question->qname : ""sv; } void add_nodata_reply(); @@ -68,24 +73,35 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - std::vector encode() const; + // Encodes a response. If max_size is true then we allow up to 65535 bytes for the + // response, otherwise we allow either the EDNS max payload (up to 1232), or 512 + // (without EDNS in the query). + std::vector encode(bool max_size = false) const; // Parses a question Message from the given buf, removing the question from the prefix // of buf. `server_cookie_secret` and `client_addr` contains information needed for DNS // cookie handling; `server_cookie_secret` is something derived from the SR private key // seed + startup time, while client_addr is the raw bytes of the IP address (4 or 16 // bytes for IPv4/IPv6, respectively). + // + // Returns nullopt if the request cannot be parsed at all; returns a Message with + // `bad_extract` set to true if it was parseable but not valid and should be immediately + // replied to with an error (which will already be set up in the returned Message + // object). static std::optional extract_question( std::span& buf, std::span server_cookie_secret, std::span client_addr); + // See extract_question, above. 
+ bool bad_extract{false}; + std::string to_string() const; uint16_t hdr_id; uint16_t hdr_fields; - std::vector questions; + std::optional question; std::vector> answers; // Currently unused: @@ -100,6 +116,8 @@ namespace srouter private: void add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl); + + Message&& apply_rcode(uint16_t rcode, bool authoritative = false); }; } // namespace dns diff --git a/src/dns/question.cpp b/src/dns/question.cpp index a72abef17..60dee9e57 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -1,7 +1,6 @@ #include "question.hpp" #include "address/address.hpp" -#include "dns.hpp" #include "encode.hpp" #include "util/logging.hpp" #include "util/logging/buffer.hpp" diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index b0c53a925..9fee05b8b 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -1,6 +1,5 @@ #include "rr.hpp" -#include "dns.hpp" #include "encode.hpp" #include diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 85148f171..cd94b83c2 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -48,6 +48,8 @@ namespace srouter::dns { ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} + virtual ~ResourceRecord() = default; + // Writes this RR to the beginning of buf, eliminating the written section from buf. Throws if buf is exceeded. 
// // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual diff --git a/src/dns/server.cpp b/src/dns/server.cpp deleted file mode 100644 index 721ec9724..000000000 --- a/src/dns/server.cpp +++ /dev/null @@ -1,733 +0,0 @@ -#include "server.hpp" - -#include "constants/apple.hpp" -#include "constants/platform.hpp" -#include "dns.hpp" -#include "message.hpp" -#include "nm_platform.hpp" -#include "sd_platform.hpp" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace srouter::dns -{ - static auto logcat = log::Cat("dns"); - - void QueryJob_Base::cancel() - { - Message reply = _query.clone(); - reply.set_serv_fail(); - send_reply(reply.encode()); - } - - /// sucks up udp packets from a bound socket and feeds it to a server - class UDPReader : public PacketSource, public std::enable_shared_from_this - { - Server& _dns; - std::unique_ptr _udp; - quic::Address _local_addr; - - public: - explicit UDPReader(Server& dns, quic::Loop& loop, quic::Address bind) : _dns{dns} - { - _udp = std::make_unique( - loop.get_event_base(), bind, /*gso=*/false, [this](quic::Packet&& pkt) { - auto& src = pkt.path.remote; // "remote" address is packet source, we ("local") are destination - if (src == _local_addr) - { - log::debug(logcat, "DNS packet received, not handling because we're the packet source", src); - return; - } - - if (not _dns.maybe_handle_payload(shared_from_this(), _local_addr, src, pkt.data())) - log::warning(logcat, "did not handle dns packet from {} to {}", src, _local_addr); - else - log::trace(logcat, "Handled DNS packet from {} to {}", src, _local_addr); - }); - - if (auto maybe_addr = bound_on()) - { - _local_addr = *maybe_addr; - log::debug(logcat, "session-router DNS server bound on {}", _local_addr); - } - else - throw std::runtime_error{"cannot find which address our dns socket is bound on"}; - } - - std::optional bound_on() const override { return _udp->address(); } - - bool 
would_loop(const quic::Address& to, const quic::Address& /*from*/) const override - { - return to != _local_addr; - } - - void send_udp(const quic::Address& to, const quic::Address&, std::span data) const override - { - const size_t bufsize = data.size(); - size_t n_pkts = 1; - auto [ior, sent] = _udp->send(quic::Path{_local_addr, to}, data.data(), &bufsize, 0, n_pkts); - - log::trace( - logcat, - "dns server {} UDP packet to {} (ec={})", - ior.success() ? "sent" : "failed to send", - to, - ior.error_code); - } - }; - - namespace libunbound - { - class Resolver; - - class Query : public QueryJob_Base, public std::enable_shared_from_this - { - std::shared_ptr src; - quic::Address resolverAddr; - quic::Address askerAddr; - - public: - explicit Query( - std::weak_ptr parent_, - Message query, - std::shared_ptr pktsrc, - quic::Address toaddr, - quic::Address fromaddr) - : QueryJob_Base{std::move(query)}, - src{std::move(pktsrc)}, - resolverAddr{std::move(toaddr)}, - askerAddr{std::move(fromaddr)}, - parent{parent_} - {} - std::weak_ptr parent; - int id{}; - - void send_reply(std::vector buf) override; - }; - - /// Resolver_Base that uses libunbound - class Resolver final : public Resolver_Base, public std::enable_shared_from_this - { - ub_ctx* m_ctx = nullptr; - quic::Loop& _loop; -#ifdef _WIN32 - // windows is dumb so we do ub mainloop in a thread - std::thread runner; - std::atomic running; -#else - // std::shared_ptr _poller; -#endif - - std::optional _local_addr; - std::unordered_set> _pending; - - struct ub_result_deleter - { - void operator()(ub_result* ptr) { ::ub_resolve_free(ptr); } - }; - - const net::Platform* net_ptr() const { return srouter::net::Platform::Default_ptr(); } - - static void callback(void* data, int err, ub_result* _result) - { - log::debug(logcat, "got dns response from libunbound"); - // take ownership of ub_result - std::unique_ptr result{_result}; - // borrow query - auto* query = static_cast(data); - if (err) - { - // some kind of error 
from upstream - log::warning(logcat, "Upstream DNS failure: {}", ub_strerror(err)); - query->cancel(); - return; - } - - log::trace(logcat, "queueing dns response from libunbound to userland"); - - auto* ans = reinterpret_cast(result->answer_packet); - std::vector payload{ans, ans + result->answer_len}; - // Replace the `id` value in the unbound response (which is the first 2 bytes of the - // message) with the one we were queried with: - oxenc::write_host_as_big(query->underlying().hdr_id, payload.data()); - - // send reply - query->send_reply(std::move(payload)); - } - - void add_upstream_resolver(const quic::Address& dns) - { - auto str = "{}@{}"_format(dns.host(), dns.port()); - - if (auto err = ub_ctx_set_fwd(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("cannot use {} as upstream dns: {}", str, ub_strerror(err))}; - } - } - - bool configure_apple_trampoline(const quic::Address& dns) - { - // On Apple, when we turn on exit mode, we tear down and then reestablish the - // unbound resolver: in exit mode, we set use upstream to a localhost trampoline - // that redirects packets through the tunnel. In non-exit mode, we directly use the - // upstream, so we look here for a reconfiguration to use the trampoline port to - // check which state we're in. - // - // We have to do all this crap because we can't directly connect to upstream from - // here: within the network extension, macOS ignores the tunnel we are managing and - // so, if we didn't do this, all our DNS queries would leak out around the tunnel. - // Instead we have to bounce things through the objective C trampoline code (which - // is what actually handles the upstream querying) so that it can call into Apple's - // special snowflake API to set up a socket that has the magic Apple snowflake sauce - // added on top so that it actually routes through the tunnel instead of around it. 
- // - // But the trampoline *always* tries to send the packet through the tunnel, and that - // will only work in exit mode. - // - // All of this macos behaviour is all carefully and explicitly documented by Apple - // with plenty of examples and other exposition, of course, just like all of their - // wonderful new APIs to reinvent standard unix interfaces with half-baked - // replacements. - - if constexpr (platform::is_apple) - { - if (dns.host() == "127.0.0.1" and dns.port() == apple::dns_trampoline_port) - { - // macOS is stupid: the default (0.0.0.0) fails with "send failed: Can't - // assign requested address" when unbound tries to connect to the localhost - // address using a source address of 0.0.0.0. Yay apple. - set_opt("outgoing-interface:", "127.0.0.1"); - - // The trampoline expects just a single source port (and sends everything - // back to it). - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(apple::dns_trampoline_source_port)); - return true; - } - } - return false; - } - - void configure_upstream(const srouter::DnsConfig& conf) - { - bool is_apple_tramp = false; - - // set up forward dns - for (const auto& dns : conf._upstream_dns) - { - add_upstream_resolver(dns); - is_apple_tramp = is_apple_tramp or configure_apple_trampoline(dns); - } - - if (auto maybe_addr = conf._query_bind; maybe_addr and not is_apple_tramp) - { - quic::Address addr{*maybe_addr}; - auto host = addr.host(); - - if (addr.port() == 0) - { - // unbound manages their own sockets because of COURSE it does. so we find - // an open port on our system and use it so we KNOW what it is before giving - // it to unbound to explicitly bind to JUST that port. 
- - auto fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); -#ifdef _WIN32 - if (fd == INVALID_SOCKET) -#else - if (fd == -1) -#endif - { - throw std::invalid_argument{ - fmt::format("Failed to create UDP socket for unbound: {}", strerror(errno))}; - } - -#ifdef _WIN32 -#define CLOSE closesocket -#else -#define CLOSE close -#endif - if (0 != bind(fd, static_cast(addr), addr.socklen())) - { - CLOSE(fd); - throw std::invalid_argument{ - fmt::format("Failed to bind UDP socket for unbound: {}", strerror(errno))}; - } - struct sockaddr_storage sas; - auto* sa = reinterpret_cast(&sas); - socklen_t sa_len = sizeof(sas); - int rc = getsockname(fd, sa, &sa_len); - CLOSE(fd); -#undef CLOSE - if (rc != 0) - { - throw std::invalid_argument{ - fmt::format("Failed to query UDP port for unbound: {}", strerror(errno))}; - } - - addr = quic::Address{sa, sizeof(sockaddr)}; - } - _local_addr = addr; - - log::debug(logcat, "sending dns queries from {}", addr.to_string()); - // set up query bind port if needed - set_opt("outgoing-interface:", host); - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(addr.port())); - } - } - - void set_opt(const std::string& key, const std::string& val) - { - ub_ctx_set_option(m_ctx, key.c_str(), val.c_str()); - } - - // Copy of the DNS config (a copy because on some platforms, like Apple, we change the - // applied upstream DNS settings when turning on/off exit mode). 
- srouter::DnsConfig m_conf; - - public: - explicit Resolver(quic::Loop& loop, srouter::DnsConfig conf) : _loop{loop}, m_conf{std::move(conf)} - { - up(m_conf); - } - - ~Resolver() override { down(); } - - std::string_view resolver_name() const override { return "unbound"; } - - std::optional get_local_addr() const override { return _local_addr; } - - void remove_pending(const std::shared_ptr& query) { _pending.erase(query); } - - void up(const srouter::DnsConfig& conf) - { - if (m_ctx) - throw std::logic_error{"Internal error: attempt to Up() dns server multiple times"}; - - m_ctx = ::ub_ctx_create(); - // set libunbound settings - - set_opt("do-tcp:", "no"); - - for (const auto& [k, v] : conf.extra_opts) - set_opt(k, v); - - // add host files - for (const auto& file : conf.hostfiles) - { - const auto str = file.string(); - if (auto ret = ub_ctx_hosts(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("Failed to add host file {}: {}", file, ub_strerror(ret))}; - } - } - - configure_upstream(conf); - - // set async - ub_ctx_async(m_ctx, 1); - // setup mainloop -#ifdef _WIN32 - running = true; - runner = std::thread{[this]() { - while (running) - { - // poll and process callbacks it this thread - if (ub_poll(m_ctx)) - { - ub_process(m_ctx); - } - else // nothing to do, sleep. 
- std::this_thread::sleep_for(10ms); - } - }}; -#else - // TODO: replace uvw shim shit with new libev stuff - // if (auto loop_ptr = loop->MaybeGetUVWLoop()) - // { - // _poller = loop_ptr->resource(ub_fd(m_ctx)); - // _poller->on([this](auto&, auto&) { ub_process(m_ctx); }); - // _poller->start(uvw::PollHandle::Event::READABLE); - // return; - // } -#endif - } - - void down() override - { -#ifdef _WIN32 - if (running.exchange(false)) - { - log::debug(logcat, "shutting down win32 dns thread"); - runner.join(); - } -#else - // if (_poller) - // _poller->close(); -#endif - if (m_ctx) - { - ::ub_ctx_delete(m_ctx); - m_ctx = nullptr; - - // destroy any outstanding queries that unbound hasn't fired yet - if (not _pending.empty()) - { - log::debug(logcat, "cancelling {} pending queries", _pending.size()); - // We must copy because Cancel does a loop call to remove itself, but since - // we are already in the main loop it happens immediately, which would - // invalidate our iterator if we were looping through m_Pending at the time. 
- auto copy = _pending; - for (const auto& query : copy) - query->cancel(); - } - } - } - - int rank() const override { return 10; } - - void reset_resolver(std::optional> replace_upstream) override - { - down(); - if (replace_upstream) - m_conf._upstream_dns = std::move(*replace_upstream); - up(m_conf); - } - - template - void call(Callable&& f) - { - _loop.call(std::forward(f)); - } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) override - { - log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query.clone(), source, to, from); - // no questions, send fail - if (query.questions.empty()) - { - log::debug(logcat, "dns from {} to {} has empty query questions, sending failure reply", from, to); - tmp->cancel(); - return true; - } - - for (const auto& q : query.questions) - { - // dont process .sesh/.loki/.snode - if (q.has_tld(CLIENT_TLD) or q.has_tld(RELAY_TLD) or q.has_tld("loki")) - { - log::warning( - logcat, - "dns from {} to {} is for .{}/{}/loki but got to the unbound " - "resolver; sending failure reply", - from, - to, - CLIENT_TLD, - RELAY_TLD); - tmp->cancel(); - return true; - } - } - if (not m_ctx) - { - // we are down - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't set " - "up, sending failure reply", - from, - to); - tmp->cancel(); - return true; - } - -#ifdef _WIN32 - if (not running) - { - // we are stopping the win32 thread - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't " - "running, sending failure reply", - from, - to); - tmp->Cancel(); - return true; - } -#endif - const auto& q = query.questions[0]; - if (auto err = ub_resolve_async( - m_ctx, - std::string{q.name()}.c_str(), - static_cast(q.qtype), - static_cast(q.qclass), - tmp.get(), - &Resolver::callback, - nullptr)) - { - log::warning(logcat, "failed to send upstream 
query with libunbound: {}", ub_strerror(err)); - tmp->cancel(); - } - else - { - log::trace(logcat, "dns from {} to {} processing via libunbound", from, to); - _pending.insert(std::move(tmp)); - } - - return true; - } - }; - - void Query::send_reply(std::vector data) - { - log::trace(logcat, "Query::send_reply called"); - if (_done.test_and_set()) - return; - - auto parent_ptr = parent.lock(); - - if (parent_ptr) - { - parent_ptr->call( - [self = shared_from_this(), parent_ptr = std::move(parent_ptr), data = std::move(data)] { - log::trace( - logcat, - "forwarding dns response from libunbound to userland (resolverAddr: {}, " - "askerAddr: {})", - self->resolverAddr, - self->askerAddr); - self->src->send_udp(self->askerAddr, self->resolverAddr, data); - // remove query - parent_ptr->remove_pending(self); - }); - } - else - log::error(logcat, "no parent"); - } - } // namespace libunbound - - Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) - : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - { - randombytes_buf(_cookie_secret.data(), _cookie_secret.size()); - } - - std::vector> Server::get_all_resolvers() const - { - return {_resolvers.begin(), _resolvers.end()}; - } - - void Server::start() - { - // set up udp sockets - for (const auto& addr : _conf._bind_addrs) - { - if (auto ptr = make_packet_source_on(addr, _conf)) - add_packet_source(std::move(ptr)); - } - - // add default resolver as needed - if (auto ptr = make_default_resolver()) - add_resolver(ptr); - - // FIXME: this should be handled by RoutePoker once it is resurrected, handling whether - // we eat all DNS traffic or just .sesh/.loki/.snode. For now, we only handle those. 
- set_dns_mode(false); - } - - std::shared_ptr Server::create_platform() const - { - auto plat = std::make_shared(); - if constexpr (srouter::platform::has_systemd) - { - plat->add_impl(std::make_unique()); - plat->add_impl(std::make_unique()); - } - return plat; - } - - std::shared_ptr Server::make_packet_source_on(const quic::Address& addr, const srouter::DnsConfig&) - { - return std::make_shared(*this, _loop, addr); - } - - std::shared_ptr Server::make_default_resolver() - { - if (_conf._upstream_dns.empty()) - { - log::debug( - logcat, - "explicitly no upstream dns providers specified, we will not resolve anything but " - ".{}/{}/loki", - CLIENT_TLD, - RELAY_TLD); - return nullptr; - } - - return std::make_shared(_loop, _conf); - } - - std::vector Server::bound_packet_source_addrs() const - { - std::vector addrs; - - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto maybe_addr = ptr->bound_on()) - addrs.emplace_back(*maybe_addr); - } - return addrs; - } - - std::optional Server::first_bound_packet_source_addr() const - { - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto bound = ptr->bound_on()) - return bound; - } - return std::nullopt; - } - - void Server::add_resolver(std::weak_ptr resolver) { _resolvers.insert(resolver); } - - void Server::add_resolver(std::shared_ptr resolver) - { - _owned_resolvers.insert(resolver); - add_resolver(std::weak_ptr{resolver}); - } - - void Server::add_packet_source(std::weak_ptr pkt) { _packet_sources.push_back(pkt); } - - void Server::add_packet_source(std::shared_ptr pkt) - { - add_packet_source(std::weak_ptr{pkt}); - _owned_packet_sources.push_back(std::move(pkt)); - } - - void Server::stop() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->down(); - } - } - - void Server::reset() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->reset_resolver(); - } - } - - void 
Server::set_dns_mode(bool all_queries) - { - if (auto maybe_addr = first_bound_packet_source_addr()) - _platform->set_resolver(m_NetIfIndex, *maybe_addr, all_queries); - } - - bool Server::maybe_handle_payload( - const std::shared_ptr& ptr, - const quic::Address& to, - const quic::Address& from, - std::span payload) - { - // dont process to prevent feedback loop - if (ptr->would_loop(to, from)) - { - log::warning(logcat, "preventing dns packet replay to={} from={}", to, from); - return false; - } - - std::span client_ip; - if (from.is_ipv4()) - client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; - else - client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; - - auto maybe = Message::extract_question(payload, _cookie_secret, client_ip); - if (not maybe) - { - log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); - return false; - } - auto& msg = *maybe; - - if (msg.additional_edns && msg.additional_edns->bad_cookie) - { - // Client gave a bad cookie; reply with a request failure, but one containing the new - // cookie so that the client can retry. - - // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS - // value. - msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; - // TODO FIXME: we currently always set the RA flag but that really should only be set - // when we have an upstream DNS server. 
(This TODO is also in message.cpp) - msg.hdr_fields |= flags_QR | flags_RA; - // badcookie is not an authoritative answer: - msg.hdr_fields &= ~flags_AA; - - ptr->send_udp(from, to, msg.encode()); - return true; - } - - // we don't provide a DoH resolver because it requires verified TLS - // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal - // thankfully mozilla added a backdoor that allows ISPs to turn it off - // so we disable DoH for firefox using mozilla's ISP backdoor - // see: https://github.com/oxen-io/lokinet/issues/832 - for (const auto& q : msg.questions) - { - // is this firefox looking for their backdoor record? - if (q.name() == "use-application-dns.net") - { - // yea it is, let's turn off DoH because god is dead. - msg.set_nx_reply(); - // press F to pay respects and send it back where it came from - ptr->send_udp(from, to, msg.encode()); - return true; - } - } - - if (_resolvers.empty()) - { - log::warning(logcat, "Trying to resolve DNS query, but we no resolver set up."); - return false; - } - for (const auto& resolver : _resolvers) - { - if (auto res_ptr = resolver.lock()) - { - log::trace(logcat, "check resolver {} for dns from {} to {}", res_ptr->resolver_name(), from, to); - if (res_ptr->maybe_hook_dns(ptr, msg, to, from)) - { - log::trace(logcat, "resolver {} handling dns from {} to {}", res_ptr->resolver_name(), from, to); - return true; - } - } - } - return false; - } - -} // namespace srouter::dns diff --git a/src/dns/server.hpp b/src/dns/server.hpp deleted file mode 100644 index 4ba6e43a9..000000000 --- a/src/dns/server.hpp +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once - -#include "config/config.hpp" -#include "message.hpp" -#include "net/ip_packet.hpp" -#include "platform.hpp" -#include "util/compare_ptr.hpp" - -#include -#include - -#include -#include - -namespace srouter::dns -{ - /// a job handling 1 dns query - class QueryJob_Base - { - protected: - /// the original dns query - Message _query; - - /// True if we've sent a 
reply (including via a call to cancel) - std::atomic_flag _done = ATOMIC_FLAG_INIT; - - public: - explicit QueryJob_Base(Message query) : _query{std::move(query)} {} - - virtual ~QueryJob_Base() = default; - - Message& underlying() { return _query; } - - const Message& underlying() const { return _query; } - - /// cancel this operation and inform anyone who cares - void cancel(); - - /// send a raw buffer back to the querier - virtual void send_reply(std::vector buf) = 0; - }; - - class PacketSource - { - public: - /// stop reading packets and end operation - virtual ~PacketSource() = default; - - /// return true if traffic with source and dest addresses would cause a - /// loop in resolution and thus should not be sent to query handlers - virtual bool would_loop(const quic::Address& to, const quic::Address& from) const = 0; - - /// send UDP payload with src and dst address containing buf on this packet source - virtual void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const = 0; - - /// returns the sockaddr we are bound on if applicable - virtual std::optional bound_on() const = 0; - }; - - /// non complex implementation of QueryJob_Base for use in things that - /// only ever called on the mainloop thread - class QueryJob : public QueryJob_Base, std::enable_shared_from_this - { - std::shared_ptr src; - const quic::Address resolver; - const quic::Address asker; - - public: - explicit QueryJob( - std::shared_ptr source, Message query, const quic::Address& to_, const quic::Address& from_) - : QueryJob_Base{std::move(query)}, src{std::move(source)}, resolver{to_}, asker{from_} - {} - - void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } - }; - - /// handler of dns query hooking - /// intercepts dns for internal processing - class Resolver_Base - { - protected: - /// return the sorting order for this resolver - /// lower means it will be tried first - virtual int rank() const = 0; - - public: - 
virtual ~Resolver_Base() = default; - - /// less than via rank - bool operator<(const Resolver_Base& other) const { return rank() < other.rank(); } - - /// greater than via rank - bool operator>(const Resolver_Base& other) const { return rank() > other.rank(); } - - /// get local socket address that queries are sent from - virtual std::optional get_local_addr() const { return std::nullopt; } - - /// get printable name - virtual std::string_view resolver_name() const = 0; - - /// reset the resolver state, optionally replace upstream info with new info. The default - /// base implementation does nothing. - virtual void reset_resolver(std::optional> = std::nullopt) {} - - /// cancel all pending requests and cease further operation. Default operation is a no-op. - virtual void down() {} - - /// attempt to handle a dns message - /// returns true if we consumed this query and it should not be processed again - virtual bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) = 0; - }; - - // Base class for DNS proxy - class Server - { - protected: - /// add a packet source to this server, does share ownership - void add_packet_source(std::shared_ptr resolver); - /// add a resolver to this packet handler, does share ownership - void add_resolver(std::shared_ptr resolver); - - /// create the platform dependant dns stuff - virtual std::shared_ptr create_platform() const; - - public: - virtual ~Server() = default; - - explicit Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif_index); - - /// returns all sockaddr we have from all of our PacketSources - std::vector bound_packet_source_addrs() const; - - /// returns the first sockaddr we have on our packet sources if we have one - std::optional first_bound_packet_source_addr() const; - - /// add a resolver to this packet handler, does not share ownership - void add_resolver(std::weak_ptr resolver); - - /// add a packet source to this 
server, does not share ownership - void add_packet_source(std::weak_ptr resolver); - - /// create a packet source bound on bindaddr but does not add it - virtual std::shared_ptr make_packet_source_on( - const quic::Address& bindaddr, const srouter::DnsConfig& conf); - - /// sets up all internal binds and such and begins operation - virtual void start(); - - /// stops all operation - virtual void stop(); - - /// reset the internal state - virtual void reset(); - - /// create the default resolver for out config - virtual std::shared_ptr make_default_resolver(); - - std::vector> get_all_resolvers() const; - - /// feed a packet buffer from a packet source. - /// returns true if we decided to process the packet and consumed it - /// returns false if we dont want to process the packet - bool maybe_handle_payload( - const std::shared_ptr& pktsource, - const quic::Address& resolver, - const quic::Address& from, - std::span buf); - - /// set which dns mode we are in. - /// true for intercepting all queries. false for just .sesh/.loki/.snode - void set_dns_mode(bool all_queries); - - protected: - quic::Loop& _loop; - srouter::DnsConfig _conf; - std::shared_ptr _platform; - - private: - const unsigned int m_NetIfIndex; - - // Secret value we use as a key in DNS server cookie hashing. We generate a random once on - // each startup as we currently have no need for this to be deterministic, and that - // introduces rotation whenever we restart. - std::array _cookie_secret; - - // TODO FIXME: this ownership model is cursed. 
- std::set, ComparePtr>> _owned_resolvers; - std::set, CompareWeakPtr> _resolvers; - - std::vector> _owned_packet_sources; - std::vector> _packet_sources; - }; - -} // namespace srouter::dns diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index 76d119a80..eb354da2b 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -13,7 +13,6 @@ #include "auth/auth.hpp" #include "constants/platform.hpp" #include "contact/sns.hpp" -#include "dns/dns.hpp" #include "dns/encode.hpp" #include "nodedb.hpp" #include "router/route_poker.hpp" @@ -27,205 +26,6 @@ namespace srouter::handlers { static auto logcat = log::Cat("tun"); - bool TunEndpoint::maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) - { - if (not should_hook_dns_message(query)) - return false; - - auto job = std::make_shared(source, query.clone(), to, from); - if (!handle_hooked_dns_message(query.clone(), [job](dns::Message msg) { job->send_reply(msg.encode()); })) - job->cancel(); - return true; - } - - /// Intercepts DNS IP packets on platforms where binding to a low port isn't viable. - /// (windows/macos/ios/android ... aka everything that is not linux... funny that) - class DnsInterceptor : public dns::PacketSource - { - ip_pkt_hook _hook; - quic::Address _our_ip; // maybe should be an IP type...? 
- srouter::DnsConfig _config; - - public: - explicit DnsInterceptor(ip_pkt_hook reply, quic::Address our_ip, srouter::DnsConfig conf) - : _hook{std::move(reply)}, _our_ip{std::move(our_ip)}, _config{std::move(conf)} - {} - - ~DnsInterceptor() override = default; - - void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const override - { - log::critical(logcat, "DNS interceptor FIXME!"); - if (payload.empty()) - return; - // FIXME: this - (void)to; - (void)from; - (void)payload; - // _hook(data.make_udp(to, from)); - } - - std::optional bound_on() const override { return std::nullopt; } - - bool would_loop(const quic::Address& to, const quic::Address& from) const override - { - if constexpr (platform::is_apple) - { - // DNS on Apple is a bit weird because in order for the NetworkExtension itself to - // send data through the tunnel we have to proxy DNS requests through Apple APIs - // (and so our actual upstream DNS won't be set in our resolvers, which is why the - // vanilla WouldLoop won't work for us). However when active the mac also only - // queries the main tunnel IP for DNS, so we consider anything else to be - // upstream-bound DNS to let it through the tunnel. - return to != _our_ip; - } - else if (auto maybe_addr = _config._query_bind) - { - const auto& addr = *maybe_addr; - // omit traffic to and from our dns socket - return addr == to or addr == from; - } - return false; - } - }; - - class TunDNS : public dns::Server - { - const TunEndpoint* _tun; - std::optional _query_bind; - quic::Address _our_ip; - - public: - std::shared_ptr pkt_source; - - ~TunDNS() override = default; - - explicit TunDNS(TunEndpoint* ep, const srouter::DnsConfig& conf) - : dns::Server{ep->router().loop, conf, 0}, - _tun{ep}, - _query_bind{conf._query_bind}, - _our_ip{ep->get_ipv4()} // FIXME: What about IPv6? 
- { - if (_query_bind) - _our_ip.set_port(_query_bind->port()); - } - - std::shared_ptr make_packet_source_on( - const quic::Address&, const srouter::DnsConfig& conf) override - { - (void)_tun; - auto ptr = std::make_shared( - [](IPPacket pkt) { - (void)pkt; - // ep->handle_write_ip_packet(pkt.ConstBuffer(), pkt.srcv6(), pkt.dstv6(), 0); - }, - _our_ip, - conf); - pkt_source = ptr; - return ptr; - } - }; - - // NB: It looks like this could/should be called during the constructor, - // but as it passes weak_from_this to the dns server, it has to be after. - void TunEndpoint::setup_dns() - { - log::debug(logcat, "{} setting up DNS...", name()); - - auto& dns_config = _router.config().dns; - const auto& info = get_vpn_interface()->interface_info(); - - if (dns_config.l3_intercept) - { - // FIXME: this entire if block is so broken... - _dns = std::make_unique(this, dns_config); - auto* dns = static_cast(_dns.get()); - - uint16_t p = 53; - - while (p < 100) - { - try - { - _packet_router->add_udp_handler(p, [this, dns](IPPacket pkt) { - // TODO FIXME - log::critical(logcat, "TODO FIXME: L3 udp interceptor!"); - // if (dns->maybe_handle_payload(dns->pkt_source, pkt.destination(), pkt.source(), - // pkt.udp_data())) - // return; - - handle_outbound_packet(std::move(pkt)); - }); - } - catch (const std::exception& e) - { - if (p += 1; p >= 100) - throw std::runtime_error{"Failed to port map udp handler: {}"_format(e.what())}; - } - } - } - else - _dns = std::make_unique(_router.loop, dns_config, info.index); - - _dns->add_resolver(weak_from_this()); - _dns->start(); - - if (dns_config.l3_intercept) - { - if (auto vpn = _router.vpn_platform()) - { - // get the first local address we know of - std::optional localaddr; - - for (auto res : _dns->get_all_resolvers()) - { - if (auto ptr = res.lock()) - { - localaddr = ptr->get_local_addr(); - - if (localaddr) - break; - } - } - if (platform::is_windows) - { - // auto dns_io = vpn->create_packet_io(0, localaddr); - // 
router().loop()->add_ticker([dns_io, handler = m_PacketRouter]() { - // net::IPPacket pkt = dns_io->ReadNextPacket(); - // while (not pkt.empty()) - // { - // handler->HandleIPPacket(std::move(pkt)); - // pkt = dns_io->ReadNextPacket(); - // } - // }); - // m_RawDNS = dns_io; - } - - (void)vpn; - } - - if (_raw_DNS) - _raw_DNS->Start(); - } - } - - void TunEndpoint::reconfigure_dns(std::vector servers) - { - if (_dns) - { - for (auto weak : _dns->get_all_resolvers()) - { - if (auto ptr = weak.lock()) - ptr->reset_resolver(servers); - } - } - } - TunEndpoint::TunEndpoint(Router& r) : _router{r} { _packet_router = @@ -309,362 +109,8 @@ namespace srouter::handlers log::info(logcat, "{} got network interface:{}", name(), _if_name); } - static const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); - static const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); - static bool is_localhost(std::string_view qname) - { - return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld - or qname.ends_with(dot_localhost_ctld); - } - - static std::optional parse_rid(std::string_view b32rid) - { - auto rid = std::make_optional(); - if (not rid->from_base32z(b32rid)) - rid.reset(); - return rid; - } - - static std::optional is_snode(std::string_view name) - { - if (name.ends_with(RELAY_DOT_TLD)) - name.remove_suffix(RELAY_DOT_TLD.size()); - else - return std::nullopt; - return parse_rid(name); - } - - static dns::Message& clear_dns_message(dns::Message& msg) - { - // msg.authorities.clear(); - // msg.additional.clear(); - msg.answers.clear(); - msg.hdr_fields &= ~dns::flags_RCODENxDomain; - return msg; - } - - template - static std::optional try_making(Args&&... args) - { - try - { - return std::make_optional(std::forward(args)...); - } - catch (...) 
- { - return std::nullopt; - } - } - static const auto random_snode = "random.{}"_format(RELAY_TLD); - bool TunEndpoint::handle_hooked_dns_message( - dns::Message msg, std::function reply, std::optional qname_override) - { - log::trace(logcat, "handle_hooked_dns_message"); - if (msg.questions.size() != 1) - { - log::warning(logcat, "bad number of dns questions: {}", msg.questions.size()); - return false; - } - - auto& q = msg.questions[0]; - - std::string qname; - if (qname_override) - qname = std::move(*qname_override); - else - qname = q.name(); - std::string hostname, tld; - std::vector sub; - { - auto nameparts = split(qname, "."); - if (nameparts.size() < 2) - { - log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); - return false; - } - hostname = nameparts[nameparts.size() - 2]; - tld = nameparts.back(); - sub.reserve(nameparts.size() - 2); - for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) - sub.emplace_back(s); - } - bool localhost = is_localhost(qname); - - // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the - // question type. - if (localhost) - { - auto our_hostname = _router.id().to_string(); - auto our_tld = _router.is_service_node ? RELAY_TLD : CLIENT_TLD; - auto our_name = "{}.{}"_format(our_hostname, our_tld); - - if (tld == "loki") - { - // first: report a cname for the deprecated localhost.loki -> localhost.sesh - - msg.set_rr_name("localhost.loki"); - msg.add_cname_reply("localhost.{}"_format(our_tld)); - } - // report CNAME: localhost.sesh -> pubkey.sesh - msg.set_rr_name("localhost.{}"_format(our_tld)); - msg.add_cname_reply(our_name); - - if (q.qtype == dns::RRType::CNAME) - { - // If we were queried specifically for a cname, then we are done. 
- reply(std::move(msg)); - return true; - } - - // Otherwise we continue processing to be able to return supplemental records through - // the cname, so that if you request "foo.localhost.loki" we end up returning: - // localhost.loki CNAME for localhost.sesh - // localhost.sesh CNAME for PUBKEY.sesh - // foo.PUBKEY.sesh IN X VALUE (or whatever) - // And so for for the rest of the answer processing that we were given PUBKEY.sesh, - // rather than localhost.loki/.sesh: - qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); - msg.set_rr_name(qname); - - tld = our_tld; - hostname = std::move(our_hostname); - } - else if (qname == random_snode) - { - // Similar to the localhost case: we first return a CNAME of random.snode -> - // SOMEPK.snode, then continue processing as if that was what you asked for. - - if (auto* rc = _router.node_db().get_random_rc()) - { - hostname = rc->router_id().to_string(); - qname = "{}.{}"_format(hostname, RELAY_TLD); - msg.add_cname_reply(qname, 1s); - if (q.qtype == dns::RRType::CNAME) - { - reply(std::move(msg)); - return true; - } - - msg.set_rr_name(qname); - } - else - { - msg.set_nx_reply(); - reply(std::move(msg)); - return true; - } - } - else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) - { - // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of - // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as - // mapping to a AAAA). 
- - // TODO: .sesh SNS resolution, once implemented - - // ONS lookup: - auto lookup = "{}.loki"_format(hostname); - _router.session_endpoint().resolve_sns( - lookup, - [this, - lookup, - sub = std::move(sub), - reply = std::move(reply), - msg_ptr = std::make_shared(std::move(msg)), - cname_only = q.qtype == dns::RRType::CNAME]( - std::optional maybe_netaddr, - bool assertive, - std::chrono::milliseconds ttl) mutable { - auto& msg = *msg_ptr; - msg.set_rr_name(lookup); - if (maybe_netaddr) - { - auto target = maybe_netaddr->to_string(); - msg.add_cname_reply(target); - if (cname_only) - return; - auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); - msg.set_rr_name(qname); - handle_hooked_dns_message(std::move(msg), std::move(reply), std::move(qname)); - return; - } - - if (assertive) - { - // We got an assertive "does not exist" message (and not just a failure - // or timeout), so add the nx reply - msg.set_nx_reply(); - // FIXME: we should be able to provide a TTL here - } - else - { - // We failed to get a response at all so just NX with a short timeout so - // that they will try again soon to resolve it. (We don't want to - // SERVFAIL here because that could make the resolver try another DNS - // server). - assert(!assertive); - // FIXME: should be able to specify a TTL here - msg.set_nx_reply(); - } - reply(std::move(msg)); - }); - return true; - } - - if (q.qtype == dns::RRType::TXT) - { - // TXT records can be used to query some basic info: - - // TXT on MYPUBKEY.sesh returns the basic version and netid: - if (localhost && sub.empty()) - msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( - _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); - - // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) - else if (auto rid = is_snode(qname)) - { - if (auto* rc = _router.node_db().get_rc(*rid)) - { - msg.add_txt_reply("rc v={} a={} t={}"_format( - fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); - } - else - msg.set_nx_reply(); - } - else - msg.set_nx_reply(); - reply(std::move(msg)); - return true; - } - - // "Regular" A or AAAA lookups - if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) - { - // Attempt to parse a "pubkey.snode" or "pubkey.sesh": - if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) - { - // DNS lookup implies we want a session, so make one (NOP if we have one) - // This also means if we don't use that session the IP mapping will release when - // it expires, which it wouldn't otherwise without a tedious periodic check. - bool created_session = false; - try - { - created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); - } - catch (const std::exception& e) - { - log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); - } - if (created_session) - { - if (aaaa) - msg.add_reply(map6(*maybe_netaddr)); - else if (!sub.empty() && sub.back() == "ipv4"sv) - { - // We don't map IPv4 addresses by default, but it is still possible to get - // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
- if (auto v4_addr = map4(*maybe_netaddr); v4_addr) - msg.add_reply(*v4_addr); - else - log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); - } - // else they requested A *not* using the magic ipv4 subdomain, so we only have - // AAAA to offer and thus we return a reply without an answer record (which is - // the proper DNS way to say "something exists at this address, but not with the - // type you requested requested", as opposed to this nx_reply below, which means - // "this record does not exist"). - } - else - msg.set_nx_reply(); - reply(std::move(msg)); - - return true; - } - - // Otherwise it's some query type we don't support, so return does-not-exist. - msg.set_nx_reply(); - reply(std::move(msg)); - return true; - } - - // Reverse DNS lookups: - if (q.qtype == dns::RRType::PTR) - { - // reverse dns - bool found = false; - if (auto ip = dns::decode_ptr(q.qname)) - std::visit( - [&](const auto& ip) { - if (auto addr = _lookup_mapped_ip(ip)) - { - msg.add_ptr_reply(addr->to_string()); - found = true; - } - }, - *ip); - - if (!found) - msg.set_nx_reply(); - - reply(std::move(msg)); - return true; - } - - if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 - && sub[0].starts_with('_') && sub[1].starts_with('_')) - { - if (auto rid = parse_rid(hostname)) - { - _router.session_endpoint().lookup_client_intro( - *rid, - [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( - const std::optional& cc) mutable { - if (cc) - { - for (const auto& srv : cc->SRVs()) - if (srv.service == sub[0] && srv.proto == sub[1]) - msg->add_reply(srv); - } - else - msg->set_nx_reply(); - - reply(std::move(*msg)); - }); - return true; - } - } - - msg.set_nx_reply(); - reply(std::move(msg)); - return true; - } - - bool TunEndpoint::should_hook_dns_message(const dns::Message& msg) const - { - if (msg.questions.size() == 1) - { - // Hook every .sesh/.snode/.loki query - for (auto tld : {CLIENT_TLD, 
RELAY_TLD, "loki"sv}) - if (msg.questions[0].has_tld(tld)) - return true; - - // hook any PTR records for ranges we own - if (msg.questions[0].qtype == srouter::dns::RRType::PTR) - { - if (auto ip = dns::decode_ptr(msg.questions[0].qname)) - { - if (auto* v4 = std::get_if(&*ip)) - return _local_net.contains(*v4); - return _local_ipv6_net.contains(std::get(*ip)); - } - return false; - } - } - return false; - } - std::string TunEndpoint::get_if_name() const { return _if_name; } const ipv4& TunEndpoint::get_ipv4() const { return _local_net.ip; } @@ -677,13 +123,11 @@ namespace srouter::handlers bool TunEndpoint::is_exit_node() const { return _router.is_exit_node(); } - bool TunEndpoint::stop() + void TunEndpoint::stop() { // stop vpn tunnel if (_net_if) _net_if->Stop(); - if (_raw_DNS) - _raw_DNS->Stop(); #if 0 // save address map if applicable @@ -708,11 +152,6 @@ namespace srouter::handlers // } } #endif - - if (_dns) - _dns->stop(); - - return true; } template diff --git a/src/handlers/tun.hpp b/src/handlers/tun.hpp index 7e38d964e..f3c932b58 100644 --- a/src/handlers/tun.hpp +++ b/src/handlers/tun.hpp @@ -1,10 +1,8 @@ #pragma once #include "address/map.hpp" -#include "dns/server.hpp" #include "ev/fd_poller.hpp" #include "net/ip_packet.hpp" -#include "tun_base.hpp" #include "util/thread/threading.hpp" #include "vpn/packet_router.hpp" #include "vpn/platform.hpp" @@ -22,20 +20,16 @@ namespace srouter::traffic_type namespace srouter::handlers { inline constexpr auto TUN = "tun"sv; - inline constexpr auto LOKI_RESOLVER = "session-router"sv; - class TunEndpoint : public TunEPBase, public dns::Resolver_Base, public std::enable_shared_from_this + class TunEndpoint { public: TunEndpoint(Router& r); - ~TunEndpoint() override; + ~TunEndpoint(); private: Router& _router; - /// dns subsystem for this endpoint - std::unique_ptr _dns; - /// our local ip network ipv4_net _local_net; IPv4RangeIterator _local_range_iterator{_local_net}; @@ -59,27 +53,11 @@ namespace 
srouter::handlers std::optional _persisting_addr_file = std::nullopt; bool persist_addrs{false}; - /// for raw packet dns - std::shared_ptr _raw_DNS; - public: vpn::NetworkInterface* get_vpn_interface() { return _net_if.get(); } std::string_view name() const { return TUN; } - int rank() const override { return 0; } - - std::string_view resolver_name() const override { return LOKI_RESOLVER; } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) override; - - // Reconfigures DNS servers and restarts libunbound with the new servers. - void reconfigure_dns(std::vector servers); - void configure(); std::string get_if_name() const; @@ -94,23 +72,14 @@ namespace srouter::handlers const ipv4_net& get_ipv4_network() const; const ipv6_net& get_ipv6_network() const; - bool should_hook_dns_message(const dns::Message& msg) const; - - bool handle_hooked_dns_message( - dns::Message query, - std::function sendreply, - std::optional qname_override = std::nullopt); - void tick_tun(sys_ms now); - bool stop(); + void stop(); bool is_service_node() const; bool is_exit_node() const; - void setup_dns(); - // INPROGRESS: new API // Handles an outbound packet going OUT to the network void handle_outbound_packet(IPPacket pkt); @@ -118,7 +87,7 @@ namespace srouter::handlers void rewrite_and_send_packet(IPPacket&& pkt, const ipv4& src, const ipv4& dest); void rewrite_and_send_packet(IPPacket&& pkt, const ipv6& src, const ipv6& dest); - void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) override; + void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote); // Handles an inbound packet coming IN from the network // bool handle_inbound_packet(IPPacket pkt, NetworkAddress remote, bool is_exit_session, bool @@ -128,7 +97,7 @@ namespace srouter::handlers // Router remote address with it. 
If the mapping already exists, this returns the existing // IP, otherwise it assigns a new one. The association persists until unmapped. Returns // the mapped ipv6 address. - ipv6 map6(const NetworkAddress& remote) override; + ipv6 map6(const NetworkAddress& remote); // Obtains an available IPv4 address from the tun device and associates the given Session // Router remote address with it. If the mapping already exists, this returns the existing @@ -139,12 +108,34 @@ namespace srouter::handlers // Returns the mapped addresses, or nullptr if an address could not be assigned (i.e. // because of IPv4 exhaustion in the allocated tun range, or because this client does not // support IPv4 addressing at all). - std::optional map4(const NetworkAddress& remote) override; + std::optional map4(const NetworkAddress& remote); + + // Takes an IPv4 or IPv6 address and returns {addr, true} if the address is a tun address + // range IP mapped to an address; {nullptr, true} if it is a tun address range IP but + // without a mapped address; or {nullptr, false} if it is not a tun address range IP. + template + std::pair, bool> reverse_lookup(const IP& ip) + requires std::same_as || std::same_as + { + std::pair, bool> result; + auto& [netaddr, in_range] = result; + if constexpr (std::same_as) + { + netaddr = _local_ipv4_mapping[ip]; + in_range = netaddr || _local_net.contains(ip); + } + else + { + netaddr = _local_ipv6_mapping[ip]; + in_range = netaddr || _local_ipv6_net.contains(ip); + } + return result; + } // Expires a mapped IP for the given remote from the tun IP map. The address will be added // as the most recently used address, and (if the configured cache size is exceeded) the least // recently used address will be forgotten. 
- void expire(const NetworkAddress& remote) override; + void expire(const NetworkAddress& remote); std::optional get_exit_policy() const { return _exit_policy; } @@ -159,7 +150,7 @@ namespace srouter::handlers Router& router() { return _router; } - void start_poller() override; + void start_poller(); private: // Stores assigned IP's for each session in/out of this Session Router instance @@ -168,18 +159,6 @@ namespace srouter::handlers address_map _local_ipv4_mapping; address_map _local_ipv6_mapping; - template - auto _lookup_mapped_ip(const IP& ip) - { - if constexpr (std::same_as) - return _local_ipv4_mapping[ip]; - else - { - static_assert(std::same_as); - return _local_ipv6_mapping[ip]; - } - } - // We keep a list of expired network addresses ordered by least-recently-used first. When // pruning the expired list, we pop off the front of the list. std::list _expired; diff --git a/src/handlers/tun_base.hpp b/src/handlers/tun_base.hpp deleted file mode 100644 index 2f4c772fe..000000000 --- a/src/handlers/tun_base.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "address/address.hpp" -#include "address/types.hpp" -#include "net/ip_packet.hpp" - -namespace srouter::handlers -{ - - // Abstract class for TUN handling. This base interface exists so that embedded clients can be - // built without needing to compile any tun code at all. 
- class TunEPBase - { - public: - virtual ~TunEPBase() = default; - - virtual void start_poller() = 0; - - virtual ipv6 map6(const NetworkAddress& remote) = 0; - virtual std::optional map4([[maybe_unused]] const NetworkAddress& remote) { return std::nullopt; } - - virtual void expire(const NetworkAddress& remote) = 0; - - virtual void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) = 0; - }; - -} // namespace srouter::handlers diff --git a/src/router/router.cpp b/src/router/router.cpp index b5c6eccb8..9f40d100c 100644 --- a/src/router/router.cpp +++ b/src/router/router.cpp @@ -7,6 +7,7 @@ #include "constants/version.hpp" #include "contact/contactdb.hpp" #include "crypto/crypto.hpp" +#include "dns/listener.hpp" #include "link/link_manager.hpp" #include "nodedb.hpp" #include "util/formattable.hpp" @@ -62,7 +63,8 @@ namespace srouter // exceed the defaut 1MB limit). _omq->MAX_MSG_SIZE = -1; - _router_testing = std::make_shared(*this); + if (is_service_node) + _router_testing = std::make_shared(*this); #endif init_logging(); @@ -552,19 +554,42 @@ namespace srouter throw std::runtime_error{"This Session Router build only supports embedded configurations!"}; #else log::debug(logcat, "Initializing TUN device"); - auto tun = _loop->make_shared(*this); + _tun = _loop->make_shared(*this); // only (full) clients should have DNS, relays have no need for it if (!is_service_node) - tun->setup_dns(); + { + auto& dns_bind = config().dns._bind_addrs; + if (dns_bind.empty()) + { + // This configuration is allowed (a service-only client might use it), although a bit unusual + log::warning( + logcat, "[bind]:bind is empty: DNS disabled. 
Making outbound paths will not be possible"); + } + else + { + for (const auto& addr : dns_bind) + { + try + { + if (!_dns) + _dns = std::make_shared(*this, addr); + else + _dns->listen(loop, addr); + } + catch (const std::exception& e) + { + log::error(logcat, "Failed to initialize DNS listener on {}: {}", addr, e.what()); + } + } + } + } log::info( log_global, "Session Router internal network: {} on device {}", - tun->get_ipv4_network(), - tun->get_if_name()); - - _tun = std::move(tun); + _tun->get_ipv4_network(), + _tun->get_if_name()); #endif } else @@ -951,7 +976,8 @@ namespace srouter void Router::on_test_ping() { #ifndef SROUTER_EMBEDDED_ONLY - _router_testing->incoming_ping(); + if (_router_testing) + _router_testing->incoming_ping(); #endif } @@ -979,7 +1005,8 @@ namespace srouter srouter::sys::service_manager->stopping(); } - _router_testing->stop(); + if (_router_testing) + _router_testing->stop(); #endif _session_endpoint->stop(true); @@ -990,6 +1017,14 @@ namespace srouter log::debug(logcat, "closing all connections"); _link_manager->stop(); +#ifndef SROUTER_EMBEDDED_ONLY + if (_dns) + _dns.reset(); + + if (_tun) + _tun->stop(); +#endif + auto rv = _loop_ticker->stop(); log::debug(logcat, "router loop ticker stopped {}successfully!", rv ? 
"" : "un"); _loop_ticker.reset(); @@ -1020,6 +1055,9 @@ namespace srouter _link_endpoint = nullptr; _link_manager.reset(); + if (_tun) + _tun.reset(); + if (_router_close_cb) _router_close_cb(); @@ -1032,6 +1070,24 @@ namespace srouter }); } + std::pair, bool> Router::reverse_lookup(const ipv4& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + + std::pair, bool> Router::reverse_lookup(const ipv6& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + const srouter::net::Platform* Router::net() const { #ifndef SROUTER_EMBEDDED_ONLY diff --git a/src/router/router.hpp b/src/router/router.hpp index 9f485b79c..10c4173f4 100644 --- a/src/router/router.hpp +++ b/src/router/router.hpp @@ -3,7 +3,7 @@ #include "contact/relay_contact.hpp" #include "crypto/key_manager.hpp" #include "handlers/session.hpp" -#include "handlers/tun_base.hpp" +#include "handlers/tun.hpp" #include "path/build_stats.hpp" #include "path/path_context.hpp" #include "profiling.hpp" @@ -17,6 +17,7 @@ #include #include #include +#include namespace oxenmq { @@ -26,6 +27,10 @@ namespace oxenmq namespace srouter { + namespace dns + { + class Listener; + } namespace link { struct Connection; @@ -91,6 +96,12 @@ namespace srouter ~Router(); + // Non-copyable/movable: + Router(const Router&) = delete; + Router(Router&&) = delete; + Router& operator=(const Router&) = delete; + Router& operator=(Router&&) = delete; + private: // Internal functions called during construction: void configure(); @@ -126,7 +137,8 @@ namespace srouter link::Endpoint* _link_endpoint = nullptr; // These are only created in full platform mode (not embedded clients) - std::shared_ptr _tun; + std::shared_ptr _tun; + std::shared_ptr _dns; std::shared_ptr _vpn; std::shared_ptr _route_poker; @@ -197,7 +209,16 @@ namespace srouter bool is_fully_meshed() const; - const 
std::shared_ptr& tun_endpoint() { return _tun; } + const std::shared_ptr& tun_endpoint() { return _tun; } + + // Looks up the given IP in our TUN mapping and, if it is a TUN address and maps to a remote, returns the + // network address of the mapped-to address. The `.second` part of the result indicates + // whether the IP is on our TUN range, even if it is unmapped. That is, it can return: + // {address, true} -- address in tun range, and mapped + // {nullopt, true} -- address in tun range, but not mapped to a remote + // {nullopt, false} -- address not in tun range (or no tun at all) + std::pair, bool> reverse_lookup(const ipv4& addr) const; + std::pair, bool> reverse_lookup(const ipv6& addr) const; // Returns the net Platform pointer, or nullptr if this is an embedded client. const srouter::net::Platform* net() const; diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index c70079969..736523120 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -4,8 +4,6 @@ #include "config/ini.hpp" #include "constants/version.hpp" #include "contact/client_contact.hpp" -#include "dns/dns.hpp" -#include "dns/server.hpp" #include "router/router.hpp" #include "rpc/rpc_request_definitions.hpp" #include "rpc_request.hpp" diff --git a/src/session/session.cpp b/src/session/session.cpp index 71cf622d7..eb6dfa3e6 100644 --- a/src/session/session.cpp +++ b/src/session/session.cpp @@ -664,6 +664,7 @@ namespace srouter::session return; } +#ifndef SROUTER_EMBEDDED_ONLY // Otherwise we're not embedded; if the other side also isn't then this is just a raw IP // packet to handle via the tun endpoint, and the same for UDP packets from embedded // remotes (which also send raw UDP packets): @@ -682,7 +683,7 @@ namespace srouter::session // NOTE: At this time, tun clients always support ipv4, but ipv4 is only activated on use // (unlike IPv6 which is activated all the time). If this changes, a check for that should // short-circuit the call to map_session below. 
- if (!_r.embedded() && pkt.is_ipv4() && !ipv4_mapped) + if (pkt.is_ipv4() && !ipv4_mapped) { if (!_parent.map_session_v4(*this)) { @@ -692,7 +693,9 @@ namespace srouter::session ipv4_mapped = true; } + assert(_r.tun_endpoint()); // (We return above if embedded) _r.tun_endpoint()->handle_inbound_packet(std::move(pkt), dgram_type, _remote); +#endif } void Session::publish_client_contact(std::string_view encrypted_cc) diff --git a/src/session/session.hpp b/src/session/session.hpp index f67273001..b324ef268 100644 --- a/src/session/session.hpp +++ b/src/session/session.hpp @@ -3,7 +3,6 @@ #include "address/address.hpp" #include "constants/path.hpp" #include "ev/tcp.hpp" -#include "ev/udp.hpp" #include "net/ip_packet.hpp" #include "path/path.hpp" #include "path/path_handler.hpp" diff --git a/src/util/logging.hpp b/src/util/logging.hpp index 68c82a909..f66f5a6ca 100644 --- a/src/util/logging.hpp +++ b/src/util/logging.hpp @@ -2,7 +2,7 @@ // Header for making actual log statements such as srouter::log::Info and so on work. -#include +#include // IWYU pragma: export #include #include From 03cfc413a4cbef3e7ed5157962dcdcba0482f8df Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 14 Nov 2025 16:06:04 -0400 Subject: [PATCH 7/8] Add path info via DNS TXT; calculate jitter - A TXT lookup for path.PUBKEY.{sesh,snode} now returns info about your path in a TXT record. - Replace the stdev calculation for paths with a jitter calculation as that is probably slightly more relevant. (jitter = mean absolute difference between sequential pings) - Refactor the current_path methods to return more raw info rather than stringifying early, deferring stringification to later parts where needed (session_router, and now DNS). Also adds ping and expiry info into it. Example TXT info: ;; QUESTION SECTION: ;path.55fxrybf3jtausbnmxpgwcsz9t8qkf5pr8t5f4xyto4omjrkorpy.snode. IN TXT ;; ANSWER SECTION: path.55fxrybf3jtausbnmxpgwcsz9t8qkf5pr8t5f4xyto4omjrkorpy.snode. 
0 IN TXT "d=out; path=55fxro86pzfka36f19qagzsuogk6qwek1g8hdgkx5menjn1s6iso@199.127.61.170 55fxne4863o4r4ohntbnx1baqupfgmghdw4sas5zbnxso4a9d1oy@172.93.103.156 55fxr5s76pmfaqfrhxbyfgnkcueok8j6a8c6fci9ts8axmwgmaoo@206.221.176.9 55fxrybf3jtausbnmxpgwcsz9t8qkf5pr8t5f4xy" "to4omjrkorpy@23.88.6.250; ttl=1160; p=116; pj=0.714; pr=8; pt=0; pT=0" where: - `d` is `in`/`out`/`none` showing the session direction. When `d` is not `none` this is followed by: - `path=...` space-separated PUBKEY@IP values along the path - `ttl=...` seconds remaining before this path expires - `p=...` average path ping time (in milliseconds) - `pj=...` average path jitter (milliseconds, with three decimal points) - `pr=...` number of ping responses - `pt=...` number of ping timeouts - `pT=...` number of recent ping timeouts (i.e. 3 means the last 3 pings have timed out). --- src/dns/handler.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/dns/rr.cpp | 17 +++++++++++++++-- src/path/path.cpp | 34 +++++++++++++++++++++++++--------- src/path/path.hpp | 17 +++++++++++++++-- src/session/session.cpp | 8 ++++---- src/session/session.hpp | 6 +++--- src/session_router.cpp | 21 ++++++++++++++------- 7 files changed, 114 insertions(+), 27 deletions(-) diff --git a/src/dns/handler.cpp b/src/dns/handler.cpp index 24cf52b6a..fb9886379 100644 --- a/src/dns/handler.cpp +++ b/src/dns/handler.cpp @@ -8,6 +8,8 @@ #include "router/router.hpp" #include "util/logging.hpp" +#include + namespace srouter::dns { #ifdef SROUTER_EMBEDDED_ONLY @@ -296,6 +298,42 @@ namespace srouter::dns else msg.nxdomain(); } + + // TXT on path.PUBKEY.{sesh,snode} returns the current path info to that node, if a + // session is established (nxdomain if no active session). 
+ else if (sub.size() == 1 && sub.front() == "path") + { + log::debug(logcat, "TXT path request for {}.{}", hostname, tld); + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + if (auto* s = _router.session_endpoint().get_session(*maybe_netaddr); s && s->is_established()) + { + auto path = s->current_path_info(); + msg.add_txt_reply( + "d={}; path={}; ttl={}; p={}; pj={}.{:03d}; pr={}; pt={}; pT={}"_format( + s->is_outbound ? "out" : "in", + fmt::join( + std::views::transform( + path.relays, [](const auto& r) { return "{}@{}"_format(r.first, r.second); }), + " "), + std::chrono::round(path.expiry - srouter::time_now_ms()).count(), + path.ping_mean.count(), + path.ping_jitter / 1ms, + (path.ping_jitter % 1ms).count(), + path.ping_responses, + path.ping_timeouts, + path.ping_recent_timeouts), + 0s); + } + else + msg.add_txt_reply("d=none"); + } + else + { + log::warning(logcat, "Failed to parse network address {}.{} for path query", hostname, tld); + msg.nxdomain(); + } + } else msg.nxdomain(); reply(std::move(msg)); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index 9fee05b8b..58d0dacfc 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -100,8 +100,21 @@ namespace srouter::dns RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) : RR_bytes{std::move(rr_name), ttl} { - auto* bytes = reinterpret_cast(value.data()); - rData.assign(bytes, bytes + value.size()); + // TXT records get encoded as {SZ}{data} where SZ is one byte indicating the length of data, + // however they can be repeated which is why we have SZ twice: once for the rData, but then + // again internally for multiple possible strings within the answer. 
+ // + // In total that means we are storing the value length plus 1 additional byte per 255 length + // (or part thereof): + rData.reserve(value.size() + (value.size() + 254) / 255); + do + { + auto* bytes = reinterpret_cast(value.data()); + size_t size = std::min(255, value.size()); + rData.push_back(static_cast(size)); + rData.insert(rData.end(), bytes, bytes + size); + value.remove_prefix(size); + } while (!value.empty()); } void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const diff --git a/src/path/path.cpp b/src/path/path.cpp index a5610ceb7..dac38cb15 100644 --- a/src/path/path.cpp +++ b/src/path/path.cpp @@ -72,9 +72,8 @@ namespace srouter::path double success_pct = p.ping_responses / (double)(p.ping_responses + p.ping_timeouts) * 100.0; if (p.ping_responses == 1) return "{:.1f}%, {:.0f}ms avg"_format(success_pct, mean); - - double sd = std::sqrt(((double)p.ping_sq_cumulative - p.ping_responses * mean * mean) / (p.ping_responses - 1)); - return "{:.1f}%, {:.0f}ms avg, {:.1f}ms s.d."_format(success_pct, mean, sd); + double jitter = p.ping_responses < 2 ? 0.0 : (double)p.ping_abs_diffs.count() / (p.ping_responses - 1); + return "{:.1f}%, {:.0f}ms avg, {:.1f}ms jitter"_format(success_pct, mean, jitter); } void Path::do_ping(steady_ms start_time) @@ -96,10 +95,11 @@ namespace srouter::path auto time_taken = now - start_time; if (resp.ok()) { - ping_responses++; + if (++ping_responses > 1) + ping_abs_diffs += time_taken >= ping_last ? 
time_taken - ping_last : ping_last - time_taken; + ping_last = time_taken; ping_recent_timeouts = 0; ping_cumulative += time_taken; - ping_sq_cumulative += time_taken.count() * time_taken.count(); if (resp.body == messages::OK_RESPONSE) log::debug( @@ -318,13 +318,29 @@ namespace srouter::path } path_hop_stringifier Path::hop_string() const { return {hops}; } - std::vector> Path::get_hops_strings_and_ips() const + Path::Info Path::get_info() const { - std::vector> ret; + Info ret{}; + ret.expiry = _expiry; + if (ping_responses) + ret.ping_mean = std::chrono::round( + std::chrono::nanoseconds{ping_cumulative} / ping_responses); + if (ping_responses > 1) + ret.ping_jitter = std::chrono::round( + std::chrono::nanoseconds{ping_abs_diffs} / (ping_responses - 1)); + ret.ping_responses = ping_responses; + ret.ping_timeouts = ping_timeouts; + ret.ping_recent_timeouts = ping_recent_timeouts; for (const auto& hop : hops) { - auto rc = _router.node_db().get_rc(hop.router_id); - ret.emplace_back(NetworkAddress{hop.router_id, false}.to_string(), rc->addr().to_ipv4().to_string()); + auto* rc = _router.node_db().get_rc(hop.router_id); + if (rc) + ret.relays.emplace_back(hop.router_id, rc->addr().to_ipv4()); + else + { + log::warning(logcat, "Couldn't find RC of a router on our path?!"); + ret.relays.emplace_back(); + } } return ret; } diff --git a/src/path/path.hpp b/src/path/path.hpp index d870ff87f..316b6d1d1 100644 --- a/src/path/path.hpp +++ b/src/path/path.hpp @@ -72,7 +72,16 @@ namespace srouter::path path_hop_stringifier hop_string() const; - std::vector> get_hops_strings_and_ips() const; + struct Info + { + // relay pubkeys and IPv4 addresses, from edge -> pivot (or final relay) + std::vector> relays; + sys_ms expiry = {}; + std::chrono::milliseconds ping_mean; + std::chrono::microseconds ping_jitter; + int ping_responses, ping_timeouts, ping_recent_timeouts; + }; + Info get_info() const; sys_ms LastRemoteActivityAt() const { return last_recv_msg; } @@ -219,9 +228,13 @@ 
namespace srouter::path steady_ms next_ping{}; int ping_responses{0}, ping_timeouts{0}; int ping_recent_timeouts{0}; + std::chrono::milliseconds ping_last{0ms}; // Cumulative time of all `ping_responses` pings (divide by ping_responses for an average). std::chrono::milliseconds ping_cumulative{0ms}; - int64_t ping_sq_cumulative{0}; + // This is the cumulative absolute differences of all received sequential pings. E.g. if we + // have 4 pings [100, 101, 98, 98] then this equals (|100-101| + |101-98| + |98-98|). + // Dividing this by `ping_responses - 1` gives jitter. + std::chrono::milliseconds ping_abs_diffs{0ms}; }; } // namespace srouter::path diff --git a/src/session/session.cpp b/src/session/session.cpp index eb6dfa3e6..afbc1d255 100644 --- a/src/session/session.cpp +++ b/src/session/session.cpp @@ -1666,17 +1666,17 @@ namespace srouter::session _current_thop->downstream, "session_control"s, std::move(data), nullptr); } - std::vector> OutboundSession::current_path() const + path::Path::Info OutboundSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } - std::vector> InboundClientSession::current_path() const + path::Path::Info InboundClientSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } diff --git a/src/session/session.hpp b/src/session/session.hpp index b324ef268..380ef3032 100644 --- a/src/session/session.hpp +++ b/src/session/session.hpp @@ -219,7 +219,7 @@ namespace srouter // rather than waiting for the next tick) virtual void tick(sys_ms now); - virtual std::vector> current_path() const { return {}; }; + virtual path::Path::Info current_path_info() const { return {}; }; }; class OutboundSession : public path::PathHandler, public Session @@ -299,7 +299,7 @@ namespace srouter inline static constexpr int MAX_QUEUED_PACKETS = 30; - std::vector> 
current_path() const override; + path::Path::Info current_path_info() const override; }; // Outbound Session to Remote Relay @@ -418,7 +418,7 @@ namespace srouter void handle_path_switch(HopID pivot, std::shared_ptr path); - std::vector> current_path() const override; + path::Path::Info current_path_info() const override; std::string to_string() const override; }; diff --git a/src/session_router.cpp b/src/session_router.cpp index 3592a7f06..e268e4a6d 100644 --- a/src/session_router.cpp +++ b/src/session_router.cpp @@ -157,6 +157,14 @@ namespace session::router context->router->session_endpoint().unmap_udp_remote_port(netaddr, port); } + static snode_path to_snode_path(const srouter::path::Path::Info& info) + { + snode_path path; + for (const auto& [rid, ip] : info.relays) + path.emplace_back(srouter::NetworkAddress{rid, false}.to_string(), ip.to_string()); + return path; + } + std::optional SessionRouter::get_path_for_session(std::string_view remote) { srouter::NetworkAddress netaddr; @@ -172,10 +180,8 @@ namespace session::router return context->router->loop.call_get([&r = context->router, addr = std::move(netaddr)]() { std::optional ret; - if (auto s = r->session_endpoint().get_session(addr); s) - { - ret = s->current_path(); - } + if (auto* s = r->session_endpoint().get_session(addr)) + ret = to_snode_path(s->current_path_info()); return ret; }); } @@ -184,9 +190,10 @@ namespace session::router { return context->router->loop.call_get([&r = context->router]() { std::vector ret; - auto f = [&ret](const srouter::NetworkAddress& addr, const srouter::session::Session& s) { - ret.emplace_back(s.current_path(), addr.to_string()); - }; + r->session_endpoint().for_each_session( + [&ret](const srouter::NetworkAddress& addr, const srouter::session::Session& s) { + ret.emplace_back(to_snode_path(s.current_path_info()), addr.to_string()); + }); return ret; }); } From 92ce7c796b81687e9302df2c9e9eeaa38defdd54 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Fri, 14 Nov 
2025 17:57:29 -0400 Subject: [PATCH 8/8] Fix deadlock or crash during shutdown This solves issues during destruction where we could get a segfault or a deadlock because of race conditions between connection close and link endpoint destruction. Basically what happened was: - during Router stop, link_endpoint gets destroyed. - that destruction closes all the connections, firing the on_conn_closed callbacks during destruction. - those callbacks were capturing a shared_ptr to the closed connection in a lambda job pushed onto the router loop - that router loop job can't run yet because we're already inside a router job doing the shutdown that triggered all of this - we get "router is stopped" - the queued cleanup code fires. There were multiple things wrong here: - trying to call `selected_alpn` (or `remote()`) would crash or deadlock because those accessors go through a call_get, but the network loop was already gone. - the cleanup job had an invalid `this` pointer because link_endpoint was gone. - the cleanup job was also capturing a shared_ptr to a Connection tied to the link_endpoint's loop, and so attempting to destroy that lambda (even if we early return) could crash or deadlock because the loop that handles the Connection destructor isn't around anymore. This reworks the callback to fix these issues: - adds a shared_ptr canary we capture to make sure we are still alive, to early-return from the lambda if `this` isn't valid anymore. - captures all the conn details we need in the lambda, rather than capturing a shared_ptr and pulling them out inside the lambda. - changes all the "is the conn pointer the same" logic to use the reference id rather than the pointer (since we can't keep the conn pointer anymore).
--- src/link/endpoint.cpp | 63 +++++++++++++++++++++++++++---------------- src/link/endpoint.hpp | 11 +++++++- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/link/endpoint.cpp b/src/link/endpoint.cpp index 9a758adc3..2172c9fcd 100644 --- a/src/link/endpoint.cpp +++ b/src/link/endpoint.cpp @@ -797,6 +797,8 @@ namespace srouter::link }); } + Endpoint::~Endpoint() { *canary = false; } + void Endpoint::on_conn_closed(quic::Connection& conn, uint64_t ec) { auto alpn = conn.selected_alpn(); @@ -815,13 +817,26 @@ namespace srouter::link return; } - router.loop.call([this, connptr = conn.shared_from_this(), ec] { - auto& conn = *connptr; - auto alpn = conn.selected_alpn(); - - std::optional rid; - if (conn.remote_key().size() == RouterID::SIZE) - rid.emplace(conn.remote_key().first()); + std::optional rid; + if (conn.remote_key().size() == RouterID::SIZE) + rid.emplace(conn.remote_key().first()); + + // NB: we must not capture a shared_ptr to conn here, because this lambda could outlive + // `this`; the canary lets us early-return if that happens, but the Connection destruction + // relies on `this.loop` to destroy: thus if we capture it we could delay that destruction + // attempt beyond the end of `this.loop`. Thus we capture everything we need into the + // lambda here, while we are still in the network loop. 
+ + router.loop.call([this, + alive = canary, + conn_refid = conn.reference_id(), + alpn, + rid = std::move(rid), + remote_addr = conn.remote(), + ec, + was_inbound = conn.is_inbound()] { + if (!*alive) + return; bool found = false; @@ -833,14 +848,14 @@ namespace srouter::link { assert(router.is_service_node); auto& relcon = it->second; - if (relcon.inbound && connptr == relcon.inbound->conn) + if (relcon.inbound && relcon.inbound->conn && relcon.inbound->conn->reference_id() == conn_refid) { relcon.close(true); found = true; log::debug( logcat, "Inbound connection from {} closed (ec={})", rid->to_network_address(true), ec); } - if (relcon.outbound && connptr == relcon.outbound->conn) + if (relcon.outbound && relcon.outbound->conn && relcon.outbound->conn->reference_id() == conn_refid) { relcon.close(false); found = true; @@ -875,10 +890,10 @@ namespace srouter::link log::debug( logcat, "Closed redundant connection {} {} @ {} (cid={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid->to_network_address(true), - conn.remote(), - conn.reference_id()); + remote_addr, + conn_refid); found = true; } } @@ -886,11 +901,11 @@ namespace srouter::link { if (router.is_service_node) { - assert(conn.is_inbound()); // Relays do make outbound client conns for testing, - // but they do not use this close callback. - if (auto it = inbound_clients.find(conn.reference_id()); it != inbound_clients.end()) + assert(was_inbound); // Relays do make outbound client conns for testing, + // but they do not use this close callback. 
+ if (auto it = inbound_clients.find(conn_refid); it != inbound_clients.end()) { - log::debug(logcat, "Client connection from {} closed (ec={})", conn.remote(), ec); + log::debug(logcat, "Client connection from {} closed (ec={})", remote_addr, ec); it->second->close(); inbound_clients.erase(it); found = true; @@ -898,9 +913,10 @@ namespace srouter::link } else { - assert(conn.is_outbound()); + assert(!was_inbound); - if (auto it = client_conns.find(*rid); it != client_conns.end() and connptr == it->second->conn) + if (auto it = client_conns.find(*rid); it != client_conns.end() && it->second && it->second->conn + && it->second->conn->reference_id() == conn_refid) { log::debug( logcat, @@ -912,12 +928,13 @@ namespace srouter::link } } } - else if (conn.is_outbound()) + else if (!was_inbound) { // Unknown or empty ALPN -- this is an outbound conn that didn't establish (and thus // didn't negotiate the ALPN): assert(rid); // Outbound conns start out with the target pubkey known - if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() and connptr == it->second->conn) + if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() && it->second + && it->second->conn && it->second->conn->reference_id() == conn_refid) { pending_outbound.erase(it); found = true; @@ -931,10 +948,10 @@ namespace srouter::link log::warning( logcat, "Closed connection {} {} @ {} (cid={}, ec={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid ? rid->to_string() : "", - conn.remote(), - conn.reference_id(), + remote_addr, + conn_refid, ec); if (not router.is_service_node) diff --git a/src/link/endpoint.hpp b/src/link/endpoint.hpp index 36bb6759d..f7715a2c5 100644 --- a/src/link/endpoint.hpp +++ b/src/link/endpoint.hpp @@ -83,10 +83,16 @@ namespace srouter::link public: explicit Endpoint(Manager& lm); + ~Endpoint(); + Manager& manager; Router& router; private: + // The network loop object. 
This *must* be declared before most of the below as some of the + // things below have destructors that run in this loop. + std::unique_ptr loop; + // Stores established relay-to-relay connections; only used by service nodes. std::unordered_map relay_conns; @@ -114,12 +120,15 @@ namespace srouter::link // only. std::unordered_map> inbound_clients; - std::unique_ptr loop; std::shared_ptr endpoint; std::shared_ptr redundancy_ticker; std::shared_ptr dereg_conn_ticker; std::shared_ptr tls_creds; + // Canary object that gets set to false during destruction to help short-circuit lambda that + // could potentially outlive `this`: + std::shared_ptr canary = std::make_shared(true); + public: void start_tickers();