diff --git a/.drone.jsonnet b/.drone.jsonnet index 3509e6ac0..c753769f0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -291,7 +291,7 @@ local clang(version) = debian_pipeline( local full_llvm(version) = debian_pipeline( 'Debian sid/llvm-' + version, docker_base + 'debian-sid-clang', - deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], + deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libunwind-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], remove='g++'), oxen_repo=[], cmake_extra='-DCMAKE_C_COMPILER=clang-' + version + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 534c7c189..dbbe79343 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -156,11 +156,12 @@ if (SROUTER_FULL) # parse modify and reconstitute dns wire proto, dns queries and RR target_sources(session-router-dns PRIVATE dns/encode.cpp + dns/handler.cpp + dns/listener.cpp dns/message.cpp dns/platform.cpp dns/question.cpp dns/rr.cpp - dns/server.cpp ) # platform specific bits and bobs for setting dns diff --git a/src/config/config.cpp b/src/config/config.cpp index 17d2cbe46..03352b2d9 100644 --- a/src/config/config.cpp +++ b/src/config/config.cpp @@ -1035,6 +1035,8 @@ namespace srouter MultiValue, Comment{ "Address to bind to for handling DNS requests.", + "", + "Can be specified multiple times to bind to multiple addresses; can be set to empty to disable.", }, [this, parse_addr_for_dns](std::string arg) { if (not arg.empty()) diff --git a/src/dns/dns.hpp b/src/dns/dns.hpp deleted file mode 100644 index 0dae13dee..000000000 --- a/src/dns/dns.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace srouter::dns -{ - constexpr uint16_t flags_QR = 1 << 15; - constexpr uint16_t flags_AA = 1 << 10; - constexpr uint16_t flags_TC = 1 << 9; - 
constexpr uint16_t flags_RD = 1 << 8; - constexpr uint16_t flags_RA = 1 << 7; - constexpr uint16_t flags_RCODENxDomain = 3; - constexpr uint16_t flags_RCODEServFail = 2; - constexpr uint16_t flags_RCODENoError = 0; - -} // namespace srouter::dns diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 5318d72e8..911a1f967 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -46,36 +46,84 @@ namespace srouter::dns return name; } - size_t encode_name(std::span buf, std::string_view name) + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset) { - auto orig = buf.size(); if (name.size() && name.back() == '.') name.remove_suffix(1); - for (auto part : srouter::split(name, ".")) + // Look for a previously used suffix of this name. For instance, if we have a response + // consisting of: + // + // localhost.sesh IN CNAME mylongpubkey.sesh + // foo.mylongpubkey.sesh IN AAAA 1:2:3::4 + // + // then when we repeat the question itself (IN AAAA localhost.sesh) we echo that question + // back into the response as the 16 bytes: + // \x09localhost\x04sesh\x00 + // Suppose that this was written at location Z in the DNS message, this creates two + // pointable addresses: + // - "localhost.sesh" -> Z + // - "sesh" -> Z+10 + // + // Then we come to the answers, and for the first "localhost.sesh" value, we can simply + // write that as a single pointer [Z] (where the pointer is a 16-bit, big-endian value with + // the highest two bits set and the remaining 14 bits set to "Z"). + // + // Then we get to "mylongpubkey.sesh" and we can encode that as: + // + // \x34mylongpubkey[pointer to Z+10] + // + // This also creates a new pointable address: + // - "mylongpubkey.sesh" -> Y + // + // Then we come to foo.mylongpubkey.sesh and we can encode this as: + // + // - \x03foo[pointer to Y] + // + // i.e. we only need 6 bytes for this address instead of 1+3+1+52+1+4+1=63 bytes that we + // would need for the uncompressed version. 
+ // + // Although this compression is optional, given how frequently we reuse long session router + // names (particularly for something like SRV records where a name can be repeated multiple + // times), and the DNS response size limit of 512 bytes, we implement that here. + + for (size_t pos = name.empty() ? std::string::npos : 0; pos != std::string_view::npos;) { + std::string_view check = name.substr(pos); + if (auto it = prev_names.find(check); it != prev_names.end()) + { + if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + uint16_t ptr = uint16_t{0b11000000'00000000} | it->second; + oxenc::write_host_as_big(ptr, buf.data()); + buf = buf.subspan(2); + buf_offset += 2; + // A pointer is terminal (i.e. no null byte to add), so we're done. + return; + } + + auto next = name.find('.', pos + 1); + auto part = next == std::string_view::npos ? check : name.substr(pos, next - pos); + size_t l = part.size(); if (l > 63 || l >= buf.size()) - return false; - buf.front() = static_cast(l); + throw std::out_of_range{"Buffer too small"}; + buf.front() = static_cast(l); // Length prefix std::memcpy(buf.data() + 1, part.data(), part.size()); + prev_names.emplace(std::string{check}, static_cast(buf_offset)); buf = buf.subspan(1 + part.size()); + buf_offset += 1 + part.size(); + + pos = next == std::string_view::npos ? 
next : next + 1; } + + // If we get here we wrote all the pieces without pointing at anything, so we need to append + // a null byte to terminate the name: if (buf.empty()) - return false; + throw std::out_of_range{"Buffer too small"}; buf.front() = std::byte{0}; buf = buf.subspan(1); - return orig - buf.size(); - } - - bool write_name_into(std::span& buf, std::string_view name) - { - if (auto s = encode_name(buf, name)) - { - buf = buf.subspan(s); - return true; - } - return false; + buf_offset++; } std::optional> decode_ptr(std::string_view name) diff --git a/src/dns/encode.hpp b/src/dns/encode.hpp index e0d5b7e4a..46ddd4b9e 100644 --- a/src/dns/encode.hpp +++ b/src/dns/encode.hpp @@ -6,45 +6,59 @@ #include #include +#include #include #include namespace srouter::dns { - /// Writes the encoded version of DNS name `name` into buf, and returns how many bytes of buf - /// were written. If buf is too small to store the encoded name, returns 0. - size_t encode_name(std::span buf, std::string_view name); + // Custom hasher to let us look up a string_view key in a string-keyed unordered map: + struct transparent_string_hash + { + using is_transparent = void; + [[nodiscard]] size_t operator()(std::string_view txt) const { return std::hash{}(txt); } + }; + + using prev_names_t = std::unordered_map>; - /// Same as encode_name, except that instead of returning the written size, on success it mutates the span - /// to drop the written prefix. Returns true (and prefix-drops the written part of the span) on success, - /// false on failure. Note that the failure case can still partially write into span. - bool write_name_into(std::span& buf, std::string_view name); + /// Writes the encoded version of DNS name `name` into buf, mutating buf to eliminate the + /// written bytes. Throws if buf is too small to store the encoded name. 
+ /// + /// prev_names contains pointer values relative to the start of the message, used for name + /// compression, and buf_offset contains the relative position of the beginning of buf to the + /// start of the message. New names added here should be added into it so that later repeated + /// names (or name suffixes) can use compression. + void encode_name(std::span& buf, std::string_view name, prev_names_t& prev_names, uint16_t& buf_offset); /// decode name from buffer, mutating the buffer to begin just past the extracted name. Return - /// nullopt (without mutating buf) on failure. + /// nullopt (without mutating buf) on failure. Does not currently support compressed names (but + /// those are not typically used in questions). std::optional extract_name(std::span& buf); /// Encodes an integer in big-endian order into the buffer, mutating the span to start just - /// after the written integer. Returns true on success, false if the span was too small. + /// after the written integer. Throws if buf is too small. Returns sizeof(T) (i.e. the amount + /// written into the buffer), for convenience. template - bool write_int_into(std::span& buf, T value) + size_t write_int_into(std::span& buf, T value) { if (buf.size() < sizeof(T)) - return false; + throw std::out_of_range{"Buffer too small"}; oxenc::write_host_as_big(value, buf.data()); buf = buf.subspan(sizeof(T)); - return true; + return sizeof(T); } - // Calls write_int_info multiple times with the given integers. Returns true (and modifies buf) - // if all success. If any fail then false is returned and buf is left unchanged. + // Calls write_int_into multiple times with the given integers. Throws if the buffer is too + // small. Returns the total size of the given integers (i.e. the number of bytes written to + // buf), for convenience. template - bool write_ints_into(std::span& buf, T... values) + size_t write_ints_into(std::span& buf, T... values) { - if (buf.size() < (0 + ... 
+ sizeof(T))) - return false; + // NB: it's tempting to want to use `return (0 + ... + write_int_into())` here, but + // left-to-right evaluation of + operands isn't guaranteed, and that could put things into + // buf in the wrong order. With , as used here it is guaranteed (similarly to || or &&). ((void)write_int_into(buf, values), ...); - return true; + return (0 + ... + sizeof(T)); } /// Extracts a big-endian integer of the given type from the buffer, mutating the span to start @@ -72,23 +86,6 @@ namespace srouter::dns return true; } - // Takes some object T with an `size_t encode(buf)` function (such as various classes in this - // dns code) and attempts to call it with the given buffer. If it returns success (non-0) then - // this mutates `buf` to skip the written data and returns true; on failure it returns false. - template - bool encode_into(std::span& buf, const T& thing) - { - if (auto written = thing.encode(buf)) - { - buf = buf.subspan(written); - return true; - } - return false; - } - - // Writes encoded rr data into buf, mutating buf to point beyond the written data. Returns - // false (without mutating buf) if buf is too short; true on success. - bool write_rdata_into(std::span& buf, std::span rdata); // Extracts encoded rr data from buf, mutating buf to point beyond the extracted data. Returns // nullopt (without mutating buf) on error, the vector of decoded data on success. 
std::optional> extract_rdata(std::span& buf); diff --git a/src/dns/flags.hpp b/src/dns/flags.hpp new file mode 100644 index 000000000..de14b6663 --- /dev/null +++ b/src/dns/flags.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include + +namespace srouter::dns +{ + constexpr uint16_t flags_QR = 1 << 15; + constexpr uint16_t flags_AA = 1 << 10; + constexpr uint16_t flags_TC = 1 << 9; + constexpr uint16_t flags_RD = 1 << 8; + constexpr uint16_t flags_RA = 1 << 7; + + constexpr uint16_t flags_RCODE_mask = ~uint16_t{0b1111}; + + constexpr uint16_t RCODE_NxDomain = 3; + constexpr uint16_t RCODE_ServFail = 2; + constexpr uint16_t RCODE_FormErr = 1; + constexpr uint16_t RCODE_NoError = 0; + + inline constexpr uint16_t set_rcode(uint16_t flags, uint16_t rcode) { return (flags & flags_RCODE_mask) | rcode; } + +} // namespace srouter::dns diff --git a/src/dns/handler.cpp b/src/dns/handler.cpp new file mode 100644 index 000000000..fb9886379 --- /dev/null +++ b/src/dns/handler.cpp @@ -0,0 +1,455 @@ + +#include "handler.hpp" + +#include "dns/rr.hpp" +#include "flags.hpp" +#include "message.hpp" +#include "nodedb.hpp" +#include "router/router.hpp" +#include "util/logging.hpp" + +#include + +namespace srouter::dns +{ +#ifdef SROUTER_EMBEDDED_ONLY + static_assert(false, "dns::RequestHandler requires a full lokinet build!"); +#endif + + namespace + { + auto logcat = log::Cat("dns"); + + const auto random_snode = "random.{}"_format(RELAY_TLD); + + const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); + const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); + bool is_localhost(std::string_view qname) + { + return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld + or qname.ends_with(dot_localhost_ctld); + } + + std::optional parse_rid(std::string_view b32rid) + { + auto rid = std::make_optional(); + if (not rid->from_base32z(b32rid)) + rid.reset(); + return rid; + } + + std::optional is_snode(std::string_view name) + { + if 
(name.ends_with(RELAY_DOT_TLD)) + name.remove_suffix(RELAY_DOT_TLD.size()); + else + return std::nullopt; + return parse_rid(name); + } + + template + std::optional try_making(Args&&... args) + { + try + { + return std::make_optional(std::forward(args)...); + } + catch (...) + { + return std::nullopt; + } + } + + } // namespace + + RequestHandler::RequestHandler(Router& router) : _router{router} + { + if (!_router.tun_endpoint()) + throw std::logic_error{"dns::RequestHandler requires a TUN endpoint"}; + } + + void RequestHandler::operator()(std::span request, const quic::Address& from, ReplyCallback reply) + { + std::span client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; + + auto maybe = Message::extract_question(request, _cookie_secret, client_ip); + if (not maybe) + { + log::warning(logcat, "Ignoring unparseable DNS request from {}", from); + return; + } + auto& msg = *maybe; + + if (msg.bad_extract) + { + reply(std::move(msg)); + return; + } + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. + msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. (This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + reply(std::move(msg)); + return; + } + + // If there is no question then there is no answer to worry about. This is a bit weird, but + // is sometimes used by clients to get an initial DNS cookie (via EDNS) without making an + // actual request. 
+ if (!msg.question) + { + reply(std::move(msg)); + return; + } + + auto& q = *msg.question; + + if (handle_local(reply, msg, std::string{q.name()})) + return; + + // we don't provide a DoH resolver because it requires verified TLS: TLS needs X509/ASN.1-DER + // and opting into the Root CA Cabal. Thankfully mozilla added a backdoor that allows ISPs to + // turn it off, so we disable DoH for firefox using mozilla's ISP backdoor; see: + // https://github.com/oxen-io/lokinet/issues/832 + + // is this firefox looking for their backdoor record? + if (q.name() == "use-application-dns.net") + // yea it is, let's turn off DoH because god is dead. + return reply(msg.nxdomain()); // press F to pay respects and send it back where it came from + + // Not for us, so forward to upstream handler + forward(std::move(msg), std::move(reply)); + } + + bool RequestHandler::handle_local(ReplyCallback& reply, Message& msg, std::string qname) + { + // hook any PTR (reverse DNS) lookups for our local ranges + if (handle_local_ptr(msg, reply)) + return true; + + auto& q = *msg.question; + + if (!(q.has_tld(CLIENT_TLD) || q.has_tld(RELAY_TLD) || q.has_tld("loki"sv))) + return false; + + std::string hostname, tld; + std::vector sub; + { + auto nameparts = split(qname, "."); + if (nameparts.size() < 2) + { + log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); + reply(msg.formerr()); + return true; + } + hostname = nameparts[nameparts.size() - 2]; + tld = nameparts.back(); + sub.reserve(nameparts.size() - 2); + for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) + sub.emplace_back(s); + } + + bool localhost = is_localhost(qname); + + // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the + // question type. + if (localhost) + { + auto our_hostname = _router.id().to_string(); + auto our_tld = _router.is_service_node ? 
RELAY_TLD : CLIENT_TLD; + auto our_name = "{}.{}"_format(our_hostname, our_tld); + + if (tld == "loki") + { + // first: report a cname for the deprecated localhost.loki -> localhost.sesh + + msg.set_rr_name("localhost.loki"); + msg.add_cname_reply("localhost.{}"_format(our_tld)); + } + // report CNAME: localhost.sesh -> pubkey.sesh + msg.set_rr_name("localhost.{}"_format(our_tld)); + msg.add_cname_reply(our_name); + + if (q.qtype == dns::RRType::CNAME) + { + // If we were queried specifically for a cname, then we are done. + reply(std::move(msg)); + return true; + } + + // Otherwise we continue processing to be able to return supplemental records through + // the cname, so that if you request "foo.localhost.loki" we end up returning: + // localhost.loki CNAME for localhost.sesh + // localhost.sesh CNAME for PUBKEY.sesh + // foo.PUBKEY.sesh IN X VALUE (or whatever) + // And so for the rest of the answer processing we pretend that we were given PUBKEY.sesh, + // rather than localhost.loki/.sesh: + qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); + msg.set_rr_name(qname); + + tld = our_tld; + hostname = std::move(our_hostname); + } + else if (qname == random_snode) + { + // Similar to the localhost case: we first return a CNAME of random.snode -> + // SOMEPK.snode, then continue processing as if that was what you asked for. + + if (auto* rc = _router.node_db().get_random_rc()) + { + hostname = rc->router_id().to_string(); + qname = "{}.{}"_format(hostname, RELAY_TLD); + msg.add_cname_reply(qname, 1s); + if (q.qtype == dns::RRType::CNAME) + { + reply(std::move(msg)); + return true; + } + + msg.set_rr_name(qname); + } + else + { + // We found no RC at all, which probably means our connection is dead. 
+ reply(msg.nxdomain()); + return true; + } + } + else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) + { + // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of + // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as + // mapping to a AAAA). + + // TODO: .sesh SNS resolution, once implemented + + // ONS lookup: + auto lookup = "{}.loki"_format(hostname); + _router.session_endpoint().resolve_sns( + lookup, + [this, + lookup, + sub = std::move(sub), + reply = std::move(reply), + msg_ptr = std::make_shared(std::move(msg)), + cname_only = q.qtype == dns::RRType::CNAME]( + std::optional maybe_netaddr, + bool /*assertive*/, + std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; + msg.set_rr_name(lookup); + if (maybe_netaddr) + { + auto target = maybe_netaddr->to_string(); + msg.add_cname_reply(target, std::chrono::floor(ttl)); + if (cname_only) + return; + auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); + msg.set_rr_name(qname); + if (!handle_local(reply, msg, std::move(qname))) + { + log::warning( + logcat, "ONS '{}' subrequest did not properly handle sending a reply!", lookup); + return reply(msg.servfail()); + } + return; + } + // TODO FIXME: if `assertive` is true then we can provide a TTL for this failure + // (via an SOA authority record). (When not assertive we shouldn't do so, + // because not having an SOA TTL means a downstream recursive resolver shouldn't + // cache the negative response). + reply(msg.nxdomain()); + }); + return true; + } + + if (q.qtype == dns::RRType::TXT) + { + // TXT records can be used to query some basic info: + + // TXT on MYPUBKEY.sesh returns the basic version and netid: + if (localhost && sub.empty()) + msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( + _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); + + // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) + else if (auto rid = is_snode(qname)) + { + if (auto* rc = _router.node_db().get_rc(*rid)) + { + msg.add_txt_reply("rc v={} i={} t={}"_format( + fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); + } + else + msg.nxdomain(); + } + + // TXT on path.PUBKEY.{sesh,snode} returns the current path info to that node, if a + // session is established (nxdomain if no active session). + else if (sub.size() == 1 && sub.front() == "path") + { + log::debug(logcat, "TXT path request for {}.{}", hostname, tld); + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + if (auto* s = _router.session_endpoint().get_session(*maybe_netaddr); s && s->is_established()) + { + auto path = s->current_path_info(); + msg.add_txt_reply( + "d={}; path={}; ttl={}; p={}; pj={}.{:03d}; pr={}; pt={}; pT={}"_format( + s->is_outbound ? 
"out" : "in", + fmt::join( + std::views::transform( + path.relays, [](const auto& r) { return "{}@{}"_format(r.first, r.second); }), + " "), + std::chrono::round(path.expiry - srouter::time_now_ms()).count(), + path.ping_mean.count(), + path.ping_jitter / 1ms, + (path.ping_jitter % 1ms).count(), + path.ping_responses, + path.ping_timeouts, + path.ping_recent_timeouts), + 0s); + } + else + msg.add_txt_reply("d=none"); + } + else + { + log::warning(logcat, "Failed to parse network address {}.{} for path query", hostname, tld); + msg.nxdomain(); + } + } + else + msg.nxdomain(); + reply(std::move(msg)); + return true; + } + + // "Regular" A or AAAA lookups + if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) + { + // Attempt to parse a "pubkey.snode" or "pubkey.sesh": + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + // DNS lookup implies we want a session, so make one (NOP if we have one) + // This also means if we don't use that session the IP mapping will release when + // it expires, which it wouldn't otherwise without a tedious periodic check. + bool created_session = false; + try + { + created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); + } + catch (const std::exception& e) + { + log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); + } + if (created_session) + { + assert(_router.tun_endpoint()); + auto& tun = *_router.tun_endpoint(); + if (aaaa) + msg.add_reply(tun.map6(*maybe_netaddr)); + else if (!sub.empty() && sub.back() == "ipv4"sv) + { + // We don't map IPv4 addresses by default, but it is still possible to get + // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
+ if (auto v4_addr = tun.map4(*maybe_netaddr); v4_addr) + msg.add_reply(*v4_addr); + else + log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); + } + // else they requested A *not* using the magic ipv4 subdomain, so we only have + // AAAA to offer and thus we return a reply without an answer record (which is + // the proper DNS way to say "something exists at this address, but not with the + // type you requested", as opposed to the nxdomain reply below, which means + // "this record does not exist"). + } + else + msg.nxdomain(); + reply(std::move(msg)); + + return true; + } + + log::warning(logcat, "DNS query failure: '{}' is not a valid Session Router name or address", qname); + reply(msg.nxdomain()); + return true; + } + + if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 + && sub[0].starts_with('_') && sub[1].starts_with('_')) + { + if (auto rid = parse_rid(hostname)) + { + _router.session_endpoint().lookup_client_intro( + *rid, + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply)]( + const std::optional& cc) mutable { + if (cc) + { + for (const auto& srv : cc->SRVs()) + if (srv.service == sub[0] && srv.proto == sub[1]) + msg->add_reply(srv); + } + else + msg->nxdomain(); + + reply(std::move(*msg)); + }); + return true; + } + } + + // If we got through everything above without answering then they requested something weird + // (unhandled RR type, perhaps) and so let's just give an NXDOMAIN back: + reply(msg.nxdomain()); + return true; + } + + bool RequestHandler::handle_local_ptr(Message& msg, ReplyCallback& reply) + { + assert(msg.question); + if (msg.question->qtype != srouter::dns::RRType::PTR) + return false; + + auto ip = dns::decode_ptr(msg.question->qname); + if (!ip) + return false; + + auto [mapped, is_ours] = std::visit([this](const auto& ip) { return _router.reverse_lookup(ip); }, *ip); + if (!is_ours) + return false; + + if (mapped) + 
msg.add_ptr_reply(mapped->to_string()); + else + msg.nxdomain(); + + reply(std::move(msg)); + + return true; + } + + void RequestHandler::forward(Message&& m, ReplyCallback&& reply) + { + // TODO FIXME XXX TESTNET TOTHINK + log::critical(logcat, "FORWARDED REQUESTS NEEDS IMPLEMENTATION! RETURNING SERVFAIL"); + reply(m.servfail()); + } + +} // namespace srouter::dns diff --git a/src/dns/handler.hpp b/src/dns/handler.hpp new file mode 100644 index 000000000..85053fb1b --- /dev/null +++ b/src/dns/handler.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include "message.hpp" + +#include +#include + +namespace srouter +{ + class Router; + namespace quic = oxen::quic; +} // namespace srouter + +namespace srouter::dns +{ + class RequestHandler + { + public: + using ReplyCallback = std::function; + + explicit RequestHandler(Router& router); + + // Called when a request arrives to process the request; when the answer is ready, calls + // `reply()` with it. + void operator()(std::span request, const quic::Address& from, ReplyCallback reply); + + private: + // Secret value we use as a key in DNS server cookie hashing. We generate a random once on + // each startup as we currently have no need for this to be deterministic, and that + // regeneration also provides DNS cookie key rotation whenever we restart. + std::array _cookie_secret; + + Router& _router; + + // Called to check if the request is for a local name (i.e. .sesh, .snode, .loki, or a PTR + // record for one of the addresses in our tun). If so, this handles the request and returns + // true; otherwise returns false. + bool handle_local(ReplyCallback& reply, Message& msg, std::string qname); + + // Checks for PTR for a range we own, and if so, replies and returns true. Returns false if + // not a PTR for us (i.e. the caller should continue processing). 
+ bool handle_local_ptr(Message& m, ReplyCallback& reply); + + // Answers the question recursively via our configured upstream DNS servers (if any) + void forward(Message&& m, ReplyCallback&& reply); + }; + +} // namespace srouter::dns diff --git a/src/dns/listener.cpp b/src/dns/listener.cpp new file mode 100644 index 000000000..16325fefa --- /dev/null +++ b/src/dns/listener.cpp @@ -0,0 +1,178 @@ +#include "listener.hpp" + +#include "router/router.hpp" +#include "util/logging.hpp" + +#include +#include + +namespace srouter::dns +{ + namespace + { + auto logcat = log::Cat("dns"); + + struct tcp_conn + { + Listener& listener; + bufferevent* bev; + quic::Address addr; + // This gets shared with the handler callback so that we can tell if the raw tcp_conn + // pointer is still valid: + std::shared_ptr alive = std::make_shared(true); + + tcp_conn(Listener& l, bufferevent* b, sockaddr* src, int socklen) + : listener{l}, bev{b}, addr{src, static_cast(socklen)} + {} + + void close() + { + bufferevent_free(bev); + bev = nullptr; + } + + ~tcp_conn() { *alive = false; } + }; + + } // namespace + + void Listener::evconnlistener_deleter::operator()(::evconnlistener* e) + { + if (e) + evconnlistener_free(e); + } + + Listener::Listener(Router& router, const quic::Address& bind) : _handler{router} { listen(router.loop, bind); } + + struct Listener::udp_socket_helper + { + std::unique_ptr sock; + }; + + // Defaulted, but here because the header doesn't have visibility into the predeclared unique_ptrs + Listener::~Listener() = default; + + void Listener::listen(quic::Loop& loop, const quic::Address& bind) + { + // call_get this so that we can be sure that the callbacks defined here can't be called + // before we are done setting it up: + loop.call_get([&] { + auto h = std::make_unique(); + + h->sock = std::make_unique( + loop.get_event_base(), bind, /*gso=*/false, [this, h = h.get()](quic::Packet&& pkt) { + if (pkt.path.remote == pkt.path.local) + { + log::warning(logcat, "DNS 
packet loop detected: ignoring UDP DNS request"); + return; + } + log::trace(logcat, "Incoming DNS UDP packet from {}", pkt.path.remote); + + // We don't need to worry about keep-alive here because we own the handler, and + // so if it's calling something then `this` must still be alive. + _handler(pkt.data(), pkt.path.remote, [path = pkt.path, udp = h->sock.get()](Message m) { + auto payload = m.encode(); + const size_t sz = payload.size(); + udp->send(path, payload.data(), &sz, 0, 1); + }); + }); + _udp.push_back(std::move(h)); + + _tcp.emplace_back(evconnlistener_new_bind( + loop.get_event_base(), + [](evconnlistener* listener, evutil_socket_t fd, sockaddr* src, int socklen, void* ctx) { + auto* bev = bufferevent_socket_new(evconnlistener_get_base(listener), fd, BEV_OPT_CLOSE_ON_FREE); + auto* c = new tcp_conn{*static_cast(ctx), bev, src, socklen}; + + log::trace(logcat, "Incoming DNS TCP connection from {}", c->addr); + + bufferevent_setcb( + bev, + [](bufferevent* bev, void* ctx) { + // read callback + auto* in = bufferevent_get_input(bev); + while (true) + { + log::trace(logcat, "Incoming DNS TCP data"); + uint16_t reqlen; + if (evbuffer_copyout(in, &reqlen, 2) < 2) + break; + oxenc::big_to_host_inplace(reqlen); + log::trace(logcat, "Incoming DNS TCP request of size {}", reqlen); + size_t pending = evbuffer_get_length(in) - 2; + if (pending < reqlen) + { + // We don't have enough of the request yet, so leave the buffer + // as-is: libevent won't call us again until more data arrives, + // and will just leave the current buffer data in place. 
+ log::trace( + logcat, + "Not enough TCP data ({}) for request body ({}); delaying processing until we " + "get more", + pending, + reqlen); + break; + } + std::vector req; + req.resize(reqlen); + evbuffer_drain(in, 2); + evbuffer_remove(in, req.data(), reqlen); + log::trace(logcat, "Read {}-byte TCP DNS request", req.size()); + + auto* c = static_cast(ctx); + c->listener._handler(req, c->addr, [c, alive = c->alive](Message m) { + if (!*alive) + return; + auto* out = bufferevent_get_output(c->bev); + auto payload = m.encode(); + // The only difference between UDP DNS and TCP DNS encoding is that + // UDP is per-packet, but TCP is a stream of messages where each + // message is prefixed with the length of the message: + uint16_t size = oxenc::host_to_big(static_cast(payload.size())); + if (evbuffer_add(out, &size, 2) == -1 + || evbuffer_add(out, payload.data(), payload.size()) == -1) + { + log::warning(logcat, "Failed to write response to TCP connection; closing"); + bufferevent_free(c->bev); + delete c; + } + }); + } + }, + nullptr, + [](bufferevent* bev, short events, void* ctx) { + auto* c = static_cast(ctx); + // event callback + if (events & BEV_EVENT_EOF) + log::debug(logcat, "UDP TCP connection from {} closed by peer", c->addr); + if (events & BEV_EVENT_ERROR) + log::debug( + logcat, + "UDP TCP connection from {} closed by error: {}", + c->addr, + evutil_socket_error_to_string(EVUTIL_SOCKET_ERROR())); + if (events & BEV_EVENT_TIMEOUT) + // Is this even possible on a listening socket? 
+ log::debug(logcat, "UDP TCP connection from {} timed out", c->addr); + + if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT)) + { + bufferevent_free(bev); + delete c; + } + }, + c); + + bufferevent_enable(bev, EV_READ | EV_WRITE); + }, + this, + LEV_OPT_CLOSE_ON_FREE | LEV_OPT_REUSEABLE, + -1, + bind, + static_cast(bind.socklen()))); + + log::debug(logcat, "session-router DNS listening on {}", bind); + }); + } + +} // namespace srouter::dns diff --git a/src/dns/listener.hpp b/src/dns/listener.hpp new file mode 100644 index 000000000..8938a36b9 --- /dev/null +++ b/src/dns/listener.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include "dns/handler.hpp" + +#include +#include + +#include +#include + +struct evconnlistener; + +namespace srouter +{ + class Router; +} +namespace srouter::dns +{ + namespace quic = oxen::quic; + + /// UDP+TCP listener for receiving and sending DNS requests. This generally works with a + /// dns::RequestHandler to actually generate the replies for a request, which then come back to + /// this class to actually send the response to the network. + class Listener + { + struct evconnlistener_deleter + { + void operator()(::evconnlistener* e); + }; + + struct udp_socket_helper; + + std::list> _udp; + std::list> _tcp; + + // The object that handles processing of the actual request once we have extracted it from a + // UDP packet or TCP stream: + RequestHandler _handler; + + public: + // Creates a TCP+UDP DNS listener that listens on `bind` for DNS requests. + Listener(Router& router, const quic::Address& bind); + + // Adds another TCP+UDP listener on `bind`. This is called implicitly during construction, + // but can also be called if there is a need to listen on multiple addresses. 
+ void listen(quic::Loop& loop, const quic::Address& bind); + + ~Listener(); + }; + +} // namespace srouter::dns diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ac699b738..fb00c4c22 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -1,238 +1,367 @@ #include "message.hpp" -#include "dns.hpp" #include "encode.hpp" -#include "net/ip_packet.hpp" +#include "flags.hpp" #include "srv_data.hpp" #include "util/logging.hpp" -#include #include +#include #include +#include +#include +#include namespace srouter::dns { static auto logcat = log::Cat("dns"); - Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } + Message::Message(Question question) : hdr_id{0}, hdr_fields{}, question{std::move(question)} {} - size_t Message::encode(std::span buf) const + Message Message::clone() const { - auto orig = buf.size(); - if (!write_ints_into( - buf, - hdr_id, - hdr_fields, - static_cast(questions.size()), - static_cast(answers.size()), - static_cast(authorities.size()), - static_cast(additional.size()))) - return 0; - - for (const auto& question : questions) - if (!encode_into(buf, question)) - return 0; - - for (auto& a : answers) - if (!encode_into(buf, a)) - return 0; - - return orig - buf.size(); + Message c; + c.hdr_id = hdr_id; + c.hdr_fields = hdr_fields; + c.question = question; + c.additional_edns = additional_edns; + // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) + return c; } - std::optional Message::extract(std::span& buf) + std::vector Message::encode(bool max_size) const { - auto maybe = std::make_optional(); - auto& m = *maybe; - uint16_t qd_count, an_count, ns_count, ar_count; - if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) + std::vector tmp; + tmp.resize( + max_size ? std::numeric_limits::max() + : additional_edns ? 
additional_edns->max_payload() + : 512); + + prev_names_t prev_names; + std::span buf{tmp}; + uint16_t buf_offset = 0; + + buf_offset += write_ints_into( + buf, + hdr_id, + hdr_fields, + question ? uint16_t{1} : uint16_t{0}, + static_cast(answers.size()), + static_cast(0 /*authorities.size()*/), + static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); + + if (question) + question->encode(buf, prev_names, buf_offset); + + // If we run out of space and have to truncate then we are still supposed to include the + // EDNS part of the additional response, but other answers don't have to be: so if we hit + // such a failure, we back up to this point (throwing away all the answers) so that we + // can include the EDNS response info. + auto initial_len = buf_offset; + + try { - maybe.reset(); - return maybe; - } - m.questions.resize(qd_count); - m.answers.resize(an_count); - // Ignore these: - // m.authorities.resize(ns_count); - // m.additional.resize(ar_count); + for (auto& a : answers) + a->encode(buf, prev_names, buf_offset); - for (auto& q : m.questions) + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) { - if (!q.extract(buf)) + log::debug(logcat, "Response too large! Setting truncation bit"); + + oxenc::write_host_as_big(hdr_fields | flags_TC, tmp.data() + 2); + + // Reset our buffer position back to just before the answers were added. We do this even + // if we aren't going to add EDNS stuff below, because we are not supposed to include + // partial RR entries in a truncated reply.
+ buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + buf_offset = initial_len; + + // The header was written with ANCOUNT = answers.size(), but every answer has just been + // dropped, so zero that count (header bytes 6-7) to keep the message well-formed. + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 6); + + if (additional_edns) { - log::debug(logcat, "failed to decode question"); - maybe.reset(); - return maybe; + try + { + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) + { + // If this failed too then we don't have enough space for the EDNS so we'll just have to omit it + log::debug(logcat, "Unable to fit EDNS additional into DNS response!"); + buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + buf_offset = initial_len; + // The OPT record is omitted, so ARCOUNT (header bytes 10-11) must drop to 0 as well. + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 10); + } } } - for (auto* as : {&m.answers, &m.authorities, &m.additional}) - if (!as->empty()) - log::debug(logcat, "Ignoring answer/authorities/additional sections in dns Message"); - return maybe; - } + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); + tmp.shrink_to_fit(); - nlohmann::json Message::ToJSON() const - { - auto result = nlohmann::json{{"id", hdr_id}, {"fields", hdr_fields}}; - auto& ques = (result["questions"] = nlohmann::json::array()); - auto& ans = (result["answers"] = nlohmann::json::array()); - for (const auto& q : questions) - ques.push_back(q.ToJSON()); - for (const auto& a : answers) - ans.push_back(a.ToJSON()); - return result; + return tmp; } - std::vector Message::encode() const + static std::array make_server_cookie( + std::span client_cookie, + std::span client_ip, + std::span server_cookie_secret, + std::chrono::sys_seconds ts = std::chrono::floor(std::chrono::system_clock::now())) { - std::vector tmp; - tmp.resize(1500); - auto size = encode(tmp); - if (size == 0) - throw std::runtime_error("cannot encode dns message"); - tmp.resize(size); - return tmp; + assert(client_ip.size() == 4 || client_ip.size() == 16); + + static_assert(server_cookie_secret.size() == crypto_shorthash_siphash24_KEYBYTES); + + std::array cookie; + auto ccookie = std::span{cookie}.first<8>(); + auto scookie = std::span{cookie}.last<16>(); + std::memcpy(ccookie.data(),
client_cookie.data(), 8); + + // The first 8 bytes of the server cookie (as per RFC 9018) are: + // - version (always 1) + // - three reserved bytes + // - 4-byte, uint32 unix timestamp + scookie[0] = std::byte{1}; // Version + scookie[1] = std::byte{0}; // - + scookie[2] = std::byte{0}; // - reserved + scookie[3] = std::byte{0}; // - + auto ts_val = static_cast(ts.time_since_epoch().count()); + oxenc::write_host_as_big(ts_val, &scookie[4]); + + // The last 8 bytes of the server cookie are a hash of 8-byte client + // cookie, then the above 8 bytes server cookie fields, then the + // 4- or 16-byte client IP (in network order notation). + std::array hash_data{{0}}; + std::memcpy(hash_data.data(), ccookie.data(), 8); + std::memcpy(hash_data.data() + 8, scookie.data(), 8); + std::memcpy(hash_data.data() + 16, client_ip.data(), client_ip.size()); + crypto_shorthash_siphash24( + reinterpret_cast(scookie.data() + 8), + hash_data.data(), + 16 + client_ip.size(), + reinterpret_cast(server_cookie_secret.data())); + + return cookie; } - void Message::add_serv_fail() + std::optional Message::extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_ip) { - if (questions.size()) + if (client_ip.size() != 4 && client_ip.size() != 16) + throw std::logic_error{"Invalid client IP for Message::extract_question"}; + auto result = std::make_optional(); + auto& m = *result; + uint16_t qd_count, an_count, ns_count, ar_count; + if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; + result.reset(); + return result; } - } + if (qd_count > 1) + { + log::warning(logcat, "Ignoring archaic DNS request with {} > 1 questions", qd_count); + m.bad_extract = true; + return result; + } + // Ignore these: + // 
m.answers.resize(an_count); + // m.authorities.resize(ns_count); + // m.additional.resize(ar_count); - static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; + try + { + if (qd_count) + { + auto& q = m.question.emplace(); + if (!q.extract(buf)) + throw std::invalid_argument{"invalid question"}; + } - void Message::add_reply(ipv4 addr, std::chrono::seconds ttl) - { - std::vector a; - a.resize(4); - oxenc::write_host_as_big(addr.addr, a.data()); - add_reply(RRClass::IN, RRType::A, std::move(a), ttl); - } + // Skip any answers or authority records: + for (uint16_t i = 0; i < an_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid answer RR"}; + for (uint16_t i = 0; i < ns_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid authority RR"}; - void Message::add_reply(ipv6 addr, std::chrono::seconds ttl) - { - std::vector aaaa; - aaaa.resize(16); - oxenc::write_host_as_big(addr.hi, aaaa.data()); - oxenc::write_host_as_big(addr.lo, aaaa.data() + 8); - return add_reply(RRClass::IN, RRType::AAAA, std::move(aaaa), ttl); + // In the additional section we look for an EDNS entry, and skip anything else: + for (uint16_t i = 0; i < ar_count; i++) + { + static_assert(crypto_shorthash_siphash24_KEYBYTES == 16); + auto a_rr = ParsedRR::extract(buf); + if (!a_rr) + throw std::invalid_argument{"invalid additional RR"}; + if (a_rr->name != "." 
|| a_rr->rr_type != RRType::OPT) + { + continue; + } + + if (m.additional_edns) + throw std::invalid_argument{"found invalid multiple additional OPT records"}; + + auto max_payload = static_cast(a_rr->rr_class); + m.additional_edns.emplace(std::min(max_payload, 1232)); + + std::optional> cookie; + for (auto optbuf = a_rr->rdata; !optbuf.empty();) + { + if (optbuf.size() < 4) + throw std::invalid_argument{"additional OPT data section too small"}; + auto opt_code = oxenc::load_big_to_host(optbuf.data()); + auto opt_len = oxenc::load_big_to_host(optbuf.data() + 2); + optbuf = optbuf.subspan(4); + if (opt_len > optbuf.size()) + throw std::invalid_argument{"additional OPT option value length too small"}; + auto value = optbuf.subspan(0, opt_len); + optbuf = optbuf.subspan(opt_len); + + if (opt_code == PRR_EDNS::OPT_COOKIE) + { + if (m.additional_edns->cookie) + throw std::invalid_argument{"Duplicate OPT client cookies"}; + + if (value.size() == 8) + { + // This is the client sending a new cookie, requesting a new server + // cookie (i.e. because it doesn't currently have one). + + m.additional_edns->cookie = + make_server_cookie(value.first<8>(), client_ip, server_cookie_secret); + } + else if (value.size() == 24) + { + // This is the client sending its cookie along with a previously + // obtained server cookie for that client cookie, so we are supposed + // to validate it. + auto ccookie = value.first<8>(); + auto scookie = value.last<16>(); + + std::chrono::sys_seconds ts{ + std::chrono::seconds{oxenc::load_big_to_host(&scookie[4])}}; + + auto expected = make_server_cookie(ccookie, client_ip, server_cookie_secret, ts); + bool bad_cookie = std::memcmp(value.data(), expected.data(), 24) != 0; + + auto now = std::chrono::floor(std::chrono::system_clock::now()); + + if (!bad_cookie && ts >= now - 30min && ts <= now + 5min) + // Cookie is good and the timestamp in it is close to now, so the + // cookie stays as-is. 
+ std::memcpy(m.additional_edns->cookie.emplace().data(), value.data(), 24); + + else + { + // If the cookie timestamp is too far away then it is a badcookie + // failure. (We don't have to worry about client clock skew because + // supposedly *we* issued this with the timestamp in it). + if (bad_cookie || ts < now - 1h || ts > now + 5min) + { + // When this is set we'll send a proper bad cookie response + // immediately after parsing: + m.additional_edns->bad_cookie = true; + // Extended rcode is, um, a wee bit hacky: we put the high 8 + // bits of the 12-bit error code into the OPT TTL field, and + // then continue to use the 4-bit RCODE for the bottom 4 bits. + m.additional_edns->ttl = + std::chrono::seconds{(uint32_t{PRR_EDNS::EXT_RCODE_BADCOOKIE} >> 4) << 24}; + // (The other bytes are all 0 values) + } + + // else it's valid, just a little bit (but not too) old and they are + // due for a new cookie. + + // In either of the above cases, we give the client a new cookie + // to use, with an updated new timestamp + m.additional_edns->cookie = + make_server_cookie(ccookie, client_ip, server_cookie_secret, now); + } + } + // Else we have an unparseable/non-understood cookie, and so we are supposed + // to ignore the option and discard the cookie data. + } + } + } + } + catch (const std::exception& e) + { + log::debug(logcat, "failed to parse DNS message: {}", e.what()); + m.bad_extract = true; + } + + return result; } void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } - void Message::add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl) + // TODO FIXME: "RA" means we advertise that we support recursion, but we should only do that + // when we have an upstream DNS server available. 
(This TODO is also in server.cpp) + static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; + + void Message::add_nodata_reply() { - if (questions.empty()) + if (question) + hdr_fields |= reply_flags; + } + + template RR, typename... Args> + void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) + { + if (!m.question) return; - hdr_fields |= reply_flags; + m.hdr_fields |= reply_flags; - auto& ans = answers.emplace_back(); - ans.rr_name = get_rr_name(); - ans.rr_type = type; - ans.rr_class = cls; - ans.ttl = ttl; - ans.rData = std::move(data); + m.answers.push_back(std::make_unique(std::string{m.get_rr_name()}, ttl, std::forward(args)...)); } - void Message::add_nodata_reply() - { - if (not questions.empty()) - hdr_fields |= reply_flags; - } + void Message::add_reply(const ipv4& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } + + void Message::add_reply(const ipv6& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::CNAME, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode CNAME value {}", name); + make_reply(*this, ttl, std::string{name}); } void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::PTR, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode PTR value {}", name); + make_reply(*this, ttl, std::string{name}); } - void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) - { - std::array tmp; - std::span remaining{tmp}; - if (!write_ints_into(remaining, srv.priority, srv.weight, srv.port)) - return; - if (!write_name_into(remaining, srv.target)) - return; + void Message::add_reply(const SRVData& srv, 
std::chrono::seconds ttl) { make_reply(*this, ttl, srv); } - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); - } + void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } - void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) + Message&& Message::apply_rcode(uint16_t rcode, bool authoritative) { - std::array tmp; - std::span remaining{tmp}; - while (!txt.empty()) + hdr_fields = set_rcode(hdr_fields, rcode); + if (question) { - auto piecelen = std::min(txt.size(), size_t{255}); - if (remaining.size() <= piecelen) - throw std::length_error{"TXT record too big"}; - remaining.front() = static_cast(piecelen); - std::memcpy(remaining.data() + 1, txt.data(), piecelen); - txt.remove_prefix(piecelen); - remaining = remaining.subspan(1 + piecelen); + hdr_fields |= reply_flags; + if (authoritative) + hdr_fields |= flags_AA; + else + hdr_fields &= ~flags_AA; } - - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); + return std::move(*this); } - void Message::add_nx_reply() + Message&& Message::servfail() { - if (questions.size()) - { - answers.clear(); - authorities.clear(); - additional.clear(); - - // authorative response with recursion available - hdr_fields |= reply_flags; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - hdr_fields |= flags_RCODENxDomain; - } + answers.clear(); + return apply_rcode(RCODE_ServFail); } - std::string Message::to_string() const + Message&& Message::formerr() { - return fmt::format( - "[DNSMessage id={:x} fields={:x} questions={{{}}} answers={{{}}} authorities={{{}}} " - "additional={{{}}}]", - hdr_id, - hdr_fields, - fmt::join(questions, ","), - fmt::join(answers, ","), - fmt::join(authorities, ","), - fmt::join(additional, ",")); + answers.clear(); + return apply_rcode(RCODE_FormErr); } + Message&& 
Message::nxdomain(bool authoritative) { return apply_rcode(RCODE_NxDomain, authoritative); } + } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index b12369091..99fb1d86c 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -4,8 +4,6 @@ #include "question.hpp" #include "rr.hpp" -#include - #include namespace srouter @@ -19,15 +17,37 @@ namespace srouter struct Message { Message() = default; - explicit Message(const Question& question); + explicit Message(Question question); - nlohmann::json ToJSON() const; + // Non-copyable; see clone() if you want a copy with just the question. + Message(const Message&) = delete; - static constexpr auto DEFAULT_ANSWER_TTL = 10s; + Message(Message&&) = default; - void add_nx_reply(); + // Clones the message with question/flags/edns response data, but with no answers + Message clone() const; - void add_serv_fail(); + static constexpr auto DEFAULT_ANSWER_TTL = 10s; + + // These two methods mutate the message into a SERVFAIL or FORMERR response, clearing + // all answers. These return an rvalue reference to the object itself to allow the call + // to operate like an implicit `std::move()` call as this is typically a final + // operation; in particular this means: `f(msg.servfail());` is equivalent to + // `msg.servfail(); f(std::move(msg));`. + Message&& servfail(); + Message&& formerr(); + + // Mutate message into a NXDOMAIN but without clearing existing answers. Returns an + // rvalue reference to the current object to allow the result to be easily moved away. + // + // The message will include the authoritative flag (AA) if the argument is omitted (or + // true), and omit it if false. + Message&& nxdomain(bool authoritative = true); + + // This clears any answers and sets the appropriate header flags for a BADCOOKIE + // response. Note that this is only valid when the message has `additional_edns` as + // part of this error code value is carried in that additional RR data.
+ void set_badcookie_flags(); // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. Once set, the @@ -35,7 +55,7 @@ namespace srouter void set_rr_name(std::optional name); std::string_view get_rr_name() const { - return rr_name_override ? *rr_name_override : questions.size() ? questions.front().qname : ""sv; + return rr_name_override ? *rr_name_override : question ? question->qname : ""sv; } void add_nodata_reply(); @@ -43,9 +63,9 @@ namespace srouter void add_cname_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN A' reply containing the given ipv4 address - void add_reply(ipv4 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv4& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN AAAA' reply containing the given ipv6 address - void add_reply(ipv6 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv6& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); void add_reply(const SRVData& srv, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); @@ -53,23 +73,51 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - size_t encode(std::span buf) const; - std::vector encode() const; - - static std::optional extract(std::span& buf); + // Encodes a response. If max_size is true then we allow up to 65535 bytes for the + // response, otherwise we allow either the EDNS max payload (up to 1232), or 512 + // (without EDNS in the query). + std::vector encode(bool max_size = false) const; + + // Parses a question Message from the given buf, removing the question from the prefix + // of buf. 
`server_cookie_secret` and `client_addr` contains information needed for DNS + // cookie handling; `server_cookie_secret` is something derived from the SR private key + // seed + startup time, while client_addr is the raw bytes of the IP address (4 or 16 + // bytes for IPv4/IPv6, respectively). + // + // Returns nullopt if the request cannot be parsed at all; returns a Message with + // `bad_extract` set to true if it was parseable but not valid and should be immediately + // replied to with an error (which will already be set up in the returned Message + // object). + static std::optional extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_addr); + + // See extract_question, above. + bool bad_extract{false}; std::string to_string() const; uint16_t hdr_id; uint16_t hdr_fields; - std::vector questions; - std::vector answers; - std::vector authorities; - std::vector additional; + + std::optional question; + std::vector> answers; + + // Currently unused: + // std::vector authorities; + // std::vector additional; + + // Currently the only additional record we do anything with is the OPT section for + // enabling EDNS (most significantly for allowing large DNS packets) + std::optional additional_edns; + std::optional rr_name_override; private: void add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl); + + Message&& apply_rcode(uint16_t rcode, bool authoritative = false); }; } // namespace dns diff --git a/src/dns/question.cpp b/src/dns/question.cpp index 0473e6243..60dee9e57 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -1,7 +1,6 @@ #include "question.hpp" #include "address/address.hpp" -#include "dns.hpp" #include "encode.hpp" #include "util/logging.hpp" #include "util/logging/buffer.hpp" @@ -19,14 +18,10 @@ namespace srouter::dns throw std::invalid_argument{"qname cannot be empty"}; } - size_t Question::encode(std::span buf) const + void Question::encode(std::span& buf, prev_names_t& 
prev_names, uint16_t& buf_offset) const { - auto orig = buf; - if (!write_name_into(buf, qname)) - return 0; - if (!write_ints_into(buf, static_cast(qtype), static_cast(qclass))) - return 0; - return orig.size() - buf.size(); + encode_name(buf, qname, prev_names, buf_offset); + buf_offset += write_ints_into(buf, static_cast(qtype), static_cast(qclass)); } bool Question::extract(std::span& buf) diff --git a/src/dns/question.hpp b/src/dns/question.hpp index 1647b6a81..4efc35b5d 100644 --- a/src/dns/question.hpp +++ b/src/dns/question.hpp @@ -11,7 +11,7 @@ namespace srouter::dns Question() = default; Question(std::string name, RRType type); - size_t encode(std::span buf) const; + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; bool extract(std::span& buf); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index fcd1070a8..58d0dacfc 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -1,45 +1,145 @@ #include "rr.hpp" -#include "dns.hpp" #include "encode.hpp" #include #include +#include +#include namespace srouter::dns { - ResourceRecord::ResourceRecord(std::string name, RRType type, std::vector data) - : rr_name{std::move(name)}, rr_type{type}, rr_class{RRClass::IN}, ttl{1s}, rData{std::move(data)} - {} + std::optional ParsedRR::extract(std::span& buf) + { + auto name = extract_name(buf); + if (!name || buf.size() < 2 + 2 + 4 + 2 /* type + class + ttl + rdatalen */) + return std::nullopt; + auto typ = oxenc::load_big_to_host(buf.data()); + auto cls = oxenc::load_big_to_host(buf.data() + 2); + auto ttl = oxenc::load_big_to_host(buf.data() + 4); + auto len = oxenc::load_big_to_host(buf.data() + 8); + buf = buf.subspan(10); + + if (buf.size() < len) + return std::nullopt; + + auto rdata = buf.subspan(0, len); + buf = buf.subspan(len); + + return ParsedRR{ + .name = std::move(*name), + .rr_type = static_cast(typ), + .rr_class = static_cast(cls), + .ttl = std::chrono::seconds{ttl}, + .rdata = rdata}; + } + + static void check_buf_size(const 
std::span& buf, size_t needed) + { + if (buf.size() < needed) + throw std::out_of_range{"DNS response exceeds max size"}; + } - size_t ResourceRecord::encode(std::span buf) const + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf.size(); - if (write_name_into(buf, rr_name) - && write_ints_into( - buf, - static_cast(rr_type), - static_cast(rr_class), - static_cast(ttl.count())) - && write_rdata_into(buf, rData)) - return orig - buf.size(); - return 0; + encode_name(buf, rr_name, prev_names, buf_offset); + buf_offset += write_ints_into( + buf, static_cast(rr_type()), static_cast(rr_class), static_cast(ttl.count())); + // The RR value is in a chunk with a 2-byte length in front of it. We don't actually know + // the length yet (especially for things like CNAME, where there might be name compression), + // so we're going to stick a 0 in and then come back and fill it in after we write the + // value. + check_buf_size(buf, 2); + auto size_buf = buf.subspan(0, 2); + buf_offset += 2; + buf = buf.subspan(2); + encode_data(buf, prev_names, buf_offset); + uint16_t size = buf.data() - size_buf.data() - 2; + oxenc::write_host_as_big(size, size_buf.data()); } nlohmann::json ResourceRecord::ToJSON() const { return nlohmann::json{ {"name", rr_name}, - {"type", static_cast(rr_type)}, + {"type", static_cast(rr_type())}, {"class", static_cast(rr_class)}, {"ttl", ttl.count()}, - {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}}; + /* FIXME: need to virtualize a display for the data, if we care about json representation: + {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}*/}; } std::string ResourceRecord::to_string() const { - return "RR:[ name:{} | type:{} | class:{} | ttl:{} | rdata-size:{} ]"_format( - rr_name, static_cast(rr_type), static_cast(rr_class), ttl, rData.size()); + return "RR:[name:{}|type:{}|class:{}|ttl:{}]"_format( + rr_name, static_cast(rr_type()), 
static_cast(rr_class), ttl); + } + + void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + check_buf_size(buf, rData.size()); + std::memcpy(buf.data(), rData.data(), rData.size()); + buf = buf.subspan(rData.size()); + buf_offset += rData.size(); + } + + RR_A::RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr) : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(4); + oxenc::write_host_as_big(addr.addr, rData.data()); + } + + RR_AAAA::RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr) + : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(16); + oxenc::write_host_as_big(addr.hi, rData.data()); + oxenc::write_host_as_big(addr.lo, rData.data() + 8); + } + + RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) + : RR_bytes{std::move(rr_name), ttl} + { + // TXT records get encoded as {SZ}{data} where SZ is one byte indicating the length of data, + // however they can be repeated which is why we have SZ twice: once for the rData, but then + // again internally for multiple possible strings within the answer. 
+ // + // In total that means we are storing the value length plus 1 additional byte per 255 length + // (or part thereof): + rData.reserve(value.size() + (value.size() + 254) / 255); + do + { + auto* bytes = reinterpret_cast(value.data()); + size_t size = std::min(255, value.size()); + rData.push_back(static_cast(size)); + rData.insert(rData.end(), bytes, bytes + size); + value.remove_prefix(size); + } while (!value.empty()); + } + + void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, name, prev_names, buf_offset); + } + + void RR_SRV::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + buf_offset += write_ints_into(buf, priority, weight, port); + encode_name(buf, target, prev_names, buf_offset); + } + + void PRR_EDNS::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + if (cookie) + { + uint16_t datalen = 2 + 2 + cookie->size(); // code + length + data + check_buf_size(buf, datalen); + oxenc::write_host_as_big(OPT_COOKIE, buf.data()); + oxenc::write_host_as_big(static_cast(cookie->size()), buf.data() + 2); + std::memcpy(buf.data() + 4, cookie->data(), cookie->size()); + buf = buf.subspan(datalen); + buf_offset += datalen; + } } } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 798a624a4..cd94b83c2 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -1,5 +1,8 @@ #pragma once +#include "encode.hpp" +#include "srv_data.hpp" + #include #include @@ -15,34 +18,166 @@ namespace srouter::dns enum class RRType : uint16_t { A = 1, - NS = 2, CNAME = 5, PTR = 12, - MX = 15, TXT = 16, AAAA = 28, SRV = 33, + + OPT = 41, + }; + + // Parsed RR data: this is intentionally very raw and is only for extracting the data, not + // interpreting it. Note that the rdata value points into the input buf: the ParsedRR data + // should not be held longer than the input buffer! 
+ struct ParsedRR + { + std::string name; + RRType rr_type; // *Not* necessarily one of the values defined above + RRClass rr_class; // *Not* necessarily one of the values defined above + std::chrono::seconds ttl; + std::span rdata; + + // Attempts to parse an RR from the beginning of `buf`. `buf` will have the prefix removed + // containing the extracted record. Returns nullopt on extraction error. + static std::optional extract(std::span& buf); }; + // Abstract base class we use for building RR responses struct ResourceRecord { - ResourceRecord() = default; - explicit ResourceRecord(std::string name, RRType type, std::vector rdata); + ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} + + virtual ~ResourceRecord() = default; + + // Writes this RR to the beginning of buf, eliminating the written section from buf. Throws if buf is exceeded. + // + // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual + // encode_data() to write the value. + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; - // Writes this RR to the beginning of buf. Returns the number of bytes written, or 0 if the - // buffer is too small to hold it. - size_t encode(std::span buf) const; + virtual void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const = 0; nlohmann::json ToJSON() const; std::string to_string() const; std::string rr_name; - RRType rr_type; - RRClass rr_class; + RRClass rr_class = RRClass::IN; std::chrono::seconds ttl; - std::vector rData; + + virtual RRType rr_type() const = 0; static constexpr bool to_string_formattable = true; }; + + // Subclass of ResourceRecord that just has a binary chunk of data. Should not be used for data + // types containing compressible names in the value.
The subclass must take care of encoding + // the rData member value as required; this base class encode_data simply barfs it into the + // buffer as-is. + struct RR_bytes : ResourceRecord + { + std::vector rData; + + using ResourceRecord::ResourceRecord; + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_A : RR_bytes + { + RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr); + RRType rr_type() const override { return RRType::A; } + }; + struct RR_AAAA : RR_bytes + { + RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr); + RRType rr_type() const override { return RRType::AAAA; } + }; + struct RR_TXT : RR_bytes + { + RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value); + RRType rr_type() const override { return RRType::TXT; } + }; + + // Base class for RR types that have a single target name as the value, such as CNAME and PTR + struct RR_target : ResourceRecord + { + std::string name; + + RR_target(std::string rr_name, std::chrono::seconds ttl, std::string name) + : ResourceRecord{std::move(rr_name), ttl}, name{std::move(name)} + {} + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_PTR : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::PTR; } + }; + struct RR_CNAME : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::CNAME; } + }; + struct RR_SRV : ResourceRecord + { + uint16_t priority; + uint16_t weight; + uint16_t port; + std::string target; + + RR_SRV(std::string rr_name, std::chrono::seconds ttl, const SRVData& srv) + : ResourceRecord{std::move(rr_name), ttl}, + priority{srv.priority}, + weight{srv.weight}, + port{srv.port}, + target{srv.target} + {} + + RRType rr_type() const override { return RRType::SRV; } + void encode_data(std::span& buf, prev_names_t& prev_names,
uint16_t& buf_offset) const override; + }; + + // Pseudo-RR for EDNS; a client sends this in the additional section if it supports EDNS, and + // the server sends it back (if provided) to confirm that the server also supports EDNS. + struct PRR_EDNS : ResourceRecord + { + static constexpr uint16_t OPT_COOKIE = 10; + static constexpr uint16_t EXT_RCODE_BADCOOKIE = 23; + + std::optional> cookie; + + // Will be true if the full cookie we were provided was invalid or expired, in which case we + // are supposed to immediately fail with an extended BADCOOKIE error code (which will be + // encoded if this object is encoded into the output with this bool set to true). + bool bad_cookie{false}; + + // Constructs an EDNS value. This is rather hacky, to try to mash it into the fairly + // inflexible older DNS protocol: + // - NAME is always empty (i.e. ".", the root domain) + // - 32-bit TTL is nothing to do with ttl, but actually 3 packed fields: + // - 8-bit "extended rcode" + // - 8-bit version (currently 0) + // - 16-bit flags of which there is one for DNSSEC and all others are reserved + // We currently always use 0 as we don't use extended rcode or dnssec. + // - CLASS isn't a class at all but rather contains the supported UDP payload size. We set + // it to the recommended 1232 size, but if a client gave us a smaller value we should + // reflect that instead. + // + // Beyond that, we support an optional DNS server cookie value (see RFC 7873 and 9018), + // which must be the 8-byte cookie sent by the client followed by a 16 byte server cookie.
+ PRR_EDNS(uint16_t max_payload, std::optional> cookie = std::nullopt) + : ResourceRecord{"", 0s}, cookie{std::move(cookie)} + { + rr_class = static_cast(max_payload); + } + + uint16_t max_payload() const { return static_cast(rr_class); } + constexpr RRType rr_type() const override { return RRType::OPT; } + void encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const override; + }; + } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp deleted file mode 100644 index 2395538fb..000000000 --- a/src/dns/server.cpp +++ /dev/null @@ -1,705 +0,0 @@ -#include "server.hpp" - -#include "constants/apple.hpp" -#include "constants/platform.hpp" -#include "message.hpp" -#include "nm_platform.hpp" -#include "sd_platform.hpp" - -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace srouter::dns -{ - static auto logcat = log::Cat("dns"); - - void QueryJob_Base::cancel() - { - Message reply{_query}; - reply.add_serv_fail(); - send_reply(reply.encode()); - } - - /// sucks up udp packets from a bound socket and feeds it to a server - class UDPReader : public PacketSource, public std::enable_shared_from_this - { - Server& _dns; - std::unique_ptr _udp; - quic::Address _local_addr; - - public: - explicit UDPReader(Server& dns, quic::Loop& loop, quic::Address bind) : _dns{dns} - { - _udp = std::make_unique( - loop.get_event_base(), bind, /*gso=*/false, [this](quic::Packet&& pkt) { - auto& src = pkt.path.remote; // "remote" address is packet source, we ("local") are destination - if (src == _local_addr) - { - log::debug(logcat, "DNS packet received, not handling because we're the packet source", src); - return; - } - - if (not _dns.maybe_handle_payload(shared_from_this(), _local_addr, src, pkt.data())) - log::warning(logcat, "did not handle dns packet from {} to {}", src, _local_addr); - else - log::trace(logcat, "Handled DNS packet from {} to {}", src, _local_addr); - }); - - if (auto maybe_addr = 
bound_on()) - { - _local_addr = *maybe_addr; - log::debug(logcat, "session-router DNS server bound on {}", _local_addr); - } - else - throw std::runtime_error{"cannot find which address our dns socket is bound on"}; - } - - std::optional bound_on() const override { return _udp->address(); } - - bool would_loop(const quic::Address& to, const quic::Address& /*from*/) const override - { - return to != _local_addr; - } - - void send_udp(const quic::Address& to, const quic::Address&, std::span data) const override - { - const size_t bufsize = data.size(); - size_t n_pkts = 1; - auto [ior, sent] = _udp->send(quic::Path{_local_addr, to}, data.data(), &bufsize, 0, n_pkts); - - log::trace( - logcat, - "dns server {} UDP packet to {} (ec={})", - ior.success() ? "sent" : "failed to send", - to, - ior.error_code); - } - }; - - namespace libunbound - { - class Resolver; - - class Query : public QueryJob_Base, public std::enable_shared_from_this - { - std::shared_ptr src; - quic::Address resolverAddr; - quic::Address askerAddr; - - public: - explicit Query( - std::weak_ptr parent_, - Message query, - std::shared_ptr pktsrc, - quic::Address toaddr, - quic::Address fromaddr) - : QueryJob_Base{std::move(query)}, - src{std::move(pktsrc)}, - resolverAddr{std::move(toaddr)}, - askerAddr{std::move(fromaddr)}, - parent{parent_} - {} - std::weak_ptr parent; - int id{}; - - void send_reply(std::vector buf) override; - }; - - /// Resolver_Base that uses libunbound - class Resolver final : public Resolver_Base, public std::enable_shared_from_this - { - ub_ctx* m_ctx = nullptr; - quic::Loop& _loop; -#ifdef _WIN32 - // windows is dumb so we do ub mainloop in a thread - std::thread runner; - std::atomic running; -#else - // std::shared_ptr _poller; -#endif - - std::optional _local_addr; - std::unordered_set> _pending; - - struct ub_result_deleter - { - void operator()(ub_result* ptr) { ::ub_resolve_free(ptr); } - }; - - const net::Platform* net_ptr() const { return 
srouter::net::Platform::Default_ptr(); } - - static void callback(void* data, int err, ub_result* _result) - { - log::debug(logcat, "got dns response from libunbound"); - // take ownership of ub_result - std::unique_ptr result{_result}; - // borrow query - auto* query = static_cast(data); - if (err) - { - // some kind of error from upstream - log::warning(logcat, "Upstream DNS failure: {}", ub_strerror(err)); - query->cancel(); - return; - } - - log::trace(logcat, "queueing dns response from libunbound to userland"); - - auto* ans = reinterpret_cast(result->answer_packet); - std::vector payload{ans, ans + result->answer_len}; - // Replace the `id` value in the unbound response (which is the first 2 bytes of the - // message) with the one we were queried with: - oxenc::write_host_as_big(query->underlying().hdr_id, payload.data()); - - // send reply - query->send_reply(std::move(payload)); - } - - void add_upstream_resolver(const quic::Address& dns) - { - auto str = "{}@{}"_format(dns.host(), dns.port()); - - if (auto err = ub_ctx_set_fwd(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("cannot use {} as upstream dns: {}", str, ub_strerror(err))}; - } - } - - bool configure_apple_trampoline(const quic::Address& dns) - { - // On Apple, when we turn on exit mode, we tear down and then reestablish the - // unbound resolver: in exit mode, we set use upstream to a localhost trampoline - // that redirects packets through the tunnel. In non-exit mode, we directly use the - // upstream, so we look here for a reconfiguration to use the trampoline port to - // check which state we're in. - // - // We have to do all this crap because we can't directly connect to upstream from - // here: within the network extension, macOS ignores the tunnel we are managing and - // so, if we didn't do this, all our DNS queries would leak out around the tunnel. 
- // Instead we have to bounce things through the objective C trampoline code (which - // is what actually handles the upstream querying) so that it can call into Apple's - // special snowflake API to set up a socket that has the magic Apple snowflake sauce - // added on top so that it actually routes through the tunnel instead of around it. - // - // But the trampoline *always* tries to send the packet through the tunnel, and that - // will only work in exit mode. - // - // All of this macos behaviour is all carefully and explicitly documented by Apple - // with plenty of examples and other exposition, of course, just like all of their - // wonderful new APIs to reinvent standard unix interfaces with half-baked - // replacements. - - if constexpr (platform::is_apple) - { - if (dns.host() == "127.0.0.1" and dns.port() == apple::dns_trampoline_port) - { - // macOS is stupid: the default (0.0.0.0) fails with "send failed: Can't - // assign requested address" when unbound tries to connect to the localhost - // address using a source address of 0.0.0.0. Yay apple. - set_opt("outgoing-interface:", "127.0.0.1"); - - // The trampoline expects just a single source port (and sends everything - // back to it). - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(apple::dns_trampoline_source_port)); - return true; - } - } - return false; - } - - void configure_upstream(const srouter::DnsConfig& conf) - { - bool is_apple_tramp = false; - - // set up forward dns - for (const auto& dns : conf._upstream_dns) - { - add_upstream_resolver(dns); - is_apple_tramp = is_apple_tramp or configure_apple_trampoline(dns); - } - - if (auto maybe_addr = conf._query_bind; maybe_addr and not is_apple_tramp) - { - quic::Address addr{*maybe_addr}; - auto host = addr.host(); - - if (addr.port() == 0) - { - // unbound manages their own sockets because of COURSE it does. 
so we find - // an open port on our system and use it so we KNOW what it is before giving - // it to unbound to explicitly bind to JUST that port. - - auto fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); -#ifdef _WIN32 - if (fd == INVALID_SOCKET) -#else - if (fd == -1) -#endif - { - throw std::invalid_argument{ - fmt::format("Failed to create UDP socket for unbound: {}", strerror(errno))}; - } - -#ifdef _WIN32 -#define CLOSE closesocket -#else -#define CLOSE close -#endif - if (0 != bind(fd, static_cast(addr), addr.socklen())) - { - CLOSE(fd); - throw std::invalid_argument{ - fmt::format("Failed to bind UDP socket for unbound: {}", strerror(errno))}; - } - struct sockaddr_storage sas; - auto* sa = reinterpret_cast(&sas); - socklen_t sa_len = sizeof(sas); - int rc = getsockname(fd, sa, &sa_len); - CLOSE(fd); -#undef CLOSE - if (rc != 0) - { - throw std::invalid_argument{ - fmt::format("Failed to query UDP port for unbound: {}", strerror(errno))}; - } - - addr = quic::Address{sa, sizeof(sockaddr)}; - } - _local_addr = addr; - - log::debug(logcat, "sending dns queries from {}", addr.to_string()); - // set up query bind port if needed - set_opt("outgoing-interface:", host); - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(addr.port())); - } - } - - void set_opt(const std::string& key, const std::string& val) - { - ub_ctx_set_option(m_ctx, key.c_str(), val.c_str()); - } - - // Copy of the DNS config (a copy because on some platforms, like Apple, we change the - // applied upstream DNS settings when turning on/off exit mode). 
- srouter::DnsConfig m_conf; - - public: - explicit Resolver(quic::Loop& loop, srouter::DnsConfig conf) : _loop{loop}, m_conf{std::move(conf)} - { - up(m_conf); - } - - ~Resolver() override { down(); } - - std::string_view resolver_name() const override { return "unbound"; } - - std::optional get_local_addr() const override { return _local_addr; } - - void remove_pending(const std::shared_ptr& query) { _pending.erase(query); } - - void up(const srouter::DnsConfig& conf) - { - if (m_ctx) - throw std::logic_error{"Internal error: attempt to Up() dns server multiple times"}; - - m_ctx = ::ub_ctx_create(); - // set libunbound settings - - set_opt("do-tcp:", "no"); - - for (const auto& [k, v] : conf.extra_opts) - set_opt(k, v); - - // add host files - for (const auto& file : conf.hostfiles) - { - const auto str = file.string(); - if (auto ret = ub_ctx_hosts(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("Failed to add host file {}: {}", file, ub_strerror(ret))}; - } - } - - configure_upstream(conf); - - // set async - ub_ctx_async(m_ctx, 1); - // setup mainloop -#ifdef _WIN32 - running = true; - runner = std::thread{[this]() { - while (running) - { - // poll and process callbacks it this thread - if (ub_poll(m_ctx)) - { - ub_process(m_ctx); - } - else // nothing to do, sleep. 
- std::this_thread::sleep_for(10ms); - } - }}; -#else - // TODO: replace uvw shim shit with new libev stuff - // if (auto loop_ptr = loop->MaybeGetUVWLoop()) - // { - // _poller = loop_ptr->resource(ub_fd(m_ctx)); - // _poller->on([this](auto&, auto&) { ub_process(m_ctx); }); - // _poller->start(uvw::PollHandle::Event::READABLE); - // return; - // } -#endif - } - - void down() override - { -#ifdef _WIN32 - if (running.exchange(false)) - { - log::debug(logcat, "shutting down win32 dns thread"); - runner.join(); - } -#else - // if (_poller) - // _poller->close(); -#endif - if (m_ctx) - { - ::ub_ctx_delete(m_ctx); - m_ctx = nullptr; - - // destroy any outstanding queries that unbound hasn't fired yet - if (not _pending.empty()) - { - log::debug(logcat, "cancelling {} pending queries", _pending.size()); - // We must copy because Cancel does a loop call to remove itself, but since - // we are already in the main loop it happens immediately, which would - // invalidate our iterator if we were looping through m_Pending at the time. 
- auto copy = _pending; - for (const auto& query : copy) - query->cancel(); - } - } - } - - int rank() const override { return 10; } - - void reset_resolver(std::optional> replace_upstream) override - { - down(); - if (replace_upstream) - m_conf._upstream_dns = std::move(*replace_upstream); - up(m_conf); - } - - template - void call(Callable&& f) - { - _loop.call(std::forward(f)); - } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) override - { - log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query, source, to, from); - // no questions, send fail - if (query.questions.empty()) - { - log::debug(logcat, "dns from {} to {} has empty query questions, sending failure reply", from, to); - tmp->cancel(); - return true; - } - - for (const auto& q : query.questions) - { - // dont process .sesh/.loki/.snode - if (q.has_tld(CLIENT_TLD) or q.has_tld(RELAY_TLD) or q.has_tld("loki")) - { - log::warning( - logcat, - "dns from {} to {} is for .{}/{}/loki but got to the unbound " - "resolver; sending failure reply", - from, - to, - CLIENT_TLD, - RELAY_TLD); - tmp->cancel(); - return true; - } - } - if (not m_ctx) - { - // we are down - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't set " - "up, sending failure reply", - from, - to); - tmp->cancel(); - return true; - } - -#ifdef _WIN32 - if (not running) - { - // we are stopping the win32 thread - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't " - "running, sending failure reply", - from, - to); - tmp->Cancel(); - return true; - } -#endif - const auto& q = query.questions[0]; - if (auto err = ub_resolve_async( - m_ctx, - std::string{q.name()}.c_str(), - static_cast(q.qtype), - static_cast(q.qclass), - tmp.get(), - &Resolver::callback, - nullptr)) - { - log::warning(logcat, "failed to send upstream query 
with libunbound: {}", ub_strerror(err)); - tmp->cancel(); - } - else - { - log::trace(logcat, "dns from {} to {} processing via libunbound", from, to); - _pending.insert(std::move(tmp)); - } - - return true; - } - }; - - void Query::send_reply(std::vector data) - { - log::trace(logcat, "Query::send_reply called"); - if (_done.test_and_set()) - return; - - auto parent_ptr = parent.lock(); - - if (parent_ptr) - { - parent_ptr->call( - [self = shared_from_this(), parent_ptr = std::move(parent_ptr), data = std::move(data)] { - log::trace( - logcat, - "forwarding dns response from libunbound to userland (resolverAddr: {}, " - "askerAddr: {})", - self->resolverAddr, - self->askerAddr); - self->src->send_udp(self->askerAddr, self->resolverAddr, data); - // remove query - parent_ptr->remove_pending(self); - }); - } - else - log::error(logcat, "no parent"); - } - } // namespace libunbound - - Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) - : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - {} - - std::vector> Server::get_all_resolvers() const - { - return {_resolvers.begin(), _resolvers.end()}; - } - - void Server::start() - { - // set up udp sockets - for (const auto& addr : _conf._bind_addrs) - { - if (auto ptr = make_packet_source_on(addr, _conf)) - add_packet_source(std::move(ptr)); - } - - // add default resolver as needed - if (auto ptr = make_default_resolver()) - add_resolver(ptr); - - // FIXME: this should be handled by RoutePoker once it is resurrected, handling whether - // we eat all DNS traffic or just .sesh/.loki/.snode. For now, we only handle those. 
- set_dns_mode(false); - } - - std::shared_ptr Server::create_platform() const - { - auto plat = std::make_shared(); - if constexpr (srouter::platform::has_systemd) - { - plat->add_impl(std::make_unique()); - plat->add_impl(std::make_unique()); - } - return plat; - } - - std::shared_ptr Server::make_packet_source_on(const quic::Address& addr, const srouter::DnsConfig&) - { - return std::make_shared(*this, _loop, addr); - } - - std::shared_ptr Server::make_default_resolver() - { - if (_conf._upstream_dns.empty()) - { - log::debug( - logcat, - "explicitly no upstream dns providers specified, we will not resolve anything but " - ".{}/{}/loki", - CLIENT_TLD, - RELAY_TLD); - return nullptr; - } - - return std::make_shared(_loop, _conf); - } - - std::vector Server::bound_packet_source_addrs() const - { - std::vector addrs; - - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto maybe_addr = ptr->bound_on()) - addrs.emplace_back(*maybe_addr); - } - return addrs; - } - - std::optional Server::first_bound_packet_source_addr() const - { - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto bound = ptr->bound_on()) - return bound; - } - return std::nullopt; - } - - void Server::add_resolver(std::weak_ptr resolver) { _resolvers.insert(resolver); } - - void Server::add_resolver(std::shared_ptr resolver) - { - _owned_resolvers.insert(resolver); - add_resolver(std::weak_ptr{resolver}); - } - - void Server::add_packet_source(std::weak_ptr pkt) { _packet_sources.push_back(pkt); } - - void Server::add_packet_source(std::shared_ptr pkt) - { - add_packet_source(std::weak_ptr{pkt}); - _owned_packet_sources.push_back(std::move(pkt)); - } - - void Server::stop() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->down(); - } - } - - void Server::reset() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->reset_resolver(); - } - } - - void 
Server::set_dns_mode(bool all_queries) - { - if (auto maybe_addr = first_bound_packet_source_addr()) - _platform->set_resolver(m_NetIfIndex, *maybe_addr, all_queries); - } - - bool Server::maybe_handle_payload( - const std::shared_ptr& ptr, - const quic::Address& to, - const quic::Address& from, - std::span payload) - { - // dont process to prevent feedback loop - if (ptr->would_loop(to, from)) - { - log::warning(logcat, "preventing dns packet replay to={} from={}", to, from); - return false; - } - - auto maybe = Message::extract(payload); - if (not maybe) - { - log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); - return false; - } - - auto& msg = *maybe; - // we don't provide a DoH resolver because it requires verified TLS - // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal - // thankfully mozilla added a backdoor that allows ISPs to turn it off - // so we disable DoH for firefox using mozilla's ISP backdoor - // see: https://github.com/oxen-io/lokinet/issues/832 - for (const auto& q : msg.questions) - { - // is this firefox looking for their backdoor record? - if (q.name() == "use-application-dns.net") - { - // yea it is, let's turn off DoH because god is dead. 
- msg.add_nx_reply(); - // press F to pay respects and send it back where it came from - ptr->send_udp(from, to, msg.encode()); - return true; - } - } - - if (_resolvers.empty()) - { - log::warning(logcat, "Trying to resolve DNS query, but we no resolver set up."); - return false; - } - for (const auto& resolver : _resolvers) - { - if (auto res_ptr = resolver.lock()) - { - log::trace(logcat, "check resolver {} for dns from {} to {}", res_ptr->resolver_name(), from, to); - if (res_ptr->maybe_hook_dns(ptr, msg, to, from)) - { - log::trace(logcat, "resolver {} handling dns from {} to {}", res_ptr->resolver_name(), from, to); - return true; - } - } - } - return false; - } - -} // namespace srouter::dns diff --git a/src/dns/server.hpp b/src/dns/server.hpp deleted file mode 100644 index a01b90757..000000000 --- a/src/dns/server.hpp +++ /dev/null @@ -1,196 +0,0 @@ -#pragma once - -#include "config/config.hpp" -#include "message.hpp" -#include "net/ip_packet.hpp" -#include "platform.hpp" -#include "util/compare_ptr.hpp" - -#include -#include - -#include -#include - -namespace srouter::dns -{ - /// a job handling 1 dns query - class QueryJob_Base - { - protected: - /// the original dns query - Message _query; - - /// True if we've sent a reply (including via a call to cancel) - std::atomic_flag _done = ATOMIC_FLAG_INIT; - - public: - explicit QueryJob_Base(Message query) : _query{std::move(query)} {} - - virtual ~QueryJob_Base() = default; - - Message& underlying() { return _query; } - - const Message& underlying() const { return _query; } - - /// cancel this operation and inform anyone who cares - void cancel(); - - /// send a raw buffer back to the querier - virtual void send_reply(std::vector buf) = 0; - }; - - class PacketSource - { - public: - /// stop reading packets and end operation - virtual ~PacketSource() = default; - - /// return true if traffic with source and dest addresses would cause a - /// loop in resolution and thus should not be sent to query handlers - 
virtual bool would_loop(const quic::Address& to, const quic::Address& from) const = 0; - - /// send UDP payload with src and dst address containing buf on this packet source - virtual void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const = 0; - - /// returns the sockaddr we are bound on if applicable - virtual std::optional bound_on() const = 0; - }; - - /// non complex implementation of QueryJob_Base for use in things that - /// only ever called on the mainloop thread - class QueryJob : public QueryJob_Base, std::enable_shared_from_this - { - std::shared_ptr src; - const quic::Address resolver; - const quic::Address asker; - - public: - explicit QueryJob( - std::shared_ptr source, - const Message& query, - const quic::Address& to_, - const quic::Address& from_) - : QueryJob_Base{query}, src{std::move(source)}, resolver{to_}, asker{from_} - {} - - void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } - }; - - /// handler of dns query hooking - /// intercepts dns for internal processing - class Resolver_Base - { - protected: - /// return the sorting order for this resolver - /// lower means it will be tried first - virtual int rank() const = 0; - - public: - virtual ~Resolver_Base() = default; - - /// less than via rank - bool operator<(const Resolver_Base& other) const { return rank() < other.rank(); } - - /// greater than via rank - bool operator>(const Resolver_Base& other) const { return rank() > other.rank(); } - - /// get local socket address that queries are sent from - virtual std::optional get_local_addr() const { return std::nullopt; } - - /// get printable name - virtual std::string_view resolver_name() const = 0; - - /// reset the resolver state, optionally replace upstream info with new info. The default - /// base implementation does nothing. - virtual void reset_resolver(std::optional> = std::nullopt) {} - - /// cancel all pending requests and cease further operation. 
Default operation is a no-op. - virtual void down() {} - - /// attempt to handle a dns message - /// returns true if we consumed this query and it should not be processed again - virtual bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) = 0; - }; - - // Base class for DNS proxy - class Server - { - protected: - /// add a packet source to this server, does share ownership - void add_packet_source(std::shared_ptr resolver); - /// add a resolver to this packet handler, does share ownership - void add_resolver(std::shared_ptr resolver); - - /// create the platform dependant dns stuff - virtual std::shared_ptr create_platform() const; - - public: - virtual ~Server() = default; - - explicit Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif_index); - - /// returns all sockaddr we have from all of our PacketSources - std::vector bound_packet_source_addrs() const; - - /// returns the first sockaddr we have on our packet sources if we have one - std::optional first_bound_packet_source_addr() const; - - /// add a resolver to this packet handler, does not share ownership - void add_resolver(std::weak_ptr resolver); - - /// add a packet source to this server, does not share ownership - void add_packet_source(std::weak_ptr resolver); - - /// create a packet source bound on bindaddr but does not add it - virtual std::shared_ptr make_packet_source_on( - const quic::Address& bindaddr, const srouter::DnsConfig& conf); - - /// sets up all internal binds and such and begins operation - virtual void start(); - - /// stops all operation - virtual void stop(); - - /// reset the internal state - virtual void reset(); - - /// create the default resolver for out config - virtual std::shared_ptr make_default_resolver(); - - std::vector> get_all_resolvers() const; - - /// feed a packet buffer from a packet source. 
- /// returns true if we decided to process the packet and consumed it - /// returns false if we dont want to process the packet - bool maybe_handle_payload( - const std::shared_ptr& pktsource, - const quic::Address& resolver, - const quic::Address& from, - std::span buf); - - /// set which dns mode we are in. - /// true for intercepting all queries. false for just .sesh/.loki/.snode - void set_dns_mode(bool all_queries); - - protected: - quic::Loop& _loop; - srouter::DnsConfig _conf; - std::shared_ptr _platform; - - private: - const unsigned int m_NetIfIndex; - // TODO FIXME: this ownership model is cursed. - std::set, ComparePtr>> _owned_resolvers; - std::set, CompareWeakPtr> _resolvers; - - std::vector> _owned_packet_sources; - std::vector> _packet_sources; - }; - -} // namespace srouter::dns diff --git a/src/ev/udp.cpp b/src/ev/udp.cpp deleted file mode 100644 index c48592e12..000000000 --- a/src/ev/udp.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "udp.hpp" - -namespace srouter -{ - static auto logcat = log::Cat("ev-udp"); - - inline constexpr size_t MAX_BATCH = -#if defined(OXEN_LIBQUIC_UDP_SENDMMSG) || defined(OXEN_LIBQUIC_UDP_GSO) - 24; -#else - 1; -#endif - - UDPHandle::UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb) : _loop{ev} - { - socket = std::make_unique(ev->get_event_base(), bind, std::move(cb)); - _local = socket->address(); - } - - UDPHandle::~UDPHandle() { socket.reset(); } - - io_result UDPHandle::_send_impl(const quic::Path& path, std::byte* buf, size_t size, uint8_t ecn, size_t& n_pkts) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - auto* bufsize = &size; - - if (!socket) - { - log::warning(logcat, "Cannot send packets on closed socket ({})", path); - return io_result{EBADF}; - } - - assert(n_pkts >= 1 && n_pkts <= MAX_BATCH); - - log::trace(logcat, "Sending {} UDP packet(s) {}...", n_pkts, path); - - auto [ret, sent] = socket->send(path, buf, bufsize, ecn, n_pkts); - - if (ret.failure() && 
!ret.blocked()) - { - log::error(logcat, "Error sending packets {}: {}", path, ret.str_error()); - n_pkts = 0; // Drop any packets, as we had a serious error - return ret; - } - - if (sent < n_pkts) - { - if (sent == 0) // Didn't send *any* packets, i.e. we got entirely blocked - log::debug(logcat, "UDP sent none of {}", n_pkts); - - else - { - // We sent some but not all, so shift the unsent packets back to the beginning of buf/bufsize - log::debug(logcat, "UDP undersent {}/{}", sent, n_pkts); - size_t offset = std::accumulate(bufsize, bufsize + sent, size_t{0}); - size_t len = std::accumulate(bufsize + sent, bufsize + n_pkts, size_t{0}); - std::memmove(buf, buf + offset, len); - std::copy(bufsize + sent, bufsize + n_pkts, bufsize); - n_pkts -= sent; - } - - // We always return EAGAIN (so that .blocked() is true) if we failed to send all, even - // if that isn't strictly what we got back as the return value (sendmmsg gives back a - // non-error on *partial* success). - return io_result{EAGAIN}; - } - - n_pkts = 0; - - return ret; - } - - void UDPHandle::_send_or_queue( - const quic::Path& path, std::vector buf, uint8_t ecn, std::function callback) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - if (!socket) - { - log::warning(logcat, "Cannot sent to dead socket for path {}", path); - if (callback) - callback(io_result{EBADF}); - return; - } - - size_t n_pkts = 1; - // size_t bufsize = buf.size(); - auto res = _send_impl(path, buf.data(), buf.size(), ecn, n_pkts); - - if (res.blocked()) - { - socket->when_writeable([this, path, buf = std::move(buf), ecn, cb = std::move(callback)]() mutable { - _send_or_queue(path, std::move(buf), ecn, std::move(cb)); - }); - } - else if (callback) - callback({}); - } - - io_result UDPHandle::send(const quic::Address& dest, std::span data) - { - return _send_impl(quic::Path{_local, dest}, data.data(), data.size(), 0); - } -} // namespace srouter diff --git a/src/ev/udp.hpp b/src/ev/udp.hpp deleted file mode 100644 
index 6179b9e11..000000000 --- a/src/ev/udp.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "net/ip_packet.hpp" -#include "util/logging.hpp" - -#include -#include -#include - -namespace srouter -{ - using UDPSocket = quic::UDPSocket; - - using io_result = quic::io_result; - - class UDPHandle - { - public: - UDPHandle() = delete; - explicit UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb); - ~UDPHandle(); - - private: - std::shared_ptr _loop; - std::unique_ptr socket; - quic::Address _local; - - void _send_or_queue( - const quic::Path& path, - std::vector buf, - uint8_t ecn, - std::function callback = nullptr); - - public: - io_result send(const quic::Address& dest, std::span data); - - quic::Address bind() { return _local; } - }; - -} // namespace srouter diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index c33612ac5..eb354da2b 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -13,7 +13,6 @@ #include "auth/auth.hpp" #include "constants/platform.hpp" #include "contact/sns.hpp" -#include "dns/dns.hpp" #include "dns/encode.hpp" #include "nodedb.hpp" #include "router/route_poker.hpp" @@ -27,205 +26,6 @@ namespace srouter::handlers { static auto logcat = log::Cat("tun"); - bool TunEndpoint::maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) - { - if (not should_hook_dns_message(query)) - return false; - - auto job = std::make_shared(source, query, to, from); - if (!handle_hooked_dns_message(query, [job](dns::Message msg) { job->send_reply(msg.encode()); })) - job->cancel(); - return true; - } - - /// Intercepts DNS IP packets on platforms where binding to a low port isn't viable. - /// (windows/macos/ios/android ... aka everything that is not linux... funny that) - class DnsInterceptor : public dns::PacketSource - { - ip_pkt_hook _hook; - quic::Address _our_ip; // maybe should be an IP type...? 
- srouter::DnsConfig _config; - - public: - explicit DnsInterceptor(ip_pkt_hook reply, quic::Address our_ip, srouter::DnsConfig conf) - : _hook{std::move(reply)}, _our_ip{std::move(our_ip)}, _config{std::move(conf)} - {} - - ~DnsInterceptor() override = default; - - void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const override - { - log::critical(logcat, "DNS interceptor FIXME!"); - if (payload.empty()) - return; - // FIXME: this - (void)to; - (void)from; - (void)payload; - // _hook(data.make_udp(to, from)); - } - - std::optional bound_on() const override { return std::nullopt; } - - bool would_loop(const quic::Address& to, const quic::Address& from) const override - { - if constexpr (platform::is_apple) - { - // DNS on Apple is a bit weird because in order for the NetworkExtension itself to - // send data through the tunnel we have to proxy DNS requests through Apple APIs - // (and so our actual upstream DNS won't be set in our resolvers, which is why the - // vanilla WouldLoop won't work for us). However when active the mac also only - // queries the main tunnel IP for DNS, so we consider anything else to be - // upstream-bound DNS to let it through the tunnel. - return to != _our_ip; - } - else if (auto maybe_addr = _config._query_bind) - { - const auto& addr = *maybe_addr; - // omit traffic to and from our dns socket - return addr == to or addr == from; - } - return false; - } - }; - - class TunDNS : public dns::Server - { - const TunEndpoint* _tun; - std::optional _query_bind; - quic::Address _our_ip; - - public: - std::shared_ptr pkt_source; - - ~TunDNS() override = default; - - explicit TunDNS(TunEndpoint* ep, const srouter::DnsConfig& conf) - : dns::Server{ep->router().loop, conf, 0}, - _tun{ep}, - _query_bind{conf._query_bind}, - _our_ip{ep->get_ipv4()} // FIXME: What about IPv6? 
- { - if (_query_bind) - _our_ip.set_port(_query_bind->port()); - } - - std::shared_ptr make_packet_source_on( - const quic::Address&, const srouter::DnsConfig& conf) override - { - (void)_tun; - auto ptr = std::make_shared( - [](IPPacket pkt) { - (void)pkt; - // ep->handle_write_ip_packet(pkt.ConstBuffer(), pkt.srcv6(), pkt.dstv6(), 0); - }, - _our_ip, - conf); - pkt_source = ptr; - return ptr; - } - }; - - // NB: It looks like this could/should be called during the constructor, - // but as it passes weak_from_this to the dns server, it has to be after. - void TunEndpoint::setup_dns() - { - log::debug(logcat, "{} setting up DNS...", name()); - - auto& dns_config = _router.config().dns; - const auto& info = get_vpn_interface()->interface_info(); - - if (dns_config.l3_intercept) - { - // FIXME: this entire if block is so broken... - _dns = std::make_unique(this, dns_config); - auto* dns = static_cast(_dns.get()); - - uint16_t p = 53; - - while (p < 100) - { - try - { - _packet_router->add_udp_handler(p, [this, dns](IPPacket pkt) { - // TODO FIXME - log::critical(logcat, "TODO FIXME: L3 udp interceptor!"); - // if (dns->maybe_handle_payload(dns->pkt_source, pkt.destination(), pkt.source(), - // pkt.udp_data())) - // return; - - handle_outbound_packet(std::move(pkt)); - }); - } - catch (const std::exception& e) - { - if (p += 1; p >= 100) - throw std::runtime_error{"Failed to port map udp handler: {}"_format(e.what())}; - } - } - } - else - _dns = std::make_unique(_router.loop, dns_config, info.index); - - _dns->add_resolver(weak_from_this()); - _dns->start(); - - if (dns_config.l3_intercept) - { - if (auto vpn = _router.vpn_platform()) - { - // get the first local address we know of - std::optional localaddr; - - for (auto res : _dns->get_all_resolvers()) - { - if (auto ptr = res.lock()) - { - localaddr = ptr->get_local_addr(); - - if (localaddr) - break; - } - } - if (platform::is_windows) - { - // auto dns_io = vpn->create_packet_io(0, localaddr); - // 
router().loop()->add_ticker([dns_io, handler = m_PacketRouter]() { - // net::IPPacket pkt = dns_io->ReadNextPacket(); - // while (not pkt.empty()) - // { - // handler->HandleIPPacket(std::move(pkt)); - // pkt = dns_io->ReadNextPacket(); - // } - // }); - // m_RawDNS = dns_io; - } - - (void)vpn; - } - - if (_raw_DNS) - _raw_DNS->Start(); - } - } - - void TunEndpoint::reconfigure_dns(std::vector servers) - { - if (_dns) - { - for (auto weak : _dns->get_all_resolvers()) - { - if (auto ptr = weak.lock()) - ptr->reset_resolver(servers); - } - } - } - TunEndpoint::TunEndpoint(Router& r) : _router{r} { _packet_router = @@ -309,361 +109,8 @@ namespace srouter::handlers log::info(logcat, "{} got network interface:{}", name(), _if_name); } - static const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); - static const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); - static bool is_localhost(std::string_view qname) - { - return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld - or qname.ends_with(dot_localhost_ctld); - } - - static std::optional parse_rid(std::string_view b32rid) - { - auto rid = std::make_optional(); - if (not rid->from_base32z(b32rid)) - rid.reset(); - return rid; - } - - static std::optional is_snode(std::string_view name) - { - if (name.ends_with(RELAY_DOT_TLD)) - name.remove_suffix(RELAY_DOT_TLD.size()); - else - return std::nullopt; - return parse_rid(name); - } - - static dns::Message& clear_dns_message(dns::Message& msg) - { - msg.authorities.clear(); - msg.additional.clear(); - msg.answers.clear(); - msg.hdr_fields &= ~dns::flags_RCODENxDomain; - return msg; - } - - template - static std::optional try_making(Args&&... args) - { - try - { - return std::make_optional(std::forward(args)...); - } - catch (...) 
- { - return std::nullopt; - } - } - static const auto random_snode = "random.{}"_format(RELAY_TLD); - bool TunEndpoint::handle_hooked_dns_message( - dns::Message msg, std::function reply, std::optional qname_override) - { - log::trace(logcat, "handle_hooked_dns_message"); - if (msg.questions.size() != 1) - { - log::warning(logcat, "bad number of dns questions: {}", msg.questions.size()); - return false; - } - - auto& q = msg.questions[0]; - - std::string qname; - if (qname_override) - qname = std::move(*qname_override); - else - qname = q.name(); - std::string hostname, tld; - std::vector sub; - { - auto nameparts = split(qname, "."); - if (nameparts.size() < 2) - { - log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); - return false; - } - hostname = nameparts[nameparts.size() - 2]; - tld = nameparts.back(); - sub.reserve(nameparts.size() - 2); - for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) - sub.emplace_back(s); - } - bool localhost = is_localhost(qname); - - // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the - // question type. - if (localhost) - { - auto our_hostname = _router.id().to_string(); - auto our_tld = _router.is_service_node ? RELAY_TLD : CLIENT_TLD; - auto our_name = "{}.{}"_format(our_hostname, our_tld); - - if (tld == "loki") - { - // first: report a cname for the deprecated localhost.loki -> localhost.sesh - - msg.set_rr_name("localhost.loki"); - msg.add_cname_reply("localhost.{}"_format(our_tld)); - } - // report CNAME: localhost.sesh -> pubkey.sesh - msg.set_rr_name("localhost.{}"_format(our_tld)); - msg.add_cname_reply(our_name); - - if (q.qtype == dns::RRType::CNAME) - { - // If we were queried specifically for a cname, then we are done. 
- reply(std::move(msg)); - return true; - } - - // Otherwise we continue processing to be able to return supplemental records through - // the cname, so that if you request "foo.localhost.loki" we end up returning: - // localhost.loki CNAME for localhost.sesh - // localhost.sesh CNAME for PUBKEY.sesh - // foo.PUBKEY.sesh IN X VALUE (or whatever) - // And so for for the rest of the answer processing that we were given PUBKEY.sesh, - // rather than localhost.loki/.sesh: - qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); - msg.set_rr_name(qname); - - tld = our_tld; - hostname = std::move(our_hostname); - } - else if (qname == random_snode) - { - // Similar to the localhost case: we first return a CNAME of random.snode -> - // SOMEPK.snode, then continue processing as if that was what you asked for. - - if (auto* rc = _router.node_db().get_random_rc()) - { - hostname = rc->router_id().to_string(); - qname = "{}.{}"_format(hostname, RELAY_TLD); - msg.add_cname_reply(qname, 1s); - if (q.qtype == dns::RRType::CNAME) - { - reply(std::move(msg)); - return true; - } - - msg.set_rr_name(qname); - } - else - { - msg.add_nx_reply(); - reply(std::move(msg)); - return true; - } - } - else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) - { - // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of - // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as - // mapping to a AAAA). 
- - // TODO: .sesh SNS resolution, once implemented - - // ONS lookup: - auto lookup = "{}.loki"_format(hostname); - _router.session_endpoint().resolve_sns( - lookup, - [this, - lookup, - sub = std::move(sub), - reply = std::move(reply), - msg = std::move(msg), - cname_only = q.qtype == dns::RRType::CNAME]( - std::optional maybe_netaddr, - bool assertive, - std::chrono::milliseconds ttl) mutable { - msg.set_rr_name(lookup); - if (maybe_netaddr) - { - auto target = maybe_netaddr->to_string(); - msg.add_cname_reply(target); - if (cname_only) - return; - auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); - msg.set_rr_name(qname); - handle_hooked_dns_message(std::move(msg), std::move(reply), std::move(qname)); - return; - } - - if (assertive) - { - // We got an assertive "does not exist" message (and not just a failure - // or timeout), so add the nx reply - msg.add_nx_reply(); - // FIXME: we should be able to provide a TTL here - } - else - { - // We failed to get a response at all so just NX with a short timeout so - // that they will try again soon to resolve it. (We don't want to - // SERVFAIL here because that could make the resolver try another DNS - // server). - assert(!assertive); - // FIXME: should be able to specify a TTL here - msg.add_nx_reply(); - } - reply(std::move(msg)); - }); - return true; - } - - if (q.qtype == dns::RRType::TXT) - { - // TXT records can be used to query some basic info: - - // TXT on MYPUBKEY.sesh returns the basic version and netid: - if (localhost && sub.empty()) - msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( - _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); - - // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) - else if (auto rid = is_snode(qname)) - { - if (auto* rc = _router.node_db().get_rc(*rid)) - { - msg.add_txt_reply("rc v={} a={} t={}"_format( - fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); - } - else - msg.add_nx_reply(); - } - else - msg.add_nx_reply(); - reply(msg); - return true; - } - - // "Regular" A or AAAA lookups - if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) - { - // Attempt to parse a "pubkey.snode" or "pubkey.sesh": - if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) - { - // DNS lookup implies we want a session, so make one (NOP if we have one) - // This also means if we don't use that session the IP mapping will release when - // it expires, which it wouldn't otherwise without a tedious periodic check. - bool created_session = false; - try - { - created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); - } - catch (const std::exception& e) - { - log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); - } - if (created_session) - { - if (aaaa) - msg.add_reply(map6(*maybe_netaddr)); - else if (!sub.empty() && sub.back() == "ipv4"sv) - { - // We don't map IPv4 addresses by default, but it is still possible to get - // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
- if (auto v4_addr = map4(*maybe_netaddr); v4_addr) - msg.add_reply(*v4_addr); - else - log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); - } - // else they requested A *not* using the magic ipv4 subdomain, so we only have - // AAAA to offer and thus we return a reply without an answer record (which is - // the proper DNS way to say "something exists at this address, but not with the - // type you requested requested", as opposed to this nx_reply below, which means - // "this record does not exist"). - } - else - msg.add_nx_reply(); - reply(msg); - - return true; - } - - // Otherwise it's some query type we don't support, so return does-not-exist. - msg.add_nx_reply(); - reply(msg); - return true; - } - - // Reverse DNS lookups: - if (q.qtype == dns::RRType::PTR) - { - // reverse dns - bool found = false; - if (auto ip = dns::decode_ptr(q.qname)) - std::visit( - [&](const auto& ip) { - if (auto addr = _lookup_mapped_ip(ip)) - { - msg.add_ptr_reply(addr->to_string()); - found = true; - } - }, - *ip); - - if (!found) - msg.add_nx_reply(); - - reply(msg); - return true; - } - - if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 - && sub[0].starts_with('_') && sub[1].starts_with('_')) - { - if (auto rid = parse_rid(hostname)) - { - _router.session_endpoint().lookup_client_intro( - *rid, - [msg = std::move(msg), sub, reply = std::move(reply)]( - const std::optional& cc) mutable { - if (cc) - { - for (const auto& srv : cc->SRVs()) - if (srv.service == sub[0] && srv.proto == sub[1]) - msg.add_reply(srv); - } - else - msg.add_nx_reply(); - - reply(msg); - }); - return true; - } - } - - msg.add_nx_reply(); - reply(msg); - return true; - } - - bool TunEndpoint::should_hook_dns_message(const dns::Message& msg) const - { - if (msg.questions.size() == 1) - { - // Hook every .sesh/.snode/.loki query - for (auto tld : {CLIENT_TLD, RELAY_TLD, "loki"sv}) - if (msg.questions[0].has_tld(tld)) - return true; - - // 
hook any PTR records for ranges we own - if (msg.questions[0].qtype == srouter::dns::RRType::PTR) - { - if (auto ip = dns::decode_ptr(msg.questions[0].qname)) - { - if (auto* v4 = std::get_if(&*ip)) - return _local_net.contains(*v4); - return _local_ipv6_net.contains(std::get(*ip)); - } - return false; - } - } - return false; - } - std::string TunEndpoint::get_if_name() const { return _if_name; } const ipv4& TunEndpoint::get_ipv4() const { return _local_net.ip; } @@ -676,13 +123,11 @@ namespace srouter::handlers bool TunEndpoint::is_exit_node() const { return _router.is_exit_node(); } - bool TunEndpoint::stop() + void TunEndpoint::stop() { // stop vpn tunnel if (_net_if) _net_if->Stop(); - if (_raw_DNS) - _raw_DNS->Stop(); #if 0 // save address map if applicable @@ -707,11 +152,6 @@ namespace srouter::handlers // } } #endif - - if (_dns) - _dns->stop(); - - return true; } template diff --git a/src/handlers/tun.hpp b/src/handlers/tun.hpp index 7e38d964e..f3c932b58 100644 --- a/src/handlers/tun.hpp +++ b/src/handlers/tun.hpp @@ -1,10 +1,8 @@ #pragma once #include "address/map.hpp" -#include "dns/server.hpp" #include "ev/fd_poller.hpp" #include "net/ip_packet.hpp" -#include "tun_base.hpp" #include "util/thread/threading.hpp" #include "vpn/packet_router.hpp" #include "vpn/platform.hpp" @@ -22,20 +20,16 @@ namespace srouter::traffic_type namespace srouter::handlers { inline constexpr auto TUN = "tun"sv; - inline constexpr auto LOKI_RESOLVER = "session-router"sv; - class TunEndpoint : public TunEPBase, public dns::Resolver_Base, public std::enable_shared_from_this + class TunEndpoint { public: TunEndpoint(Router& r); - ~TunEndpoint() override; + ~TunEndpoint(); private: Router& _router; - /// dns subsystem for this endpoint - std::unique_ptr _dns; - /// our local ip network ipv4_net _local_net; IPv4RangeIterator _local_range_iterator{_local_net}; @@ -59,27 +53,11 @@ namespace srouter::handlers std::optional _persisting_addr_file = std::nullopt; bool persist_addrs{false}; 
- /// for raw packet dns - std::shared_ptr _raw_DNS; - public: vpn::NetworkInterface* get_vpn_interface() { return _net_if.get(); } std::string_view name() const { return TUN; } - int rank() const override { return 0; } - - std::string_view resolver_name() const override { return LOKI_RESOLVER; } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) override; - - // Reconfigures DNS servers and restarts libunbound with the new servers. - void reconfigure_dns(std::vector servers); - void configure(); std::string get_if_name() const; @@ -94,23 +72,14 @@ namespace srouter::handlers const ipv4_net& get_ipv4_network() const; const ipv6_net& get_ipv6_network() const; - bool should_hook_dns_message(const dns::Message& msg) const; - - bool handle_hooked_dns_message( - dns::Message query, - std::function sendreply, - std::optional qname_override = std::nullopt); - void tick_tun(sys_ms now); - bool stop(); + void stop(); bool is_service_node() const; bool is_exit_node() const; - void setup_dns(); - // INPROGRESS: new API // Handles an outbound packet going OUT to the network void handle_outbound_packet(IPPacket pkt); @@ -118,7 +87,7 @@ namespace srouter::handlers void rewrite_and_send_packet(IPPacket&& pkt, const ipv4& src, const ipv4& dest); void rewrite_and_send_packet(IPPacket&& pkt, const ipv6& src, const ipv6& dest); - void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) override; + void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote); // Handles an inbound packet coming IN from the network // bool handle_inbound_packet(IPPacket pkt, NetworkAddress remote, bool is_exit_session, bool @@ -128,7 +97,7 @@ namespace srouter::handlers // Router remote address with it. If the mapping already exists, this returns the existing // IP, otherwise it assigns a new one. The association persists until unmapped. Returns // the mapped ipv6 address. 
- ipv6 map6(const NetworkAddress& remote) override; + ipv6 map6(const NetworkAddress& remote); // Obtains an available IPv4 address from the tun device and associates the given Session // Router remote address with it. If the mapping already exists, this returns the existing @@ -139,12 +108,34 @@ namespace srouter::handlers // Returns the mapped addresses, or nullptr if an address could not be assigned (i.e. // because of IPv4 exhaustion in the allocated tun range, or because this client does not // support IPv4 addressing at all). - std::optional map4(const NetworkAddress& remote) override; + std::optional map4(const NetworkAddress& remote); + + // Takes an IPv4 or IPv6 address and returns {addr, true} if the address is a tun address + // range IP mapped to an address; {nullptr, true} if it is a tun address range IP but + // without a mapped address; or {nullptr, false} if it is not a tun address range IP. + template + std::pair, bool> reverse_lookup(const IP& ip) + requires std::same_as || std::same_as + { + std::pair, bool> result; + auto& [netaddr, in_range] = result; + if constexpr (std::same_as) + { + netaddr = _local_ipv4_mapping[ip]; + in_range = netaddr || _local_net.contains(ip); + } + else + { + netaddr = _local_ipv6_mapping[ip]; + in_range = netaddr || _local_ipv6_net.contains(ip); + } + return result; + } // Expires a mapped IP for the given remote from the tun IP map. The address will be added // as the most recently used address, and (if the configured cache size is exceeded) the least // recently used address will be forgotten. 
- void expire(const NetworkAddress& remote) override; + void expire(const NetworkAddress& remote); std::optional get_exit_policy() const { return _exit_policy; } @@ -159,7 +150,7 @@ namespace srouter::handlers Router& router() { return _router; } - void start_poller() override; + void start_poller(); private: // Stores assigned IP's for each session in/out of this Session Router instance @@ -168,18 +159,6 @@ namespace srouter::handlers address_map _local_ipv4_mapping; address_map _local_ipv6_mapping; - template - auto _lookup_mapped_ip(const IP& ip) - { - if constexpr (std::same_as) - return _local_ipv4_mapping[ip]; - else - { - static_assert(std::same_as); - return _local_ipv6_mapping[ip]; - } - } - // We keep a list of expired network addresses ordered by least-recently-used first. When // pruning the expired list, we pop off the front of the list. std::list _expired; diff --git a/src/handlers/tun_base.hpp b/src/handlers/tun_base.hpp deleted file mode 100644 index 2f4c772fe..000000000 --- a/src/handlers/tun_base.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "address/address.hpp" -#include "address/types.hpp" -#include "net/ip_packet.hpp" - -namespace srouter::handlers -{ - - // Abstract class for TUN handling. This base interface exists so that embedded clients can be - // built without needing to compile any tun code at all. 
- class TunEPBase - { - public: - virtual ~TunEPBase() = default; - - virtual void start_poller() = 0; - - virtual ipv6 map6(const NetworkAddress& remote) = 0; - virtual std::optional map4([[maybe_unused]] const NetworkAddress& remote) { return std::nullopt; } - - virtual void expire(const NetworkAddress& remote) = 0; - - virtual void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) = 0; - }; - -} // namespace srouter::handlers diff --git a/src/link/endpoint.cpp b/src/link/endpoint.cpp index 9a758adc3..2172c9fcd 100644 --- a/src/link/endpoint.cpp +++ b/src/link/endpoint.cpp @@ -797,6 +797,8 @@ namespace srouter::link }); } + Endpoint::~Endpoint() { *canary = false; } + void Endpoint::on_conn_closed(quic::Connection& conn, uint64_t ec) { auto alpn = conn.selected_alpn(); @@ -815,13 +817,26 @@ namespace srouter::link return; } - router.loop.call([this, connptr = conn.shared_from_this(), ec] { - auto& conn = *connptr; - auto alpn = conn.selected_alpn(); - - std::optional rid; - if (conn.remote_key().size() == RouterID::SIZE) - rid.emplace(conn.remote_key().first()); + std::optional rid; + if (conn.remote_key().size() == RouterID::SIZE) + rid.emplace(conn.remote_key().first()); + + // NB: we must not capture a shared_ptr to conn here, because this lambda could outlive + // `this`; the canary lets us early-return if that happens, but the Connection destruction + // relies on `this.loop` to destroy: thus if we capture it we could delay that destruction + // attempt beyond the end of `this.loop`. Thus we capture everything we need into the + // lambda here, while we are still in the network loop. 
+ + router.loop.call([this, + alive = canary, + conn_refid = conn.reference_id(), + alpn, + rid = std::move(rid), + remote_addr = conn.remote(), + ec, + was_inbound = conn.is_inbound()] { + if (!*alive) + return; bool found = false; @@ -833,14 +848,14 @@ namespace srouter::link { assert(router.is_service_node); auto& relcon = it->second; - if (relcon.inbound && connptr == relcon.inbound->conn) + if (relcon.inbound && relcon.inbound->conn && relcon.inbound->conn->reference_id() == conn_refid) { relcon.close(true); found = true; log::debug( logcat, "Inbound connection from {} closed (ec={})", rid->to_network_address(true), ec); } - if (relcon.outbound && connptr == relcon.outbound->conn) + if (relcon.outbound && relcon.outbound->conn && relcon.outbound->conn->reference_id() == conn_refid) { relcon.close(false); found = true; @@ -875,10 +890,10 @@ namespace srouter::link log::debug( logcat, "Closed redundant connection {} {} @ {} (cid={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid->to_network_address(true), - conn.remote(), - conn.reference_id()); + remote_addr, + conn_refid); found = true; } } @@ -886,11 +901,11 @@ namespace srouter::link { if (router.is_service_node) { - assert(conn.is_inbound()); // Relays do make outbound client conns for testing, - // but they do not use this close callback. - if (auto it = inbound_clients.find(conn.reference_id()); it != inbound_clients.end()) + assert(was_inbound); // Relays do make outbound client conns for testing, + // but they do not use this close callback. 
+ if (auto it = inbound_clients.find(conn_refid); it != inbound_clients.end()) { - log::debug(logcat, "Client connection from {} closed (ec={})", conn.remote(), ec); + log::debug(logcat, "Client connection from {} closed (ec={})", remote_addr, ec); it->second->close(); inbound_clients.erase(it); found = true; @@ -898,9 +913,10 @@ namespace srouter::link } else { - assert(conn.is_outbound()); + assert(!was_inbound); - if (auto it = client_conns.find(*rid); it != client_conns.end() and connptr == it->second->conn) + if (auto it = client_conns.find(*rid); it != client_conns.end() && it->second && it->second->conn + && it->second->conn->reference_id() == conn_refid) { log::debug( logcat, @@ -912,12 +928,13 @@ namespace srouter::link } } } - else if (conn.is_outbound()) + else if (!was_inbound) { // Unknown or empty ALPN -- this is an outbound conn that didn't establish (and thus // didn't negotiate the ALPN): assert(rid); // Outbound conns start out with the target pubkey known - if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() and connptr == it->second->conn) + if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() && it->second + && it->second->conn && it->second->conn->reference_id() == conn_refid) { pending_outbound.erase(it); found = true; @@ -931,10 +948,10 @@ namespace srouter::link log::warning( logcat, "Closed connection {} {} @ {} (cid={}, ec={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid ? rid->to_string() : "", - conn.remote(), - conn.reference_id(), + remote_addr, + conn_refid, ec); if (not router.is_service_node) diff --git a/src/link/endpoint.hpp b/src/link/endpoint.hpp index 36bb6759d..f7715a2c5 100644 --- a/src/link/endpoint.hpp +++ b/src/link/endpoint.hpp @@ -83,10 +83,16 @@ namespace srouter::link public: explicit Endpoint(Manager& lm); + ~Endpoint(); + Manager& manager; Router& router; private: + // The network loop object. 
This *must* be declared before most of the below as some of the + // things below have destructors that run in this loop. + std::unique_ptr loop; + // Stores established relay-to-relay connections; only used by service nodes. std::unordered_map relay_conns; @@ -114,12 +120,15 @@ namespace srouter::link // only. std::unordered_map> inbound_clients; - std::unique_ptr loop; std::shared_ptr endpoint; std::shared_ptr redundancy_ticker; std::shared_ptr dereg_conn_ticker; std::shared_ptr tls_creds; + // Canary object that gets set to false during destruction to help short-circuit lambda that + // could potentially outlive `this`: + std::shared_ptr canary = std::make_shared(true); + public: void start_tickers(); diff --git a/src/path/path.cpp b/src/path/path.cpp index a5610ceb7..dac38cb15 100644 --- a/src/path/path.cpp +++ b/src/path/path.cpp @@ -72,9 +72,8 @@ namespace srouter::path double success_pct = p.ping_responses / (double)(p.ping_responses + p.ping_timeouts) * 100.0; if (p.ping_responses == 1) return "{:.1f}%, {:.0f}ms avg"_format(success_pct, mean); - - double sd = std::sqrt(((double)p.ping_sq_cumulative - p.ping_responses * mean * mean) / (p.ping_responses - 1)); - return "{:.1f}%, {:.0f}ms avg, {:.1f}ms s.d."_format(success_pct, mean, sd); + double jitter = p.ping_responses < 2 ? 0.0 : (double)p.ping_abs_diffs.count() / (p.ping_responses - 1); + return "{:.1f}%, {:.0f}ms avg, {:.1f}ms jitter"_format(success_pct, mean, jitter); } void Path::do_ping(steady_ms start_time) @@ -96,10 +95,11 @@ namespace srouter::path auto time_taken = now - start_time; if (resp.ok()) { - ping_responses++; + if (++ping_responses > 1) + ping_abs_diffs += time_taken >= ping_last ? 
time_taken - ping_last : ping_last - time_taken; + ping_last = time_taken; ping_recent_timeouts = 0; ping_cumulative += time_taken; - ping_sq_cumulative += time_taken.count() * time_taken.count(); if (resp.body == messages::OK_RESPONSE) log::debug( @@ -318,13 +318,29 @@ namespace srouter::path } path_hop_stringifier Path::hop_string() const { return {hops}; } - std::vector> Path::get_hops_strings_and_ips() const + Path::Info Path::get_info() const { - std::vector> ret; + Info ret{}; + ret.expiry = _expiry; + if (ping_responses) + ret.ping_mean = std::chrono::round( + std::chrono::nanoseconds{ping_cumulative} / ping_responses); + if (ping_responses > 1) + ret.ping_jitter = std::chrono::round( + std::chrono::nanoseconds{ping_abs_diffs} / (ping_responses - 1)); + ret.ping_responses = ping_responses; + ret.ping_timeouts = ping_timeouts; + ret.ping_recent_timeouts = ping_recent_timeouts; for (const auto& hop : hops) { - auto rc = _router.node_db().get_rc(hop.router_id); - ret.emplace_back(NetworkAddress{hop.router_id, false}.to_string(), rc->addr().to_ipv4().to_string()); + auto* rc = _router.node_db().get_rc(hop.router_id); + if (rc) + ret.relays.emplace_back(hop.router_id, rc->addr().to_ipv4()); + else + { + log::warning(logcat, "Couldn't find RC of a router on our path?!"); + ret.relays.emplace_back(); + } } return ret; } diff --git a/src/path/path.hpp b/src/path/path.hpp index d870ff87f..316b6d1d1 100644 --- a/src/path/path.hpp +++ b/src/path/path.hpp @@ -72,7 +72,16 @@ namespace srouter::path path_hop_stringifier hop_string() const; - std::vector> get_hops_strings_and_ips() const; + struct Info + { + // relay pubkeys and IPv4 addresses, from edge -> pivot (or final relay) + std::vector> relays; + sys_ms expiry = {}; + std::chrono::milliseconds ping_mean; + std::chrono::microseconds ping_jitter; + int ping_responses, ping_timeouts, ping_recent_timeouts; + }; + Info get_info() const; sys_ms LastRemoteActivityAt() const { return last_recv_msg; } @@ -219,9 +228,13 @@ 
namespace srouter::path steady_ms next_ping{}; int ping_responses{0}, ping_timeouts{0}; int ping_recent_timeouts{0}; + std::chrono::milliseconds ping_last{0ms}; // Cumulative time of all `ping_responses` pings (divide by ping_responses for an average). std::chrono::milliseconds ping_cumulative{0ms}; - int64_t ping_sq_cumulative{0}; + // This is the cumulative absolute differences of all received sequential pings. E.g. if we + // have 4 pings [100, 101, 98, 98] then this equals (|100-101| + |101-98| + |98-98|). + // Dividing this by `ping_responses - 1` gives jitter. + std::chrono::milliseconds ping_abs_diffs{0ms}; }; } // namespace srouter::path diff --git a/src/router/router.cpp b/src/router/router.cpp index b5c6eccb8..9f40d100c 100644 --- a/src/router/router.cpp +++ b/src/router/router.cpp @@ -7,6 +7,7 @@ #include "constants/version.hpp" #include "contact/contactdb.hpp" #include "crypto/crypto.hpp" +#include "dns/listener.hpp" #include "link/link_manager.hpp" #include "nodedb.hpp" #include "util/formattable.hpp" @@ -62,7 +63,8 @@ namespace srouter // exceed the defaut 1MB limit). _omq->MAX_MSG_SIZE = -1; - _router_testing = std::make_shared(*this); + if (is_service_node) + _router_testing = std::make_shared(*this); #endif init_logging(); @@ -552,19 +554,42 @@ namespace srouter throw std::runtime_error{"This Session Router build only supports embedded configurations!"}; #else log::debug(logcat, "Initializing TUN device"); - auto tun = _loop->make_shared(*this); + _tun = _loop->make_shared(*this); // only (full) clients should have DNS, relays have no need for it if (!is_service_node) - tun->setup_dns(); + { + auto& dns_bind = config().dns._bind_addrs; + if (dns_bind.empty()) + { + // This configuration is allowed (a service-only client might use it), although a bit unusual + log::warning( + logcat, "[bind]:bind is empty: DNS disabled. 
Making outbound paths will not be possible"); + } + else + { + for (const auto& addr : dns_bind) + { + try + { + if (!_dns) + _dns = std::make_shared(*this, addr); + else + _dns->listen(loop, addr); + } + catch (const std::exception& e) + { + log::error(logcat, "Failed to initialize DNS listener on {}: {}", addr, e.what()); + } + } + } + } log::info( log_global, "Session Router internal network: {} on device {}", - tun->get_ipv4_network(), - tun->get_if_name()); - - _tun = std::move(tun); + _tun->get_ipv4_network(), + _tun->get_if_name()); #endif } else @@ -951,7 +976,8 @@ namespace srouter void Router::on_test_ping() { #ifndef SROUTER_EMBEDDED_ONLY - _router_testing->incoming_ping(); + if (_router_testing) + _router_testing->incoming_ping(); #endif } @@ -979,7 +1005,8 @@ namespace srouter srouter::sys::service_manager->stopping(); } - _router_testing->stop(); + if (_router_testing) + _router_testing->stop(); #endif _session_endpoint->stop(true); @@ -990,6 +1017,14 @@ namespace srouter log::debug(logcat, "closing all connections"); _link_manager->stop(); +#ifndef SROUTER_EMBEDDED_ONLY + if (_dns) + _dns.reset(); + + if (_tun) + _tun->stop(); +#endif + auto rv = _loop_ticker->stop(); log::debug(logcat, "router loop ticker stopped {}successfully!", rv ? 
"" : "un"); _loop_ticker.reset(); @@ -1020,6 +1055,9 @@ namespace srouter _link_endpoint = nullptr; _link_manager.reset(); + if (_tun) + _tun.reset(); + if (_router_close_cb) _router_close_cb(); @@ -1032,6 +1070,24 @@ namespace srouter }); } + std::pair, bool> Router::reverse_lookup(const ipv4& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + + std::pair, bool> Router::reverse_lookup(const ipv6& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + const srouter::net::Platform* Router::net() const { #ifndef SROUTER_EMBEDDED_ONLY diff --git a/src/router/router.hpp b/src/router/router.hpp index 9f485b79c..10c4173f4 100644 --- a/src/router/router.hpp +++ b/src/router/router.hpp @@ -3,7 +3,7 @@ #include "contact/relay_contact.hpp" #include "crypto/key_manager.hpp" #include "handlers/session.hpp" -#include "handlers/tun_base.hpp" +#include "handlers/tun.hpp" #include "path/build_stats.hpp" #include "path/path_context.hpp" #include "profiling.hpp" @@ -17,6 +17,7 @@ #include #include #include +#include namespace oxenmq { @@ -26,6 +27,10 @@ namespace oxenmq namespace srouter { + namespace dns + { + class Listener; + } namespace link { struct Connection; @@ -91,6 +96,12 @@ namespace srouter ~Router(); + // Non-copyable/movable: + Router(const Router&) = delete; + Router(Router&&) = delete; + Router& operator=(const Router&) = delete; + Router& operator=(Router&&) = delete; + private: // Internal functions called during construction: void configure(); @@ -126,7 +137,8 @@ namespace srouter link::Endpoint* _link_endpoint = nullptr; // These are only created in full platform mode (not embedded clients) - std::shared_ptr _tun; + std::shared_ptr _tun; + std::shared_ptr _dns; std::shared_ptr _vpn; std::shared_ptr _route_poker; @@ -197,7 +209,16 @@ namespace srouter bool is_fully_meshed() const; - const 
std::shared_ptr& tun_endpoint() { return _tun; } + const std::shared_ptr& tun_endpoint() { return _tun; } + + // Looks up the given IP in our TUN mapping and, if it is a TUN address and maps to a remote, returns the + // network address of the mapped-to address. The `.second` part of the result indicates + // whether the IP is on our TUN range, even if it is unmapped. That is, it can return: + // {address, true} -- address in tun range, and mapped + // {nullopt, true} -- address in tun range, but not mapped to a remote + // {nullopt, false} -- address not in tun range (or no tun at all) + std::pair, bool> reverse_lookup(const ipv4& addr) const; + std::pair, bool> reverse_lookup(const ipv6& addr) const; // Returns the net Platform pointer, or nullptr if this is an embedded client. const srouter::net::Platform* net() const; diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index 9f19c073a..736523120 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -4,8 +4,6 @@ #include "config/ini.hpp" #include "constants/version.hpp" #include "contact/client_contact.hpp" -#include "dns/dns.hpp" -#include "dns/server.hpp" #include "router/router.hpp" #include "rpc/rpc_request_definitions.hpp" #include "rpc_request.hpp" @@ -28,6 +26,7 @@ namespace srouter::rpc log::info(logcat, "RPC Server received request for endpoint `{}`", req.name); } +#if 0 // Fake packet source that serializes repsonses back into dns class DummyPacketSource final : public dns::PacketSource { @@ -47,6 +46,7 @@ namespace srouter::rpc /// returns the sockaddr we are bound on if applicable std::optional bound_on() const override { return std::nullopt; } }; +#endif bool check_path(std::string path) { diff --git a/src/session/session.cpp b/src/session/session.cpp index 71cf622d7..afbc1d255 100644 --- a/src/session/session.cpp +++ b/src/session/session.cpp @@ -664,6 +664,7 @@ namespace srouter::session return; } +#ifndef SROUTER_EMBEDDED_ONLY // Otherwise we're not embedded; if the other 
side also isn't then this is just a raw IP // packet to handle via the tun endpoint, and the same for UDP packets from embedded // remotes (which also send raw UDP packets): @@ -682,7 +683,7 @@ namespace srouter::session // NOTE: At this time, tun clients always support ipv4, but ipv4 is only activated on use // (unlike IPv6 which is activated all the time). If this changes, a check for that should // short-circuit the call to map_session below. - if (!_r.embedded() && pkt.is_ipv4() && !ipv4_mapped) + if (pkt.is_ipv4() && !ipv4_mapped) { if (!_parent.map_session_v4(*this)) { @@ -692,7 +693,9 @@ namespace srouter::session ipv4_mapped = true; } + assert(_r.tun_endpoint()); // (We return above if embedded) _r.tun_endpoint()->handle_inbound_packet(std::move(pkt), dgram_type, _remote); +#endif } void Session::publish_client_contact(std::string_view encrypted_cc) @@ -1663,17 +1666,17 @@ namespace srouter::session _current_thop->downstream, "session_control"s, std::move(data), nullptr); } - std::vector> OutboundSession::current_path() const + path::Path::Info OutboundSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } - std::vector> InboundClientSession::current_path() const + path::Path::Info InboundClientSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } diff --git a/src/session/session.hpp b/src/session/session.hpp index f67273001..380ef3032 100644 --- a/src/session/session.hpp +++ b/src/session/session.hpp @@ -3,7 +3,6 @@ #include "address/address.hpp" #include "constants/path.hpp" #include "ev/tcp.hpp" -#include "ev/udp.hpp" #include "net/ip_packet.hpp" #include "path/path.hpp" #include "path/path_handler.hpp" @@ -220,7 +219,7 @@ namespace srouter // rather than waiting for the next tick) virtual void tick(sys_ms now); - virtual std::vector> current_path() 
const { return {}; }; + virtual path::Path::Info current_path_info() const { return {}; }; }; class OutboundSession : public path::PathHandler, public Session @@ -300,7 +299,7 @@ namespace srouter inline static constexpr int MAX_QUEUED_PACKETS = 30; - std::vector> current_path() const override; + path::Path::Info current_path_info() const override; }; // Outbound Session to Remote Relay @@ -419,7 +418,7 @@ namespace srouter void handle_path_switch(HopID pivot, std::shared_ptr path); - std::vector> current_path() const override; + path::Path::Info current_path_info() const override; std::string to_string() const override; }; diff --git a/src/session_router.cpp b/src/session_router.cpp index 3592a7f06..e268e4a6d 100644 --- a/src/session_router.cpp +++ b/src/session_router.cpp @@ -157,6 +157,14 @@ namespace session::router context->router->session_endpoint().unmap_udp_remote_port(netaddr, port); } + static snode_path to_snode_path(const srouter::path::Path::Info& info) + { + snode_path path; + for (const auto& [rid, ip] : info.relays) + path.emplace_back(srouter::NetworkAddress{rid, false}.to_string(), ip.to_string()); + return path; + } + std::optional SessionRouter::get_path_for_session(std::string_view remote) { srouter::NetworkAddress netaddr; @@ -172,10 +180,8 @@ namespace session::router return context->router->loop.call_get([&r = context->router, addr = std::move(netaddr)]() { std::optional ret; - if (auto s = r->session_endpoint().get_session(addr); s) - { - ret = s->current_path(); - } + if (auto* s = r->session_endpoint().get_session(addr)) + ret = to_snode_path(s->current_path_info()); return ret; }); } @@ -184,9 +190,10 @@ namespace session::router { return context->router->loop.call_get([&r = context->router]() { std::vector ret; - auto f = [&ret](const srouter::NetworkAddress& addr, const srouter::session::Session& s) { - ret.emplace_back(s.current_path(), addr.to_string()); - }; + r->session_endpoint().for_each_session( + [&ret](const 
srouter::NetworkAddress& addr, const srouter::session::Session& s) { + ret.emplace_back(to_snode_path(s.current_path_info()), addr.to_string()); + }); return ret; }); } diff --git a/src/util/logging.hpp b/src/util/logging.hpp index 68c82a909..f66f5a6ca 100644 --- a/src/util/logging.hpp +++ b/src/util/logging.hpp @@ -2,7 +2,7 @@ // Header for making actual log statements such as srouter::log::Info and so on work. -#include +#include // IWYU pragma: export #include #include