diff --git a/.drone.jsonnet b/.drone.jsonnet index 3509e6ac0..c753769f0 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -291,7 +291,7 @@ local clang(version) = debian_pipeline( local full_llvm(version) = debian_pipeline( 'Debian sid/llvm-' + version, docker_base + 'debian-sid-clang', - deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], + deps=default_deps(add=['clang-' + version, ' lld-' + version, ' libc++-' + version + '-dev', 'libc++abi-' + version + '-dev', 'libunwind-' + version + '-dev', 'libngtcp2-crypto-gnutls-dev', 'libngtcp2-dev'], remove='g++'), oxen_repo=[], cmake_extra='-DCMAKE_C_COMPILER=clang-' + version + diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index d8ffc0e72..c59682e21 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -113,7 +113,7 @@ set(default_libcrypt OFF) if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT STATIC_LINK) pkg_check_modules(LIBCRYPT libcrypt IMPORTED_TARGET) - if(LIBCRYPTO_FOUND) + if(LIBCRYPT_FOUND) set(default_libcrypt ON) endif() endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 534c7c189..65362de18 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -156,11 +156,13 @@ if (SROUTER_FULL) # parse modify and reconstitute dns wire proto, dns queries and RR target_sources(session-router-dns PRIVATE dns/encode.cpp + dns/handler.cpp + dns/listener.cpp dns/message.cpp dns/platform.cpp dns/question.cpp dns/rr.cpp - dns/server.cpp + dns/unbound.cpp ) # platform specific bits and bobs for setting dns diff --git a/src/config/config.cpp b/src/config/config.cpp index 17d2cbe46..d9484de36 100644 --- a/src/config/config.cpp +++ b/src/config/config.cpp @@ -950,117 +950,117 @@ namespace srouter // see https://github.com/oxen-io/lokinet/issues/1887#issuecomment-1091897282 Default{"127.0.0.1:0"}, #endif - Default{"127.3.2.1:53"}, + Default{"127.3.2.1"}, #else - 
Default{"127.0.0.1:53"}, + Default{"127.0.0.1"}, #endif }; - auto parse_addr_for_dns = [](const std::string& arg) { - std::optional addr = std::nullopt; - std::string_view arg_v{arg}, port; - std::string host; - uint16_t p{DEFAULT_DNS_PORT}; - - if (auto pos = arg_v.find(':'); pos != arg_v.npos) - { - host = arg_v.substr(0, pos); - port = arg_v.substr(pos + 1); - - if (not srouter::parse_int(port, p)) - log::info(logcat, "Failed to parse port in arg:{}, defaulting to DNS port 53", port); - - addr = quic::Address{host, p}; - } - - return addr; - }; - conf.define_option( "dns", - "upstream", + "listen", FullClientOnly, + DefaultDNSBind, MultiValue, Comment{ - "Upstream resolver(s) to use as fallback for non-Session Router addresses.", - "Multiple values accepted.", + "Address(es) on which to listen for DNS requests. This can either be an IP address", + "(to use the default DNS port 53) or an IP followed by `:port' to listen on a custom", + "port. To specify an IPv6 address, surround the address with '[' and ']'.", + "", + "This option can be specified multiple times to bind to multiple addresses.", + "", + "If this Session Router instance has no need to establish outbound connection (for example,", + "for a hidden service) then this can be set to an empty string to disable the DNS listener", + "entirely. 
WARNING: disabling this makes it impossible to make new outbound connections!", }, - [this, parse_addr_for_dns](std::string arg) { + [this](const std::string& arg) { if (not arg.empty()) - { - if (auto maybe_addr = parse_addr_for_dns(arg)) - _upstream_dns.push_back(std::move(*maybe_addr)); - else - log::warning(logcat, "Failed to parse upstream DNS resolver address:{}", arg); - } + _listen_addrs.push_back(quic::Address::parse(arg, DEFAULT_DNS_PORT)); }); - conf.define_option( - "dns", - "l3-intercept", - FullClientOnly, - Default{ - platform::is_windows or platform::is_android or (platform::is_macos and not platform::is_apple_sysex)}, - Comment{"Intercept all dns traffic (udp/53) going into our Session Router network interface " - "instead of binding a local udp socket"}, - assignment_acceptor(l3_intercept)); - conf.define_option( "dns", - "query-bind", + "upstream", FullClientOnly, -#if defined(_WIN32) - Default{"0.0.0.0:0"}, -#else - Hidden, -#endif + MultiValue, + std::array{ + Default{"9.9.9.9"}, Default{"149.112.112.112"}, Default{"[2620:fe::fe]"}, Default{"[2620:fe::9]"}}, Comment{ - "Address to bind to for sending upstream DNS requests.", + "Upstream resolver(s) to use as fallback for non-Session Router addresses.", + "Multiple values accepted. 
Can be set to empty to disable upstream DNS resolution", + "for advanced setups.", + "", + "If not specified, the default is to use Quad9 public DNS (https://quad9.net).", }, - [this, parse_addr_for_dns](std::string arg) { + [this](const std::string& arg) { if (not arg.empty()) - { - if (auto maybe_addr = parse_addr_for_dns(arg)) - _query_bind = std::move(*maybe_addr); - else - log::warning(logcat, "Failed to parse bind address for DNS queries:{}", arg); - } + _upstream_dns.push_back(quic::Address::parse(arg, DEFAULT_DNS_PORT)); }); conf.define_option( "dns", - "bind", + "unbound", FullClientOnly, - DefaultDNSBind, MultiValue, Comment{ - "Address to bind to for handling DNS requests.", + "This option can be used to supply custom options to libunbound, which is used", + "internally when DNS requests are made that are not for a .sesh/.snode address.", + "", + "To add a custom option specify this option with a value of `unbound-option-name: value`;", + "for example, to limit the maximum record cache time:", + " unbound=cache-max-ttl: 3600", + "Or to enable DNSSEC validation:", + " unbound=trust-anchor-file: /path/to/dns/root.key", + "", + "You can use this option multiple times to specify more unbound options.", + "", + "See https://unbound.docs.nlnetlabs.nl/en/latest/manpages/unbound.conf.html", + "for all supported unbound options.", }, - [this, parse_addr_for_dns](std::string arg) { - if (not arg.empty()) + [this](std::string option) { + auto pos = option.find(':'); + if (pos == std::string::npos) + throw std::invalid_argument{ + "Invalid unbound option '{}': options must be formatted as `option: value`"_format(option)}; + auto key = std::string_view{option}.substr(0, pos); + auto value = std::string_view{option}.substr(pos + 1); + + for (auto* s : {&key, &value}) { - if (auto maybe_addr = parse_addr_for_dns(arg)) - { - _bind_addrs.push_back(std::move(*maybe_addr)); - } - else - log::warning(logcat, "Failed to parse bind address for handling DNS requests:{}", arg); 
+ while (s->starts_with(' ')) + s->remove_prefix(1); + while (s->ends_with(' ')) + s->remove_suffix(1); } + if (key.empty() || value.empty()) + throw std::invalid_argument{ + "Invalid unbound option '{}': key and/or value cannot be empty"_format(option)}; + + unbound_opts.emplace_back("{}:"_format(key), std::string{value}); }); conf.define_option( "dns", - "add-hosts", + "unbound-hosts", FullClientOnly, - Comment{"Add a hosts file to the dns resolver", "For use with client side dns filtering"}, + Default{std::filesystem::path{"SYSTEM"}}, + Comment{ + "Configures unbound to use the given `hosts' files when resolving addresses. Can be", + "used to add custom addresses or perform client-side DNS filtering. If omitted or set", + "to the string 'SYSTEM' then the system default (/etc/hosts, or WINDIR/etc/hosts on", + "Windows) will be used. Can be set to an empty string to not add any hosts file.", + }, [this, rel_base = params.default_data_dir](std::filesystem::path path) { if (path.empty()) return; - if (path.is_relative()) - path = rel_base / path; - if (not exists(path)) - throw std::invalid_argument{"cannot add hosts file {} as it does not exist"_format(path)}; - hostfiles.emplace_back(std::move(path)); + if (path != std::filesystem::path{"SYSTEM"}) + { + if (path.is_relative()) + path = rel_base / path; + if (not exists(path)) + throw std::invalid_argument{"cannot add hosts file {} as it does not exist"_format(path)}; + } + unbound_hosts = std::move(path); }); // Ignored option (used by the systemd service file to disable resolvconf configuration). 
@@ -1074,10 +1074,6 @@ namespace srouter "(This is not used directly by Session Router itself, but by the Session Router init scripts", "on systems which use resolveconf)", }); - - // forward the rest to libunbound - conf.add_undeclared_handler( - "dns", [this](auto, std::string_view key, std::string_view val) { extra_opts.emplace(key, val); }); } void LinksConfig::define_config_options(ConfigDefinition& conf, const ConfigGenParameters& params) diff --git a/src/config/config.hpp b/src/config/config.hpp index f88096718..61f81c486 100644 --- a/src/config/config.hpp +++ b/src/config/config.hpp @@ -221,16 +221,17 @@ namespace srouter struct DnsConfig { - bool l3_intercept{false}; - - std::vector hostfiles; - std::vector _upstream_dns; - quic::Address _default_dns{"9.9.9.10", DEFAULT_DNS_PORT}; - std::optional _query_bind; - std::vector _bind_addrs; + std::vector _listen_addrs; + + // {"name:", "value"} pairs that we pass through to unbound to configure upstream DNS + // requests: + std::vector> unbound_opts; - std::unordered_multimap extra_opts; + // Unbound config doesn't support specifying a hosts file for some reason but has to be done + // via a different call. We allow a magic "SYSTEM" value here to instruct unbound to use + // the system default (by passing nullptr). 
+ std::optional unbound_hosts; void define_config_options(ConfigDefinition& conf, const ConfigGenParameters& params); }; diff --git a/src/dns/dns.hpp b/src/dns/dns.hpp deleted file mode 100644 index 0dae13dee..000000000 --- a/src/dns/dns.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -namespace srouter::dns -{ - constexpr uint16_t flags_QR = 1 << 15; - constexpr uint16_t flags_AA = 1 << 10; - constexpr uint16_t flags_TC = 1 << 9; - constexpr uint16_t flags_RD = 1 << 8; - constexpr uint16_t flags_RA = 1 << 7; - constexpr uint16_t flags_RCODENxDomain = 3; - constexpr uint16_t flags_RCODEServFail = 2; - constexpr uint16_t flags_RCODENoError = 0; - -} // namespace srouter::dns diff --git a/src/dns/encode.cpp b/src/dns/encode.cpp index 5318d72e8..eb5c3bccb 100644 --- a/src/dns/encode.cpp +++ b/src/dns/encode.cpp @@ -1,6 +1,8 @@ #include "encode.hpp" #include "address/address.hpp" +#include "util/logging.hpp" +#include "util/logging/buffer.hpp" #include "util/str.hpp" #include @@ -10,6 +12,8 @@ namespace srouter::dns { + static auto logcat = log::Cat("dns"); + std::optional extract_name(std::span& buf) { std::optional name; @@ -46,36 +50,125 @@ namespace srouter::dns return name; } - size_t encode_name(std::span buf, std::string_view name) + std::optional> extract_name_data(std::span& buf) + { + log::trace(logcat, "Extracting name data from: {}", buffer_printer{buf}); + auto* p = buf.data(); + auto* end = p + buf.size(); + while (true) + { + if (p == end) + return std::nullopt; + auto len = static_cast(*p++); + if (len > 63) + { + // This is a compressed name pointer, so we need this byte and the next one, and + // then that's it, we're done. 
+ if (p == end) + return std::nullopt; + p++; + break; + } + + if (len == 0) + break; // Terminating null + + // Otherwise we have a length prefix: + if (p + len >= end) + return std::nullopt; + p += len; + } + + auto result = std::make_optional>(buf.subspan(0, p - buf.data())); + buf = buf.subspan(p - buf.data()); + return result; + } + + void encode_name(std::span& buf, std::string_view name, prev_names_t* prev_names, uint16_t* buf_offset) { - auto orig = buf.size(); if (name.size() && name.back() == '.') name.remove_suffix(1); - for (auto part : srouter::split(name, ".")) + assert((prev_names && buf_offset) || (!prev_names && !buf_offset)); + + // Look for a previously used suffix of this name. For instance, if we have a response + // consisting of: + // + // localhost.sesh IN CNAME mylongpubkey.sesh + // foo.mylongpubkey.sesh IN AAAA 1:2:3::4 + // + // then when we repeat the question itself (IN AAAA localhost.sesh) we echo that question + // back into the response as the 16 bytes: + // \x09localhost\x04sesh\x00 + // Suppose that this was written at location Z in the DNS message, this creates two + // pointable addresses: + // - "localhost.sesh" -> Z + // - "sesh" -> Z+10 + // + // Then we come to the answers, and for the first "localhost.sesh" value, we can simply + // write that as a single pointer [Z] (where the pointer is a 16-bit, big-endian value with + // the highest two bits set and the remaining 14 bits set to "Z"). + // + // Then we get to "mylongpubkey.sesh" and we can encode that as: + // + // \x34mylongpubkey[pointer to Z+10] + // + // This also creates a new pointable address: + // - "mylongpubkey.sesh" -> Y + // + // Then we come to foo.mylongpubkey.sesh and we can encode this as: + // + // - \x03foo[pointer to Y] + // + // i.e. we only need 6 bytes for this address instead of 1+3+1+52+1+4+1=63 bytes that we + // would need for the uncompressed version. 
+ // + // Although this compression is optional, given how frequently we reuse long session router + // names (particularly for something like SRV records where a name can be repeated multiple + // times), and the DNS response size limit of 512 bytes, we implement that here. + + for (size_t pos = name.empty() ? std::string::npos : 0; pos != std::string_view::npos;) { + std::string_view check = name.substr(pos); + if (prev_names) + if (auto it = prev_names->find(check); it != prev_names->end()) + { + if (buf.size() < 2) + throw std::out_of_range{"Buffer too small"}; + uint16_t ptr = uint16_t{0b11000000'00000000} | it->second; + oxenc::write_host_as_big(ptr, buf.data()); + buf = buf.subspan(2); + *buf_offset += 2; + // A pointer is terminal (i.e. no null terminator to add), so we're done. + return; + } + + auto next = name.find('.', pos + 1); + auto part = next == std::string_view::npos ? check : name.substr(pos, next - pos); + size_t l = part.size(); if (l > 63 || l >= buf.size()) - return false; - buf.front() = static_cast(l); + throw std::out_of_range{"Buffer too small"}; + buf.front() = static_cast(l); // Length prefix std::memcpy(buf.data() + 1, part.data(), part.size()); + if (prev_names) + { + if (*buf_offset < 0x4000) prev_names->emplace(std::string{check}, static_cast(*buf_offset)); // A compression pointer only has 14 offset bits, so later positions cannot be pointed at + *buf_offset += 1 + part.size(); + } buf = buf.subspan(1 + part.size()); + + pos = next == std::string_view::npos ? 
next : next + 1; } + + // If we get here we wrote all the pieces without pointing at anything, so we need to append + // a null byte to terminate the name: if (buf.empty()) - return false; + throw std::out_of_range{"Buffer too small"}; buf.front() = std::byte{0}; buf = buf.subspan(1); - return orig - buf.size(); - } - - bool write_name_into(std::span& buf, std::string_view name) - { - if (auto s = encode_name(buf, name)) - { - buf = buf.subspan(s); - return true; - } - return false; + if (buf_offset) + ++*buf_offset; } std::optional> decode_ptr(std::string_view name) diff --git a/src/dns/encode.hpp b/src/dns/encode.hpp index e0d5b7e4a..1939d1b3a 100644 --- a/src/dns/encode.hpp +++ b/src/dns/encode.hpp @@ -6,45 +6,67 @@ #include #include +#include #include #include namespace srouter::dns { - /// Writes the encoded version of DNS name `name` into buf, and returns how many bytes of buf - /// were written. If buf is too small to store the encoded name, returns 0. - size_t encode_name(std::span buf, std::string_view name); + // Custom hasher to let us look up a string_view key in a string-keyed unordered map: + struct transparent_string_hash + { + using is_transparent = void; + [[nodiscard]] size_t operator()(std::string_view txt) const { return std::hash{}(txt); } + }; + + using prev_names_t = std::unordered_map>; + + /// Writes the encoded version of DNS name `name` into buf, mutating buf to eliminate the + /// written bytes. Throws if buf is too small to store the encoded name. + /// + /// prev_names contains pointer values relative to the start of the message, used for name + /// compression, and buf_offset contains the relative position of the beginning of buf to the + /// start of the message. New names added here should be added into it so that later repeated + /// names (or name suffixes) can use compression. 
+ /// + /// These should normally be provided so that answers can compress names by pointing back into + /// the question, but can be nullptr to disable tracking (such as when dealing with + /// pre-compressed name data). + void encode_name(std::span& buf, std::string_view name, prev_names_t* prev_names, uint16_t* buf_offset); - /// Same as encode_name, except that instead of returning the written size, on success it mutates the span - /// to drop the written prefix. Returns true (and prefix-drops the written part of the span) on success, - /// false on failure. Note that the failure case can still partially write into span. - bool write_name_into(std::span& buf, std::string_view name); + /// Extracts the bytes making up an encoded name from the buffer, returning them and shortening + /// buf by the extracted bytes. + std::optional> extract_name_data(std::span& buf); /// decode name from buffer, mutating the buffer to begin just past the extracted name. Return - /// nullopt (without mutating buf) on failure. + /// nullopt (without mutating buf) on failure. Does not currently support compressed names (but + /// those are not typically used in questions). std::optional extract_name(std::span& buf); /// Encodes an integer in big-endian order into the buffer, mutating the span to start just - /// after the written integer. Returns true on success, false if the span was too small. + /// after the written integer. Throws if buf is too small. Returns sizeof(T) (i.e. the amount + /// written into the buffer), for convenience. template - bool write_int_into(std::span& buf, T value) + size_t write_int_into(std::span& buf, T value) { if (buf.size() < sizeof(T)) - return false; + throw std::out_of_range{"Buffer too small"}; oxenc::write_host_as_big(value, buf.data()); buf = buf.subspan(sizeof(T)); - return true; + return sizeof(T); } - // Calls write_int_info multiple times with the given integers. Returns true (and modifies buf) - // if all success. 
If any fail then false is returned and buf is left unchanged. + // Calls write_int_into multiple times with the given integers. Throws if the buffer is too + // small. Returns the total size of the given integers (i.e. the number of bytes written to + // buf), for convenience. template - bool write_ints_into(std::span& buf, T... values) + size_t write_ints_into(std::span& buf, T... values) { - if (buf.size() < (0 + ... + sizeof(T))) - return false; + // NB: it's tempting to want to use `return (0 + ... + write_int_into())` here, but + // left-to-right evaluation of + operands isn't guaranteed, and that could put things into + // buf in the wrong order. With , as used here it is guaranteed (similarly to || or &&). ((void)write_int_into(buf, values), ...); - return true; + return (0 + ... + sizeof(T)); } /// Extracts a big-endian integer of the given type from the buffer, mutating the span to start @@ -72,23 +94,6 @@ namespace srouter::dns return true; } - // Takes some object T with an `size_t encode(buf)` function (such as various classes in this - // dns code) and attempts to call it with the given buffer. If it returns success (non-0) then - // this mutates `buf` to skip the written data and returns true; on failure it returns false. - template - bool encode_into(std::span& buf, const T& thing) - { - if (auto written = thing.encode(buf)) - { - buf = buf.subspan(written); - return true; - } - return false; - } - - // Writes encoded rr data into buf, mutating buf to point beyond the written data. Returns - // false (without mutating buf) if buf is too short; true on success. - bool write_rdata_into(std::span& buf, std::span rdata); // Extracts encoded rr data from buf, mutating buf to point beyond the extracted data. Returns // nullopt (without mutating buf) on error, the vector of decoded data on success. 
std::optional> extract_rdata(std::span& buf); diff --git a/src/dns/flags.hpp b/src/dns/flags.hpp new file mode 100644 index 000000000..a756b5a41 --- /dev/null +++ b/src/dns/flags.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace srouter::dns +{ + constexpr uint16_t flags_QR = 1 << 15; + constexpr uint16_t flags_AA = 1 << 10; + constexpr uint16_t flags_TC = 1 << 9; + constexpr uint16_t flags_RD = 1 << 8; + constexpr uint16_t flags_RA = 1 << 7; + constexpr uint16_t flags_AD = 1 << 5; + constexpr uint16_t flags_CD = 1 << 4; + + constexpr uint16_t flags_RCODE_mask = ~uint16_t{0b1111}; + + constexpr uint16_t RCODE_Refused = 5; + constexpr uint16_t RCODE_NxDomain = 3; + constexpr uint16_t RCODE_ServFail = 2; + constexpr uint16_t RCODE_FormErr = 1; + constexpr uint16_t RCODE_NoError = 0; + + inline constexpr uint16_t set_rcode(uint16_t flags, uint16_t rcode) { return (flags & flags_RCODE_mask) | rcode; } + +} // namespace srouter::dns diff --git a/src/dns/handler.cpp b/src/dns/handler.cpp new file mode 100644 index 000000000..19f36627a --- /dev/null +++ b/src/dns/handler.cpp @@ -0,0 +1,488 @@ + +#include "handler.hpp" + +#include "dns/rr.hpp" +#include "flags.hpp" +#include "message.hpp" +#include "nodedb.hpp" +#include "router/router.hpp" +#include "util/logging.hpp" +#include "util/logging/buffer.hpp" + +#include + +namespace srouter::dns +{ +#ifdef SROUTER_EMBEDDED_ONLY + static_assert(false, "dns::RequestHandler requires a full lokinet build!"); +#endif + + namespace + { + auto logcat = log::Cat("dns"); + + const auto random_snode = "random.{}"_format(RELAY_TLD); + + const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); + const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); + bool is_localhost(std::string_view qname) + { + return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld + or qname.ends_with(dot_localhost_ctld); + } + + std::optional parse_rid(std::string_view b32rid) + { + auto rid = 
std::make_optional(); + if (not rid->from_base32z(b32rid)) + rid.reset(); + return rid; + } + + std::optional is_snode(std::string_view name) + { + if (name.ends_with(RELAY_DOT_TLD)) + name.remove_suffix(RELAY_DOT_TLD.size()); + else + return std::nullopt; + return parse_rid(name); + } + + template + std::optional try_making(Args&&... args) + { + try + { + return std::make_optional(std::forward(args)...); + } + catch (...) + { + return std::nullopt; + } + } + + } // namespace + + RequestHandler::RequestHandler(Router& router) : _router{router} + { + if (!_router.tun_endpoint()) + throw std::logic_error{"dns::RequestHandler requires a TUN endpoint"}; + + if (!_router.config().dns._upstream_dns.empty()) + _unbound.emplace(_router); + } + + void RequestHandler::operator()( + std::span request, const quic::Address& from, ReplyCallback reply, bool tcp) + { + std::span client_ip; + if (from.is_ipv4()) + client_ip = {reinterpret_cast(&from.in4().sin_addr.s_addr), 4}; + else + client_ip = {reinterpret_cast(from.in6().sin6_addr.s6_addr), 16}; + + auto maybe = Message::extract_question(request, _cookie_secret, client_ip); + if (not maybe) + { + log::warning(logcat, "Ignoring unparseable DNS request from {}", from); + return; + } + auto& msg = *maybe; + + if (msg.bad_extract) + { + reply(msg.encode(tcp)); + return; + } + + if (msg.additional_edns && msg.additional_edns->bad_cookie) + { + // Client gave a bad cookie; reply with a request failure, but one containing the new + // cookie so that the client can retry. + + // The lower 4 bits of the BADCOOKIE code go here; the upper 8 bits are in the OPT EDNS + // value. + msg.hdr_fields |= PRR_EDNS::EXT_RCODE_BADCOOKIE & 0b1111; + // TODO FIXME: we currently always set the RA flag but that really should only be set + // when we have an upstream DNS server. 
(This TODO is also in message.cpp) + msg.hdr_fields |= flags_QR | flags_RA; + // badcookie is not an authoritative answer: + msg.hdr_fields &= ~flags_AA; + + reply(msg.encode(tcp)); + return; + } + + // If there is no question then there is no answer to worry about. This is a bit weird, but + // is sometimes used by clients to get an initial DNS cookie (via EDNS) without making an + // actual request. + if (!msg.question) + { + reply(msg.encode(tcp)); + return; + } + + auto& q = *msg.question; + + if (handle_local(reply, msg, std::string{q.name()}, tcp)) + return; + + // we don't provide a DoH resolver because it requires verified TLS TLS needs X509/ASN.1-DER + // and opting into the Root CA Cabal thankfully mozilla added a backdoor that allows ISPs to + // turn it off so we disable DoH for firefox using mozilla's ISP backdoor see: + // https://github.com/oxen-io/lokinet/issues/832 + + // is this firefox looking for their backdoor record? + if (q.name() == "use-application-dns.net") + // yea it is, let's turn off DoH because god is dead. 
+ return reply(msg.nxdomain().encode(tcp)); // press F to pay respects and send it back where it came from + + // Not for us, so forward to upstream handler + forward(std::move(msg), std::move(reply), tcp); + } + + bool RequestHandler::handle_local(ReplyCallback& reply, Message& msg, std::string qname, bool tcp) + { + // hook any PTR (reverse DNS) lookups for our local ranges + if (handle_local_ptr(msg, reply, tcp)) + return true; + + auto& q = *msg.question; + + if (!(q.has_tld(CLIENT_TLD) || q.has_tld(RELAY_TLD) || q.has_tld("loki"sv))) + return false; + + std::string hostname, tld; + std::vector sub; + { + auto nameparts = split(qname, "."); + if (nameparts.size() < 2) + { + log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); + reply(msg.formerr().encode(tcp)); + return true; + } + hostname = nameparts[nameparts.size() - 2]; + tld = nameparts.back(); + sub.reserve(nameparts.size() - 2); + for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) + sub.emplace_back(s); + } + + bool localhost = is_localhost(qname); + + // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the + // question type. + if (localhost) + { + auto our_hostname = _router.id().to_string(); + auto our_tld = _router.is_service_node ? RELAY_TLD : CLIENT_TLD; + auto our_name = "{}.{}"_format(our_hostname, our_tld); + + if (tld == "loki") + { + // first: report a cname for the deprecated localhost.loki -> localhost.sesh + + msg.set_rr_name("localhost.loki"); + msg.add_cname_reply("localhost.{}"_format(our_tld)); + } + // report CNAME: localhost.sesh -> pubkey.sesh + msg.set_rr_name("localhost.{}"_format(our_tld)); + msg.add_cname_reply(our_name); + + if (q.qtype == dns::RRType::CNAME) + { + // If we were queried specifically for a cname, then we are done. 
+ reply(msg.encode(tcp)); + return true; + } + + // Otherwise we continue processing to be able to return supplemental records through + // the cname, so that if you request "foo.localhost.loki" we end up returning: + // localhost.loki CNAME for localhost.sesh + // localhost.sesh CNAME for PUBKEY.sesh + // foo.PUBKEY.sesh IN X VALUE (or whatever) + // And so for the rest of the answer processing we act as if we were given PUBKEY.sesh, + // rather than localhost.loki/.sesh: + qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); + msg.set_rr_name(qname); + + tld = our_tld; + hostname = std::move(our_hostname); + } + else if (qname == random_snode) + { + // Similar to the localhost case: we first return a CNAME of random.snode -> + // SOMEPK.snode, then continue processing as if that was what you asked for. + + if (auto* rc = _router.node_db().get_random_rc()) + { + hostname = rc->router_id().to_string(); + qname = "{}.{}"_format(hostname, RELAY_TLD); + msg.add_cname_reply(qname, 1s); + if (q.qtype == dns::RRType::CNAME) + { + reply(msg.encode(tcp)); + return true; + } + + msg.set_rr_name(qname); + } + else + { + // We found no RC at all, which probably means our connection is dead. + reply(msg.nxdomain().encode(tcp)); + return true; + } + } + else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) + { + // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of + // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as + // mapping to an AAAA). 
+ + // TODO: .sesh SNS resolution, once implemented + + // ONS lookup: + auto lookup = "{}.loki"_format(hostname); + _router.session_endpoint().resolve_sns( + lookup, + [this, + lookup, + sub = std::move(sub), + reply = std::move(reply), + msg_ptr = std::make_shared(std::move(msg)), + cname_only = q.qtype == dns::RRType::CNAME, + tcp]( + std::optional maybe_netaddr, + bool /*assertive*/, + std::chrono::milliseconds ttl) mutable { + auto& msg = *msg_ptr; + msg.set_rr_name(lookup); + if (maybe_netaddr) + { + auto target = maybe_netaddr->to_string(); + msg.add_cname_reply(target, std::chrono::floor(ttl)); + if (cname_only) + return reply(msg.encode(tcp)); // CNAME-only query: the CNAME record is the complete answer, so send it now + auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); + msg.set_rr_name(qname); + if (!handle_local(reply, msg, std::move(qname), tcp)) + { + log::warning( + logcat, "ONS '{}' subrequest did not properly handle sending a reply!", lookup); + return reply(msg.servfail().encode(tcp)); + } + return; + } + // TODO FIXME: if `assertive` is true then we can provide a TTL for this failure + // (via an SOA authority record). (When not assertive we shouldn't do so, + // because not having an SOA TTL means a downstream recursive resolver shouldn't + // cache the negative response). + reply(msg.nxdomain().encode(tcp)); + }); + return true; + } + + if (q.qtype == dns::RRType::TXT) + { + // TXT records can be used to query some basic info: + + // TXT on MYPUBKEY.sesh returns the basic version and netid: + if (localhost && sub.empty()) + msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( + _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); + + // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) + else if (auto rid = is_snode(qname)) + { + if (auto* rc = _router.node_db().get_rc(*rid)) + { + msg.add_txt_reply("rc v={} i={} t={}"_format( + fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); + } + else + msg.nxdomain(); + } + + // TXT on path.PUBKEY.{sesh,snode} returns the current path info to that node, if a + // session is established (nxdomain if no active session). + else if (sub.size() == 1 && sub.front() == "path") + { + log::debug(logcat, "TXT path request for {}.{}", hostname, tld); + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + if (auto* s = _router.session_endpoint().get_session(*maybe_netaddr); s && s->is_established()) + { + auto path = s->current_path_info(); + msg.add_txt_reply( + "d={}; path={}; ttl={}; p={}; pj={}.{:03d}; pr={}; pt={}; pT={}"_format( + s->is_outbound ? 
"out" : "in", + fmt::join( + std::views::transform( + path.relays, [](const auto& r) { return "{}@{}"_format(r.first, r.second); }), + " "), + std::chrono::round(path.expiry - srouter::time_now_ms()).count(), + path.ping_mean.count(), + path.ping_jitter / 1ms, + (path.ping_jitter % 1ms).count(), + path.ping_responses, + path.ping_timeouts, + path.ping_recent_timeouts), + 0s); + } + else + msg.add_txt_reply("d=none"); + } + else + { + log::warning(logcat, "Failed to parse network address {}.{} for path query", hostname, tld); + msg.nxdomain(); + } + } + else + msg.nxdomain(); + reply(msg.encode(tcp)); + return true; + } + + // "Regular" A or AAAA lookups + if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) + { + // Attempt to parse a "pubkey.snode" or "pubkey.sesh": + if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) + { + // DNS lookup implies we want a session, so make one (NOP if we have one) + // This also means if we don't use that session the IP mapping will release when + // it expires, which it wouldn't otherwise without a tedious periodic check. + bool created_session = false; + try + { + created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); + } + catch (const std::exception& e) + { + log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); + } + if (created_session) + { + assert(_router.tun_endpoint()); + auto& tun = *_router.tun_endpoint(); + if (aaaa) + msg.add_reply(tun.map6(*maybe_netaddr)); + else if (!sub.empty() && sub.back() == "ipv4"sv) + { + // We don't map IPv4 addresses by default, but it is still possible to get + // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
+ if (auto v4_addr = tun.map4(*maybe_netaddr); v4_addr) + msg.add_reply(*v4_addr); + else + log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); + } + // else they requested A *not* using the magic ipv4 subdomain, so we only have + // AAAA to offer and thus we return a reply without an answer record (which is + // the proper DNS way to say "something exists at this address, but not with the + // type you requested", as opposed to the nxdomain reply below, which means + // "this record does not exist"). + } + else + msg.nxdomain(); + reply(msg.encode(tcp)); + + return true; + } + + log::warning(logcat, "DNS query failure: '{}' is not a valid Session Router name or address", qname); + reply(msg.encode(tcp)); + return true; + } + + if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 + && sub[0].starts_with('_') && sub[1].starts_with('_')) + { + if (auto rid = parse_rid(hostname)) + { + _router.session_endpoint().lookup_client_intro( + *rid, + [msg = std::make_shared(std::move(msg)), sub, reply = std::move(reply), tcp]( + const std::optional& cc) mutable { + if (cc) + { + for (const auto& srv : cc->SRVs()) + if (srv.service == sub[0] && srv.proto == sub[1]) + msg->add_reply(srv); + } + else + msg->nxdomain(); + + reply(msg->encode(tcp)); + }); + return true; + } + } + + // If we got through everything above without answering then they requested something weird + // (unhandled RR type, perhaps) and so let's just give an NXDOMAIN back: + reply(msg.nxdomain().encode(tcp)); + return true; + } + + bool RequestHandler::handle_local_ptr(Message& msg, ReplyCallback& reply, bool tcp) + { + assert(msg.question); + if (msg.question->qtype != srouter::dns::RRType::PTR) + return false; + + auto ip = dns::decode_ptr(msg.question->qname); + if (!ip) + return false; + + auto [mapped, is_ours] = std::visit([this](const auto& ip) { return _router.reverse_lookup(ip); }, *ip); + if (!is_ours) + return false; + + if 
(mapped) + msg.add_ptr_reply(mapped->to_string()); + else + msg.nxdomain(); + + reply(msg.encode(tcp)); + + return true; + } + + void RequestHandler::forward(Message&& m, ReplyCallback&& reply, bool tcp) + { + if (!_unbound) + { + log::warning( + logcat, "DNS request received for non-Session Router domain, but no upstream DNS is configured!"); + reply(m.refused().encode(tcp)); + return; + } + + assert(m.question); + + _unbound->query( + std::string{m.question->name()}, + m.question->qtype, + m.question->qclass, + [orig = std::make_shared(m.clone()), reply = std::move(reply), tcp]( + std::span response) mutable { + if (response.empty()) + return reply(orig->servfail().encode(tcp)); + + auto msg = RawMessage::parse(response); + if (!msg) + { + log::warning(logcat, "Failed to parse unbound query response: {}", buffer_printer{response}); + return reply(orig->servfail().encode(tcp)); + } + + msg->rewrite_for(*orig); + + reply(msg->encode(tcp)); + }); + } + +} // namespace srouter::dns diff --git a/src/dns/handler.hpp b/src/dns/handler.hpp new file mode 100644 index 000000000..84458a5ad --- /dev/null +++ b/src/dns/handler.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include "message.hpp" +#include "unbound.hpp" + +#include +#include + +namespace srouter +{ + class Router; + namespace quic = oxen::quic; +} // namespace srouter + +namespace srouter::dns +{ + class RequestHandler + { + public: + using ReplyCallback = std::function response)>; + + explicit RequestHandler(Router& router); + + // Called when a request arrives to process the request; when the answer is ready, calls + // `reply()` with it. If tcp is true then we allow up to 64k for the response, otherwise + // the max size is dependent on the DNS message itself. + void operator()( + std::span request, const quic::Address& from, ReplyCallback reply, bool tcp = false); + + private: + // Secret value we use as a key in DNS server cookie hashing. 
We generate a random once on + // each startup as we currently have no need for this to be deterministic, and that + // regeneration also provides DNS cookie key rotation whenever we restart. + std::array _cookie_secret; + + Router& _router; + + // Our unbound object for handling upstream DNS requests. Normally present, but can be + // explicitly disabled in the config. If unset, we return REFUSED if called upon to resolve + // something outside of Session Router domains. + std::optional _unbound; + + // Called to check if the request is for a local name (i.e. .sesh, .snode, .loki, or a PTR + // record for one of the addresses in our tun). If so, this handles the request and returns + // true; otherwise returns false. + bool handle_local(ReplyCallback& reply, Message& msg, std::string qname, bool tcp); + + // Checks for PTR for a range we own, and if so, replies and returns true. Returns false if + // not a PTR for us (i.e. the caller should continue processing). + bool handle_local_ptr(Message& m, ReplyCallback& reply, bool tcp); + + // Answers the question recursively via our configured upstream DNS servers (if any) + void forward(Message&& m, ReplyCallback&& reply, bool tcp); + }; + +} // namespace srouter::dns diff --git a/src/dns/listener.cpp b/src/dns/listener.cpp new file mode 100644 index 000000000..0c5861264 --- /dev/null +++ b/src/dns/listener.cpp @@ -0,0 +1,184 @@ +#include "listener.hpp" + +#include "router/router.hpp" +#include "util/logging.hpp" + +#include +#include + +namespace srouter::dns +{ + namespace + { + auto logcat = log::Cat("dns"); + + struct tcp_conn + { + Listener& listener; + bufferevent* bev; + quic::Address addr; + // This gets shared with the handler callback so that we can tell if the raw tcp_conn + // pointer is still valid: + std::shared_ptr alive = std::make_shared(true); + + tcp_conn(Listener& l, bufferevent* b, sockaddr* src, int socklen) + : listener{l}, bev{b}, addr{src, static_cast(socklen)} + {} + + void close() + { + 
bufferevent_free(bev); + bev = nullptr; + } + + ~tcp_conn() { *alive = false; } + }; + + } // namespace + + void Listener::evconnlistener_deleter::operator()(::evconnlistener* e) + { + if (e) + evconnlistener_free(e); + } + + Listener::Listener(Router& router, const quic::Address& bind) : _handler{router} { listen(router.loop, bind); } + + struct Listener::udp_socket_helper + { + std::unique_ptr sock; + }; + + // Defaulted, but here because the header doesn't have visibility into the predeclared unique_ptrs + Listener::~Listener() = default; + + void Listener::listen(quic::Loop& loop, const quic::Address& bind) + { + // call_get this so that we can be sure that the callbacks defined here can't be called + // before we are done setting it up: + loop.call_get([&] { + auto h = std::make_unique(); + + h->sock = std::make_unique( + loop.get_event_base(), bind, /*gso=*/false, [this, h = h.get()](quic::Packet&& pkt) { + if (pkt.path.remote == pkt.path.local) + { + log::warning(logcat, "DNS packet loop detected: ignoring UDP DNS request"); + return; + } + log::trace(logcat, "Incoming DNS UDP packet from {}", pkt.path.remote); + + // We don't need to worry about keep-alive here because we own the handler, and + // so if it's calling something then `this` must still be alive. 
+ _handler( + pkt.data(), + pkt.path.remote, + [path = pkt.path, udp = h->sock.get()](std::span payload) { + const size_t sz = payload.size(); + udp->send(path, payload.data(), &sz, 0, 1); + }); + }); + last_port = h->sock->address().port(); + _udp.push_back(std::move(h)); + + _tcp.emplace_back(evconnlistener_new_bind( + loop.get_event_base(), + [](evconnlistener* listener, evutil_socket_t fd, sockaddr* src, int socklen, void* ctx) { + auto* bev = bufferevent_socket_new(evconnlistener_get_base(listener), fd, BEV_OPT_CLOSE_ON_FREE); + auto* c = new tcp_conn{*static_cast(ctx), bev, src, socklen}; + + log::trace(logcat, "Incoming DNS TCP connection from {}", c->addr); + + bufferevent_setcb( + bev, + [](bufferevent* bev, void* ctx) { + // read callback + auto* in = bufferevent_get_input(bev); + while (true) + { + log::trace(logcat, "Incoming DNS TCP data"); + uint16_t reqlen; + if (evbuffer_copyout(in, &reqlen, 2) < 2) + break; + oxenc::big_to_host_inplace(reqlen); + log::trace(logcat, "Incoming DNS TCP request of size {}", reqlen); + size_t pending = evbuffer_get_length(in) - 2; + if (pending < reqlen) + { + // We don't have enough of the request yet, so leave the buffer + // as-is: libevent won't call us again until more data arrives, + // and will just leave the current buffer data in place. 
+ log::trace( + logcat, + "Not enough TCP data ({}) for request body ({}); delaying processing until we " + "get more", + pending, + reqlen); + break; + } + std::vector req; + req.resize(reqlen); + evbuffer_drain(in, 2); + evbuffer_remove(in, req.data(), reqlen); + log::trace(logcat, "Read {}-byte TCP DNS request", req.size()); + + auto* c = static_cast(ctx); + c->listener._handler( + req, + c->addr, + [c, alive = c->alive](std::span payload) { + if (!*alive) + return; + auto* out = bufferevent_get_output(c->bev); + // The only difference between UDP DNS and TCP DNS encoding is that + // UDP is per-packet, but TCP is a stream of messages where each + // message is prefixed with the length of the message: + uint16_t size = oxenc::host_to_big(static_cast(payload.size())); + if (evbuffer_add(out, &size, 2) == -1 + || evbuffer_add(out, payload.data(), payload.size()) == -1) + { + log::warning(logcat, "Failed to write response to TCP connection; closing"); + bufferevent_free(c->bev); + delete c; + } + }, + true); + } + }, + nullptr, + [](bufferevent* bev, short events, void* ctx) { + auto* c = static_cast(ctx); + // event callback + if (events & BEV_EVENT_EOF) + log::debug(logcat, "UDP TCP connection from {} closed by peer", c->addr); + if (events & BEV_EVENT_ERROR) + log::debug( + logcat, + "UDP TCP connection from {} closed by error: {}", + c->addr, + evutil_socket_error_to_string(EVUTIL_SOCKET_ERROR())); + if (events & BEV_EVENT_TIMEOUT) + // Is this even possible on a listening socket? 
+ log::debug(logcat, "UDP TCP connection from {} timed out", c->addr); + + if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT)) + { + bufferevent_free(bev); + delete c; + } + }, + c); + + bufferevent_enable(bev, EV_READ | EV_WRITE); + }, + this, + LEV_OPT_CLOSE_ON_FREE | LEV_OPT_REUSEABLE, + -1, + bind, + static_cast(bind.socklen()))); + + log::debug(logcat, "session-router DNS listening on {}", bind); + }); + } + +} // namespace srouter::dns diff --git a/src/dns/listener.hpp b/src/dns/listener.hpp new file mode 100644 index 000000000..a8967cdc3 --- /dev/null +++ b/src/dns/listener.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include "dns/handler.hpp" + +#include +#include + +#include +#include + +struct evconnlistener; + +namespace srouter +{ + class Router; +} +namespace srouter::dns +{ + namespace quic = oxen::quic; + + /// UDP+TCP listener for receiving and sending DNS requests. This generally works with a + /// dns::RequestHandler to actually generate the replies for a request, which then come back to + /// this class to actually send the response to the network. + class Listener + { + struct evconnlistener_deleter + { + void operator()(::evconnlistener* e); + }; + + struct udp_socket_helper; + + std::list> _udp; + std::list> _tcp; + + // The object that handles processing of the actual request once we have extracted it from a + // UDP packet or TCP stream: + RequestHandler _handler; + + public: + // Creates a TCP+UDP DNS listener that listens on `bind` for DNS requests. + Listener(Router& router, const quic::Address& bind); + + // Adds another TCP+UDP listener on `bind`. This is called implicitly during construction, + // but can also be called if there is a need to listen on multiple addresses. + void listen(quic::Loop& loop, const quic::Address& bind); + + // Set to the last port on which we set up a listener; this is mainly intended to be used + // when listening on an address with a 0 port which will *actually* listen on a high random + // port. 
+ uint16_t last_port; + + ~Listener(); + }; + +} // namespace srouter::dns diff --git a/src/dns/message.cpp b/src/dns/message.cpp index ac699b738..0edf61d18 100644 --- a/src/dns/message.cpp +++ b/src/dns/message.cpp @@ -1,238 +1,572 @@ #include "message.hpp" -#include "dns.hpp" #include "encode.hpp" -#include "net/ip_packet.hpp" +#include "flags.hpp" #include "srv_data.hpp" #include "util/logging.hpp" +#include "util/logging/buffer.hpp" -#include #include +#include #include +#include +#include +#include namespace srouter::dns { static auto logcat = log::Cat("dns"); - Message::Message(const Question& question) : hdr_id{0}, hdr_fields{} { questions.push_back(question); } + Message::Message(Question question) : hdr_id{0}, hdr_fields{}, question{std::move(question)} {} - size_t Message::encode(std::span buf) const + Message Message::clone() const { - auto orig = buf.size(); - if (!write_ints_into( - buf, - hdr_id, - hdr_fields, - static_cast(questions.size()), - static_cast(answers.size()), - static_cast(authorities.size()), - static_cast(additional.size()))) - return 0; - - for (const auto& question : questions) - if (!encode_into(buf, question)) - return 0; - - for (auto& a : answers) - if (!encode_into(buf, a)) - return 0; - - return orig - buf.size(); + Message c; + c.hdr_id = hdr_id; + c.hdr_fields = hdr_fields; + c.question = question; + c.additional_edns = additional_edns; + // Don't copy answers, or rr_name_override (which is just an intermediate answers helper) + return c; } - std::optional Message::extract(std::span& buf) + std::vector Message::encode(bool max_size) const { - auto maybe = std::make_optional(); - auto& m = *maybe; - uint16_t qd_count, an_count, ns_count, ar_count; - if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) + std::vector tmp; + tmp.resize( + max_size ? std::numeric_limits::max() + : additional_edns ? 
additional_edns->max_payload() + : 512); + + prev_names_t prev_names; + std::span buf{tmp}; + uint16_t buf_offset = 0; + + buf_offset += write_ints_into( + buf, + hdr_id, + hdr_fields, + question ? uint16_t{1} : uint16_t{0}, + static_cast(answers.size()), + static_cast(0 /*authorities.size()*/), + static_cast(additional_edns ? 1 : 0 /*additional.size()*/)); + + if (question) + question->encode(buf, prev_names, buf_offset); + + // If we run out of space and have to truncate then we are still supposed to include the + // EDNS part of the additional response, but other answers don't have to be: so if we hit + // such a failure, we back up to this point (throwing away all the answers) so that we + // can include the EDNS response info. + auto initial_len = buf_offset; + + try { - maybe.reset(); - return maybe; - } - m.questions.resize(qd_count); - m.answers.resize(an_count); - // Ignore these: - // m.authorities.resize(ns_count); - // m.additional.resize(ar_count); + for (auto& a : answers) + a->encode(buf, prev_names, buf_offset); - for (auto& q : m.questions) + if (additional_edns) + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) { - if (!q.extract(buf)) + log::debug(logcat, "Response too large! Setting truncation bit"); + + oxenc::write_host_as_big(hdr_fields | flags_TC, tmp.data() + 2); + + // Reset our buffer position back to just after the questions were added. We do this + // even if we aren't going to add EDNS stuff below, because we are not supposed to + // include partial RR entries in a truncated reply. 
+ buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + // Replace the answers count with a 0: + oxenc::write_host_as_big(0, tmp.data() + 2 + 2 + 2); + buf_offset = initial_len; + + if (additional_edns) { - log::debug(logcat, "failed to decode question"); - maybe.reset(); - return maybe; + try + { + additional_edns->encode(buf, prev_names, buf_offset); + } + catch (const std::out_of_range&) + { + // If this failed too then we don't have enough space for the EDNS so we'll just have to omit it + log::debug(logcat, "Unable to fit EDNS additional into DNS response!"); + buf = std::span{tmp.data() + initial_len, tmp.size() - initial_len}; + buf_offset = initial_len; + // Replace the additional count with a 0: + oxenc::write_host_as_big(0, tmp.data() + 2 + 2 + 2 + 2 + 2); + } } } - for (auto* as : {&m.answers, &m.authorities, &m.additional}) - if (!as->empty()) - log::debug(logcat, "Ignoring answer/authorities/additional sections in dns Message"); - return maybe; - } + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); + tmp.shrink_to_fit(); - nlohmann::json Message::ToJSON() const - { - auto result = nlohmann::json{{"id", hdr_id}, {"fields", hdr_fields}}; - auto& ques = (result["questions"] = nlohmann::json::array()); - auto& ans = (result["answers"] = nlohmann::json::array()); - for (const auto& q : questions) - ques.push_back(q.ToJSON()); - for (const auto& a : answers) - ans.push_back(a.ToJSON()); - return result; + return tmp; } - std::vector Message::encode() const + static std::array make_server_cookie( + std::span client_cookie, + std::span client_ip, + std::span server_cookie_secret, + std::chrono::sys_seconds ts = std::chrono::floor(std::chrono::system_clock::now())) { - std::vector tmp; - tmp.resize(1500); - auto size = encode(tmp); - if (size == 0) - throw std::runtime_error("cannot encode dns message"); - tmp.resize(size); - return tmp; + assert(client_ip.size() == 4 || client_ip.size() == 16); + + 
static_assert(server_cookie_secret.size() == crypto_shorthash_siphash24_KEYBYTES); + + std::array cookie; + auto ccookie = std::span{cookie}.first<8>(); + auto scookie = std::span{cookie}.last<16>(); + std::memcpy(ccookie.data(), client_cookie.data(), 8); + + // The first 8 bytes of the server cookie (as per RFC 9018) are: + // - version (always 1) + // - three reserved bytes + // - 4-byte, uint32 unix timestamp + scookie[0] = std::byte{1}; // Version + scookie[1] = std::byte{0}; // - + scookie[2] = std::byte{0}; // - reserved + scookie[3] = std::byte{0}; // - + auto ts_val = static_cast(ts.time_since_epoch().count()); + oxenc::write_host_as_big(ts_val, &scookie[4]); + + // The last 8 bytes of the server cookie are a hash of 8-byte client + // cookie, then the above 8 bytes server cookie fields, then the + // 4- or 16-byte client IP (in network order notation). + std::array hash_data{{0}}; + std::memcpy(hash_data.data(), ccookie.data(), 8); + std::memcpy(hash_data.data() + 8, scookie.data(), 8); + std::memcpy(hash_data.data() + 16, client_ip.data(), client_ip.size()); + crypto_shorthash_siphash24( + reinterpret_cast(scookie.data() + 8), + hash_data.data(), + 16 + client_ip.size(), + reinterpret_cast(server_cookie_secret.data())); + + return cookie; } - void Message::add_serv_fail() + std::optional Message::extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_ip) { - if (questions.size()) + if (client_ip.size() != 4 && client_ip.size() != 16) + throw std::logic_error{"Invalid client IP for Message::extract_question"}; + auto result = std::make_optional(); + auto& m = *result; + uint16_t qd_count, an_count, ns_count, ar_count; + if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) { - hdr_fields |= flags_RCODEServFail; - // authorative response with recursion available - hdr_fields |= flags_QR | flags_AA | flags_RA; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; + 
result.reset(); + return result; } + if (qd_count > 1) + { + log::warning(logcat, "Ignoring archaic DNS request with {} > 1 questions", qd_count); + m.bad_extract = true; + return result; + } + // Ignore these: + // m.answers.resize(an_count); + // m.authorities.resize(ns_count); + // m.additional.resize(ar_count); + + try + { + if (qd_count) + { + auto& q = m.question.emplace(); + if (!q.extract(buf)) + throw std::invalid_argument{"invalid question"}; + } + + // Skip any answers or authority records: + for (uint16_t i = 0; i < an_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid answer RR"}; + for (uint16_t i = 0; i < ns_count; i++) + if (!ParsedRR::extract(buf)) + throw std::invalid_argument{"invalid authority RR"}; + + // In the additional section we look for an EDNS entry, and skip anything else: + for (uint16_t i = 0; i < ar_count; i++) + { + static_assert(crypto_shorthash_siphash24_KEYBYTES == 16); + auto a_rr = ParsedRR::extract(buf); + if (!a_rr) + throw std::invalid_argument{"invalid additional RR"}; + if (a_rr->name != "." 
|| a_rr->rr_type != RRType::OPT) + { + continue; + } + + if (m.additional_edns) + throw std::invalid_argument{"found invalid multiple additional OPT records"}; + + auto max_payload = static_cast(a_rr->rr_class); + m.additional_edns.emplace(std::min(max_payload, 1232), a_rr->ttl); + + std::optional> cookie; + for (auto optbuf = a_rr->rdata; !optbuf.empty();) + { + if (optbuf.size() < 4) + throw std::invalid_argument{"additional OPT data section too small"}; + auto opt_code = oxenc::load_big_to_host(optbuf.data()); + auto opt_len = oxenc::load_big_to_host(optbuf.data() + 2); + optbuf = optbuf.subspan(4); + if (opt_len > optbuf.size()) + throw std::invalid_argument{"additional OPT option value length too small"}; + auto value = optbuf.subspan(0, opt_len); + optbuf = optbuf.subspan(opt_len); + + if (opt_code == PRR_EDNS::OPT_COOKIE) + { + if (m.additional_edns->cookie) + throw std::invalid_argument{"Duplicate OPT client cookies"}; + + if (value.size() == 8) + { + // This is the client sending a new cookie, requesting a new server + // cookie (i.e. because it doesn't currently have one). + + m.additional_edns->cookie = + make_server_cookie(value.first<8>(), client_ip, server_cookie_secret); + } + else if (value.size() == 24) + { + // This is the client sending its cookie along with a previously + // obtained server cookie for that client cookie, so we are supposed + // to validate it. + auto ccookie = value.first<8>(); + auto scookie = value.last<16>(); + + std::chrono::sys_seconds ts{ + std::chrono::seconds{oxenc::load_big_to_host(&scookie[4])}}; + + auto expected = make_server_cookie(ccookie, client_ip, server_cookie_secret, ts); + bool bad_cookie = std::memcmp(value.data(), expected.data(), 24) != 0; + + auto now = std::chrono::floor(std::chrono::system_clock::now()); + + if (!bad_cookie && ts >= now - 30min && ts <= now + 5min) + // Cookie is good and the timestamp in it is close to now, so the + // cookie stays as-is. 
+ std::memcpy(m.additional_edns->cookie.emplace().data(), value.data(), 24); + + else + { + // If the cookie timestamp is too far away then it is a badcookie + // failure. (We don't have to worry about client clock skew because + // supposedly *we* issued this with the timestamp in it). + if (bad_cookie || ts < now - 1h || ts > now + 5min) + { + // When this is set we'll send a proper bad cookie response + // immediately after parsing: + m.additional_edns->bad_cookie = true; + // Extended rcode is, um, a wee bit hacky: we put the high 8 + // bits of the 12-bit error code into the OPT TTL field, and + // then continue to use the 4-bit RCODE for the bottom 4 bits. + m.additional_edns->ttl = + std::chrono::seconds{(uint32_t{PRR_EDNS::EXT_RCODE_BADCOOKIE} >> 4) << 24}; + // (The other bytes are all 0 values) + } + + // else it's valid, just a little bit (but not too) old and they are + // due for a new cookie. + + // In either of the above cases, we give the client a new cookie + // to use, with an updated new timestamp + m.additional_edns->cookie = + make_server_cookie(ccookie, client_ip, server_cookie_secret, now); + } + } + // Else we have an unparseable/non-understood cookie, and so we are supposed + // to ignore the option and discard the cookie data. + } + } + } + } + catch (const std::exception& e) + { + log::debug(logcat, "failed to parse DNS message: {}", e.what()); + m.bad_extract = true; + } + + return result; } + void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } + + // TODO FIXME: "RA" means we advertise that we support recursion, but we should only do that + // when we have an upstream DNS server available. 
(This TODO is also in server.cpp) static constexpr uint16_t reply_flags = flags_QR | flags_AA | flags_RA; - void Message::add_reply(ipv4 addr, std::chrono::seconds ttl) + void Message::add_nodata_reply() { - std::vector a; - a.resize(4); - oxenc::write_host_as_big(addr.addr, a.data()); - add_reply(RRClass::IN, RRType::A, std::move(a), ttl); + if (question) + hdr_fields |= reply_flags; } - void Message::add_reply(ipv6 addr, std::chrono::seconds ttl) + template RR, typename... Args> + void make_reply(Message& m, std::chrono::seconds ttl, Args&&... args) { - std::vector aaaa; - aaaa.resize(16); - oxenc::write_host_as_big(addr.hi, aaaa.data()); - oxenc::write_host_as_big(addr.lo, aaaa.data() + 8); - return add_reply(RRClass::IN, RRType::AAAA, std::move(aaaa), ttl); + if (!m.question) + return; + + m.hdr_fields |= reply_flags; + + m.answers.push_back(std::make_unique(std::string{m.get_rr_name()}, ttl, std::forward(args)...)); } - void Message::set_rr_name(std::optional name) { rr_name_override = std::move(name); } + void Message::add_reply(const ipv4& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } - void Message::add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl) - { - if (questions.empty()) - return; + void Message::add_reply(const ipv6& addr, std::chrono::seconds ttl) { make_reply(*this, ttl, addr); } - hdr_fields |= reply_flags; + void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) + { + make_reply(*this, ttl, std::string{name}); + } - auto& ans = answers.emplace_back(); - ans.rr_name = get_rr_name(); - ans.rr_type = type; - ans.rr_class = cls; - ans.ttl = ttl; - ans.rData = std::move(data); + void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) + { + make_reply(*this, ttl, std::string{name}); } - void Message::add_nodata_reply() + void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) { make_reply(*this, ttl, srv); } + + void 
Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) { make_reply(*this, ttl, txt); } + + Message&& Message::apply_rcode(uint16_t rcode, bool authoritative) { - if (not questions.empty()) + hdr_fields = set_rcode(hdr_fields, rcode); + if (question) + { hdr_fields |= reply_flags; + if (authoritative) + hdr_fields |= flags_AA; + else + hdr_fields &= ~flags_AA; + } + return std::move(*this); } - void Message::add_cname_reply(std::string_view name, std::chrono::seconds ttl) + Message&& Message::servfail() { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::CNAME, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode CNAME value {}", name); + answers.clear(); + return apply_rcode(RCODE_ServFail); } - void Message::add_ptr_reply(std::string_view name, std::chrono::seconds ttl) + Message&& Message::formerr() { - std::array tmp; - if (auto len = encode_name(tmp, name)) - add_reply(RRClass::IN, RRType::PTR, std::vector{tmp.data(), tmp.data() + len}, ttl); - else - log::error(logcat, "Failed to encode PTR value {}", name); + answers.clear(); + return apply_rcode(RCODE_FormErr); } - void Message::add_reply(const SRVData& srv, std::chrono::seconds ttl) + Message&& Message::refused() { - std::array tmp; - std::span remaining{tmp}; - if (!write_ints_into(remaining, srv.priority, srv.weight, srv.port)) - return; - if (!write_name_into(remaining, srv.target)) - return; - - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); + answers.clear(); + return apply_rcode(RCODE_Refused); } - void Message::add_txt_reply(std::string_view txt, std::chrono::seconds ttl) + Message&& Message::nxdomain(bool authoritative) { return apply_rcode(RCODE_NxDomain, authoritative); } + + std::optional RawMessage::parse(std::span buf) { - std::array tmp; - std::span remaining{tmp}; - while (!txt.empty()) + auto result = 
std::make_optional(); + auto& m = *result; + + uint16_t qd_count, an_count, ns_count, ar_count; + if (!extract_ints(buf, m.hdr_id, m.hdr_fields, qd_count, an_count, ns_count, ar_count)) + { + log::debug(logcat, "Failed to parse DNS header from raw message"); + return std::nullopt; + } + + m.questions.resize(qd_count); + m.answers.resize(an_count); + m.authorities.resize(ns_count); + m.additional.resize(ar_count); + + for (auto& q : m.questions) + q.extract(buf); + + for (auto* sect : {&m.answers, &m.authorities, &m.additional}) { - auto piecelen = std::min(txt.size(), size_t{255}); - if (remaining.size() <= piecelen) - throw std::length_error{"TXT record too big"}; - remaining.front() = static_cast(piecelen); - std::memcpy(remaining.data() + 1, txt.data(), piecelen); - txt.remove_prefix(piecelen); - remaining = remaining.subspan(1 + piecelen); + for (auto& rr : *sect) + { + auto name_bytes = extract_name_data(buf); + if (!name_bytes) + { + log::debug(logcat, "Failed to extract name data from raw message"); + return std::nullopt; + } + log::trace(logcat, "Extracted name bytes: {}", buffer_printer{*name_bytes}); + rr.name.assign(name_bytes->begin(), name_bytes->end()); + uint16_t typ, cls; + uint32_t ttl; + uint16_t rdlen; + if (!extract_ints(buf, typ, cls, ttl, rdlen)) + { + log::debug(logcat, "Failed to extract type/class/ttl/len"); + return std::nullopt; + } + rr.type = static_cast(typ); + rr.cls = static_cast(cls); + rr.ttl = std::chrono::seconds{ttl}; + if (buf.size() < rdlen) + { + log::debug(logcat, "Buffer is too short: {} remaining but rdlen={}", buf.size(), rdlen); + return std::nullopt; + } + rr.rdata.assign(buf.data(), buf.data() + rdlen); + buf = buf.subspan(rdlen); + } } - add_reply( - RRClass::IN, - RRType::SRV, - std::vector{tmp.data(), tmp.data() + tmp.size() - remaining.size()}, - ttl); + return result; } - void Message::add_nx_reply() + void RawMessage::rewrite_for(const Message& orig) { - if (questions.size()) + // We need to rewrite a few things 
here: + // - replace hdr_id + // - update/replace hdr_fields + // - AD should be preserved only if the client used EDNS and had the DO bit set, else + // cleared. + // - CD/RD should be copied from the original client message + // - Clear the TC flag. (We can set it, if needed, when encoding) + // - strip TSIG additional section, if present. + // - If the original request used EDNS, replace or append the OPT section in additional + // - Else strip the OPT from additional, if present. + + hdr_id = orig.hdr_id; + if (!orig.additional_edns || !orig.additional_edns->DO_bit()) + hdr_fields &= ~flags_AD; + hdr_fields &= ~(flags_CD | flags_RD | flags_TC); + hdr_fields |= orig.hdr_fields & flags_CD; + hdr_fields |= orig.hdr_fields & flags_RD; + + for (auto it = additional.begin(); it != additional.end();) { - answers.clear(); - authorities.clear(); - additional.clear(); - - // authorative response with recursion available - hdr_fields |= reply_flags; - // don't allow recursion on this request - hdr_fields &= ~flags_RD; - hdr_fields |= flags_RCODENxDomain; + if (it->type == RRType::OPT || it->type == RRType::TSIG) + it = additional.erase(it); + else + ++it; } + + additional_edns = orig.additional_edns; } - std::string Message::to_string() const + std::vector RawMessage::encode(bool max_size) const { - return fmt::format( - "[DNSMessage id={:x} fields={:x} questions={{{}}} answers={{{}}} authorities={{{}}} " - "additional={{{}}}]", + std::vector tmp; + tmp.resize( + max_size ? std::numeric_limits::max() + : additional_edns ? additional_edns->max_payload() + : 512); + + std::span buf{tmp}; + + std::optional edns; + if (additional_edns) + edns = additional_edns->to_raw(); + + write_ints_into( + buf, hdr_id, hdr_fields, - fmt::join(questions, ","), - fmt::join(answers, ","), - fmt::join(authorities, ","), - fmt::join(additional, ",")); + static_cast(questions.size()), + static_cast(answers.size()), + static_cast(authorities.size()), + static_cast(additional.size() + (edns ? 
1 : 0))); + + size_t header_end = buf.data() - tmp.data(); + + bool truncate = false; + + for (auto& q : questions) + { + try + { + encode_name(buf, q.name(), nullptr, nullptr); + write_ints_into(buf, static_cast(q.qtype), static_cast(q.qclass)); + } + catch (const std::out_of_range&) + { + truncate = true; + break; + } + } + + if (truncate) + log::warning( + logcat, "Unexpected DNS error: can't find question into {}-byte response message?!", tmp.size()); + + // If we fail to write the later sections, we'll back up to here so that we can at least + // write the EDNS RR in the additional section: + size_t q_end = truncate ? 0 : buf.data() - tmp.data(); + + auto write_section = [&](std::span section) { + if (truncate) + return; + for (const auto& rr : section) + if (!rr.write_to(buf)) + { + truncate = true; + return; + } + }; + + for (auto* sect : {&answers, &authorities, &additional}) + if (!truncate) + write_section(*sect); + + if (!truncate && edns) + // Append the EDNS (OPT RR) to the end of additional; this *could* cause truncation + // which is why we need to do it here and then try again (under truncate) below. + write_section({&*edns, 1}); + + if (truncate) + { + // We couldn't fit the entire reply, so we need to: + // - set the TC (truncate) bit in the header flags + oxenc::write_host_as_big(hdr_fields | flags_TC, tmp.data() + 2); + + // - throw away any answers/authorities/additionals by backing up to the end of the + // question section. 
+ buf = std::span{tmp.data() + q_end, tmp.size() - q_end}; + + // - If we couldn't even write the question (which is very strange) then reset the + // question count to 0 and reset the buffer even further back to the end of the + // header: + if (q_end == 0) [[unlikely]] + { + buf = std::span{tmp.data() + header_end, tmp.size() - header_end}; + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 4); // question count + } + + // - Set the answers, authorities counts to 0 + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 6); // answer count + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 8); // authority count + + // - Set the additional count to 1 if we have EDNS info, 0 otherwise. + oxenc::write_host_as_big(edns ? uint16_t{1} : uint16_t{0}, tmp.data() + 10); // additional count + + // - Write the EDNS (OPT) RR for the additional section + // - If *this* fails to write then also reset additional to 0 + if (edns && !edns->write_to(buf)) + oxenc::write_host_as_big(uint16_t{0}, tmp.data() + 10); // additional count + } + + // Trim the excess: + tmp.resize(tmp.size() - buf.size()); + tmp.shrink_to_fit(); + return tmp; } } // namespace srouter::dns diff --git a/src/dns/message.hpp b/src/dns/message.hpp index b12369091..0b0cd5471 100644 --- a/src/dns/message.hpp +++ b/src/dns/message.hpp @@ -4,8 +4,6 @@ #include "question.hpp" #include "rr.hpp" -#include - #include namespace srouter @@ -16,18 +14,43 @@ namespace srouter { struct SRVData; + // Class representing a DNS question and response as returned by Session Router for local + // Session Router results (e.g. querying .sesh addresses). struct Message { Message() = default; - explicit Message(const Question& question); + explicit Message(Question question); - nlohmann::json ToJSON() const; + // Non-copyable; see clone() if you want a copy with just the question. 
+ Message(const Message&) = delete; - static constexpr auto DEFAULT_ANSWER_TTL = 10s; + Message(Message&&) = default; + + // Clones the message with question/flags/edns response data, but with no answers + Message clone() const; - void add_nx_reply(); + static constexpr auto DEFAULT_ANSWER_TTL = 10s; - void add_serv_fail(); + // These three methods mutate the message into a SERVFAIL/FORMERR/REFUSED response code, + // clearing all answers. These return an rvalue reference to the object itself to allow + // the call to operate like an implicit `std::move()` call as this is typically a final + // operation; in particular this means: `f(msg.servfail());` is equivalent to + // `msg.servfail(); f(std::move(msg));`. + Message&& servfail(); + Message&& formerr(); + Message&& refused(); + + // Mutate message into a NXDOMAIN but without clearing existing answers. Returns an + // rvalue reference to the current object to allow the result to be easily moved away. + // + // The message will include the authoritative flag (AA) if the argument is omitted (or + // true), and omit it if false. + Message&& nxdomain(bool authoritative = true); + + // This clears any answers and sets the appropriate header flags for a BADCOOKIE + // response. Note that this is only valid when the message has `additional_edns` as + // part of this error code value is carried in that additional RR data. + void set_badcookie_flags(); // Sets the RR name for future added entries, or resets it to default with nullopt. The // default (if not called or reset) is to use the question's name value. Once set, the @@ -35,7 +58,7 @@ namespace srouter void set_rr_name(std::optional name); std::string_view get_rr_name() const { - return rr_name_override ? *rr_name_override : questions.size() ? questions.front().qname : ""sv; + return rr_name_override ? *rr_name_override : question ? 
question->qname : ""sv; } void add_nodata_reply(); @@ -43,9 +66,9 @@ namespace srouter void add_cname_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN A' reply containing the given ipv4 address - void add_reply(ipv4 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv4& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); // Adds an 'IN AAAA' reply containing the given ipv6 address - void add_reply(ipv6 addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); + void add_reply(const ipv6& addr, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); void add_reply(const SRVData& srv, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); @@ -53,23 +76,78 @@ namespace srouter void add_ptr_reply(std::string_view name, std::chrono::seconds ttl = DEFAULT_ANSWER_TTL); - size_t encode(std::span buf) const; - std::vector encode() const; - - static std::optional extract(std::span& buf); + // Encodes a response. If max_size is true then we allow up to 65535 bytes for the + // response, otherwise we allow either the EDNS max payload (up to 1232), or 512 + // (without EDNS in the query). + std::vector encode(bool max_size = false) const; + + // Parses a question Message from the given buf, removing the question from the prefix + // of buf. `server_cookie_secret` and `client_addr` contains information needed for DNS + // cookie handling; `server_cookie_secret` is something derived from the SR private key + // seed + startup time, while client_addr is the raw bytes of the IP address (4 or 16 + // bytes for IPv4/IPv6, respectively). + // + // Returns nullopt if the request cannot be parsed at all; returns a Message with + // `bad_extract` set to true if it was parseable but not valid and should be immediately + // replied to with an error (which will already be set up in the returned Message + // object). 
+ static std::optional extract_question( + std::span& buf, + std::span server_cookie_secret, + std::span client_addr); + + // See extract_question, above. + bool bad_extract{false}; std::string to_string() const; uint16_t hdr_id; uint16_t hdr_fields; - std::vector questions; - std::vector answers; - std::vector authorities; - std::vector additional; + + std::optional question; + std::vector> answers; + + // Currently unused: + // std::vector authorities; + // std::vector additional; + + // Currently the only additional record we do anything with is the OPT section for + // enabling EDNS (most significantly for allowing large DNS packets) + std::optional additional_edns; + std::optional rr_name_override; private: void add_reply(RRClass cls, RRType type, std::vector data, std::chrono::seconds ttl); + + Message&& apply_rcode(uint16_t rcode, bool authoritative = false); + }; + + // Somewhat similar to the above, but only designed for passing through a message (with + // a few required modifications) rather than building one. + struct RawMessage + { + uint16_t hdr_id; + uint16_t hdr_fields; + std::vector questions; + std::vector answers; + std::vector authorities; + std::vector additional; + + /// Parses a DNS message; returns nullopt if unparseable. Unlike Message, this parsing + /// only performs a raw parsing (i.e. there is no interpretation of values). + static std::optional parse(std::span msg); + + // Does some minor rewriting of the raw message according to the given Message that lead + // to the query. This includes updating the header id to match, updating fields to + // match the request, and removing EDNS or TSIG additional value. If the original + // message has an additional_edns value, it is copied into this object's additional_edns + // to be appended during encoding. 
+ void rewrite_for(const Message& orig); + + std::optional additional_edns; + + std::vector encode(bool max_size = false) const; }; } // namespace dns diff --git a/src/dns/question.cpp b/src/dns/question.cpp index 0473e6243..03a63db96 100644 --- a/src/dns/question.cpp +++ b/src/dns/question.cpp @@ -1,7 +1,6 @@ #include "question.hpp" #include "address/address.hpp" -#include "dns.hpp" #include "encode.hpp" #include "util/logging.hpp" #include "util/logging/buffer.hpp" @@ -19,14 +18,10 @@ namespace srouter::dns throw std::invalid_argument{"qname cannot be empty"}; } - size_t Question::encode(std::span buf) const + void Question::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const { - auto orig = buf; - if (!write_name_into(buf, qname)) - return 0; - if (!write_ints_into(buf, static_cast(qtype), static_cast(qclass))) - return 0; - return orig.size() - buf.size(); + encode_name(buf, qname, &prev_names, &buf_offset); + buf_offset += write_ints_into(buf, static_cast(qtype), static_cast(qclass)); } bool Question::extract(std::span& buf) diff --git a/src/dns/question.hpp b/src/dns/question.hpp index 1647b6a81..4efc35b5d 100644 --- a/src/dns/question.hpp +++ b/src/dns/question.hpp @@ -11,7 +11,7 @@ namespace srouter::dns Question() = default; Question(std::string name, RRType type); - size_t encode(std::span buf) const; + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; bool extract(std::span& buf); diff --git a/src/dns/rr.cpp b/src/dns/rr.cpp index fcd1070a8..f9d8e2a2c 100644 --- a/src/dns/rr.cpp +++ b/src/dns/rr.cpp @@ -1,45 +1,184 @@ #include "rr.hpp" -#include "dns.hpp" #include "encode.hpp" #include #include +#include +#include namespace srouter::dns { - ResourceRecord::ResourceRecord(std::string name, RRType type, std::vector data) - : rr_name{std::move(name)}, rr_type{type}, rr_class{RRClass::IN}, ttl{1s}, rData{std::move(data)} - {} + std::optional ParsedRR::extract(std::span& buf) + { + auto name = 
extract_name(buf); + if (!name || buf.size() < 2 + 2 + 4 + 2 /* type + class + ttl + rdatalen */) + return std::nullopt; + auto typ = oxenc::load_big_to_host(buf.data()); + auto cls = oxenc::load_big_to_host(buf.data() + 2); + auto ttl = oxenc::load_big_to_host(buf.data() + 4); + auto len = oxenc::load_big_to_host(buf.data() + 8); + buf = buf.subspan(10); + + if (buf.size() < len) + return std::nullopt; + + auto rdata = buf.subspan(0, len); + buf = buf.subspan(len); + + return ParsedRR{ + .name = std::move(*name), + .rr_type = static_cast(typ), + .rr_class = static_cast(cls), + .ttl = std::chrono::seconds{ttl}, + .rdata = rdata}; + } - size_t ResourceRecord::encode(std::span buf) const + static void check_buf_size(const std::span& buf, size_t needed) { - auto orig = buf.size(); - if (write_name_into(buf, rr_name) - && write_ints_into( - buf, - static_cast(rr_type), - static_cast(rr_class), - static_cast(ttl.count())) - && write_rdata_into(buf, rData)) - return orig - buf.size(); - return 0; + if (buf.size() < needed) + throw std::out_of_range{"DNS response exceeds max size"}; + } + + void ResourceRecord::encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, rr_name, &prev_names, &buf_offset); + buf_offset += write_ints_into( + buf, static_cast(rr_type()), static_cast(rr_class), static_cast(ttl.count())); + // The RR value is in a chunk with a 2-byte length in front of it. We don't actually know + // the length yet (especially for things like CNAME, where there might be name compression), + // so we're going to stick a 0 in and then come back and fill it in after we write the + // value. 
+ check_buf_size(buf, 2); + auto size_buf = buf.subspan(0, 2); + buf_offset += 2; + buf = buf.subspan(2); + encode_data(buf, prev_names, buf_offset); + uint16_t size = buf.data() - size_buf.data() - 2; + oxenc::write_host_as_big(size, size_buf.data()); } nlohmann::json ResourceRecord::ToJSON() const { return nlohmann::json{ {"name", rr_name}, - {"type", static_cast(rr_type)}, + {"type", static_cast(rr_type())}, {"class", static_cast(rr_class)}, {"ttl", ttl.count()}, - {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}}; + /* FIXME: need to virtualize a display for the data, if we care about json representation: + {"rdata", std::string{reinterpret_cast(rData.data()), rData.size()}}*/}; } std::string ResourceRecord::to_string() const { - return "RR:[ name:{} | type:{} | class:{} | ttl:{} | rdata-size:{} ]"_format( - rr_name, static_cast(rr_type), static_cast(rr_class), ttl, rData.size()); + return "RR:[name:{}|type:{}|class:{}|ttl:{}]"_format( + rr_name, static_cast(rr_type()), static_cast(rr_class), ttl); + } + + void RR_bytes::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + check_buf_size(buf, rData.size()); + std::memcpy(buf.data(), rData.data(), rData.size()); + buf = buf.subspan(rData.size()); + buf_offset += rData.size(); + } + + RR_A::RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr) : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(4); + oxenc::write_host_as_big(addr.addr, rData.data()); + } + + RR_AAAA::RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr) + : RR_bytes{std::move(rr_name), ttl} + { + rData.resize(16); + oxenc::write_host_as_big(addr.hi, rData.data()); + oxenc::write_host_as_big(addr.lo, rData.data() + 8); + } + + RR_TXT::RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value) + : RR_bytes{std::move(rr_name), ttl} + { + // TXT records get encoded as {SZ}{data} where SZ is one byte indicating the length of data, + // however they can 
be repeated which is why we have SZ twice: once for the rData, but then + // again internally for multiple possible strings within the answer. + // + // In total that means we are storing the value length plus 1 additional byte per 255 length + // (or part thereof): + rData.reserve(value.size() + (value.size() + 254) / 255); + do + { + auto* bytes = reinterpret_cast(value.data()); + size_t size = std::min(255, value.size()); + rData.push_back(static_cast(size)); + rData.insert(rData.end(), bytes, bytes + size); + value.remove_prefix(size); + } while (!value.empty()); + } + + void RR_target::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + encode_name(buf, name, &prev_names, &buf_offset); + } + + void RR_SRV::encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const + { + buf_offset += write_ints_into(buf, priority, weight, port); + encode_name(buf, target, &prev_names, &buf_offset); + } + + void PRR_EDNS::encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const + { + // NB: if we update this to write more than just the cookie, to_raw() below also needs to + // get updated. + if (cookie) + { + uint16_t datalen = 2 + 2 + cookie->size(); // code + length + data + check_buf_size(buf, datalen); + oxenc::write_host_as_big(OPT_COOKIE, buf.data()); + oxenc::write_host_as_big(static_cast(cookie->size()), buf.data() + 2); + std::memcpy(buf.data() + 4, cookie->data(), cookie->size()); + buf = buf.subspan(datalen); + buf_offset += datalen; + } + } + + RawRR PRR_EDNS::to_raw() const + { + RawRR raw; + raw.type = rr_type(); + raw.cls = rr_class; + raw.ttl = ttl; + raw.name.resize(1, std::byte{0}); // Encoded empty name (i.e. 
root domain) + if (cookie) + { + raw.rdata.resize(2 + 2 + cookie->size()); + std::span buf{raw.rdata}; + prev_names_t ignored; + [[maybe_unused]] uint16_t ignored2 = 0; + encode_data(buf, ignored, ignored2); + } + return raw; + } + + bool RawRR::write_to(std::span& buf) const + { + // NAME + TYPE + CLASS + TTL + RDLENGTH + RDATA + auto needed = name.size() + 2 + 2 + 4 + 2 + rdata.size(); + if (needed > buf.size()) + return false; + std::memcpy(buf.data(), name.data(), name.size()); + buf = buf.subspan(name.size()); + write_ints_into( + buf, + static_cast(type), + static_cast(cls), + static_cast(ttl.count()), + static_cast(rdata.size())); + std::memcpy(buf.data(), rdata.data(), rdata.size()); + buf = buf.subspan(rdata.size()); + return true; } } // namespace srouter::dns diff --git a/src/dns/rr.hpp b/src/dns/rr.hpp index 798a624a4..eea0aba45 100644 --- a/src/dns/rr.hpp +++ b/src/dns/rr.hpp @@ -1,5 +1,8 @@ #pragma once +#include "encode.hpp" +#include "srv_data.hpp" + #include #include @@ -15,34 +18,198 @@ namespace srouter::dns enum class RRType : uint16_t { A = 1, - NS = 2, CNAME = 5, PTR = 12, - MX = 15, TXT = 16, AAAA = 28, SRV = 33, + + OPT = 41, + TSIG = 250, + }; + + // Parsed RR data: this is intentionally very raw and is only for extracting the data, not + // interpreting it. Note that the rdata value points into the input buf: the ParsedRR data + // should not be held longer than the input buffer! + struct ParsedRR + { + std::string name; + RRType rr_type; // *Not* necessarily one of the values defined above + RRClass rr_class; // *Not* necessarily one of the values defined above + std::chrono::seconds ttl; + std::span rdata; + + // Attempts to parse an RR from the beginning of `buf`. `buf` will have the prefix removed + // containing the extracted record. Returns nullopt on extraction error. 
+ static std::optional extract(std::span& buf); }; + // Unparsed RR data: this is used by RawMessage to hold the basic raw data of an RR, but without + // decoding non-integer binary values. That is, the NAME and RDATA are encoded exactly as + // provided (and so may have name compression pointers in them). This is designed so that it + // can be re-encoded in a byte-exact way (to avoid breaking compressed name values that may be + // in this or later RRs). + struct RawRR + { + std::vector name; + RRType type; + RRClass cls; + std::chrono::seconds ttl; + std::vector rdata; + + // Writes this RR data into `buf`, removing the written prefix from buf and returns true. + // If buf does not have enough room for the entire record then nothing is written, buf is + // not modified, and false is returned. + bool write_to(std::span& buf) const; + }; + + // Abstract base class we use for building RR responses struct ResourceRecord { - ResourceRecord() = default; - explicit ResourceRecord(std::string name, RRType type, std::vector rdata); + ResourceRecord(std::string rr_name, std::chrono::seconds ttl) : rr_name{std::move(rr_name)}, ttl{ttl} {} + + virtual ~ResourceRecord() = default; + + // Writes this RR to the beginning of buf, eliminating the written section from buf. Throws if buf is exceeded. + // + // This takes care of the basic stuff (name, type, class, ttl), then calls the virtual + // encode_data() to write the value. + void encode(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const; - // Writes this RR to the beginning of buf. Returns the number of bytes written, or 0 if the - // buffer is too small to hold it. 
- size_t encode(std::span buf) const; + virtual void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const = 0; nlohmann::json ToJSON() const; std::string to_string() const; std::string rr_name; - RRType rr_type; - RRClass rr_class; + RRClass rr_class = RRClass::IN; std::chrono::seconds ttl; - std::vector rData; + + virtual RRType rr_type() const = 0; static constexpr bool to_string_formattable = true; }; + + // Subclass of ResourceRecord that just has a binary chunk of data. Should not be used for data + // types containing compressible names in the value. The subclass must take care of encoding + // the rData member value as required; this base class encode_data simply barfs it into the + // buffer as-is. + struct RR_bytes : ResourceRecord + { + std::vector rData; + + using ResourceRecord::ResourceRecord; + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_A : RR_bytes + { + RR_A(std::string rr_name, std::chrono::seconds ttl, const ipv4& addr); + RRType rr_type() const override { return RRType::A; } + }; + struct RR_AAAA : RR_bytes + { + RR_AAAA(std::string rr_name, std::chrono::seconds ttl, const ipv6& addr); + RRType rr_type() const override { return RRType::AAAA; } + }; + struct RR_TXT : RR_bytes + { + RR_TXT(std::string rr_name, std::chrono::seconds ttl, std::string_view value); + RRType rr_type() const override { return RRType::TXT; } + }; + + // Base class for RR types that have a single target name as the value, such as CNAME and PTR + struct RR_target : ResourceRecord + { + std::string name; + + RR_target(std::string rr_name, std::chrono::seconds ttl, std::string name) + : ResourceRecord{std::move(rr_name), ttl}, name{std::move(name)} + {} + + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + struct RR_PTR : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::PTR; } 
+ }; + struct RR_CNAME : RR_target + { + using RR_target::RR_target; + RRType rr_type() const override { return RRType::CNAME; } + }; + struct RR_SRV : ResourceRecord + { + uint16_t priority; + uint16_t weight; + uint16_t port; + std::string target; + + RR_SRV(std::string rr_name, std::chrono::seconds ttl, const SRVData& srv) + : ResourceRecord{std::move(rr_name), ttl}, + priority{srv.priority}, + weight{srv.weight}, + port{srv.port}, + target{srv.target} + {} + + RRType rr_type() const override { return RRType::SRV; } + void encode_data(std::span& buf, prev_names_t& prev_names, uint16_t& buf_offset) const override; + }; + + // Pseudo-RR for EDNS; a client sends this in the additional section if it supports EDNS, and + // the server sends it back (if provided) to confirm that the server also supports EDNS. + struct PRR_EDNS : ResourceRecord + { + static constexpr uint16_t OPT_COOKIE = 10; + static constexpr uint16_t EXT_RCODE_BADCOOKIE = 23; + + static constexpr uint32_t DO_BIT = 1 << 15; + + std::optional> cookie; + + // Will be true if the full cookie we were provided was invalid or expired, in which case we + // are supposed to immediately fail with an extended BADCOOKIE error code (which will be + // encoded if this object is encoded into the output with this bool set to true). + bool bad_cookie{false}; + + // Constructs an EDNS value. This is rather hacky, to try to mash it into the fairly + // inflexible older DNS protocol: + // - NAME is always empty (i.e. ".", the root domain) + // - 32-bit TTL is nothing to do with ttl, but actually 3 packed fields: + // - 8-bit "extended rcode" + // - 8-bit version (currently 0) + // - 16-bit flags of which there is one for DNSSEC and all others are reserved + // - CLASS isn't a class at all but rather contains the supported UDP payload size. We set + // it to the recommended 1232 size, but if a client gave us a smaller value we should + // reflect that instead. 
+ // + // Beyond that, we support an optional DNS server cookie value (see RFC 7873 and 9018), + // which must be the 8-byte cookie sent by the client followed by a 16 byte server cookie. + PRR_EDNS( + uint16_t max_payload, + std::chrono::seconds pttl, + std::optional> cookie = std::nullopt) + : ResourceRecord{"", 0s}, cookie{std::move(cookie)} + { + // If the psuedo-ttl has the DO bit set then preserve that bit; otherwise we ignore + // anything in the pseudo-ttl (leaving it at 0): + if (pttl.count() & DO_BIT) + ttl = std::chrono::seconds{DO_BIT}; + rr_class = static_cast(max_payload); + } + + bool DO_bit() const { return ttl.count() & DO_BIT; } + + uint16_t max_payload() const { return static_cast(rr_class); } + constexpr RRType rr_type() const override { return RRType::OPT; } + void encode_data(std::span& buf, prev_names_t&, uint16_t& buf_offset) const override; + + RawRR to_raw() const; + }; + } // namespace srouter::dns diff --git a/src/dns/server.cpp b/src/dns/server.cpp deleted file mode 100644 index 2395538fb..000000000 --- a/src/dns/server.cpp +++ /dev/null @@ -1,705 +0,0 @@ -#include "server.hpp" - -#include "constants/apple.hpp" -#include "constants/platform.hpp" -#include "message.hpp" -#include "nm_platform.hpp" -#include "sd_platform.hpp" - -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace srouter::dns -{ - static auto logcat = log::Cat("dns"); - - void QueryJob_Base::cancel() - { - Message reply{_query}; - reply.add_serv_fail(); - send_reply(reply.encode()); - } - - /// sucks up udp packets from a bound socket and feeds it to a server - class UDPReader : public PacketSource, public std::enable_shared_from_this - { - Server& _dns; - std::unique_ptr _udp; - quic::Address _local_addr; - - public: - explicit UDPReader(Server& dns, quic::Loop& loop, quic::Address bind) : _dns{dns} - { - _udp = std::make_unique( - loop.get_event_base(), bind, /*gso=*/false, [this](quic::Packet&& pkt) { - auto& src = 
pkt.path.remote; // "remote" address is packet source, we ("local") are destination - if (src == _local_addr) - { - log::debug(logcat, "DNS packet received, not handling because we're the packet source", src); - return; - } - - if (not _dns.maybe_handle_payload(shared_from_this(), _local_addr, src, pkt.data())) - log::warning(logcat, "did not handle dns packet from {} to {}", src, _local_addr); - else - log::trace(logcat, "Handled DNS packet from {} to {}", src, _local_addr); - }); - - if (auto maybe_addr = bound_on()) - { - _local_addr = *maybe_addr; - log::debug(logcat, "session-router DNS server bound on {}", _local_addr); - } - else - throw std::runtime_error{"cannot find which address our dns socket is bound on"}; - } - - std::optional bound_on() const override { return _udp->address(); } - - bool would_loop(const quic::Address& to, const quic::Address& /*from*/) const override - { - return to != _local_addr; - } - - void send_udp(const quic::Address& to, const quic::Address&, std::span data) const override - { - const size_t bufsize = data.size(); - size_t n_pkts = 1; - auto [ior, sent] = _udp->send(quic::Path{_local_addr, to}, data.data(), &bufsize, 0, n_pkts); - - log::trace( - logcat, - "dns server {} UDP packet to {} (ec={})", - ior.success() ? 
"sent" : "failed to send", - to, - ior.error_code); - } - }; - - namespace libunbound - { - class Resolver; - - class Query : public QueryJob_Base, public std::enable_shared_from_this - { - std::shared_ptr src; - quic::Address resolverAddr; - quic::Address askerAddr; - - public: - explicit Query( - std::weak_ptr parent_, - Message query, - std::shared_ptr pktsrc, - quic::Address toaddr, - quic::Address fromaddr) - : QueryJob_Base{std::move(query)}, - src{std::move(pktsrc)}, - resolverAddr{std::move(toaddr)}, - askerAddr{std::move(fromaddr)}, - parent{parent_} - {} - std::weak_ptr parent; - int id{}; - - void send_reply(std::vector buf) override; - }; - - /// Resolver_Base that uses libunbound - class Resolver final : public Resolver_Base, public std::enable_shared_from_this - { - ub_ctx* m_ctx = nullptr; - quic::Loop& _loop; -#ifdef _WIN32 - // windows is dumb so we do ub mainloop in a thread - std::thread runner; - std::atomic running; -#else - // std::shared_ptr _poller; -#endif - - std::optional _local_addr; - std::unordered_set> _pending; - - struct ub_result_deleter - { - void operator()(ub_result* ptr) { ::ub_resolve_free(ptr); } - }; - - const net::Platform* net_ptr() const { return srouter::net::Platform::Default_ptr(); } - - static void callback(void* data, int err, ub_result* _result) - { - log::debug(logcat, "got dns response from libunbound"); - // take ownership of ub_result - std::unique_ptr result{_result}; - // borrow query - auto* query = static_cast(data); - if (err) - { - // some kind of error from upstream - log::warning(logcat, "Upstream DNS failure: {}", ub_strerror(err)); - query->cancel(); - return; - } - - log::trace(logcat, "queueing dns response from libunbound to userland"); - - auto* ans = reinterpret_cast(result->answer_packet); - std::vector payload{ans, ans + result->answer_len}; - // Replace the `id` value in the unbound response (which is the first 2 bytes of the - // message) with the one we were queried with: - 
oxenc::write_host_as_big(query->underlying().hdr_id, payload.data()); - - // send reply - query->send_reply(std::move(payload)); - } - - void add_upstream_resolver(const quic::Address& dns) - { - auto str = "{}@{}"_format(dns.host(), dns.port()); - - if (auto err = ub_ctx_set_fwd(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("cannot use {} as upstream dns: {}", str, ub_strerror(err))}; - } - } - - bool configure_apple_trampoline(const quic::Address& dns) - { - // On Apple, when we turn on exit mode, we tear down and then reestablish the - // unbound resolver: in exit mode, we set use upstream to a localhost trampoline - // that redirects packets through the tunnel. In non-exit mode, we directly use the - // upstream, so we look here for a reconfiguration to use the trampoline port to - // check which state we're in. - // - // We have to do all this crap because we can't directly connect to upstream from - // here: within the network extension, macOS ignores the tunnel we are managing and - // so, if we didn't do this, all our DNS queries would leak out around the tunnel. - // Instead we have to bounce things through the objective C trampoline code (which - // is what actually handles the upstream querying) so that it can call into Apple's - // special snowflake API to set up a socket that has the magic Apple snowflake sauce - // added on top so that it actually routes through the tunnel instead of around it. - // - // But the trampoline *always* tries to send the packet through the tunnel, and that - // will only work in exit mode. - // - // All of this macos behaviour is all carefully and explicitly documented by Apple - // with plenty of examples and other exposition, of course, just like all of their - // wonderful new APIs to reinvent standard unix interfaces with half-baked - // replacements. 
- - if constexpr (platform::is_apple) - { - if (dns.host() == "127.0.0.1" and dns.port() == apple::dns_trampoline_port) - { - // macOS is stupid: the default (0.0.0.0) fails with "send failed: Can't - // assign requested address" when unbound tries to connect to the localhost - // address using a source address of 0.0.0.0. Yay apple. - set_opt("outgoing-interface:", "127.0.0.1"); - - // The trampoline expects just a single source port (and sends everything - // back to it). - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(apple::dns_trampoline_source_port)); - return true; - } - } - return false; - } - - void configure_upstream(const srouter::DnsConfig& conf) - { - bool is_apple_tramp = false; - - // set up forward dns - for (const auto& dns : conf._upstream_dns) - { - add_upstream_resolver(dns); - is_apple_tramp = is_apple_tramp or configure_apple_trampoline(dns); - } - - if (auto maybe_addr = conf._query_bind; maybe_addr and not is_apple_tramp) - { - quic::Address addr{*maybe_addr}; - auto host = addr.host(); - - if (addr.port() == 0) - { - // unbound manages their own sockets because of COURSE it does. so we find - // an open port on our system and use it so we KNOW what it is before giving - // it to unbound to explicitly bind to JUST that port. 
- - auto fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); -#ifdef _WIN32 - if (fd == INVALID_SOCKET) -#else - if (fd == -1) -#endif - { - throw std::invalid_argument{ - fmt::format("Failed to create UDP socket for unbound: {}", strerror(errno))}; - } - -#ifdef _WIN32 -#define CLOSE closesocket -#else -#define CLOSE close -#endif - if (0 != bind(fd, static_cast(addr), addr.socklen())) - { - CLOSE(fd); - throw std::invalid_argument{ - fmt::format("Failed to bind UDP socket for unbound: {}", strerror(errno))}; - } - struct sockaddr_storage sas; - auto* sa = reinterpret_cast(&sas); - socklen_t sa_len = sizeof(sas); - int rc = getsockname(fd, sa, &sa_len); - CLOSE(fd); -#undef CLOSE - if (rc != 0) - { - throw std::invalid_argument{ - fmt::format("Failed to query UDP port for unbound: {}", strerror(errno))}; - } - - addr = quic::Address{sa, sizeof(sockaddr)}; - } - _local_addr = addr; - - log::debug(logcat, "sending dns queries from {}", addr.to_string()); - // set up query bind port if needed - set_opt("outgoing-interface:", host); - set_opt("outgoing-range:", "1"); - set_opt("outgoing-port-avoid:", "0-65535"); - set_opt("outgoing-port-permit:", "{}"_format(addr.port())); - } - } - - void set_opt(const std::string& key, const std::string& val) - { - ub_ctx_set_option(m_ctx, key.c_str(), val.c_str()); - } - - // Copy of the DNS config (a copy because on some platforms, like Apple, we change the - // applied upstream DNS settings when turning on/off exit mode). 
- srouter::DnsConfig m_conf; - - public: - explicit Resolver(quic::Loop& loop, srouter::DnsConfig conf) : _loop{loop}, m_conf{std::move(conf)} - { - up(m_conf); - } - - ~Resolver() override { down(); } - - std::string_view resolver_name() const override { return "unbound"; } - - std::optional get_local_addr() const override { return _local_addr; } - - void remove_pending(const std::shared_ptr& query) { _pending.erase(query); } - - void up(const srouter::DnsConfig& conf) - { - if (m_ctx) - throw std::logic_error{"Internal error: attempt to Up() dns server multiple times"}; - - m_ctx = ::ub_ctx_create(); - // set libunbound settings - - set_opt("do-tcp:", "no"); - - for (const auto& [k, v] : conf.extra_opts) - set_opt(k, v); - - // add host files - for (const auto& file : conf.hostfiles) - { - const auto str = file.string(); - if (auto ret = ub_ctx_hosts(m_ctx, str.c_str())) - { - throw std::runtime_error{fmt::format("Failed to add host file {}: {}", file, ub_strerror(ret))}; - } - } - - configure_upstream(conf); - - // set async - ub_ctx_async(m_ctx, 1); - // setup mainloop -#ifdef _WIN32 - running = true; - runner = std::thread{[this]() { - while (running) - { - // poll and process callbacks it this thread - if (ub_poll(m_ctx)) - { - ub_process(m_ctx); - } - else // nothing to do, sleep. 
- std::this_thread::sleep_for(10ms); - } - }}; -#else - // TODO: replace uvw shim shit with new libev stuff - // if (auto loop_ptr = loop->MaybeGetUVWLoop()) - // { - // _poller = loop_ptr->resource(ub_fd(m_ctx)); - // _poller->on([this](auto&, auto&) { ub_process(m_ctx); }); - // _poller->start(uvw::PollHandle::Event::READABLE); - // return; - // } -#endif - } - - void down() override - { -#ifdef _WIN32 - if (running.exchange(false)) - { - log::debug(logcat, "shutting down win32 dns thread"); - runner.join(); - } -#else - // if (_poller) - // _poller->close(); -#endif - if (m_ctx) - { - ::ub_ctx_delete(m_ctx); - m_ctx = nullptr; - - // destroy any outstanding queries that unbound hasn't fired yet - if (not _pending.empty()) - { - log::debug(logcat, "cancelling {} pending queries", _pending.size()); - // We must copy because Cancel does a loop call to remove itself, but since - // we are already in the main loop it happens immediately, which would - // invalidate our iterator if we were looping through m_Pending at the time. 
- auto copy = _pending; - for (const auto& query : copy) - query->cancel(); - } - } - } - - int rank() const override { return 10; } - - void reset_resolver(std::optional> replace_upstream) override - { - down(); - if (replace_upstream) - m_conf._upstream_dns = std::move(*replace_upstream); - up(m_conf); - } - - template - void call(Callable&& f) - { - _loop.call(std::forward(f)); - } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) override - { - log::trace(logcat, "maybe_hook_dns called"); - auto tmp = std::make_shared(weak_from_this(), query, source, to, from); - // no questions, send fail - if (query.questions.empty()) - { - log::debug(logcat, "dns from {} to {} has empty query questions, sending failure reply", from, to); - tmp->cancel(); - return true; - } - - for (const auto& q : query.questions) - { - // dont process .sesh/.loki/.snode - if (q.has_tld(CLIENT_TLD) or q.has_tld(RELAY_TLD) or q.has_tld("loki")) - { - log::warning( - logcat, - "dns from {} to {} is for .{}/{}/loki but got to the unbound " - "resolver; sending failure reply", - from, - to, - CLIENT_TLD, - RELAY_TLD); - tmp->cancel(); - return true; - } - } - if (not m_ctx) - { - // we are down - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't set " - "up, sending failure reply", - from, - to); - tmp->cancel(); - return true; - } - -#ifdef _WIN32 - if (not running) - { - // we are stopping the win32 thread - log::debug( - logcat, - "dns from {} to {} got to the unbound resolver, but the resolver isn't " - "running, sending failure reply", - from, - to); - tmp->Cancel(); - return true; - } -#endif - const auto& q = query.questions[0]; - if (auto err = ub_resolve_async( - m_ctx, - std::string{q.name()}.c_str(), - static_cast(q.qtype), - static_cast(q.qclass), - tmp.get(), - &Resolver::callback, - nullptr)) - { - log::warning(logcat, "failed to send upstream query 
with libunbound: {}", ub_strerror(err)); - tmp->cancel(); - } - else - { - log::trace(logcat, "dns from {} to {} processing via libunbound", from, to); - _pending.insert(std::move(tmp)); - } - - return true; - } - }; - - void Query::send_reply(std::vector data) - { - log::trace(logcat, "Query::send_reply called"); - if (_done.test_and_set()) - return; - - auto parent_ptr = parent.lock(); - - if (parent_ptr) - { - parent_ptr->call( - [self = shared_from_this(), parent_ptr = std::move(parent_ptr), data = std::move(data)] { - log::trace( - logcat, - "forwarding dns response from libunbound to userland (resolverAddr: {}, " - "askerAddr: {})", - self->resolverAddr, - self->askerAddr); - self->src->send_udp(self->askerAddr, self->resolverAddr, data); - // remove query - parent_ptr->remove_pending(self); - }); - } - else - log::error(logcat, "no parent"); - } - } // namespace libunbound - - Server::Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif) - : _loop{loop}, _conf{std::move(conf)}, _platform{create_platform()}, m_NetIfIndex{std::move(netif)} - {} - - std::vector> Server::get_all_resolvers() const - { - return {_resolvers.begin(), _resolvers.end()}; - } - - void Server::start() - { - // set up udp sockets - for (const auto& addr : _conf._bind_addrs) - { - if (auto ptr = make_packet_source_on(addr, _conf)) - add_packet_source(std::move(ptr)); - } - - // add default resolver as needed - if (auto ptr = make_default_resolver()) - add_resolver(ptr); - - // FIXME: this should be handled by RoutePoker once it is resurrected, handling whether - // we eat all DNS traffic or just .sesh/.loki/.snode. For now, we only handle those. 
- set_dns_mode(false); - } - - std::shared_ptr Server::create_platform() const - { - auto plat = std::make_shared(); - if constexpr (srouter::platform::has_systemd) - { - plat->add_impl(std::make_unique()); - plat->add_impl(std::make_unique()); - } - return plat; - } - - std::shared_ptr Server::make_packet_source_on(const quic::Address& addr, const srouter::DnsConfig&) - { - return std::make_shared(*this, _loop, addr); - } - - std::shared_ptr Server::make_default_resolver() - { - if (_conf._upstream_dns.empty()) - { - log::debug( - logcat, - "explicitly no upstream dns providers specified, we will not resolve anything but " - ".{}/{}/loki", - CLIENT_TLD, - RELAY_TLD); - return nullptr; - } - - return std::make_shared(_loop, _conf); - } - - std::vector Server::bound_packet_source_addrs() const - { - std::vector addrs; - - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto maybe_addr = ptr->bound_on()) - addrs.emplace_back(*maybe_addr); - } - return addrs; - } - - std::optional Server::first_bound_packet_source_addr() const - { - for (const auto& src : _packet_sources) - { - if (auto ptr = src.lock()) - if (auto bound = ptr->bound_on()) - return bound; - } - return std::nullopt; - } - - void Server::add_resolver(std::weak_ptr resolver) { _resolvers.insert(resolver); } - - void Server::add_resolver(std::shared_ptr resolver) - { - _owned_resolvers.insert(resolver); - add_resolver(std::weak_ptr{resolver}); - } - - void Server::add_packet_source(std::weak_ptr pkt) { _packet_sources.push_back(pkt); } - - void Server::add_packet_source(std::shared_ptr pkt) - { - add_packet_source(std::weak_ptr{pkt}); - _owned_packet_sources.push_back(std::move(pkt)); - } - - void Server::stop() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->down(); - } - } - - void Server::reset() - { - for (const auto& resolver : _resolvers) - { - if (auto ptr = resolver.lock()) - ptr->reset_resolver(); - } - } - - void 
Server::set_dns_mode(bool all_queries) - { - if (auto maybe_addr = first_bound_packet_source_addr()) - _platform->set_resolver(m_NetIfIndex, *maybe_addr, all_queries); - } - - bool Server::maybe_handle_payload( - const std::shared_ptr& ptr, - const quic::Address& to, - const quic::Address& from, - std::span payload) - { - // dont process to prevent feedback loop - if (ptr->would_loop(to, from)) - { - log::warning(logcat, "preventing dns packet replay to={} from={}", to, from); - return false; - } - - auto maybe = Message::extract(payload); - if (not maybe) - { - log::warning(logcat, "invalid dns message format from {} to dns listener on {}", from, to); - return false; - } - - auto& msg = *maybe; - // we don't provide a DoH resolver because it requires verified TLS - // TLS needs X509/ASN.1-DER and opting into the Root CA Cabal - // thankfully mozilla added a backdoor that allows ISPs to turn it off - // so we disable DoH for firefox using mozilla's ISP backdoor - // see: https://github.com/oxen-io/lokinet/issues/832 - for (const auto& q : msg.questions) - { - // is this firefox looking for their backdoor record? - if (q.name() == "use-application-dns.net") - { - // yea it is, let's turn off DoH because god is dead. 
- msg.add_nx_reply(); - // press F to pay respects and send it back where it came from - ptr->send_udp(from, to, msg.encode()); - return true; - } - } - - if (_resolvers.empty()) - { - log::warning(logcat, "Trying to resolve DNS query, but we no resolver set up."); - return false; - } - for (const auto& resolver : _resolvers) - { - if (auto res_ptr = resolver.lock()) - { - log::trace(logcat, "check resolver {} for dns from {} to {}", res_ptr->resolver_name(), from, to); - if (res_ptr->maybe_hook_dns(ptr, msg, to, from)) - { - log::trace(logcat, "resolver {} handling dns from {} to {}", res_ptr->resolver_name(), from, to); - return true; - } - } - } - return false; - } - -} // namespace srouter::dns diff --git a/src/dns/server.hpp b/src/dns/server.hpp deleted file mode 100644 index a01b90757..000000000 --- a/src/dns/server.hpp +++ /dev/null @@ -1,196 +0,0 @@ -#pragma once - -#include "config/config.hpp" -#include "message.hpp" -#include "net/ip_packet.hpp" -#include "platform.hpp" -#include "util/compare_ptr.hpp" - -#include -#include - -#include -#include - -namespace srouter::dns -{ - /// a job handling 1 dns query - class QueryJob_Base - { - protected: - /// the original dns query - Message _query; - - /// True if we've sent a reply (including via a call to cancel) - std::atomic_flag _done = ATOMIC_FLAG_INIT; - - public: - explicit QueryJob_Base(Message query) : _query{std::move(query)} {} - - virtual ~QueryJob_Base() = default; - - Message& underlying() { return _query; } - - const Message& underlying() const { return _query; } - - /// cancel this operation and inform anyone who cares - void cancel(); - - /// send a raw buffer back to the querier - virtual void send_reply(std::vector buf) = 0; - }; - - class PacketSource - { - public: - /// stop reading packets and end operation - virtual ~PacketSource() = default; - - /// return true if traffic with source and dest addresses would cause a - /// loop in resolution and thus should not be sent to query handlers - 
virtual bool would_loop(const quic::Address& to, const quic::Address& from) const = 0; - - /// send UDP payload with src and dst address containing buf on this packet source - virtual void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const = 0; - - /// returns the sockaddr we are bound on if applicable - virtual std::optional bound_on() const = 0; - }; - - /// non complex implementation of QueryJob_Base for use in things that - /// only ever called on the mainloop thread - class QueryJob : public QueryJob_Base, std::enable_shared_from_this - { - std::shared_ptr src; - const quic::Address resolver; - const quic::Address asker; - - public: - explicit QueryJob( - std::shared_ptr source, - const Message& query, - const quic::Address& to_, - const quic::Address& from_) - : QueryJob_Base{query}, src{std::move(source)}, resolver{to_}, asker{from_} - {} - - void send_reply(std::vector buf) override { src->send_udp(asker, resolver, buf); } - }; - - /// handler of dns query hooking - /// intercepts dns for internal processing - class Resolver_Base - { - protected: - /// return the sorting order for this resolver - /// lower means it will be tried first - virtual int rank() const = 0; - - public: - virtual ~Resolver_Base() = default; - - /// less than via rank - bool operator<(const Resolver_Base& other) const { return rank() < other.rank(); } - - /// greater than via rank - bool operator>(const Resolver_Base& other) const { return rank() > other.rank(); } - - /// get local socket address that queries are sent from - virtual std::optional get_local_addr() const { return std::nullopt; } - - /// get printable name - virtual std::string_view resolver_name() const = 0; - - /// reset the resolver state, optionally replace upstream info with new info. The default - /// base implementation does nothing. - virtual void reset_resolver(std::optional> = std::nullopt) {} - - /// cancel all pending requests and cease further operation. 
Default operation is a no-op. - virtual void down() {} - - /// attempt to handle a dns message - /// returns true if we consumed this query and it should not be processed again - virtual bool maybe_hook_dns( - const std::shared_ptr& source, - const Message& query, - const quic::Address& to, - const quic::Address& from) = 0; - }; - - // Base class for DNS proxy - class Server - { - protected: - /// add a packet source to this server, does share ownership - void add_packet_source(std::shared_ptr resolver); - /// add a resolver to this packet handler, does share ownership - void add_resolver(std::shared_ptr resolver); - - /// create the platform dependant dns stuff - virtual std::shared_ptr create_platform() const; - - public: - virtual ~Server() = default; - - explicit Server(quic::Loop& loop, srouter::DnsConfig conf, unsigned int netif_index); - - /// returns all sockaddr we have from all of our PacketSources - std::vector bound_packet_source_addrs() const; - - /// returns the first sockaddr we have on our packet sources if we have one - std::optional first_bound_packet_source_addr() const; - - /// add a resolver to this packet handler, does not share ownership - void add_resolver(std::weak_ptr resolver); - - /// add a packet source to this server, does not share ownership - void add_packet_source(std::weak_ptr resolver); - - /// create a packet source bound on bindaddr but does not add it - virtual std::shared_ptr make_packet_source_on( - const quic::Address& bindaddr, const srouter::DnsConfig& conf); - - /// sets up all internal binds and such and begins operation - virtual void start(); - - /// stops all operation - virtual void stop(); - - /// reset the internal state - virtual void reset(); - - /// create the default resolver for out config - virtual std::shared_ptr make_default_resolver(); - - std::vector> get_all_resolvers() const; - - /// feed a packet buffer from a packet source. 
- /// returns true if we decided to process the packet and consumed it - /// returns false if we dont want to process the packet - bool maybe_handle_payload( - const std::shared_ptr& pktsource, - const quic::Address& resolver, - const quic::Address& from, - std::span buf); - - /// set which dns mode we are in. - /// true for intercepting all queries. false for just .sesh/.loki/.snode - void set_dns_mode(bool all_queries); - - protected: - quic::Loop& _loop; - srouter::DnsConfig _conf; - std::shared_ptr _platform; - - private: - const unsigned int m_NetIfIndex; - // TODO FIXME: this ownership model is cursed. - std::set, ComparePtr>> _owned_resolvers; - std::set, CompareWeakPtr> _resolvers; - - std::vector> _owned_packet_sources; - std::vector> _packet_sources; - }; - -} // namespace srouter::dns diff --git a/src/dns/unbound.cpp b/src/dns/unbound.cpp new file mode 100644 index 000000000..f21cb76bc --- /dev/null +++ b/src/dns/unbound.cpp @@ -0,0 +1,120 @@ +#include "unbound.hpp" + +#include "router/router.hpp" +#include "util/logging.hpp" +#include "util/try_calling.hpp" + +#include +#include + +namespace srouter::dns +{ + auto logcat = log::Cat("dns"); + + void Unbound::ub_ctx_deleter::operator()(ub_ctx* ctx) { ub_ctx_delete(ctx); } + + Unbound::Unbound(Router& router) : _loop{router.loop} + { + auto* ctx = ub_ctx_create_event(_loop.get_event_base()); + if (!ctx) + throw std::runtime_error{"Failed to initialize unbound upstream DNS resolver"}; + + _ctx.reset(ctx); + + auto& conf = router.config().dns; + + // Handler should not have constructed us if this isn't satisfied: + assert(!conf._upstream_dns.empty()); + + // Tell unbound to set up and use internal threads for processing queries + if (auto ret = ub_ctx_async(_ctx.get(), 1); ret != 0) + throw std::runtime_error{"Failed to initialize unbound async mode: {}"_format(ub_strerror(ret))}; + + for (auto& addr : conf._upstream_dns) + { + // libquic (as of v1.6.0) includes square brackets around the host() call for 
IPv6 + // addresses. (Later versions should be fixed). In case they are there, we need to + // work around it by stripping them off before providing that value to unbound: + auto h_maybe_brackets = addr.host(); + std::string_view h{h_maybe_brackets}; + if (h.starts_with('[') && h.ends_with(']')) + { + h.remove_prefix(1); + h.remove_suffix(1); + } + auto str = "{}@{}"_format(h, addr.port()); + if (auto err = ub_ctx_set_fwd(ctx, str.c_str())) + throw std::runtime_error{"Failed to configure {} as upstream dns: {}"_format(addr, ub_strerror(err))}; + } + + if (conf.unbound_hosts) + if (int ret = ub_ctx_hosts( + ctx, + *conf.unbound_hosts == std::filesystem::path{"SYSTEM"} ? nullptr : conf.unbound_hosts->c_str()); + ret != 0) + throw std::runtime_error{"Failed to register DNS hosts file: {}"_format(ub_strerror(ret))}; + + for (auto& [opt, value] : conf.unbound_opts) + if (auto ret = ub_ctx_set_option(_ctx.get(), opt.c_str(), value.c_str()); ret != 0) + throw std::runtime_error{ + "Failed to apply unbound option {} {}: {}"_format(opt, value, ub_strerror(ret))}; + } + + struct Unbound::active_query_state + { + Unbound* self; + std::function)> callback; + }; + + Unbound::~Unbound() + { + // Clean up any pending query callbacks: + for (auto* st : _active_queries) + delete st; + } + + void Unbound::query( + const std::string& name, + RRType type, + RRClass cls, + std::function response)> callback) + { + assert(_loop.inside()); + auto state = std::make_unique(this, std::move(callback)); + + int res = ub_resolve_event( + _ctx.get(), + name.c_str(), + static_cast(type), + static_cast(cls), + state.get(), + [](void* state_ptr, + int /*rcode*/, + void* packet, + int packet_len, + int /*sec*/, + char* /*bogus*/, + int /*ratelimited*/) { + if (!state_ptr) + return; + std::unique_ptr state{static_cast(state_ptr)}; + state->self->_active_queries.erase(state.get()); + + try_calling( + logcat, + state->callback, + std::span{static_cast(packet), static_cast(packet_len)}); + }, + nullptr 
/* async_id, which is only used for cancelling specific queries, which we don't do*/); + + if (res != 0) + { + log::warning(logcat, "Unbound failed to forward query: {}", ub_strerror(res)); + try_calling(logcat, state->callback, std::span{}); + return; + } + + _active_queries.insert(state.release()); + } + +} // namespace srouter::dns diff --git a/src/dns/unbound.hpp b/src/dns/unbound.hpp new file mode 100644 index 000000000..34967f2d2 --- /dev/null +++ b/src/dns/unbound.hpp @@ -0,0 +1,63 @@ +#pragma once + +#include "rr.hpp" + +#include +#include + +#include + +struct ub_ctx; + +namespace srouter +{ + class Router; +} + +namespace srouter::dns +{ + + namespace quic = oxen::quic; + + // TODO FIXME: Apple sys extension support. See older commits (e.g. v0.9.14) where there is a + // ConfigureAppleTrampoline with a bunch of comments about how it works and why it is needed + // (basically: because libunbound is *inside* the extension and traffic generated from inside + // does not go through the tunnel, so some hackery was summoned). + + class Unbound + { + public: + Unbound(Router& router); + + ~Unbound(); + + // Gives a query to unbound to resolve. When the result comes back, on_result will be + // called with the full DNS response from within the router loop (i.e. it is not necessary + // to loop.call(...) inside the given callback). + // + // The `response` could be empty, in the case of an unbound failure to submit the query (and + // can be called within the call to query()). + void query( + const std::string& name, + RRType type, + RRClass cls, + std::function response)> on_result); + + private: + quic::Loop& _loop; + + struct ub_ctx_deleter + { + void operator()(ub_ctx* ctx); + }; + std::unique_ptr _ctx; + + struct active_query_state; + + // Holds the state for any in-progress queries: these are normally cleaned up in the + // callback itself, but if we cancel all queries (i.e. when shutting down) we have to clean + // up any outstanding ones manually. 
+ std::unordered_set _active_queries; + }; + +} // namespace srouter::dns diff --git a/src/ev/udp.cpp b/src/ev/udp.cpp deleted file mode 100644 index c48592e12..000000000 --- a/src/ev/udp.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "udp.hpp" - -namespace srouter -{ - static auto logcat = log::Cat("ev-udp"); - - inline constexpr size_t MAX_BATCH = -#if defined(OXEN_LIBQUIC_UDP_SENDMMSG) || defined(OXEN_LIBQUIC_UDP_GSO) - 24; -#else - 1; -#endif - - UDPHandle::UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb) : _loop{ev} - { - socket = std::make_unique(ev->get_event_base(), bind, std::move(cb)); - _local = socket->address(); - } - - UDPHandle::~UDPHandle() { socket.reset(); } - - io_result UDPHandle::_send_impl(const quic::Path& path, std::byte* buf, size_t size, uint8_t ecn, size_t& n_pkts) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - auto* bufsize = &size; - - if (!socket) - { - log::warning(logcat, "Cannot send packets on closed socket ({})", path); - return io_result{EBADF}; - } - - assert(n_pkts >= 1 && n_pkts <= MAX_BATCH); - - log::trace(logcat, "Sending {} UDP packet(s) {}...", n_pkts, path); - - auto [ret, sent] = socket->send(path, buf, bufsize, ecn, n_pkts); - - if (ret.failure() && !ret.blocked()) - { - log::error(logcat, "Error sending packets {}: {}", path, ret.str_error()); - n_pkts = 0; // Drop any packets, as we had a serious error - return ret; - } - - if (sent < n_pkts) - { - if (sent == 0) // Didn't send *any* packets, i.e. 
we got entirely blocked - log::debug(logcat, "UDP sent none of {}", n_pkts); - - else - { - // We sent some but not all, so shift the unsent packets back to the beginning of buf/bufsize - log::debug(logcat, "UDP undersent {}/{}", sent, n_pkts); - size_t offset = std::accumulate(bufsize, bufsize + sent, size_t{0}); - size_t len = std::accumulate(bufsize + sent, bufsize + n_pkts, size_t{0}); - std::memmove(buf, buf + offset, len); - std::copy(bufsize + sent, bufsize + n_pkts, bufsize); - n_pkts -= sent; - } - - // We always return EAGAIN (so that .blocked() is true) if we failed to send all, even - // if that isn't strictly what we got back as the return value (sendmmsg gives back a - // non-error on *partial* success). - return io_result{EAGAIN}; - } - - n_pkts = 0; - - return ret; - } - - void UDPHandle::_send_or_queue( - const quic::Path& path, std::vector buf, uint8_t ecn, std::function callback) - { - log::trace(logcat, "{} called", __PRETTY_FUNCTION__); - - if (!socket) - { - log::warning(logcat, "Cannot sent to dead socket for path {}", path); - if (callback) - callback(io_result{EBADF}); - return; - } - - size_t n_pkts = 1; - // size_t bufsize = buf.size(); - auto res = _send_impl(path, buf.data(), buf.size(), ecn, n_pkts); - - if (res.blocked()) - { - socket->when_writeable([this, path, buf = std::move(buf), ecn, cb = std::move(callback)]() mutable { - _send_or_queue(path, std::move(buf), ecn, std::move(cb)); - }); - } - else if (callback) - callback({}); - } - - io_result UDPHandle::send(const quic::Address& dest, std::span data) - { - return _send_impl(quic::Path{_local, dest}, data.data(), data.size(), 0); - } -} // namespace srouter diff --git a/src/ev/udp.hpp b/src/ev/udp.hpp deleted file mode 100644 index 6179b9e11..000000000 --- a/src/ev/udp.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "net/ip_packet.hpp" -#include "util/logging.hpp" - -#include -#include -#include - -namespace srouter -{ - using UDPSocket = quic::UDPSocket; - - using 
io_result = quic::io_result; - - class UDPHandle - { - public: - UDPHandle() = delete; - explicit UDPHandle(const std::shared_ptr& ev, const quic::Address& bind, net_pkt_hook cb); - ~UDPHandle(); - - private: - std::shared_ptr _loop; - std::unique_ptr socket; - quic::Address _local; - - void _send_or_queue( - const quic::Path& path, - std::vector buf, - uint8_t ecn, - std::function callback = nullptr); - - public: - io_result send(const quic::Address& dest, std::span data); - - quic::Address bind() { return _local; } - }; - -} // namespace srouter diff --git a/src/handlers/tun.cpp b/src/handlers/tun.cpp index c33612ac5..eb354da2b 100644 --- a/src/handlers/tun.cpp +++ b/src/handlers/tun.cpp @@ -13,7 +13,6 @@ #include "auth/auth.hpp" #include "constants/platform.hpp" #include "contact/sns.hpp" -#include "dns/dns.hpp" #include "dns/encode.hpp" #include "nodedb.hpp" #include "router/route_poker.hpp" @@ -27,205 +26,6 @@ namespace srouter::handlers { static auto logcat = log::Cat("tun"); - bool TunEndpoint::maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) - { - if (not should_hook_dns_message(query)) - return false; - - auto job = std::make_shared(source, query, to, from); - if (!handle_hooked_dns_message(query, [job](dns::Message msg) { job->send_reply(msg.encode()); })) - job->cancel(); - return true; - } - - /// Intercepts DNS IP packets on platforms where binding to a low port isn't viable. - /// (windows/macos/ios/android ... aka everything that is not linux... funny that) - class DnsInterceptor : public dns::PacketSource - { - ip_pkt_hook _hook; - quic::Address _our_ip; // maybe should be an IP type...? 
- srouter::DnsConfig _config; - - public: - explicit DnsInterceptor(ip_pkt_hook reply, quic::Address our_ip, srouter::DnsConfig conf) - : _hook{std::move(reply)}, _our_ip{std::move(our_ip)}, _config{std::move(conf)} - {} - - ~DnsInterceptor() override = default; - - void send_udp( - const quic::Address& to, const quic::Address& from, std::span payload) const override - { - log::critical(logcat, "DNS interceptor FIXME!"); - if (payload.empty()) - return; - // FIXME: this - (void)to; - (void)from; - (void)payload; - // _hook(data.make_udp(to, from)); - } - - std::optional bound_on() const override { return std::nullopt; } - - bool would_loop(const quic::Address& to, const quic::Address& from) const override - { - if constexpr (platform::is_apple) - { - // DNS on Apple is a bit weird because in order for the NetworkExtension itself to - // send data through the tunnel we have to proxy DNS requests through Apple APIs - // (and so our actual upstream DNS won't be set in our resolvers, which is why the - // vanilla WouldLoop won't work for us). However when active the mac also only - // queries the main tunnel IP for DNS, so we consider anything else to be - // upstream-bound DNS to let it through the tunnel. - return to != _our_ip; - } - else if (auto maybe_addr = _config._query_bind) - { - const auto& addr = *maybe_addr; - // omit traffic to and from our dns socket - return addr == to or addr == from; - } - return false; - } - }; - - class TunDNS : public dns::Server - { - const TunEndpoint* _tun; - std::optional _query_bind; - quic::Address _our_ip; - - public: - std::shared_ptr pkt_source; - - ~TunDNS() override = default; - - explicit TunDNS(TunEndpoint* ep, const srouter::DnsConfig& conf) - : dns::Server{ep->router().loop, conf, 0}, - _tun{ep}, - _query_bind{conf._query_bind}, - _our_ip{ep->get_ipv4()} // FIXME: What about IPv6? 
- { - if (_query_bind) - _our_ip.set_port(_query_bind->port()); - } - - std::shared_ptr make_packet_source_on( - const quic::Address&, const srouter::DnsConfig& conf) override - { - (void)_tun; - auto ptr = std::make_shared( - [](IPPacket pkt) { - (void)pkt; - // ep->handle_write_ip_packet(pkt.ConstBuffer(), pkt.srcv6(), pkt.dstv6(), 0); - }, - _our_ip, - conf); - pkt_source = ptr; - return ptr; - } - }; - - // NB: It looks like this could/should be called during the constructor, - // but as it passes weak_from_this to the dns server, it has to be after. - void TunEndpoint::setup_dns() - { - log::debug(logcat, "{} setting up DNS...", name()); - - auto& dns_config = _router.config().dns; - const auto& info = get_vpn_interface()->interface_info(); - - if (dns_config.l3_intercept) - { - // FIXME: this entire if block is so broken... - _dns = std::make_unique(this, dns_config); - auto* dns = static_cast(_dns.get()); - - uint16_t p = 53; - - while (p < 100) - { - try - { - _packet_router->add_udp_handler(p, [this, dns](IPPacket pkt) { - // TODO FIXME - log::critical(logcat, "TODO FIXME: L3 udp interceptor!"); - // if (dns->maybe_handle_payload(dns->pkt_source, pkt.destination(), pkt.source(), - // pkt.udp_data())) - // return; - - handle_outbound_packet(std::move(pkt)); - }); - } - catch (const std::exception& e) - { - if (p += 1; p >= 100) - throw std::runtime_error{"Failed to port map udp handler: {}"_format(e.what())}; - } - } - } - else - _dns = std::make_unique(_router.loop, dns_config, info.index); - - _dns->add_resolver(weak_from_this()); - _dns->start(); - - if (dns_config.l3_intercept) - { - if (auto vpn = _router.vpn_platform()) - { - // get the first local address we know of - std::optional localaddr; - - for (auto res : _dns->get_all_resolvers()) - { - if (auto ptr = res.lock()) - { - localaddr = ptr->get_local_addr(); - - if (localaddr) - break; - } - } - if (platform::is_windows) - { - // auto dns_io = vpn->create_packet_io(0, localaddr); - // 
router().loop()->add_ticker([dns_io, handler = m_PacketRouter]() { - // net::IPPacket pkt = dns_io->ReadNextPacket(); - // while (not pkt.empty()) - // { - // handler->HandleIPPacket(std::move(pkt)); - // pkt = dns_io->ReadNextPacket(); - // } - // }); - // m_RawDNS = dns_io; - } - - (void)vpn; - } - - if (_raw_DNS) - _raw_DNS->Start(); - } - } - - void TunEndpoint::reconfigure_dns(std::vector servers) - { - if (_dns) - { - for (auto weak : _dns->get_all_resolvers()) - { - if (auto ptr = weak.lock()) - ptr->reset_resolver(servers); - } - } - } - TunEndpoint::TunEndpoint(Router& r) : _router{r} { _packet_router = @@ -309,361 +109,8 @@ namespace srouter::handlers log::info(logcat, "{} got network interface:{}", name(), _if_name); } - static const auto localhost_ctld = "localhost.{}"_format(CLIENT_TLD); - static const auto dot_localhost_ctld = ".localhost.{}"_format(CLIENT_TLD); - static bool is_localhost(std::string_view qname) - { - return qname == "localhost.loki" or qname.ends_with(".localhost.loki") or qname == localhost_ctld - or qname.ends_with(dot_localhost_ctld); - } - - static std::optional parse_rid(std::string_view b32rid) - { - auto rid = std::make_optional(); - if (not rid->from_base32z(b32rid)) - rid.reset(); - return rid; - } - - static std::optional is_snode(std::string_view name) - { - if (name.ends_with(RELAY_DOT_TLD)) - name.remove_suffix(RELAY_DOT_TLD.size()); - else - return std::nullopt; - return parse_rid(name); - } - - static dns::Message& clear_dns_message(dns::Message& msg) - { - msg.authorities.clear(); - msg.additional.clear(); - msg.answers.clear(); - msg.hdr_fields &= ~dns::flags_RCODENxDomain; - return msg; - } - - template - static std::optional try_making(Args&&... args) - { - try - { - return std::make_optional(std::forward(args)...); - } - catch (...) 
- { - return std::nullopt; - } - } - static const auto random_snode = "random.{}"_format(RELAY_TLD); - bool TunEndpoint::handle_hooked_dns_message( - dns::Message msg, std::function reply, std::optional qname_override) - { - log::trace(logcat, "handle_hooked_dns_message"); - if (msg.questions.size() != 1) - { - log::warning(logcat, "bad number of dns questions: {}", msg.questions.size()); - return false; - } - - auto& q = msg.questions[0]; - - std::string qname; - if (qname_override) - qname = std::move(*qname_override); - else - qname = q.name(); - std::string hostname, tld; - std::vector sub; - { - auto nameparts = split(qname, "."); - if (nameparts.size() < 2) - { - log::warning(logcat, "bad DNS request, no TLD or hostname: {}", qname); - return false; - } - hostname = nameparts[nameparts.size() - 2]; - tld = nameparts.back(); - sub.reserve(nameparts.size() - 2); - for (auto s : std::views::take(nameparts, static_cast(nameparts.size()) - 2)) - sub.emplace_back(s); - } - bool localhost = is_localhost(qname); - - // localhost.sesh/localhost.loki is always a CNAME to our own pubkey, regardless of the - // question type. - if (localhost) - { - auto our_hostname = _router.id().to_string(); - auto our_tld = _router.is_service_node ? RELAY_TLD : CLIENT_TLD; - auto our_name = "{}.{}"_format(our_hostname, our_tld); - - if (tld == "loki") - { - // first: report a cname for the deprecated localhost.loki -> localhost.sesh - - msg.set_rr_name("localhost.loki"); - msg.add_cname_reply("localhost.{}"_format(our_tld)); - } - // report CNAME: localhost.sesh -> pubkey.sesh - msg.set_rr_name("localhost.{}"_format(our_tld)); - msg.add_cname_reply(our_name); - - if (q.qtype == dns::RRType::CNAME) - { - // If we were queried specifically for a cname, then we are done. 
- reply(std::move(msg)); - return true; - } - - // Otherwise we continue processing to be able to return supplemental records through - // the cname, so that if you request "foo.localhost.loki" we end up returning: - // localhost.loki CNAME for localhost.sesh - // localhost.sesh CNAME for PUBKEY.sesh - // foo.PUBKEY.sesh IN X VALUE (or whatever) - // And so for for the rest of the answer processing that we were given PUBKEY.sesh, - // rather than localhost.loki/.sesh: - qname = sub.empty() ? our_name : "{}.{}"_format(fmt::join(sub, "."), our_name); - msg.set_rr_name(qname); - - tld = our_tld; - hostname = std::move(our_hostname); - } - else if (qname == random_snode) - { - // Similar to the localhost case: we first return a CNAME of random.snode -> - // SOMEPK.snode, then continue processing as if that was what you asked for. - - if (auto* rc = _router.node_db().get_random_rc()) - { - hostname = rc->router_id().to_string(); - qname = "{}.{}"_format(hostname, RELAY_TLD); - msg.add_cname_reply(qname, 1s); - if (q.qtype == dns::RRType::CNAME) - { - reply(std::move(msg)); - return true; - } - - msg.set_rr_name(qname); - } - else - { - msg.add_nx_reply(); - reply(std::move(msg)); - return true; - } - } - else if (tld == "loki" && hostname.size() != oxenc::to_base32z_size(RouterID::SIZE)) - { - // ONS lookup: initiate a lookup and, when we get the response, set up a CNAME of - // NAME.loki -> PUBKEY.sesh, then recurse to process other parts of the request (such as - // mapping to a AAAA). 
- - // TODO: .sesh SNS resolution, once implemented - - // ONS lookup: - auto lookup = "{}.loki"_format(hostname); - _router.session_endpoint().resolve_sns( - lookup, - [this, - lookup, - sub = std::move(sub), - reply = std::move(reply), - msg = std::move(msg), - cname_only = q.qtype == dns::RRType::CNAME]( - std::optional maybe_netaddr, - bool assertive, - std::chrono::milliseconds ttl) mutable { - msg.set_rr_name(lookup); - if (maybe_netaddr) - { - auto target = maybe_netaddr->to_string(); - msg.add_cname_reply(target); - if (cname_only) - return; - auto qname = sub.empty() ? target : "{}.{}"_format(fmt::join(sub, "."), target); - msg.set_rr_name(qname); - handle_hooked_dns_message(std::move(msg), std::move(reply), std::move(qname)); - return; - } - - if (assertive) - { - // We got an assertive "does not exist" message (and not just a failure - // or timeout), so add the nx reply - msg.add_nx_reply(); - // FIXME: we should be able to provide a TTL here - } - else - { - // We failed to get a response at all so just NX with a short timeout so - // that they will try again soon to resolve it. (We don't want to - // SERVFAIL here because that could make the resolver try another DNS - // server). - assert(!assertive); - // FIXME: should be able to specify a TTL here - msg.add_nx_reply(); - } - reply(std::move(msg)); - }); - return true; - } - - if (q.qtype == dns::RRType::TXT) - { - // TXT records can be used to query some basic info: - - // TXT on MYPUBKEY.sesh returns the basic version and netid: - if (localhost && sub.empty()) - msg.add_txt_reply("sessionrouter={} v={} netid={}"_format( - _router.is_service_node ? 
"relay" : "client", fmt::join(VERSION, "."), _router.netid())); - - // TXT on PUBKEY.snode gives back some basic RC info (if we have the RC) - else if (auto rid = is_snode(qname)) - { - if (auto* rc = _router.node_db().get_rc(*rid)) - { - msg.add_txt_reply("rc v={} a={} t={}"_format( - fmt::join(rc->version(), "."), rc->addr(), rc->timestamp().time_since_epoch().count())); - } - else - msg.add_nx_reply(); - } - else - msg.add_nx_reply(); - reply(msg); - return true; - } - - // "Regular" A or AAAA lookups - if (bool aaaa = q.qtype == dns::RRType::AAAA; aaaa || q.qtype == dns::RRType::A) - { - // Attempt to parse a "pubkey.snode" or "pubkey.sesh": - if (auto maybe_netaddr = try_making("{}.{}"_format(hostname, tld))) - { - // DNS lookup implies we want a session, so make one (NOP if we have one) - // This also means if we don't use that session the IP mapping will release when - // it expires, which it wouldn't otherwise without a tedious periodic check. - bool created_session = false; - try - { - created_session = (bool)_router.session_endpoint().initiate_remote_session(*maybe_netaddr, nullptr); - } - catch (const std::exception& e) - { - log::warning(logcat, "Failed to initiate remote session to {}: {}", *maybe_netaddr, e.what()); - } - if (created_session) - { - if (aaaa) - msg.add_reply(map6(*maybe_netaddr)); - else if (!sub.empty() && sub.back() == "ipv4"sv) - { - // We don't map IPv4 addresses by default, but it is still possible to get - // one by requesting ipv4.somepubkey.sesh/snode (or a subdomain thereof). 
- if (auto v4_addr = map4(*maybe_netaddr); v4_addr) - msg.add_reply(*v4_addr); - else - log::warning(logcat, "IPv4 mapping requested for {} failed.", *maybe_netaddr); - } - // else they requested A *not* using the magic ipv4 subdomain, so we only have - // AAAA to offer and thus we return a reply without an answer record (which is - // the proper DNS way to say "something exists at this address, but not with the - // type you requested requested", as opposed to this nx_reply below, which means - // "this record does not exist"). - } - else - msg.add_nx_reply(); - reply(msg); - - return true; - } - - // Otherwise it's some query type we don't support, so return does-not-exist. - msg.add_nx_reply(); - reply(msg); - return true; - } - - // Reverse DNS lookups: - if (q.qtype == dns::RRType::PTR) - { - // reverse dns - bool found = false; - if (auto ip = dns::decode_ptr(q.qname)) - std::visit( - [&](const auto& ip) { - if (auto addr = _lookup_mapped_ip(ip)) - { - msg.add_ptr_reply(addr->to_string()); - found = true; - } - }, - *ip); - - if (!found) - msg.add_nx_reply(); - - reply(msg); - return true; - } - - if (q.qtype == dns::RRType::SRV && (tld == CLIENT_TLD || tld == "loki") && sub.size() == 2 - && sub[0].starts_with('_') && sub[1].starts_with('_')) - { - if (auto rid = parse_rid(hostname)) - { - _router.session_endpoint().lookup_client_intro( - *rid, - [msg = std::move(msg), sub, reply = std::move(reply)]( - const std::optional& cc) mutable { - if (cc) - { - for (const auto& srv : cc->SRVs()) - if (srv.service == sub[0] && srv.proto == sub[1]) - msg.add_reply(srv); - } - else - msg.add_nx_reply(); - - reply(msg); - }); - return true; - } - } - - msg.add_nx_reply(); - reply(msg); - return true; - } - - bool TunEndpoint::should_hook_dns_message(const dns::Message& msg) const - { - if (msg.questions.size() == 1) - { - // Hook every .sesh/.snode/.loki query - for (auto tld : {CLIENT_TLD, RELAY_TLD, "loki"sv}) - if (msg.questions[0].has_tld(tld)) - return true; - - // 
hook any PTR records for ranges we own - if (msg.questions[0].qtype == srouter::dns::RRType::PTR) - { - if (auto ip = dns::decode_ptr(msg.questions[0].qname)) - { - if (auto* v4 = std::get_if(&*ip)) - return _local_net.contains(*v4); - return _local_ipv6_net.contains(std::get(*ip)); - } - return false; - } - } - return false; - } - std::string TunEndpoint::get_if_name() const { return _if_name; } const ipv4& TunEndpoint::get_ipv4() const { return _local_net.ip; } @@ -676,13 +123,11 @@ namespace srouter::handlers bool TunEndpoint::is_exit_node() const { return _router.is_exit_node(); } - bool TunEndpoint::stop() + void TunEndpoint::stop() { // stop vpn tunnel if (_net_if) _net_if->Stop(); - if (_raw_DNS) - _raw_DNS->Stop(); #if 0 // save address map if applicable @@ -707,11 +152,6 @@ namespace srouter::handlers // } } #endif - - if (_dns) - _dns->stop(); - - return true; } template diff --git a/src/handlers/tun.hpp b/src/handlers/tun.hpp index 7e38d964e..f3c932b58 100644 --- a/src/handlers/tun.hpp +++ b/src/handlers/tun.hpp @@ -1,10 +1,8 @@ #pragma once #include "address/map.hpp" -#include "dns/server.hpp" #include "ev/fd_poller.hpp" #include "net/ip_packet.hpp" -#include "tun_base.hpp" #include "util/thread/threading.hpp" #include "vpn/packet_router.hpp" #include "vpn/platform.hpp" @@ -22,20 +20,16 @@ namespace srouter::traffic_type namespace srouter::handlers { inline constexpr auto TUN = "tun"sv; - inline constexpr auto LOKI_RESOLVER = "session-router"sv; - class TunEndpoint : public TunEPBase, public dns::Resolver_Base, public std::enable_shared_from_this + class TunEndpoint { public: TunEndpoint(Router& r); - ~TunEndpoint() override; + ~TunEndpoint(); private: Router& _router; - /// dns subsystem for this endpoint - std::unique_ptr _dns; - /// our local ip network ipv4_net _local_net; IPv4RangeIterator _local_range_iterator{_local_net}; @@ -59,27 +53,11 @@ namespace srouter::handlers std::optional _persisting_addr_file = std::nullopt; bool persist_addrs{false}; 
- /// for raw packet dns - std::shared_ptr _raw_DNS; - public: vpn::NetworkInterface* get_vpn_interface() { return _net_if.get(); } std::string_view name() const { return TUN; } - int rank() const override { return 0; } - - std::string_view resolver_name() const override { return LOKI_RESOLVER; } - - bool maybe_hook_dns( - const std::shared_ptr& source, - const dns::Message& query, - const quic::Address& to, - const quic::Address& from) override; - - // Reconfigures DNS servers and restarts libunbound with the new servers. - void reconfigure_dns(std::vector servers); - void configure(); std::string get_if_name() const; @@ -94,23 +72,14 @@ namespace srouter::handlers const ipv4_net& get_ipv4_network() const; const ipv6_net& get_ipv6_network() const; - bool should_hook_dns_message(const dns::Message& msg) const; - - bool handle_hooked_dns_message( - dns::Message query, - std::function sendreply, - std::optional qname_override = std::nullopt); - void tick_tun(sys_ms now); - bool stop(); + void stop(); bool is_service_node() const; bool is_exit_node() const; - void setup_dns(); - // INPROGRESS: new API // Handles an outbound packet going OUT to the network void handle_outbound_packet(IPPacket pkt); @@ -118,7 +87,7 @@ namespace srouter::handlers void rewrite_and_send_packet(IPPacket&& pkt, const ipv4& src, const ipv4& dest); void rewrite_and_send_packet(IPPacket&& pkt, const ipv6& src, const ipv6& dest); - void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) override; + void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote); // Handles an inbound packet coming IN from the network // bool handle_inbound_packet(IPPacket pkt, NetworkAddress remote, bool is_exit_session, bool @@ -128,7 +97,7 @@ namespace srouter::handlers // Router remote address with it. If the mapping already exists, this returns the existing // IP, otherwise it assigns a new one. The association persists until unmapped. Returns // the mapped ipv6 address. 
- ipv6 map6(const NetworkAddress& remote) override; + ipv6 map6(const NetworkAddress& remote); // Obtains an available IPv4 address from the tun device and associates the given Session // Router remote address with it. If the mapping already exists, this returns the existing @@ -139,12 +108,34 @@ namespace srouter::handlers // Returns the mapped addresses, or nullptr if an address could not be assigned (i.e. // because of IPv4 exhaustion in the allocated tun range, or because this client does not // support IPv4 addressing at all). - std::optional map4(const NetworkAddress& remote) override; + std::optional map4(const NetworkAddress& remote); + + // Takes an IPv4 or IPv6 address and returns {addr, true} if the address is a tun address + // range IP mapped to an address; {nullptr, true} if it is a tun address range IP but + // without a mapped address; or {nullptr, false} if it is not a tun address range IP. + template + std::pair, bool> reverse_lookup(const IP& ip) + requires std::same_as || std::same_as + { + std::pair, bool> result; + auto& [netaddr, in_range] = result; + if constexpr (std::same_as) + { + netaddr = _local_ipv4_mapping[ip]; + in_range = netaddr || _local_net.contains(ip); + } + else + { + netaddr = _local_ipv6_mapping[ip]; + in_range = netaddr || _local_ipv6_net.contains(ip); + } + return result; + } // Expires a mapped IP for the given remote from the tun IP map. The address will be added // as the most recently used address, and (if the configured cache size is exceeded) the least // recently used address will be forgotten. 
- void expire(const NetworkAddress& remote) override; + void expire(const NetworkAddress& remote); std::optional get_exit_policy() const { return _exit_policy; } @@ -159,7 +150,7 @@ namespace srouter::handlers Router& router() { return _router; } - void start_poller() override; + void start_poller(); private: // Stores assigned IP's for each session in/out of this Session Router instance @@ -168,18 +159,6 @@ namespace srouter::handlers address_map _local_ipv4_mapping; address_map _local_ipv6_mapping; - template - auto _lookup_mapped_ip(const IP& ip) - { - if constexpr (std::same_as) - return _local_ipv4_mapping[ip]; - else - { - static_assert(std::same_as); - return _local_ipv6_mapping[ip]; - } - } - // We keep a list of expired network addresses ordered by least-recently-used first. When // pruning the expired list, we pop off the front of the list. std::list _expired; diff --git a/src/handlers/tun_base.hpp b/src/handlers/tun_base.hpp deleted file mode 100644 index 2f4c772fe..000000000 --- a/src/handlers/tun_base.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include "address/address.hpp" -#include "address/types.hpp" -#include "net/ip_packet.hpp" - -namespace srouter::handlers -{ - - // Abstract class for TUN handling. This base interface exists so that embedded clients can be - // built without needing to compile any tun code at all. 
- class TunEPBase - { - public: - virtual ~TunEPBase() = default; - - virtual void start_poller() = 0; - - virtual ipv6 map6(const NetworkAddress& remote) = 0; - virtual std::optional map4([[maybe_unused]] const NetworkAddress& remote) { return std::nullopt; } - - virtual void expire(const NetworkAddress& remote) = 0; - - virtual void handle_inbound_packet(IPPacket pkt, uint8_t type, NetworkAddress remote) = 0; - }; - -} // namespace srouter::handlers diff --git a/src/link/endpoint.cpp b/src/link/endpoint.cpp index 9a758adc3..2172c9fcd 100644 --- a/src/link/endpoint.cpp +++ b/src/link/endpoint.cpp @@ -797,6 +797,8 @@ namespace srouter::link }); } + Endpoint::~Endpoint() { *canary = false; } + void Endpoint::on_conn_closed(quic::Connection& conn, uint64_t ec) { auto alpn = conn.selected_alpn(); @@ -815,13 +817,26 @@ namespace srouter::link return; } - router.loop.call([this, connptr = conn.shared_from_this(), ec] { - auto& conn = *connptr; - auto alpn = conn.selected_alpn(); - - std::optional rid; - if (conn.remote_key().size() == RouterID::SIZE) - rid.emplace(conn.remote_key().first()); + std::optional rid; + if (conn.remote_key().size() == RouterID::SIZE) + rid.emplace(conn.remote_key().first()); + + // NB: we must not capture a shared_ptr to conn here, because this lambda could outlive + // `this`; the canary lets us early-return if that happens, but the Connection destruction + // relies on `this.loop` to destroy: thus if we capture it we could delay that destruction + // attempt beyond the end of `this.loop`. Thus we capture everything we need into the + // lambda here, while we are still in the network loop. 
+ + router.loop.call([this, + alive = canary, + conn_refid = conn.reference_id(), + alpn, + rid = std::move(rid), + remote_addr = conn.remote(), + ec, + was_inbound = conn.is_inbound()] { + if (!*alive) + return; bool found = false; @@ -833,14 +848,14 @@ namespace srouter::link { assert(router.is_service_node); auto& relcon = it->second; - if (relcon.inbound && connptr == relcon.inbound->conn) + if (relcon.inbound && relcon.inbound->conn && relcon.inbound->conn->reference_id() == conn_refid) { relcon.close(true); found = true; log::debug( logcat, "Inbound connection from {} closed (ec={})", rid->to_network_address(true), ec); } - if (relcon.outbound && connptr == relcon.outbound->conn) + if (relcon.outbound && relcon.outbound->conn && relcon.outbound->conn->reference_id() == conn_refid) { relcon.close(false); found = true; @@ -875,10 +890,10 @@ namespace srouter::link log::debug( logcat, "Closed redundant connection {} {} @ {} (cid={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid->to_network_address(true), - conn.remote(), - conn.reference_id()); + remote_addr, + conn_refid); found = true; } } @@ -886,11 +901,11 @@ namespace srouter::link { if (router.is_service_node) { - assert(conn.is_inbound()); // Relays do make outbound client conns for testing, - // but they do not use this close callback. - if (auto it = inbound_clients.find(conn.reference_id()); it != inbound_clients.end()) + assert(was_inbound); // Relays do make outbound client conns for testing, + // but they do not use this close callback. 
+ if (auto it = inbound_clients.find(conn_refid); it != inbound_clients.end()) { - log::debug(logcat, "Client connection from {} closed (ec={})", conn.remote(), ec); + log::debug(logcat, "Client connection from {} closed (ec={})", remote_addr, ec); it->second->close(); inbound_clients.erase(it); found = true; @@ -898,9 +913,10 @@ namespace srouter::link } else { - assert(conn.is_outbound()); + assert(!was_inbound); - if (auto it = client_conns.find(*rid); it != client_conns.end() and connptr == it->second->conn) + if (auto it = client_conns.find(*rid); it != client_conns.end() && it->second && it->second->conn + && it->second->conn->reference_id() == conn_refid) { log::debug( logcat, @@ -912,12 +928,13 @@ namespace srouter::link } } } - else if (conn.is_outbound()) + else if (!was_inbound) { // Unknown or empty ALPN -- this is an outbound conn that didn't establish (and thus // didn't negotiate the ALPN): assert(rid); // Outbound conns start out with the target pubkey known - if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() and connptr == it->second->conn) + if (auto it = pending_outbound.find(*rid); it != pending_outbound.end() && it->second + && it->second->conn && it->second->conn->reference_id() == conn_refid) { pending_outbound.erase(it); found = true; @@ -931,10 +948,10 @@ namespace srouter::link log::warning( logcat, "Closed connection {} {} @ {} (cid={}, ec={})", - conn.is_inbound() ? "from" : "to", + was_inbound ? "from" : "to", rid ? rid->to_string() : "", - conn.remote(), - conn.reference_id(), + remote_addr, + conn_refid, ec); if (not router.is_service_node) diff --git a/src/link/endpoint.hpp b/src/link/endpoint.hpp index 36bb6759d..f7715a2c5 100644 --- a/src/link/endpoint.hpp +++ b/src/link/endpoint.hpp @@ -83,10 +83,16 @@ namespace srouter::link public: explicit Endpoint(Manager& lm); + ~Endpoint(); + Manager& manager; Router& router; private: + // The network loop object. 
This *must* be declared before most of the below as some of the + // things below have destructors that run in this loop. + std::unique_ptr loop; + // Stores established relay-to-relay connections; only used by service nodes. std::unordered_map relay_conns; @@ -114,12 +120,15 @@ namespace srouter::link // only. std::unordered_map> inbound_clients; - std::unique_ptr loop; std::shared_ptr endpoint; std::shared_ptr redundancy_ticker; std::shared_ptr dereg_conn_ticker; std::shared_ptr tls_creds; + // Canary object that gets set to false during destruction to help short-circuit lambda that + // could potentially outlive `this`: + std::shared_ptr canary = std::make_shared(true); + public: void start_tickers(); diff --git a/src/path/path.cpp b/src/path/path.cpp index a5610ceb7..dac38cb15 100644 --- a/src/path/path.cpp +++ b/src/path/path.cpp @@ -72,9 +72,8 @@ namespace srouter::path double success_pct = p.ping_responses / (double)(p.ping_responses + p.ping_timeouts) * 100.0; if (p.ping_responses == 1) return "{:.1f}%, {:.0f}ms avg"_format(success_pct, mean); - - double sd = std::sqrt(((double)p.ping_sq_cumulative - p.ping_responses * mean * mean) / (p.ping_responses - 1)); - return "{:.1f}%, {:.0f}ms avg, {:.1f}ms s.d."_format(success_pct, mean, sd); + double jitter = p.ping_responses < 2 ? 0.0 : (double)p.ping_abs_diffs.count() / (p.ping_responses - 1); + return "{:.1f}%, {:.0f}ms avg, {:.1f}ms jitter"_format(success_pct, mean, jitter); } void Path::do_ping(steady_ms start_time) @@ -96,10 +95,11 @@ namespace srouter::path auto time_taken = now - start_time; if (resp.ok()) { - ping_responses++; + if (++ping_responses > 1) + ping_abs_diffs += time_taken >= ping_last ? 
time_taken - ping_last : ping_last - time_taken; + ping_last = time_taken; ping_recent_timeouts = 0; ping_cumulative += time_taken; - ping_sq_cumulative += time_taken.count() * time_taken.count(); if (resp.body == messages::OK_RESPONSE) log::debug( @@ -318,13 +318,29 @@ namespace srouter::path } path_hop_stringifier Path::hop_string() const { return {hops}; } - std::vector> Path::get_hops_strings_and_ips() const + Path::Info Path::get_info() const { - std::vector> ret; + Info ret{}; + ret.expiry = _expiry; + if (ping_responses) + ret.ping_mean = std::chrono::round( + std::chrono::nanoseconds{ping_cumulative} / ping_responses); + if (ping_responses > 1) + ret.ping_jitter = std::chrono::round( + std::chrono::nanoseconds{ping_abs_diffs} / (ping_responses - 1)); + ret.ping_responses = ping_responses; + ret.ping_timeouts = ping_timeouts; + ret.ping_recent_timeouts = ping_recent_timeouts; for (const auto& hop : hops) { - auto rc = _router.node_db().get_rc(hop.router_id); - ret.emplace_back(NetworkAddress{hop.router_id, false}.to_string(), rc->addr().to_ipv4().to_string()); + auto* rc = _router.node_db().get_rc(hop.router_id); + if (rc) + ret.relays.emplace_back(hop.router_id, rc->addr().to_ipv4()); + else + { + log::warning(logcat, "Couldn't find RC of a router on our path?!"); + ret.relays.emplace_back(); + } } return ret; } diff --git a/src/path/path.hpp b/src/path/path.hpp index d870ff87f..316b6d1d1 100644 --- a/src/path/path.hpp +++ b/src/path/path.hpp @@ -72,7 +72,16 @@ namespace srouter::path path_hop_stringifier hop_string() const; - std::vector> get_hops_strings_and_ips() const; + struct Info + { + // relay pubkeys and IPv4 addresses, from edge -> pivot (or final relay) + std::vector> relays; + sys_ms expiry = {}; + std::chrono::milliseconds ping_mean; + std::chrono::microseconds ping_jitter; + int ping_responses, ping_timeouts, ping_recent_timeouts; + }; + Info get_info() const; sys_ms LastRemoteActivityAt() const { return last_recv_msg; } @@ -219,9 +228,13 @@ 
namespace srouter::path steady_ms next_ping{}; int ping_responses{0}, ping_timeouts{0}; int ping_recent_timeouts{0}; + std::chrono::milliseconds ping_last{0ms}; // Cumulative time of all `ping_responses` pings (divide by ping_responses for an average). std::chrono::milliseconds ping_cumulative{0ms}; - int64_t ping_sq_cumulative{0}; + // This is the cumulative absolute differences of all received sequential pings. E.g. if we + // have 4 pings [100, 101, 98, 98] then this equals (|100-101| + |101-98| + |98-98|). + // Dividing this by `ping_responses - 1` gives jitter. + std::chrono::milliseconds ping_abs_diffs{0ms}; }; } // namespace srouter::path diff --git a/src/router/router.cpp b/src/router/router.cpp index b5c6eccb8..aea2803e2 100644 --- a/src/router/router.cpp +++ b/src/router/router.cpp @@ -7,6 +7,7 @@ #include "constants/version.hpp" #include "contact/contactdb.hpp" #include "crypto/crypto.hpp" +#include "dns/listener.hpp" #include "link/link_manager.hpp" #include "nodedb.hpp" #include "util/formattable.hpp" @@ -62,7 +63,8 @@ namespace srouter // exceed the defaut 1MB limit). _omq->MAX_MSG_SIZE = -1; - _router_testing = std::make_shared(*this); + if (is_service_node) + _router_testing = std::make_shared(*this); #endif init_logging(); @@ -552,19 +554,46 @@ namespace srouter throw std::runtime_error{"This Session Router build only supports embedded configurations!"}; #else log::debug(logcat, "Initializing TUN device"); - auto tun = _loop->make_shared(*this); + _tun = _loop->make_shared(*this); // only (full) clients should have DNS, relays have no need for it if (!is_service_node) - tun->setup_dns(); + { + auto& dns_bind = config().dns._listen_addrs; + if (dns_bind.empty()) + { + // This configuration is allowed (a service-only client might use it), although a bit unusual + log::warning( + logcat, "[dns]:listen is empty: DNS disabled. 
Making outbound paths will not be possible"); + } + else + { + try + { + for (const auto& addr : dns_bind) + { + if (!_dns) + _dns = _loop->make_shared(*this, addr); + else + _dns->listen(loop, addr); + + log::info(log_global, "DNS listening on {} port {}", addr.host(), _dns->last_port); + } + } + catch (const std::exception& e) + { + log::error( + logcat, "Failed to initialize DNS listener on {}: {}", fmt::join(dns_bind, ","), e.what()); + throw; + } + } + } log::info( log_global, "Session Router internal network: {} on device {}", - tun->get_ipv4_network(), - tun->get_if_name()); - - _tun = std::move(tun); + _tun->get_ipv4_network(), + _tun->get_if_name()); #endif } else @@ -951,7 +980,8 @@ namespace srouter void Router::on_test_ping() { #ifndef SROUTER_EMBEDDED_ONLY - _router_testing->incoming_ping(); + if (_router_testing) + _router_testing->incoming_ping(); #endif } @@ -979,7 +1009,8 @@ namespace srouter srouter::sys::service_manager->stopping(); } - _router_testing->stop(); + if (_router_testing) + _router_testing->stop(); #endif _session_endpoint->stop(true); @@ -990,6 +1021,14 @@ namespace srouter log::debug(logcat, "closing all connections"); _link_manager->stop(); +#ifndef SROUTER_EMBEDDED_ONLY + if (_dns) + _dns.reset(); + + if (_tun) + _tun->stop(); +#endif + auto rv = _loop_ticker->stop(); log::debug(logcat, "router loop ticker stopped {}successfully!", rv ? 
"" : "un"); _loop_ticker.reset(); @@ -1020,6 +1059,9 @@ namespace srouter _link_endpoint = nullptr; _link_manager.reset(); + if (_tun) + _tun.reset(); + if (_router_close_cb) _router_close_cb(); @@ -1032,6 +1074,24 @@ namespace srouter }); } + std::pair, bool> Router::reverse_lookup(const ipv4& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + + std::pair, bool> Router::reverse_lookup(const ipv6& addr) const + { +#ifndef SROUTER_EMBEDDED_ONLY + if (_tun) + return _tun->reverse_lookup(addr); +#endif + return {std::nullopt, false}; + } + const srouter::net::Platform* Router::net() const { #ifndef SROUTER_EMBEDDED_ONLY diff --git a/src/router/router.hpp b/src/router/router.hpp index 9f485b79c..10c4173f4 100644 --- a/src/router/router.hpp +++ b/src/router/router.hpp @@ -3,7 +3,7 @@ #include "contact/relay_contact.hpp" #include "crypto/key_manager.hpp" #include "handlers/session.hpp" -#include "handlers/tun_base.hpp" +#include "handlers/tun.hpp" #include "path/build_stats.hpp" #include "path/path_context.hpp" #include "profiling.hpp" @@ -17,6 +17,7 @@ #include #include #include +#include namespace oxenmq { @@ -26,6 +27,10 @@ namespace oxenmq namespace srouter { + namespace dns + { + class Listener; + } namespace link { struct Connection; @@ -91,6 +96,12 @@ namespace srouter ~Router(); + // Non-copyable/movable: + Router(const Router&) = delete; + Router(Router&&) = delete; + Router& operator=(const Router&) = delete; + Router& operator=(Router&&) = delete; + private: // Internal functions called during construction: void configure(); @@ -126,7 +137,8 @@ namespace srouter link::Endpoint* _link_endpoint = nullptr; // These are only created in full platform mode (not embedded clients) - std::shared_ptr _tun; + std::shared_ptr _tun; + std::shared_ptr _dns; std::shared_ptr _vpn; std::shared_ptr _route_poker; @@ -197,7 +209,16 @@ namespace srouter bool is_fully_meshed() const; - const 
std::shared_ptr& tun_endpoint() { return _tun; } + const std::shared_ptr& tun_endpoint() { return _tun; } + + // Looks up the given IP in our TUN mapping and, if it is a TUN address and maps to a remote, returns the + // network address of the mapped-to address. The `.second` part of the result indicates + // whether the IP is on our TUN range, even if it is unmapped. That is, it can return: + // {address, true} -- address in tun range, and mapped + // {nullopt, true} -- address in tun range, but not mapped to a remote + // {nullopt, false} -- address not in tun range (or no tun at all) + std::pair, bool> reverse_lookup(const ipv4& addr) const; + std::pair, bool> reverse_lookup(const ipv6& addr) const; // Returns the net Platform pointer, or nullptr if this is an embedded client. const srouter::net::Platform* net() const; diff --git a/src/rpc/rpc_server.cpp b/src/rpc/rpc_server.cpp index 9f19c073a..736523120 100644 --- a/src/rpc/rpc_server.cpp +++ b/src/rpc/rpc_server.cpp @@ -4,8 +4,6 @@ #include "config/ini.hpp" #include "constants/version.hpp" #include "contact/client_contact.hpp" -#include "dns/dns.hpp" -#include "dns/server.hpp" #include "router/router.hpp" #include "rpc/rpc_request_definitions.hpp" #include "rpc_request.hpp" @@ -28,6 +26,7 @@ namespace srouter::rpc log::info(logcat, "RPC Server received request for endpoint `{}`", req.name); } +#if 0 // Fake packet source that serializes repsonses back into dns class DummyPacketSource final : public dns::PacketSource { @@ -47,6 +46,7 @@ namespace srouter::rpc /// returns the sockaddr we are bound on if applicable std::optional bound_on() const override { return std::nullopt; } }; +#endif bool check_path(std::string path) { diff --git a/src/session/session.cpp b/src/session/session.cpp index 71cf622d7..afbc1d255 100644 --- a/src/session/session.cpp +++ b/src/session/session.cpp @@ -664,6 +664,7 @@ namespace srouter::session return; } +#ifndef SROUTER_EMBEDDED_ONLY // Otherwise we're not embedded; if the other 
side also isn't then this is just a raw IP // packet to handle via the tun endpoint, and the same for UDP packets from embedded // remotes (which also send raw UDP packets): @@ -682,7 +683,7 @@ namespace srouter::session // NOTE: At this time, tun clients always support ipv4, but ipv4 is only activated on use // (unlike IPv6 which is activated all the time). If this changes, a check for that should // short-circuit the call to map_session below. - if (!_r.embedded() && pkt.is_ipv4() && !ipv4_mapped) + if (pkt.is_ipv4() && !ipv4_mapped) { if (!_parent.map_session_v4(*this)) { @@ -692,7 +693,9 @@ namespace srouter::session ipv4_mapped = true; } + assert(_r.tun_endpoint()); // (We return above if embedded) _r.tun_endpoint()->handle_inbound_packet(std::move(pkt), dgram_type, _remote); +#endif } void Session::publish_client_contact(std::string_view encrypted_cc) @@ -1663,17 +1666,17 @@ namespace srouter::session _current_thop->downstream, "session_control"s, std::move(data), nullptr); } - std::vector> OutboundSession::current_path() const + path::Path::Info OutboundSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } - std::vector> InboundClientSession::current_path() const + path::Path::Info InboundClientSession::current_path_info() const { if (_current_path) - return _current_path->get_hops_strings_and_ips(); + return _current_path->get_info(); return {}; } diff --git a/src/session/session.hpp b/src/session/session.hpp index f67273001..380ef3032 100644 --- a/src/session/session.hpp +++ b/src/session/session.hpp @@ -3,7 +3,6 @@ #include "address/address.hpp" #include "constants/path.hpp" #include "ev/tcp.hpp" -#include "ev/udp.hpp" #include "net/ip_packet.hpp" #include "path/path.hpp" #include "path/path_handler.hpp" @@ -220,7 +219,7 @@ namespace srouter // rather than waiting for the next tick) virtual void tick(sys_ms now); - virtual std::vector> current_path() 
const { return {}; }; + virtual path::Path::Info current_path_info() const { return {}; }; }; class OutboundSession : public path::PathHandler, public Session @@ -300,7 +299,7 @@ namespace srouter inline static constexpr int MAX_QUEUED_PACKETS = 30; - std::vector> current_path() const override; + path::Path::Info current_path_info() const override; }; // Outbound Session to Remote Relay @@ -419,7 +418,7 @@ namespace srouter void handle_path_switch(HopID pivot, std::shared_ptr path); - std::vector> current_path() const override; + path::Path::Info current_path_info() const override; std::string to_string() const override; }; diff --git a/src/session_router.cpp b/src/session_router.cpp index 3592a7f06..e268e4a6d 100644 --- a/src/session_router.cpp +++ b/src/session_router.cpp @@ -157,6 +157,14 @@ namespace session::router context->router->session_endpoint().unmap_udp_remote_port(netaddr, port); } + static snode_path to_snode_path(const srouter::path::Path::Info& info) + { + snode_path path; + for (const auto& [rid, ip] : info.relays) + path.emplace_back(srouter::NetworkAddress{rid, false}.to_string(), ip.to_string()); + return path; + } + std::optional SessionRouter::get_path_for_session(std::string_view remote) { srouter::NetworkAddress netaddr; @@ -172,10 +180,8 @@ namespace session::router return context->router->loop.call_get([&r = context->router, addr = std::move(netaddr)]() { std::optional ret; - if (auto s = r->session_endpoint().get_session(addr); s) - { - ret = s->current_path(); - } + if (auto* s = r->session_endpoint().get_session(addr)) + ret = to_snode_path(s->current_path_info()); return ret; }); } @@ -184,9 +190,10 @@ namespace session::router { return context->router->loop.call_get([&r = context->router]() { std::vector ret; - auto f = [&ret](const srouter::NetworkAddress& addr, const srouter::session::Session& s) { - ret.emplace_back(s.current_path(), addr.to_string()); - }; + r->session_endpoint().for_each_session( + [&ret](const 
srouter::NetworkAddress& addr, const srouter::session::Session& s) { + ret.emplace_back(to_snode_path(s.current_path_info()), addr.to_string()); + }); return ret; }); } diff --git a/src/util/logging.hpp b/src/util/logging.hpp index 68c82a909..f66f5a6ca 100644 --- a/src/util/logging.hpp +++ b/src/util/logging.hpp @@ -2,7 +2,7 @@ // Header for making actual log statements such as srouter::log::Info and so on work. -#include +#include // IWYU pragma: export #include #include