-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
forwarder.go
1101 lines (978 loc) · 30.9 KB
/
forwarder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package resolver
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"net/url"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
dns "golang.org/x/net/dns/dnsmessage"
"tailscale.com/control/controlknobs"
"tailscale.com/envknob"
"tailscale.com/net/dns/publicdns"
"tailscale.com/net/dnscache"
"tailscale.com/net/neterror"
"tailscale.com/net/netmon"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsdial"
"tailscale.com/types/dnstype"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
"tailscale.com/util/cloudenv"
"tailscale.com/util/dnsname"
"tailscale.com/util/race"
"tailscale.com/version"
)
// headerBytes is the number of bytes in a DNS message header.
const headerBytes = 12
// dnsFlagTruncated is set in the flags word when the packet is truncated.
const dnsFlagTruncated = 0x200
// truncatedFlagSet returns true if the DNS packet signals that it has
// been truncated. False is also returned if the packet was too small
// to be valid.
func truncatedFlagSet(pkt []byte) bool {
if len(pkt) < headerBytes {
return false
}
return (binary.BigEndian.Uint16(pkt[2:4]) & dnsFlagTruncated) != 0
}
const (
// dohTransportTimeout is how long to keep idle HTTP
// connections open to DNS-over-HTTPs servers. This is pretty
// arbitrary.
dohTransportTimeout = 30 * time.Second
// dohTransportTimeout is how much of a head start to give a DoH query
// that was upgraded from a well-known public DNS provider's IP before
// normal UDP mode is attempted as a fallback.
dohHeadStart = 500 * time.Millisecond
// wellKnownHostBackupDelay is how long to artificially delay upstream
// DNS queries to the "fallback" DNS server IP for a known provider
// (e.g. how long to wait to query Google's 8.8.4.4 after 8.8.8.8).
wellKnownHostBackupDelay = 200 * time.Millisecond
// udpRaceTimeout is the timeout after which we will start a DNS query
// over TCP while waiting for the UDP query to complete.
udpRaceTimeout = 2 * time.Second
// tcpQueryTimeout is the timeout for a DNS query performed over TCP.
// It matches the default 5sec timeout of the 'dig' utility.
tcpQueryTimeout = 5 * time.Second
)
// txid identifies a DNS transaction.
//
// As the standard DNS Request ID is only 16 bits, we extend it:
// the lower 32 bits are the zero-extended bits of the DNS Request ID;
// the upper 32 bits are the CRC32 checksum of the first question in the request.
// This makes probability of txid collision negligible.
type txid uint64
// getTxID computes the txid of the given DNS packet.
func getTxID(packet []byte) txid {
if len(packet) < headerBytes {
return 0
}
dnsid := binary.BigEndian.Uint16(packet[0:2])
// Previously, we hashed the question and combined it with the original txid
// which was useful when concurrent queries were multiplexed on a single
// local source port. We encountered some situations where the DNS server
// canonicalizes the question in the response (uppercase converted to
// lowercase in this case), which resulted in responses that we couldn't
// match to the original request due to hash mismatches.
return txid(dnsid)
}
func getRCode(packet []byte) dns.RCode {
if len(packet) < headerBytes {
// treat invalid packets as a refusal
return dns.RCode(5)
}
// get bottom 4 bits of 3rd byte
return dns.RCode(packet[3] & 0x0F)
}
// clampEDNSSize attempts to limit the maximum EDNS response size. This is not
// an exhaustive solution, instead only easy cases are currently handled in the
// interest of speed and reduced complexity. Only OPT records at the very end of
// the message with no option codes are addressed.
// TODO: handle more situations if we discover that they happen often
func clampEDNSSize(packet []byte, maxSize uint16) {
// optFixedBytes is the size of an OPT record with no option codes.
const optFixedBytes = 11
const edns0Version = 0
if len(packet) < headerBytes+optFixedBytes {
return
}
arCount := binary.BigEndian.Uint16(packet[10:12])
if arCount == 0 {
// OPT shows up in an AR, so there must be no OPT
return
}
// https://datatracker.ietf.org/doc/html/rfc6891#section-6.1.2
opt := packet[len(packet)-optFixedBytes:]
if opt[0] != 0 {
// OPT NAME must be 0 (root domain)
return
}
if dns.Type(binary.BigEndian.Uint16(opt[1:3])) != dns.TypeOPT {
// Not an OPT record
return
}
requestedSize := binary.BigEndian.Uint16(opt[3:5])
// Ignore extended RCODE in opt[5]
if opt[6] != edns0Version {
// Be conservative and don't touch unknown versions.
return
}
// Ignore flags in opt[6:9]
if binary.BigEndian.Uint16(opt[9:11]) != 0 {
// RDLEN must be 0 (no variable length data). We're at the end of the
// packet so this should be 0 anyway)..
return
}
if requestedSize <= maxSize {
return
}
// Clamp the maximum size
binary.BigEndian.PutUint16(opt[3:5], maxSize)
}
type route struct {
Suffix dnsname.FQDN
Resolvers []resolverAndDelay
}
// resolverAndDelay is an upstream DNS resolver and a delay for how
// long to wait before querying it.
type resolverAndDelay struct {
// name is the upstream resolver.
name *dnstype.Resolver
// startDelay is an amount to delay this resolver at
// start. It's used when, say, there are four Google or
// Cloudflare DNS IPs (two IPv4 + two IPv6) and we don't want
// to race all four at once.
startDelay time.Duration
}
// forwarder forwards DNS packets to a number of upstream nameservers.
type forwarder struct {
logf logger.Logf
netMon *netmon.Monitor // always non-nil
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
dialer *tsdial.Dialer
controlKnobs *controlknobs.Knobs // or nil
ctx context.Context // good until Close
ctxCancel context.CancelFunc // closes ctx
mu sync.Mutex // guards following
dohClient map[string]*http.Client // urlBase -> client
// routes are per-suffix resolvers to use, with
// the most specific routes first.
routes []route
// cloudHostFallback are last resort resolvers to use if no per-suffix
// resolver matches. These are only populated on cloud hosts where the
// platform provides a well-known recursive resolver.
//
// That is, if we're running on GCP or AWS where there's always a well-known
// IP of a recursive resolver, return that rather than having callers return
// SERVFAIL. This fixes both normal 100.100.100.100 resolution when
// /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub
// resolver lookup.
cloudHostFallback []resolverAndDelay
}
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, knobs *controlknobs.Knobs) *forwarder {
if netMon == nil {
panic("nil netMon")
}
f := &forwarder{
logf: logger.WithPrefix(logf, "forward: "),
netMon: netMon,
linkSel: linkSel,
dialer: dialer,
controlKnobs: knobs,
}
f.ctx, f.ctxCancel = context.WithCancel(context.Background())
return f
}
func (f *forwarder) Close() error {
f.ctxCancel()
return nil
}
// resolversWithDelays maps from a set of DNS server names to a slice of a type
// that included a startDelay, upgrading any well-known DoH (DNS-over-HTTP)
// servers in the process, insert a DoH lookup first before UDP fallbacks.
func resolversWithDelays(resolvers []*dnstype.Resolver) []resolverAndDelay {
rr := make([]resolverAndDelay, 0, len(resolvers)+2)
type dohState uint8
const addedDoH = dohState(1)
const addedDoHAndDontAddUDP = dohState(2)
// Add the known DoH ones first, starting immediately.
didDoH := map[string]dohState{}
for _, r := range resolvers {
ipp, ok := r.IPPort()
if !ok {
continue
}
dohBase, dohOnly, ok := publicdns.DoHEndpointFromIP(ipp.Addr())
if !ok || didDoH[dohBase] != 0 {
continue
}
if dohOnly {
didDoH[dohBase] = addedDoHAndDontAddUDP
} else {
didDoH[dohBase] = addedDoH
}
rr = append(rr, resolverAndDelay{name: &dnstype.Resolver{Addr: dohBase}})
}
type hostAndFam struct {
host string // some arbitrary string representing DNS host (currently the DoH base)
bits uint8 // either 32 or 128 for IPv4 vs IPv6s address family
}
done := map[hostAndFam]int{}
for _, r := range resolvers {
ipp, ok := r.IPPort()
if !ok {
// Pass non-IP ones through unchanged, without delay.
// (e.g. DNS-over-ExitDNS when using an exit node)
rr = append(rr, resolverAndDelay{name: r})
continue
}
ip := ipp.Addr()
var startDelay time.Duration
if host, _, ok := publicdns.DoHEndpointFromIP(ip); ok {
if didDoH[host] == addedDoHAndDontAddUDP {
continue
}
// We already did the DoH query early. These
// are for normal dns53 UDP queries.
startDelay = dohHeadStart
key := hostAndFam{host, uint8(ip.BitLen())}
if done[key] > 0 {
startDelay += wellKnownHostBackupDelay
}
done[key]++
}
rr = append(rr, resolverAndDelay{
name: r,
startDelay: startDelay,
})
}
return rr
}
var (
cloudResolversOnce sync.Once
cloudResolversLazy []resolverAndDelay
)
func cloudResolvers() []resolverAndDelay {
cloudResolversOnce.Do(func() {
if ip := cloudenv.Get().ResolverIP(); ip != "" {
cloudResolver := []*dnstype.Resolver{{Addr: ip}}
cloudResolversLazy = resolversWithDelays(cloudResolver)
}
})
return cloudResolversLazy
}
// setRoutes sets the routes to use for DNS forwarding. It's called by
// Resolver.SetConfig on reconfig.
//
// The memory referenced by routesBySuffix should not be modified.
func (f *forwarder) setRoutes(routesBySuffix map[dnsname.FQDN][]*dnstype.Resolver) {
routes := make([]route, 0, len(routesBySuffix))
cloudHostFallback := cloudResolvers()
for suffix, rs := range routesBySuffix {
if suffix == "." && len(rs) == 0 && len(cloudHostFallback) > 0 {
routes = append(routes, route{
Suffix: suffix,
Resolvers: cloudHostFallback,
})
} else {
routes = append(routes, route{
Suffix: suffix,
Resolvers: resolversWithDelays(rs),
})
}
}
if cloudenv.Get().HasInternalTLD() && len(cloudHostFallback) > 0 {
if _, ok := routesBySuffix["internal."]; !ok {
routes = append(routes, route{
Suffix: "internal.",
Resolvers: cloudHostFallback,
})
}
}
// Sort from longest prefix to shortest.
sort.Slice(routes, func(i, j int) bool {
return routes[i].Suffix.NumLabels() > routes[j].Suffix.NumLabels()
})
f.mu.Lock()
defer f.mu.Unlock()
f.routes = routes
f.cloudHostFallback = cloudHostFallback
}
var stdNetPacketListener nettype.PacketListenerWithNetIP = nettype.MakePacketListenerWithNetIP(new(net.ListenConfig))
func (f *forwarder) packetListener(ip netip.Addr) (nettype.PacketListenerWithNetIP, error) {
if f.linkSel == nil || initListenConfig == nil {
return stdNetPacketListener, nil
}
linkName := f.linkSel.PickLink(ip)
if linkName == "" {
return stdNetPacketListener, nil
}
lc := new(net.ListenConfig)
if err := initListenConfig(lc, f.netMon, linkName); err != nil {
return nil, err
}
return nettype.MakePacketListenerWithNetIP(lc), nil
}
// getKnownDoHClientForProvider returns an HTTP client for a specific DoH
// provider named by its DoH base URL (like "https://dns.google/dns-query").
//
// The returned client race/Happy Eyeballs dials all IPs for urlBase (usually
// 4), as statically known by the publicdns package.
func (f *forwarder) getKnownDoHClientForProvider(urlBase string) (c *http.Client, ok bool) {
f.mu.Lock()
defer f.mu.Unlock()
if c, ok := f.dohClient[urlBase]; ok {
return c, true
}
allIPs := publicdns.DoHIPsOfBase(urlBase)
if len(allIPs) == 0 {
return nil, false
}
dohURL, err := url.Parse(urlBase)
if err != nil {
return nil, false
}
dialer := dnscache.Dialer(f.getDialerType(), &dnscache.Resolver{
SingleHost: dohURL.Hostname(),
SingleHostStaticResult: allIPs,
Logf: f.logf,
})
c = &http.Client{
Transport: &http.Transport{
ForceAttemptHTTP2: true,
IdleConnTimeout: dohTransportTimeout,
// On mobile platforms TCP KeepAlive is disabled in the dialer,
// ensure that we timeout if the connection appears to be hung.
ResponseHeaderTimeout: 10 * time.Second,
DialContext: func(ctx context.Context, netw, addr string) (net.Conn, error) {
if !strings.HasPrefix(netw, "tcp") {
return nil, fmt.Errorf("unexpected network %q", netw)
}
return dialer(ctx, netw, addr)
},
},
}
if f.dohClient == nil {
f.dohClient = map[string]*http.Client{}
}
f.dohClient[urlBase] = c
return c, true
}
const dohType = "application/dns-message"
func (f *forwarder) sendDoH(ctx context.Context, urlBase string, c *http.Client, packet []byte) ([]byte, error) {
ctx = sockstats.WithSockStats(ctx, sockstats.LabelDNSForwarderDoH, f.logf)
metricDNSFwdDoH.Add(1)
req, err := http.NewRequestWithContext(ctx, "POST", urlBase, bytes.NewReader(packet))
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", dohType)
req.Header.Set("Accept", dohType)
req.Header.Set("User-Agent", "tailscaled/"+version.Long())
hres, err := c.Do(req)
if err != nil {
metricDNSFwdDoHErrorTransport.Add(1)
return nil, err
}
defer hres.Body.Close()
if hres.StatusCode != 200 {
metricDNSFwdDoHErrorStatus.Add(1)
return nil, errors.New(hres.Status)
}
if ct := hres.Header.Get("Content-Type"); ct != dohType {
metricDNSFwdDoHErrorCT.Add(1)
return nil, fmt.Errorf("unexpected response Content-Type %q", ct)
}
res, err := io.ReadAll(hres.Body)
if err != nil {
metricDNSFwdDoHErrorBody.Add(1)
}
if truncatedFlagSet(res) {
metricDNSFwdTruncated.Add(1)
}
return res, err
}
var (
verboseDNSForward = envknob.RegisterBool("TS_DEBUG_DNS_FORWARD_SEND")
skipTCPRetry = envknob.RegisterBool("TS_DNS_FORWARD_SKIP_TCP_RETRY")
// For correlating log messages in the send() function; only used when
// verboseDNSForward() is true.
forwarderCount atomic.Uint64
)
// send sends packet to dst. It is best effort.
//
// send expects the reply to have the same txid as txidOut.
func (f *forwarder) send(ctx context.Context, fq *forwardQuery, rr resolverAndDelay) (ret []byte, err error) {
if verboseDNSForward() {
id := forwarderCount.Add(1)
f.logf("forwarder.send(%q) [%d] ...", rr.name.Addr, id)
defer func() {
f.logf("forwarder.send(%q) [%d] = %v, %v", rr.name.Addr, id, len(ret), err)
}()
}
if strings.HasPrefix(rr.name.Addr, "http://") {
return f.sendDoH(ctx, rr.name.Addr, f.dialer.PeerAPIHTTPClient(), fq.packet)
}
if strings.HasPrefix(rr.name.Addr, "https://") {
// Only known DoH providers are supported currently. Specifically, we
// only support DoH providers where we can TCP connect to them on port
// 443 at the same IP address they serve normal UDP DNS from (1.1.1.1,
// 8.8.8.8, 9.9.9.9, etc.) That's why OpenDNS and custom DoH providers
// aren't currently supported. There's no backup DNS resolution path for
// them.
urlBase := rr.name.Addr
if hc, ok := f.getKnownDoHClientForProvider(urlBase); ok {
return f.sendDoH(ctx, urlBase, hc, fq.packet)
}
metricDNSFwdErrorType.Add(1)
return nil, fmt.Errorf("arbitrary https:// resolvers not supported yet")
}
if strings.HasPrefix(rr.name.Addr, "tls://") {
metricDNSFwdErrorType.Add(1)
return nil, fmt.Errorf("tls:// resolvers not supported yet")
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
isUDPQuery := fq.family == "udp"
skipTCP := skipTCPRetry() || (f.controlKnobs != nil && f.controlKnobs.DisableDNSForwarderTCPRetries.Load())
// Print logs about retries if this was because of a truncated response.
var explicitRetry atomic.Bool // true if truncated UDP response retried
defer func() {
if !explicitRetry.Load() {
return
}
if err == nil {
f.logf("forwarder.send(%q): successfully retried via TCP", rr.name.Addr)
} else {
f.logf("forwarder.send(%q): could not retry via TCP: %v", rr.name.Addr, err)
}
}()
firstUDP := func(ctx context.Context) ([]byte, error) {
resp, err := f.sendUDP(ctx, fq, rr)
if err != nil {
return nil, err
}
if !truncatedFlagSet(resp) {
// Successful, non-truncated response; no retry.
return resp, nil
}
// If this is a UDP query, return it regardless of whether the
// response is truncated or not; the client can retry
// communicating with tailscaled over TCP. There's no point
// falling back to TCP for a truncated query if we can't return
// the results to the client.
if isUDPQuery {
return resp, nil
}
if skipTCP {
// Envknob or control knob disabled the TCP retry behaviour;
// just return what we have.
return resp, nil
}
// This is a TCP query from the client, and the UDP response
// from the upstream DNS server is truncated; map this to an
// error to cause our retry helper to immediately kick off the
// TCP retry.
explicitRetry.Store(true)
return nil, truncatedResponseError{resp}
}
thenTCP := func(ctx context.Context) ([]byte, error) {
// If we're skipping the TCP fallback, then wait until the
// context is canceled and return that error (i.e. not
// returning anything).
if skipTCP {
<-ctx.Done()
return nil, ctx.Err()
}
return f.sendTCP(ctx, fq, rr)
}
// If the input query is TCP, then don't have a timeout between
// starting UDP and TCP.
timeout := udpRaceTimeout
if !isUDPQuery {
timeout = 0
}
// Kick off the race between the UDP and TCP queries.
rh := race.New(timeout, firstUDP, thenTCP)
resp, err := rh.Start(ctx)
if err == nil {
return resp, nil
}
// If we got a truncated UDP response, return that instead of an error.
var trErr truncatedResponseError
if errors.As(err, &trErr) {
return trErr.res, nil
}
return nil, err
}
type truncatedResponseError struct {
res []byte
}
func (tr truncatedResponseError) Error() string { return "response truncated" }
var errServerFailure = errors.New("response code indicates server issue")
var errTxIDMismatch = errors.New("txid doesn't match")
func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAndDelay) (ret []byte, err error) {
ipp, ok := rr.name.IPPort()
if !ok {
metricDNSFwdErrorType.Add(1)
return nil, fmt.Errorf("unrecognized resolver type %q", rr.name.Addr)
}
metricDNSFwdUDP.Add(1)
ctx = sockstats.WithSockStats(ctx, sockstats.LabelDNSForwarderUDP, f.logf)
ln, err := f.packetListener(ipp.Addr())
if err != nil {
return nil, err
}
// Specify the exact UDP family to work around https://github.com/golang/go/issues/52264
udpFam := "udp4"
if ipp.Addr().Is6() {
udpFam = "udp6"
}
conn, err := ln.ListenPacket(ctx, udpFam, ":0")
if err != nil {
f.logf("ListenPacket failed: %v", err)
return nil, err
}
defer conn.Close()
fq.closeOnCtxDone.Add(conn)
defer fq.closeOnCtxDone.Remove(conn)
if _, err := conn.WriteToUDPAddrPort(fq.packet, ipp); err != nil {
metricDNSFwdUDPErrorWrite.Add(1)
if err := ctx.Err(); err != nil {
return nil, err
}
return nil, err
}
metricDNSFwdUDPWrote.Add(1)
// The 1 extra byte is to detect packet truncation.
out := make([]byte, maxResponseBytes+1)
n, _, err := conn.ReadFromUDPAddrPort(out)
if err != nil {
if err := ctx.Err(); err != nil {
return nil, err
}
if neterror.PacketWasTruncated(err) {
err = nil
} else {
metricDNSFwdUDPErrorRead.Add(1)
return nil, err
}
}
truncated := n > maxResponseBytes
if truncated {
n = maxResponseBytes
}
if n < headerBytes {
f.logf("recv: packet too small (%d bytes)", n)
}
out = out[:n]
txid := getTxID(out)
if txid != fq.txid {
metricDNSFwdUDPErrorTxID.Add(1)
return nil, errTxIDMismatch
}
rcode := getRCode(out)
// don't forward transient errors back to the client when the server fails
if rcode == dns.RCodeServerFailure {
f.logf("recv: response code indicating server failure: %d", rcode)
metricDNSFwdUDPErrorServer.Add(1)
return nil, errServerFailure
}
if truncated {
// Set the truncated bit if it wasn't already.
flags := binary.BigEndian.Uint16(out[2:4])
flags |= dnsFlagTruncated
binary.BigEndian.PutUint16(out[2:4], flags)
// TODO(#2067): Remove any incomplete records? RFC 1035 section 6.2
// states that truncation should head drop so that the authority
// section can be preserved if possible. However, the UDP read with
// a too-small buffer has already dropped the end, so that's the
// best we can do.
}
if truncatedFlagSet(out) {
metricDNSFwdTruncated.Add(1)
}
clampEDNSSize(out, maxResponseBytes)
metricDNSFwdUDPSuccess.Add(1)
return out, nil
}
func (f *forwarder) getDialerType() dnscache.DialContextFunc {
if f.controlKnobs != nil && f.controlKnobs.UserDialUseRoutes.Load() {
// It is safe to use UserDial as it dials external servers without going through Tailscale
// and closes connections on interface change in the same way as SystemDial does,
// thus preventing DNS resolution issues when switching between WiFi and cellular,
// but can also dial an internal DNS server on the Tailnet or via a subnet router.
//
// TODO(nickkhyl): Update tsdial.Dialer to reuse the bart.Table we create in net/tstun.Wrapper
// to avoid having two bart tables in memory, especially on iOS. Once that's done,
// we can get rid of the nodeAttr/control knob and always use UserDial for DNS.
//
// See https://github.com/tailscale/tailscale/issues/12027.
return f.dialer.UserDial
}
return f.dialer.SystemDial
}
func (f *forwarder) sendTCP(ctx context.Context, fq *forwardQuery, rr resolverAndDelay) (ret []byte, err error) {
ipp, ok := rr.name.IPPort()
if !ok {
metricDNSFwdErrorType.Add(1)
return nil, fmt.Errorf("unrecognized resolver type %q", rr.name.Addr)
}
metricDNSFwdTCP.Add(1)
ctx = sockstats.WithSockStats(ctx, sockstats.LabelDNSForwarderTCP, f.logf)
// Specify the exact family to work around https://github.com/golang/go/issues/52264
tcpFam := "tcp4"
if ipp.Addr().Is6() {
tcpFam = "tcp6"
}
ctx, cancel := context.WithTimeout(ctx, tcpQueryTimeout)
defer cancel()
conn, err := f.getDialerType()(ctx, tcpFam, ipp.String())
if err != nil {
return nil, err
}
defer conn.Close()
fq.closeOnCtxDone.Add(conn)
defer fq.closeOnCtxDone.Remove(conn)
ctxOrErr := func(err2 error) ([]byte, error) {
if err := ctx.Err(); err != nil {
return nil, err
}
return nil, err2
}
// Write the query to the server.
query := make([]byte, len(fq.packet)+2)
binary.BigEndian.PutUint16(query, uint16(len(fq.packet)))
copy(query[2:], fq.packet)
if _, err := conn.Write(query); err != nil {
metricDNSFwdTCPErrorWrite.Add(1)
return ctxOrErr(err)
}
metricDNSFwdTCPWrote.Add(1)
// Read the header length back from the server
var length uint16
if err := binary.Read(conn, binary.BigEndian, &length); err != nil {
metricDNSFwdTCPErrorRead.Add(1)
return ctxOrErr(err)
}
// Now read the response
out := make([]byte, length)
n, err := io.ReadFull(conn, out)
if err != nil {
metricDNSFwdTCPErrorRead.Add(1)
return ctxOrErr(err)
}
if n < int(length) {
f.logf("sendTCP: packet too small (%d bytes)", n)
return nil, io.ErrUnexpectedEOF
}
out = out[:n]
txid := getTxID(out)
if txid != fq.txid {
metricDNSFwdTCPErrorTxID.Add(1)
return nil, errTxIDMismatch
}
rcode := getRCode(out)
// don't forward transient errors back to the client when the server fails
if rcode == dns.RCodeServerFailure {
f.logf("sendTCP: response code indicating server failure: %d", rcode)
metricDNSFwdTCPErrorServer.Add(1)
return nil, errServerFailure
}
// TODO(andrew): do we need to do this?
//clampEDNSSize(out, maxResponseBytes)
metricDNSFwdTCPSuccess.Add(1)
return out, nil
}
// resolvers returns the resolvers to use for domain.
func (f *forwarder) resolvers(domain dnsname.FQDN) []resolverAndDelay {
f.mu.Lock()
routes := f.routes
cloudHostFallback := f.cloudHostFallback
f.mu.Unlock()
for _, route := range routes {
if route.Suffix == "." || route.Suffix.Contains(domain) {
return route.Resolvers
}
}
return cloudHostFallback // or nil if no fallback
}
// forwardQuery is information and state about a forwarded DNS query that's
// being sent to 1 or more upstreams.
//
// In the case of racing against multiple equivalent upstreams
// (e.g. Google or CloudFlare's 4 DNS IPs: 2 IPv4 + 2 IPv6), this type
// handles racing them more intelligently than just blasting away 4
// queries at once.
type forwardQuery struct {
txid txid
packet []byte
family string // "tcp" or "udp"
// closeOnCtxDone lets send register values to Close if the
// caller's ctx expires. This avoids send from allocating its
// own waiting goroutine to interrupt the ReadFrom, as memory
// is tight on iOS and we want the number of pending DNS
// lookups to be bursty without too much associated
// goroutine/memory cost.
closeOnCtxDone *closePool
// TODO(bradfitz): add race delay state:
// mu sync.Mutex
// ...
}
// forwardWithDestChan forwards the query to all upstream nameservers
// and waits for the first response.
//
// It either sends to responseChan and returns nil, or returns a
// non-nil error (without sending to the channel).
//
// If resolvers is non-empty, it's used explicitly (notably, for exit
// node DNS proxy queries), otherwise f.resolvers is used.
func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, responseChan chan<- packet, resolvers ...resolverAndDelay) error {
metricDNSFwd.Add(1)
domain, err := nameFromQuery(query.bs)
if err != nil {
metricDNSFwdErrorName.Add(1)
return err
}
// Guarantee that the ctx we use below is done when this function returns.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Drop DNS service discovery spam, primarily for battery life
// on mobile. Things like Spotify on iOS generate this traffic,
// when browsing for LAN devices. But even when filtering this
// out, playing on Sonos still works.
if hasRDNSBonjourPrefix(domain) {
metricDNSFwdDropBonjour.Add(1)
res, err := nxDomainResponse(query)
if err != nil {
f.logf("error parsing bonjour query: %v", err)
// Returning an error will cause an internal retry, there is
// nothing we can do if parsing failed. Just drop the packet.
return nil
}
select {
case <-ctx.Done():
return fmt.Errorf("waiting to send NXDOMAIN: %w", ctx.Err())
case responseChan <- res:
return nil
}
}
if fl := fwdLogAtomic.Load(); fl != nil {
fl.addName(string(domain))
}
clampEDNSSize(query.bs, maxResponseBytes)
if len(resolvers) == 0 {
resolvers = f.resolvers(domain)
if len(resolvers) == 0 {
metricDNSFwdErrorNoUpstream.Add(1)
f.logf("no upstream resolvers set, returning SERVFAIL")
res, err := servfailResponse(query)
if err != nil {
f.logf("building servfail response: %v", err)
// Returning an error will cause an internal retry, there is
// nothing we can do if parsing failed. Just drop the packet.
return nil
}
select {
case <-ctx.Done():
return fmt.Errorf("waiting to send SERVFAIL: %w", ctx.Err())
case responseChan <- res:
return nil
}
}
}
fq := &forwardQuery{
txid: getTxID(query.bs),
packet: query.bs,
family: query.family,
closeOnCtxDone: new(closePool),
}
defer fq.closeOnCtxDone.Close()
resc := make(chan []byte, 1) // it's fine buffered or not
errc := make(chan error, 1) // it's fine buffered or not too
for i := range resolvers {
go func(rr *resolverAndDelay) {
if rr.startDelay > 0 {
timer := time.NewTimer(rr.startDelay)
select {
case <-timer.C:
case <-ctx.Done():
timer.Stop()
return
}
}
resb, err := f.send(ctx, fq, *rr)
if err != nil {
err = fmt.Errorf("resolving using %q: %w", rr.name.Addr, err)
select {
case errc <- err:
case <-ctx.Done():
}
return
}
select {
case resc <- resb:
case <-ctx.Done():
}
}(&resolvers[i])
}
var firstErr error
var numErr int
for {
select {
case v := <-resc:
select {
case <-ctx.Done():
metricDNSFwdErrorContext.Add(1)
return fmt.Errorf("waiting to send response: %w", ctx.Err())
case responseChan <- packet{v, query.family, query.addr}:
metricDNSFwdSuccess.Add(1)
return nil
}
case err := <-errc:
if firstErr == nil {
firstErr = err
}
numErr++
if numErr == len(resolvers) {
if errors.Is(firstErr, errServerFailure) {
res, err := servfailResponse(query)
if err != nil {
f.logf("building servfail response: %v", err)
return firstErr
}
select {
case <-ctx.Done():
metricDNSFwdErrorContext.Add(1)
metricDNSFwdErrorContextGotError.Add(1)
case responseChan <- res:
}
}
return firstErr
}
case <-ctx.Done():
metricDNSFwdErrorContext.Add(1)
if firstErr != nil {
metricDNSFwdErrorContextGotError.Add(1)
return firstErr
}
// If we haven't got an error or a successful response,
// include all resolvers in the error message so we can
// at least see what what servers we're trying to
// query.
var resolverAddrs []string
for _, rr := range resolvers {
resolverAddrs = append(resolverAddrs, rr.name.Addr)
}
return fmt.Errorf("waiting for response or error from %v: %w", resolverAddrs, ctx.Err())
}
}
}
var initListenConfig func(_ *net.ListenConfig, _ *netmon.Monitor, tunName string) error
// nameFromQuery extracts the normalized query name from bs.
func nameFromQuery(bs []byte) (dnsname.FQDN, error) {
var parser dns.Parser
hdr, err := parser.Start(bs)
if err != nil {
return "", err