Skip to content

Commit

Permalink
wgengine/magicsock: hook up discovery messages, upgrade to LAN works
Browse files Browse the repository at this point in the history
Ping messages now go out somewhat regularly, pong replies are sent,
and pong replies are now partially handled enough to upgrade off DERP
to LAN.

CallMeMaybe packets are sent & received over DERP, but aren't yet
handled. That's next (and regular maintenance timers), and then WAN
should work.

Updates #483
  • Loading branch information
bradfitz committed Jul 1, 2020
1 parent 9b8ca21 commit 77d3ef3
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 29 deletions.
6 changes: 6 additions & 0 deletions disco/disco.go
Expand Up @@ -28,6 +28,12 @@ import (
"inet.af/netaddr"
)

// Wire-level constants shared by every discovery message.
const (
	// Magic is the fixed 6 byte prefix that starts every discovery message.
	Magic = "TS💬" // 6 bytes: 0x54 53 f0 9f 92 ac

	// NonceLen is the size, in bytes, of the nonce used by nacl secretboxes.
	NonceLen = 24
)

type MessageType byte

const (
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -29,6 +29,6 @@ require (
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e
golang.org/x/time v0.0.0-20191024005414-555d28b269f0
honnef.co/go/tools v0.0.1-2020.1.4 // indirect
inet.af/netaddr v0.0.0-20200701171350-6509743f79d9
inet.af/netaddr v0.0.0-20200701194149-10bc159763c4
rsc.io/goversion v1.2.0
)
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -164,5 +164,7 @@ inet.af/netaddr v0.0.0-20200629220211-f44a6d25c536 h1:XFVw2MVOtmHBidx70M+I6vIw2F
inet.af/netaddr v0.0.0-20200629220211-f44a6d25c536/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww=
inet.af/netaddr v0.0.0-20200701171350-6509743f79d9 h1:F41nQsn8UGDPDXsOPwZQiaK8BmItPzyb0PYgjJSYIzw=
inet.af/netaddr v0.0.0-20200701171350-6509743f79d9/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww=
inet.af/netaddr v0.0.0-20200701194149-10bc159763c4 h1:dSXrLpRy86h4wfZWvzjyA2tuhMzX/lx0st4hzAh1VMA=
inet.af/netaddr v0.0.0-20200701194149-10bc159763c4/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww=
rsc.io/goversion v1.2.0 h1:SPn+NLTiAG7w30IRK/DKp1BjvpWabYgxlLp/+kx5J8w=
rsc.io/goversion v1.2.0/go.mod h1:Eih9y/uIBS3ulggl7KNJ09xGSLcuNaLgmvvqa07sgfo=
154 changes: 126 additions & 28 deletions wgengine/magicsock/magicsock.go
Expand Up @@ -9,6 +9,7 @@ package magicsock
import (
"bufio"
"context"
crand "crypto/rand"
"encoding/binary"
"errors"
"fmt"
Expand Down Expand Up @@ -93,6 +94,7 @@ type Conn struct {
peerSet map[key.Public]struct{}

discoPrivate key.Private
discoPublic tailcfg.DiscoKey // public of discoPrivate
nodeOfDisco map[tailcfg.DiscoKey]*tailcfg.Node
discoOfNode map[tailcfg.NodeKey]tailcfg.DiscoKey

Expand Down Expand Up @@ -511,7 +513,8 @@ func (c *Conn) SetDiscoPrivateKey(k key.Private) {
panic("unsupported")
}
c.discoPrivate = k
c.logf("magicsock: disco key set; public: %x", k.Public())
c.discoPublic = tailcfg.DiscoKey(k.Public())
c.logf("magicsock: disco key set; public: %x", c.discoPublic)
}

// c.mu must NOT be held.
Expand Down Expand Up @@ -658,7 +661,9 @@ func shouldSprayPacket(b []byte) bool {
return false
}

var logPacketDests, _ = strconv.ParseBool(os.Getenv("DEBUG_LOG_PACKET_DESTS"))
var logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS"))

var logDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO"))

const sprayPeriod = 3 * time.Second

Expand Down Expand Up @@ -1367,6 +1372,25 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) {
}
}

// sendDiscoMessage seals m in a nacl box addressed to dstDisco and sends it
// to dst via c.sendAddr, returning sendAddr's error.
//
// The packet layout is: disco.Magic, our disco public key, a fresh random
// nonce, then the sealed payload.
//
// c.mu must NOT be held; it is acquired briefly to read the local disco
// public key and to look up the shared key.
func (c *Conn) sendDiscoMessage(dst netaddr.IPPort, dstKey key.Public, dstDisco tailcfg.DiscoKey, m disco.Message) error {
	// Generate the nonce before taking c.mu: reading entropy needs no Conn
	// state, so there is no reason to do it inside the critical section.
	var nonce [disco.NonceLen]byte
	if _, err := crand.Read(nonce[:]); err != nil {
		panic(err) // worth dying for
	}

	// Only the state guarded by c.mu is read under the lock.
	c.mu.Lock()
	discoPublic := c.discoPublic
	sharedKey := c.sharedDiscoKeyLocked(dstDisco)
	c.mu.Unlock()

	pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters.
	pkt = append(pkt, disco.Magic...)
	pkt = append(pkt, discoPublic[:]...)
	pkt = append(pkt, nonce[:]...)
	pkt = box.SealAfterPrecomputation(pkt, m.AppendMarshal(nil), &nonce, sharedKey)

	err := c.sendAddr(dst, dstKey, pkt)
	c.logf("magicsock: disco: sent %T to %v; err=%v", m, dst, err)
	return err
}

// handleDiscoMessage reports whether msg was a Tailscale inter-node discovery message
// that was handled.
//
Expand All @@ -1380,34 +1404,41 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) {
// For messages received over DERP, the addr will be derpMagicIP (with
// port being the region)
func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
const magic = "TS💬"
const nonceLen = 24
const headerLen = len(magic) + len(tailcfg.DiscoKey{}) + nonceLen
if len(msg) < headerLen || string(msg[:len(magic)]) != magic {
const headerLen = len(disco.Magic) + len(tailcfg.DiscoKey{}) + disco.NonceLen
if len(msg) < headerLen || string(msg[:len(disco.Magic)]) != disco.Magic {
return false
}
var sender tailcfg.DiscoKey
copy(sender[:], msg[len(magic):])
copy(sender[:], msg[len(disco.Magic):])

c.mu.Lock()
defer c.mu.Unlock()

if logDisco {
c.logf("magicsock: disco: got disco-looking frame %v", sender)
}
if c.discoPrivate.IsZero() {
if logDisco {
c.logf("magicsock: disco: ignoring disco-looking frame, no local key")
}
return false
}

senderNode, ok := c.nodeOfDisco[sender]
if !ok {
// Returning false keeps passing it down, to WireGuard.
// WireGuard will almost surely reject it, but give it a chance.
if logDisco {
c.logf("magicsock: disco: ignoring disco-looking frame, don't know about %v", sender)
}
return false
}

// First, do we even know (and thus care) about this sender? If not,
// don't bother decrypting it.

var nonce [nonceLen]byte
copy(nonce[:], msg[len(magic)+len(key.Public{}):])
var nonce [disco.NonceLen]byte
copy(nonce[:], msg[len(disco.Magic)+len(key.Public{}):])
sealedBox := msg[headerLen:]
payload, ok := box.OpenAfterPrecomputation(nil, sealedBox, &nonce, c.sharedDiscoKeyLocked(sender))
if !ok {
Expand All @@ -1416,14 +1447,20 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
// and old packets might've still been in flight (or
// scheduled). This is particularly the case for LANs
// or non-NATed endpoints.
// Not worth logging. Pass on to wireguard, in case
// Don't log in normal case. Pass on to wireguard, in case
// it's actually a wireguard packet (super unlikely,
// but).
if logDisco {
c.logf("magicsock: disco: failed to open naclbox from %v (wrong rcpt?)", sender)
}
// TODO(bradfitz): add some counter for this that logs rarely
return false
}

dm, err := disco.Parse(payload)
if logDisco {
c.logf("magicsock: disco: disco.Parse = %T, %v", dm, err)
}
if err != nil {
// Couldn't parse it, but it was inside a correctly
// signed box, so just ignore it, assuming it's from a
Expand Down Expand Up @@ -1452,17 +1489,20 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
}

// handlePongLocked routes a received Pong to the discoEndpoint registered
// for the sender's disco key dk. Pongs from disco keys with no registered
// endpoint are dropped without logging.
//
// As the Locked suffix suggests, the caller is expected to hold c.mu.
func (c *Conn) handlePongLocked(m *disco.Pong, n *tailcfg.Node, dk tailcfg.DiscoKey, from netaddr.IPPort) {
	if de, found := c.endpointOfDisco[dk]; found {
		c.logf("magicsock: disco: got pong from %s, tx=%x, disco=%x, src=%v (they saw %v)", n.Key.ShortString(), m.TxID, dk[:8], from, m.Src)
		// handlePong acquires de.mu itself; run it on its own goroutine
		// rather than inline in this handler.
		go de.handlePong(m)
	}
}

func (c *Conn) handlePingLocked(m *disco.Ping, n *tailcfg.Node, dk tailcfg.DiscoKey, from netaddr.IPPort) {
c.logf("magicsock: disco: got ping tx %x from %s/%x at %v", m.TxID, n.Key.ShortString(), dk[:8], from)
reply := &disco.Pong{
go c.sendDiscoMessage(from, key.Public(n.Key), dk, &disco.Pong{
TxID: m.TxID,
Src: from,
}
go c.sendAddr(from, key.Public(n.Key), reply.AppendMarshal(nil))
})
}

// handleCallMeMaybeLocked is called when a discovery message arrives
Expand All @@ -1471,7 +1511,7 @@ func (c *Conn) handlePingLocked(m *disco.Ping, n *tailcfg.Node, dk tailcfg.Disco
// stateful firewall should be open. Now we can Ping back and make it
// through.
func (c *Conn) handleCallMeMaybeLocked(n *tailcfg.Node, dk tailcfg.DiscoKey) {
	// Log the packet once. ShortString must be called: passing the method
	// value itself to %s would print a func value, not the key.
	c.logf("magicsock: disco: got call-me-maybe packet from %s (disco=%x)", n.Key.ShortString(), dk[:8])
	// TODO: implement
}

Expand Down Expand Up @@ -2491,22 +2531,24 @@ type discoEndpoint struct {
lastSend time.Time
derpAddr netaddr.IPPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)

bestAddr netaddr.IPPort // best non-DERP path; zero if none
sentPing map[stun.TxID]sentPing
endpointState map[netaddr.IPPort]*endpointState
bestAddr netaddr.IPPort // best non-DERP path; zero if none
bestAddrLatency time.Duration
bestAddrAt time.Time // time best address re-confirmed
sentPing map[stun.TxID]sentPing
endpointState map[netaddr.IPPort]*endpointState

timers map[*time.Timer]bool
}

// endpointState is the per-address state kept in discoEndpoint.endpointState,
// keyed by the candidate netaddr.IPPort.
type endpointState struct {
// lastPing is set by sendPingsLocked to the time a ping was last sent to
// this address; zero means no ping has been sent yet.
lastPing time.Time
// TODO: lastPong time.Time
index int // index in nodecfg.Node.Endpoints
}

// sentPing records an outstanding ping in discoEndpoint.sentPing, keyed by
// the ping's transaction ID, so that a matching pong can be attributed to
// the address that was pinged and timed.
type sentPing struct {
// to is the address the ping was sent to.
to netaddr.IPPort
// at is the time the ping was sent; handlePong subtracts it from the
// pong's arrival time to compute the round trip.
at time.Time
}

// initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr.
Expand Down Expand Up @@ -2554,15 +2596,12 @@ func (de *discoEndpoint) UpdateDst(addr *net.UDPAddr) error {
func (de *discoEndpoint) send(b []byte) error {
now := time.Now()

// TODO: all the disco messaging & state tracking & spraying,
// bringing over relevant AddrSet code. For now, just do DERP
// as a crutch while I work on other bits.
de.mu.Lock()
de.lastSend = now
derpAddr := de.derpAddr
bestAddr := de.bestAddr
if bestAddr.Port == 0 {
de.sendPingsLocked()
if bestAddr.IsZero() || de.bestAddrAt.Before(now.Add(-5*time.Second)) {
de.sendPingsLocked(now)
}
de.mu.Unlock()

Expand All @@ -2575,8 +2614,36 @@ func (de *discoEndpoint) send(b []byte) error {
return de.c.sendAddr(bestAddr, de.publicKey, b)
}

func (de *discoEndpoint) sendPingsLocked() {
// TODO
// sendPingsLocked sends a disco Ping to every known endpoint that has not
// been pinged within the last 5 seconds, recording each ping in de.sentPing
// so the matching pong can be recognized. If at least one ping went out and a
// DERP address is known, it also schedules a CallMeMaybe via DERP shortly
// afterwards.
//
// now is the caller's notion of the current time. de.mu must be held.
func (de *discoEndpoint) sendPingsLocked(now time.Time) {
	// Forget pings that were never answered. Entries are otherwise removed
	// only when their pong arrives, so without this the map would grow
	// without bound for unreachable endpoints. The cutoff matches the 15s
	// window after which handlePong considers a best address too old.
	const sentPingMaxAge = 15 * time.Second
	for txid, sp := range de.sentPing {
		if now.Sub(sp.at) > sentPingMaxAge {
			delete(de.sentPing, txid)
		}
	}

	sent := false
	for ep, st := range de.endpointState {
		if !st.lastPing.IsZero() && now.Sub(st.lastPing) < 5*time.Second {
			// Pinged recently enough; don't flood this endpoint.
			continue
		}
		st.lastPing = now

		txid := stun.NewTxID()
		de.sentPing[txid] = sentPing{
			to: ep,
			at: now,
		}
		sent = true
		// Send from a goroutine: sendDiscoMessage takes the Conn's mutex
		// and does network I/O, neither of which should happen under de.mu.
		go de.sendDiscoMessage(ep, &disco.Ping{TxID: [12]byte(txid)})
	}
	derpAddr := de.derpAddr
	if sent && derpAddr.Port != 0 {
		// After a short delay (so the goroutines above get a chance to
		// schedule and run), send a message to the peer via DERP telling
		// them that we've just pinged them, so our firewall ports are
		// probably open and now would be a good time for them to connect.
		time.AfterFunc(5*time.Millisecond, func() {
			de.sendDiscoMessage(derpAddr, disco.CallMeMaybe{})
		})
	}
}

// sendDiscoMessage sends dm to dst on behalf of this endpoint. It delegates
// to the owning Conn, supplying this endpoint's public key and disco key as
// the destination identity, and returns the Conn's error unchanged.
func (de *discoEndpoint) sendDiscoMessage(dst netaddr.IPPort, dm disco.Message) error {
	conn := de.c
	err := conn.sendDiscoMessage(dst, de.publicKey, de.discoKey, dm)
	return err
}

func (de *discoEndpoint) updateFromNode(n *tailcfg.Node) {
Expand Down Expand Up @@ -2629,6 +2696,37 @@ func (de *discoEndpoint) noteConnectivityChange() {
// TODO: reset state
}

// handlePong processes a Pong received from this endpoint's peer.
//
// Pongs whose TxID does not match an outstanding entry in de.sentPing are
// ignored. Otherwise the entry is consumed, the round-trip time is computed
// from the recorded send time, and the pinged address becomes the best
// address if there is none (or the old one expired) or if it answered
// faster than the current best.
//
// It acquires de.mu; the caller must not hold it.
func (de *discoEndpoint) handlePong(m *disco.Pong) {
	de.mu.Lock()
	defer de.mu.Unlock()

	sp, ok := de.sentPing[m.TxID]
	if !ok {
		// This is not a pong for a ping we sent. Ignore.
		return
	}
	delete(de.sentPing, m.TxID)

	now := time.Now()
	delay := now.Sub(sp.at)
	de.c.logf("magicsock: disco: got pong reply after %v", delay)

	// Expire our best address if we haven't heard from it in a while.
	tooOld := now.Add(-15 * time.Second)
	if !de.bestAddr.IsZero() && de.bestAddrAt.Before(tooOld) {
		de.bestAddr = netaddr.IPPort{}
	}

	// Promote this pong response to our current best address if it's lower latency.
	// TODO(bradfitz): decide how latency vs. preference order affects decision
	if de.bestAddr.IsZero() || delay < de.bestAddrLatency {
		if de.bestAddr != sp.to {
			de.bestAddr = sp.to
			de.c.logf("magicsock: disco: promoted %v to best address for %v", sp.to, de.publicKey.ShortString())
		}
	}

	// Any pong from the best address re-confirms it, even if this round
	// trip was not faster than the recorded one. Otherwise a healthy path
	// whose latency rose slightly would never refresh bestAddrAt and would
	// be expired above despite still answering.
	if de.bestAddr == sp.to {
		de.bestAddrLatency = delay
		de.bestAddrAt = now
	}
}

// cleanup is called when a discovery endpoint is no longer present in the NetworkMap.
// This is where we can do cleanup such as closing goroutines or canceling timers.
func (de *discoEndpoint) cleanup() {
Expand Down

0 comments on commit 77d3ef3

Please sign in to comment.