Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions libtailscale/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ func (a *App) runBackend(ctx context.Context, hardwareAttestation bool) error {
if hardwareAttestation {
a.backend.SetHardwareAttested()
}
defer b.CloseTUNs()
defer func() {
b.devices.Down()
b.CloseTUNs()
}()

hc := localapi.HandlerConfig{
Actor: ipnauth.Self,
Expand Down Expand Up @@ -253,6 +256,9 @@ func (a *App) runBackend(ctx context.Context, hardwareAttestation bool) error {
}
case s := <-onDisconnect:
if vpnService.service != nil && vpnService.service.ID() == s.ID() {
if b.devices.Down() {
log.Printf("tunnel brought down on disconnect")
}
b.CloseTUNs()
netns.SetAndroidProtectFunc(nil)
netns.SetAndroidBindToNetworkFunc(nil)
Expand Down Expand Up @@ -399,7 +405,9 @@ func (a *App) closeVpnService(err error, b *backend) {
log.Printf("localapi edit prefs error %v", localApiErr)
}

b.lastCfg = nil
if b.devices.Down() {
log.Printf("tunnel brought down on VPN service error: %v", err)
}
b.CloseTUNs()

vpnService.service.DisconnectVPN()
Expand Down
95 changes: 89 additions & 6 deletions libtailscale/multitun.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import (
"log"
"os"
"runtime/debug"
"sync"

"github.com/tailscale/wireguard-go/tun"
"tailscale.com/syncs"
)

// multiTUN implements a tun.Device that supports multiple
Expand All @@ -30,6 +32,19 @@ type multiTUN struct {
names chan chan nameReply
shutdowns chan struct{}
shutdownDone chan struct{}

downMu sync.Mutex
// downCh is closed when the multiTUN is brought down,
// such as when Tailscale transitions to the Stopped state.
// This indicates that all outgoing packets should be dropped,
// and [multiTUN.Write] should return immediately without blocking
// until [multiTUN.Up] is called. See [multiTUN.Down]
// and tailscale/tailscale#18679 for more details.
//
// It can be read without holding downMu, but the mutex
// must be held when writing.
downCh syncs.AtomicValue[chan struct{}]
down bool // whether the downCh is closed
}

// tunDevice wraps and drives a single run.Device.
Expand Down Expand Up @@ -76,7 +91,11 @@ func newTUNDevices() *multiTUN {
names: make(chan chan nameReply),
shutdowns: make(chan struct{}),
shutdownDone: make(chan struct{}),
down: true, // The device is initially down.
}
downCh := make(chan struct{})
d.downCh.Store(downCh)
close(downCh)
go d.run()
return d
}
Expand Down Expand Up @@ -256,6 +275,47 @@ func (d *multiTUN) add(dev tun.Device) {
d.devices <- dev
}

// Up transitions the multiTUN into the "up" state, enabling packet
// writes to the underlying tunnel device. If no underlying device
// exists yet, write operations are pended until one is added via
// [multiTUN.add].
//
// It reports whether this call changed the state (i.e. the device
// was previously down).
func (d *multiTUN) Up() bool {
	d.downMu.Lock()
	defer d.downMu.Unlock()
	if d.down {
		// Swap in a fresh, open channel so that pending and future
		// writes block until an underlying device becomes available.
		d.downCh.Store(make(chan struct{}))
		d.down = false
		return true
	}
	return false
}

// Down transitions the multiTUN into the "down" state: outgoing
// packets are dropped without waiting for the underlying tunnel
// device, and all [multiTUN.Write] calls return immediately until
// [multiTUN.Up] is invoked again.
//
// This exists to distinguish a temporarily unavailable device (VPN
// reconfiguration, where write requests should be pended) from a
// stopped Tailscale (where pending and new requests must complete
// immediately to prevent deadlocks).
// See tailscale/tailscale#18679.
//
// It reports whether this call changed the state (i.e. the device
// was previously up).
func (d *multiTUN) Down() bool {
	d.downMu.Lock()
	defer d.downMu.Unlock()
	if !d.down {
		// Closing the stored channel wakes every Write currently
		// blocked on it.
		close(d.downCh.Load())
		d.down = true
		return true
	}
	return false
}

func (d *multiTUN) File() *os.File {
// The underlying file descriptor is not constant on Android.
// Let's hope no-one uses it.
Expand All @@ -264,16 +324,39 @@ func (d *multiTUN) File() *os.File {

// Read hands the read request to the multiTUN run loop and blocks
// until a reply arrives or the device is permanently closed, in which
// case it returns [os.ErrClosed]. It deliberately ignores the up/down
// state (downCh): reads may keep waiting across tunnel restarts.
func (d *multiTUN) Read(data [][]byte, sizes []int, offset int) (int, error) {
	// r receives exactly one reply for this request from the run loop.
	r := make(chan ioReply)
	select {
	// We don't care about d.downCh here, as it's fine
	// to continue waiting until the tunnel is up again
	// or the multiTUN device is permanently closed.
	// This does not block WireGuard reconfiguration.
	case d.reads <- ioRequest{data, sizes, offset, r}:
		rep := <-r
		return rep.count, rep.err
	case <-d.close:
		// Return immediately if the multiTUN device is closed.
		return 0, os.ErrClosed
	}
}

// Write hands the write request to the multiTUN run loop and blocks
// until a reply arrives, unless the device is down (packet silently
// dropped, nil error) or permanently closed ([os.ErrClosed]).
// Unlike Read, it must observe downCh to avoid deadlocking WireGuard
// reconfiguration; see the inline comment and tailscale/tailscale#18679.
func (d *multiTUN) Write(data [][]byte, offset int) (int, error) {
	// r receives exactly one reply for this request from the run loop.
	r := make(chan ioReply)
	select {
	case d.writes <- ioRequest{data, nil, offset, r}:
		rep := <-r
		return rep.count, rep.err
	case <-d.downCh.Load():
		// Drop the packet silently if the tunnel is down.
		// Otherwise, a race may occur during wireguard reconfig
		// and result in a deadlock, since a wireguard-go/device.Peer
		// cannot be removed until its RoutineSequentialReceiver
		// returns, and it will not return if it is blocked in
		// (*multiTUN).Write while sending to d.writes without
		// a receiver on the other side of the pipe.
		return 0, nil
	case <-d.close:
		// Return immediately if the multiTUN device is closed.
		return 0, os.ErrClosed
	}
}

func (d *multiTUN) MTU() (int, error) {
Expand Down
24 changes: 23 additions & 1 deletion libtailscale/net.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ var googleDNSServers = []netip.Addr{
netip.MustParseAddr("2001:4860:4860::8844"),
}

func (b *backend) updateTUN(rcfg *router.Config, dcfg *dns.OSConfig) error {
func (b *backend) updateTUN(rcfg *router.Config, dcfg *dns.OSConfig) (err error) {
b.logger.Logf("updateTUN: changed")
defer b.logger.Logf("updateTUN: finished")

Expand All @@ -89,6 +89,23 @@ func (b *backend) updateTUN(rcfg *router.Config, dcfg *dns.OSConfig) error {
b.CloseTUNs()
b.logger.Logf("updateTUN: closed old TUNs")

// Since the previous tunnel(s) are closed, the [multiTUN] device is
// not operational until a new underlying tunnel is created and added,
// which may never happen in case of an error or an empty [router.Config].
//
// Therefore, to prevent deadlocks where a [multiTUN.Write] would
// block waiting for a new tunnel to be added, we bring the multiTUN
// device down on exit unless a new [tun.Device] is created and added
// successfully. See tailscale/tailscale#18679.
//
// TODO(nickkhyl): revisit and simplify the [multiTUN] implementation?
hasTunnel := false
defer func() {
if !hasTunnel && b.devices.Down() {
b.logger.Logf("updateTUN: tunnel brought down: %v", err)
}
}()

if len(rcfg.LocalAddrs) == 0 {
return nil
}
Expand Down Expand Up @@ -182,8 +199,13 @@ func (b *backend) updateTUN(rcfg *router.Config, dcfg *dns.OSConfig) error {
b.logger.Logf("updateTUN: created TUN device")

b.devices.add(tunDev)
hasTunnel = true
b.logger.Logf("updateTUN: added TUN device")

if b.devices.Up() {
b.logger.Logf("tunnel brought up")
}

b.lastCfg = rcfg
b.lastDNSCfg = dcfg
return nil
Expand Down
Loading