Skip to content

Commit

Permalink
cmd/tailscale/cli: prevent concurrent Start calls in 'up'
Browse files Browse the repository at this point in the history
Seems to deflake tstest/integration tests. I can't reproduce it
anymore on one of my VMs that was consistently flaking after a dozen
runs before. Now I can run hundreds of times.

Updates tailscale#11649
Fixes tailscale#7036

Change-Id: I2f7d4ae97500d507bdd78af9e92cd1242e8e44b8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
  • Loading branch information
bradfitz committed Apr 16, 2024
1 parent 26f9bbc commit 0fba9e7
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
27 changes: 22 additions & 5 deletions cmd/tailscale/cli/up.go
Original file line number Diff line number Diff line change
Expand Up @@ -496,11 +496,23 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
running := make(chan bool, 1) // gets value once in state ipn.Running
pumpErr := make(chan error, 1)

var printed bool // whether we've yet printed anything to stdout or stderr
var loginOnce sync.Once
startLoginInteractive := func() { loginOnce.Do(func() { localClient.StartLoginInteractive(ctx) }) }
// localAPIMu should be held while doing mutable LocalAPI calls
// to the backend. In particular, it prevents StartLoginInteractive from
// being called from the watcher goroutine while the Start call from
// the other goroutine is in progress.
// See https://github.com/tailscale/tailscale/issues/7036#issuecomment-2053771466
// TODO(bradfitz): simplify this once #11649 is cleaned up and Start is
// hopefully removed.
var localAPIMu sync.Mutex

startLoginInteractive := sync.OnceFunc(func() {
localAPIMu.Lock()
defer localAPIMu.Unlock()
localClient.StartLoginInteractive(ctx)
})

go func() {
var printed bool // whether we've yet printed anything to stdout or stderr
for {
n, err := watcher.Next()
if err != nil {
Expand Down Expand Up @@ -574,12 +586,14 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
// Special case: bare "tailscale up" means to just start
// running, if there's ever been a login.
if simpleUp {
localAPIMu.Lock()
_, err := localClient.EditPrefs(ctx, &ipn.MaskedPrefs{
Prefs: ipn.Prefs{
WantRunning: true,
},
WantRunningSet: true,
})
localAPIMu.Unlock()
if err != nil {
return err
}
Expand All @@ -596,10 +610,13 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if err != nil {
return err
}
if err := localClient.Start(ctx, ipn.Options{
localAPIMu.Lock()
err = localClient.Start(ctx, ipn.Options{
AuthKey: authKey,
UpdatePrefs: prefs,
}); err != nil {
})
localAPIMu.Unlock()
if err != nil {
return err
}
if upArgs.forceReauth {
Expand Down
18 changes: 8 additions & 10 deletions tstest/integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ func TestConfigFileAuthKey(t *testing.T) {

func TestTwoNodes(t *testing.T) {
tstest.Shard(t)
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598")
tstest.Parallel(t)
env := newTestEnv(t)

Expand Down Expand Up @@ -400,7 +399,6 @@ func TestTwoNodes(t *testing.T) {
// PeersRemoved set) saying that the second node disappeared.
func TestIncrementalMapUpdatePeersRemoved(t *testing.T) {
tstest.Shard(t)
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598")
tstest.Parallel(t)
env := newTestEnv(t)

Expand Down Expand Up @@ -658,17 +656,14 @@ func TestNoControlConnWhenDown(t *testing.T) {
d2 := n1.StartDaemon()
n1.AwaitResponding()

st := n1.MustStatus()
if got, want := st.BackendState, "Stopped"; got != want {
t.Fatalf("after restart, state = %q; want %q", got, want)
}
n1.AwaitBackendState("Stopped")

ip2 := n1.AwaitIP4()
if ip1 != ip2 {
t.Errorf("IPs different: %q vs %q", ip1, ip2)
}

// The real test: verify our daemon doesn't have an HTTP request open.:
// The real test: verify our daemon doesn't have an HTTP request open.
if n := env.Control.InServeMap(); n != 0 {
t.Errorf("in serve map = %d; want 0", n)
}
Expand Down Expand Up @@ -1007,7 +1002,6 @@ func newTestEnv(t testing.TB, opts ...testEnvOpt) *testEnv {
if runtime.GOOS == "windows" {
t.Skip("not tested/working on Windows yet")
}
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/7036")
derpMap := RunDERPAndSTUN(t, logger.Discard, "127.0.0.1")
logc := new(LogCatcher)
control := &testcontrol.Server{
Expand Down Expand Up @@ -1389,15 +1383,19 @@ func (n *testNode) AwaitIP6() netip.Addr {

// AwaitRunning waits for n to reach the IPN state "Running".
func (n *testNode) AwaitRunning() {
n.AwaitBackendState("Running")
}

func (n *testNode) AwaitBackendState(state string) {
t := n.env.t
t.Helper()
if err := tstest.WaitFor(20*time.Second, func() error {
st, err := n.Status()
if err != nil {
return err
}
if st.BackendState != "Running" {
return fmt.Errorf("in state %q", st.BackendState)
if st.BackendState != state {
return fmt.Errorf("in state %q; want %q", st.BackendState, state)
}
return nil
}); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion wgengine/magicsock/magicsock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ func (localhostListener) ListenPacket(ctx context.Context, network, address stri
}

func TestTwoDevicePing(t *testing.T) {
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/1277")
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/11762")
l, ip := localhostListener{}, netaddr.IPv4(127, 0, 0, 1)
n := &devices{
m1: l,
Expand Down

0 comments on commit 0fba9e7

Please sign in to comment.