Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions internal/command/deploy/mock_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ type mockFlapsClient struct {
breakDestroy bool
breakLease bool

// uncordonTransientFailures causes Uncordon to fail this many times before
// succeeding, simulating transient API errors for retry tests.
uncordonTransientFailures int

// mu to protect the members below.
mu sync.Mutex
machines []*fly.Machine
Expand Down Expand Up @@ -308,6 +312,15 @@ func (m *mockFlapsClient) Uncordon(ctx context.Context, appName, machineID strin
return fmt.Errorf("failed to uncordon %s", machineID)
}

m.mu.Lock()
defer m.mu.Unlock()

if m.uncordonTransientFailures > 0 {
m.uncordonTransientFailures--

return fmt.Errorf("transient error uncordoning %s", machineID)
}

return nil
}

Expand Down
21 changes: 20 additions & 1 deletion internal/command/deploy/strategy_bluegreen.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ type blueGreen struct {

waitBeforeStop time.Duration
waitBeforeCordon time.Duration

uncordonRetryAttempts uint
uncordonRetryDelay time.Duration
}

func BlueGreenStrategy(md *machineDeployment, blueMachines []*machineUpdateEntry) *blueGreen {
Expand Down Expand Up @@ -123,6 +126,9 @@ func (bg *blueGreen) initialize() {

bg.waitBeforeStop = 10 * time.Second
bg.waitBeforeCordon = 10 * time.Second

bg.uncordonRetryAttempts = 5
bg.uncordonRetryDelay = 500 * time.Millisecond
}

func (bg *blueGreen) isAborted() bool {
Expand Down Expand Up @@ -478,7 +484,20 @@ func (bg *blueGreen) MarkGreenMachinesAsReadyForTraffic(ctx context.Context) err
if bg.isAborted() {
return ErrAborted
}
err := bg.flaps.Uncordon(ctx, bg.app.Name, gm.Machine().ID, "")
err := retry.Do(
func() error {
return bg.flaps.Uncordon(ctx, bg.app.Name, gm.Machine().ID, "")
},
retry.Context(ctx),
retry.Attempts(bg.uncordonRetryAttempts),
retry.Delay(bg.uncordonRetryDelay),
retry.MaxDelay(30*time.Second),
retry.DelayType(retry.BackOffDelay),
retry.OnRetry(func(n uint, err error) {
fmt.Fprintf(bg.io.ErrOut, " Retrying uncordon for machine %s (attempt %d/%d): %v\n",
bg.colorize.Bold(gm.FormattedMachineId()), n+2, bg.uncordonRetryAttempts, err)
}),
)
if err != nil {
return err
}
Expand Down
55 changes: 54 additions & 1 deletion internal/command/deploy/strategy_bluegreen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package deploy

import (
"context"
"fmt"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/superfly/fly-go"
fly "github.com/superfly/fly-go"
"github.com/superfly/fly-go/flaps"
"github.com/superfly/flyctl/internal/appconfig"
"github.com/superfly/flyctl/internal/flapsutil"
Expand Down Expand Up @@ -53,6 +54,7 @@ func newBlueGreenStrategy(client flapsutil.FlapsClient, numberOfExistingMachines
// Don't have to wait during tests.
strategy.waitBeforeStop = 0
strategy.waitBeforeCordon = 0
strategy.uncordonRetryDelay = 0

return strategy
}
Expand Down Expand Up @@ -97,6 +99,57 @@ func TestDeploy(t *testing.T) {
})
}

// TestMarkGreenMachinesAsReadyForTrafficRetries exercises
// MarkGreenMachinesAsReadyForTraffic directly against a mock flaps client:
// once with no failures, once with a couple of transient uncordon failures,
// and once with an uncordon that never succeeds.
func TestMarkGreenMachinesAsReadyForTrafficRetries(t *testing.T) {
	ctx := context.Background()
	ios, _, _, _ := iostreams.Test()

	// strategyWithGreens returns a blue-green strategy created with zero
	// existing machines and then seeded with `count` green machines
	// (IDs green-1..green-count), so the method under test can be called
	// without running the rest of the deploy pipeline first.
	strategyWithGreens := func(client *mockFlapsClient, count int) *blueGreen {
		strategy := newBlueGreenStrategy(client, 0)

		for n := 1; n <= count; n++ {
			green := &fly.Machine{ID: fmt.Sprintf("green-%d", n)}
			entry := &machineUpdateEntry{
				leasableMachine: machine.NewLeasableMachine(client, ios, "test-app", green, false),
				launchInput:     &fly.LaunchMachineInput{},
			}
			strategy.greenMachines = append(strategy.greenMachines, entry)
		}

		return strategy
	}

	t.Run("succeeds immediately when no errors occur", func(t *testing.T) {
		strategy := strategyWithGreens(&mockFlapsClient{}, 3)

		assert.NoError(t, strategy.MarkGreenMachinesAsReadyForTraffic(ctx))
	})

	t.Run("succeeds after transient uncordon failures are retried", func(t *testing.T) {
		fake := &mockFlapsClient{uncordonTransientFailures: 2}
		strategy := strategyWithGreens(fake, 1)

		assert.NoError(t, strategy.MarkGreenMachinesAsReadyForTraffic(ctx))

		// The counter is guarded by the mock's mutex, so read it under the lock.
		left := func() int {
			fake.mu.Lock()
			defer fake.mu.Unlock()

			return fake.uncordonTransientFailures
		}()
		assert.Equal(t, 0, left, "all transient failures should have been consumed by retries")
	})

	t.Run("fails after all retry attempts are exhausted", func(t *testing.T) {
		// NOTE(review): breakUncordon presumably makes every Uncordon call
		// fail with "failed to uncordon <id>" — confirm against the mock.
		strategy := strategyWithGreens(&mockFlapsClient{breakUncordon: true}, 1)
		strategy.uncordonRetryAttempts = 3

		got := strategy.MarkGreenMachinesAsReadyForTraffic(ctx)
		assert.ErrorContains(t, got, "failed to uncordon")
	})
}

func FuzzDeploy(f *testing.F) {
flapsClient := &mockFlapsClient{}

Expand Down