-
Notifications
You must be signed in to change notification settings - Fork 2
Fix rapid deploys causing orphaned sandboxes and stale routing #672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
phinze
merged 5 commits into
main
from
mir-797-investigate-rapid-deploys-showing-up-as-sandbox-po
Mar 13, 2026
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
25c558c
Fix rapid deploys causing orphaned sandboxes, false crashes, and stal…
evanphx ad8ebe8
Add tests for sandbox lease invalidation in httpingress
phinze 2377903
Add blackbox test for zero-downtime deploys
phinze 0cee6ef
Review fixes: error propagation, channel guard, blackbox HTTPS
phinze 63b63a9
Address remaining review feedback
phinze File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| package harness | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "strconv" | ||
| "strings" | ||
| ) | ||
|
|
||
| // HTTPGet makes an HTTP GET request from inside the dev container via curl. | ||
| // The host header is set to the given hostname so ingress routing works, | ||
| // while the actual request goes to localhost:443 over HTTPS (with -k to | ||
| // skip certificate verification). Port 80 redirects to HTTPS, so we | ||
| // connect directly to avoid redirect resolution issues. | ||
| func HTTPGet(m *Miren, hostname, path string) (statusCode int, body string, err error) { | ||
| r := m.RunCmd("curl", "-sk", "-w", "\n%{http_code}", | ||
| "-H", fmt.Sprintf("Host: %s", hostname), | ||
| "--max-time", "10", | ||
| fmt.Sprintf("https://localhost:443%s", path)) | ||
|
|
||
| if !r.Success() { | ||
| return 0, "", fmt.Errorf("curl failed (exit %d): %s", r.ExitCode, strings.TrimSpace(r.Stderr)) | ||
| } | ||
|
|
||
| // Output format: body\nstatus_code | ||
| lines := strings.Split(strings.TrimRight(r.Stdout, "\n"), "\n") | ||
| if len(lines) < 1 { | ||
| return 0, "", fmt.Errorf("unexpected curl output: %q", r.Stdout) | ||
| } | ||
|
|
||
| statusStr := lines[len(lines)-1] | ||
| code, parseErr := strconv.Atoi(strings.TrimSpace(statusStr)) | ||
| if parseErr != nil { | ||
| return 0, "", fmt.Errorf("failed to parse status code %q: %v", statusStr, parseErr) | ||
| } | ||
|
|
||
| bodyStr := strings.Join(lines[:len(lines)-1], "\n") | ||
| return code, bodyStr, nil | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| //go:build blackbox | ||
|
|
||
| package blackbox | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "sync" | ||
| "sync/atomic" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "miren.dev/runtime/blackbox/harness" | ||
| ) | ||
|
|
||
| // TestZeroDowntimeDeploy verifies that HTTP requests continue to succeed during | ||
| // a redeploy. This exercises the proactive lease invalidation path: when old | ||
| // sandboxes go STOPPED, httpingress should evict their cached leases before | ||
| // any request hits a dead IP. | ||
| func TestZeroDowntimeDeploy(t *testing.T) { | ||
| c := harness.NewCluster(t) | ||
| m := harness.NewMiren(t, c) | ||
|
|
||
| // Deploy initial version | ||
| name := harness.DeployApp(t, m, harness.AppOptions{ | ||
| Testdata: "go-server", | ||
| Env: []string{"DEPLOY_VERSION=v1"}, | ||
| }) | ||
|
|
||
| // Set a route so we can reach the app via HTTP ingress | ||
| host := name + ".test.local" | ||
| m.MustRun("route", "set", host, name) | ||
| t.Cleanup(func() { | ||
| m.Run("route", "remove", host) | ||
| }) | ||
|
|
||
| // Verify HTTP works before we start | ||
| harness.Poll(t, "initial HTTP reachable", 30*time.Second, 2*time.Second, func() (bool, string) { | ||
| code, _, err := harness.HTTPGet(m, host, "/") | ||
| if err != nil { | ||
| return false, fmt.Sprintf("HTTP error: %v", err) | ||
| } | ||
| if code != 200 { | ||
| return false, fmt.Sprintf("HTTP status %d", code) | ||
| } | ||
| return true, "" | ||
| }) | ||
|
|
||
| // Start continuous HTTP requests in background | ||
| var ( | ||
| totalRequests atomic.Int64 | ||
| failedResults []string | ||
| failMu sync.Mutex | ||
| stop = make(chan struct{}) | ||
| done = make(chan struct{}) | ||
| ) | ||
|
|
||
| go func() { | ||
| defer close(done) | ||
| for { | ||
| select { | ||
| case <-stop: | ||
| return | ||
| default: | ||
| } | ||
|
|
||
| code, _, err := harness.HTTPGet(m, host, "/") | ||
| totalRequests.Add(1) | ||
|
|
||
| if err != nil || (code != 200 && code != 502 && code != 503) { | ||
| // 502/503 during brief transition are noted but may be acceptable | ||
| // in some edge cases; we track them all | ||
| } | ||
| if err != nil { | ||
| failMu.Lock() | ||
| failedResults = append(failedResults, fmt.Sprintf("request #%d: error: %v", totalRequests.Load(), err)) | ||
| failMu.Unlock() | ||
| } else if code != 200 { | ||
| failMu.Lock() | ||
| failedResults = append(failedResults, fmt.Sprintf("request #%d: HTTP %d", totalRequests.Load(), code)) | ||
| failMu.Unlock() | ||
| } | ||
|
|
||
| time.Sleep(200 * time.Millisecond) | ||
| } | ||
| }() | ||
|
|
||
| // Trigger a redeploy by changing an env var (forces new version + new sandbox) | ||
| t.Log("triggering redeploy...") | ||
| m.MustRun("deploy", "-a", name, "-d", m.ContainerPath(c.TestdataDir+"/go-server"), "-f", "-e", "DEPLOY_VERSION=v2") | ||
| harness.WaitForAppReady(t, m, name, 3*time.Minute) | ||
|
|
||
| // Let requests continue briefly after deploy settles to catch any stragglers | ||
| time.Sleep(3 * time.Second) | ||
|
|
||
| // Stop the request loop | ||
| close(stop) | ||
| <-done | ||
|
|
||
| total := totalRequests.Load() | ||
| t.Logf("total requests during deploy: %d", total) | ||
|
|
||
| failMu.Lock() | ||
| failures := failedResults | ||
| failMu.Unlock() | ||
|
|
||
| if len(failures) > 0 { | ||
| t.Errorf("had %d failed requests out of %d during deploy:", len(failures), total) | ||
| for _, f := range failures { | ||
| t.Logf(" %s", f) | ||
| } | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| package activator | ||
|
|
||
| import "miren.dev/runtime/api/compute/compute_v1alpha" | ||
|
|
||
| // NewTestLease creates a Lease with the given sandbox, size, and URL. | ||
| // This is only intended for use in tests outside the activator package. | ||
| func NewTestLease(sb *compute_v1alpha.Sandbox, size int, url string) *Lease { | ||
| return &Lease{ | ||
| sandbox: sb, | ||
| Size: size, | ||
| URL: url, | ||
| } | ||
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.