Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ Running commands:
- Examples:
- `./hack/dev-exec go test ./pkg/entity/...` - Run tests
- `./hack/dev-exec m app list` - Use miren CLI
- `make bin/miren` - Rebuild binary (then `make dev-server-restart`)
- `./hack/dev-exec make bin/miren` - Rebuild binary inside dev container (then `make dev-server-restart`)

**Important**: The miren binary must be built **inside** the dev container (not on the host) so it has the correct architecture. Use `./hack/dev-exec make bin/miren` instead of `make bin/miren`.

**Managing the dev environment:**
- `make dev-stop` - Stop and remove the persistent dev container
Expand All @@ -75,7 +77,7 @@ m app list # Works immediately!

# Development iteration
vim path/to/code.go # Edit code
make bin/miren # Rebuild
./hack/dev-exec make bin/miren # Rebuild inside container
make dev-server-restart # Bounce server with new code

# Debugging
Expand Down
38 changes: 38 additions & 0 deletions blackbox/harness/http.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package harness

import (
"fmt"
"strconv"
"strings"
)

// HTTPGet makes an HTTP GET request from inside the dev container via curl.
// The host header is set to the given hostname so ingress routing works,
// while the actual request goes to localhost:443 over HTTPS (with -k to
// skip certificate verification). Port 80 redirects to HTTPS, so we
// connect directly to avoid redirect resolution issues.
func HTTPGet(m *Miren, hostname, path string) (statusCode int, body string, err error) {
r := m.RunCmd("curl", "-sk", "-w", "\n%{http_code}",
"-H", fmt.Sprintf("Host: %s", hostname),
"--max-time", "10",
fmt.Sprintf("https://localhost:443%s", path))

if !r.Success() {
return 0, "", fmt.Errorf("curl failed (exit %d): %s", r.ExitCode, strings.TrimSpace(r.Stderr))
}

// Output format: body\nstatus_code
lines := strings.Split(strings.TrimRight(r.Stdout, "\n"), "\n")
if len(lines) < 1 {
return 0, "", fmt.Errorf("unexpected curl output: %q", r.Stdout)
}

statusStr := lines[len(lines)-1]
code, parseErr := strconv.Atoi(strings.TrimSpace(statusStr))
if parseErr != nil {
return 0, "", fmt.Errorf("failed to parse status code %q: %v", statusStr, parseErr)
}

bodyStr := strings.Join(lines[:len(lines)-1], "\n")
return code, bodyStr, nil
}
56 changes: 56 additions & 0 deletions blackbox/harness/miren.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,62 @@ func (m *Miren) MustRun(args ...string) *Result {
return r
}

// RunCmd executes an arbitrary command (not miren CLI) in the dev container.
// In local mode it runs the command directly on the host.
func (m *Miren) RunCmd(args ...string) *Result {
m.t.Helper()
if len(args) == 0 {
m.t.Fatalf("RunCmd requires at least one argument")
return nil
}

var cmd *exec.Cmd

switch m.cluster.Mode {
case ModeDev:
devExec := filepath.Join(m.cluster.RepoRoot, "hack", "dev-exec")
cmd = exec.Command(devExec, args...)
cmd.Dir = m.cluster.RepoRoot
case ModeLocal:
cmd = exec.Command(args[0], args[1:]...)
default:
m.t.Fatalf("unknown mode: %s", m.cluster.Mode)
return nil
}

cmd.Env = append(cmd.Environ(), "TERM=dumb")

var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr

err := cmd.Run()
exitCode := 0
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
exitCode = exitErr.ExitCode()
} else {
m.t.Fatalf("failed to execute command: %v", err)
}
}

r := &Result{
ExitCode: exitCode,
Stdout: stdout.String(),
Stderr: stderr.String(),
}

m.t.Logf("cmd %s → exit %d", strings.Join(args, " "), exitCode)
if r.Stdout != "" {
m.t.Logf("stdout: %s", r.Stdout)
}
if r.Stderr != "" {
m.t.Logf("stderr: %s", r.Stderr)
}

return r
}

// ContainerPath translates a host-side path to a container-internal path.
// In dev mode, the repo is mounted at /src inside the iso container.
func (m *Miren) ContainerPath(hostPath string) string {
Expand Down
112 changes: 112 additions & 0 deletions blackbox/zero_downtime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
//go:build blackbox

package blackbox

import (
"fmt"
"sync"
"sync/atomic"
"testing"
"time"

"miren.dev/runtime/blackbox/harness"
)

// TestZeroDowntimeDeploy verifies that HTTP requests continue to succeed during
// a redeploy. This exercises the proactive lease invalidation path: when old
// sandboxes go STOPPED, httpingress should evict their cached leases before
// any request hits a dead IP.
func TestZeroDowntimeDeploy(t *testing.T) {
c := harness.NewCluster(t)
m := harness.NewMiren(t, c)

// Deploy initial version
name := harness.DeployApp(t, m, harness.AppOptions{
Testdata: "go-server",
Env: []string{"DEPLOY_VERSION=v1"},
})

// Set a route so we can reach the app via HTTP ingress
host := name + ".test.local"
m.MustRun("route", "set", host, name)
t.Cleanup(func() {
m.Run("route", "remove", host)
})

// Verify HTTP works before we start
harness.Poll(t, "initial HTTP reachable", 30*time.Second, 2*time.Second, func() (bool, string) {
code, _, err := harness.HTTPGet(m, host, "/")
if err != nil {
return false, fmt.Sprintf("HTTP error: %v", err)
}
if code != 200 {
return false, fmt.Sprintf("HTTP status %d", code)
}
return true, ""
})

// Start continuous HTTP requests in background
var (
totalRequests atomic.Int64
failedResults []string
failMu sync.Mutex
stop = make(chan struct{})
done = make(chan struct{})
)

go func() {
defer close(done)
for {
select {
case <-stop:
return
default:
}

code, _, err := harness.HTTPGet(m, host, "/")
totalRequests.Add(1)

if err != nil || (code != 200 && code != 502 && code != 503) {
// 502/503 during brief transition are noted but may be acceptable
// in some edge cases; we track them all
}
if err != nil {
failMu.Lock()
failedResults = append(failedResults, fmt.Sprintf("request #%d: error: %v", totalRequests.Load(), err))
failMu.Unlock()
} else if code != 200 {
failMu.Lock()
failedResults = append(failedResults, fmt.Sprintf("request #%d: HTTP %d", totalRequests.Load(), code))
failMu.Unlock()
}

time.Sleep(200 * time.Millisecond)
}
}()

// Trigger a redeploy by changing an env var (forces new version + new sandbox)
t.Log("triggering redeploy...")
m.MustRun("deploy", "-a", name, "-d", m.ContainerPath(c.TestdataDir+"/go-server"), "-f", "-e", "DEPLOY_VERSION=v2")
harness.WaitForAppReady(t, m, name, 3*time.Minute)

// Let requests continue briefly after deploy settles to catch any stragglers
time.Sleep(3 * time.Second)

// Stop the request loop
close(stop)
<-done

total := totalRequests.Load()
t.Logf("total requests during deploy: %d", total)

failMu.Lock()
failures := failedResults
failMu.Unlock()

if len(failures) > 0 {
t.Errorf("had %d failed requests out of %d during deploy:", len(failures), total)
for _, f := range failures {
t.Logf(" %s", f)
}
}
}
35 changes: 35 additions & 0 deletions components/activator/activator.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,21 @@ func (l *Lease) Pool() string {
return l.pool
}

// SandboxInvalidation is sent when a sandbox transitions away from RUNNING,
// signaling that any cached leases pointing to it should be invalidated.
type SandboxInvalidation struct {
SandboxID entity.Id
}

type AppActivator interface {
AcquireLease(ctx context.Context, ver *core_v1alpha.AppVersion, service string) (*Lease, error)
ReleaseLease(ctx context.Context, lease *Lease) error
RenewLease(ctx context.Context, lease *Lease) (*Lease, error)

// Invalidations returns a channel that receives notifications when
// sandboxes become non-RUNNING. Consumers should invalidate any cached
// leases referencing the invalidated sandbox.
Invalidations() <-chan SandboxInvalidation
}

type sandbox struct {
Expand Down Expand Up @@ -156,6 +167,9 @@ type localActivator struct {
// Map key is verKey (version + service), value is list of channels to notify
newSandboxChans map[verKey][]chan struct{}

// invalidationCh signals httpingress when a sandbox becomes non-RUNNING
invalidationCh chan SandboxInvalidation

log *slog.Logger
eac *entityserver_v1alpha.EntityAccessClient
}
Expand All @@ -170,6 +184,7 @@ func NewLocalActivator(ctx context.Context, log *slog.Logger, eac *entityserver_
poolSandboxes: make(map[entity.Id]*poolSandboxes),
pools: make(map[verKey]*poolState),
newSandboxChans: make(map[verKey][]chan struct{}),
invalidationCh: make(chan SandboxInvalidation, 64),
}

// Recover existing pools first (sandboxes need pools to exist)
Expand Down Expand Up @@ -963,6 +978,12 @@ func (a *localActivator) RenewLease(ctx context.Context, lease *Lease) (*Lease,

for _, s := range ps.sandboxes {
if s.sandbox == lease.sandbox {
// Reject renewal if sandbox is no longer running.
// This ensures httpingress invalidates cached leases for
// stopped/dead sandboxes on the next renewal cycle.
if s.sandbox.Status != compute_v1alpha.RUNNING {
return nil, fmt.Errorf("sandbox %s is %s", s.sandbox.ID, s.sandbox.Status)
}
s.lastRenewal = time.Now()
return lease, nil
}
Expand All @@ -971,6 +992,10 @@ func (a *localActivator) RenewLease(ctx context.Context, lease *Lease) (*Lease,
return nil, fmt.Errorf("sandbox not found")
}

func (a *localActivator) Invalidations() <-chan SandboxInvalidation {
return a.invalidationCh
}

func (a *localActivator) watchSandboxes(ctx context.Context) {
// Watch for sandbox changes: update status AND discover new RUNNING sandboxes
// This is the single source of sandbox discovery for the activator
Expand Down Expand Up @@ -1072,6 +1097,16 @@ func (a *localActivator) watchSandboxes(ctx context.Context) {
}
}

// Signal httpingress to invalidate cached leases for this sandbox
// when it transitions away from RUNNING
if oldStatus == compute_v1alpha.RUNNING && sb.Status != compute_v1alpha.RUNNING {
select {
case a.invalidationCh <- SandboxInvalidation{SandboxID: sb.ID}:
default:
a.log.Warn("invalidation channel full, dropping notification", "sandbox", sb.ID)
}
}

a.mu.Unlock()

if oldStatus != sb.Status {
Expand Down
13 changes: 13 additions & 0 deletions components/activator/testing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package activator

import "miren.dev/runtime/api/compute/compute_v1alpha"

// NewTestLease creates a Lease with the given sandbox, size, and URL.
// This is only intended for use in tests outside the activator package.
func NewTestLease(sb *compute_v1alpha.Sandbox, size int, url string) *Lease {
return &Lease{
sandbox: sb,
Size: size,
URL: url,
}
}
Loading
Loading