From 2971f74cb2f5bac709a9f4cb6be5054b024a780d Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 10:22:23 -0400 Subject: [PATCH 01/11] e2e: abstract container behind backend interface (docker + hypeman) Introduce a Backend interface in server/e2e that captures the public surface the ~24 e2e_*_test.go files consume via *TestContainer (Start/Stop, the API/CDP/ChromeDriver endpoint accessors, API clients, Wait* helpers, Exec, ExitCh, Container). TestContainer is now a thin facade that delegates to a Backend selected at construction time. Two backends are provided: - dockerBackend: the historical testcontainers-go logic, moved verbatim behind the interface. Default, so existing CI is unchanged. - hypemanBackend: starts the image as a remote VM on a running Hypeman dev server via the github.com/kernel/hypeman-go client. Endpoints target the instance's network IP on the fixed guest ports (10001/9222/9224); Exec runs against the instance API server's /process/exec endpoint to preserve the (exitCode, combinedOutput, error) contract. Backend selection is via the KI_E2E_BACKEND env var (docker|hypeman, default docker). Hypeman connection details are read from env only and never hardcoded: KI_E2E_HYPEMAN_BASE_URL (or HYPEMAN_BASE_URL) and HYPEMAN_AUTH_TOKEN (or the SDK-native HYPEMAN_API_KEY). Optional GPU passthrough via KI_E2E_HYPEMAN_GPU_DEVICES and VM sizing via KI_E2E_HYPEMAN_SIZE. Test changes are minimal: six direct port-field accesses in two test files now use backend-agnostic accessors (CDPAddr, ChromeDriverURL, plus new ChromeDriverAddr/ChromeDriverWSURL helpers) instead of hardcoding 127.0.0.1:, which only ever worked for the Docker backend. Added infra-free unit tests for backend selection and hypeman config validation. This unblocks running the e2e suite against the GPU image (chromium-headful-vgpu) from kernel-images-private via the hypeman backend. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend.go | 119 +++++++++ server/e2e/backend_docker.go | 244 +++++++++++++++++++ server/e2e/backend_hypeman.go | 344 +++++++++++++++++++++++++++ server/e2e/backend_test.go | 65 +++++ server/e2e/container.go | 256 +++++--------------- server/e2e/e2e_bidi_test.go | 10 +- server/e2e/e2e_cdp_reconnect_test.go | 2 +- server/go.mod | 29 ++- server/go.sum | 50 ++-- 9 files changed, 891 insertions(+), 228 deletions(-) create mode 100644 server/e2e/backend.go create mode 100644 server/e2e/backend_docker.go create mode 100644 server/e2e/backend_hypeman.go create mode 100644 server/e2e/backend_test.go diff --git a/server/e2e/backend.go b/server/e2e/backend.go new file mode 100644 index 00000000..a9fe5d61 --- /dev/null +++ b/server/e2e/backend.go @@ -0,0 +1,119 @@ +package e2e + +import ( + "context" + "os" + "strings" + "testing" + + instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" + "github.com/testcontainers/testcontainers-go" +) + +// ContainerConfig holds optional configuration for instance startup. +// +// It is shared by every backend so that the ~24 e2e_*_test.go files can keep +// calling Start with the same shape regardless of where the browser instance +// actually runs (a local Docker container or a remote Hypeman VM). +type ContainerConfig struct { + Env map[string]string + HostAccess bool // Add host.docker.internal mapping (Docker backend only) +} + +// Backend is the abstraction every e2e browser-instance provider implements. +// +// It captures the public surface that the test files consume via *TestContainer. +// Two implementations exist: +// +// - dockerBackend: runs the image as a local Docker container via +// testcontainers-go (the historical behavior, still the default). +// - hypemanBackend: starts the image as a remote VM on a running Hypeman dev +// server using the github.com/kernel/hypeman-go client library. 
+// +// Keeping the surface identical means selecting a backend is a pure factory +// concern and requires no changes in individual tests. +type Backend interface { + // Start provisions and boots the browser instance. + Start(ctx context.Context, cfg ContainerConfig) error + // Stop tears the instance down and releases its resources. + Stop(ctx context.Context) error + + // APIBaseURL returns the base URL for the instance's control-plane API + // server (container port 10001). + APIBaseURL() string + // CDPURL returns the WebSocket URL for the DevTools proxy (port 9222). + CDPURL() string + // CDPAddr returns the TCP host:port for the DevTools proxy (port 9222). + CDPAddr() string + // ChromeDriverURL returns the base HTTP URL for the ChromeDriver proxy + // (port 9224). + ChromeDriverURL() string + + // APIClient returns an OpenAPI client bound to APIBaseURL. + APIClient() (*instanceoapi.ClientWithResponses, error) + // APIClientNoKeepAlive returns an OpenAPI client that disables HTTP + // connection reuse (useful after server restarts). + APIClientNoKeepAlive() (*instanceoapi.ClientWithResponses, error) + + // WaitReady blocks until the instance's API server is serving. + WaitReady(ctx context.Context) error + // WaitDevTools blocks until the CDP endpoint accepts connections. + WaitDevTools(ctx context.Context) error + // WaitChromeDriver blocks until the ChromeDriver proxy reports ready. + WaitChromeDriver(ctx context.Context) error + + // Exec runs a command inside the instance and returns the exit code and + // combined stdout+stderr output. + Exec(ctx context.Context, cmd []string) (int, string, error) + + // ExitCh returns a channel that fires when the instance exits. + ExitCh() <-chan error + + // Container returns the underlying testcontainers.Container for advanced + // Docker-only usage. It returns nil for non-Docker backends. + Container() testcontainers.Container +} + +// BackendKind enumerates the supported e2e backends. 
+type BackendKind string + +const ( + BackendDocker BackendKind = "docker" + BackendHypeman BackendKind = "hypeman" +) + +// envBackendKind is the env var that selects the backend. It defaults to +// "docker" so existing CI (which sets nothing) is unchanged. +const envBackendKind = "KI_E2E_BACKEND" + +// backendKindFromEnv reads and normalizes KI_E2E_BACKEND, defaulting to docker. +func backendKindFromEnv() BackendKind { + v := strings.TrimSpace(strings.ToLower(os.Getenv(envBackendKind))) + if v == "" { + return BackendDocker + } + return BackendKind(v) +} + +// newBackend constructs the backend selected by the KI_E2E_BACKEND env var. +// +// Selection is resolved here (and not per test) so that adding a backend never +// requires touching the test files. Unknown values fail the test loudly rather +// than silently falling back, to avoid masking misconfiguration in CI. +func newBackend(tb testing.TB, image string) Backend { + tb.Helper() + kind := backendKindFromEnv() + switch kind { + case BackendDocker: + return newDockerBackend(image) + case BackendHypeman: + b, err := newHypemanBackend(image) + if err != nil { + tb.Fatalf("e2e: failed to configure hypeman backend: %v", err) + } + return b + default: + tb.Fatalf("e2e: unsupported %s=%q (want %q or %q)", envBackendKind, kind, BackendDocker, BackendHypeman) + return nil + } +} diff --git a/server/e2e/backend_docker.go b/server/e2e/backend_docker.go new file mode 100644 index 00000000..e87c1c87 --- /dev/null +++ b/server/e2e/backend_docker.go @@ -0,0 +1,244 @@ +package e2e + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/docker/docker/api/types/container" + "github.com/docker/go-connections/nat" + instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" +) + +// dockerBackend runs the image as a local Docker container via testcontainers-go. 
+// +// This is the historical e2e behavior, preserved verbatim and moved behind the +// Backend interface. It enables parallel test execution by giving each test its +// own dynamically allocated host ports. +type dockerBackend struct { + Name string + Image string + APIPort int // dynamically allocated host port -> container 10001 + CDPPort int // dynamically allocated host port -> container 9222 + ChromeDriverPort int // dynamically allocated host port -> container 9224 + ctr testcontainers.Container +} + +// newDockerBackend returns a Docker-backed Backend for the given image. +func newDockerBackend(image string) Backend { + return &dockerBackend{Image: image} +} + +// Start starts the container with the given configuration using testcontainers-go. +func (c *dockerBackend) Start(ctx context.Context, cfg ContainerConfig) error { + // Build environment variables + env := make(map[string]string) + for k, v := range cfg.Env { + env[k] = v + } + // Ensure CHROMIUM_FLAGS includes --no-sandbox for CI + if flags, ok := env["CHROMIUM_FLAGS"]; !ok { + env["CHROMIUM_FLAGS"] = "--no-sandbox" + } else if flags != "" { + env["CHROMIUM_FLAGS"] = flags + " --no-sandbox" + } else { + env["CHROMIUM_FLAGS"] = "--no-sandbox" + } + + // Build container request options + opts := []testcontainers.ContainerCustomizer{ + testcontainers.WithImage(c.Image), + testcontainers.WithExposedPorts("10001/tcp", "9222/tcp", "9224/tcp"), + testcontainers.WithEnv(env), + testcontainers.WithTmpfs(map[string]string{"/dev/shm": "size=2g,mode=1777"}), + // Set privileged mode for Chrome + testcontainers.WithHostConfigModifier(func(hc *container.HostConfig) { + hc.Privileged = true + }), + // Wait for the API to be ready + testcontainers.WithWaitStrategy( + wait.ForHTTP("/spec.yaml"). + WithPort("10001/tcp"). 
+ WithStartupTimeout(2 * time.Minute), + ), + } + + // Add host access if requested + if cfg.HostAccess { + opts = append(opts, testcontainers.WithHostConfigModifier(func(hc *container.HostConfig) { + hc.ExtraHosts = append(hc.ExtraHosts, "host.docker.internal:host-gateway") + })) + } + + // Start container + ctr, err := testcontainers.Run(ctx, c.Image, opts...) + if err != nil { + return fmt.Errorf("failed to start container: %w", err) + } + c.ctr = ctr + + // Get container name + inspect, err := ctr.Inspect(ctx) + if err == nil { + c.Name = inspect.Name + } + + // Get mapped ports + apiPort, err := ctr.MappedPort(ctx, "10001/tcp") + if err != nil { + return fmt.Errorf("failed to get API port: %w", err) + } + c.APIPort = apiPort.Int() + + cdpPort, err := ctr.MappedPort(ctx, "9222/tcp") + if err != nil { + return fmt.Errorf("failed to get CDP port: %w", err) + } + c.CDPPort = cdpPort.Int() + + chromeDriverPort, err := ctr.MappedPort(ctx, "9224/tcp") + if err != nil { + return fmt.Errorf("failed to get ChromeDriver port: %w", err) + } + c.ChromeDriverPort = chromeDriverPort.Int() + + return nil +} + +// Stop stops and removes the container. +func (c *dockerBackend) Stop(ctx context.Context) error { + if c.ctr == nil { + return nil + } + return testcontainers.TerminateContainer(c.ctr) +} + +// APIBaseURL returns the URL for the container's API server. +func (c *dockerBackend) APIBaseURL() string { + return fmt.Sprintf("http://127.0.0.1:%d", c.APIPort) +} + +// CDPURL returns the WebSocket URL for the container's DevTools proxy. +func (c *dockerBackend) CDPURL() string { + return fmt.Sprintf("ws://127.0.0.1:%d/", c.CDPPort) +} + +// CDPAddr returns the TCP address for the container's DevTools proxy. +func (c *dockerBackend) CDPAddr() string { + return fmt.Sprintf("127.0.0.1:%d", c.CDPPort) +} + +// ChromeDriverURL returns the base HTTP URL for the container's ChromeDriver proxy. 
+func (c *dockerBackend) ChromeDriverURL() string { + return fmt.Sprintf("http://127.0.0.1:%d", c.ChromeDriverPort) +} + +// APIClient creates an OpenAPI client for this container's API. +func (c *dockerBackend) APIClient() (*instanceoapi.ClientWithResponses, error) { + return instanceoapi.NewClientWithResponses(c.APIBaseURL()) +} + +// APIClientNoKeepAlive creates an API client that doesn't reuse connections. +func (c *dockerBackend) APIClientNoKeepAlive() (*instanceoapi.ClientWithResponses, error) { + transport := &http.Transport{ + DisableKeepAlives: true, + } + httpClient := &http.Client{Transport: transport} + return instanceoapi.NewClientWithResponses(c.APIBaseURL(), instanceoapi.WithHTTPClient(httpClient)) +} + +// WaitReady waits for the container's API to become ready. +// Note: With testcontainers-go, this is usually handled by the wait strategy in +// Start(). This method performs an additional health check. +func (c *dockerBackend) WaitReady(ctx context.Context) error { + url := c.APIBaseURL() + "/spec.yaml" + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + client := &http.Client{Timeout: 2 * time.Second} + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + resp, err := client.Get(url) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + } + } +} + +// WaitDevTools waits for the CDP WebSocket endpoint to be ready. +func (c *dockerBackend) WaitDevTools(ctx context.Context) error { + return wait.ForListeningPort(nat.Port("9222/tcp")). + WithStartupTimeout(2*time.Minute). + WaitUntilReady(ctx, c.ctr) +} + +// WaitChromeDriver waits for the ChromeDriver proxy (and upstream ChromeDriver) +// to be ready by polling the /status endpoint. 
+func (c *dockerBackend) WaitChromeDriver(ctx context.Context) error { + statusURL := c.ChromeDriverURL() + "/status" + client := &http.Client{Timeout: 2 * time.Second} + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + resp, err := client.Get(statusURL) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + } + } +} + +// Exec executes a command inside the container and returns the combined output. +func (c *dockerBackend) Exec(ctx context.Context, cmd []string) (int, string, error) { + exitCode, reader, err := c.ctr.Exec(ctx, cmd) + if err != nil { + return exitCode, "", err + } + + // Read all output + buf := make([]byte, 0) + tmp := make([]byte, 1024) + for { + n, err := reader.Read(tmp) + if n > 0 { + buf = append(buf, tmp[:n]...) + } + if err != nil { + break + } + } + + return exitCode, string(buf), nil +} + +// ExitCh returns a channel that receives when the container exits. +// Note: testcontainers-go handles this internally; this is kept for API +// compatibility and returns a channel that never fires. +func (c *dockerBackend) ExitCh() <-chan error { + ch := make(chan error, 1) + return ch +} + +// Container returns the underlying testcontainers.Container for advanced usage. 
+func (c *dockerBackend) Container() testcontainers.Container { + return c.ctr +} diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go new file mode 100644 index 00000000..3a5ff9ca --- /dev/null +++ b/server/e2e/backend_hypeman.go @@ -0,0 +1,344 @@ +package e2e + +import ( + "context" + "encoding/base64" + "fmt" + "net" + "net/http" + "os" + "strings" + "time" + + hypeman "github.com/kernel/hypeman-go" + "github.com/kernel/hypeman-go/option" + instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" + "github.com/nrednav/cuid2" + "github.com/testcontainers/testcontainers-go" +) + +// Container ports exposed by the kernel-images browser image. These are fixed +// inside the guest; the Docker backend remaps them to random host ports, while +// the Hypeman backend reaches them directly on the instance's network IP. +const ( + hypemanAPIPort = 10001 + hypemanCDPPort = 9222 + hypemanChromeDriverPort = 9224 +) + +// Env var names for configuring the Hypeman backend. Secrets are referenced by +// name only and never hardcoded. +const ( + // envHypemanBaseURL overrides the hypeman dev server URL. If unset, the SDK + // falls back to its own HYPEMAN_BASE_URL lookup. + envHypemanBaseURL = "KI_E2E_HYPEMAN_BASE_URL" + // envHypemanToken is the preferred auth token var. It matches the API + // service's Railway staging variable name (HYPEMAN_AUTH_TOKEN). The SDK's + // native HYPEMAN_API_KEY is also honored as a fallback. + envHypemanToken = "HYPEMAN_AUTH_TOKEN" + // envHypemanGPUDevices is an optional comma-separated list of device IDs or + // names to attach for GPU passthrough (used by the GPU image). + envHypemanGPUDevices = "KI_E2E_HYPEMAN_GPU_DEVICES" + // envHypemanInstanceSize optionally overrides the VM memory size. + envHypemanInstanceSize = "KI_E2E_HYPEMAN_SIZE" +) + +// hypemanBackend starts the image as a remote VM on a running Hypeman dev server +// using the github.com/kernel/hypeman-go client library. 
+//
+// The instance is reachable on its assigned network IP. All endpoint accessors
+// target that IP on the fixed guest ports. Command execution is performed
+// against the instance's own API server (/process/exec) so that callers get the
+// same (exitCode, combinedOutput, error) shape as the Docker backend.
+type hypemanBackend struct {
+	// client is the hypeman API client used for every Instances call.
+	client hypeman.Client
+	// image is the image reference sent as InstanceNewParams.Image.
+	image string
+
+	// instanceID is the ID returned by Instances.New; empty until Start runs.
+	instanceID string
+	// ip is the instance's network IP as resolved by Start.
+	ip string
+
+	// exitCh is the buffered (capacity 1) channel handed out by ExitCh.
+	exitCh chan error
+}
+
+// newHypemanBackend builds a hypeman-backed Backend for image.
+//
+// Connection details come from the environment only. KI_E2E_HYPEMAN_BASE_URL
+// and HYPEMAN_AUTH_TOKEN are passed to the client explicitly when set; the
+// SDK-native HYPEMAN_BASE_URL / HYPEMAN_API_KEY are left for the SDK to pick
+// up on its own. An error is returned when no base URL or no token is
+// available from either source.
+func newHypemanBackend(image string) (Backend, error) {
+	lookup := func(name string) string { return strings.TrimSpace(os.Getenv(name)) }
+	baseURL := lookup(envHypemanBaseURL)
+	token := lookup(envHypemanToken)
+
+	// Fail fast with an actionable message if neither the kernel-images
+	// override nor the SDK's native variable supplies a value.
+	baseKnown := baseURL != "" || lookup("HYPEMAN_BASE_URL") != ""
+	tokenKnown := token != "" || lookup("HYPEMAN_API_KEY") != ""
+	if !(baseKnown && tokenKnown) {
+		return nil, fmt.Errorf(
+			"hypeman backend requires a base URL (%s or HYPEMAN_BASE_URL) and a token (%s or HYPEMAN_API_KEY)",
+			envHypemanBaseURL, envHypemanToken,
+		)
+	}
+
+	// Only the override variables are turned into explicit request options.
+	var opts []option.RequestOption
+	if baseURL != "" {
+		opts = append(opts, option.WithBaseURL(baseURL))
+	}
+	if token != "" {
+		opts = append(opts, option.WithAPIKey(token))
+	}
+
+	backend := &hypemanBackend{
+		client: hypeman.NewClient(opts...),
+		image:  image,
+		exitCh: make(chan error, 1),
+	}
+	return backend, nil
+}
+
+// Start creates and boots a hypeman instance for the image, waits for it to
+// reach the Running state, and resolves its network IP.
+func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error {
+	// Copy the caller's env so the CHROMIUM_FLAGS tweak below never mutates
+	// cfg.Env.
+	env := make(map[string]string, len(cfg.Env)+1)
+	for k, v := range cfg.Env {
+		env[k] = v
+	}
+	// Mirror the Docker backend: ensure --no-sandbox is present for CI.
+	if flags, ok := env["CHROMIUM_FLAGS"]; !ok || flags == "" {
+		env["CHROMIUM_FLAGS"] = "--no-sandbox"
+	} else {
+		env["CHROMIUM_FLAGS"] = flags + " --no-sandbox"
+	}
+
+	params := hypeman.InstanceNewParams{
+		Image: c.image,
+		Name:  hypemanInstanceName(),
+		Env:   env,
+	}
+	if size := strings.TrimSpace(os.Getenv(envHypemanInstanceSize)); size != "" {
+		params.Size = hypeman.String(size)
+	}
+	if devs := parseCommaList(os.Getenv(envHypemanGPUDevices)); len(devs) > 0 {
+		params.Devices = devs
+	}
+
+	inst, err := c.client.Instances.New(ctx, params)
+	if err != nil {
+		return fmt.Errorf("hypeman: create instance: %w", err)
+	}
+	c.instanceID = inst.ID
+
+	// From here on a remote VM exists. Any failure must tear it down again:
+	// tests commonly abort as soon as Start returns an error and never reach
+	// Stop, which would otherwise leave the VM (and any attached GPU) running.
+
+	// Wait for the guest program to start. The SDK caps the server-side wait at
+	// a few minutes; loop until our context deadline if needed.
+	if err := c.waitForRunning(ctx); err != nil {
+		return c.discardFailedInstance(err)
+	}
+
+	ip, err := c.resolveIP(ctx)
+	if err != nil {
+		return c.discardFailedInstance(err)
+	}
+	c.ip = ip
+	return nil
+}
+
+// discardFailedInstance best-effort deletes the instance created by a Start
+// call that subsequently failed, and returns cause (annotated if the delete
+// fails too).
+//
+// On a successful delete instanceID is cleared so that a later Stop is a no-op.
+// If the delete fails instanceID is kept so that Stop can retry it.
+func (c *hypemanBackend) discardFailedInstance(cause error) error {
+	// The caller's ctx is typically already cancelled or past its deadline when
+	// startup fails, so the cleanup runs on its own bounded context.
+	cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	if err := c.client.Instances.Delete(cleanupCtx, c.instanceID); err != nil {
+		return fmt.Errorf("%w (cleanup of instance %s also failed: %v)", cause, c.instanceID, err)
+	}
+	c.instanceID = ""
+	return cause
+}
+
+// waitForRunning polls the instance wait endpoint until the instance is Running
+// or the context is done.
+func (c *hypemanBackend) waitForRunning(ctx context.Context) error {
+	// lastErr remembers the most recent API failure so that a timeout can say
+	// why the instance never became Running instead of hiding the cause.
+	var lastErr error
+	for {
+		if err := ctx.Err(); err != nil {
+			return fmt.Errorf("hypeman: waiting for Running: %w", err)
+		}
+		resp, err := c.client.Instances.Wait(ctx, c.instanceID, hypeman.InstanceWaitParams{
+			State:   hypeman.InstanceWaitParamsStateRunning,
+			Timeout: hypeman.String("60s"),
+		})
+		switch {
+		case err != nil:
+			lastErr = err
+		case resp != nil && string(resp.State) == string(hypeman.InstanceStateRunning):
+			return nil
+		default:
+			// The call succeeded but the instance is not Running yet; any
+			// earlier error is stale.
+			lastErr = nil
+		}
+		select {
+		case <-ctx.Done():
+			if lastErr != nil {
+				return fmt.Errorf("hypeman: timed out waiting for instance %s to reach Running: %w (last error: %v)", c.instanceID, ctx.Err(), lastErr)
+			}
+			return fmt.Errorf("hypeman: timed out waiting for instance %s to reach Running: %w", c.instanceID, ctx.Err())
+		case <-time.After(time.Second):
+		}
+	}
+}
+
+// resolveIP re-fetches the instance once per second until it reports a
+// non-blank network IP, and returns that IP with surrounding whitespace
+// removed.
+//
+// When ctx ends first, the returned error wraps ctx.Err() and includes the last
+// API error seen, if any.
+func (c *hypemanBackend) resolveIP(ctx context.Context) (string, error) {
+	var lastErr error
+	for {
+		inst, err := c.client.Instances.Get(ctx, c.instanceID)
+		switch {
+		case err != nil:
+			lastErr = err
+		case inst != nil:
+			lastErr = nil
+			// Return the trimmed value: it is spliced verbatim into URLs by the
+			// endpoint accessors.
+			if ip := strings.TrimSpace(inst.Network.IP); ip != "" {
+				return ip, nil
+			}
+		default:
+			lastErr = nil
+		}
+		select {
+		case <-ctx.Done():
+			if lastErr != nil {
+				return "", fmt.Errorf("hypeman: timed out resolving IP for instance %s: %w (last error: %v)", c.instanceID, ctx.Err(), lastErr)
+			}
+			return "", fmt.Errorf("hypeman: timed out resolving IP for instance %s: %w", c.instanceID, ctx.Err())
+		case <-time.After(time.Second):
+		}
+	}
+}
+
+// Stop deletes the hypeman instance.
+func (c *hypemanBackend) Stop(ctx context.Context) error {
+	// Nothing was created (or Start was never called): nothing to delete.
+	if c.instanceID == "" {
+		return nil
+	}
+	if err := c.client.Instances.Delete(ctx, c.instanceID); err != nil {
+		return fmt.Errorf("hypeman: delete instance %s: %w", c.instanceID, err)
+	}
+	// Signal ExitCh observers. The send is non-blocking so that Stop never
+	// hangs when the channel already holds an unread value.
+	select {
+	case c.exitCh <- nil:
+	default:
+	}
+	return nil
+}
+
+// APIBaseURL returns the base HTTP URL of the instance's API server, built
+// from c.ip and the fixed guest port hypemanAPIPort. c.ip is assigned by
+// Start, so the host part is empty before a successful Start.
+//
+// NOTE(review): the "%s:%d" formatting used by this and the accessors below
+// assumes c.ip is an IPv4 literal or hostname; an IPv6 literal would need
+// brackets.
+func (c *hypemanBackend) APIBaseURL() string {
+	return fmt.Sprintf("http://%s:%d", c.ip, hypemanAPIPort)
+}
+
+// CDPURL returns the WebSocket URL (with a trailing slash) for the DevTools
+// proxy on c.ip and hypemanCDPPort.
+func (c *hypemanBackend) CDPURL() string {
+	return fmt.Sprintf("ws://%s:%d/", c.ip, hypemanCDPPort)
+}
+
+// CDPAddr returns the host:port TCP address for the DevTools proxy on c.ip and
+// hypemanCDPPort.
+func (c *hypemanBackend) CDPAddr() string {
+	return fmt.Sprintf("%s:%d", c.ip, hypemanCDPPort)
+}
+
+// ChromeDriverURL returns the base HTTP URL for the ChromeDriver proxy on c.ip
+// and hypemanChromeDriverPort.
+func (c *hypemanBackend) ChromeDriverURL() string {
+	return fmt.Sprintf("http://%s:%d", c.ip, hypemanChromeDriverPort)
+}
+
+// APIClient returns a new OpenAPI client bound to APIBaseURL. A fresh client is
+// constructed on every call.
+func (c *hypemanBackend) APIClient() (*instanceoapi.ClientWithResponses, error) {
+	return instanceoapi.NewClientWithResponses(c.APIBaseURL())
+}
+
+// APIClientNoKeepAlive returns a new OpenAPI client bound to APIBaseURL whose
+// HTTP transport has keep-alives disabled, so connections are not reused
+// between requests.
+func (c *hypemanBackend) APIClientNoKeepAlive() (*instanceoapi.ClientWithResponses, error) {
+	transport := &http.Transport{DisableKeepAlives: true}
+	httpClient := &http.Client{Transport: transport}
+	return instanceoapi.NewClientWithResponses(c.APIBaseURL(), instanceoapi.WithHTTPClient(httpClient))
+}
+
+// WaitReady polls the instance API server's /spec.yaml until it serves 200.
+// It probes every 200ms and returns ctx's error if ctx ends first.
+func (c *hypemanBackend) WaitReady(ctx context.Context) error {
+	return pollHTTP200(ctx, c.APIBaseURL()+"/spec.yaml", 200*time.Millisecond)
+}
+
+// WaitDevTools polls the CDP TCP port until it accepts connections.
+//
+// The first dial happens immediately; after a failed dial it waits for the
+// next 500ms tick before retrying. Each dial is limited to 2 seconds. It
+// returns ctx.Err() if ctx ends before a connection succeeds.
+func (c *hypemanBackend) WaitDevTools(ctx context.Context) error {
+	addr := c.CDPAddr()
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+	for {
+		conn, err := (&net.Dialer{Timeout: 2 * time.Second}).DialContext(ctx, "tcp", addr)
+		if err == nil {
+			// Only reachability matters; the connection itself is not used.
+			conn.Close()
+			return nil
+		}
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-ticker.C:
+		}
+	}
+}
+
+// WaitChromeDriver polls the ChromeDriver proxy /status until it serves 200.
+func (c *hypemanBackend) WaitChromeDriver(ctx context.Context) error {
+	// Probe every 500ms; pollHTTP200 returns ctx's error if ctx ends first.
+	return pollHTTP200(ctx, c.ChromeDriverURL()+"/status", 500*time.Millisecond)
+}
+
+// Exec runs a command inside the guest via the instance API server's
+// /process/exec endpoint, returning the exit code and combined stdout+stderr.
+//
+// The Docker backend invokes commands as an argv slice; we map the first
+// element to Command and the remainder to Args so callers like
+// Exec(ctx, []string{"sh", "-lc", "..."}) behave identically.
+//
+// The returned output is the decoded stdout followed by the decoded stderr
+// (concatenated, not interleaved in emission order). On any failure to issue
+// the request or on a non-200 response the exit code is -1 and the output is
+// empty. A 200 response without an exit code is reported as exit code 0.
+func (c *hypemanBackend) Exec(ctx context.Context, cmd []string) (int, string, error) {
+	if len(cmd) == 0 {
+		return -1, "", fmt.Errorf("hypeman: empty command")
+	}
+	client, err := c.APIClient()
+	if err != nil {
+		return -1, "", err
+	}
+
+	// Args is an optional pointer field; leave it nil when there are no
+	// arguments beyond the command itself.
+	body := instanceoapi.ProcessExecRequest{Command: cmd[0]}
+	if len(cmd) > 1 {
+		args := cmd[1:]
+		body.Args = &args
+	}
+
+	resp, err := client.ProcessExecWithResponse(ctx, body)
+	if err != nil {
+		return -1, "", fmt.Errorf("hypeman: exec: %w", err)
+	}
+	// JSON200 is only populated for a 200 response; anything else is surfaced
+	// with its status code and raw body.
+	if resp.JSON200 == nil {
+		return -1, "", fmt.Errorf("hypeman: exec returned status %d: %s", resp.StatusCode(), string(resp.Body))
+	}
+
+	// decodeB64 yields "" for a nil, empty, or undecodable field, so a
+	// malformed payload shortens the output rather than failing the call.
+	out := decodeB64(resp.JSON200.StdoutB64) + decodeB64(resp.JSON200.StderrB64)
+	exitCode := 0
+	if resp.JSON200.ExitCode != nil {
+		exitCode = *resp.JSON200.ExitCode
+	}
+	return exitCode, out, nil
+}
+
+// ExitCh returns a channel that fires when the instance is stopped.
+//
+// The only sender visible in this file is Stop, which sends nil after a
+// successful delete. NOTE(review): nothing here appears to signal the channel
+// when the guest exits on its own - confirm before relying on it for crash
+// detection.
+func (c *hypemanBackend) ExitCh() <-chan error {
+	return c.exitCh
+}
+
+// Container returns nil: the hypeman backend is not Docker-based.
+func (c *hypemanBackend) Container() testcontainers.Container {
+	return nil
+}
+
+// hypemanInstanceName builds a DNS-safe, unique instance name. Hypeman requires
+// lowercase letters, digits, and dashes only, not starting/ending with a dash.
+func hypemanInstanceName() string { + return "ki-e2e-" + strings.ToLower(cuid2.Generate()) +} + +func parseCommaList(s string) []string { + var out []string + for _, part := range strings.Split(s, ",") { + if p := strings.TrimSpace(part); p != "" { + out = append(out, p) + } + } + return out +} + +func decodeB64(s *string) string { + if s == nil || *s == "" { + return "" + } + b, err := base64.StdEncoding.DecodeString(*s) + if err != nil { + return "" + } + return string(b) +} + +// pollHTTP200 polls url until it returns HTTP 200 or ctx is done. +func pollHTTP200(ctx context.Context, url string, interval time.Duration) error { + ticker := time.NewTicker(interval) + defer ticker.Stop() + client := &http.Client{Timeout: 2 * time.Second} + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return err + } + resp, err := client.Do(req) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + } + } +} diff --git a/server/e2e/backend_test.go b/server/e2e/backend_test.go new file mode 100644 index 00000000..853108b4 --- /dev/null +++ b/server/e2e/backend_test.go @@ -0,0 +1,65 @@ +package e2e + +import "testing" + +// TestBackendKindFromEnv verifies the KI_E2E_BACKEND selection logic. These are +// cheap, infra-free unit tests safe to run in CI. 
+func TestBackendKindFromEnv(t *testing.T) { + cases := []struct { + name string + set bool + val string + want BackendKind + }{ + {name: "unset defaults to docker", set: false, want: BackendDocker}, + {name: "empty defaults to docker", set: true, val: "", want: BackendDocker}, + {name: "docker", set: true, val: "docker", want: BackendDocker}, + {name: "hypeman", set: true, val: "hypeman", want: BackendHypeman}, + {name: "case-insensitive + trimmed", set: true, val: " HYPEMAN ", want: BackendHypeman}, + {name: "unknown passes through", set: true, val: "bogus", want: BackendKind("bogus")}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if tc.set { + t.Setenv(envBackendKind, tc.val) + } else { + // t.Setenv requires a value; ensure the var is empty for the + // "unset" case by setting it to empty, which the function + // treats as the default. + t.Setenv(envBackendKind, "") + } + if got := backendKindFromEnv(); got != tc.want { + t.Fatalf("backendKindFromEnv() = %q, want %q", got, tc.want) + } + }) + } +} + +// TestNewHypemanBackendRequiresConfig ensures the hypeman backend fails fast and +// with an actionable message when connection details are missing. +func TestNewHypemanBackendRequiresConfig(t *testing.T) { + // Clear every env var the backend (and the SDK) consult. + for _, k := range []string{envHypemanBaseURL, "HYPEMAN_BASE_URL", envHypemanToken, "HYPEMAN_API_KEY"} { + t.Setenv(k, "") + } + if _, err := newHypemanBackend("some/image:tag"); err == nil { + t.Fatal("expected error when hypeman base URL/token are unset, got nil") + } +} + +// TestNewHypemanBackendWithConfig ensures a valid configuration constructs a +// backend without error. 
+func TestNewHypemanBackendWithConfig(t *testing.T) { + t.Setenv(envHypemanBaseURL, "http://hypeman.example.invalid:8080") + t.Setenv(envHypemanToken, "test-token-not-a-real-secret") + b, err := newHypemanBackend("some/image:tag") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if b == nil { + t.Fatal("expected non-nil backend") + } + if b.Container() != nil { + t.Fatal("hypeman backend Container() must be nil") + } +} diff --git a/server/e2e/container.go b/server/e2e/container.go index ada4b135..fe54e836 100644 --- a/server/e2e/container.go +++ b/server/e2e/container.go @@ -2,254 +2,122 @@ package e2e import ( "context" - "fmt" - "net/http" + "strings" "testing" - "time" - "github.com/docker/docker/api/types/container" - "github.com/docker/go-connections/nat" instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" ) -// TestContainer wraps testcontainers-go to manage a Docker container for e2e tests. -// This enables parallel test execution by giving each test its own dynamically allocated ports. +// TestContainer is the handle every e2e test uses to drive a browser instance. +// +// Historically this struct wrapped testcontainers-go directly. It is now a thin +// facade over a pluggable Backend (see backend.go), selected at construction +// time via the KI_E2E_BACKEND env var. The public method set is unchanged, so +// the ~24 e2e_*_test.go files that hold a *TestContainer continue to work +// without modification regardless of whether the instance runs as a local +// Docker container or a remote Hypeman VM. 
type TestContainer struct { - Name string - Image string - APIPort int // dynamically allocated host port -> container 10001 - CDPPort int // dynamically allocated host port -> container 9222 - ChromeDriverPort int // dynamically allocated host port -> container 9224 - ctr testcontainers.Container -} + // Image is the OCI image reference under test. + Image string -// ContainerConfig holds optional configuration for container startup. -type ContainerConfig struct { - Env map[string]string - HostAccess bool // Add host.docker.internal mapping + backend Backend } -// NewTestContainer creates a new test container placeholder. -// The actual container is started when Start() is called. +// NewTestContainer creates a new test container handle backed by the configured +// backend. The actual instance is provisioned when Start() is called. // Works with both *testing.T and *testing.B (any testing.TB). func NewTestContainer(tb testing.TB, image string) *TestContainer { tb.Helper() return &TestContainer{ - Image: image, + Image: image, + backend: newBackend(tb, image), } } -// Start starts the container with the given configuration using testcontainers-go. +// Start starts the instance with the given configuration. 
func (c *TestContainer) Start(ctx context.Context, cfg ContainerConfig) error { - // Build environment variables - env := make(map[string]string) - for k, v := range cfg.Env { - env[k] = v - } - // Ensure CHROMIUM_FLAGS includes --no-sandbox for CI - if flags, ok := env["CHROMIUM_FLAGS"]; !ok { - env["CHROMIUM_FLAGS"] = "--no-sandbox" - } else if flags != "" { - env["CHROMIUM_FLAGS"] = flags + " --no-sandbox" - } else { - env["CHROMIUM_FLAGS"] = "--no-sandbox" - } - - // Build container request options - opts := []testcontainers.ContainerCustomizer{ - testcontainers.WithImage(c.Image), - testcontainers.WithExposedPorts("10001/tcp", "9222/tcp", "9224/tcp"), - testcontainers.WithEnv(env), - testcontainers.WithTmpfs(map[string]string{"/dev/shm": "size=2g,mode=1777"}), - // Set privileged mode for Chrome - testcontainers.WithHostConfigModifier(func(hc *container.HostConfig) { - hc.Privileged = true - }), - // Wait for the API to be ready - testcontainers.WithWaitStrategy( - wait.ForHTTP("/spec.yaml"). - WithPort("10001/tcp"). - WithStartupTimeout(2 * time.Minute), - ), - } - - // Add host access if requested - if cfg.HostAccess { - opts = append(opts, testcontainers.WithHostConfigModifier(func(hc *container.HostConfig) { - hc.ExtraHosts = append(hc.ExtraHosts, "host.docker.internal:host-gateway") - })) - } - - // Start container - ctr, err := testcontainers.Run(ctx, c.Image, opts...) 
- if err != nil { - return fmt.Errorf("failed to start container: %w", err) - } - c.ctr = ctr - - // Get container name - inspect, err := ctr.Inspect(ctx) - if err == nil { - c.Name = inspect.Name - } - - // Get mapped ports - apiPort, err := ctr.MappedPort(ctx, "10001/tcp") - if err != nil { - return fmt.Errorf("failed to get API port: %w", err) - } - c.APIPort = apiPort.Int() - - cdpPort, err := ctr.MappedPort(ctx, "9222/tcp") - if err != nil { - return fmt.Errorf("failed to get CDP port: %w", err) - } - c.CDPPort = cdpPort.Int() - - chromeDriverPort, err := ctr.MappedPort(ctx, "9224/tcp") - if err != nil { - return fmt.Errorf("failed to get ChromeDriver port: %w", err) - } - c.ChromeDriverPort = chromeDriverPort.Int() - - return nil + return c.backend.Start(ctx, cfg) } -// Stop stops and removes the container. +// Stop stops and removes the instance. func (c *TestContainer) Stop(ctx context.Context) error { - if c.ctr == nil { - return nil - } - return testcontainers.TerminateContainer(c.ctr) + return c.backend.Stop(ctx) } -// APIBaseURL returns the URL for the container's API server. +// APIBaseURL returns the URL for the instance's API server. func (c *TestContainer) APIBaseURL() string { - return fmt.Sprintf("http://127.0.0.1:%d", c.APIPort) + return c.backend.APIBaseURL() } -// CDPURL returns the WebSocket URL for the container's DevTools proxy. +// CDPURL returns the WebSocket URL for the instance's DevTools proxy. func (c *TestContainer) CDPURL() string { - return fmt.Sprintf("ws://127.0.0.1:%d/", c.CDPPort) + return c.backend.CDPURL() } -// APIClient creates an OpenAPI client for this container's API. -func (c *TestContainer) APIClient() (*instanceoapi.ClientWithResponses, error) { - return instanceoapi.NewClientWithResponses(c.APIBaseURL()) +// CDPAddr returns the TCP address for the instance's DevTools proxy. +func (c *TestContainer) CDPAddr() string { + return c.backend.CDPAddr() } -// WaitReady waits for the container's API to become ready. 
-// Note: With testcontainers-go, this is usually handled by the wait strategy in Start(). -// This method is kept for compatibility and performs an additional health check. -func (c *TestContainer) WaitReady(ctx context.Context) error { - url := c.APIBaseURL() + "/spec.yaml" - ticker := time.NewTicker(200 * time.Millisecond) - defer ticker.Stop() - - client := &http.Client{Timeout: 2 * time.Second} - - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: - resp, err := client.Get(url) - if err == nil { - resp.Body.Close() - if resp.StatusCode == http.StatusOK { - return nil - } - } - } - } +// ChromeDriverURL returns the base HTTP URL for the instance's ChromeDriver proxy. +func (c *TestContainer) ChromeDriverURL() string { + return c.backend.ChromeDriverURL() } -// ExitCh returns a channel that receives when the container exits. -// Note: testcontainers-go handles this internally; this is kept for API compatibility. -func (c *TestContainer) ExitCh() <-chan error { - ch := make(chan error, 1) - // testcontainers-go doesn't expose an exit channel directly - // Return a channel that never fires - container lifecycle is managed by testcontainers - return ch +// ChromeDriverAddr returns the host:port for the instance's ChromeDriver proxy, +// derived from ChromeDriverURL (without scheme). Useful for substring assertions +// on proxy-rewritten URLs. +func (c *TestContainer) ChromeDriverAddr() string { + return strings.TrimPrefix(c.backend.ChromeDriverURL(), "http://") } -// WaitDevTools waits for the CDP WebSocket endpoint to be ready. -func (c *TestContainer) WaitDevTools(ctx context.Context) error { - return wait.ForListeningPort(nat.Port("9222/tcp")). - WithStartupTimeout(2*time.Minute). - WaitUntilReady(ctx, c.ctr) +// ChromeDriverWSURL returns the WebSocket URL (ws://host:port/path) for the +// instance's ChromeDriver proxy. path should include a leading slash. 
+func (c *TestContainer) ChromeDriverWSURL(path string) string { + return "ws://" + c.ChromeDriverAddr() + path +} + +// APIClient creates an OpenAPI client for this instance's API. +func (c *TestContainer) APIClient() (*instanceoapi.ClientWithResponses, error) { + return c.backend.APIClient() } // APIClientNoKeepAlive creates an API client that doesn't reuse connections. // This is useful after server restarts where existing connections may be stale. func (c *TestContainer) APIClientNoKeepAlive() (*instanceoapi.ClientWithResponses, error) { - transport := &http.Transport{ - DisableKeepAlives: true, - } - httpClient := &http.Client{Transport: transport} - return instanceoapi.NewClientWithResponses(c.APIBaseURL(), instanceoapi.WithHTTPClient(httpClient)) + return c.backend.APIClientNoKeepAlive() } -// CDPAddr returns the TCP address for the container's DevTools proxy. -func (c *TestContainer) CDPAddr() string { - return fmt.Sprintf("127.0.0.1:%d", c.CDPPort) +// WaitReady waits for the instance's API to become ready. +func (c *TestContainer) WaitReady(ctx context.Context) error { + return c.backend.WaitReady(ctx) } -// ChromeDriverURL returns the base HTTP URL for the container's ChromeDriver proxy. -func (c *TestContainer) ChromeDriverURL() string { - return fmt.Sprintf("http://127.0.0.1:%d", c.ChromeDriverPort) +// WaitDevTools waits for the CDP WebSocket endpoint to be ready. +func (c *TestContainer) WaitDevTools(ctx context.Context) error { + return c.backend.WaitDevTools(ctx) } // WaitChromeDriver waits for the ChromeDriver proxy (and upstream ChromeDriver) -// to be ready by polling the /status endpoint. +// to be ready. 
func (c *TestContainer) WaitChromeDriver(ctx context.Context) error { - statusURL := c.ChromeDriverURL() + "/status" - client := &http.Client{Timeout: 2 * time.Second} - ticker := time.NewTicker(500 * time.Millisecond) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ticker.C: - resp, err := client.Get(statusURL) - if err == nil { - resp.Body.Close() - if resp.StatusCode == http.StatusOK { - return nil - } - } - } - } + return c.backend.WaitChromeDriver(ctx) } -// Exec executes a command inside the container and returns the combined output. +// Exec executes a command inside the instance and returns the exit code and +// combined output. func (c *TestContainer) Exec(ctx context.Context, cmd []string) (int, string, error) { - exitCode, reader, err := c.ctr.Exec(ctx, cmd) - if err != nil { - return exitCode, "", err - } - - // Read all output - buf := make([]byte, 0) - tmp := make([]byte, 1024) - for { - n, err := reader.Read(tmp) - if n > 0 { - buf = append(buf, tmp[:n]...) - } - if err != nil { - break - } - } + return c.backend.Exec(ctx, cmd) +} - return exitCode, string(buf), nil +// ExitCh returns a channel that receives when the instance exits. +func (c *TestContainer) ExitCh() <-chan error { + return c.backend.ExitCh() } // Container returns the underlying testcontainers.Container for advanced usage. +// It returns nil for backends that are not Docker-based. 
func (c *TestContainer) Container() testcontainers.Container { - return c.ctr + return c.backend.Container() } diff --git a/server/e2e/e2e_bidi_test.go b/server/e2e/e2e_bidi_test.go index 730bc870..c5b89266 100644 --- a/server/e2e/e2e_bidi_test.go +++ b/server/e2e/e2e_bidi_test.go @@ -203,7 +203,7 @@ func TestBidiWebSocket(t *testing.T) { require.NoError(t, c.WaitChromeDriver(ctx), "chromedriver not ready") // Connect to BiDi WebSocket endpoint - bidiURL := fmt.Sprintf("ws://127.0.0.1:%d/session", c.ChromeDriverPort) + bidiURL := c.ChromeDriverWSURL("/session") t.Logf("connecting to BiDi endpoint: %s", bidiURL) conn, _, err := websocket.Dial(ctx, bidiURL, nil) @@ -376,7 +376,7 @@ func TestBidiHTTPSession(t *testing.T) { t.Logf("session ID: %s, webSocketUrl: %s", sessionID, wsURL) // Verify the proxy rewrote webSocketUrl to point through itself - expectedHost := fmt.Sprintf("127.0.0.1:%d", c.ChromeDriverPort) + expectedHost := c.ChromeDriverAddr() require.Contains(t, wsURL, expectedHost, "webSocketUrl should point through the proxy (expected host %s), got: %s", expectedHost, wsURL) @@ -445,7 +445,7 @@ func TestBidiPuppeteer(t *testing.T) { require.NoError(t, c.WaitReady(ctx), "api not ready") require.NoError(t, c.WaitChromeDriver(ctx), "chromedriver not ready") - endpoint := fmt.Sprintf("ws://127.0.0.1:%d/session", c.ChromeDriverPort) + endpoint := c.ChromeDriverWSURL("/session") t.Logf("running test-puppeteer-bidi.js against %s", endpoint) cmd := exec.CommandContext(ctx, "node", "test-puppeteer-bidi.js", "--endpoint", endpoint) @@ -476,7 +476,7 @@ func TestBidiVibium(t *testing.T) { require.NoError(t, c.WaitReady(ctx), "api not ready") require.NoError(t, c.WaitChromeDriver(ctx), "chromedriver not ready") - endpoint := fmt.Sprintf("ws://127.0.0.1:%d/session", c.ChromeDriverPort) + endpoint := c.ChromeDriverWSURL("/session") t.Logf("running test-vibium-bidi.js against %s", endpoint) cmd := exec.CommandContext(ctx, "node", "test-vibium-bidi.js", "--endpoint", 
endpoint) @@ -506,7 +506,7 @@ func TestBidiSelenium(t *testing.T) { require.NoError(t, c.WaitReady(ctx), "api not ready") require.NoError(t, c.WaitChromeDriver(ctx), "chromedriver not ready") - endpoint := fmt.Sprintf("http://127.0.0.1:%d", c.ChromeDriverPort) + endpoint := c.ChromeDriverURL() t.Logf("running test-selenium-bidi.js against %s", endpoint) cmd := exec.CommandContext(ctx, "node", "test-selenium-bidi.js", "--endpoint", endpoint) diff --git a/server/e2e/e2e_cdp_reconnect_test.go b/server/e2e/e2e_cdp_reconnect_test.go index d0e043c5..7c1ed226 100644 --- a/server/e2e/e2e_cdp_reconnect_test.go +++ b/server/e2e/e2e_cdp_reconnect_test.go @@ -456,7 +456,7 @@ func touchContainerFile(ctx context.Context, client *instanceoapi.ClientWithResp } func fetchBrowserWebSocketURL(ctx context.Context, c *TestContainer) (string, error) { - versionURL := fmt.Sprintf("http://127.0.0.1:%d/json/version", c.CDPPort) + versionURL := fmt.Sprintf("http://%s/json/version", c.CDPAddr()) req, err := http.NewRequestWithContext(ctx, http.MethodGet, versionURL, nil) if err != nil { return "", err diff --git a/server/go.mod b/server/go.mod index fd8296e9..f2518e7a 100644 --- a/server/go.mod +++ b/server/go.mod @@ -8,7 +8,7 @@ require ( github.com/avast/retry-go/v5 v5.0.0 github.com/coder/websocket v1.8.14 github.com/creack/pty v1.1.24 - github.com/docker/docker v28.5.1+incompatible + github.com/docker/docker v28.5.2+incompatible github.com/docker/go-connections v0.6.0 github.com/euank/go-kmsg-parser/v2 v2.1.0 github.com/fsnotify/fsnotify v1.9.0 @@ -18,6 +18,7 @@ require ( github.com/go-chi/chi/v5 v5.2.1 github.com/google/uuid v1.6.0 github.com/kelseyhightower/envconfig v1.4.0 + github.com/kernel/hypeman-go v0.20.0 github.com/klauspost/compress v1.18.3 github.com/m1k1o/neko/server v0.0.0-20251008185748-46e2fc7d3866 github.com/nrednav/cuid2 v1.1.0 @@ -26,7 +27,7 @@ require ( github.com/samber/lo v1.52.0 github.com/stretchr/testify v1.11.1 github.com/testcontainers/testcontainers-go v0.40.0 
- golang.org/x/sync v0.17.0 + golang.org/x/sync v0.18.0 golang.org/x/sys v0.39.0 golang.org/x/term v0.37.0 gopkg.in/yaml.v3 v3.0.1 @@ -34,7 +35,7 @@ require ( require ( dario.cat/mergo v1.0.2 // indirect - github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -57,6 +58,7 @@ require ( github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -87,24 +89,31 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/speakeasy-api/jsonpath v0.6.0 // indirect github.com/speakeasy-api/openapi-overlay v0.10.2 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/vmware-labs/yaml-jsonpath v0.3.2 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/otel v1.39.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect go.opentelemetry.io/otel/metric v1.39.0 // indirect go.opentelemetry.io/otel/sdk v1.39.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.39.0 // 
indirect go.opentelemetry.io/otel/trace v1.39.0 // indirect - go.opentelemetry.io/proto/otlp v1.9.0 // indirect - golang.org/x/crypto v0.43.0 // indirect - golang.org/x/mod v0.28.0 // indirect - golang.org/x/net v0.45.0 // indirect - golang.org/x/text v0.30.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/crypto v0.44.0 // indirect + golang.org/x/mod v0.29.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/text v0.31.0 // indirect + golang.org/x/tools v0.38.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect + google.golang.org/grpc v1.75.1 // indirect google.golang.org/protobuf v1.36.10 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gorm.io/gorm v1.25.7 // indirect diff --git a/server/go.sum b/server/go.sum index bb8925ce..1950c7a2 100644 --- a/server/go.sum +++ b/server/go.sum @@ -2,8 +2,8 @@ dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod 
h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= @@ -39,8 +39,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/docker v28.5.1+incompatible h1:Bm8DchhSD2J6PsFzxC35TZo4TLGR2PdW/E69rU45NhM= -github.com/docker/docker v28.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= +github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= @@ -118,6 +118,8 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= github.com/kelseyhightower/envconfig v1.4.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg= +github.com/kernel/hypeman-go v0.20.0 h1:9kEMjtlko5oYSETwn9v829rJBv5GpcmoYjBjhjuwnBA= +github.com/kernel/hypeman-go v0.20.0/go.mod h1:guRrhyP9QW/ebUS1UcZ0uZLLJeGAAhDNzSi68U4M9hI= github.com/kernel/neko/server v0.0.0-20260213021128-abe9ac59a634 h1:Pn8Zag7TMXnMPdjz136NTjpGwI7rgx++BNzsH2b4w3I= github.com/kernel/neko/server v0.0.0-20260213021128-abe9ac59a634/go.mod 
h1:0+zactiySvtKwfe5JFjyNrSuQLA+EEPZl5bcfcZf1RM= github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw= @@ -227,6 +229,16 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/testcontainers/testcontainers-go v0.40.0 h1:pSdJYLOVgLE8YdUY2FHQ1Fxu+aMnb6JfVz1mxk7OeMU= github.com/testcontainers/testcontainers-go v0.40.0/go.mod h1:FSXV5KQtX2HAMlm7U3APNyLkkap35zNLxukw9oBi/MY= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= @@ -242,8 +254,8 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod 
h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0= @@ -254,6 +266,8 @@ go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A= @@ -261,11 +275,11 @@ go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pq golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto 
v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= -golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU= +golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= -golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -273,13 +287,13 @@ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.45.0 h1:RLBg5JKixCy82FtLJpeNlVM0nrSqpCRYzVU1n8kj0tM= -golang.org/x/net v0.45.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= -golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -311,15 +325,15 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= -golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.37.0 
h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From d3e7dc08683df0097834d37c912523476dc5bd3a Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 11:07:12 -0400 Subject: [PATCH 02/11] e2e: drop testcontainers leak from Backend; add hypeman ingress routing Addresses review feedback on the backend interface: - Remove Container() testcontainers.Container from the Backend interface (and the TestContainer facade). It leaked Docker-specifics into the otherwise backend-agnostic surface and was dead: no e2e test consumed it. The Docker backend keeps its *testcontainers.Container internally for Start/Exec. - Hypeman backend: reach instances via a single host-level wildcard ingress (find-or-create, keyed by tag managed-by=ki-e2e) instead of the instance's private network IP. Set KI_E2E_HYPEMAN_INGRESS_DOMAIN to route "-." through the host's reverse proxy to guest ports 10001/9222/9224; ingress is created at most once per host and never per instance. Unset = previous raw-IP behavior (needs L3 reachability to the instance subnet). KI_E2E_HYPEMAN_INGRESS_TLS toggles https/wss on :443. Verification: go build ./... and go vet ./e2e/ pass; new table tests cover raw-IP, ingress, and TLS endpoint derivation plus the shared-ingress params. Docker-backend e2e (TestDisplayResolutionChange + TestScreenshotHeadless) passes against onkernel/chromium-headful-private + chromium-headless-private. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend.go | 5 - server/e2e/backend_docker.go | 5 - server/e2e/backend_hypeman.go | 208 +++++++++++++++++++++++++++++++--- server/e2e/backend_test.go | 61 +++++++++- server/e2e/container.go | 7 -- 5 files changed, 249 insertions(+), 37 deletions(-) diff --git a/server/e2e/backend.go b/server/e2e/backend.go index a9fe5d61..856f7a2c 100644 --- a/server/e2e/backend.go +++ b/server/e2e/backend.go @@ -7,7 +7,6 @@ import ( "testing" instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" - "github.com/testcontainers/testcontainers-go" ) // ContainerConfig holds optional configuration for instance startup. @@ -68,10 +67,6 @@ type Backend interface { // ExitCh returns a channel that fires when the instance exits. ExitCh() <-chan error - - // Container returns the underlying testcontainers.Container for advanced - // Docker-only usage. It returns nil for non-Docker backends. - Container() testcontainers.Container } // BackendKind enumerates the supported e2e backends. diff --git a/server/e2e/backend_docker.go b/server/e2e/backend_docker.go index e87c1c87..7c583923 100644 --- a/server/e2e/backend_docker.go +++ b/server/e2e/backend_docker.go @@ -237,8 +237,3 @@ func (c *dockerBackend) ExitCh() <-chan error { ch := make(chan error, 1) return ch } - -// Container returns the underlying testcontainers.Container for advanced usage. -func (c *dockerBackend) Container() testcontainers.Container { - return c.ctr -} diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index 3a5ff9ca..0ed2766b 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -14,7 +14,6 @@ import ( "github.com/kernel/hypeman-go/option" instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" "github.com/nrednav/cuid2" - "github.com/testcontainers/testcontainers-go" ) // Container ports exposed by the kernel-images browser image. 
These are fixed @@ -41,22 +40,72 @@ const ( envHypemanGPUDevices = "KI_E2E_HYPEMAN_GPU_DEVICES" // envHypemanInstanceSize optionally overrides the VM memory size. envHypemanInstanceSize = "KI_E2E_HYPEMAN_SIZE" + // envHypemanIngressDomain selects hostname-based ingress routing. When set, + // the backend ensures a single host-level wildcard ingress exists and reaches + // each instance through Caddy at "-." instead of the + // instance's private network IP. Leave unset to use the raw network IP (only + // works from a network with L3 reachability to the hypeman instance subnet, + // e.g. the API's own tailnet-tagged hosts). + envHypemanIngressDomain = "KI_E2E_HYPEMAN_INGRESS_DOMAIN" + // envHypemanIngressTLS, when truthy, serves ingress endpoints over TLS + // (https/wss on :443) instead of plaintext (http/ws on :80). Plaintext is the + // default because ACME cert issuance requires the ingress hostname to be + // publicly resolvable, which is not the case on an internal/tailnet domain. + envHypemanIngressTLS = "KI_E2E_HYPEMAN_INGRESS_TLS" ) +// Shared, host-level ingress that routes every e2e instance by hostname. It is a +// find-or-create construct (created at most once per hypeman host) keyed by tag, +// because ingresses are host-level — we must not create one per instance. +const ( + ingressName = "ki-e2e" + ingressTagKey = "managed-by" + ingressTagVal = "ki-e2e" +) + +// ingressRoles maps a logical endpoint role to the guest port it targets. The +// public hostname for a role is "-."; the shared ingress +// matches the pattern "{instance}-." and routes to the captured +// instance on the guest port. +var ingressRoles = []struct { + role string + port int64 +}{ + {role: "api", port: hypemanAPIPort}, + {role: "cdp", port: hypemanCDPPort}, + {role: "cd", port: hypemanChromeDriverPort}, +} + // hypemanBackend starts the image as a remote VM on a running Hypeman dev server // using the github.com/kernel/hypeman-go client library. 
// -// The instance is reachable on its assigned network IP. All endpoint accessors -// target that IP on the fixed guest ports. Command execution is performed -// against the instance's own API server (/process/exec) so that callers get the -// same (exitCode, combinedOutput, error) shape as the Docker backend. +// Endpoints are reached one of two ways: +// +// - Ingress (preferred): when KI_E2E_HYPEMAN_INGRESS_DOMAIN is set, a single +// host-level wildcard ingress (find-or-create, keyed by tag) routes +// "-." through the hypeman host's reverse proxy to +// the instance's guest ports. This works from anywhere that can resolve the +// domain and reach the host's :80/:443, without L3 access to the instance +// subnet. +// - Raw network IP (fallback): the instance's assigned private IP on the fixed +// guest ports. Only works from a network with L3 reachability to the hypeman +// instance subnet (e.g. the API's own tailnet-tagged hosts). +// +// Command execution is performed against the instance's own API server +// (/process/exec) so that callers get the same (exitCode, combinedOutput, error) +// shape as the Docker backend. type hypemanBackend struct { client hypeman.Client image string instanceID string + name string ip string + // ingressDomain is empty in raw-IP mode; non-empty enables hostname routing. 
+ ingressDomain string + ingressTLS bool + exitCh chan error } @@ -85,9 +134,11 @@ func newHypemanBackend(image string) (Backend, error) { } return &hypemanBackend{ - client: hypeman.NewClient(opts...), - image: image, - exitCh: make(chan error, 1), + client: hypeman.NewClient(opts...), + image: image, + ingressDomain: strings.TrimSpace(os.Getenv(envHypemanIngressDomain)), + ingressTLS: isTruthy(os.Getenv(envHypemanIngressTLS)), + exitCh: make(chan error, 1), }, nil } @@ -105,9 +156,10 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { env["CHROMIUM_FLAGS"] = flags + " --no-sandbox" } + c.name = hypemanInstanceName() params := hypeman.InstanceNewParams{ Image: c.image, - Name: hypemanInstanceName(), + Name: c.name, Env: env, } if size := strings.TrimSpace(os.Getenv(envHypemanInstanceSize)); size != "" { @@ -129,6 +181,16 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { return err } + // Hostname routing: ensure the shared host-level ingress exists, then derive + // endpoints from "-.". No instance IP needed. + if c.ingressDomain != "" { + if err := c.ensureIngress(ctx); err != nil { + return err + } + return nil + } + + // Raw-IP fallback: reach the instance directly on its private network IP. ip, err := c.resolveIP(ctx) if err != nil { return err @@ -137,6 +199,87 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { return nil } +// ensureIngress finds or creates the single shared, host-level ingress that +// routes every e2e instance by hostname. Ingresses are host-level constructs, so +// we must not create one per instance: we look one up by tag and only create it +// if absent (tolerating a creation race with concurrent test binaries). 
+func (c *hypemanBackend) ensureIngress(ctx context.Context) error { + if c.ingressExists(ctx) { + return nil + } + _, err := c.client.Ingresses.New(ctx, c.desiredIngressParams()) + if err != nil { + // Another runner may have created it concurrently; accept that. + if c.ingressExists(ctx) { + return nil + } + return fmt.Errorf("hypeman: ensure ingress %q: %w", ingressName, err) + } + return nil +} + +// ingressExists reports whether a managed ingress that routes our API role +// pattern is already present on the host. +func (c *hypemanBackend) ingressExists(ctx context.Context) bool { + list, err := c.client.Ingresses.List(ctx, hypeman.IngressListParams{ + Tags: map[string]string{ingressTagKey: ingressTagVal}, + }) + if err != nil || list == nil { + return false + } + wantAPI := c.ingressPatternHost("api") + for _, ing := range *list { + for _, rule := range ing.Rules { + if rule.Match.Hostname == wantAPI { + return true + } + } + } + return false +} + +// desiredIngressParams builds the shared ingress: one rule per role, each a +// wildcard pattern hostname "{instance}-." routed to the captured +// instance on the corresponding guest port. +func (c *hypemanBackend) desiredIngressParams() hypeman.IngressNewParams { + rules := make([]hypeman.IngressRuleParam, 0, len(ingressRoles)) + for _, r := range ingressRoles { + rules = append(rules, hypeman.IngressRuleParam{ + Match: hypeman.IngressMatchParam{ + Hostname: c.ingressPatternHost(r.role), + Port: hypeman.Int(c.ingressListenPort()), + }, + Target: hypeman.IngressTargetParam{ + Instance: "{instance}", + Port: r.port, + }, + Tls: hypeman.Bool(c.ingressTLS), + }) + } + return hypeman.IngressNewParams{ + Name: ingressName, + Rules: rules, + Tags: map[string]string{ingressTagKey: ingressTagVal}, + } +} + +// ingressPatternHost is the wildcard hostname pattern for a role (uses the +// {instance} capture). ingressHost is the concrete hostname for this instance. 
+func (c *hypemanBackend) ingressPatternHost(role string) string { + return fmt.Sprintf("{instance}-%s.%s", role, c.ingressDomain) +} + +func (c *hypemanBackend) ingressHost(role string) string { + return fmt.Sprintf("%s-%s.%s", c.name, role, c.ingressDomain) +} + +func (c *hypemanBackend) ingressListenPort() int64 { + if c.ingressTLS { + return 443 + } + return 80 +} + // waitForRunning polls the instance wait endpoint until the instance is Running // or the context is done. func (c *hypemanBackend) waitForRunning(ctx context.Context) error { @@ -190,19 +333,43 @@ func (c *hypemanBackend) Stop(ctx context.Context) error { } func (c *hypemanBackend) APIBaseURL() string { - return fmt.Sprintf("http://%s:%d", c.ip, hypemanAPIPort) + return c.httpScheme() + "://" + c.endpointHostPort("api", hypemanAPIPort) } func (c *hypemanBackend) CDPURL() string { - return fmt.Sprintf("ws://%s:%d/", c.ip, hypemanCDPPort) + return c.wsScheme() + "://" + c.endpointHostPort("cdp", hypemanCDPPort) + "/" } func (c *hypemanBackend) CDPAddr() string { - return fmt.Sprintf("%s:%d", c.ip, hypemanCDPPort) + return c.endpointHostPort("cdp", hypemanCDPPort) } func (c *hypemanBackend) ChromeDriverURL() string { - return fmt.Sprintf("http://%s:%d", c.ip, hypemanChromeDriverPort) + return c.httpScheme() + "://" + c.endpointHostPort("cd", hypemanChromeDriverPort) +} + +// endpointHostPort returns the host:port a caller should dial for a role: the +// ingress hostname on the proxy's listen port when hostname routing is enabled, +// otherwise the instance's private IP on the fixed guest port. 
+func (c *hypemanBackend) endpointHostPort(role string, guestPort int) string { + if c.ingressDomain != "" { + return fmt.Sprintf("%s:%d", c.ingressHost(role), c.ingressListenPort()) + } + return fmt.Sprintf("%s:%d", c.ip, guestPort) +} + +func (c *hypemanBackend) httpScheme() string { + if c.ingressDomain != "" && c.ingressTLS { + return "https" + } + return "http" +} + +func (c *hypemanBackend) wsScheme() string { + if c.ingressDomain != "" && c.ingressTLS { + return "wss" + } + return "ws" } func (c *hypemanBackend) APIClient() (*instanceoapi.ClientWithResponses, error) { @@ -286,17 +453,22 @@ func (c *hypemanBackend) ExitCh() <-chan error { return c.exitCh } -// Container returns nil: the hypeman backend is not Docker-based. -func (c *hypemanBackend) Container() testcontainers.Container { - return nil -} - // hypemanInstanceName builds a DNS-safe, unique instance name. Hypeman requires // lowercase letters, digits, and dashes only, not starting/ending with a dash. func hypemanInstanceName() string { return "ki-e2e-" + strings.ToLower(cuid2.Generate()) } +// isTruthy reports whether an env value means "on" (1/true/yes, case-insensitive). +func isTruthy(s string) bool { + switch strings.ToLower(strings.TrimSpace(s)) { + case "1", "true", "yes", "y", "on": + return true + default: + return false + } +} + func parseCommaList(s string) []string { var out []string for _, part := range strings.Split(s, ",") { diff --git a/server/e2e/backend_test.go b/server/e2e/backend_test.go index 853108b4..918733f9 100644 --- a/server/e2e/backend_test.go +++ b/server/e2e/backend_test.go @@ -59,7 +59,64 @@ func TestNewHypemanBackendWithConfig(t *testing.T) { if b == nil { t.Fatal("expected non-nil backend") } - if b.Container() != nil { - t.Fatal("hypeman backend Container() must be nil") +} + +// TestHypemanRawIPMode verifies endpoint derivation in the default raw-IP mode +// (no ingress domain): the private IP on the fixed guest ports. 
+func TestHypemanRawIPMode(t *testing.T) { + b := &hypemanBackend{ip: "10.1.2.3"} + for _, tc := range []struct{ name, got, want string }{ + {"api", b.APIBaseURL(), "http://10.1.2.3:10001"}, + {"cdp", b.CDPURL(), "ws://10.1.2.3:9222/"}, + {"cdpAddr", b.CDPAddr(), "10.1.2.3:9222"}, + {"cd", b.ChromeDriverURL(), "http://10.1.2.3:9224"}, + } { + if tc.got != tc.want { + t.Errorf("%s = %q, want %q", tc.name, tc.got, tc.want) + } + } +} + +// TestHypemanIngressRouting verifies hostname-routed endpoints and that the +// shared ingress params describe one wildcard rule per role on the proxy's +// plaintext listen port. The instance name contains dashes, which must end up +// inside the {instance} capture, not split the role suffix. +func TestHypemanIngressRouting(t *testing.T) { + b := &hypemanBackend{name: "ki-e2e-abc123", ingressDomain: "e2e.hypeman.dev"} + for _, tc := range []struct{ name, got, want string }{ + {"api", b.APIBaseURL(), "http://ki-e2e-abc123-api.e2e.hypeman.dev:80"}, + {"cdp", b.CDPURL(), "ws://ki-e2e-abc123-cdp.e2e.hypeman.dev:80/"}, + {"cdpAddr", b.CDPAddr(), "ki-e2e-abc123-cdp.e2e.hypeman.dev:80"}, + {"cd", b.ChromeDriverURL(), "http://ki-e2e-abc123-cd.e2e.hypeman.dev:80"}, + {"pattern", b.ingressPatternHost("api"), "{instance}-api.e2e.hypeman.dev"}, + } { + if tc.got != tc.want { + t.Errorf("%s = %q, want %q", tc.name, tc.got, tc.want) + } + } + + p := b.desiredIngressParams() + if p.Name != ingressName { + t.Errorf("ingress name = %q, want %q", p.Name, ingressName) + } + if len(p.Rules) != len(ingressRoles) { + t.Fatalf("got %d rules, want %d", len(p.Rules), len(ingressRoles)) + } + if got := p.Rules[0].Target.Instance; got != "{instance}" { + t.Errorf("rule[0] target instance = %q, want {instance}", got) + } + if got := p.Rules[0].Target.Port; got != hypemanAPIPort { + t.Errorf("rule[0] target port = %d, want %d", got, hypemanAPIPort) + } +} + +// TestHypemanIngressTLS verifies https/wss + :443 when TLS is enabled. 
+func TestHypemanIngressTLS(t *testing.T) { + b := &hypemanBackend{name: "x", ingressDomain: "d", ingressTLS: true} + if got, want := b.APIBaseURL(), "https://x-api.d:443"; got != want { + t.Errorf("APIBaseURL = %q, want %q", got, want) + } + if got, want := b.CDPURL(), "wss://x-cdp.d:443/"; got != want { + t.Errorf("CDPURL = %q, want %q", got, want) } } diff --git a/server/e2e/container.go b/server/e2e/container.go index fe54e836..790248bd 100644 --- a/server/e2e/container.go +++ b/server/e2e/container.go @@ -6,7 +6,6 @@ import ( "testing" instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" - "github.com/testcontainers/testcontainers-go" ) // TestContainer is the handle every e2e test uses to drive a browser instance. @@ -115,9 +114,3 @@ func (c *TestContainer) Exec(ctx context.Context, cmd []string) (int, string, er func (c *TestContainer) ExitCh() <-chan error { return c.backend.ExitCh() } - -// Container returns the underlying testcontainers.Container for advanced usage. -// It returns nil for backends that are not Docker-based. -func (c *TestContainer) Container() testcontainers.Container { - return c.backend.Container() -} From 98d60a546771abefb349d7840a746da456147593 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 11:52:01 -0400 Subject: [PATCH 03/11] e2e: correct hypeman ingress to live model (port-routed wildcard, TLS) + vGPU profile --- server/e2e/backend_hypeman.go | 273 +++++++++++++++++++--------------- server/e2e/backend_test.go | 67 ++++++--- 2 files changed, 196 insertions(+), 144 deletions(-) diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index 0ed2766b..c6ac2224 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/http" + "net/url" "os" "strings" "time" @@ -16,9 +17,10 @@ import ( "github.com/nrednav/cuid2" ) -// Container ports exposed by the kernel-images browser image. 
These are fixed -// inside the guest; the Docker backend remaps them to random host ports, while -// the Hypeman backend reaches them directly on the instance's network IP. +// Guest ports exposed by the kernel-images browser image. They are fixed inside +// the guest; the Docker backend remaps them to random host ports, while the +// Hypeman backend reaches them either through an ingress (by listen port) or +// directly on the instance's private network IP. const ( hypemanAPIPort = 10001 hypemanCDPPort = 9222 @@ -36,64 +38,70 @@ const ( // native HYPEMAN_API_KEY is also honored as a fallback. envHypemanToken = "HYPEMAN_AUTH_TOKEN" // envHypemanGPUDevices is an optional comma-separated list of device IDs or - // names to attach for GPU passthrough (used by the GPU image). + // names to attach for GPU/PCI passthrough. envHypemanGPUDevices = "KI_E2E_HYPEMAN_GPU_DEVICES" + // envHypemanGPUProfile requests a vGPU profile (e.g. "NVIDIA L40S-2Q") for + // the instance; the host assigns the backing mdev. Required to boot the vGPU + // browser image (chromium-headful-vgpu). + envHypemanGPUProfile = "KI_E2E_HYPEMAN_GPU_PROFILE" // envHypemanInstanceSize optionally overrides the VM memory size. envHypemanInstanceSize = "KI_E2E_HYPEMAN_SIZE" - // envHypemanIngressDomain selects hostname-based ingress routing. When set, - // the backend ensures a single host-level wildcard ingress exists and reaches - // each instance through Caddy at "-." instead of the - // instance's private network IP. Leave unset to use the raw network IP (only - // works from a network with L3 reachability to the hypeman instance subnet, - // e.g. the API's own tailnet-tagged hosts). + // envHypemanIngressDomain overrides the wildcard ingress base domain. If + // unset it is derived from the base URL host by stripping a leading + // "hypeman." prefix (e.g. hypeman.dev-yul-hypeman-1.kernel.sh -> + // dev-yul-hypeman-1.kernel.sh), matching the host's "{instance}." + // wildcard ingresses. 
envHypemanIngressDomain = "KI_E2E_HYPEMAN_INGRESS_DOMAIN" - // envHypemanIngressTLS, when truthy, serves ingress endpoints over TLS - // (https/wss on :443) instead of plaintext (http/ws on :80). Plaintext is the - // default because ACME cert issuance requires the ingress hostname to be - // publicly resolvable, which is not the case on an internal/tailnet domain. + // envHypemanIngressTLS toggles TLS on ingress endpoints. Defaults to true + // (the host terminates TLS with a wildcard cert); set 0/false for plaintext. envHypemanIngressTLS = "KI_E2E_HYPEMAN_INGRESS_TLS" + // envHypemanRawIP forces reaching the instance on its private network IP + // instead of via ingress. Only works from a network with L3 reachability to + // the hypeman instance subnet (e.g. the API's own tailnet-tagged hosts). + envHypemanRawIP = "KI_E2E_HYPEMAN_RAW_IP" ) -// Shared, host-level ingress that routes every e2e instance by hostname. It is a -// find-or-create construct (created at most once per hypeman host) keyed by tag, -// because ingresses are host-level — we must not create one per instance. +// ingressRole maps a logical endpoint to the ingress listen port and the guest +// target port. Hostname routing uses a single wildcard hostname +// "{instance}." and differentiates roles by listen port, matching the +// host's existing convention (the browser API is exposed on :444 -> guest +// :10001). cdp/cd reuse the guest port as the listen port. +type ingressRole struct { + role string + listenPort int64 + targetPort int64 +} + +var ingressRoles = []ingressRole{ + {role: "api", listenPort: 444, targetPort: hypemanAPIPort}, + {role: "cdp", listenPort: hypemanCDPPort, targetPort: hypemanCDPPort}, + {role: "cd", listenPort: hypemanChromeDriverPort, targetPort: hypemanChromeDriverPort}, +} + +// Tag applied to ingresses this backend creates, so they are recognizable as +// e2e-managed. We still reuse any pre-existing ingress (e.g. 
the API's own +// browser ingress) regardless of tag — matching is by rule shape. const ( - ingressName = "ki-e2e" ingressTagKey = "managed-by" ingressTagVal = "ki-e2e" ) -// ingressRoles maps a logical endpoint role to the guest port it targets. The -// public hostname for a role is "-."; the shared ingress -// matches the pattern "{instance}-." and routes to the captured -// instance on the guest port. -var ingressRoles = []struct { - role string - port int64 -}{ - {role: "api", port: hypemanAPIPort}, - {role: "cdp", port: hypemanCDPPort}, - {role: "cd", port: hypemanChromeDriverPort}, -} - // hypemanBackend starts the image as a remote VM on a running Hypeman dev server // using the github.com/kernel/hypeman-go client library. // // Endpoints are reached one of two ways: // -// - Ingress (preferred): when KI_E2E_HYPEMAN_INGRESS_DOMAIN is set, a single -// host-level wildcard ingress (find-or-create, keyed by tag) routes -// "-." through the hypeman host's reverse proxy to -// the instance's guest ports. This works from anywhere that can resolve the -// domain and reach the host's :80/:443, without L3 access to the instance -// subnet. -// - Raw network IP (fallback): the instance's assigned private IP on the fixed -// guest ports. Only works from a network with L3 reachability to the hypeman -// instance subnet (e.g. the API's own tailnet-tagged hosts). +// - Ingress (default): a wildcard ingress per role routes +// ".:" through the host's reverse proxy to the +// instance's guest port. Each rule uses the "{instance}" hostname capture so +// a single host-level ingress serves every instance; rules are found-or- +// created (reusing pre-existing ones, e.g. the browser API :444 -> :10001). +// Works from anywhere that can resolve and reach the host. +// - Raw network IP (opt-in via KI_E2E_HYPEMAN_RAW_IP): the instance's private +// IP on the fixed guest ports. Needs L3 reachability to the instance subnet. 
// -// Command execution is performed against the instance's own API server -// (/process/exec) so that callers get the same (exitCode, combinedOutput, error) -// shape as the Docker backend. +// Command execution runs against the instance's own API server (/process/exec) +// so callers get the same (exitCode, combinedOutput, error) shape as Docker. type hypemanBackend struct { client hypeman.Client image string @@ -102,7 +110,7 @@ type hypemanBackend struct { name string ip string - // ingressDomain is empty in raw-IP mode; non-empty enables hostname routing. + useIngress bool ingressDomain string ingressTLS bool @@ -122,9 +130,14 @@ func newHypemanBackend(image string) (Backend, error) { opts = append(opts, option.WithAPIKey(token)) } + baseURL := strings.TrimSpace(os.Getenv(envHypemanBaseURL)) + if baseURL == "" { + baseURL = strings.TrimSpace(os.Getenv("HYPEMAN_BASE_URL")) + } + // Fail fast with an actionable message if neither this var nor the SDK's // native vars provide connection details. 
- hasBase := strings.TrimSpace(os.Getenv(envHypemanBaseURL)) != "" || strings.TrimSpace(os.Getenv("HYPEMAN_BASE_URL")) != "" + hasBase := baseURL != "" hasToken := strings.TrimSpace(os.Getenv(envHypemanToken)) != "" || strings.TrimSpace(os.Getenv("HYPEMAN_API_KEY")) != "" if !hasBase || !hasToken { return nil, fmt.Errorf( @@ -133,17 +146,34 @@ func newHypemanBackend(image string) (Backend, error) { ) } + domain := strings.TrimSpace(os.Getenv(envHypemanIngressDomain)) + if domain == "" { + domain = deriveIngressDomain(baseURL) + } + rawIP := isTruthy(os.Getenv(envHypemanRawIP)) + return &hypemanBackend{ client: hypeman.NewClient(opts...), image: image, - ingressDomain: strings.TrimSpace(os.Getenv(envHypemanIngressDomain)), - ingressTLS: isTruthy(os.Getenv(envHypemanIngressTLS)), + useIngress: !rawIP && domain != "", + ingressDomain: domain, + ingressTLS: envBoolDefault(envHypemanIngressTLS, true), exitCh: make(chan error, 1), }, nil } +// deriveIngressDomain extracts the wildcard ingress base domain from the control +// API base URL by stripping a leading "hypeman." label. +func deriveIngressDomain(baseURL string) string { + u, err := url.Parse(baseURL) + if err != nil || u.Hostname() == "" { + return "" + } + return strings.TrimPrefix(u.Hostname(), "hypeman.") +} + // Start creates and boots a hypeman instance for the image, waits for it to -// reach the Running state, and resolves its network IP. +// reach the Running state, then prepares the chosen routing mode. 
func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { env := make(map[string]string, len(cfg.Env)+1) for k, v := range cfg.Env { @@ -168,6 +198,9 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { if devs := parseCommaList(os.Getenv(envHypemanGPUDevices)); len(devs) > 0 { params.Devices = devs } + if profile := strings.TrimSpace(os.Getenv(envHypemanGPUProfile)); profile != "" { + params.GPU = hypeman.InstanceNewParamsGPU{Profile: hypeman.String(profile)} + } inst, err := c.client.Instances.New(ctx, params) if err != nil { @@ -181,13 +214,10 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { return err } - // Hostname routing: ensure the shared host-level ingress exists, then derive - // endpoints from "-.". No instance IP needed. - if c.ingressDomain != "" { - if err := c.ensureIngress(ctx); err != nil { - return err - } - return nil + if c.useIngress { + // Ensure the wildcard ingress rules exist; endpoints derive from the + // instance name + domain, so no instance IP is needed. + return c.ensureIngress(ctx) } // Raw-IP fallback: reach the instance directly on its private network IP. @@ -199,85 +229,78 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { return nil } -// ensureIngress finds or creates the single shared, host-level ingress that -// routes every e2e instance by hostname. Ingresses are host-level constructs, so -// we must not create one per instance: we look one up by tag and only create it -// if absent (tolerating a creation race with concurrent test binaries). +// ensureIngress finds or creates a wildcard ingress for each role. Ingresses are +// host-level constructs keyed by rule shape (wildcard hostname + listen port -> +// target port), so we reuse any pre-existing rule (e.g. the API's browser +// ingress) and only create what's missing — never one ingress per instance. 
func (c *hypemanBackend) ensureIngress(ctx context.Context) error { - if c.ingressExists(ctx) { - return nil - } - _, err := c.client.Ingresses.New(ctx, c.desiredIngressParams()) - if err != nil { - // Another runner may have created it concurrently; accept that. - if c.ingressExists(ctx) { - return nil + have := c.existingRuleSet(ctx) + for _, r := range ingressRoles { + key := ruleKey(c.wildcardHost(), r.listenPort, r.targetPort) + if have[key] { + continue + } + if _, err := c.client.Ingresses.New(ctx, c.roleIngressParams(r)); err != nil { + // Another runner may have created it concurrently; re-check. + if c.existingRuleSet(ctx)[key] { + continue + } + return fmt.Errorf("hypeman: ensure ingress for role %q (:%d->:%d): %w", r.role, r.listenPort, r.targetPort, err) } - return fmt.Errorf("hypeman: ensure ingress %q: %w", ingressName, err) } return nil } -// ingressExists reports whether a managed ingress that routes our API role -// pattern is already present on the host. -func (c *hypemanBackend) ingressExists(ctx context.Context) bool { - list, err := c.client.Ingresses.List(ctx, hypeman.IngressListParams{ - Tags: map[string]string{ingressTagKey: ingressTagVal}, - }) +// existingRuleSet lists all ingresses and indexes their rules by shape so we can +// reuse any rule (regardless of ingress name/tag) that already provides routing. +func (c *hypemanBackend) existingRuleSet(ctx context.Context) map[string]bool { + set := map[string]bool{} + list, err := c.client.Ingresses.List(ctx, hypeman.IngressListParams{}) if err != nil || list == nil { - return false + return set } - wantAPI := c.ingressPatternHost("api") for _, ing := range *list { for _, rule := range ing.Rules { - if rule.Match.Hostname == wantAPI { - return true - } + set[ruleKey(rule.Match.Hostname, rule.Match.Port, rule.Target.Port)] = true } } - return false + return set } -// desiredIngressParams builds the shared ingress: one rule per role, each a -// wildcard pattern hostname "{instance}-." 
routed to the captured -// instance on the corresponding guest port. -func (c *hypemanBackend) desiredIngressParams() hypeman.IngressNewParams { - rules := make([]hypeman.IngressRuleParam, 0, len(ingressRoles)) - for _, r := range ingressRoles { - rules = append(rules, hypeman.IngressRuleParam{ +func (c *hypemanBackend) roleIngressParams(r ingressRole) hypeman.IngressNewParams { + return hypeman.IngressNewParams{ + Name: "ki-e2e-" + r.role, + Rules: []hypeman.IngressRuleParam{{ Match: hypeman.IngressMatchParam{ - Hostname: c.ingressPatternHost(r.role), - Port: hypeman.Int(c.ingressListenPort()), + Hostname: c.wildcardHost(), + Port: hypeman.Int(r.listenPort), }, Target: hypeman.IngressTargetParam{ Instance: "{instance}", - Port: r.port, + Port: r.targetPort, }, Tls: hypeman.Bool(c.ingressTLS), - }) - } - return hypeman.IngressNewParams{ - Name: ingressName, - Rules: rules, - Tags: map[string]string{ingressTagKey: ingressTagVal}, + }}, + Tags: map[string]string{ingressTagKey: ingressTagVal}, } } -// ingressPatternHost is the wildcard hostname pattern for a role (uses the -// {instance} capture). ingressHost is the concrete hostname for this instance. -func (c *hypemanBackend) ingressPatternHost(role string) string { - return fmt.Sprintf("{instance}-%s.%s", role, c.ingressDomain) +func ruleKey(host string, listen, target int64) string { + return fmt.Sprintf("%s|%d|%d", host, listen, target) } -func (c *hypemanBackend) ingressHost(role string) string { - return fmt.Sprintf("%s-%s.%s", c.name, role, c.ingressDomain) -} +// wildcardHost is the pattern hostname ("{instance}.") used in ingress +// rules; ingressHost is the concrete hostname for this instance. +func (c *hypemanBackend) wildcardHost() string { return "{instance}." + c.ingressDomain } +func (c *hypemanBackend) ingressHost() string { return c.name + "." 
+ c.ingressDomain } -func (c *hypemanBackend) ingressListenPort() int64 { - if c.ingressTLS { - return 443 +func (c *hypemanBackend) listenPortFor(role string) int64 { + for _, r := range ingressRoles { + if r.role == role { + return r.listenPort + } } - return 80 + return 0 } // waitForRunning polls the instance wait endpoint until the instance is Running @@ -317,7 +340,8 @@ func (c *hypemanBackend) resolveIP(ctx context.Context) (string, error) { } } -// Stop deletes the hypeman instance. +// Stop deletes the hypeman instance. The shared wildcard ingresses are +// host-level and intentionally left in place for reuse by other instances/runs. func (c *hypemanBackend) Stop(ctx context.Context) error { if c.instanceID == "" { return nil @@ -333,40 +357,40 @@ func (c *hypemanBackend) Stop(ctx context.Context) error { } func (c *hypemanBackend) APIBaseURL() string { - return c.httpScheme() + "://" + c.endpointHostPort("api", hypemanAPIPort) + return c.httpScheme() + "://" + c.endpoint("api", hypemanAPIPort) } func (c *hypemanBackend) CDPURL() string { - return c.wsScheme() + "://" + c.endpointHostPort("cdp", hypemanCDPPort) + "/" + return c.wsScheme() + "://" + c.endpoint("cdp", hypemanCDPPort) + "/" } func (c *hypemanBackend) CDPAddr() string { - return c.endpointHostPort("cdp", hypemanCDPPort) + return c.endpoint("cdp", hypemanCDPPort) } func (c *hypemanBackend) ChromeDriverURL() string { - return c.httpScheme() + "://" + c.endpointHostPort("cd", hypemanChromeDriverPort) + return c.httpScheme() + "://" + c.endpoint("cd", hypemanChromeDriverPort) } -// endpointHostPort returns the host:port a caller should dial for a role: the -// ingress hostname on the proxy's listen port when hostname routing is enabled, -// otherwise the instance's private IP on the fixed guest port. 
-func (c *hypemanBackend) endpointHostPort(role string, guestPort int) string { - if c.ingressDomain != "" { - return fmt.Sprintf("%s:%d", c.ingressHost(role), c.ingressListenPort()) +// endpoint returns the host:port a caller should dial for a role: the ingress +// hostname on the role's listen port when hostname routing is enabled, otherwise +// the instance's private IP on the fixed guest port. +func (c *hypemanBackend) endpoint(role string, guestPort int64) string { + if c.useIngress { + return fmt.Sprintf("%s:%d", c.ingressHost(), c.listenPortFor(role)) } return fmt.Sprintf("%s:%d", c.ip, guestPort) } func (c *hypemanBackend) httpScheme() string { - if c.ingressDomain != "" && c.ingressTLS { + if c.useIngress && c.ingressTLS { return "https" } return "http" } func (c *hypemanBackend) wsScheme() string { - if c.ingressDomain != "" && c.ingressTLS { + if c.useIngress && c.ingressTLS { return "wss" } return "ws" @@ -469,6 +493,15 @@ func isTruthy(s string) bool { } } +// envBoolDefault parses a boolean env var, returning def when unset/empty. +func envBoolDefault(name string, def bool) bool { + v := strings.TrimSpace(os.Getenv(name)) + if v == "" { + return def + } + return isTruthy(v) +} + func parseCommaList(s string) []string { var out []string for _, part := range strings.Split(s, ",") { diff --git a/server/e2e/backend_test.go b/server/e2e/backend_test.go index 918733f9..d42d3b5e 100644 --- a/server/e2e/backend_test.go +++ b/server/e2e/backend_test.go @@ -77,46 +77,65 @@ func TestHypemanRawIPMode(t *testing.T) { } } -// TestHypemanIngressRouting verifies hostname-routed endpoints and that the -// shared ingress params describe one wildcard rule per role on the proxy's -// plaintext listen port. The instance name contains dashes, which must end up -// inside the {instance} capture, not split the role suffix. 
+// TestHypemanIngressRouting verifies hostname-routed endpoints (single wildcard +// hostname, roles differentiated by listen port, TLS) and the per-role ingress +// params. The instance name contains dashes, which must stay inside the single +// {instance} hostname label. func TestHypemanIngressRouting(t *testing.T) { - b := &hypemanBackend{name: "ki-e2e-abc123", ingressDomain: "e2e.hypeman.dev"} + const domain = "dev-yul-hypeman-1.kernel.sh" + b := &hypemanBackend{name: "ki-e2e-abc123", useIngress: true, ingressDomain: domain, ingressTLS: true} for _, tc := range []struct{ name, got, want string }{ - {"api", b.APIBaseURL(), "http://ki-e2e-abc123-api.e2e.hypeman.dev:80"}, - {"cdp", b.CDPURL(), "ws://ki-e2e-abc123-cdp.e2e.hypeman.dev:80/"}, - {"cdpAddr", b.CDPAddr(), "ki-e2e-abc123-cdp.e2e.hypeman.dev:80"}, - {"cd", b.ChromeDriverURL(), "http://ki-e2e-abc123-cd.e2e.hypeman.dev:80"}, - {"pattern", b.ingressPatternHost("api"), "{instance}-api.e2e.hypeman.dev"}, + {"api", b.APIBaseURL(), "https://ki-e2e-abc123." + domain + ":444"}, + {"cdp", b.CDPURL(), "wss://ki-e2e-abc123." + domain + ":9222/"}, + {"cdpAddr", b.CDPAddr(), "ki-e2e-abc123." + domain + ":9222"}, + {"cd", b.ChromeDriverURL(), "https://ki-e2e-abc123." + domain + ":9224"}, + {"wildcard", b.wildcardHost(), "{instance}." + domain}, } { if tc.got != tc.want { t.Errorf("%s = %q, want %q", tc.name, tc.got, tc.want) } } - p := b.desiredIngressParams() - if p.Name != ingressName { - t.Errorf("ingress name = %q, want %q", p.Name, ingressName) + // The "api" role reuses the host's :444 -> :10001 browser ingress shape. 
+ p := b.roleIngressParams(ingressRoles[0]) + if p.Name != "ki-e2e-api" { + t.Errorf("ingress name = %q, want ki-e2e-api", p.Name) } - if len(p.Rules) != len(ingressRoles) { - t.Fatalf("got %d rules, want %d", len(p.Rules), len(ingressRoles)) + if len(p.Rules) != 1 { + t.Fatalf("got %d rules, want 1", len(p.Rules)) } - if got := p.Rules[0].Target.Instance; got != "{instance}" { - t.Errorf("rule[0] target instance = %q, want {instance}", got) + r := p.Rules[0] + if r.Match.Hostname != "{instance}."+domain { + t.Errorf("match hostname = %q", r.Match.Hostname) } - if got := p.Rules[0].Target.Port; got != hypemanAPIPort { - t.Errorf("rule[0] target port = %d, want %d", got, hypemanAPIPort) + if got := r.Match.Port.Or(0); got != 444 { + t.Errorf("match port = %d, want 444", got) + } + if r.Target.Instance != "{instance}" || r.Target.Port != hypemanAPIPort { + t.Errorf("target = %q:%d, want {instance}:%d", r.Target.Instance, r.Target.Port, hypemanAPIPort) } } -// TestHypemanIngressTLS verifies https/wss + :443 when TLS is enabled. -func TestHypemanIngressTLS(t *testing.T) { - b := &hypemanBackend{name: "x", ingressDomain: "d", ingressTLS: true} - if got, want := b.APIBaseURL(), "https://x-api.d:443"; got != want { +// TestHypemanIngressPlaintext verifies http/ws when TLS is disabled. +func TestHypemanIngressPlaintext(t *testing.T) { + b := &hypemanBackend{name: "x", useIngress: true, ingressDomain: "d", ingressTLS: false} + if got, want := b.APIBaseURL(), "http://x.d:444"; got != want { t.Errorf("APIBaseURL = %q, want %q", got, want) } - if got, want := b.CDPURL(), "wss://x-cdp.d:443/"; got != want { + if got, want := b.CDPURL(), "ws://x.d:9222/"; got != want { t.Errorf("CDPURL = %q, want %q", got, want) } } + +// TestDeriveIngressDomain strips a leading "hypeman." from the control API host. 
+func TestDeriveIngressDomain(t *testing.T) { + for _, tc := range []struct{ in, want string }{ + {"https://hypeman.dev-yul-hypeman-1.kernel.sh", "dev-yul-hypeman-1.kernel.sh"}, + {"https://dev-yul-hypeman-1.kernel.sh", "dev-yul-hypeman-1.kernel.sh"}, + {"", ""}, + } { + if got := deriveIngressDomain(tc.in); got != tc.want { + t.Errorf("deriveIngressDomain(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} From 0c1d939d1433d98c65c68a330e97a71e759c2529 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 12:05:29 -0400 Subject: [PATCH 04/11] e2e: de-leak HostAccess (backend-agnostic capability; hypeman rejects explicitly) --- server/e2e/backend.go | 9 +++++++-- server/e2e/backend_hypeman.go | 7 +++++++ server/e2e/backend_test.go | 16 +++++++++++++++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/server/e2e/backend.go b/server/e2e/backend.go index 856f7a2c..706f2063 100644 --- a/server/e2e/backend.go +++ b/server/e2e/backend.go @@ -15,8 +15,13 @@ import ( // calling Start with the same shape regardless of where the browser instance // actually runs (a local Docker container or a remote Hypeman VM). type ContainerConfig struct { - Env map[string]string - HostAccess bool // Add host.docker.internal mapping (Docker backend only) + Env map[string]string + // HostAccess requests that the browser instance be able to reach a service + // the test stands up on its own host (loopback) — used by tests with a local + // fixture server (capmonster, persisted-login). How it's provided is a + // backend detail (the Docker backend maps host.docker.internal); backends + // that cannot bridge a remote instance to the test host reject it. + HostAccess bool } // Backend is the abstraction every e2e browser-instance provider implements. 
diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index c6ac2224..d89ef104 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -175,6 +175,13 @@ func deriveIngressDomain(baseURL string) string { // Start creates and boots a hypeman instance for the image, waits for it to // reach the Running state, then prepares the chosen routing mode. func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { + if cfg.HostAccess { + // A remote VM has no equivalent of Docker's host.docker.internal; we + // reject rather than silently ignore so host-fixture tests (capmonster, + // persisted-login) fail loudly here and stay on the Docker backend. + return fmt.Errorf("hypeman backend does not support ContainerConfig.HostAccess (no host loopback bridge for remote instances); run host-access tests on the docker backend") + } + env := make(map[string]string, len(cfg.Env)+1) for k, v := range cfg.Env { env[k] = v diff --git a/server/e2e/backend_test.go b/server/e2e/backend_test.go index d42d3b5e..0d66de90 100644 --- a/server/e2e/backend_test.go +++ b/server/e2e/backend_test.go @@ -1,6 +1,10 @@ package e2e -import "testing" +import ( + "context" + "strings" + "testing" +) // TestBackendKindFromEnv verifies the KI_E2E_BACKEND selection logic. These are // cheap, infra-free unit tests safe to run in CI. @@ -127,6 +131,16 @@ func TestHypemanIngressPlaintext(t *testing.T) { } } +// TestHypemanRejectsHostAccess verifies the hypeman backend refuses HostAccess +// (no host-loopback bridge for remote VMs) before doing any network I/O. +func TestHypemanRejectsHostAccess(t *testing.T) { + b := &hypemanBackend{} + err := b.Start(context.Background(), ContainerConfig{HostAccess: true}) + if err == nil || !strings.Contains(err.Error(), "HostAccess") { + t.Fatalf("expected HostAccess rejection, got %v", err) + } +} + // TestDeriveIngressDomain strips a leading "hypeman." from the control API host. 
func TestDeriveIngressDomain(t *testing.T) { for _, tc := range []struct{ in, want string }{ From c63bd8423892944e17c4442440c3eb7d83b3dd53 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 15:38:59 -0400 Subject: [PATCH 05/11] e2e: hypeman backend takes an explicit config struct (no env reads in Start) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review: Start() reading env vars is surprising and couples the backend to the process environment. Introduce hypemanConfig holding every option (BaseURL, Token, IngressDomain, IngressTLS, RawIP, Size, DiskIOBps, GPUDevices, GPUProfile). newHypemanBackend(image, cfg) and Start now consume only the struct — env parsing collapses to a single hypemanConfigFromEnv() called by the e2e factory, so other callers can populate options explicitly and never touch the environment. Also defaults DiskIOBps to 62MB/s (KI_E2E_HYPEMAN_DISK_IO_BPS overrides): ad-hoc hypeman instances otherwise get ~15MB/s, which starves the in-guest playwright daemon's cold first-read (~43MB of node_modules) past its 5s start budget. With 62MB/s the daemon starts in time — validated: persist_login TestCookiePersistence Headless now PASSES on hypeman (was failing on "playwright daemon failed to start within 5s"). go build/vet/unit pass (incl. new TestHypemanConfigFromEnv); live hypeman TestDisplayResolutionChange passes via the new construction path. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend.go | 2 +- server/e2e/backend_hypeman.go | 122 ++++++++++++++++++++++++---------- server/e2e/backend_test.go | 49 +++++++++++--- 3 files changed, 127 insertions(+), 46 deletions(-) diff --git a/server/e2e/backend.go b/server/e2e/backend.go index 706f2063..2c554912 100644 --- a/server/e2e/backend.go +++ b/server/e2e/backend.go @@ -107,7 +107,7 @@ func newBackend(tb testing.TB, image string) Backend { case BackendDocker: return newDockerBackend(image) case BackendHypeman: - b, err := newHypemanBackend(image) + b, err := newHypemanBackend(image, hypemanConfigFromEnv()) if err != nil { tb.Fatalf("e2e: failed to configure hypeman backend: %v", err) } diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index d89ef104..884a345c 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -44,6 +44,11 @@ const ( // the instance; the host assigns the backing mdev. Required to boot the vGPU // browser image (chromium-headful-vgpu). envHypemanGPUProfile = "KI_E2E_HYPEMAN_GPU_PROFILE" + // envHypemanDiskIOBps overrides the instance disk I/O rate limit. Defaults to + // defaultHypemanDiskIOBps; the hypeman default for ad-hoc instances is much + // lower (~15MB/s), which starves cold first-reads at boot (e.g. the in-guest + // playwright daemon's ~43MB of node_modules) and can blow its 5s start budget. + envHypemanDiskIOBps = "KI_E2E_HYPEMAN_DISK_IO_BPS" // envHypemanInstanceSize optionally overrides the VM memory size. envHypemanInstanceSize = "KI_E2E_HYPEMAN_SIZE" // envHypemanIngressDomain overrides the wildcard ingress base domain. If @@ -61,6 +66,11 @@ const ( envHypemanRawIP = "KI_E2E_HYPEMAN_RAW_IP" ) +// defaultHypemanDiskIOBps matches what production browser instances run at, so +// e2e instances aren't disk-throttled into spurious timeouts. Format is the +// hypeman human-readable rate (e.g. "62MB/s"); "MiB" is not accepted. 
+const defaultHypemanDiskIOBps = "62MB/s" + // ingressRole maps a logical endpoint to the ingress listen port and the guest // target port. Hostname routing uses a single wildcard hostname // "{instance}." and differentiates roles by listen port, matching the @@ -105,11 +115,13 @@ const ( type hypemanBackend struct { client hypeman.Client image string + cfg hypemanConfig instanceID string name string ip string + // Derived from cfg at construction (see newHypemanBackend). useIngress bool ingressDomain string ingressTLS bool @@ -117,47 +129,72 @@ type hypemanBackend struct { exitCh chan error } -// newHypemanBackend validates configuration and constructs a hypeman-backed -// Backend. The hypeman SDK reads HYPEMAN_BASE_URL / HYPEMAN_API_KEY from the -// environment; this constructor additionally wires the kernel-images-specific -// override vars (KI_E2E_HYPEMAN_BASE_URL, HYPEMAN_AUTH_TOKEN). -func newHypemanBackend(image string) (Backend, error) { - var opts []option.RequestOption - if base := strings.TrimSpace(os.Getenv(envHypemanBaseURL)); base != "" { - opts = append(opts, option.WithBaseURL(base)) - } - if token := strings.TrimSpace(os.Getenv(envHypemanToken)); token != "" { - opts = append(opts, option.WithAPIKey(token)) - } - - baseURL := strings.TrimSpace(os.Getenv(envHypemanBaseURL)) - if baseURL == "" { - baseURL = strings.TrimSpace(os.Getenv("HYPEMAN_BASE_URL")) - } - - // Fail fast with an actionable message if neither this var nor the SDK's - // native vars provide connection details. - hasBase := baseURL != "" - hasToken := strings.TrimSpace(os.Getenv(envHypemanToken)) != "" || strings.TrimSpace(os.Getenv("HYPEMAN_API_KEY")) != "" - if !hasBase || !hasToken { +// hypemanConfig holds every option for the hypeman backend. Callers populate it +// explicitly; the backend itself reads no environment variables. The e2e factory +// builds it once via hypemanConfigFromEnv, but other callers can construct it +// directly (e.g. 
a future programmatic harness) and decide how to source values. +type hypemanConfig struct { + // BaseURL and Token authenticate against the hypeman control API. Both are + // required (validated by newHypemanBackend). + BaseURL string + Token string + // IngressDomain is the wildcard ingress base domain. If empty (and not + // RawIP), it is derived from BaseURL by stripping a leading "hypeman." label. + IngressDomain string + // IngressTLS serves ingress endpoints over TLS (https/wss on :443/role port). + IngressTLS bool + // RawIP reaches the instance on its private network IP instead of via ingress + // (needs L3 reachability to the instance subnet). + RawIP bool + // Size overrides the VM memory size; DiskIOBps overrides the disk I/O rate + // limit (hypeman "62MB/s"-style format). Empty DiskIOBps => defaultHypemanDiskIOBps. + Size string + DiskIOBps string + // GPUDevices attaches PCI-passthrough devices; GPUProfile requests a vGPU + // profile (e.g. "NVIDIA L40S-2Q"), required to boot the vGPU browser image. + GPUDevices []string + GPUProfile string +} + +// hypemanConfigFromEnv resolves a hypemanConfig from the KI_E2E_HYPEMAN_* / +// HYPEMAN_* environment variables. This is the single place the hypeman backend's +// configuration is read from the environment; the backend and Start do not. 
+func hypemanConfigFromEnv() hypemanConfig { + return hypemanConfig{ + BaseURL: firstNonEmpty(os.Getenv(envHypemanBaseURL), os.Getenv("HYPEMAN_BASE_URL")), + Token: firstNonEmpty(os.Getenv(envHypemanToken), os.Getenv("HYPEMAN_API_KEY")), + IngressDomain: strings.TrimSpace(os.Getenv(envHypemanIngressDomain)), + IngressTLS: envBoolDefault(envHypemanIngressTLS, true), + RawIP: isTruthy(os.Getenv(envHypemanRawIP)), + Size: strings.TrimSpace(os.Getenv(envHypemanInstanceSize)), + DiskIOBps: strings.TrimSpace(os.Getenv(envHypemanDiskIOBps)), + GPUDevices: parseCommaList(os.Getenv(envHypemanGPUDevices)), + GPUProfile: strings.TrimSpace(os.Getenv(envHypemanGPUProfile)), + } +} + +// newHypemanBackend validates the config and constructs a hypeman-backed Backend. +// It reads no environment — all options come from cfg. +func newHypemanBackend(image string, cfg hypemanConfig) (Backend, error) { + if cfg.BaseURL == "" || cfg.Token == "" { return nil, fmt.Errorf( "hypeman backend requires a base URL (%s or HYPEMAN_BASE_URL) and a token (%s or HYPEMAN_API_KEY)", envHypemanBaseURL, envHypemanToken, ) } - domain := strings.TrimSpace(os.Getenv(envHypemanIngressDomain)) + domain := cfg.IngressDomain if domain == "" { - domain = deriveIngressDomain(baseURL) + domain = deriveIngressDomain(cfg.BaseURL) } - rawIP := isTruthy(os.Getenv(envHypemanRawIP)) return &hypemanBackend{ - client: hypeman.NewClient(opts...), + client: hypeman.NewClient(option.WithBaseURL(cfg.BaseURL), option.WithAPIKey(cfg.Token)), image: image, - useIngress: !rawIP && domain != "", + cfg: cfg, + useIngress: !cfg.RawIP && domain != "", ingressDomain: domain, - ingressTLS: envBoolDefault(envHypemanIngressTLS, true), + ingressTLS: cfg.IngressTLS, exitCh: make(chan error, 1), }, nil } @@ -199,15 +236,20 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { Name: c.name, Env: env, } - if size := strings.TrimSpace(os.Getenv(envHypemanInstanceSize)); size != "" { - params.Size = 
hypeman.String(size) + if c.cfg.Size != "" { + params.Size = hypeman.String(c.cfg.Size) } - if devs := parseCommaList(os.Getenv(envHypemanGPUDevices)); len(devs) > 0 { - params.Devices = devs + if len(c.cfg.GPUDevices) > 0 { + params.Devices = c.cfg.GPUDevices } - if profile := strings.TrimSpace(os.Getenv(envHypemanGPUProfile)); profile != "" { - params.GPU = hypeman.InstanceNewParamsGPU{Profile: hypeman.String(profile)} + if c.cfg.GPUProfile != "" { + params.GPU = hypeman.InstanceNewParamsGPU{Profile: hypeman.String(c.cfg.GPUProfile)} } + diskIO := c.cfg.DiskIOBps + if diskIO == "" { + diskIO = defaultHypemanDiskIOBps + } + params.DiskIoBps = hypeman.String(diskIO) inst, err := c.client.Instances.New(ctx, params) if err != nil { @@ -509,6 +551,16 @@ func envBoolDefault(name string, def bool) bool { return isTruthy(v) } +// firstNonEmpty returns the first argument that is non-empty after trimming. +func firstNonEmpty(vals ...string) string { + for _, v := range vals { + if t := strings.TrimSpace(v); t != "" { + return t + } + } + return "" +} + func parseCommaList(s string) []string { var out []string for _, part := range strings.Split(s, ",") { diff --git a/server/e2e/backend_test.go b/server/e2e/backend_test.go index 0d66de90..060bbec7 100644 --- a/server/e2e/backend_test.go +++ b/server/e2e/backend_test.go @@ -42,21 +42,21 @@ func TestBackendKindFromEnv(t *testing.T) { // TestNewHypemanBackendRequiresConfig ensures the hypeman backend fails fast and // with an actionable message when connection details are missing. func TestNewHypemanBackendRequiresConfig(t *testing.T) { - // Clear every env var the backend (and the SDK) consult. 
- for _, k := range []string{envHypemanBaseURL, "HYPEMAN_BASE_URL", envHypemanToken, "HYPEMAN_API_KEY"} { - t.Setenv(k, "") + if _, err := newHypemanBackend("some/image:tag", hypemanConfig{}); err == nil { + t.Fatal("expected error when base URL/token are empty, got nil") } - if _, err := newHypemanBackend("some/image:tag"); err == nil { - t.Fatal("expected error when hypeman base URL/token are unset, got nil") + if _, err := newHypemanBackend("some/image:tag", hypemanConfig{BaseURL: "http://x"}); err == nil { + t.Fatal("expected error when token is empty, got nil") } } -// TestNewHypemanBackendWithConfig ensures a valid configuration constructs a -// backend without error. +// TestNewHypemanBackendWithConfig ensures a valid config constructs a backend +// without error — and without reading the environment. func TestNewHypemanBackendWithConfig(t *testing.T) { - t.Setenv(envHypemanBaseURL, "http://hypeman.example.invalid:8080") - t.Setenv(envHypemanToken, "test-token-not-a-real-secret") - b, err := newHypemanBackend("some/image:tag") + b, err := newHypemanBackend("some/image:tag", hypemanConfig{ + BaseURL: "http://hypeman.example.invalid:8080", + Token: "test-token-not-a-real-secret", + }) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -65,6 +65,35 @@ func TestNewHypemanBackendWithConfig(t *testing.T) { } } +// TestHypemanConfigFromEnv verifies env resolution happens in one place: the +// SDK-native fallbacks, the TLS default, and the comma-split GPU devices. 
+func TestHypemanConfigFromEnv(t *testing.T) { + t.Setenv(envHypemanBaseURL, "") + t.Setenv("HYPEMAN_BASE_URL", "https://hypeman.dev-x.kernel.sh") + t.Setenv(envHypemanToken, "") + t.Setenv("HYPEMAN_API_KEY", "tok") + t.Setenv(envHypemanIngressTLS, "") + t.Setenv(envHypemanGPUDevices, "a, b ,c") + t.Setenv(envHypemanGPUProfile, "NVIDIA L40S-2Q") + + cfg := hypemanConfigFromEnv() + if cfg.BaseURL != "https://hypeman.dev-x.kernel.sh" { + t.Errorf("BaseURL = %q (expected HYPEMAN_BASE_URL fallback)", cfg.BaseURL) + } + if cfg.Token != "tok" { + t.Errorf("Token = %q (expected HYPEMAN_API_KEY fallback)", cfg.Token) + } + if !cfg.IngressTLS { + t.Errorf("IngressTLS = false, want default true") + } + if len(cfg.GPUDevices) != 3 || cfg.GPUDevices[0] != "a" || cfg.GPUDevices[2] != "c" { + t.Errorf("GPUDevices = %v, want [a b c]", cfg.GPUDevices) + } + if cfg.GPUProfile != "NVIDIA L40S-2Q" { + t.Errorf("GPUProfile = %q", cfg.GPUProfile) + } +} + // TestHypemanRawIPMode verifies endpoint derivation in the default raw-IP mode // (no ingress domain): the private IP on the fixed guest ports. func TestHypemanRawIPMode(t *testing.T) { From 0a569114a764589696983bdc5124530968d5ef2b Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 4 Jun 2026 16:41:19 -0400 Subject: [PATCH 06/11] ci: add test-hypeman job running the e2e suite against the Hypeman backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the `test` job but with KI_E2E_BACKEND=hypeman, pointing E2E_CHROMIUM_*_IMAGE at the public onkernel/chromium-{headful,headless}: tags that build-headful/build-headless just pushed. Hypeman pulls those images itself on instance create, so the runner needs no docker login. Uses org var/secret HYPEMAN_API_URL / HYPEMAN_API_KEY. 
Note: we deliberately do NOT build the images inside Hypeman — its builder VM's writable layer is RAM-backed and hard-capped at memory_mb=16384, which is too small for the chromium image build (fails with "no space left on device"). The registry-pull approach sidesteps that entirely. See PR description. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/server-test.yaml | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/.github/workflows/server-test.yaml b/.github/workflows/server-test.yaml index f65ae214..2627425a 100644 --- a/.github/workflows/server-test.yaml +++ b/.github/workflows/server-test.yaml @@ -62,3 +62,56 @@ jobs: env: E2E_CHROMIUM_HEADFUL_IMAGE: onkernel/chromium-headful:${{ steps.vars.outputs.short_sha }} E2E_CHROMIUM_HEADLESS_IMAGE: onkernel/chromium-headless:${{ steps.vars.outputs.short_sha }} + + # Runs the same e2e suite against the Hypeman backend instead of local Docker. + # We do NOT build the images in Hypeman (its builder VM is RAM-disk-capped at + # memory_mb=16384, which is too small for the chromium image build — see PR + # description). Instead we reuse the public images that build-headful/ + # build-headless just pushed to Docker Hub: Hypeman pulls them itself on + # instance create (any public/private registry works via the host's docker + # creds), so the runner needs no docker login. KI_E2E_BACKEND=hypeman selects + # the remote-VM backend; it reaches instances through the host's wildcard + # ingress derived from HYPEMAN_BASE_URL. 
+ test-hypeman: + runs-on: ubuntu-latest + needs: [build-headful, build-headless] + permissions: + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Chrome + uses: browser-actions/setup-chrome@v2 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + with: + version: 10 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: "server/go.mod" + cache: true + + - name: Compute short SHA for images + id: vars + shell: bash + run: echo "short_sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + + - name: Run server Makefile tests against Hypeman + run: make test + working-directory: server + env: + KI_E2E_BACKEND: hypeman + HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_API_URL }} + HYPEMAN_API_KEY: ${{ secrets.HYPEMAN_API_KEY }} + E2E_CHROMIUM_HEADFUL_IMAGE: onkernel/chromium-headful:${{ steps.vars.outputs.short_sha }} + E2E_CHROMIUM_HEADLESS_IMAGE: onkernel/chromium-headless:${{ steps.vars.outputs.short_sha }} From 32c997ed029048def40e7d7c802d6561dbb0b7f4 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 5 Jun 2026 13:17:13 -0400 Subject: [PATCH 07/11] ci: fix hypeman var reference (vars.HYPEMAN_BASE_URL, not HYPEMAN_API_URL) --- .github/workflows/server-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/server-test.yaml b/.github/workflows/server-test.yaml index 2627425a..fb5f1eac 100644 --- a/.github/workflows/server-test.yaml +++ b/.github/workflows/server-test.yaml @@ -111,7 +111,7 @@ jobs: working-directory: server env: KI_E2E_BACKEND: hypeman - HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_API_URL }} + HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_BASE_URL }} HYPEMAN_API_KEY: ${{ secrets.HYPEMAN_API_KEY }} E2E_CHROMIUM_HEADFUL_IMAGE: onkernel/chromium-headful:${{ steps.vars.outputs.short_sha }} E2E_CHROMIUM_HEADLESS_IMAGE: onkernel/chromium-headless:${{ steps.vars.outputs.short_sha }} From 
29b7a111619e598e382859e43bf2dd178cb6a0af Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 5 Jun 2026 13:23:45 -0400 Subject: [PATCH 08/11] ci: hypeman job runs e2e only (make test-e2e); split Makefile test targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test-hypeman job ran `make test`, which runs unit tests first — a flaky chromium-dependent unit test (lib/devtoolsproxy, unrelated to the backend) failed and blocked the e2e suite from running at all. Split `test` into `test-unit` + `test-e2e` and point the hypeman job at `test-e2e` so it exercises only the e2e suite on the Hypeman backend (unit tests already run in the `test` job). The var/secret fix is confirmed working — the prior config error is gone. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/server-test.yaml | 7 +++++-- server/Makefile | 14 ++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/server-test.yaml b/.github/workflows/server-test.yaml index fb5f1eac..91cd3b8a 100644 --- a/.github/workflows/server-test.yaml +++ b/.github/workflows/server-test.yaml @@ -106,8 +106,11 @@ jobs: shell: bash run: echo "short_sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" - - name: Run server Makefile tests against Hypeman - run: make test + # Only the e2e suite — unit tests run in the `test` job above; re-running + # them here just adds an unrelated failure surface (and they don't touch + # the backend abstraction). 
+ - name: Run server e2e tests against Hypeman + run: make test-e2e working-directory: server env: KI_E2E_BACKEND: hypeman diff --git a/server/Makefile b/server/Makefile index f51ef0b9..0ce67332 100644 --- a/server/Makefile +++ b/server/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate build dev test clean +.PHONY: oapi-generate build dev test test-unit test-e2e clean BIN_DIR ?= $(CURDIR)/bin RECORDING_DIR ?= $(CURDIR)/recordings @@ -30,12 +30,18 @@ build: | $(BIN_DIR) dev: build $(RECORDING_DIR) OUTPUT_DIR=$(RECORDING_DIR) DISPLAY_NUM=$(DISPLAY_NUM) ./bin/api -# we run the e2e tests separately so that we can see the logs from the e2e tests as they run instead of waiting for all tests to complete -test: +# `test` runs unit + e2e. The two are split so callers (e.g. the Hypeman CI job) +# can run just the e2e suite, and so e2e logs stream as they run instead of +# waiting for all unit tests to complete. +test: test-unit test-e2e + +test-unit: go vet ./... go test -v -race $$(go list ./... | grep -v /e2e$$) + +test-e2e: @echo "" - @echo "=== Running e2e tests (testcontainers — this may take a few minutes) ===" + @echo "=== Running e2e tests (this may take a few minutes) ===" @echo "" go test -v -race -timeout 120m ./e2e/ From 14d48b4ab8471e93a6ee9beafccc810fe2ae012e Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 5 Jun 2026 13:31:46 -0400 Subject: [PATCH 09/11] e2e: clean up hypeman instance on Start failure; tag + nightly reaper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugbot (correctly): after Instances.New, a failure in waitForRunning/ ensureIngress/resolveIP returned from Start without deleting the instance, and tests only register Stop after a successful Start — so failed runs leaked a remote VM. Start now tears the instance down (fresh ctx, so a cancelled/expired parent ctx still deletes) if bring-up fails. 
Defense in depth for the cases Start can't cover (panic/timeout/crashed runner after a successful Start): tag instances managed-by=ki-e2e on create, and add a nightly workflow (hypeman-reap-e2e.yml) that deletes "ki-e2e-" instances older than 3h (> the 2h e2e timeout, so it can't touch an in-progress run). One reaper covers instances from both this repo and the private fork (shared dev server). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/hypeman-reap-e2e.yml | 90 ++++++++++++++++++++++++++ server/e2e/backend_hypeman.go | 19 ++++++ 2 files changed, 109 insertions(+) create mode 100644 .github/workflows/hypeman-reap-e2e.yml diff --git a/.github/workflows/hypeman-reap-e2e.yml b/.github/workflows/hypeman-reap-e2e.yml new file mode 100644 index 00000000..66482853 --- /dev/null +++ b/.github/workflows/hypeman-reap-e2e.yml @@ -0,0 +1,90 @@ +name: Reap leaked Hypeman e2e instances + +# The Hypeman e2e backend (server/e2e) names every instance "ki-e2e-" and +# tags it managed-by=ki-e2e. Start() deletes its instance on bring-up failure +# and tests delete on Stop, but a panic/timeout/crashed runner can still strand a +# remote VM. This nightly job reaps any "ki-e2e-" instance older than the age +# threshold (default 3h — comfortably longer than the 2h e2e timeout, so it can +# never delete an in-progress run's instance). It only touches instances; the +# shared ki-e2e ingresses are host-level and intentionally persistent. 
+ +on: + schedule: + - cron: "0 7 * * *" # 07:00 UTC nightly + workflow_dispatch: + inputs: + max_age_hours: + description: "Delete ki-e2e instances older than this many hours" + default: "3" + dry_run: + description: "List what would be deleted without deleting" + default: "false" + +permissions: + contents: read + +jobs: + reap: + runs-on: ubuntu-latest + steps: + - name: Delete stale ki-e2e Hypeman instances + env: + HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_BASE_URL }} + HYPEMAN_API_KEY: ${{ secrets.HYPEMAN_API_KEY }} + MAX_AGE_HOURS: ${{ github.event.inputs.max_age_hours || '3' }} + DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }} + run: | + set -euo pipefail + if [ -z "${HYPEMAN_BASE_URL:-}" ] || [ -z "${HYPEMAN_API_KEY:-}" ]; then + echo "::error::HYPEMAN_BASE_URL (var) and HYPEMAN_API_KEY (secret) must be set" + exit 1 + fi + python3 - <<'PY' + import os, json, datetime, urllib.request, urllib.error + + base = os.environ["HYPEMAN_BASE_URL"].rstrip("/") + key = os.environ["HYPEMAN_API_KEY"] + max_age = float(os.environ.get("MAX_AGE_HOURS", "3")) + dry_run = os.environ.get("DRY_RUN", "false").lower() in ("1", "true", "yes") + + def call(method, path): + req = urllib.request.Request( + base + path, method=method, + headers={"Authorization": "Bearer " + key, "Accept": "application/json"}, + ) + return urllib.request.urlopen(req, timeout=30) + + instances = json.load(call("GET", "/instances")) + now = datetime.datetime.now(datetime.timezone.utc) + cutoff = now - datetime.timedelta(hours=max_age) + + deleted = kept = failed = 0 + for inst in instances: + name = inst.get("name", "") + if not name.startswith("ki-e2e-"): + continue + created = inst.get("created_at", "") + try: + t = datetime.datetime.fromisoformat(created.replace("Z", "+00:00")) + except Exception: + t = None # unknown age -> treat as stale (reap) + if t is not None and t > cutoff: + kept += 1 + print(f"keep (fresh, < {max_age}h): {name} created={created}") + continue + if dry_run: + 
print(f"[dry-run] would delete {name} ({inst['id']}) created={created}") + deleted += 1 + continue + try: + call("DELETE", "/instances/" + inst["id"]) + deleted += 1 + print(f"deleted {name} ({inst['id']}) created={created}") + except urllib.error.HTTPError as e: + failed += 1 + print(f"::warning::failed to delete {name} ({inst['id']}): {e.code} {e.reason}") + + print(f"done: deleted={deleted} kept_fresh={kept} failed={failed} (dry_run={dry_run})") + if failed: + raise SystemExit(1) + PY diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index 884a345c..6191c59f 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -235,6 +235,9 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { Image: c.image, Name: c.name, Env: env, + // Tag so leaked instances (e.g. a test that panics after Start) are + // reapable by a scheduled job, in addition to the ki-e2e- name prefix. + Tags: map[string]string{ingressTagKey: ingressTagVal}, } if c.cfg.Size != "" { params.Size = hypeman.String(c.cfg.Size) @@ -257,6 +260,22 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { } c.instanceID = inst.ID + // The instance now exists. Callers (tests) only register Stop after Start + // returns nil, so if bring-up fails we must delete it here or leak a remote + // VM. Use a fresh context so cleanup still runs even if ctx was cancelled or + // hit its deadline (the common bring-up failure). + if err := c.bringUp(ctx); err != nil { + cleanupCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + _ = c.Stop(cleanupCtx) + return err + } + return nil +} + +// bringUp waits for the just-created instance to reach Running and prepares the +// selected routing mode (ingress or raw IP). It assumes c.instanceID is set. +func (c *hypemanBackend) bringUp(ctx context.Context) error { // Wait for the guest program to start. 
The SDK caps the server-side wait at // a few minutes; loop until our context deadline if needed. if err := c.waitForRunning(ctx); err != nil { From 619b071d980bcd369afa909f182ef46927782aab Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 5 Jun 2026 15:12:16 -0400 Subject: [PATCH 10/11] e2e: retry hypeman create while image is still being pulled (image_not_ready) A freshly-pushed image tag isn't on the hypeman host yet on first use; the create call returns a retryable 400 image_not_ready while the pull runs in the background. Poll Instances.New until the pull completes or ctx is done, instead of failing the first test that uses a new tag. Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend_hypeman.go | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index 6191c59f..691d7518 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -254,9 +254,9 @@ func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { } params.DiskIoBps = hypeman.String(diskIO) - inst, err := c.client.Instances.New(ctx, params) + inst, err := c.createWithImagePull(ctx, params) if err != nil { - return fmt.Errorf("hypeman: create instance: %w", err) + return err } c.instanceID = inst.ID @@ -297,6 +297,35 @@ func (c *hypemanBackend) bringUp(ctx context.Context) error { return nil } +// createWithImagePull creates the instance, retrying while Hypeman reports the +// image is still being pulled. A freshly-pushed tag isn't on the host yet, so +// the first create triggers a background pull and returns a retryable 400 +// image_not_ready; we poll until the pull completes or ctx is done. 
+func (c *hypemanBackend) createWithImagePull(ctx context.Context, params hypeman.InstanceNewParams) (*hypeman.Instance, error) { + ticker := time.NewTicker(3 * time.Second) + defer ticker.Stop() + for { + inst, err := c.client.Instances.New(ctx, params) + if err == nil { + return inst, nil + } + if !isImageNotReady(err) { + return nil, fmt.Errorf("hypeman: create instance: %w", err) + } + select { + case <-ctx.Done(): + return nil, fmt.Errorf("hypeman: create instance: image %q still pulling: %w", c.image, ctx.Err()) + case <-ticker.C: + } + } +} + +// isImageNotReady reports whether err is Hypeman's retryable "image is being +// pulled" response (HTTP 400, code image_not_ready). +func isImageNotReady(err error) bool { + return err != nil && strings.Contains(err.Error(), "image_not_ready") +} + // ensureIngress finds or creates a wildcard ingress for each role. Ingresses are // host-level constructs keyed by rule shape (wildcard hostname + listen port -> // target port), so we reuse any pre-existing rule (e.g. the API's browser From 6ef671477e51ae734b9d4d411d611dbe308ea1d3 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 5 Jun 2026 15:15:00 -0400 Subject: [PATCH 11/11] ci: drop public hypeman e2e job + reaper (re-homed to kernel-images-private) The hypeman e2e backend lives upstream here, but actually *running* it against the staging hypeman server is moving to kernel-images-private on a Tailscale- joined runner: CDP/ChromeDriver are being made tailnet-only, kernel-images is public (its CI logs would leak live instance CDP URLs), and self-hosted/tailnet runners shouldn't be exposed to a public repo. The public CI keeps the docker- backend e2e only. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/hypeman-reap-e2e.yml | 90 -------------------------- .github/workflows/server-test.yaml | 56 ---------------- 2 files changed, 146 deletions(-) delete mode 100644 .github/workflows/hypeman-reap-e2e.yml diff --git a/.github/workflows/hypeman-reap-e2e.yml b/.github/workflows/hypeman-reap-e2e.yml deleted file mode 100644 index 66482853..00000000 --- a/.github/workflows/hypeman-reap-e2e.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: Reap leaked Hypeman e2e instances - -# The Hypeman e2e backend (server/e2e) names every instance "ki-e2e-" and -# tags it managed-by=ki-e2e. Start() deletes its instance on bring-up failure -# and tests delete on Stop, but a panic/timeout/crashed runner can still strand a -# remote VM. This nightly job reaps any "ki-e2e-" instance older than the age -# threshold (default 3h — comfortably longer than the 2h e2e timeout, so it can -# never delete an in-progress run's instance). It only touches instances; the -# shared ki-e2e ingresses are host-level and intentionally persistent. 
- -on: - schedule: - - cron: "0 7 * * *" # 07:00 UTC nightly - workflow_dispatch: - inputs: - max_age_hours: - description: "Delete ki-e2e instances older than this many hours" - default: "3" - dry_run: - description: "List what would be deleted without deleting" - default: "false" - -permissions: - contents: read - -jobs: - reap: - runs-on: ubuntu-latest - steps: - - name: Delete stale ki-e2e Hypeman instances - env: - HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_BASE_URL }} - HYPEMAN_API_KEY: ${{ secrets.HYPEMAN_API_KEY }} - MAX_AGE_HOURS: ${{ github.event.inputs.max_age_hours || '3' }} - DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }} - run: | - set -euo pipefail - if [ -z "${HYPEMAN_BASE_URL:-}" ] || [ -z "${HYPEMAN_API_KEY:-}" ]; then - echo "::error::HYPEMAN_BASE_URL (var) and HYPEMAN_API_KEY (secret) must be set" - exit 1 - fi - python3 - <<'PY' - import os, json, datetime, urllib.request, urllib.error - - base = os.environ["HYPEMAN_BASE_URL"].rstrip("/") - key = os.environ["HYPEMAN_API_KEY"] - max_age = float(os.environ.get("MAX_AGE_HOURS", "3")) - dry_run = os.environ.get("DRY_RUN", "false").lower() in ("1", "true", "yes") - - def call(method, path): - req = urllib.request.Request( - base + path, method=method, - headers={"Authorization": "Bearer " + key, "Accept": "application/json"}, - ) - return urllib.request.urlopen(req, timeout=30) - - instances = json.load(call("GET", "/instances")) - now = datetime.datetime.now(datetime.timezone.utc) - cutoff = now - datetime.timedelta(hours=max_age) - - deleted = kept = failed = 0 - for inst in instances: - name = inst.get("name", "") - if not name.startswith("ki-e2e-"): - continue - created = inst.get("created_at", "") - try: - t = datetime.datetime.fromisoformat(created.replace("Z", "+00:00")) - except Exception: - t = None # unknown age -> treat as stale (reap) - if t is not None and t > cutoff: - kept += 1 - print(f"keep (fresh, < {max_age}h): {name} created={created}") - continue - if dry_run: - 
print(f"[dry-run] would delete {name} ({inst['id']}) created={created}") - deleted += 1 - continue - try: - call("DELETE", "/instances/" + inst["id"]) - deleted += 1 - print(f"deleted {name} ({inst['id']}) created={created}") - except urllib.error.HTTPError as e: - failed += 1 - print(f"::warning::failed to delete {name} ({inst['id']}): {e.code} {e.reason}") - - print(f"done: deleted={deleted} kept_fresh={kept} failed={failed} (dry_run={dry_run})") - if failed: - raise SystemExit(1) - PY diff --git a/.github/workflows/server-test.yaml b/.github/workflows/server-test.yaml index 91cd3b8a..f65ae214 100644 --- a/.github/workflows/server-test.yaml +++ b/.github/workflows/server-test.yaml @@ -62,59 +62,3 @@ jobs: env: E2E_CHROMIUM_HEADFUL_IMAGE: onkernel/chromium-headful:${{ steps.vars.outputs.short_sha }} E2E_CHROMIUM_HEADLESS_IMAGE: onkernel/chromium-headless:${{ steps.vars.outputs.short_sha }} - - # Runs the same e2e suite against the Hypeman backend instead of local Docker. - # We do NOT build the images in Hypeman (its builder VM is RAM-disk-capped at - # memory_mb=16384, which is too small for the chromium image build — see PR - # description). Instead we reuse the public images that build-headful/ - # build-headless just pushed to Docker Hub: Hypeman pulls them itself on - # instance create (any public/private registry works via the host's docker - # creds), so the runner needs no docker login. KI_E2E_BACKEND=hypeman selects - # the remote-VM backend; it reaches instances through the host's wildcard - # ingress derived from HYPEMAN_BASE_URL. 
- test-hypeman: - runs-on: ubuntu-latest - needs: [build-headful, build-headless] - permissions: - contents: read - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Chrome - uses: browser-actions/setup-chrome@v2 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 22 - - - name: Set up pnpm - uses: pnpm/action-setup@v4 - with: - version: 10 - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: "server/go.mod" - cache: true - - - name: Compute short SHA for images - id: vars - shell: bash - run: echo "short_sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" - - # Only the e2e suite — unit tests run in the `test` job above; re-running - # them here just adds an unrelated failure surface (and they don't touch - # the backend abstraction). - - name: Run server e2e tests against Hypeman - run: make test-e2e - working-directory: server - env: - KI_E2E_BACKEND: hypeman - HYPEMAN_BASE_URL: ${{ vars.HYPEMAN_BASE_URL }} - HYPEMAN_API_KEY: ${{ secrets.HYPEMAN_API_KEY }} - E2E_CHROMIUM_HEADFUL_IMAGE: onkernel/chromium-headful:${{ steps.vars.outputs.short_sha }} - E2E_CHROMIUM_HEADLESS_IMAGE: onkernel/chromium-headless:${{ steps.vars.outputs.short_sha }}