From 989fc4fee6df5d5014b696d21e08b90542bfc9ef Mon Sep 17 00:00:00 2001 From: Moritz Clasmeier Date: Tue, 7 Apr 2026 17:54:13 +0200 Subject: [PATCH 1/5] The gke auth plugin needs to be installed in the runtime stage, otherwise we will get an architecture mismatch --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d75852e..91fa54d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -81,8 +81,7 @@ RUN ARCH=${TARGETARCH:-amd64} && \ echo "Downloading gcloud SDK from: ${url}" && \ curl -o "/tmp/${filename}" -fsSL "${url}" && \ echo "${GCLOUD_SHA256} /tmp/${filename}" | sha256sum -c - && \ - tar -xz -C /tmp -f "/tmp/${filename}" && \ - /tmp/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin --quiet + tar -xz -C /tmp -f "/tmp/${filename}" # Stage 2: Runtime image based on Red Hat UBI Minimal FROM registry.access.redhat.com/ubi9/ubi-minimal:latest@sha256:83006d535923fcf1345067873524a3980316f51794f01d8655be55d6e9387183 @@ -164,6 +163,7 @@ RUN ARCH=${TARGETARCH:-amd64} && \ # Copy gcloud SDK from builder stage (extracted there to avoid UBI filesystem restrictions) COPY --from=builder /tmp/google-cloud-sdk /opt/google-cloud-sdk RUN ln -s /opt/google-cloud-sdk/bin/gcloud /usr/local/bin/gcloud && \ + /opt/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin --quiet && \ ln -s /opt/google-cloud-sdk/bin/gke-gcloud-auth-plugin /usr/local/bin/gke-gcloud-auth-plugin # 2. 
AWS (EKS) - aws-iam-authenticator From 2ca2b81369cb821672529e3961a15a104916be71 Mon Sep 17 00:00:00 2001 From: Moritz Clasmeier Date: Tue, 7 Apr 2026 19:21:02 +0200 Subject: [PATCH 2/5] Add retry mechanism for initialization --- internal/env/env.go | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/internal/env/env.go b/internal/env/env.go index efdff66..963f090 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -9,6 +9,7 @@ import ( "os/exec" "path/filepath" "strings" + "time" "github.com/stackrox/roxie/internal/containerutil" "github.com/stackrox/roxie/internal/logger" @@ -70,9 +71,6 @@ func isRunningInteractively() bool { // This avoids contacting the cluster on package import func ensureInitialized(log *logger.Logger) error { if !initialized { - if RunningInContainer { - log.Dim("Running containerized.") - } kubeConfig, err := fetchKubeConfig(log) if err != nil { return err @@ -133,14 +131,36 @@ type KubeCluster struct { } // Initialize performs environment initialization and sets the global variables. +// Retries on failure to handle a race condition during container startup that has been +// observed with podman :U mounts: the container started before the gcloud config +// was writable by the container user, so GKE authentication failed immediately. 
func Initialize(log *logger.Logger) error { if log == nil { log = logger.New() } - if err := ensureInitialized(log); err != nil { - return fmt.Errorf("failed to initialize environment: %w", err) + if RunningInContainer { + log.Dim("Running containerized.") } - return nil + + const maxRetries = 3 + const delay = 20 * time.Millisecond + var lastErr error + + for attempt := 1; attempt <= maxRetries; attempt++ { + err := ensureInitialized(log) + if err == nil { + return nil + } + + lastErr = err + + if attempt < maxRetries { + log.Dim("Retrying...") + time.Sleep(delay) + } + } + + return fmt.Errorf("failed to initialize environment after %d attempts: %w", maxRetries, lastErr) } // detectClusterType implements the cluster type detection logic From a41a3fc75574916c57f48a4b056888657f52eed0 Mon Sep 17 00:00:00 2001 From: Moritz Clasmeier Date: Wed, 8 Apr 2026 09:05:51 +0200 Subject: [PATCH 3/5] Log retry attempt --- internal/env/env.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/env/env.go b/internal/env/env.go index 963f090..0ce8d09 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -155,7 +155,7 @@ func Initialize(log *logger.Logger) error { lastErr = err if attempt < maxRetries { - log.Dim("Retrying...") + log.Dimf("Attempt %d/%d failed: %v, retrying...", attempt, maxRetries, err) time.Sleep(delay) } } From 04b7877f8fbec5009886a5cd6c1996cf4d65a5c1 Mon Sep 17 00:00:00 2001 From: Moritz Clasmeier Date: Wed, 8 Apr 2026 09:06:05 +0200 Subject: [PATCH 4/5] Increase retry delay to 500ms so that a single retry is usually sufficient. 
--- internal/env/env.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/env/env.go b/internal/env/env.go index 0ce8d09..b9cee7b 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -143,7 +143,7 @@ func Initialize(log *logger.Logger) error { } const maxRetries = 3 - const delay = 20 * time.Millisecond + const delay = 500 * time.Millisecond var lastErr error for attempt := 1; attempt <= maxRetries; attempt++ { From 0a9e8e9cd5df98f8a6014e991668ff471254d17c Mon Sep 17 00:00:00 2001 From: Moritz Clasmeier Date: Wed, 8 Apr 2026 09:17:18 +0200 Subject: [PATCH 5/5] Mutex-protect initialization --- internal/env/env.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/env/env.go b/internal/env/env.go index b9cee7b..dd81693 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -9,6 +9,7 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "time" "github.com/stackrox/roxie/internal/containerutil" @@ -19,6 +20,7 @@ import ( var ( RunningInContainer bool RunningInteractively bool + initializationMutex sync.Mutex ) // ClusterType represents different types of Kubernetes clusters @@ -70,6 +72,9 @@ func isRunningInteractively() bool { // ensureInitialized performs lazy initialization of cluster information // This avoids contacting the cluster on package import func ensureInitialized(log *logger.Logger) error { + initializationMutex.Lock() + defer initializationMutex.Unlock() + if !initialized { kubeConfig, err := fetchKubeConfig(log) if err != nil {