diff --git a/Dockerfile b/Dockerfile index d75852e..91fa54d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -81,8 +81,7 @@ RUN ARCH=${TARGETARCH:-amd64} && \ echo "Downloading gcloud SDK from: ${url}" && \ curl -o "/tmp/${filename}" -fsSL "${url}" && \ echo "${GCLOUD_SHA256} /tmp/${filename}" | sha256sum -c - && \ - tar -xz -C /tmp -f "/tmp/${filename}" && \ - /tmp/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin --quiet + tar -xz -C /tmp -f "/tmp/${filename}" # Stage 2: Runtime image based on Red Hat UBI Minimal FROM registry.access.redhat.com/ubi9/ubi-minimal:latest@sha256:83006d535923fcf1345067873524a3980316f51794f01d8655be55d6e9387183 @@ -164,6 +163,7 @@ RUN ARCH=${TARGETARCH:-amd64} && \ # Copy gcloud SDK from builder stage (extracted there to avoid UBI filesystem restrictions) COPY --from=builder /tmp/google-cloud-sdk /opt/google-cloud-sdk RUN ln -s /opt/google-cloud-sdk/bin/gcloud /usr/local/bin/gcloud && \ + /opt/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin --quiet && \ ln -s /opt/google-cloud-sdk/bin/gke-gcloud-auth-plugin /usr/local/bin/gke-gcloud-auth-plugin # 2. AWS (EKS) - aws-iam-authenticator diff --git a/internal/env/env.go b/internal/env/env.go index efdff66..dd81693 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -9,6 +9,8 @@ import ( "os/exec" "path/filepath" "strings" + "sync" + "time" "github.com/stackrox/roxie/internal/containerutil" "github.com/stackrox/roxie/internal/logger" @@ -18,6 +20,7 @@ import ( var ( RunningInContainer bool RunningInteractively bool + initializationMutex sync.Mutex ) // ClusterType represents different types of Kubernetes clusters @@ -69,10 +72,10 @@ func isRunningInteractively() bool { // ensureInitialized performs lazy initialization of cluster information // This avoids contacting the cluster on package import func ensureInitialized(log *logger.Logger) error { + initializationMutex.Lock() + defer initializationMutex.Unlock() + if !initialized { - if RunningInContainer { - log.Dim("Running containerized.") - } kubeConfig, err := fetchKubeConfig(log) if err != nil { return err @@ -133,14 +136,36 @@ type KubeCluster struct { } // Initialize performs environment initialization and sets the global variables. +// Retries on failure to handle race conditions during container startup, which I have +// observed in relation with podman :U mounts: the container was starting before the gcloud config +// was writable by the container user, hence GKE authentication failed immediately. func Initialize(log *logger.Logger) error { if log == nil { log = logger.New() } - if err := ensureInitialized(log); err != nil { - return fmt.Errorf("failed to initialize environment: %w", err) + if RunningInContainer { + log.Dim("Running containerized.") } - return nil + + const maxRetries = 3 + const delay = 500 * time.Millisecond + var lastErr error + + for attempt := 1; attempt <= maxRetries; attempt++ { + err := ensureInitialized(log) + if err == nil { + return nil + } + + lastErr = err + + if attempt < maxRetries { + log.Dimf("Attempt %d/%d failed: %v, retrying...", attempt, maxRetries, err) + time.Sleep(delay) + } + } + + return fmt.Errorf("failed to initialize environment after %d attempts: %w", maxRetries, lastErr) } // detectClusterType implements the cluster type detection logic