simple-container-com · Cre-eD · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/github-actions.Dockerfile b/github-actions.Dockerfile
@@ -94,4 +94,4 @@ RUN pulumi version > /dev/null && \
     gcloud components list --filter="name:gke-gcloud-auth-plugin" --format="value(name)" | grep -q gke-gcloud-auth-plugin
 
 # Set the entrypoint
-ENTRYPOINT ["./github-actions"]
+ENTRYPOINT ["/root/github-actions"]
diff --git a/pkg/clouds/k8s/types.go b/pkg/clouds/k8s/types.go
@@ -50,6 +50,13 @@ type CaddyConfig struct {
 	UseSSL           *bool      `json:"useSSL,omitempty" yaml:"useSSL,omitempty"`                     // whether to use ssl by default (default: true)
 	// Deployment name override for existing Caddy deployments (used when adopting clusters)
 	DeploymentName *string `json:"deploymentName,omitempty" yaml:"deploymentName,omitempty"` // override deployment name when adopting existing Caddy
+	// TerminationGracePeriodSeconds overrides the pod-level terminationGracePeriodSeconds for Caddy.
+	// Should be greater than preStopSleepSeconds. Default: Kubernetes default (30s).
+	TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" yaml:"terminationGracePeriodSeconds,omitempty"`
+	// PreStopSleepSeconds inserts a preStop exec sleep before SIGTERM is sent to Caddy.
+	// Allows load-balancer endpoint propagation and in-flight connection drain before shutdown.
+	// Prevents Cloudflare 521 errors during rolling updates. Default: 0 (disabled).
+	PreStopSleepSeconds *int `json:"preStopSleepSeconds,omitempty" yaml:"preStopSleepSeconds,omitempty"`
 }
 
 type DisruptionBudget struct {

diff --git a/pkg/clouds/pulumi/gcp/gke_autopilot_stack.go b/pkg/clouds/pulumi/gcp/gke_autopilot_stack.go
@@ -9,7 +9,6 @@ import (
 	"os"
 	"os/exec"
 	"strings"
-	"time"
 
 	auth "golang.org/x/oauth2/google"
 
@@ -233,14 +232,23 @@ func GkeAutopilotStack(ctx *sdk.Context, stack api.Stack, input api.ResourceInpu
 			Namespace:    namespace,
 			KubeProvider: kubeProvider,
 			Kubeconfig:   &kubeConfigOutput,
+			// caddy-update-hash goes into spec.template.metadata so Caddy pods roll only when
+			// the Caddyfile actually changes. Content-hash, not wall-clock time, prevents
+			// spurious restarts (and Cloudflare 521s) on every pulumi up.
 			Annotations: map[string]sdk.StringOutput{
-				"simple-container.com/caddy-updated-by": sdk.String(stackName).ToStringOutput(),
-				"simple-container.com/caddy-updated-at": sdk.String(time.Now().UTC().Format(time.RFC3339)).ToStringOutput(),
 				"simple-container.com/caddy-update-hash": sdk.All(sc.CaddyfileEntry).ApplyT(func(entry []any) string {
 					sum := md5.Sum([]byte(entry[0].(string)))
 					return hex.EncodeToString(sum[:])
 				}).(sdk.StringOutput),
 			},
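+			// Informational annotations live on deployment metadata only — no pod restarts. NOTE(review): caddy-updated-at now carries an 8-char Caddyfile hash prefix, not a timestamp — confirm intended.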
+			DeploymentAnnotations: map[string]sdk.StringOutput{
+				"simple-container.com/caddy-updated-by": sdk.String(stackName).ToStringOutput(),
+				"simple-container.com/caddy-updated-at": sdk.All(sc.CaddyfileEntry).ApplyT(func(entry []any) string {
+					sum := md5.Sum([]byte(entry[0].(string)))
+					return hex.EncodeToString(sum[:])[:8]
+				}).(sdk.StringOutput),
+			},
 			Opts: []sdk.ResourceOption{sdk.DependsOn([]sdk.Resource{sc.Service})},
 		})
 		if patchErr != nil {

diff --git a/pkg/clouds/pulumi/kubernetes/caddy.go b/pkg/clouds/pulumi/kubernetes/caddy.go
@@ -243,18 +243,20 @@ func DeployCaddyService(ctx *sdk.Context, caddy CaddyDeployment, input api.Resou
 	}
 
 	sc, err := DeploySimpleContainer(ctx, Args{
-		ServiceType:         serviceType, // to provision external IP
-		ProvisionIngress:    caddy.ProvisionIngress,
-		UseSSL:              useSSL,
-		Namespace:           namespace,
-		DeploymentName:      deploymentName,
-		Input:               input,
-		ServiceAccountName:  lo.ToPtr(serviceAccount.Name),
-		Deployment:          deploymentConfig,
-		SecretVolumes:       caddy.SecretVolumes,       // Cloud credentials volumes (e.g., GCP service account)
-		SecretVolumeOutputs: caddy.SecretVolumeOutputs, // Pulumi outputs for secret volumes
-		SecretEnvs:          secretEnvs,                // Secret environment variables
-		VPA:                 caddy.VPA,                 // Vertical Pod Autoscaler configuration for Caddy
+		ServiceType:                   serviceType, // to provision external IP
+		ProvisionIngress:              caddy.ProvisionIngress,
+		UseSSL:                        useSSL,
+		Namespace:                     namespace,
+		DeploymentName:                deploymentName,
+		Input:                         input,
+		ServiceAccountName:            lo.ToPtr(serviceAccount.Name),
+		Deployment:                    deploymentConfig,
+		SecretVolumes:                 caddy.SecretVolumes,       // Cloud credentials volumes (e.g., GCP service account)
+		SecretVolumeOutputs:           caddy.SecretVolumeOutputs, // Pulumi outputs for secret volumes
+		SecretEnvs:                    secretEnvs,                // Secret environment variables
+		VPA:                           caddy.VPA,                 // Vertical Pod Autoscaler configuration for Caddy
+		TerminationGracePeriodSeconds: lo.FromPtr(caddy.CaddyConfig).TerminationGracePeriodSeconds,
+		PreStopSleepSeconds:           lo.FromPtr(caddy.CaddyConfig).PreStopSleepSeconds,
 		Images: []*ContainerImage{
 			{
 				Container: caddyContainer,

diff --git a/pkg/clouds/pulumi/kubernetes/deployment.go b/pkg/clouds/pulumi/kubernetes/deployment.go
@@ -46,6 +46,10 @@ type Args struct {
 	ReadinessProbe         *k8s.CloudRunProbe // Global readiness probe configuration
 	LivenessProbe          *k8s.CloudRunProbe // Global liveness probe configuration
 	EphemeralSize          string
+	// TerminationGracePeriodSeconds overrides pod-level terminationGracePeriodSeconds.
+	TerminationGracePeriodSeconds *int
+	// PreStopSleepSeconds injects a preStop exec sleep on all containers, allowing LB drain before SIGTERM.
+	PreStopSleepSeconds *int
 }
 
 func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOption) (*SimpleContainer, error) {
@@ -181,13 +185,15 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
 			resources.Requests = sdk.ToStringMap(c.Container.Resources.Requests)
 		}
 
+		lifecycle := buildPreStopLifecycle(args.PreStopSleepSeconds)
+
 		return corev1.ContainerArgs{
 			Args:            sdk.ToStringArray(c.Container.Args),
 			Command:         sdk.ToStringArray(c.Container.Command),
 			Env:             env,
 			Image:           c.ImageName,
 			ImagePullPolicy: sdk.String(lo.If(c.Container.ImagePullPolicy != nil, lo.FromPtr(c.Container.ImagePullPolicy)).Else("IfNotPresent")),
-			Lifecycle:       nil, // TODO
+			Lifecycle:       lifecycle,
 			LivenessProbe:   livenessProbe,
 			Name:            sdk.String(c.Container.Name),
 			Ports:           ports,
@@ -244,13 +250,14 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
 		PodDisruption: lo.If(args.Deployment.DisruptionBudget != nil, args.Deployment.DisruptionBudget).Else(&k8s.DisruptionBudget{
 			MinAvailable: lo.ToPtr(1),
 		}),
-		RollingUpdate:       lo.If(args.Deployment.RollingUpdate != nil, toRollingUpdateArgs(args.Deployment.RollingUpdate)).Else(nil),
-		SecurityContext:     nil, // TODO
-		Log:                 args.Params.Log,
-		SecretVolumes:       args.SecretVolumes,
-		SecretVolumeOutputs: args.SecretVolumeOutputs,
-		ImagePullSecret:     args.ImagePullSecret,
-		EphemeralSize:       args.EphemeralSize,
+		RollingUpdate:                 lo.If(args.Deployment.RollingUpdate != nil, toRollingUpdateArgs(args.Deployment.RollingUpdate)).Else(nil),
+		SecurityContext:               nil, // TODO
+		Log:                           args.Params.Log,
+		SecretVolumes:                 args.SecretVolumes,
+		SecretVolumeOutputs:           args.SecretVolumeOutputs,
+		ImagePullSecret:               args.ImagePullSecret,
+		EphemeralSize:                 args.EphemeralSize,
+		TerminationGracePeriodSeconds: args.TerminationGracePeriodSeconds,
 	}, opts...)
 	if err != nil {
 		return nil, errors.Wrapf(err, "failed to provision simple container for stack %q in %q", stackName, args.Input.StackParams.Environment)
@@ -273,6 +280,23 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
 	return sc, nil
 }
 
+// buildPreStopLifecycle returns a LifecycleArgs whose preStop hook execs `sleep <seconds>`,
+// or nil when preStopSleepSeconds is nil or <= 0. The sleep is meant to let the load
+// balancer drain connections before SIGTERM, preventing 502/521 errors during rolling
+// updates. NOTE(review): presumably requires a `sleep` binary in the image — confirm.
+func buildPreStopLifecycle(preStopSleepSeconds *int) *corev1.LifecycleArgs {
+	if preStopSleepSeconds == nil || *preStopSleepSeconds <= 0 {
+		return nil // hook disabled: caller receives no lifecycle configuration
+	}
+	return &corev1.LifecycleArgs{
+		PreStop: &corev1.LifecycleHandlerArgs{
+			Exec: &corev1.ExecActionArgs{
+				Command: sdk.ToStringArray([]string{"sleep", fmt.Sprintf("%d", *preStopSleepSeconds)}),
+			},
+		},
+	}
+}
+
 func toRollingUpdateArgs(update *k8s.RollingUpdate) *v1.RollingUpdateDeploymentArgs {
 	return &v1.RollingUpdateDeploymentArgs{
 		MaxUnavailable: lo.If(lo.FromPtr(update).MaxUnavailable != nil, sdk.IntPtrFromPtr(lo.FromPtr(update).MaxUnavailable)).Else(nil),

diff --git a/pkg/clouds/pulumi/kubernetes/deployment_patch.go b/pkg/clouds/pulumi/kubernetes/deployment_patch.go
@@ -17,20 +17,50 @@ import (
 )
 
 type DeploymentPatchArgs struct {
-	PatchName    string
-	ServiceName  string
-	Namespace    string
-	Annotations  map[string]sdk.StringOutput
-	KubeProvider *sdkK8s.Provider  // Main Kubernetes provider (for dependencies)
-	Kubeconfig   *sdk.StringOutput // Optional: Kubeconfig for creating patch-specific provider
-	Opts         []sdk.ResourceOption
+	PatchName   string
+	ServiceName string
+	Namespace   string
+	// Annotations are applied to spec.template.metadata — changes here trigger a pod rolling update.
+	// Use only for values that should restart pods when changed (e.g. content hashes).
+	Annotations map[string]sdk.StringOutput
+	// DeploymentAnnotations are applied to the deployment's metadata only — changes do NOT trigger pod restarts.
+	// Use for informational annotations (e.g. caddy-updated-at, caddy-updated-by).
+	DeploymentAnnotations map[string]sdk.StringOutput
+	KubeProvider          *sdkK8s.Provider  // Main Kubernetes provider (for dependencies)
+	Kubeconfig            *sdk.StringOutput // Optional: Kubeconfig for creating patch-specific provider
+	Opts                  []sdk.ResourceOption
 }
 
 type deploymentPatchInputs struct {
-	Kubeconfig  string
-	Namespace   string
-	ServiceName string
-	Annotations map[string]string
+	Kubeconfig            string
+	Namespace             string
+	ServiceName           string
+	Annotations           map[string]string // resolved pod-template annotations (spec.template.metadata.annotations)
+	DeploymentAnnotations map[string]string // resolved deployment-level annotations (metadata.annotations)
+}
+
+// buildPodTemplatePatch marshals the JSON-encoded patch body that sets spec.template.metadata.annotations
+// (sent as a strategic merge patch, not an RFC 6902 JSON Patch). Changes here cause a rolling restart of pods.
+func buildPodTemplatePatch(annotations map[string]string) ([]byte, error) {
+	return json.Marshal(map[string]interface{}{
+		"spec": map[string]interface{}{
+			"template": map[string]interface{}{
+				"metadata": map[string]interface{}{
+					"annotations": annotations,
+				},
+			},
+		},
+	})
+}
+
+// buildDeploymentMetadataPatch marshals the JSON-encoded patch body that sets metadata.annotations on the
+// deployment (sent as a strategic merge patch). Changes here do NOT trigger pod restarts.
+func buildDeploymentMetadataPatch(annotations map[string]string) ([]byte, error) {
+	return json.Marshal(map[string]interface{}{
+		"metadata": map[string]interface{}{
+			"annotations": annotations,
+		},
+	})
 }
 
 func patchDeploymentWithK8sClient(ctx context.Context, inputs deploymentPatchInputs) error {
@@ -45,40 +75,48 @@ func patchDeploymentWithK8sClient(ctx context.Context, inputs deploymentPatchInp
 		return fmt.Errorf("failed to create Kubernetes client: %w", err)
 	}
 
-	// Build the patch payload - only the annotations we want to update
-	patch := map[string]interface{}{
-		"spec": map[string]interface{}{
-			"template": map[string]interface{}{
-				"metadata": map[string]interface{}{
-					"annotations": inputs.Annotations,
-				},
-			},
-		},
+	patchOptions := metav1.PatchOptions{
+		FieldManager: "simple-container",
 	}
 
-	// Marshal to JSON
-	patchBytes, err := json.Marshal(patch)
-	if err != nil {
-		return fmt.Errorf("failed to marshal patch: %w", err)
-	}
+	// Patch spec.template.metadata.annotations — triggers rolling restart when values change.
+	if len(inputs.Annotations) > 0 {
+		patchBytes, err := buildPodTemplatePatch(inputs.Annotations)
+		if err != nil {
+			return fmt.Errorf("failed to marshal pod-template annotations patch: %w", err)
+		}
 
-	// Apply the patch using Strategic Merge Patch
-	// This is a true partial update that doesn't require full deployment spec
-	patchOptions := metav1.PatchOptions{
-		FieldManager: "simple-container",
+		_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
+			ctx,
+			inputs.ServiceName,
+			types.StrategicMergePatchType,
+			patchBytes,
+			patchOptions,
+		)
+		if err != nil {
+			_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment pod-template annotations %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
+			return fmt.Errorf("failed to patch deployment %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
+		}
 	}
 
-	_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
-		ctx,
-		inputs.ServiceName,
-		types.StrategicMergePatchType,
-		patchBytes,
-		patchOptions,
-	)
-	if err != nil {
-		// Log detailed error information for debugging
-		_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
-		return fmt.Errorf("failed to patch deployment %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
+	// Patch metadata.annotations — informational only, does NOT trigger pod restarts.
+	if len(inputs.DeploymentAnnotations) > 0 {
+		patchBytes, err := buildDeploymentMetadataPatch(inputs.DeploymentAnnotations)
+		if err != nil {
+			return fmt.Errorf("failed to marshal deployment annotations patch: %w", err)
+		}
+
+		_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
+			ctx,
+			inputs.ServiceName,
+			types.StrategicMergePatchType,
+			patchBytes,
+			patchOptions,
+		)
+		if err != nil {
+			_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment metadata annotations %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
+			return fmt.Errorf("failed to patch deployment metadata annotations %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
+		}
 	}
 
 	return nil
@@ -90,10 +128,11 @@ func PatchDeployment(ctx *sdk.Context, args *DeploymentPatchArgs) (*sdk.StringOu
 
 	// Convert map[string]StringOutput to StringMapOutput for proper resolution
 	annotationsOutput := sdk.ToStringMapOutput(args.Annotations)
+	deploymentAnnotationsOutput := sdk.ToStringMapOutput(args.DeploymentAnnotations)
 
 	// Apply the patch when all outputs are resolved
 	// Use ApplyTWithContext to get access to Pulumi's context
-	result := sdk.All(args.Kubeconfig, annotationsOutput).ApplyTWithContext(ctx.Context(), func(goCtx context.Context, vals []interface{}) (string, error) {
+	result := sdk.All(args.Kubeconfig, annotationsOutput, deploymentAnnotationsOutput).ApplyTWithContext(ctx.Context(), func(goCtx context.Context, vals []interface{}) (string, error) {
 		kubeconfigStr, ok := vals[0].(string)
 		if !ok || kubeconfigStr == "" {
 			return "", fmt.Errorf("kubeconfig is required for native Kubernetes client patching")
@@ -104,11 +143,17 @@ func PatchDeployment(ctx *sdk.Context, args *DeploymentPatchArgs) (*sdk.StringOu
 			return "", fmt.Errorf("failed to resolve annotations: got type %T", vals[1])
 		}
 
+		deploymentAnnotations, ok := vals[2].(map[string]string)
+		if !ok {
+			return "", fmt.Errorf("failed to resolve deployment annotations: got type %T", vals[2])
+		}
+
 		inputs := deploymentPatchInputs{
-			Kubeconfig:  kubeconfigStr,
-			Namespace:   args.Namespace,
-			ServiceName: args.ServiceName,
-			Annotations: annotations,
+			Kubeconfig:            kubeconfigStr,
+			Namespace:             args.Namespace,
+			ServiceName:           args.ServiceName,
+			Annotations:           annotations,
+			DeploymentAnnotations: deploymentAnnotations,
 		}
 
 		// Create a context that respects parent cancellation but allows extra time for patch to complete