Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion github-actions.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ RUN pulumi version > /dev/null && \
gcloud components list --filter="name:gke-gcloud-auth-plugin" --format="value(name)" | grep -q gke-gcloud-auth-plugin

# Set the entrypoint
ENTRYPOINT ["./github-actions"]
ENTRYPOINT ["/root/github-actions"]
7 changes: 7 additions & 0 deletions pkg/clouds/k8s/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ type CaddyConfig struct {
UseSSL *bool `json:"useSSL,omitempty" yaml:"useSSL,omitempty"` // whether to use ssl by default (default: true)
// Deployment name override for existing Caddy deployments (used when adopting clusters)
DeploymentName *string `json:"deploymentName,omitempty" yaml:"deploymentName,omitempty"` // override deployment name when adopting existing Caddy
// TerminationGracePeriodSeconds overrides the pod-level terminationGracePeriodSeconds for Caddy.
// Should be greater than preStopSleepSeconds. Default: Kubernetes default (30s).
TerminationGracePeriodSeconds *int `json:"terminationGracePeriodSeconds,omitempty" yaml:"terminationGracePeriodSeconds,omitempty"`
// PreStopSleepSeconds inserts a preStop exec sleep before SIGTERM is sent to Caddy.
// Allows load-balancer endpoint propagation and in-flight connection drain before shutdown.
// Prevents Cloudflare 521 errors during rolling updates. Default: 0 (disabled).
PreStopSleepSeconds *int `json:"preStopSleepSeconds,omitempty" yaml:"preStopSleepSeconds,omitempty"`
}

type DisruptionBudget struct {
Expand Down
14 changes: 11 additions & 3 deletions pkg/clouds/pulumi/gcp/gke_autopilot_stack.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"os"
"os/exec"
"strings"
"time"

auth "golang.org/x/oauth2/google"

Expand Down Expand Up @@ -233,14 +232,23 @@ func GkeAutopilotStack(ctx *sdk.Context, stack api.Stack, input api.ResourceInpu
Namespace: namespace,
KubeProvider: kubeProvider,
Kubeconfig: &kubeConfigOutput,
// caddy-update-hash goes into spec.template.metadata so Caddy pods roll only when
// the Caddyfile actually changes. Content-hash, not wall-clock time, prevents
// spurious restarts (and Cloudflare 521s) on every pulumi up.
Annotations: map[string]sdk.StringOutput{
"simple-container.com/caddy-updated-by": sdk.String(stackName).ToStringOutput(),
"simple-container.com/caddy-updated-at": sdk.String(time.Now().UTC().Format(time.RFC3339)).ToStringOutput(),
"simple-container.com/caddy-update-hash": sdk.All(sc.CaddyfileEntry).ApplyT(func(entry []any) string {
sum := md5.Sum([]byte(entry[0].(string)))
return hex.EncodeToString(sum[:])
}).(sdk.StringOutput),
},
// Informational annotations live on deployment metadata only — no pod restarts.
DeploymentAnnotations: map[string]sdk.StringOutput{
"simple-container.com/caddy-updated-by": sdk.String(stackName).ToStringOutput(),
"simple-container.com/caddy-updated-at": sdk.All(sc.CaddyfileEntry).ApplyT(func(entry []any) string {
sum := md5.Sum([]byte(entry[0].(string)))
return hex.EncodeToString(sum[:])[:8]
}).(sdk.StringOutput),
},
Opts: []sdk.ResourceOption{sdk.DependsOn([]sdk.Resource{sc.Service})},
})
if patchErr != nil {
Expand Down
26 changes: 14 additions & 12 deletions pkg/clouds/pulumi/kubernetes/caddy.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,18 +243,20 @@ func DeployCaddyService(ctx *sdk.Context, caddy CaddyDeployment, input api.Resou
}

sc, err := DeploySimpleContainer(ctx, Args{
ServiceType: serviceType, // to provision external IP
ProvisionIngress: caddy.ProvisionIngress,
UseSSL: useSSL,
Namespace: namespace,
DeploymentName: deploymentName,
Input: input,
ServiceAccountName: lo.ToPtr(serviceAccount.Name),
Deployment: deploymentConfig,
SecretVolumes: caddy.SecretVolumes, // Cloud credentials volumes (e.g., GCP service account)
SecretVolumeOutputs: caddy.SecretVolumeOutputs, // Pulumi outputs for secret volumes
SecretEnvs: secretEnvs, // Secret environment variables
VPA: caddy.VPA, // Vertical Pod Autoscaler configuration for Caddy
ServiceType: serviceType, // to provision external IP
ProvisionIngress: caddy.ProvisionIngress,
UseSSL: useSSL,
Namespace: namespace,
DeploymentName: deploymentName,
Input: input,
ServiceAccountName: lo.ToPtr(serviceAccount.Name),
Deployment: deploymentConfig,
SecretVolumes: caddy.SecretVolumes, // Cloud credentials volumes (e.g., GCP service account)
SecretVolumeOutputs: caddy.SecretVolumeOutputs, // Pulumi outputs for secret volumes
SecretEnvs: secretEnvs, // Secret environment variables
VPA: caddy.VPA, // Vertical Pod Autoscaler configuration for Caddy
TerminationGracePeriodSeconds: lo.FromPtr(caddy.CaddyConfig).TerminationGracePeriodSeconds,
PreStopSleepSeconds: lo.FromPtr(caddy.CaddyConfig).PreStopSleepSeconds,
Images: []*ContainerImage{
{
Container: caddyContainer,
Expand Down
40 changes: 32 additions & 8 deletions pkg/clouds/pulumi/kubernetes/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ type Args struct {
ReadinessProbe *k8s.CloudRunProbe // Global readiness probe configuration
LivenessProbe *k8s.CloudRunProbe // Global liveness probe configuration
EphemeralSize string
// TerminationGracePeriodSeconds overrides pod-level terminationGracePeriodSeconds.
TerminationGracePeriodSeconds *int
// PreStopSleepSeconds injects a preStop exec sleep on all containers, allowing LB drain before SIGTERM.
PreStopSleepSeconds *int
}

func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOption) (*SimpleContainer, error) {
Expand Down Expand Up @@ -181,13 +185,15 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
resources.Requests = sdk.ToStringMap(c.Container.Resources.Requests)
}

lifecycle := buildPreStopLifecycle(args.PreStopSleepSeconds)

return corev1.ContainerArgs{
Args: sdk.ToStringArray(c.Container.Args),
Command: sdk.ToStringArray(c.Container.Command),
Env: env,
Image: c.ImageName,
ImagePullPolicy: sdk.String(lo.If(c.Container.ImagePullPolicy != nil, lo.FromPtr(c.Container.ImagePullPolicy)).Else("IfNotPresent")),
Lifecycle: nil, // TODO
Lifecycle: lifecycle,
LivenessProbe: livenessProbe,
Name: sdk.String(c.Container.Name),
Ports: ports,
Expand Down Expand Up @@ -244,13 +250,14 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
PodDisruption: lo.If(args.Deployment.DisruptionBudget != nil, args.Deployment.DisruptionBudget).Else(&k8s.DisruptionBudget{
MinAvailable: lo.ToPtr(1),
}),
RollingUpdate: lo.If(args.Deployment.RollingUpdate != nil, toRollingUpdateArgs(args.Deployment.RollingUpdate)).Else(nil),
SecurityContext: nil, // TODO
Log: args.Params.Log,
SecretVolumes: args.SecretVolumes,
SecretVolumeOutputs: args.SecretVolumeOutputs,
ImagePullSecret: args.ImagePullSecret,
EphemeralSize: args.EphemeralSize,
RollingUpdate: lo.If(args.Deployment.RollingUpdate != nil, toRollingUpdateArgs(args.Deployment.RollingUpdate)).Else(nil),
SecurityContext: nil, // TODO
Log: args.Params.Log,
SecretVolumes: args.SecretVolumes,
SecretVolumeOutputs: args.SecretVolumeOutputs,
ImagePullSecret: args.ImagePullSecret,
EphemeralSize: args.EphemeralSize,
TerminationGracePeriodSeconds: args.TerminationGracePeriodSeconds,
}, opts...)
if err != nil {
return nil, errors.Wrapf(err, "failed to provision simple container for stack %q in %q", stackName, args.Input.StackParams.Environment)
Expand All @@ -273,6 +280,23 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti
return sc, nil
}

// buildPreStopLifecycle builds the container lifecycle configuration that carries the
// optional preStop sleep hook.
//
// When preStopSleepSeconds is nil or not positive the hook is disabled and nil is
// returned. Otherwise the result holds a preStop exec handler running `sleep <seconds>`.
// The pause is meant to let the load-balancer finish draining connections before the
// container receives SIGTERM, preventing 502/521 errors during rolling updates.
func buildPreStopLifecycle(preStopSleepSeconds *int) *corev1.LifecycleArgs {
	if preStopSleepSeconds == nil {
		return nil
	}
	seconds := *preStopSleepSeconds
	if seconds <= 0 {
		return nil
	}

	// The hook is an exec action, so it runs the `sleep` command inside the container.
	sleepCommand := []string{"sleep", fmt.Sprintf("%d", seconds)}
	preStop := &corev1.LifecycleHandlerArgs{
		Exec: &corev1.ExecActionArgs{
			Command: sdk.ToStringArray(sleepCommand),
		},
	}
	return &corev1.LifecycleArgs{PreStop: preStop}
}

func toRollingUpdateArgs(update *k8s.RollingUpdate) *v1.RollingUpdateDeploymentArgs {
return &v1.RollingUpdateDeploymentArgs{
MaxUnavailable: lo.If(lo.FromPtr(update).MaxUnavailable != nil, sdk.IntPtrFromPtr(lo.FromPtr(update).MaxUnavailable)).Else(nil),
Expand Down
135 changes: 90 additions & 45 deletions pkg/clouds/pulumi/kubernetes/deployment_patch.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,50 @@ import (
)

type DeploymentPatchArgs struct {
PatchName string
ServiceName string
Namespace string
Annotations map[string]sdk.StringOutput
KubeProvider *sdkK8s.Provider // Main Kubernetes provider (for dependencies)
Kubeconfig *sdk.StringOutput // Optional: Kubeconfig for creating patch-specific provider
Opts []sdk.ResourceOption
PatchName string
ServiceName string
Namespace string
// Annotations are applied to spec.template.metadata — changes here trigger a pod rolling update.
// Use only for values that should restart pods when changed (e.g. content hashes).
Annotations map[string]sdk.StringOutput
// DeploymentAnnotations are applied to the Deployment's own metadata only — changes do NOT trigger pod restarts.
// Use for informational annotations (e.g. caddy-updated-at, caddy-updated-by).
DeploymentAnnotations map[string]sdk.StringOutput
KubeProvider *sdkK8s.Provider // Main Kubernetes provider (for dependencies)
Kubeconfig *sdk.StringOutput // Optional: Kubeconfig for creating patch-specific provider
Opts []sdk.ResourceOption
}

type deploymentPatchInputs struct {
Kubeconfig string
Namespace string
ServiceName string
Annotations map[string]string
Kubeconfig string
Namespace string
ServiceName string
Annotations map[string]string
DeploymentAnnotations map[string]string
}

// buildPodTemplatePatch serializes a patch document whose only content is
// spec.template.metadata.annotations, set to the given annotations.
// Because the pod template is modified, applying a changed value rolls the pods.
//
// A nil map is encoded as JSON null under "annotations"; the caller in this file
// only builds this patch when the map is non-empty.
// Returns the JSON bytes, or the error reported by json.Marshal.
func buildPodTemplatePatch(annotations map[string]string) ([]byte, error) {
	// Assemble the document from the innermost object outwards.
	podMetadata := map[string]any{"annotations": annotations}
	podTemplate := map[string]any{"metadata": podMetadata}
	deploymentSpec := map[string]any{"template": podTemplate}
	payload := map[string]any{"spec": deploymentSpec}

	return json.Marshal(payload)
}

// buildDeploymentMetadataPatch serializes a patch document whose only content is
// the top-level metadata.annotations, set to the given annotations.
// The pod template is not part of the document, so applying it does NOT restart pods.
//
// A nil map is encoded as JSON null under "annotations"; the caller in this file
// only builds this patch when the map is non-empty.
// Returns the JSON bytes, or the error reported by json.Marshal.
func buildDeploymentMetadataPatch(annotations map[string]string) ([]byte, error) {
	objectMetadata := map[string]any{"annotations": annotations}
	payload := map[string]any{"metadata": objectMetadata}

	return json.Marshal(payload)
}

func patchDeploymentWithK8sClient(ctx context.Context, inputs deploymentPatchInputs) error {
Expand All @@ -45,40 +75,48 @@ func patchDeploymentWithK8sClient(ctx context.Context, inputs deploymentPatchInp
return fmt.Errorf("failed to create Kubernetes client: %w", err)
}

// Build the patch payload - only the annotations we want to update
patch := map[string]interface{}{
"spec": map[string]interface{}{
"template": map[string]interface{}{
"metadata": map[string]interface{}{
"annotations": inputs.Annotations,
},
},
},
patchOptions := metav1.PatchOptions{
FieldManager: "simple-container",
}

// Marshal to JSON
patchBytes, err := json.Marshal(patch)
if err != nil {
return fmt.Errorf("failed to marshal patch: %w", err)
}
// Patch spec.template.metadata.annotations — triggers rolling restart when values change.
if len(inputs.Annotations) > 0 {
patchBytes, err := buildPodTemplatePatch(inputs.Annotations)
if err != nil {
return fmt.Errorf("failed to marshal pod-template annotations patch: %w", err)
}

// Apply the patch using Strategic Merge Patch
// This is a true partial update that doesn't require full deployment spec
patchOptions := metav1.PatchOptions{
FieldManager: "simple-container",
_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
ctx,
inputs.ServiceName,
types.StrategicMergePatchType,
patchBytes,
patchOptions,
)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment pod-template annotations %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
return fmt.Errorf("failed to patch deployment %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
}
}

_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
ctx,
inputs.ServiceName,
types.StrategicMergePatchType,
patchBytes,
patchOptions,
)
if err != nil {
// Log detailed error information for debugging
_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
return fmt.Errorf("failed to patch deployment %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
// Patch metadata.annotations — informational only, does NOT trigger pod restarts.
if len(inputs.DeploymentAnnotations) > 0 {
patchBytes, err := buildDeploymentMetadataPatch(inputs.DeploymentAnnotations)
if err != nil {
return fmt.Errorf("failed to marshal deployment annotations patch: %w", err)
}

_, err = clientSet.AppsV1().Deployments(inputs.Namespace).Patch(
ctx,
inputs.ServiceName,
types.StrategicMergePatchType,
patchBytes,
patchOptions,
)
if err != nil {
_, _ = fmt.Fprintf(os.Stderr, "❌ PATCH ERROR: failed to patch deployment metadata annotations %s/%s: %v\n", inputs.Namespace, inputs.ServiceName, err)
return fmt.Errorf("failed to patch deployment metadata annotations %s/%s: %w", inputs.Namespace, inputs.ServiceName, err)
}
}

return nil
Expand All @@ -90,10 +128,11 @@ func PatchDeployment(ctx *sdk.Context, args *DeploymentPatchArgs) (*sdk.StringOu

// Convert map[string]StringOutput to StringMapOutput for proper resolution
annotationsOutput := sdk.ToStringMapOutput(args.Annotations)
deploymentAnnotationsOutput := sdk.ToStringMapOutput(args.DeploymentAnnotations)

// Apply the patch when all outputs are resolved
// Use ApplyTWithContext to get access to Pulumi's context
result := sdk.All(args.Kubeconfig, annotationsOutput).ApplyTWithContext(ctx.Context(), func(goCtx context.Context, vals []interface{}) (string, error) {
result := sdk.All(args.Kubeconfig, annotationsOutput, deploymentAnnotationsOutput).ApplyTWithContext(ctx.Context(), func(goCtx context.Context, vals []interface{}) (string, error) {
kubeconfigStr, ok := vals[0].(string)
if !ok || kubeconfigStr == "" {
return "", fmt.Errorf("kubeconfig is required for native Kubernetes client patching")
Expand All @@ -104,11 +143,17 @@ func PatchDeployment(ctx *sdk.Context, args *DeploymentPatchArgs) (*sdk.StringOu
return "", fmt.Errorf("failed to resolve annotations: got type %T", vals[1])
}

deploymentAnnotations, ok := vals[2].(map[string]string)
if !ok {
return "", fmt.Errorf("failed to resolve deployment annotations: got type %T", vals[2])
}

inputs := deploymentPatchInputs{
Kubeconfig: kubeconfigStr,
Namespace: args.Namespace,
ServiceName: args.ServiceName,
Annotations: annotations,
Kubeconfig: kubeconfigStr,
Namespace: args.Namespace,
ServiceName: args.ServiceName,
Annotations: annotations,
DeploymentAnnotations: deploymentAnnotations,
}

// Create a context that respects parent cancellation but allows extra time for patch to complete
Expand Down
Loading
Loading