From 1156b3aa115b8131c18c649fa65a31553bf0f198 Mon Sep 17 00:00:00 2001 From: Lukasz Szaszkiewicz Date: Mon, 19 Jun 2023 10:06:08 +0200 Subject: [PATCH] webhooksupportabilitycontroller: do not use one second timeout when asserting a webhook connection Previously the dial timeout to a webhook was set to one second, which seems to be very aggressive and can cause failures that put the operator into a degraded state. This PR reads the timeout value for a webhook from the spec or uses a default value of 10 seconds if it wasn't specified. --- .../degraded_webhook.go | 13 ++++++++++--- .../degraded_webhook_admission.go | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/operator/webhooksupportabilitycontroller/degraded_webhook.go b/pkg/operator/webhooksupportabilitycontroller/degraded_webhook.go index 944dbe54cc..148a549c04 100644 --- a/pkg/operator/webhooksupportabilitycontroller/degraded_webhook.go +++ b/pkg/operator/webhooksupportabilitycontroller/degraded_webhook.go @@ -22,6 +22,9 @@ type webhookInfo struct { Service *serviceReference CABundle []byte FailurePolicyIsIgnore bool + // TimeoutSeconds specifies the timeout for a webhook. + // After the timeout passes, the webhook call will be ignored or the API call will fail + TimeoutSeconds *int32 } // serviceReference generically represents a service reference @@ -49,7 +52,7 @@ func (c *webhookSupportabilityController) updateWebhookConfigurationDegraded(ctx serviceMsgs = append(serviceMsgs, msg) continue } - err = c.assertConnect(ctx, webhook.Name, webhook.Service, webhook.CABundle) + err = c.assertConnect(ctx, webhook.Name, webhook.Service, webhook.CABundle, webhook.TimeoutSeconds) if err != nil { msg := fmt.Sprintf("%s: %s", webhook.Name, err) if webhook.FailurePolicyIsIgnore { @@ -94,7 +97,7 @@ func (c *webhookSupportabilityController) assertService(reference *serviceRefere } // assertConnect performs a dns lookup of service, opens a tcp connection, and performs a tls handshake. 
-func (c *webhookSupportabilityController) assertConnect(ctx context.Context, webhookName string, reference *serviceReference, caBundle []byte) error { +func (c *webhookSupportabilityController) assertConnect(ctx context.Context, webhookName string, reference *serviceReference, caBundle []byte, webhookTimeoutSeconds *int32) error { host := reference.Name + "." + reference.Namespace + ".svc" port := "443" if reference.Port != nil { @@ -104,6 +107,10 @@ func (c *webhookSupportabilityController) assertConnect(ctx context.Context, web if len(caBundle) > 0 { rootCAs.AppendCertsFromPEM(caBundle) } + timeout := 10 * time.Second + if webhookTimeoutSeconds != nil { + timeout = time.Duration(*webhookTimeoutSeconds) * time.Second + } // the last error that occurred in the loop below var err error // retry up to 3 times on error @@ -114,7 +121,7 @@ func (c *webhookSupportabilityController) assertConnect(ctx context.Context, web case <-time.After(time.Duration(i) * time.Second): } dialer := &tls.Dialer{ - NetDialer: &net.Dialer{Timeout: 1 * time.Second}, + NetDialer: &net.Dialer{Timeout: timeout}, Config: &tls.Config{ ServerName: host, RootCAs: rootCAs, diff --git a/pkg/operator/webhooksupportabilitycontroller/degraded_webhook_admission.go b/pkg/operator/webhooksupportabilitycontroller/degraded_webhook_admission.go index c98539c9f8..28ac9a7c9f 100644 --- a/pkg/operator/webhooksupportabilitycontroller/degraded_webhook_admission.go +++ b/pkg/operator/webhooksupportabilitycontroller/degraded_webhook_admission.go @@ -27,6 +27,7 @@ func (c *webhookSupportabilityController) updateMutatingAdmissionWebhookConfigur Name: webhook.Name, CABundle: webhook.ClientConfig.CABundle, FailurePolicyIsIgnore: webhook.FailurePolicy != nil && *webhook.FailurePolicy == admissionregistrationv1.Ignore, + TimeoutSeconds: webhook.TimeoutSeconds, } if webhook.ClientConfig.Service != nil { info.Service = &serviceReference{ @@ -58,6 +59,7 @@ func (c *webhookSupportabilityController) 
updateValidatingAdmissionWebhookConfig Name: webhook.Name, CABundle: webhook.ClientConfig.CABundle, FailurePolicyIsIgnore: webhook.FailurePolicy != nil && (*webhook.FailurePolicy == v1.Ignore), + TimeoutSeconds: webhook.TimeoutSeconds, } if webhook.ClientConfig.Service != nil {