Skip to content

Commit

Permalink
fix: set proper timeouts for KubePrism loadbalancer
Browse files Browse the repository at this point in the history
The default timeouts are very aggressive, and we should use explicit
timeouts so that health checks don't run that often.

Fixes #7690

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Aug 31, 2023
1 parent b8fb55d commit 79bbdf4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
9 changes: 9 additions & 0 deletions internal/app/machined/pkg/controllers/k8s/kubeprism.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ import (
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/slices"
"github.com/siderolabs/go-loadbalancer/controlplane"
"github.com/siderolabs/go-loadbalancer/upstream"
"github.com/siderolabs/go-pointer"
"go.uber.org/zap"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/k8s"
)

Expand Down Expand Up @@ -190,6 +192,13 @@ func (ctrl *KubePrismController) startKubePrism(lbCfg *k8s.KubePrismConfig, logg

lb, err := controlplane.NewLoadBalancer(ctrl.balancerHost, ctrl.balancerPort,
logger.WithOptions(zap.IncreaseLevel(zap.ErrorLevel)), // silence the load balancer logs
controlplane.WithDialTimeout(constants.KubePrismDialTimeout),
controlplane.WithKeepAlivePeriod(constants.KubePrismKeepAlivePeriod),
controlplane.WithTCPUserTimeout(constants.KubePrismTCPUserTimeout),
controlplane.WithHealthCheckOptions(
upstream.WithHealthcheckInterval(constants.KubePrismHealthCheckInterval),
upstream.WithHealthcheckTimeout(constants.KubePrismHealthCheckTimeout),
),
)
if err != nil {
return fmt.Errorf("failed to create KubePrism: %w", err)
Expand Down
15 changes: 15 additions & 0 deletions pkg/machinery/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,21 @@ const (

// TcellMinimizeEnvironment is the environment variable to minimize tcell library memory usage (skips rune width calculation).
TcellMinimizeEnvironment = "TCELL_MINIMIZE=1"

// KubePrismDialTimeout is the timeout for the KubePrism loadbalancer dialing an endpoint.
KubePrismDialTimeout = 15 * time.Second

// KubePrismKeepAlivePeriod is the TCP keepalive period for the KubePrism loadbalancer.
KubePrismKeepAlivePeriod = 30 * time.Second

// KubePrismTCPUserTimeout is the TCP user timeout for the KubePrism loadbalancer.
KubePrismTCPUserTimeout = 30 * time.Second

// KubePrismHealthCheckInterval is the interval between health checks for the KubePrism loadbalancer.
KubePrismHealthCheckInterval = 20 * time.Second

// KubePrismHealthCheckTimeout is the timeout for health checks for the KubePrism loadbalancer.
KubePrismHealthCheckTimeout = 15 * time.Second
)

// See https://linux.die.net/man/3/klogctl
Expand Down

0 comments on commit 79bbdf4

Please sign in to comment.