Skip to content

Commit

Permalink
feature: Support Recheck Interval for health check
Browse files Browse the repository at this point in the history
closes traefik#6550

Signed-off-by: sswastik02 <40518186+sswastik02@users.noreply.github.com>
  • Loading branch information
sswastik02 committed May 19, 2024
1 parent 1ffbffb commit af3d0b9
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@
- "traefik.http.services.service02.loadbalancer.healthcheck.mode=foobar"
- "traefik.http.services.service02.loadbalancer.healthcheck.path=foobar"
- "traefik.http.services.service02.loadbalancer.healthcheck.port=42"
- "traefik.http.services.service02.loadbalancer.healthcheck.recheck=42s"
- "traefik.http.services.service02.loadbalancer.healthcheck.scheme=foobar"
- "traefik.http.services.service02.loadbalancer.healthcheck.status=42"
- "traefik.http.services.service02.loadbalancer.healthcheck.timeout=42s"
Expand Down
1 change: 1 addition & 0 deletions docs/content/reference/dynamic-configuration/file.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
port = 42
interval = "42s"
timeout = "42s"
recheck = "42s"
hostname = "foobar"
followRedirects = true
[http.services.Service02.loadBalancer.healthCheck.headers]
Expand Down
1 change: 1 addition & 0 deletions docs/content/reference/dynamic-configuration/file.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ http:
port: 42
interval: 42s
timeout: 42s
recheck: 42s
hostname: foobar
followRedirects: true
headers:
Expand Down
1 change: 1 addition & 0 deletions docs/content/reference/dynamic-configuration/kv-ref.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ THIS FILE MUST NOT BE EDITED BY HAND
| `traefik/http/services/Service02/loadBalancer/healthCheck/mode` | `foobar` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/path` | `foobar` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/port` | `42` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/recheck` | `42s` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/scheme` | `foobar` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/status` | `42` |
| `traefik/http/services/Service02/loadBalancer/healthCheck/timeout` | `42s` |
Expand Down
4 changes: 4 additions & 0 deletions pkg/config/dynamic/http_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ const (
DefaultHealthCheckInterval = ptypes.Duration(30 * time.Second)
// DefaultHealthCheckTimeout is the default value for the ServerHealthCheck timeout.
DefaultHealthCheckTimeout = ptypes.Duration(5 * time.Second)
// DefaultHealthCheckRecheck is the default value for the ServerHealthCheck recheck.
DefaultHealthCheckRecheck = ptypes.Duration(10 * time.Second)

// DefaultPassHostHeader is the default value for the ServersLoadBalancer passHostHeader.
DefaultPassHostHeader = true
Expand Down Expand Up @@ -249,6 +251,7 @@ type ServerHealthCheck struct {
Port int `json:"port,omitempty" toml:"port,omitempty,omitzero" yaml:"port,omitempty" export:"true"`
Interval ptypes.Duration `json:"interval,omitempty" toml:"interval,omitempty" yaml:"interval,omitempty" export:"true"`
Timeout ptypes.Duration `json:"timeout,omitempty" toml:"timeout,omitempty" yaml:"timeout,omitempty" export:"true"`
Recheck ptypes.Duration `json:"recheck,omitempty" toml:"recheck,omitempty" yaml:"recheck,omitempty" export:"true"`
Hostname string `json:"hostname,omitempty" toml:"hostname,omitempty" yaml:"hostname,omitempty"`
FollowRedirects *bool `json:"followRedirects" toml:"followRedirects" yaml:"followRedirects" export:"true"`
Headers map[string]string `json:"headers,omitempty" toml:"headers,omitempty" yaml:"headers,omitempty" export:"true"`
Expand All @@ -261,6 +264,7 @@ func (h *ServerHealthCheck) SetDefaults() {
h.Mode = "http"
h.Interval = DefaultHealthCheckInterval
h.Timeout = DefaultHealthCheckTimeout
h.Recheck = DefaultHealthCheckRecheck
}

// +k8s:deepcopy-gen=true
Expand Down
38 changes: 34 additions & 4 deletions pkg/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ import (

const modeGRPC = "grpc"

// StatusSetter should be implemented by a service that, when the status of a
// StatusHandler should be implemented by a service that, when the status of a
// registered target changes, needs to be notified of that change.
type StatusSetter interface {
type StatusHandler interface {
GetStatus(ctx context.Context, childName string) bool
SetStatus(ctx context.Context, childName string, up bool)
}

Expand All @@ -41,20 +42,21 @@ type metricsHealthCheck interface {
}

type ServiceHealthChecker struct {
balancer StatusSetter
balancer StatusHandler
info *runtime.ServiceInfo

config *dynamic.ServerHealthCheck
interval time.Duration
timeout time.Duration
recheck time.Duration

metrics metricsHealthCheck

client *http.Client
targets map[string]*url.URL
}

func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, config *dynamic.ServerHealthCheck, service StatusSetter, info *runtime.ServiceInfo, transport http.RoundTripper, targets map[string]*url.URL) *ServiceHealthChecker {
func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, config *dynamic.ServerHealthCheck, service StatusHandler, info *runtime.ServiceInfo, transport http.RoundTripper, targets map[string]*url.URL) *ServiceHealthChecker {
logger := log.Ctx(ctx)

interval := time.Duration(config.Interval)
Expand All @@ -69,6 +71,12 @@ func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, co
timeout = time.Duration(dynamic.DefaultHealthCheckTimeout)
}

recheck := time.Duration(config.Recheck)
if recheck <= 0 {
logger.Error().Msg("Health check recheck interval smaller than zero")
recheck = time.Duration(dynamic.DefaultHealthCheckRecheck)
}

client := &http.Client{
Transport: transport,
}
Expand All @@ -85,6 +93,7 @@ func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, co
config: config,
interval: interval,
timeout: timeout,
recheck: recheck,
targets: targets,
client: client,
metrics: metrics,
Expand Down Expand Up @@ -126,6 +135,8 @@ func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
serverUpMetricValue = float64(0)
}

current := shc.balancer.GetStatus(ctx, proxyName)

shc.balancer.SetStatus(ctx, proxyName, up)

statusStr := runtime.StatusDown
Expand All @@ -138,6 +149,25 @@ func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
shc.metrics.ServiceServerUpGauge().
With("service", proxyName, "url", target.String()).
Set(serverUpMetricValue)

if up && !current {
// service was down and is now up
ticker.Stop()
log.Ctx(ctx).Debug().
Str("targetURL", target.String()).
Msg("Health check succeeded. Resetting healthcheck interval")
ticker = time.NewTicker(shc.interval)
}

if !up && current {
// service was up and is now down
ticker.Stop()
log.Ctx(ctx).Debug().
Str("targetURL", target.String()).
Msg("Health check failed. Setting healthcheck interval to recheck interval")
ticker = time.NewTicker(shc.recheck)
}

}
}
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/server/service/loadbalancer/wrr/wrr.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ func (b *Balancer) Pop() interface{} {
return h
}

// GetStatus reports whether childName currently has an entry in the
// balancer's status map. The ctx parameter is accepted but not used here.
//
// NOTE(review): presumably an entry in b.status means the child is up;
// confirm against SetStatus.
func (b *Balancer) GetStatus(ctx context.Context, childName string) bool {
	// Hold the read lock for the whole lookup so the map is not read while
	// it is being modified.
	b.handlersMu.RLock()
	defer b.handlersMu.RUnlock()

	// Only the presence of the key matters, not the stored value.
	_, present := b.status[childName]
	return present
}

// SetStatus sets on the balancer that its given child is now of the given
// status. balancerName is only needed for logging purposes.
func (b *Balancer) SetStatus(ctx context.Context, childName string, up bool) {
Expand Down

0 comments on commit af3d0b9

Please sign in to comment.