Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change GCE LB health check interval from 2s to 8s #70099

Merged
merged 1 commit into from
Oct 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions pkg/cloudprovider/providers/gce/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ const (
maxTargetPoolCreateInstances = 200

// HTTP Load Balancer parameters
// Configure 2 second period for external health checks.
gceHcCheckIntervalSeconds = int64(2)
// Configure 8 second period for external health checks.
gceHcCheckIntervalSeconds = int64(8)
gceHcTimeoutSeconds = int64(1)
// Start sending requests as soon as a pod is found on the node.
gceHcHealthyThreshold = int64(1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be reducing the gceHcUnhealthyThreshold to 3, otherwise it will be very unresponsive.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// Defaults to 5 * 2 = 10 seconds before the LB will steer traffic away
gceHcUnhealthyThreshold = int64(5)
// Defaults to 3 * 8 = 24 seconds before the LB will steer traffic away.
gceHcUnhealthyThreshold = int64(3)

gceComputeAPIEndpoint = "https://www.googleapis.com/compute/v1/"
gceComputeAPIEndpointBeta = "https://www.googleapis.com/compute/beta/"
Expand Down
49 changes: 45 additions & 4 deletions pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,14 @@ func (gce *GCECloud) ensureTargetPoolAndHealthCheck(tpExists, tpNeedsRecreation
return fmt.Errorf("failed to update target pool for load balancer (%s): %v", lbRefStr, err)
}
glog.Infof("ensureTargetPoolAndHealthCheck(%s): Updated target pool (with %d hosts).", lbRefStr, len(hosts))
if hcToCreate != nil {
if hc, err := gce.ensureHttpHealthCheck(hcToCreate.Name, hcToCreate.RequestPath, int32(hcToCreate.Port)); err != nil || hc == nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we probably want to add logic to keep higher values for the parameters on the healthcheck if the user has increased the value outside of Kubernetes. This gives the user a way out if their cluster is being impacted by the healthcheck volume.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logic added. Unit test also added to guard the logic.

return fmt.Errorf("Failed to ensure health check for %v port %d path %v: %v", loadBalancerName, hcToCreate.Port, hcToCreate.RequestPath, err)
}
}
} else {
// Panic worthy.
glog.Errorf("ensureTargetPoolAndHealthCheck(%s): target pool not exists and doesn't need to be created.", lbRefStr)
}
return nil
}
Expand Down Expand Up @@ -621,6 +629,37 @@ func makeHttpHealthCheck(name, path string, port int32) *compute.HttpHealthCheck
}
}

// mergeHttpHealthChecks reconciles HttpHealthCheck configures to be no smaller
// than the default values.
// E.g. old health check interval is 2s, new default is 8.
// The HC interval will be reconciled to 8 seconds.
// If the existing health check is larger than the default interval,
// the configuration will be kept.
func mergeHttpHealthChecks(hc, newHC *compute.HttpHealthCheck) *compute.HttpHealthCheck {
if hc.CheckIntervalSec > newHC.CheckIntervalSec {
grayluck marked this conversation as resolved.
Show resolved Hide resolved
newHC.CheckIntervalSec = hc.CheckIntervalSec
grayluck marked this conversation as resolved.
Show resolved Hide resolved
}
if hc.TimeoutSec > newHC.TimeoutSec {
newHC.TimeoutSec = hc.TimeoutSec
grayluck marked this conversation as resolved.
Show resolved Hide resolved
}
if hc.UnhealthyThreshold > newHC.UnhealthyThreshold {
newHC.UnhealthyThreshold = hc.UnhealthyThreshold
grayluck marked this conversation as resolved.
Show resolved Hide resolved
}
if hc.HealthyThreshold > newHC.HealthyThreshold {
newHC.HealthyThreshold = hc.HealthyThreshold
}
return newHC
}

// needToUpdateHttpHealthChecks checks whether the http healthcheck needs to be
// updated.
func needToUpdateHttpHealthChecks(hc, newHC *compute.HttpHealthCheck) bool {
changed := hc.Port != newHC.Port || hc.RequestPath != newHC.RequestPath || hc.Description != newHC.Description
changed = changed || hc.CheckIntervalSec < newHC.CheckIntervalSec || hc.TimeoutSec < newHC.TimeoutSec
changed = changed || hc.UnhealthyThreshold < newHC.UnhealthyThreshold || hc.HealthyThreshold < newHC.HealthyThreshold
return changed
}

func (gce *GCECloud) ensureHttpHealthCheck(name, path string, port int32) (hc *compute.HttpHealthCheck, err error) {
newHC := makeHttpHealthCheck(name, path, port)
hc, err = gce.GetHttpHealthCheck(name)
Expand All @@ -639,16 +678,18 @@ func (gce *GCECloud) ensureHttpHealthCheck(name, path string, port int32) (hc *c
}
// Validate health check fields
glog.V(4).Infof("Checking http health check params %s", name)
drift := hc.Port != int64(port) || hc.RequestPath != path || hc.Description != makeHealthCheckDescription(name)
drift = drift || hc.CheckIntervalSec != gceHcCheckIntervalSeconds || hc.TimeoutSec != gceHcTimeoutSeconds
drift = drift || hc.UnhealthyThreshold != gceHcUnhealthyThreshold || hc.HealthyThreshold != gceHcHealthyThreshold
if drift {
grayluck marked this conversation as resolved.
Show resolved Hide resolved
if needToUpdateHttpHealthChecks(hc, newHC) {
glog.Warningf("Health check %v exists but parameters have drifted - updating...", name)
newHC = mergeHttpHealthChecks(hc, newHC)
if err := gce.UpdateHttpHealthCheck(newHC); err != nil {
glog.Warningf("Failed to reconcile http health check %v parameters", name)
return nil, err
}
glog.V(4).Infof("Corrected health check %v parameters successful", name)
hc, err = gce.GetHttpHealthCheck(name)
if err != nil {
return nil, err
}
}
return hc, nil
}
Expand Down
151 changes: 151 additions & 0 deletions pkg/cloudprovider/providers/gce/gce_loadbalancer_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,3 +1032,154 @@ func TestEnsureExternalLoadBalancerErrors(t *testing.T) {
})
}
}

func TestExternalLoadBalancerEnsureHttpHealthCheck(t *testing.T) {
t.Parallel()

for _, tc := range []struct {
desc string
modifier func(*compute.HttpHealthCheck) *compute.HttpHealthCheck
wantEqual bool
}{
{"should ensure HC", func(_ *compute.HttpHealthCheck) *compute.HttpHealthCheck { return nil }, false},
{
"should reconcile HC interval",
func(hc *compute.HttpHealthCheck) *compute.HttpHealthCheck {
hc.CheckIntervalSec = gceHcCheckIntervalSeconds - 1
return hc
},
false,
},
{
"should allow HC to be configurable to bigger intervals",
func(hc *compute.HttpHealthCheck) *compute.HttpHealthCheck {
hc.CheckIntervalSec = gceHcCheckIntervalSeconds * 10
return hc
},
true,
},
{
"should allow HC to accept bigger intervals while applying default value to small thresholds",
func(hc *compute.HttpHealthCheck) *compute.HttpHealthCheck {
hc.CheckIntervalSec = gceHcCheckIntervalSeconds * 10
hc.UnhealthyThreshold = gceHcUnhealthyThreshold - 1
return hc
},
false,
},
} {
t.Run(tc.desc, func(t *testing.T) {

gce, err := fakeGCECloud(DefaultTestClusterValues())
require.NoError(t, err)
c := gce.c.(*cloud.MockGCE)
c.MockHttpHealthChecks.UpdateHook = func(ctx context.Context, key *meta.Key, obj *ga.HttpHealthCheck, m *cloud.MockHttpHealthChecks) error {
m.Objects[*key] = &cloud.MockHttpHealthChecksObj{Obj: obj}
return nil
}

hcName, hcPath, hcPort := "test-hc", "/healthz", int32(12345)
existingHC := makeHttpHealthCheck(hcName, hcPath, hcPort)
existingHC = tc.modifier(existingHC)
if existingHC != nil {
if err := gce.CreateHttpHealthCheck(existingHC); err != nil {
t.Fatalf("gce.CreateHttpHealthCheck(%#v) = %v; want err = nil", existingHC, err)
}
}
if _, err := gce.ensureHttpHealthCheck(hcName, hcPath, hcPort); err != nil {
t.Fatalf("gce.ensureHttpHealthCheck(%q, %q, %v) = _, %d; want err = nil", hcName, hcPath, hcPort, err)
}
if hc, err := gce.GetHttpHealthCheck(hcName); err != nil {
t.Fatalf("gce.GetHttpHealthCheck(%q) = _, %d; want err = nil", hcName, err)
} else {
if tc.wantEqual {
assert.Equal(t, hc, existingHC)
} else {
assert.NotEqual(t, hc, existingHC)
}
}
})
}

grayluck marked this conversation as resolved.
Show resolved Hide resolved
}

func TestMergeHttpHealthChecks(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
desc string
checkIntervalSec int64
timeoutSec int64
healthyThreshold int64
unhealthyThreshold int64
wantCheckIntervalSec int64
wantTimeoutSec int64
wantHealthyThreshold int64
wantUnhealthyThreshold int64
}{
{"unchanged", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"interval - too small - should reconcile", gceHcCheckIntervalSeconds - 1, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"timeout - too small - should reconcile", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds - 1, gceHcHealthyThreshold, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"healthy threshold - too small - should reconcile", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold - 1, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"unhealthy threshold - too small - should reconcile", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold - 1, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"interval - user configured - should keep", gceHcCheckIntervalSeconds + 1, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds + 1, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"timeout - user configured - should keep", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds + 1, gceHcHealthyThreshold, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds + 1, gceHcHealthyThreshold, gceHcUnhealthyThreshold},
{"healthy threshold - user configured - should keep", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold + 1, gceHcUnhealthyThreshold, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold + 1, gceHcUnhealthyThreshold},
{"unhealthy threshold - user configured - should keep", gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold + 1, gceHcCheckIntervalSeconds, gceHcTimeoutSeconds, gceHcHealthyThreshold, gceHcUnhealthyThreshold + 1},
} {
t.Run(tc.desc, func(t *testing.T) {
wantHC := makeHttpHealthCheck("hc", "/", 12345)
hc := &compute.HttpHealthCheck{
CheckIntervalSec: tc.checkIntervalSec,
TimeoutSec: tc.timeoutSec,
HealthyThreshold: tc.healthyThreshold,
UnhealthyThreshold: tc.unhealthyThreshold,
}
mergeHttpHealthChecks(hc, wantHC)
if wantHC.CheckIntervalSec != tc.wantCheckIntervalSec {
t.Errorf("wantHC.CheckIntervalSec = %d; want %d", wantHC.CheckIntervalSec, tc.checkIntervalSec)
}
if wantHC.TimeoutSec != tc.wantTimeoutSec {
t.Errorf("wantHC.TimeoutSec = %d; want %d", wantHC.TimeoutSec, tc.timeoutSec)
}
if wantHC.HealthyThreshold != tc.wantHealthyThreshold {
t.Errorf("wantHC.HealthyThreshold = %d; want %d", wantHC.HealthyThreshold, tc.healthyThreshold)
}
if wantHC.UnhealthyThreshold != tc.wantUnhealthyThreshold {
t.Errorf("wantHC.UnhealthyThreshold = %d; want %d", wantHC.UnhealthyThreshold, tc.unhealthyThreshold)
}
})
}
}

func TestNeedToUpdateHttpHealthChecks(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
desc string
modifier func(*compute.HttpHealthCheck)
wantChanged bool
}{
{"unchanged", nil, false},
{"desc does not match", func(hc *compute.HttpHealthCheck) { hc.Description = "bad-desc" }, true},
{"port does not match", func(hc *compute.HttpHealthCheck) { hc.Port = 54321 }, true},
{"requestPath does not match", func(hc *compute.HttpHealthCheck) { hc.RequestPath = "/anotherone" }, true},
{"interval needs update", func(hc *compute.HttpHealthCheck) { hc.CheckIntervalSec = gceHcCheckIntervalSeconds - 1 }, true},
{"timeout needs update", func(hc *compute.HttpHealthCheck) { hc.TimeoutSec = gceHcTimeoutSeconds - 1 }, true},
{"healthy threshold needs update", func(hc *compute.HttpHealthCheck) { hc.HealthyThreshold = gceHcHealthyThreshold - 1 }, true},
{"unhealthy threshold needs update", func(hc *compute.HttpHealthCheck) { hc.UnhealthyThreshold = gceHcUnhealthyThreshold - 1 }, true},
{"interval does not need update", func(hc *compute.HttpHealthCheck) { hc.CheckIntervalSec = gceHcCheckIntervalSeconds + 1 }, false},
{"timeout does not need update", func(hc *compute.HttpHealthCheck) { hc.TimeoutSec = gceHcTimeoutSeconds + 1 }, false},
{"healthy threshold does not need update", func(hc *compute.HttpHealthCheck) { hc.HealthyThreshold = gceHcHealthyThreshold + 1 }, false},
{"unhealthy threshold does not need update", func(hc *compute.HttpHealthCheck) { hc.UnhealthyThreshold = gceHcUnhealthyThreshold + 1 }, false},
} {
t.Run(tc.desc, func(t *testing.T) {
hc := makeHttpHealthCheck("hc", "/", 12345)
wantHC := makeHttpHealthCheck("hc", "/", 12345)
if tc.modifier != nil {
tc.modifier(hc)
}
if gotChanged := needToUpdateHttpHealthChecks(hc, wantHC); gotChanged != tc.wantChanged {
t.Errorf("needToUpdateHttpHealthChecks(%#v, %#v) = %t; want changed = %t", hc, wantHC, gotChanged, tc.wantChanged)
}
})
}
}
61 changes: 43 additions & 18 deletions pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,16 +405,19 @@ func (gce *GCECloud) ensureInternalHealthCheck(name string, svcName types.Namesp
return hc, nil
}

if healthChecksEqual(expectedHC, hc) {
return hc, nil
}

glog.V(2).Infof("ensureInternalHealthCheck: health check %v exists but parameters have drifted - updating...", name)
if err := gce.UpdateHealthCheck(expectedHC); err != nil {
glog.Warningf("Failed to reconcile http health check %v parameters", name)
return nil, err
if needToUpdateHealthChecks(hc, expectedHC) {
glog.V(2).Infof("ensureInternalHealthCheck: health check %v exists but parameters have drifted - updating...", name)
expectedHC = mergeHealthChecks(hc, expectedHC)
if err := gce.UpdateHealthCheck(expectedHC); err != nil {
glog.Warningf("Failed to reconcile http health check %v parameters", name)
return nil, err
}
glog.V(2).Infof("ensureInternalHealthCheck: corrected health check %v parameters successful", name)
hc, err = gce.GetHealthCheck(name)
if err != nil {
return nil, err
}
}
glog.V(2).Infof("ensureInternalHealthCheck: corrected health check %v parameters successful", name)
return hc, nil
}

Expand Down Expand Up @@ -620,15 +623,37 @@ func firewallRuleEqual(a, b *compute.Firewall) bool {
equalStringSets(a.TargetTags, b.TargetTags)
}

func healthChecksEqual(a, b *compute.HealthCheck) bool {
return a.HttpHealthCheck != nil && b.HttpHealthCheck != nil &&
a.HttpHealthCheck.Port == b.HttpHealthCheck.Port &&
a.HttpHealthCheck.RequestPath == b.HttpHealthCheck.RequestPath &&
a.Description == b.Description &&
a.CheckIntervalSec == b.CheckIntervalSec &&
a.TimeoutSec == b.TimeoutSec &&
a.UnhealthyThreshold == b.UnhealthyThreshold &&
a.HealthyThreshold == b.HealthyThreshold
// mergeHealthChecks reconciles HealthCheck configures to be no smaller than
// the default values.
// E.g. old health check interval is 2s, new default is 8.
// The HC interval will be reconciled to 8 seconds.
// If the existing health check is larger than the default interval,
// the configuration will be kept.
func mergeHealthChecks(hc, newHC *compute.HealthCheck) *compute.HealthCheck {
if hc.CheckIntervalSec > newHC.CheckIntervalSec {
newHC.CheckIntervalSec = hc.CheckIntervalSec
}
if hc.TimeoutSec > newHC.TimeoutSec {
newHC.TimeoutSec = hc.TimeoutSec
}
if hc.UnhealthyThreshold > newHC.UnhealthyThreshold {
newHC.UnhealthyThreshold = hc.UnhealthyThreshold
}
if hc.HealthyThreshold > newHC.HealthyThreshold {
newHC.HealthyThreshold = hc.HealthyThreshold
}
return newHC
}

// needToUpdateHealthChecks checks whether the healthcheck needs to be updated.
func needToUpdateHealthChecks(hc, newHC *compute.HealthCheck) bool {
if hc.HttpHealthCheck == nil || newHC.HttpHealthCheck == nil {
return true
}
changed := hc.HttpHealthCheck.Port != newHC.HttpHealthCheck.Port || hc.HttpHealthCheck.RequestPath != newHC.HttpHealthCheck.RequestPath || hc.Description != newHC.Description
changed = changed || hc.CheckIntervalSec < newHC.CheckIntervalSec || hc.TimeoutSec < newHC.TimeoutSec
changed = changed || hc.UnhealthyThreshold < newHC.UnhealthyThreshold || hc.HealthyThreshold < newHC.HealthyThreshold
return changed
}

// backendsListEqual asserts that backend lists are equal by instance group link only
Expand Down