Skip to content

Commit

Permalink
OCPBUGS-12475: ensure health check runs despite lost leadership
Browse files Browse the repository at this point in the history
This is done by adding the health check to the already existing
webserver, which is also serving metrics. This also updates library-go
to consume the new option for adding health checks.
  • Loading branch information
tjungblu committed Apr 26, 2023
1 parent 836a8a4 commit 5bdb9ed
Show file tree
Hide file tree
Showing 186 changed files with 2,484 additions and 607 deletions.
4 changes: 2 additions & 2 deletions go.mod
Expand Up @@ -10,10 +10,10 @@ require (
github.com/google/uuid v1.1.2
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
github.com/mcuadros/go-version v0.0.0-20190830083331-035f6764e8d2
github.com/openshift/api v0.0.0-20230223193310-d964c7a58d75
github.com/openshift/api v0.0.0-20230424180646-e83c8e957ea4
github.com/openshift/build-machinery-go v0.0.0-20220913142420-e25cf57ea46d
github.com/openshift/client-go v0.0.0-20230120202327-72f107311084
github.com/openshift/library-go v0.0.0-20230308200407-f3277c772011
github.com/openshift/library-go v0.0.0-20230425205800-ab66adbd0bb5
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.37.0
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Expand Up @@ -479,12 +479,16 @@ github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa
github.com/onsi/gomega v1.23.0 h1:/oxKu9c2HVap+F3PfKort2Hw5DEU+HGlW8n+tguWsys=
github.com/openshift/api v0.0.0-20230223193310-d964c7a58d75 h1:OQJsfiach1cKBI1xUSNXKzuqi8nTpDRccR8gMGFkTIU=
github.com/openshift/api v0.0.0-20230223193310-d964c7a58d75/go.mod h1:ctXNyWanKEjGj8sss1KjjHQ3ENKFm33FFnS5BKaIPh4=
github.com/openshift/api v0.0.0-20230424180646-e83c8e957ea4 h1:orWeDcPL8qh07jyWAZXARZB+SGtZqjmjIbEh0CbKTZU=
github.com/openshift/api v0.0.0-20230424180646-e83c8e957ea4/go.mod h1:ctXNyWanKEjGj8sss1KjjHQ3ENKFm33FFnS5BKaIPh4=
github.com/openshift/build-machinery-go v0.0.0-20220913142420-e25cf57ea46d h1:RR4ah7FfaPR1WePizm0jlrsbmPu91xQZnAsVVreQV1k=
github.com/openshift/build-machinery-go v0.0.0-20220913142420-e25cf57ea46d/go.mod h1:b1BuldmJlbA/xYtdZvKi+7j5YGB44qJUJDZ9zwiNCfE=
github.com/openshift/client-go v0.0.0-20230120202327-72f107311084 h1:66uaqNwA+qYyQDwsMWUfjjau8ezmg1dzCqub13KZOcE=
github.com/openshift/client-go v0.0.0-20230120202327-72f107311084/go.mod h1:M3h9m001PWac3eAudGG3isUud6yBjr5XpzLYLLTlHKo=
github.com/openshift/library-go v0.0.0-20230308200407-f3277c772011 h1:RL6hf0cNc9uVZXQkU74a/J91XEo5iip2mWvJTwKgMg4=
github.com/openshift/library-go v0.0.0-20230308200407-f3277c772011/go.mod h1:OspkL5FZZapzNcka6UkNMFD7ifLT/dWUNvtwErpRK9k=
github.com/openshift/library-go v0.0.0-20230425205800-ab66adbd0bb5 h1:U+Cdda576x6c0s9PlTFf+JXSHUkPjg1u3Smb/piRTyc=
github.com/openshift/library-go v0.0.0-20230425205800-ab66adbd0bb5/go.mod h1:tUWJLc0m8/1GyMKXFKZMWWfaGtFhX1T6kdcGtiGtZIE=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
Expand Down
4 changes: 2 additions & 2 deletions manifests/0000_20_etcd-operator_06_deployment.yaml
Expand Up @@ -55,8 +55,8 @@ spec:
livenessProbe:
httpGet:
path: healthz
port: 8080
scheme: HTTP
port: 8443
scheme: HTTPS
timeoutSeconds: 30
periodSeconds: 60
successThreshold: 1
Expand Down
14 changes: 12 additions & 2 deletions pkg/cmd/operator/cmd.go
@@ -1,17 +1,27 @@
package operator

import (
"github.com/spf13/cobra"
"context"
"fmt"
"net/http"

"github.com/openshift/cluster-etcd-operator/pkg/operator"
"github.com/openshift/cluster-etcd-operator/pkg/version"
"github.com/openshift/library-go/pkg/controller/controllercmd"
"github.com/spf13/cobra"
"k8s.io/apiserver/pkg/server/healthz"
)

func NewOperator() *cobra.Command {
cmd := controllercmd.
NewControllerCommandConfig("openshift-cluster-etcd-operator", version.Get(), operator.RunOperator).
NewCommand()
WithHealthChecks(healthz.NamedCheck("controller-aliveness", func(_ *http.Request) error {
if !operator.AlivenessChecker.Alive() {
return fmt.Errorf("found unhealthy aliveness check, returning error")
}
return nil
})).
NewCommandWithContext(context.Background())
cmd.Use = "operator"
cmd.Short = "Start the Cluster etcd Operator"

Expand Down
51 changes: 13 additions & 38 deletions pkg/operator/starter.go
Expand Up @@ -3,7 +3,6 @@ package operator
import (
"context"
"fmt"
"net/http"
"os"
"regexp"
"time"
Expand Down Expand Up @@ -65,6 +64,8 @@ const masterMachineLabelSelectorString = "machine.openshift.io/cluster-api-machi
// masterNodeLabelSelectorString allows for getting only the master nodes, it matters in larger installations with many worker nodes
const masterNodeLabelSelectorString = "node-role.kubernetes.io/master"

// AlivenessChecker is the single aliveness checker handed to the operator's
// controllers in RunOperator. It is exported at package level so that the
// command setup in pkg/cmd/operator can query Alive() from its
// "controller-aliveness" health check.
var AlivenessChecker = health.NewMultiAlivenessChecker()

func RunOperator(ctx context.Context, controllerContext *controllercmd.ControllerContext) error {
// This kube client use protobuf, do not use it for CR
kubeClient, err := kubernetes.NewForConfig(controllerContext.ProtoKubeConfig)
Expand Down Expand Up @@ -147,8 +148,6 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
controllerContext.EventRecorder,
)

alivenessChecker := health.NewMultiAlivenessChecker()

staticResourceController := staticresourcecontroller.NewStaticResourceController(
"EtcdStaticResources",
etcd_assets.Asset,
Expand Down Expand Up @@ -179,7 +178,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
etcdClient)

targetConfigReconciler := targetconfigcontroller.NewTargetConfigController(
alivenessChecker,
AlivenessChecker,
os.Getenv("IMAGE"),
os.Getenv("OPERATOR_IMAGE"),
operatorClient,
Expand Down Expand Up @@ -291,7 +290,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
coreClient := clientset

etcdCertSignerController := etcdcertsigner.NewEtcdCertSignerController(
alivenessChecker,
AlivenessChecker,
coreClient,
operatorClient,
kubeInformersForNamespaces,
Expand All @@ -300,7 +299,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

etcdEndpointsController := etcdendpointscontroller.NewEtcdEndpointsController(
alivenessChecker,
AlivenessChecker,
operatorClient,
etcdClient,
controllerContext.EventRecorder,
Expand All @@ -312,7 +311,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
machineAPI := ceohelpers.NewMachineAPI(masterMachineInformer, machinelistersv1beta1.NewMachineLister(masterMachineInformer.GetIndexer()), masterMachineLabelSelector)

clusterMemberController := clustermembercontroller.NewClusterMemberController(
alivenessChecker,
AlivenessChecker,
operatorClient,
machineAPI,
masterNodeInformer,
Expand All @@ -326,7 +325,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

clusterMemberRemovalController := clustermemberremovalcontroller.NewClusterMemberRemovalController(
alivenessChecker,
AlivenessChecker,
operatorClient,
etcdClient,
machineAPI,
Expand All @@ -340,7 +339,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

machineDeletionHooksController := machinedeletionhooks.NewMachineDeletionHooksController(
alivenessChecker,
AlivenessChecker,
operatorClient,
machineClient,
etcdClient,
Expand All @@ -352,14 +351,14 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
controllerContext.EventRecorder)

etcdMembersController := etcdmemberscontroller.NewEtcdMembersController(
alivenessChecker,
AlivenessChecker,
operatorClient,
etcdClient,
controllerContext.EventRecorder,
)

bootstrapTeardownController := bootstrapteardown.NewBootstrapTeardownController(
alivenessChecker,
AlivenessChecker,
operatorClient,
kubeInformersForNamespaces,
etcdClient,
Expand All @@ -368,7 +367,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

scriptController := scriptcontroller.NewScriptControllerController(
alivenessChecker,
AlivenessChecker,
operatorClient,
kubeClient,
kubeInformersForNamespaces,
Expand All @@ -377,7 +376,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

defragController := defragcontroller.NewDefragController(
alivenessChecker,
AlivenessChecker,
operatorClient,
etcdClient,
configInformers.Config().V1().Infrastructures().Lister(),
Expand All @@ -386,7 +385,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
)

upgradeBackupController := upgradebackupcontroller.NewUpgradeBackupController(
alivenessChecker,
AlivenessChecker,
operatorClient,
configClient.ConfigV1(),
kubeClient,
Expand Down Expand Up @@ -431,34 +430,10 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
go envVarController.Run(1, ctx.Done())
go staticPodControllers.Start(ctx)

err = runHealthzServer(alivenessChecker)
if err != nil {
return err
}

<-ctx.Done()
return nil
}

// runHealthzServer serves a plain-HTTP /healthz endpoint on 0.0.0.0:8080 that
// reports the state of alivenessChecker.
//
// The handler answers 200 OK while alivenessChecker.Alive() returns true and
// 503 Service Unavailable otherwise.
//
// The call blocks inside ListenAndServe and only returns once the server has
// stopped; the returned error is whatever ListenAndServe reports.
func runHealthzServer(alivenessChecker *health.MultiAlivenessChecker) error {
	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", func(writer http.ResponseWriter, _ *http.Request) {
		// Decide the status before the first WriteHeader call: net/http sends
		// the header on the first call and ignores any later one, so writing
		// 200 up front would make the 503 below impossible to deliver.
		if !alivenessChecker.Alive() {
			writer.WriteHeader(http.StatusServiceUnavailable)
			return
		}
		writer.WriteHeader(http.StatusOK)
	})

	addr := "0.0.0.0:8080"
	klog.Infof("HealthZ is listening on %s", addr)
	httpServer := &http.Server{
		Addr:    addr,
		Handler: mux,
	}

	return httpServer.ListenAndServe()
}

// RevisionConfigMaps is a list of configmaps that are directly copied for the current values. A different actor/controller modifies these.
// the first element should be the configmap that contains the static pod manifest
var RevisionConfigMaps = []revision.RevisionResource{
Expand Down
2 changes: 2 additions & 0 deletions vendor/github.com/openshift/api/apiserver/.codegen.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions vendor/github.com/openshift/api/apps/v1/generated.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion vendor/github.com/openshift/api/apps/v1/types.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5bdb9ed

Please sign in to comment.