Skip to content

Commit

Permalink
Merge pull request #344 from mfojtik/worker-controller
Browse files Browse the repository at this point in the history
Bug 1878776: ingressnodesavailable: add controller that checks if router can schedule pods
  • Loading branch information
openshift-merge-robot committed Sep 14, 2020
2 parents f5d5b54 + 88e6eba commit ee0dce6
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 0 deletions.
19 changes: 19 additions & 0 deletions pkg/controllers/ingressnodesavailable/OWNERS
@@ -0,0 +1,19 @@
approvers:
- ironcladlou
- knobunc
- pravisankar
- ramr
- Miciah
- frobware
- danehans
- sgreene570
reviewers:
- ironcladlou
- knobunc
- pravisankar
- ramr
- Miciah
- frobware
- danehans
- sgreene570
component: Routing
@@ -0,0 +1,107 @@
package ingressnodesavailable

import (
"context"
"fmt"
"time"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
corev1informers "k8s.io/client-go/informers/core/v1"
corev1listers "k8s.io/client-go/listers/core/v1"

operatorv1 "github.com/openshift/api/operator/v1"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/common"
"github.com/openshift/library-go/pkg/controller/factory"
"github.com/openshift/library-go/pkg/operator/events"
"github.com/openshift/library-go/pkg/operator/v1helpers"
)

// knownConditionNames lists the operator condition types this controller
// hands to common.UpdateControllerConditions alongside the conditions found
// during a sync.
// NOTE(review): presumably conditions named here but absent from the found
// list are reset by that helper — confirm against its implementation.
var knownConditionNames = sets.NewString("ReadyIngressNodesAvailable")

// ingressNodesAvailableController reports the "ReadyIngressNodesAvailable"
// operator condition. On every sync it lists worker and master nodes and sets
// the condition to False when it finds no node that could host ingress pods.
type ingressNodesAvailableController struct {
// operatorClient is passed to common.UpdateControllerConditions to publish
// the conditions computed by sync.
operatorClient v1helpers.OperatorClient
// nodeLister lists nodes by role label from the node informer's cache.
nodeLister corev1listers.NodeLister
}

// NewIngressNodesAvailableController builds the controller that reports
// whether any node is available for ingress pods.
//
// The returned controller is named "IngressNodesAvailableController", is
// registered with both the operator client informer and the node informer,
// and is additionally resynced every minute. Events are emitted through
// eventRecorder.
func NewIngressNodesAvailableController(
	operatorClient v1helpers.OperatorClient,
	eventRecorder events.Recorder,
	nodeInformer corev1informers.NodeInformer,
) factory.Controller {
	c := &ingressNodesAvailableController{
		operatorClient: operatorClient,
		nodeLister:     nodeInformer.Lister(),
	}

	builder := factory.New().
		WithInformers(operatorClient.Informer(), nodeInformer.Informer()).
		WithSync(c.sync).
		ResyncEvery(time.Minute)

	return builder.ToController("IngressNodesAvailableController", eventRecorder)
}

// countReadyWorkerNodes returns how many of the given nodes report a Ready
// condition with status True.
//
// Each node contributes at most one to the result: the scan of a node's
// conditions stops at the first Ready/True entry, so the return value is a
// count of nodes rather than a count of matching conditions. A nil or empty
// slice yields 0.
func countReadyWorkerNodes(nodes []*corev1.Node) int {
	readyNodes := 0
	for _, n := range nodes {
		for _, c := range n.Status.Conditions {
			if c.Type == corev1.NodeReady && c.Status == corev1.ConditionTrue {
				readyNodes++
				// One match is enough; do not count the same node twice.
				break
			}
		}
	}
	return readyNodes
}

// sync computes the "ReadyIngressNodesAvailable" operator condition.
//
// It lists nodes labelled as workers and nodes labelled as masters, and counts
// the ones that could host ingress pods:
//   - every worker that reports Ready, and
//   - every master that reports Ready and does not carry the
//     "node-role.kubernetes.io/master" NoSchedule taint.
//
// When that count is zero the condition is reported as False with reason
// "NoReadyIngressNodes"; otherwise no condition is reported as found. The
// result is published through common.UpdateControllerConditions.
//
// An error from either node listing, or from the condition update, is
// returned to the caller unchanged.
func (c *ingressNodesAvailableController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
	foundConditions := []operatorv1.OperatorCondition{}

	workers, err := c.nodeLister.List(labels.SelectorFromSet(labels.Set{"node-role.kubernetes.io/worker": ""}))
	if err != nil {
		return err
	}

	// This is best-effort: workers can still be tainted and unschedulable, in
	// which case the router has nowhere to run and the auth operator fails.
	workloadReadyNodes := countReadyWorkerNodes(workers)

	// Clusters may run with schedulable masters, so those have to be taken
	// into account before reporting that no ingress nodes are available.
	masters, err := c.nodeLister.List(labels.SelectorFromSet(labels.Set{"node-role.kubernetes.io/master": ""}))
	if err != nil {
		return err
	}

	// Keep only masters that can schedule workloads (e.g. ingress pods).
	schedulableMasters := make([]*corev1.Node, 0, len(masters))
	for _, n := range masters {
		isSchedulable := true
		for _, t := range n.Spec.Taints {
			if t.Effect == corev1.TaintEffectNoSchedule && t.Key == "node-role.kubernetes.io/master" {
				isSchedulable = false
				break
			}
		}
		if isSchedulable {
			schedulableMasters = append(schedulableMasters, n)
		}
	}

	// A schedulable master only helps if it is also Ready, so apply the same
	// readiness check used for workers. A node carrying both role labels may be
	// counted twice; that is harmless because only "zero vs. non-zero" matters.
	workloadReadyNodes += countReadyWorkerNodes(schedulableMasters)

	if workloadReadyNodes == 0 {
		foundConditions = append(foundConditions, operatorv1.OperatorCondition{
			Type:   "ReadyIngressNodesAvailable",
			Status: operatorv1.ConditionFalse,
			Reason: "NoReadyIngressNodes",
			Message: fmt.Sprintf("Authentication require functional ingress which requires at least one schedulable and ready node. Got %d worker nodes and %d master nodes (none are schedulable or ready for ingress pods).",
				len(workers), len(masters)),
		})
	}

	return common.UpdateControllerConditions(c.operatorClient, knownConditionNames, foundConditions)
}
13 changes: 13 additions & 0 deletions pkg/operator/starter.go
Expand Up @@ -11,6 +11,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/klog"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
Expand Down Expand Up @@ -51,6 +52,7 @@ import (
"github.com/openshift/cluster-authentication-operator/pkg/controllers/configobservation/configobservercontroller"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/deployment"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/endpointaccessible"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/ingressnodesavailable"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/ingressstate"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/metadata"
"github.com/openshift/cluster-authentication-operator/pkg/controllers/payload"
Expand Down Expand Up @@ -184,6 +186,9 @@ func prepareOauthOperator(controllerContext *controllercmd.ControllerContext, op

openshiftAuthenticationInformers := operatorCtx.kubeInformersForNamespaces.InformersFor("openshift-authentication")
kubeSystemNamespaceInformers := operatorCtx.kubeInformersForNamespaces.InformersFor("kube-system")

kubeInformers := informers.NewSharedInformerFactory(operatorCtx.kubeClient, 10*time.Minute)

routeInformersNamespaced := routeinformer.NewSharedInformerFactoryWithOptions(routeClient, resync,
routeinformer.WithNamespace("openshift-authentication"),
routeinformer.WithTweakListOptions(singleNameListOptions("oauth-openshift")),
Expand Down Expand Up @@ -360,6 +365,12 @@ func prepareOauthOperator(controllerContext *controllercmd.ControllerContext, op
controllerContext.EventRecorder,
)

workersAvailableController := ingressnodesavailable.NewIngressNodesAvailableController(
operatorCtx.operatorClient,
controllerContext.EventRecorder,
kubeInformers.Core().V1().Nodes(),
)

authRouteCheckController := endpointaccessible.NewOAuthRouteCheckController(
operatorCtx.operatorClient,
routeInformersNamespaced.Route().V1().Routes(),
Expand All @@ -385,6 +396,7 @@ func prepareOauthOperator(controllerContext *controllercmd.ControllerContext, op
operatorCtx.informersToRunFunc = append(operatorCtx.informersToRunFunc,
routeInformersNamespaced.Start,
kubeSystemNamespaceInformers.Start,
kubeInformers.Start,
openshiftAuthenticationInformers.Start,
)

Expand All @@ -403,6 +415,7 @@ func prepareOauthOperator(controllerContext *controllercmd.ControllerContext, op
authRouteCheckController.Run,
authServiceCheckController.Run,
authServiceEndpointCheckController.Run,
workersAvailableController.Run,
func(ctx context.Context, workers int) { staleConditions.Run(ctx, workers) },
func(ctx context.Context, workers int) { ingressStateController.Run(ctx, workers) },
)
Expand Down

0 comments on commit ee0dce6

Please sign in to comment.