diff --git a/pkg/operator/staticpod/controller/installer/installer_controller.go b/pkg/operator/staticpod/controller/installer/installer_controller.go
index 3a932df10d..e2f66e2290 100644
--- a/pkg/operator/staticpod/controller/installer/installer_controller.go
+++ b/pkg/operator/staticpod/controller/installer/installer_controller.go
@@ -98,6 +98,8 @@ type InstallerController struct {
 	clock            clock.Clock
 	installerBackOff func(count int) time.Duration
 	fallbackBackOff  func(count int) time.Duration
+
+	installPrecondition StaticPodInstallerPreconditionsFuncType
 }
 
 // InstallerPodMutationFunc is a function that has a chance at changing the installer pod before it is created
@@ -128,6 +130,14 @@ func (c *InstallerController) WithStartupMonitorSupport(startupMonitorEnabled fu
 	return c
 }
 
+// StaticPodInstallerPreconditionsFuncType checks if the install precondition is met (is true) and then proceeds with the creation of the installer pod
+type StaticPodInstallerPreconditionsFuncType func(ctx context.Context) (bool, error)
+
+func (c *InstallerController) WithInstallPrecondition(installPrecondition StaticPodInstallerPreconditionsFuncType) *InstallerController {
+	c.installPrecondition = installPrecondition
+	return c
+}
+
 // staticPodState is the status of a static pod that has been installed to a node.
 type staticPodState int
 
@@ -478,7 +488,8 @@ func (c *InstallerController) manageInstallationPods(ctx context.Context, operat
 			}
 		}
 
-		if err := c.ensureInstallerPod(ctx, operatorSpec, currNodeState); err != nil {
+		requeue, err := c.ensureInstallerPod(ctx, operatorSpec, currNodeState)
+		if err != nil {
 			c.eventRecorder.Warningf("InstallerPodFailed", "Failed to create installer pod for revision %d count %d on node %q: %v",
 				currNodeState.TargetRevision, currNodeState.LastFailedCount, currNodeState.NodeName, err)
 			// if a newer revision is pending, continue, so we retry later with the latest available revision
@@ -486,6 +497,10 @@ func (c *InstallerController) manageInstallationPods(ctx context.Context, operat
 				return true, 0, err
 			}
 		}
+		if requeue {
+			klog.V(4).Infof("Requeuing the creation of installer pod for revision %d on node %q", currNodeState.TargetRevision, currNodeState.NodeName)
+			return true, 0, nil
+		}
 	}
 
 	newCurrNodeState, _, reason, err := c.newNodeStateForInstallInProgress(ctx, currNodeState, operatorStatus.LatestAvailableRevision)
@@ -851,7 +866,27 @@ func getInstallerPodName(ns *operatorv1.NodeStatus) string {
 }
 
 // ensureInstallerPod creates the installer pod with the secrets required to if it does not exist already
-func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSpec *operatorv1.StaticPodOperatorSpec, ns *operatorv1.NodeStatus) error {
+// returns whether to requeue, and any error that happened while ensuring the installer pod
+func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSpec *operatorv1.StaticPodOperatorSpec, ns *operatorv1.NodeStatus) (bool, error) {
+	// check whether a new installer pod should be created based on the precondition being met;
+	// the precondition is only evaluated when the installer pod isn't already present
+	installerPodName := getInstallerPodName(ns)
+	_, err := c.podsGetter.Pods(c.targetNamespace).Get(ctx, installerPodName, metav1.GetOptions{})
+	if apierrors.IsNotFound(err) {
+		if c.installPrecondition != nil {
+			shouldInstall, err := c.installPrecondition(ctx)
+			if err != nil {
+				return true, err
+			}
+			if !shouldInstall {
+				klog.Infof("Preconditions not met, skipping the creation of installer pod %s", installerPodName)
+				return true, nil
+			}
+		}
+	} else if err != nil {
+		return true, err
+	}
+
 	pod := resourceread.ReadPodV1OrDie(podTemplate)
 	pod.Namespace = c.targetNamespace
 
@@ -862,19 +897,19 @@ func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSp
 	ownerRefs, err := c.ownerRefsFn(ctx, ns.TargetRevision)
 	if err != nil {
-		return fmt.Errorf("unable to set installer pod ownerrefs: %+v", err)
+		return true, fmt.Errorf("unable to set installer pod ownerrefs: %+v", err)
 	}
 	pod.OwnerReferences = ownerRefs
 
 	if c.configMaps[0].Optional {
-		return fmt.Errorf("pod configmap %s is required, cannot be optional", c.configMaps[0].Name)
+		return true, fmt.Errorf("pod configmap %s is required, cannot be optional", c.configMaps[0].Name)
 	}
 
 	// if the startup monitor is enabled we need to acquire an exclusive lock
 	// to coordinate the work between the installer and the monitor
 	withStartupMonitorSupport, err := c.startupMonitorEnabled()
 	if err != nil {
-		return fmt.Errorf("unable to determine if the startup monitor should be enabled: %v", err)
+		return true, fmt.Errorf("unable to determine if the startup monitor should be enabled: %v", err)
 	}
 
 	args := []string{
@@ -926,12 +961,14 @@ func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSp
 	// Some owners need to change aspects of the pod. Things like arguments for instance
 	for _, fn := range c.installerPodMutationFns {
 		if err := fn(pod, ns.NodeName, operatorSpec, ns.TargetRevision); err != nil {
-			return err
+			return true, err
 		}
 	}
 
-	_, _, err = resourceapply.ApplyPod(ctx, c.podsGetter, c.eventRecorder, pod)
-	return err
+	if _, _, err = resourceapply.ApplyPod(ctx, c.podsGetter, c.eventRecorder, pod); err != nil {
+		return true, err
+	}
+	return false, nil
 }
 
 func (c *InstallerController) setOwnerRefs(ctx context.Context, revision int32) ([]metav1.OwnerReference, error) {
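Editor's note: a minimal sketch (not part of this diff) of a consumer-side precondition. Only the func(ctx context.Context) (bool, error) shape comes from the patch above; the quorumGuard wrapper and the healthy callback are hypothetical placeholders.

    package preconditions

    import (
        "context"
        "fmt"
    )

    // quorumGuard adapts a caller-supplied health check into the
    // StaticPodInstallerPreconditionsFuncType shape consumed by
    // WithInstallPrecondition.
    func quorumGuard(healthy func(ctx context.Context) (bool, error)) func(ctx context.Context) (bool, error) {
        return func(ctx context.Context) (bool, error) {
            ok, err := healthy(ctx)
            if err != nil {
                // an error surfaces from ensureInstallerPod as (true, err):
                // the sync is requeued and the failure is reported
                return false, fmt.Errorf("evaluating install precondition: %w", err)
            }
            // false with a nil error becomes (true, nil): installer pod
            // creation is skipped and the sync is requeued quietly
            return ok, nil
        }
    }

Because the controller only evaluates the precondition when the installer pod for the target revision does not already exist, an installation that is in flight is driven to completion regardless of the gate.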
diff --git a/pkg/operator/staticpod/controller/installer/installer_controller_test.go b/pkg/operator/staticpod/controller/installer/installer_controller_test.go
index 45eb711040..935dad56db 100644
--- a/pkg/operator/staticpod/controller/installer/installer_controller_test.go
+++ b/pkg/operator/staticpod/controller/installer/installer_controller_test.go
@@ -20,6 +20,7 @@ import (
 	"github.com/openshift/library-go/pkg/operator/v1helpers"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
@@ -1268,7 +1269,7 @@ func TestEnsureInstallerPod(t *testing.T) {
 			c.ownerRefsFn = func(ctx context.Context, revision int32) ([]metav1.OwnerReference, error) {
 				return []metav1.OwnerReference{}, nil
 			}
-			err := c.ensureInstallerPod(context.TODO(), &operatorv1.StaticPodOperatorSpec{}, &operatorv1.NodeStatus{
+			_, err := c.ensureInstallerPod(context.TODO(), &operatorv1.StaticPodOperatorSpec{}, &operatorv1.NodeStatus{
 				NodeName:       "test-node-1",
 				TargetRevision: 1,
 			})
@@ -2062,15 +2063,16 @@ func TestCreateInstallerPodMultiNode(t *testing.T) {
 
 func TestInstallerController_manageInstallationPods(t *testing.T) {
 	type fields struct {
-		targetNamespace      string
-		staticPodName        string
-		configMaps           []revision.RevisionResource
-		secrets              []revision.RevisionResource
-		command              []string
-		operatorConfigClient v1helpers.StaticPodOperatorClient
-		kubeClient           kubernetes.Interface
-		eventRecorder        events.Recorder
-		installerPodImageFn  func() string
+		targetNamespace       string
+		staticPodName         string
+		configMaps            []revision.RevisionResource
+		secrets               []revision.RevisionResource
+		command               []string
+		operatorConfigClient  v1helpers.StaticPodOperatorClient
+		kubeClient            kubernetes.Interface
+		eventRecorder         events.Recorder
+		installerPodImageFn   func() string
+		installerPrecondition StaticPodInstallerPreconditionsFuncType
 	}
 	type args struct {
 		operatorSpec           *operatorv1.StaticPodOperatorSpec
@@ -2078,35 +2080,223 @@ func TestInstallerController_manageInstallationPods(t *testing.T) {
 		resourceVersion        string
 	}
 	tests := []struct {
-		name    string
-		fields  fields
-		args    args
-		want    bool
-		wantErr bool
+		name           string
+		fields         fields
+		args           args
+		staticPods     []*corev1.Pod
+		wantNodeStatus []operatorv1.NodeStatus
+		wantRequeue    bool
+		wantErr        bool
 	}{
-		// TODO: Add test cases.
+		{
+			name: "if the precondition is false and a new revision is to be rolled out, requeue the creation of installer pod",
+			fields: fields{
+				targetNamespace: "test",
+				staticPodName:   "test-pod",
+				command:         []string{},
+				eventRecorder:   eventstesting.NewTestingEventRecorder(t),
+				installerPrecondition: func(ctx context.Context) (bool, error) {
+					return false, nil
+				},
+			},
+			args: args{
+				operatorSpec: &operatorv1.StaticPodOperatorSpec{
+					OperatorSpec: operatorv1.OperatorSpec{},
+				},
+				originalOperatorStatus: &operatorv1.StaticPodOperatorStatus{
+					LatestAvailableRevision: 4,
+					NodeStatuses: []operatorv1.NodeStatus{
+						{
+							NodeName:        "test-node-0",
+							CurrentRevision: 4,
+						},
+						{
+							NodeName:        "test-node-1",
+							CurrentRevision: 3,
+							TargetRevision:  4,
+						},
+					},
+				},
+			},
+			staticPods: []*corev1.Pod{
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-0"), 4, corev1.PodRunning, true),
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-1"), 4, corev1.PodRunning, true),
+			},
+			wantNodeStatus: []operatorv1.NodeStatus{
+				{
+					NodeName:        "test-node-0",
+					CurrentRevision: 4,
+				},
+				{
+					NodeName:        "test-node-1",
+					CurrentRevision: 3,
+					TargetRevision:  4,
+				},
+			},
+			wantRequeue: true,
+			wantErr:     false,
+		},
+		{
+			name: "if the precondition is false and no new revisions are to be rolled out, no requeue of the installer pod",
+			fields: fields{
+				targetNamespace: "test",
+				staticPodName:   "test-pod",
+				command:         []string{},
+				installerPrecondition: func(ctx context.Context) (bool, error) {
+					return false, nil
+				},
+			},
+			args: args{
+				operatorSpec: &operatorv1.StaticPodOperatorSpec{
+					OperatorSpec: operatorv1.OperatorSpec{},
+				},
+				originalOperatorStatus: &operatorv1.StaticPodOperatorStatus{
+					LatestAvailableRevision: 3,
+					NodeStatuses: []operatorv1.NodeStatus{
+						{
+							NodeName:        "test-node-0",
+							CurrentRevision: 3,
+						},
+						{
+							NodeName:        "test-node-1",
+							CurrentRevision: 3,
+						},
+					},
+				},
+			},
+			staticPods: []*corev1.Pod{
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-0"), 4, corev1.PodRunning, true),
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-1"), 4, corev1.PodRunning, true),
+			},
+			wantNodeStatus: []operatorv1.NodeStatus{
+				{
+					NodeName:        "test-node-0",
+					CurrentRevision: 3,
+				},
+				{
+					NodeName:        "test-node-1",
+					CurrentRevision: 3,
+				},
+			},
+			wantRequeue: false,
+			wantErr:     false,
+		},
+		{
+			name: "if the precondition is true and a new revision is to be rolled out, no requeue of the installer pod",
+			fields: fields{
+				targetNamespace: "test",
+				staticPodName:   "test-pod",
+				configMaps:      []revision.RevisionResource{{Name: "test-config"}},
+				secrets:         []revision.RevisionResource{{Name: "test-secret"}},
+				command:         []string{"/bin/true"},
+				installerPodImageFn: func() string {
+					return "docker.io/foo/bar"
+				},
+				installerPrecondition: func(ctx context.Context) (bool, error) {
+					return true, nil
+				},
+			},
+			args: args{
+				operatorSpec: &operatorv1.StaticPodOperatorSpec{
+					OperatorSpec: operatorv1.OperatorSpec{},
+				},
+				originalOperatorStatus: &operatorv1.StaticPodOperatorStatus{
+					LatestAvailableRevision: 4,
+					NodeStatuses: []operatorv1.NodeStatus{
+						{
+							NodeName:        "test-node-0",
+							CurrentRevision: 3,
+							TargetRevision:  4,
+						},
+						{
+							NodeName:        "test-node-1",
+							CurrentRevision: 3,
+						},
+						{
+							NodeName:        "test-node-2",
+							CurrentRevision: 3,
+						},
+					},
+				},
+			},
+			staticPods: []*corev1.Pod{
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-0"), 4, corev1.PodRunning, true),
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-1"), 4, corev1.PodRunning, true),
+				newStaticPod(mirrorPodNameForNode("test-pod", "test-node-2"), 4, corev1.PodRunning, true),
+			},
+			wantNodeStatus: []operatorv1.NodeStatus{
+				{
+					NodeName:        "test-node-0",
+					CurrentRevision: 4,
+				},
+				{
+					NodeName:        "test-node-1",
+					CurrentRevision: 3,
+				},
+				{
+					NodeName:        "test-node-2",
+					CurrentRevision: 3,
+				},
+			},
+			wantRequeue: false,
+			wantErr:     false,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
+			kubeClient := fake.NewSimpleClientset(
+				&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: tt.fields.targetNamespace, Name: "test-config"}},
+				&corev1.Secret{ObjectMeta: metav1.ObjectMeta{Namespace: tt.fields.targetNamespace, Name: "test-secret"}},
+				&corev1.Secret{ObjectMeta: metav1.ObjectMeta{Namespace: tt.fields.targetNamespace, Name: fmt.Sprintf("%s-%d", "test-secret", 1)}},
+				&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: tt.fields.targetNamespace, Name: fmt.Sprintf("%s-%d", "test-config", 1)}},
+			)
+
+			kubeClient.PrependReactor("create", "pods", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+				installerPod := action.(ktesting.CreateAction).GetObject().(*corev1.Pod)
+				installerPod.Status.Phase = corev1.PodSucceeded
+				return false, nil, nil
+			})
+
+			for _, pod := range tt.staticPods {
+				if _, err := kubeClient.CoreV1().Pods(tt.fields.targetNamespace).Create(context.TODO(), pod, metav1.CreateOptions{}); err != nil {
+					t.Fatal(err)
+				}
+			}
+			eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events(tt.fields.targetNamespace), "test-operator", &corev1.ObjectReference{})
+
 			c := &InstallerController{
 				targetNamespace:     tt.fields.targetNamespace,
 				staticPodName:       tt.fields.staticPodName,
 				configMaps:          tt.fields.configMaps,
 				secrets:             tt.fields.secrets,
 				command:             tt.fields.command,
-				operatorClient:      tt.fields.operatorConfigClient,
-				configMapsGetter:    tt.fields.kubeClient.CoreV1(),
-				podsGetter:          tt.fields.kubeClient.CoreV1(),
-				eventRecorder:       tt.fields.eventRecorder,
+				operatorClient:      v1helpers.NewFakeStaticPodOperatorClient(tt.args.operatorSpec, tt.args.originalOperatorStatus, nil, nil),
+				configMapsGetter:    kubeClient.CoreV1(),
+				podsGetter:          kubeClient.CoreV1(),
+				secretsGetter:       kubeClient.CoreV1(),
+				eventRecorder:       eventRecorder,
 				installerPodImageFn: tt.fields.installerPodImageFn,
+				installPrecondition: tt.fields.installerPrecondition,
+			}
+			c.ownerRefsFn = func(ctx context.Context, revision int32) ([]metav1.OwnerReference, error) {
+				return []metav1.OwnerReference{}, nil
+			}
+			c.startupMonitorEnabled = func() (bool, error) {
+				return false, nil
 			}
+
 			got, _, err := c.manageInstallationPods(context.TODO(), tt.args.operatorSpec, tt.args.originalOperatorStatus)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("InstallerController.manageInstallationPods() error = %v, wantErr %v", err, tt.wantErr)
 				return
 			}
-			if got != tt.want {
-				t.Errorf("InstallerController.manageInstallationPods() = %v, want %v", got, tt.want)
+			if got != tt.wantRequeue {
+				t.Errorf("InstallerController.manageInstallationPods() = %v, wantRequeue %v", got, tt.wantRequeue)
+			}
+
+			_, currOperatorStatus, _, _ := c.operatorClient.GetStaticPodOperatorState()
+			if !reflect.DeepEqual(currOperatorStatus.NodeStatuses, tt.wantNodeStatus) {
+				t.Errorf("currOperatorStatus.NodeStatuses = %v, wantNodeStatus = %v", currOperatorStatus.NodeStatuses, tt.wantNodeStatus)
 			}
 		})
 	}
@@ -2601,6 +2791,129 @@ func TestTimeToWait(t *testing.T) {
 	}
 }
 
+// Test to ensure node statuses are updated for ongoing installations
+func TestInstallerController_TestNodeStatuses_ForOngoingInstallations(t *testing.T) {
+	kubeClient := fake.NewSimpleClientset(
+		&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: "test-config"}},
+		&corev1.Secret{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: "test-secret"}},
+		&corev1.Secret{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: fmt.Sprintf("%s-%d", "test-secret", 1)}},
+		&corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: fmt.Sprintf("%s-%d", "test-config", 1)}},
+	)
+
+	kubeClient.PrependReactor("create", "pods", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) {
+		installerPod := action.(ktesting.CreateAction).GetObject().(*corev1.Pod)
+		installerPod.Status.Phase = corev1.PodRunning // the created installer pod is in progress
+		return false, installerPod, nil
+	})
+
+	operatorSpec := &operatorv1.StaticPodOperatorSpec{
+		OperatorSpec: operatorv1.OperatorSpec{
+			ManagementState: operatorv1.Managed,
+		},
+	}
+
+	// the node needs to be rolled out from revision 0 to 1
+	originalOperatorStatus := &operatorv1.StaticPodOperatorStatus{
+		LatestAvailableRevision: 1,
+		NodeStatuses: []operatorv1.NodeStatus{
+			{
+				NodeName:        "test-node-0",
+				CurrentRevision: 0,
+				TargetRevision:  1,
+			},
+		},
+	}
+
+	expectedNodeStatus := []operatorv1.NodeStatus{
+		{
+			NodeName:        "test-node-0",
+			CurrentRevision: 1,
+		},
+	}
+
+	staticPod := newStaticPod(mirrorPodNameForNode("test-pod", "test-node-0"), 1, corev1.PodRunning, true)
+	if _, err := kubeClient.CoreV1().Pods("test").Create(context.TODO(), staticPod, metav1.CreateOptions{}); err != nil {
+		t.Fatal(err)
+	}
+
+	eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &corev1.ObjectReference{})
+	c := &InstallerController{
+		targetNamespace:     "test",
+		staticPodName:       "test-pod",
+		configMaps:          []revision.RevisionResource{{Name: "test-config"}},
+		secrets:             []revision.RevisionResource{{Name: "test-secret"}},
+		command:             []string{"/bin/true"},
+		operatorClient:      v1helpers.NewFakeStaticPodOperatorClient(operatorSpec, originalOperatorStatus, nil, nil),
+		configMapsGetter:    kubeClient.CoreV1(),
+		secretsGetter:       kubeClient.CoreV1(),
+		podsGetter:          kubeClient.CoreV1(),
+		eventRecorder:       eventRecorder,
+		installerPodImageFn: func() string { return "docker.io/foo/bar" },
+	}
+	c.ownerRefsFn = func(ctx context.Context, revision int32) ([]metav1.OwnerReference, error) {
+		return []metav1.OwnerReference{}, nil
+	}
+	c.startupMonitorEnabled = func() (bool, error) {
+		return false, nil
+	}
+
+	// t0: precondition is true and a new revision is to be rolled out, so a new installer pod should be created and hence no requeue
+	c.installPrecondition = func(ctx context.Context) (bool, error) {
+		return true, nil
+	}
+
+	requeue, _, err := c.manageInstallationPods(context.TODO(), operatorSpec, originalOperatorStatus)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if requeue {
+		t.Errorf("the creation of the installer pod shouldn't be requeued as the precondition is true")
+	}
+
+	// since the installer pod is still in progress, the operator status should remain unchanged
+	currOperatorSpec, currOperatorStatus, _, _ := c.operatorClient.GetStaticPodOperatorState()
+	if !reflect.DeepEqual(originalOperatorStatus, currOperatorStatus) {
+		t.Errorf("originalOperatorStatus %v shouldn't be changed to %v as the installation is still in progress", originalOperatorStatus, currOperatorStatus)
+	}
+
+	// verify that the new installer pod was created and is in progress
+	installerPodName := "installer-1-test-node-0"
+	installerPod, err := kubeClient.CoreV1().Pods(c.targetNamespace).Get(context.TODO(), installerPodName, metav1.GetOptions{})
+	if apierrors.IsNotFound(err) {
+		t.Fatalf("installer pod %s was not created as expected", installerPodName)
+	} else if err != nil {
+		t.Fatalf("error retrieving installer pod %s: %v", installerPodName, err)
+	}
+
+	// t1: the precondition becomes false
+	c.installPrecondition = func(ctx context.Context) (bool, error) {
+		return false, nil
+	}
+
+	// the installer pod that was in progress is now marked completed
+	installerPod.Status.Phase = corev1.PodSucceeded
+	if _, err := kubeClient.CoreV1().Pods(c.targetNamespace).UpdateStatus(context.TODO(), installerPod, metav1.UpdateOptions{}); err != nil {
+		t.Fatal(err)
+	}
+
+	// t2: sync happens again; creation shouldn't be requeued, and the node statuses should be updated since the installer pod is now complete
+	requeue, _, err = c.manageInstallationPods(context.TODO(), currOperatorSpec, currOperatorStatus)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if requeue {
+		t.Errorf("the creation of the installer pod shouldn't be requeued for an ongoing installation, even when the precondition is false")
+	}
+
+	// verify the node status was updated even though the precondition is false, i.e. the ongoing installation was driven to completion
+	_, currOperatorStatus, _, _ = c.operatorClient.GetStaticPodOperatorState()
+	if !reflect.DeepEqual(currOperatorStatus.NodeStatuses, expectedNodeStatus) {
+		t.Errorf("currOperatorStatus.NodeStatuses = %v, expectedNodeStatus = %v", currOperatorStatus.NodeStatuses, expectedNodeStatus)
+	}
+}
+
 func timestamp(s string) time.Time {
 	t, err := time.Parse(time.RFC3339, fmt.Sprintf("2021-01-02T%sZ", s))
 	if err != nil {
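Editor's note: the test above flips the gate by reassigning c.installPrecondition between syncs. A closure-based toggle is an equivalent pattern; TogglePrecondition below is a hypothetical helper, not part of this patch.

    package preconditions

    import "context"

    // TogglePrecondition returns a precondition-shaped check plus a setter so a
    // test can flip the gate between syncs. It is not safe for concurrent use,
    // which is acceptable for single-threaded test syncs.
    func TogglePrecondition(initial bool) (check func(ctx context.Context) (bool, error), set func(bool)) {
        allowed := initial
        check = func(ctx context.Context) (bool, error) {
            return allowed, nil
        }
        set = func(v bool) {
            allowed = v
        }
        return check, set
    }

With check installed as c.installPrecondition, calling set(false) after the first sync reproduces the t1 step above without touching the controller again.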
diff --git a/pkg/operator/staticpod/controllers.go b/pkg/operator/staticpod/controllers.go
index a2ff6fd89e..34d0d8f248 100644
--- a/pkg/operator/staticpod/controllers.go
+++ b/pkg/operator/staticpod/controllers.go
@@ -75,6 +75,8 @@ type staticPodOperatorControllerBuilder struct {
 	readyzEndpoint                string
 	pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType
 	guardCreateConditionalFunc    func() (bool, bool, error)
+
+	installPrecondition installer.StaticPodInstallerPreconditionsFuncType
 }
 
 func NewBuilder(
@@ -114,6 +116,7 @@ type Builder interface {
 	// This can help to drain/maintain a node and recover without a manual intervention when multiple instances of nodes or pods are misbehaving.
 	// Use this with caution, as this option can disrupt perspective pods that have not yet had a chance to become healthy.
 	WithPodDisruptionBudgetGuard(operatorNamespace, operatorName, readyzPort, readyzEndpoint string, pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType, createConditionalFunc func() (bool, bool, error)) Builder
+	WithInstallPrecondition(installPrecondition installer.StaticPodInstallerPreconditionsFuncType) Builder
 	ToControllers() (manager.ControllerManager, error)
 }
 
@@ -196,6 +199,11 @@ func (b *staticPodOperatorControllerBuilder) WithPodDisruptionBudgetGuard(operat
 	return b
 }
 
+func (b *staticPodOperatorControllerBuilder) WithInstallPrecondition(installPrecondition installer.StaticPodInstallerPreconditionsFuncType) Builder {
+	b.installPrecondition = installPrecondition
+	return b
+}
+
 func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.ControllerManager, error) {
 	manager := manager.NewControllerManager()
 
@@ -257,6 +265,8 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller
 		b.installerPodMutationFunc,
 	).WithMinReadyDuration(
 		b.minReadyDuration,
+	).WithInstallPrecondition(
+		b.installPrecondition,
 	), 1)
 
 	manager.WithController(installerstate.NewInstallerStateController(
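Editor's note: a wiring sketch for the builder hook. WithInstallPrecondition and ToControllers are the methods added by this patch; the builder construction is elided, and isSafeToRollOut is a hypothetical caller-supplied gate.

    var b staticpod.Builder // assume: obtained via staticpod.NewBuilder(...)
    b = b.WithInstallPrecondition(func(ctx context.Context) (bool, error) {
        // e.g. refuse to roll out a new revision during a maintenance window
        return isSafeToRollOut(ctx)
    })
    controllerManager, err := b.ToControllers()
    // handle err, then run controllerManager as usual
    _ = controllerManager

Leaving the precondition unset remains valid: ToControllers always forwards b.installPrecondition, and the installer controller only consults it when non-nil, so existing builder users are unaffected.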