diff --git a/Makefile b/Makefile
index ea48ee75..84eb13d1 100644
--- a/Makefile
+++ b/Makefile
@@ -97,6 +97,7 @@ manifests: generate yq kustomize
 	$(MV_TMP_DIR)/v1_service_platform-operators-controller-manager-metrics-service.yaml manifests/0000_50_cluster-platform-operator-manager_02-metricsservice.yaml
 	$(MV_TMP_DIR)/apps_v1_deployment_platform-operators-controller-manager.yaml manifests/0000_50_cluster-platform-operator-manager_06-deployment.yaml
 	$(MV_TMP_DIR)/config.openshift.io_v1_clusteroperator_platform-operators-aggregated.yaml manifests/0000_50_cluster-platform-operator-manager_07-aggregated-clusteroperator.yaml
+	$(MV_TMP_DIR)/config.openshift.io_v1_clusteroperator_platform-operators-core.yaml manifests/0000_50_cluster-platform-operator-manager_07-core-clusteroperator.yaml
 	@# cluster-platform-operator-manager rbacs
 	rm -f manifests/0000_50_cluster-platform-operator-manager_03_rbac.yaml
diff --git a/cmd/main.go b/cmd/main.go
index 49f03143..4230618f 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -23,6 +23,7 @@ import (
 	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
 	// to ensure that exec-entrypoint and run can make use of them.
 	_ "k8s.io/client-go/plugin/pkg/client/auth"
+	"k8s.io/utils/clock"
 
 	configv1 "github.com/openshift/api/config/v1"
 	operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
@@ -35,6 +36,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
 	platformv1alpha1 "github.com/openshift/api/platform/v1alpha1"
+	"github.com/openshift/platform-operators/internal/checker"
 	"github.com/openshift/platform-operators/internal/clusteroperator"
 	"github.com/openshift/platform-operators/internal/controllers"
 	"github.com/openshift/platform-operators/internal/sourcer"
@@ -99,13 +101,24 @@ func main() {
 	}
 	//+kubebuilder:scaffold:builder
 
-	// Add Aggregated CO controller to manager
+	// Add the core and aggregate ClusterOperator controllers to the manager.
 	if err = (&controllers.AggregatedClusterOperatorReconciler{
 		Client:          mgr.GetClient(),
 		ReleaseVersion:  clusteroperator.GetReleaseVariable(),
 		SystemNamespace: util.PodNamespace(systemNamespace),
 	}).SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "AggregatedCO")
+		setupLog.Error(err, "unable to create controller", "controller", "AggregatedClusterOperator")
+		os.Exit(1)
+	}
+	if err = (&controllers.CoreClusterOperatorReconciler{
+		Client:                mgr.GetClient(),
+		Clock:                 clock.RealClock{},
+		Checker:               checker.ListChecker{Client: mgr.GetClient()},
+		AvailabilityThreshold: clusteroperator.DefaultUnavailabilityThreshold,
+		ReleaseVersion:        clusteroperator.GetReleaseVariable(),
+		SystemNamespace:       util.PodNamespace(systemNamespace),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "CoreClusterOperator")
 		os.Exit(1)
 	}
diff --git a/config/clusteroperator/core_clusteroperator.yaml b/config/clusteroperator/core_clusteroperator.yaml
new file mode 100644
index 00000000..c735191a
--- /dev/null
+++ b/config/clusteroperator/core_clusteroperator.yaml
@@ -0,0 +1,22 @@
+apiVersion: config.openshift.io/v1
+kind: ClusterOperator
+metadata:
+  name: core
+spec: {}
+status:
+  versions:
+  - name: operator
+    version: "0.0.1-snapshot"
+  relatedObjects:
+  - group: ''
+    name: openshift-platform-operators
+    resource: namespaces
+  - group: platform.openshift.io
+    name: ""
+    resource: platformoperators
+  - group: core.rukpak.io
+    name: ""
+    resource: bundles
+  - group: core.rukpak.io
+    name: ""
+    resource: bundledeployments
diff --git a/config/clusteroperator/kustomization.yaml b/config/clusteroperator/kustomization.yaml
index ab79f62c..4aa99534 100644
--- a/config/clusteroperator/kustomization.yaml
+++ b/config/clusteroperator/kustomization.yaml
@@ -1,2 +1,3 @@
 resources:
 - aggregated_clusteroperator.yaml
+- core_clusteroperator.yaml
diff --git a/go.mod b/go.mod
index 0148fb82..8e7ca464 100644
--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 	k8s.io/api v0.25.0
 	k8s.io/apimachinery v0.25.0
 	k8s.io/client-go v0.25.0
+	k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed
 	sigs.k8s.io/controller-runtime v0.12.3
 )
@@ -85,7 +86,6 @@ require (
 	k8s.io/component-base v0.24.2 // indirect
 	k8s.io/klog/v2 v2.70.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 // indirect
-	k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed // indirect
 	sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
 	sigs.k8s.io/yaml v1.3.0 // indirect
diff --git a/internal/checker/checker.go b/internal/checker/checker.go
new file mode 100644
index 00000000..bfd3ce5c
--- /dev/null
+++ b/internal/checker/checker.go
@@ -0,0 +1,31 @@
+package checker
+
+import (
+	"context"
+
+	configv1 "github.com/openshift/api/config/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	platformv1alpha1 "github.com/openshift/api/platform/v1alpha1"
+)
+
+type Checker interface {
+	CheckAvailability(context.Context, *configv1.ClusterOperator) bool
+}
+
+type ListChecker struct {
+	client.Client
+}
+
+func (c ListChecker) CheckAvailability(ctx context.Context, _ *configv1.ClusterOperator) bool {
+	poList := &platformv1alpha1.PlatformOperatorList{}
+	return c.List(ctx, poList) == nil
+}
+
+type NoopChecker struct {
+	Available bool
+}
+
+func (c NoopChecker) CheckAvailability(_ context.Context, _ *configv1.ClusterOperator) bool {
+	return c.Available
+}
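
The Checker interface above keeps the availability probe separate from the reconciler, so the probe can be swapped without touching the controller. As an illustration only (not part of this diff), a hypothetical implementation could gate availability on the manager Deployment rather than a PlatformOperator list call; the DeploymentChecker type and its Key field below are invented for the sketch.

// deployment_checker.go (hypothetical sketch, not in this diff): an alternative
// Checker that reports availability from the manager Deployment's status.
package checker

import (
	"context"

	configv1 "github.com/openshift/api/config/v1"
	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

type DeploymentChecker struct {
	client.Client
	// Key names the Deployment to probe (illustrative field, not part of this change).
	Key types.NamespacedName
}

func (c DeploymentChecker) CheckAvailability(ctx context.Context, _ *configv1.ClusterOperator) bool {
	d := &appsv1.Deployment{}
	if err := c.Get(ctx, c.Key, d); err != nil {
		return false
	}
	// Treat the manager as available only when at least one replica is serving.
	return d.Status.AvailableReplicas > 0
}
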
diff --git a/internal/clusteroperator/builder.go b/internal/clusteroperator/builder.go
index f3e5c905..4586e533 100644
--- a/internal/clusteroperator/builder.go
+++ b/internal/clusteroperator/builder.go
@@ -8,6 +8,10 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+const (
+	ReasonAsExpected = "AsExpected"
+)
+
 // From https://github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorstatus/builder.go
 // Note: the clock api-machinery library in that package is now deprecated, so it has been removed here.
diff --git a/internal/clusteroperator/types.go b/internal/clusteroperator/types.go
index ce948450..df7be834 100644
--- a/internal/clusteroperator/types.go
+++ b/internal/clusteroperator/types.go
@@ -1,6 +1,9 @@
 package clusteroperator
 
+import "time"
+
 const (
-	CoreResourceName      = "platform-operators-core"
-	AggregateResourceName = "platform-operators-aggregated"
+	CoreResourceName               = "platform-operators-core"
+	AggregateResourceName          = "platform-operators-aggregated"
+	DefaultUnavailabilityThreshold = 5 * time.Minute
 )
diff --git a/internal/clusteroperator/util.go b/internal/clusteroperator/util.go
new file mode 100644
index 00000000..47e0f380
--- /dev/null
+++ b/internal/clusteroperator/util.go
@@ -0,0 +1,45 @@
+package clusteroperator
+
+import (
+	configv1 "github.com/openshift/api/config/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// FindStatusCondition finds the conditionType in conditions.
+// Note: manually vendored from o/library-go/pkg/config/clusteroperator/v1helpers/status.go.
+func FindStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
+	for i := range conditions {
+		if conditions[i].Type == conditionType {
+			return &conditions[i]
+		}
+	}
+	return nil
+}
+
+func NewClusterOperator(name string) *configv1.ClusterOperator {
+	return &configv1.ClusterOperator{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+		},
+		Status: configv1.ClusterOperatorStatus{},
+	}
+}
+
+// SetDefaultStatusConditions adds the default ClusterOperator status conditions to
+// the current Builder parameter. Those default status conditions are
+// Progressing=True, Degraded=False, and Available=False.
+func SetDefaultStatusConditions(builder *Builder, version string) {
+	builder.WithProgressing(configv1.ConditionTrue, "")
+	builder.WithDegraded(configv1.ConditionFalse)
+	builder.WithAvailable(configv1.ConditionFalse, "", "")
+	builder.WithVersion("operator", version)
+}
+
+// SetDefaultRelatedObjects adds the default ClusterOperator related object
+// configurations to the Builder parameter.
+func SetDefaultRelatedObjects(builder *Builder, namespace string) {
+	builder.WithRelatedObject("", "namespaces", "", namespace)
+	builder.WithRelatedObject("platform.openshift.io", "platformoperators", "", "")
+	builder.WithRelatedObject("core.rukpak.io", "bundles", "", "")
+	builder.WithRelatedObject("core.rukpak.io", "bundledeployments", "", "")
+}
diff --git a/internal/controllers/aggregated_clusteroperator_controller.go b/internal/controllers/aggregated_clusteroperator_controller.go
index 409a9449..f833ffa8 100644
--- a/internal/controllers/aggregated_clusteroperator_controller.go
+++ b/internal/controllers/aggregated_clusteroperator_controller.go
@@ -67,14 +67,8 @@ func (r *AggregatedClusterOperatorReconciler) Reconcile(ctx context.Context, req
 	}()
 
 	// Set the default CO status conditions: Progressing=True, Degraded=False, Available=False
-	coBuilder.WithProgressing(configv1.ConditionTrue, "")
-	coBuilder.WithDegraded(configv1.ConditionFalse)
-	coBuilder.WithAvailable(configv1.ConditionFalse, "", "")
-	coBuilder.WithVersion("operator", r.ReleaseVersion)
-	coBuilder.WithRelatedObject("", "namespaces", "", r.SystemNamespace)
-	coBuilder.WithRelatedObject("platform.openshift.io", "platformoperators", "", "")
-	coBuilder.WithRelatedObject("core.rukpak.io", "bundles", "", "")
-	coBuilder.WithRelatedObject("core.rukpak.io", "bundledeployments", "", "")
+	clusteroperator.SetDefaultStatusConditions(coBuilder, r.ReleaseVersion)
+	clusteroperator.SetDefaultRelatedObjects(coBuilder, r.SystemNamespace)
 
 	poList := &platformv1alpha1.PlatformOperatorList{}
 	if err := r.List(ctx, poList); err != nil {
diff --git a/internal/controllers/core_clusteroperator_controller.go b/internal/controllers/core_clusteroperator_controller.go
new file mode 100644
index 00000000..07c8a6f2
--- /dev/null
+++ b/internal/controllers/core_clusteroperator_controller.go
@@ -0,0 +1,166 @@
+/*
+Copyright 2022.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	configv1 "github.com/openshift/api/config/v1"
+	"k8s.io/utils/clock"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	logr "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+
+	"github.com/openshift/platform-operators/internal/checker"
+	"github.com/openshift/platform-operators/internal/clusteroperator"
+)
+
+// TODO(tflannag): Appropriately set the "Progressing" status condition
+// type during cluster upgrade events.
+// FIXME(tflannag): I'm seeing unit test flakes where we're bumping
+// the lastTransitionTime value despite being in the same state as
+// before, which is a bug.
+
+var (
+	errUnavailable = errors.New("platform operators manager has failed an availability check")
+)
+
+type CoreClusterOperatorReconciler struct {
+	client.Client
+	clock.Clock
+	checker.Checker
+
+	ReleaseVersion        string
+	SystemNamespace       string
+	AvailabilityThreshold time.Duration
+}
+
+//+kubebuilder:rbac:groups=platform.openshift.io,resources=platformoperators,verbs=list
+//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators,verbs=get;list;watch
+//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators/status,verbs=update;patch
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+//
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile
+func (r *CoreClusterOperatorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := logr.FromContext(ctx)
+	log.Info("reconciling request", "req", req.NamespacedName)
+	defer log.Info("finished reconciling request", "req", req.NamespacedName)
+
+	coBuilder := clusteroperator.NewBuilder()
+	coWriter := clusteroperator.NewWriter(r.Client)
+
+	core := &configv1.ClusterOperator{}
+	if err := r.Get(ctx, req.NamespacedName, core); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+	defer func() {
+		if err := coWriter.UpdateStatus(ctx, core, coBuilder.GetStatus()); err != nil {
+			log.Error(err, "error updating CO status")
+		}
+	}()
+
+	// Add the default ClusterOperator status configurations to the builder instance.
+	clusteroperator.SetDefaultStatusConditions(coBuilder, r.ReleaseVersion)
+	clusteroperator.SetDefaultRelatedObjects(coBuilder, r.SystemNamespace)
+
+	// check whether we're currently passing the availability checks. note: in
+	// the case where we were previously failing these checks, and we now have passed
+	// them, the expectation is that we're now setting an A=T state and purging any
+	// D=T states.
+	if available := r.CheckAvailability(ctx, core); available {
+		coBuilder.WithAvailable(configv1.ConditionTrue, fmt.Sprintf("The platform operator manager is available at %s", r.ReleaseVersion), clusteroperator.ReasonAsExpected)
+		coBuilder.WithProgressing(configv1.ConditionFalse, "")
+		coBuilder.WithDegraded(configv1.ConditionFalse)
+		return ctrl.Result{}, nil
+	}
+
+	log.Info("manager failed an availability check")
+	// we failed the availability checks and now need to determine whether to set
+	// D=T if this is the first time we've failed an availability check, to avoid
+	// prematurely setting A=F during transient events.
+	if meetsDegradedStatusCriteria(core) {
+		log.Info("setting degraded=true since this is the first violation")
+		// avoid stomping on the current A=T status condition value if that
+		// status condition type was previously set.
+		available := clusteroperator.FindStatusCondition(core.Status.Conditions, configv1.OperatorAvailable)
+		if available != nil && available.Status == configv1.ConditionTrue {
+			coBuilder.WithAvailable(configv1.ConditionTrue, available.Message, available.Reason)
+		}
+		coBuilder.WithDegraded(configv1.ConditionTrue)
+		return ctrl.Result{}, errUnavailable
+	}
+	// check whether the time spent in the D=T state has exceeded the configured
+	// threshold, and mark the ClusterOperator as unavailable.
+	if timeInDegradedStateExceedsThreshold(ctx, core, r.Now(), r.AvailabilityThreshold) {
+		log.Info("adjusted timestamp has exceeded unavailability threshold: setting A=F and P=F")
+
+		coBuilder.WithAvailable(configv1.ConditionFalse, "Exceeded platform operator availability timeout", "ExceededUnavailabilityThreshold")
+		coBuilder.WithProgressing(configv1.ConditionFalse, "Exceeded platform operator availability timeout")
+		coBuilder.WithDegraded(configv1.ConditionTrue)
+	}
+	return ctrl.Result{}, errUnavailable
+}
+
+func meetsDegradedStatusCriteria(co *configv1.ClusterOperator) bool {
+	degraded := clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorDegraded)
+	return degraded == nil || degraded.Status != configv1.ConditionTrue
+}
+
+func timeInDegradedStateExceedsThreshold(
+	ctx context.Context,
+	co *configv1.ClusterOperator,
+	startTime time.Time,
+	threshold time.Duration,
+) bool {
+	degraded := clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorDegraded)
+	if degraded == nil {
+		return false
+	}
+	lastEncounteredTime := degraded.LastTransitionTime
+	adjustedTime := lastEncounteredTime.Add(threshold)
+
+	logr.FromContext(ctx).Info("checking whether time spent in degraded state has exceeded the configured threshold",
+		"threshold", threshold.String(),
+		"current", startTime.String(),
+		"last", lastEncounteredTime.String(),
+		"adjusted", adjustedTime.String(),
+	)
+	// check whether we've exceeded the availability threshold by comparing
+	// the currently recorded lastTransitionTime, adding the threshold buffer, and
+	// verifying whether that adjusted timestamp is less than the current clock timestamp.
+	return adjustedTime.Before(startTime)
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *CoreClusterOperatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&configv1.ClusterOperator{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(object client.Object) bool {
+			// TODO(tflannag): Investigate using a label selector to avoid caching
+			// all clusteroperator resources, and then filtering for the "core" clusteroperator
+			// resource from that shared cache.
+			return object.GetName() == clusteroperator.CoreResourceName
+		}))).
+		Complete(r)
+}
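
Because the reconciler embeds clock.Clock instead of calling time.Now directly, the threshold window can be exercised deterministically. The snippet below is a standalone sketch (not part of this diff) using the k8s.io/utils/clock/testing package from the same k8s.io/utils module to mimic the adjusted-deadline comparison that timeInDegradedStateExceedsThreshold performs; the variable names and printed values are illustrative.

// threshold_sketch.go (illustrative only): how a fake clock drives the
// lastTransitionTime + threshold comparison without any real waiting.
package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clocktesting "k8s.io/utils/clock/testing"
)

func main() {
	threshold := 5 * time.Minute
	fakeClock := clocktesting.NewFakeClock(time.Now())

	// Pretend Degraded=True was just recorded; the deadline mirrors the
	// controller's adjustedTime (lastTransitionTime + threshold).
	lastTransition := metav1.NewTime(fakeClock.Now())
	deadline := lastTransition.Add(threshold)

	fmt.Println(deadline.Before(fakeClock.Now())) // false: still inside the grace window

	// Step the fake clock past the threshold; the same comparison now trips,
	// which is the point where the controller reports Available=False.
	fakeClock.Step(threshold + time.Second)
	fmt.Println(deadline.Before(fakeClock.Now())) // true
}
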
"github.com/onsi/gomega" + + configv1 "github.com/openshift/api/config/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/clock" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/openshift/platform-operators/internal/checker" + "github.com/openshift/platform-operators/internal/clusteroperator" +) + +var _ = Describe("Core ClusterOperator Controller", func() { + var ( + ctx context.Context + ) + BeforeEach(func() { + ctx = context.Background() + }) + + When("the controller cannot successfully perform availability checks", func() { + var ( + r *CoreClusterOperatorReconciler + c client.Client + cancel context.CancelFunc + + co *configv1.ClusterOperator + ) + BeforeEach(func() { + mgr, err := manager.New(cfg, manager.Options{ + MetricsBindAddress: "0", + Scheme: scheme, + }) + Expect(err).ToNot(HaveOccurred()) + + c = mgr.GetClient() + + r = &CoreClusterOperatorReconciler{ + Client: c, + Clock: clock.RealClock{}, + Checker: checker.NoopChecker{ + Available: false, + }, + AvailabilityThreshold: 15 * time.Second, + } + + ctx, cancel = context.WithCancel(context.Background()) + go func() { Expect(mgr.GetCache().Start(ctx)) }() + Expect(mgr.GetCache().WaitForCacheSync(ctx)).To(BeTrue()) + + co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName) + Expect(c.Create(ctx, co)).To(Succeed()) + }) + AfterEach(func() { + Expect(c.Delete(ctx, co)).To(Succeed()) + cancel() + }) + + It("should result in the clusteroperator reporting a D=T after during the initial availability failures", func() { + By("ensuring the next reconciliation returns an error") + _, err := r.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: clusteroperator.CoreResourceName}, + }) + Expect(err).ToNot(BeNil()) + + By("ensuring the clusteroperator has been updated with D=T") + Eventually(func() (bool, error) { + co := &configv1.ClusterOperator{} + if err = r.Get(context.Background(), types.NamespacedName{Name: clusteroperator.CoreResourceName}, co); err != nil { + return false, err + } + + degraded := clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorDegraded) + return degraded != nil && degraded.Status == configv1.ConditionTrue, nil + }).Should(BeTrue()) + }) + + It("should eventually return an available=false status after exceeding the threshold", func() { + Eventually(func() (bool, error) { + _, err := r.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: clusteroperator.CoreResourceName}, + }) + Expect(err).ToNot(BeNil()) + + co := &configv1.ClusterOperator{} + if err := r.Get(context.Background(), types.NamespacedName{Name: clusteroperator.CoreResourceName}, co); err != nil { + return false, err + } + available := clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable) + return available == nil || available.Status != configv1.ConditionFalse, nil + }).Should(BeTrue()) + }) + }) +}) + +var _ = Describe("meetsDegradedStatusCriteria", func() { + When("the co has an empty status", func() { + var ( + co *configv1.ClusterOperator + ) + BeforeEach(func() { + co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName) + }) + It("should return true", func() { + Expect(meetsDegradedStatusCriteria(co)).To(BeTrue()) + }) + }) + + When("the co is reporting D=F status", func() { + var ( + co 
+		var (
+			co *configv1.ClusterOperator
+		)
+		BeforeEach(func() {
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+			SetStatusCondition(co, metav1.Condition{
+				Type:   string(configv1.OperatorDegraded),
+				Status: metav1.ConditionStatus(configv1.ConditionFalse),
+			})
+		})
+		It("should return true", func() {
+			Expect(meetsDegradedStatusCriteria(co)).To(BeTrue())
+		})
+	})
+
+	When("the co is reporting a D=T status", func() {
+		var (
+			co *configv1.ClusterOperator
+		)
+		BeforeEach(func() {
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+			SetStatusCondition(co, metav1.Condition{
+				Type:   string(configv1.OperatorDegraded),
+				Status: metav1.ConditionStatus(configv1.ConditionTrue),
+			})
+		})
+		It("should return false", func() {
+			Expect(meetsDegradedStatusCriteria(co)).To(BeFalse())
+		})
+	})
+})
+
+var _ = Describe("timeInDegradedStateExceedsThreshold", func() {
+	var (
+		ctx context.Context
+	)
+	BeforeEach(func() {
+		ctx = context.Background()
+	})
+	When("the co has an empty status", func() {
+		var (
+			co *configv1.ClusterOperator
+		)
+		BeforeEach(func() {
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+		})
+		It("should return false", func() {
+			Expect(timeInDegradedStateExceedsThreshold(ctx, co, time.Now(), 15*time.Second)).To(BeFalse())
+		})
+	})
+
+	When("the co is reporting D=F status", func() {
+		var (
+			co *configv1.ClusterOperator
+		)
+		BeforeEach(func() {
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+			SetStatusCondition(co, metav1.Condition{
+				Type:   string(configv1.OperatorDegraded),
+				Status: metav1.ConditionStatus(configv1.ConditionFalse),
+			})
+		})
+		It("should return false", func() {
+			Expect(timeInDegradedStateExceedsThreshold(ctx, co, time.Now(), 15*time.Second)).To(BeFalse())
+		})
+	})
+
+	When("the co is reporting a D=T status that's before the threshold", func() {
+		var (
+			co *configv1.ClusterOperator
+		)
+		BeforeEach(func() {
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+			SetStatusCondition(co, metav1.Condition{
+				Type:   string(configv1.OperatorDegraded),
+				Status: metav1.ConditionStatus(configv1.ConditionTrue),
+			})
+		})
+		It("should return false", func() {
+			Expect(timeInDegradedStateExceedsThreshold(ctx, co, time.Now(), 15*time.Second)).To(BeFalse())
+		})
+	})
+
+	When("the co is reporting a D=T status that exceeds the threshold", func() {
+		var (
+			co    *configv1.ClusterOperator
+			start time.Time
+		)
+		BeforeEach(func() {
+			start = time.Now()
+
+			co = clusteroperator.NewClusterOperator(clusteroperator.CoreResourceName)
+			SetStatusCondition(co, metav1.Condition{
+				Type:               string(configv1.OperatorDegraded),
+				Status:             metav1.ConditionStatus(configv1.ConditionTrue),
+				LastTransitionTime: metav1.NewTime(start.Add(15 * time.Second)),
+			})
+		})
+		It("should return false", func() {
+			Expect(timeInDegradedStateExceedsThreshold(ctx, co, start, 15*time.Second)).To(BeFalse())
+		})
+	})
+})
+
+func SetStatusCondition(co *configv1.ClusterOperator, cond metav1.Condition) {
+	conditions := convertToMetaV1Conditions(co.Status.Conditions)
+	meta.SetStatusCondition(&conditions, metav1.Condition{
+		Type:    cond.Type,
+		Status:  cond.Status,
+		Reason:  cond.Reason,
+		Message: cond.Message,
+	})
+	co.Status.Conditions = convertToClusterOperatorConditions(conditions)
+}
+
+func convertToMetaV1Conditions(in []configv1.ClusterOperatorStatusCondition) []metav1.Condition {
+	out := make([]metav1.Condition, 0, len(in))
+	for _, c := range in {
+		out = append(out, metav1.Condition{
+			Type:               string(c.Type),
+			Status:             metav1.ConditionStatus(c.Status),
+			Reason:             c.Reason,
+			Message:            c.Message,
+			LastTransitionTime: c.LastTransitionTime,
+		})
+	}
+	return out
+}
+
+func convertToClusterOperatorConditions(in []metav1.Condition) []configv1.ClusterOperatorStatusCondition {
+	out := make([]configv1.ClusterOperatorStatusCondition, 0, len(in))
+	for _, c := range in {
+		out = append(out, configv1.ClusterOperatorStatusCondition{
+			Type:               configv1.ClusterStatusConditionType(c.Type),
+			Status:             configv1.ConditionStatus(c.Status),
+			Reason:             c.Reason,
+			Message:            c.Message,
+			LastTransitionTime: c.LastTransitionTime,
+		})
+	}
+	return out
+}
diff --git a/internal/controllers/suite_test.go b/internal/controllers/suite_test.go
index 9b52fd47..d33a4430 100644
--- a/internal/controllers/suite_test.go
+++ b/internal/controllers/suite_test.go
@@ -62,6 +62,7 @@ var _ = BeforeSuite(func() {
 	By("bootstrapping test environment")
 	testEnv = &envtest.Environment{
+		// TODO: $ROOT_DIR
 		CRDDirectoryPaths:     []string{"../../manifests", "../../vendor/github.com/openshift/api/config/v1/"},
 		ErrorIfCRDPathMissing: true,
 	}
diff --git a/manifests/0000_50_cluster-platform-operator-manager_07-core-clusteroperator.yaml b/manifests/0000_50_cluster-platform-operator-manager_07-core-clusteroperator.yaml
new file mode 100644
index 00000000..a0ea2af1
--- /dev/null
+++ b/manifests/0000_50_cluster-platform-operator-manager_07-core-clusteroperator.yaml
@@ -0,0 +1,28 @@
+apiVersion: config.openshift.io/v1
+kind: ClusterOperator
+metadata:
+  annotations:
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+    include.release.openshift.io/single-node-developer: "true"
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+  name: platform-operators-core
+  namespace: openshift-platform-operators
+spec: {}
+status:
+  relatedObjects:
+  - group: ""
+    name: openshift-platform-operators
+    resource: namespaces
+  - group: platform.openshift.io
+    name: ""
+    resource: platformoperators
+  - group: core.rukpak.io
+    name: ""
+    resource: bundles
+  - group: core.rukpak.io
+    name: ""
+    resource: bundledeployments
+  versions:
+  - name: operator
+    version: 0.0.1-snapshot
diff --git a/test/e2e/aggregated_clusteroperator_test.go b/test/e2e/aggregated_clusteroperator_test.go
index ec099487..ed8d905b 100644
--- a/test/e2e/aggregated_clusteroperator_test.go
+++ b/test/e2e/aggregated_clusteroperator_test.go
@@ -34,7 +34,7 @@ var _ = Describe("aggregated clusteroperator controller", func() {
 			if err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.AggregateResourceName}, co); err != nil {
 				return nil, err
 			}
-			return FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
+			return clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
 		}).Should(And(
 			Not(BeNil()),
 			WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ClusterStatusConditionType { return c.Type }, Equal(configv1.OperatorAvailable)),
@@ -111,7 +111,7 @@ var _ = Describe("aggregated clusteroperator controller", func() {
 			if err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.AggregateResourceName}, co); err != nil {
 				return nil, err
 			}
-			return FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
+			return clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
 		}).Should(And(
 			Not(BeNil()),
 			WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ClusterStatusConditionType { return c.Type }, Equal(configv1.OperatorAvailable)),
@@ -180,7 +180,7 @@ var _ = Describe("aggregated clusteroperator controller", func() {
 			if err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.AggregateResourceName}, co); err != nil {
 				return nil, err
 			}
-			return FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
+			return clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
 		}).Should(And(
 			Not(BeNil()),
 			WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ClusterStatusConditionType { return c.Type }, Equal(configv1.OperatorAvailable)),
@@ -263,7 +263,7 @@ var _ = Describe("aggregated clusteroperator controller", func() {
 			if err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.AggregateResourceName}, co); err != nil {
 				return nil, err
 			}
-			return FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
+			return clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil
 		}).Should(And(
 			Not(BeNil()),
 			WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ClusterStatusConditionType { return c.Type }, Equal(configv1.OperatorAvailable)),
@@ -274,14 +274,3 @@ var _ = Describe("aggregated clusteroperator controller", func() {
 		})
 	})
 })
-
-// FindStatusCondition finds the conditionType in conditions.
-// Note: manually vendored from o/library-go/pkg/config/clusteroperator/v1helpers/status.go.
-func FindStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
-	for i := range conditions {
-		if conditions[i].Type == conditionType {
-			return &conditions[i]
-		}
-	}
-	return nil
-}
diff --git a/test/e2e/collect-ci-artifacts.sh b/test/e2e/collect-ci-artifacts.sh
index c4f9f3b9..078bfc36 100755
--- a/test/e2e/collect-ci-artifacts.sh
+++ b/test/e2e/collect-ci-artifacts.sh
@@ -26,6 +26,7 @@ function ensure_kubectl() {
 
 function collect_artifacts() {
   commands=()
+  commands+=("get co platform-operators-core -o yaml")
   commands+=("get co platform-operators-aggregated -o yaml")
   commands+=("get platformoperators -o yaml")
   commands+=("get bundledeployments -o yaml")
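
Alongside the CI artifact collection above, the new ClusterOperator can also be inspected ad hoc from a small Go program. The sketch below is not part of this change; it assumes a reachable kubeconfig, uses openshift/api's generated Install helper for the config.openshift.io scheme, and looks up the Available condition by its literal resource name rather than importing the repository's internal helpers.

// inspect_core_co.go (hypothetical helper, not in this diff): print the Available
// condition of the platform-operators-core ClusterOperator.
package main

import (
	"context"
	"fmt"

	configv1 "github.com/openshift/api/config/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func main() {
	scheme := runtime.NewScheme()
	if err := configv1.Install(scheme); err != nil {
		panic(err)
	}
	c, err := client.New(ctrl.GetConfigOrDie(), client.Options{Scheme: scheme})
	if err != nil {
		panic(err)
	}

	co := &configv1.ClusterOperator{}
	if err := c.Get(context.Background(), types.NamespacedName{Name: "platform-operators-core"}, co); err != nil {
		panic(err)
	}
	for _, cond := range co.Status.Conditions {
		if cond.Type == configv1.OperatorAvailable {
			fmt.Printf("Available=%s reason=%s message=%q\n", cond.Status, cond.Reason, cond.Message)
		}
	}
}
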
"github.com/onsi/gomega" + configv1 "github.com/openshift/api/config/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openshift/platform-operators/internal/clusteroperator" +) + +var _ = Describe("core clusteroperator controller", func() { + var ( + ctx context.Context + ) + BeforeEach(func() { + ctx = context.Background() + }) + + When("the core clusteroperator exists", func() { + It("should consistently report Available=True", func() { + Consistently(func() (*configv1.ClusterOperatorStatusCondition, error) { + co := &configv1.ClusterOperator{} + if err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.CoreResourceName}, co); err != nil { + return nil, err + } + return clusteroperator.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable), nil + }).Should(And( + Not(BeNil()), + WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ClusterStatusConditionType { return c.Type }, Equal(configv1.OperatorAvailable)), + WithTransform(func(c *configv1.ClusterOperatorStatusCondition) configv1.ConditionStatus { return c.Status }, Equal(configv1.ConditionTrue)), + WithTransform(func(c *configv1.ClusterOperatorStatusCondition) string { return c.Reason }, Equal(clusteroperator.ReasonAsExpected)), + WithTransform(func(c *configv1.ClusterOperatorStatusCondition) string { return c.Message }, ContainSubstring("The platform operator manager is available")), + )) + }) + }) + + // TODO: disabling for now as https://github.com/openshift/cluster-version-operator/pull/840 + // should result in controllers being able to assume the CVO will manage the ClusterOperator + // resource for us. + PWhen("the core clusteroperator has been deleted", func() { + It("should be recreated", func() { + By("getting the core clusteroperator resource") + co := &configv1.ClusterOperator{} + err := c.Get(ctx, types.NamespacedName{Name: clusteroperator.CoreResourceName}, co) + Expect(err).To(BeNil()) + + By("recording the origin UUID of the core resource") + originalUID := co.GetUID() + + By("deleting the core clusteroperator resource") + Expect(c.Delete(ctx, co)).To(Succeed()) + + By("verifying the core clusteroperator resources' UID has changed") + Eventually(func() (types.UID, error) { + err := c.Get(ctx, client.ObjectKeyFromObject(co), co) + return co.GetUID(), err + }).ShouldNot(Equal(originalUID)) + }) + }) +})