Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rely on update events for MCP checks instead of polling #60

Merged
merged 6 commits into from
Jun 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/operator/controller/machineconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const (
finalizer = "NodeObservabilityMachineConfig"

// defaultRequeueTime is the default reconcile requeue time
defaultRequeueTime = 3 * time.Minute
defaultRequeueTime = time.Minute

// Empty is defined for empty string
Empty = ""
Expand Down
68 changes: 53 additions & 15 deletions pkg/operator/controller/machineconfig/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -105,15 +107,15 @@ func (r *MachineConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
r.CtrlConfig.Status.UpdateLastReconcileTime()
if errUpdate := r.updateStatus(ctx); errUpdate != nil {
r.Log.Error(err, "failed to update cleanup status")
result = ctrl.Result{RequeueAfter: 1 * time.Minute}
result = ctrl.Result{RequeueAfter: defaultRequeueTime}
err = utilerrors.NewAggregate([]error{err, errUpdate})
}
}
return
}

// Set finalizers on the NodeObservabilityMachineConfig resource
updated, err := r.withFinalizers(ctx, req)
updated, err := r.addFinalizer(ctx, req)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update NodeObservabilityMachineConfig with finalizers: %w", err)
}
Expand All @@ -133,22 +135,22 @@ func (r *MachineConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
r.CtrlConfig.Status.UpdateLastReconcileTime()
if errUpdate := r.updateStatus(ctx); errUpdate != nil {
r.Log.Error(err, "failed to update status")
result = ctrl.Result{RequeueAfter: 1 * time.Minute}
result = ctrl.Result{RequeueAfter: defaultRequeueTime}
err = utilerrors.NewAggregate([]error{err, errUpdate})
}
}()

requeue, err := r.inspectProfilingMCReq(ctx)
if err != nil {
r.Log.Error(err, "failed to reconcile requested configuration")
return ctrl.Result{RequeueAfter: 1 * time.Minute}, err
return ctrl.Result{RequeueAfter: defaultRequeueTime}, err
}
// if the configuration changes were made in the current reconciling
// will requeue to avoid the existing status of MCP to be considered
// and allow MCO to pick the changes and update correct state
if requeue {
r.Log.Info("updated configurations, reconcile again in 2 minutes")
return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
r.Log.Info("updated configurations, reconcile again in a minute")
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
}

return r.monitorProgress(ctx)
Expand All @@ -157,20 +159,47 @@ func (r *MachineConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
// SetupWithManager sets up the controller with the Manager.
func (r *MachineConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.NodeObservabilityMachineConfig{}).
WithEventFilter(predicate.GenerationChangedPredicate{}).
For(&v1alpha1.NodeObservabilityMachineConfig{}, builder.WithPredicates(ignoreNOMCStatusUpdates())).
Owns(&mcv1.MachineConfig{}).
Owns(&mcv1.MachineConfigPool{}).
Complete(r)
}

// ignoreNOMCStatusUpdates is for ignoring NodeObservabilityMachineConfig
// resource status update events and for not adding to the reconcile queue
func ignoreNOMCStatusUpdates() predicate.Predicate {
return predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
// old object does not exist, nothing to update
if e.ObjectOld == nil {
return false
}
// new object does not exist, nothing to update
if e.ObjectNew == nil {
return false
}

// if NOMC generated count is unchanged, it indicates
// spec or metadata has not changed and the event could be for
// status update which need not be queued for reconciliation
if _, ok := e.ObjectOld.(*v1alpha1.NodeObservabilityMachineConfig); ok {
return e.ObjectOld.GetGeneration() != e.ObjectNew.GetGeneration()
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In comment you mention ResourceVersion, but code compares Generation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment incorporated

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good change, thanks!

return true
},
}
}

// cleanUp is handling deletion of NodeObservabilityMachineConfig resource
// deletion. Reverts all the changes made when debugging is enabled and
// restores the cluster to earlier state.
func (r *MachineConfigReconciler) cleanUp(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
disabled, err := r.ensureProfConfDisabled(ctx)
if err != nil {
return ctrl.Result{RequeueAfter: 1 * time.Minute}, err
return ctrl.Result{RequeueAfter: defaultRequeueTime}, err
}
if disabled {
return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
}

if r.CtrlConfig.Status.IsMachineConfigInProgress() {
Expand All @@ -193,8 +222,7 @@ func (r *MachineConfigReconciler) cleanUp(ctx context.Context, req ctrl.Request)
}

if removeFinalizer && hasFinalizer(r.CtrlConfig) {
// Remove the finalizer.
if _, err := r.withoutFinalizers(ctx, req, finalizer); err != nil {
if _, err := r.removeFinalizer(ctx, req, finalizer); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to remove finalizer from NodeObservabilityMachineConfig %s: %w", r.CtrlConfig.Name, err)
}
r.Log.Info("removed finalizer from NodeObservabilityMachineConfig resource, cleanup complete")
Expand All @@ -203,7 +231,9 @@ func (r *MachineConfigReconciler) cleanUp(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}

func (r *MachineConfigReconciler) withFinalizers(ctx context.Context, req ctrl.Request) (*v1alpha1.NodeObservabilityMachineConfig, error) {
// addFinalizer adds finalizer to NodeObservabilityMachineConfig resource
// if does not exist
func (r *MachineConfigReconciler) addFinalizer(ctx context.Context, req ctrl.Request) (*v1alpha1.NodeObservabilityMachineConfig, error) {
withFinalizers := &v1alpha1.NodeObservabilityMachineConfig{}

err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
Expand All @@ -228,7 +258,9 @@ func (r *MachineConfigReconciler) withFinalizers(ctx context.Context, req ctrl.R
return withFinalizers, err
}

func (r *MachineConfigReconciler) withoutFinalizers(ctx context.Context, req ctrl.Request, finalizer string) (*v1alpha1.NodeObservabilityMachineConfig, error) {
// removeFinalizer removes finalizers added to
// NodeObservabilityMachineConfig resource if present
func (r *MachineConfigReconciler) removeFinalizer(ctx context.Context, req ctrl.Request, finalizer string) (*v1alpha1.NodeObservabilityMachineConfig, error) {
withoutFinalizers := &v1alpha1.NodeObservabilityMachineConfig{}

err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
Expand Down Expand Up @@ -267,6 +299,8 @@ func (r *MachineConfigReconciler) withoutFinalizers(ctx context.Context, req ctr
return withoutFinalizers, err
}

// hasFinalizer checks if the required finalizer is present
// in the NodeObservabilityMachineConfig resource
func hasFinalizer(mc *v1alpha1.NodeObservabilityMachineConfig) bool {
hasFinalizer := false
for _, f := range mc.Finalizers {
Expand All @@ -278,6 +312,7 @@ func hasFinalizer(mc *v1alpha1.NodeObservabilityMachineConfig) bool {
return hasFinalizer
}

// updateStatus is updating the status subresource of NodeObservabilityMachineConfig
func (r *MachineConfigReconciler) updateStatus(ctx context.Context) error {

namespace := types.NamespacedName{Name: r.CtrlConfig.Name}
Expand Down Expand Up @@ -308,7 +343,7 @@ func (r *MachineConfigReconciler) updateStatus(ctx context.Context) error {
// if debugging is enabled
func (r *MachineConfigReconciler) inspectProfilingMCReq(ctx context.Context) (bool, error) {
if r.CtrlConfig.Status.IsMachineConfigInProgress() {
r.Log.Info("previous reconcile initiated operation in progress, changes not applied")
r.Log.V(3).Info("previous reconcile initiated operation in progress, changes not applied")
return false, nil
}

Expand Down Expand Up @@ -414,6 +449,9 @@ func (r *MachineConfigReconciler) ensureReqMCPNotExists(ctx context.Context) err
return r.deleteProfMCP(ctx)
}

// monitorProgress is for checking the progress of the MCPs based
// on configuration. nodeobservability MCP is checked when debug
// is enabled and worker MCP when disabled
func (r *MachineConfigReconciler) monitorProgress(ctx context.Context) (result ctrl.Result, err error) {

if r.CtrlConfig.Status.IsDebuggingEnabled() {
Expand Down
12 changes: 6 additions & 6 deletions pkg/operator/controller/machineconfig/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ func TestReconcile(t *testing.T) {
reqObjs: append([]runtime.Object{workerMCP}, nodes...),
wantErr: false,
asExpected: func(status v1alpha1.NodeObservabilityMachineConfigStatus, result ctrl.Result) bool {
if result.RequeueAfter != 2*time.Minute ||
if result.RequeueAfter != defaultRequeueTime ||
!status.IsDebuggingEnabled() ||
!status.IsMachineConfigInProgress() ||
status.IsDebuggingFailed() {
Expand All @@ -319,7 +319,7 @@ func TestReconcile(t *testing.T) {
},
wantErr: false,
asExpected: func(status v1alpha1.NodeObservabilityMachineConfigStatus, result ctrl.Result) bool {
if result.RequeueAfter > 1*time.Minute ||
if result.RequeueAfter > defaultRequeueTime ||
!status.IsDebuggingEnabled() ||
!status.IsMachineConfigInProgress() ||
status.IsDebuggingFailed() {
Expand Down Expand Up @@ -349,11 +349,11 @@ func TestReconcile(t *testing.T) {
name: "controller resource reconcile request too soon, low boundary value",
reqObjs: append([]runtime.Object{mcp, criomc, workerMCP}, labeledNodes...),
preReq: func(r *MachineConfigReconciler, o *[]runtime.Object) {
r.CtrlConfig.Status.LastReconcile = metav1.Time{Time: metav1.Now().Time.Add(-60 * time.Second)}
r.CtrlConfig.Status.LastReconcile = metav1.Time{Time: metav1.Now().Time.Add(-defaultRequeueTime)}
},
wantErr: false,
asExpected: func(status v1alpha1.NodeObservabilityMachineConfigStatus, result ctrl.Result) bool {
if result.RequeueAfter != 3*time.Minute ||
if result.RequeueAfter != 0 ||
!status.IsDebuggingEnabled() ||
!status.IsMachineConfigInProgress() ||
status.IsDebuggingFailed() {
Expand Down Expand Up @@ -392,7 +392,7 @@ func TestReconcile(t *testing.T) {
},
wantErr: false,
asExpected: func(status v1alpha1.NodeObservabilityMachineConfigStatus, result ctrl.Result) bool {
if result.RequeueAfter != 2*time.Minute ||
if result.RequeueAfter != defaultRequeueTime ||
status.IsDebuggingEnabled() ||
!status.IsMachineConfigInProgress() ||
status.IsDebuggingFailed() {
Expand Down Expand Up @@ -425,7 +425,7 @@ func TestReconcile(t *testing.T) {
},
wantErr: false,
asExpected: func(status v1alpha1.NodeObservabilityMachineConfigStatus, result ctrl.Result) bool {
if result.RequeueAfter != 1*time.Minute ||
if result.RequeueAfter != defaultRequeueTime ||
status.IsDebuggingEnabled() ||
!status.IsMachineConfigInProgress() ||
status.IsDebuggingFailed() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/controller/machineconfig/crio.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func (r *MachineConfigReconciler) createCrioProfConf(ctx context.Context) error
return err
}

if err := ctrlutil.SetOwnerReference(r.CtrlConfig, criomc, r.Scheme); err != nil {
if err := ctrlutil.SetControllerReference(r.CtrlConfig, criomc, r.Scheme); err != nil {
r.Log.Error(err, "failed to update owner info in CRI-O profiling MC resource")
}

Expand Down
16 changes: 8 additions & 8 deletions pkg/operator/controller/machineconfig/mcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func (r *MachineConfigReconciler) GetProfilingMCP(name string) *mcv1.MachineConf
func (r *MachineConfigReconciler) createMCP(ctx context.Context, name string) error {
mcp := r.GetProfilingMCP(name)

if err := ctrlutil.SetOwnerReference(r.CtrlConfig, mcp, r.Scheme); err != nil {
if err := ctrlutil.SetControllerReference(r.CtrlConfig, mcp, r.Scheme); err != nil {
r.Log.Error(err, "failed to update owner info in MCP", "MCP", name)
}

Expand Down Expand Up @@ -175,7 +175,7 @@ func (r *MachineConfigReconciler) CheckNodeObservabilityMCPStatus(ctx context.Co
r.CtrlConfig.Status.SetCondition(v1alpha1.DebugReady, metav1.ConditionFalse, v1alpha1.ReasonInProgress, msg)

r.Unlock()
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
return ctrl.Result{}, nil
}

if mcv1.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcv1.MachineConfigPoolUpdated) {
Expand Down Expand Up @@ -209,12 +209,12 @@ func (r *MachineConfigReconciler) CheckNodeObservabilityMCPStatus(ctx context.Co
msg := fmt.Sprintf("%s MCP has %d machines in degraded state, reverted changes", mcp.Name, mcp.Status.DegradedMachineCount)
r.CtrlConfig.Status.SetCondition(v1alpha1.DebugReady, metav1.ConditionFalse, v1alpha1.ReasonFailed, msg)
r.Unlock()
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
return ctrl.Result{}, nil
}

r.Unlock()
r.Log.Info("waiting for machine config update to complete on all machines", "MCP", mcp.Name)
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
return ctrl.Result{}, nil
}

// checkWorkerMCPStatus is for reconciling update status of all machines in profiling MCP
Expand All @@ -239,17 +239,17 @@ func (r *MachineConfigReconciler) checkWorkerMCPStatus(ctx context.Context) (ctr
}
r.Log.Info(msg)
r.Unlock()
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
return ctrl.Result{}, nil
}

if mcv1.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcv1.MachineConfigPoolUpdated) {

r.Unlock()
if err := r.ensureReqMCNotExists(ctx); err != nil {
return ctrl.Result{RequeueAfter: 1 * time.Minute}, err
return ctrl.Result{RequeueAfter: defaultRequeueTime}, err
}
if err := r.ensureReqMCPNotExists(ctx); err != nil {
return ctrl.Result{RequeueAfter: 1 * time.Minute}, err
return ctrl.Result{RequeueAfter: defaultRequeueTime}, err
}

var msg string
Expand Down Expand Up @@ -295,5 +295,5 @@ func (r *MachineConfigReconciler) checkWorkerMCPStatus(ctx context.Context) (ctr
if r.CtrlConfig.Status.IsDebuggingFailed() {
r.Log.Info("waiting for reverting to complete on all machines", "MCP", mcp.Name)
}
return ctrl.Result{RequeueAfter: defaultRequeueTime}, nil
return ctrl.Result{}, nil
}