Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix upgrade from OSC 1.4.1 #366

Merged
merged 3 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/v1/kataconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ type KataConfigSpec struct {

// KataConfigStatus defines the observed state of KataConfig
type KataConfigStatus struct {
// RuntimeClass is the names of the RuntimeClasses created by this controller
// RuntimeClasses is the names of the RuntimeClasses created by this controller
// +optional
RuntimeClass []string `json:"runtimeClass"`
RuntimeClasses []string `json:"runtimeClasses"`

// +optional
KataNodes KataNodesStatus `json:"kataNodes,omitempty"`
Expand Down
77 changes: 34 additions & 43 deletions controllers/openshift_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,42 +165,6 @@ func (r *KataConfigOpenShiftReconciler) Reconcile(ctx context.Context, req ctrl.
return ctrl.Result{}, updateErr
}

ds := r.processDaemonsetForMonitor()
// Set KataConfig instance as the owner and controller
if err := controllerutil.SetControllerReference(r.kataConfig, ds, r.Scheme); err != nil {
r.Log.Error(err, "failed to set controller reference on the monitor daemonset")
return ctrl.Result{}, err
}
r.Log.Info("controller reference set for the monitor daemonset")

foundDs := &appsv1.DaemonSet{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: ds.Name, Namespace: ds.Namespace}, foundDs)
if err != nil {
//The DaemonSet (DS) should be ideally created after the required SeLinux policy is installed on the
//node. One of the ways to ensure this is to check for the existence of "kata" runtimeclass before
//creating the DS
//Alternatively we can create the DS post execution of createRuntimeClass()
if k8serrors.IsNotFound(err) {
if contains(r.kataConfig.Status.RuntimeClass, "kata") {
r.Log.Info("Creating a new installation monitor daemonset", "ds.Namespace", ds.Namespace, "ds.Name", ds.Name)
err = r.Client.Create(context.TODO(), ds)
if err != nil {
r.Log.Error(err, "error when creating monitor daemonset")
res = ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}
}
}
} else {
r.Log.Error(err, "could not get monitor daemonset, try again")
res = ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}
}
} else {
r.Log.Info("Updating monitor daemonset", "ds.Namespace", ds.Namespace, "ds.Name", ds.Name)
err = r.Client.Update(context.TODO(), ds)
if err != nil {
r.Log.Error(err, "error when updating monitor daemonset")
res = ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}
}
}
cMap := r.processDashboardConfigMap()
if cMap == nil {
r.Log.Info("failed to generate config map for metrics dashboard")
Expand Down Expand Up @@ -597,7 +561,7 @@ func (r *KataConfigOpenShiftReconciler) listKataPods() error {
}
for _, pod := range podList.Items {
if pod.Spec.RuntimeClassName != nil {
if contains(r.kataConfig.Status.RuntimeClass, *pod.Spec.RuntimeClassName) {
if contains(r.kataConfig.Status.RuntimeClasses, *pod.Spec.RuntimeClassName) {
return fmt.Errorf("Existing pods using \"%v\" RuntimeClass found. Please delete the pods manually for KataConfig deletion to proceed", *pod.Spec.RuntimeClassName)
}
}
Expand Down Expand Up @@ -756,8 +720,8 @@ func (r *KataConfigOpenShiftReconciler) createRuntimeClass(runtimeClassName stri
}
}

if !contains(r.kataConfig.Status.RuntimeClass, runtimeClassName) {
r.kataConfig.Status.RuntimeClass = append(r.kataConfig.Status.RuntimeClass, runtimeClassName)
if !contains(r.kataConfig.Status.RuntimeClasses, runtimeClassName) {
r.kataConfig.Status.RuntimeClasses = append(r.kataConfig.Status.RuntimeClasses, runtimeClassName)
}

return nil
Expand Down Expand Up @@ -1161,6 +1125,37 @@ func (r *KataConfigOpenShiftReconciler) processKataConfigInstallRequest() (ctrl.
return reconcile.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
}

ds := r.processDaemonsetForMonitor()
// Set KataConfig instance as the owner and controller
if err = controllerutil.SetControllerReference(r.kataConfig, ds, r.Scheme); err != nil {
r.Log.Error(err, "failed to set controller reference on the monitor daemonset")
return ctrl.Result{}, err
}
r.Log.Info("controller reference set for the monitor daemonset")

foundDs := &appsv1.DaemonSet{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: ds.Name, Namespace: ds.Namespace}, foundDs)
if err != nil {
if k8serrors.IsNotFound(err) {
r.Log.Info("Creating a new installation monitor daemonset", "ds.Namespace", ds.Namespace, "ds.Name", ds.Name)
err = r.Client.Create(context.TODO(), ds)
if err != nil {
r.Log.Error(err, "error when creating monitor daemonset")
return ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
}
} else {
r.Log.Error(err, "could not get monitor daemonset, try again")
return ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
}
} else {
r.Log.Info("Updating monitor daemonset", "ds.Namespace", ds.Namespace, "ds.Name", ds.Name)
err = r.Client.Update(context.TODO(), ds)
if err != nil {
r.Log.Error(err, "error when updating monitor daemonset")
return ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
}
}

// create PeerPodConfig CRD and runtimeclass for peerpods
if r.kataConfig.Spec.EnablePeerPods {
err = r.enablePeerPodsMiscConfigs()
Expand Down Expand Up @@ -1672,10 +1667,6 @@ const (
// will be returned.
func (r *KataConfigOpenShiftReconciler) updateStatus() error {

if r.getInProgressConditionValue() != corev1.ConditionTrue {
return nil
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the commit message, why does just editing KataConfig externally trigger this problem? I'd say predating PR #329 should be the only scenario.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible to remove the node list with something like:

oc patch --type=merge --subresource=status --patch='{"status":{"kataNodes":null}}' kataconfig/my-kataconfig

Of course, people shouldn't do that but it doesn't mean we shouldn't be able to recover 😉

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean is, to my understanding predating PR #329 is the only actual condition. If that's fulfilled then any store will cause problems, right? A store can happen in a number of ways and the user editing the CR is in no way special among them.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope. Install 1.5.0 on a pristine cluster, deploy kata and do the oc patch above, you'll see in the controller logs that reconcile is called but the node list isn't rebuilt.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is un-rebuilding nodes a blocker?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is un-rebuilding nodes a blocker?

Not really, as it doesn't prevent the operator from being functional.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I suspect there are quite a lot of things a user could do to sabotage the controller from which it wouldn't recover. ;-) But that's a fact independent of the idea of this PR - in fact, this has always been true and continues to be true even after this PR, I believe.

My idea was not to mix independent facts in the message and not put them in the same context as if they were related since that could confuse a future reader. I'm not insisting though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah this is merged already... never mind.

err, nodeList := r.getNodes()
if err != nil {
return err
Expand Down