Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebase devel on peer pods #311

Merged
merged 12 commits into from
May 16, 2023
192 changes: 113 additions & 79 deletions controllers/openshift_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,19 @@ func (r *KataConfigOpenShiftReconciler) Reconcile(ctx context.Context, req ctrl.
// indicated by the deletion timestamp being set.
if r.kataConfig.GetDeletionTimestamp() != nil {
res, err := r.processKataConfigDeleteRequest()
if err != nil {
return res, err
}

updateErr := r.Client.Status().Update(context.TODO(), r.kataConfig)
if updateErr != nil {
// The finalizer test is to get rid of the
// "Operation cannot be fulfilled [...] Precondition failed"
// error which happens when returning from a reconciliation that
// deleted our KataConfig by removing its finalizer. So if the
// finalizer is missing the actual KataConfig object is probably
// already gone from the cluster, hence the error.
if updateErr != nil && controllerutil.ContainsFinalizer(r.kataConfig, kataConfigFinalizer) {
r.Log.Info("Updating KataConfig failed", "err", updateErr)
return ctrl.Result{}, updateErr
}
return res, nil
return res, err
}

res, err := r.processKataConfigInstallRequest()
Expand Down Expand Up @@ -307,6 +312,31 @@ func (r *KataConfigOpenShiftReconciler) processLogLevel(desiredLogLevel string)
return nil
}

// removeLogLevel deletes the logLevel ContainerRuntimeConfig (the object
// created by processLogLevel). It is idempotent: a ContainerRuntimeConfig
// that is already absent — whether at the initial Get or deleted by someone
// else between Get and Delete — is treated as success, so the uninstall path
// can safely call this repeatedly across reconciliations.
//
// Returns a non-nil error only when the API call fails for a reason other
// than the object being absent.
func (r *KataConfigOpenShiftReconciler) removeLogLevel() error {
	r.Log.Info("removing logLevel ContainerRuntimeConfig")

	ctrRuntimeCfg := &mcfgv1.ContainerRuntimeConfig{}
	err := r.Client.Get(context.TODO(), types.NamespacedName{Name: container_runtime_config_name}, ctrRuntimeCfg)
	if err != nil {
		if k8serrors.IsNotFound(err) {
			r.Log.Info("no logLevel ContainerRuntimeConfig found, nothing to do")
			return nil
		}
		// Real API failures are logged through Log.Error, consistent with
		// the rest of this file.
		r.Log.Error(err, "could not get ContainerRuntimeConfig")
		return err
	}

	if err := r.Client.Delete(context.TODO(), ctrRuntimeCfg); err != nil {
		// The object can vanish between Get and Delete; that still means
		// "nothing left to remove", so don't fail the uninstall for it.
		if k8serrors.IsNotFound(err) {
			r.Log.Info("logLevel ContainerRuntimeConfig already deleted")
			return nil
		}
		r.Log.Error(err, "error deleting ContainerRuntimeConfig")
		return err
	}
	r.Log.Info("logLevel ContainerRuntimeConfig deleted successfully")
	return nil
}

func (r *KataConfigOpenShiftReconciler) processDaemonsetForMonitor() *appsv1.DaemonSet {
var (
runPrivileged = false
Expand Down Expand Up @@ -528,6 +558,18 @@ func (r *KataConfigOpenShiftReconciler) addFinalizer() error {
return nil
}

// removeFinalizer strips the operator's finalizer from the KataConfig and
// persists the change via the API server, which lets Kubernetes finish
// deleting the object. The Update error, if any, is logged and returned
// so the caller can requeue.
func (r *KataConfigOpenShiftReconciler) removeFinalizer() error {
	r.Log.Info("Removing finalizer from the KataConfig")

	controllerutil.RemoveFinalizer(r.kataConfig, kataConfigFinalizer)
	if updateErr := r.Client.Update(context.TODO(), r.kataConfig); updateErr != nil {
		r.Log.Error(updateErr, "Unable to update KataConfig")
		return updateErr
	}
	return nil
}

func (r *KataConfigOpenShiftReconciler) listKataPods() error {
podList := &corev1.PodList{}
listOpts := []client.ListOption{
Expand Down Expand Up @@ -777,17 +819,21 @@ func (r *KataConfigOpenShiftReconciler) getNodeSelectorAsLabelSelector() *metav1
return &metav1.LabelSelector{MatchLabels: r.getNodeSelectorAsMap()}
}

func (r *KataConfigOpenShiftReconciler) processKataConfigDeleteRequest() (ctrl.Result, error) {
r.Log.Info("KataConfig deletion in progress: ")
machinePool, err := r.getMcpNameIfMcpExists()
func (r *KataConfigOpenShiftReconciler) isMcpUpdating(mcpName string) bool {
mcp := &mcfgv1.MachineConfigPool{}
err := r.Client.Get(context.TODO(), types.NamespacedName{Name: mcpName}, mcp)
if err != nil {
return reconcile.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
r.Log.Info("Getting MachineConfigPool failed ", "machinePool", mcpName, "err", err)
return false
}
return mcfgv1.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating)
}

foundMcp := &mcfgv1.MachineConfigPool{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: machinePool}, foundMcp)
func (r *KataConfigOpenShiftReconciler) processKataConfigDeleteRequest() (ctrl.Result, error) {
r.Log.Info("KataConfig deletion in progress: ")
machinePool, err := r.getMcpName()
if err != nil {
return ctrl.Result{}, err
return reconcile.Result{Requeue: true, RequeueAfter: 15 * time.Second}, err
}

if contains(r.kataConfig.GetFinalizers(), kataConfigFinalizer) {
Expand Down Expand Up @@ -849,72 +895,69 @@ func (r *KataConfigOpenShiftReconciler) processKataConfigDeleteRequest() (ctrl.R
r.Log.Error(err, "Error found deleting machine config. If the machine config exists after installation it can be safely deleted manually.",
"mc", mc.Name)
}
// Sleep for MCP to reflect the changes
r.Log.Info("Pausing for a minute to make sure mcp has started syncing up")
time.Sleep(60 * time.Second)
r.Log.Info("Starting to wait for MCO to start reconciliation")
r.kataConfig.Status.WaitingForMcoToStart = true
r.kataConfig.Status.UnInstallationStatus.InProgress.IsInProgress = corev1.ConditionTrue
}

mcp := &mcfgv1.MachineConfigPool{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: machinePool}, mcp)
if err != nil {
r.Log.Error(err, "Unable to get MachineConfigPool ", "machinePool", machinePool)
return ctrl.Result{}, err
isConvergedCluster, _ := r.checkConvergedCluster()

// When nodes migrate from a source pool to a target pool the source
// pool is drained immediately and the nodes then slowly join the target
// pool. Thus the operation duration is dominated by the target pool
// part and the target pool is what we need to watch to find out when
// the operation is finished. When uninstalling kata on a regular
// cluster nodes leave "kata-oc" to rejoin "worker" so "worker" is our
// target pool. On a converged cluster, nodes leave "master" to rejoin
// it so "master" is both source and target in this case.
targetPool := "worker"
if isConvergedCluster {
targetPool = "master"
}
r.Log.Info("Monitoring mcp", "mcp name", mcp.Name, "ready machines", mcp.Status.ReadyMachineCount,
"total machines", mcp.Status.MachineCount)
r.kataConfig.Status.UnInstallationStatus.InProgress.IsInProgress = corev1.ConditionTrue
isMcoUpdating := r.isMcpUpdating(targetPool)

if !isMcoUpdating && r.kataConfig.Status.WaitingForMcoToStart {
r.Log.Info("Waiting for MCO to start updating.")
// We don't requeue, an MCP going Updated->Updating will
// trigger reconciliation by itself thanks to our watching MCPs.
return reconcile.Result{}, nil
} else {
r.Log.Info("No need to wait for MCO to start updating.", "isMcoUpdating", isMcoUpdating, "Status.WaitingForMcoToStart", r.kataConfig.Status.WaitingForMcoToStart)
r.kataConfig.Status.WaitingForMcoToStart = false
}

r.clearUninstallStatus()
_, result, err2, done := r.updateStatus(machinePool)
if !done {
return result, err2
}

if mcp.Status.ReadyMachineCount != mcp.Status.MachineCount {
return ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, nil
if isMcoUpdating {
r.Log.Info("Waiting for MachineConfigPool to be fully updated", "machinePool", targetPool)
return reconcile.Result{}, nil
}

// Sleep for MCP to reflect the changes
r.Log.Info("Pausing for a minute to make sure mcp has started syncing up")
time.Sleep(60 * time.Second)
r.kataConfig.Status.UnInstallationStatus.InProgress.IsInProgress = corev1.ConditionFalse

//This is not applicable for converged cluster
isConvergedCluster, _ := r.checkConvergedCluster()
if !isConvergedCluster {
//Get "worker" MCP
wMcp := &mcfgv1.MachineConfigPool{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: "worker"}, wMcp)
if err != nil {
r.Log.Error(err, "Unable to get MachineConfigPool - worker")
return ctrl.Result{}, err
}

// At this time the kata-oc MCP is updated. However the worker MCP might still be in Updating state
// We'll need to wait for the worker MCP to complete Updating before deletion
r.Log.Info("Wait till worker MCP has updated")
if (wMcp.Status.ReadyMachineCount != wMcp.Status.MachineCount) &&
mcfgv1.IsMachineConfigPoolConditionTrue(wMcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) {
return ctrl.Result{Requeue: true, RequeueAfter: 15 * time.Second}, nil
}

err = r.Client.Delete(context.TODO(), mcp)
if err != nil {
r.Log.Error(err, "Unable to delete kata-oc MachineConfigPool")
r.Log.Info("Get()'ing MachineConfigPool to delete it", "machinePool", "kata-oc")
kataOcMcp := &mcfgv1.MachineConfigPool{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: "kata-oc"}, kataOcMcp)
if err == nil {
r.Log.Info("Deleting MachineConfigPool ", "machinePool", "kata-oc")
err = r.Client.Delete(context.TODO(), kataOcMcp)
if err != nil {
r.Log.Error(err, "Unable to delete kata-oc MachineConfigPool")
return ctrl.Result{}, err
}
} else if k8serrors.IsNotFound(err) {
r.Log.Info("MachineConfigPool not found", "machinePool", "kata-oc")
} else {
r.Log.Error(err, "Unable to get MachineConfigPool ", "machinePool", "kata-oc")
return ctrl.Result{}, err
}
}

r.kataConfig.Status.UnInstallationStatus.InProgress.IsInProgress = corev1.ConditionFalse
_, result, err2, done = r.updateStatus(machinePool)
r.clearInstallStatus()
if !done {
return result, err2
}
err = r.Client.Status().Update(context.TODO(), r.kataConfig)
if err != nil {
r.Log.Error(err, "Unable to update KataConfig status")
return ctrl.Result{}, err
}

ds := r.processDaemonsetForMonitor()
err = r.Client.Delete(context.TODO(), ds)
if err != nil {
Expand Down Expand Up @@ -944,13 +987,14 @@ func (r *KataConfigOpenShiftReconciler) processKataConfigDeleteRequest() (ctrl.R
}
}

r.Log.Info("Uninstallation completed. Proceeding with the KataConfig deletion")
controllerutil.RemoveFinalizer(r.kataConfig, kataConfigFinalizer)

err = r.Client.Update(context.TODO(), r.kataConfig)
err = r.removeLogLevel()
if err != nil {
r.Log.Error(err, "Unable to update KataConfig")
return ctrl.Result{}, err
return ctrl.Result{Requeue: true}, nil
}

r.Log.Info("Uninstallation completed. Proceeding with the KataConfig deletion")
if err = r.removeFinalizer(); err != nil {
return ctrl.Result{Requeue: true}, nil
}

return ctrl.Result{}, nil
Expand Down Expand Up @@ -1047,24 +1091,14 @@ func (r *KataConfigOpenShiftReconciler) processKataConfigInstallRequest() (ctrl.
}
}

isMcpUpdating := func(mcpName string) bool {
mcp := &mcfgv1.MachineConfigPool{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: mcpName}, mcp)
if err != nil {
r.Log.Info("Getting MachineConfigPool failed ", "machinePool", mcpName, "err", err)
return false
}
return mcfgv1.IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdating)
}

isKataMcpUpdating := isMcpUpdating(machinePool)
isKataMcpUpdating := r.isMcpUpdating(machinePool)
r.Log.Info("MCP updating state", "MCP name", machinePool, "is updating", isKataMcpUpdating)
if isKataMcpUpdating {
r.kataConfig.Status.InstallationStatus.IsInProgress = corev1.ConditionTrue
}
isMcoUpdating := isKataMcpUpdating
if !isConvergedCluster {
isWorkerUpdating := isMcpUpdating("worker")
isWorkerUpdating := r.isMcpUpdating("worker")
r.Log.Info("MCP updating state", "MCP name", "worker", "is updating", isWorkerUpdating)
if isWorkerUpdating {
r.kataConfig.Status.InstallationStatus.IsInProgress = corev1.ConditionTrue
Expand Down
12 changes: 2 additions & 10 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ import (
"os"
"path/filepath"
"testing"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gexec"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -113,15 +111,7 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

go func() {
defer GinkgoRecover()
err = k8sManager.Start(ctrl.SetupSignalHandler())
Expect(err).ToNot(HaveOccurred(), "failed to run manager")
gexec.KillAndWait(4 * time.Second)

// Teardown the test environment once the controller is finished.
// Otherwise from Kubernetes 1.21+, teardown timeouts waiting on
// kube-apiserver to return
err := testEnv.Stop()
Expect(err).ToNot(HaveOccurred())
}()

Expand All @@ -131,4 +121,6 @@ var _ = BeforeSuite(func() {

var _ = AfterSuite(func() {
By("tearing down the test environment")
err := testEnv.Stop()
Expect(err).ToNot(HaveOccurred())
})
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ module github.com/openshift/sandboxed-containers-operator
go 1.19

require (
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230504095339-77bef7014153
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230504095339-77bef7014153
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230512144533-a9941bba4692
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230512144533-a9941bba4692
github.com/coreos/ignition/v2 v2.9.0
github.com/ghodss/yaml v1.0.0
github.com/go-logr/logr v1.2.3
Expand Down Expand Up @@ -154,7 +154,6 @@ require (
)

replace (
//github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl => ../cloud-api-adaptor/peerpodconfig-ctrl
k8s.io/api => k8s.io/api v0.25.1
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.25.1
k8s.io/apimachinery => k8s.io/apimachinery v0.25.1
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,10 @@ github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u9
github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4=
github.com/confidential-containers/cloud-api-adaptor v0.5.1-0.20230504043629-580abeb128aa h1:dRChI1GYwl5maTTkBszTmcPOx541mORE1S4Cxm1mbp0=
github.com/confidential-containers/cloud-api-adaptor v0.5.1-0.20230504043629-580abeb128aa/go.mod h1:PgjQnT5SSuSMiVM2uVKj7Qq70lMc5nVhZOPYU0lyy+s=
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230504095339-77bef7014153 h1:Fi+koFVy+nTa69XzsLo+6ktjalQH15om/RxfzW0GLPQ=
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230504095339-77bef7014153/go.mod h1:MHjPSHrlD3SLbWqT6RteKSgZRc2fRRZahf5lfPYbHUM=
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230504095339-77bef7014153 h1:5w9u2wIXGi0DGWyjP43m8yIRL4OPcm5GS3/jWZ/6SGA=
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230504095339-77bef7014153/go.mod h1:LH9ur4GVe4uZM9MnQIGIeBr5CdCVMQ9AtUHGPs1WoD4=
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230512144533-a9941bba4692 h1:c+/JR7+sxetQ+wI6XwGnrZAZA/ZgWhXQtXTbO2w0siA=
github.com/confidential-containers/cloud-api-adaptor/peerpod-ctrl v0.0.0-20230512144533-a9941bba4692/go.mod h1:MHjPSHrlD3SLbWqT6RteKSgZRc2fRRZahf5lfPYbHUM=
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230512144533-a9941bba4692 h1:Ym7kkFxPKcGEY1MadMD+BPxZOX4Migp1Sbq7kLkua2U=
github.com/confidential-containers/cloud-api-adaptor/peerpodconfig-ctrl v0.0.0-20230512144533-a9941bba4692/go.mod h1:LH9ur4GVe4uZM9MnQIGIeBr5CdCVMQ9AtUHGPs1WoD4=
github.com/container-orchestrated-devices/container-device-interface v0.4.0/go.mod h1:E1zcucIkq9P3eyNmY+68dBQsTcsXJh9cgRo2IVNScKQ=
github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4/go.mod h1:nukgQABAEopAHvB6j7cnP5zJ+/3aVcE7hCYqvIwAHyE=
github.com/containerd/aufs v0.0.0-20201003224125-76a6863f2989/go.mod h1:AkGGQs9NM2vtYHaUen+NljV0/baGCAPELGm2q9ZXpWU=
Expand Down