Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New way of showing kata nodes in kataconfig status #329

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 21 additions & 0 deletions api/v1/kataconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ type KataConfigStatus struct {
// +optional
Upgradestatus KataUpgradeStatus `json:"upgradeStatus,omitempty"`

// +optional
KataNodes KataNodesStatus `json:"kataNodes,omitempty"`

// Used internally to persist state between reconciliations
// +optional
// +kubebuilder:default:=false
Expand Down Expand Up @@ -190,3 +193,21 @@ type FailedNodeStatus struct {
// Error message of the failed node reported by the installation daemon
Error string `json:"error"`
}

// KataNodesStatus groups Node names by the state of kata installation or
// uninstallation on each Node. The lists are cleared and repopulated by the
// controller on every status update.
type KataNodesStatus struct {
// Installed lists Nodes that have kata enabled and whose MCO state is
// done with the current MachineConfig equal to the target one.
// +optional
Installed []string `json:"installed,omitempty"`
// Installing lists Nodes that have kata enabled and that the MCO is
// currently working on.
// +optional
Installing []string `json:"installing,omitempty"`
// WaitingToInstall lists Nodes that have kata enabled, whose MCO state
// is done, but whose current MachineConfig differs from the target one.
// +optional
WaitingToInstall []string `json:"waitingToInstall,omitempty"`
// FailedToInstall lists Nodes that have kata enabled and whose MCO state
// is degraded.
// +optional
FailedToInstall []string `json:"failedToInstall,omitempty"`

// Uninstalling lists Nodes that do not have kata enabled and that the
// MCO is currently working on.
// +optional
Uninstalling []string `json:"uninstalling,omitempty"`
// WaitingToUninstall lists Nodes that do not have kata enabled, whose
// MCO state is done, but whose current MachineConfig differs from the
// target one.
// +optional
WaitingToUninstall []string `json:"waitingToUninstall,omitempty"`
// FailedToUninstall lists Nodes that do not have kata enabled and whose
// MCO state is degraded.
// +optional
FailedToUninstall []string `json:"failedToUninstall,omitempty"`
}
185 changes: 175 additions & 10 deletions controllers/openshift_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,20 @@ func (r *KataConfigOpenShiftReconciler) newMCPforCR() *mcfgv1.MachineConfigPool
return mcp
}

// getExtensionName returns the name of the OS extension that installs kata.
//
// The value of the SANDBOXED_CONTAINERS_EXTENSION environment variable is
// used when it is set and non-empty; otherwise "kata-containers" is returned.
//
// RHCOS uses "sandboxed-containers", which the machine-config-operator
// translates to "kata-containers". FCOS gets no such translation, so
// "kata-containers" has to be passed directly. Either name is eventually
// handed to rpm-ostree for installation. Since RHCOS is the special case,
// "kata-containers" is the default, which also covers FCOS.
func getExtensionName() string {
	if fromEnv := os.Getenv("SANDBOXED_CONTAINERS_EXTENSION"); fromEnv != "" {
		return fromEnv
	}
	return "kata-containers"
}

func (r *KataConfigOpenShiftReconciler) newMCForCR(machinePool string) (*mcfgv1.MachineConfig, error) {
r.Log.Info("Creating MachineConfig for Custom Resource")

Expand All @@ -517,16 +531,7 @@ func (r *KataConfigOpenShiftReconciler) newMCForCR(machinePool string) (*mcfgv1.
return nil, err
}

// RHCOS uses "sandboxed-containers" as thats resolved/translated in the machine-config-operator to "kata-containers"
// FCOS however does not get any translation in the machine-config-operator so we need to
// send in "kata-containers".
// Both are later send to rpm-ostree for installation.
//
// As RHCOS is rather special variant, use "kata-containers" by default, which also applies to FCOS
extension := os.Getenv("SANDBOXED_CONTAINERS_EXTENSION")
if len(extension) == 0 {
extension = "kata-containers"
}
extension := getExtensionName()

mc := mcfgv1.MachineConfig{
TypeMeta: metav1.TypeMeta{
Expand Down Expand Up @@ -1800,6 +1805,7 @@ func (r *KataConfigOpenShiftReconciler) updateStatus() error {

r.clearInstallStatus()
r.clearUninstallStatus()
r.clearNodeStatusLists()

r.kataConfig.Status.TotalNodesCount = func() int {
err, nodes := r.getNodesWithLabels(r.getNodeSelectorAsMap())
Expand All @@ -1812,6 +1818,9 @@ func (r *KataConfigOpenShiftReconciler) updateStatus() error {

for _, node := range nodeList.Items {
if annotation, ok := node.Annotations["machineconfiguration.openshift.io/state"]; ok {

r.putNodeOnStatusList(&node)

switch annotation {
case NodeDone:
e := r.processDoneNode(&node)
Expand All @@ -1833,6 +1842,162 @@ func (r *KataConfigOpenShiftReconciler) updateStatus() error {
return err
}

// The isNode* functions below form a set of mutually exclusive predicates.
// Each one decides a single kata installation status for a Node based on
// four inputs:
//   - the Node's MCO state
//   - the MachineConfig the Node is currently at
//   - the MachineConfig the Node is supposed to be at
//   - whether kata is enabled for the Node

// isNodeInstalled reports whether kata is enabled on the Node and the MCO is
// done with the Node already at its target MachineConfig.
func isNodeInstalled(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if !isKataEnabledOnNode {
		return false
	}
	atTarget := nodeCurrMc == nodeTargetMc
	return atTarget && nodeMcoState == NodeDone
}

// isNodeNotInstalled reports whether kata is not enabled on the Node and the
// MCO is done with the Node already at its target MachineConfig.
func isNodeNotInstalled(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if isKataEnabledOnNode {
		return false
	}
	atTarget := nodeCurrMc == nodeTargetMc
	return atTarget && nodeMcoState == NodeDone
}

// isNodeInstalling reports whether kata is enabled on the Node while the MCO
// is working on it. The current and target MachineConfig names are not
// consulted.
func isNodeInstalling(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if nodeMcoState != NodeWorking {
		return false
	}
	return isKataEnabledOnNode
}

// isNodeUninstalling reports whether kata is not enabled on the Node while
// the MCO is working on it. The current and target MachineConfig names are
// not consulted.
func isNodeUninstalling(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if nodeMcoState != NodeWorking {
		return false
	}
	return !isKataEnabledOnNode
}

// isNodeWaitingToInstall reports whether kata is enabled on the Node and the
// MCO state is done, yet the Node is not at its target MachineConfig.
func isNodeWaitingToInstall(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if !isKataEnabledOnNode {
		return false
	}
	pendingUpdate := nodeCurrMc != nodeTargetMc
	return pendingUpdate && nodeMcoState == NodeDone
}

// isNodeWaitingToUninstall reports whether kata is not enabled on the Node
// and the MCO state is done, yet the Node is not at its target MachineConfig.
func isNodeWaitingToUninstall(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if isKataEnabledOnNode {
		return false
	}
	pendingUpdate := nodeCurrMc != nodeTargetMc
	return pendingUpdate && nodeMcoState == NodeDone
}

// isNodeFailedToInstall reports whether kata is enabled on the Node and the
// MCO state is degraded. The current and target MachineConfig names are not
// consulted.
func isNodeFailedToInstall(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if nodeMcoState != NodeDegraded {
		return false
	}
	return isKataEnabledOnNode
}

// isNodeFailedToUninstall reports whether kata is not enabled on the Node and
// the MCO state is degraded. The current and target MachineConfig names are
// not consulted.
func isNodeFailedToUninstall(nodeMcoState string, nodeCurrMc string, nodeTargetMc string, isKataEnabledOnNode bool) bool {
	if nodeMcoState != NodeDegraded {
		return false
	}
	return !isKataEnabledOnNode
}

// putNodeOnStatusList works out the kata installation status of the given
// Node and appends the Node's name to the matching list in
// r.kataConfig.Status.KataNodes. A Node that has no kata and is not supposed
// to have it ("NotInstalled") is only logged, as is a Node whose state
// matches none of the known statuses.
//
// It returns an error if the cluster type or the target MachineConfigPool
// cannot be determined, if the Node lacks the MCO "state" or "currentConfig"
// annotation, or (converged clusters only) if the target MachineConfig
// cannot be retrieved. In all of these cases no status list is modified.
func (r *KataConfigOpenShiftReconciler) putNodeOnStatusList(node *corev1.Node) error {
	isConvergedCluster, err := r.checkConvergedCluster()
	if err != nil {
		return err
	}

	// Pick the MachineConfigPool the Node belongs to or will belong to
	// shortly: always "master" on converged clusters, otherwise decided by
	// the kata-oc node role label.
	_, nodeLabeledForKata := node.Labels["node-role.kubernetes.io/kata-oc"]
	targetMcpName := "worker"
	switch {
	case isConvergedCluster:
		targetMcpName = "master"
	case nodeLabeledForKata:
		targetMcpName = "kata-oc"
	}

	targetMcp, err := r.getMcpByName(targetMcpName)
	if err != nil {
		return err
	}

	nodeMcoState, ok := node.Annotations["machineconfiguration.openshift.io/state"]
	if !ok {
		return fmt.Errorf("Missing machineconfiguration.openshift.io/state on node %v", node.GetName())
	}

	nodeCurrMc, ok := node.Annotations["machineconfiguration.openshift.io/currentConfig"]
	if !ok {
		return fmt.Errorf("Missing machineconfiguration.openshift.io/currentConfig on node %v", node.GetName())
	}

	// Note that to figure out the MachineConfig our Node should be at we
	// unfortunately cannot use
	// machineconfiguration.openshift.io/desiredConfig as would seem
	// logical and easy. The reason is that the MCO only sets
	// `desiredConfig` to the actual desired config right before it starts
	// updating the Node. So to get the correct target MachineConfig we
	// need to look at the MachineConfigPool that our Node belongs to or
	// will belong to shortly.
	nodeTargetMc := targetMcp.Spec.Configuration.Name

	// `isKataEnabledOnNode` is a per-Node condition on regular clusters
	// but cluster-wide on converged ones.
	// On regular clusters, this is ultimately determined by
	// KataConfig.spec.kataConfigPoolSelector (the
	// node-role.kubernetes.io/kata-oc label checked above is in turn
	// assigned to Nodes based on the pool selector).
	// On converged clusters, basically only two operations are possible:
	// installing kata on all masters and uninstalling kata from all
	// masters; no per-Node options can be supported. We find out if kata
	// is supposed to be installed on the cluster by examining the "master"
	// MCP's MachineConfig to see if it installs the kata containers
	// extension.
	isKataEnabledOnNode := targetMcpName == "kata-oc"
	if isConvergedCluster {
		targetMc := &mcfgv1.MachineConfig{}
		if err := r.Client.Get(context.TODO(), types.NamespacedName{Name: nodeTargetMc}, targetMc); err != nil {
			// The log arguments must be strict key/value pairs.
			r.Log.Info("Failed to retrieve MachineConfig", "MC name", nodeTargetMc, "MCP name", targetMcpName)
			return err
		}

		isKataEnabledOnNode = false
		extensionName := getExtensionName()
		for _, extName := range targetMc.Spec.Extensions {
			if extName == extensionName {
				isKataEnabledOnNode = true
				break
			}
		}
	}

	nodeName := node.GetName()
	lists := &r.kataConfig.Status.KataNodes

	switch {
	case isNodeInstalled(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is Installed", "node", nodeName)
		lists.Installed = append(lists.Installed, nodeName)
	case isNodeNotInstalled(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		// Nothing to record: kata is neither present nor wanted here.
		r.Log.Info("node is NotInstalled", "node", nodeName)
	case isNodeInstalling(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is Installing", "node", nodeName)
		lists.Installing = append(lists.Installing, nodeName)
	case isNodeUninstalling(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is Uninstalling", "node", nodeName)
		lists.Uninstalling = append(lists.Uninstalling, nodeName)
	case isNodeWaitingToInstall(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is WaitingToInstall", "node", nodeName)
		lists.WaitingToInstall = append(lists.WaitingToInstall, nodeName)
	case isNodeWaitingToUninstall(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is WaitingToUninstall", "node", nodeName)
		lists.WaitingToUninstall = append(lists.WaitingToUninstall, nodeName)
	case isNodeFailedToInstall(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is FailedToInstall", "node", nodeName)
		lists.FailedToInstall = append(lists.FailedToInstall, nodeName)
	case isNodeFailedToUninstall(nodeMcoState, nodeCurrMc, nodeTargetMc, isKataEnabledOnNode):
		r.Log.Info("node is FailedToUninstall", "node", nodeName)
		lists.FailedToUninstall = append(lists.FailedToUninstall, nodeName)
	default:
		// The MCO state is none of done/working/degraded. Make the
		// omission visible instead of dropping the Node silently.
		r.Log.Info("node is in an unrecognized state, not listing it", "node", nodeName, "MCO state", nodeMcoState)
	}

	return nil
}

// clearNodeStatusLists resets every per-status Node list in
// r.kataConfig.Status.KataNodes to nil so the lists can be rebuilt from
// scratch.
func (r *KataConfigOpenShiftReconciler) clearNodeStatusLists() {
	lists := &r.kataConfig.Status.KataNodes

	// Installation-related lists.
	lists.Installed, lists.Installing = nil, nil
	lists.WaitingToInstall, lists.FailedToInstall = nil, nil

	// Uninstallation-related lists.
	lists.Uninstalling = nil
	lists.WaitingToUninstall, lists.FailedToUninstall = nil, nil
}

func (r *KataConfigOpenShiftReconciler) clearInstallStatus() {
r.kataConfig.Status.InstallationStatus.Completed.CompletedNodesList = nil
r.kataConfig.Status.InstallationStatus.Completed.CompletedNodesCount = 0
Expand Down