OCPBUGS-24792: Make MC names deterministic (openshift#875)
* Make MC names deterministic

OpenShift Nodes can be members of only one custom MachineConfigPool.
However, MCP configuration makes it possible to violate this
constraint. The machine-config-controller catches this and reports an
error (<node> belongs to <N> custom roles, cannot proceed with this
Node).
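
To illustrate the constraint, a minimal, self-contained sketch of the
custom-role check follows; the role labels are hypothetical and the
logic merely mirrors the controller's error path:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // Hypothetical Node labels: two custom roles besides "worker".
    nodeLabels := []string{
        "node-role.kubernetes.io/worker",
        "node-role.kubernetes.io/infra",
        "node-role.kubernetes.io/highperf",
    }

    var custom []string
    for _, label := range nodeLabels {
        role := strings.TrimPrefix(label, "node-role.kubernetes.io/")
        if role != "master" && role != "worker" {
            custom = append(custom, role)
        }
    }

    if len(custom) > 1 {
        // Mirrors the machine-config-controller error quoted above.
        fmt.Printf("node belongs to %d custom roles, cannot proceed with this Node\n", len(custom))
    }
}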

In order to target an MCP with a configuration, NTO uses
machineConfigLabels. However, multiple MCPs can select the same single
MC through their machineConfigSelector. This is another problematic
scenario.
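
As a sketch of that second scenario (the pool names and label set are
hypothetical, chosen only to show two machineConfigSelectors matching
one MC), the apimachinery selector API can demonstrate the overlap:

package main

import (
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/labels"
)

func main() {
    // Labels carried by a single operator-created MachineConfig (hypothetical).
    mcLabels := labels.Set{"machineconfiguration.openshift.io/role": "worker"}

    // machineConfigSelector of two hypothetical MCPs; both select the MC above.
    selectors := map[string]*metav1.LabelSelector{
        "worker": {MatchLabels: map[string]string{"machineconfiguration.openshift.io/role": "worker"}},
        "infra":  {MatchLabels: map[string]string{"machineconfiguration.openshift.io/role": "worker"}},
    }

    for poolName, ls := range selectors {
        selector, err := metav1.LabelSelectorAsSelector(ls)
        if err != nil {
            panic(err)
        }
        if selector.Matches(mcLabels) {
            fmt.Printf("MCP %q selects the MachineConfig\n", poolName)
        }
    }
}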

In the above two scenarios, NTO could generate a randomly-named MC
based on the membership of one of the matching MCPs. A pruning function
would then mistakenly remove the other, seemingly unused, MCs. This
could cause the rendered MCs to flip back and forth and trigger a Node
reboot.

This PR makes the naming of the MCs used for MachineConfigPool-based
matching deterministic.
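
A minimal sketch of the fix's idea, assuming a name prefix like NTO's
50-nto: sorting the matching pool names first makes the derived MC name
independent of the order in which the pools are returned:

package main

import (
    "fmt"
    "sort"
)

// mcNameForPools restates the naming rule: sort the pool names and
// derive the MC name from the alphabetically first one.
func mcNameForPools(prefix string, poolNames []string) string {
    sorted := append([]string(nil), poolNames...)
    sort.Strings(sorted)
    if len(sorted) == 0 {
        return prefix
    }
    return prefix + "-" + sorted[0]
}

func main() {
    // The same pools, listed in different orders, now yield the same name.
    fmt.Println(mcNameForPools("50-nto", []string{"worker", "infra"}))
    fmt.Println(mcNameForPools("50-nto", []string{"infra", "worker"}))
    // Both print: 50-nto-infra
}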

Other changes/fixes:
 - Synced with MCO's latest getPoolsForNode() changes.
 - Corrected function names in syncMachineConfigHyperShift() log messages.

Resolves: OCPBUGS-24792

* Enforce per-pool machineConfigLabels selectors

---------

Co-authored-by: Jiri Mencak <jmencak@users.noreply.github.com>
2 people authored and rbaturov committed Dec 18, 2023
1 parent e2a089b commit 3606a06
Showing 3 changed files with 110 additions and 35 deletions.
46 changes: 36 additions & 10 deletions pkg/operator/controller.go
@@ -41,7 +41,6 @@ import (
"github.com/openshift/cluster-node-tuning-operator/pkg/util"
"github.com/openshift/cluster-node-tuning-operator/version"

mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
mcfgclientset "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned"
mcfginformers "github.com/openshift/machine-config-operator/pkg/generated/informers/externalversions"
)
@@ -622,7 +621,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
var (
tunedProfileName string
mcLabels map[string]string
pools []*mcfgv1.MachineConfigPool
operand tunedv1.OperandConfig
nodePoolName string
)
@@ -661,7 +659,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
return err
}
} else {
tunedProfileName, mcLabels, pools, operand, err = c.pc.calculateProfile(nodeName)
tunedProfileName, mcLabels, operand, err = c.pc.calculateProfile(nodeName)
if err != nil {
return err
}
@@ -732,12 +730,12 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
} else {
if mcLabels != nil {
// The TuneD daemon profile 'tunedProfileName' for nodeName matched with MachineConfig
// labels set for additional machine configuration. Sync the operator-created
// MachineConfig for MachineConfigPools 'pools'.
// labels 'mcLabels' set for additional machine configuration. Sync the operator-created
// MachineConfig based on 'mcLabels'.
if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
// Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
// has been successfully applied.
err := c.syncMachineConfig(pools, mcLabels, profile)
err := c.syncMachineConfig(mcLabels, profile)
if err != nil {
return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
}
@@ -779,25 +777,48 @@ func (c *Controller) getProviderName(nodeName string) (string, error) {
return util.GetProviderName(node.Spec.ProviderID), nil
}

func (c *Controller) syncMachineConfig(pools []*mcfgv1.MachineConfigPool, labels map[string]string, profile *tunedv1.Profile) error {
func (c *Controller) syncMachineConfig(labels map[string]string, profile *tunedv1.Profile) error {
var (
bootcmdline string
kernelArguments []string
)

pools, err := c.pc.getPoolsForMachineConfigLabelsSorted(labels)
if err != nil {
return err
}

// The following enforces per-pool machineConfigLabels selectors.
if len(pools) > 1 {
// Log an error and do not requeue, this is a configuration issue.
klog.Errorf("profile %v uses machineConfigLabels that match across multiple MCPs (%v); this is not supported",
profile.Name, printMachineConfigPoolsNames(pools))
return nil
}

name := GetMachineConfigNameForPools(pools)
klog.V(2).Infof("syncMachineConfig(): %v", name)

var bootcmdlineSet bool
nodes, err := c.pc.getNodesForPool(pools[0])
if err != nil {
return err
}

bootcmdline, bootcmdlineSet := c.pc.state.bootcmdline[profile.Name]
bootcmdline, bootcmdlineSet = c.pc.state.bootcmdline[profile.Name]
if !bootcmdlineSet {
klog.V(2).Infof("syncMachineConfig(): bootcmdline for %s not cached, sync cancelled", profile.Name)
return nil
}

if ok := c.allNodesAgreeOnBootcmdline(nodes); !ok {
return fmt.Errorf("not all %d Nodes in MCP %v agree on bootcmdline: %s", len(nodes), pools[0].ObjectMeta.Name, bootcmdline)
// Log an error and do not requeue, this is a configuration issue.
klog.Errorf("not all %d Nodes in MCP %v agree on bootcmdline: %s", len(nodes), pools[0].ObjectMeta.Name, bootcmdline)
return nil
}

kernelArguments = util.SplitKernelArguments(bootcmdline)
@@ -943,7 +964,7 @@ func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *t
// If mcfgs are equivalent don't update
if kernelArgsEq && cmLabelsAndAnnotationsCorrect {
// No update needed
klog.V(2).Infof("syncMachineConfig(): MachineConfig %s doesn't need updating", mc.ObjectMeta.Name)
klog.V(2).Infof("syncMachineConfigHyperShift(): MachineConfig %s doesn't need updating", mc.ObjectMeta.Name)
return nil
}

@@ -953,8 +974,13 @@ func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *t
mc.Spec.KernelArguments = kernelArguments
mc.Spec.Config = mcNew.Spec.Config

l := MachineConfigGenerationLogLine(!kernelArgsEq, bootcmdline)
klog.V(2).Infof("syncMachineConfigHyperShift(): updating MachineConfig %s with%s", mc.ObjectMeta.Name, l)

newData, err := serializeMachineConfig(mc)
if err != nil {
81 changes: 66 additions & 15 deletions pkg/operator/mc.go
@@ -3,7 +3,11 @@ package operator
import (
"encoding/json"
"fmt"
"reflect"
"sort"
"strings"

corev1 "k8s.io/api/core/v1"
@@ -71,6 +75,7 @@ func newMachineConfig(name string, annotations map[string]string, labels map[str
}
}

// IgnParseWrapper parses rawIgn for V3.2 ignition config and returns
// a V3.2 Config or an error.
func ignParseWrapper(rawIgn []byte) (interface{}, error) {
@@ -125,27 +130,56 @@ func ignEqual(mcOld, mcNew *mcfgv1.MachineConfig) (bool, error) {
}

func printMachineConfigPoolsNames(pools []*mcfgv1.MachineConfigPool) string {
var (
sb strings.Builder
poolNames []string
)

for _, pool := range pools {
if pool == nil {
continue
}
poolNames = append(poolNames, pool.ObjectMeta.Name)
}
sort.Strings(poolNames)

for i, poolName := range poolNames {
if i > 0 {
sb.WriteString(",")
}
sb.WriteString(poolName)
}

return sb.String()
}

// GetMachineConfigNameForPools takes a slice of MachineConfigPools and returns
// a MachineConfig name to be used for MachineConfigPool-based matching.
func GetMachineConfigNameForPools(pools []*mcfgv1.MachineConfigPool) string {
var (
sb strings.Builder
poolNames []string
)

for _, pool := range pools {
if pool == nil {
continue
}
poolNames = append(poolNames, pool.ObjectMeta.Name)
}
// See OCPBUGS-24792: the slice of MCP objects can be passed in random order.
sort.Strings(poolNames)

sb.WriteString(MachineConfigPrefix)
if len(poolNames) > 0 {
sb.WriteString("-")
// Use the first MCP's name out of all alphabetically sorted MCP names. This will either be a custom pool name
// or master/worker in that order.
sb.WriteString(poolNames[0])
}

return sb.String()
}
@@ -190,6 +224,21 @@ func (pc *ProfileCalculator) getPoolsForMachineConfigLabels(mcLabels map[string]
return pools, nil
}

// getPoolsForMachineConfigLabelsSorted is the same as getPoolsForMachineConfigLabels, but
// returns the MCPs alphabetically sorted by their names.
func (pc *ProfileCalculator) getPoolsForMachineConfigLabelsSorted(mcLabels map[string]string) ([]*mcfgv1.MachineConfigPool, error) {
pools, err := pc.getPoolsForMachineConfigLabels(mcLabels)
if err != nil {
return nil, err
}

sort.Slice(pools, func(i, j int) bool {
return pools[i].Name < pools[j].Name
})

return pools, nil
}

// getPoolsForNode chooses the MachineConfigPools that should be used for a given node.
// It disambiguates in the case where e.g. a node has both master/worker roles applied,
// and where a custom role may be used. It returns a slice of all the pools the node belongs to.
@@ -237,16 +286,18 @@ func (pc *ProfileCalculator) getPoolsForNode(node *corev1.Node) ([]*mcfgv1.Machi
klog.Errorf("node %s belongs to %d custom roles, cannot proceed with this Node", node.Name, len(custom))
return nil, nil
} else if len(custom) == 1 {
// We don't support making custom pools for masters
pls := []*mcfgv1.MachineConfigPool{}
if master != nil {
klog.Errorf("node %s has both master role and custom role %s", node.Name, custom[0].Name)
return nil, nil
// If we have a custom pool and master, defer to master and return.
pls = append(pls, master)
} else {
pls = append(pls, custom[0])
}
if worker != nil {
pls = append(pls, worker)
}
// This allows us to have master, worker, infra but be in the master pool;
// or if !worker and !master then we just use the custom pool.
return pls, nil
} else if master != nil {
// In the case where a node is both master/worker, have it live under
18 changes: 8 additions & 10 deletions pkg/operator/profilecalculator.go
@@ -155,17 +155,16 @@ func (pc *ProfileCalculator) nodeChangeHandler(nodeName string) (bool, error) {
// Returns
// * the tuned daemon profile name
// * MachineConfig labels if the profile was selected by machineConfigLabels
// * MachineConfigPools for 'nodeName' if the profile was selected by machineConfigLabels
// * whether to run the Tuned daemon in debug mode on node nodeName
// * an error if any
func (pc *ProfileCalculator) calculateProfile(nodeName string) (string, map[string]string, []*mcfgv1.MachineConfigPool, tunedv1.OperandConfig, error) {
func (pc *ProfileCalculator) calculateProfile(nodeName string) (string, map[string]string, tunedv1.OperandConfig, error) {
var operand tunedv1.OperandConfig

klog.V(3).Infof("calculateProfile(%s)", nodeName)
tunedList, err := pc.listers.TunedResources.List(labels.Everything())

if err != nil {
return "", nil, nil, operand, fmt.Errorf("failed to list Tuned: %v", err)
return "", nil, operand, fmt.Errorf("failed to list Tuned: %v", err)
}

for _, recommend := range tunedRecommend(tunedList) {
@@ -180,7 +179,7 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (string, map[stri
// we do not want to call profileMatches() in that case unless machineConfigLabels
// is undefined.
if (recommend.Match != nil || recommend.MachineConfigLabels == nil) && pc.profileMatches(recommend.Match, nodeName) {
return *recommend.Profile, nil, nil, recommend.Operand, nil
return *recommend.Profile, nil, recommend.Operand, nil
}

if recommend.MachineConfigLabels == nil {
@@ -194,30 +193,29 @@ func (pc *ProfileCalculator) calculateProfile(nodeName string) (string, map[stri
// is often unneeded and would likely have a performance impact.
node, err = pc.listers.Nodes.Get(nodeName)
if err != nil {
return "", nil, nil, operand, err
return "", nil, operand, err
}

pools, err = pc.getPoolsForNode(node)

if err != nil {
return "", nil, nil, operand, err
return "", nil, operand, err
}
}

// MachineConfigLabels based matching
if pc.machineConfigLabelsMatch(recommend.MachineConfigLabels, pools) {
return *recommend.Profile, recommend.MachineConfigLabels, pools, recommend.Operand, nil
return *recommend.Profile, recommend.MachineConfigLabels, recommend.Operand, nil
}
}

// This should never happen; the default Tuned CR should always be accessible and with a catch-all rule
// in the "recommend" section to select the default profile for the tuned daemon.
_, err = pc.listers.TunedResources.Get(tunedv1.TunedDefaultResourceName)
if err != nil {
return defaultProfile, nil, nil, operand, fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedDefaultResourceName, err)
return defaultProfile, nil, operand, fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedDefaultResourceName, err)
}

return defaultProfile, nil, nil, operand, fmt.Errorf("the default Tuned CR misses a catch-all profile selection")
return defaultProfile, nil, operand, fmt.Errorf("the default Tuned CR misses a catch-all profile selection")
}

// calculateProfileHyperShift calculates a tuned profile for Node nodeName.
