OCPBUGS-17585: Tighten the rules for modifying Tuned Profiles #775

Merged
8 changes: 8 additions & 0 deletions assets/tuned/manifests/ds-tuned.yaml
@@ -65,6 +65,10 @@ spec:
name: lib-modules
mountPropagation: HostToContainer
readOnly: true
- mountPath: /var/lib/kubelet
name: var-lib-kubelet
mountPropagation: HostToContainer
readOnly: true
- mountPath: /host
name: host
mountPropagation: HostToContainer
@@ -116,6 +120,10 @@ spec:
path: /lib/modules
type: Directory
name: lib-modules
- hostPath:
path: /var/lib/kubelet
type: Directory
name: var-lib-kubelet
- name: host
hostPath:
path: /
6 changes: 2 additions & 4 deletions manifests/20-profile.crd.yaml
@@ -81,10 +81,6 @@ spec:
is for internal use only and its fields may be changed/removed in the
future.
properties:
bootcmdline:
description: kernel parameters calculated by tuned for the active
Tuned profile
type: string
conditions:
description: conditions represents the state of the per-node Profile
application
@@ -126,3 +122,5 @@ spec:
type: object
served: true
storage: true
subresources:
status: {}
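
The new `subresources.status` stanza makes `.status` a separate API endpoint: a regular update can no longer modify status, and a status update cannot touch `spec`. Combined with the RBAC split in `40-rbac.yaml` below, this confines the operand to status-only writes on Profiles. A minimal operand-side sketch, assuming the regenerated clientset exposes the standard client-gen `UpdateStatus` method for subresource-enabled types (`clients`, `namespace`, `appliedProfile`, and `conditions` are illustrative names, not from this PR):

```go
// Sketch only: UpdateStatus is the method client-gen generates once the CRD
// enables the status subresource; surrounding variables are assumed to exist.
profile = profile.DeepCopy() // never mutate objects from the informer cache
profile.Status.TunedProfile = appliedProfile
profile.Status.Conditions = conditions

// This request targets the /status endpoint; any .spec changes carried in
// 'profile' are ignored by the API server.
_, err := clients.Tuned.TunedV1().Profiles(namespace).UpdateStatus(context.TODO(), profile, metav1.UpdateOptions{})
if err != nil {
	return fmt.Errorf("failed to update Profile %s status: %v", profile.Name, err)
}
```
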
5 changes: 4 additions & 1 deletion manifests/40-rbac.yaml
@@ -154,7 +154,10 @@ rules:
verbs: ["get","list","watch"]
- apiGroups: ["tuned.openshift.io"]
resources: ["profiles"]
verbs: ["get","list","update","watch","patch"]
verbs: ["get","list","watch"]
- apiGroups: ["tuned.openshift.io"]
resources: ["profiles/status"]
verbs: ["update"]
- apiGroups: ["security.openshift.io"]
resources: ["securitycontextconstraints"]
verbs: ["use"]
23 changes: 7 additions & 16 deletions pkg/apis/tuned/v1/tuned_types.go
@@ -19,18 +19,12 @@ const (
// that reflects the node tuning operator status.
TunedClusterOperatorResourceName = "node-tuning"

// Annotation on Profiles to denote the operand version responsible for calculating and reporting
// the Profile status.
GeneratedByOperandVersionAnnotationKey string = "tuned.openshift.io/generated-by-operand-version"

// Tuned 'TunedRenderedResourceName' CR's .metadata.generation. This annotation is used on resources
// to note the Tuned 'TunedRenderedResourceName' generation based on which the resources with this
// annotation were created/updated.
RendredTunedGenerationAnnotationKey string = "tuned.openshift.io/rendered-tuned-generation"

// The value of this annotation is the TuneD profile based on which the resource with this annotation was
// created/updated.
TunedProfileAnnotationKey string = "tuned.openshift.io/tuned-profile"
// Name of the NTO operand for versioning in ClusterOperator.
TunedOperandName = "openshift-tuned"

// TunedBootcmdlineAnnotationKey is a Node-specific annotation denoting kernel command-line parameters
// calculated by TuneD for the current profile applied to that Node.
TunedBootcmdlineAnnotationKey string = "tuned.openshift.io/bootcmdline"
)

/////////////////////////////////////////////////////////////////////////////////
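
With `bootcmdline` dropped from `ProfileStatus` (see `20-profile.crd.yaml` above), the operand needs another channel to report the kernel command line it calculated. The new `TunedBootcmdlineAnnotationKey` constant is described as Node-specific, which suggests the operand publishes the value as an annotation on its own Node object; `controller.go` below reads it back from the operator-side cache `c.pc.state.bootcmdline`. A sketch of the operand-side write, under the assumptions stated in the comments:

```go
// Sketch under assumptions: the operand learns its Node name (e.g. via the
// downward-API NODE_NAME variable) and has RBAC to update Nodes; none of
// this wiring is shown in the diff.
node, err := kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
if err != nil {
	return err
}
node = node.DeepCopy()
if node.Annotations == nil {
	node.Annotations = map[string]string{}
}
// e.g. "skew_tick=1 isolcpus=2-7"
node.Annotations[tunedv1.TunedBootcmdlineAnnotationKey] = bootcmdline
_, err = kubeClient.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
return err
```
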
@@ -146,6 +140,7 @@ type TunedList struct {
/////////////////////////////////////////////////////////////////////////////////
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// +kubebuilder:subresource:status

// Profile is a specification for a Profile resource.
type Profile struct {
@@ -176,10 +171,6 @@ type ProfileConfig struct {
// ProfileStatus is the status for a Profile resource; the status is for internal use only
// and its fields may be changed/removed in the future.
type ProfileStatus struct {
// kernel parameters calculated by tuned for the active Tuned profile
// +optional
Bootcmdline string `json:"bootcmdline"`

// the current profile in use by the Tuned daemon
TunedProfile string `json:"tunedProfile"`

119 changes: 58 additions & 61 deletions pkg/operator/controller.go
@@ -655,6 +655,10 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {

metrics.ProfileCalculated(profileMf.Name, tunedProfileName)

if err != nil {
return fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedRenderedResourceName, err)
}

profile, err := c.listers.TunedProfiles.Get(profileMf.Name)
if err != nil {
if errors.IsNotFound(err) {
Expand All @@ -672,7 +676,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
profileMf.Spec.Config.Debug = operand.Debug
profileMf.Spec.Config.TuneDConfig = operand.TuneDConfig
profileMf.Status.Conditions = tunedpkg.InitializeStatusConditions()
c.pc.state.profileLastUpdatedGeneration[profileMf.Name] = 1
_, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Create(context.TODO(), profileMf, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create Profile %s: %v", profileMf.Name, err)
@@ -697,27 +700,18 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
return fmt.Errorf("failed to get ProviderName: %v", err)
}

// profileLastUpdatedGeneration is set for each Profile when it is created or updated.
// In the case where it is not already set due to an operator restart, set it and
// clear profile.Status.Bootcmdline instead of syncing MachineConfigs.
_, lastUpdateGenSet := c.pc.state.profileLastUpdatedGeneration[profile.Name]

if ntoconfig.InHyperShift() {
// nodePoolName is the name of the NodePool which the Node corresponding to this Profile
// is a part of. If nodePoolName is the empty string, it either means that Node label
// based matching was used, or we don't know the NodePool, so we should not sync the
// MachineConfigs.
if nodePoolName != "" && lastUpdateGenSet {
if profile.Generation > c.pc.state.profileLastUpdatedGeneration[profile.Name]+1 {
klog.Infof("refusing to sync MachineConfig because Profile %s is more than 1 generation ahead of last update by operator", profile.Name)
} else {
if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
// Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
// has been successfully applied.
err := c.syncMachineConfigHyperShift(nodePoolName, profile)
if err != nil {
return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
}
if nodePoolName != "" {
if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
// Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
// has been successfully applied.
err := c.syncMachineConfigHyperShift(nodePoolName, profile)
if err != nil {
return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
}
}
}
@@ -729,21 +723,19 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
// Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
// has been successfully applied.
err := c.syncMachineConfig(getMachineConfigNameForPools(pools), mcLabels, profile)
err := c.syncMachineConfig(pools, mcLabels, profile)
if err != nil {
return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
}
}
}
}

// Only update Profile if the spec needs to be changed OR if !lastUpdateGenSet.
// Update, triggering the operand to update the Profile status.
// Only update Profile if the spec needs to be changed.
if profile.Spec.Config.TunedProfile == tunedProfileName &&
profile.Spec.Config.Debug == operand.Debug &&
reflect.DeepEqual(profile.Spec.Config.TuneDConfig, operand.TuneDConfig) &&
profile.Spec.Config.ProviderName == providerName &&
lastUpdateGenSet {
profile.Spec.Config.ProviderName == providerName {
klog.V(2).Infof("syncProfile(): no need to update Profile %s", nodeName)
return nil
}
@@ -754,9 +746,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
profile.Spec.Config.ProviderName = providerName
profile.Status.Conditions = tunedpkg.InitializeStatusConditions()

c.pc.state.profileLastUpdatedGeneration[profile.Name] = profile.Generation + 1 // after this update

klog.V(2).Infof("syncProfile(): updating Profile %s [%s]. Last operator updated generation: %d", profile.Name, tunedProfileName, c.pc.state.profileLastUpdatedGeneration[profile.Name])
klog.V(2).Infof("syncProfile(): updating Profile %s [%s]", profile.Name, tunedProfileName)
_, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Update(context.TODO(), profile, metav1.UpdateOptions{})
if err != nil {
return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
@@ -775,25 +765,30 @@ func (c *Controller) getProviderName(nodeName string) (string, error) {
return util.GetProviderName(node.Spec.ProviderID), nil
}

func (c *Controller) syncMachineConfig(name string, labels map[string]string, profile *tunedv1.Profile) error {
func (c *Controller) syncMachineConfig(pools []*mcfgv1.MachineConfigPool, labels map[string]string, profile *tunedv1.Profile) error {
var (
kernelArguments []string
)

if v := profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey]; v != os.Getenv("RELEASE_VERSION") {
// This looks like an update triggered by an old (not-yet-upgraded) operand. Ignore it.
klog.Infof("refusing to sync MachineConfig %q due to Profile %q change generated by operand version %q", name, profile.Name, v)
name := getMachineConfigNameForPools(pools)
nodes, err := c.pc.getNodesForPool(pools[0])
if err != nil {
return err
}

bootcmdline, bootcmdlineSet := c.pc.state.bootcmdline[profile.Name]
if !bootcmdlineSet {
klog.V(2).Infof("syncMachineConfig(): bootcmdline for %s not cached, sync cancelled", profile.Name)
return nil
}

bootcmdline := profile.Status.Bootcmdline
if ok := c.allNodesAgreeOnBootcmdline(nodes); !ok {
return fmt.Errorf("not all %d Nodes in MCP %v agree on bootcmdline: %s", len(nodes), pools[0].ObjectMeta.Name, bootcmdline)
Contributor: Will this propagate to the Status section of the Tuned object?

Contributor Author: Not sure what you mean. Tuned CR/object doesn't really use status.

}

kernelArguments = util.SplitKernelArguments(bootcmdline)

annotations := map[string]string{
GeneratedByControllerVersionAnnotationKey: version.Version,
tunedv1.RendredTunedGenerationAnnotationKey: profile.ObjectMeta.Annotations[tunedv1.RendredTunedGenerationAnnotationKey],
tunedv1.TunedProfileAnnotationKey: profile.Status.TunedProfile,
}
annotations := map[string]string{GeneratedByControllerVersionAnnotationKey: version.Version}

mc, err := c.listers.MachineConfigs.Get(name)
if err != nil {
@@ -823,17 +818,6 @@ func (c *Controller) syncMachineConfig(name string, labels map[string]string, pr
// No update needed
klog.V(2).Infof("syncMachineConfig(): MachineConfig %s doesn't need updating", mc.ObjectMeta.Name)
return nil
} else if mcAnnotationsMatch(mc.ObjectMeta.Annotations, annotations) {
// Kernel arguments differ and they were generated based on the same TuneD profile and Tuned/rendered CR.
machineCount, err := c.pc.getMachineCountForMachineConfigPool(mc.Name[len(MachineConfigPrefix)+1:])
if err != nil {
return err
} else if machineCount > 1 {
klog.Warningf("refusing to update MachineConfig %s for %s due to kernel arguments change with unchanged input configuration (%s/%s). Node(s) with different (CPU) topology in the same MCP?",
mc.Name, profile.Name, annotations[tunedv1.RendredTunedGenerationAnnotationKey], annotations[tunedv1.TunedProfileAnnotationKey])
c.bootcmdlineConflict[profile.Name] = true
return nil
}
}
mc = mc.DeepCopy() // never update the objects from cache
mc.ObjectMeta.Annotations = mcNew.ObjectMeta.Annotations
@@ -852,6 +836,26 @@ func (c *Controller) syncMachineConfig(name string, labels map[string]string, pr
return nil
}

// allNodesAgreeOnBootcmdline returns true if the current cached annotation 'TunedBootcmdlineAnnotationKey'
// of all Nodes in slice 'nodes' has the same value.
func (c *Controller) allNodesAgreeOnBootcmdline(nodes []*corev1.Node) bool {
if len(nodes) == 0 {
return true
}

match := true
bootcmdline := c.pc.state.bootcmdline[nodes[0].ObjectMeta.Name]
for _, node := range nodes[1:] {
if bootcmdline != c.pc.state.bootcmdline[node.ObjectMeta.Name] {
klog.V(2).Infof("found a conflicting bootcmdline %q for Node %q", c.pc.state.bootcmdline[node.ObjectMeta.Name], node.ObjectMeta.Name)
c.bootcmdlineConflict[node.ObjectMeta.Name] = true
match = false
}
}

return match
}
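
Design note: this per-Node agreement check replaces the removed `mcAnnotationsMatch`/machine-count guard in the old `syncMachineConfig`. Instead of inferring a conflict from unchanged annotations plus changed kernel arguments, the operator now directly compares the cached bootcmdline of every Node in the pool, which catches, for example, Nodes with different CPU topology landing in the same MCP. A sketch of how the cache might be kept in sync — the handler below is hypothetical; the actual informer wiring is outside this diff:

```go
// Sketch only: a hypothetical Node informer callback that mirrors the
// operand-written annotation into the operator's per-Node cache.
func (c *Controller) updateBootcmdlineCache(node *corev1.Node) {
	bootcmdline, ok := node.Annotations[tunedv1.TunedBootcmdlineAnnotationKey]
	if !ok {
		return // the operand has not reported a value for this Node yet
	}
	if c.pc.state.bootcmdline[node.ObjectMeta.Name] != bootcmdline {
		c.pc.state.bootcmdline[node.ObjectMeta.Name] = bootcmdline
		// A changed value may resolve an earlier conflict; re-evaluate it.
		delete(c.bootcmdlineConflict, node.ObjectMeta.Name)
	}
}
```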

func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *tunedv1.Profile) error {
var (
kernelArguments []string
@@ -860,13 +864,16 @@ func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *t
mcName := MachineConfigPrefix + "-" + nodePoolName
configMapName := mcConfigMapName(nodePoolName)

if v := profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey]; v != os.Getenv("RELEASE_VERSION") {
// This looks like an update triggered by an old (not-yet-upgraded) operand. Ignore it.
klog.Infof("refusing to sync MachineConfig ConfigMap %q due to Profile %q change generated by operand version %q", configMapName, profile.Name, v)
return nil
nodes, err := c.getNodesForNodePool(nodePoolName)
if err != nil {
return fmt.Errorf("could not fetch a list of Nodes for NodePool %s: %v", nodePoolName, err)
}

bootcmdline := c.pc.state.bootcmdline[profile.Name]
if ok := c.allNodesAgreeOnBootcmdline(nodes); !ok {
return fmt.Errorf("not all %d Nodes in NodePool %v agree on bootcmdline: %s", len(nodes), nodePoolName, bootcmdline)
}

bootcmdline := profile.Status.Bootcmdline
kernelArguments = util.SplitKernelArguments(bootcmdline)

annotations := map[string]string{GeneratedByControllerVersionAnnotationKey: version.Version}
@@ -1401,16 +1408,6 @@ func (c *Controller) NeedLeaderElection() bool {
return true
}

func mcAnnotationsMatch(old, new map[string]string) bool {
for _, key := range []string{tunedv1.RendredTunedGenerationAnnotationKey, tunedv1.TunedProfileAnnotationKey} {
if old[key] != new[key] {
return false
}
}

return true
}

func tunedMapFromList(tuneds []tunedv1.Tuned) map[string]tunedv1.Tuned {
ret := map[string]tunedv1.Tuned{}
for _, tuned := range tuneds {
18 changes: 18 additions & 0 deletions pkg/operator/hypershift.go
@@ -16,6 +16,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
serializer "k8s.io/apimachinery/pkg/runtime/serializer/json"
yamlutil "k8s.io/apimachinery/pkg/util/yaml"
@@ -172,6 +173,23 @@ func (c *Controller) getObjFromTunedConfigMap() ([]tunedv1.Tuned, error) {
return cmTuneds, nil
}

// getNodesForNodePool uses 'hypershiftNodePoolLabel' to return all Nodes which are in
// the NodePool 'nodePoolName'.
func (c *Controller) getNodesForNodePool(nodePoolName string) ([]*corev1.Node, error) {
nodes := []*corev1.Node{}

selector := labels.SelectorFromValidatedSet(
map[string]string{
hypershiftNodePoolLabel: nodePoolName,
})
nodes, err := c.pc.listers.Nodes.List(selector)
if err != nil {
return nodes, err
}

return nodes, err
}

// parseManifests parses a YAML or JSON document that may contain one or more
// kubernetes resources.
func parseTunedManifests(data []byte, nodePoolName string) ([]tunedv1.Tuned, error) {
30 changes: 30 additions & 0 deletions pkg/operator/mc.go
@@ -259,6 +259,36 @@ func (pc *ProfileCalculator) getPoolsForNode(node *corev1.Node) ([]*mcfgv1.Machi
return []*mcfgv1.MachineConfigPool{worker}, nil
}

// getNodesForPool returns a list of Nodes for MachineConfigPool 'pool'.
func (pc *ProfileCalculator) getNodesForPool(pool *mcfgv1.MachineConfigPool) ([]*corev1.Node, error) {
selector, err := metav1.LabelSelectorAsSelector(pool.Spec.NodeSelector)
if err != nil {
return nil, fmt.Errorf("invalid label selector %s in MachineConfigPool %s: %v", util.ObjectInfo(selector), pool.ObjectMeta.Name, err)
}

initialNodes, err := pc.listers.Nodes.List(selector)
if err != nil {
return nil, err
}

nodes := []*corev1.Node{}
for _, n := range initialNodes {
p, err := pc.getPrimaryPoolForNode(n)
if err != nil {
klog.Warningf("cannot get pool for node %q: %v", n.Name, err)
continue
}
if p == nil {
continue
}
if p.Name != pool.Name {
continue
}
nodes = append(nodes, n)
}
return nodes, nil
}

// getPrimaryPoolForNode uses getPoolsForNode and returns the first one which is the one the node targets
func (pc *ProfileCalculator) getPrimaryPoolForNode(node *corev1.Node) (*mcfgv1.MachineConfigPool, error) {
pools, err := pc.getPoolsForNode(node)
Expand Down