diff --git a/assets/tuned/manifests/ds-tuned.yaml b/assets/tuned/manifests/ds-tuned.yaml
index 971593023..16e617b70 100644
--- a/assets/tuned/manifests/ds-tuned.yaml
+++ b/assets/tuned/manifests/ds-tuned.yaml
@@ -65,6 +65,10 @@ spec:
           name: lib-modules
           mountPropagation: HostToContainer
           readOnly: true
+        - mountPath: /var/lib/kubelet
+          name: var-lib-kubelet
+          mountPropagation: HostToContainer
+          readOnly: true
         - mountPath: /host
           name: host
           mountPropagation: HostToContainer
@@ -116,6 +120,10 @@ spec:
           path: /lib/modules
           type: Directory
         name: lib-modules
+      - hostPath:
+          path: /var/lib/kubelet
+          type: Directory
+        name: var-lib-kubelet
       - name: host
         hostPath:
           path: /
diff --git a/manifests/20-profile.crd.yaml b/manifests/20-profile.crd.yaml
index 9fbbbe7fd..159f6cf5d 100644
--- a/manifests/20-profile.crd.yaml
+++ b/manifests/20-profile.crd.yaml
@@ -81,10 +81,6 @@ spec:
                 is for internal use only and its fields may be changed/removed in the
                 future.
               properties:
-                bootcmdline:
-                  description: kernel parameters calculated by tuned for the active
-                    Tuned profile
-                  type: string
                 conditions:
                   description: conditions represents the state of the per-node Profile
                     application
@@ -126,3 +122,5 @@ spec:
         type: object
     served: true
     storage: true
+    subresources:
+      status: {}
diff --git a/manifests/40-rbac.yaml b/manifests/40-rbac.yaml
index 8bf53e4e6..1deb2ef5f 100644
--- a/manifests/40-rbac.yaml
+++ b/manifests/40-rbac.yaml
@@ -154,7 +154,10 @@ rules:
   verbs: ["get","list","watch"]
 - apiGroups: ["tuned.openshift.io"]
   resources: ["profiles"]
-  verbs: ["get","list","update","watch","patch"]
+  verbs: ["get","list","watch"]
+- apiGroups: ["tuned.openshift.io"]
+  resources: ["profiles/status"]
+  verbs: ["update"]
 - apiGroups: ["security.openshift.io"]
   resources: ["securitycontextconstraints"]
   verbs: ["use"]
diff --git a/pkg/apis/tuned/v1/tuned_types.go b/pkg/apis/tuned/v1/tuned_types.go
index 6ddea656f..23d9eb476 100644
--- a/pkg/apis/tuned/v1/tuned_types.go
+++ b/pkg/apis/tuned/v1/tuned_types.go
@@ -19,18 +19,12 @@ const (
     // that reflects the node tuning operator status.
     TunedClusterOperatorResourceName = "node-tuning"
 
-    // Annotation on Profiles to denote the operand version responsible for calculating and reporting
-    // the Profile status.
-    GeneratedByOperandVersionAnnotationKey string = "tuned.openshift.io/generated-by-operand-version"
-
-    // Tuned 'TunedRenderedResourceName' CR's .metadata.generation. This annotation is used on resources
-    // to note the Tuned 'TunedRenderedResourceName' generation based on which the resources with this
-    // annotation were created/updated.
-    RendredTunedGenerationAnnotationKey string = "tuned.openshift.io/rendered-tuned-generation"
-
-    // The value of this annotation is the TuneD profile based on which the resource with this annotation was
-    // created/updated.
-    TunedProfileAnnotationKey string = "tuned.openshift.io/tuned-profile"
+    // Name of the NTO operand for versioning in ClusterOperator.
+    TunedOperandName = "openshift-tuned"
+
+    // TunedBootcmdlineAnnotationKey is a Node-specific annotation denoting kernel command-line parameters
+    // calculated by TuneD for the current profile applied to that Node.
+    TunedBootcmdlineAnnotationKey string = "tuned.openshift.io/bootcmdline"
 )
 
 /////////////////////////////////////////////////////////////////////////////////
@@ -146,6 +140,7 @@ type TunedList struct {
 /////////////////////////////////////////////////////////////////////////////////
 // +genclient
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+// +kubebuilder:subresource:status
 
 // Profile is a specification for a Profile resource.
 type Profile struct {
@@ -176,10 +171,6 @@ type ProfileConfig struct {
 // ProfileStatus is the status for a Profile resource; the status is for internal use only
 // and its fields may be changed/removed in the future.
 type ProfileStatus struct {
-    // kernel parameters calculated by tuned for the active Tuned profile
-    // +optional
-    Bootcmdline string `json:"bootcmdline"`
-
     // the current profile in use by the Tuned daemon
     TunedProfile string `json:"tunedProfile"`
 
diff --git a/pkg/operator/controller.go b/pkg/operator/controller.go
index 1ed5f64b1..5ea5c1171 100644
--- a/pkg/operator/controller.go
+++ b/pkg/operator/controller.go
@@ -655,6 +655,10 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
 
     metrics.ProfileCalculated(profileMf.Name, tunedProfileName)
 
+    if err != nil {
+        return fmt.Errorf("failed to get Tuned %s: %v", tunedv1.TunedRenderedResourceName, err)
+    }
+
     profile, err := c.listers.TunedProfiles.Get(profileMf.Name)
     if err != nil {
         if errors.IsNotFound(err) {
@@ -672,7 +676,6 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
             profileMf.Spec.Config.Debug = operand.Debug
             profileMf.Spec.Config.TuneDConfig = operand.TuneDConfig
             profileMf.Status.Conditions = tunedpkg.InitializeStatusConditions()
-            c.pc.state.profileLastUpdatedGeneration[profileMf.Name] = 1
             _, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Create(context.TODO(), profileMf, metav1.CreateOptions{})
             if err != nil {
                 return fmt.Errorf("failed to create Profile %s: %v", profileMf.Name, err)
@@ -697,27 +700,18 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
         return fmt.Errorf("failed to get ProviderName: %v", err)
     }
 
-    // profileLastUpdatedGeneration is set for each Profile when it is created or updated.
-    // In the case where it is not already set due to an operator restart, set it and
-    // clear profile.Status.Bootcmdline instead of syncing MachineConfigs.
-    _, lastUpdateGenSet := c.pc.state.profileLastUpdatedGeneration[profile.Name]
-
     if ntoconfig.InHyperShift() {
         // nodePoolName is the name of the NodePool which the Node corresponding to this Profile
         // is a part of. If nodePoolName is the empty string, it either means that Node label
         // based matching was used, or we don't know the NodePool, so we should not sync the
         // MachineConfigs.
-        if nodePoolName != "" && lastUpdateGenSet {
-            if profile.Generation > c.pc.state.profileLastUpdatedGeneration[profile.Name]+1 {
-                klog.Infof("refusing to sync MachineConfig because Profile %s is more than 1 generation ahead of last update by operator", profile.Name)
-            } else {
-                if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
-                    // Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
-                    // has been successfully applied.
-                    err := c.syncMachineConfigHyperShift(nodePoolName, profile)
-                    if err != nil {
-                        return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
-                    }
+        if nodePoolName != "" {
+            if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
+                // Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
+                // has been successfully applied.
+                err := c.syncMachineConfigHyperShift(nodePoolName, profile)
+                if err != nil {
+                    return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
                 }
             }
         }
@@ -729,7 +723,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
         if profile.Status.TunedProfile == tunedProfileName && profileApplied(profile) {
             // Synchronize MachineConfig only once the (calculated) TuneD profile 'tunedProfileName'
             // has been successfully applied.
-            err := c.syncMachineConfig(getMachineConfigNameForPools(pools), mcLabels, profile)
+            err := c.syncMachineConfig(pools, mcLabels, profile)
             if err != nil {
                 return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
             }
         }
     }
 
-    // Only update Profile if the spec needs to be changed OR if !lastUpdateGenSet.
-    // Update, triggering the operand to update the Profile status.
+    // Only update Profile if the spec needs to be changed.
     if profile.Spec.Config.TunedProfile == tunedProfileName &&
         profile.Spec.Config.Debug == operand.Debug &&
         reflect.DeepEqual(profile.Spec.Config.TuneDConfig, operand.TuneDConfig) &&
-        profile.Spec.Config.ProviderName == providerName &&
-        lastUpdateGenSet {
+        profile.Spec.Config.ProviderName == providerName {
         klog.V(2).Infof("syncProfile(): no need to update Profile %s", nodeName)
         return nil
     }
@@ -754,9 +746,7 @@ func (c *Controller) syncProfile(tuned *tunedv1.Tuned, nodeName string) error {
     profile.Spec.Config.ProviderName = providerName
     profile.Status.Conditions = tunedpkg.InitializeStatusConditions()
 
-    c.pc.state.profileLastUpdatedGeneration[profile.Name] = profile.Generation + 1 // after this update
-
-    klog.V(2).Infof("syncProfile(): updating Profile %s [%s]. Last operator updated generation: %d", profile.Name, tunedProfileName, c.pc.state.profileLastUpdatedGeneration[profile.Name])
+    klog.V(2).Infof("syncProfile(): updating Profile %s [%s]", profile.Name, tunedProfileName)
     _, err = c.clients.Tuned.TunedV1().Profiles(ntoconfig.WatchNamespace()).Update(context.TODO(), profile, metav1.UpdateOptions{})
     if err != nil {
         return fmt.Errorf("failed to update Profile %s: %v", profile.Name, err)
@@ -775,25 +765,30 @@ func (c *Controller) getProviderName(nodeName string) (string, error) {
     return util.GetProviderName(node.Spec.ProviderID), nil
 }
 
-func (c *Controller) syncMachineConfig(name string, labels map[string]string, profile *tunedv1.Profile) error {
+func (c *Controller) syncMachineConfig(pools []*mcfgv1.MachineConfigPool, labels map[string]string, profile *tunedv1.Profile) error {
     var (
         kernelArguments []string
     )
 
-    if v := profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey]; v != os.Getenv("RELEASE_VERSION") {
-        // This looks like an update triggered by an old (not-yet-upgraded) operand. Ignore it.
-        klog.Infof("refusing to sync MachineConfig %q due to Profile %q change generated by operand version %q", name, profile.Name, v)
+    name := getMachineConfigNameForPools(pools)
+    nodes, err := c.pc.getNodesForPool(pools[0])
+    if err != nil {
+        return err
+    }
+
+    bootcmdline, bootcmdlineSet := c.pc.state.bootcmdline[profile.Name]
+    if !bootcmdlineSet {
+        klog.V(2).Infof("syncMachineConfig(): bootcmdline for %s not cached, sync cancelled", profile.Name)
         return nil
     }
 
-    bootcmdline := profile.Status.Bootcmdline
+    if ok := c.allNodesAgreeOnBootcmdline(nodes); !ok {
+        return fmt.Errorf("not all %d Nodes in MCP %v agree on bootcmdline: %s", len(nodes), pools[0].ObjectMeta.Name, bootcmdline)
+    }
+
     kernelArguments = util.SplitKernelArguments(bootcmdline)
 
-    annotations := map[string]string{
-        GeneratedByControllerVersionAnnotationKey:   version.Version,
-        tunedv1.RendredTunedGenerationAnnotationKey: profile.ObjectMeta.Annotations[tunedv1.RendredTunedGenerationAnnotationKey],
-        tunedv1.TunedProfileAnnotationKey:           profile.Status.TunedProfile,
-    }
+    annotations := map[string]string{GeneratedByControllerVersionAnnotationKey: version.Version}
 
     mc, err := c.listers.MachineConfigs.Get(name)
     if err != nil {
@@ -823,17 +818,6 @@ func (c *Controller) syncMachineConfig(name string, labels map[string]string, pr
             // No update needed
             klog.V(2).Infof("syncMachineConfig(): MachineConfig %s doesn't need updating", mc.ObjectMeta.Name)
             return nil
-        } else if mcAnnotationsMatch(mc.ObjectMeta.Annotations, annotations) {
-            // Kernel arguments differ and they were generated based on the same TuneD profile and Tuned/rendered CR.
-            machineCount, err := c.pc.getMachineCountForMachineConfigPool(mc.Name[len(MachineConfigPrefix)+1:])
-            if err != nil {
-                return err
-            } else if machineCount > 1 {
-                klog.Warningf("refusing to update MachineConfig %s for %s due to kernel arguments change with unchanged input configuration (%s/%s). Node(s) with different (CPU) topology in the same MCP?",
-                    mc.Name, profile.Name, annotations[tunedv1.RendredTunedGenerationAnnotationKey], annotations[tunedv1.TunedProfileAnnotationKey])
-                c.bootcmdlineConflict[profile.Name] = true
-                return nil
-            }
         }
         mc = mc.DeepCopy() // never update the objects from cache
         mc.ObjectMeta.Annotations = mcNew.ObjectMeta.Annotations
@@ -852,6 +836,26 @@
     return nil
 }
 
+// allNodesAgreeOnBootcmdline returns true if the current cached annotation 'TunedBootcmdlineAnnotationKey'
+// of all Nodes in slice 'nodes' has the same value.
+func (c *Controller) allNodesAgreeOnBootcmdline(nodes []*corev1.Node) bool {
+    if len(nodes) == 0 {
+        return true
+    }
+
+    match := true
+    bootcmdline := c.pc.state.bootcmdline[nodes[0].ObjectMeta.Name]
+    for _, node := range nodes[1:] {
+        if bootcmdline != c.pc.state.bootcmdline[node.ObjectMeta.Name] {
+            klog.V(2).Infof("found a conflicting bootcmdline %q for Node %q", c.pc.state.bootcmdline[node.ObjectMeta.Name], node.ObjectMeta.Name)
+            c.bootcmdlineConflict[node.ObjectMeta.Name] = true
+            match = false
+        }
+    }
+
+    return match
+}
+
 func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *tunedv1.Profile) error {
     var (
         kernelArguments []string
@@ -860,13 +864,16 @@ func (c *Controller) syncMachineConfigHyperShift(nodePoolName string, profile *t
     mcName := MachineConfigPrefix + "-" + nodePoolName
     configMapName := mcConfigMapName(nodePoolName)
 
-    if v := profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey]; v != os.Getenv("RELEASE_VERSION") {
-        // This looks like an update triggered by an old (not-yet-upgraded) operand. Ignore it.
-        klog.Infof("refusing to sync MachineConfig ConfigMap %q due to Profile %q change generated by operand version %q", configMapName, profile.Name, v)
-        return nil
+    nodes, err := c.getNodesForNodePool(nodePoolName)
+    if err != nil {
+        return fmt.Errorf("could not fetch a list of Nodes for NodePool %s: %v", nodePoolName, err)
+    }
+
+    bootcmdline := c.pc.state.bootcmdline[profile.Name]
+    if ok := c.allNodesAgreeOnBootcmdline(nodes); !ok {
+        return fmt.Errorf("not all %d Nodes in NodePool %v agree on bootcmdline: %s", len(nodes), nodePoolName, bootcmdline)
     }
 
-    bootcmdline := profile.Status.Bootcmdline
     kernelArguments = util.SplitKernelArguments(bootcmdline)
 
     annotations := map[string]string{GeneratedByControllerVersionAnnotationKey: version.Version}
@@ -1401,16 +1408,6 @@ func (c *Controller) NeedLeaderElection() bool {
     return true
 }
 
-func mcAnnotationsMatch(old, new map[string]string) bool {
-    for _, key := range []string{tunedv1.RendredTunedGenerationAnnotationKey, tunedv1.TunedProfileAnnotationKey} {
-        if old[key] != new[key] {
-            return false
-        }
-    }
-
-    return true
-}
-
 func tunedMapFromList(tuneds []tunedv1.Tuned) map[string]tunedv1.Tuned {
     ret := map[string]tunedv1.Tuned{}
     for _, tuned := range tuneds {
diff --git a/pkg/operator/hypershift.go b/pkg/operator/hypershift.go
index 68c6835f7..341131dc5 100644
--- a/pkg/operator/hypershift.go
+++ b/pkg/operator/hypershift.go
@@ -16,6 +16,7 @@ import (
     corev1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/runtime"
     serializer "k8s.io/apimachinery/pkg/runtime/serializer/json"
     yamlutil "k8s.io/apimachinery/pkg/util/yaml"
@@ -172,6 +173,23 @@ func (c *Controller) getObjFromTunedConfigMap() ([]tunedv1.Tuned, error) {
     return cmTuneds, nil
 }
 
+// getNodesForNodePool uses 'hypershiftNodePoolLabel' to return all Nodes which are in
+// the NodePool 'nodePoolName'.
+func (c *Controller) getNodesForNodePool(nodePoolName string) ([]*corev1.Node, error) {
+    nodes := []*corev1.Node{}
+
+    selector := labels.SelectorFromValidatedSet(
+        map[string]string{
+            hypershiftNodePoolLabel: nodePoolName,
+        })
+    nodes, err := c.pc.listers.Nodes.List(selector)
+    if err != nil {
+        return nodes, err
+    }
+
+    return nodes, err
+}
+
 // parseManifests parses a YAML or JSON document that may contain one or more
 // kubernetes resources.
 func parseTunedManifests(data []byte, nodePoolName string) ([]tunedv1.Tuned, error) {
diff --git a/pkg/operator/mc.go b/pkg/operator/mc.go
index 12b717bd7..01d6fa051 100644
--- a/pkg/operator/mc.go
+++ b/pkg/operator/mc.go
@@ -259,6 +259,36 @@ func (pc *ProfileCalculator) getPoolsForNode(node *corev1.Node) ([]*mcfgv1.Machi
     return []*mcfgv1.MachineConfigPool{worker}, nil
 }
 
+// getNodesForPool returns a list of Nodes for MachineConfigPool 'pool'.
+func (pc *ProfileCalculator) getNodesForPool(pool *mcfgv1.MachineConfigPool) ([]*corev1.Node, error) {
+    selector, err := metav1.LabelSelectorAsSelector(pool.Spec.NodeSelector)
+    if err != nil {
+        return nil, fmt.Errorf("invalid label selector %s in MachineConfigPool %s: %v", util.ObjectInfo(selector), pool.ObjectMeta.Name, err)
+    }
+
+    initialNodes, err := pc.listers.Nodes.List(selector)
+    if err != nil {
+        return nil, err
+    }
+
+    nodes := []*corev1.Node{}
+    for _, n := range initialNodes {
+        p, err := pc.getPrimaryPoolForNode(n)
+        if err != nil {
+            klog.Warningf("cannot get pool for node %q: %v", n.Name, err)
+            continue
+        }
+        if p == nil {
+            continue
+        }
+        if p.Name != pool.Name {
+            continue
+        }
+        nodes = append(nodes, n)
+    }
+    return nodes, nil
+}
+
 // getPrimaryPoolForNode uses getPoolsForNode and returns the first one which is the one the node targets
 func (pc *ProfileCalculator) getPrimaryPoolForNode(node *corev1.Node) (*mcfgv1.MachineConfigPool, error) {
     pools, err := pc.getPoolsForNode(node)
diff --git a/pkg/operator/profilecalculator.go b/pkg/operator/profilecalculator.go
index 793d6b983..d35c5e66f 100644
--- a/pkg/operator/profilecalculator.go
+++ b/pkg/operator/profilecalculator.go
@@ -24,9 +24,6 @@ const (
 )
 
 type tunedState struct {
-    profileLastUpdatedGeneration map[string]int64
-    // Node name:                          ^^^^^^
-    // Last time operator changed profile:           ^^^
     nodeLabels map[string]map[string]string
     // Node name:        ^^^^^^
     // Node-specific label:          ^^^^^^
@@ -37,6 +34,9 @@ type tunedState struct {
     providerIDs map[string]string
     // Node name:         ^^^^^^
     // provider-id                ^^^^^^
+    bootcmdline map[string]string
+    // Node name:         ^^^^^^
+    // bootcmdline                ^^^^^^
 }
 
 type ProfileCalculator struct {
@@ -53,7 +53,7 @@ func NewProfileCalculator(listers *ntoclient.Listers, clients *ntoclient.Clients
     pc.state.nodeLabels = map[string]map[string]string{}
     pc.state.podLabels = map[string]map[string]map[string]string{}
     pc.state.providerIDs = map[string]string{}
-    pc.state.profileLastUpdatedGeneration = map[string]int64{}
+    pc.state.bootcmdline = map[string]string{}
     return pc
 }
 
@@ -132,6 +132,13 @@ func (pc *ProfileCalculator) nodeChangeHandler(nodeName string) (bool, error) {
         change = true
     }
 
+    if node.ObjectMeta.Annotations != nil {
+        if bootcmdlineAnnotVal, bootcmdlineAnnotSet := node.ObjectMeta.Annotations[tunedv1.TunedBootcmdlineAnnotationKey]; bootcmdlineAnnotSet {
+            change = pc.state.bootcmdline[nodeName] != bootcmdlineAnnotVal
+            pc.state.bootcmdline[nodeName] = bootcmdlineAnnotVal
+        }
+    }
+
     nodeLabelsNew := util.MapOfStringsCopy(node.Labels)
 
     if !util.MapOfStringsEqual(nodeLabelsNew, pc.state.nodeLabels[nodeName]) {
diff --git a/pkg/operator/status.go b/pkg/operator/status.go
index 3eb99e4b9..6ba3571f1 100644
--- a/pkg/operator/status.go
+++ b/pkg/operator/status.go
@@ -21,9 +21,15 @@ import (
     "github.com/openshift/cluster-node-tuning-operator/pkg/metrics"
 )
 
+const (
+    errGenerationMismatch = "generation mismatch"
+)
+
 // syncOperatorStatus computes the operator's current status and therefrom
 // creates or updates the ClusterOperator resource for the operator.
 func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
+    var operandReleaseVersion string
+
     klog.V(2).Infof("syncOperatorStatus()")
 
     co, err := c.getOrCreateOperatorStatus()
@@ -33,10 +39,17 @@ func (c *Controller) syncOperatorStatus(tuned *tunedv1.Tuned) error {
     co = co.DeepCopy() // Never modify objects in cache
     oldConditions := co.Status.Conditions
 
-    co.Status.Conditions, err = c.computeStatusConditions(tuned, oldConditions)
+    co.Status.Conditions, operandReleaseVersion, err = c.computeStatus(tuned, oldConditions)
     if err != nil {
+        if err.Error() == errGenerationMismatch {
+            // Tuned DaemonSet has not updated its status yet
+            return nil
+        }
         return err
     }
+    if len(operandReleaseVersion) > 0 {
+        operatorv1helpers.SetOperandVersion(&co.Status.Versions, configv1.OperandVersion{Name: tunedv1.TunedOperandName, Version: operandReleaseVersion})
+    }
     // every operator must report its version from the payload
     // if the operator is reporting available, it resets the release version to the present value
     if releaseVersion := os.Getenv("RELEASE_VERSION"); len(releaseVersion) > 0 {
@@ -148,13 +161,15 @@ func (c *Controller) numProfilesWithBootcmdlineConflict(profileList []*tunedv1.P
     return numConflict
 }
 
-// computeStatusConditions computes the operator's current state.
-func (c *Controller) computeStatusConditions(tuned *tunedv1.Tuned, conditions []configv1.ClusterOperatorStatusCondition) ([]configv1.ClusterOperatorStatusCondition, error) {
+// computeStatus computes the operator's current status.
+func (c *Controller) computeStatus(tuned *tunedv1.Tuned, conditions []configv1.ClusterOperatorStatusCondition) ([]configv1.ClusterOperatorStatusCondition, string, error) {
     const (
         // maximum number of seconds for the operator to be Unavailable with a unique
         // Reason/Message before setting the Degraded ClusterOperator condition
         maxTunedUnavailable = 7200
     )
+    var operandReleaseVersion string
+
     dsMf := ntomf.TunedDaemonSet()
 
     availableCondition := configv1.ClusterOperatorStatusCondition{
@@ -210,7 +225,7 @@
             // This should not happen unless there was a manual intervention.
             // Preserve the previously known conditions and requeue.
             klog.Errorf("unable to calculate Operator status conditions, preserving the old ones: %v", err)
-            return conditions, err
+            return conditions, "", err
         }
     } else {
         klog.Errorf("setting all ClusterOperator conditions to Unknown: %v", err)
@@ -221,6 +236,11 @@
             copyAvailableCondition()
         }
     } else {
+        if ds.Status.ObservedGeneration != ds.Generation {
+            // Do not base the conditions on stale information
+            return conditions, "", fmt.Errorf(errGenerationMismatch)
+        }
+
         if ds.Status.NumberAvailable > 0 {
             // The operand maintained by the operator is reported as available in the cluster
             availableCondition.Status = configv1.ConditionTrue
@@ -243,6 +263,7 @@
         // (including Nodes correctly running the daemon Pod) != the total number of
         // Nodes that are running updated daemon Pod.
         if ds.Status.DesiredNumberScheduled != ds.Status.UpdatedNumberScheduled ||
+            ds.Status.CurrentNumberScheduled != ds.Status.NumberReady ||
             ds.Status.DesiredNumberScheduled == 0 {
             klog.V(3).Infof("setting Progressing condition to true")
             progressingCondition.Status = configv1.ConditionTrue
@@ -252,6 +273,7 @@
             progressingCondition.Status = configv1.ConditionFalse
             progressingCondition.Reason = "AsExpected"
             progressingCondition.Message = fmt.Sprintf("Cluster version is %q", os.Getenv("RELEASE_VERSION"))
+            operandReleaseVersion = os.Getenv("RELEASE_VERSION")
         }
 
         degradedCondition.Status = configv1.ConditionFalse
@@ -327,7 +349,7 @@
 
     klog.V(3).Infof("operator status conditions: %v", conditions)
 
-    return conditions, nil
+    return conditions, operandReleaseVersion, nil
 }
 
 func getRelatedObjects() []configv1.ObjectReference {
diff --git a/pkg/performanceprofile/controller/performanceprofile_controller.go b/pkg/performanceprofile/controller/performanceprofile_controller.go
index 6de0eefed..7ce8b5278 100644
--- a/pkg/performanceprofile/controller/performanceprofile_controller.go
+++ b/pkg/performanceprofile/controller/performanceprofile_controller.go
@@ -20,6 +20,7 @@ import (
     "context"
     "fmt"
     "k8s.io/apimachinery/pkg/runtime/schema"
+    "os"
     "reflect"
     "time"
 
@@ -30,6 +31,7 @@ import (
     "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/machineconfig"
     "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/manifestset"
     profileutil "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
+    operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers"
     mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
 
     corev1 "k8s.io/api/core/v1"
@@ -370,10 +372,24 @@ func validateUpdateEvent(e *event.UpdateEvent) bool {
 // The Controller will requeue the Request to be processed again if the returned error is non-nil or
 // Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
 func (r *PerformanceProfileReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+    co, err := r.getClusterOperator()
+    if err != nil {
+        klog.Errorf("failed to get ClusterOperator: %v", err)
+        return reconcile.Result{}, err
+    }
+
+    operatorReleaseVersion := os.Getenv("RELEASE_VERSION")
+    operandReleaseVersion := operatorv1helpers.FindOperandVersion(co.Status.Versions, tunedv1.TunedOperandName)
+    if operandReleaseVersion == nil || operatorReleaseVersion != operandReleaseVersion.Version {
+        // Upgrade in progress
+        klog.Infof("operator and operand release versions do not match")
+        return reconcile.Result{RequeueAfter: 10 * time.Second}, nil
+    }
+
     klog.Info("Reconciling PerformanceProfile")
     // Fetch the PerformanceProfile instance
     instance := &performancev2.PerformanceProfile{}
-    err := r.Get(ctx, req.NamespacedName, instance)
+    err = r.Get(ctx, req.NamespacedName, instance)
     if err != nil {
         if k8serros.IsNotFound(err) {
             // Request object not found, could have been deleted after reconcile request.
diff --git a/pkg/performanceprofile/controller/performanceprofile_controller_test.go b/pkg/performanceprofile/controller/performanceprofile_controller_test.go
index e0fa559ae..02c7c5efb 100644
--- a/pkg/performanceprofile/controller/performanceprofile_controller_test.go
+++ b/pkg/performanceprofile/controller/performanceprofile_controller_test.go
@@ -44,12 +44,14 @@ var _ = Describe("Controller", func() {
     var profile *performancev2.PerformanceProfile
     var profileMCP *mcov1.MachineConfigPool
     var infra *apiconfigv1.Infrastructure
+    var clusterOperator *apiconfigv1.ClusterOperator
     var ctrcfg *mcov1.ContainerRuntimeConfig
 
     BeforeEach(func() {
         profileMCP = testutils.NewProfileMCP()
         profile = testutils.NewPerformanceProfile("test")
         infra = testutils.NewInfraResource(false)
+        clusterOperator = testutils.NewClusterOperator()
         request = reconcile.Request{
             NamespacedName: types.NamespacedName{
                 Namespace: metav1.NamespaceNone,
@@ -59,7 +61,7 @@
     })
 
     It("should add finalizer to the performance profile", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -78,7 +80,7 @@
     })
 
     It("should create all resources on first reconcile loop", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -116,7 +118,7 @@
     })
 
     It("should create event on the second reconcile loop", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
         Expect(reconcileTimes(r, request, 2)).To(Equal(reconcile.Result{}))
 
@@ -128,7 +130,7 @@
     })
 
     It("should update the profile status", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -152,7 +154,7 @@
     })
 
     It("should promote kubelet config failure condition", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
         name := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
@@ -200,7 +202,7 @@
     })
 
     It("should not promote old failure condition", func() {
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
         name := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
@@ -258,7 +260,7 @@
         tunedOutdatedB.OwnerReferences = []metav1.OwnerReference{
             {Name: profile.Name},
         }
-        r := newFakeReconciler(profile, tunedOutdatedA, tunedOutdatedB, profileMCP, infra)
+        r := newFakeReconciler(profile, tunedOutdatedA, tunedOutdatedB, profileMCP, infra, clusterOperator)
 
         keyA := types.NamespacedName{
             Name:      tunedOutdatedA.Name,
@@ -290,7 +292,7 @@
     It("should create nothing when pause annotation is set", func() {
         profile.Annotations = map[string]string{performancev2.PerformanceProfilePauseAnnotation: "true"}
-        r := newFakeReconciler(profile, profileMCP, infra)
+        r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
         Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -351,7 +353,7 @@
         })
 
         It("should not record new create event", func() {
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -368,7 +370,7 @@
         It("should update MC when RT kernel gets disabled", func() {
             profile.Spec.RealTimeKernel.Enabled = pointer.BoolPtr(false)
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -393,7 +395,7 @@
                 Isolated: &isolated,
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -428,7 +430,7 @@
                 BalanceIsolated: pointer.BoolPtr(true),
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -452,7 +454,7 @@
                 BalanceIsolated: pointer.BoolPtr(false),
            }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -479,7 +481,7 @@
                 },
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -508,7 +510,7 @@
                 },
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -548,7 +550,7 @@
         })
 
         It("should update status with generated tuned", func() {
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, profileMCP, infra, clusterOperator)
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
             key := types.NamespacedName{
                 Name:      components.GetComponentName(profile.Name, components.ProfileNamePerformance),
@@ -569,7 +571,7 @@
         })
 
         It("should update status with generated runtime class", func() {
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra, clusterOperator)
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
             key := types.NamespacedName{
@@ -631,7 +633,7 @@
                 },
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, mcp, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, mcp, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -697,7 +699,7 @@
                 },
             }
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, tuned, nodes, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, tuned, nodes, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -725,7 +727,7 @@
            profileMCP.Spec.MachineConfigSelector = &metav1.LabelSelector{
                 MatchLabels: map[string]string{"wrongKey": "bad"},
             }
-            r := newFakeReconciler(profile, profileMCP, infra)
+            r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
             updatedProfile := &performancev2.PerformanceProfile{}
@@ -753,7 +755,7 @@
                 MatchLabels: map[string]string{"wrongKey": "bad"},
             }
             profile.Spec.MachineConfigLabel = nil
-            r := newFakeReconciler(profile, profileMCP, infra)
+            r := newFakeReconciler(profile, profileMCP, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
             updatedProfile := &performancev2.PerformanceProfile{}
@@ -797,7 +799,7 @@
             runtimeClass := runtimeclass.New(profile, machineconfig.HighPerformanceRuntime)
 
-            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra)
+            r := newFakeReconciler(profile, mc, kc, tunedPerformance, runtimeClass, profileMCP, infra, clusterOperator)
             result, err := r.Reconcile(context.TODO(), request)
             Expect(err).ToNot(HaveOccurred())
             Expect(result).To(Equal(reconcile.Result{}))
@@ -844,7 +846,7 @@
         It("should contain cpu partitioning files in machine config", func() {
             mc, err := machineconfig.New(profile, &infra.Status.CPUPartitioning, "")
             Expect(err).ToNot(HaveOccurred())
-            r := newFakeReconciler(profile, profileMCP, mc, infra)
+            r := newFakeReconciler(profile, profileMCP, mc, infra, clusterOperator)
 
             Expect(reconcileTimes(r, request, 1)).To(Equal(reconcile.Result{}))
 
@@ -936,7 +938,7 @@
             mc, err := machineconfig.New(profile, &infra.Status.CPUPartitioning, "")
             Expect(err).ToNot(HaveOccurred())
 
-            r := newFakeReconciler(profile, profileMCP, mc, infra, ctrcfg)
+            r := newFakeReconciler(profile, profileMCP, mc, infra, ctrcfg, clusterOperator)
             Expect(reconcileTimes(r, request, 2)).To(Equal(reconcile.Result{}))
 
             By("Verifying MC update")
diff --git a/pkg/performanceprofile/controller/resources.go b/pkg/performanceprofile/controller/resources.go
index aa491e916..731a5d43c 100644
--- a/pkg/performanceprofile/controller/resources.go
+++ b/pkg/performanceprofile/controller/resources.go
@@ -5,6 +5,7 @@ import (
     "encoding/json"
     "reflect"
 
+    apiconfigv1 "github.com/openshift/api/config/v1"
     tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
     mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
 
@@ -63,6 +64,18 @@ func (r *PerformanceProfileReconciler) getMutatedMachineConfig(mc *mcov1.Machine
     return mutated, nil
 }
 
+func (r *PerformanceProfileReconciler) getClusterOperator() (*apiconfigv1.ClusterOperator, error) {
+    co := &apiconfigv1.ClusterOperator{}
+    key := types.NamespacedName{
+        Name:      "node-tuning",
+        Namespace: metav1.NamespaceNone,
+    }
+    if err := r.Get(context.TODO(), key, co); err != nil {
+        return nil, err
+    }
+    return co, nil
+}
+
 func (r *PerformanceProfileReconciler) createOrUpdateMachineConfig(mc *mcov1.MachineConfig) error {
     _, err := r.getMachineConfig(mc.Name)
     if errors.IsNotFound(err) {
diff --git a/pkg/performanceprofile/utils/testing/testing.go b/pkg/performanceprofile/utils/testing/testing.go
index c38a7d09a..3a8c9c0ce 100644
--- a/pkg/performanceprofile/utils/testing/testing.go
+++ b/pkg/performanceprofile/utils/testing/testing.go
@@ -3,6 +3,7 @@ package testing
 import (
     apiconfigv1 "github.com/openshift/api/config/v1"
     performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+    tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
     mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -128,6 +129,22 @@ func NewInfraResource(pin bool) *apiconfigv1.Infrastructure {
     }
 }
 
+func NewClusterOperator() *apiconfigv1.ClusterOperator {
+    return &apiconfigv1.ClusterOperator{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: "node-tuning",
+        },
+        Status: apiconfigv1.ClusterOperatorStatus{
+            Versions: []apiconfigv1.OperandVersion{
+                {
+                    Name:    tunedv1.TunedOperandName,
+                    Version: "",
+                },
+            },
+        },
+    }
+}
+
 func NewContainerRuntimeConfig(runtime mcov1.ContainerRuntimeDefaultRuntime, mcpSelector map[string]string) *mcov1.ContainerRuntimeConfig {
     return &mcov1.ContainerRuntimeConfig{
         ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/tuned/controller.go b/pkg/tuned/controller.go
index b2b0633e3..91bee1857 100644
--- a/pkg/tuned/controller.go
+++ b/pkg/tuned/controller.go
@@ -18,11 +18,14 @@ import (
     fsnotify "gopkg.in/fsnotify.v1"
     "gopkg.in/ini.v1"
 
+    corev1 "k8s.io/api/core/v1"
     kmeta "k8s.io/apimachinery/pkg/api/meta"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/util/wait"
+    "k8s.io/client-go/kubernetes"
     restclient "k8s.io/client-go/rest"
     "k8s.io/client-go/tools/cache"
+    "k8s.io/client-go/tools/clientcmd"
     "k8s.io/client-go/util/workqueue"
     "k8s.io/klog/v2"
 
@@ -83,6 +86,9 @@ const (
     wqKindDaemon  = "daemon"
     wqKindTuned   = "tuned"
     wqKindProfile = "profile"
+
+    // path to kubelet kubeconfig
+    kubeletKubeconfigPath = "/var/lib/kubelet/kubeconfig"
 )
 
 // Types
@@ -97,6 +103,7 @@ type sockAccepted struct {
 
 type Controller struct {
     kubeconfig *restclient.Config
+    kubeclient kubernetes.Interface
 
     // workqueue is a rate limited work queue. This is used to queue work to be
     // processed instead of performing it as soon as a change happens.
@@ -146,7 +153,6 @@ type Controller struct {
     tunedTicker   *time.Ticker // ticker that fires if TuneD daemon fails to report "profile applied/reload failed" within tunedTimeout
     tunedTimeout  int          // timeout for TuneD daemon to report "profile applied/reload failed" [s]
     tunedMainCfg  *ini.File    // global TuneD configuration as defined in tuned-main.conf
-    tunedRendGen  int64        // rendered Tuned generation as observed during the last successful sync of TuneD profiles to disk
 }
 
 type wqKey struct {
@@ -178,16 +184,35 @@ func parseCmdOpts() {
     flag.Parse()
 }
 
+// Get a client from kubelet's kubeconfig to write to the Node object.
+func getKubeClient() (kubernetes.Interface, error) {
+    config, err := clientcmd.BuildConfigFromFlags("", kubeletKubeconfigPath)
+    if err != nil {
+        return nil, err
+    }
+    kubeClient, err := kubernetes.NewForConfig(restclient.AddUserAgent(config, programName))
+    if err != nil {
+        return nil, err
+    }
+
+    return kubeClient, nil
+}
+
 func newController(stopCh <-chan struct{}) (*Controller, error) {
     kubeconfig, err := ntoclient.GetConfig()
     if err != nil {
         return nil, err
     }
+    kubeclient, err := getKubeClient()
+    if err != nil {
+        return nil, err
+    }
 
     listers := &ntoclient.Listers{}
     clients := &ntoclient.Clients{}
     controller := &Controller{
         kubeconfig: kubeconfig,
+        kubeclient: kubeclient,
         listers:    listers,
         clients:    clients,
         tunedExit:  make(chan bool, 1),
@@ -264,7 +289,6 @@ func (c *Controller) sync(key wqKey) error {
         if err != nil {
             return err
         }
-        c.tunedRendGen = tuned.ObjectMeta.Generation
         c.change.rendered = change
         // Notify the event processor that the Tuned k8s object containing TuneD profiles changed.
         c.wqTuneD.Add(wqKey{kind: wqKindDaemon})
@@ -918,6 +942,44 @@ func getNodeName() string {
     return name
 }
 
+func (c *Controller) getNodeForProfile(nodeName string) (*corev1.Node, error) {
+    node, err := c.kubeclient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+    if err != nil {
+        return nil, fmt.Errorf("failed to get Node %s: %v", nodeName, err)
+    }
+    return node, nil
+}
+
+func (c *Controller) updateNodeAnnotations(node *corev1.Node, annotations map[string]string) (err error) {
+    var (
+        change bool
+    )
+    node = node.DeepCopy() // never update the objects from cache
+
+    if node.ObjectMeta.Annotations == nil {
+        node.ObjectMeta.Annotations = map[string]string{}
+    }
+
+    for k, v := range annotations {
+        change = change || node.ObjectMeta.Annotations[k] != v
+        node.ObjectMeta.Annotations[k] = v
+    }
+
+    if !change {
+        // No Node annotations changed, no need to update
+        return nil
+    }
+
+    // See OCPBUGS-17585: Instead of carrying information that could affect other cluster nodes in
+    // Profiles's status, use the kubelet's credentials to write to the Node's resource.
+    _, err = c.kubeclient.CoreV1().Nodes().Update(context.TODO(), node, metav1.UpdateOptions{})
+    if err != nil {
+        return fmt.Errorf("failed to update Node %s: %v", node.ObjectMeta.Name, err)
+    }
+
+    return nil
+}
+
 // Method updateTunedProfile updates a Tuned Profile with information to report back
 // to the operator. Note this method must be called only when the TuneD daemon is
 // not reloading.
@@ -943,15 +1005,17 @@
         return err
     }
 
+    node, err := c.getNodeForProfile(getNodeName())
+    if node.ObjectMeta.Annotations == nil {
+        node.ObjectMeta.Annotations = map[string]string{}
+    }
+
     statusConditions := computeStatusConditions(c.daemon.status, c.daemon.stderr, profile.Status.Conditions)
-    annotationsChanged := profile.ObjectMeta.Annotations == nil ||
-        profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey] != os.Getenv("RELEASE_VERSION") ||
-        profile.ObjectMeta.Annotations[tunedv1.RendredTunedGenerationAnnotationKey] != fmt.Sprintf("%d", c.tunedRendGen)
+    bootcmdlineAnnotVal, bootcmdlineAnnotSet := node.ObjectMeta.Annotations[tunedv1.TunedBootcmdlineAnnotationKey]
 
-    if profile.Status.Bootcmdline == bootcmdline &&
+    if bootcmdlineAnnotSet && bootcmdlineAnnotVal == bootcmdline &&
         profile.Status.TunedProfile == activeProfile &&
-        conditionsEqual(profile.Status.Conditions, statusConditions) &&
-        !annotationsChanged {
+        conditionsEqual(profile.Status.Conditions, statusConditions) {
         // Do not update node Profile unnecessarily (e.g. bootcmdline did not change).
         // This will save operator CPU cycles trying to reconcile objects that do not
         // need reconciling.
@@ -959,17 +1023,17 @@
         return nil
     }
 
+    annotations := map[string]string{tunedv1.TunedBootcmdlineAnnotationKey: bootcmdline}
+    err = c.updateNodeAnnotations(node, annotations)
+    if err != nil {
+        return err
+    }
+
     profile = profile.DeepCopy() // never update the objects from cache
-    profile.Status.Bootcmdline = bootcmdline
     profile.Status.TunedProfile = activeProfile
     profile.Status.Conditions = statusConditions
 
-    if profile.ObjectMeta.Annotations == nil {
-        profile.ObjectMeta.Annotations = map[string]string{}
-    }
-    profile.ObjectMeta.Annotations[tunedv1.GeneratedByOperandVersionAnnotationKey] = os.Getenv("RELEASE_VERSION")
-    profile.ObjectMeta.Annotations[tunedv1.RendredTunedGenerationAnnotationKey] = fmt.Sprintf("%d", c.tunedRendGen)
-    _, err = c.clients.Tuned.TunedV1().Profiles(operandNamespace).Update(context.TODO(), profile, metav1.UpdateOptions{})
+    _, err = c.clients.Tuned.TunedV1().Profiles(operandNamespace).UpdateStatus(context.TODO(), profile, metav1.UpdateOptions{})
     if err != nil {
         return fmt.Errorf("failed to update Profile %s status: %v", profile.Name, err)
     }
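// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch above): a minimal, self-contained
// example of the operand-side flow this change introduces -- publishing the
// TuneD-calculated kernel command line as the tuned.openshift.io/bootcmdline
// annotation on the operand's own Node, using a client built from the kubelet
// kubeconfig, instead of writing it to the Profile status. Only the annotation
// key and the kubelet kubeconfig path come from the diff; the package layout,
// the NODE_NAME lookup, the sample bootcmdline value and the helper name are
// assumptions made for this example.
// ---------------------------------------------------------------------------
package main

import (
    "context"
    "fmt"
    "os"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/clientcmd"
)

const (
    kubeletKubeconfigPath         = "/var/lib/kubelet/kubeconfig"    // from the diff
    tunedBootcmdlineAnnotationKey = "tuned.openshift.io/bootcmdline" // from the diff
)

// setBootcmdlineAnnotation writes the calculated kernel command line onto the
// Node object so the operator can read it from its Node informer cache and
// compare it across all Nodes in a MachineConfigPool or NodePool before it
// syncs a MachineConfig.
func setBootcmdlineAnnotation(ctx context.Context, cs kubernetes.Interface, nodeName, bootcmdline string) error {
    node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
    if err != nil {
        return fmt.Errorf("failed to get Node %s: %v", nodeName, err)
    }
    node = node.DeepCopy() // never mutate objects that may come from a cache
    if node.Annotations == nil {
        node.Annotations = map[string]string{}
    }
    if node.Annotations[tunedBootcmdlineAnnotationKey] == bootcmdline {
        return nil // unchanged, skip the API call
    }
    node.Annotations[tunedBootcmdlineAnnotationKey] = bootcmdline
    _, err = cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
    return err
}

func main() {
    // The kubelet kubeconfig is only readable on the node itself, which is why
    // the DaemonSet manifest above now mounts /var/lib/kubelet into the operand.
    config, err := clientcmd.BuildConfigFromFlags("", kubeletKubeconfigPath)
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    cs, err := kubernetes.NewForConfig(config)
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    if err := setBootcmdlineAnnotation(context.TODO(), cs, os.Getenv("NODE_NAME"), "skew_tick=1 nohz=on"); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
}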