[release-4.14] OCPBUGS-26072: Fix bootstrap with NTO Operator and duplicate MachineConfigs #4098

Merged
1 change: 0 additions & 1 deletion pkg/controller/bootstrap/bootstrap_test.go
@@ -155,7 +155,6 @@ spec:
			}
		})
	}

}

func TestBootstrapRun(t *testing.T) {
31 changes: 12 additions & 19 deletions pkg/controller/kubelet-config/helpers.go
@@ -5,7 +5,6 @@ import (
"context"
"encoding/json"
"fmt"
"reflect"
"strconv"
"strings"

@@ -127,6 +126,17 @@ func createNewDefaultNodeconfig() *osev1.Node {
	}
}

func createNewDefaultNodeconfigWithCgroup(mode osev1.CgroupMode) *osev1.Node {
	return &osev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: ctrlcommon.ClusterNodeInstanceName,
		},
		Spec: osev1.NodeSpec{
			CgroupMode: mode,
		},
	}
}

func getConfigNode(ctrl *Controller, key string) (*osev1.Node, error) {
	nodeConfig, err := ctrl.nodeConfigLister.Get(ctrlcommon.ClusterNodeInstanceName)
	if errors.IsNotFound(err) {
@@ -139,12 +149,6 @@ func getConfigNode(ctrl *Controller, key string) (*osev1.Node, error) {

// updateOriginalKubeConfigwithNodeConfig updates the original Kubelet Configuration based on the Nodespecific configuration
func updateOriginalKubeConfigwithNodeConfig(node *osev1.Node, originalKubeletConfig *kubeletconfigv1beta1.KubeletConfiguration) error {
	if node == nil {
		return fmt.Errorf("node configuration not found, failed to update the original kubelet configuration")
	}
	if reflect.DeepEqual(node.Spec, osev1.NodeSpec{}) {
		return fmt.Errorf("empty node resource spec found")
	}
	// updating the kubelet specific fields based on the Node's workerlatency profile.
	// (TODO): The durations can be replaced with the defined constants in the openshift/api repository once the respective changes are merged.
	switch node.Spec.WorkerLatencyProfile {
@@ -165,15 +169,6 @@ func updateOriginalKubeConfigwithNodeConfig(node *osev1.Node, originalKubeletCon

// updateMachineConfigwithCgroup updates the Machine Config object based on the cgroup mode present in the Config Node resource.
func updateMachineConfigwithCgroup(node *osev1.Node, mc *mcfgv1.MachineConfig) error {
	if node == nil {
		return fmt.Errorf("node configuration not found, failed to update the machine config with the cgroup information")
	}
	if reflect.DeepEqual(node.Spec, osev1.NodeSpec{}) {
		return fmt.Errorf("empty config node resource spec found")
	}
	if mc == nil || reflect.DeepEqual(mc.Spec, mcfgv1.MachineConfigSpec{}) {
		return fmt.Errorf("machine config not found, failed to update the machine config with the cgroup information")
	}
	// updating the Machine Config resource with the relevant cgroup config
	var (
		kernelArgsv1 = []string{"systemd.unified_cgroup_hierarchy=0", "systemd.legacy_systemd_cgroup_controller=1"}
@@ -184,11 +179,9 @@ func updateMachineConfigwithCgroup(node *osev1.Node, mc *mcfgv1.MachineConfig) e
	case osev1.CgroupModeV1:
		kernelArgsToAdd = append(kernelArgsToAdd, kernelArgsv1...)
		kernelArgsToRemove = append(kernelArgsToRemove, kernelArgsv2...)
	case osev1.CgroupModeV2:
	case osev1.CgroupModeV2, osev1.CgroupModeEmpty:
		kernelArgsToAdd = append(kernelArgsToAdd, kernelArgsv2...)
		kernelArgsToRemove = append(kernelArgsToRemove, kernelArgsv1...)
	case emptyInput:
		return nil
	default:
		return fmt.Errorf("unknown cgroup mode found %v, failed to update the machine config resource", node.Spec.CgroupMode)
	}
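
Since the hunks above only show fragments of helpers.go, here is a rough, self-contained sketch (not the upstream function, which mutates the MachineConfig in place) of the cgroup-to-kernel-argument mapping after this change. The helper name cgroupKernelArgs is invented for illustration, and the cgroup v2 argument list is taken from the test expectations later in this diff. The key behavioral change: an empty CgroupMode now falls through to the v2 arguments instead of returning early with no update.

package sketch

import (
	"fmt"

	osev1 "github.com/openshift/api/config/v1"
)

// cgroupKernelArgs (hypothetical helper) returns the kernel arguments to add to
// and remove from a MachineConfig for a given cgroup mode from the Node config.
func cgroupKernelArgs(mode osev1.CgroupMode) (add, remove []string, err error) {
	kernelArgsv1 := []string{"systemd.unified_cgroup_hierarchy=0", "systemd.legacy_systemd_cgroup_controller=1"}
	kernelArgsv2 := []string{"systemd.unified_cgroup_hierarchy=1", "cgroup_no_v1=\"all\"", "psi=1"}

	switch mode {
	case osev1.CgroupModeV1:
		return kernelArgsv1, kernelArgsv2, nil
	case osev1.CgroupModeV2, osev1.CgroupModeEmpty:
		// Empty mode is treated as cgroup v2; previously this case returned nil without touching the MachineConfig.
		return kernelArgsv2, kernelArgsv1, nil
	default:
		return nil, nil, fmt.Errorf("unknown cgroup mode found %v, failed to update the machine config resource", mode)
	}
}
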
32 changes: 7 additions & 25 deletions pkg/controller/kubelet-config/kubelet_config_nodes.go
@@ -76,17 +76,6 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error {
	if err := ctrl.cleanUpDuplicatedMC(managedNodeConfigKeyPrefix); err != nil {
		return err
	}
	// explicitly setting the cgroupMode to "v2" and also updating the config node's spec if found empty
	// This helps in updating the cgroupMode on all the worker nodes if they still have cgroupsv1 (Ex: RHEL8 workers)
	if nodeConfig.Spec.CgroupMode == osev1.CgroupModeEmpty {
		nodeConfig.Spec.CgroupMode = osev1.CgroupModeV2
		ctrl.configClient.ConfigV1().Nodes().Update(context.TODO(), nodeConfig, metav1.UpdateOptions{})
	}
	// checking if the Node spec is empty and accordingly returning from here.
	if reflect.DeepEqual(nodeConfig.Spec, osev1.NodeSpec{}) {
		klog.V(2).Info("empty Node resource found")
		return nil
	}

	// Fetch the controllerconfig
	cc, err := ctrl.ccLister.Get(ctrlcommon.ControllerConfigName)
@@ -131,9 +120,11 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error {
			}
		}
		// The following code updates the MC with the relevant CGroups version
		err = updateMachineConfigwithCgroup(nodeConfig, mc)
		if err != nil {
			return err
		if role == ctrlcommon.MachineConfigPoolWorker || role == ctrlcommon.MachineConfigPoolMaster {
			err = updateMachineConfigwithCgroup(nodeConfig, mc)
			if err != nil {
				return err
			}
		}
		// Encode the new config into raw JSON
		cfgIgn, err := kubeletConfigToIgnFile(originalKubeConfig)
@@ -198,7 +189,7 @@ func (ctrl *Controller) enqueueNodeConfig(nodeConfig *osev1.Node) {
}

func (ctrl *Controller) updateNodeConfig(old, cur interface{}) {
	var isValidWorkerLatencyProfleTransition = true
	isValidWorkerLatencyProfleTransition := true
	oldNode, ok := old.(*osev1.Node)
	if !ok {
		utilruntime.HandleError(fmt.Errorf("Couldn't retrieve the old object from the Update Node Config event %#v", old))
@@ -277,16 +268,7 @@ func RunNodeConfigBootstrap(templateDir string, featureGateAccess featuregates.F
	if nodeConfig == nil {
		return nil, fmt.Errorf("nodes.config.openshift.io resource not found")
	}
	// explicitly setting the cgroupMode to "v2" and also updating the config node's spec if found empty
	// This helps in updating the cgroupMode on all the worker nodes if they still have cgroupsv1 (Ex: RHEL8 workers)
	if nodeConfig.Spec.CgroupMode == osev1.CgroupModeEmpty {
		nodeConfig.Spec.CgroupMode = osev1.CgroupModeV2
	}
	// checking if the Node spec is empty and accordingly returning from here.
	if reflect.DeepEqual(nodeConfig.Spec, osev1.NodeSpec{}) {
		klog.V(2).Info("empty Node resource found")
		return nil, nil
	}

	configs := []*mcfgv1.MachineConfig{}

	for _, pool := range mcpPools {
50 changes: 39 additions & 11 deletions pkg/controller/kubelet-config/kubelet_config_nodes_test.go
@@ -78,24 +78,54 @@ func TestNodeConfigDefault(t *testing.T) {
}

func TestBootstrapNodeConfigDefault(t *testing.T) {
	configNodeCgroupDefault := createNewDefaultNodeconfig()
	configNodeCgroupV1 := createNewDefaultNodeconfigWithCgroup(osev1.CgroupModeV1)
	configNodeCgroupV2 := createNewDefaultNodeconfigWithCgroup(osev1.CgroupModeV2)

	expected := map[*osev1.Node]struct {
		Name             string
		MasterKernelArgs []string
		WorkerKernelArgs []string
	}{
		configNodeCgroupDefault: {
			Name:             "Default",
			MasterKernelArgs: []string{"systemd.unified_cgroup_hierarchy=1", "cgroup_no_v1=\"all\"", "psi=1"},
			WorkerKernelArgs: []string{"systemd.unified_cgroup_hierarchy=1", "cgroup_no_v1=\"all\"", "psi=1"},
		},
		configNodeCgroupV2: {
			Name:             "Cgroupv2",
			MasterKernelArgs: []string{"systemd.unified_cgroup_hierarchy=1", "cgroup_no_v1=\"all\"", "psi=1"},
			WorkerKernelArgs: []string{"systemd.unified_cgroup_hierarchy=1", "cgroup_no_v1=\"all\"", "psi=1"},
		},
		configNodeCgroupV1: {
			Name:             "Cgroupv1",
			MasterKernelArgs: []string{"systemd.unified_cgroup_hierarchy=0", "systemd.legacy_systemd_cgroup_controller=1"},
			WorkerKernelArgs: []string{"systemd.unified_cgroup_hierarchy=0", "systemd.legacy_systemd_cgroup_controller=1"},
		},
	}

	for _, platform := range []configv1.PlatformType{configv1.AWSPlatformType, configv1.NonePlatformType, "unrecognized"} {
		t.Run(string(platform), func(t *testing.T) {

			cc := newControllerConfig(ctrlcommon.ControllerConfigName, platform)
			mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0")
			mcp1 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0")
			mcps := []*mcfgv1.MachineConfigPool{mcp}
			mcps = append(mcps, mcp1)

			fgAccess := createNewDefaultFeatureGateAccess()
			configNode := createNewDefaultNodeconfig()

			mcs, err := RunNodeConfigBootstrap("../../../templates", fgAccess, cc, configNode, mcps)
			if err != nil {
				t.Errorf("could not run node config bootstrap: %v", err)
			}
			if len(mcs) != 2 {
				t.Errorf("expected %v machine configs generated with the default node config, got 0 machine configs", len(mcs))
			for _, configNode := range []*osev1.Node{configNodeCgroupDefault, configNodeCgroupV1, configNodeCgroupV2} {
				expect := expected[configNode]
				t.Run(fmt.Sprintf("Testing %v", expect.Name), func(t *testing.T) {
					mcs, err := RunNodeConfigBootstrap("../../../templates", fgAccess, cc, configNode, mcps)
					if err != nil {
						t.Errorf("could not run node config bootstrap: %v", err)
					}
					expectedCount := 2
					if len(mcs) != expectedCount {
						t.Errorf("expected %v machine configs generated with the default node config, got %d machine configs", expectedCount, len(mcs))
					}
					require.Equal(t, mcs[0].Spec.KernelArguments, expect.MasterKernelArgs)
				})
			}
		})
	}
@@ -104,7 +134,6 @@ func TestBootstrapNodeConfigDefault(t *testing.T) {
func TestBootstrapNoNodeConfig(t *testing.T) {
	for _, platform := range []configv1.PlatformType{configv1.AWSPlatformType, configv1.NonePlatformType, "unrecognized"} {
		t.Run(string(platform), func(t *testing.T) {

			cc := newControllerConfig(ctrlcommon.ControllerConfigName, platform)
			mcp := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0")
			mcps := []*mcfgv1.MachineConfigPool{mcp}
@@ -147,7 +176,6 @@ func TestNodeConfigCustom(t *testing.T) {
	f.mcpLister = append(f.mcpLister, mcp)

	nodeConfig := &osev1.Node{

		ObjectMeta: metav1.ObjectMeta{
			Name: ctrlcommon.ClusterNodeInstanceName,
		},