Skip to content

Commit

Permalink
Merge pull request #8021 from johngmyers/cordon
Browse files Browse the repository at this point in the history
Support tainting all nodes needing update during rolling update
  • Loading branch information
k8s-ci-robot committed Jan 4, 2020
2 parents 2c96c67 + 4d16192 commit 5ecf8d9
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 110 deletions.
4 changes: 4 additions & 0 deletions pkg/instancegroups/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ go_library(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/json:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
Expand All @@ -41,5 +44,6 @@ go_test(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/fake:go_default_library",
"//vendor/k8s.io/client-go/testing:go_default_library",
],
)
70 changes: 70 additions & 0 deletions pkg/instancegroups/instancegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/json"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/klog"
api "k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/cloudinstances"
Expand All @@ -34,6 +37,8 @@ import (
"k8s.io/kops/upup/pkg/fi"
)

// rollingUpdateTaintKey is the key of the taint that the rolling update adds
// to nodes needing an update before it starts processing them; a node that
// already has a taint with this key is not tainted again.
const rollingUpdateTaintKey = "kops.k8s.io/scheduled-for-update"

// RollingUpdateInstanceGroup is the AWS ASG backing an InstanceGroup.
type RollingUpdateInstanceGroup struct {
// Cloud is the kops cloud provider
Expand Down Expand Up @@ -137,6 +142,13 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
}
}

if !rollingUpdateData.CloudOnly {
err = r.taintAllNeedUpdate(update, rollingUpdateData)
if err != nil {
return err
}
}

for _, u := range update {
instanceId := u.ID

Expand Down Expand Up @@ -221,6 +233,64 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
return nil
}

// taintAllNeedUpdate adds the rolling-update taint to every node in update
// that is still schedulable and does not already carry that taint.
//
// Members with no associated Node, and nodes whose Spec.Unschedulable is set,
// are skipped. If patching a node fails and rollingUpdateData.FailOnDrainError
// is set, the error is returned immediately; otherwise the failure is logged
// and the remaining nodes are still processed.
func (r *RollingUpdateInstanceGroup) taintAllNeedUpdate(update []*cloudinstances.CloudInstanceGroupMember, rollingUpdateData *RollingUpdateCluster) error {
	var toTaint []*corev1.Node
	for _, u := range update {
		if u.Node == nil || u.Node.Spec.Unschedulable {
			continue
		}
		foundTaint := false
		for _, taint := range u.Node.Spec.Taints {
			if taint.Key == rollingUpdateTaintKey {
				foundTaint = true
				break
			}
		}
		if !foundTaint {
			toTaint = append(toTaint, u.Node)
		}
	}
	if len(toTaint) == 0 {
		return nil
	}

	noun := "nodes"
	if len(toTaint) == 1 {
		noun = "node"
	}
	klog.Infof("Tainting %d %s in %q instancegroup.", len(toTaint), noun, r.CloudGroup.InstanceGroup.Name)

	for _, n := range toTaint {
		if err := r.patchTaint(rollingUpdateData, n); err != nil {
			// Report the node by name: formatting the *corev1.Node itself
			// with %q would dump the entire object into the message.
			if rollingUpdateData.FailOnDrainError {
				return fmt.Errorf("failed to taint node %q: %v", n.Name, err)
			}
			klog.Infof("Ignoring error tainting node %q: %v", n.Name, err)
		}
	}
	return nil
}

// patchTaint appends the rolling-update taint (effect PreferNoSchedule) to
// node and sends the resulting change to the API server as a strategic merge
// patch.
//
// The passed-in node object is modified in place: the taint is appended to
// node.Spec.Taints before the patch is computed. Any marshalling, patch
// construction, or API error is returned unchanged.
func (r *RollingUpdateInstanceGroup) patchTaint(rollingUpdateData *RollingUpdateCluster, node *corev1.Node) error {
	// Snapshot the node before the change so a two-way diff can be computed.
	before, err := json.Marshal(node)
	if err != nil {
		return err
	}

	updateTaint := corev1.Taint{
		Key:    rollingUpdateTaintKey,
		Effect: corev1.TaintEffectPreferNoSchedule,
	}
	node.Spec.Taints = append(node.Spec.Taints, updateTaint)

	after, err := json.Marshal(node)
	if err != nil {
		return err
	}

	patch, err := strategicpatch.CreateTwoWayMergePatch(before, after, node)
	if err != nil {
		return err
	}

	nodes := rollingUpdateData.K8sClient.CoreV1().Nodes()
	if _, err := nodes.Patch(node.Name, types.StrategicMergePatchType, patch); err != nil {
		return err
	}
	return nil
}

// validateClusterWithDuration runs validation.ValidateCluster until either we get positive result or the timeout expires
func (r *RollingUpdateInstanceGroup) validateClusterWithDuration(rollingUpdateData *RollingUpdateCluster, duration time.Duration) error {
// Try to validate cluster at least once, this will handle durations that are lower
Expand Down
Loading

0 comments on commit 5ecf8d9

Please sign in to comment.