Skip to content

Commit

Permalink
Merge pull request #66397 from gnufied/fix-default-max-volume-ebs
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue (batch tested with PRs 66410, 66398, 66061, 66397, 65558). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fix volume limit for EBS on m5 and c5 instances

This is a fix for lower volume limits on m5 and c5 instance types while we wait for kubernetes/enhancements#554 to land GA.

This problem became urgent because many of our users are trying to migrate to those instance types in light of spectre/meltdown vulnerability but  lower volume limit on those instance types often causes cluster instability. Yes they can workaround by configuring the scheduler with lower limit but often this becomes somewhat difficult to do when cluster is mixed. 

The newer default limits were picked from https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html

Text about spectre/meltdown is available on - https://community.bitnami.com/t/spectre-variant-2/54961/5

/sig storage
/sig scheduling

```release-note
Fix volume limit for EBS on m5 and c5 instance types
```
  • Loading branch information
Kubernetes Submit Queue committed Jul 21, 2018
2 parents 0bf7427 + 45b8107 commit 827aa93
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
Expand Down Expand Up @@ -826,6 +827,23 @@ func TestVolumeCountConflicts(t *testing.T) {
}
}

func TestMaxVolumeFunc(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m5.large",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != DefaultMaxEBSM5VolumeLimit {
t.Errorf("Expected max volume to be %d got %d", DefaultMaxEBSM5VolumeLimit, maxVolume)
}
}

func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{
Expand Down
56 changes: 44 additions & 12 deletions pkg/scheduler/algorithm/predicates/predicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"os"
"regexp"
"strconv"
"sync"

Expand Down Expand Up @@ -97,6 +98,8 @@ const (
// Amazon recommends no more than 40; the system root volume uses at least one.
// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits
DefaultMaxEBSVolumes = 39
// DefaultMaxEBSM5VolumeLimit is default EBS volume limit on m5 and c5 instances
DefaultMaxEBSM5VolumeLimit = 25
// DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE
// GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16
Expand Down Expand Up @@ -291,7 +294,7 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
type MaxPDVolumeCountChecker struct {
filter VolumeFilter
volumeLimitKey v1.ResourceName
maxVolumes int
maxVolumeFunc func(node *v1.Node) int
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo

Expand All @@ -317,23 +320,19 @@ type VolumeFilter struct {
func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName

switch filterName {

case EBSVolumeFilterType:
filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
GCEPDVolumeFilterType, AzureDiskVolumeFilterType)
Expand All @@ -343,7 +342,7 @@ func NewMaxPDVolumeCountPredicate(
c := &MaxPDVolumeCountChecker{
filter: filter,
volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes,
maxVolumeFunc: getMaxVolumeFunc(filterName),
pvInfo: pvInfo,
pvcInfo: pvcInfo,
randomVolumeIDPrefix: rand.String(32),
Expand All @@ -352,19 +351,52 @@ func NewMaxPDVolumeCountPredicate(
return c.predicate
}

// getMaxVols checks the max PD volumes environment variable, otherwise returning a default value
func getMaxVols(defaultVal int) int {
func getMaxVolumeFunc(filterName string) func(node *v1.Node) int {
return func(node *v1.Node) int {
maxVolumesFromEnv := getMaxVolLimitFromEnv()
if maxVolumesFromEnv > 0 {
return maxVolumesFromEnv
}

var nodeInstanceType string
for k, v := range node.ObjectMeta.Labels {
if k == kubeletapis.LabelInstanceType {
nodeInstanceType = v
}
}
switch filterName {
case EBSVolumeFilterType:
return getMaxEBSVolume(nodeInstanceType)
case GCEPDVolumeFilterType:
return DefaultMaxGCEPDVolumes
case AzureDiskVolumeFilterType:
return DefaultMaxAzureDiskVolumes
default:
return -1
}
}
}

func getMaxEBSVolume(nodeInstanceType string) int {
if ok, _ := regexp.MatchString("^[cm]5.*", nodeInstanceType); ok {
return DefaultMaxEBSM5VolumeLimit
}
return DefaultMaxEBSVolumes
}

// getMaxVolLimitFromEnv checks the max PD volumes environment variable, otherwise returning a default value
func getMaxVolLimitFromEnv() int {
if rawMaxVols := os.Getenv(KubeMaxPDVols); rawMaxVols != "" {
if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil {
glog.Errorf("Unable to parse maximum PD volumes value, using default of %v: %v", defaultVal, err)
glog.Errorf("Unable to parse maximum PD volumes value, using default: %v", err)
} else if parsedMaxVols <= 0 {
glog.Errorf("Maximum PD volumes must be a positive value, using default of %v", defaultVal)
glog.Errorf("Maximum PD volumes must be a positive value, using default ")
} else {
return parsedMaxVols
}
}

return defaultVal
return -1
}

func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
Expand Down Expand Up @@ -454,7 +486,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
}

numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes
maxAttachLimit := c.maxVolumeFunc(nodeInfo.Node())

if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits()
Expand Down
7 changes: 3 additions & 4 deletions pkg/scheduler/algorithm/predicates/predicates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3913,7 +3913,6 @@ func TestVolumeZonePredicateWithVolumeBinding(t *testing.T) {

func TestGetMaxVols(t *testing.T) {
previousValue := os.Getenv(KubeMaxPDVols)
defaultValue := 39

tests := []struct {
rawMaxVols string
Expand All @@ -3922,12 +3921,12 @@ func TestGetMaxVols(t *testing.T) {
}{
{
rawMaxVols: "invalid",
expected: defaultValue,
expected: -1,
name: "Unable to parse maximum PD volumes value, using default value",
},
{
rawMaxVols: "-2",
expected: defaultValue,
expected: -1,
name: "Maximum PD volumes must be a positive value, using default value",
},
{
Expand All @@ -3940,7 +3939,7 @@ func TestGetMaxVols(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
os.Setenv(KubeMaxPDVols, test.rawMaxVols)
result := getMaxVols(defaultValue)
result := getMaxVolLimitFromEnv()
if result != test.expected {
t.Errorf("expected %v got %v", test.expected, result)
}
Expand Down

0 comments on commit 827aa93

Please sign in to comment.