core, schedule: use size as a factor of score #830

Merged
merged 9 commits on Nov 8, 2017
1 change: 1 addition & 0 deletions conf/config.toml
@@ -51,6 +51,7 @@ max-store-down-time = "1h"
leader-schedule-limit = 64
region-schedule-limit = 16
replica-schedule-limit = 24
tolerant-size-ratio = 2.5

# customized schedulers, the format is as below
# if empty, it will use balance-leader, balance-region, hot-region as default
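For context (not part of the diff): a minimal sketch of what this ratio is meant to buy, assuming the semantics introduced by this PR. The balance schedulers tolerate a score gap proportional to the size of the region under consideration, so small imbalances no longer trigger moves. The numbers below are made up.

```go
package main

import "fmt"

func main() {
	// Hypothetical values: the configured ratio and a region's approximate size in MiB.
	tolerantSizeRatio := 2.5
	regionSize := int64(96)

	// A move is only worthwhile when the source/target score gap exceeds this
	// buffer, which suppresses churn from moves that barely improve balance.
	tolerantSize := tolerantSizeRatio * float64(regionSize)
	fmt.Printf("require a score gap of at least ~%.0f before moving the region\n", tolerantSize)
}
```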
7 changes: 7 additions & 0 deletions server/cache.go
@@ -170,6 +170,13 @@ func (c *clusterInfo) GetStores() []*core.StoreInfo {
return c.BasicCluster.GetStores()
}

// GetStoresAverageScore returns the average resource score of all StoreInfo
func (c *clusterInfo) GetStoresAverageScore(kind core.ResourceKind) float64 {
c.RLock()
defer c.RUnlock()
return c.BasicCluster.GetStoresAverageScore(kind)
}

func (c *clusterInfo) getMetaStores() []*metapb.Store {
c.RLock()
defer c.RUnlock()
9 changes: 9 additions & 0 deletions server/config.go
@@ -316,6 +316,8 @@ type ScheduleConfig struct {
RegionScheduleLimit uint64 `toml:"region-schedule-limit,omitempty" json:"region-schedule-limit"`
// ReplicaScheduleLimit is the max coexist replica schedules.
ReplicaScheduleLimit uint64 `toml:"replica-schedule-limit,omitempty" json:"replica-schedule-limit"`
// TolerantSizeRatio is the ratio of buffer size for balance scheduler.
TolerantSizeRatio float64 `toml:"tolerant-size-ratio,omitempty" json:"tolerant-size-ratio"`
// Schedulers supports loading customized schedulers
Schedulers SchedulerConfigs `toml:"schedulers,omitempty" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade
}
@@ -329,6 +331,7 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
LeaderScheduleLimit: c.LeaderScheduleLimit,
RegionScheduleLimit: c.RegionScheduleLimit,
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
Schedulers: schedulers,
}
}
@@ -350,6 +353,7 @@ const (
defaultLeaderScheduleLimit = 64
defaultRegionScheduleLimit = 12
defaultReplicaScheduleLimit = 16
defaultTolerantSizeRatio = 2.5
)

var defaultSchedulers = SchedulerConfigs{
@@ -365,6 +369,7 @@ func (c *ScheduleConfig) adjust() {
adjustUint64(&c.LeaderScheduleLimit, defaultLeaderScheduleLimit)
adjustUint64(&c.RegionScheduleLimit, defaultRegionScheduleLimit)
adjustUint64(&c.ReplicaScheduleLimit, defaultReplicaScheduleLimit)
adjustFloat64(&c.TolerantSizeRatio, defaultTolerantSizeRatio)
adjustSchedulers(&c.Schedulers, defaultSchedulers)
}

@@ -454,6 +459,10 @@ func (o *scheduleOption) GetReplicaScheduleLimit() uint64 {
return o.load().ReplicaScheduleLimit
}

func (o *scheduleOption) GetTolerantSizeRatio() float64 {
return o.load().TolerantSizeRatio
}

func (o *scheduleOption) GetSchedulers() SchedulerConfigs {
return o.load().Schedulers
}
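adjustFloat64 is referenced in adjust() above but is not part of this diff; a minimal sketch of what such a helper presumably does, mirroring the existing adjustUint64 pattern (apply the default when the value is unset):

```go
// Sketch only, assuming the same convention as adjustUint64: a zero value
// means "not configured", so fall back to the default.
func adjustFloat64(v *float64, defValue float64) {
	if *v == 0 {
		*v = defValue
	}
}
```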
43 changes: 39 additions & 4 deletions server/core/store.go
@@ -106,17 +106,17 @@ const minWeight = 1e-6
// LeaderScore returns the store's leader score: leaderSize / leaderWeight.
func (s *StoreInfo) LeaderScore() float64 {
if s.LeaderWeight <= 0 {
- return float64(s.LeaderCount) / minWeight
+ return float64(s.LeaderSize) / minWeight
}
- return float64(s.LeaderCount) / s.LeaderWeight
+ return float64(s.LeaderSize) / s.LeaderWeight
}

// RegionScore returns the store's region score: regionSize / regionWeight.
func (s *StoreInfo) RegionScore() float64 {
if s.RegionWeight <= 0 {
- return float64(s.RegionCount) / minWeight
+ return float64(s.RegionSize) / minWeight
}
- return float64(s.RegionCount) / s.RegionWeight
+ return float64(s.RegionSize) / s.RegionWeight
}

// StorageSize returns store's used storage size reported from tikv.
@@ -175,6 +175,24 @@ func (s *StoreInfo) ResourceScore(kind ResourceKind) float64 {
}
}

// ResourceWeight returns weight of leader/region in the score
func (s *StoreInfo) ResourceWeight(kind ResourceKind) float64 {
switch kind {
case LeaderKind:
if s.LeaderWeight <= 0 {
return minWeight
}
return s.LeaderWeight
case RegionKind:
if s.RegionWeight <= 0 {
return minWeight
}
return s.RegionWeight
default:
return 0
}
}

// GetStartTS returns the start timestamp.
func (s *StoreInfo) GetStartTS() time.Time {
return time.Unix(int64(s.Stats.GetStartTime()), 0)
@@ -366,6 +384,23 @@ func (s *StoresInfo) SetRegionSize(storeID uint64, regionSize int64) {
}
}

// AverageResourceScore returns the average resource score of all up StoreInfo
func (s *StoresInfo) AverageResourceScore(kind ResourceKind) float64 {
var totalResourceSize int64
var totalResourceWeight float64
for _, s := range s.stores {
if s.IsUp() {
totalResourceWeight += s.ResourceWeight(kind)
totalResourceSize += s.ResourceSize(kind)
}
}

if totalResourceWeight == 0 {
return 0
}
return float64(totalResourceSize) / totalResourceWeight
}

// TotalWrittenBytes returns the total written bytes of all StoreInfo
func (s *StoresInfo) TotalWrittenBytes() uint64 {
var totalWrittenBytes uint64
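A quick worked example of the weighted average that AverageResourceScore computes, with made-up store sizes and weights:

```go
package main

import "fmt"

func main() {
	// Hypothetical up stores: region sizes (MiB) and their region weights.
	sizes := []int64{100, 300, 200}
	weights := []float64{1, 2, 1}

	var totalSize int64
	var totalWeight float64
	for i := range sizes {
		totalSize += sizes[i]
		totalWeight += weights[i]
	}

	// 600 / 4 = 150: stores scoring well above this average are candidates to
	// shed regions, stores well below it are candidates to receive them.
	fmt.Println(float64(totalSize) / totalWeight)
}
```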
5 changes: 5 additions & 0 deletions server/schedule/basic_cluster.go
@@ -135,6 +135,11 @@ func (bc *BasicCluster) GetLeaderStore(region *core.RegionInfo) *core.StoreInfo
return bc.Stores.GetStore(region.Leader.GetStoreId())
}

// GetStoresAverageScore returns the average resource score of all StoreInfo
func (bc *BasicCluster) GetStoresAverageScore(kind core.ResourceKind) float64 {
return bc.Stores.AverageResourceScore(kind)
}

// BlockStore stops balancer from selecting the store.
func (bc *BasicCluster) BlockStore(storeID uint64) error {
return errors.Trace(bc.Stores.BlockStore(storeID))
1 change: 1 addition & 0 deletions server/schedule/opts.go
@@ -21,6 +21,7 @@ import (
type Options interface {
GetLeaderScheduleLimit() uint64
GetRegionScheduleLimit() uint64
GetTolerantSizeRatio() float64
A reviewer (Contributor) commented on this line:

Here you use "tolerant size", but your TOML file uses "tolerate size"; please choose one.


GetMaxSnapshotCount() uint64
GetMaxPendingPeerCount() uint64
1 change: 1 addition & 0 deletions server/schedule/scheduler.go
@@ -34,6 +34,7 @@ type Cluster interface {
GetRegionStores(region *core.RegionInfo) []*core.StoreInfo
GetFollowerStores(region *core.RegionInfo) []*core.StoreInfo
GetLeaderStore(region *core.RegionInfo) *core.StoreInfo
GetStoresAverageScore(kind core.ResourceKind) float64
ScanRegions(startKey []byte, limit int) []*core.RegionInfo

BlockStore(id uint64) error
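Because GetStoresAverageScore is now part of the Cluster interface, every implementation (including test fakes) has to provide it. A hypothetical minimal stub, not taken from this PR, that keeps scheduler tests deterministic (import paths assumed to be the repo's usual ones):

```go
package schedulers_test

import (
	"github.com/pingcap/pd/server/core"
	"github.com/pingcap/pd/server/schedule"
)

// fakeCluster is a hypothetical test double: embedding schedule.Cluster
// satisfies the rest of the interface, and the fixed average lets tests
// drive shouldBalance without building a full cluster.
type fakeCluster struct {
	schedule.Cluster
	avgScore float64
}

// GetStoresAverageScore returns the canned average, regardless of kind.
func (c *fakeCluster) GetStoresAverageScore(kind core.ResourceKind) float64 {
	return c.avgScore
}
```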
3 changes: 2 additions & 1 deletion server/schedulers/balance_leader.go
@@ -76,7 +76,8 @@ func (l *balanceLeaderScheduler) Schedule(cluster schedule.Cluster, opInfluence

source := cluster.GetStore(region.Leader.GetStoreId())
target := cluster.GetStore(newLeader.GetStoreId())
- if !shouldBalance(source, target, core.LeaderKind, opInfluence) {
+ avgScore := cluster.GetStoresAverageScore(core.LeaderKind)
+ if !shouldBalance(source, target, avgScore, core.LeaderKind, region, opInfluence, l.opt.GetTolerantSizeRatio()) {
schedulerCounter.WithLabelValues(l.GetName(), "skip").Inc()
return nil
}
3 changes: 2 additions & 1 deletion server/schedulers/balance_region.go
@@ -118,7 +118,8 @@ func (s *balanceRegionScheduler) transferPeer(cluster schedule.Cluster, region *
}

target := cluster.GetStore(newPeer.GetStoreId())
- if !shouldBalance(source, target, core.RegionKind, opInfluence) {
+ avgScore := cluster.GetStoresAverageScore(core.RegionKind)
+ if !shouldBalance(source, target, avgScore, core.RegionKind, region, opInfluence, s.opt.GetTolerantSizeRatio()) {
schedulerCounter.WithLabelValues(s.GetName(), "skip").Inc()
return nil
}
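The updated shouldBalance itself is not shown in this diff; a rough sketch of how the new parameters could combine, under the assumption that a move must both come from an above-average store and close a gap larger than the size-proportional buffer. The real function (in server/schedulers) may weigh opInfluence and the scores differently.

```go
// shouldBalanceSketch is illustrative only and is not the PR's implementation.
func shouldBalanceSketch(sourceScore, targetScore, avgScore float64,
	regionSize int64, tolerantSizeRatio float64) bool {
	// The buffer grows with the region being moved: relocating a large region
	// has to close a proportionally larger score gap to be worthwhile.
	tolerantSize := tolerantSizeRatio * float64(regionSize)

	// Only shed load from a store above the cluster average, and only when
	// the gap to the target still exceeds the buffer.
	return sourceScore > avgScore && sourceScore-targetScore > tolerantSize
}
```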